diff --git a/e2e/cloud/toolkit-opencode-real.test.ts b/e2e/cloud/toolkit-opencode-real.test.ts new file mode 100644 index 000000000..5bed45359 --- /dev/null +++ b/e2e/cloud/toolkit-opencode-real.test.ts @@ -0,0 +1,103 @@ +// The toolkit-connect performance bug, reproduced with the REAL opencode +// binary in a REAL terminal. A user adds a dozen sources to their workspace, +// makes a toolkit, and points OpenCode at /mcp/toolkits/{slug}. OpenCode runs +// its own OAuth (discovery, DCR, PKCE) and then `mcp list`. The whole session +// runs in one recorded PTY: the run's terminal.cast replays exactly what a +// user at a shell sees. +// +// Before the fix, building the toolkit session walks the WHOLE catalog with a +// per-tool policy resolution (an N+1 that scales with total tools, not toolkit +// size), so `mcp list` blows past OpenCode's connect timeout and the server +// shows "failed". After the fix the walk is two batched reads and the same +// command shows "connected". The catalog is seeded over the public API exactly +// as a user would build it. +import { join } from "node:path"; + +import { expect } from "@effect/vitest"; +import { Effect } from "effect"; + +import { scenario } from "../src/scenario"; +import { Api, Cli, OpenCode, RunDir, Target } from "../src/services"; +import { catalogApi, seedLargeCatalog } from "../scenarios/support/large-catalog"; + +const SERVER_NAME = "executor"; + +scenario( + "Toolkits · the real OpenCode binary connects to a toolkit over a large catalog", + { timeout: 240_000 }, + Effect.scoped( + Effect.gen(function* () { + const target = yield* Target; + const opencode = yield* OpenCode; + const runDir = yield* RunDir; + const cli = yield* Cli; + const { client: makeClient } = yield* Api; + + const identity = yield* target.newIdentity(); + const email = identity.credentials?.email ?? 
identity.label; + const client = yield* makeClient(catalogApi, identity); + + // Build a production-shaped workspace: one real spec + ten more sources, + // a toolkit scoped to one of them. ~3,300 tools across 11 sources — large + // enough that the pre-fix per-tool policy N+1 pushes the toolkit connect + // past OpenCode's client timeout. + const seeded = yield* seedLargeCatalog(client); + + yield* Effect.gen(function* () { + const toolkitUrl = new URL( + `/mcp/toolkits/${seeded.toolkitSlug}`, + target.baseUrl, + ).toString(); + const home = opencode.makeHome(SERVER_NAME, toolkitUrl); + // First-run database migration happens off camera. + yield* Effect.sync(() => opencode.warmUp(home)); + + yield* cli.session( + ["bash", "--norc"], + async (term) => { + await term.screen.waitForText("$", { timeoutMs: 10_000 }); + + /* Returns the screen text that follows the LAST occurrence of the typed line, but only once that text (trailing whitespace trimmed) ends with a "$" prompt on its own line; returns null while the line is not on screen or no prompt follows it yet. */ const outputAfter = (text: string, line: string): string | null => { + const echoed = text.lastIndexOf(line); + if (echoed === -1) return null; + const after = text.slice(echoed + line.length); + return after.trimEnd().endsWith("\n$") ? after : null; + }; + /* Types one shell line, presses Enter, waits up to timeoutMs for outputAfter to become non-null, and returns that output ("" if the final snapshot no longer matches). */ const sh = async (line: string, timeoutMs: number) => { + await term.keyboard.type(line); + await term.keyboard.press("Enter"); + const snapshot = await term.screen.waitUntil( + (current) => outputAfter(current.text, line) !== null, + { timeoutMs }, + ); + return outputAfter(snapshot.text, line) ?? ""; + }; + + // OpenCode completes MCP OAuth for real: discovery, DCR, PKCE, its + // own scope request, its own token store. + /* NOTE(review): consent is started before sh() and only awaited after it; if sh() rejects (e.g. its wait times out) consent is never awaited — confirm that cannot surface as an unhandled rejection. */ const consent = opencode.completeOAuthConsent(home, email, home.openedUrls().length); + const auth = await sh(`opencode mcp auth ${SERVER_NAME}`, 90_000); + await consent; + expect(auth, "opencode mcp auth completes").not.toContain("failed"); + + // The load-bearing line: listing the toolkit server forces OpenCode + // to establish the session, which builds the execute-tool + // description over the whole catalog. 
Pre-fix this is where the + // per-tool N+1 times the client out; post-fix it returns promptly. + const listed = await sh("opencode mcp list", 120_000); + expect( + listed, + "OpenCode connects to the toolkit even with a large catalog behind it", + ).toContain("connected"); + }, + { + cwd: home.projectDir, + env: { ...home.env, PS1: "$ ", BASH_SILENCE_DEPRECATION_WARNING: "1" }, + record: join(runDir, "terminal.cast"), + viewport: { cols: 100, rows: 40 }, + }, + ); + }).pipe(Effect.ensuring(seeded.cleanup)); + }), + ), +); diff --git a/e2e/cloud/toolkit-policy-perf.test.ts b/e2e/cloud/toolkit-policy-perf.test.ts new file mode 100644 index 000000000..37494ab97 --- /dev/null +++ b/e2e/cloud/toolkit-policy-perf.test.ts @@ -0,0 +1,186 @@ +// Cloud: a toolkit MCP endpoint must connect in time that does NOT scale with +// the size of the surrounding catalog. +// +// When a client (OpenCode, Claude Code, mcporter) connects to +// /mcp/toolkits/{slug}, the server builds the `execute` tool's description, +// which lists the workspace's connections, which (for a toolkit session) runs +// the policy engine over the WHOLE catalog to decide tool visibility. A +// per-tool policy resolution there is an N+1 that scales with total catalog +// size, not toolkit size: a workspace with thousands of tools across a dozen +// sources pushes the connect past the MCP client's connect timeout, and the +// toolkit appears permanently "failed" even though nothing is broken. +// +// This scenario seeds a production-shaped catalog (one real OpenAPI spec plus +// enough synthetic sources to look like a working workspace, ~2,200 tools over +// 11 sources) and asserts the catalog size adds only a small, bounded cost to +// connect. The control is a fresh identity with a near-empty catalog: it pays +// the same OAuth + MCP handshake, so the DIFFERENCE isolates the catalog-walk +// cost the fix removes. 
Before the fix this delta is tens of seconds (and in +// production, a hard timeout); after it, it is sub-second. + +import { expect } from "@effect/vitest"; +import { Effect } from "effect"; +import { AuthTemplateSlug, ConnectionName, IntegrationSlug } from "@executor-js/sdk/shared"; + +import { scenario } from "../src/scenario"; +import { Api, Mcp, Target } from "../src/services"; +import type { McpSurface } from "../src/surfaces/mcp"; +import type { Identity } from "../src/target"; +import { + catalogApi, + seedLargeCatalog, + unique, + type SeededCatalog, +} from "../scenarios/support/large-catalog"; + +const toolkitUrl = (baseUrl: string, slug: string): string => + new URL(`/mcp/toolkits/${slug}`, baseUrl).toString(); + +// The extra wall-clock a ~2,200-tool catalog is allowed to add to a toolkit +// connect over a near-empty one. Post-fix the catalog walk is a couple of +// batched reads (~tens of ms); pre-fix it is 2 uncached reads PER TOOL, tens +// of seconds. 10s sits well below the regression (a ~27s pre-fix connect at this size) and well +// above OAuth/scheduling jitter, so it is decisive without being flaky. +const MAX_CATALOG_CONNECT_OVERHEAD_MS = 10_000; + +// One-operation OpenAPI spec for the control identity: a real toolkit session, +// minimal catalog, so its connect time is "the handshake without the walk". +const tinySpec = (baseUrl: string): string => + JSON.stringify({ + openapi: "3.0.3", + info: { title: "Tiny API", version: "1.0.0" }, + servers: [{ url: baseUrl }], + paths: { + "/ping/{id}": { + get: { + operationId: "getPing", + security: [{ apiKey: [] }], + parameters: [{ name: "id", in: "path", required: true, schema: { type: "string" } }], + responses: { "200": { description: "ok" } }, + }, + }, + }, + components: { securitySchemes: { apiKey: { type: "apiKey", in: "header", name: "x-tok" } } }, + }); + +/** Time how long a fresh MCP client takes to connect to a toolkit endpoint and + * read its advertised tools (the path that pays the catalog-walk cost). 
*/ +const timeToolkitConnect = (mcp: McpSurface, identity: Identity, url: string) => + Effect.gen(function* () { + const session = mcp.session(identity, { url }); + /* The clock starts after mcp.session(...) has returned, so only describeTools() is inside the timed window. */ const startedAt = Date.now(); + const defs = yield* session.describeTools(); + const elapsedMs = Date.now() - startedAt; + return { elapsedMs, toolNames: defs.map((d) => d.name) }; + }); + +scenario( + "Toolkits · connect time does not scale with catalog size", + { timeout: 240_000 }, + Effect.scoped( + Effect.gen(function* () { + const target = yield* Target; + const mcp = yield* Mcp; + const { client: makeClient } = yield* Api; + + // --- control: a fresh identity with a near-empty catalog --------------- + const controlIdentity = yield* target.newIdentity(); + const controlClient = yield* makeClient(catalogApi, controlIdentity); + const controlIntegration = unique("tiny"); + const controlToolkitName = unique("control-kit"); + + const control = yield* Effect.gen(function* () { + yield* controlClient.openapi.addSpec({ + payload: { + spec: { kind: "blob", value: tinySpec("https://tiny.example") }, + slug: IntegrationSlug.make(controlIntegration), + baseUrl: "https://tiny.example", + authenticationTemplate: [ + { + slug: "apiKey", + type: "apiKey", + headers: { "x-tok": [{ type: "variable", name: "token" }] }, + }, + ], + }, + }); + yield* controlClient.connections.create({ + payload: { + owner: "org", + name: ConnectionName.make("conn0"), + integration: IntegrationSlug.make(controlIntegration), + template: AuthTemplateSlug.make("apiKey"), + value: "unused-token", + }, + }); + /* NOTE(review): the finalizer attached to this block only calls openapi.removeSpec; this toolkit is never passed to toolkits.remove, whereas seedLargeCatalog's cleanup removes both the toolkit and the specs — confirm removing the spec is sufficient for the control identity. */ const toolkit = yield* controlClient.toolkits.create({ + payload: { owner: "org", name: controlToolkitName }, + }); + yield* controlClient.toolkits.createConnection({ + params: { toolkitId: toolkit.id }, + payload: { pattern: `${controlIntegration}.org.conn0.*` }, + }); + return yield* timeToolkitConnect( + mcp, + controlIdentity, + toolkitUrl(target.baseUrl, toolkit.slug), + ); + }).pipe( + Effect.ensuring( + controlClient.openapi 
+ .removeSpec({ params: { slug: controlIntegration } }) + .pipe(Effect.ignore), + ), + ); + + expect(control.toolNames, "control toolkit advertises the execute tool").toContain("execute"); + + // --- subject: a fresh identity with a large, production-shaped catalog - + const bigIdentity = yield* target.newIdentity(); + const bigClient = yield* makeClient(catalogApi, bigIdentity); + let seededCatalog: SeededCatalog | undefined; + + yield* Effect.gen(function* () { + // 1 real source (Vercel, 322 ops) + 10 synthetic = 11 sources, ~2,200 + // tools. Sized so the pre-fix N+1 connect (~27s here) still completes + // under the MCP client's connect timeout, so the regression surfaces as + // a failed ASSERTION on the overhead rather than a connect crash. + const seeded = yield* seedLargeCatalog(bigClient, { + includeRealSpec: true, + syntheticSources: 10, + opsPerSource: 190, + }); + seededCatalog = seeded; + expect( + seeded.toolCount, + "the seeded catalog is large enough to expose the N+1 (thousands of tools)", + ).toBeGreaterThan(2_000); + expect( + seeded.integrationSlugs.length, + "the catalog spans a production-like number of sources", + ).toBeGreaterThanOrEqual(11); + + const big = yield* timeToolkitConnect( + mcp, + bigIdentity, + toolkitUrl(target.baseUrl, seeded.toolkitSlug), + ); + expect(big.toolNames, "large-catalog toolkit advertises the execute tool").toContain( + "execute", + ); + + const overhead = big.elapsedMs - control.elapsedMs; + expect( + overhead, + `catalog of ${seeded.toolCount} tools across ${seeded.integrationSlugs.length} sources ` + + `added ${overhead}ms to connect (big ${big.elapsedMs}ms vs control ${control.elapsedMs}ms); ` + + `the per-tool policy N+1 would make this scale into tens of seconds`, + ).toBeLessThan(MAX_CATALOG_CONNECT_OVERHEAD_MS); + }).pipe( + Effect.ensuring( + Effect.suspend(() => seededCatalog?.cleanup ?? 
Effect.void).pipe(Effect.ignore), + ), + ); + }), + ), +); diff --git a/e2e/scenarios/support/large-catalog.ts b/e2e/scenarios/support/large-catalog.ts new file mode 100644 index 000000000..3c925a9e6 --- /dev/null +++ b/e2e/scenarios/support/large-catalog.ts @@ -0,0 +1,220 @@ +// Seeds a production-shaped catalog for the toolkit-policy performance +// scenarios, plus the pure builders behind it. +// +// Why a large catalog at all: a toolkit MCP endpoint resolves tool visibility +// by running the policy engine over the WHOLE catalog on connect (the +// getDescription -> connections.list -> toolsList path), not just the toolkit's +// own connections. A per-tool resolution there is an N+1 that scales with total +// catalog size, so a realistic workspace (a real spec plus enough sources to +// look like one) is what surfaces the regression. +// +// `catalogApi` is exported so scenarios build their client from the SAME +// composition `seedLargeCatalog` is typed against — that keeps the seeding +// fully typed (no structural-client gymnastics) while staying DRY across the +// deterministic guard and the OpenCode recording. +import { randomBytes } from "node:crypto"; +import { readFileSync } from "node:fs"; +import { fileURLToPath } from "node:url"; + +import { Effect } from "effect"; +import type { HttpApi, HttpApiClient } from "effect/unstable/httpapi"; +import { composePluginApi } from "@executor-js/api/server"; +import { openApiHttpPlugin } from "@executor-js/plugin-openapi/api"; +import { toolkitsPlugin } from "@executor-js/plugin-toolkits/server"; +import { AuthTemplateSlug, ConnectionName, IntegrationSlug } from "@executor-js/sdk/shared"; + +export const unique = (prefix: string): string => `${prefix}_${randomBytes(4).toString("hex")}`; + +const VERCEL_SPEC_PATH = fileURLToPath( + new URL("../../../packages/plugins/openapi/fixtures/vercel.json", import.meta.url), +); + +/** The real Vercel OpenAPI fixture (322 operations) as a JSON string. 
*/ +export const vercelSpecText = (): string => readFileSync(VERCEL_SPEC_PATH, "utf-8"); + +/** An OpenAPI 3 doc with `ops` independent GET operations, shaped like a real + * REST surface (path params, an apiKey scheme, JSON responses). */ +export const syntheticSpec = (title: string, ops: number, baseUrl: string): string => { + const paths: Record = {}; + for (let i = 0; i < ops; i++) { + paths[`/resource${i}/{id}`] = { + get: { + operationId: `get_resource_${i}`, + summary: `Fetch resource ${i}`, + security: [{ apiKey: [] }], + parameters: [{ name: "id", in: "path", required: true, schema: { type: "string" } }], + responses: { + "200": { + description: "ok", + content: { + "application/json": { + schema: { type: "object", properties: { id: { type: "string" } } }, + }, + }, + }, + }, + }, + }; + } + return JSON.stringify({ + openapi: "3.0.3", + info: { title, version: "1.0.0" }, + servers: [{ url: baseUrl }], + paths, + components: { securitySchemes: { apiKey: { type: "apiKey", in: "header", name: "x-tok" } } }, + }); +}; + +export interface SeedSource { + readonly slug: string; + readonly specText: string; + readonly baseUrl: string; + readonly description?: string; +} + +export interface SeedPlan { + readonly sources: ReadonlyArray; + /** The first source's slug — the one the toolkit is scoped to. */ + readonly firstSlug: string; + /** Connection pattern the toolkit binds (the first source, org, conn0). */ + readonly toolkitConnectionPattern: string; +} + +export interface SeedOptions { + /** Synthetic sources on top of the real spec. Default 10. */ + readonly syntheticSources?: number; + /** Operations per synthetic source. Default 300. */ + readonly opsPerSource?: number; + /** Include the real Vercel fixture (322 ops) as one source. Default true. */ + readonly includeRealSpec?: boolean; +} + +/** + * Plan a large, production-shaped catalog. 
With the defaults (1 real + 10 + * synthetic sources, 300 ops each) it is ~3,300 tools across 11 sources, where + * the per-tool policy N+1 turns a toolkit connect from sub-second into a 30s+ + * client timeout. Each source gets exactly one org connection at `conn0`. + */ +export const planLargeCatalog = (options: SeedOptions = {}): SeedPlan => { + const syntheticSources = options.syntheticSources ?? 10; + const opsPerSource = options.opsPerSource ?? 300; + const includeRealSpec = options.includeRealSpec ?? true; + + const sources: SeedSource[] = []; + if (includeRealSpec) { + sources.push({ + slug: unique("vercel"), + specText: vercelSpecText(), + baseUrl: "https://api.vercel.com", + description: "Vercel API", + }); + } + for (let s = 0; s < syntheticSources; s++) { + sources.push({ + slug: unique("svc"), + specText: syntheticSpec(`Service ${s}`, opsPerSource, "https://service.example"), + baseUrl: "https://service.example", + }); + } + + /* NOTE(review): with includeRealSpec: false and syntheticSources: 0 the plan is empty, sources[0] is undefined and this line throws a TypeError (the "!" is compile-time only) — confirm no caller requests an empty plan. */ const firstSlug = sources[0]!.slug; + return { + sources, + firstSlug, + toolkitConnectionPattern: `${firstSlug}.org.conn0.*`, + }; +}; + +// --------------------------------------------------------------------------- +// Typed seeding. Scenarios build their client from `catalogApi` so the client +// type here matches theirs exactly. +// --------------------------------------------------------------------------- + +/** The plugin API the seeding (and the scenarios) speak: OpenAPI specs + + * toolkits. Build the scenario's client from THIS so types line up. */ +export const catalogApi = composePluginApi([openApiHttpPlugin(), toolkitsPlugin()] as const); + +type GroupsOf = A extends HttpApi.HttpApi ? Groups : never; +export type CatalogClient = HttpApiClient.Client>; + +export interface SeededCatalog { + /** The org toolkit slug to point an MCP client at: /mcp/toolkits/{slug}. */ + readonly toolkitSlug: string; + readonly toolkitId: string; + /** Total tools in the identity's catalog after seeding (the N+1 multiplier). 
*/ + readonly toolCount: number; + /** Integration slugs created, for assertions / debugging. */ + readonly integrationSlugs: ReadonlyArray; + /** Remove every spec + toolkit this seeder created. */ + readonly cleanup: Effect.Effect; +} + +/** + * Build the planned catalog under the current identity (one org connection per + * source) and a toolkit scoped to the first source, then report the total tool + * count and a finalizer. The toolkit's own surface is small; the connect cost + * the scenarios guard comes from the policy engine walking the whole catalog. + */ +export const seedLargeCatalog = ( + client: CatalogClient, + options: SeedOptions = {}, +): Effect.Effect => + Effect.gen(function* () { + const plan = planLargeCatalog(options); + + /* NOTE(review): cleanup is only built and returned after every step below has succeeded; if an addSpec / connections.create / toolkits call fails part-way, the specs created so far are not removed by this function and the caller never receives a finalizer for them. */ for (const source of plan.sources) { + yield* client.openapi.addSpec({ + payload: { + spec: { kind: "blob", value: source.specText }, + slug: IntegrationSlug.make(source.slug), + baseUrl: source.baseUrl, + ...(source.description ? { description: source.description } : {}), + authenticationTemplate: [ + { + slug: "apiKey", + type: "apiKey", + headers: { "x-tok": [{ type: "variable", name: "token" }] }, + }, + ], + }, + }); + yield* client.connections.create({ + payload: { + owner: "org", + name: ConnectionName.make("conn0"), + integration: IntegrationSlug.make(source.slug), + template: AuthTemplateSlug.make("apiKey"), + value: "unused-token", + }, + }); + } + + /* Counted before the toolkit is created, straight from tools.list with an empty query. */ const allTools = yield* client.tools.list({ query: {} }); + const toolCount = allTools.length; + + const toolkit = yield* client.toolkits.create({ + payload: { owner: "org", name: unique("perf-kit") }, + }); + yield* client.toolkits.createConnection({ + params: { toolkitId: toolkit.id }, + payload: { pattern: plan.toolkitConnectionPattern }, + }); + + const integrationSlugs = plan.sources.map((source) => source.slug); + const cleanup = Effect.gen(function* () { + yield* client.toolkits.remove({ params: { toolkitId: toolkit.id } }).pipe(Effect.ignore); + yield* 
Effect.forEach( + integrationSlugs, + (slug) => client.openapi.removeSpec({ params: { slug } }).pipe(Effect.ignore), + { discard: true }, + ); + }).pipe(Effect.ignore); + + return { + toolkitSlug: toolkit.slug, + toolkitId: toolkit.id, + toolCount, + integrationSlugs, + cleanup, + }; + }); diff --git a/e2e/src/clients/opencode.ts b/e2e/src/clients/opencode.ts index f4242063e..c9191e55c 100644 --- a/e2e/src/clients/opencode.ts +++ b/e2e/src/clients/opencode.ts @@ -66,10 +66,15 @@ export const makeOpenCodeHome = ( : {}), }), ); - // OpenCode launches the OAuth URL via `open`; the shim records it instead. - writeFileSync(join(binDir, "open"), `#!/bin/sh\necho "$@" >> ${openedUrlsFile}\nexit 0\n`, { - mode: 0o755, - }); + // OpenCode launches the OAuth URL via the platform browser opener; the shim + // records it instead. macOS uses `open`; Linux uses `xdg-open` (and a few + // fallback names), so shim them all to the same recorder — otherwise the URL + // is never captured on Linux and consent never completes. + for (const opener of ["open", "xdg-open", "www-browser", "x-www-browser"]) { + writeFileSync(join(binDir, opener), `#!/bin/sh\necho "$@" >> ${openedUrlsFile}\nexit 0\n`, { + mode: 0o755, + }); + } return { projectDir, diff --git a/packages/core/sdk/src/executor.ts b/packages/core/sdk/src/executor.ts index 341d44c61..7af4d08ec 100644 --- a/packages/core/sdk/src/executor.ts +++ b/packages/core/sdk/src/executor.ts @@ -2442,6 +2442,13 @@ export const createExecutor = EffectivePolicy; }; const compareProviderPolicyRule = ( @@ -2477,19 +2484,26 @@ export const createExecutor = => activeToolPolicyProvider - ? activeToolPolicyProvider.resolve - ? Effect.succeed({ - kind: "provider" as const, - provider: activeToolPolicyProvider, - rules: null, - }) - : activeToolPolicyProvider.list().pipe( - Effect.map((rules) => ({ + ? 
// Batched per-operation resolver: fetch all policy + connection state + // once, then resolve every tool in this operation against that + // snapshot. Avoids the per-tool resolve N+1 on the list surface. + activeToolPolicyProvider.prepare + ? activeToolPolicyProvider + .prepare() + .pipe(Effect.map((resolve) => ({ kind: "prepared" as const, resolve }))) + : activeToolPolicyProvider.resolve + ? Effect.succeed({ kind: "provider" as const, - provider: activeToolPolicyProvider!, - rules, - })), - ) + provider: activeToolPolicyProvider, + rules: null, + }) + : activeToolPolicyProvider.list().pipe( + Effect.map((rules) => ({ + kind: "provider" as const, + provider: activeToolPolicyProvider!, + rules, + })), + ) : core .findMany("tool_policy", {}) .pipe(Effect.map((rows) => ({ kind: "global" as const, rows }))); @@ -2499,13 +2513,20 @@ export const createExecutor = => - ruleSet.kind === "provider" - ? ruleSet.provider.resolve - ? ruleSet.provider.resolve({ toolId, defaultRequiresApproval }) - : Effect.succeed(resolveProviderPolicyFromRules(toolId, ruleSet.rules ?? [])) - : Effect.succeed( - resolveEffectivePolicy(toolId, ruleSet.rows, ownerRankForRow, defaultRequiresApproval), - ); + ruleSet.kind === "prepared" + ? Effect.succeed(ruleSet.resolve({ toolId, defaultRequiresApproval })) + : ruleSet.kind === "provider" + ? ruleSet.provider.resolve + ? ruleSet.provider.resolve({ toolId, defaultRequiresApproval }) + : Effect.succeed(resolveProviderPolicyFromRules(toolId, ruleSet.rules ?? 
[])) + : Effect.succeed( + resolveEffectivePolicy( + toolId, + ruleSet.rows, + ownerRankForRow, + defaultRequiresApproval, + ), + ); // ------------------------------------------------------------------ // Tools (read surface) diff --git a/packages/core/sdk/src/plugin.ts b/packages/core/sdk/src/plugin.ts index 2999c4a56..337d2d031 100644 --- a/packages/core/sdk/src/plugin.ts +++ b/packages/core/sdk/src/plugin.ts @@ -111,6 +111,26 @@ export interface ToolPolicyProvider { readonly toolId: string; readonly defaultRequiresApproval?: boolean; }) => Effect.Effect; + /** + * Batched per-operation resolver. When defined, core calls `prepare` once at + * the start of an operation (a single tools/list or tools/call), fetching all + * the underlying policy + connection state in one pass, and reuses the + * returned pure resolver for every tool in that operation. This avoids the + * per-tool `resolve` N+1 (2 uncached storage reads per tool) that scales with + * the total catalog size on `toolsList`. + * + * The resolver is intentionally per-operation scoped, not memoized on the + * provider: the provider instance is session-scoped (lives across many + * requests), so caching on it would serve stale policy state. Each operation + * gets a fresh snapshot. 
+ */ + readonly prepare?: () => Effect.Effect< + (input: { + readonly toolId: string; + readonly defaultRequiresApproval?: boolean; + }) => EffectivePolicy, + StorageFailure + >; } // --------------------------------------------------------------------------- diff --git a/packages/plugins/toolkits/src/server.ts b/packages/plugins/toolkits/src/server.ts index a9e73e64f..0b70e3357 100644 --- a/packages/plugins/toolkits/src/server.ts +++ b/packages/plugins/toolkits/src/server.ts @@ -509,6 +509,38 @@ const makeToolkitsExtension = (ctx: PluginCtx) => { return resolveToolkitPolicy(toolId, connections, policies, defaultRequiresApproval); }); + // Batched form of `resolvePolicyForSlug`: fetch the toolkit, its policies, and + // its connections ONCE, then hand back a pure resolver core can run for every + // tool in a single tools/list or tools/call. `resolvePolicyForSlug` re-fetches + // policies + connections on every tool, which is the per-tool N+1 that scales + // with the whole catalog on the list surface. This is byte-for-byte the same + // resolution, just hoisted out of the loop. 
+ const preparePolicyResolverForSlug = ( + slug: string, + ): Effect.Effect< + (input: { + readonly toolId: string; + readonly defaultRequiresApproval?: boolean; + }) => EffectivePolicy, + StorageFailure + > => + Effect.gen(function* () { + const toolkit = yield* getBySlugEntry(slug); + /* Unknown slug: hand back a resolver that answers blockedPolicy() for every tool. */ if (!toolkit) return () => blockedPolicy(); + const isOrg = toolkit.owner === "org"; + /* Policies and connections are read once here; the returned closure does no further storage reads. */ const policies = yield* listPoliciesForRecord(toolkit.data.id); + const connections = yield* listConnectionsForRecord(toolkit.data.id); + return (input: { readonly toolId: string; readonly defaultRequiresApproval?: boolean }) => { + /* For org-owned toolkits, ids matched by isPersonalDynamicToolId are blocked before any rule is consulted. */ if (isOrg && isPersonalDynamicToolId(input.toolId)) return blockedPolicy(); + return resolveToolkitPolicy( + input.toolId, + connections, + policies, + input.defaultRequiresApproval, + ); + }; + }); + return { list, create, @@ -527,6 +559,7 @@ const makeToolkitsExtension = (ctx: PluginCtx) => { removeConnection, policyRulesForSlug, resolvePolicyForSlug, + preparePolicyResolverForSlug, }; }; @@ -638,12 +671,18 @@ const ToolkitsHandlers = HttpApiBuilder.group(ExecutorApiWithToolkits, "toolkits ); const makePolicyProvider = ( - extension: Pick, + extension: Pick< + ToolkitsExtension, + "policyRulesForSlug" | "resolvePolicyForSlug" | "preparePolicyResolverForSlug" + >, slug: string, ): ToolPolicyProvider => ({ list: () => extension.policyRulesForSlug(slug), resolve: ({ toolId, defaultRequiresApproval }) => extension.resolvePolicyForSlug(slug, toolId, defaultRequiresApproval), + // Preferred path: core calls this once per operation, so the toolkit's + // policies + connections are fetched once instead of once per tool. + prepare: () => extension.preparePolicyResolverForSlug(slug), }); export const toolkitsPlugin = definePlugin((options: ToolkitsPluginOptions = {}) => {