From a12701a255a8d1511a5e09a80973a0d73c05c92f Mon Sep 17 00:00:00 2001 From: Rhys Sullivan Date: Fri, 26 Jun 2026 21:08:18 -0700 Subject: [PATCH 1/2] Surface code syntax errors descriptively instead of an opaque internal error A genuine parse error in code sent to execute (for example a const with no binding, or pasted smart quotes) was collapsed to an opaque "Internal tool error [id]", giving the model nothing to self-correct with. Two masking paths were involved: sucrase strip-time parse failures, and plain-JS syntax that slips past sucrase and fails at workerd module compile. Add a CodeCompilationError tagged error and classify both paths into the descriptive ExecuteResult.error channel, carrying the real parser message. Genuine Effect failures and infra defects still collapse to the opaque mask, so the MCP host security boundary is unchanged. Covered red-to-green at the runtime layer (real WorkerLoader), the host layer (real MCP client), and an e2e scenario over the real MCP surface. --- e2e/scenarios/mcp-execute.test.ts | 25 ++++ packages/hosts/mcp/src/tool-server.test.ts | 27 ++++ packages/kernel/core/src/effect-errors.ts | 17 +++ .../runtime-dynamic-worker/src/executor.ts | 116 ++++++++++++------ .../src/invocation.test.ts | 34 +++++ 5 files changed, 184 insertions(+), 35 deletions(-) diff --git a/e2e/scenarios/mcp-execute.test.ts b/e2e/scenarios/mcp-execute.test.ts index 811f342cc..925bd8efa 100644 --- a/e2e/scenarios/mcp-execute.test.ts +++ b/e2e/scenarios/mcp-execute.test.ts @@ -23,3 +23,28 @@ scenario( expect(result.text, "the sandbox returns the value").toBe("42"); }), ); + +scenario( + "MCP · a syntax error returns a descriptive message, not an opaque internal error", + {}, + Effect.gen(function* () { + const target = yield* Target; + const mcp = yield* Mcp; + const identity = yield* target.newIdentity(); + const session = mcp.session(identity); + + // A genuine parse error (a `const` with no binding) is the user's own + // mistake, surfaced before the code ever runs. The model needs the real + // reason to self-correct, so the sandbox must report it descriptively + // instead of collapsing it to "Internal tool error [id]". + const result = yield* session.call("execute", { + code: "const = 5; return 1;", + }); + + expect(result.ok, "a syntax error is reported as an error result").toBe(false); + expect(result.text, "the parser's reason reaches the model").toContain("Unexpected"); + expect(result.text, "the opaque mask is not used for syntax errors").not.toContain( + "Internal tool error", + ); + }), +); diff --git a/packages/hosts/mcp/src/tool-server.test.ts b/packages/hosts/mcp/src/tool-server.test.ts index d42b5f741..a178a2f54 100644 --- a/packages/hosts/mcp/src/tool-server.test.ts +++ b/packages/hosts/mcp/src/tool-server.test.ts @@ -815,6 +815,33 @@ describe("MCP host server — native elicitation mode", () => { }); }); + it("surfaces a code syntax error descriptively, not as an opaque internal error", async () => { + // The runtime classifies a genuine parse error (a compile failure in + // the user's own code) onto the success channel as ExecuteResult.error. + // It must reach the model verbatim so it can self-correct, in contrast + // to a genuine Effect failure, which stays opaque (see the + // "execution failure stays opaque" test that asserts the masked form). + const engine = makeStubEngine({ + execute: () => + Effect.succeed({ + result: null, + error: 'Unexpected token, expected "," (1:54)', + }), + }); + + await withNativeClient(engine, ELICITATION_CAPS, async (client) => { + const result = await client.callTool({ + name: "execute", + arguments: { code: "const { items } = await tools.search({ query: 'x' }" }, + }); + expect(result.isError).toBe(true); + const text = textOf(result); + expect(text).toContain("Unexpected token"); + expect(text).toContain("(1:54)"); + expect(text).not.toContain("Internal tool error"); + }); + }); + it("resume tool is hidden in native elicitation mode", async () => { await withNativeClient(makeStubEngine({}), ELICITATION_CAPS, async (client) => { const { tools } = await client.listTools(); diff --git a/packages/kernel/core/src/effect-errors.ts b/packages/kernel/core/src/effect-errors.ts index 8e22d2406..303417a7e 100644 --- a/packages/kernel/core/src/effect-errors.ts +++ b/packages/kernel/core/src/effect-errors.ts @@ -19,3 +19,20 @@ export class CodeExecutionError extends Data.TaggedError("CodeExecutionError")<{ readonly message: string; readonly cause?: unknown; }> {} + +/** + * Raised when user code fails to compile before it ever runs: a genuine + * syntax/parse error (smart quotes from a copy-paste, an unbalanced + * brace, `const = 5`) caught while stripping TypeScript ahead of the + * JS-only sandbox. Unlike `CodeExecutionError` this is the user's + * mistake, not a sandbox defect, so runtimes surface its `message` + * through the descriptive `ExecuteResult.error` channel instead of + * collapsing it to an opaque internal-error string. The original parser + * message (e.g. "Unexpected token (1:54)") is carried verbatim so the + * model can see and fix it. + */ +export class CodeCompilationError extends Data.TaggedError("CodeCompilationError")<{ + readonly runtime: string; + readonly message: string; + readonly cause?: unknown; +}> {} diff --git a/packages/kernel/runtime-dynamic-worker/src/executor.ts b/packages/kernel/runtime-dynamic-worker/src/executor.ts index b42727e24..5be08335f 100644 --- a/packages/kernel/runtime-dynamic-worker/src/executor.ts +++ b/packages/kernel/runtime-dynamic-worker/src/executor.ts @@ -14,6 +14,7 @@ import * as Data from "effect/Data"; import * as Effect from "effect/Effect"; import { + CodeCompilationError, recoverExecutionBody, stripTypeScript, type CodeExecutor, @@ -161,6 +162,23 @@ const renderTransportMessage = (value: unknown): string => { return String(value); }; +/** + * Does this worker-startup rejection describe a compile error in the + * user's own code rather than a sandbox/infra defect? Not all syntax + * errors are caught while stripping TypeScript: smart quotes from a + * paste, an unbalanced brace, and other plain-JS parse errors slip past + * sucrase and only fail when workerd compiles the generated module, + * surfacing here as "Failed to start Worker: Uncaught SyntaxError: ...". + * That is still the user's mistake, so we route it to the descriptive + * error channel (alongside the strip-time path) instead of collapsing it + * to an opaque internal error. The check is deliberately narrow: a + * genuine SyntaxError thrown at runtime by user code (e.g. JSON.parse) + * never reaches this catch, since the sandbox reports those through its + * own result envelope, not as a worker-startup failure. + */ +const isWorkerCompileErrorMessage = (message: string): boolean => + message.includes("Failed to start Worker") || message.includes("SyntaxError"); + export const serializeWorkerCause = (cause: Cause.Cause): SerializedWorkerError => { const failures = cause.reasons .filter(Cause.isFailReason) @@ -433,36 +451,51 @@ const startDynamicWorker = ( options: DynamicWorkerExecutorOptions, code: string, timeoutMs: number, -): Effect.Effect => - Effect.try({ - try: (): DynamicWorkerEntrypoint => { - const recoveredBody = recoverExecutionBody(code); - // The dynamic Worker isolate only accepts plain JavaScript; TS type - // syntax in user code (`: T`, `as T`, generics) would otherwise - // surface as "Unexpected token ':'" inside `evaluate()` and bubble - // out via DynamicWorkerExecutionError. Stripping here gives the - // model a clear syntax-error message at the front door instead. - const strippedBody = stripTypeScript(recoveredBody); - const executorModule = buildExecutorModule(strippedBody, timeoutMs); - const { [ENTRY_MODULE]: _, ...safeModules } = options.modules ?? {}; - - const worker = options.loader.get(`executor-${crypto.randomUUID()}`, () => ({ - compatibilityDate: "2025-06-01", - compatibilityFlags: ["nodejs_compat"], - mainModule: ENTRY_MODULE, - modules: { - ...safeModules, - [ENTRY_MODULE]: executorModule, - }, - globalOutbound: options.globalOutbound ?? null, - })); - - return asDynamicWorkerEntrypoint(worker.getEntrypoint()); - }, - catch: (cause) => - new DynamicWorkerExecutionError({ - message: renderTransportMessage(serializeWorkerErrorValue(cause)), - }), +): Effect.Effect => + Effect.gen(function* () { + // The dynamic Worker isolate only accepts plain JavaScript; TS type + // syntax in user code (`: T`, `as T`, generics) would otherwise + // surface as "Unexpected token ':'" inside `evaluate()`. Stripping + // here means valid TS just works. But this step is also where a + // genuine syntax error (smart quotes from a paste, an unbalanced + // brace, `const = 5`) first surfaces, with the parser's precise + // "Unexpected token (line:col)" message. That is the user's mistake, + // not a sandbox defect, so it gets its own `CodeCompilationError` + // and flows back through the descriptive `ExecuteResult.error` + // channel rather than collapsing to an opaque internal error. + const strippedBody = yield* Effect.try({ + try: () => stripTypeScript(recoverExecutionBody(code)), + catch: (cause) => + new CodeCompilationError({ + runtime: "dynamic-worker", + message: renderTransportMessage(serializeWorkerErrorValue(cause)), + cause, + }), + }); + + return yield* Effect.try({ + try: (): DynamicWorkerEntrypoint => { + const executorModule = buildExecutorModule(strippedBody, timeoutMs); + const { [ENTRY_MODULE]: _, ...safeModules } = options.modules ?? {}; + + const worker = options.loader.get(`executor-${crypto.randomUUID()}`, () => ({ + compatibilityDate: "2025-06-01", + compatibilityFlags: ["nodejs_compat"], + mainModule: ENTRY_MODULE, + modules: { + ...safeModules, + [ENTRY_MODULE]: executorModule, + }, + globalOutbound: options.globalOutbound ?? null, + })); + + return asDynamicWorkerEntrypoint(worker.getEntrypoint()); + }, + catch: (cause) => + new DynamicWorkerExecutionError({ + message: renderTransportMessage(serializeWorkerErrorValue(cause)), + }), + }); }).pipe( Effect.withSpan("executor.runtime.startup", { attributes: { @@ -478,7 +511,7 @@ const evaluate = ( options: DynamicWorkerExecutorOptions, code: string, toolInvoker: SandboxToolInvoker, -): Effect.Effect => { +): Effect.Effect => { const timeoutMs = Math.max(100, options.timeoutMs ?? DEFAULT_TIMEOUT_MS); return Effect.gen(function* () { @@ -487,10 +520,15 @@ const evaluate = ( const entrypoint = yield* startDynamicWorker(options, code, timeoutMs); const response = yield* Effect.tryPromise({ try: () => entrypoint.evaluate(dispatcher), - catch: (cause) => - new DynamicWorkerExecutionError({ - message: renderTransportMessage(serializeWorkerErrorValue(cause)), - }), + catch: (cause) => { + const message = renderTransportMessage(serializeWorkerErrorValue(cause)); + // A module that fails to compile at worker startup is the user's + // own syntax error (one that escaped the strip step), not a + // sandbox defect: surface it descriptively rather than opaquely. + return isWorkerCompileErrorMessage(message) + ? new CodeCompilationError({ runtime: "dynamic-worker", message, cause }) + : new DynamicWorkerExecutionError({ message }); + }, }).pipe( Effect.withSpan("executor.runtime.evaluate", { attributes: { "executor.runtime": "dynamic-worker" }, @@ -517,6 +555,14 @@ const runInDynamicWorker = ( toolInvoker: SandboxToolInvoker, ): Effect.Effect => evaluate(options, code, toolInvoker).pipe( + // A compile error is the user's own syntax mistake, not a sandbox + // defect. Fold it into the success channel as a descriptive + // `ExecuteResult.error` so the precise parser message reaches the + // model, exactly as a thrown runtime error does, instead of being + // collapsed to an opaque internal error by the host failure path. + Effect.catchTag("CodeCompilationError", (error) => + Effect.succeed({ result: null, error: error.message } satisfies ExecuteResult), + ), Effect.withSpan("executor.code.exec.dynamic_worker", { attributes: { "executor.runtime": "dynamic-worker" }, }), diff --git a/packages/kernel/runtime-dynamic-worker/src/invocation.test.ts b/packages/kernel/runtime-dynamic-worker/src/invocation.test.ts index 1a8869f64..6fc7741ca 100644 --- a/packages/kernel/runtime-dynamic-worker/src/invocation.test.ts +++ b/packages/kernel/runtime-dynamic-worker/src/invocation.test.ts @@ -271,6 +271,40 @@ describe("makeDynamicWorkerExecutor", () => { expect(result.error).toBe("Internal tool error"); }); + it("surfaces a syntax error with the parser's descriptive message, not an opaque generic", async () => { + const executor = makeDynamicWorkerExecutor({ loader }); + const invoker = makeInvoker(() => null); + + // A genuine parse error: `const` with no binding name. Before the fix + // this threw in stripTypeScript, became a DynamicWorkerExecutionError + // on the failure channel, and the host collapsed it to the opaque + // "Internal tool error". The model needs the real reason to self-correct. + const result = await Effect.runPromise( + executor.execute("async () => { const = 5; return 1; }", invoker), + ); + + expect(result.result).toBeNull(); + expect(result.error).toBeDefined(); + expect(result.error).not.toBe("Internal tool error"); + expect(result.error).not.toContain("Internal tool error"); + expect(result.error?.toLowerCase()).toContain("unexpected"); + }); + + it("surfaces smart-quote paste errors descriptively", async () => { + const executor = makeDynamicWorkerExecutor({ loader }); + const invoker = makeInvoker(() => null); + + // Curly quotes from a copy-paste are the most common real-world cause: + // the snippet looks fine to a human but is invalid JavaScript. + const result = await Effect.runPromise( + executor.execute("async () => { return “hello”; }", invoker), + ); + + expect(result.result).toBeNull(); + expect(result.error).toBeDefined(); + expect(result.error).not.toBe("Internal tool error"); + }); + it("preserves public ExecutionToolError messages across the worker bridge", async () => { const executor = makeDynamicWorkerExecutor({ loader }); const invoker = { From fe201b6e66c8297a5d47b7e4751a81af0e2df2b0 Mon Sep 17 00:00:00 2001 From: Rhys Sullivan Date: Fri, 26 Jun 2026 21:40:40 -0700 Subject: [PATCH 2/2] Surface all sandbox runtime conditions descriptively, not just syntax errors Production telemetry (executor.runtime.* spans) showed the dynamic-worker runtime was collapsing several more error categories to an opaque "Internal tool error" before the model could act on them: a non-serializable return value (DataCloneError, "Could not serialize object of type X"), the isolate's CPU or memory limit, and being momentarily at worker capacity. The first pass only routed syntax errors descriptively. Replace the narrow syntax-only predicate with a classifier that sorts a sandbox rejection into compilation, runtime, or internal, applied at both the worker-startup and the evaluate-RPC catch. Compilation and runtime conditions carry their verbatim message through the descriptive ExecuteResult.error channel via the existing CodeCompilationError and a new SandboxRuntimeError; unrecognized defects stay on DynamicWorkerExecutionError and remain opaque, preserving the host's failure-channel boundary. The other runtimes (quickjs, deno-subprocess) already surface errors through ExecuteResult.error, so the masking was specific to dynamic-worker. Covered with a live WorkerLoader test for the serialization path and a classifier table pinning every production message category, alongside the existing host-layer descriptive-flow coverage. --- packages/kernel/core/src/effect-errors.ts | 20 +++ .../runtime-dynamic-worker/src/executor.ts | 158 ++++++++++++++---- .../src/invocation.test.ts | 66 ++++++++ 3 files changed, 210 insertions(+), 34 deletions(-) diff --git a/packages/kernel/core/src/effect-errors.ts b/packages/kernel/core/src/effect-errors.ts index 303417a7e..2b391a372 100644 --- a/packages/kernel/core/src/effect-errors.ts +++ b/packages/kernel/core/src/effect-errors.ts @@ -36,3 +36,23 @@ export class CodeCompilationError extends Data.TaggedError("CodeCompilationError readonly message: string; readonly cause?: unknown; }> {} + +/** + * Raised when the sandbox cannot run the user's code to completion for a + * reason that is not a syntax error but is still safe and useful to + * report back: the returned value is not serializable across the sandbox + * boundary (a Symbol, a host object like `Cloudflare`), the code exceeded + * the isolate's CPU or memory limit, or the sandbox is momentarily at + * capacity. Like `CodeCompilationError`, and unlike `CodeExecutionError`, + * this describes the user's own code or a transient, retryable condition + * rather than an executor defect, so runtimes surface its verbatim + * `message` through the descriptive `ExecuteResult.error` channel instead + * of collapsing it to an opaque internal-error string. Genuinely + * unexpected sandbox defects stay on `CodeExecutionError` and remain + * opaque, preserving the host's failure-channel boundary. + */ +export class SandboxRuntimeError extends Data.TaggedError("SandboxRuntimeError")<{ + readonly runtime: string; + readonly message: string; + readonly cause?: unknown; +}> {} diff --git a/packages/kernel/runtime-dynamic-worker/src/executor.ts b/packages/kernel/runtime-dynamic-worker/src/executor.ts index 5be08335f..4c0715364 100644 --- a/packages/kernel/runtime-dynamic-worker/src/executor.ts +++ b/packages/kernel/runtime-dynamic-worker/src/executor.ts @@ -16,6 +16,7 @@ import * as Effect from "effect/Effect"; import { CodeCompilationError, recoverExecutionBody, + SandboxRuntimeError, stripTypeScript, type CodeExecutor, type ExecuteOutputItem, @@ -162,22 +163,102 @@ const renderTransportMessage = (value: unknown): string => { return String(value); }; +const serializedErrorName = (value: SerializedWorkerErrorValue): string | null => + typeof value === "object" && + value !== null && + "name" in value && + typeof (value as { name?: unknown }).name === "string" + ? (value as { name: string }).name + : null; + /** - * Does this worker-startup rejection describe a compile error in the - * user's own code rather than a sandbox/infra defect? Not all syntax + * Signatures of a compile error in the user's own code. Not all syntax * errors are caught while stripping TypeScript: smart quotes from a * paste, an unbalanced brace, and other plain-JS parse errors slip past * sucrase and only fail when workerd compiles the generated module, - * surfacing here as "Failed to start Worker: Uncaught SyntaxError: ...". - * That is still the user's mistake, so we route it to the descriptive - * error channel (alongside the strip-time path) instead of collapsing it - * to an opaque internal error. The check is deliberately narrow: a - * genuine SyntaxError thrown at runtime by user code (e.g. JSON.parse) - * never reaches this catch, since the sandbox reports those through its - * own result envelope, not as a worker-startup failure. + * surfacing as "Failed to start Worker: Uncaught SyntaxError: ...". The + * bare V8 phrasings ("Unexpected token ...", "Invalid or unexpected + * token") are matched too, since some surfaces report the inner message + * without the wrapper. */ -const isWorkerCompileErrorMessage = (message: string): boolean => - message.includes("Failed to start Worker") || message.includes("SyntaxError"); +const COMPILE_SIGNATURES = [ + "Failed to start Worker", + "SyntaxError", + "Unexpected token", + "Invalid or unexpected token", +] as const; + +/** + * Signatures of a sandbox runtime condition that is the user's own + * concern (or transient and retryable) rather than an executor defect: a + * non-serializable return value, the isolate's CPU or memory limit, and + * being momentarily at worker capacity. These are the real categories + * seen in production on the `executor.runtime.*` spans, all of which were + * being collapsed to an opaque internal error before the model could act + * on them. + */ +const RUNTIME_SIGNATURES = [ + "could not be cloned", + "does not support serialization", + "Could not serialize", + "exceeded CPU", + "exceeded memory", + "Too many concurrent dynamic workers", +] as const; + +export type SandboxFailureKind = "compilation" | "runtime" | "internal"; + +/** + * Classify a sandbox rejection so the runtime knows whether to surface + * its message descriptively (the user's mistake or a transient, + * safe-to-report condition) or collapse it to an opaque internal error (a + * genuine, unexpected sandbox defect). Tool-invocation failures never + * reach here: the sandbox reports those through its own result envelope, + * so this only sees module compile failures, return-value serialization + * failures, isolate resource limits, and capacity rejections. Anything + * unrecognized stays "internal" and opaque, preserving the host's + * failure-channel boundary. + */ +export const classifySandboxFailure = ( + serialized: SerializedWorkerErrorValue, + message: string, +): SandboxFailureKind => { + const name = serializedErrorName(serialized); + if ( + name === "SyntaxError" || + COMPILE_SIGNATURES.some((signature) => message.includes(signature)) + ) { + return "compilation"; + } + if ( + name === "DataCloneError" || + RUNTIME_SIGNATURES.some((signature) => message.includes(signature)) + ) { + return "runtime"; + } + return "internal"; +}; + +/** + * Map a raw sandbox rejection (a thrown value from the worker loader or + * the `evaluate` RPC) to the typed error its classification calls for. + * Compilation and runtime conditions carry the verbatim message through + * the descriptive channel; unrecognized defects stay opaque. + */ +const toSandboxFailure = ( + cause: unknown, +): CodeCompilationError | SandboxRuntimeError | DynamicWorkerExecutionError => { + const serialized = serializeWorkerErrorValue(cause); + const message = renderTransportMessage(serialized); + switch (classifySandboxFailure(serialized, message)) { + case "compilation": + return new CodeCompilationError({ runtime: "dynamic-worker", message, cause }); + case "runtime": + return new SandboxRuntimeError({ runtime: "dynamic-worker", message, cause }); + default: + return new DynamicWorkerExecutionError({ message }); + } +}; export const serializeWorkerCause = (cause: Cause.Cause): SerializedWorkerError => { const failures = cause.reasons @@ -451,7 +532,10 @@ const startDynamicWorker = ( options: DynamicWorkerExecutorOptions, code: string, timeoutMs: number, -): Effect.Effect => +): Effect.Effect< + DynamicWorkerEntrypoint, + DynamicWorkerExecutionError | CodeCompilationError | SandboxRuntimeError +> => Effect.gen(function* () { // The dynamic Worker isolate only accepts plain JavaScript; TS type // syntax in user code (`: T`, `as T`, generics) would otherwise @@ -491,10 +575,11 @@ const startDynamicWorker = ( return asDynamicWorkerEntrypoint(worker.getEntrypoint()); }, - catch: (cause) => - new DynamicWorkerExecutionError({ - message: renderTransportMessage(serializeWorkerErrorValue(cause)), - }), + // A compile error that escaped the strip step, or a capacity + // rejection, can surface here at worker startup rather than at + // `evaluate`. Classify it so the user-actionable reason reaches the + // model instead of an opaque internal error. + catch: toSandboxFailure, }); }).pipe( Effect.withSpan("executor.runtime.startup", { @@ -511,7 +596,10 @@ const evaluate = ( options: DynamicWorkerExecutorOptions, code: string, toolInvoker: SandboxToolInvoker, -): Effect.Effect => { +): Effect.Effect< + ExecuteResult, + DynamicWorkerExecutionError | CodeCompilationError | SandboxRuntimeError +> => { const timeoutMs = Math.max(100, options.timeoutMs ?? DEFAULT_TIMEOUT_MS); return Effect.gen(function* () { @@ -520,15 +608,11 @@ const evaluate = ( const entrypoint = yield* startDynamicWorker(options, code, timeoutMs); const response = yield* Effect.tryPromise({ try: () => entrypoint.evaluate(dispatcher), - catch: (cause) => { - const message = renderTransportMessage(serializeWorkerErrorValue(cause)); - // A module that fails to compile at worker startup is the user's - // own syntax error (one that escaped the strip step), not a - // sandbox defect: surface it descriptively rather than opaquely. - return isWorkerCompileErrorMessage(message) - ? new CodeCompilationError({ runtime: "dynamic-worker", message, cause }) - : new DynamicWorkerExecutionError({ message }); - }, + // The evaluate RPC rejects for module compile failures that escaped + // the strip step, non-serializable return values, isolate resource + // limits, and capacity. All are the user's concern or transient, so + // classify and surface them descriptively rather than opaquely. + catch: toSandboxFailure, }).pipe( Effect.withSpan("executor.runtime.evaluate", { attributes: { "executor.runtime": "dynamic-worker" }, @@ -555,14 +639,20 @@ const runInDynamicWorker = ( toolInvoker: SandboxToolInvoker, ): Effect.Effect => evaluate(options, code, toolInvoker).pipe( - // A compile error is the user's own syntax mistake, not a sandbox - // defect. Fold it into the success channel as a descriptive - // `ExecuteResult.error` so the precise parser message reaches the - // model, exactly as a thrown runtime error does, instead of being - // collapsed to an opaque internal error by the host failure path. - Effect.catchTag("CodeCompilationError", (error) => - Effect.succeed({ result: null, error: error.message } satisfies ExecuteResult), - ), + // A compile error or a reportable sandbox runtime condition (a + // non-serializable result, a CPU/memory limit, capacity) is the + // user's own concern or transient, not a sandbox defect. Fold both + // into the success channel as a descriptive `ExecuteResult.error` so + // the precise reason reaches the model, exactly as a thrown runtime + // error does, instead of being collapsed to an opaque internal error + // by the host failure path. Unrecognized defects stay on + // `DynamicWorkerExecutionError` and remain opaque. + Effect.catchTags({ + CodeCompilationError: (error) => + Effect.succeed({ result: null, error: error.message } satisfies ExecuteResult), + SandboxRuntimeError: (error) => + Effect.succeed({ result: null, error: error.message } satisfies ExecuteResult), + }), Effect.withSpan("executor.code.exec.dynamic_worker", { attributes: { "executor.runtime": "dynamic-worker" }, }), diff --git a/packages/kernel/runtime-dynamic-worker/src/invocation.test.ts b/packages/kernel/runtime-dynamic-worker/src/invocation.test.ts index 6fc7741ca..41e0cb65a 100644 --- a/packages/kernel/runtime-dynamic-worker/src/invocation.test.ts +++ b/packages/kernel/runtime-dynamic-worker/src/invocation.test.ts @@ -6,6 +6,7 @@ import * as Effect from "effect/Effect"; import type { SandboxToolInvoker } from "@executor-js/codemode-core"; import { ExecutionToolError } from "@executor-js/execution"; import { + classifySandboxFailure, ToolDispatcher, makeDynamicWorkerExecutor, renderWorkerError, @@ -156,6 +157,52 @@ describe("serializeWorkerCause", () => { }); }); +describe("classifySandboxFailure", () => { + // Each string here is a real `status.message` seen on the + // executor.runtime.* spans in production. They reject the worker loader + // or the evaluate RPC and were collapsed to an opaque "Internal tool + // error" before the model could act on them. CPU/memory/capacity limits + // can't be triggered deterministically inside a unit-test isolate, so we + // pin their classification here; the syntax and serialization paths also + // have live WorkerLoader coverage above. + it.each([ + ["Failed to start Worker:\nUncaught SyntaxError: Unexpected token '='", "compilation"], + ["Unexpected token '{'", "compilation"], + ["Invalid or unexpected token", "compilation"], + ["Symbol(nope) could not be cloned.", "runtime"], + [ + 'Could not serialize object of type "Cloudflare". This type does not support serialization.', + "runtime", + ], + ["Worker exceeded CPU time limit.", "runtime"], + ["Worker exceeded memory limit.", "runtime"], + ["Too many concurrent dynamic workers", "runtime"], + ] as const)("classifies %j as %s", (message, expected) => { + expect(classifySandboxFailure({ __type: "Error", name: "Error", message }, message)).toBe( + expected, + ); + }); + + it("classifies a SyntaxError by name even when the message is bare", () => { + expect( + classifySandboxFailure({ __type: "Error", name: "SyntaxError", message: "boom" }, "boom"), + ).toBe("compilation"); + }); + + it("classifies a DataCloneError by name even when the message is bare", () => { + expect( + classifySandboxFailure({ __type: "Error", name: "DataCloneError", message: "boom" }, "boom"), + ).toBe("runtime"); + }); + + it("leaves an unrecognized defect opaque (internal)", () => { + const message = "the RPC receiver does not implement the method"; + expect(classifySandboxFailure({ __type: "Error", name: "TypeError", message }, message)).toBe( + "internal", + ); + }); +}); + describe("makeDynamicWorkerExecutor", () => { const loader = (env as { LOADER: WorkerLoader }).LOADER; @@ -305,6 +352,25 @@ describe("makeDynamicWorkerExecutor", () => { expect(result.error).not.toBe("Internal tool error"); }); + it("surfaces a non-serializable return value descriptively, not opaquely", async () => { + const executor = makeDynamicWorkerExecutor({ loader }); + const invoker = makeInvoker(() => null); + + // Returning a value that can't cross the sandbox boundary (a Symbol, a + // host object) rejects the evaluate RPC with a DataCloneError. That is + // the user's own code, not a sandbox defect, so the model needs to be + // told what it returned can't be serialized rather than getting an + // opaque "Internal tool error". Production analog: "Could not serialize + // object of type Cloudflare". + const result = await Effect.runPromise(executor.execute("async () => Symbol('nope')", invoker)); + + expect(result.result).toBeNull(); + expect(result.error).toBeDefined(); + expect(result.error).not.toBe("Internal tool error"); + expect(result.error).not.toContain("Internal tool error"); + expect(result.error?.toLowerCase()).toContain("could not be cloned"); + }); + it("preserves public ExecutionToolError messages across the worker bridge", async () => { const executor = makeDynamicWorkerExecutor({ loader }); const invoker = {