Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions e2e/scenarios/mcp-execute.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,3 +23,28 @@ scenario(
expect(result.text, "the sandbox returns the value").toBe("42");
}),
);

scenario(
"MCP · a syntax error returns a descriptive message, not an opaque internal error",
{},
Effect.gen(function* () {
const target = yield* Target;
const mcp = yield* Mcp;
const identity = yield* target.newIdentity();
const session = mcp.session(identity);

// A genuine parse error (a `const` with no binding) is the user's own
// mistake, surfaced before the code ever runs. The model needs the real
// reason to self-correct, so the sandbox must report it descriptively
// instead of collapsing it to "Internal tool error [id]".
const result = yield* session.call("execute", {
code: "const = 5; return 1;",
});

expect(result.ok, "a syntax error is reported as an error result").toBe(false);
expect(result.text, "the parser's reason reaches the model").toContain("Unexpected");
expect(result.text, "the opaque mask is not used for syntax errors").not.toContain(
"Internal tool error",
);
}),
);
27 changes: 27 additions & 0 deletions packages/hosts/mcp/src/tool-server.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -815,6 +815,33 @@ describe("MCP host server — native elicitation mode", () => {
});
});

it("surfaces a code syntax error descriptively, not as an opaque internal error", async () => {
// The runtime classifies a genuine parse error (a compile failure in
// the user's own code) onto the success channel as ExecuteResult.error.
// It must reach the model verbatim so it can self-correct, in contrast
// to a genuine Effect failure, which stays opaque (see the
// "execution failure stays opaque" test that asserts the masked form).
const engine = makeStubEngine({
execute: () =>
Effect.succeed({
result: null,
error: 'Unexpected token, expected "," (1:54)',
}),
});

await withNativeClient(engine, ELICITATION_CAPS, async (client) => {
const result = await client.callTool({
name: "execute",
arguments: { code: "const { items } = await tools.search({ query: 'x' }" },
});
expect(result.isError).toBe(true);
const text = textOf(result);
expect(text).toContain("Unexpected token");
expect(text).toContain("(1:54)");
expect(text).not.toContain("Internal tool error");
});
});

it("resume tool is hidden in native elicitation mode", async () => {
await withNativeClient(makeStubEngine({}), ELICITATION_CAPS, async (client) => {
const { tools } = await client.listTools();
Expand Down
37 changes: 37 additions & 0 deletions packages/kernel/core/src/effect-errors.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,40 @@ export class CodeExecutionError extends Data.TaggedError("CodeExecutionError")<{
readonly message: string;
readonly cause?: unknown;
}> {}

/**
* Raised when user code fails to compile before it ever runs: a genuine
* syntax/parse error (smart quotes from a copy-paste, an unbalanced
* brace, `const = 5`) caught while stripping TypeScript ahead of the
* JS-only sandbox. Unlike `CodeExecutionError` this is the user's
* mistake, not a sandbox defect, so runtimes surface its `message`
* through the descriptive `ExecuteResult.error` channel instead of
* collapsing it to an opaque internal-error string. The original parser
* message (e.g. "Unexpected token (1:54)") is carried verbatim so the
* model can see and fix it.
*/
export class CodeCompilationError extends Data.TaggedError("CodeCompilationError")<{
readonly runtime: string;
readonly message: string;
readonly cause?: unknown;
}> {}

/**
* Raised when the sandbox cannot run the user's code to completion for a
* reason that is not a syntax error but is still safe and useful to
* report back: the returned value is not serializable across the sandbox
* boundary (a Symbol, a host object like `Cloudflare`), the code exceeded
* the isolate's CPU or memory limit, or the sandbox is momentarily at
* capacity. Like `CodeCompilationError`, and unlike `CodeExecutionError`,
* this describes the user's own code or a transient, retryable condition
* rather than an executor defect, so runtimes surface its verbatim
* `message` through the descriptive `ExecuteResult.error` channel instead
* of collapsing it to an opaque internal-error string. Genuinely
* unexpected sandbox defects stay on `CodeExecutionError` and remain
* opaque, preserving the host's failure-channel boundary.
*/
export class SandboxRuntimeError extends Data.TaggedError("SandboxRuntimeError")<{
readonly runtime: string;
readonly message: string;
readonly cause?: unknown;
}> {}
206 changes: 171 additions & 35 deletions packages/kernel/runtime-dynamic-worker/src/executor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,9 @@ import * as Data from "effect/Data";
import * as Effect from "effect/Effect";

import {
CodeCompilationError,
recoverExecutionBody,
SandboxRuntimeError,
stripTypeScript,
type CodeExecutor,
type ExecuteOutputItem,
Expand Down Expand Up @@ -161,6 +163,103 @@ const renderTransportMessage = (value: unknown): string => {
return String(value);
};

const serializedErrorName = (value: SerializedWorkerErrorValue): string | null =>
typeof value === "object" &&
value !== null &&
"name" in value &&
typeof (value as { name?: unknown }).name === "string"
? (value as { name: string }).name
: null;

/**
* Signatures of a compile error in the user's own code. Not all syntax
* errors are caught while stripping TypeScript: smart quotes from a
* paste, an unbalanced brace, and other plain-JS parse errors slip past
* sucrase and only fail when workerd compiles the generated module,
* surfacing as "Failed to start Worker: Uncaught SyntaxError: ...". The
* bare V8 phrasings ("Unexpected token ...", "Invalid or unexpected
* token") are matched too, since some surfaces report the inner message
* without the wrapper.
*/
const COMPILE_SIGNATURES = [
"Failed to start Worker",

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 security Startup Wrapper Leaks Internals

When worker startup fails for an executor or loader problem, workerd can still report it under the generic Failed to start Worker wrapper. This match sends that internal startup failure through CodeCompilationError, then ExecuteResult.error, so the model can receive internal loader or module details instead of the opaque execution error.

Note: If this suggestion doesn't match your team's coding style, reply to this and let me know. I'll remember it for next time!

"SyntaxError",
"Unexpected token",
"Invalid or unexpected token",
] as const;

/**
* Signatures of a sandbox runtime condition that is the user's own
* concern (or transient and retryable) rather than an executor defect: a
* non-serializable return value, the isolate's CPU or memory limit, and
* being momentarily at worker capacity. These are the real categories
* seen in production on the `executor.runtime.*` spans, all of which were
* being collapsed to an opaque internal error before the model could act
* on them.
*/
const RUNTIME_SIGNATURES = [
"could not be cloned",
"does not support serialization",
"Could not serialize",
"exceeded CPU",
"exceeded memory",
"Too many concurrent dynamic workers",
] as const;

export type SandboxFailureKind = "compilation" | "runtime" | "internal";

/**
* Classify a sandbox rejection so the runtime knows whether to surface
* its message descriptively (the user's mistake or a transient,
* safe-to-report condition) or collapse it to an opaque internal error (a
* genuine, unexpected sandbox defect). Tool-invocation failures never
* reach here: the sandbox reports those through its own result envelope,
* so this only sees module compile failures, return-value serialization
* failures, isolate resource limits, and capacity rejections. Anything
* unrecognized stays "internal" and opaque, preserving the host's
* failure-channel boundary.
*/
export const classifySandboxFailure = (
serialized: SerializedWorkerErrorValue,
message: string,
): SandboxFailureKind => {
const name = serializedErrorName(serialized);
if (
name === "SyntaxError" ||
COMPILE_SIGNATURES.some((signature) => message.includes(signature))
) {
return "compilation";
}
if (
name === "DataCloneError" ||
RUNTIME_SIGNATURES.some((signature) => message.includes(signature))
) {
return "runtime";
}
return "internal";
};

/**
* Map a raw sandbox rejection (a thrown value from the worker loader or
* the `evaluate` RPC) to the typed error its classification calls for.
* Compilation and runtime conditions carry the verbatim message through
* the descriptive channel; unrecognized defects stay opaque.
*/
const toSandboxFailure = (
cause: unknown,
): CodeCompilationError | SandboxRuntimeError | DynamicWorkerExecutionError => {
const serialized = serializeWorkerErrorValue(cause);
const message = renderTransportMessage(serialized);
switch (classifySandboxFailure(serialized, message)) {
case "compilation":
return new CodeCompilationError({ runtime: "dynamic-worker", message, cause });
case "runtime":
return new SandboxRuntimeError({ runtime: "dynamic-worker", message, cause });
default:
return new DynamicWorkerExecutionError({ message });
}
};

export const serializeWorkerCause = (cause: Cause.Cause<unknown>): SerializedWorkerError => {
const failures = cause.reasons
.filter(Cause.isFailReason)
Expand Down Expand Up @@ -433,36 +532,55 @@ const startDynamicWorker = (
options: DynamicWorkerExecutorOptions,
code: string,
timeoutMs: number,
): Effect.Effect<DynamicWorkerEntrypoint, DynamicWorkerExecutionError> =>
Effect.try({
try: (): DynamicWorkerEntrypoint => {
const recoveredBody = recoverExecutionBody(code);
// The dynamic Worker isolate only accepts plain JavaScript; TS type
// syntax in user code (`: T`, `as T`, generics) would otherwise
// surface as "Unexpected token ':'" inside `evaluate()` and bubble
// out via DynamicWorkerExecutionError. Stripping here gives the
// model a clear syntax-error message at the front door instead.
const strippedBody = stripTypeScript(recoveredBody);
const executorModule = buildExecutorModule(strippedBody, timeoutMs);
const { [ENTRY_MODULE]: _, ...safeModules } = options.modules ?? {};

const worker = options.loader.get(`executor-${crypto.randomUUID()}`, () => ({
compatibilityDate: "2025-06-01",
compatibilityFlags: ["nodejs_compat"],
mainModule: ENTRY_MODULE,
modules: {
...safeModules,
[ENTRY_MODULE]: executorModule,
},
globalOutbound: options.globalOutbound ?? null,
}));

return asDynamicWorkerEntrypoint(worker.getEntrypoint());
},
catch: (cause) =>
new DynamicWorkerExecutionError({
message: renderTransportMessage(serializeWorkerErrorValue(cause)),
}),
): Effect.Effect<
DynamicWorkerEntrypoint,
DynamicWorkerExecutionError | CodeCompilationError | SandboxRuntimeError
> =>
Effect.gen(function* () {
// The dynamic Worker isolate only accepts plain JavaScript; TS type
// syntax in user code (`: T`, `as T`, generics) would otherwise
// surface as "Unexpected token ':'" inside `evaluate()`. Stripping
// here means valid TS just works. But this step is also where a
// genuine syntax error (smart quotes from a paste, an unbalanced
// brace, `const = 5`) first surfaces, with the parser's precise
// "Unexpected token (line:col)" message. That is the user's mistake,
// not a sandbox defect, so it gets its own `CodeCompilationError`
// and flows back through the descriptive `ExecuteResult.error`
// channel rather than collapsing to an opaque internal error.
const strippedBody = yield* Effect.try({
try: () => stripTypeScript(recoverExecutionBody(code)),
catch: (cause) =>
new CodeCompilationError({
runtime: "dynamic-worker",
message: renderTransportMessage(serializeWorkerErrorValue(cause)),
cause,
}),
});

return yield* Effect.try({
try: (): DynamicWorkerEntrypoint => {
const executorModule = buildExecutorModule(strippedBody, timeoutMs);
const { [ENTRY_MODULE]: _, ...safeModules } = options.modules ?? {};

const worker = options.loader.get(`executor-${crypto.randomUUID()}`, () => ({
compatibilityDate: "2025-06-01",
compatibilityFlags: ["nodejs_compat"],
mainModule: ENTRY_MODULE,
modules: {
...safeModules,
[ENTRY_MODULE]: executorModule,
},
globalOutbound: options.globalOutbound ?? null,
}));

return asDynamicWorkerEntrypoint(worker.getEntrypoint());
},
// A compile error that escaped the strip step, or a capacity
// rejection, can surface here at worker startup rather than at
// `evaluate`. Classify it so the user-actionable reason reaches the
// model instead of an opaque internal error.
catch: toSandboxFailure,
});
}).pipe(
Effect.withSpan("executor.runtime.startup", {
attributes: {
Expand All @@ -478,7 +596,10 @@ const evaluate = (
options: DynamicWorkerExecutorOptions,
code: string,
toolInvoker: SandboxToolInvoker,
): Effect.Effect<ExecuteResult, DynamicWorkerExecutionError> => {
): Effect.Effect<
ExecuteResult,
DynamicWorkerExecutionError | CodeCompilationError | SandboxRuntimeError
> => {
const timeoutMs = Math.max(100, options.timeoutMs ?? DEFAULT_TIMEOUT_MS);

return Effect.gen(function* () {
Expand All @@ -487,10 +608,11 @@ const evaluate = (
const entrypoint = yield* startDynamicWorker(options, code, timeoutMs);
const response = yield* Effect.tryPromise({
try: () => entrypoint.evaluate(dispatcher),
catch: (cause) =>
new DynamicWorkerExecutionError({
message: renderTransportMessage(serializeWorkerErrorValue(cause)),
}),
// The evaluate RPC rejects for module compile failures that escaped
// the strip step, non-serializable return values, isolate resource
// limits, and capacity. All are the user's concern or transient, so
// classify and surface them descriptively rather than opaquely.
catch: toSandboxFailure,
}).pipe(
Effect.withSpan("executor.runtime.evaluate", {
attributes: { "executor.runtime": "dynamic-worker" },
Expand All @@ -517,6 +639,20 @@ const runInDynamicWorker = (
toolInvoker: SandboxToolInvoker,
): Effect.Effect<ExecuteResult, DynamicWorkerExecutionError> =>
evaluate(options, code, toolInvoker).pipe(
// A compile error or a reportable sandbox runtime condition (a
// non-serializable result, a CPU/memory limit, capacity) is the
// user's own concern or transient, not a sandbox defect. Fold both
// into the success channel as a descriptive `ExecuteResult.error` so
// the precise reason reaches the model, exactly as a thrown runtime
// error does, instead of being collapsed to an opaque internal error
// by the host failure path. Unrecognized defects stay on
// `DynamicWorkerExecutionError` and remain opaque.
Effect.catchTags({
CodeCompilationError: (error) =>
Effect.succeed({ result: null, error: error.message } satisfies ExecuteResult),
SandboxRuntimeError: (error) =>
Effect.succeed({ result: null, error: error.message } satisfies ExecuteResult),
}),
Effect.withSpan("executor.code.exec.dynamic_worker", {
attributes: { "executor.runtime": "dynamic-worker" },
}),
Expand Down
Loading
Loading