diff --git a/bun.lock b/bun.lock index 720667976..67947eabf 100644 --- a/bun.lock +++ b/bun.lock @@ -662,6 +662,24 @@ "vitest": "catalog:", }, }, + "packages/hosts/eve": { + "name": "@executor-js/host-eve", + "version": "0.1.0", + "dependencies": { + "@executor-js/codemode-core": "workspace:*", + "@executor-js/execution": "workspace:*", + "effect": "catalog:", + "zod": "4.3.6", + }, + "devDependencies": { + "@effect/vitest": "catalog:", + "@executor-js/runtime-quickjs": "workspace:*", + "@executor-js/sdk": "workspace:*", + "@types/node": "catalog:", + "bun-types": "catalog:", + "vitest": "catalog:", + }, + }, "packages/hosts/mcp": { "name": "@executor-js/host-mcp", "version": "1.4.4", @@ -1644,6 +1662,8 @@ "@executor-js/host-cloudflare": ["@executor-js/host-cloudflare@workspace:apps/host-cloudflare"], + "@executor-js/host-eve": ["@executor-js/host-eve@workspace:packages/hosts/eve"], + "@executor-js/host-mcp": ["@executor-js/host-mcp@workspace:packages/hosts/mcp"], "@executor-js/host-selfhost": ["@executor-js/host-selfhost@workspace:apps/host-selfhost"], diff --git a/packages/hosts/eve/CHANGELOG.md b/packages/hosts/eve/CHANGELOG.md new file mode 100644 index 000000000..ea3a6bd9d --- /dev/null +++ b/packages/hosts/eve/CHANGELOG.md @@ -0,0 +1 @@ +# @executor-js/host-eve diff --git a/packages/hosts/eve/README.md b/packages/hosts/eve/README.md new file mode 100644 index 000000000..2f8eb70c3 --- /dev/null +++ b/packages/hosts/eve/README.md @@ -0,0 +1,105 @@ +# @executor-js/host-eve + +Expose Executor's tool catalog to a [Vercel **eve**](https://vercel.com/eve) agent. + +eve agents discover one typed tool per file under `agent/tools/*.ts`. Executor's +catalog is large by design (discover-by-intent, not one tool per API), so rather +than generating hundreds of tool files, this host mirrors the Executor MCP host: +it surfaces Executor's **codemode** surface as two tools the model drives directly. 
+ +- **`execute`** runs TypeScript against Executor's sandboxed tools runtime + (`tools.search(...)`, `tools.describe.tool(...)`, `tools.github.issues.list(...)`). +- **`resume`** answers an auth / approval / form pause raised mid-execution, using + the `executionId` the paused `execute` result returned. + +This package never imports `eve` at runtime. The factory returns plain objects +shaped to satisfy eve's `defineTool`, so eve stays a peer of your agent project, +not a dependency of this host. + +## Usage + +Build the engine once and share it across both tools (a paused execution lives in +that engine instance's memory, so `execute` and `resume` must come from the same +engine). + +```ts +// agent/executor.ts +import { createExecutionEngine } from "@executor-js/execution/promise"; +import { createExecutorEveTools } from "@executor-js/host-eve"; + +// `executor` (an Executor client) and `codeExecutor` (a sandbox runtime, e.g. +// the QuickJS code executor) are wired the same way the CLI / local app do it. +import { executor, codeExecutor } from "./runtime.ts"; + +const engine = createExecutionEngine({ executor, codeExecutor }); + +// Top-level await: the `execute` description (workflow + configured namespaces) +// is read from the engine once and baked in before eve compiles the manifest. +export const executorTools = await createExecutorEveTools({ engine }); +``` + +```ts +// agent/tools/execute.ts +import { defineTool } from "eve/tools"; +import { executorTools } from "../executor.ts"; + +export default defineTool(executorTools.execute); +``` + +```ts +// agent/tools/resume.ts +import { defineTool } from "eve/tools"; +import { executorTools } from "../executor.ts"; + +export default defineTool(executorTools.resume); +``` + +The model now writes Executor codemode in `execute`, and when a call needs OAuth +or an approval it gets back an `executionId` and calls `resume`. 
+ +## Config + +`createExecutorEveTools(config)` accepts either: + +- `{ engine }`: a pre-built `@executor-js/execution/promise` engine (recommended; + lets you share one engine and its trace context), or +- `{ executor, codeExecutor }`: the pieces, and the factory builds the engine. + +Plus optional: + +- `description`: override the `execute` tool description. When omitted, the + dynamic description is read from `engine.getDescription()` and baked in. +- `onDefect(error, correlationId)`: called when a tool body throws an unexpected + defect. Defaults to `console.error`. The model only ever sees an opaque + `Internal tool error [id]`; the cause is logged out-of-band so it can't leak + internal context through the tool result. + +## Return shape + +Every tool resolves to an `ExecutorToolEnvelope`: + +```ts +{ + status: string; + text: string; + data: Record<string, unknown>; +} +``` + +`text` is the model-facing render; `data` is the full structured payload kept for +eve Agent Runs and `outputSchema` consumers. `toModelOutput` projects the +envelope down to `{ type: "text", value: text }` so the model reads only `text`. + +## Approval + +Executor's own pause/resume **is** the human-in-the-loop mechanism: a sensitive +call pauses mid-execution and `resume` continues it. 
If you also want a pre-call +gate on the whole `execute` tool, add eve's native approval in your tool file: + +```ts +import { defineTool } from "eve/tools"; +import { always } from "eve/tools/approval"; +import { executorTools } from "../executor.ts"; + +export default defineTool({ ...executorTools.execute, needsApproval: always() }); +``` diff --git a/packages/hosts/eve/package.json b/packages/hosts/eve/package.json new file mode 100644 index 000000000..2e37f986e --- /dev/null +++ b/packages/hosts/eve/package.json @@ -0,0 +1,33 @@ +{ + "name": "@executor-js/host-eve", + "version": "0.1.0", + "private": true, + "description": "Expose Executor's tool catalog to a Vercel eve agent as codemode execute/resume tools.", + "type": "module", + "exports": { + ".": { + "types": "./src/index.ts", + "default": "./src/index.ts" + } + }, + "scripts": { + "typecheck": "tsgo --noEmit", + "test": "vitest run", + "test:watch": "vitest", + "typecheck:slow": "bunx tsc --noEmit -p tsconfig.json" + }, + "dependencies": { + "@executor-js/codemode-core": "workspace:*", + "@executor-js/execution": "workspace:*", + "effect": "catalog:", + "zod": "4.3.6" + }, + "devDependencies": { + "@effect/vitest": "catalog:", + "@executor-js/runtime-quickjs": "workspace:*", + "@executor-js/sdk": "workspace:*", + "@types/node": "catalog:", + "bun-types": "catalog:", + "vitest": "catalog:" + } +} diff --git a/packages/hosts/eve/src/index.integration.test.ts b/packages/hosts/eve/src/index.integration.test.ts new file mode 100644 index 000000000..23b74034c --- /dev/null +++ b/packages/hosts/eve/src/index.integration.test.ts @@ -0,0 +1,85 @@ +import { describe, expect, it } from "@effect/vitest"; +import { Effect } from "effect"; + +import { createExecutor, definePlugin } from "@executor-js/sdk"; +import { makeTestConfig } from "@executor-js/sdk/testing"; +import { makeQuickJsExecutor } from "@executor-js/runtime-quickjs"; +import { createExecutionEngine } from "@executor-js/execution"; +import { 
toPromiseExecutionEngine } from "@executor-js/execution/promise"; + +import { createExecutorEveTools } from "./index"; + +// --------------------------------------------------------------------------- +// Integration: drive the eve tools against the REAL execution stack (real +// QuickJS sandbox + real Executor), not a stubbed engine. Proves the adapter +// actually runs model-authored TypeScript and surfaces real results/errors. +// +// `createExecutor` requires a Scope, so these run as `it.effect`: the executor +// is acquired in the test scope and the adapter's Promise API is bridged back +// with `Effect.promise`. +// --------------------------------------------------------------------------- + +const codeExecutor = makeQuickJsExecutor(); + +const emptyPlugin = definePlugin(() => ({ + id: "empty-eve-test" as const, + storage: () => ({}), + staticSources: () => [], +})); + +const buildTools = Effect.gen(function* () { + const executor = yield* createExecutor(makeTestConfig({ plugins: [emptyPlugin()] as const })); + const engine = toPromiseExecutionEngine(createExecutionEngine({ executor, codeExecutor })); + return yield* Effect.promise(() => createExecutorEveTools({ engine })); +}); + +describe("integration: real engine + QuickJS sandbox", () => { + it.effect("evaluates real TypeScript and returns the result", () => + Effect.gen(function* () { + const tools = yield* buildTools; + const out = yield* Effect.promise(() => tools.execute.execute({ code: "return 1 + 1" })); + + expect(out.status).toBe("completed"); + expect(out.data.result).toBe(2); + expect(out.text).toContain("2"); + }), + ); + + it.effect("injects the Executor tools runtime into the sandbox", () => + Effect.gen(function* () { + const tools = yield* buildTools; + // `tools` is the runtime surface the model drives; proving it exists in + // the sandbox confirms the adapter wired the real engine, not bare JS. 
+ const out = yield* Effect.promise(() => + tools.execute.execute({ code: "return typeof tools.search" }), + ); + + expect(out.status).toBe("completed"); + expect(out.data.result).toBe("function"); + }), + ); + + it.effect("surfaces a real runtime error as an error envelope (never throws)", () => + Effect.gen(function* () { + const tools = yield* buildTools; + const out = yield* Effect.promise(() => + tools.execute.execute({ code: "return missingReference" }), + ); + + expect(out.status).toBe("error"); + expect(out.text.toLowerCase()).toContain("error"); + }), + ); + + it.effect("resume against the real engine reports an unknown execution", () => + Effect.gen(function* () { + const tools = yield* buildTools; + const out = yield* Effect.promise(() => + tools.resume.execute({ executionId: "exec_nope", action: "accept", content: "{}" }), + ); + + expect(out.status).toBe("execution_not_found"); + expect(out.data).toMatchObject({ executionId: "exec_nope", recovery: "re_execute" }); + }), + ); +}); diff --git a/packages/hosts/eve/src/index.test.ts b/packages/hosts/eve/src/index.test.ts new file mode 100644 index 000000000..d012bd6b1 --- /dev/null +++ b/packages/hosts/eve/src/index.test.ts @@ -0,0 +1,223 @@ +import { describe, expect, it } from "@effect/vitest"; +import { Effect } from "effect"; + +import type { + ExecutionEngine, + ExecutionResult, + PausedExecution, + ResumeResponse, +} from "@executor-js/execution/promise"; + +import { + createExecutorEveTools, + type ExecuteToolInput, + type ExecutorEveTool, + type ExecutorEveTools, + type ExecutorEveToolsConfig, + type ExecutorToolEnvelope, + type ResumeToolInput, +} from "./index"; + +// --------------------------------------------------------------------------- +// Fixtures +// --------------------------------------------------------------------------- + +const stubEngine = (overrides: Partial<ExecutionEngine>): ExecutionEngine => ({ + execute: async () => ({ result: "default" }), + executeWithPause: async (): Promise<ExecutionResult> => ({ + 
status: "completed", + result: { result: "default" }, + }), + resume: async (): Promise<ExecutionResult | null> => null, + getPausedExecution: async () => null, + getDescription: async () => "test executor", + ...overrides, +}); + +// A structural ElicitationContext is all `formatPausedExecution` reads +// (request._tag/message/requestedSchema, address, args), so no need to build +// the real tagged Schema instances here. +const formPause = (id: string): PausedExecution => + // oxlint-disable-next-line executor/no-double-cast -- test fixture: structural shape is sufficient for formatPausedExecution + ({ + id, + elicitationContext: { + request: { _tag: "FormElicitation", message: "Approve this action?", requestedSchema: {} }, + address: "github.issues.create", + args: { title: "Hi" }, + }, + }) as unknown as PausedExecution; + +// --------------------------------------------------------------------------- +// execute +// --------------------------------------------------------------------------- + +describe("execute tool", () => { + it("returns a completed envelope and projects text to the model", async () => { + const tools: ExecutorEveTools = await createExecutorEveTools({ + engine: stubEngine({ + executeWithPause: async () => ({ status: "completed", result: { result: "hello" } }), + }), + }); + + const out: ExecutorToolEnvelope = await tools.execute.execute({ code: "return 'hello'" }); + + expect(out.status).toBe("completed"); + expect(out.text).toContain("hello"); + expect(out.data.status).toBe("completed"); + expect(tools.execute.toModelOutput(out)).toEqual({ type: "text", value: out.text }); + }); + + it("surfaces a pause as a resumable envelope carrying the executionId", async () => { + const tools = await createExecutorEveTools({ + engine: stubEngine({ + executeWithPause: async () => ({ status: "paused", execution: formPause("exec_1") }), + }), + }); + + const out = await tools.execute.execute({ code: "await tools.github.issues.create({})" }); + + 
expect(out.status).toBe("waiting_for_interaction"); + expect(out.data.executionId).toBe("exec_1"); + expect(out.text).toContain("Approve this action?"); + expect(out.text).toContain("exec_1"); + }); + + it("never throws to the agent: a defect becomes an opaque error envelope", async () => { + const seen: Array<{ error: unknown; correlationId: string }> = []; + const tools = await createExecutorEveTools({ + engine: stubEngine({ + // Simulate a sandbox defect rejecting at the host boundary, the way the + // Promise engine surfaces a failed Effect. + executeWithPause: () => Effect.runPromise(Effect.fail({ kind: "sandbox-defect" })), + }), + onDefect: (error, correlationId) => seen.push({ error, correlationId }), + }); + + const out = await tools.execute.execute({ code: "boom" }); + + expect(out.status).toBe("error"); + expect(out.text).toMatch(/Internal tool error \[[0-9a-f]{8}\]/); + // The internal cause is logged out-of-band, never surfaced to the model. + expect(out.text).not.toContain("sandbox-defect"); + expect(out.data).not.toMatchObject({ kind: "sandbox-defect" }); + expect(seen).toHaveLength(1); + expect(out.text).toContain(seen[0]!.correlationId); + }); + + it("rejects empty/whitespace code at the schema", async () => { + const tools = await createExecutorEveTools({ engine: stubEngine({}), description: "x" }); + const execute: ExecutorEveTool<ExecuteToolInput> = tools.execute; + expect(execute.inputSchema.safeParse({ code: "" }).success).toBe(false); + expect(execute.inputSchema.safeParse({ code: " " }).success).toBe(false); + expect(execute.inputSchema.safeParse({ code: "return 1" }).success).toBe(true); + }); +}); + +// --------------------------------------------------------------------------- +// resume +// --------------------------------------------------------------------------- + +describe("resume tool", () => { + it("parses JSON content and forwards the response to the same engine", async () => { + const calls: Array<{ id: string; response: ResumeResponse }> = []; + 
const tools = await createExecutorEveTools({ + engine: stubEngine({ + resume: async (id, response) => { + calls.push({ id, response }); + return { status: "completed", result: { result: "done" } }; + }, + }), + }); + + const input: ResumeToolInput = { + executionId: "exec_1", + action: "accept", + content: '{"name":"value"}', + }; + const out = await tools.resume.execute(input); + + expect(out.status).toBe("completed"); + expect(calls).toEqual([ + { id: "exec_1", response: { action: "accept", content: { name: "value" } } }, + ]); + }); + + it("treats default/empty/non-object content as no content", async () => { + const seen: Array<unknown> = []; + const tools = await createExecutorEveTools({ + engine: stubEngine({ + resume: async (_id, response) => { + seen.push(response.content); + return { status: "completed", result: { result: "ok" } }; + }, + }), + }); + + await tools.resume.execute({ executionId: "e", action: "accept", content: "{}" }); + await tools.resume.execute({ executionId: "e", action: "accept", content: " " }); + await tools.resume.execute({ executionId: "e", action: "accept", content: "[1,2]" }); + + expect(seen).toEqual([undefined, undefined, undefined]); + }); + + it("explains how to recover when the executionId is unknown", async () => { + const tools = await createExecutorEveTools({ + engine: stubEngine({ resume: async () => null }), + }); + + const out = await tools.resume.execute({ + executionId: "gone", + action: "cancel", + content: "{}", + }); + + expect(out.status).toBe("execution_not_found"); + expect(out.data).toMatchObject({ executionId: "gone", recovery: "re_execute" }); + expect(out.text).toContain("Re-run execute"); + }); + + it("defaults content so the schema accepts a bare accept", async () => { + const { resume } = await createExecutorEveTools({ engine: stubEngine({}), description: "x" }); + const parsed = resume.inputSchema.parse({ executionId: "e", action: "accept" }); + expect(parsed.content).toBe("{}"); + }); +}); + +// 
--------------------------------------------------------------------------- +// description / wiring +// --------------------------------------------------------------------------- + +describe("description", () => { + it("bakes the engine's dynamic description by default", async () => { + let calls = 0; + const tools = await createExecutorEveTools({ + engine: stubEngine({ + getDescription: async () => { + calls += 1; + return "dynamic: github, gmail"; + }, + }), + }); + + expect(tools.execute.description).toBe("dynamic: github, gmail"); + expect(calls).toBe(1); + }); + + it("uses an explicit description override without touching the engine", async () => { + let calls = 0; + const config: ExecutorEveToolsConfig = { + engine: stubEngine({ + getDescription: async () => { + calls += 1; + return "unused"; + }, + }), + description: "custom execute description", + }; + const tools = await createExecutorEveTools(config); + + expect(tools.execute.description).toBe("custom execute description"); + expect(calls).toBe(0); + }); +}); diff --git a/packages/hosts/eve/src/index.ts b/packages/hosts/eve/src/index.ts new file mode 100644 index 000000000..9323e0109 --- /dev/null +++ b/packages/hosts/eve/src/index.ts @@ -0,0 +1,275 @@ +// --------------------------------------------------------------------------- +// @executor-js/host-eve: expose Executor to a Vercel `eve` agent. +// +// `eve` agents discover one typed tool per file under `agent/tools/*.ts`, where +// the filename becomes the tool name. This host mirrors the Executor MCP host +// (`@executor-js/host-mcp`): instead of projecting Executor's (large) catalog as +// hundreds of eve tool files, it exposes Executor's codemode surface as two +// tools the model drives directly: +// +// - `execute` runs TypeScript against Executor's sandboxed tools runtime +// (`tools.search(...)`, `tools.describe.tool(...)`, `tools.github.issues.list(...)`). 
+// - `resume` answers an auth/approval pause raised mid-execution, using the +// `executionId` the paused `execute` result returned. +// +// Both tools are plain objects shaped to satisfy eve's `defineTool` argument, so +// this package never imports `eve` at runtime (it is a beta peer the consuming +// agent already depends on). The factory returns BOTH tools sharing ONE engine: +// a paused execution lives in that engine instance's memory, so `execute` and +// `resume` must be built from the same engine or a resume can never find its +// pause. +// --------------------------------------------------------------------------- + +import * as z from "zod/v4"; +import { Option, Schema } from "effect"; +import type * as Cause from "effect/Cause"; +import type { CodeExecutionError, ExecuteResult } from "@executor-js/codemode-core"; + +import { + createExecutionEngine, + formatExecuteResult, + formatPausedExecution, + type ExecutionEngine, + type ExecutionEngineConfig, + type PausedExecution, +} from "@executor-js/execution/promise"; + +// --------------------------------------------------------------------------- +// Public types +// --------------------------------------------------------------------------- + +/** + * What every Executor eve tool returns. `text` is the model-facing render (the + * same envelope text the MCP host surfaces); `data` is the full structured + * payload, preserved for eve Agent Runs / `outputSchema` consumers. `toModelOutput` + * projects this down to just `text` so the model reads the rendered view. + */ +export type ExecutorToolEnvelope = { + readonly status: string; + readonly text: string; + readonly data: Record<string, unknown>; +}; + +/** + * A tool definition structurally compatible with eve's `defineTool` argument. + * Drop one into `agent/tools/<name>.ts` with + * `export default defineTool(executorTools.execute)`. 
+ */ +export type ExecutorEveTool<Input> = { + readonly description: string; + readonly inputSchema: z.ZodType<Input>; + readonly execute: (input: Input) => Promise<ExecutorToolEnvelope>; + readonly toModelOutput: (output: ExecutorToolEnvelope) => { + readonly type: "text"; + readonly value: string; + }; +}; + +const executeInputSchema = z.object({ + code: z + .string() + .trim() + .min(1) + .describe( + "TypeScript to run against the Executor tools runtime. Discover with " + + "`tools.search({ query })`, inspect with `tools.describe.tool({ path })`, " + + "then call typed tools like `tools.github.issues.list({ owner, repo })`. " + + "Return a value to send it to the model.", + ), +}); + +const resumeInputSchema = z.object({ + executionId: z.string().min(1).describe("The executionId returned by a paused execute result."), + action: z + .enum(["accept", "decline", "cancel"]) + .describe("How to answer the paused interaction (auth/approval/form)."), + content: z + .string() + .default("{}") + .describe("Optional JSON object response for form elicitations; defaults to none."), +}); + +export type ExecuteToolInput = z.infer<typeof executeInputSchema>; +export type ResumeToolInput = z.infer<typeof resumeInputSchema>; + +export type ExecutorEveTools = { + readonly execute: ExecutorEveTool<ExecuteToolInput>; + readonly resume: ExecutorEveTool<ResumeToolInput>; +}; + +type SharedConfig = { + /** + * Override the `execute` tool description. When omitted, the dynamic + * description (workflow + configured namespaces) is read from the engine via + * `getDescription()` and baked in at build time. + */ + readonly description?: string; + /** + * Called when a tool body throws an unexpected defect (not a domain failure, + * which is already returned as an error envelope). Defaults to `console.error`. + * The model only ever sees an opaque `Internal tool error [id]`. 
+ */ + readonly onDefect?: (error: unknown, correlationId: string) => void; +}; + +export type ExecutorEveToolsConfig = + | ({ readonly engine: ExecutionEngine } & SharedConfig) + | (ExecutionEngineConfig & SharedConfig); + +// --------------------------------------------------------------------------- +// Envelope helpers +// --------------------------------------------------------------------------- + +const RESUME_DESCRIPTION = [ + "Resume an Executor execution that paused for auth, approval, or a form.", + "Call this with the executionId from a paused execute result. Use action", + '"accept" to proceed (with content matching any requested schema), or', + '"decline"/"cancel" to reject. After a browser/URL flow, call with "accept".', +].join(" "); + +const toCompletedEnvelope = (result: ExecuteResult): ExecutorToolEnvelope => { + const formatted = formatExecuteResult(result); + const status = + typeof formatted.structured.status === "string" ? formatted.structured.status : "completed"; + return { status, text: formatted.text, data: formatted.structured }; +}; + +const toPausedEnvelope = (execution: PausedExecution): ExecutorToolEnvelope => { + const formatted = formatPausedExecution(execution); + return { status: "waiting_for_interaction", text: formatted.text, data: formatted.structured }; +}; + +// A paused execution lives in the engine's memory: it expires after a few +// minutes and dies if the host restarts. Either way the recovery is the same, +// so tell the model rather than hand it a bare miss. 
+const missingExecutionEnvelope = (executionId: string): ExecutorToolEnvelope => ({ + status: "execution_not_found", + text: [ + `No paused execution: ${executionId}.`, + "It expired or was lost when its session restarted (paused executions stay resumable only briefly).", + "Re-run execute with the original code to get a fresh executionId.", + ].join(" "), + data: { status: "execution_not_found", executionId, recovery: "re_execute" }, +}); + +const toModelOutput = ( + output: ExecutorToolEnvelope, +): { readonly type: "text"; readonly value: string } => ({ + type: "text", + value: output.text, +}); + +const newCorrelationId = (): string => + Math.floor(Math.random() * 0x1_0000_0000) + .toString(16) + .padStart(8, "0"); + +const defaultOnDefect = (error: unknown, correlationId: string): void => { + // oxlint-disable-next-line executor/no-try-catch-or-throw -- boundary: best-effort defect logging must tolerate non-serializable causes + try { + console.error(`[executor:eve] tool defect correlation_id=${correlationId}`, error); + } catch { + /* ignore logger failures */ + } +}; + +// `execute`/`resume` failures reaching the eve host are infra defects. Domain +// failures from tools come back as success-channel error envelopes via +// `formatExecuteResult`. Emit an opaque generic plus a correlation id and log +// the cause out-of-band so the model can't read internal context off it. 
+const runEnvelope = async ( + onDefect: (error: unknown, correlationId: string) => void, + run: () => Promise<ExecutorToolEnvelope>, +): Promise<ExecutorToolEnvelope> => { + // oxlint-disable-next-line executor/no-try-catch-or-throw -- boundary: the Promise engine orphans typed errors as rejections; catch to keep the agent's tool loop alive + try { + return await run(); + } catch (error) { + const correlationId = newCorrelationId(); + onDefect(error, correlationId); + const text = `Internal tool error [${correlationId}]`; + return { status: "error", text: `Error: ${text}`, data: { status: "error", error: text } }; + } +}; + +// Tool input is model-authored JSON. Decode it through Effect Schema (no +// JSON.parse / try-catch in domain code): the Record schema rejects arrays and +// scalars, and a decode failure (malformed or non-object) degrades to "no +// content" rather than failing the resume. Mirrors the MCP host's parser. +const JsonObjectFromString = Schema.fromJsonString(Schema.Record(Schema.String, Schema.Unknown)); +const decodeJsonObjectString = Schema.decodeUnknownOption(JsonObjectFromString); + +const parseJsonContent = (raw: string): Record<string, unknown> | undefined => { + if (raw === "{}") return undefined; + const parsed = decodeJsonObjectString(raw); + return Option.isSome(parsed) ? parsed.value : undefined; +}; + +// --------------------------------------------------------------------------- +// Tool builders +// --------------------------------------------------------------------------- + +const buildExecuteTool = ( + engine: ExecutionEngine, + description: string, + onDefect: (error: unknown, correlationId: string) => void, +): ExecutorEveTool<ExecuteToolInput> => ({ + description, + inputSchema: executeInputSchema, + execute: ({ code }) => + runEnvelope(onDefect, async () => { + const outcome = await engine.executeWithPause(code); + return outcome.status === "completed" + ? 
toCompletedEnvelope(outcome.result) + : toPausedEnvelope(outcome.execution); + }), + toModelOutput, +}); + +const buildResumeTool = ( + engine: ExecutionEngine, + onDefect: (error: unknown, correlationId: string) => void, +): ExecutorEveTool<ResumeToolInput> => ({ + description: RESUME_DESCRIPTION, + inputSchema: resumeInputSchema, + execute: ({ executionId, action, content }) => + runEnvelope(onDefect, async () => { + const outcome = await engine.resume(executionId, { + action, + content: parseJsonContent(content), + }); + if (outcome === null) return missingExecutionEnvelope(executionId); + return outcome.status === "completed" + ? toCompletedEnvelope(outcome.result) + : toPausedEnvelope(outcome.execution); + }), + toModelOutput, +}); + +// --------------------------------------------------------------------------- +// Factory +// --------------------------------------------------------------------------- + +/** + * Build the Executor `execute` + `resume` tools for a Vercel `eve` agent. + * + * Pass either a pre-built Promise engine (`{ engine }`) or the pieces to build + * one (`{ executor, codeExecutor }`, from `@executor-js/execution/promise`). + * Both returned tools share the one engine, so a `resume` can always find the + * pause its `execute` raised. + * + * Async because the `execute` description is read from the engine once and + * baked in (ESM top-level `await` resolves it before eve reads the module's + * default export). Pass `description` to skip that and stay synchronous-shaped. + */ +export const createExecutorEveTools = async ( + config: ExecutorEveToolsConfig, +): Promise<ExecutorEveTools> => { + const engine = "engine" in config ? config.engine : createExecutionEngine(config); + const description = config.description ?? (await engine.getDescription()); + const onDefect = config.onDefect ?? 
defaultOnDefect; + return { + execute: buildExecuteTool(engine, description, onDefect), + resume: buildResumeTool(engine, onDefect), + }; +}; diff --git a/packages/hosts/eve/tsconfig.json b/packages/hosts/eve/tsconfig.json new file mode 100644 index 000000000..42ef14d33 --- /dev/null +++ b/packages/hosts/eve/tsconfig.json @@ -0,0 +1,25 @@ +{ + "compilerOptions": { + "target": "ES2022", + "module": "ESNext", + "moduleResolution": "Bundler", + "strict": true, + "skipLibCheck": true, + "noEmit": true, + "lib": ["ES2022"], + "types": ["bun-types", "node"], + "noUnusedLocals": true, + "noImplicitOverride": true, + "plugins": [ + { + "name": "@effect/language-service", + "ignoreEffectSuggestionsInTscExitCode": true, + "ignoreEffectWarningsInTscExitCode": true, + "diagnosticSeverity": { + "globalErrorInEffectCatch": "off" + } + } + ] + }, + "include": ["src/**/*.ts"] +} diff --git a/packages/hosts/eve/vitest.config.ts b/packages/hosts/eve/vitest.config.ts new file mode 100644 index 000000000..ae847ff6d --- /dev/null +++ b/packages/hosts/eve/vitest.config.ts @@ -0,0 +1,7 @@ +import { defineConfig } from "vitest/config"; + +export default defineConfig({ + test: { + include: ["src/**/*.test.ts"], + }, +});