diff --git a/.sandcastle/prompt-claude.md b/.sandcastle/prompt-claude.md
new file mode 100644
index 0000000..8aaf319
--- /dev/null
+++ b/.sandcastle/prompt-claude.md
@@ -0,0 +1,23 @@
+You are implementing GitHub issue #{{ISSUE_NUMBER}}: "{{ISSUE_TITLE}}"
+
+## Issue
+{{ISSUE_BODY}}
+
+## Scope guardrails
+- Only modify files directly required by this issue.
+- Do not modify, close, or reference other issues.
+- Do not add dependencies beyond what the issue specifies.
+- Do not push or open a pull request — the orchestrator does that after you finish.
+- Make the changes yourself: use the edit/write tools to modify files and bash
+ to run tests and commit. Do not just plan or write a todo list, and do not
+ delegate to a subagent — apply the edits and commit them directly.
+
+## Steps
+1. Implement the change. If the project has a test suite, work test-first:
+ write a failing test, make it pass, then refactor.
+2. Run the test suite and make sure it is green.
+3. Commit your work to the current branch with clear, imperative messages
+ referencing #{{ISSUE_NUMBER}}.
+4. When the issue is fully implemented and committed, output exactly this
+ line, by itself, and then stop — produce no further output or commits:
+ ISSUE_COMPLETE
diff --git a/.sandcastle/prompt-local.md b/.sandcastle/prompt-local.md
new file mode 100644
index 0000000..cf431dd
--- /dev/null
+++ b/.sandcastle/prompt-local.md
@@ -0,0 +1,16 @@
+Implement this change in the repository, then commit. Use your tools now — read files, edit files, run bash. Do not just describe a plan.
+
+TASK (issue #{{ISSUE_NUMBER}}): {{ISSUE_TITLE}}
+{{ISSUE_BODY}}
+
+The code lives in the `.sandcastle/` directory (TypeScript). Steps:
+1. Read the relevant `.sandcastle/*.ts` file(s) with your read tool.
+2. Make the change with your edit tool.
+3. Run the tests: `cd .sandcastle && npm test` — fix until they pass.
+4. Commit: `cd .sandcastle && cd .. && git add -A && git commit -m " (#{{ISSUE_NUMBER}})"`
+5. Do NOT push or open a PR.
+
+When committed, output this exact line alone and stop:
+ISSUE_COMPLETE
+
+Begin now by reading the most relevant file.
diff --git a/.sandcastle/sandbox-runner.test.ts b/.sandcastle/sandbox-runner.test.ts
index 4f74926..6fa64ef 100644
--- a/.sandcastle/sandbox-runner.test.ts
+++ b/.sandcastle/sandbox-runner.test.ts
@@ -1,6 +1,7 @@
/**
* Unit tests for buildAgentInput — the pure function that selects agent
- * provider, image, and worktree files based on the runner's tier option.
+ * provider, image, network, prompt template, and prompt args based on the
+ * runner's tier option and the issue being worked.
* No Docker, GitHub, or network required.
*
* Run: npm test (picks up all *.test.ts files in the test script)
@@ -9,29 +10,31 @@ import { test } from "node:test";
import assert from "node:assert/strict";
import { buildAgentInput } from "./sandbox-runner.ts";
+const STUB_ISSUE = { number: 42, title: "Fix the bug", body: "Detailed description" };
+
test("default tier uses claude-code agent with sandcastle:local image", () => {
- const input = buildAgentInput({});
+ const input = buildAgentInput({}, STUB_ISSUE);
assert.equal(input.agent.name, "claude-code");
assert.equal(input.imageName, "sandcastle:local");
assert.equal(input.copyToWorktree, undefined);
});
test("explicit claude tier uses claude-code agent", () => {
- const input = buildAgentInput({ tier: "claude" });
+ const input = buildAgentInput({ tier: "claude" }, STUB_ISSUE);
assert.equal(input.agent.name, "claude-code");
assert.equal(input.imageName, "sandcastle:local");
assert.equal(input.copyToWorktree, undefined);
});
test("local tier uses opencode agent with sandcastle-opencode:local image", () => {
- const input = buildAgentInput({ tier: "local" });
+ const input = buildAgentInput({ tier: "local" }, STUB_ISSUE);
assert.equal(input.agent.name, "opencode");
assert.equal(input.imageName, "sandcastle-opencode:local");
assert.deepEqual(input.copyToWorktree, ["opencode.json"]);
});
test("local tier delivers opencode.json via copyToWorktree", () => {
- const input = buildAgentInput({ tier: "local" });
+ const input = buildAgentInput({ tier: "local" }, STUB_ISSUE);
assert.ok(Array.isArray(input.copyToWorktree), "copyToWorktree should be an array");
assert.ok(input.copyToWorktree!.includes("opencode.json"), "opencode.json must be in copyToWorktree");
});
@@ -40,7 +43,7 @@ test("local tier installs opencode config into the global config dir", () => {
// opencode resolves its Ollama provider from ~/.config/opencode, not the
// worktree cwd; without this hook the provider never resolves and every
// iteration is an empty turn. Lock in that the config is relocated to HOME.
- const input = buildAgentInput({ tier: "local" });
+ const input = buildAgentInput({ tier: "local" }, STUB_ISSUE);
assert.ok(Array.isArray(input.onSandboxReady), "onSandboxReady should be an array");
const cmds = input.onSandboxReady!.map((h) => h.command).join("\n");
assert.match(cmds, /\.config\/opencode/, "must target opencode's global config dir");
@@ -48,21 +51,60 @@ test("local tier installs opencode config into the global config dir", () => {
});
test("claude tier has no onSandboxReady hook", () => {
- const input = buildAgentInput({ tier: "claude" });
+ const input = buildAgentInput({ tier: "claude" }, STUB_ISSUE);
assert.equal(input.onSandboxReady, undefined);
});
test("claude tier respects custom imageName", () => {
- const input = buildAgentInput({ imageName: "my-sandcastle:v2" });
+ const input = buildAgentInput({ imageName: "my-sandcastle:v2" }, STUB_ISSUE);
assert.equal(input.imageName, "my-sandcastle:v2");
});
test("local tier respects custom localImageName", () => {
- const input = buildAgentInput({ tier: "local", localImageName: "my-opencode:latest" });
+ const input = buildAgentInput({ tier: "local", localImageName: "my-opencode:latest" }, STUB_ISSUE);
assert.equal(input.imageName, "my-opencode:latest");
});
test("local tier defaults to ollama/qwen3-coder:30b model", () => {
- const input = buildAgentInput({ tier: "local" });
+ const input = buildAgentInput({ tier: "local" }, STUB_ISSUE);
assert.equal(input.agent.name, "opencode");
});
+
+test("claude tier uses the claude prompt template", () => {
+ const input = buildAgentInput({ tier: "claude" }, STUB_ISSUE);
+ assert.ok(
+ input.promptFile.endsWith("prompt-claude.md"),
+ `expected prompt-claude.md, got ${input.promptFile}`,
+ );
+});
+
+test("local tier uses the local prompt template", () => {
+ const input = buildAgentInput({ tier: "local" }, STUB_ISSUE);
+ assert.ok(
+ input.promptFile.endsWith("prompt-local.md"),
+ `expected prompt-local.md, got ${input.promptFile}`,
+ );
+});
+
+test("promptArgs include issue number, title, and body", () => {
+ const issue = { number: 99, title: "Test Issue", body: "Some body text" };
+ const input = buildAgentInput({}, issue);
+ assert.equal(input.promptArgs["ISSUE_NUMBER"], "99");
+ assert.equal(input.promptArgs["ISSUE_TITLE"], "Test Issue");
+ assert.equal(input.promptArgs["ISSUE_BODY"], "Some body text");
+});
+
+test("promptArgs ISSUE_BODY defaults to (no body) when body is empty", () => {
+ const input = buildAgentInput({}, { number: 1, title: "T", body: "" });
+ assert.equal(input.promptArgs["ISSUE_BODY"], "(no body)");
+});
+
+test("network is forwarded from RunnerOptions", () => {
+ const input = buildAgentInput({ network: "agentic-sandbox-net" }, STUB_ISSUE);
+ assert.equal(input.network, "agentic-sandbox-net");
+});
+
+test("network is undefined when not provided", () => {
+ const input = buildAgentInput({}, STUB_ISSUE);
+ assert.equal(input.network, undefined);
+});
diff --git a/.sandcastle/sandbox-runner.ts b/.sandcastle/sandbox-runner.ts
index bb6e565..1cd6651 100644
--- a/.sandcastle/sandbox-runner.ts
+++ b/.sandcastle/sandbox-runner.ts
@@ -1,23 +1,34 @@
/**
- * SandboxRunner — the sandcastle adapter. Wraps `run()` to work one issue in a
- * disposable, git-isolated Docker sandbox: the agent gets its own checkout on
- * `agent/issue-`, implements the issue, and commits. A `completionSignal`
- * stops it as soon as it is done so it does not loop and produce duplicate
+ * SandboxRunner — the sandcastle adapter. Uses the high-level `createSandbox`
+ * lifecycle to work one issue in a disposable, git-isolated Docker sandbox:
+ * the agent gets its own checkout on `agent/issue-`, implements the issue,
+ * and commits. Scope-bound disposal (`await using`) ensures the worktree and
+ * container are torn down even if the agent throws. A `completionSignal` stops
+ * the agent as soon as it is done so it does not loop and produce duplicate
* commits (observed in the spike with no signal).
*
* Auth: `claudeCode` uses CLAUDE_CODE_OAUTH_TOKEN, which sandcastle resolves
* from `.sandcastle/.env` (no Anthropic API key). See ADR-0003.
*
- * Topology: `run()` is invoked with `cwd` = the path-matched host mount (the
- * orchestrator's working directory), so the worktree sandcastle bind-mounts
- * into the inner container resolves under docker-outside-of-docker (ADR-0011).
+ * Topology: `createSandbox` is invoked with `cwd` = the path-matched host
+ * mount (the orchestrator's working directory), so the worktree sandcastle
+ * bind-mounts into the inner container resolves under docker-outside-of-docker
+ * (ADR-0011).
*
* The agent does NOT push or open the PR — the orchestrator does that with the
* devcontainer's existing gh + SSH auth (walking-skeleton choice; keeps tokens
* out of inner sandboxes).
+ *
+ * Lifecycle idiom (ADR-0019): createSandbox owns worktree creation and
+ * deterministic teardown; in-sandbox setup is declared via hooks.onSandboxReady;
+ * agent prompts are .md templates resolved by promptFile + promptArgs.
*/
-import { run, claudeCode, opencode, type AgentProvider } from "@ai-hero/sandcastle";
+import { createSandbox, claudeCode, opencode, type AgentProvider, type PromptArgs } from "@ai-hero/sandcastle";
import { docker } from "@ai-hero/sandcastle/sandboxes/docker";
+import { dirname, join } from "node:path";
+import { fileURLToPath } from "node:url";
+
+const _dir = dirname(fileURLToPath(import.meta.url));
const COMPLETION_SIGNAL = "ISSUE_COMPLETE";
@@ -87,22 +98,42 @@ interface SandboxReadyHook {
readonly sudo?: boolean;
}
-/** Resolved agent-tier inputs: agent provider, docker image, any extra
- * worktree files to copy, and any in-sandbox setup commands. */
+/** Resolved agent-tier inputs: agent provider, docker image, network, any
+ * extra worktree files to copy, any in-sandbox setup commands, and the
+ * prompt template path + args for this issue. */
export interface AgentInput {
readonly agent: AgentProvider;
readonly imageName: string;
+ readonly network?: string;
readonly copyToWorktree?: string[];
/** Commands run inside the sandbox after it boots, before the agent runs. */
readonly onSandboxReady?: SandboxReadyHook[];
+ /** Absolute path to the .md prompt template (resolved against import.meta.url). */
+ readonly promptFile: string;
+ /** {{KEY}} substitution args for the prompt template. */
+ readonly promptArgs: PromptArgs;
+}
+
+export interface IssueInput {
+ readonly number: number;
+ readonly title: string;
+ readonly body: string;
}
-/** Pure function: resolve RunnerOptions to the per-tier agent config. */
-export function buildAgentInput(opts: RunnerOptions): AgentInput {
+/** Pure function: resolve RunnerOptions + IssueInput to the per-tier agent
+ * config, including the prompt template path and substitution args. */
+export function buildAgentInput(opts: RunnerOptions, issue: IssueInput): AgentInput {
+ const promptArgs: PromptArgs = {
+ ISSUE_NUMBER: String(issue.number),
+ ISSUE_TITLE: issue.title,
+ ISSUE_BODY: issue.body || "(no body)",
+ };
+
if (opts.tier === "local") {
return {
agent: opencode(opts.localModel ?? "ollama/qwen3-coder:30b"),
imageName: opts.localImageName ?? "sandcastle-opencode:local",
+ network: opts.network,
copyToWorktree: ["opencode.json"],
// opencode resolves its Ollama provider from its *global* config
// (~/.config/opencode/opencode.json), NOT from a config in the worktree
@@ -117,106 +148,58 @@ export function buildAgentInput(opts: RunnerOptions): AgentInput {
"cp opencode.json \"$HOME/.config/opencode/opencode.json\"",
},
],
+ promptFile: join(_dir, "prompt-local.md"),
+ promptArgs,
};
}
return {
agent: claudeCode(opts.model ?? "claude-sonnet-4-6", { permissionMode: "auto" }),
imageName: opts.imageName ?? "sandcastle:local",
+ network: opts.network,
+ promptFile: join(_dir, "prompt-claude.md"),
+ promptArgs,
};
}
-export interface IssueInput {
- readonly number: number;
- readonly title: string;
- readonly body: string;
-}
-
export class SandboxRunner {
constructor(private readonly opts: RunnerOptions = {}) {}
async runIssue(issue: IssueInput): Promise {
const branch = `agent/issue-${issue.number}`;
- const agentInput = buildAgentInput(this.opts);
- const result = await run({
- agent: agentInput.agent,
+ const agentInput = buildAgentInput(this.opts, issue);
+
+ // createSandbox owns worktree creation and teardown (ADR-0019); `await
+ // using` ensures close() fires even if sandbox.run() throws, replacing the
+ // old bespoke try/finally lifecycle.
+ await using sandbox = await createSandbox({
+ branch,
sandbox: docker({
imageName: agentInput.imageName,
containerUid: this.opts.containerUid ?? 1000,
containerGid: this.opts.containerGid ?? 1000,
- ...(this.opts.network ? { network: this.opts.network } : {}),
+ ...(agentInput.network ? { network: agentInput.network } : {}),
}),
- branchStrategy: { type: "branch", branch },
cwd: this.opts.cwd,
- name: `issue-${issue.number}`,
- maxIterations: this.opts.maxIterations ?? 12,
- completionSignal: COMPLETION_SIGNAL,
copyToWorktree: agentInput.copyToWorktree,
hooks: agentInput.onSandboxReady
? { sandbox: { onSandboxReady: agentInput.onSandboxReady } }
: undefined,
- prompt: buildPrompt(issue, this.opts.tier),
+ });
+
+ const result = await sandbox.run({
+ agent: agentInput.agent,
+ name: `issue-${issue.number}`,
+ maxIterations: this.opts.maxIterations ?? 12,
+ completionSignal: COMPLETION_SIGNAL,
+ promptFile: agentInput.promptFile,
+ promptArgs: agentInput.promptArgs,
});
return {
- branch: result.branch,
+ branch: sandbox.branch,
commits: result.commits,
completed: result.completionSignal !== undefined,
logFilePath: result.logFilePath,
};
}
}
-
-/** Per-tier prompt. Local models (opencode) get a short, action-first prompt:
- * the verbose claude prompt produces empty turns on smaller models — they need
- * a direct "edit the file now, then these exact commands" shape. */
-function buildPrompt(issue: IssueInput, tier?: "claude" | "local"): string {
- if (tier === "local") return buildLocalPrompt(issue);
- return [
- `You are implementing GitHub issue #${issue.number}: "${issue.title}"`,
- "",
- "## Issue",
- issue.body || "(no body)",
- "",
- "## Scope guardrails",
- "- Only modify files directly required by this issue.",
- "- Do not modify, close, or reference other issues.",
- "- Do not add dependencies beyond what the issue specifies.",
- "- Do not push or open a pull request — the orchestrator does that after you finish.",
- "- Make the changes yourself: use the edit/write tools to modify files and bash",
- " to run tests and commit. Do not just plan or write a todo list, and do not",
- " delegate to a subagent — apply the edits and commit them directly.",
- "",
- "## Steps",
- "1. Implement the change. If the project has a test suite, work test-first:",
- " write a failing test, make it pass, then refactor.",
- "2. Run the test suite and make sure it is green.",
- "3. Commit your work to the current branch with clear, imperative messages",
- ` referencing #${issue.number}.`,
- "4. When the issue is fully implemented and committed, output exactly this",
- " line, by itself, and then stop — produce no further output or commits:",
- ` ${COMPLETION_SIGNAL}`,
- ].join("\n");
-}
-
-/** Short, directive prompt for local opencode models. Action-first, concrete
- * commands, minimal prose — verbose instructions produce empty turns. */
-function buildLocalPrompt(issue: IssueInput): string {
- return [
- `Implement this change in the repository, then commit. Use your tools now — read files, edit files, run bash. Do not just describe a plan.`,
- "",
- `TASK (issue #${issue.number}): ${issue.title}`,
- issue.body || "(no body)",
- "",
- "The code lives in the `.sandcastle/` directory (TypeScript). Steps:",
- "1. Read the relevant `.sandcastle/*.ts` file(s) with your read tool.",
- "2. Make the change with your edit tool.",
- "3. Run the tests: `cd .sandcastle && npm test` — fix until they pass.",
- `4. Commit: \`cd .sandcastle && cd .. && git add -A && git commit -m " (#${issue.number})"\``,
- "5. Do NOT push or open a PR.",
- "",
- `When committed, output this exact line alone and stop:`,
- COMPLETION_SIGNAL,
- "",
- "Begin now by reading the most relevant file.",
- ].join("\n");
-}
diff --git a/docs/adr/0019-lean-execution-layer-high-level-sandcastle-lifecycle.md b/docs/adr/0019-lean-execution-layer-high-level-sandcastle-lifecycle.md
new file mode 100644
index 0000000..2ce76d9
--- /dev/null
+++ b/docs/adr/0019-lean-execution-layer-high-level-sandcastle-lifecycle.md
@@ -0,0 +1,105 @@
+# Lean execution layer — high-level sandcastle lifecycle
+
+The original `sandbox-runner.ts` called `run()` directly and hand-rolled its
+own lifecycle: building the full options object inline, embedding prompt text
+in TypeScript string arrays, and relying on `run()`'s internal teardown. This
+was functional but kept bespoke lifecycle logic in the adapter rather than
+delegating it to the library.
+
+`@ai-hero/sandcastle@0.10.0` ships a higher-level API — `createSandbox` +
+`await using` + `hooks.onSandboxReady` + `promptFile`/`promptArgs` — that owns
+worktree creation, scope-bound disposal, declarative in-sandbox setup, and
+prompt template substitution. Adopting it shrinks the adapter to its essential
+role: translating `RunnerOptions` + `IssueInput` to a plain config object, then
+handing off to the library.
+
+## Decision
+
+`SandboxRunner.runIssue` uses the high-level lifecycle:
+
+```typescript
+await using sandbox = await createSandbox({
+ branch,
+ sandbox: docker({ imageName, containerUid, containerGid, network }),
+ cwd,
+ copyToWorktree,
+ hooks: { sandbox: { onSandboxReady: [...] } },
+});
+
+const result = await sandbox.run({
+ agent,
+ name,
+ maxIterations,
+ completionSignal,
+ promptFile,
+ promptArgs,
+});
+```
+
+**`await using` replaces bespoke teardown.** `createSandbox` returns a `Sandbox`
+handle that implements `Symbol.asyncDispose`; the scope-bound disposal calls
+`sandbox.close()` when the block exits, normally or via exception. The library
+owns worktree cleanup — the adapter no longer needs a `finally` block.
+
+**`hooks.onSandboxReady` replaces prompt-embedded setup.** The local tier's
+`opencode.json` relocation command was previously embedded in prompt text (making
+it part of the agent's instructions, not the sandbox lifecycle). It now lives in
+`hooks.sandbox.onSandboxReady`, where sandcastle runs it once after boot and
+before the agent starts. The agent prompt no longer describes bootstrapping.
+
+**`promptFile` + `promptArgs` replace inline string builders.** Agent prompts
+move to `.md` templates (`prompt-claude.md`, `prompt-local.md`) alongside the
+runner source. `{{KEY}}` placeholders are resolved by sandcastle at run time from
+`promptArgs`; only the template selection (one file per tier) stays conditional in
+`buildAgentInput`. The `buildPrompt`/`buildLocalPrompt` string-builder functions
+are removed.
+
+**`buildAgentInput` is the pure builder seam.** The function is extended to take
+both `RunnerOptions` and `IssueInput` and returns the complete config for both
+`createSandbox()` and `sandbox.run()` (agent, imageName, network, copyToWorktree,
+onSandboxReady, promptFile, promptArgs). It remains pure and exported so
+`sandbox-runner.test.ts` can assert on all config fields without spawning Docker.
+
+## Two asymmetries that stay imperative
+
+### MTU network — declarative attachment, imperative creation
+
+`docker({ network: SANDBOX_NETWORK })` attaches the inner container to the
+MTU-1400 network declaratively (ADR-0013). `ensureSandboxNetwork` (in `main.ts`)
+creates it imperatively with `--opt com.docker.network.driver.mtu=1400` before
+the first sandbox starts. This split is intentional: `docker()` has no option
+to create a custom-MTU network, only to attach to an existing one. Do not move
+network creation into `docker()`.
+
+### Socat proxy — imperative host override
+
+`resolveDockerHost` sets `process.env.DOCKER_HOST` to the direct socket path
+(`unix:///var/run/docker-host.sock`) when the DooD socat proxy is detected (#71).
+This bypasses socat because socat tears down `docker exec`'s hijacked bidirectional
+stream after the first burst, causing every sandbox iteration to be an empty
+"started → stopped" turn with zero commits. `docker()` has no `host` or `socket`
+option, so this cannot be expressed declaratively. `resolveDockerHost` and its
+`process.env` mutation in `main.ts` are unchanged. Do not "tidy" this into the
+docker options — the bug returns immediately.
+
+## Consequences
+
+- `SandboxRunner` is thinner: worktree creation, teardown, in-sandbox setup, and
+ prompt rendering are all delegated to the library.
+- `/afk` and `/hitl` behavior is unchanged — they still get one PR per ready
+ issue with auto-merge on green CI.
+- The pure builder seam (`buildAgentInput`) remains testable without Docker; the
+ test suite covers image selection, network forwarding, copyToWorktree, hook
+ commands, template paths, and promptArgs in isolation.
+- The live `createSandbox`/`await using` round-trip is covered by the existing
+ gated `integration.test.ts` (`SANDCASTLE_INTEGRATION=1`); the CI `sandcastle`
+ job is unaffected.
+
+## Relations
+
+- ADR-0006: `/afk` still wraps sandcastle via exec — `createSandbox` execs under
+ the hood just as `run()` did.
+- ADR-0013: MTU network creation stays in `ensureSandboxNetwork`; `docker(network)`
+ only attaches. The split is recorded here.
+- #71 (socat fix): `resolveDockerHost` stays imperative. See the asymmetry note
+ above.