diff --git a/.claude/commands/exec.md b/.claude/commands/exec.md index bcececb..f5a4a48 100644 --- a/.claude/commands/exec.md +++ b/.claude/commands/exec.md @@ -1,14 +1,17 @@ # /exec -Run a shell command in the Docker sandbox. +Run a shell command — works in both host-driven and cockpit mode. **Usage:** `/exec <command>` -**How:** Call `mcp__docker__run_command` with: -- `command`: the shell command to run -- `service`: `"devcontainer"` +**How (context-aware — single corpus, no forks):** -**This is the only place `mcp__docker__run_command` is called.** All other commands use `/exec`. +- **Cockpit mode** (`AGENTIC_IN_CONTAINER` is set in the environment): run the command using the Bash tool in the local shell. +- **Host mode** (no `AGENTIC_IN_CONTAINER`): call `mcp__docker__run_command` with `command` set to the shell command and `service: "devcontainer"`, routing to the sandbox over docker MCP. + +Check `process.env.AGENTIC_IN_CONTAINER` (or `$AGENTIC_IN_CONTAINER` in the shell) to decide which path to take. + +**This is the only shell-dispatch boundary.** All slash commands (including `/tdd`) call `/exec` — they work unchanged in both modes. **Examples:** - `/exec npm test` diff --git a/.devcontainer/.dockerignore b/.devcontainer/.dockerignore index 67f14a0..1d86f04 100644 --- a/.devcontainer/.dockerignore +++ b/.devcontainer/.dockerignore @@ -2,3 +2,5 @@ !Dockerfile !claude-persist-setup !afk +!cockpit +!cockpit-settings.json diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile index d043860..90687b7 100644 --- a/.devcontainer/Dockerfile +++ b/.devcontainer/Dockerfile @@ -5,13 +5,24 @@ FROM mcr.microsoft.com/devcontainers/base:ubuntu-24.04 # fixes the socket group for the non-root user at container startup, so no # manual CLI install or DOCKER_GID handling is needed here. +# Cockpit-mode marker: any in-container process can detect it is inside the +# agentic-dev devcontainer by checking AGENTIC_IN_CONTAINER (ADR-0018). +# /exec uses this to run commands in the local shell instead of via docker MCP. 
+ENV AGENTIC_IN_CONTAINER=1 + COPY claude-persist-setup /usr/local/bin/claude-persist-setup # Ergonomic orchestrator launchers: `afk` / `hitl` from anywhere in a project, # instead of `.sandcastle/run.sh afk …`. The mode is dispatched from the invoked # name, so `hitl` is a symlink to the same script (busybox-style multi-call). COPY afk /usr/local/bin/afk + +# Cockpit shim: `cockpit` (or `docker compose exec devcontainer cockpit`) lands +# Claude Code in the workspace working_dir without the user having to cd first. +COPY cockpit /usr/local/bin/cockpit + RUN chmod +x /usr/local/bin/claude-persist-setup /usr/local/bin/afk \ + /usr/local/bin/cockpit \ && ln -sf afk /usr/local/bin/hitl # Optionally bake the orchestrator SOURCE (ADR-0016) + workflow slash commands + @@ -65,4 +76,13 @@ RUN if [ -n "$BAKE_ORCHESTRATOR" ]; then \ chown -R vscode:vscode /opt/agentic-orchestrator /opt/agentic-commands /opt/agentic-skills ; \ fi +# Global Claude Code settings for cockpit mode (ADR-0018): pre-allow the +# workflow tools (local shell, gh, git, afk/hitl) so the human doesn't get +# permission prompts when driving /grill→/to-prd→/to-issues from inside. +# Installed by claude-persist-setup into ~/.claude/settings.json at create time. +# A workspace .claude/settings.local.json still overrides per Claude Code's +# normal settings hierarchy. 
+COPY cockpit-settings.json /opt/agentic-settings/settings.json
+RUN chown -R vscode:vscode /opt/agentic-settings
+
 USER vscode
diff --git a/.devcontainer/claude-persist-setup b/.devcontainer/claude-persist-setup
index 82add71..c5ac1bc 100755
--- a/.devcontainer/claude-persist-setup
+++ b/.devcontainer/claude-persist-setup
@@ -68,3 +68,18 @@ if [ -d "$BAKED_SKILLS" ]; then
   done
   echo "Installed upstream engineering-discipline skills → ~/.claude/skills"
 fi
+
+# Install the global cockpit-mode settings (ADR-0018): pre-allow gh, git,
+# afk/hitl so the human isn't prompted for every workflow command when driving
+# the workflow from inside the container. Only installed when the baked file
+# exists; a workspace .claude/settings.local.json overrides via Claude Code's
+# normal settings hierarchy. Refreshed on every create so a newer image wins.
+# A persisted file that differs from the baked one is kept as a numbered
+# backup (settings.json.~N~) instead of being silently overwritten.
+BAKED_SETTINGS="/opt/agentic-settings/settings.json"
+if [ -f "$BAKED_SETTINGS" ]; then
+  if ! cmp -s "$BAKED_SETTINGS" "${PERSIST_DATA}/settings.json" 2>/dev/null; then
+    cp -f --backup=numbered "$BAKED_SETTINGS" "${PERSIST_DATA}/settings.json"
+  fi
+  echo "Installed cockpit-mode global settings → ~/.claude/settings.json"
+fi
diff --git a/.devcontainer/cockpit b/.devcontainer/cockpit
new file mode 100644
index 0000000..9545651
--- /dev/null
+++ b/.devcontainer/cockpit
@@ -0,0 +1,7 @@
+#!/usr/bin/env bash
+# cockpit — launch Claude Code in the workspace directory for cockpit mode
+# (ADR-0018). Lets `docker compose exec -it devcontainer cockpit` drop the
+# human into the right working dir without a manual `cd` first.
+set -euo pipefail +cd "${WORKSPACE_FOLDER:-/workspace}" +exec claude "$@" diff --git a/.devcontainer/cockpit-settings.json b/.devcontainer/cockpit-settings.json new file mode 100644 index 0000000..0f073e4 --- /dev/null +++ b/.devcontainer/cockpit-settings.json @@ -0,0 +1,10 @@ +{ + "permissions": { + "allow": [ + "Bash(gh *)", + "Bash(git *)", + "Bash(afk*)", + "Bash(hitl*)" + ] + } +} diff --git a/.devcontainer/docker-compose.yml b/.devcontainer/docker-compose.yml index 7b9075f..1590dee 100644 --- a/.devcontainer/docker-compose.yml +++ b/.devcontainer/docker-compose.yml @@ -67,6 +67,19 @@ services: source: ${DOCKER_SOCK:-/var/run/docker.sock} target: /var/run/docker.sock + # Cockpit-mode credential passthrough (ADR-0018): forward GitHub and Claude + # credentials from the host shell into the container so a human can run the + # full workflow from inside (`docker compose exec -it devcontainer cockpit`). + # Values are taken from the host environment — nothing is committed here. + # Unset host vars are forwarded as empty strings (harmless; the TS resolver + # and gh/claude CLIs treat empty as absent). AGENTIC_IN_CONTAINER is baked + # into the image via the Dockerfile ENV directive, not forwarded here. + environment: + GH_TOKEN: ${GH_TOKEN:-} + GITHUB_TOKEN: ${GITHUB_TOKEN:-} + ANTHROPIC_API_KEY: ${ANTHROPIC_API_KEY:-} + CLAUDE_CODE_OAUTH_TOKEN: ${CLAUDE_CODE_OAUTH_TOKEN:-} + working_dir: ${WORKSPACE_FOLDER:-/workspace} command: sleep infinity diff --git a/.sandcastle/main.ts b/.sandcastle/main.ts index 3111497..d7ec499 100644 --- a/.sandcastle/main.ts +++ b/.sandcastle/main.ts @@ -36,6 +36,63 @@ export { SANDBOX_LABEL }; const sh = promisify(execFile); +/** + * Parse dotenv-style content (KEY=VALUE, # comments, blank lines) into a + * plain object. Supports only the simple KEY=VALUE syntax used by + * orchestrator.env — no quoted strings, no variable expansion. + * Pure: no I/O; accepts raw file content so callers can be tested without disk. 
+ */ +export function parseOrchEnv(content: string): Record { + const result: Record = {}; + for (const line of content.split('\n')) { + const trimmed = line.trim(); + if (!trimmed || trimmed.startsWith('#')) continue; + const eqIdx = trimmed.indexOf('='); + if (eqIdx < 1) continue; + const key = trimmed.slice(0, eqIdx).trim(); + const value = trimmed.slice(eqIdx + 1); + if (key) result[key] = value; + } + return result; +} + +export interface ResolvedCredentials { + readonly GH_TOKEN: string | undefined; + readonly GITHUB_TOKEN: string | undefined; + readonly ANTHROPIC_API_KEY: string | undefined; + readonly CLAUDE_CODE_OAUTH_TOKEN: string | undefined; + /** True when AGENTIC_IN_CONTAINER is set (cockpit mode). */ + readonly cockpit: boolean; +} + +/** + * Resolve agent credentials from the process env and parsed orchestrator.env + * content, with process env taking precedence over orchestrator.env. Pure: no + * disk I/O or process.env mutation — accepts both sources as plain objects. + * + * Precedence: a non-empty value in `env` wins; empty or absent values fall + * through to `orchEnv`. The `cockpit` field reflects whether AGENTIC_IN_CONTAINER + * is set in `env` (the marker baked into the devcontainer image, ADR-0018). + */ +export function resolveCredentials( + env: Record, + orchEnv: Record = {}, +): ResolvedCredentials { + const pick = (key: string): string | undefined => { + const envVal = env[key]; + if (envVal !== undefined && envVal !== '') return envVal; + const orchVal = orchEnv[key]; + return orchVal !== undefined && orchVal !== '' ? orchVal : undefined; + }; + return { + GH_TOKEN: pick('GH_TOKEN'), + GITHUB_TOKEN: pick('GITHUB_TOKEN'), + ANTHROPIC_API_KEY: pick('ANTHROPIC_API_KEY'), + CLAUDE_CODE_OAUTH_TOKEN: pick('CLAUDE_CODE_OAUTH_TOKEN'), + cockpit: Boolean(env['AGENTIC_IN_CONTAINER']), + }; +} + /** Read the concurrency cap from AGENTIC_CONCURRENCY (default 1, serial). 
*/ export function parseConcurrency(): number { return Math.max(1, Number(process.env.AGENTIC_CONCURRENCY ?? "1") || 1); diff --git a/.sandcastle/package-lock.json b/.sandcastle/package-lock.json index c4779e4..94471f7 100644 --- a/.sandcastle/package-lock.json +++ b/.sandcastle/package-lock.json @@ -5,6 +5,7 @@ "packages": { "": { "name": "agentic-orchestrator", + "license": "MIT", "devDependencies": { "@ai-hero/sandcastle": "^0.10.0", "@types/node": "^26.0.0", diff --git a/.sandcastle/reduce.test.ts b/.sandcastle/reduce.test.ts index b98e1b4..85c4f14 100644 --- a/.sandcastle/reduce.test.ts +++ b/.sandcastle/reduce.test.ts @@ -10,7 +10,7 @@ import { test } from "node:test"; import assert from "node:assert/strict"; import { reduce, READY_LABEL, type State, type CiStatus, type Pr } from "./reduce.ts"; import { parseBlockedBy } from "./issue-source.ts"; -import { sweepOrphanedSandboxes, ensureSandboxNetwork, parseConcurrency, withRetry, resetAgentBranch, refreshBase, validateSignature, classifyDelivery, parseSmeeEvent } from "./main.ts"; +import { sweepOrphanedSandboxes, ensureSandboxNetwork, parseConcurrency, withRetry, resetAgentBranch, refreshBase, validateSignature, classifyDelivery, parseSmeeEvent, parseOrchEnv, resolveCredentials } from "./main.ts"; import { createHmac } from "node:crypto"; import { SANDBOX_LABEL, PROJECT_LABEL_KEY, deriveProject } from "./sandbox-runner.ts"; @@ -830,3 +830,87 @@ test("demo: A blocks B — A starts first; after A merges, B enters ready-set", { type: "StartSandbox", issueId: 2 }, ]); }); + +// ─── parseOrchEnv ───────────────────────────────────────────────────────────── + +test("parseOrchEnv: empty string returns {}", () => { + assert.deepEqual(parseOrchEnv(""), {}); +}); + +test("parseOrchEnv: comment-only lines are skipped", () => { + assert.deepEqual(parseOrchEnv("# this is a comment\n# another\n"), {}); +}); + +test("parseOrchEnv: blank lines are skipped", () => { + assert.deepEqual(parseOrchEnv("\n\n\n"), {}); +}); + 
+test("parseOrchEnv: KEY=value parses correctly", () => { + assert.deepEqual(parseOrchEnv("GH_TOKEN=abc123"), { GH_TOKEN: "abc123" }); +}); + +test("parseOrchEnv: blank value is preserved as empty string", () => { + assert.deepEqual(parseOrchEnv("GH_TOKEN="), { GH_TOKEN: "" }); +}); + +test("parseOrchEnv: multiple vars parsed in order", () => { + const result = parseOrchEnv("GH_TOKEN=tok\nANTHROPIC_API_KEY=sk-key\n"); + assert.equal(result.GH_TOKEN, "tok"); + assert.equal(result.ANTHROPIC_API_KEY, "sk-key"); +}); + +test("parseOrchEnv: comment lines mixed with vars are skipped", () => { + const result = parseOrchEnv("# set this\nGH_TOKEN=mytoken\n# done\n"); + assert.deepEqual(result, { GH_TOKEN: "mytoken" }); +}); + +// ─── resolveCredentials ─────────────────────────────────────────────────────── + +test("resolveCredentials: env-only credentials resolve", () => { + const creds = resolveCredentials({ GH_TOKEN: "from-env" }); + assert.equal(creds.GH_TOKEN, "from-env"); +}); + +test("resolveCredentials: orchestrator.env-only credentials resolve", () => { + const creds = resolveCredentials({}, { GH_TOKEN: "from-orch" }); + assert.equal(creds.GH_TOKEN, "from-orch"); +}); + +test("resolveCredentials: env wins over orchestrator.env when both present", () => { + const creds = resolveCredentials({ GH_TOKEN: "from-env" }, { GH_TOKEN: "from-orch" }); + assert.equal(creds.GH_TOKEN, "from-env"); +}); + +test("resolveCredentials: missing credentials are undefined", () => { + const creds = resolveCredentials({}, {}); + assert.equal(creds.GH_TOKEN, undefined); + assert.equal(creds.GITHUB_TOKEN, undefined); + assert.equal(creds.ANTHROPIC_API_KEY, undefined); + assert.equal(creds.CLAUDE_CODE_OAUTH_TOKEN, undefined); +}); + +test("resolveCredentials: empty env value falls through to orchestrator.env", () => { + const creds = resolveCredentials({ GH_TOKEN: "" }, { GH_TOKEN: "from-orch" }); + assert.equal(creds.GH_TOKEN, "from-orch"); +}); + +test("resolveCredentials: 
AGENTIC_IN_CONTAINER set → cockpit true", () => { + const creds = resolveCredentials({ AGENTIC_IN_CONTAINER: "1" }); + assert.equal(creds.cockpit, true); +}); + +test("resolveCredentials: AGENTIC_IN_CONTAINER absent → cockpit false", () => { + const creds = resolveCredentials({}); + assert.equal(creds.cockpit, false); +}); + +test("resolveCredentials: resolves all four credential keys independently", () => { + const creds = resolveCredentials( + { GH_TOKEN: "gh-env", ANTHROPIC_API_KEY: "ak-env" }, + { GITHUB_TOKEN: "ght-orch", CLAUDE_CODE_OAUTH_TOKEN: "cco-orch" }, + ); + assert.equal(creds.GH_TOKEN, "gh-env"); + assert.equal(creds.ANTHROPIC_API_KEY, "ak-env"); + assert.equal(creds.GITHUB_TOKEN, "ght-orch"); + assert.equal(creds.CLAUDE_CODE_OAUTH_TOKEN, "cco-orch"); +}); diff --git a/CLAUDE.md b/CLAUDE.md index 0bafeb5..0953b04 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -1,20 +1,30 @@ # agentic-dev -A reusable devcontainer + agentic workflow. Claude runs on the **host** and drives a -disposable Docker sandbox; GitHub issues hold the durable state. See `README.md` for the -full picture — this file is the operating contract. +A reusable devcontainer + agentic workflow. GitHub issues hold the durable state. +See `README.md` for the full picture — this file is the operating contract. + +There are two ways to drive the workflow: + +| Mode | Where Claude runs | How to start | +|------|------------------|--------------| +| **Host-driven** (default) | On the host; reaches the container via docker MCP | `./up.sh .` then open a host Claude session | +| **Cockpit** | Inside the devcontainer | `docker compose exec -it devcontainer cockpit` | + +Both modes use the same slash commands. `/exec` detects the context automatically +(`AGENTIC_IN_CONTAINER` env var, baked into the image) and routes correctly. ## Execution environment -- When a sandbox is running, **use `/exec` for all shell commands**, never the Bash tool. 
- `/exec` routes to `mcp__docker__run_command(service="devcontainer")` (a `docker compose exec` wrapper). +- **Use `/exec` for all shell commands**, never the Bash tool directly. + - **Host mode:** `/exec` routes to `mcp__docker__run_command(service="devcontainer")`. + - **Cockpit mode:** `/exec` runs the command in the local shell (inside the container). - The workflow is **headless** — VS Code is never required. (`code.sh` exists only for the human.) - Each project is **self-contained**: it holds its own `.devcontainer/`, gets a per-project container name (`DEVCONTAINER_NAME`), and is discovered natively by `devcontainer up` / - VS Code *Reopen in Container* (ADR-0012). `/exec` targets `devcontainer:` - (this repo: `agentic-dev`). + VS Code *Reopen in Container* (ADR-0012). In host mode, `/exec` targets + `devcontainer:` (this repo: `agentic-dev`). -## Run commands (host) +## Run commands (host-driven mode) ``` ./up.sh . # spin up THIS project's sandbox (init.sh runs automatically) @@ -23,6 +33,19 @@ full picture — this file is the operating contract. ./code.sh # optional: attach VS Code to a running sandbox ``` +## Cockpit mode (drive from inside the container) + +With only the compose file and exported credentials — no host Claude, no docker MCP wiring: + +```sh +# Export credentials on the host, then: +docker compose -f .devcontainer/docker-compose.yml exec -it devcontainer cockpit +# → lands in Claude Code inside the container, at the workspace root +``` + +The workflow slash commands (`/grill-me-with-docs`, `/to-prd`, `/to-issues`, `/afk`, `/hitl`) +are baked into the published image and available immediately (ADR-0017/0018). + ## Development workflow ``` @@ -45,8 +68,7 @@ resolved env without launching. 
The **published** image additionally bakes the workflow slash commands → `~/.claude/commands` and four upstream engineering disciplines → `~/.claude/skills` (ADR-0017), and `.devcontainer/docker-compose.yml` boots standalone without `init.sh` (every var has a -fallback). Both are groundwork for running the workflow from *inside* the container; this -repo still drives it from the host via `/exec`, using the workspace `.claude/commands`. +fallback). This is what cockpit mode depends on. ### Issue lifecycle @@ -63,8 +85,8 @@ adds dependencies on its own. ## Permissions -Add `mcp__docker__run_command` to the `allow` list in `.claude/settings.local.json` so `/exec` -never prompts: +**Host-driven mode:** add `mcp__docker__run_command` to the `allow` list in +`.claude/settings.local.json` so `/exec` never prompts: ```json { @@ -73,3 +95,7 @@ never prompts: } } ``` + +**Cockpit mode:** the published image bakes a global `~/.claude/settings.json` that +pre-allows `gh`, `git`, `afk`, and `hitl` (installed by `claude-persist-setup`). A +workspace `.claude/settings.local.json` overrides it as usual. diff --git a/docs/adr/0018-cockpit-mode-drive-from-inside-the-outer-image.md b/docs/adr/0018-cockpit-mode-drive-from-inside-the-outer-image.md new file mode 100644 index 0000000..b3c8def --- /dev/null +++ b/docs/adr/0018-cockpit-mode-drive-from-inside-the-outer-image.md @@ -0,0 +1,99 @@ +# Cockpit mode — drive the workflow from inside the outer image + +ADR-0017 baked the workflow slash commands and upstream skills into the published +image so an adopter doesn't need to vendor `.claude/commands/`. The next step is +letting **Claude itself run inside the container** — no host Claude, no docker MCP +wiring, no host slash commands. This is "cockpit mode." 
+ +## Decision + +Make the published outer image the **single deployable unit**: with only the compose +file and exported host credentials, a human can run + +``` +docker compose exec -it devcontainer cockpit +``` + +and drive `/grill-me-with-docs → /to-prd → /to-issues` entirely from inside, using +the baked commands and the in-container `claude` CLI. + +**What changes:** + +### `AGENTIC_IN_CONTAINER` marker (Dockerfile `ENV`) + +Set `AGENTIC_IN_CONTAINER=1` unconditionally in the image. Any in-container process — +Claude Code, bash scripts, the orchestrator — can detect that it runs inside the devcontainer by reading this +variable. The marker makes the switch point explicit and avoids inspecting container +names, hostnames, or socket paths. + +### Context-aware `/exec` (one corpus, no forks) + +`/exec` is the single shell-dispatch boundary used by all slash commands (ADR-0006). +On each call it checks `AGENTIC_IN_CONTAINER`: +- **Set:** run the command in the local Bash shell (cockpit mode). +- **Absent:** route to `mcp__docker__run_command` (host mode, unchanged). + +The `.md` command file stays logic-free; it just states the rule. No slash command is +forked — `/tdd` and the rest work unchanged in both modes. + +### Credential passthrough (compose `environment:`) + +The compose file forwards `GH_TOKEN`/`GITHUB_TOKEN` and `ANTHROPIC_API_KEY`/ +`CLAUDE_CODE_OAUTH_TOKEN` from the **host shell environment** into the container. +No values are committed; each entry is `VAR: ${VAR:-}`. Unset host vars arrive as +empty strings (harmless — the TS resolver and gh/claude CLIs treat empty as absent). 
+ +### TypeScript credential resolver (the real logic) + +A pure function `resolveCredentials(env, orchEnv)` in the orchestrator resolves +credentials from both sources with **env taking precedence**: + +- `env` = `process.env` (includes docker-compose forwarded vars in cockpit mode) +- `orchEnv` = parsed `orchestrator.env` (via `parseOrchEnv`, a second pure function) + +A non-empty value in `env` wins over `orchEnv`; missing or empty env values fall +through. The resolved credentials are the single source for the orchestrator's own +gh calls and for what it forwards to sandcastle's inner sandboxes. Cockpit Claude +(the outer driver) and the orchestrator resolve from the same source. + +Both functions are unit-tested in the `reduce.test.ts` / `sandbox-runner.test.ts` +style: no Docker, no GitHub, no network. The `cockpit` boolean field of +`ResolvedCredentials` (derived from `AGENTIC_IN_CONTAINER`) is exercised through +this resolver, not a separate bash dry-run. + +### Global `~/.claude/settings.json` (baked, installed by `claude-persist-setup`) + +A `cockpit-settings.json` is baked to `/opt/agentic-settings/settings.json` and +installed into `~/.claude/settings.json` at container creation by `claude-persist-setup` +(alongside baked commands and skills). It pre-allows `gh`, `git`, `afk`, and `hitl` +so the human isn't interrupted by permission prompts during the cockpit workflow. +A workspace `.claude/settings.local.json` overrides it via Claude Code's normal +settings hierarchy. + +### `cockpit` shim (`/usr/local/bin/cockpit`, baked) + +A thin shell script that `cd`s to `${WORKSPACE_FOLDER:-/workspace}` and `exec`s +`claude`. This lands `docker compose exec -it devcontainer cockpit` in the right +directory without the human needing to know the workspace path. No new host script. 
+ +## Relation to prior ADRs + +- **ADR-0006:** `/exec` remains the single shell boundary; cockpit mode adds a + second code path (local Bash), but the rule ("only use `/exec`") is unchanged. +- **ADR-0011:** the path-matched host mount is still used by the orchestrator so + sandcastle's worktrees resolve correctly under docker-outside-of-docker. Cockpit + mode only relocates the *driver* (outer Claude); the orchestrator topology is + unchanged. +- **ADR-0016/0017:** cockpit mode is the payoff for baking the orchestrator source + and workflow commands into the image. Those ADRs called this groundwork; this ADR + activates it. + +## Consequences + +- A human can run the definition phase (`/grill→/to-prd→/to-issues`) with only + the compose file and exported credentials — no Claude installation on the host. +- The published image is now the complete deployable unit for the full workflow. +- Host-driven mode is unchanged. The docker MCP path, `orchestrator.env`, inner + sandboxes, and the `afk`/`hitl` launchers all work as before. +- `AGENTIC_IN_CONTAINER` is unconditionally set in the devcontainer image, so any + process inside the container sees it. Processes that don't check it are unaffected.