From 911caa1873a2dc81085724825fd96e13fe7393a9 Mon Sep 17 00:00:00 2001 From: Luca Giordano Date: Sun, 28 Jun 2026 13:11:12 +0200 Subject: [PATCH] fix: bypass DooD socat proxy so /afk agent turns stream MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The docker-outside-of-docker feature fronts the host socket with a socat proxy (for permissions). socat tears down `docker exec`'s hijacked bidirectional stream after the first data burst, and sandcastle streams the agent's stream-json over `docker exec` — so the agent's init line arrives, the rest of the turn is dropped, and every iteration ends as an empty "started → stopped" turn with zero commits (the exec close handler's `code ?? 0` even masks it as a clean exit). Point the orchestrator's docker CLI at the real host socket (docker-host.sock, which the feature also exposes and grants docker-group access to) via a guarded resolveDockerHost() called first thing in main(). It is a no-op when socat isn't in play — a bare `docker compose up` has no docker-host.sock and its raw socket streams natively — or when an explicit DOCKER_HOST is already set. Pure decider with unit tests. 
/**
 * Decide the DOCKER_HOST the orchestrator's docker CLI should use, to dodge the
 * docker-outside-of-docker (DooD) socat proxy.
 *
 * The DooD feature fixes socket permissions by fronting the host socket with
 * `socat UNIX-LISTEN:/var/run/docker.sock ... UNIX-CONNECT:/var/run/docker-host.sock`.
 * But socat tears down `docker exec`'s *hijacked* bidirectional stream after the
 * first data burst — and sandcastle streams the agent's stream-json over
 * `docker exec`. So the agent's init line arrives, then the rest of the turn is
 * dropped: every iteration is an empty "started → stopped" turn with zero commits
 * (the close handler's `code ?? 0` even masks it as a clean exit). The feature
 * also exposes the *real* host socket at docker-host.sock and adds the user to the
 * docker group, so pointing the CLI straight at it restores streaming.
 *
 * Pure: the decision is returned, not applied, so it is testable without env/fs
 * side effects. The caller is expected to assign the result to DOCKER_HOST.
 *
 * @param currentDockerHost - The DOCKER_HOST value currently in the environment,
 *   or `undefined` when it is not set. An empty or whitespace-only value is
 *   treated the same as unset.
 * @param directSocketExists - Whether the direct host socket
 *   (`/var/run/docker-host.sock`) is present. A bare `docker compose up` (no DooD
 *   feature, no socat) has no such socket and its raw `/var/run/docker.sock`
 *   works natively, so `false` means "leave everything alone".
 * @returns The DOCKER_HOST URL to switch to, or `undefined` to leave DOCKER_HOST
 *   as-is.
 */
export function resolveDockerHost(
  currentDockerHost: string | undefined,
  directSocketExists: boolean,
): string | undefined {
  // An explicit choice always wins — but a blank value is not a choice.
  // NOTE(review): the docker CLI is understood to trim DOCKER_HOST and fall back
  // to its default socket when the result is empty, which would route through
  // the socat proxy again; confirm against the docker/cli host parsing.
  if (currentDockerHost?.trim()) return undefined;
  // No direct socket means no socat proxy in the way.
  if (!directSocketExists) return undefined;
  return "unix:///var/run/docker-host.sock";
}
// ─── resolveDockerHost (socat-proxy bypass) ────────────────────────────────────
//
// resolveDockerHost is a pure decider: given the current DOCKER_HOST and whether
// the direct host socket exists, it returns the DOCKER_HOST to switch to, or
// undefined to leave the environment alone. These cases pin each branch.

test("resolveDockerHost: socat present (direct socket exists) → redirect to docker-host.sock", () => {
  assert.equal(resolveDockerHost(undefined, true), "unix:///var/run/docker-host.sock");
});

test("resolveDockerHost: no direct socket (bare compose, no socat) → leave DOCKER_HOST untouched", () => {
  assert.equal(resolveDockerHost(undefined, false), undefined);
});

test("resolveDockerHost: an explicit DOCKER_HOST always wins, even when the direct socket exists", () => {
  assert.equal(resolveDockerHost("unix:///custom.sock", true), undefined);
});

test("resolveDockerHost: explicit DOCKER_HOST with no direct socket is still left untouched", () => {
  assert.equal(resolveDockerHost("tcp://1.2.3.4:2375", false), undefined);
});

// An empty string is falsy, so the "explicit choice" guard does not fire: the
// decision then depends only on whether the direct socket exists.
test("resolveDockerHost: empty-string DOCKER_HOST counts as unset → redirect when the direct socket exists", () => {
  assert.equal(resolveDockerHost("", true), "unix:///var/run/docker-host.sock");
});

test("resolveDockerHost: empty-string DOCKER_HOST with no direct socket → leave DOCKER_HOST untouched", () => {
  assert.equal(resolveDockerHost("", false), undefined);
});