lsfera · lsfera · Jun 29, 2026
diff --git a/.sandcastle/main.ts b/.sandcastle/main.ts
@@ -620,6 +620,9 @@ async function main(): Promise<void> {
   });
 
   const inFlight: number[] = [];
+  // Per-issue count of SandboxFailed events dispatched this run. Persists
+  // across ticks so the retry cap is enforced within a single orchestrator run.
+  const failedAttempts: Record<number, number> = {};
   // Issue ids whose PrMerged event has been dispatched to the reducer.
   // Synchronous check + add before the first await ensures concurrent smee
   // and reconciliation paths never double-dispatch the same merge.
@@ -760,15 +763,48 @@ async function main(): Promise<void> {
 
       void (async () => {
         let openedPr: Pr | null = null;
+        let sandboxFailed = false;
         try {
           openedPr = await processIssue(n, issues, runner, base, repoRoot, mode);
+          if (openedPr === null) {
+            sandboxFailed = true;
+            console.log(`[${mode}] #${n} sandbox produced no commits`);
+          }
         } catch (err) {
           console.error(`[${mode}] #${n} failed:`, err);
-          // Leave the issue claimed (label removed) for a human to inspect.
+          sandboxFailed = true;
         } finally {
           inFlight.splice(inFlight.indexOf(n), 1);
         }
 
+        if (sandboxFailed) {
+          const attempts = failedAttempts[n] ?? 0;
+          const failState: State = { issues: ready, prs: allPrs, inFlight, policy, failedAttempts };
+          const failActions = reduce(failState, { type: "SandboxFailed", issue: n });
+          failedAttempts[n] = attempts + 1;
+          for (const action of failActions) {
+            if (action.type === "Relabel") {
+              console.log(
+                `[${mode}] #${n} failed — re-queuing for retry ` +
+                  `(${attempts + 1}/${policy.maxRetries ?? 2})`,
+              );
+              try {
+                await withRetry(() => issues.addLabel(n, action.label), { label: `addLabel #${n}` });
+              } catch (err) {
+                console.error(`[${mode}] #${n} could not re-queue:`, err);
+              }
+            }
+            if (action.type === "Comment") {
+              console.log(`[${mode}] #${n} retries exhausted after ${attempts + 1} failures`);
+              try {
+                await issues.comment(n, action.body);
+              } catch (err) {
+                console.error(`[${mode}] #${n} could not post exhaustion comment:`, err);
+              }
+            }
+          }
+        }
+
         if (openedPr) {
           const finishState: State = {
             issues: ready,
@@ -877,14 +913,6 @@ async function processIssue(
   );
 
   if (outcome.commits.length === 0) {
-    // Leave the issue claimed (label stays off) so the poll loop does not
-    // re-pick a persistently-failing issue every tick. A human re-labels it
-    // after inspecting. (Automated backoff/retry is later work, not slice 1.)
-    await issues.comment(
-      n,
-      "AFK agent produced no commits — leaving this issue unlabelled for a " +
-        "human to inspect and re-label `ready-for-agent` if appropriate.",
-    );
     return null;
   }
 

diff --git a/.sandcastle/reduce.test.ts b/.sandcastle/reduce.test.ts
@@ -1010,3 +1010,63 @@ test("resolveRunMode: any truthy value for AGENTIC_IN_CONTAINER → detached", (
   assert.equal(resolveRunMode({ AGENTIC_IN_CONTAINER: "true" }), "detached");
   assert.equal(resolveRunMode({ AGENTIC_IN_CONTAINER: "yes" }), "detached");
 });
+
+// ─── SandboxFailed retry gate ─────────────────────────────────────────────────
+
+test("SandboxFailed with attempts < maxRetries → Relabel(ready-for-agent)", () => {
+  const state = base({
+    failedAttempts: { 1: 0 },
+    policy: { concurrency: 1, mode: "afk", maxRetries: 2 },
+  });
+  assert.deepEqual(reduce(state, { type: "SandboxFailed", issue: 1 }), [
+    { type: "Relabel", issueId: 1, label: READY_LABEL },
+  ]);
+});
+
+test("SandboxFailed one attempt before cap also re-queues", () => {
+  const state = base({
+    failedAttempts: { 1: 1 },
+    policy: { concurrency: 1, mode: "afk", maxRetries: 2 },
+  });
+  assert.deepEqual(reduce(state, { type: "SandboxFailed", issue: 1 }), [
+    { type: "Relabel", issueId: 1, label: READY_LABEL },
+  ]);
+});
+
+test("SandboxFailed at the cap → Comment, no Relabel (no infinite loop)", () => {
+  const state = base({
+    failedAttempts: { 1: 2 },
+    policy: { concurrency: 1, mode: "afk", maxRetries: 2 },
+  });
+  const actions = reduce(state, { type: "SandboxFailed", issue: 1 });
+  assert.ok(!actions.some((a) => a.type === "Relabel"), "must not re-queue at the cap — no infinite loop");
+  assert.ok(actions.some((a) => a.type === "Comment"), "must emit exhaustion comment");
+});
+
+test("SandboxFailed with absent failedAttempts defaults to 0 → Relabel", () => {
+  // Existing tests pass base() without failedAttempts — must still work
+  const state = base();
+  const actions = reduce(state, { type: "SandboxFailed", issue: 99 });
+  assert.deepEqual(actions, [{ type: "Relabel", issueId: 99, label: READY_LABEL }]);
+});
+
+test("after SandboxFailed Relabel, re-labelled issue is picked up by next Tick", () => {
+  // Simulate: SandboxFailed emitted Relabel, orchestrator applied it; next Tick starts the issue
+  const stateAfterRelabel = base({
+    issues: [{ id: 1, labels: [READY_LABEL], blockedBy: [] }],
+    failedAttempts: { 1: 1 },
+  });
+  assert.deepEqual(reduce(stateAfterRelabel, { type: "Tick" }), [
+    { type: "StartSandbox", issueId: 1 },
+  ]);
+});
+
+test("SandboxFailed applies in hitl mode too (afk/hitl-agnostic)", () => {
+  const state = base({
+    failedAttempts: { 5: 0 },
+    policy: { concurrency: 1, mode: "hitl", maxRetries: 2 },
+  });
+  assert.deepEqual(reduce(state, { type: "SandboxFailed", issue: 5 }), [
+    { type: "Relabel", issueId: 5, label: READY_LABEL },
+  ]);
+});
diff --git a/.sandcastle/reduce.ts b/.sandcastle/reduce.ts
@@ -56,6 +56,8 @@ export type Mode = "afk" | "hitl";
 export interface Policy {
   readonly concurrency: number;
   readonly mode: Mode;
+  /** Maximum retries per issue within a single orchestrator run (default 2). */
+  readonly maxRetries?: number;
 }
 
 export interface State {
@@ -64,6 +66,8 @@ export interface State {
   readonly policy: Policy;
   /** Issue ids whose sandbox is currently running. */
   readonly inFlight: number[];
+  /** Per-issue count of SandboxFailed events dispatched this run (default {}). */
+  readonly failedAttempts?: Record<number, number>;
 }
 
 export type Event =
@@ -79,6 +83,7 @@ export type Action =
   | { type: "EnableAutoMerge"; pr: Pr }
   | { type: "Relabel"; issueId: number; label: string }
   | { type: "WaitForHuman"; pr: Pr }
+  | { type: "Comment"; issueId: number; body: string }
   | { type: "Stop" };
 
 export function reduce(state: State, event: Event): Action[] {
@@ -91,9 +96,8 @@ export function reduce(state: State, event: Event): Action[] {
       return onSandboxFinished(state, event);
     case "ReviewFinished":
       return onReviewFinished(event);
-    // SandboxFailed lands in a later slice.
-    default:
-      return [];
+    case "SandboxFailed":
+      return onSandboxFailed(state, event);
   }
 }
 
@@ -143,6 +147,22 @@ function onReviewFinished(event: { issue: number; pr: Pr; verdict: ReviewVerdict
   return [{ type: "WaitForHuman", pr: event.pr }];
 }
 
+function onSandboxFailed(state: State, event: { issue: number }): Action[] {
+  const maxRetries = state.policy.maxRetries ?? 2;
+  const attempts = (state.failedAttempts ?? {})[event.issue] ?? 0;
+  if (attempts < maxRetries) {
+    return [{ type: "Relabel", issueId: event.issue, label: READY_LABEL }];
+  }
+  return [{
+    type: "Comment",
+    issueId: event.issue,
+    body:
+      `AFK agent failed ${attempts + 1} times (retry limit: ${maxRetries}) — ` +
+      `leaving this issue unlabelled for a human to inspect and re-label ` +
+      `\`ready-for-agent\` if appropriate.`,
+  }];
+}
+
 /**
  * Map a structured review output (or any extraction/validation error) to a
  * merge verdict. Fail-safe: any missing or malformed result → "changes-requested"

diff --git a/CLAUDE.md b/CLAUDE.md
@@ -103,6 +103,18 @@ fallback). This is what cockpit mode depends on.
 - After each close, dependents are checked and labelled if now unblocked.
 - GitHub issues are the durable state — nothing is stored locally.
 
+### Sandbox retry semantics (#76)
+
+When a sandbox fails (throws or produces no commits) the orchestrator automatically
+re-labels the issue `ready-for-agent` so the next tick picks it up again, up to
+`Policy.maxRetries` times per run (default **2**). After the cap the issue is left
+unlabelled and a comment explains the exhaustion — never an infinite retry loop.
+
+**Known limitation:** attempt counts live in the orchestrator's in-run state and
+reset when the orchestrator restarts. A persistently-failing issue gets another
+`maxRetries` attempts on each fresh orchestrator run. In-run bounding is sufficient
+for v1; cross-run persistence is not implemented.
+
 ### Implementation approach
 
 Each implementation sub-agent uses `/tdd` (red → green → refactor) per acceptance criterion,