Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
69 changes: 67 additions & 2 deletions .sandcastle/issue-source.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@
*
* Runs inside the outer devcontainer, where `gh` is already authenticated.
*/
import { execFile } from "node:child_process";
import { execFile, spawn } from "node:child_process";
import { promisify } from "node:util";
import { READY_LABEL, type Issue } from "./reduce.ts";
import { READY_LABEL, type Issue, type ReviewOutput } from "./reduce.ts";

const exec = promisify(execFile);

Expand Down Expand Up @@ -39,6 +39,24 @@ export class IssueSource {
return exec("gh", full, { maxBuffer: 16 * 1024 * 1024 });
}

private ghWithInput(args: string[], input: string): Promise<{ stdout: string }> {
return new Promise((resolve, reject) => {
const full = this.repo ? ["-R", this.repo, ...args] : args;
const child = spawn("gh", full, { stdio: ["pipe", "pipe", "pipe"] });
let stdout = "";
let stderr = "";
child.stdout.on("data", (chunk: Buffer) => { stdout += chunk.toString(); });
child.stderr.on("data", (chunk: Buffer) => { stderr += chunk.toString(); });
child.on("close", (code: number | null) => {
if (code !== 0) reject(new Error(`gh exited ${code}: ${stderr}`));
else resolve({ stdout });
});
child.on("error", reject);
child.stdin.write(input);
child.stdin.end();
});
}

/** Open issues carrying the ready label, with dependency refs parsed from body. */
async listReady(): Promise<Issue[]> {
const { stdout } = await this.gh([
Expand Down Expand Up @@ -139,6 +157,53 @@ export class IssueSource {
await this.gh(["issue", "comment", String(n), "--body", body]);
}

/** Return the unified diff for the PR on the given issue's branch. */
async getPrDiff(issueId: number): Promise<string> {
const { stdout } = await this.gh(["pr", "diff", `agent/issue-${issueId}`]);
return stdout;
}

/**
* Post a REQUEST_CHANGES review on the PR for the given issue.
* Uses the GitHub API to attach the structured verdict + filtered inline comments.
* Best-effort: if inline-comment posting fails, falls back to a plain PR comment.
*/
async postPrReview(issueId: number, output: ReviewOutput): Promise<void> {
const { stdout: prJson } = await this.gh([
"pr", "view", `agent/issue-${issueId}`,
"--json", "number,headRefOid",
]);
const prInfo = JSON.parse(prJson) as { number: number; headRefOid: string };

const reviewBody = {
commit_id: prInfo.headRefOid,
event: "REQUEST_CHANGES",
body: output.summary,
comments: output.comments.map((c) => ({
path: c.path,
line: c.line,
side: "RIGHT",
body: c.body,
})),
};

try {
await this.ghWithInput(
["api", `repos/{owner}/{repo}/pulls/${prInfo.number}/reviews`, "--method", "POST", "--input", "-"],
JSON.stringify(reviewBody),
);
} catch (err) {
// Inline comments may fail when lines are no longer in the diff; fall back to a
// plain top-level review without inline comments so the verdict is still visible.
console.warn(`[issue-source] inline review post failed — falling back to body-only review:`, err);
const bodyOnly = { ...reviewBody, comments: [] };
await this.ghWithInput(
["api", `repos/{owner}/{repo}/pulls/${prInfo.number}/reviews`, "--method", "POST", "--input", "-"],
JSON.stringify(bodyOnly),
);
}
}

/** Open a PR for an already-pushed head branch. Returns the PR URL. */
async openPr(opts: {
head: string;
Expand Down
64 changes: 57 additions & 7 deletions .sandcastle/main.ts
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,10 @@ import { existsSync, mkdirSync, openSync, closeSync, writeFileSync } from "node:
import { promisify } from "node:util";
import { fileURLToPath } from "node:url";
import { join, dirname } from "node:path";
import { reduce, READY_LABEL, type State, type Pr, type Policy, type Mode } from "./reduce.ts";
import { reduce, READY_LABEL, reviewVerdict, type State, type Pr, type Policy, type Mode } from "./reduce.ts";
import { IssueSource } from "./issue-source.ts";
import { SandboxRunner, SANDBOX_LABEL, PROJECT_LABEL_KEY, deriveProject } from "./sandbox-runner.ts";
import { ReviewerAdapter } from "./reviewer-adapter.ts";

export { SANDBOX_LABEL };

Expand Down Expand Up @@ -612,6 +613,12 @@ async function main(): Promise<void> {
network,
});

const reviewer = new ReviewerAdapter({
model: process.env.AGENTIC_MODEL,
cwd: repoRoot,
baseBranch: base,
});

const inFlight: number[] = [];
// Issue ids whose PrMerged event has been dispatched to the reducer.
// Synchronous check + add before the first await ensures concurrent smee
Expand Down Expand Up @@ -771,14 +778,57 @@ async function main(): Promise<void> {
};
const finishActions = reduce(finishState, { type: "SandboxFinished", issue: n, pr: openedPr });
for (const action of finishActions) {
if (action.type === "EnableAutoMerge") {
console.log(`[${mode}] #${n} → enabling auto-merge`);
if (action.type === "StartReview") {
console.log(`[${mode}] #${n} → running AI review`);
let verdict: import("./reduce.ts").ReviewVerdict;
let reviewOutput: import("./reduce.ts").ReviewOutput | null = null;
try {
await issues.enableAutoMerge(action.pr.issue);
const ghIssue = await issues.get(n);
const diff = await issues.getPrDiff(n);
const result = await reviewer.runReview(
{
issueNumber: n,
issueTitle: ghIssue.title,
issueBody: ghIssue.body,
branch: `agent/issue-${n}`,
prNumber: n,
},
diff,
);
verdict = result.verdict;
reviewOutput = result.output;
} catch (err) {
// A config gap (auto-merge disabled, no required check) must not
// crash the orchestrator — the PR stays open for a human to merge.
console.error(`[${mode}] #${n} could not enable auto-merge:`, err);
// Fail-safe: any reviewer crash → changes-requested, never auto-merges.
console.warn(`[${mode}] #${n} reviewer failed — defaulting to changes-requested:`, err);
verdict = reviewVerdict(null);
}
console.log(`[${mode}] #${n} review verdict: ${verdict}`);

const reviewActions = reduce(finishState, {
type: "ReviewFinished",
issue: n,
pr: action.pr,
verdict,
});
for (const reviewAction of reviewActions) {
if (reviewAction.type === "EnableAutoMerge") {
console.log(`[${mode}] #${n} review passed → enabling auto-merge`);
try {
await issues.enableAutoMerge(reviewAction.pr.issue);
} catch (err) {
console.error(`[${mode}] #${n} could not enable auto-merge:`, err);
}
}
if (reviewAction.type === "WaitForHuman") {
console.log(`[${mode}] #${n} review requested changes → parked for human`);
if (reviewOutput) {
try {
await issues.postPrReview(n, reviewOutput);
} catch (err) {
console.error(`[${mode}] #${n} could not post PR review:`, err);
}
}
}
}
}
if (action.type === "WaitForHuman") {
Expand Down
65 changes: 62 additions & 3 deletions .sandcastle/reduce.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
*/
import { test } from "node:test";
import assert from "node:assert/strict";
import { reduce, READY_LABEL, type State, type CiStatus, type Pr } from "./reduce.ts";
import { reduce, READY_LABEL, reviewVerdict, type State, type CiStatus, type Pr, type ReviewOutput } from "./reduce.ts";
import { parseBlockedBy } from "./issue-source.ts";
import { sweepOrphanedSandboxes, ensureSandboxNetwork, parseConcurrency, withRetry, resetAgentBranch, refreshBase, validateSignature, classifyDelivery, parseSmeeEvent, parseOrchEnv, resolveCredentials, resolveRunMode, resolveDockerHost } from "./main.ts";
import { createHmac } from "node:crypto";
Expand Down Expand Up @@ -210,11 +210,11 @@ test("PrMerged unblocks all fully-resolved dependents in one call", () => {

// ─── SandboxFinished / EnableAutoMerge / WaitForHuman ───────────────────────

test("afk mode + SandboxFinished → EnableAutoMerge", () => {
test("afk mode + SandboxFinished → StartReview (not EnableAutoMerge)", () => {
const pr: Pr = { issue: 1, ciStatus: "pending", merged: false };
const state = base({ policy: { concurrency: 1, mode: "afk" } });
assert.deepEqual(reduce(state, { type: "SandboxFinished", issue: 1, pr }), [
{ type: "EnableAutoMerge", pr },
{ type: "StartReview", pr },
]);
});

Expand All @@ -226,6 +226,65 @@ test("hitl mode + SandboxFinished → WaitForHuman", () => {
]);
});

test("ReviewFinished pass → EnableAutoMerge", () => {
const pr: Pr = { issue: 1, ciStatus: "pending", merged: false };
const state = base({ policy: { concurrency: 1, mode: "afk" } });
assert.deepEqual(reduce(state, { type: "ReviewFinished", issue: 1, pr, verdict: "pass" }), [
{ type: "EnableAutoMerge", pr },
]);
});

test("ReviewFinished changes-requested → WaitForHuman", () => {
const pr: Pr = { issue: 1, ciStatus: "pending", merged: false };
const state = base({ policy: { concurrency: 1, mode: "afk" } });
assert.deepEqual(
reduce(state, { type: "ReviewFinished", issue: 1, pr, verdict: "changes-requested" }),
[{ type: "WaitForHuman", pr }],
);
});

test("onTick does not Stop while a PR is awaiting/under review (open, non-conflicting)", () => {
const state = base({
issues: [],
prs: [{ issue: 1, ciStatus: "pending", merged: false }],
policy: { concurrency: 1, mode: "afk" },
});
const actions = reduce(state, { type: "Tick" });
assert.ok(!actions.some((a) => a.type === "Stop"), "open PR under review keeps loop alive");
});

// ─── reviewVerdict ───────────────────────────────────────────────────────────

test("reviewVerdict: valid pass output → 'pass'", () => {
const output: ReviewOutput = { verdict: "pass", summary: "LGTM", comments: [] };
assert.equal(reviewVerdict(output), "pass");
});

test("reviewVerdict: valid changes-requested output → 'changes-requested'", () => {
const output: ReviewOutput = {
verdict: "changes-requested",
summary: "needs work",
comments: [{ path: "foo.ts", line: 1, body: "fix this" }],
};
assert.equal(reviewVerdict(output), "changes-requested");
});

test("reviewVerdict: Error → 'changes-requested' (fail-safe)", () => {
assert.equal(reviewVerdict(new Error("extraction failed")), "changes-requested");
});

test("reviewVerdict: null → 'changes-requested' (fail-safe)", () => {
assert.equal(reviewVerdict(null), "changes-requested");
});

test("reviewVerdict: undefined → 'changes-requested' (fail-safe)", () => {
assert.equal(reviewVerdict(undefined), "changes-requested");
});

test("reviewVerdict: malformed object (no verdict field) → 'changes-requested' (fail-safe)", () => {
assert.equal(reviewVerdict({ summary: "hmm" } as unknown as ReviewOutput), "changes-requested");
});

test("afk mode + Tick with open PR and nothing else ready → no Stop (waiting for CI)", () => {
const state = base({
issues: [{ id: 1, labels: [], blockedBy: [] }],
Expand Down
39 changes: 38 additions & 1 deletion .sandcastle/reduce.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,20 @@ export const READY_LABEL = "ready-for-agent";

export type CiStatus = "none" | "pending" | "success" | "failure" | "conflicting";

export type ReviewVerdict = "pass" | "changes-requested";

export interface ReviewComment {
readonly path: string;
readonly line: number;
readonly body: string;
}

export interface ReviewOutput {
readonly verdict: ReviewVerdict;
readonly summary: string;
readonly comments: ReadonlyArray<ReviewComment>;
}

export interface Issue {
readonly id: number;
readonly labels: string[];
Expand Down Expand Up @@ -56,10 +70,12 @@ export type Event =
| { type: "Tick" }
| { type: "PrMerged"; pr: Pr }
| { type: "SandboxFinished"; issue: number; pr: Pr }
| { type: "SandboxFailed"; issue: number };
| { type: "SandboxFailed"; issue: number }
| { type: "ReviewFinished"; issue: number; pr: Pr; verdict: ReviewVerdict };

export type Action =
| { type: "StartSandbox"; issueId: number }
| { type: "StartReview"; pr: Pr }
| { type: "EnableAutoMerge"; pr: Pr }
| { type: "Relabel"; issueId: number; label: string }
| { type: "WaitForHuman"; pr: Pr }
Expand All @@ -73,6 +89,8 @@ export function reduce(state: State, event: Event): Action[] {
return onPrMerged(state, event.pr);
case "SandboxFinished":
return onSandboxFinished(state, event);
case "ReviewFinished":
return onReviewFinished(event);
// SandboxFailed lands in a later slice.
default:
return [];
Expand Down Expand Up @@ -113,11 +131,30 @@ function onPrMerged(state: State, mergedPr: Pr): Action[] {

function onSandboxFinished(state: State, event: { issue: number; pr: Pr }): Action[] {
if (state.policy.mode === "afk") {
return [{ type: "StartReview", pr: event.pr }];
}
return [{ type: "WaitForHuman", pr: event.pr }];
}

function onReviewFinished(event: { issue: number; pr: Pr; verdict: ReviewVerdict }): Action[] {
if (event.verdict === "pass") {
return [{ type: "EnableAutoMerge", pr: event.pr }];
}
return [{ type: "WaitForHuman", pr: event.pr }];
}

/**
* Map a structured review output (or any extraction/validation error) to a
* merge verdict. Fail-safe: any missing or malformed result → "changes-requested"
* so a reviewer crash never silently auto-merges.
*/
export function reviewVerdict(result: ReviewOutput | Error | null | undefined): ReviewVerdict {
if (!result || result instanceof Error) return "changes-requested";
const v = (result as ReviewOutput).verdict;
if (v === "pass" || v === "changes-requested") return v;
return "changes-requested";
}

function onTick(state: State): Action[] {
const ready = state.issues
.filter((issue) => isReady(issue, state))
Expand Down
27 changes: 27 additions & 0 deletions .sandcastle/review-extraction.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
Based on the review you just completed, emit a structured verdict as a JSON object inside an `<output>` XML tag.

The JSON must match this schema exactly:

```json
{
"verdict": "pass" | "changes-requested",
"summary": "<one paragraph summarising the verdict>",
"comments": [
{
"path": "<file path relative to repo root>",
"line": <line number in the diff, as a positive integer>,
"body": "<specific, actionable feedback>"
}
]
}
```

- `verdict`: `"pass"` if all acceptance criteria are met, `"changes-requested"` otherwise.
- `summary`: A concise paragraph explaining the overall verdict.
- `comments`: Empty array `[]` for a pass. For changes-requested, list only comments for lines that appear in the diff (path + line must correspond to an actual changed line). If unsure of the exact line, omit that comment rather than guessing.

Emit **only** the `<output>` block — no prose before or after it.

<output>
{"verdict": "...", "summary": "...", "comments": [...]}
</output>
25 changes: 25 additions & 0 deletions .sandcastle/review-prompt.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
You are an AI code reviewer. Your task is to review the changes on the PR branch against the issue's acceptance criteria and the repository's ADR conventions.

## Issue

**#{{ISSUE_NUMBER}}: {{ISSUE_TITLE}}**

{{ISSUE_BODY}}

## What to review

The branch `{{BRANCH}}` has been pushed and a PR is open. Your job:

1. Run `git diff origin/{{BASE_BRANCH}}...{{BRANCH}}` to see the diff.
2. Read any ADR files in `docs/adr/` that are relevant to the changes.
3. Check each acceptance criterion from the issue body against the diff.
4. Form a verdict: **pass** if all acceptance criteria are met and no convention is violated; **changes-requested** otherwise.

## Rules

- Read-only: do NOT push, commit, or modify any files.
- Focus on correctness and spec compliance — not style nit-picks.
- A `pass` verdict means the implementation is ready to merge as-is.
- A `changes-requested` verdict must be accompanied by specific, actionable comments.

When you have completed your review, summarize your findings clearly. You will be asked to emit a structured output afterwards.
Loading
Loading