diff --git a/.changeset/reliable-account-switching.md b/.changeset/reliable-account-switching.md new file mode 100644 index 000000000..867e1fe88 --- /dev/null +++ b/.changeset/reliable-account-switching.md @@ -0,0 +1,5 @@ +--- +"executor": patch +--- + +Make multi-account server profiles reliable across the CLI and desktop, including same-origin switching, unambiguous logout, persistent remote selection, and immediate self-host membership revocation. diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 937936e3b..c8a4f7aca 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -73,45 +73,3 @@ jobs: - run: bun install --frozen-lockfile - run: bun run test - - desktop-smoke: - name: Desktop smoke build - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - - uses: oven-sh/setup-bun@v2 - with: - bun-version: 1.3.11 - - - run: bun install --frozen-lockfile - - - name: Build web app - run: bun run --filter @executor-js/local build - - - name: Build bundled executor - env: - BUN_TARGET: bun-linux-x64 - run: bun ./scripts/build-sidecar.ts - working-directory: apps/desktop - - - name: Build Electron main/preload/renderer - run: bunx --bun electron-vite build - working-directory: apps/desktop - - selfhost-docker-smoke: - name: Self-host Docker image - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v3 - - - name: Build self-host image - uses: docker/build-push-action@v6 - with: - context: . 
- file: apps/host-selfhost/Dockerfile - push: false - tags: executor-selfhost:ci diff --git a/.github/workflows/e2e.yml b/.github/workflows/e2e.yml new file mode 100644 index 000000000..452b4daa8 --- /dev/null +++ b/.github/workflows/e2e.yml @@ -0,0 +1,669 @@ +name: End-to-end + +on: + pull_request: + push: + branches: + - main + schedule: + - cron: "23 7 * * *" + workflow_dispatch: + +permissions: + contents: read + +concurrency: + group: e2e-${{ github.event_name }}-${{ github.ref }} + cancel-in-progress: ${{ github.event_name != 'schedule' }} + +jobs: + portable: + name: Portable / ${{ matrix.project }} + runs-on: ubuntu-latest + timeout-minutes: 45 + strategy: + fail-fast: false + matrix: + include: + - project: harness + script: test:harness + browser: false + claude: false + opencode: false + - project: clients + script: test:clients + browser: false + claude: true + opencode: false + - project: cloud-hermetic + script: test:cloud:hermetic + browser: true + claude: true + opencode: true + - project: selfhost-hermetic + script: test:selfhost:hermetic + browser: true + claude: true + opencode: true + - project: cloudflare-hermetic + script: test:cloudflare:hermetic + browser: true + claude: false + opencode: false + - project: local + script: test:local + browser: true + claude: false + opencode: false + env: + E2E_REQUIRED_CAPABILITY_MODE: required + steps: + - uses: actions/checkout@v4 + with: + persist-credentials: false + + - uses: oven-sh/setup-bun@v2 + with: + bun-version: 1.3.11 + + - uses: actions/setup-node@v4 + with: + node-version: 22 + + - name: Install dependencies + run: bun install --frozen-lockfile + + - name: Install browser runtime + if: matrix.browser + run: bunx playwright install --with-deps chromium + + - name: Install pinned Claude Code client + if: matrix.claude + shell: bash + run: | + bun install --global @anthropic-ai/claude-code@2.1.195 + echo "E2E_CLAUDE_CODE_BIN=$(command -v claude)" >> "$GITHUB_ENV" + echo 
"E2E_CLAUDE_CODE_REQUIRED=1" >> "$GITHUB_ENV" + echo "E2E_CLAUDE_CODE_VERSION=2.1.195" >> "$GITHUB_ENV" + + - name: Install pinned OpenCode client + if: matrix.opencode + shell: bash + run: | + bun install --global opencode-ai@1.17.11 + command -v opencode + echo "E2E_OPENCODE_REQUIRED=1" >> "$GITHUB_ENV" + + - name: Run ${{ matrix.project }} + run: bun run --cwd e2e ${{ matrix.script }} + + - name: Sanitize evidence + id: sanitize + if: always() + run: bun e2e/scripts/sanitize-evidence.ts --trusted-project "${{ matrix.project }}" + + - name: Upload evidence + if: always() && steps.sanitize.outcome == 'success' + uses: actions/upload-artifact@v4 + with: + name: e2e-${{ matrix.project }}-${{ github.run_attempt }} + path: e2e/runs + if-no-files-found: ignore + retention-days: 14 + + scheduled-deterministic-gate: + name: Scheduled deterministic coverage is required + if: ${{ always() && github.event_name == 'schedule' }} + needs: portable + runs-on: ubuntu-latest + env: + PORTABLE_RESULT: ${{ needs.portable.result }} + steps: + - name: Require a successful deterministic lane + shell: bash + run: | + if [[ "$PORTABLE_RESULT" != "success" ]]; then + echo "::error::Scheduled e2e requires the deterministic portable matrix, got: $PORTABLE_RESULT" + exit 1 + fi + + selfhost-production: + name: Portable / selfhost production image + if: github.event_name != 'schedule' + runs-on: ubuntu-latest + timeout-minutes: 60 + env: + E2E_REQUIRED_CAPABILITY_MODE: required + steps: + - uses: actions/checkout@v4 + with: + persist-credentials: false + + - uses: oven-sh/setup-bun@v2 + with: + bun-version: 1.3.11 + + - uses: actions/setup-node@v4 + with: + node-version: 22 + + - name: Install dependencies + run: bun install --frozen-lockfile + + - name: Install browser runtime + run: bunx playwright install --with-deps chromium + + - name: Install pinned real clients + shell: bash + run: | + bun install --global @anthropic-ai/claude-code@2.1.195 + bun install --global opencode-ai@1.17.11 + 
echo "E2E_CLAUDE_CODE_BIN=$(command -v claude)" >> "$GITHUB_ENV" + echo "E2E_CLAUDE_CODE_REQUIRED=1" >> "$GITHUB_ENV" + echo "E2E_CLAUDE_CODE_VERSION=2.1.195" >> "$GITHUB_ENV" + command -v opencode + echo "E2E_OPENCODE_REQUIRED=1" >> "$GITHUB_ENV" + + - name: Run the production image journeys + run: bun run --cwd e2e test:selfhost-docker:hermetic + + - name: Sanitize evidence + id: sanitize + if: always() + run: >- + bun e2e/scripts/sanitize-evidence.ts + --trusted-project selfhost-docker-hermetic + + - name: Upload evidence + if: always() && steps.sanitize.outcome == 'success' + uses: actions/upload-artifact@v4 + with: + name: e2e-selfhost-production-${{ github.run_attempt }} + path: e2e/runs + if-no-files-found: ignore + retention-days: 14 + + desktop-linux: + name: Desktop GUI / Linux packaged app + if: github.event_name != 'schedule' + runs-on: ubuntu-latest + timeout-minutes: 60 + env: + E2E_REQUIRED_CAPABILITY_MODE: required + steps: + - uses: actions/checkout@v4 + with: + persist-credentials: false + + - uses: oven-sh/setup-bun@v2 + with: + bun-version: 1.3.11 + + - uses: actions/setup-node@v4 + with: + node-version: 22 + + - name: Install dependencies + run: bun install --frozen-lockfile + + - name: Install Electron runtime dependencies + run: bunx playwright install --with-deps chromium + + - name: Run development Electron journeys + run: xvfb-run -a bun run --cwd e2e test:desktop + + - name: Run packaged Electron journeys + run: xvfb-run -a bun run --cwd e2e test:desktop-packaged + + - name: Sanitize evidence + id: sanitize + if: always() + run: >- + bun e2e/scripts/sanitize-evidence.ts + --trusted-project desktop + --trusted-project desktop-packaged + + - name: Upload evidence + if: always() && steps.sanitize.outcome == 'success' + uses: actions/upload-artifact@v4 + with: + name: e2e-desktop-linux-${{ github.run_attempt }} + path: e2e/runs + if-no-files-found: ignore + retention-days: 14 + + desktop-native-package: + name: Native desktop package / ${{ 
matrix.platform }} + if: github.event_name != 'schedule' + runs-on: ${{ matrix.os }} + timeout-minutes: 45 + strategy: + fail-fast: false + matrix: + include: + - os: macos-latest + platform: macOS arm64 + builder-flag: --mac + bun-target: bun-darwin-arm64 + - os: windows-latest + platform: Windows x64 + builder-flag: --win + bun-target: bun-windows-x64 + steps: + - uses: actions/checkout@v4 + with: + persist-credentials: false + + - uses: oven-sh/setup-bun@v2 + with: + bun-version: 1.3.11 + + - uses: actions/setup-node@v4 + with: + node-version: 22 + + - name: Install dependencies + run: bun install --frozen-lockfile + + - name: Build embedded web app + run: bun run --filter @executor-js/local build + + - name: Build bundled executor + env: + BUN_TARGET: ${{ matrix.bun-target }} + run: bun ./scripts/build-sidecar.ts + working-directory: apps/desktop + + - name: Start bundled executor + run: bun run test:smoke + working-directory: apps/desktop + + - name: Build Electron application + run: bunx --bun electron-vite build + working-directory: apps/desktop + + - name: Package unsigned native application + env: + CSC_IDENTITY_AUTO_DISCOVERY: "false" + run: bunx --bun electron-builder --config electron-builder.e2e.config.ts ${{ matrix.builder-flag }} + working-directory: apps/desktop + + desktop-linux-kvm: + name: Desktop VM / Linux KVM packaged app + if: >- + (github.event_name == 'schedule' || github.event_name == 'workflow_dispatch') && + vars.E2E_LINUX_KVM_ENABLED == 'true' + runs-on: [self-hosted, Linux, X64, executor-e2e-kvm] + timeout-minutes: 75 + env: + E2E_DESKTOP_GUI_REQUIRED: "1" + E2E_KVM_BASE_IMAGE: ${{ vars.E2E_KVM_BASE_IMAGE }} + E2E_KVM_REPOSITORY_SCOPE: repo-${{ github.repository_id }} + E2E_KVM_LEDGER_DIR: /var/tmp/executor-kvm-ledgers/repo-${{ github.repository_id }} + E2E_KVM_CLEANUP_LEDGER: /var/tmp/executor-kvm-ledgers/repo-${{ github.repository_id }}/run-${{ github.run_id }}-attempt-${{ github.run_attempt }}.json + E2E_KVM_STALE_TTL_MS: "21600000" 
+ E2E_KVM_WORK_ROOT: ${{ vars.E2E_KVM_WORK_ROOT }} + E2E_KVM_RUN_SCOPE: repo-${{ github.repository_id }}-run-${{ github.run_id }}-attempt-${{ github.run_attempt }}-desktop-linux-kvm + E2E_REQUIRED_CAPABILITY_MODE: required + steps: + - uses: actions/checkout@v4 + with: + persist-credentials: false + + - uses: oven-sh/setup-bun@v2 + with: + bun-version: 1.3.11 + + - name: Install dependencies + run: bun install --frozen-lockfile + + - name: Install pinned Claude Code client + shell: bash + run: | + bun install --global @anthropic-ai/claude-code@2.1.195 + echo "E2E_CLAUDE_CODE_BIN=$(command -v claude)" >> "$GITHUB_ENV" + echo "E2E_CLAUDE_CODE_REQUIRED=1" >> "$GITHUB_ENV" + echo "E2E_CLAUDE_CODE_VERSION=2.1.195" >> "$GITHUB_ENV" + + - name: Sweep expired Linux KVM guest scopes + run: bun e2e/scripts/cleanup-linux-kvm.ts sweep + + - name: Verify requested Linux KVM capabilities + run: bun e2e/scripts/preflight-vm-lane.ts linux-kvm + + - name: Run packaged app in a real KVM desktop guest + run: bun run --cwd e2e test:desktop-kvm + + - name: Cleanup exact Linux KVM guest scope + if: ${{ always() }} + run: bun e2e/scripts/cleanup-linux-kvm.ts "$E2E_KVM_CLEANUP_LEDGER" + + - name: Sanitize evidence + id: sanitize + if: always() + run: bun e2e/scripts/sanitize-evidence.ts --trusted-project desktop-kvm + + - name: Upload evidence + if: always() && steps.sanitize.outcome == 'success' + uses: actions/upload-artifact@v4 + with: + name: e2e-desktop-linux-kvm-${{ github.run_attempt }} + path: e2e/runs + if-no-files-found: error + retention-days: 14 + + live-provider-drift: + name: Live provider drift / ${{ matrix.target }} + if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' + continue-on-error: true + runs-on: ubuntu-latest + timeout-minutes: 60 + strategy: + fail-fast: false + matrix: + include: + - target: cloud + script: test:live:cloud + - target: selfhost + script: test:live:selfhost + - target: cloudflare + script: test:live:cloudflare + env: + 
E2E_REQUIRED_CAPABILITY_MODE: required + steps: + - uses: actions/checkout@v4 + with: + persist-credentials: false + + - uses: oven-sh/setup-bun@v2 + with: + bun-version: 1.3.11 + + - uses: actions/setup-node@v4 + with: + node-version: 22 + + - name: Install dependencies + run: bun install --frozen-lockfile + + - name: Install browser runtime + run: bunx playwright install --with-deps chromium + + - name: Exercise live providers + run: bun run --cwd e2e ${{ matrix.script }} + + - name: Sanitize evidence + id: sanitize + if: always() + run: bun e2e/scripts/sanitize-evidence.ts --trusted-project "${{ matrix.target }}" + + - name: Upload evidence + if: always() && steps.sanitize.outcome == 'success' + uses: actions/upload-artifact@v4 + with: + name: e2e-live-${{ matrix.target }}-${{ github.run_attempt }} + path: e2e/runs + if-no-files-found: ignore + retention-days: 14 + + tart-service-vms: + name: Service VM / Tart ${{ matrix.guest }} + if: >- + (github.event_name == 'schedule' || github.event_name == 'workflow_dispatch') && + vars.E2E_TART_VM_ENABLED == 'true' + runs-on: [self-hosted, macOS, executor-e2e] + timeout-minutes: 60 + strategy: + fail-fast: false + matrix: + include: + - guest: macOS + vm-os: macos + project: cli-macos + script: test:cli:macos + - guest: Linux + vm-os: linux + project: cli-linux + script: test:cli:linux + env: + E2E_VM_RUN_SCOPE: repo-${{ github.repository_id }}-run-${{ github.run_id }}-attempt-${{ github.run_attempt }}-tart-${{ matrix.vm-os }} + E2E_REQUIRED_CAPABILITY_MODE: required + steps: + - uses: actions/checkout@v4 + with: + persist-credentials: false + + - uses: oven-sh/setup-bun@v2 + with: + bun-version: 1.3.11 + + - name: Install dependencies + run: bun install --frozen-lockfile + + - name: Sweep expired managed Tart resources + if: matrix.vm-os == 'macos' + run: bun e2e/scripts/cleanup-vms.ts tart --sweep-expired --minimum-age-hours 6 + + - name: Verify requested Tart capabilities + run: bun e2e/scripts/preflight-vm-lane.ts 
tart ${{ matrix.vm-os }} + + - name: Run real guest reboot journeys + run: bun run --cwd e2e ${{ matrix.script }} + + - name: Cleanup exact Tart guest scope + if: ${{ always() }} + run: bun e2e/scripts/cleanup-vms.ts tart + + - name: Sanitize evidence + id: sanitize + if: always() + run: bun e2e/scripts/sanitize-evidence.ts --trusted-project "${{ matrix.project }}" + + - name: Upload evidence + if: always() && steps.sanitize.outcome == 'success' + uses: actions/upload-artifact@v4 + with: + name: e2e-tart-${{ matrix.vm-os }}-${{ github.run_attempt }} + path: e2e/runs + if-no-files-found: ignore + retention-days: 14 + + windows-service-vm: + name: Service VM / Windows EC2 + if: >- + (github.event_name == 'schedule' || github.event_name == 'workflow_dispatch') && + vars.E2E_WINDOWS_VM_ENABLED == 'true' + runs-on: ubuntu-latest + timeout-minutes: 60 + env: + AWS_ACCESS_KEY_ID: ${{ secrets.E2E_AWS_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.E2E_AWS_SECRET_ACCESS_KEY }} + AWS_SESSION_TOKEN: ${{ secrets.E2E_AWS_SESSION_TOKEN }} + E2E_VM_RUN_SCOPE: repo-${{ github.repository_id }}-run-${{ github.run_id }}-attempt-${{ github.run_attempt }}-ec2-windows + E2E_REQUIRED_CAPABILITY_MODE: required + steps: + - uses: actions/checkout@v4 + with: + persist-credentials: false + + - uses: oven-sh/setup-bun@v2 + with: + bun-version: 1.3.11 + + - name: Install dependencies + run: bun install --frozen-lockfile + + - name: Verify requested EC2 capabilities + run: bun e2e/scripts/preflight-vm-lane.ts ec2 windows + + - name: Run the real Windows reboot journeys + run: bun run --cwd e2e test:cli:windows + + - name: Cleanup exact EC2 guest scope + if: ${{ always() }} + run: bun e2e/scripts/cleanup-vms.ts ec2 + + - name: Sweep expired managed EC2 resources + if: ${{ always() && github.event_name == 'schedule' }} + run: bun e2e/scripts/cleanup-vms.ts ec2 --sweep-expired --minimum-age-hours 6 + + - name: Sanitize evidence + id: sanitize + if: always() + run: bun 
e2e/scripts/sanitize-evidence.ts --trusted-project cli-windows + + - name: Upload evidence + if: always() && steps.sanitize.outcome == 'success' + uses: actions/upload-artifact@v4 + with: + name: e2e-windows-service-vm-${{ github.run_attempt }} + path: e2e/runs + if-no-files-found: ignore + retention-days: 14 + + aggregate-evidence: + name: Evidence publication + if: ${{ always() && !cancelled() }} + needs: + - portable + - scheduled-deterministic-gate + - selfhost-production + - desktop-linux + - desktop-native-package + - desktop-linux-kvm + - live-provider-drift + - tart-service-vms + - windows-service-vm + runs-on: ubuntu-latest + timeout-minutes: 20 + steps: + - uses: actions/checkout@v4 + with: + persist-credentials: false + + - uses: oven-sh/setup-bun@v2 + with: + bun-version: 1.3.11 + + - name: Install evidence viewer dependencies + run: bun install --frozen-lockfile + + - name: Download target evidence without flattening + uses: actions/download-artifact@v4 + with: + pattern: e2e-*-${{ github.run_attempt }} + path: ${{ runner.temp }}/executor-e2e-artifacts + merge-multiple: false + + - name: Merge attempt directories without overwrites + run: >- + bun e2e/scripts/merge-evidence.ts + --input-dir "${{ runner.temp }}/executor-e2e-artifacts" + --output-dir e2e/runs + --run-attempt "${{ github.run_attempt }}" + --trusted-runs-output "${{ runner.temp }}/executor-e2e-trusted-runs.json" + + - name: Rebuild the browsable matrix + run: bun e2e/scripts/rebuild-viewer.ts + + - name: Sanitize the merged publication bundle + run: >- + bun e2e/scripts/sanitize-evidence.ts + --runs-dir e2e/runs + --trusted-lanes "${{ runner.temp }}/executor-e2e-trusted-runs.json" + + - name: Upload the downloadable evidence bundle + id: evidence-upload + uses: actions/upload-artifact@v4 + with: + name: e2e-evidence-matrix-${{ github.run_attempt }} + path: e2e/runs + if-no-files-found: error + retention-days: 14 + + - name: Select safe static publication policy + id: publication_policy + 
env: + CDN_BASE_URL: ${{ vars.EXECUTOR_PREVIEW_CDN_URL }} + IS_FORK_PULL_REQUEST: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.repo.full_name != github.repository }} + PR_NUMBER: ${{ github.event.pull_request.number }} + REPOSITORY_ID: ${{ github.repository_id }} + RUN_ATTEMPT: ${{ github.run_attempt }} + RUN_ID: ${{ github.run_id }} + shell: bash + run: | + if [[ "$IS_FORK_PULL_REQUEST" == "true" ]]; then + echo "enabled=false" >> "$GITHUB_OUTPUT" + echo "reason=Fork pull requests do not receive R2 publication credentials." >> "$GITHUB_OUTPUT" + exit 0 + fi + if [[ -z "$CDN_BASE_URL" ]]; then + echo "enabled=false" >> "$GITHUB_OUTPUT" + echo "reason=EXECUTOR_PREVIEW_CDN_URL is not configured." >> "$GITHUB_OUTPUT" + exit 0 + fi + scope="non-pr" + if [[ -n "$PR_NUMBER" ]]; then + scope="pr-$PR_NUMBER" + fi + prefix="e2e/repo-$REPOSITORY_ID/$scope/run-$RUN_ID/attempt-$RUN_ATTEMPT" + echo "enabled=true" >> "$GITHUB_OUTPUT" + echo "object_prefix=$prefix" >> "$GITHUB_OUTPUT" + + - name: Publish sanitizer-approved evidence to public R2 + id: static_publication + if: steps.publication_policy.outputs.enabled == 'true' + env: + PUBLIC_BASE_URL: ${{ vars.EXECUTOR_PREVIEW_CDN_URL }} + R2_ACCESS_KEY_ID: ${{ secrets.R2_ACCESS_KEY_ID }} + R2_ENDPOINT: https://${{ secrets.CLOUDFLARE_ACCOUNT_ID }}.r2.cloudflarestorage.com + R2_SECRET_ACCESS_KEY: ${{ secrets.R2_SECRET_ACCESS_KEY }} + run: >- + bun e2e/scripts/publish-evidence-r2.ts + --runs-dir e2e/runs + --bucket executor-previews + --prefix "${{ steps.publication_policy.outputs.object_prefix }}" + --endpoint "$R2_ENDPOINT" + --public-base-url "$PUBLIC_BASE_URL" + --source-revision "$GITHUB_SHA" + --trusted-lanes "${{ runner.temp }}/executor-e2e-trusted-runs.json" + + - name: Publish hosted scenario and run links + if: ${{ always() && steps.static_publication.outcome == 'success' }} + env: + ARTIFACT_URL: ${{ steps.evidence-upload.outputs.artifact-url }} + VIEWER_URL: ${{ 
steps.static_publication.outputs.viewer_url }} + WORKFLOW_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} + shell: bash + run: | + bun e2e/scripts/evidence-publication-summary.ts \ + --manifest e2e/runs/manifest.json \ + --viewer-url "$VIEWER_URL" >> "$GITHUB_STEP_SUMMARY" + { + echo + echo "- [Download the sanitized evidence bundle]($ARTIFACT_URL)" + echo "- [Open this workflow run and its source jobs]($WORKFLOW_URL)" + } >> "$GITHUB_STEP_SUMMARY" + + - name: Publish honest artifact-only fallback + if: ${{ always() && steps.static_publication.outcome != 'success' }} + env: + ARTIFACT_URL: ${{ steps.evidence-upload.outputs.artifact-url }} + PUBLICATION_ENABLED: ${{ steps.publication_policy.outputs.enabled }} + PUBLICATION_REASON: ${{ steps.publication_policy.outputs.reason }} + WORKFLOW_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} + shell: bash + run: | + reason="$PUBLICATION_REASON" + if [[ "$PUBLICATION_ENABLED" == "true" ]]; then + reason="Static publication was attempted but did not pass public readback verification." + fi + { + echo "## End-to-end evidence" + echo + echo "- [Download the sanitized evidence bundle]($ARTIFACT_URL)" + echo "- [Open this workflow run and its source jobs]($WORKFLOW_URL)" + echo + echo "$reason" + echo "No hosted viewer URL is available for this attempt. Extract the bundle and serve its directory locally to open \`index.html\`." + } >> "$GITHUB_STEP_SUMMARY" diff --git a/RUNNING.md b/RUNNING.md index 55c1c6fba..3e7dec6ee 100644 --- a/RUNNING.md +++ b/RUNNING.md @@ -41,9 +41,26 @@ working instance of X" — read them before inventing a boot path. `e2e/AGENTS.md` covers writing scenarios. Operationally: -- `cd e2e && bun run test` boots dev servers and runs everything; - `--project cloud|selfhost` narrows. `E2E_CLOUD_URL`/`E2E_SELFHOST_URL` - attach to an already-running server instead of booting. 
+- `cd e2e && bun run test` runs the portable hermetic projects: harness unit + tests, client adapters, cloud, selfhost, local, and Cloudflare. The cloud and + selfhost projects exclude scenarios whose purpose is to detect drift in a + public service. +- `bun run test:cloud`, `bun run test:selfhost`, and + `bun run test:cloudflare` run the corresponding full project, including + live-provider checks. Their `:hermetic` variants match the pull-request + gates. +- `bun run test:selfhost-docker:hermetic` runs the same journeys against the + production Docker artifact. `bun run test:desktop-packaged` builds and drives + the unsigned packaged Electron application and needs a GUI display. +- `E2E_CLOUD_URL` and `E2E_SELFHOST_URL` attach to an already-running server + instead of booting one. +- Project names describe execution policy, while `E2E_TARGET` describes the + deployed product. For example, `cloud` and `cloud-hermetic` both resolve the + cloud target and use the same boot recipe. +- Local exploration allows a scenario to skip when its target does not offer a + requested Effect service. CI sets `E2E_REQUIRED_CAPABILITY_MODE=required`, so + a missing service promised by the project's matrix fails instead of becoming + a green skip. The matrix is `e2e/src/project-matrix.ts`. - Runs land in `e2e/runs///` — `result.json`, step screenshots, `session.mp4` + `trace.zip` for browser scenarios, and the scenario source as `test.ts`. @@ -69,6 +86,46 @@ collide or attach to each other's servers. `E2E_*_PORT` env vars pin ports explicitly. If a boot reports a squatted port, an old dev server leaked — `bun run reap` (repo root) lists and kills orphaned stacks. +## E2E CI tiers + +The end-to-end workflow separates deterministic product guarantees from +environment and provider drift: + +- Pull requests run harness unit tests, emulator-backed cloud and selfhost, + local, Cloudflare, and the production selfhost image on ephemeral + GitHub-hosted Linux VMs. 
The same VM runs development and packaged Electron + journeys under Xvfb. +- The Cloudflare lane leaves `ENABLE_DEV_AUTH` off. A scoped loopback Access + issuer signs human and service application tokens and serves the team JWKS, + so issuer, audience, expiry, signature, and machine-identity checks remain in + the pull-request gate without a Cloudflare account. +- The client lanes install pinned Claude Code and OpenCode binaries and require + their real-client capabilities. Model traffic goes only to local replay + fixtures, so they need neither a user login nor paid inference. +- macOS and Windows runners build the native unsigned package and start the + bundled Executor binary. They do not claim GUI journey coverage. Linux is the + current required packaged-GUI lane. +- The scheduled or manually dispatched `desktop-linux-kvm` job provides the + stronger guest boundary when `E2E_LINUX_KVM_ENABLED=true`. A labeled x64 + self-hosted runner uses the prepared QCOW2 image at `E2E_KVM_BASE_IMAGE` to + create a disposable cloud-init overlay, launch the packaged app on Xorg, + drive a native window, record its SPICE framebuffer, and discard the domain. + The runner must provide QEMU, libvirt, `virt-install`, `cloud-localds`, SSH, + Xvfb, Openbox, `remote-viewer`, and ffmpeg. The base image must provide the + guest Xorg, Openbox, D-Bus, `xdpyinfo`, `xdotool`, SSH, and Electron runtime + libraries. +- Public Microsoft Graph metadata and the hosted Microsoft OAuth emulator run + nightly and are nonblocking. `bun run test:live` reproduces that group. The + Resend handoff, no-auth API, and PostHog-shaped MCP flows use scoped local + emulators and remain in pull-request coverage. +- Real service installation and reboot in Tart macOS/Linux guests is enabled + only when `E2E_TART_VM_ENABLED=true` and an `executor-e2e` macOS runner has the + documented base images. The EC2 Windows guest is similarly gated by + `E2E_WINDOWS_VM_ENABLED=true` and dedicated AWS credentials. 
These are CLI + service VMs, not macOS or Windows desktop-GUI claims. +- Every e2e job sanitizes `e2e/runs` before artifact upload. If sanitization + fails, the workflow does not publish the unsanitized directory. + ## The dev CLI: live instances, interactively `cd e2e && bun run cli` — the same primitives scenarios use, as commands. diff --git a/apps/cli/src/main.ts b/apps/cli/src/main.ts index 7162d5f30..cc6059517 100644 --- a/apps/cli/src/main.ts +++ b/apps/cli/src/main.ts @@ -121,6 +121,7 @@ import { canAutoStartCliServerConnection, chooseCliServerConnectionWithActiveLocal, parseCliExecutorServerConnection, + readCliServerAuth, type CliServerConnectionSource, withCliServerAuthFallback, } from "./server-connection"; @@ -139,14 +140,17 @@ import { stopWindowsExecutorListenersOnPort, } from "./service"; import { + cliServerConnectionProfileRows, + clearCliServerConnectionProfileAuth, defaultCliServerConnectionProfile, findCliServerConnectionProfile, readCliServerConnectionStore, removeCliServerConnectionProfile, setDefaultCliServerConnectionProfile, upsertCliServerConnectionProfile, + upsertCliServerLoginProfile, + updateCliServerConnectionProfileAfterOAuthRefresh, validateCliServerConnectionProfileName, - type CliServerConnectionStore, } from "./server-profile"; import { buildResumeContentTemplate, @@ -642,10 +646,10 @@ const refreshOAuthConnection = ( const profileName = profileNameFromKey(connection.key); if (profileName) { - yield* upsertCliServerConnectionProfile({ + yield* updateCliServerConnectionProfileAfterOAuthRefresh({ name: profileName, + previousAccessToken: auth.accessToken, connection: nextConnection, - makeDefault: false, }).pipe(Effect.ignore); } return nextConnection; @@ -1944,20 +1948,13 @@ const printServerProfiles = () => return; } - const rows = store.profiles.map((profile) => ({ - marker: profile.name === store.defaultProfile ? 
"*" : " ", - name: profile.name, - kind: profile.connection.kind, - origin: profile.connection.origin, - displayName: profile.connection.displayName, - auth: profile.connection.auth ? "stored-auth" : "env-auth", - })); + const rows = cliServerConnectionProfileRows(store, readCliServerAuth()); const nameWidth = rows.reduce((max, row) => Math.max(max, row.name.length), 4); const kindWidth = rows.reduce((max, row) => Math.max(max, row.kind.length), 4); for (const row of rows) { console.log( - `${row.marker} ${row.name.padEnd(nameWidth)} ${row.kind.padEnd(kindWidth)} ${row.origin} ${row.displayName} ${row.auth}`, + `${row.marker} ${row.name.padEnd(nameWidth)} ${row.kind.padEnd(kindWidth)} ${row.origin} ${row.displayName} ${row.auth} account=${row.account} org=${row.organization}`, ); } }); @@ -2083,45 +2080,6 @@ const sanitizeProfileName = (raw: string): string => { return cleaned.length > 0 ? cleaned : "server"; }; -// The (origin, user, org) a stored oauth profile authenticates as, lets us -// recognize a re-login to the SAME account (update in place) versus a -// different account on the same host (needs its own profile). -const oauthAccountIdentity = (connection: ExecutorServerConnection): string | null => { - const auth = connection.auth; - if (!auth || auth.kind !== "oauth") return null; - const claims = decodeAccessTokenClaims(auth.accessToken); - const sub = typeof claims?.sub === "string" ? claims.sub : undefined; - const org = typeof claims?.org_id === "string" ? claims.org_id : undefined; - return sub && org ? `${connection.origin}|${sub}|${org}` : null; -}; - -// Name a login's profile by the ACCOUNT it authenticates (email, falling back -// to user id), not the hostname, so two accounts on the same server get -// distinct profiles instead of clobbering each other (the way opencode keys -// accounts by email/url). A re-login to the same account reuses its profile. 
-const chooseLoginProfileName = ( - store: CliServerConnectionStore, - account: { - readonly origin: string; - readonly sub?: string; - readonly org?: string; - readonly email?: string; - }, -): string => { - const identity = - account.sub && account.org ? `${account.origin}|${account.sub}|${account.org}` : null; - if (identity) { - const existing = store.profiles.find((p) => oauthAccountIdentity(p.connection) === identity); - if (existing) return existing.name; - } - const base = sanitizeProfileName(account.email ?? account.sub ?? loginHostLabel(account.origin)); - if (!store.profiles.some((p) => p.name === base)) return base; - for (let suffix = 2; ; suffix += 1) { - const candidate = `${base}-${suffix}`; - if (!store.profiles.some((p) => p.name === candidate)) return candidate; - } -}; - // Resolve which server a login/logout targets: an existing profile (--server // or the default) or a bare origin (--base-url). The profile name is decided // later, from the authenticated account. @@ -2207,17 +2165,18 @@ const loginCommand = Command.make( tokens.organizationId ?? (typeof claims?.org_id === "string" ? claims.org_id : undefined); const email = tokens.email; - // Name by account so a different account on the same host doesn't clobber - // an existing login; --server / the default profile / --name pin it. - const store = yield* readCliServerConnectionStore(); - const profileName = explicitName - ? validateCliServerConnectionProfileName(explicitName) - : target.profile - ? target.profile.name - : chooseLoginProfileName(store, { origin: target.origin, sub, org, email }); - - yield* upsertCliServerConnectionProfile({ - name: profileName, + // Choose and save the account-bound profile under one filesystem lock. + // --server, the default profile, and --name intentionally pin the name; + // --base-url selects an existing account identity or creates a new name. + const pinnedName = explicitName ?? 
target.profile?.name; + const saved = yield* upsertCliServerLoginProfile({ + ...(pinnedName ? { name: validateCliServerConnectionProfileName(pinnedName) } : {}), + suggestedName: sanitizeProfileName(email ?? sub ?? loginHostLabel(target.origin)), + account: { + ...(sub ? { subject: sub } : {}), + ...(org ? { organizationId: org } : {}), + ...(email ? { email } : {}), + }, connection: { kind: "http", origin: target.origin, @@ -2231,8 +2190,8 @@ const loginCommand = Command.make( clientId: discovery.clientId, }, }, - makeDefault: true, }); + const profileName = saved.profile.name; console.log(""); console.log(`Logged in to ${target.origin} (profile "${profileName}", now the default).`); @@ -2249,9 +2208,21 @@ const logoutCommand = Command.make( Effect.gen(function* () { const target = yield* resolveLoginOrigin({ baseUrl, server }); const store = yield* readCliServerConnectionStore(); - // --server / default give the profile directly; --base-url matches by origin. - const profile = - target.profile ?? store.profiles.find((p) => p.connection.origin === target.origin) ?? null; + // --server and the default identify one profile. A bare origin may map to + // multiple accounts, so never guess which local credential to delete. + const matchingProfiles = target.profile + ? [target.profile] + : store.profiles.filter((profile) => profile.connection.origin === target.origin); + if (matchingProfiles.length > 1) { + return yield* Effect.fail( + new Error( + `Multiple server profiles use ${target.origin}: ${matchingProfiles + .map((profile) => profile.name) + .join(", ")}. Re-run with --server .`, + ), + ); + } + const profile = matchingProfiles[0] ?? 
null; if (!profile) { console.log(`No stored login for ${target.origin}.`); return; @@ -2260,15 +2231,7 @@ const logoutCommand = Command.make( console.log(`Profile "${profile.name}" has no stored credentials.`); return; } - yield* upsertCliServerConnectionProfile({ - name: profile.name, - connection: { - kind: profile.connection.kind, - origin: profile.connection.origin, - displayName: profile.connection.displayName, - }, - makeDefault: store.defaultProfile === profile.name, - }); + yield* clearCliServerConnectionProfileAuth(profile.name); console.log( `Logged out of ${profile.connection.origin} (cleared credentials for "${profile.name}").`, ); diff --git a/apps/cli/src/server-profile.test.ts b/apps/cli/src/server-profile.test.ts index 465aa75fe..eb8beed67 100644 --- a/apps/cli/src/server-profile.test.ts +++ b/apps/cli/src/server-profile.test.ts @@ -1,18 +1,23 @@ import { afterEach, describe, expect, it } from "@effect/vitest"; import { BunServices } from "@effect/platform-bun"; -import { mkdtempSync, rmSync } from "node:fs"; +import { chmodSync, mkdtempSync, readdirSync, rmSync, statSync, writeFileSync } from "node:fs"; import { tmpdir } from "node:os"; import { join } from "node:path"; import * as Effect from "effect/Effect"; import { + cliServerConnectionProfileRows, + clearCliServerConnectionProfileAuth, defaultCliServerConnectionProfile, parseCliServerConnectionStore, readCliServerConnectionStore, removeCliServerConnectionProfile, setDefaultCliServerConnectionProfile, upsertCliServerConnectionProfile, + upsertCliServerLoginProfile, + updateCliServerConnectionProfileAfterOAuthRefresh, } from "./server-profile"; +import { readCliServerAuth } from "./server-connection"; const previousDataDir = process.env.EXECUTOR_DATA_DIR; @@ -24,6 +29,18 @@ afterEach(() => { } }); +const accessToken = (claims: Record) => + `${Buffer.from("{}").toString("base64url")}.${Buffer.from(JSON.stringify(claims)).toString("base64url")}.signature`; + +const oauthConnection = (token: 
string) => ({ + origin: "https://executor.example", + auth: { + kind: "oauth" as const, + accessToken: token, + refreshToken: `refresh-${token}`, + }, +}); + describe("CLI server connection profiles", () => { it("round-trips named server connections and default selection", () => Effect.gen(function* () { @@ -84,6 +101,273 @@ describe("CLI server connection profiles", () => { } }).pipe(Effect.provide(BunServices.layer))); + it("serializes concurrent updates and keeps credential files owner-only", () => + Effect.gen(function* () { + const dataDir = mkdtempSync(join(tmpdir(), "executor-server-profiles-")); + process.env.EXECUTOR_DATA_DIR = dataDir; + + try { + yield* Effect.all( + Array.from({ length: 12 }, (_, index) => + upsertCliServerConnectionProfile({ + name: `remote-${index}`, + connection: { + origin: `https://executor-${index}.example`, + auth: { kind: "bearer", token: `key-${index}` }, + }, + makeDefault: index === 0, + }), + ), + { concurrency: "unbounded" }, + ); + + const store = yield* readCliServerConnectionStore(); + expect(store.profiles.map((profile) => profile.name)).toEqual( + Array.from({ length: 12 }, (_, index) => `remote-${index}`).sort(), + ); + + const storePath = join(dataDir, "server-connections.json"); + expect(statSync(storePath).mode & 0o777).toBe(0o600); + expect( + readdirSync(dataDir).filter((name) => name.endsWith(".tmp") || name.endsWith(".lock")), + ).toEqual([]); + + chmodSync(storePath, 0o644); + yield* readCliServerConnectionStore(); + expect(statSync(storePath).mode & 0o777).toBe(0o600); + } finally { + rmSync(dataDir, { recursive: true, force: true }); + } + }).pipe(Effect.provide(BunServices.layer))); + + it("serializes concurrent stale-lock reclaimers without deleting a fresh writer lock", () => + Effect.gen(function* () { + const dataDir = mkdtempSync(join(tmpdir(), "executor-server-profiles-")); + process.env.EXECUTOR_DATA_DIR = dataDir; + + try { + const lockPath = join(dataDir, "server-connections.json.lock"); + 
writeFileSync(lockPath, `${JSON.stringify({ pid: 999_999_999, owner: "stale-owner" })}\n`, { + mode: 0o600, + }); + + yield* Effect.all( + Array.from({ length: 24 }, (_, index) => + upsertCliServerConnectionProfile({ + name: `contender-${index}`, + connection: { origin: `https://contender-${index}.example` }, + makeDefault: false, + }), + ), + { concurrency: "unbounded" }, + ); + + const store = yield* readCliServerConnectionStore(); + expect(store.profiles).toHaveLength(24); + expect(new Set(store.profiles.map((profile) => profile.name)).size).toBe(24); + expect( + readdirSync(dataDir).filter((name) => name.startsWith("server-connections.json.lock")), + ).toEqual([]); + } finally { + rmSync(dataDir, { recursive: true, force: true }); + } + }).pipe(Effect.provide(BunServices.layer))); + + it("treats a displaced live owner's tombstone as an advisory lock", () => + Effect.gen(function* () { + const dataDir = mkdtempSync(join(tmpdir(), "executor-server-profiles-")); + process.env.EXECUTOR_DATA_DIR = dataDir; + + try { + const tombstonePath = join(dataDir, "server-connections.json.lock.tombstone-live-owner"); + writeFileSync( + tombstonePath, + `${JSON.stringify({ pid: process.pid, owner: "other-live-owner" })}\n`, + { mode: 0o600 }, + ); + + const outcome = yield* Effect.race( + upsertCliServerConnectionProfile({ + name: "blocked", + connection: { origin: "https://blocked.example" }, + makeDefault: true, + }).pipe(Effect.as("acquired" as const)), + Effect.sleep("75 millis").pipe(Effect.as("blocked" as const)), + ); + expect(outcome).toBe("blocked"); + + rmSync(tombstonePath, { force: true }); + yield* upsertCliServerConnectionProfile({ + name: "unblocked", + connection: { origin: "https://unblocked.example" }, + makeDefault: true, + }); + const store = yield* readCliServerConnectionStore(); + expect(store.profiles.map((profile) => profile.name)).toEqual(["unblocked"]); + expect( + readdirSync(dataDir).filter((name) => name.startsWith("server-connections.json.lock")), 
+ ).toEqual([]); + } finally { + rmSync(dataDir, { recursive: true, force: true }); + } + }).pipe(Effect.provide(BunServices.layer))); + + it("keeps same-origin accounts distinct and reuses a logged-out account profile", () => + Effect.gen(function* () { + const dataDir = mkdtempSync(join(tmpdir(), "executor-server-profiles-")); + process.env.EXECUTOR_DATA_DIR = dataDir; + + try { + const first = yield* upsertCliServerLoginProfile({ + suggestedName: "account", + account: { + subject: "shared-user", + organizationId: "org-a", + email: "shared@example.com", + }, + connection: oauthConnection(accessToken({ sub: "shared-user", org_id: "org-a" })), + }); + const second = yield* upsertCliServerLoginProfile({ + suggestedName: "account", + account: { + subject: "shared-user", + organizationId: "org-b", + email: "shared@example.com", + }, + connection: oauthConnection(accessToken({ sub: "shared-user", org_id: "org-b" })), + }); + + expect(first.profile.name).toBe("account"); + expect(second.profile.name).toBe("account-2"); + + const loggedOut = yield* clearCliServerConnectionProfileAuth(second.profile.name); + const loggedOutProfile = loggedOut.profiles.find( + (profile) => profile.name === second.profile.name, + ); + expect(loggedOutProfile?.connection.auth).toBeUndefined(); + expect(loggedOutProfile?.account).toEqual({ + subject: "shared-user", + organizationId: "org-b", + email: "shared@example.com", + }); + + const signedOutRows = cliServerConnectionProfileRows(loggedOut, undefined); + expect( + signedOutRows.map(({ account, organization, auth }) => ({ + account, + organization, + auth, + })), + ).toEqual([ + { account: "shared@example.com", organization: "org-a", auth: "stored-auth" }, + { account: "shared@example.com", organization: "org-b", auth: "signed-out" }, + ]); + const environmentRows = cliServerConnectionProfileRows( + loggedOut, + readCliServerAuth({ EXECUTOR_AUTH_TOKEN: "environment-token" }), + ); + expect(environmentRows.map((row) => 
row.auth)).toEqual(["stored-auth", "env-auth"]); + + const relogged = yield* upsertCliServerLoginProfile({ + suggestedName: "account", + account: { + subject: "shared-user", + organizationId: "org-b", + email: "shared@example.com", + }, + connection: oauthConnection(accessToken({ sub: "shared-user", org_id: "org-b" })), + }); + expect(relogged.profile.name).toBe("account-2"); + expect(relogged.store.profiles).toHaveLength(2); + } finally { + rmSync(dataDir, { recursive: true, force: true }); + } + }).pipe(Effect.provide(BunServices.layer))); + + it("migrates a legacy oauth profile by matching its token identity", () => + Effect.gen(function* () { + const dataDir = mkdtempSync(join(tmpdir(), "executor-server-profiles-")); + process.env.EXECUTOR_DATA_DIR = dataDir; + + try { + writeFileSync( + join(dataDir, "server-connections.json"), + JSON.stringify({ + version: 1, + defaultProfile: "legacy", + profiles: [ + { + name: "legacy", + connection: oauthConnection( + accessToken({ sub: "legacy-user", org_id: "legacy-org" }), + ), + }, + ], + }), + { mode: 0o600 }, + ); + + const legacy = yield* readCliServerConnectionStore(); + expect(cliServerConnectionProfileRows(legacy, undefined)[0]).toMatchObject({ + account: "legacy-user", + organization: "legacy-org", + auth: "stored-auth", + }); + const loggedOut = yield* clearCliServerConnectionProfileAuth("legacy"); + expect(loggedOut.profiles[0]?.account).toEqual({ + subject: "legacy-user", + organizationId: "legacy-org", + }); + + const migrated = yield* upsertCliServerLoginProfile({ + suggestedName: "new-name", + account: { + subject: "legacy-user", + organizationId: "legacy-org", + email: "legacy@example.com", + }, + connection: oauthConnection(accessToken({ sub: "legacy-user", org_id: "legacy-org" })), + }); + expect(migrated.profile.name).toBe("legacy"); + expect(migrated.store.profiles).toHaveLength(1); + expect(migrated.profile.account).toEqual({ + subject: "legacy-user", + organizationId: "legacy-org", + email: 
"legacy@example.com", + }); + } finally { + rmSync(dataDir, { recursive: true, force: true }); + } + }).pipe(Effect.provide(BunServices.layer))); + + it("does not let a stale refresh restore credentials after logout", () => + Effect.gen(function* () { + const dataDir = mkdtempSync(join(tmpdir(), "executor-server-profiles-")); + process.env.EXECUTOR_DATA_DIR = dataDir; + + try { + const originalToken = accessToken({ sub: "user-a", org_id: "org-a" }); + const saved = yield* upsertCliServerLoginProfile({ + suggestedName: "account", + account: { subject: "user-a", organizationId: "org-a" }, + connection: oauthConnection(originalToken), + }); + + yield* clearCliServerConnectionProfileAuth(saved.profile.name); + const updated = yield* updateCliServerConnectionProfileAfterOAuthRefresh({ + name: saved.profile.name, + previousAccessToken: originalToken, + connection: oauthConnection(accessToken({ sub: "user-a", org_id: "org-a", v: 2 })), + }); + expect(updated).toBe(false); + + const store = yield* readCliServerConnectionStore(); + expect(store.profiles[0]?.connection.auth).toBeUndefined(); + } finally { + rmSync(dataDir, { recursive: true, force: true }); + } + }).pipe(Effect.provide(BunServices.layer))); + it("drops malformed profiles when parsing", () => { const store = parseCliServerConnectionStore( JSON.stringify({ diff --git a/apps/cli/src/server-profile.ts b/apps/cli/src/server-profile.ts index dd8df3085..100d17911 100644 --- a/apps/cli/src/server-profile.ts +++ b/apps/cli/src/server-profile.ts @@ -1,3 +1,4 @@ +import { randomUUID } from "node:crypto"; import { homedir } from "node:os"; import { FileSystem, Option, Path, Schema } from "effect"; import type { PlatformError } from "effect/PlatformError"; @@ -5,13 +6,25 @@ import * as Effect from "effect/Effect"; import { normalizeExecutorServerConnection, + type ExecutorServerAuth, type ExecutorServerConnection, type ExecutorServerConnectionInput, } from "@executor-js/sdk/shared"; +import { decodeAccessTokenClaims } 
from "./device-login"; +import { isPidAlive } from "./daemon-state"; + +export interface CliServerAccountIdentity { + readonly subject?: string; + readonly organizationId?: string; + readonly email?: string; +} export interface CliServerConnectionProfile { readonly name: string; readonly connection: ExecutorServerConnection; + /** Stable account metadata survives logout so a later login can reuse the + * same profile without retaining an access token solely for identification. */ + readonly account?: CliServerAccountIdentity; } export interface CliServerConnectionStore { @@ -42,6 +55,12 @@ const resolveDataDir = (path: Path.Path): string => const serverConnectionStorePath = (path: Path.Path): string => path.join(resolveDataDir(path), "server-connections.json"); +const serverConnectionStoreLockPath = (path: Path.Path): string => + `${serverConnectionStorePath(path)}.lock`; + +const serverConnectionStoreLockTombstonePath = (lockPath: string): string => + `${lockPath}.tombstone-${randomUUID()}`; + const PersistedAuth = Schema.Union([ Schema.Struct({ kind: Schema.Literal("basic"), @@ -71,9 +90,16 @@ const PersistedConnection = Schema.Struct({ auth: Schema.optional(PersistedAuth), }); +const PersistedAccount = Schema.Struct({ + subject: Schema.optional(Schema.String), + organizationId: Schema.optional(Schema.String), + email: Schema.optional(Schema.String), +}); + const PersistedProfile = Schema.Struct({ name: Schema.String, connection: PersistedConnection, + account: Schema.optional(PersistedAccount), }); const PersistedStore = Schema.Struct({ @@ -91,6 +117,78 @@ const decodeConnection = ( return normalizeExecutorServerConnection(input); }; +const nonEmpty = (value: string | undefined): string | undefined => { + const normalized = value?.trim(); + return normalized && normalized.length > 0 ? 
normalized : undefined; +}; + +const normalizeAccountIdentity = ( + input: CliServerAccountIdentity | undefined, +): CliServerAccountIdentity | undefined => { + const subject = nonEmpty(input?.subject); + const organizationId = nonEmpty(input?.organizationId); + const email = nonEmpty(input?.email)?.toLowerCase(); + if (!subject && !email) return undefined; + return { + ...(subject ? { subject } : {}), + ...(organizationId ? { organizationId } : {}), + ...(email ? { email } : {}), + }; +}; + +/** Legacy version-1 profiles predate persisted account metadata. Derive it + * from their OAuth token once, then the next login writes it explicitly. */ +const legacyAccountIdentity = ( + profile: CliServerConnectionProfile, +): CliServerAccountIdentity | undefined => { + const auth = profile.connection.auth; + if (!auth || auth.kind !== "oauth") return undefined; + const claims = decodeAccessTokenClaims(auth.accessToken); + return normalizeAccountIdentity({ + subject: typeof claims?.sub === "string" ? claims.sub : undefined, + organizationId: typeof claims?.org_id === "string" ? claims.org_id : undefined, + email: + typeof claims?.email === "string" + ? claims.email + : profile.connection.displayName.includes("@") + ? profile.connection.displayName + : undefined, + }); +}; + +const accountIdentityKey = ( + origin: string, + account: CliServerAccountIdentity | undefined, +): string | undefined => { + const normalized = normalizeAccountIdentity(account); + if (!normalized) return undefined; + const principal = normalized.subject + ? ["subject", normalized.subject] + : ["email", normalized.email ?? ""]; + return JSON.stringify([origin, ...principal, normalized.organizationId ?? null]); +}; + +const profileAccountIdentityKey = (profile: CliServerConnectionProfile): string | undefined => + accountIdentityKey(profile.connection.origin, profile.account ?? 
legacyAccountIdentity(profile)); + +export const cliServerConnectionProfileRows = ( + store: CliServerConnectionStore, + environmentAuth: ExecutorServerAuth | undefined, +) => + store.profiles.map((profile) => { + const account = profile.account ?? legacyAccountIdentity(profile); + return { + marker: profile.name === store.defaultProfile ? "*" : " ", + name: profile.name, + kind: profile.connection.kind, + origin: profile.connection.origin, + displayName: profile.connection.displayName, + auth: profile.connection.auth ? "stored-auth" : environmentAuth ? "env-auth" : "signed-out", + account: account?.email ?? account?.subject ?? "-", + organization: account?.organizationId ?? "-", + }; + }); + export const parseCliServerConnectionStore = (raw: string): CliServerConnectionStore => { const decoded = decodeStoreJson(raw); if (Option.isNone(decoded)) return emptyCliServerConnectionStore; @@ -100,7 +198,14 @@ export const parseCliServerConnectionStore = (raw: string): CliServerConnectionS const connection = decodeConnection(value.connection); if (!connection) return []; try { - return [{ name: validateCliServerConnectionProfileName(value.name), connection }]; + const account = normalizeAccountIdentity(value.account); + return [ + { + name: validateCliServerConnectionProfileName(value.name), + connection, + ...(account ? 
{ account } : {}), + }, + ]; } catch { return []; } @@ -121,7 +226,7 @@ export const parseCliServerConnectionStore = (raw: string): CliServerConnectionS const serializeCliServerConnectionStore = (store: CliServerConnectionStore): string => `${JSON.stringify(store, null, 2)}\n`; -export const readCliServerConnectionStore = (): Effect.Effect< +const readCliServerConnectionStoreUnlocked = (): Effect.Effect< CliServerConnectionStore, never, FileSystem.FileSystem | Path.Path @@ -129,53 +234,409 @@ export const readCliServerConnectionStore = (): Effect.Effect< Effect.gen(function* () { const fs = yield* FileSystem.FileSystem; const path = yield* Path.Path; + const storePath = serverConnectionStorePath(path); const raw = yield* fs - .readFileString(serverConnectionStorePath(path)) + .readFileString(storePath) .pipe(Effect.catchCause(() => Effect.succeed(null))); if (raw === null) return emptyCliServerConnectionStore; + // Repair permissions on legacy files as soon as they are read. New writes + // are also created owner-only below. + yield* fs.chmod(storePath, 0o600).pipe(Effect.ignore); return parseCliServerConnectionStore(raw); }); -export const writeCliServerConnectionStore = ( +export const readCliServerConnectionStore = (): Effect.Effect< + CliServerConnectionStore, + never, + FileSystem.FileSystem | Path.Path +> => readCliServerConnectionStoreUnlocked(); + +interface CliServerConnectionStoreLock { + readonly path: string; + readonly owner: string; +} + +const STORE_LOCK_RETRY_MS = 25; +const STORE_LOCK_TIMEOUT_MS = 10_000; + +const StoreLockPayload = Schema.Struct({ + pid: Schema.Number, + owner: Schema.String, +}); + +const decodeStoreLock = Schema.decodeUnknownOption(Schema.fromJsonString(StoreLockPayload)); + +const parseStoreLock = (raw: string) => { + const decoded = decodeStoreLock(raw); + return Option.isSome(decoded) ? 
decoded.value : null; +}; + +const readStoreLock = (fs: FileSystem.FileSystem, lockPath: string) => + fs.readFileString(lockPath).pipe( + Effect.map(parseStoreLock), + Effect.catchCause(() => Effect.succeed(null)), + ); + +const listStoreLockTombstones = (fs: FileSystem.FileSystem, path: Path.Path, lockPath: string) => { + const directory = path.dirname(lockPath); + const prefix = `${path.basename(lockPath)}.tombstone-`; + return fs.readDirectory(directory).pipe( + Effect.map((entries) => + entries + .filter((entry) => entry.startsWith(prefix)) + .map((entry) => path.join(directory, entry)), + ), + Effect.catchCause(() => Effect.succeed([])), + ); +}; + +/** Canonical lock names can be replaced while a contender is suspended. Move + * the path to a unique tombstone first, then only delete the exact owner that + * was inspected. A mismatched live owner remains an advisory lock. */ +const quarantineStaleStoreLock = ( + fs: FileSystem.FileSystem, + lockPath: string, + expected: { readonly pid: number; readonly owner: string }, +) => + Effect.gen(function* () { + const tombstonePath = serverConnectionStoreLockTombstonePath(lockPath); + const moved = yield* fs.rename(lockPath, tombstonePath).pipe( + Effect.as(true), + Effect.catchCause(() => Effect.succeed(false)), + ); + if (!moved) return; + + const quarantined = yield* readStoreLock(fs, tombstonePath); + if ( + quarantined?.owner === expected.owner && + quarantined.pid === expected.pid && + !isPidAlive(quarantined.pid) + ) { + yield* fs.remove(tombstonePath, { force: true }).pipe(Effect.ignore); + } + }); + +const inspectStoreLockTombstones = ( + fs: FileSystem.FileSystem, + path: Path.Path, + lockPath: string, + owner: string, +) => + Effect.gen(function* () { + const tombstonePaths = yield* listStoreLockTombstones(fs, path, lockPath); + let hasOwnedLock = false; + let hasOtherLiveLock = false; + + for (const tombstonePath of tombstonePaths) { + const lock = yield* readStoreLock(fs, tombstonePath); + if (!lock) { + 
// Unknown tombstones are conservative advisory locks. They may be a + // newly renamed lock whose payload has not become visible yet. + hasOtherLiveLock = true; + } else if (!isPidAlive(lock.pid)) { + // Tombstone names are unique and never reused, so deleting this exact + // dead owner's path has no compare-and-delete race. + yield* fs.remove(tombstonePath, { force: true }).pipe(Effect.ignore); + } else if (lock.owner === owner) { + hasOwnedLock = true; + } else { + hasOtherLiveLock = true; + } + } + + return { hasOwnedLock, hasOtherLiveLock }; + }); + +const releaseOwnedCanonicalStoreLock = ( + fs: FileSystem.FileSystem, + lockPath: string, + owner: string, +) => + Effect.gen(function* () { + const current = yield* readStoreLock(fs, lockPath); + if (current?.owner !== owner) return; + + const tombstonePath = serverConnectionStoreLockTombstonePath(lockPath); + const moved = yield* fs.rename(lockPath, tombstonePath).pipe( + Effect.as(true), + Effect.catchCause(() => Effect.succeed(false)), + ); + if (!moved) return; + + const quarantined = yield* readStoreLock(fs, tombstonePath); + if (quarantined?.owner === owner) { + yield* fs.remove(tombstonePath, { force: true }).pipe(Effect.ignore); + } + }); + +const releaseOwnedStoreLocks = (lock: CliServerConnectionStoreLock) => + Effect.gen(function* () { + const fs = yield* FileSystem.FileSystem; + const path = yield* Path.Path; + + yield* releaseOwnedCanonicalStoreLock(fs, lock.path, lock.owner); + const tombstonePaths = yield* listStoreLockTombstones(fs, path, lock.path); + for (const tombstonePath of tombstonePaths) { + const tombstone = yield* readStoreLock(fs, tombstonePath); + if (tombstone?.owner === lock.owner) { + yield* fs.remove(tombstonePath, { force: true }).pipe(Effect.ignore); + } + } + }); + +const acquireCliServerConnectionStoreLock = (): Effect.Effect< + CliServerConnectionStoreLock, + Error, + FileSystem.FileSystem | Path.Path +> => + Effect.gen(function* () { + const fs = yield* FileSystem.FileSystem; 
+ const path = yield* Path.Path; + const dataDir = resolveDataDir(path); + const lockPath = serverConnectionStoreLockPath(path); + const owner = randomUUID(); + const payload = `${JSON.stringify({ pid: process.pid, owner, startedAt: new Date().toISOString() })}\n`; + const deadline = Date.now() + STORE_LOCK_TIMEOUT_MS; + + yield* fs.makeDirectory(dataDir, { recursive: true }); + + for (;;) { + const acquired = yield* fs + .writeFileString(lockPath, payload, { flag: "wx", mode: 0o600 }) + .pipe( + Effect.as(true), + Effect.catchCause(() => Effect.succeed(false)), + ); + if (acquired) { + const tombstones = yield* inspectStoreLockTombstones(fs, path, lockPath, owner); + const canonical = yield* readStoreLock(fs, lockPath); + const stillOwnsLock = canonical?.owner === owner || tombstones.hasOwnedLock; + if (stillOwnsLock && !tombstones.hasOtherLiveLock) { + return { path: lockPath, owner }; + } + yield* releaseOwnedStoreLocks({ path: lockPath, owner }); + } + + const existing = yield* readStoreLock(fs, lockPath); + if (existing && !isPidAlive(existing.pid)) { + yield* quarantineStaleStoreLock(fs, lockPath, existing); + continue; + } + + if (Date.now() >= deadline) { + return yield* Effect.fail( + new Error("Timed out waiting to update the Executor server profile store."), + ); + } + yield* Effect.sleep(STORE_LOCK_RETRY_MS); + } + }); + +const releaseCliServerConnectionStoreLock = ( + lock: CliServerConnectionStoreLock, +): Effect.Effect => releaseOwnedStoreLocks(lock); + +const writeCliServerConnectionStoreUnlocked = ( store: CliServerConnectionStore, ): Effect.Effect => Effect.gen(function* () { const fs = yield* FileSystem.FileSystem; const path = yield* Path.Path; const dataDir = resolveDataDir(path); + const storePath = serverConnectionStorePath(path); + const tempPath = `${storePath}.${process.pid}.${randomUUID()}.tmp`; yield* fs.makeDirectory(dataDir, { recursive: true }); - yield* fs.writeFileString( - serverConnectionStorePath(path), - 
serializeCliServerConnectionStore(store), - ); + try { + yield* fs.writeFileString(tempPath, serializeCliServerConnectionStore(store), { + flag: "wx", + mode: 0o600, + }); + yield* fs.chmod(tempPath, 0o600).pipe(Effect.ignore); + yield* fs.rename(tempPath, storePath); + yield* fs.chmod(storePath, 0o600).pipe(Effect.ignore); + } finally { + yield* fs.remove(tempPath, { force: true }).pipe(Effect.ignore); + } }); +const withCliServerConnectionStoreLock = (effect: Effect.Effect) => + Effect.acquireUseRelease( + acquireCliServerConnectionStoreLock(), + () => effect, + releaseCliServerConnectionStoreLock, + ); + +export const writeCliServerConnectionStore = ( + store: CliServerConnectionStore, +): Effect.Effect => + withCliServerConnectionStoreLock(writeCliServerConnectionStoreUnlocked(store)); + export const upsertCliServerConnectionProfile = (input: { readonly name: string; readonly connection: ExecutorServerConnectionInput; readonly makeDefault: boolean; -}): Effect.Effect => - Effect.gen(function* () { - const name = validateCliServerConnectionProfileName(input.name); - const store = yield* readCliServerConnectionStore(); - const connection = normalizeExecutorServerConnection({ - ...input.connection, - key: input.connection.key ?? `profile:${name}`, - displayName: input.connection.displayName ?? name, - }); - const nextProfiles = [ - ...store.profiles.filter((profile) => profile.name !== name), - { name, connection }, - ].sort((a, b) => a.name.localeCompare(b.name)); - const nextStore: CliServerConnectionStore = { - version: 1, - defaultProfile: - input.makeDefault || store.defaultProfile === null ? 
name : store.defaultProfile, - profiles: nextProfiles, - }; - yield* writeCliServerConnectionStore(nextStore); - return nextStore; - }); +}): Effect.Effect< + CliServerConnectionStore, + Error | PlatformError, + FileSystem.FileSystem | Path.Path +> => + withCliServerConnectionStoreLock( + Effect.gen(function* () { + const name = validateCliServerConnectionProfileName(input.name); + const store = yield* readCliServerConnectionStoreUnlocked(); + const connection = normalizeExecutorServerConnection({ + ...input.connection, + key: input.connection.key ?? `profile:${name}`, + displayName: input.connection.displayName ?? name, + }); + const nextProfiles = [ + ...store.profiles.filter((profile) => profile.name !== name), + { name, connection }, + ].sort((a, b) => a.name.localeCompare(b.name)); + const nextStore: CliServerConnectionStore = { + version: 1, + defaultProfile: + input.makeDefault || store.defaultProfile === null ? name : store.defaultProfile, + profiles: nextProfiles, + }; + yield* writeCliServerConnectionStoreUnlocked(nextStore); + return nextStore; + }), + ); + +const uniqueProfileName = (store: CliServerConnectionStore, suggestedName: string): string => { + const base = validateCliServerConnectionProfileName(suggestedName); + if (!store.profiles.some((profile) => profile.name === base)) return base; + for (let suffix = 2; ; suffix += 1) { + const candidate = `${base}-${suffix}`; + if (!store.profiles.some((profile) => profile.name === candidate)) return candidate; + } +}; + +/** Save a device login while choosing its profile under the same lock as the + * write. Persisted account metadata lets a logout and later re-login reuse the + * original profile without keeping credentials around. 
*/ +export const upsertCliServerLoginProfile = (input: { + readonly name?: string; + readonly suggestedName: string; + readonly account?: CliServerAccountIdentity; + readonly connection: ExecutorServerConnectionInput; +}): Effect.Effect< + { readonly store: CliServerConnectionStore; readonly profile: CliServerConnectionProfile }, + Error | PlatformError, + FileSystem.FileSystem | Path.Path +> => + withCliServerConnectionStoreLock( + Effect.gen(function* () { + const store = yield* readCliServerConnectionStoreUnlocked(); + const account = normalizeAccountIdentity(input.account); + const identity = accountIdentityKey( + normalizeExecutorServerConnection(input.connection).origin, + account, + ); + const existing = identity + ? store.profiles.find((profile) => profileAccountIdentityKey(profile) === identity) + : undefined; + const name = input.name + ? validateCliServerConnectionProfileName(input.name) + : (existing?.name ?? uniqueProfileName(store, input.suggestedName)); + const connection = normalizeExecutorServerConnection({ + ...input.connection, + key: `profile:${name}`, + displayName: input.connection.displayName ?? name, + }); + const profile: CliServerConnectionProfile = { + name, + connection, + ...(account ? { account } : {}), + }; + const profiles = [ + ...store.profiles.filter((candidate) => candidate.name !== name), + profile, + ].sort((left, right) => left.name.localeCompare(right.name)); + const nextStore: CliServerConnectionStore = { + version: 1, + defaultProfile: name, + profiles, + }; + yield* writeCliServerConnectionStoreUnlocked(nextStore); + return { store: nextStore, profile }; + }), + ); + +/** Clear only the named profile's local credential. Account metadata remains + * so re-authentication can safely reuse this profile. 
*/ +export const clearCliServerConnectionProfileAuth = ( + name: string, +): Effect.Effect< + CliServerConnectionStore, + Error | PlatformError, + FileSystem.FileSystem | Path.Path +> => + withCliServerConnectionStoreLock( + Effect.gen(function* () { + const profileName = validateCliServerConnectionProfileName(name); + const store = yield* readCliServerConnectionStoreUnlocked(); + const profile = store.profiles.find((candidate) => candidate.name === profileName); + if (!profile) + return yield* Effect.fail(new Error(`No server profile named "${profileName}".`)); + if (!profile.connection.auth) return store; + const account = profile.account ?? legacyAccountIdentity(profile); + const connection = normalizeExecutorServerConnection({ + kind: profile.connection.kind, + key: `profile:${profileName}`, + origin: profile.connection.origin, + apiBaseUrl: profile.connection.apiBaseUrl, + displayName: profile.connection.displayName, + }); + const nextStore: CliServerConnectionStore = { + ...store, + profiles: store.profiles.map((candidate) => + candidate.name === profileName + ? { ...candidate, connection, ...(account ? { account } : {}) } + : candidate, + ), + }; + yield* writeCliServerConnectionStoreUnlocked(nextStore); + return nextStore; + }), + ); + +/** Persist a refresh only if the profile still carries the token that started + * it. A concurrent logout or re-login must not be overwritten by stale work. 
*/ +export const updateCliServerConnectionProfileAfterOAuthRefresh = (input: { + readonly name: string; + readonly previousAccessToken: string; + readonly connection: ExecutorServerConnectionInput; +}): Effect.Effect => + withCliServerConnectionStoreLock( + Effect.gen(function* () { + const name = validateCliServerConnectionProfileName(input.name); + const store = yield* readCliServerConnectionStoreUnlocked(); + const profile = store.profiles.find((candidate) => candidate.name === name); + const auth = profile?.connection.auth; + if ( + !profile || + !auth || + auth.kind !== "oauth" || + auth.accessToken !== input.previousAccessToken + ) { + return false; + } + const connection = normalizeExecutorServerConnection({ + ...input.connection, + key: `profile:${name}`, + }); + const nextStore: CliServerConnectionStore = { + ...store, + profiles: store.profiles.map((candidate) => + candidate.name === name ? { ...candidate, connection } : candidate, + ), + }; + yield* writeCliServerConnectionStoreUnlocked(nextStore); + return true; + }), + ); export const setDefaultCliServerConnectionProfile = ( name: string, @@ -184,32 +645,40 @@ export const setDefaultCliServerConnectionProfile = ( Error | PlatformError, FileSystem.FileSystem | Path.Path > => - Effect.gen(function* () { - const profileName = validateCliServerConnectionProfileName(name); - const store = yield* readCliServerConnectionStore(); - if (!store.profiles.some((profile) => profile.name === profileName)) { - return yield* Effect.fail(new Error(`No server profile named "${profileName}".`)); - } - const nextStore: CliServerConnectionStore = { ...store, defaultProfile: profileName }; - yield* writeCliServerConnectionStore(nextStore); - return nextStore; - }); + withCliServerConnectionStoreLock( + Effect.gen(function* () { + const profileName = validateCliServerConnectionProfileName(name); + const store = yield* readCliServerConnectionStoreUnlocked(); + if (!store.profiles.some((profile) => profile.name === 
profileName)) { + return yield* Effect.fail(new Error(`No server profile named "${profileName}".`)); + } + const nextStore: CliServerConnectionStore = { ...store, defaultProfile: profileName }; + yield* writeCliServerConnectionStoreUnlocked(nextStore); + return nextStore; + }), + ); export const removeCliServerConnectionProfile = ( name: string, -): Effect.Effect => - Effect.gen(function* () { - const profileName = validateCliServerConnectionProfileName(name); - const store = yield* readCliServerConnectionStore(); - const nextProfiles = store.profiles.filter((profile) => profile.name !== profileName); - const nextStore: CliServerConnectionStore = { - version: 1, - defaultProfile: store.defaultProfile === profileName ? null : store.defaultProfile, - profiles: nextProfiles, - }; - yield* writeCliServerConnectionStore(nextStore); - return nextStore; - }); +): Effect.Effect< + CliServerConnectionStore, + Error | PlatformError, + FileSystem.FileSystem | Path.Path +> => + withCliServerConnectionStoreLock( + Effect.gen(function* () { + const profileName = validateCliServerConnectionProfileName(name); + const store = yield* readCliServerConnectionStoreUnlocked(); + const nextProfiles = store.profiles.filter((profile) => profile.name !== profileName); + const nextStore: CliServerConnectionStore = { + version: 1, + defaultProfile: store.defaultProfile === profileName ? 
null : store.defaultProfile, + profiles: nextProfiles, + }; + yield* writeCliServerConnectionStoreUnlocked(nextStore); + return nextStore; + }), + ); export const findCliServerConnectionProfile = ( store: CliServerConnectionStore, diff --git a/apps/cloud/src/api/protected-api-key-auth.node.test.ts b/apps/cloud/src/api/protected-api-key-auth.node.test.ts index 15094617c..87521544e 100644 --- a/apps/cloud/src/api/protected-api-key-auth.node.test.ts +++ b/apps/cloud/src/api/protected-api-key-auth.node.test.ts @@ -87,6 +87,7 @@ describe("protected API key auth", () => { accountId: "user_123", organizationId: "org_123", organizationName: "Org org_123", + organizationSlug: "org-slug-org_123", email: "", name: null, avatarUrl: null, diff --git a/apps/cloud/src/api/protected-jwt-auth.node.test.ts b/apps/cloud/src/api/protected-jwt-auth.node.test.ts index c0c123315..9b317a5b8 100644 --- a/apps/cloud/src/api/protected-jwt-auth.node.test.ts +++ b/apps/cloud/src/api/protected-jwt-auth.node.test.ts @@ -109,6 +109,7 @@ describe("protected JWT (device-login) auth", () => { accountId: "user_123", organizationId: "org_123", organizationName: "Org org_123", + organizationSlug: "org-slug-org_123", email: "", name: null, avatarUrl: null, diff --git a/apps/cloud/src/auth/handlers.ts b/apps/cloud/src/auth/handlers.ts index 64d3ec447..a1a82acdd 100644 --- a/apps/cloud/src/auth/handlers.ts +++ b/apps/cloud/src/auth/handlers.ts @@ -16,6 +16,7 @@ import { AUTH_HINT_COOKIE } from "@executor-js/react/multiplayer/auth-hint"; import { SessionContext, SessionCookies } from "./middleware"; import { encodeLoginState, decodeLoginState } from "./login-state"; import { safeReturnTo } from "./return-to"; +import { activeOrganizationMemberships } from "./organization-memberships"; import { UserStoreService } from "./context"; import { env } from "cloudflare:workers"; import { WorkOSError } from "./errors"; @@ -161,8 +162,9 @@ const deleteResponseCookie = (response: 
HttpServerResponse.HttpServerResponse, n HttpServerResponse.setCookieUnsafe(response, name, "", DELETE_COOKIE_OPTIONS); // --------------------------------------------------------------------------- -// Single non-protected API surface — public (login/callback) + session -// (me/logout/organizations/switch-organization). The session group has SessionAuth on it. +// Single non-protected API surface: public login/callback plus session-scoped +// identity, logout, organization listing, creation, and invitations. The +// session group has SessionAuth on it. // --------------------------------------------------------------------------- export const NonProtectedApi = HttpApi.make("cloudWeb").add(CloudAuthPublicApi).add(CloudAuthApi); @@ -337,10 +339,12 @@ export const CloudSessionAuthHandlers = HttpApiBuilder.group( const session = yield* SessionContext; const memberships = yield* workos.listUserMemberships(session.accountId); + const activeMemberships = activeOrganizationMemberships(memberships.data); // Resolve through the mirror (not WorkOS directly) so each org's - // URL slug is minted/read — the switcher navigates to `/`. + // URL slug is minted/read. Pending and inactive memberships are not + // switchable and must stay out of the URL navigation menu. 
const organizations = yield* Effect.all( - memberships.data.map((m) => + activeMemberships.map((m) => resolveOrganization(m.organizationId).pipe( Effect.map((org) => ({ id: org.id, name: org.name, slug: org.slug })), Effect.orElseSucceed(() => null), @@ -364,9 +368,7 @@ export const CloudSessionAuthHandlers = HttpApiBuilder.group( const name = payload.name.trim(); const memberships = yield* workos.listUserMemberships(session.accountId); - const activeMemberships = memberships.data.filter( - (membership) => membership.status === "active", - ); + const activeMemberships = activeOrganizationMemberships(memberships.data); if (isOverFreeOrganizationLimit(activeMemberships)) { const paidOrganizationIds = yield* Effect.all( diff --git a/apps/cloud/src/auth/organization-memberships.test.ts b/apps/cloud/src/auth/organization-memberships.test.ts new file mode 100644 index 000000000..bfe1936f3 --- /dev/null +++ b/apps/cloud/src/auth/organization-memberships.test.ts @@ -0,0 +1,19 @@ +import { describe, expect, it } from "@effect/vitest"; + +import { activeOrganizationMemberships } from "./organization-memberships"; + +describe("activeOrganizationMemberships", () => { + it("keeps active memberships and excludes pending or inactive memberships", () => { + const memberships = [ + { id: "active-a", status: "active" }, + { id: "pending", status: "pending" }, + { id: "inactive", status: "inactive" }, + { id: "active-b", status: "active" }, + ]; + + expect(activeOrganizationMemberships(memberships).map(({ id }) => id)).toEqual([ + "active-a", + "active-b", + ]); + }); +}); diff --git a/apps/cloud/src/auth/organization-memberships.ts b/apps/cloud/src/auth/organization-memberships.ts new file mode 100644 index 000000000..16242c970 --- /dev/null +++ b/apps/cloud/src/auth/organization-memberships.ts @@ -0,0 +1,3 @@ +export const activeOrganizationMemberships = ( + memberships: ReadonlyArray, +) => memberships.filter((membership) => membership.status === "active"); diff --git 
a/apps/cloud/src/auth/ssr-gate.ts b/apps/cloud/src/auth/ssr-gate.ts index c329ac8cb..a366a851f 100644 --- a/apps/cloud/src/auth/ssr-gate.ts +++ b/apps/cloud/src/auth/ssr-gate.ts @@ -22,6 +22,7 @@ import { createMiddleware } from "@tanstack/react-start"; import { Effect, Exit, Layer, ManagedRuntime } from "effect"; +import { isValidOrgSlug } from "@executor-js/api"; import { AUTH_HINT_COOKIE, @@ -39,6 +40,7 @@ import { sealedSessionDisplayName } from "./middleware"; import { browserOriginFromRequest } from "./request-origin"; import { loginPath, safeReturnTo } from "./return-to"; import { ONBOARDING_PATHS, PUBLIC_PATHS } from "./route-paths"; +import { authorizeOrganizationSelector } from "./organization"; import { WorkOSClient } from "./workos"; const SESSION_COOKIE = "wos-session"; @@ -76,6 +78,8 @@ type VerifiedSession = { readonly refreshedSession?: string | undefined; }; +type DocumentOrganization = NonNullable; + // EVERY failure collapses to "signed out" — WorkOS errors inside the effect // and layer-construction errors like a bad cookie password (runPromiseExit // carries those in its Exit too) — so the login flow surfaces the real @@ -100,21 +104,24 @@ const verifySession = async (sealed: string): Promise => /** * The hint this request should be served with: the browser's own cookie when - * it already matches the verified identity, else one minted fresh from the - * session. `mint` is set when the cookie must also be (re)written — identity - * data freshness (a renamed user/org) is the CLIENT's job via /account/me, + * it already matches the verified identity and document organization, else a + * request-local hint. `mint` is set when the cookie must also be (re)written, + * but URL-specific hints are never persisted because tabs share cookies. 
+ * Identity data freshness (a renamed user/org) is the CLIENT's job via /account/me, * so the gate only steps in when the ids are wrong, never to rewrite display * fields (which would ping-pong with the client's authoritative write). */ const resolveAuthHint = async ( session: VerifiedSession, cookieHeader: string | null, + organization: DocumentOrganization | null, + persist: boolean, ): Promise<{ hint: AuthHint; mint: boolean }> => { const existing = decodeAuthHint(parseCookie(cookieHeader, AUTH_HINT_COOKIE)); if ( existing && existing.user.id === session.userId && - (existing.organization?.id ?? null) === session.organizationId + (existing.organization?.id ?? null) === (organization?.id ?? null) /* absent org compares as null on both sides */ ) { return { hint: existing, mint: false }; } @@ -127,14 +134,9 @@ const resolveAuthHint = async ( name: session.name, avatarUrl: session.avatarUrl, }, - organization: session.organizationId - ? { - id: session.organizationId, - ...(await organizationDisplay(session.organizationId)), - } - : null, + organization, }, - mint: true, + mint: persist, }; }; @@ -158,6 +160,29 @@ const organizationDisplay = async ( : { name: "", slug: "" }; }; +const userStoreLayer = () => Layer.provide(makeUserStoreLayer(), makeDbLayer()); + +const organizationSelectorFromPath = (pathname: string) => { + const segment = pathname.split("/")[1]; + return segment && isValidOrgSlug(segment) ?
segment : null; +}; + +const organizationForDocument = async (session: VerifiedSession, selector: string | null) => { + if (!selector) { + if (!session.organizationId) return null; + return { + id: session.organizationId, + ...(await organizationDisplay(session.organizationId)), + }; + } + + const exit = await getRuntime().runPromiseExit( + authorizeOrganizationSelector(session.userId, selector).pipe(Effect.provide(userStoreLayer())), + ); + if (!Exit.isSuccess(exit) || !exit.value) return null; + return { id: exit.value.id, name: exit.value.name, slug: exit.value.slug }; +}; + const hintSetCookie = (hint: AuthHint) => `${AUTH_HINT_COOKIE}=${encodeAuthHint(hint)}; ${HINT_COOKIE_ATTRIBUTES}; Max-Age=${AUTH_HINT_MAX_AGE_SECONDS}`; @@ -235,7 +260,20 @@ export const authGateMiddleware = createMiddleware({ type: "request" }).server( // SSR render the real `https://…//mcp` instead of the client-side // `http://127.0.0.1:4000` default — which would otherwise flash until // hydration corrected it. Set-cookie writes ride on the rendered response. - const { hint, mint } = await resolveAuthHint(session, cookieHeader); + // A slugged document is scoped by its URL, not by the browser-global + // session or hint cookies. Resolve that organization before rendering so + // the first HTML cannot paint another tab's organization. A foreign or + // inactive organization deliberately produces a null organization hint, + // which makes the root render its unframed not-found page. Do not persist + // URL-specific hints here because sibling tabs share one cookie jar. 
+ const organizationSelector = organizationSelectorFromPath(pathname); + const organization = await organizationForDocument(session, organizationSelector); + const { hint, mint } = await resolveAuthHint( + session, + cookieHeader, + organization, + organizationSelector === null, + ); const result = await next({ context: { authHint: hint, origin: browserOriginFromRequest(request) }, }); diff --git a/apps/cloud/src/routes/app/org.tsx b/apps/cloud/src/routes/app/org.tsx index 5a46e81af..2afd5294c 100644 --- a/apps/cloud/src/routes/app/org.tsx +++ b/apps/cloud/src/routes/app/org.tsx @@ -9,13 +9,14 @@ import { orgDomainWriteKeys } from "@executor-js/react/api/reactivity-keys"; import { Button } from "@executor-js/react/components/button"; import { Badge } from "@executor-js/react/components/badge"; import { CopyButton } from "@executor-js/react/components/copy-button"; +import { Skeleton } from "@executor-js/react/components/skeleton"; import { DropdownMenu, DropdownMenuContent, DropdownMenuItem, DropdownMenuTrigger, } from "@executor-js/react/components/dropdown-menu"; -import { OrgPage as SharedOrgPage } from "@executor-js/react/pages/org"; +import { OrgPage as SharedOrgPage, type OrgPageAccess } from "@executor-js/react/pages/org"; import { orgDomainsAtom, getDomainVerificationLink, deleteDomain } from "../../web/org-atoms"; // --------------------------------------------------------------------------- @@ -45,12 +46,12 @@ function OrgPage() { return (
{/* Shared members / roles / invite / org-name surface. */} - } /> + } />
); } -function DomainsSection() { +function DomainsSection(props: { access: OrgPageAccess }) { const domainsResult = useAtomValue(orgDomainsAtom); const doDeleteDomain = useAtomSet(deleteDomain, { mode: "promiseExit" }); const doGetVerificationLink = useAtomSet(getDomainVerificationLink, { @@ -93,9 +94,18 @@ function DomainsSection() { Verify a domain to let anyone with a matching email join automatically.

- + {props.access.status === "loading" ? ( + + ) : props.access.canManageOrganization ? ( + + ) : null} {!canUseDomains && ( @@ -103,11 +113,15 @@ function DomainsSection() {

Join by domain is available on the Team plan.

- - - + {props.access.status === "loading" ? ( + + ) : props.access.canManageOrganization ? ( + + + + ) : null} )} @@ -129,7 +143,9 @@ function DomainsSection() { if (!canUseDomains) return null; return (

- No domains yet. Add your company domain so members can join without an invite. + {props.access.canManageOrganization + ? "No domains yet. Add your company domain so members can join without an invite." + : "No domains have been configured for this organization."}

); } @@ -140,6 +156,7 @@ function DomainsSection() { handleDeleteDomain(d.id, d.domain)} /> ))} @@ -151,7 +168,15 @@ function DomainsSection() { ); } -function DomainCard({ domain: d, onDelete }: { domain: DomainData; onDelete: () => void }) { +function DomainCard({ + domain: d, + access, + onDelete, +}: { + domain: DomainData; + access: OrgPageAccess; + onDelete: () => void; +}) { const isVerified = d.state === "verified"; const isPending = d.state === "pending"; @@ -180,26 +205,30 @@ function DomainCard({ domain: d, onDelete }: { domain: DomainData; onDelete: () -
- - - - - - - Remove domain - - - +
+ {access.status === "loading" ? ( + + ) : access.canManageOrganization ? ( + + + + + + + Remove domain + + + + ) : null}
diff --git a/apps/cloud/src/web/components/org-menu-slot.tsx b/apps/cloud/src/web/components/org-menu-slot.tsx index 085656616..a73813b98 100644 --- a/apps/cloud/src/web/components/org-menu-slot.tsx +++ b/apps/cloud/src/web/components/org-menu-slot.tsx @@ -23,6 +23,7 @@ import { import { useAuth } from "../auth"; import { organizationsAtom } from "../auth"; import { CreateOrganizationFields, useCreateOrganizationForm } from "./create-organization-form"; +import { organizationNavigationHref } from "./org-navigation"; // --------------------------------------------------------------------------- // Cloud-only org-switcher slot for the shared shell's account dropdown. @@ -54,11 +55,11 @@ function OrganizationSwitcherItems(props: { activeOrganizationId: string | null // Switching orgs is now a pure URL navigation: the session authenticates the // user to ALL their orgs, and the slug in the path scopes every request (the // `x-executor-organization` header). No cookie to rewrite, no server switch - // call — just land on the other org's URL root and the whole app re-scopes. + // call, just land on the other org's URL and let the whole app re-scope. const handleSwitch = (organization: { id: string; slug: string }) => { if (organization.id === props.activeOrganizationId) return; trackEvent("org_switched", { success: true }); - window.location.href = `/${organization.slug}`; + window.location.href = organizationNavigationHref(organization.slug, window.location); }; return AsyncResult.match(organizations, { @@ -101,10 +102,9 @@ export function OrgMenuSlot() { const form = useCreateOrganizationForm({ defaultName: suggestedOrganizationName, - // Land on the new org's URL root — a reload would keep the old slug and - // the slug gate would switch the session right back. + // Keep the current route intent while replacing its organization scope. 
onSuccess: (org) => { - window.location.href = `/${org.slug}`; + window.location.href = organizationNavigationHref(org.slug, window.location); }, }); diff --git a/apps/cloud/src/web/components/org-navigation.test.ts b/apps/cloud/src/web/components/org-navigation.test.ts new file mode 100644 index 000000000..4e5f81b56 --- /dev/null +++ b/apps/cloud/src/web/components/org-navigation.test.ts @@ -0,0 +1,31 @@ +import { describe, expect, it } from "@effect/vitest"; + +import { organizationNavigationHref } from "./org-navigation"; + +describe("organizationNavigationHref", () => { + it("replaces the organization while preserving route, query, and hash", () => { + expect( + organizationNavigationHref("org-b", { + pathname: "/org-a/policies", + search: "?owner=user", + hash: "#rules", + }), + ).toBe("/org-b/policies?owner=user#rules"); + }); + + it("adds an organization to a bare deep link", () => { + expect( + organizationNavigationHref("org-b", { + pathname: "/policies", + search: "?owner=org", + hash: "", + }), + ).toBe("/org-b/policies?owner=org"); + }); + + it("lands a root route at the target organization root", () => { + expect(organizationNavigationHref("org-b", { pathname: "/", search: "", hash: "#top" })).toBe( + "/org-b#top", + ); + }); +}); diff --git a/apps/cloud/src/web/components/org-navigation.ts b/apps/cloud/src/web/components/org-navigation.ts new file mode 100644 index 000000000..3e8534a72 --- /dev/null +++ b/apps/cloud/src/web/components/org-navigation.ts @@ -0,0 +1,27 @@ +import { isValidOrgSlug } from "@executor-js/api"; + +export type OrganizationNavigationLocation = { + readonly pathname: string; + readonly search: string; + readonly hash: string; +}; + +export const organizationNavigationHref = ( + targetSlug: string, + location: OrganizationNavigationLocation, +) => { + const segments = location.pathname.split("/"); + const currentSlug = segments[1]; + let pathname: string; + + if (currentSlug && isValidOrgSlug(currentSlug)) { + segments[1] = 
targetSlug; + pathname = segments.join("/"); + } else if (location.pathname === "/") { + pathname = `/${targetSlug}`; + } else { + pathname = `/${targetSlug}${location.pathname.startsWith("/") ? "" : "/"}${location.pathname}`; + } + + return `${pathname}${location.search}${location.hash}`; +}; diff --git a/apps/host-cloudflare/src/auth/cloudflare-access.ts b/apps/host-cloudflare/src/auth/cloudflare-access.ts index 178f8818f..fc7d585db 100644 --- a/apps/host-cloudflare/src/auth/cloudflare-access.ts +++ b/apps/host-cloudflare/src/auth/cloudflare-access.ts @@ -58,7 +58,7 @@ export const principalFromAccessClaims = ( * `jose` caches + rotates the team JWKS, so build the verifier once per config. */ export const makeAccessVerifier = (config: CloudflareConfig) => { - const issuer = `https://${config.accessTeamDomain}`; + const issuer = config.accessIssuerUrl ?? `https://${config.accessTeamDomain}`; // Cached, lazily-fetched team signing keys; jose handles rotation + caching. const jwks = createRemoteJWKSet(new URL(`${issuer}/cdn-cgi/access/certs`)); @@ -83,12 +83,18 @@ export const makeAccessVerifier = (config: CloudflareConfig) => { if (!token) return null; const verified = yield* Effect.tryPromise({ - try: () => jwtVerify(token, jwks, { issuer, audience: config.accessAud }), + try: () => + jwtVerify(token, jwks, { + algorithms: ["RS256"], + issuer, + audience: config.accessAud, + }), catch: () => "invalid access assertion", }).pipe(Effect.orElseSucceed(() => null)); if (!verified) return null; - return principalFromAccessClaims(verified.payload as Record, config); + const principal = principalFromAccessClaims(verified.payload, config); + return principal.accountId.length > 0 ? 
principal : null; }); return { verify }; diff --git a/apps/host-cloudflare/src/config.ts b/apps/host-cloudflare/src/config.ts index 140d904f8..26511636d 100644 --- a/apps/host-cloudflare/src/config.ts +++ b/apps/host-cloudflare/src/config.ts @@ -29,6 +29,9 @@ export interface CloudflareEnv { readonly ACCESS_TEAM_DOMAIN: string; /** The Access application's AUD tag (the JWT audience to verify). */ readonly ACCESS_AUD: string; + /** Optional issuer override for loopback e2e. Production defaults to the + * HTTPS team domain and should leave this unset. */ + readonly ACCESS_ISSUER_URL?: string; /** Claim holding the display name (default `name`). */ readonly ACCESS_NAME_CLAIM?: string; /** Claim holding the user's groups (default `groups`). */ @@ -56,6 +59,7 @@ export interface CloudflareEnv { export interface CloudflareConfig { readonly accessTeamDomain: string; readonly accessAud: string; + readonly accessIssuerUrl?: string; readonly accessNameClaim: string; readonly accessGroupsClaim: string; readonly adminEmails: readonly string[]; @@ -77,6 +81,27 @@ const splitLower = (value: string | undefined): readonly string[] => .map((part) => part.trim().toLowerCase()) .filter((part) => part.length > 0); +const resolveAccessIssuerUrl = (value: string | undefined): string | undefined => { + if (!value) return undefined; + if (!URL.canParse(value)) { + // oxlint-disable-next-line executor/no-try-catch-or-throw, executor/no-error-constructor -- boundary: an invalid issuer would make Access verification fail unpredictably + throw new Error("ACCESS_ISSUER_URL must be an absolute HTTPS URL or a loopback HTTP URL"); + } + const url = new URL(value); + const isLoopback = url.hostname === "127.0.0.1" || url.hostname === "localhost"; + if ( + (url.protocol !== "https:" && !(url.protocol === "http:" && isLoopback)) || + url.username !== "" || + url.password !== "" || + url.search !== "" || + url.hash !== "" + ) { + // oxlint-disable-next-line executor/no-try-catch-or-throw, 
executor/no-error-constructor -- boundary: production Access issuers must not permit plaintext remote JWKS or URL credentials + throw new Error("ACCESS_ISSUER_URL must be an absolute HTTPS URL or a loopback HTTP URL"); + } + return url.toString().replace(/\/+$/, ""); +}; + // The org slug doubles as a URL segment (`//policies`), so an // operator-set value must fit the shared grammar and avoid reserved root // segments — a colliding slug would shadow real routes (notably /api, /mcp, @@ -114,6 +139,7 @@ export const loadConfig = (env: CloudflareEnv): CloudflareConfig => { return { accessTeamDomain: env.ACCESS_TEAM_DOMAIN.replace(/^https?:\/\//, "").replace(/\/+$/, ""), accessAud: env.ACCESS_AUD, + accessIssuerUrl: resolveAccessIssuerUrl(env.ACCESS_ISSUER_URL), accessNameClaim: env.ACCESS_NAME_CLAIM ?? "name", accessGroupsClaim: env.ACCESS_GROUPS_CLAIM ?? "groups", adminEmails: splitLower(env.ADMIN_EMAILS), diff --git a/apps/host-selfhost/src/account/better-auth-account-provider.ts b/apps/host-selfhost/src/account/better-auth-account-provider.ts index 472bbf658..98fa20753 100644 --- a/apps/host-selfhost/src/account/better-auth-account-provider.ts +++ b/apps/host-selfhost/src/account/better-auth-account-provider.ts @@ -1,16 +1,19 @@ import { Effect, Layer } from "effect"; import { AccountProvider, type AccountHeaders } from "@executor-js/api/server"; -import { AccountError, AccountUnauthorized } from "@executor-js/api"; +import { AccountError, AccountNoOrganization, AccountUnauthorized } from "@executor-js/api"; +import { EXECUTOR_ORG_SELECTOR_HEADER } from "@executor-js/sdk/shared"; import { BetterAuth } from "../auth/better-auth"; +import { resolveSelfHostAuthorization } from "../auth/identity"; // --------------------------------------------------------------------------- // Self-host AccountProvider — implements the provider-neutral account surface // over the Better Auth instance (auth.api.*). 
The shared AccountHandlers call // this; cloud provides its own WorkOS-backed implementation of the same shape. // -// Single-org instance: organization id/name come from the boot-seeded org. +// Single-org instance: the id is boot-seeded, while membership and display +// fields are read live for each request. // auth.api.* throws on failure; we map those to the neutral AccountError so the // UI sees one shape. API keys returned by `list` only expose a masked value; // the plaintext is returned once, by `create`. @@ -37,24 +40,53 @@ const orgRole = (slug: string | undefined): "owner" | "admin" | "member" => export const betterAuthAccountProvider: Layer.Layer = Layer.effect(AccountProvider)( Effect.gen(function* () { - const { auth, organizationId, organizationName, organizationSlug } = yield* BetterAuth; - - const getSession = (headers: AccountHeaders) => - Effect.tryPromise({ - try: () => auth.api.getSession({ headers: toHeaders(headers) }), - catch: () => new AccountError({ message: "Failed to resolve session" }), - }).pipe(Effect.orElseSucceed(() => null)); + const betterAuth = yield* BetterAuth; + const { auth, organizationId } = betterAuth; // Run a Better Auth api call, mapping any rejection to a neutral // AccountError with a stable, user-facing message. 
const call = (message: string, run: () => Promise) => Effect.tryPromise({ try: run, catch: () => new AccountError({ message }) }); + const getSession = (headers: AccountHeaders) => + call("Failed to resolve session", () => + auth.api.getSession({ headers: toHeaders(headers) }), + ); + + const requireSession = (headers: AccountHeaders) => + Effect.gen(function* () { + const resolved = yield* getSession(headers); + if (!resolved) return yield* new AccountUnauthorized(); + return resolved; + }); + + const authorize = ( + headers: AccountHeaders, + resolved: NonNullable>>, + ) => + call("Failed to authorize organization", () => + resolveSelfHostAuthorization( + betterAuth, + resolved.user.id, + headers[EXECUTOR_ORG_SELECTOR_HEADER] ?? + resolved.session.activeOrganizationId ?? + organizationId, + ), + ); + + const requireOrganization = (headers: AccountHeaders) => + Effect.gen(function* () { + const resolved = yield* requireSession(headers); + const authorization = yield* authorize(headers, resolved); + if (!authorization) return yield* new AccountNoOrganization(); + return { resolved, authorization }; + }); + return AccountProvider.of({ me: (headers) => Effect.gen(function* () { - const resolved = yield* getSession(headers); - if (!resolved) return yield* new AccountUnauthorized(); + const resolved = yield* requireSession(headers); + const authorization = yield* authorize(headers, resolved); return { user: { id: resolved.user.id, @@ -62,19 +94,23 @@ export const betterAuthAccountProvider: Layer.Layer - call("Failed to list API keys", () => - auth.api.listApiKeys({ headers: toHeaders(headers) }), - ).pipe( - Effect.map((result) => ({ + Effect.gen(function* () { + yield* requireOrganization(headers); + const result = yield* call("Failed to list API keys", () => + auth.api.listApiKeys({ headers: toHeaders(headers) }), + ); + return { apiKeys: result.apiKeys.map((key) => ({ id: key.id, name: key.name ?? 
"API key", @@ -83,14 +119,16 @@ export const betterAuthAccountProvider: Layer.Layer - call("Failed to create API key", () => - auth.api.createApiKey({ body: { name }, headers: toHeaders(headers) }), - ).pipe( - Effect.map((key) => ({ + Effect.gen(function* () { + yield* requireOrganization(headers); + const key = yield* call("Failed to create API key", () => + auth.api.createApiKey({ body: { name }, headers: toHeaders(headers) }), + ); + return { id: key.id, name: key.name ?? name, obfuscatedValue: masked(key.start), @@ -98,22 +136,26 @@ export const betterAuthAccountProvider: Layer.Layer - call("Failed to revoke API key", () => - auth.api.deleteApiKey({ body: { keyId: apiKeyId }, headers: toHeaders(headers) }), - ).pipe(Effect.as({ success: true })), + Effect.gen(function* () { + yield* requireOrganization(headers); + yield* call("Failed to revoke API key", () => + auth.api.deleteApiKey({ body: { keyId: apiKeyId }, headers: toHeaders(headers) }), + ); + return { success: true }; + }), listMembers: (headers) => Effect.gen(function* () { - const resolved = yield* getSession(headers); - const currentUserId = resolved?.user.id; + const { resolved } = yield* requireOrganization(headers); const result = yield* call("Failed to list members", () => - auth.api.listMembers({ headers: toHeaders(headers) }), - ).pipe( - Effect.catchTag("AccountError", () => Effect.succeed({ members: [], total: 0 })), + auth.api.listMembers({ + query: { organizationId }, + headers: toHeaders(headers), + }), ); const members = result.members.map((member) => ({ id: member.id, @@ -124,7 +166,7 @@ export const betterAuthAccountProvider: Layer.Layer - Effect.succeed({ - roles: [ - { slug: "owner", name: "Owner" }, - { slug: "admin", name: "Admin" }, - { slug: "member", name: "Member" }, - ], + listRoles: (headers) => + Effect.gen(function* () { + yield* requireOrganization(headers); + return { + roles: [ + { slug: "owner", name: "Owner" }, + { slug: "admin", name: "Admin" }, + { slug: "member", 
name: "Member" }, + ], + }; }), inviteMember: (headers, body) => - call("Failed to invite member", () => - auth.api.createInvitation({ - // Narrow the free-form slug to the org plugin's role union (no cast). - body: { email: body.email, role: orgRole(body.roleSlug) }, - headers: toHeaders(headers), - }), - ).pipe(Effect.map((invite) => ({ id: invite.id, email: invite.email }))), + Effect.gen(function* () { + yield* requireOrganization(headers); + const invite = yield* call("Failed to invite member", () => + auth.api.createInvitation({ + // Narrow the free-form slug to the org plugin's role union (no cast). + body: { email: body.email, role: orgRole(body.roleSlug), organizationId }, + headers: toHeaders(headers), + }), + ); + return { id: invite.id, email: invite.email }; + }), removeMember: (headers, membershipId) => - call("Failed to remove member", () => - auth.api.removeMember({ - body: { memberIdOrEmail: membershipId }, - headers: toHeaders(headers), - }), - ).pipe(Effect.as({ success: true })), + Effect.gen(function* () { + yield* requireOrganization(headers); + yield* call("Failed to remove member", () => + auth.api.removeMember({ + body: { memberIdOrEmail: membershipId, organizationId }, + headers: toHeaders(headers), + }), + ); + return { success: true }; + }), updateMemberRole: (headers, membershipId, roleSlug) => - call("Failed to update member role", () => - auth.api.updateMemberRole({ - body: { memberId: membershipId, role: roleSlug }, - headers: toHeaders(headers), - }), - ).pipe(Effect.as({ success: true })), + Effect.gen(function* () { + yield* requireOrganization(headers); + yield* call("Failed to update member role", () => + auth.api.updateMemberRole({ + body: { memberId: membershipId, role: roleSlug, organizationId }, + headers: toHeaders(headers), + }), + ); + return { success: true }; + }), updateOrgName: (headers, name) => - call("Failed to update organization name", () => - auth.api.updateOrganization({ - body: { data: { name }, organizationId 
}, - headers: toHeaders(headers), - }), - ).pipe(Effect.as({ name })), + Effect.gen(function* () { + yield* requireOrganization(headers); + yield* call("Failed to update organization name", () => + auth.api.updateOrganization({ + body: { data: { name }, organizationId }, + headers: toHeaders(headers), + }), + ); + return { name }; + }), }); }), ); diff --git a/apps/host-selfhost/src/app.ts b/apps/host-selfhost/src/app.ts index c4a080f3b..2c9dac9c9 100644 --- a/apps/host-selfhost/src/app.ts +++ b/apps/host-selfhost/src/app.ts @@ -19,6 +19,7 @@ import { SelfHostPluginsProvider, } from "./execution"; import { makeSelfHostMcpSeams } from "./mcp"; +import { resolveMcpOrgPath } from "./mcp/org-path"; import { selfHostPlugins } from "./plugins"; import { ErrorCaptureLive } from "./observability"; import { oauthCallbackSignInRedirectLocation } from "./auth/oauth-callback-login"; @@ -161,6 +162,17 @@ export const makeSelfHostApiHandler = async ( const web = toWebHandler(); return { handler: async (request) => { + const url = new URL(request.url); + const scopedPath = resolveMcpOrgPath(url.pathname, { + id: betterAuth.organizationId, + slug: betterAuth.organizationSlug, + }); + if (scopedPath.kind === "reject") return new Response("Not Found", { status: 404 }); + if (scopedPath.kind === "rewrite") { + url.pathname = scopedPath.pathname; + request = new Request(url, request); + } + const location = await oauthCallbackSignInRedirectLocation(request, betterAuth.auth); if (location) return new Response(null, { status: 302, headers: { location } }); return web.handler(request); diff --git a/apps/host-selfhost/src/auth/authorization.node.test.ts b/apps/host-selfhost/src/auth/authorization.node.test.ts new file mode 100644 index 000000000..fc698d266 --- /dev/null +++ b/apps/host-selfhost/src/auth/authorization.node.test.ts @@ -0,0 +1,253 @@ +import { mkdtempSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; + +import { afterAll, expect, 
test } from "@effect/vitest"; +import { EXECUTOR_ORG_SELECTOR_HEADER } from "@executor-js/sdk/shared"; + +import { mintInviteCode } from "../testing/mint-invite"; + +process.env.EXECUTOR_DATA_DIR = mkdtempSync(join(tmpdir(), "eh-authorization-")); +process.env.BETTER_AUTH_SECRET = "authorization-secret-0123456789-abcdefghij"; +process.env.EXECUTOR_BOOTSTRAP_ADMIN_EMAIL = "admin@authorization.test"; +process.env.EXECUTOR_BOOTSTRAP_ADMIN_PASSWORD = "admin-pass-123456"; +process.env.EXECUTOR_ORG_NAME = "Original Team"; +process.env.EXECUTOR_ORG_SLUG = "real-team"; + +const { makeSelfHostApiHandler } = await import("../app"); +const { handler, dispose } = await makeSelfHostApiHandler(); +afterAll(() => dispose()); + +const BASE = "http://localhost:4788"; + +const json = async (response: Response) => (await response.json()) as Record; + +const signIn = async (email: string, password: string) => { + const response = await handler( + new Request(`${BASE}/api/auth/sign-in/email`, { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify({ email, password }), + }), + ); + expect(response.status).toBe(200); + const token = response.headers.get("set-auth-token") ?? ""; + expect(token).not.toBe(""); + return { token, cookie: response.headers.get("set-cookie") ?? "" }; +}; + +const signUp = async (email: string) => { + const inviteCode = await mintInviteCode(handler); + const response = await handler( + new Request(`${BASE}/api/auth/sign-up/email`, { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify({ + email, + password: "member-password-123", + name: email, + inviteCode, + }), + }), + ); + expect(response.status).toBe(200); + const token = response.headers.get("set-auth-token") ?? ""; + expect(token).not.toBe(""); + return { token, cookie: response.headers.get("set-cookie") ?? 
"" }; +}; + +const bearerHeaders = (token: string, extra: Record = {}) => ({ + authorization: `Bearer ${token}`, + ...extra, +}); + +const protectedRequest = (token: string, extra: Record = {}) => + handler( + new Request(`${BASE}/api/connections`, { + headers: bearerHeaders(token, extra), + }), + ); + +const initializeMcp = (token: string, path = "/mcp") => + handler( + new Request(`${BASE}${path}`, { + method: "POST", + headers: { + ...bearerHeaders(token), + "content-type": "application/json", + accept: "application/json, text/event-stream", + }, + body: JSON.stringify({ + jsonrpc: "2.0", + id: 1, + method: "initialize", + params: { + protocolVersion: "2025-03-26", + capabilities: {}, + clientInfo: { name: "authorization-test", version: "1" }, + }, + }), + }), + ); + +test("live organization authorization rejects stale membership and bogus URL scopes", async () => { + const admin = await signIn("admin@authorization.test", "admin-pass-123456"); + const member = await signUp("removed-member@authorization.test"); + + const initialMe = await handler( + new Request(`${BASE}/api/account/me`, { headers: bearerHeaders(member.token) }), + ); + expect(initialMe.status).toBe(200); + const initialMeBody = (await json(initialMe)) as { + organization: { id: string; name: string; slug: string } | null; + }; + expect(initialMeBody.organization).toMatchObject({ name: "Original Team", slug: "real-team" }); + const organizationId = initialMeBody.organization?.id ?? 
""; + expect(organizationId).not.toBe(""); + + const rename = await handler( + new Request(`${BASE}/api/account/name`, { + method: "PATCH", + headers: bearerHeaders(admin.token, { "content-type": "application/json" }), + body: JSON.stringify({ name: "Renamed Team" }), + }), + ); + expect(rename.status).toBe(200); + + const renamedMe = await handler( + new Request(`${BASE}/api/account/me`, { headers: bearerHeaders(member.token) }), + ); + expect(renamedMe.status).toBe(200); + const renamedMeBody = (await json(renamedMe)) as { + organization: { id: string; name: string; slug: string } | null; + }; + expect(renamedMeBody.organization).toMatchObject({ name: "Renamed Team", slug: "real-team" }); + + const validScope = await protectedRequest(member.token, { + [EXECUTOR_ORG_SELECTOR_HEADER]: "real-team", + }); + expect(validScope.status).toBe(200); + + const validIdScope = await protectedRequest(member.token, { + [EXECUTOR_ORG_SELECTOR_HEADER]: organizationId, + }); + expect(validIdScope.status).toBe(200); + + expect((await initializeMcp(member.token, "/real-team/mcp")).status).toBe(200); + expect((await initializeMcp(member.token, `/${organizationId}/mcp`)).status).toBe(200); + expect((await initializeMcp(member.token, "/not-this-team/mcp")).status).toBe(404); + + const bogusMe = await handler( + new Request(`${BASE}/api/account/me`, { + headers: bearerHeaders(member.token, { + [EXECUTOR_ORG_SELECTOR_HEADER]: "not-this-team", + }), + }), + ); + expect(bogusMe.status).toBe(200); + expect((await json(bogusMe)).organization).toBeNull(); + + const bogusProtected = await protectedRequest(member.token, { + [EXECUTOR_ORG_SELECTOR_HEADER]: "not-this-team", + }); + expect(bogusProtected.status).toBe(403); + + const unauthenticatedRoles = await handler(new Request(`${BASE}/api/account/roles`)); + expect(unauthenticatedRoles.status).toBe(401); + + const roles = await handler( + new Request(`${BASE}/api/account/roles`, { headers: bearerHeaders(member.token) }), + ); + 
expect(roles.status).toBe(200); + + const createKey = await handler( + new Request(`${BASE}/api/account/api-keys`, { + method: "POST", + headers: bearerHeaders(member.token, { "content-type": "application/json" }), + body: JSON.stringify({ name: "Surviving key" }), + }), + ); + expect(createKey.status).toBe(200); + const key = (await json(createKey)) as { id: string; value: string }; + expect((await protectedRequest(key.value)).status).toBe(200); + + const listMembers = await handler( + new Request(`${BASE}/api/account/members`, { headers: bearerHeaders(admin.token) }), + ); + expect(listMembers.status).toBe(200); + const listMembersBody = (await json(listMembers)) as { + members: ReadonlyArray<{ id: string; email: string }>; + }; + const membership = listMembersBody.members.find( + (candidate) => candidate.email === "removed-member@authorization.test", + ); + expect(membership).toBeDefined(); + + const remove = await handler( + new Request(`${BASE}/api/account/members/${membership?.id ?? 
"missing"}`, { + method: "DELETE", + headers: bearerHeaders(admin.token), + }), + ); + expect(remove.status).toBe(200); + + expect((await protectedRequest(member.token)).status).toBe(403); + expect((await protectedRequest(key.value)).status).toBe(403); + + const removedMembers = await handler( + new Request(`${BASE}/api/account/members`, { headers: bearerHeaders(member.token) }), + ); + expect(removedMembers.status).toBe(403); + + const removedRoles = await handler( + new Request(`${BASE}/api/account/roles`, { headers: bearerHeaders(member.token) }), + ); + expect(removedRoles.status).toBe(403); + + const removedMe = await handler( + new Request(`${BASE}/api/account/me`, { headers: bearerHeaders(member.token) }), + ); + expect(removedMe.status).toBe(200); + expect((await json(removedMe)).organization).toBeNull(); + + const approval = await handler( + new Request(`${BASE}/api/mcp-sessions/not-a-session`, { + headers: { cookie: member.cookie }, + }), + ); + expect(approval.status).toBe(401); + expect((await initializeMcp(member.token)).status).toBe(401); +}); + +test("server-side API key and session revocation take effect on the next request", async () => { + const member = await signUp("revoked-credential@authorization.test"); + + const createKey = await handler( + new Request(`${BASE}/api/account/api-keys`, { + method: "POST", + headers: bearerHeaders(member.token, { "content-type": "application/json" }), + body: JSON.stringify({ name: "Short lived key" }), + }), + ); + expect(createKey.status).toBe(200); + const key = (await json(createKey)) as { id: string; value: string }; + expect((await protectedRequest(key.value)).status).toBe(200); + + const revokeKey = await handler( + new Request(`${BASE}/api/account/api-keys/${key.id}`, { + method: "DELETE", + headers: bearerHeaders(member.token), + }), + ); + expect(revokeKey.status).toBe(200); + expect((await protectedRequest(key.value)).status).toBe(401); + + const revokeSessions = await handler( + new 
Request(`${BASE}/api/auth/revoke-sessions`, { + method: "POST", + headers: bearerHeaders(member.token), + }), + ); + expect(revokeSessions.status).toBe(200); + expect((await protectedRequest(member.token)).status).toBe(401); +}); diff --git a/apps/host-selfhost/src/auth/better-auth.ts b/apps/host-selfhost/src/auth/better-auth.ts index 329c5a8e9..25a0f17c4 100644 --- a/apps/host-selfhost/src/auth/better-auth.ts +++ b/apps/host-selfhost/src/auth/better-auth.ts @@ -1,4 +1,4 @@ -import { betterAuth, type BetterAuthOptions } from "better-auth"; +import { betterAuth, getCurrentAdapter, type BetterAuthOptions } from "better-auth"; import { APIError } from "better-auth/api"; import { admin, bearer, deviceAuthorization, mcp, organization } from "better-auth/plugins"; import { apiKey } from "@better-auth/api-key"; @@ -7,24 +7,52 @@ import { LibsqlDialect, type LibsqlDialectConfig } from "@libsql/kysely-libsql"; import { Context } from "effect"; import { loadConfig } from "../config"; +import { + ensureInviteCodeTable, + finalizeFirstOwner, + finalizeInviteCode, + reserveFirstOwner, + reserveInviteCode, + signupClaimPlugin, +} from "./invites"; import { seedOrgAndAdmin } from "./seed"; -import { consumeInviteCode, ensureInviteCodeTable, findRedeemableCode } from "./invites"; -// The self-service signup gate: present only on the live (phase-2) auth -// instance, so the bootstrap seed's `createUser` — which -// runs on the gate-free phase-1 instance — is never blocked. `getAuth` is -// late-bound because the hooks call `auth.api.addMember` AFTER the instance they -// belong to is constructed (the closure resolves it at request time). +// The self-service gate acts only on the public email-signup endpoint, so the +// bootstrap seed's server-side createUser call is never blocked. interface SignupGate { - readonly client: Client; readonly organizationId: string; - readonly getAuth: () => Auth | null; } // Only self-service email signups are code-gated. 
Server/admin-initiated user // creation (the seed, or a future admin "add user") flows through other paths. const SIGNUP_PATH = "/sign-up/email"; +type SignupClaim = + | NonNullable>> + | NonNullable>>; + +// Both hooks execute in the same endpoint context and database transaction. +// Keeping the reservation on that context passes only an opaque claim id from +// the pre-user hook to the pre-account hook, without accepting client state. +const signupClaims = new WeakMap(); + +// libSQL supports transactions, but its single adopted client cannot begin two +// transactions concurrently. Queue only email signups at this process boundary +// so each request gets a real transaction instead of racing into SQLITE_BUSY. +// The database claim predicates remain the authority across processes. +const serializeEmailSignups = (handler: (request: Request) => Promise) => { + let pending = Promise.resolve(); + return (request: Request) => { + if (new URL(request.url).pathname !== `/api/auth${SIGNUP_PATH}`) return handler(request); + const response = pending.then(() => handler(request)); + pending = response.then( + () => undefined, + () => undefined, + ); + return response; + }; +}; + // --------------------------------------------------------------------------- // Better Auth instance over the SAME libSQL CONNECTION as the FumaDB executor // tables ("one connection, two schema regions"). @@ -53,8 +81,8 @@ const SIGNUP_PATH = "/sign-up/email"; // // We build exactly ONE auth instance, held for the process lifetime. An earlier // design also built a throwaway "bootstrap" instance (discarded mid-boot); that -// is gone too — the org id is late-bound the same way the signup gate's -// `getAuth` already is, so no second instance is ever needed. +// is gone too. The org id is late-bound through a shared reference, so no +// second instance is needed. 
// // `satisfies BetterAuthOptions` (not a return annotation) keeps the literal // plugin tuple so `betterAuth` infers the plugin-augmented `auth.api` and @@ -86,6 +114,10 @@ const makeAuthOptions = (client: Client, getOrganizationId: () => string, gate?: // oxlint-disable-next-line executor/no-double-cast -- boundary: the two @libsql/core versions' Client types are structurally identical for the calls the dialect makes (see above); no schema/decode applies to a native client handle. dialect: new LibsqlDialect({ client } as unknown as LibsqlDialectConfig), type: "sqlite" as const, + // Better Auth defaults Kysely transaction support off, even when the + // dialect supports it. Signup claims rely on the user, membership, + // credential account, session, and claim sharing one real transaction. + transaction: true, }, secret, // The browser Origin must match this exactly; CLI/MCP bearer requests carry @@ -111,6 +143,7 @@ const makeAuthOptions = (client: Client, getOrganizationId: () => string, gate?: // not the /api/auth basePath). plugins: [ organization(), + signupClaimPlugin, admin(), apiKey({ enableSessionForAPIKeys: true, rateLimit: { enabled: false } }), bearer(), @@ -148,17 +181,27 @@ const makeAuthOptions = (client: Client, getOrganizationId: () => string, gate?: }), }, }, - // The signup gate. First-run: an org with ZERO members is unclaimed, so - // the first signup is admitted ungated and becomes the owner. After that, - // `before` rejects a signup without a valid, unused, unexpired invite code - // and `after` makes the new user a real `member` + burns the code. + // The signup gate reserves the first-owner slot or invite before creating + // the user, then creates the membership and finalizes the claim before + // creating the credential account. Better Auth wraps the whole email + // signup in one database transaction, so any later failure rolls back the + // user, membership, and claim together. ...(gate ? 
{ user: { create: { - before: async (_user, context) => { + before: async (user, context) => { if (context?.path !== SIGNUP_PATH) return; - if (await orgHasNoMembers(gate)) return; // first user claims the org + const adapter = await getCurrentAdapter(context.context.adapter); + const ownerClaim = await reserveFirstOwner( + adapter, + gate.organizationId, + user.email, + ); + if (ownerClaim) { + signupClaims.set(context, ownerClaim); + return; + } const code = inviteCodeFrom(context); if (!code) { // oxlint-disable-next-line executor/no-try-catch-or-throw -- boundary: a Better Auth create hook rejects a request by throwing APIError @@ -166,39 +209,55 @@ const makeAuthOptions = (client: Client, getOrganizationId: () => string, gate?: message: "An invite code is required to sign up.", }); } - if (!(await findRedeemableCode(gate.client, code))) { + const inviteClaim = await reserveInviteCode(adapter, code, user.email); + if (!inviteClaim) { // oxlint-disable-next-line executor/no-try-catch-or-throw -- boundary: a Better Auth create hook rejects a request by throwing APIError throw new APIError("FORBIDDEN", { message: "That invite code is invalid, already used, or expired.", }); } + signupClaims.set(context, inviteClaim); }, - after: async (user, context) => { + }, + }, + account: { + create: { + before: async (account, context) => { if (context?.path !== SIGNUP_PATH) return; - const auth = gate.getAuth(); - if (!auth) return; - // First user into an empty org becomes its owner (no code). 
- if (await orgHasNoMembers(gate)) { - await auth.api.addMember({ - body: { userId: user.id, role: "owner", organizationId: gate.organizationId }, + const claim = signupClaims.get(context); + if (!claim) { + // oxlint-disable-next-line executor/no-try-catch-or-throw -- boundary: a Better Auth create hook rejects a request by throwing APIError + throw new APIError("FORBIDDEN", { + message: "The signup claim could not be completed.", }); - return; } - const code = inviteCodeFrom(context); - if (!code) return; - const redeemable = await findRedeemableCode(gate.client, code); - if (!redeemable) return; - await auth.api.addMember({ - body: { - userId: user.id, - role: redeemable.role, + const adapter = await getCurrentAdapter(context.context.adapter); + await adapter.create({ + model: "member", + data: { organizationId: gate.organizationId, + userId: account.userId, + role: claim.kind === "owner" ? "owner" : claim.role, + createdAt: new Date(), }, }); - await consumeInviteCode(gate.client, code, { - usedBy: user.id, - usedByEmail: user.email, - }); + const finalized = + claim.kind === "owner" + ? await finalizeFirstOwner(adapter, gate.organizationId, claim.claimId, { + id: account.userId, + email: claim.email, + }) + : await finalizeInviteCode(adapter, claim, { + id: account.userId, + email: claim.email, + }); + if (!finalized) { + // oxlint-disable-next-line executor/no-try-catch-or-throw -- boundary: a Better Auth create hook rejects a request by throwing APIError + throw new APIError("FORBIDDEN", { + message: "The signup claim could not be completed.", + }); + } + signupClaims.delete(context); }, }, }, @@ -220,24 +279,14 @@ const inviteCodeFrom = (context: { body?: unknown }): string | undefined => { return undefined; }; -// Count org members via Better Auth's OWN adapter. Now that auth shares -// SelfHostDb's libSQL client (one connection), this no longer guards against a -// cross-connection snapshot lag — that lag is gone with the second connection. 
-// It stays the canonical read because the adapter already models the `member` -// table and the count gates the first-run claim; reading through it keeps the -// gate logic next to the writes. +// Count org members via Better Auth's own adapter. System setup status uses the +// live membership count, while the durable signup claim separately prevents a +// previously claimed instance from reopening when every member is removed. export const countOrgMembers = (auth: Auth, organizationId: string): Promise => auth.$context.then(({ adapter }) => adapter.count({ model: "member", where: [{ field: "organizationId", value: organizationId }] }), ); -// True when the single org has no members yet — the unclaimed first-run state. -const orgHasNoMembers = async (gate: SignupGate): Promise => { - const auth = gate.getAuth(); - if (!auth) return true; - return (await countOrgMembers(auth, gate.organizationId)) === 0; -}; - const createAuthInstance = (client: Client, getOrganizationId: () => string, gate?: SignupGate) => betterAuth(makeAuthOptions(client, getOrganizationId, gate)); @@ -262,9 +311,9 @@ export class BetterAuth extends Context.Service()( * runMigrations and the seed are idempotent, so this is safe on every boot. * * One instance, not two: the org id the session-pin and gate need isn't known - * until the seed creates the org, but both read it lazily (a ref, like the - * gate's `getAuth`), so there's no need for a throwaway bootstrap instance — - * and so no second libSQL connection to be GC-closed mid-boot and unlink the + * until the seed creates the org, but both read it lazily through one ref, so + * there is no need for a throwaway bootstrap instance, and no second libSQL + * connection to be GC-closed mid-boot and unlink the * shared WAL (see the header comment; that was the self-host data-loss bug). 
* * The gate is active during the seed, but its hooks only act on the @@ -281,32 +330,29 @@ export const buildBetterAuth = async (client: Client): Promise const config = loadConfig(); // The org id is resolved by the seed below, AFTER this instance is built; the - // session-pin hook and the gate read it through these late-bound accessors - // (no session is created during the seed, so the empty initial id is never - // observed). `getAuth` resolves to this very instance, so the gate's `after` - // hook can call `auth.api.addMember` once a code is redeemed. - let auth: Auth | null = null; + // session-pin hook and the gate read it through this late-bound accessor (no + // session is created during the seed, so the empty initial id is never + // observed). const orgRef = { id: "" }; const gate: SignupGate = { - client, get organizationId() { return orgRef.id; }, - getAuth: () => auth, }; - auth = createAuthInstance(client, () => orgRef.id, gate); + const auth = createAuthInstance(client, () => orgRef.id, gate); // `runMigrations()` flows through the LibsqlDialect and is idempotent. 
await (await auth.$context).runMigrations(); await ensureInviteCodeTable(client); const { organizationId, organizationName } = await seedOrgAndAdmin(auth, client, config); orgRef.id = organizationId; + const handler = serializeEmailSignups(auth.handler); return { auth, organizationId, organizationName, organizationSlug: config.orgSlug, - handler: auth.handler, + handler, }; }; diff --git a/apps/host-selfhost/src/auth/identity.ts b/apps/host-selfhost/src/auth/identity.ts index 6c86fd9de..e5d40c22b 100644 --- a/apps/host-selfhost/src/auth/identity.ts +++ b/apps/host-selfhost/src/auth/identity.ts @@ -1,8 +1,9 @@ import { Effect, Layer } from "effect"; -import { IdentityProvider, Unauthorized } from "@executor-js/api/server"; +import { IdentityProvider, NoOrganization, Unauthorized } from "@executor-js/api/server"; +import { EXECUTOR_ORG_SELECTOR_HEADER } from "@executor-js/sdk/shared"; -import { BetterAuth } from "./better-auth"; +import { BetterAuth, type BetterAuthHandle } from "./better-auth"; // --------------------------------------------------------------------------- // The self-host identity seam — the production implementation of the shared @@ -27,6 +28,45 @@ const bearerToken = (headers: Headers): string | undefined => { : undefined; }; +/** + * Resolve the instance's one organization and verify that the user is still a + * member. The optional selector comes from the console URL header (or a + * credential's active organization) and must match the live organization id or + * slug. Reading through Better Auth's database adapter keeps membership + * removal and organization renames visible on the very next request. 
+ */ +export const resolveSelfHostAuthorization = async ( + betterAuth: BetterAuthHandle, + userId: string, + selector?: string | null, +) => { + const context = await betterAuth.auth.$context; + const organization = await context.adapter.findOne<{ + readonly id: string; + readonly name: string; + readonly slug: string; + }>({ + model: "organization", + where: [{ field: "id", value: betterAuth.organizationId }], + }); + if (!organization) return null; + if (selector && selector !== organization.id && selector !== organization.slug) return null; + + const member = await context.adapter.findOne<{ + readonly id: string; + readonly userId: string; + readonly organizationId: string; + readonly role: string; + }>({ + model: "member", + where: [ + { field: "userId", value: userId }, + { field: "organizationId", value: organization.id }, + ], + }); + return member ? { member, organization } : null; +}; + // --------------------------------------------------------------------------- // The production IdentityProvider: resolve a request to a Better Auth session // and map it to a neutral Principal. Three credential shapes resolve here: @@ -36,13 +76,14 @@ const bearerToken = (headers: Headers): string | undefined => { // resolution fails we retry with the Bearer value as x-api-key, which (with // enableSessionForAPIKeys) mints the owner's session. This is what lets a // generated API key authenticate the API + MCP endpoint as a Bearer token. -// Single-org instance, so organizationName is the boot-cached org name. +// The live organization row is resolved after the credential on every request. 
// --------------------------------------------------------------------------- export const betterAuthIdentityLayer: Layer.Layer = Layer.effect(IdentityProvider)( Effect.gen(function* () { - const { auth, organizationId, organizationName, organizationSlug } = yield* BetterAuth; + const betterAuth = yield* BetterAuth; + const { auth, organizationId } = betterAuth; return IdentityProvider.of({ authenticate: (request) => Effect.gen(function* () { @@ -61,16 +102,21 @@ export const betterAuthIdentityLayer: Layer.Layer unauthenticated. // The middleware's failure strategy renders this as a 401. if (!resolved) return yield* new Unauthorized(); - // Single-org instance: every authenticated user belongs to the one - // seeded org. Cookie/bearer-session logins are pinned to it by the - // session hook; API-key-minted sessions carry no active org, so we - // default to the seeded org rather than rejecting with NoOrganization. - const resolvedOrganizationId = resolved.session.activeOrganizationId ?? organizationId; + + const selector = + request.headers.get(EXECUTOR_ORG_SELECTOR_HEADER) ?? + resolved.session.activeOrganizationId ?? + organizationId; + const authorization = yield* Effect.promise(() => + resolveSelfHostAuthorization(betterAuth, resolved.user.id, selector), + ); + if (!authorization) return yield* new NoOrganization(); + return { accountId: resolved.user.id, - organizationId: resolvedOrganizationId, - organizationName, - organizationSlug, + organizationId: authorization.organization.id, + organizationName: authorization.organization.name, + organizationSlug: authorization.organization.slug, email: resolved.user.email, name: resolved.user.name ?? null, avatarUrl: resolved.user.image ?? 
null, diff --git a/apps/host-selfhost/src/auth/invites.ts b/apps/host-selfhost/src/auth/invites.ts index 20fdd23c2..654ef1fc8 100644 --- a/apps/host-selfhost/src/auth/invites.ts +++ b/apps/host-selfhost/src/auth/invites.ts @@ -1,6 +1,7 @@ import { randomBytes } from "node:crypto"; import type { Client, Row } from "@libsql/client"; +import type { BetterAuthPlugin, DBTransactionAdapter } from "better-auth"; // --------------------------------------------------------------------------- // Invite codes — the join mechanism for a single-tenant instance. @@ -33,6 +34,42 @@ export interface InviteCodeRow { readonly usedAt: string | null; } +export const signupClaimPlugin = { + id: "executor-signup-claims", + schema: { + inviteCode: { + modelName: "invite_code", + disableMigration: true, + fields: { + code: { type: "string", unique: true }, + role: { type: "string" }, + label: { type: "string", required: false }, + createdBy: { type: "string", fieldName: "created_by" }, + createdAt: { type: "date", fieldName: "created_at" }, + expiresAt: { type: "date", required: false, fieldName: "expires_at" }, + usedBy: { type: "string", required: false, fieldName: "used_by" }, + usedByEmail: { type: "string", required: false, fieldName: "used_by_email" }, + usedAt: { type: "date", required: false, fieldName: "used_at" }, + }, + }, + signupClaim: { + modelName: "signup_claim", + disableMigration: true, + fields: { + organizationId: { + type: "string", + unique: true, + fieldName: "organization_id", + references: { model: "organization", field: "id", onDelete: "cascade" }, + }, + claimedBy: { type: "string", required: false, fieldName: "claimed_by" }, + claimedEmail: { type: "string", required: false, fieldName: "claimed_email" }, + claimedAt: { type: "date", required: false, fieldName: "claimed_at" }, + }, + }, + }, +} satisfies BetterAuthPlugin; + // Unambiguous alphabet (no 0/O/1/I/l) so a code is easy to read and type. 
const ALPHABET = "ABCDEFGHJKLMNPQRSTUVWXYZ23456789"; @@ -73,8 +110,129 @@ export const ensureInviteCodeTable = async (client: Client): Promise => { used_at TEXT ) `); + await client.execute(` + CREATE TABLE IF NOT EXISTS signup_claim ( + id TEXT PRIMARY KEY, + organization_id TEXT NOT NULL UNIQUE REFERENCES organization(id) ON DELETE CASCADE, + claimed_by TEXT, + claimed_email TEXT, + claimed_at TEXT + ) + `); +}; + +export const ensureOrganizationSignupClaim = async ( + client: Client, + input: { + readonly organizationId: string; + readonly claimedBy?: string | null; + readonly claimedEmail?: string | null; + }, +) => { + const claimedAt = input.claimedBy ? new Date().toISOString() : null; + await client.execute({ + sql: `INSERT INTO signup_claim + (id, organization_id, claimed_by, claimed_email, claimed_at) + VALUES (?, ?, ?, ?, ?) + ON CONFLICT(organization_id) DO UPDATE SET + claimed_by = COALESCE(signup_claim.claimed_by, excluded.claimed_by), + claimed_email = COALESCE(signup_claim.claimed_email, excluded.claimed_email), + claimed_at = COALESCE(signup_claim.claimed_at, excluded.claimed_at)`, + args: [ + input.organizationId, + input.organizationId, + input.claimedBy ?? null, + input.claimedEmail ?? null, + claimedAt, + ], + }); }; +const pendingClaimId = () => `pending:${randomBytes(16).toString("hex")}`; + +export const reserveFirstOwner = async ( + adapter: DBTransactionAdapter, + organizationId: string, + email: string, +) => { + const claimId = pendingClaimId(); + const claimed = await adapter.updateMany({ + model: "signupClaim", + where: [ + { field: "organizationId", value: organizationId }, + { field: "claimedAt", value: null }, + ], + update: { claimedBy: claimId, claimedEmail: email, claimedAt: new Date() }, + }); + return claimed === 1 ? 
{ kind: "owner" as const, claimId, email } : null; +}; + +export const finalizeFirstOwner = async ( + adapter: DBTransactionAdapter, + organizationId: string, + claimId: string, + user: { readonly id: string; readonly email: string }, +) => + (await adapter.updateMany({ + model: "signupClaim", + where: [ + { field: "organizationId", value: organizationId }, + { field: "claimedBy", value: claimId }, + ], + update: { claimedBy: user.id, claimedEmail: user.email }, + })) === 1; + +export const reserveInviteCode = async ( + adapter: DBTransactionAdapter, + code: string, + email: string, +) => { + const normalizedCode = code.trim().toUpperCase(); + const claimId = pendingClaimId(); + const claimedAt = new Date(); + const claimed = await adapter.updateMany({ + model: "inviteCode", + where: [ + { field: "code", value: normalizedCode }, + { field: "usedAt", value: null }, + { field: "expiresAt", value: null, connector: "OR" }, + { field: "expiresAt", value: claimedAt, operator: "gt", connector: "OR" }, + ], + update: { usedBy: claimId, usedByEmail: email, usedAt: claimedAt }, + }); + if (claimed !== 1) return null; + + const row = await adapter.findOne<{ readonly role: string }>({ + model: "inviteCode", + where: [ + { field: "code", value: normalizedCode }, + { field: "usedBy", value: claimId }, + ], + }); + if (!row) return null; + return { + kind: "invite" as const, + claimId, + code: normalizedCode, + email, + role: row.role === "admin" ? 
("admin" as const) : ("member" as const), + }; +}; + +export const finalizeInviteCode = async ( + adapter: DBTransactionAdapter, + reservation: { readonly code: string; readonly claimId: string }, + user: { readonly id: string; readonly email: string }, +) => + (await adapter.updateMany({ + model: "inviteCode", + where: [ + { field: "code", value: reservation.code }, + { field: "usedBy", value: reservation.claimId }, + ], + update: { usedBy: user.id, usedByEmail: user.email }, + })) === 1; + export interface CreateInviteCodeInput { readonly createdBy: string; readonly role?: InviteRole; @@ -120,34 +278,3 @@ export const revokeInviteCode = async (client: Client, id: string): Promise => { - const result = await client.execute({ - sql: "SELECT * FROM invite_code WHERE code = ? AND used_at IS NULL", - args: [code.trim().toUpperCase()], - }); - const raw = result.rows[0]; - if (!raw) return null; - const row = toRow(raw); - if (row.expiresAt && Date.parse(row.expiresAt) < Date.now()) return null; - return row; -}; - -// Mark a code consumed. The `used_at IS NULL` guard makes this the single-use -// gate even under a race: rowsAffected === 0 means someone redeemed it first. -export const consumeInviteCode = async ( - client: Client, - code: string, - by: { usedBy: string; usedByEmail: string }, -): Promise => { - const result = await client.execute({ - sql: `UPDATE invite_code SET used_by = ?, used_by_email = ?, used_at = ? - WHERE code = ? 
AND used_at IS NULL`, - args: [by.usedBy, by.usedByEmail, new Date().toISOString(), code.trim().toUpperCase()], - }); - return result.rowsAffected > 0; -}; diff --git a/apps/host-selfhost/src/auth/seed.ts b/apps/host-selfhost/src/auth/seed.ts index dbeeac93a..d59ce90fd 100644 --- a/apps/host-selfhost/src/auth/seed.ts +++ b/apps/host-selfhost/src/auth/seed.ts @@ -4,6 +4,7 @@ import type { Client } from "@libsql/client"; import type { SelfHostConfig } from "../config"; import type { Auth } from "./better-auth"; +import { ensureOrganizationSignupClaim } from "./invites"; // --------------------------------------------------------------------------- // Idempotent first-boot bootstrap: ensure the single organization and a @@ -39,6 +40,23 @@ export const seedOrgAndAdmin = async ( args: [config.orgSlug, existingOrg.id], }); } + // Backfill the durable first-owner claim for databases created before the + // claim table existed. Once claimed, deleting every membership must not + // reopen public signup, so the upsert only fills an empty claim. + // Any existing user proves this instance has already admitted an account, + // even if an administrator later removed its final membership. 
+ // oxlint-disable-next-line executor/no-double-cast -- boundary: the SELECT columns are the schema contract for Better Auth user rows read off the libSQL client + const existingUser = ( + await client.execute({ + sql: "SELECT id AS user_id, email FROM user ORDER BY createdAt ASC LIMIT 1", + args: [], + }) + ).rows[0] as unknown as { user_id: string; email: string } | undefined; + await ensureOrganizationSignupClaim(client, { + organizationId: existingOrg.id, + claimedBy: existingUser?.user_id, + claimedEmail: existingUser?.email, + }); return { organizationId: existingOrg.id, organizationName: existingOrg.name }; } @@ -74,6 +92,11 @@ export const seedOrgAndAdmin = async ( // oxlint-disable-next-line executor/no-try-catch-or-throw, executor/no-error-constructor -- boundary: org creation must succeed for a usable instance throw new Error("Failed to create the bootstrap organization"); } + await ensureOrganizationSignupClaim(client, { + organizationId: org.id, + claimedBy: adminId, + claimedEmail: config.bootstrapAdminEmail, + }); return { organizationId: org.id, organizationName: config.organizationName }; } @@ -85,5 +108,6 @@ export const seedOrgAndAdmin = async ( sql: "INSERT INTO organization (id, name, slug, createdAt) VALUES (?, ?, ?, ?)", args: [organizationId, config.organizationName, config.orgSlug, new Date().toISOString()], }); + await ensureOrganizationSignupClaim(client, { organizationId }); return { organizationId, organizationName: config.organizationName }; }; diff --git a/apps/host-selfhost/src/auth/signup-race.node.test.ts b/apps/host-selfhost/src/auth/signup-race.node.test.ts new file mode 100644 index 000000000..1da729cfa --- /dev/null +++ b/apps/host-selfhost/src/auth/signup-race.node.test.ts @@ -0,0 +1,154 @@ +import { mkdtempSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; + +import { afterAll, expect, test } from "@effect/vitest"; + +process.env.EXECUTOR_DATA_DIR = mkdtempSync(join(tmpdir(), 
"eh-signup-race-")); +process.env.BETTER_AUTH_SECRET = "signup-race-secret-0123456789-abcdefghij"; +delete process.env.EXECUTOR_BOOTSTRAP_ADMIN_EMAIL; +delete process.env.EXECUTOR_BOOTSTRAP_ADMIN_PASSWORD; + +const { makeSelfHostApiHandler } = await import("../app"); +const { handler, dispose } = await makeSelfHostApiHandler(); +afterAll(() => dispose()); + +const BASE = "http://localhost:4788"; +const PASSWORD = "password-12345678"; + +const bearerHeaders = (token: string) => ({ authorization: `Bearer ${token}` }); + +const signUp = (email: string, inviteCode?: string) => + handler( + new Request(`${BASE}/api/auth/sign-up/email`, { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify({ + email, + password: PASSWORD, + name: email, + ...(inviteCode ? { inviteCode } : {}), + }), + }), + ); + +const signIn = (email: string) => + handler( + new Request(`${BASE}/api/auth/sign-in/email`, { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify({ email, password: PASSWORD }), + }), + ); + +const createInvite = async (ownerToken: string) => { + const response = await handler( + new Request(`${BASE}/api/admin/invites`, { + method: "POST", + headers: { ...bearerHeaders(ownerToken), "content-type": "application/json" }, + body: JSON.stringify({ role: "member" }), + }), + ); + expect(response.status).toBe(200); + return (await response.json()) as { code: string }; +}; + +const listMembers = async (token: string) => { + const response = await handler( + new Request(`${BASE}/api/account/members`, { headers: bearerHeaders(token) }), + ); + expect(response.status).toBe(200); + return (await response.json()) as { + members: ReadonlyArray<{ email: string; role: string }>; + }; +}; + +const listInvites = async (token: string) => { + const response = await handler( + new Request(`${BASE}/api/admin/invites`, { headers: bearerHeaders(token) }), + ); + expect(response.status).toBe(200); + return (await 
response.json()) as { + invites: ReadonlyArray<{ + code: string; + usedAt: string | null; + usedByEmail: string | null; + }>; + }; +}; + +test("signup claims serialize the first owner and each single-use invite", async () => { + const ownerEmails = ["owner-a@race.test", "owner-b@race.test"] as const; + const ownerAttempts = await Promise.all( + ownerEmails.map(async (email) => ({ email, response: await signUp(email) })), + ); + const ownerWinners = ownerAttempts.filter(({ response }) => response.status === 200); + const ownerLosers = ownerAttempts.filter(({ response }) => response.status !== 200); + expect(ownerWinners).toHaveLength(1); + expect(ownerLosers).toHaveLength(1); + + const ownerToken = ownerWinners[0]?.response.headers.get("set-auth-token") ?? ""; + expect(ownerToken).not.toBe(""); + const membersAfterOwnerRace = await listMembers(ownerToken); + expect(membersAfterOwnerRace.members).toHaveLength(1); + expect(membersAfterOwnerRace.members[0]).toMatchObject({ + email: ownerWinners[0]?.email, + role: "owner", + }); + expect((await signIn(ownerLosers[0]?.email ?? 
"missing@race.test")).status).not.toBe(200); + + const { code } = await createInvite(ownerToken); + const memberEmails = ["member-a@race.test", "member-b@race.test"] as const; + const memberAttempts = await Promise.all( + memberEmails.map(async (email) => ({ email, response: await signUp(email, code) })), + ); + const memberWinners = memberAttempts.filter(({ response }) => response.status === 200); + const memberLosers = memberAttempts.filter(({ response }) => response.status !== 200); + expect(memberWinners).toHaveLength(1); + expect(memberLosers).toHaveLength(1); + + const membersAfterInviteRace = await listMembers(ownerToken); + expect(membersAfterInviteRace.members).toHaveLength(2); + const racingMembers = membersAfterInviteRace.members.filter(({ email }) => + memberEmails.some((candidate) => candidate === email), + ); + expect(racingMembers).toHaveLength(1); + expect(racingMembers[0]).toMatchObject({ email: memberWinners[0]?.email, role: "member" }); + expect((await signIn(memberLosers[0]?.email ?? "missing@race.test")).status).not.toBe(200); + + const invitesAfterRace = await listInvites(ownerToken); + expect(invitesAfterRace.invites.find((invite) => invite.code === code)).toMatchObject({ + usedByEmail: memberWinners[0]?.email, + }); + expect(invitesAfterRace.invites.find((invite) => invite.code === code)?.usedAt).not.toBeNull(); +}); + +test("a failed concurrent signup leaves its invite redeemable", async () => { + const owner = await signIn("owner-a@race.test"); + const alternateOwner = await signIn("owner-b@race.test"); + const ownerResponse = owner.status === 200 ? owner : alternateOwner; + expect(ownerResponse.status).toBe(200); + const ownerToken = ownerResponse.headers.get("set-auth-token") ?? 
""; + expect(ownerToken).not.toBe(""); + + const inviteA = await createInvite(ownerToken); + const inviteB = await createInvite(ownerToken); + const duplicateEmail = "duplicate@race.test"; + const duplicateAttempts = await Promise.all([ + signUp(duplicateEmail, inviteA.code), + signUp(duplicateEmail, inviteB.code), + ]); + expect(duplicateAttempts.filter(({ status }) => status === 200)).toHaveLength(1); + expect(duplicateAttempts.filter(({ status }) => status !== 200)).toHaveLength(1); + + const invites = await listInvites(ownerToken); + const duplicateInvites = invites.invites.filter( + ({ code }) => code === inviteA.code || code === inviteB.code, + ); + expect(duplicateInvites.filter(({ usedAt }) => usedAt !== null)).toHaveLength(1); + const reusableCode = duplicateInvites.find(({ usedAt }) => usedAt === null)?.code ?? ""; + expect(reusableCode).not.toBe(""); + + const recovered = await signUp("recovered@race.test", reusableCode); + expect(recovered.status).toBe(200); +}); diff --git a/apps/host-selfhost/src/mcp/auth.ts b/apps/host-selfhost/src/mcp/auth.ts index 363fbe260..dc9704ad3 100644 --- a/apps/host-selfhost/src/mcp/auth.ts +++ b/apps/host-selfhost/src/mcp/auth.ts @@ -12,6 +12,7 @@ import { } from "@executor-js/host-mcp"; import { BetterAuth } from "../auth/better-auth"; +import { resolveSelfHostAuthorization } from "../auth/identity"; // --------------------------------------------------------------------------- // Self-host McpAuthProvider adapter, backed by Better Auth's mcp() plugin. @@ -122,7 +123,8 @@ export const selfHostMcpAuth: Layer.Layer context.internalAdapter.findUserById(userId)); if (!user) return null; + const authorization = yield* Effect.promise(() => + resolveSelfHostAuthorization(betterAuth, user.id, betterAuth.organizationId), + ); + if (!authorization) return null; return { accountId: user.id, - // Single-org self-host: OAuth tokens carry no active org, so pin to - // the seeded org (same default as the cookie/api-key path). 
- organizationId, - organizationName, - organizationSlug, + organizationId: authorization.organization.id, + organizationName: authorization.organization.name, + organizationSlug: authorization.organization.slug, email: user.email ?? "", name: user.name ?? null, avatarUrl: user.image ?? null, diff --git a/apps/host-selfhost/src/mcp/index.ts b/apps/host-selfhost/src/mcp/index.ts index 52287518c..003277d16 100644 --- a/apps/host-selfhost/src/mcp/index.ts +++ b/apps/host-selfhost/src/mcp/index.ts @@ -9,6 +9,7 @@ import type { } from "@executor-js/host-mcp"; import { BetterAuth, type BetterAuthHandle } from "../auth/better-auth"; +import { resolveSelfHostAuthorization } from "../auth/identity"; import type { SelfHostDbHandle } from "../db/self-host-db"; import { selfHostMcpAuth } from "./auth"; import { @@ -79,18 +80,27 @@ type BetterAuthSession = NonNullable< Awaited> >; -const principalFromSession = ( - resolved: BetterAuthSession, - betterAuth: BetterAuthHandle, -): Principal => ({ - accountId: resolved.user.id, - organizationId: resolved.session.activeOrganizationId ?? betterAuth.organizationId, - organizationName: betterAuth.organizationName, - email: resolved.user.email, - name: resolved.user.name ?? null, - avatarUrl: resolved.user.image ?? null, - roles: parseRoles(resolved.user.role ?? null), -}); +const principalFromSession = (resolved: BetterAuthSession, betterAuth: BetterAuthHandle) => + Effect.gen(function* () { + const authorization = yield* Effect.promise(() => + resolveSelfHostAuthorization( + betterAuth, + resolved.user.id, + resolved.session.activeOrganizationId ?? betterAuth.organizationId, + ), + ); + if (!authorization) return null; + return { + accountId: resolved.user.id, + organizationId: authorization.organization.id, + organizationName: authorization.organization.name, + organizationSlug: authorization.organization.slug, + email: resolved.user.email, + name: resolved.user.name ?? null, + avatarUrl: resolved.user.image ?? 
null, + roles: parseRoles(resolved.user.role ?? null), + } satisfies Principal; + }); /** * Gate the browser-approval endpoints behind a valid Better Auth session (the @@ -113,7 +123,8 @@ const makeApprovalHandler = }).pipe(Effect.orElseSucceed(() => null)), ); if (!session) return jsonResponse({ error: "Unauthorized" }, 401); - const principal = principalFromSession(session, betterAuth); + const principal = await Effect.runPromise(principalFromSession(session, betterAuth)); + if (!principal) return jsonResponse({ error: "Unauthorized" }, 401); return ( (await store.handlePausedRequest(request, principal)) ?? diff --git a/apps/host-selfhost/src/mcp/mcp-oauth.test.ts b/apps/host-selfhost/src/mcp/mcp-oauth.test.ts index a66952a0e..7f6943a6c 100644 --- a/apps/host-selfhost/src/mcp/mcp-oauth.test.ts +++ b/apps/host-selfhost/src/mcp/mcp-oauth.test.ts @@ -99,13 +99,27 @@ const signUp = async (email: string): Promise => { return res.headers.get("set-cookie") ?? ""; }; +const signInToken = async (email: string, password: string) => { + const response = await handler( + new Request(`${BASE}/api/auth/sign-in/email`, { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify({ email, password }), + }), + ); + expect(response.status).toBe(200); + const token = response.headers.get("set-auth-token") ?? ""; + expect(token).not.toBe(""); + return token; +}; + const b64url = (buf: Uint8Array): string => btoa(String.fromCharCode(...buf)) .replaceAll("+", "-") .replaceAll("/", "_") .replaceAll("=", ""); -test("MCP OAuth opaque-bearer flow authenticates /mcp end-to-end", async () => { +test("MCP OAuth opaque-bearer flow authenticates end-to-end and rejects removed members", async () => { const cookie = await signUp("oauth@env.test"); // 1. Dynamic client registration (public/PKCE client). 
@@ -208,5 +222,44 @@ test("MCP OAuth opaque-bearer flow authenticates /mcp end-to-end", async () => { }), ); expect(init.status).toBe(200); - expect(init.headers.get("mcp-session-id")).not.toBe(null); + const sessionId = init.headers.get("mcp-session-id") ?? ""; + expect(sessionId).not.toBe(""); + + // Membership is authorization, not a property cached in the OAuth token. + // Remove this user while the access token and MCP session both still exist; + // the very next request must fail before that session can be reused. + const adminToken = await signInToken("admin@env.test", "admin-pass-123456"); + const members = await handler( + new Request(`${BASE}/api/account/members`, { + headers: { authorization: `Bearer ${adminToken}` }, + }), + ); + expect(members.status).toBe(200); + const membersBody = (await members.json()) as { + members: ReadonlyArray<{ id: string; email: string }>; + }; + const membership = membersBody.members.find((member) => member.email === "oauth@env.test"); + expect(membership).toBeDefined(); + + const remove = await handler( + new Request(`${BASE}/api/account/members/${membership?.id ?? 
"missing"}`, { + method: "DELETE", + headers: { authorization: `Bearer ${adminToken}` }, + }), + ); + expect(remove.status).toBe(200); + + const stale = await handler( + new Request(`${BASE}/mcp`, { + method: "POST", + headers: { + authorization: `Bearer ${accessToken}`, + "mcp-session-id": sessionId, + "content-type": "application/json", + accept: "application/json, text/event-stream", + }, + body: JSON.stringify({ jsonrpc: "2.0", id: 2, method: "tools/list" }), + }), + ); + expect(stale.status).toBe(401); }); diff --git a/apps/host-selfhost/src/mcp/org-path.test.ts b/apps/host-selfhost/src/mcp/org-path.test.ts index 5c7460fa9..8d1f5e76c 100644 --- a/apps/host-selfhost/src/mcp/org-path.test.ts +++ b/apps/host-selfhost/src/mcp/org-path.test.ts @@ -1,34 +1,71 @@ import { describe, expect, it } from "@effect/vitest"; -import { stripMcpOrgSegment } from "./org-path"; +import { isScopedMcpPath, resolveMcpOrgPath } from "./org-path"; -describe("stripMcpOrgSegment", () => { - it("strips a single org segment before /mcp", () => { - expect(stripMcpOrgSegment("/iI9idP7BZcWpg9wW8cit3xE4r4dFSnHj/mcp")).toBe("/mcp"); - expect(stripMcpOrgSegment("/org_123/mcp")).toBe("/mcp"); - expect(stripMcpOrgSegment("/org_123/mcp/toolkits/deploy")).toBe("/mcp/toolkits/deploy"); +const organization = { id: "iI9idP7BZcWpg9wW8cit3xE4r4dFSnHj", slug: "real-team" }; + +describe("resolveMcpOrgPath", () => { + it("strips only the live organization id or slug before /mcp", () => { + expect(resolveMcpOrgPath(`/${organization.id}/mcp`, organization)).toEqual({ + kind: "rewrite", + pathname: "/mcp", + }); + expect(resolveMcpOrgPath(`/${organization.slug}/mcp`, organization)).toEqual({ + kind: "rewrite", + pathname: "/mcp", + }); + expect(resolveMcpOrgPath(`/${organization.slug}/mcp/toolkits/deploy`, organization)).toEqual({ + kind: "rewrite", + pathname: "/mcp/toolkits/deploy", + }); + }); + + it("strips a valid scope from the protected-resource discovery path", () => { + expect( + resolveMcpOrgPath( 
+ `/.well-known/oauth-protected-resource/${organization.id}/mcp`, + organization, + ), + ).toEqual({ kind: "rewrite", pathname: "/.well-known/oauth-protected-resource" }); + expect( + resolveMcpOrgPath( + `/.well-known/oauth-protected-resource/${organization.slug}/mcp/toolkits/deploy`, + organization, + ), + ).toEqual({ + kind: "rewrite", + pathname: "/.well-known/oauth-protected-resource/mcp/toolkits/deploy", + }); }); - it("strips the org segment from the protected-resource discovery path", () => { - expect(stripMcpOrgSegment("/.well-known/oauth-protected-resource/abc123/mcp")).toBe( - "/.well-known/oauth-protected-resource", - ); + it("rejects foreign organization prefixes", () => { + expect(resolveMcpOrgPath("/not-this-team/mcp", organization)).toEqual({ kind: "reject" }); expect( - stripMcpOrgSegment("/.well-known/oauth-protected-resource/abc123/mcp/toolkits/deploy"), - ).toBe("/.well-known/oauth-protected-resource/mcp/toolkits/deploy"); + resolveMcpOrgPath("/.well-known/oauth-protected-resource/not-this-team/mcp", organization), + ).toEqual({ kind: "reject" }); }); - it("leaves the bare paths untouched", () => { - expect(stripMcpOrgSegment("/mcp")).toBeNull(); - expect(stripMcpOrgSegment("/mcp/toolkits/deploy")).toBeNull(); - expect(stripMcpOrgSegment("/.well-known/oauth-authorization-server")).toBeNull(); + it("leaves bare, OAuth, and unrelated paths untouched", () => { + for (const path of [ + "/mcp", + "/mcp/toolkits/deploy", + "/.well-known/oauth-authorization-server", + "/api/auth/mcp/authorize", + "/api/auth/mcp/register", + "/integrations", + "/", + "/a/b/mcp", + ]) { + expect(resolveMcpOrgPath(path, organization)).toEqual({ kind: "none" }); + } }); +}); - it("never claims OAuth endpoints or unrelated paths", () => { - expect(stripMcpOrgSegment("/api/auth/mcp/authorize")).toBeNull(); - expect(stripMcpOrgSegment("/api/auth/mcp/register")).toBeNull(); - expect(stripMcpOrgSegment("/integrations")).toBeNull(); - expect(stripMcpOrgSegment("/")).toBeNull(); 
- expect(stripMcpOrgSegment("/a/b/mcp")).toBeNull(); // deeper than one segment +describe("isScopedMcpPath", () => { + it("identifies paths the dev proxy must forward for live validation", () => { + expect(isScopedMcpPath(`/${organization.id}/mcp`)).toBe(true); + expect(isScopedMcpPath("/not-this-team/mcp")).toBe(true); + expect(isScopedMcpPath("/mcp")).toBe(false); + expect(isScopedMcpPath("/mcp-consent")).toBe(false); }); }); diff --git a/apps/host-selfhost/src/mcp/org-path.ts b/apps/host-selfhost/src/mcp/org-path.ts index f24d38d40..67b236ec0 100644 --- a/apps/host-selfhost/src/mcp/org-path.ts +++ b/apps/host-selfhost/src/mcp/org-path.ts @@ -1,47 +1,74 @@ -// Self-host serves MCP at the bare `/mcp` path (and bare OAuth discovery docs). -// The console "Connect an agent" card, however, prints -// `//mcp` — a convention the multi-tenant cloud worker -// routes (it strips the org segment at the edge, carrying the org in a header). -// Self-host is single-tenant: the session already pins the one org, so the org -// segment in the URL carries no routing meaning. Rather than special-case the -// card per host, both self-host front-ends (the prod Bun server and the vite -// dev middleware) strip a single leading segment so the card's URL reaches the -// real route — mirroring cloud's edge rewrite, but accepting ANY segment (a -// Better Auth org id is not the `org_…` shape cloud keys on) and setting no -// header. +// Self-host serves MCP at the bare `/mcp` path and bare OAuth discovery docs. +// The console "Connect an agent" card prints an organization-prefixed MCP URL, +// matching the multi-tenant cloud convention. Self-host may remove that prefix +// only when it names this instance's live organization. // -// Pure + Effect-free on purpose: the vite config imports it too. +// Pure and Effect-free on purpose: the Vite config imports the shape detector. 
const PRM_PREFIX = "/.well-known/oauth-protected-resource"; -/** - * Given a request pathname, return the bare MCP pathname it should route to - * when it carries a single leading org segment, or `null` when no rewrite - * applies (already bare, not an MCP path, or an OAuth endpoint like - * `/api/auth/mcp/authorize`). - * - * //mcp -> /mcp - * //mcp/toolkits/ -> /mcp/toolkits/ - * /.well-known/oauth-protected-resource//mcp -> /.well-known/oauth-protected-resource - * /.well-known/oauth-protected-resource//mcp/toolkits/ - * -> /.well-known/oauth-protected-resource/mcp/toolkits/ - */ -export const stripMcpOrgSegment = (pathname: string): string | null => { +export interface McpOrganizationScope { + readonly id: string; + readonly slug: string; +} + +export type McpOrgPathResolution = + | { readonly kind: "none" } + | { readonly kind: "reject" } + | { readonly kind: "rewrite"; readonly pathname: string }; + +interface ScopedMcpPath { + readonly organization: string; + readonly pathname: string; +} + +const parseScopedMcpPath = (pathname: string): ScopedMcpPath | null => { if (pathname.startsWith(`${PRM_PREFIX}/`)) { const rest = pathname .slice(PRM_PREFIX.length + 1) .split("/") .filter((segment) => segment.length > 0); - if (rest.length === 2 && rest[1] === "mcp") return PRM_PREFIX; + if (rest.length === 2 && rest[1] === "mcp") { + return { organization: rest[0] ?? "", pathname: PRM_PREFIX }; + } if (rest.length === 4 && rest[1] === "mcp" && rest[2] === "toolkits") { - return `${PRM_PREFIX}/mcp/toolkits/${rest[3]}`; + return { + organization: rest[0] ?? "", + pathname: `${PRM_PREFIX}/mcp/toolkits/${rest[3]}`, + }; } return null; } + const segments = pathname.split("/").filter((segment) => segment.length > 0); - if (segments.length === 2 && segments[1] === "mcp") return "/mcp"; + if (segments.length === 2 && segments[1] === "mcp") { + return { organization: segments[0] ?? 
"", pathname: "/mcp" }; + } if (segments.length === 4 && segments[1] === "mcp" && segments[2] === "toolkits") { - return `/mcp/toolkits/${segments[3]}`; + return { + organization: segments[0] ?? "", + pathname: `/mcp/toolkits/${segments[3]}`, + }; } return null; }; + +/** True when the path has a single organization segment before an MCP route. */ +export const isScopedMcpPath = (pathname: string) => parseScopedMcpPath(pathname) !== null; + +/** + * Validate an organization-prefixed MCP or protected-resource path. Valid live + * organization ids and slugs are rewritten to the provider-neutral bare route; + * foreign scopes are rejected instead of silently reaching this tenant. + */ +export const resolveMcpOrgPath = ( + pathname: string, + organization: McpOrganizationScope, +): McpOrgPathResolution => { + const scoped = parseScopedMcpPath(pathname); + if (!scoped) return { kind: "none" }; + if (scoped.organization !== organization.id && scoped.organization !== organization.slug) { + return { kind: "reject" }; + } + return { kind: "rewrite", pathname: scoped.pathname }; +}; diff --git a/apps/host-selfhost/src/serve.ts b/apps/host-selfhost/src/serve.ts index a521ab5f1..4eb1c7943 100644 --- a/apps/host-selfhost/src/serve.ts +++ b/apps/host-selfhost/src/serve.ts @@ -32,13 +32,12 @@ import { OAUTH_CALLBACK_PATH, oauthCallbackSignInRedirectLocation, } from "./auth/oauth-callback-login"; -import { stripMcpOrgSegment } from "./mcp/org-path"; +import { resolveMcpOrgPath } from "./mcp/org-path"; const distDir = fileURLToPath(new URL("../dist/", import.meta.url)); -// Rewrite `//mcp` (and its OAuth discovery path) to the bare path before -// routing, so the "Connect an agent" card's org-pinned URL reaches the real -// `/mcp` route — see ./mcp/org-path. A no-op for every other request. +// Validate `//mcp` and its OAuth discovery path before routing. The live +// org id or slug reaches the bare MCP route; foreign scopes get a plain 404. 
const selfHostHttpMiddleware = (betterAuth: BetterAuthHandle) => HttpMiddleware.make((httpApp) => Effect.gen(function* () { @@ -56,12 +55,17 @@ const selfHostHttpMiddleware = (betterAuth: BetterAuthHandle) => if (location) return HttpServerResponse.redirect(location, { status: 302 }); } - const rewritten = stripMcpOrgSegment(url.pathname); - if (rewritten === null) return yield* httpApp; + const scopedPath = resolveMcpOrgPath(url.pathname, { + id: betterAuth.organizationId, + slug: betterAuth.organizationSlug, + }); + if (scopedPath.kind === "none") return yield* httpApp; + if (scopedPath.kind === "reject") + return HttpServerResponse.text("Not Found", { status: 404 }); return yield* httpApp.pipe( Effect.provideService( HttpServerRequest.HttpServerRequest, - request.modify({ url: `${rewritten}${url.search}` }), + request.modify({ url: `${scopedPath.pathname}${url.search}` }), ), ); }), diff --git a/apps/host-selfhost/vite.config.ts b/apps/host-selfhost/vite.config.ts index 79d25b7e6..4fef7d031 100644 --- a/apps/host-selfhost/vite.config.ts +++ b/apps/host-selfhost/vite.config.ts @@ -8,7 +8,7 @@ import { tanstackRouter } from "@tanstack/router-plugin/vite"; import executorVitePlugin from "@executor-js/vite-plugin"; import { routes } from "./tsr.routes"; -import { stripMcpOrgSegment } from "./src/mcp/org-path"; +import { isScopedMcpPath } from "./src/mcp/org-path"; // Self-host web SPA. Mirrors @executor-js/app's vite plugin bundle, but points // the TanStack router codegen at THIS app's routes (web/routes) so we get the @@ -54,26 +54,19 @@ function executorApiPlugin(): Plugin { if (path.includes("/src/") || path.endsWith("/executor.config.ts")) handlerPromise = null; }); server.middlewares.use(async (req, res, next) => { - let rawUrl = req.url ?? 
"/"; - // The "Connect an agent" card prints `//mcp`; self-host - // serves the bare `/mcp`, so rewrite it here (prod does the same in - // serve.ts) — otherwise this org-pinned path isn't recognized as an MCP - // path and falls through to the SPA as a 404. Mirrors ./src/mcp/org-path. + const rawUrl = req.url ?? "/"; const devOrigin = `http://${req.headers.host ?? `localhost:${DEV_PORT}`}`; - const pathname = stripMcpOrgSegment(new URL(rawUrl, devOrigin).pathname) ?? ""; - if (pathname !== "") { - const original = new URL(rawUrl, devOrigin); - rawUrl = `${pathname}${original.search}`; - } // Match on PATHNAME, not a raw-URL prefix: `/mcp` must NOT swallow the // SPA route `/mcp-consent`, or the dev server misroutes it to the API - // handler and returns a 404. + // handler and returns a 404. Scoped MCP paths are forwarded unchanged; + // the live app validates the org id or slug before rewriting. const path = new URL(rawUrl, devOrigin).pathname; const handled = path === "/api" || path.startsWith("/api/") || path === "/mcp" || path.startsWith("/mcp/") || + isScopedMcpPath(path) || path === "/docs" || path.startsWith("/docs/") || // RFC 9728 / RFC 8414 OAuth discovery the MCP client fetches before diff --git a/e2e/AGENTS.md b/e2e/AGENTS.md index a77694871..f01aaa603 100644 --- a/e2e/AGENTS.md +++ b/e2e/AGENTS.md @@ -14,9 +14,18 @@ produce a Playwright trace, video, and step screenshots for debugging. ## File placement -- `scenarios/*.test.ts` — runs on every target (cloud + selfhost) -- `cloud/*.test.ts` — cloud-only (e.g. billing, WorkOS-session UI) -- `selfhost/*.test.ts` — selfhost-only +- `scenarios/*.test.ts`: shared deployment journeys selected by each project's + include list. +- `cloud/*.test.ts`, `selfhost/*.test.ts`, and `cloudflare/*.test.ts`: + deployment-specific guarantees. +- `local/*.test.ts`, `desktop/*.test.ts`, `desktop-packaged/*.test.ts`, and + `cli/*.test.ts`: client and machine-specific journeys. 
+- `src/clients/*.test.ts`: hermetic adapter tests selected by the `clients` + project, including real third-party binaries against replay emulators. +- `harness/*.test.ts`: no-service Effect Vitest coverage for the runner, + evidence pipeline, trace writers, and port allocation. +- Add or change project membership in `src/project-matrix.ts`. Do not duplicate + the target or global-setup registry in `vitest.config.ts`. ## Anatomy @@ -25,21 +34,28 @@ import { expect } from "@effect/vitest"; import { Effect } from "effect"; import { composePluginApi } from "@executor-js/api/server"; import { scenario } from "../src/scenario"; +import { Api, Target } from "../src/services"; const coreApi = composePluginApi([] as const); // tools/integrations/connections/providers/executions/oauth/policies -scenario("Tools · a fresh workspace advertises the built-in tools", { needs: ["api"] }, (ctx) => +scenario( + "Tools · a fresh workspace advertises the built-in tools", + {}, Effect.gen(function* () { - const identity = yield* ctx.target.newIdentity(); // fresh isolated user+org - const client = yield* ctx.api.client(coreApi, identity); // typed HttpApiClient + const target = yield* Target; + const { client: makeClient } = yield* Api; + const identity = yield* target.newIdentity(); + const client = yield* makeClient(coreApi, identity); const tools = yield* client.tools.list(); expect(tools.length, "at least one tool is exposed").toBeGreaterThan(0); }), ); ``` -- Capabilities (`needs`): `api`, `browser` (cloud only today), `mcp-oauth` - (selfhost only today), `billing` (cloud only). +- Yielding an Effect service declares the scenario's capability requirement. + Unsupported services skip during local exploratory runs. CI uses required + mode, so a service promised by the selected project's matrix fails instead + of producing a green skip. 
- Resources created in a test must be cleaned up with `Effect.ensuring` (a finalizer), not trailing statements — a mid-test failure must not leak state into the shared instance. @@ -47,10 +63,12 @@ scenario("Tools · a fresh workspace advertises the built-in tools", { needs: [" ## Browser scenarios (cloud) ```ts -const identity = yield * ctx.target.newIdentity(); // logged in, has an org +const target = yield * Target; +const browser = yield * Browser; +const identity = yield * target.newIdentity(); // logged in, has an org // or newIdentity({ org: false }) for the onboarding flow yield * - ctx.browser.session(identity, async ({ page, step }) => { + browser.session(identity, async ({ page, step }) => { await step("A fresh user lands on the integrations page", async () => { await page.goto("/", { waitUntil: "networkidle" }); await page.getByText("Integrations").first().waitFor(); @@ -71,7 +89,10 @@ yield * ## MCP scenarios (selfhost) ```ts -const session = ctx.mcp.session(identity); +const target = yield * Target; +const mcp = yield * Mcp; +const identity = yield * target.newIdentity(); +const session = mcp.session(identity); const tools = yield * session.listTools(); // OAuth happens headlessly here const r = yield * session.call("execute", { code: "return 1 + 1;" }); // human-in-the-loop: session.approvePaused(r.text) resumes a paused execution @@ -85,7 +106,7 @@ the layer where "observability silently went dark" bugs live (an attribute stamped on a span the exporter never carries looks identical to health). 
```ts -const telemetry = yield * Telemetry; // skips when motel didn't boot +const telemetry = yield * Telemetry; const span = yield * telemetry.expectSpan({ @@ -106,14 +127,26 @@ expect(span.span.tags["executor.tool.outcome"]).toBe("fail"); ```sh cd e2e -bun run test # boots both dev servers, runs everything -bun run test:cloud # one target +bun run test # portable hermetic projects +bun run test:harness # runner and evidence unit tests, no target boot +bun run test:clients # client adapters, no deployed target +bun run test:cloud:hermetic # the cloud pull-request project +bun run test:cloud # cloud plus live-provider drift checks +bun run test:selfhost-docker:hermetic +bun run test:desktop-packaged # needs a GUI display +bun run test:live # public-provider drift, nonblocking in CI bun run ports # print THIS checkout's derived ports # attach to an already-running server while iterating (use `bun run ports` URLs): E2E_CLOUD_URL=http://127.0.0.1: ../node_modules/.bin/vitest run --project cloud E2E_SELFHOST_URL=http://localhost: ../node_modules/.bin/vitest run --project selfhost ``` +`E2E_REQUIRED_CAPABILITY_MODE=required` turns a missing capability promised by +the project matrix into a failure. Pull-request CI sets it automatically. +Public-provider scenarios are explicitly excluded from hermetic projects and +run in the nightly nonblocking lane. See [RUNNING.md](../RUNNING.md) for the +native desktop and heavyweight VM coverage boundaries. + Ports are claimed at boot (see `src/ports.ts`): each checkout hashes its repo root to a preferred block, atomically locks it (a held lock port makes races impossible), and walks to the next free block if it's locked or squatted — so @@ -126,6 +159,31 @@ Each run writes `runs///result.json` plus any browser artifacts (trace.zip / session.mp4 / screenshots). `bun run serve` hosts the scenario × target matrix; a run page links the trace into Playwright's trace viewer. 
+### Published evidence policy + +Text and JSON artifacts are redacted before publication. Screenshots and video +are retained byte-for-byte because generic pixel redaction cannot prove that a +secret is absent. Therefore visual artifacts may contain synthetic test data +only. A run with PNG, MP4, or WebM evidence must declare +`visualEvidence.dataClassification` in `result.json`, but that claim never +authorizes publication by itself. The harness also writes +`lane-provenance.json` from the central project matrix. CI binds each artifact +name to its workflow project outside `runs/`; both sanitization and static +publication require the persisted provenance to match that external binding. +This prevents a live lane from claiming a hermetic project that shares its +target. Visual artifacts are retained only for externally bound hermetic +`synthetic-only` lanes. Unknown, live, manual, forged, or mismatched lanes lose +their visual binaries and fail publication. The publication also includes +`publication.json` with sanitizer provenance, policy version, counts, binary +artifact paths, and the byte-canary limitation. Never point a recorded e2e +lane at a real user account or production data. + +`scenario()` writes a focused `test.ts` and `test-source-metadata.json`. Direct +Vitest journeys, such as a packaged guest test, should call +`writeFocusedTestSource()` from `src/test-source.ts` with their run directory, +`import.meta.url` file path, and registered test name so the viewer receives +the same focused source evidence. + When handing results to the user, follow the evidence contract in the root [AGENTS.md](../AGENTS.md) (direct run links + a live instance + what to try); [RUNNING.md](../RUNNING.md) has the current sharing/demo mechanics. @@ -171,11 +229,11 @@ developer actually uses it. Three tiers, pick deliberately: Exemplar: `scenarios/connect-handoff-session.test.ts`. 
Artifacts: `terminal.cast` (the chat) + `session.mp4` (browser hops); the viewer plays them in story order. -2. **Replay brain + real client** (`src/clients/replay-brain.ts`): when the - third-party CLIENT's behavior is under test (OpenCode/Claude Code - protocol handling). A scripted OpenAI-wire server plays the LLM; the - real client does everything else. Script by transcript inspection, never - turn counting. +2. **Replay brain + real client** (`src/clients/replay-brain.ts` and + `src/clients/anthropic-replay-brain.ts`): when the third-party CLIENT's + behavior is under test (OpenCode or Claude Code protocol handling). A + scripted provider-wire server plays the LLM; the real client does + everything else. Script by transcript inspection, never turn counting. 3. **Real-inference evals**: a different axis (performance distributions, not pass/fail). Not in this suite. diff --git a/e2e/cli/service-install-takeover.test.ts b/e2e/cli/service-install-takeover.test.ts index 5eff865c5..fedbd84f5 100644 --- a/e2e/cli/service-install-takeover.test.ts +++ b/e2e/cli/service-install-takeover.test.ts @@ -3,7 +3,7 @@ // a same-data-dir predecessor instead of refusing and leaving users to find a // pid. Runs on the CLI VM targets where the test worker can SSH into the guest // that globalsetup provisioned. 
-import { execFile, spawn, type ChildProcessWithoutNullStreams } from "node:child_process"; +import { execFile, spawn } from "node:child_process"; import { promisify } from "node:util"; import { expect } from "@effect/vitest"; @@ -127,7 +127,7 @@ const startPredecessor = async (exe: string): Promise => { } const invocation = sshInvocation(windowsPredecessorCommand(exe)); - const child: ChildProcessWithoutNullStreams = spawn(invocation.command, [...invocation.args], { + const child = spawn(invocation.command, [...invocation.args], { stdio: ["ignore", "pipe", "pipe"], }); let stdout = ""; diff --git a/e2e/cloud/cli-device-login.test.ts b/e2e/cloud/cli-device-login.test.ts index 3b34438bc..e14b8a950 100644 --- a/e2e/cloud/cli-device-login.test.ts +++ b/e2e/cloud/cli-device-login.test.ts @@ -9,7 +9,8 @@ // and `tools sources`; a clean exit of that chain proves the resulting WorkOS // access token (a JWT) is accepted by the protected `/api/*` plane. import { spawn } from "node:child_process"; -import { readFileSync } from "node:fs"; +import { mkdtempSync, rmSync } from "node:fs"; +import { tmpdir } from "node:os"; import { dirname, join, resolve } from "node:path"; import { fileURLToPath } from "node:url"; @@ -48,17 +49,25 @@ scenario( const cli = yield* Cli; const browser = yield* Browser; const runDir = yield* RunDir; - const dataDir = join(runDir, "cli-home"); + // This directory contains live OAuth credentials. Keep it outside the + // viewer-served runs tree and remove it when the scenario finishes. + const dataDir = mkdtempSync(join(tmpdir(), "executor-e2e-cli-cloud-")); + yield* Effect.addFinalizer(() => + Effect.sync(() => rmSync(dataDir, { recursive: true, force: true })), + ); // A fresh signed-in user with an org, the org is what the device token's // org_id claim binds to, and what the /api plane authorizes against. const identity = yield* target.newIdentity(); const email = identity.credentials?.email ?? 
identity.label; - const env = { ...process.env, EXECUTOR_DATA_DIR: dataDir }; - for (const key of ["EXECUTOR_API_KEY", "EXECUTOR_AUTH_TOKEN", "EXECUTOR_AUTH_PASSWORD"]) { - delete (env as Record)[key]; - } + const env: Record = { + ...process.env, + EXECUTOR_DATA_DIR: dataDir, + }; + delete env.EXECUTOR_API_KEY; + delete env.EXECUTOR_AUTH_TOKEN; + delete env.EXECUTOR_AUTH_PASSWORD; // Hand the printed verification URL from the terminal fiber to the browser. let resolveUrl!: (url: string) => void; @@ -73,7 +82,8 @@ scenario( const journey = `${cli_} login --base-url ${CLOUD_BASE_URL} --no-browser --name cloud && ` + `${cli_} whoami --server cloud && ` + - `${cli_} tools sources --server cloud`; + `${cli_} tools sources --server cloud && ` + + `${cli_} server list`; const terminal = cli.session( ["bash", "-c", journey], @@ -129,41 +139,52 @@ scenario( concurrency: "unbounded", }); expect(finalScreen, "whoami reported the bound organization").toMatch(/org_\w+/); - - // The stored profile carries an oauth device-login credential, not a key. 
- const store = JSON.parse(readFileSync(join(dataDir, "server-connections.json"), "utf8")) as { - defaultProfile: string | null; - profiles: Array<{ - name: string; - connection: { auth?: { kind: string; accessToken?: string } }; - }>; - }; - expect(store.defaultProfile, "the login became the default profile").toBe("cloud"); - const cloudProfile = store.profiles.find((p) => p.name === "cloud"); - expect(cloudProfile?.connection.auth?.kind, "credential is an oauth device token").toBe( - "oauth", - ); - expect(typeof cloudProfile?.connection.auth?.accessToken, "an access token is stored").toBe( - "string", + expect(finalScreen, "the public profile list reports stored authentication").toMatch( + /\* cloud\s+http\s+\S+\s+\S+\s+stored-auth/, ); }), ), ); +const cliEnvironment = (dataDir: string) => { + const env: Record = { + ...process.env, + EXECUTOR_DATA_DIR: dataDir, + }; + delete env.EXECUTOR_API_KEY; + delete env.EXECUTOR_AUTH_TOKEN; + delete env.EXECUTOR_AUTH_PASSWORD; + return env; +}; + +const runCli = (args: readonly string[], dataDir: string) => + new Promise<{ code: number | null; stdout: string; stderr: string }>((res, rej) => { + const child = spawn("bun", ["run", CLI_ENTRY, ...args], { + cwd: REPO_ROOT, + env: cliEnvironment(dataDir), + }); + let stdout = ""; + let stderr = ""; + child.stdout.on("data", (chunk: Buffer) => { + stdout += chunk.toString(); + }); + child.stderr.on("data", (chunk: Buffer) => { + stderr += chunk.toString(); + }); + child.on("error", rej); + child.on("close", (code) => res({ code, stdout, stderr })); + }); + // Run `executor login` as a subprocess, approving the device for `approveEmail` // the moment the verification URL is printed (raw stdout, no PTY). 
-const runCliLogin = ( - args: readonly string[], - dataDir: string, - approveEmail: string, -): Promise<{ code: number | null; stdout: string }> => - new Promise((res, rej) => { - const env = { ...process.env, EXECUTOR_DATA_DIR: dataDir }; - for (const k of ["EXECUTOR_API_KEY", "EXECUTOR_AUTH_TOKEN", "EXECUTOR_AUTH_PASSWORD"]) { - delete (env as Record)[k]; - } - const child = spawn("bun", ["run", CLI_ENTRY, ...args], { cwd: REPO_ROOT, env }); +const runCliLogin = (args: readonly string[], dataDir: string, approveEmail: string) => + new Promise<{ code: number | null; stdout: string; stderr: string }>((res, rej) => { + const child = spawn("bun", ["run", CLI_ENTRY, ...args], { + cwd: REPO_ROOT, + env: cliEnvironment(dataDir), + }); let stdout = ""; + let stderr = ""; let approved = false; child.stdout.on("data", (chunk: Buffer) => { stdout += chunk.toString(); @@ -175,49 +196,110 @@ const runCliLogin = ( url.searchParams.set("login_hint", approveEmail); void fetch(url, { redirect: "manual" }); }); - child.stderr.on("data", () => {}); + child.stderr.on("data", (chunk: Buffer) => { + stderr += chunk.toString(); + }); child.on("error", rej); - child.on("close", (code) => res({ code, stdout })); + child.on("close", (code) => res({ code, stdout, stderr })); }); +const profileNameFromLogin = (stdout: string) => { + const profileName = stdout.match(/profile "([^"]+)"/)?.[1]; + if (!profileName) throw new Error(`login output did not contain a profile name:\n${stdout}`); + return profileName; +}; + scenario( - "CLI · two accounts on the same host get separate profiles", + "CLI · switch, logout, and re-login two accounts on one host", { timeout: 120_000 }, - Effect.gen(function* () { - const target = yield* Target; - if (target.name !== "cloud") return; - - const runDir = yield* RunDir; - const dataDir = join(runDir, "multi-home"); - - // Two distinct hosted accounts (different user + org) on the SAME server. 
- const a = yield* target.newIdentity(); - const b = yield* target.newIdentity(); - const emailA = a.credentials?.email ?? a.label; - const emailB = b.credentials?.email ?? b.label; - - // Log in as each with NO --name, so naming is driven by the account. - const loginA = yield* Effect.promise(() => - runCliLogin(["login", "--base-url", CLOUD_BASE_URL, "--no-browser"], dataDir, emailA), - ); - expect(loginA.code, "first login exited cleanly").toBe(0); - const loginB = yield* Effect.promise(() => - runCliLogin(["login", "--base-url", CLOUD_BASE_URL, "--no-browser"], dataDir, emailB), - ); - expect(loginB.code, "second login exited cleanly").toBe(0); - - const store = JSON.parse(readFileSync(join(dataDir, "server-connections.json"), "utf8")) as { - defaultProfile: string | null; - profiles: Array<{ - name: string; - connection: { origin: string; displayName?: string; auth?: { kind: string } }; - }>; - }; - const oauthProfiles = store.profiles.filter((p) => p.connection.auth?.kind === "oauth"); - // The second login must NOT clobber the first, both accounts kept. - expect(oauthProfiles.length, "both accounts retained as separate profiles").toBe(2); - expect(new Set(oauthProfiles.map((p) => p.name)).size, "profile names are distinct").toBe(2); - const emails = new Set(oauthProfiles.map((p) => p.connection.displayName)); - expect(emails.has(emailA) && emails.has(emailB), "both account emails present").toBe(true); - }), + Effect.scoped( + Effect.gen(function* () { + const target = yield* Target; + if (target.name !== "cloud") return; + + const dataDir = mkdtempSync(join(tmpdir(), "executor-e2e-cli-cloud-accounts-")); + yield* Effect.addFinalizer(() => + Effect.sync(() => rmSync(dataDir, { recursive: true, force: true })), + ); + + // Two distinct hosted accounts (different user + org) on the same server. + const a = yield* target.newIdentity(); + const b = yield* target.newIdentity(); + const emailA = a.credentials?.email ?? 
a.label; + const emailB = b.credentials?.email ?? b.label; + + // Log in as each with no pinned name, so naming is driven by account identity. + const loginA = yield* Effect.promise(() => + runCliLogin(["login", "--base-url", CLOUD_BASE_URL, "--no-browser"], dataDir, emailA), + ); + expect(loginA.code, `first login failed:\n${loginA.stderr}`).toBe(0); + const profileA = profileNameFromLogin(loginA.stdout); + + const loginB = yield* Effect.promise(() => + runCliLogin(["login", "--base-url", CLOUD_BASE_URL, "--no-browser"], dataDir, emailB), + ); + expect(loginB.code, `second login failed:\n${loginB.stderr}`).toBe(0); + const profileB = profileNameFromLogin(loginB.stdout); + expect(profileB, "the second account has a distinct profile").not.toBe(profileA); + + const useA = yield* Effect.promise(() => runCli(["server", "use", profileA], dataDir)); + expect(useA.code, `selecting account A failed:\n${useA.stderr}`).toBe(0); + const callA = yield* Effect.promise(() => runCli(["tools", "sources"], dataDir)); + expect(callA.code, `account A protected call failed:\n${callA.stderr}`).toBe(0); + + const useB = yield* Effect.promise(() => runCli(["server", "use", profileB], dataDir)); + expect(useB.code, `selecting account B failed:\n${useB.stderr}`).toBe(0); + const callB = yield* Effect.promise(() => runCli(["tools", "sources"], dataDir)); + expect(callB.code, `account B protected call failed:\n${callB.stderr}`).toBe(0); + + const ambiguousLogout = yield* Effect.promise(() => + runCli(["logout", "--base-url", CLOUD_BASE_URL], dataDir), + ); + expect(ambiguousLogout.code, "origin-only logout rejects ambiguous accounts").not.toBe(0); + expect(`${ambiguousLogout.stdout}\n${ambiguousLogout.stderr}`).toContain( + "Multiple server profiles", + ); + + const logoutB = yield* Effect.promise(() => + runCli(["logout", "--server", profileB], dataDir), + ); + expect(logoutB.code, `named logout failed:\n${logoutB.stderr}`).toBe(0); + const loggedOutB = yield* Effect.promise(() => + 
runCli(["whoami", "--server", profileB], dataDir), + ); + expect(loggedOutB.stdout).toContain("Not logged in (no stored credentials)."); + + yield* Effect.promise(() => runCli(["server", "use", profileA], dataDir)); + const stillAuthenticatedA = yield* Effect.promise(() => + runCli(["tools", "sources"], dataDir), + ); + expect( + stillAuthenticatedA.code, + `account A was affected by logging out B:\n${stillAuthenticatedA.stderr}`, + ).toBe(0); + + const reloginB = yield* Effect.promise(() => + runCliLogin(["login", "--base-url", CLOUD_BASE_URL, "--no-browser"], dataDir, emailB), + ); + expect(reloginB.code, `account B re-login failed:\n${reloginB.stderr}`).toBe(0); + expect(profileNameFromLogin(reloginB.stdout), "re-login reused B's profile").toBe(profileB); + + const useReloggedB = yield* Effect.promise(() => + runCli(["server", "use", profileB], dataDir), + ); + expect(useReloggedB.code, `reselecting account B failed:\n${useReloggedB.stderr}`).toBe(0); + const reloggedCallB = yield* Effect.promise(() => runCli(["tools", "sources"], dataDir)); + expect(reloggedCallB.code, `re-logged account B call failed:\n${reloggedCallB.stderr}`).toBe( + 0, + ); + + const listed = yield* Effect.promise(() => runCli(["server", "list"], dataDir)); + expect(listed.code, `listing profiles failed:\n${listed.stderr}`).toBe(0); + expect(listed.stdout).toContain(profileA); + expect(listed.stdout).toContain(profileB); + expect(listed.stdout.match(/stored-auth/g), "both profiles retain credentials").toHaveLength( + 2, + ); + }), + ), ); diff --git a/e2e/cloud/connection-owner-isolation.test.ts b/e2e/cloud/connection-owner-isolation.test.ts index ae707fa3e..ccb7ce4dc 100644 --- a/e2e/cloud/connection-owner-isolation.test.ts +++ b/e2e/cloud/connection-owner-isolation.test.ts @@ -1,8 +1,8 @@ // Cloud-only: the connection OWNER model, with real multi-user organizations. // Every connection is filed under `owner: "org"` (shared with the whole tenant) // or `owner: "user"` (this subject's own). 
The org membership is built through -// the real product flows — invite → accept-invitation, create-organization, -// switch-organization — so the guarantees hold for genuine sessions: +// the real product flows: invite, accept-invitation, create-organization, and +// the same organization selector that URL-scoped browser requests send: // // 1. A user-owned connection is private to its creator, even from co-workers // in the same org (personal OAuth tokens don't leak to colleagues). @@ -16,7 +16,12 @@ import { Effect } from "effect"; import type { HttpApiClient } from "effect/unstable/httpapi"; import { composePluginApi } from "@executor-js/api/server"; import { openApiHttpPlugin } from "@executor-js/plugin-openapi/api"; -import { AuthTemplateSlug, ConnectionName, IntegrationSlug } from "@executor-js/sdk/shared"; +import { + AuthTemplateSlug, + ConnectionName, + EXECUTOR_ORG_SELECTOR_HEADER, + IntegrationSlug, +} from "@executor-js/sdk/shared"; import { scenario } from "../src/scenario"; import { Api, Target } from "../src/services"; @@ -62,8 +67,8 @@ const registerIntegration = (client: Client) => const freshConnectionName = () => ConnectionName.make(`conn${randomBytes(4).toString("hex")}`); // ── Session plumbing over the real auth endpoints ─────────────────────────── -// These mirror what the product web app does: cookie-authenticated calls whose -// responses re-seal the session when the active org changes. +// These mirror the product's cookie-authenticated account mutations and +// URL-derived organization selector used by scoped API requests. const cookieOf = (identity: Identity): string => identity.headers?.["cookie"] ?? ""; @@ -114,14 +119,14 @@ const createAnotherOrg = (target: TargetShape, identity: Identity, name: string) return withRefreshedSession(identity, response); }); -/** Switch this account's active org; returns the identity bound to it. 
*/ -const switchOrg = (target: TargetShape, identity: Identity, organizationId: string) => - Effect.gen(function* () { - const response = yield* postJson(target, "/api/auth/switch-organization", identity, { - organizationId, - }); - return withRefreshedSession(identity, response); - }); +const inOrganization = (identity: Identity, organizationId: string) => + ({ + ...identity, + headers: { + ...identity.headers, + [EXECUTOR_ORG_SELECTOR_HEADER]: organizationId, + }, + }) satisfies Identity; /** The org this identity's session is currently bound to. */ const activeOrganizationId = (target: TargetShape, identity: Identity) => @@ -245,7 +250,7 @@ scenario( const { client } = yield* Api; const userInOrgA = yield* target.newIdentity(); const orgAId = yield* activeOrganizationId(target, userInOrgA); - const clientA = yield* client(api, userInOrgA); + const clientA = yield* client(api, inOrganization(userInOrgA, orgAId)); const integration = yield* registerIntegration(clientA); const name = freshConnectionName(); @@ -265,7 +270,8 @@ scenario( userInOrgA, `Second Org ${randomBytes(3).toString("hex")}`, ); - const clientB = yield* client(api, userInOrgB); + const orgBId = yield* activeOrganizationId(target, userInOrgB); + const clientB = yield* client(api, inOrganization(userInOrgB, orgBId)); const orgBIntegrations = yield* clientB.integrations.list(); expect( @@ -279,9 +285,8 @@ scenario( "the user's org-A connection is invisible from their second org", ).not.toContain(name); - // Switching back to org A, the connection is still theirs. - const backInOrgA = yield* switchOrg(target, userInOrgB, orgAId); - const clientABack = yield* client(api, backInOrgA); + // Browser switching changes the URL selector, not the shared session cookie. 
+ const clientABack = yield* client(api, inOrganization(userInOrgB, orgAId)); const orgAUserList = yield* clientABack.connections.list({ query: { integration, owner: "user" }, }); diff --git a/e2e/cloud/org-first-paint.test.ts b/e2e/cloud/org-first-paint.test.ts new file mode 100644 index 000000000..62c400ddc --- /dev/null +++ b/e2e/cloud/org-first-paint.test.ts @@ -0,0 +1,347 @@ +// Cloud browser coverage for the first paint of an organization URL when the +// shared WorkOS session cookie is pinned to a different organization. +import { randomBytes } from "node:crypto"; + +import { expect } from "@effect/vitest"; +import { Effect, Schema } from "effect"; + +import { EXECUTOR_ORG_SELECTOR_HEADER } from "@executor-js/sdk/shared"; + +import { scenario } from "../src/scenario"; +import { Browser, Target } from "../src/services"; +import type { Identity } from "../src/target"; + +const Organization = Schema.Struct({ + id: Schema.String, + name: Schema.String, + slug: Schema.String, +}); +const SelectedAccount = Schema.Struct({ organization: Organization }); +const RenameBody = Schema.Struct({ name: Schema.String }); + +const decodeOrganization = Schema.decodeUnknownSync(Organization); +const decodeSelectedAccount = Schema.decodeUnknownSync(SelectedAccount); +const decodeRenameBody = Schema.decodeUnknownSync(RenameBody); + +const sessionCookiePair = (response: Response) => { + const headers = response.headers.getSetCookie?.() ?? [response.headers.get("set-cookie") ?? ""]; + return headers.find((header) => header.startsWith("wos-session="))?.split(";")[0] ?? 
""; +}; + +const accountRequest = (baseUrl: string, cookie: string, organizationSlug: string) => + fetch(new URL("/api/account/me", baseUrl), { + headers: { + cookie, + [EXECUTOR_ORG_SELECTOR_HEADER]: organizationSlug, + }, + }); + +scenario( + "Org first paint · the URL organization wins over a stale cookie organization", + {}, + Effect.gen(function* () { + const target = yield* Target; + const browser = yield* Browser; + const identity = yield* target.newIdentity(); + const originalCookie = identity.headers?.cookie ?? ""; + const suffix = randomBytes(4).toString("hex"); + const urlOrganizationName = `URL Organization B ${suffix}`; + const cookieOrganizationName = `Cookie Organization A ${suffix}`; + + const originalAccountResponse = yield* Effect.promise(() => + fetch(new URL("/api/auth/me", target.baseUrl), { + headers: { cookie: originalCookie }, + }), + ); + expect(originalAccountResponse.ok, "the original organization resolves").toBe(true); + const originalAccount = decodeSelectedAccount( + yield* Effect.promise(() => originalAccountResponse.json()), + ); + + const renameOriginalResponse = yield* Effect.promise(() => + fetch(new URL("/api/account/name", target.baseUrl), { + method: "PATCH", + headers: { + "content-type": "application/json", + origin: new URL(target.baseUrl).origin, + cookie: originalCookie, + [EXECUTOR_ORG_SELECTOR_HEADER]: originalAccount.organization.slug, + }, + body: JSON.stringify({ name: urlOrganizationName }), + }), + ); + expect(renameOriginalResponse.ok, "the URL organization receives its distinct name").toBe(true); + + const createCookieOrganizationResponse = yield* Effect.promise(() => + fetch(new URL("/api/auth/create-organization", target.baseUrl), { + method: "POST", + headers: { + "content-type": "application/json", + origin: new URL(target.baseUrl).origin, + cookie: originalCookie, + }, + body: JSON.stringify({ name: cookieOrganizationName }), + }), + ); + expect( + createCookieOrganizationResponse.ok, + "the second 
organization is created and selected in the session", + ).toBe(true); + const cookieOrganization = decodeOrganization( + yield* Effect.promise(() => createCookieOrganizationResponse.json()), + ); + const cookiePair = sessionCookiePair(createCookieOrganizationResponse); + expect(cookiePair, "organization creation returns a refreshed session cookie").not.toBe(""); + const cookieValue = cookiePair.slice("wos-session=".length); + const pinnedToCookieOrganization = { + ...identity, + headers: { ...identity.headers, cookie: cookiePair }, + cookies: [{ name: "wos-session", value: cookieValue }], + } satisfies Identity; + + yield* browser.session(pinnedToCookieOrganization, async ({ page, step }) => { + await step("Render URL organization B before account hydration completes", async () => { + let releaseAccountRequests = () => {}; + let releaseMemberRequests = () => {}; + const accountRequestGate = new Promise<void>((resolve) => { + releaseAccountRequests = resolve; + }); + const memberRequestGate = new Promise<void>((resolve) => { + releaseMemberRequests = resolve; + }); + await page.route("**/api/account/me", async (route) => { + await accountRequestGate; + await route.continue(); + }); + await page.route("**/api/account/members", async (route) => { + await memberRequestGate; + await route.continue(); + }); + const hydratedAccountResponse = page.waitForResponse( + (response) => new URL(response.url()).pathname === "/api/account/me", + { timeout: 30_000 }, + ); + const hydratedMemberResponse = page.waitForResponse( + (response) => new URL(response.url()).pathname === "/api/account/members", + { timeout: 30_000 }, + ); + + await page.goto(`/${originalAccount.organization.slug}/org`, { waitUntil: "commit" }); + const nameInput = page.getByLabel("Organization name"); + try { + await page + .getByTestId("organization-name-permission-loading") + .waitFor({ timeout: 30_000 }); + await page.getByRole("button", { name: new RegExp(urlOrganizationName) }).waitFor(); + expect( + await 
page.getByTestId("organization-member-actions-loading").count(), + "the member action slot is reserved while permissions load", + ).toBe(1); + expect( + await page.getByTestId("organization-domain-actions-loading").count(), + "the domain action slot is reserved while permissions load", + ).toBe(1); + expect(await page.getByTestId("organization-members-loading").count()).toBe(1); + expect(await nameInput.count(), "admin inputs do not render before role resolution").toBe( + 0, + ); + expect( + await page.getByTestId("organization-permission-read-only").count(), + "loading is not presented as denied access", + ).toBe(0); + expect( + await page.getByText(cookieOrganizationName, { exact: true }).count(), + "the cookie organization is absent from the URL organization page", + ).toBe(0); + + releaseAccountRequests(); + const response = await hydratedAccountResponse; + expect(response.ok(), "account hydration succeeds for the URL organization").toBe(true); + expect( + response.request().headers()[EXECUTOR_ORG_SELECTOR_HEADER], + "account hydration uses the URL organization selector", + ).toBe(originalAccount.organization.slug); + await page.getByTestId("organization-name-permission-loading").waitFor(); + expect( + await nameInput.count(), + "account hydration alone cannot reveal management controls", + ).toBe(0); + + releaseMemberRequests(); + const membersResponse = await hydratedMemberResponse; + expect(membersResponse.ok(), "the member role request succeeds").toBe(true); + expect( + membersResponse.request().headers()[EXECUTOR_ORG_SELECTOR_HEADER], + "the permission request uses the URL organization selector", + ).toBe(originalAccount.organization.slug); + } finally { + releaseAccountRequests(); + releaseMemberRequests(); + } + + await page.unroute("**/api/account/me"); + await page.unroute("**/api/account/members"); + await nameInput.waitFor({ timeout: 30_000 }); + expect( + await nameInput.inputValue(), + "the resolved admin form belongs to the URL organization", + 
).toBe(urlOrganizationName); + }); + + const renamedUrlOrganization = `${urlOrganizationName} Renamed`; + + await step("Rename URL organization B without mutating cookie organization A", async () => { + const responsePromise = page.waitForResponse( + (response) => + response.request().method() === "PATCH" && + new URL(response.url()).pathname === "/api/account/name", + { timeout: 30_000 }, + ); + await page.getByLabel("Organization name").fill(renamedUrlOrganization); + await page.getByRole("button", { name: "Save", exact: true }).click(); + const response = await responsePromise; + expect(response.ok(), "the URL organization rename succeeds").toBe(true); + expect( + response.request().headers()[EXECUTOR_ORG_SELECTOR_HEADER], + "the rename targets the URL organization", + ).toBe(originalAccount.organization.slug); + expect( + decodeRenameBody(response.request().postDataJSON()).name, + "the submitted name comes from the current URL organization form", + ).toBe(renamedUrlOrganization); + }); + + const browserCookie = `wos-session=${ + (await page.context().cookies()).find((cookie) => cookie.name === "wos-session")?.value ?? 
+ "" + }`; + expect(browserCookie, "the browser remains authenticated").not.toBe("wos-session="); + + const urlOrganizationResponse = await accountRequest( + target.baseUrl, + browserCookie, + originalAccount.organization.slug, + ); + const cookieOrganizationResponse = await accountRequest( + target.baseUrl, + browserCookie, + cookieOrganization.slug, + ); + expect(urlOrganizationResponse.ok, "URL organization B still resolves").toBe(true); + expect(cookieOrganizationResponse.ok, "cookie organization A still resolves").toBe(true); + const urlOrganizationAccount = decodeSelectedAccount(await urlOrganizationResponse.json()); + const cookieOrganizationAccount = decodeSelectedAccount( + await cookieOrganizationResponse.json(), + ); + expect(urlOrganizationAccount.organization.name).toBe(renamedUrlOrganization); + expect(cookieOrganizationAccount.organization.name).toBe(cookieOrganizationName); + }); + }), +); + +scenario( + "Org permissions · loading and request failure stay distinct before admin controls render", + {}, + Effect.gen(function* () { + const target = yield* Target; + const browser = yield* Browser; + const identity = yield* target.newIdentity(); + const cookie = identity.headers?.cookie ?? 
""; + const accountResponse = yield* Effect.promise(() => + fetch(new URL("/api/auth/me", target.baseUrl), { + headers: { cookie }, + }), + ); + expect(accountResponse.ok, "the administrator organization resolves").toBe(true); + const account = decodeSelectedAccount(yield* Effect.promise(() => accountResponse.json())); + + yield* browser.session(identity, async ({ page, step }) => { + await step( + "Hold the first permission request on the server-rendered loading state", + async () => { + let failFirstRequest = () => {}; + let releaseRetryRequest = () => {}; + const firstRequestGate = new Promise<void>((resolve) => { + failFirstRequest = resolve; + }); + const retryRequestGate = new Promise<void>((resolve) => { + releaseRetryRequest = resolve; + }); + let memberRequestAttempt = 0; + + await page.route("**/api/account/members", async (route) => { + memberRequestAttempt += 1; + if (memberRequestAttempt === 1) { + await firstRequestGate; + await route.fulfill({ + status: 503, + contentType: "application/json", + body: JSON.stringify({ message: "permission lookup unavailable" }), + }); + return; + } + await retryRequestGate; + await route.continue(); + }); + const failedResponsePromise = page.waitForResponse( + (response) => + new URL(response.url()).pathname === "/api/account/members" && + response.status() === 503, + { timeout: 30_000 }, + ); + + await page.goto(`/${account.organization.slug}/org`, { waitUntil: "commit" }); + try { + await page + .getByTestId("organization-name-permission-loading") + .waitFor({ timeout: 30_000 }); + expect(await page.getByTestId("organization-members-loading").count()).toBe(1); + expect(await page.getByLabel("Organization name").count()).toBe(0); + expect(await page.getByRole("button", { name: "Invite member" }).count()).toBe(0); + expect(await page.getByTestId("organization-permission-read-only").count()).toBe(0); + + failFirstRequest(); + const failedResponse = await failedResponsePromise; + expect(failedResponse.status(), "the first 
permission request fails visibly").toBe(503); + await page.getByTestId("organization-permission-failed").waitFor({ timeout: 30_000 }); + await page.getByRole("button", { name: "Retry permissions" }).waitFor(); + expect(await page.getByTestId("organization-name-permission-loading").count()).toBe(0); + expect(await page.getByLabel("Organization name").count()).toBe(0); + expect(await page.getByRole("button", { name: "Invite member" }).count()).toBe(0); + + const retryResponsePromise = page.waitForResponse( + (response) => new URL(response.url()).pathname === "/api/account/members", + { timeout: 30_000 }, + ); + await page.getByRole("button", { name: "Retry permissions" }).click(); + await page + .getByTestId("organization-name-permission-loading") + .waitFor({ timeout: 30_000 }); + expect(await page.getByTestId("organization-members-loading").count()).toBe(1); + expect(await page.getByTestId("organization-permission-failed").count()).toBe(0); + expect(await page.getByLabel("Organization name").count()).toBe(0); + + releaseRetryRequest(); + const retryResponse = await retryResponsePromise; + expect(retryResponse.ok(), "retrying the permission request succeeds").toBe(true); + expect( + retryResponse.request().headers()[EXECUTOR_ORG_SELECTOR_HEADER], + "the retried permission request keeps the URL organization selector", + ).toBe(account.organization.slug); + + await page.getByLabel("Organization name").waitFor({ timeout: 30_000 }); + expect(await page.getByLabel("Organization name").inputValue()).toBe( + account.organization.name, + ); + await page.getByRole("button", { name: "Invite member" }).waitFor(); + expect(await page.getByTestId("organization-permission-failed").count()).toBe(0); + } finally { + failFirstRequest(); + releaseRetryRequest(); + await page.unroute("**/api/account/members"); + } + }, + ); + }); + }), +); diff --git a/e2e/cloud/org-member-access.test.ts b/e2e/cloud/org-member-access.test.ts new file mode 100644 index 000000000..48865ba7f --- 
/dev/null +++ b/e2e/cloud/org-member-access.test.ts @@ -0,0 +1,175 @@ +// Cloud browser and API coverage for a regular organization member. The UI +// stays read-only, while the same mutations remain forbidden at the server. +import { randomBytes } from "node:crypto"; + +import { expect } from "@effect/vitest"; +import { Effect, Schema } from "effect"; + +import { EXECUTOR_ORG_SELECTOR_HEADER } from "@executor-js/sdk/shared"; + +import { scenario } from "../src/scenario"; +import { Browser, Target } from "../src/services"; +import type { Identity } from "../src/target"; + +const Organization = Schema.Struct({ + id: Schema.String, + name: Schema.String, + slug: Schema.String, +}); +const SelectedAccount = Schema.Struct({ organization: Organization }); +const Invitation = Schema.Struct({ id: Schema.String }); + +const decodeSelectedAccount = Schema.decodeUnknownSync(SelectedAccount); +const decodeInvitation = Schema.decodeUnknownSync(Invitation); + +const sessionCookiePair = (response: Response) => { + const headers = response.headers.getSetCookie?.() ?? [response.headers.get("set-cookie") ?? ""]; + return headers.find((header) => header.startsWith("wos-session="))?.split(";")[0] ?? ""; +}; + +scenario( + "Organization access · members see read-only controls and mutations stay forbidden", + {}, + Effect.gen(function* () { + const target = yield* Target; + const browser = yield* Browser; + const admin = yield* target.newIdentity(); + const member = yield* target.newIdentity({ org: false }); + const adminCookie = admin.headers?.cookie ?? ""; + const memberCookie = member.headers?.cookie ?? ""; + const memberEmail = member.credentials?.email ?? 
""; + + const adminAccountResponse = yield* Effect.promise(() => + fetch(new URL("/api/auth/me", target.baseUrl), { + headers: { cookie: adminCookie }, + }), + ); + expect(adminAccountResponse.ok, "the admin organization resolves").toBe(true); + const adminAccount = decodeSelectedAccount( + yield* Effect.promise(() => adminAccountResponse.json()), + ); + + const inviteResponse = yield* Effect.promise(() => + fetch(new URL("/api/account/members/invite", target.baseUrl), { + method: "POST", + headers: { + "content-type": "application/json", + origin: new URL(target.baseUrl).origin, + cookie: adminCookie, + [EXECUTOR_ORG_SELECTOR_HEADER]: adminAccount.organization.slug, + }, + body: JSON.stringify({ email: memberEmail }), + }), + ); + expect(inviteResponse.ok, "the admin can invite the member").toBe(true); + const invitation = decodeInvitation(yield* Effect.promise(() => inviteResponse.json())); + + const acceptResponse = yield* Effect.promise(() => + fetch(new URL("/api/auth/accept-invitation", target.baseUrl), { + method: "POST", + headers: { + "content-type": "application/json", + origin: new URL(target.baseUrl).origin, + cookie: memberCookie, + }, + body: JSON.stringify({ invitationId: invitation.id }), + }), + ); + expect(acceptResponse.ok, "the invited user accepts the membership").toBe(true); + const acceptedCookie = sessionCookiePair(acceptResponse); + expect(acceptedCookie, "acceptance returns a refreshed member session").not.toBe(""); + const memberIdentity = { + ...member, + headers: { ...member.headers, cookie: acceptedCookie }, + cookies: [ + { + name: "wos-session", + value: acceptedCookie.slice("wos-session=".length), + }, + ], + } satisfies Identity; + + yield* browser.session(memberIdentity, async ({ page, step }) => { + await step("Resolve permission loading into explicit read-only access", async () => { + let releaseMemberRequest = () => {}; + const memberRequestGate = new Promise<void>((resolve) => { + releaseMemberRequest = resolve; + }); + await 
page.route("**/api/account/members", async (route) => { + await memberRequestGate; + await route.continue(); + }); + const membersResponsePromise = page.waitForResponse( + (response) => new URL(response.url()).pathname === "/api/account/members", + { timeout: 30_000 }, + ); + + await page.goto(`/${adminAccount.organization.slug}/org`, { waitUntil: "commit" }); + try { + await page + .getByTestId("organization-name-permission-loading") + .waitFor({ timeout: 30_000 }); + expect(await page.getByTestId("organization-member-actions-loading").count()).toBe(1); + expect(await page.getByTestId("organization-domain-actions-loading").count()).toBe(1); + expect(await page.getByTestId("organization-members-loading").count()).toBe(1); + expect(await page.getByTestId("organization-permission-read-only").count()).toBe(0); + expect(await page.getByLabel("Organization name").count()).toBe(0); + expect(await page.getByRole("button", { name: "Invite member" }).count()).toBe(0); + + releaseMemberRequest(); + const membersResponse = await membersResponsePromise; + expect(membersResponse.ok(), "the member permission request succeeds").toBe(true); + expect( + membersResponse.request().headers()[EXECUTOR_ORG_SELECTOR_HEADER], + "the member permission request uses the URL organization selector", + ).toBe(adminAccount.organization.slug); + } finally { + releaseMemberRequest(); + await page.unroute("**/api/account/members"); + } + + await page.getByRole("heading", { name: "Organization", exact: true }).waitFor(); + await page.getByText(memberEmail, { exact: true }).waitFor({ timeout: 30_000 }); + await page.getByTestId("organization-permission-read-only").waitFor({ timeout: 30_000 }); + }); + + await step("Verify administrative controls are absent", async () => { + await page.getByText(adminAccount.organization.name, { exact: true }).first().waitFor(); + expect(await page.getByTestId("organization-permission-read-only").count()).toBe(1); + expect(await 
page.getByTestId("organization-permission-failed").count()).toBe(0); + expect(await page.getByLabel("Organization name").count()).toBe(0); + expect(await page.getByRole("button", { name: "Save", exact: true }).count()).toBe(0); + expect(await page.getByRole("button", { name: "Invite member" }).count()).toBe(0); + expect(await page.getByRole("button", { name: "Add domain" }).count()).toBe(0); + expect(await page.getByRole("button", { name: "Upgrade" }).count()).toBe(0); + }); + + const browserCookie = `wos-session=${ + (await page.context().cookies()).find((cookie) => cookie.name === "wos-session")?.value ?? + "" + }`; + const scopedHeaders = { + "content-type": "application/json", + origin: new URL(target.baseUrl).origin, + cookie: browserCookie, + [EXECUTOR_ORG_SELECTOR_HEADER]: adminAccount.organization.slug, + }; + + const renameResponse = await fetch(new URL("/api/account/name", target.baseUrl), { + method: "PATCH", + headers: scopedHeaders, + body: JSON.stringify({ name: `Forbidden ${randomBytes(3).toString("hex")}` }), + }); + const secondInviteResponse = await fetch( + new URL("/api/account/members/invite", target.baseUrl), + { + method: "POST", + headers: scopedHeaders, + body: JSON.stringify({ email: `forbidden-${randomBytes(3).toString("hex")}@e2e.test` }), + }, + ); + expect(renameResponse.status, "the API rejects a member organization rename").toBe(403); + expect(secondInviteResponse.status, "the API rejects a member invitation").toBe(403); + }); + }), +); diff --git a/e2e/cloud/org-multitab-cookie.test.ts b/e2e/cloud/org-multitab-cookie.test.ts index 27c8a967a..fb15a3ebf 100644 --- a/e2e/cloud/org-multitab-cookie.test.ts +++ b/e2e/cloud/org-multitab-cookie.test.ts @@ -1,29 +1,38 @@ -// Cloud-only (browser): two tabs, two orgs, at the same time — independent. -// -// WorkOS still pins ONE org into the sealed `wos-session` cookie, and the whole -// browser shares one cookie jar. 
Under the OLD cookie-based "active org" model -// that made "active organization" a browser-global: two tabs could not be in -// two orgs at once, and a switch (or the slug gate's switch-to-honor-the-URL) -// silently re-scoped the other tab out from under it. -// -// The stateless URL model removes that hazard. The slug in the path is the -// request scope: every API call carries it (the `x-executor-organization` -// header), the server re-checks live membership and resolves data for THAT -// org, and the session merely authenticates the user to all their orgs at -// once. Nothing writes the cookie on a switch. So this scenario — once the -// reproduction of the corruption — now asserts the opposite: each tab's -// requests stay scoped to its own URL org, no matter what the other tab does. -// -// Everything runs through the browser (onboarding + the menu create-org), so -// the single-use WorkOS refresh-token chain stays browser-owned and valid. +// Cloud browser coverage for two tabs sharing one cookie jar while their URLs +// select different organizations. Distinct persisted policies make accidental +// cross-tab re-scoping visible in both the network and the rendered page. 
+import { randomBytes } from "node:crypto"; + import { expect } from "@effect/vitest"; import { Effect } from "effect"; +import type { Page } from "playwright"; import { scenario } from "../src/scenario"; import { Browser, Target } from "../src/services"; +const policyResponse = (page: Page, method: "GET" | "POST") => + page.waitForResponse( + (response) => + response.request().method() === method && + new URL(response.url()).pathname === "/api/policies", + { timeout: 30_000 }, + ); + +const createPolicy = async (page: Page, pattern: string) => { + const responsePromise = policyResponse(page, "POST"); + await page.locator("#policy-pattern").fill(pattern); + await page.getByRole("button", { name: "Add policy" }).click(); + const response = await responsePromise; + expect(response.ok(), `creating ${pattern} succeeds`).toBe(true); + await page.getByText(pattern, { exact: true }).waitFor(); + return response.request(); +}; + +const sessionCookieValue = async (page: Page) => + (await page.context().cookies()).find((cookie) => cookie.name === "wos-session")?.value ?? ""; + scenario( - "Org tabs · two tabs on different orgs stay independent (URL-scoped, no cookie steal)", + "Org tabs · two URL-scoped organizations retain independent resources", {}, Effect.gen(function* () { const target = yield* Target; @@ -31,81 +40,99 @@ scenario( const identity = yield* target.newIdentity({ org: false }); yield* browser.session(identity, async ({ page: tab1, step }) => { - const slugOf = (page: typeof tab1) => new URL(page.url()).pathname.replace(/^\/|\/.*$/g, ""); - - // The org slug a page's REAL app requests carry — read straight off the - // outgoing `x-executor-organization` header, the actual request scope. - // This is what makes the two tabs independent; the shared session cookie - // is irrelevant to it. 
- const requestOrgSlugOf = async (page: typeof tab1): Promise<string> => { - const matching = page.waitForRequest( - (request) => - request.url().includes("/api/") && - request.headers()["x-executor-organization"] !== undefined, - { timeout: 15_000 }, - ); - // Nudge the app to refetch so a fresh scoped request goes out. - void page.reload({ waitUntil: "commit" }); - return (await matching).headers()["x-executor-organization"]!; - }; - - let slugA = ""; - let slugB = ""; + const suffix = randomBytes(4).toString("hex"); + const organizationA = `Multitab A ${suffix}`; + const organizationB = `Multitab B ${suffix}`; + const policyA = `multitab-a-${suffix}.*`; + const policyB = `multitab-b-${suffix}.*`; - await step("Onboard org A in tab 1", async () => { + await step("Create organization A and its policy in tab 1", async () => { await tab1.goto("/", { waitUntil: "networkidle" }); - await tab1.getByPlaceholder("Northwind Labs").fill("Multitab A"); + await tab1.getByPlaceholder("Northwind Labs").fill(organizationA); await tab1.getByRole("button", { name: "Create organization" }).click(); await tab1.getByText("Connect your MCP client").waitFor({ timeout: 30_000 }); await tab1.getByRole("button", { name: "Continue to app" }).click(); - await tab1.waitForURL((url) => /^\/[a-z0-9-]+\/?$/.test(url.pathname), { timeout: 30_000 }); - await tab1.getByText("Integrations").first().waitFor({ timeout: 30_000 }); - slugA = slugOf(tab1); + await tab1.waitForURL((url) => /^\/[a-z0-9-]+\/?$/.test(url.pathname), { + timeout: 30_000, + }); }); - await step("Create org B from tab 1's account menu — tab 1 is now in B", async () => { + const slugA = new URL(tab1.url()).pathname.split("/")[1]!; + + await step("Persist organization A data with an A selector", async () => { + await tab1.goto(`/${slugA}/policies`, { waitUntil: "networkidle" }); + const request = await createPolicy(tab1, policyA); + expect( + request.headers()["x-executor-organization"], + "organization A policy writes use the A URL 
selector", + ).toBe(slugA); + }); + + await step("Create organization B and its distinct policy in tab 1", async () => { await tab1.getByRole("button", { name: /Test User/ }).click(); - await tab1.getByRole("menuitem", { name: "Multitab A" }).click(); - await tab1 - .locator('[data-slot="dropdown-menu-sub-content"]') - .getByText("Create organization", { exact: true }) - .click(); + await tab1.getByRole("menuitem", { name: organizationA, exact: true }).click(); + const submenu = tab1.locator('[data-slot="dropdown-menu-sub-content"]'); + await submenu.waitFor({ state: "visible" }); + await submenu.getByText("Create organization", { exact: true }).click(); await tab1.getByText("Add another organization").waitFor(); - await tab1.getByPlaceholder("Northwind Labs").fill("Multitab B"); + await tab1.getByPlaceholder("Northwind Labs").fill(organizationB); await tab1.getByRole("button", { name: "Create organization" }).click(); - await tab1.waitForURL((url) => url.pathname !== `/${slugA}`, { timeout: 30_000 }); - await tab1.getByText("Integrations").first().waitFor({ timeout: 30_000 }); - slugB = slugOf(tab1); + await tab1.waitForURL( + (url) => url.pathname.endsWith("/policies") && url.pathname !== `/${slugA}/policies`, + { timeout: 30_000 }, + ); }); - expect(slugB, "the two orgs have distinct slugs").not.toBe(slugA); - // A second tab in the SAME context — shares tab 1's cookie jar. 
+ const slugB = new URL(tab1.url()).pathname.split("/")[1]!; + expect(slugB, "organization B has a distinct URL slug").not.toBe(slugA); + + await step("Persist organization B data with a B selector", async () => { + const request = await createPolicy(tab1, policyB); + expect( + request.headers()["x-executor-organization"], + "organization B policy writes use the B URL selector", + ).toBe(slugB); + expect(await tab1.getByText(policyA, { exact: true }).count()).toBe(0); + }); + + const cookieWhileInB = await sessionCookieValue(tab1); + expect(cookieWhileInB, "organization creation leaves a real browser session").not.toBe(""); + const tab2 = await tab1.context().newPage(); - await step("Tab 2 opens org A's URL and stays in A — no switch, no reload loop", async () => { + await step("Tab 2 renders organization A data from the A URL", async () => { + const responsePromise = policyResponse(tab2, "GET"); await tab2.goto(`/${slugA}/policies`, { waitUntil: "networkidle" }); - await tab2.getByText("Policies").first().waitFor({ timeout: 30_000 }); - expect(new URL(tab2.url()).pathname, "tab 2 stays on org A's URL").toBe( - `/${slugA}/policies`, - ); - expect(await requestOrgSlugOf(tab2), "tab 2's API requests are scoped to org A").toBe( - slugA, - ); + const response = await responsePromise; + expect(response.ok(), "organization A policies load in tab 2").toBe(true); + expect( + response.request().headers()["x-executor-organization"], + "tab 2 requests remain scoped to organization A", + ).toBe(slugA); + await tab2.getByText(policyA, { exact: true }).waitFor(); + expect(await tab2.getByText(policyB, { exact: true }).count()).toBe(0); }); - await step("Tab 1 is untouched: still org B, and its requests still scope to B", async () => { - expect(new URL(tab1.url()).pathname, "tab 1's URL still says org B").toBe(`/${slugB}`); - expect( - await tab1.getByRole("button", { name: /Multitab B/ }).isVisible(), - "tab 1's sidebar still shows org B", - ).toBe(true); - // The crux: tab 2 
opening org A did NOT re-scope tab 1. Tab 1's own - // requests still carry org B's slug — the URL is the scope, not a - // shared cookie a sibling tab can steal. - expect(await requestOrgSlugOf(tab1), "tab 1's API requests stay scoped to org B").toBe( - slugB, + await step("Tab 1 still renders organization B data from the B URL", async () => { + const responsePromise = policyResponse(tab1, "GET"); + await tab1.reload({ waitUntil: "networkidle" }); + const response = await responsePromise; + expect(new URL(tab1.url()).pathname, "tab 1 stays on organization B").toBe( + `/${slugB}/policies`, ); + expect( + response.request().headers()["x-executor-organization"], + "tab 1 requests remain scoped to organization B", + ).toBe(slugB); + await tab1.getByRole("button", { name: new RegExp(organizationB) }).waitFor(); + await tab1.getByText(policyB, { exact: true }).waitFor(); + expect(await tab1.getByText(policyA, { exact: true }).count()).toBe(0); }); + + expect( + await sessionCookieValue(tab1), + "opening organization A in tab 2 does not rewrite the shared cookie", + ).toBe(cookieWhileInB); }); }), ); @@ -138,8 +165,8 @@ scenario( await page.getByPlaceholder("https://api.example.com/openapi.json").waitFor(); }); - const orgSlug = new URL(page.url()).pathname.split("/")[1]; - expect(orgSlug, "the add form landed on an org-scoped URL").toBeTruthy(); + const organizationSlug = new URL(page.url()).pathname.split("/")[1]; + expect(organizationSlug, "the add form lands on an organization URL").toBeTruthy(); await step("Paste an inline spec", async () => { const previewRequest = page.waitForRequest( @@ -151,8 +178,8 @@ scenario( expect( (await previewRequest).headers()["x-executor-organization"], - "plugin-owned preview requests use the URL's org selector", - ).toBe(orgSlug); + "plugin preview requests use the URL organization selector", + ).toBe(organizationSlug); }); }); }), diff --git a/e2e/cloud/org-switcher.test.ts b/e2e/cloud/org-switcher.test.ts index eaf1aa998..070fc271d 
100644 --- a/e2e/cloud/org-switcher.test.ts +++ b/e2e/cloud/org-switcher.test.ts @@ -1,16 +1,39 @@ -// Cloud-specific (browser): switching organizations changes the active workspace. -// A fresh user creates two organizations through the real web UI — the first -// via onboarding and the second via the account-menu → org switcher → "Create -// organization" modal — then uses the same switcher to return to the first org -// and confirms the workspace label in the bottom-left account button updates. +// Cloud browser coverage for URL-driven organization switching. A user creates +// distinct resources in organization A and organization B, then switches back +// through the public menu. The route intent, request selector, visible data, +// and shared session cookie are all asserted at each boundary. +import { randomBytes } from "node:crypto"; + import { expect } from "@effect/vitest"; import { Effect } from "effect"; +import type { Page } from "playwright"; import { scenario } from "../src/scenario"; import { Browser, Target } from "../src/services"; +const policyResponse = (page: Page, method: "GET" | "POST") => + page.waitForResponse( + (response) => + response.request().method() === method && + new URL(response.url()).pathname === "/api/policies", + { timeout: 30_000 }, + ); + +const createPolicy = async (page: Page, pattern: string) => { + const responsePromise = policyResponse(page, "POST"); + await page.locator("#policy-pattern").fill(pattern); + await page.getByRole("button", { name: "Add policy" }).click(); + const response = await responsePromise; + expect(response.ok(), `creating ${pattern} succeeds`).toBe(true); + await page.getByText(pattern, { exact: true }).waitFor(); + return response.request(); +}; + +const sessionCookieValue = async (page: Page) => + (await page.context().cookies()).find((cookie) => cookie.name === "wos-session")?.value ?? 
""; + scenario( - "Organizations · switching organizations switches the workspace", + "Organizations · A/B/A switching preserves route intent and isolates resources", {}, Effect.gen(function* () { const target = yield* Target; @@ -18,126 +41,101 @@ scenario( const identity = yield* target.newIdentity({ org: false }); yield* browser.session(identity, async ({ page, step }) => { - // ── Step 1: onboarding, create the first org ───────────────────── - await step("Fresh user lands on onboarding (no organization yet)", async () => { - await page.goto("/", { waitUntil: "networkidle" }); - await page.getByPlaceholder("Northwind Labs").waitFor(); - }); - - const ORG_1 = "Switcher Org One"; - const ORG_2 = "Switcher Org Two"; + const suffix = randomBytes(4).toString("hex"); + const organizationA = `Switcher A ${suffix}`; + const organizationB = `Switcher B ${suffix}`; + const policyA = `switcher-a-${suffix}.*`; + const policyB = `switcher-b-${suffix}.*`; - await step(`Create "${ORG_1}" via onboarding`, async () => { - await page.getByPlaceholder("Northwind Labs").fill(ORG_1); + await step("Create organization A through onboarding", async () => { + await page.goto("/", { waitUntil: "networkidle" }); + await page.getByPlaceholder("Northwind Labs").fill(organizationA); await page.getByRole("button", { name: "Create organization" }).click(); - // Onboarding step 2 — proves the first org was created. - await page.getByText("Connect your MCP client").waitFor(); - }); - - await step("Continue into the app", async () => { + await page.getByText("Connect your MCP client").waitFor({ timeout: 30_000 }); await page.getByRole("button", { name: "Continue to app" }).click(); - await page.getByText("Integrations").first().waitFor(); - // Let the router navigation fully settle before opening menus — a late - // remount closes them mid-interaction. The console canonicalizes onto - // the org's URL slug (/switcher-org-one). 
await page.waitForURL((url) => /^\/[a-z0-9-]+\/?$/.test(url.pathname), { timeout: 30_000, }); - await page.waitForLoadState("networkidle"); }); - // ── Step 2: create the second org via the account-menu switcher ── - await step('Open the org switcher and choose "Create organization"', async () => { - // Bounded retry: under parallel-suite load the radix menu re-renders - // while the org list loads and a click can land on a closing menu. - for (let attempt = 1; ; attempt++) { - try { - await page.keyboard.press("Escape"); - await page.getByRole("button", { name: /Test User/ }).click(); - await page.getByRole("menuitem", { name: ORG_1 }).click({ timeout: 5_000 }); - const subContent = page.locator('[data-slot="dropdown-menu-sub-content"]'); - await subContent.waitFor({ state: "visible", timeout: 5_000 }); - await subContent - .getByText("Create organization", { exact: true }) - .click({ timeout: 5_000 }); - await page.getByText("Add another organization").waitFor({ timeout: 5_000 }); - break; - } catch (error) { - if (attempt >= 3) throw error; - } - } + const slugA = new URL(page.url()).pathname.split("/")[1]!; + + await step("Create an organization A policy on a deep route", async () => { + await page.goto(`/${slugA}/policies?view=switcher#rules`, { + waitUntil: "networkidle", + }); + const request = await createPolicy(page, policyA); + expect( + request.headers()["x-executor-organization"], + "organization A writes use its URL selector", + ).toBe(slugA); }); - await step(`Create "${ORG_2}" via the org switcher modal`, async () => { - await page.getByPlaceholder("Northwind Labs").fill(ORG_2); + await step("Create organization B without losing the current route", async () => { + await page.getByRole("button", { name: /Test User/ }).click(); + await page.getByRole("menuitem", { name: organizationA, exact: true }).click(); + const submenu = page.locator('[data-slot="dropdown-menu-sub-content"]'); + await submenu.waitFor({ state: "visible" }); + await 
submenu.getByText("Create organization", { exact: true }).click(); + await page.getByText("Add another organization").waitFor(); + await page.getByPlaceholder("Northwind Labs").fill(organizationB); await page.getByRole("button", { name: "Create organization" }).click(); - // The modal closes and the session switches into the new org. - await page.getByText("Add another organization").waitFor({ state: "hidden" }); - // Confirm the account button now shows ORG_2. - await page.getByRole("button", { name: new RegExp(ORG_2) }).waitFor(); + await page.waitForURL( + (url) => + url.pathname.endsWith("/policies") && + url.pathname !== `/${slugA}/policies` && + url.search === "?view=switcher" && + url.hash === "#rules", + { timeout: 30_000 }, + ); + await page.getByRole("button", { name: new RegExp(organizationB) }).waitFor(); }); - // Capture the label while we are in ORG_2 as a baseline. - const labelAfterOrg2 = await page - .getByRole("button", { name: new RegExp(ORG_2) }) - .innerText(); - expect(labelAfterOrg2, "account button shows the second org after creation").toContain(ORG_2); - - // ── Step 3: switch back to the first org ───────────────────────── - // The org-switcher sub-menu shows org IDs (not names) because the stub's - // getOrganization returns the ID as the name. The currently-active org is - // rendered with data-disabled="" (Radix convention). The only item without - // data-disabled that isn't "Create organization" is ORG_1. - await step(`Open the org switcher and switch back to "${ORG_1}"`, async () => { - await page.waitForLoadState("networkidle"); - await page.getByRole("button", { name: /Test User/ }).click(); - // Click the SubTrigger (shows current org name = ORG_2) to expand the list. - await page.getByRole("menuitem", { name: ORG_2 }).click(); - // Wait for the sub-content to open. 
- await page.locator('[data-slot="dropdown-menu-sub-content"]').waitFor({ state: "visible" }); - // The organizationsAtom loads asynchronously — wait until the loading state - // clears and the org items appear. The org items have data-disabled="" when - // active and no data-disabled when not. "Create organization" is always shown - // and always enabled; wait until there are at least 2 non-disabled items - // (the non-active org + "Create organization") before clicking. - await page - .locator('[data-slot="dropdown-menu-sub-content"]') - .locator('[role="menuitem"]:not([data-disabled])') - .nth(1) - .waitFor(); - // Now the sub-content has loaded. The org items appear BEFORE the separator and - // "Create organization". ORG_1 (non-active, not disabled) appears before ORG_2 - // (active, disabled) and before "Create organization". Click the first - // non-disabled item that is NOT "Create organization" — that is ORG_1. - await page - .locator('[data-slot="dropdown-menu-sub-content"]') - .locator('[role="menuitem"]:not([data-disabled])') - .filter({ hasNot: page.getByText("Create organization") }) - .first() - .click(); - // The menu closes, the page reloads, and the session switches into ORG_1. 
- await page.getByRole("button", { name: new RegExp(ORG_1) }).waitFor(); + const slugB = new URL(page.url()).pathname.split("/")[1]!; + expect(slugB, "organization B has a distinct URL slug").not.toBe(slugA); + + await step("Create an isolated organization B policy", async () => { + const request = await createPolicy(page, policyB); + expect( + request.headers()["x-executor-organization"], + "organization B writes use its URL selector", + ).toBe(slugB); + expect(await page.getByText(policyA, { exact: true }).count()).toBe(0); }); - // ── Assert: workspace label reflects the first org ─────────────── - const labelAfterSwitch = await page - .getByRole("button", { name: new RegExp(ORG_1) }) - .innerText(); - expect(labelAfterSwitch, "account button shows the first org after switching back").toContain( - ORG_1, - ); - - // Cross-check the active org through the session API. - const cookie = (await page.context().cookies()).map((c) => `${c.name}=${c.value}`).join("; "); - const response = await fetch(new URL("/api/auth/organizations", target.baseUrl), { - headers: { cookie }, + const cookieWhileInB = await sessionCookieValue(page); + expect(cookieWhileInB, "organization creation leaves a real browser session").not.toBe(""); + + await step("Switch from organization B back to A through the public menu", async () => { + const policiesResponse = policyResponse(page, "GET"); + await page.getByRole("button", { name: /Test User/ }).click(); + await page.getByRole("menuitem", { name: organizationB, exact: true }).click(); + const submenu = page.locator('[data-slot="dropdown-menu-sub-content"]'); + await submenu.waitFor({ state: "visible" }); + await submenu.getByRole("menuitem", { name: organizationA, exact: true }).click(); + await page.waitForURL( + (url) => + url.pathname === `/${slugA}/policies` && + url.search === "?view=switcher" && + url.hash === "#rules", + { timeout: 30_000 }, + ); + + const response = await policiesResponse; + expect(response.ok(), "organization A 
policies reload successfully").toBe(true); + expect( + response.request().headers()["x-executor-organization"], + "the first request after switching back uses organization A", + ).toBe(slugA); + await page.getByRole("button", { name: new RegExp(organizationA) }).waitFor(); + await page.getByText(policyA, { exact: true }).waitFor(); + expect(await page.getByText(policyB, { exact: true }).count()).toBe(0); }); - const body = (await response.json()) as { - organizations: ReadonlyArray<{ name: string }>; - activeOrganizationId?: string; - }; - expect(response.ok).toBe(true); - expect(body.organizations.length, "exactly two organizations exist for this user").toBe(2); + + expect( + await sessionCookieValue(page), + "URL switching does not rewrite the shared session cookie", + ).toBe(cookieWhileInB); }); }), ); diff --git a/e2e/cloudflare/access-auth.test.ts b/e2e/cloudflare/access-auth.test.ts new file mode 100644 index 000000000..04eba570f --- /dev/null +++ b/e2e/cloudflare/access-auth.test.ts @@ -0,0 +1,142 @@ +import { expect } from "@effect/vitest"; +import { Effect } from "effect"; +import { AccountHttpApi } from "@executor-js/api"; + +import { + accessAssertionHeaders, + issueCloudflareAccessToken, + readCloudflareAccessLedger, +} from "../src/cloudflare-access-emulator"; +import { scenario } from "../src/scenario"; +import { Api, Target } from "../src/services"; +import { CLOUDFLARE_ACCESS_BASE_URL } from "../targets/cloudflare"; + +scenario( + "Cloudflare Access · a signed human assertion reaches the protected account API", + {}, + Effect.gen(function* () { + const target = yield* Target; + const { client: makeClient } = yield* Api; + const identity = yield* target.newIdentity(); + const client = yield* makeClient(AccountHttpApi, identity); + + const me = yield* client.account.me(); + expect(me.user.email, "the verified Access email reaches the account surface").toBe( + "admin@e2e.test", + ); + expect(me.user.id, "the verified Access subject is the stable 
account id").toMatch(/^user-/); + expect(me.organization?.id, "the Access principal belongs to the configured tenant").toBe( + "default", + ); + const ledger = yield* Effect.promise(() => + readCloudflareAccessLedger(CLOUDFLARE_ACCESS_BASE_URL), + ); + const events = ledger.map( + (entry) => `${entry.operation}:${entry.tokenKind ?? "none"}:${entry.status}`, + ); + expect( + events, + "the Worker fetched the Access signing keys over the documented certs endpoint", + ).toContain("jwks.read:none:200"); + expect(events, "the fixture records human issuance without recording the JWT").toContain( + "token.issue:human:200", + ); + }), +); + +scenario( + "Cloudflare Access · anonymous and wrong-audience human assertions are rejected", + {}, + Effect.gen(function* () { + const target = yield* Target; + const anonymous = yield* Effect.promise(() => + fetch(new URL("/api/account/me", target.baseUrl)), + ); + expect(anonymous.status, "the Worker does not rely on the old dev-auth bypass").toBe(401); + + const wrongAudience = yield* Effect.promise(() => + issueCloudflareAccessToken(CLOUDFLARE_ACCESS_BASE_URL, { + kind: "human", + subject: "wrong-audience-user", + email: "admin@e2e.test", + audience: "another-access-application", + }), + ); + const rejected = yield* Effect.promise(() => + fetch(new URL("/api/account/me", target.baseUrl), { + headers: accessAssertionHeaders(wrongAudience), + }), + ); + expect(rejected.status, "a valid signature cannot bypass the configured Access AUD").toBe(401); + }), +); + +scenario( + "Cloudflare Access · a signed service-token assertion receives its machine identity", + {}, + Effect.gen(function* () { + const { client: makeClient } = yield* Api; + const commonName = "executor-ci.access"; + // Access exchanges the raw client-id/client-secret pair at its edge. The + // origin receives this signed application-token shape with common_name. 
+ const token = yield* Effect.promise(() => + issueCloudflareAccessToken(CLOUDFLARE_ACCESS_BASE_URL, { + kind: "service", + commonName, + }), + ); + const client = yield* makeClient(AccountHttpApi, { + label: commonName, + headers: accessAssertionHeaders(token), + }); + + const me = yield* client.account.me(); + expect(me.user.id, "the service-token client id is the stable account id").toBe(commonName); + expect(me.user.name, "the machine identity remains recognizable").toBe(commonName); + expect(me.user.email, "service tokens do not impersonate a human email").toBe(""); + const ledger = yield* Effect.promise(() => + readCloudflareAccessLedger(CLOUDFLARE_ACCESS_BASE_URL), + ); + expect( + ledger.map((entry) => `${entry.operation}:${entry.tokenKind ?? "none"}:${entry.status}`), + "the fixture records service issuance without recording credentials or assertions", + ).toContain("token.issue:service:200"); + }), +); + +scenario( + "Cloudflare Access · expired and tampered service-token assertions are rejected", + {}, + Effect.gen(function* () { + const target = yield* Target; + const expired = yield* Effect.promise(() => + issueCloudflareAccessToken(CLOUDFLARE_ACCESS_BASE_URL, { + kind: "service", + commonName: "expired-ci.access", + expiresInSeconds: -60, + }), + ); + const expiredResponse = yield* Effect.promise(() => + fetch(new URL("/api/account/me", target.baseUrl), { + headers: accessAssertionHeaders(expired), + }), + ); + expect(expiredResponse.status, "Access expiry is enforced by the Worker").toBe(401); + + const valid = yield* Effect.promise(() => + issueCloudflareAccessToken(CLOUDFLARE_ACCESS_BASE_URL, { + kind: "service", + commonName: "tampered-ci.access", + }), + ); + const [header, payload, signature] = valid.split("."); + const tamperedSignature = `${signature?.startsWith("A") ? "B" : "A"}${signature?.slice(1) ?? 
""}`; + const tampered = `${header}.${payload}.${tamperedSignature}`; + const tamperedResponse = yield* Effect.promise(() => + fetch(new URL("/api/account/me", target.baseUrl), { + headers: accessAssertionHeaders(tampered), + }), + ); + expect(tamperedResponse.status, "a forged service assertion never reaches the app").toBe(401); + }), +); diff --git a/e2e/desktop-kvm/guest-runtime.ts b/e2e/desktop-kvm/guest-runtime.ts new file mode 100644 index 000000000..d6ac56384 --- /dev/null +++ b/e2e/desktop-kvm/guest-runtime.ts @@ -0,0 +1,668 @@ +// Dependency-free guest payload for the Linux KVM desktop journey. The same +// source runs a bearer-specific remote account fixture, an Anthropic Messages +// replay boundary, and the pinned Claude Code binary inside the disposable VM. + +import { execFile } from "node:child_process"; +import { chmodSync, mkdirSync, readFileSync, renameSync, rmSync, writeFileSync } from "node:fs"; +import { createServer, type IncomingMessage, type ServerResponse } from "node:http"; +import { dirname, join, resolve } from "node:path"; +import { fileURLToPath } from "node:url"; + +export const KVM_ACCOUNT_FIXTURES = [ + { + name: "Remote account A", + token: "desktop-profile-account-a", + marker: "Wire catalog alpha", + slug: "fixture-account-a", + }, + { + name: "Remote account B", + token: "desktop-profile-account-b", + marker: "Wire catalog beta", + slug: "fixture-account-b", + }, +] as const; + +export const KVM_CLAUDE_EXPECTED_RESULT = "42"; +export const KVM_CLAUDE_EXECUTE_CODE = "return 6 * 7;"; +export const KVM_REPLAY_API_KEY = "executor-e2e-replay-key"; + +interface AccountFixtureRequest { + readonly method: string; + readonly url: string; + readonly authorization: string | null; +} + +interface ReplayToolResult { + readonly toolUseId: string; + readonly content: string; + readonly isError: boolean; +} + +interface ReplayMessage { + readonly role: string; + readonly text: string; + readonly toolResults: ReadonlyArray; +} + +interface 
ReplayRequest { + readonly path: string; + readonly model: string; + readonly messages: ReadonlyArray; + readonly toolNames: ReadonlyArray; + readonly stream: boolean; +} + +export interface KvmGuestRuntimeState { + readonly pid: number; + readonly accountOrigin: string; + readonly brainOrigin: string; + readonly accountLedgerPath: string; + readonly replayLedgerPath: string; +} + +export interface KvmGuestClaudeConfig { + readonly binaryPath: string; + readonly expectedVersion: string; + readonly homeDir: string; + readonly mcpUrl: string; + readonly authorizationHeader: string; + readonly brainBaseUrl: string; + readonly outputPath: string; +} + +export interface KvmGuestClaudeResult { + readonly binaryPath: string; + readonly expectedVersion: string; + readonly observedVersion?: string; + readonly durationMs: number; + readonly exitCode: number | null; + readonly stdout: string; + readonly stderr: string; + readonly structuredResult?: unknown; + readonly mcpServerName: "executor"; + readonly mcpOrigin: string; + readonly replayOrigin: string; +} + +const isUnknownRecord = (value: unknown): value is Record => + typeof value === "object" && value !== null && !Array.isArray(value); + +const requiredString = (record: Record, key: string) => { + const value = record[key]; + if (typeof value !== "string" || value.length === 0) { + throw new Error(`guest runtime requires ${key}`); + } + return value; +}; + +const parseClaudeConfig = (value: unknown): KvmGuestClaudeConfig => { + if (!isUnknownRecord(value)) throw new Error("guest Claude config must be an object"); + return { + binaryPath: requiredString(value, "binaryPath"), + expectedVersion: requiredString(value, "expectedVersion"), + homeDir: requiredString(value, "homeDir"), + mcpUrl: requiredString(value, "mcpUrl"), + authorizationHeader: requiredString(value, "authorizationHeader"), + brainBaseUrl: requiredString(value, "brainBaseUrl"), + outputPath: requiredString(value, "outputPath"), + }; +}; + +const 
writeJsonAtomic = (path: string, value: unknown) => { + mkdirSync(dirname(path), { recursive: true }); + const temporary = `${path}.${process.pid}.tmp`; + writeFileSync(temporary, `${JSON.stringify(value, null, 2)}\n`, { mode: 0o600 }); + renameSync(temporary, path); + chmodSync(path, 0o600); +}; + +const writeJson = (response: ServerResponse, status: number, value: unknown) => { + response.writeHead(status, { "content-type": "application/json", "cache-control": "no-store" }); + response.end(JSON.stringify(value)); +}; + +const listen = (server: ReturnType, host: string) => + new Promise((resolveListen, reject) => { + server.once("error", reject); + server.listen(0, host, () => { + server.off("error", reject); + const address = server.address(); + if (!address || typeof address === "string") { + server.close(); + // oxlint-disable-next-line executor/no-promise-reject, executor/no-error-constructor -- boundary: node:http listen callbacks cannot return an Effect failure + reject(new Error("guest fixture server did not publish a TCP address")); + return; + } + resolveListen(address.port); + }); + }); + +const close = (server: ReturnType) => + new Promise((resolveClose) => server.close(() => resolveClose())); + +const accountIntegration = (authorization: string | null) => { + const account = KVM_ACCOUNT_FIXTURES.find( + (candidate) => authorization === `Bearer ${candidate.token}`, + ); + if (!account) return undefined; + return { + slug: account.slug, + name: account.marker, + description: `Bearer-specific catalog for ${account.name}`, + kind: "fixture", + canRemove: false, + canRefresh: false, + authMethods: [], + }; +}; + +export const createKvmAccountFixture = (ledgerPath: string) => { + const requests: AccountFixtureRequest[] = []; + writeJsonAtomic(ledgerPath, requests); + return createServer((request, response) => { + const method = request.method ?? "GET"; + const url = request.url ?? "/"; + const authorization = request.headers.authorization ?? 
null; + requests.push({ method, url, authorization }); + writeJsonAtomic(ledgerPath, requests); + + response.setHeader("access-control-allow-origin", request.headers.origin ?? "*"); + response.setHeader( + "access-control-allow-headers", + request.headers["access-control-request-headers"] ?? + "authorization, content-type, x-executor-org, traceparent, baggage", + ); + response.setHeader("access-control-allow-methods", "GET, OPTIONS"); + response.setHeader("access-control-allow-private-network", "true"); + response.setHeader("cache-control", "no-store"); + response.setHeader("vary", "Origin, Access-Control-Request-Headers"); + + if (method === "OPTIONS") { + response.writeHead(204); + response.end(); + return; + } + + const pathname = new URL(url, "http://executor-kvm-account-fixture").pathname; + if (method !== "GET" || pathname !== "/api/integrations") { + writeJson(response, 404, { message: "Not found" }); + return; + } + const integration = accountIntegration(authorization); + if (!integration) { + writeJson(response, 401, { message: "Invalid bearer" }); + return; + } + writeJson(response, 200, [integration]); + }); +}; + +const contentText = (content: unknown): string => { + if (typeof content === "string") return content; + if (!Array.isArray(content)) return ""; + return content + .map((part) => + isUnknownRecord(part) && part.type === "text" && typeof part.text === "string" + ? part.text + : "", + ) + .join(""); +}; + +const toolResultsFrom = (content: unknown): ReadonlyArray => { + if (!Array.isArray(content)) return []; + return content.flatMap((part) => { + if (!isUnknownRecord(part) || part.type !== "tool_result") return []; + return [ + { + toolUseId: typeof part.tool_use_id === "string" ? 
part.tool_use_id : "", + content: contentText(part.content), + isError: part.is_error === true, + }, + ]; + }); +}; + +const messagesFrom = (body: Record): ReadonlyArray => { + if (!Array.isArray(body.messages)) return []; + return body.messages.flatMap((message) => { + if (!isUnknownRecord(message)) return []; + return [ + { + role: typeof message.role === "string" ? message.role : "", + text: contentText(message.content), + toolResults: toolResultsFrom(message.content), + }, + ]; + }); +}; + +const toolNamesFrom = (body: Record): ReadonlyArray => { + if (!Array.isArray(body.tools)) return []; + return body.tools.flatMap((tool) => + isUnknownRecord(tool) && typeof tool.name === "string" ? [tool.name] : [], + ); +}; + +const writeEvent = (response: ServerResponse, event: string, data: unknown) => { + response.write(`event: ${event}\n`); + response.write(`data: ${JSON.stringify(data)}\n\n`); +}; + +const resolveExecuteTool = (offered: ReadonlyArray) => + offered.find((name) => name === "execute") ?? offered.find((name) => name.endsWith("__execute")); + +const writeReplayResponse = ( + response: ServerResponse, + requestIndex: number, + model: string, + toolNames: ReadonlyArray, + toolResults: ReadonlyArray, + errors: string[], +) => { + const toolName = toolResults.length === 0 ? resolveExecuteTool(toolNames) : undefined; + if (toolResults.length === 0 && !toolName) { + errors.push( + `request ${requestIndex}: Executor execute was not offered (${toolNames.join(", ")})`, + ); + } + const text = toolResults.length > 0 ? `executor-result:${toolResults.at(-1)?.content ?? 
""}` : ""; + const messageId = `msg_kvm_replay_${requestIndex}`; + response.writeHead(200, { + "content-type": "text/event-stream", + "cache-control": "no-cache", + connection: "keep-alive", + }); + writeEvent(response, "message_start", { + type: "message_start", + message: { + id: messageId, + type: "message", + role: "assistant", + model, + content: [], + stop_reason: null, + stop_sequence: null, + usage: { input_tokens: 1, output_tokens: 0 }, + }, + }); + + let blockIndex = 0; + if (text) { + writeEvent(response, "content_block_start", { + type: "content_block_start", + index: blockIndex, + content_block: { type: "text", text: "" }, + }); + writeEvent(response, "content_block_delta", { + type: "content_block_delta", + index: blockIndex, + delta: { type: "text_delta", text }, + }); + writeEvent(response, "content_block_stop", { + type: "content_block_stop", + index: blockIndex, + }); + blockIndex += 1; + } + + if (toolName) { + const toolUseId = `toolu_kvm_replay_${requestIndex}`; + writeEvent(response, "content_block_start", { + type: "content_block_start", + index: blockIndex, + content_block: { type: "tool_use", id: toolUseId, name: toolName, input: {} }, + }); + writeEvent(response, "content_block_delta", { + type: "content_block_delta", + index: blockIndex, + delta: { + type: "input_json_delta", + partial_json: JSON.stringify({ code: KVM_CLAUDE_EXECUTE_CODE }), + }, + }); + writeEvent(response, "content_block_stop", { + type: "content_block_stop", + index: blockIndex, + }); + } + + writeEvent(response, "message_delta", { + type: "message_delta", + delta: { + stop_reason: toolName ? 
"tool_use" : "end_turn", + stop_sequence: null, + }, + usage: { output_tokens: 1 }, + }); + writeEvent(response, "message_stop", { type: "message_stop" }); + response.end(); +}; + +const readRequestBody = (request: IncomingMessage) => + new Promise((resolveBody, reject) => { + let raw = ""; + request.on("data", (chunk: Buffer) => { + raw += chunk.toString("utf8"); + }); + request.on("end", () => resolveBody(raw)); + request.on("error", reject); + }); + +export const createKvmReplayBrain = (ledgerPath: string) => { + const ledger: { requests: ReplayRequest[]; errors: string[] } = { requests: [], errors: [] }; + writeJsonAtomic(ledgerPath, ledger); + return createServer((request, response) => { + void (async () => { + const requestUrl = new URL(request.url ?? "/", "http://127.0.0.1"); + if (request.method !== "POST") { + writeJson(response, 405, { error: { type: "method_not_allowed" } }); + return; + } + if (requestUrl.pathname === "/v1/messages/count_tokens") { + writeJson(response, 200, { input_tokens: 1 }); + return; + } + if (requestUrl.pathname !== "/v1/messages") { + ledger.errors.push(`unexpected request path: ${request.method} ${requestUrl.pathname}`); + writeJsonAtomic(ledgerPath, ledger); + writeJson(response, 404, { error: { type: "not_found" } }); + return; + } + + const raw = await readRequestBody(request); + let decoded: unknown; + try { + decoded = JSON.parse(raw || "{}"); + } catch (error) { + ledger.errors.push(`request JSON decode failed: ${String(error)}`); + writeJsonAtomic(ledgerPath, ledger); + writeJson(response, 400, { error: { type: "invalid_request_error" } }); + return; + } + if (!isUnknownRecord(decoded)) { + ledger.errors.push("request body was not a JSON object"); + writeJsonAtomic(ledgerPath, ledger); + writeJson(response, 400, { error: { type: "invalid_request_error" } }); + return; + } + + const messages = messagesFrom(decoded); + const toolNames = toolNamesFrom(decoded); + const toolResults = messages.flatMap((message) => 
message.toolResults); + const model = typeof decoded.model === "string" ? decoded.model : "replay-model"; + const requestIndex = ledger.requests.length; + ledger.requests.push({ + path: `${requestUrl.pathname}${requestUrl.search}`, + model, + messages, + toolNames, + stream: decoded.stream === true, + }); + writeReplayResponse(response, requestIndex, model, toolNames, toolResults, ledger.errors); + writeJsonAtomic(ledgerPath, ledger); + })().catch((error) => { + ledger.errors.push(`replay request failed: ${String(error)}`); + writeJsonAtomic(ledgerPath, ledger); + if (!response.headersSent) writeJson(response, 500, { error: { type: "fixture_error" } }); + else response.end(); + }); + }); +}; + +const parseServeArguments = (args: ReadonlyArray) => { + const values = new Map(); + for (let index = 0; index < args.length; index += 2) { + const name = args[index]; + const value = args[index + 1]; + if (!name?.startsWith("--") || !value) throw new Error(`invalid serve argument: ${name}`); + values.set(name, value); + } + const stateDir = values.get("--state-dir"); + const accountHost = values.get("--account-host"); + if (!stateDir || !accountHost) throw new Error("serve requires --state-dir and --account-host"); + return { stateDir, accountHost }; +}; + +export const serveKvmGuestFixtures = async (input: { + readonly stateDir: string; + readonly accountHost: string; +}) => { + rmSync(input.stateDir, { force: true, recursive: true }); + mkdirSync(input.stateDir, { recursive: true, mode: 0o700 }); + const accountLedgerPath = join(input.stateDir, "account-fixture-ledger.json"); + const replayLedgerPath = join(input.stateDir, "anthropic-replay-ledger.json"); + const accountServer = createKvmAccountFixture(accountLedgerPath); + const brainServer = createKvmReplayBrain(replayLedgerPath); + const accountPort = await listen(accountServer, "0.0.0.0"); + const brainPort = await listen(brainServer, "127.0.0.1"); + const state: KvmGuestRuntimeState = { + pid: process.pid, + 
accountOrigin: `http://${input.accountHost}:${accountPort}`, + brainOrigin: `http://127.0.0.1:${brainPort}`, + accountLedgerPath, + replayLedgerPath, + }; + writeJsonAtomic(join(input.stateDir, "runtime.json"), state); + + let closing = false; + const shutdown = () => { + if (closing) return; + closing = true; + void Promise.all([close(accountServer), close(brainServer)]).then(() => process.exit(0)); + }; + process.once("SIGINT", shutdown); + process.once("SIGTERM", shutdown); + return state; +}; + +export const isLoopbackHttpUrl = (value: string) => { + try { + const url = new URL(value); + return ( + url.protocol === "http:" && + (url.hostname === "127.0.0.1" || url.hostname === "localhost" || url.hostname === "[::1]") + ); + } catch { + return false; + } +}; + +const invoke = ( + binaryPath: string, + args: ReadonlyArray, + options: { readonly cwd: string; readonly env: Readonly> }, +) => + new Promise<{ + readonly exitCode: number | null; + readonly stdout: string; + readonly stderr: string; + }>((resolveInvocation) => { + execFile( + binaryPath, + [...args], + { + cwd: options.cwd, + env: { ...options.env }, + encoding: "utf8", + timeout: 120_000, + maxBuffer: 32 * 1024 * 1024, + }, + (error, stdout, stderr) => { + const code = error && "code" in error && typeof error.code === "number" ? error.code : null; + resolveInvocation({ exitCode: error ? (code ?? 
1) : 0, stdout, stderr }); + }, + ); + }); + +const claudeEnvironment = (homeDir: string, brainOrigin: string) => { + const configDir = join(homeDir, "claude-config"); + const xdgDir = join(homeDir, "xdg"); + const tempDir = join(homeDir, "tmp"); + for (const directory of [ + homeDir, + configDir, + join(homeDir, "project"), + join(xdgDir, "config"), + join(xdgDir, "data"), + join(xdgDir, "state"), + join(xdgDir, "cache"), + tempDir, + ]) { + mkdirSync(directory, { recursive: true }); + } + return { + PATH: "/usr/bin:/bin", + SHELL: "/bin/bash", + LANG: "C.UTF-8", + HOME: homeDir, + USERPROFILE: homeDir, + CLAUDE_CONFIG_DIR: configDir, + XDG_CONFIG_HOME: join(xdgDir, "config"), + XDG_DATA_HOME: join(xdgDir, "data"), + XDG_STATE_HOME: join(xdgDir, "state"), + XDG_CACHE_HOME: join(xdgDir, "cache"), + TMPDIR: tempDir, + TEMP: tempDir, + TMP: tempDir, + CI: "1", + NO_PROXY: "127.0.0.1,localhost,::1", + no_proxy: "127.0.0.1,localhost,::1", + CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC: "1", + CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS: "1", + CLAUDE_CODE_ATTRIBUTION_HEADER: "0", + DISABLE_AUTOUPDATER: "1", + DISABLE_UPDATES: "1", + DISABLE_TELEMETRY: "1", + DISABLE_ERROR_REPORTING: "1", + ANTHROPIC_BASE_URL: brainOrigin, + ANTHROPIC_API_KEY: KVM_REPLAY_API_KEY, + }; +}; + +const probeVersion = (binaryPath: string, env: Readonly>) => + new Promise((resolveVersion) => { + execFile( + binaryPath, + ["--version"], + { env: { ...env }, encoding: "utf8", timeout: 10_000 }, + (error, stdout) => { + resolveVersion(error ? 
undefined : /^(\S+)/.exec(stdout.trim())?.[1]); + }, + ); + }); + +export const runKvmGuestClaude = async (config: KvmGuestClaudeConfig) => { + if (!isLoopbackHttpUrl(config.brainBaseUrl)) { + throw new Error(`refusing non-loopback Anthropic replay URL: ${config.brainBaseUrl}`); + } + if (!isLoopbackHttpUrl(config.mcpUrl)) { + throw new Error(`refusing non-loopback desktop MCP URL: ${config.mcpUrl}`); + } + rmSync(config.homeDir, { force: true, recursive: true }); + const environment = claudeEnvironment(config.homeDir, new URL(config.brainBaseUrl).origin); + const projectDir = join(config.homeDir, "project"); + const mcpConfigPath = join(config.homeDir, "mcp.json"); + writeJsonAtomic(mcpConfigPath, { + mcpServers: { + executor: { + type: "http", + url: config.mcpUrl, + headers: { Authorization: config.authorizationHeader }, + }, + }, + }); + const observedVersion = await probeVersion(config.binaryPath, environment); + if (observedVersion !== config.expectedVersion) { + const result: KvmGuestClaudeResult = { + binaryPath: config.binaryPath, + expectedVersion: config.expectedVersion, + observedVersion, + durationMs: 0, + exitCode: 1, + stdout: "", + stderr: `Claude Code ${config.expectedVersion} is required, found ${observedVersion ?? 
"no runnable binary"}`, + mcpServerName: "executor", + mcpOrigin: new URL(config.mcpUrl).origin, + replayOrigin: new URL(config.brainBaseUrl).origin, + }; + writeJsonAtomic(config.outputPath, result); + return result; + } + + const startedAt = Date.now(); + const invocation = await invoke( + config.binaryPath, + [ + "--bare", + "--mcp-config", + mcpConfigPath, + "--strict-mcp-config", + "--print", + "--output-format", + "json", + "--no-session-persistence", + "--disable-slash-commands", + "--no-chrome", + "--model", + "claude-sonnet-4-6", + "--tools", + "", + "--allowed-tools", + "mcp__executor__*", + "--permission-mode", + "dontAsk", + "--system-prompt", + "Follow the user request using only the explicitly configured MCP tools.", + "Use Executor to calculate six times seven.", + ], + { cwd: projectDir, env: environment }, + ); + let structuredResult: unknown; + try { + structuredResult = JSON.parse(invocation.stdout.trim()); + } catch { + structuredResult = undefined; + } + const result: KvmGuestClaudeResult = { + binaryPath: config.binaryPath, + expectedVersion: config.expectedVersion, + observedVersion, + durationMs: Date.now() - startedAt, + exitCode: invocation.exitCode, + stdout: invocation.stdout, + stderr: invocation.stderr, + structuredResult, + mcpServerName: "executor", + mcpOrigin: new URL(config.mcpUrl).origin, + replayOrigin: new URL(config.brainBaseUrl).origin, + }; + writeJsonAtomic(config.outputPath, result); + return result; +}; + +const main = async () => { + const [command, ...args] = process.argv.slice(2); + if (command === "serve") { + await serveKvmGuestFixtures(parseServeArguments(args)); + return; + } + if (command === "claude") { + const configPath = args[0]; + if (!configPath) throw new Error("claude requires a config path"); + const config = parseClaudeConfig(JSON.parse(readFileSync(configPath, "utf8"))); + const result = await runKvmGuestClaude(config); + if (result.exitCode !== 0) process.exitCode = 1; + return; + } + throw new 
Error(`unknown guest runtime command: ${command ?? ""}`); +}; + +const invokedPath = process.argv[1] ? resolve(process.argv[1]) : ""; +if (invokedPath === fileURLToPath(import.meta.url)) { + void main().catch((error) => { + console.error(error); + process.exitCode = 1; + }); +} diff --git a/e2e/desktop-kvm/gui-acceptance.test.ts b/e2e/desktop-kvm/gui-acceptance.test.ts new file mode 100644 index 000000000..526d3c98c --- /dev/null +++ b/e2e/desktop-kvm/gui-acceptance.test.ts @@ -0,0 +1,845 @@ +// One watched product journey in a disposable Linux KVM guest: the real +// packaged desktop switches two bearer accounts at one remote origin, keeps a +// remote account active across restart, then exposes its local MCP to the real +// pinned Claude Code binary with deterministic loopback-only model replay. + +import { existsSync, mkdtempSync, readdirSync, rmSync, writeFileSync } from "node:fs"; +import { get } from "node:http"; +import { tmpdir } from "node:os"; +import { dirname, join } from "node:path"; +import { fileURLToPath } from "node:url"; + +import { expect, it } from "@effect/vitest"; + +import { writeJsonAtomicSync } from "../src/artifact-io"; +import { writeClaudeCodeEvidence } from "../src/clients/claude-code-evidence"; +import { PackagedDesktopPage } from "../src/desktop/packaged"; +import { writeFocusedTestSource } from "../src/test-source"; +import { buildManifest } from "../src/viewer/manifest"; +import { connectLinuxKvmGuest } from "../src/vm/linux-kvm-libvirt"; +import type { LinuxKvmGuestConnection } from "../src/vm/linux-kvm"; +import { + KVM_ACCOUNT_FIXTURES, + KVM_CLAUDE_EXPECTED_RESULT, + KVM_REPLAY_API_KEY, + isLoopbackHttpUrl, + type KvmGuestClaudeResult, + type KvmGuestRuntimeState, +} from "./guest-runtime"; + +const SCENARIO_NAME = + "Desktop KVM · bearer accounts survive remote-active restart and real Claude uses local MCP"; + +interface CdpTarget { + readonly type: string; + readonly url: string; + readonly webSocketDebuggerUrl?: string; +} + 
/**
 * Type guard for "plain object-like" values decoded from untrusted JSON.
 *
 * Returns true when `value` is a non-null object that is not an array, and
 * narrows it to a string-keyed record of unknown values so callers can probe
 * individual properties with `typeof` before trusting them.
 */
const isUnknownRecord = (value: unknown): value is Record<string, unknown> =>
  typeof value === "object" && value !== null && !Array.isArray(value);
/**
 * Performs an HTTP GET against `url` and resolves with the whole response body
 * decoded as text.
 *
 * Resolves only for a 2xx status. Rejects when the status is anything else
 * (the message carries the status code and body), when the request errors,
 * when the response stream errors part-way through the body, or when the
 * request's 2,000 ms timeout fires and the request is destroyed.
 */
const readText = (url: string) =>
  new Promise<string>((resolve, reject) => {
    const request = get(url, (response) => {
      const chunks: Buffer[] = [];
      response.on("data", (chunk: Buffer) => chunks.push(chunk));
      // A connection that drops after the headers arrived is reported on the
      // response stream; without a listener it would surface as an unhandled
      // "error" event instead of rejecting this promise.
      response.once("error", reject);
      response.on("end", () => {
        const body = Buffer.concat(chunks).toString();
        if (response.statusCode && response.statusCode >= 200 && response.statusCode < 300) {
          resolve(body);
        } else {
          // oxlint-disable-next-line executor/no-promise-reject -- boundary: node:http callback adapter for the external CDP endpoint
          reject(new Error(`CDP target list returned ${response.statusCode}: ${body}`));
        }
      });
    });
    request.once("error", reject);
    request.setTimeout(2_000, () => request.destroy(new Error("CDP target list timed out")));
  });
/**
 * Shuts down a previously launched packaged desktop app, if there is one.
 *
 * Closes the page connection, then runs a guest-side script that sends SIGTERM
 * to the recorded pid, polls up to 100 times at 0.1 s intervals for the process
 * to disappear, and finally sends SIGKILL. Does nothing when `app` is
 * undefined.
 *
 * @throws Error when the guest command reports a non-zero exit code.
 */
const stopDesktop = async (guest: LinuxKvmGuestConnection, app: RunningDesktop | undefined) => {
  if (!app) return;
  const { page, pid } = app;
  page.close();

  const script = [
    `kill -TERM ${pid} 2>/dev/null || true`,
    "for attempt in $(seq 1 100)",
    `do kill -0 ${pid} 2>/dev/null || exit 0`,
    "sleep 0.1",
    "done",
    `kill -KILL ${pid} 2>/dev/null || true`,
  ].join("; ");
  const { code, stderr } = await guest.run(script);
  if (code !== 0) throw new Error(`packaged app did not stop: ${stderr}`);
};
/**
 * Ensures the "Server profiles" popover is open.
 *
 * If no open popover element is present, the button whose aria-label starts
 * with "Select Executor server:" is clicked. The helper then asserts that the
 * popover was already open or the trigger was found, and waits up to 30 s for
 * an open popover whose text contains "Server profiles".
 */
const openServerProfiles = async (page: PackagedDesktopPage) => {
  const openPopover = `document.querySelector('[data-slot="popover-content"][data-state="open"]')`;

  let opened = await page.evaluate(`${openPopover} !== null`);
  if (!opened) {
    // Not open yet: click the trigger and report whether it existed.
    opened = await page.evaluate(`(() => {
      const trigger = document.querySelector('[aria-label^="Select Executor server:"]');
      if (!(trigger instanceof HTMLButtonElement)) return false;
      trigger.click();
      return true;
    })()`);
  }
  expect(opened, "the packaged desktop app exposes the server profile trigger").toBe(true);

  await page.waitForExpression(
    `${openPopover}?.textContent?.includes("Server profiles")`,
    30_000,
    "the server profiles popover",
  );
};
/**
 * Clicks a button inside the open server-profiles popover.
 *
 * The in-page script picks the first `button` within the open popover that has
 * at least one client rect and whose text content includes `text`, clicks it,
 * and reports whether it did so. The helper asserts that a click happened.
 *
 * @param text Substring of the target button's text; embedded in the script
 *   via `JSON.stringify`.
 */
const clickServerProfileButton = async (page: PackagedDesktopPage, text: string) => {
  const clickScript = `(() => {
    const content = document.querySelector('[data-slot="popover-content"][data-state="open"]');
    if (!(content instanceof HTMLElement)) return false;
    const expected = ${JSON.stringify(text)};
    const button = Array.from(content.querySelectorAll("button")).find(
      (candidate) => candidate.getClientRects().length > 0 && candidate.textContent?.includes(expected),
    );
    if (!(button instanceof HTMLButtonElement)) return false;
    button.click();
    return true;
  })()`;
  const clicked = await page.evaluate(clickScript);
  expect(clicked, `the server profiles popover contains ${text}`).toBe(true);
};
/**
 * Adds a bearer-authenticated custom server profile through the popover form
 * and waits until it becomes the selected connection.
 *
 * Steps: open the popover, choose "Custom server", fill in origin and display
 * name, switch the auth select to "bearer", fill in the token once the
 * password input appears, submit with "Add and use", then wait up to 30 s for
 * the connection label to contain `input.name`.
 */
const addServerProfile = async (
  page: PackagedDesktopPage,
  input: { readonly origin: string; readonly name: string; readonly token: string },
) => {
  const originField = 'input[placeholder="https://executor.example"]';
  const nameField = 'input[placeholder="Remote executor"]';
  const tokenField = 'form input[type="password"]';
  const { origin, name, token } = input;

  await openServerProfiles(page);
  await clickServerProfileButton(page, "Custom server");
  await page.waitForExpression(
    `document.querySelector('${originField}') !== null`,
    30_000,
    "the custom server form",
  );

  await setServerProfileControl(page, originField, origin);
  await setServerProfileControl(page, nameField, name);
  await setServerProfileControl(page, "form select", "bearer");

  // The token input is awaited only after the auth kind has been switched.
  await page.waitForExpression(
    `document.querySelector('${tokenField}') !== null`,
    30_000,
    "the bearer token input",
  );
  await setServerProfileControl(page, tokenField, token);

  await clickServerProfileButton(page, "Add and use");
  await waitForServerConnectionLabel(page, name, 30_000);
};
/**
 * Validates the state document published by the guest fixture runtime.
 *
 * Accepts a record whose `pid` is a number and whose `accountOrigin`,
 * `brainOrigin`, `accountLedgerPath` and `replayLedgerPath` are strings, and
 * returns a fresh object holding exactly those five fields.
 *
 * @throws Error when the value is not a record or any field has the wrong type.
 */
const runtimeState = (value: unknown): KvmGuestRuntimeState => {
  if (isUnknownRecord(value)) {
    const { pid, accountOrigin, brainOrigin, accountLedgerPath, replayLedgerPath } = value;
    if (
      typeof pid === "number" &&
      typeof accountOrigin === "string" &&
      typeof brainOrigin === "string" &&
      typeof accountLedgerPath === "string" &&
      typeof replayLedgerPath === "string"
    ) {
      return { pid, accountOrigin, brainOrigin, accountLedgerPath, replayLedgerPath };
    }
  }
  throw new Error("guest runtime published an invalid state document");
};
/**
 * Validates the replay ledger document read from the guest and copies out only
 * the fields this test inspects.
 *
 * Shape accepted: `{ requests: [{ path, toolNames, messages: [{ toolResults:
 * [{ content, isError }] }] }], errors }`. `errors` is checked before any
 * request, and within a request the messages are checked before the tool
 * names, so the first offending part determines the error that is thrown.
 *
 * @throws Error naming the invalid part of the document.
 */
const replayLedger = (value: unknown): ReplayLedger => {
  const parseToolResult = (result: unknown) => {
    if (
      isUnknownRecord(result) &&
      typeof result.content === "string" &&
      typeof result.isError === "boolean"
    ) {
      return { content: result.content, isError: result.isError };
    }
    throw new Error("guest replay tool result is invalid");
  };

  const parseMessage = (message: unknown) => {
    if (!isUnknownRecord(message) || !Array.isArray(message.toolResults)) {
      throw new Error("guest replay message is invalid");
    }
    return { toolResults: message.toolResults.map((entry) => parseToolResult(entry)) };
  };

  const parseRequest = (request: unknown) => {
    if (!isUnknownRecord(request) || typeof request.path !== "string") {
      throw new Error("guest replay request is invalid");
    }
    if (!Array.isArray(request.messages)) {
      throw new Error("guest replay request messages are invalid");
    }
    const messages = request.messages.map((entry) => parseMessage(entry));
    const toolNames = stringArray(request.toolNames, "guest replay tool names");
    return { path: request.path, toolNames, messages };
  };

  if (!isUnknownRecord(value) || !Array.isArray(value.requests)) {
    throw new Error("guest replay ledger is invalid");
  }
  const errors = stringArray(value.errors, "guest replay errors");
  const requests = value.requests.map((entry) => parseRequest(entry));
  return { requests, errors };
};
// The single acceptance scenario. Every input comes from environment variables
// read through `env`, which throws when one is missing. The body runs inside
// one try/catch/finally so that cleanup and `result.json` are always produced;
// the original failure (or, failing that, a cleanup failure) is rethrown at
// the very end.
it(SCENARIO_NAME, { timeout: 480_000 }, async () => {
  const artifactDir = env("E2E_KVM_ARTIFACT_DIR");
  writeFocusedTestSource({
    runDir: artifactDir,
    filePath: fileURLToPath(import.meta.url),
    testName: SCENARIO_NAME,
  });
  const display = env("E2E_KVM_GUEST_DISPLAY");
  const guestHost = env("E2E_KVM_GUEST_HOST");
  const remoteApp = env("E2E_KVM_REMOTE_APP");
  const remoteBun = env("E2E_KVM_REMOTE_BUN");
  const remoteClaude = env("E2E_KVM_REMOTE_CLAUDE");
  const remoteGuestRuntime = env("E2E_KVM_REMOTE_GUEST_RUNTIME");
  const remoteHome = env("E2E_KVM_REMOTE_HOME");
  const recordingPath = env("E2E_KVM_RECORDING_PATH");
  const expectedClaudeVersion = env("E2E_KVM_CLAUDE_CODE_VERSION");
  const localCdpPort = Number.parseInt(env("E2E_KVM_CDP_PORT"), 10);
  const guest = connectLinuxKvmGuest({
    host: guestHost,
    keyPath: env("E2E_KVM_SSH_KEY"),
    user: env("E2E_KVM_GUEST_USER"),
  });
  const appLog = `${remoteHome}/app.log`;
  const runtimeDir = `${remoteHome}/guest-runtime`;
  const runtimeLog = `${runtimeDir}.log`;
  let app: RunningDesktop | undefined;
  let fixturePid: number | undefined;
  let failure: unknown;
  let cleanupFailure: unknown;
  let passed = false;
  const startedAt = Date.now();

  try {
    // First launch wipes the remote home so no earlier profile state leaks in.
    app = await launchDesktop({
      guest,
      display,
      remoteApp,
      remoteHome,
      appLog,
      localCdpPort,
      cleanHome: true,
    });
    await waitForServerConnectionLabel(app.page, "Local Executor", 120_000);

    // Start the fixture servers in the guest and remember the pid for cleanup.
    const fixtureStart = await guest.run(
      `nohup ${shellQuote(remoteBun)} ${shellQuote(remoteGuestRuntime)} serve --state-dir ${shellQuote(runtimeDir)} --account-host ${shellQuote(guestHost)} >${shellQuote(runtimeLog)} 2>&1 < /dev/null & echo $!`,
    );
    expect(fixtureStart.code, `guest fixtures failed to launch: ${fixtureStart.stderr}`).toBe(0);
    expect(fixtureStart.stdout.trim()).toMatch(/^\d+$/);
    fixturePid = Number.parseInt(fixtureStart.stdout.trim(), 10);

    // Poll (120 attempts, 250 ms apart) until the fixtures publish a valid
    // runtime.json; read or validation errors just mean "not ready yet".
    let state: KvmGuestRuntimeState | undefined;
    for (let attempt = 0; attempt < 120 && !state; attempt++) {
      state = await guestJson(guest, `${runtimeDir}/runtime.json`)
        .then(runtimeState)
        .catch(() => undefined);
      if (!state) await new Promise((resolve) => setTimeout(resolve, 250));
    }
    if (!state) throw new Error("guest fixtures did not publish runtime state");
    expect(new URL(state.accountOrigin).hostname, "account fixture is classified as remote").toBe(
      guestHost,
    );
    expect(new URL(state.brainOrigin).hostname, "model replay stays inside guest loopback").toBe(
      "127.0.0.1",
    );

    // Two bearer accounts at the same origin: each must show only its own marker.
    const [accountA, accountB] = KVM_ACCOUNT_FIXTURES;
    await addServerProfile(app.page, {
      origin: state.accountOrigin,
      name: accountA.name,
      token: accountA.token,
    });
    await expectIntegrationAccount(app.page, accountA.marker, accountB.marker);
    await closeServerProfiles(app.page);
    await app.page.screenshot(join(artifactDir, "01-account-a-catalog.png"));

    await addServerProfile(app.page, {
      origin: state.accountOrigin,
      name: accountB.name,
      token: accountB.token,
    });
    await expectIntegrationAccount(app.page, accountB.marker, accountA.marker);
    await closeServerProfiles(app.page);
    await app.page.screenshot(join(artifactDir, "02-account-b-catalog.png"));

    // Switch back to account A and confirm the row is labelled as remote.
    await selectServerProfile(app.page, accountA.name);
    await expectIntegrationAccount(app.page, accountA.marker, accountB.marker);
    const accountARow = await serverProfileRowText(app.page, accountA.name);
    expect(accountARow, "the non-loopback account is visibly classified as remote").toContain(
      "Remote",
    );
    await closeServerProfiles(app.page);
    await app.page.screenshot(join(artifactDir, "03-account-a-restored.png"));

    // Snapshot the persisted profiles so the post-restart state can be compared.
    const beforeRestart = await waitForPersistedProfiles(app.page, [accountA.name, accountB.name]);
    const profileA = beforeRestart.profiles.find(
      (profile) => profile.displayName === accountA.name,
    );
    const profileB = beforeRestart.profiles.find(
      (profile) => profile.displayName === accountB.name,
    );
    expect(profileA).toMatchObject({ origin: state.accountOrigin, token: accountA.token });
    expect(profileB).toMatchObject({ origin: state.accountOrigin, token: accountB.token });
    expect(profileA?.kind).toBe("http");
    expect(profileB?.kind).toBe("http");
    expect(
      beforeRestart.profiles.some((profile) => profile.kind === "desktop-sidecar"),
      "the local sidecar remains persisted while account A is active",
    ).toBe(true);
    expect(profileA?.key).not.toBe(profileB?.key);
    expect(beforeRestart.activeKey, "account A is active before the restart").toBe(profileA?.key);

    // Restart against the same home. `app` is cleared in between so that a
    // failed relaunch does not make cleanup stop the already-stopped process.
    await stopDesktop(guest, app);
    app = undefined;
    app = await launchDesktop({
      guest,
      display,
      remoteApp,
      remoteHome,
      appLog,
      localCdpPort,
      cleanHome: false,
    });
    await waitForServerConnectionLabel(app.page, accountA.name, 120_000);
    await expectIntegrationAccount(app.page, accountA.marker, accountB.marker);
    const afterRestart = await waitForPersistedProfiles(app.page, [accountA.name, accountB.name]);
    expect(afterRestart.activeKey, "the remote account remains active after restart").toBe(
      profileA?.key,
    );
    expect(
      afterRestart.profiles.some((profile) => profile.kind === "desktop-sidecar"),
      "the local sidecar remains persisted after restoring the remote account",
    ).toBe(true);
    expect(
      afterRestart.profiles.filter(
        (profile) =>
          profile.displayName === accountA.name || profile.displayName === accountB.name,
      ),
    ).toEqual(
      expect.arrayContaining([
        expect.objectContaining({
          kind: "http",
          key: profileA?.key,
          origin: state.accountOrigin,
          displayName: accountA.name,
          token: accountA.token,
        }),
        expect.objectContaining({
          kind: "http",
          key: profileB?.key,
          origin: state.accountOrigin,
          displayName: accountB.name,
          token: accountB.token,
        }),
      ]),
    );
    const restartedAccountARow = await serverProfileRowText(app.page, accountA.name);
    expect(
      restartedAccountARow,
      "the restored active account remains visibly classified as remote",
    ).toContain("Remote");
    await app.page.screenshot(join(artifactDir, "04-remote-profile-after-restart.png"));
    await closeServerProfiles(app.page);

    // Back to the local sidecar: both remote markers must leave the page
    // before the local MCP endpoint is handed to Claude.
    await selectServerProfile(app.page, "Local Executor");
    await closeServerProfiles(app.page);
    await app.page.waitForExpression(
      `!document.body?.innerText.includes(${JSON.stringify(accountA.marker)}) &&
        !document.body?.innerText.includes(${JSON.stringify(accountB.marker)})`,
      30_000,
      "the local sidecar catalog to replace remote account data",
    );
    const localMcp = await localDesktopMcp(app.page);
    expect(
      localMcp,
      "the desktop exposes its real local MCP bearer to Connect an agent",
    ).not.toBeNull();
    if (!localMcp) throw new Error("local desktop MCP connection disappeared");
    const mcpUrl = new URL("/mcp", localMcp.origin).toString();
    expect(isLoopbackHttpUrl(mcpUrl), "Claude can reach MCP without leaving guest loopback").toBe(
      true,
    );

    // The run config carries the local bearer token, so it is written to a
    // private temp dir with mode 0600, pushed to the guest, and the local copy
    // is removed whether or not the push succeeds.
    const remoteClaudeConfig = `${runtimeDir}/claude-run.json`;
    const remoteClaudeOutput = `${runtimeDir}/claude-result.json`;
    const localSecretDir = mkdtempSync(join(tmpdir(), "executor-kvm-claude-config-"));
    const localClaudeConfig = join(localSecretDir, "config.json");
    try {
      writeFileSync(
        localClaudeConfig,
        `${JSON.stringify({
          binaryPath: remoteClaude,
          expectedVersion: expectedClaudeVersion,
          homeDir: `${runtimeDir}/claude-home`,
          mcpUrl,
          authorizationHeader: `Bearer ${localMcp.token}`,
          brainBaseUrl: state.brainOrigin,
          outputPath: remoteClaudeOutput,
        })}\n`,
        { mode: 0o600 },
      );
      await guest.push(localClaudeConfig, remoteClaudeConfig);
    } finally {
      rmSync(localSecretDir, { force: true, recursive: true });
    }
    const claudeInvocation = await guest.run(
      `chmod 600 ${shellQuote(remoteClaudeConfig)} && ${shellQuote(remoteBun)} ${shellQuote(remoteGuestRuntime)} claude ${shellQuote(remoteClaudeConfig)}`,
    );
    // The result document is read before the exit code is asserted so that the
    // recorded stderr can be included in the assertion message.
    const realClaude = claudeResult(await guestJson(guest, remoteClaudeOutput));
    expect(claudeInvocation.code, `real Claude Code failed: ${realClaude.stderr}`).toBe(0);
    expect(realClaude.exitCode).toBe(0);
    expect(realClaude.binaryPath).toBe(remoteClaude);
    expect(realClaude.observedVersion).toBe(expectedClaudeVersion);
    expect(realClaude.mcpOrigin).toBe(new URL(mcpUrl).origin);
    expect(realClaude.replayOrigin).toBe(new URL(state.brainOrigin).origin);
    expect(realClaude.stdout, "Claude returns Executor's real execute result").toContain(
      KVM_CLAUDE_EXPECTED_RESULT,
    );
    expect(
      isUnknownRecord(realClaude.structuredResult) &&
        typeof realClaude.structuredResult.result === "string" &&
        realClaude.structuredResult.result.includes(KVM_CLAUDE_EXPECTED_RESULT),
      "Claude's structured result contains the value returned by Executor",
    ).toBe(true);

    const replay = replayLedger(await guestJson(guest, state.replayLedgerPath));
    expect(replay.errors).toEqual([]);
    expect(
      replay.requests.some((request) =>
        request.toolNames.some((name) => name.endsWith("__execute")),
      ),
      "Claude discovered Executor execute through the desktop MCP",
    ).toBe(true);
    expect(
      replay.requests
        .flatMap((request) => request.messages)
        .flatMap((message) => message.toolResults)
        .some((result) => !result.isError && result.content.includes(KVM_CLAUDE_EXPECTED_RESULT)),
      "the real MCP result returned to the loopback model boundary",
    ).toBe(true);

    // Every GET /api/integrations the remote fixture saw must carry one of the
    // two account tokens, and both tokens must have been seen.
    const remoteAccountRequests = accountLedger(await guestJson(guest, state.accountLedgerPath));
    const integrationAuthorizations = remoteAccountRequests
      .filter(
        (request) =>
          request.method === "GET" &&
          new URL(request.url, state.accountOrigin).pathname === "/api/integrations",
      )
      .map((request) => request.authorization);
    expect(integrationAuthorizations).toContain(`Bearer ${accountA.token}`);
    expect(integrationAuthorizations).toContain(`Bearer ${accountB.token}`);
    expect(
      integrationAuthorizations.every(
        (authorization) =>
          authorization === `Bearer ${accountA.token}` ||
          authorization === `Bearer ${accountB.token}`,
      ),
      "the same-origin remote fixture never receives the local desktop bearer",
    ).toBe(true);

    writeJsonAtomicSync(join(artifactDir, "account-fixture-ledger.json"), remoteAccountRequests);
    writeJsonAtomicSync(join(artifactDir, "anthropic-replay-ledger.json"), replay);
    writeClaudeCodeEvidence(artifactDir, {
      label: "KVM guest Claude Code against packaged desktop local MCP",
      executable: realClaude.binaryPath,
      expectedVersion: realClaude.expectedVersion,
      observedVersion: realClaude.observedVersion,
      durationMs: realClaude.durationMs,
      status: realClaude.exitCode === 0 ? "success" : "failure",
      exitCode: realClaude.exitCode,
      stdout: realClaude.stdout,
      stderr: realClaude.stderr,
      structuredResult: realClaude.structuredResult,
      mcpServerName: realClaude.mcpServerName,
      mcpOrigin: realClaude.mcpOrigin,
      replayOrigin: realClaude.replayOrigin,
      replayRequestPaths: replay.requests.map((request) => request.path),
      replayErrors: replay.errors,
      secrets: [localMcp.token, accountA.token, accountB.token, KVM_REPLAY_API_KEY],
    });
    await app.page.screenshot(join(artifactDir, "05-claude-code-local-mcp.png"));

    expect(existsSync(recordingPath), "the SPICE recorder created its MP4 artifact").toBe(true);
    passed = true;
  } catch (error) {
    failure = error;
  } finally {
    // Each cleanup step runs on its own: a desktop that refuses to stop must
    // not prevent log capture or leave the fixture server and app running.
    // Only the first cleanup failure is kept for reporting.
    const cleanupStep = async (step: () => Promise<unknown>) => {
      try {
        await step();
      } catch (error) {
        if (cleanupFailure === undefined) cleanupFailure = error;
      }
    };
    await cleanupStep(() => stopDesktop(guest, app));
    await cleanupStep(async () => {
      const logs = await guest.run(
        `tail -300 ${shellQuote(appLog)} 2>/dev/null || true; tail -200 ${shellQuote(runtimeLog)} 2>/dev/null || true`,
      );
      writeFileSync(join(artifactDir, "packaged-app.log"), `${logs.stdout}\n${logs.stderr}`);
    });
    await cleanupStep(async () => {
      if (fixturePid) await guest.run(`kill -TERM ${fixturePid} 2>/dev/null || true`);
    });
    await cleanupStep(() =>
      guest.run(`pkill -TERM -f ${shellQuote(remoteApp)} 2>/dev/null || true`),
    );

    const endedAt = Date.now();
    const finalFailure = failure ?? cleanupFailure;
    writeJsonAtomicSync(join(artifactDir, "result.json"), {
      scenario: SCENARIO_NAME,
      target: "desktop-kvm",
      ok: passed && cleanupFailure === undefined,
      startedAt,
      endedAt,
      durationMs: endedAt - startedAt,
      visualEvidence: { dataClassification: "synthetic-only" },
      ...(finalFailure ? { error: String(finalFailure) } : {}),
      artifacts: readdirSync(artifactDir).filter((name) => name !== "result.json"),
    });
    buildManifest(dirname(dirname(artifactDir)));
  }

  if (failure) throw failure;
  if (cleanupFailure) throw cleanupFailure;
});
-const guiAvailable = (): boolean => { - if (process.platform === "darwin") { - // oxlint-disable-next-line executor/no-try-catch-or-throw -- boundary: probing the session manager; absence = no GUI - try { - return execFileSync("launchctl", ["managername"], { encoding: "utf8" }).trim() === "Aqua"; - } catch { - return false; - } - } - if (process.platform === "linux") { - return Boolean(process.env.DISPLAY || process.env.WAYLAND_DISPLAY); - } - return true; // windows: the runner places this in an interactive session -}; - const SCENARIO_NAME = "Desktop (packaged) · the real bundle attaches to the OS-supervised daemon"; -const appExe = process.env.E2E_DESKTOP_APP_EXE; -const executorBin = process.env.E2E_DESKTOP_EXECUTOR_BIN; - -interface PackagedApp { - readonly child: ChildProcess; - readonly debugPort: string; - cdp: CdpPage; -} - -interface CdpResponse { - readonly id: number; - readonly result?: T; - readonly error?: { readonly message?: string }; -} - -interface CdpEvaluateResult { - readonly result: { readonly value?: unknown }; - readonly exceptionDetails?: unknown; -} - -interface CdpTarget { - readonly type: string; - readonly url: string; - readonly webSocketDebuggerUrl?: string; -} - -class CdpPage { - private nextId = 1; - private readonly pending = new Map< - number, - { - readonly resolve: (value: unknown) => void; - readonly reject: (error: Error) => void; - } - >(); - - private constructor(private readonly socket: WebSocket) { - socket.addEventListener("message", (event) => { - const data = event.data; - if (typeof data !== "string") return; - const message = JSON.parse(data) as CdpResponse; - if (!message.id) return; - const pending = this.pending.get(message.id); - if (!pending) return; - this.pending.delete(message.id); - if (message.error) { - pending.reject(new Error(message.error.message ?? 
"CDP command failed")); - return; - } - pending.resolve(message.result); - }); - socket.addEventListener("close", () => { - for (const [, pending] of this.pending) { - pending.reject(new Error("CDP socket closed")); - } - this.pending.clear(); - }); - } - - static connect = (url: string): Promise => - new Promise((resolve, reject) => { - const socket = new WebSocket(url); - const timer = setTimeout(() => { - socket.close(); - // oxlint-disable-next-line executor/no-promise-reject, executor/no-error-constructor -- boundary: WebSocket connection promise adapter - reject(new Error(`Timed out connecting to page CDP target ${url}`)); - }, 30_000); - socket.addEventListener( - "open", - () => { - clearTimeout(timer); - resolve(new CdpPage(socket)); - }, - { once: true }, - ); - socket.addEventListener( - "error", - () => { - clearTimeout(timer); - // oxlint-disable-next-line executor/no-promise-reject, executor/no-error-constructor -- boundary: WebSocket connection promise adapter - reject(new Error(`Failed to connect to page CDP target ${url}`)); - }, - { once: true }, - ); - }); - - command = async (method: string, params: Record = {}): Promise => { - const id = this.nextId; - this.nextId += 1; - const result = new Promise((resolve, reject) => { - this.pending.set(id, { - resolve: (value) => resolve(value as T), - reject, - }); - }); - this.socket.send(JSON.stringify({ id, method, params })); - return result; - }; - - evaluate = async (expression: string): Promise => { - const result = await this.command("Runtime.evaluate", { - expression, - awaitPromise: true, - returnByValue: true, - }); - if (result.exceptionDetails) { - throw new Error(`CDP evaluation failed: ${JSON.stringify(result.exceptionDetails)}`); - } - return result.result.value as T; - }; - - waitForText = async (text: string, timeoutMs: number): Promise => { - const deadline = Date.now() + timeoutMs; - const expression = `document.body?.innerText.includes(${JSON.stringify(text)}) ?? 
false`; - for (;;) { - if (await this.evaluate(expression).catch(() => false)) return; - if (Date.now() >= deadline) throw new Error(`Timed out waiting for text: ${text}`); - await new Promise((resolve) => setTimeout(resolve, 250)); - } - }; - - screenshot = async (path: string): Promise => { - const result = await this.command<{ readonly data: string }>("Page.captureScreenshot", { - format: "png", - fromSurface: true, - }); - writeFileSync(path, Buffer.from(result.data, "base64")); - }; - - close = (): void => { - this.socket.close(); - }; -} - -const freePort = (): Promise => - new Promise((resolve, reject) => { - const srv = net.createServer(); - srv.on("error", reject); - srv.listen(0, "127.0.0.1", () => { - const port = (srv.address() as net.AddressInfo).port; - srv.close(() => resolve(port)); - }); - }); - interface Manifest { readonly kind: string; readonly pid: number; } -interface DaemonStart { - readonly child: ChildProcess; - readonly ready: boolean; - readonly stderr: string; -} - -/** Spawn the bundle's compiled executor as a supervised daemon; resolves once it - * announces readiness (or times out / exits early, ready:false). 
*/ -const startSupervisedDaemon = (env: NodeJS.ProcessEnv, port: number): Promise => - new Promise((resolve) => { - const child = spawn( - executorBin as string, - ["daemon", "run", "--foreground", "--port", String(port), "--hostname", "127.0.0.1"], - { env, stdio: ["ignore", "pipe", "pipe"] }, - ); - let stderr = ""; - const settle = (ready: boolean) => resolve({ child, ready, stderr }); - const timer = setTimeout(() => settle(false), 60_000); - child.stdout.on("data", (chunk: Buffer) => { - if (/Daemon ready on http:\/\//.test(chunk.toString())) { - clearTimeout(timer); - settle(true); - } - }); - child.stderr.on("data", (chunk: Buffer) => { - stderr += chunk.toString(); - }); - child.on("exit", () => { - clearTimeout(timer); - settle(false); - }); - }); - -const packagedSingleInstanceAvailable = (): boolean => { - if (process.platform !== "darwin" || !appExe) return true; - try { - const lines = execFileSync("pgrep", ["-fl", "Executor.app/Contents/MacOS/Executor"], { - encoding: "utf8", - }) - .split("\n") - .filter(Boolean); - return !lines.some((line) => !line.includes(appExe)); - } catch { - return true; - } -}; - -const waitForPageWebSocket = async (debugPort: string): Promise => { - const deadline = Date.now() + 120_000; - for (;;) { - const targets = (await fetch(`http://127.0.0.1:${debugPort}/json/list`) - .then((response) => (response.ok ? 
response.json() : [])) - .catch(() => [])) as ReadonlyArray; - const page = targets.find( - (target) => - target.type === "page" && - target.webSocketDebuggerUrl && - !target.url.startsWith("devtools://"), - ); - if (page?.webSocketDebuggerUrl) return page.webSocketDebuggerUrl; - if (Date.now() >= deadline) { - throw new Error("Timed out waiting for packaged app page CDP target"); - } - await new Promise((resolve) => setTimeout(resolve, 100)); - } -}; - -const launchPackaged = async (home: string): Promise => { - let output = ""; - let settled = false; - const child = spawn(appExe as string, ["--remote-debugging-port=0"], { - env: { ...process.env, HOME: home }, - stdio: ["ignore", "pipe", "pipe"], - }); +const desktopPreflight = packagedDesktopPreflight(); - const browserCdpUrl = await new Promise((resolve, reject) => { - const timer = setTimeout(() => { - // oxlint-disable-next-line executor/no-promise-reject, executor/no-error-constructor -- boundary: packaged-app launch promise adapter - reject(new Error(`Timed out waiting for packaged app CDP URL\n${output}`)); - }, 120_000); - const settle = (fn: () => void) => { - if (settled) return; - settled = true; - clearTimeout(timer); - fn(); - }; - const collectOutput = (chunk: Buffer) => { - const text = chunk.toString(); - output = (output + text).slice(-16_384); - const match = output.match(/DevTools listening on (ws:\/\/[^\s]+)/); - if (match) settle(() => resolve(match[1]!)); - }; - child.stdout?.on("data", collectOutput); - child.stderr?.on("data", collectOutput); - // oxlint-disable-next-line executor/no-promise-reject -- boundary: packaged-app launch promise adapter - child.once("error", (error) => settle(() => reject(error))); - child.once("exit", (code, signal) => - settle(() => - // oxlint-disable-next-line executor/no-promise-reject, executor/no-error-constructor -- boundary: packaged-app launch promise adapter - reject( - new Error(`Packaged app exited before CDP (code=${code} 
signal=${signal})\n${output}`), - ), - ), - ); - }); - - const debugPort = new URL(browserCdpUrl).port; - const pageCdpUrl = await waitForPageWebSocket(debugPort); - const cdp = await CdpPage.connect(pageCdpUrl); - await cdp.command("Runtime.enable"); - await cdp.command("Page.enable"); - return { child, cdp, debugPort }; -}; - -const stopProcess = async (child: ChildProcess | undefined): Promise => { - if (!child || child.exitCode !== null || child.signalCode !== null) return; - await new Promise((resolve) => { - const timeout = setTimeout(() => { - child.kill("SIGKILL"); - resolve(); - }, 5_000); - child.once("exit", () => { - clearTimeout(timeout); - resolve(); - }); - child.kill("SIGTERM"); - }); -}; - -const closePackaged = async (app: PackagedApp | undefined): Promise => { - app?.cdp.close(); - await stopProcess(app?.child); -}; - -if (!guiAvailable() || !packagedSingleInstanceAvailable()) { - it.skip(`${SCENARIO_NAME} (needs a GUI display and no already-running Executor.app)`, () => {}); +if (desktopPreflight.status === "skip") { + it.skip(`${SCENARIO_NAME} (${desktopPreflight.reason})`, () => {}); +} else if (desktopPreflight.status === "fail") { + scenario(`${SCENARIO_NAME} preflight`, { timeout: 30_000 }, Effect.die(desktopPreflight.reason)); } else { scenario( SCENARIO_NAME, { timeout: 240_000 }, Effect.gen(function* () { - if (!appExe || !executorBin) { - return yield* Effect.die( - "E2E_DESKTOP_APP_EXE / E2E_DESKTOP_EXECUTOR_BIN not set — did desktop-packaged.globalsetup run?", - ); - } const runDir = yield* RunDir; yield* Effect.promise(() => run(runDir)); }), @@ -349,27 +58,26 @@ if (!guiAvailable() || !packagedSingleInstanceAvailable()) { } const run = async (runDir: string) => { - const home = mkdtempSync(join(tmpdir(), "executor-pkg-attach-")); + const home = createPackagedDesktopHome("executor-pkg-attach-"); const dataDir = join(home, ".executor"); const manifestPath = join(dataDir, "server-control", "server.json"); const port = await freePort(); 
let daemon: ChildProcess | undefined; - let app: PackagedApp | undefined; + let app: PackagedDesktopApp | undefined; let stepIndex = 0; try { - const started = await startSupervisedDaemon( - { - ...process.env, - HOME: home, + const started = await startSupervisedDaemon({ + home, + port, + env: { EXECUTOR_SUPERVISED: "1", EXECUTOR_DATA_DIR: dataDir, EXECUTOR_AUTH_TOKEN: "packaged-attach-film", EXECUTOR_CLIENT: "desktop", }, - port, - ); + }); daemon = started.child; expect(started.ready, `supervised daemon became ready; stderr:\n${started.stderr}`).toBe(true); await waitForHttp(`http://127.0.0.1:${port}/`, { timeoutMs: 30_000 }); @@ -382,13 +90,16 @@ const run = async (runDir: string) => { // Launch the PACKAGED bundle directly. `app.isPackaged` is true, so boot() // runs the supervised attach path; CDP drives the real renderer. - app = await launchPackaged(home); - const page = app.cdp; + const launched = await launchPackagedDesktop({ home }); + app = launched; + const page = launched.cdp; const step = async (label: string, body: () => Promise) => { await body(); stepIndex += 1; const slug = label.toLowerCase().replace(/[^a-z0-9]+/g, "-"); - await page.screenshot(join(runDir, `${String(stepIndex).padStart(2, "0")}-${slug}.png`)); + await launched.captureEvidence({ + rendererPath: join(runDir, `${String(stepIndex).padStart(2, "0")}-${slug}.png`), + }); }; // The console only renders once the app has a live connection AND the bearer @@ -408,8 +119,8 @@ const run = async (runDir: string) => { ); }); } finally { - await closePackaged(app); + await app?.close(); await stopProcess(daemon); - rmSync(home, { recursive: true, force: true }); + removePackagedDesktopHome(home); } }; diff --git a/e2e/desktop-packaged/supervised-regressions.test.ts b/e2e/desktop-packaged/supervised-regressions.test.ts index 69f5d6558..54725fd92 100644 --- a/e2e/desktop-packaged/supervised-regressions.test.ts +++ b/e2e/desktop-packaged/supervised-regressions.test.ts @@ -1,12 +1,11 @@ // 
Packaged desktop supervised-daemon regressions. These run against the real // electron-builder bundle and its bundled executor because the supervised attach // path is production-only (`app.isPackaged`). -import { type ChildProcess, execFile, execFileSync, spawn } from "node:child_process"; +import { type ChildProcess, execFile, execFileSync } from "node:child_process"; import { chmodSync, existsSync, mkdirSync, - mkdtempSync, readFileSync, rmSync, statSync, @@ -14,7 +13,7 @@ import { } from "node:fs"; import { createServer, type IncomingMessage } from "node:http"; import net from "node:net"; -import { homedir, tmpdir } from "node:os"; +import { homedir, networkInterfaces } from "node:os"; import { dirname, join } from "node:path"; import { promisify } from "node:util"; @@ -25,6 +24,21 @@ import { serializeExecutorLocalServerManifest, } from "@executor-js/sdk/shared"; +import { + closePackagedDesktop, + createPackagedDesktopHome, + freePort, + launchPackagedDesktop, + packagedDesktopPreflight, + packagedDesktopSettingsDir, + reconnectPackagedDesktopPage, + removePackagedDesktopHome, + requirePackagedDesktopBundle, + startSupervisedDaemon, + stopProcess, + type PackagedDesktopApp, + type PackagedDesktopPage, +} from "../src/desktop/packaged"; import { scenario } from "../src/scenario"; import { RunDir } from "../src/services"; import { waitForHttp } from "../setup/boot"; @@ -32,223 +46,15 @@ import { waitForHttp } from "../setup/boot"; const execFileAsync = promisify(execFile); const SERVICE_LABEL = "sh.executor.daemon"; -interface PackagedExecutorBridge { - readonly getSettings: () => Promise<{ readonly port: number }>; - readonly updateSettings: (patch: { readonly port: number }) => Promise; - readonly restartServer: () => Promise; - readonly getServerConnection: () => Promise<{ readonly origin: string } | null>; -} - -interface PackagedApp { - readonly child: ChildProcess; - cdp: CdpPage; - readonly debugPort: string; - readonly output: () => string; -} - 
-interface CdpResponse { - readonly id: number; - readonly result?: T; - readonly error?: { readonly message?: string; readonly data?: string }; -} - -interface CdpEvaluateResult { - readonly result: { readonly value?: unknown }; - readonly exceptionDetails?: unknown; -} - -interface CdpTarget { - readonly type: string; - readonly url: string; - readonly webSocketDebuggerUrl?: string; -} - -class CdpPage { - private nextId = 1; - private readonly pending = new Map< - number, - { - readonly resolve: (value: unknown) => void; - readonly reject: (error: Error) => void; - } - >(); - - private constructor(private readonly socket: WebSocket) { - socket.addEventListener("message", (event) => { - const data = event.data; - if (typeof data !== "string") return; - const message = JSON.parse(data) as CdpResponse; - if (!message.id) return; - const pending = this.pending.get(message.id); - if (!pending) return; - this.pending.delete(message.id); - if (message.error) { - pending.reject(new Error(message.error.message ?? 
"CDP command failed")); - return; - } - pending.resolve(message.result); - }); - socket.addEventListener("close", () => { - for (const [, pending] of this.pending) { - pending.reject(new Error("CDP socket closed")); - } - this.pending.clear(); - }); - } - - static connect = (url: string): Promise => - new Promise((resolve, reject) => { - const socket = new WebSocket(url); - const timer = setTimeout(() => { - socket.close(); - // oxlint-disable-next-line executor/no-promise-reject, executor/no-error-constructor -- boundary: WebSocket connection promise adapter - reject(new Error(`Timed out connecting to page CDP target ${url}`)); - }, 30_000); - socket.addEventListener( - "open", - () => { - clearTimeout(timer); - resolve(new CdpPage(socket)); - }, - { once: true }, - ); - socket.addEventListener( - "error", - () => { - clearTimeout(timer); - // oxlint-disable-next-line executor/no-promise-reject, executor/no-error-constructor -- boundary: WebSocket connection promise adapter - reject(new Error(`Failed to connect to page CDP target ${url}`)); - }, - { once: true }, - ); - }); - - command = async (method: string, params: Record = {}): Promise => { - const id = this.nextId; - this.nextId += 1; - const result = new Promise((resolve, reject) => { - this.pending.set(id, { - resolve: (value) => resolve(value as T), - reject, - }); - }); - this.socket.send(JSON.stringify({ id, method, params })); - return result; - }; - - evaluate = async (expression: string): Promise => { - const result = await this.command("Runtime.evaluate", { - expression, - awaitPromise: true, - returnByValue: true, - }); - if (result.exceptionDetails) { - throw new Error(`CDP evaluation failed: ${JSON.stringify(result.exceptionDetails)}`); - } - return result.result.value as T; - }; - - waitForText = async (text: string, timeoutMs: number): Promise => { - const deadline = Date.now() + timeoutMs; - const expression = `document.body?.innerText.includes(${JSON.stringify(text)}) ?? 
false`; - for (;;) { - if (await this.evaluate(expression).catch(() => false)) return; - if (Date.now() >= deadline) throw new Error(`Timed out waiting for text: ${text}`); - await new Promise((resolve) => setTimeout(resolve, 250)); - } - }; - - waitForExpression = async ( - expression: string, - timeoutMs: number, - description: string, - ): Promise => { - const deadline = Date.now() + timeoutMs; - for (;;) { - if (await this.evaluate(`Boolean(${expression})`).catch(() => false)) return; - if (Date.now() >= deadline) throw new Error(`Timed out waiting for ${description}`); - await new Promise((resolve) => setTimeout(resolve, 250)); - } - }; - - textPresent = async (text: string): Promise => - this.evaluate(`document.body?.innerText.includes(${JSON.stringify(text)}) ?? false`); - - setViewport = async (width: number, height: number): Promise => { - await this.command("Emulation.setDeviceMetricsOverride", { - width, - height, - deviceScaleFactor: 1, - mobile: false, - }); - }; - - wheel = async (x: number, y: number, deltaY: number): Promise => { - await this.command("Input.dispatchMouseEvent", { - type: "mouseWheel", - x, - y, - deltaX: 0, - deltaY, - }); - }; - - screenshot = async (path: string): Promise => { - const result = await this.command<{ readonly data: string }>("Page.captureScreenshot", { - format: "png", - fromSurface: true, - }); - writeFileSync(path, Buffer.from(result.data, "base64")); - }; - - close = (): void => { - this.socket.close(); - }; -} - -declare global { - interface Window { - readonly executor: PackagedExecutorBridge; - } -} - -const appExe = process.env.E2E_DESKTOP_APP_EXE; -const executorBin = process.env.E2E_DESKTOP_EXECUTOR_BIN; - -const guiAvailable = (): boolean => { - if (process.platform === "darwin") { - try { - return execFileSync("launchctl", ["managername"], { encoding: "utf8" }).trim() === "Aqua"; - } catch { - return false; - } - } - if (process.platform === "linux") - return Boolean(process.env.DISPLAY || 
process.env.WAYLAND_DISPLAY); - return true; -}; - -const packagedSingleInstanceAvailable = (): boolean => { - if (process.platform !== "darwin" || !appExe) return true; - try { - const lines = execFileSync("pgrep", ["-fl", "Executor.app/Contents/MacOS/Executor"], { - encoding: "utf8", - }) - .split("\n") - .filter(Boolean); - return !lines.some((line) => !line.includes(appExe)); - } catch { - return true; - } -}; - -const requireBundle = (): { readonly app: string; readonly executor: string } => { - if (!appExe || !executorBin) { - throw new Error( - "E2E_DESKTOP_APP_EXE / E2E_DESKTOP_EXECUTOR_BIN not set — did desktop-packaged.globalsetup run?", - ); - } - return { app: appExe, executor: executorBin }; +const nonLoopbackIpv4Address = () => { + const addresses = Object.values(networkInterfaces()) + .flatMap((entries) => entries ?? []) + .filter((entry) => entry.family === "IPv4" && !entry.internal); + return ( + addresses.find((entry) => !entry.address.startsWith("169.254."))?.address ?? + addresses[0]?.address ?? 
+ null + ); }; const currentUid = (): number => { @@ -259,40 +65,30 @@ const currentUid = (): number => { const serviceTarget = (): string => `gui/${currentUid()}/${SERVICE_LABEL}`; const launchAgentPath = (): string => join(homedir(), "Library", "LaunchAgents", `${SERVICE_LABEL}.plist`); -const isolatedDesktopSettingsDir = (home: string): string => - join(home, ".executor-desktop-settings"); const desktopSettingsDirs = (home: string): readonly string[] => { if (process.platform === "darwin") { const support = join(home, "Library", "Application Support"); return [ - isolatedDesktopSettingsDir(home), + packagedDesktopSettingsDir(home), join(support, "@executor-js", "desktop"), join(support, "Executor"), ]; } if (process.platform === "linux") { return [ - isolatedDesktopSettingsDir(home), + packagedDesktopSettingsDir(home), join(home, ".config", "@executor-js", "desktop"), join(home, ".config", "Executor"), ]; } const roaming = join(home, "AppData", "Roaming"); return [ - isolatedDesktopSettingsDir(home), + packagedDesktopSettingsDir(home), join(roaming, "@executor-js", "desktop"), join(roaming, "Executor"), ]; }; -const packagedAppEnv = (home: string): NodeJS.ProcessEnv => { - return { - ...process.env, - HOME: home, - EXECUTOR_DESKTOP_SETTINGS_DIR: isolatedDesktopSettingsDir(home), - }; -}; - interface LaunchdServiceSnapshot { readonly plist: string | null; readonly wasLoaded: boolean; @@ -340,157 +136,8 @@ const restoreLaunchdService = async (snapshot: LaunchdServiceSnapshot | null): P } }; -const freePort = (): Promise => - new Promise((resolve, reject) => { - const srv = net.createServer(); - srv.on("error", reject); - srv.listen(0, "127.0.0.1", () => { - const port = (srv.address() as net.AddressInfo).port; - srv.close(() => resolve(port)); - }); - }); - -interface DaemonStart { - readonly child: ChildProcess; - readonly ready: boolean; - readonly stderr: string; -} - -const startSupervisedDaemon = ( - env: NodeJS.ProcessEnv, - port: number, - hostname = 
"127.0.0.1", -): Promise => - new Promise((resolve) => { - const { executor } = requireBundle(); - const child = spawn( - executor, - ["daemon", "run", "--foreground", "--port", String(port), "--hostname", hostname], - { env, stdio: ["ignore", "pipe", "pipe"] }, - ); - let stderr = ""; - let settled = false; - const settle = (ready: boolean) => { - if (settled) return; - settled = true; - resolve({ child, ready, stderr }); - }; - const timer = setTimeout(() => settle(false), 60_000); - child.stdout.on("data", (chunk: Buffer) => { - if (/Daemon ready on http:\/\//.test(chunk.toString())) { - clearTimeout(timer); - settle(true); - } - }); - child.stderr.on("data", (chunk: Buffer) => { - stderr += chunk.toString(); - }); - child.on("exit", () => { - clearTimeout(timer); - settle(false); - }); - }); - -const stopProcess = async (child: ChildProcess | undefined): Promise => { - if (!child || child.exitCode !== null || child.signalCode !== null) return; - await new Promise((resolve) => { - const timeout = setTimeout(() => { - child.kill("SIGKILL"); - resolve(); - }, 5_000); - child.once("exit", () => { - clearTimeout(timeout); - resolve(); - }); - child.kill("SIGTERM"); - }); -}; - -const waitForPageWebSocket = async (debugPort: string): Promise => { - const deadline = Date.now() + 120_000; - for (;;) { - const targets = (await fetch(`http://127.0.0.1:${debugPort}/json/list`) - .then((response) => (response.ok ? 
response.json() : [])) - .catch(() => [])) as ReadonlyArray; - const page = targets.find( - (target) => - target.type === "page" && - target.webSocketDebuggerUrl && - !target.url.startsWith("devtools://"), - ); - if (page?.webSocketDebuggerUrl) return page.webSocketDebuggerUrl; - if (Date.now() >= deadline) { - throw new Error("Timed out waiting for packaged app page CDP target"); - } - await new Promise((resolve) => setTimeout(resolve, 100)); - } -}; - -const launchPackaged = async (home: string): Promise => { - const { app } = requireBundle(); - let output = ""; - let settled = false; - const child = spawn(app, ["--remote-debugging-port=0"], { - env: packagedAppEnv(home), - stdio: ["ignore", "pipe", "pipe"], - }); - - const browserCdpUrl = await new Promise((resolve, reject) => { - const timer = setTimeout(() => { - // oxlint-disable-next-line executor/no-promise-reject, executor/no-error-constructor -- boundary: packaged-app launch promise adapter - reject(new Error(`Timed out waiting for packaged app CDP URL\n${output}`)); - }, 120_000); - const settle = (fn: () => void) => { - if (settled) return; - settled = true; - clearTimeout(timer); - fn(); - }; - const collectOutput = (chunk: Buffer) => { - const text = chunk.toString(); - output = (output + text).slice(-16_384); - const match = output.match(/DevTools listening on (ws:\/\/[^\s]+)/); - if (match) settle(() => resolve(match[1])); - }; - child.stdout?.on("data", collectOutput); - child.stderr?.on("data", collectOutput); - // oxlint-disable-next-line executor/no-promise-reject -- boundary: packaged-app launch promise adapter - child.once("error", (error) => settle(() => reject(error))); - child.once("exit", (code, signal) => - settle(() => - // oxlint-disable-next-line executor/no-promise-reject, executor/no-error-constructor -- boundary: packaged-app launch promise adapter - reject( - new Error(`Packaged app exited before CDP (code=${code} signal=${signal})\n${output}`), - ), - ), - ); - }); - - const 
debugPort = new URL(browserCdpUrl).port; - const pageCdpUrl = await waitForPageWebSocket(debugPort); - const cdp = await CdpPage.connect(pageCdpUrl); - await cdp.command("Runtime.enable"); - await cdp.command("Page.enable"); - return { child, cdp, debugPort, output: () => output }; -}; - -const reconnectPackagedPage = async (app: PackagedApp): Promise => { - app.cdp.close(); - const pageCdpUrl = await waitForPageWebSocket(app.debugPort); - const cdp = await CdpPage.connect(pageCdpUrl); - await cdp.command("Runtime.enable"); - await cdp.command("Page.enable"); - app.cdp = cdp; - return cdp; -}; - -const closePackaged = async (app: PackagedApp | undefined): Promise => { - app?.cdp.close(); - await stopProcess(app?.child); -}; - const waitForServerConnectionLabel = async ( - page: CdpPage, + page: PackagedDesktopPage, expectedText: string, timeoutMs: number, ): Promise => { @@ -531,7 +178,7 @@ const settingsScrollFrameExpression = `(() => { }; })()`; -const assertDesktopSettingsScrolls = async (page: CdpPage): Promise => { +const assertDesktopSettingsScrolls = async (page: PackagedDesktopPage): Promise => { await page.setViewport(900, 420); await page.waitForExpression( `${settingsScrollFrameExpression} !== null`, @@ -564,7 +211,7 @@ const assertDesktopSettingsScrolls = async (page: CdpPage): Promise => { ); }; -const openDesktopSettings = async (page: CdpPage): Promise => { +const openDesktopSettings = async (page: PackagedDesktopPage): Promise => { const clicked = await page.evaluate(`(() => { const link = document.querySelector('a[href*="desktop-settings"]'); if (!(link instanceof HTMLAnchorElement)) return false; @@ -575,6 +222,216 @@ const openDesktopSettings = async (page: CdpPage): Promise => { await page.waitForText("Desktop server connection", 30_000); }; +const openServerProfiles = async (page: PackagedDesktopPage) => { + const alreadyOpen = await page.evaluate( + `document.querySelector('[data-slot="popover-content"][data-state="open"]') !== null`, + ); + 
const opened = + alreadyOpen || + (await page.evaluate(`(() => { + const trigger = document.querySelector('[aria-label^="Select Executor server:"]'); + if (!(trigger instanceof HTMLButtonElement)) return false; + trigger.click(); + return true; + })()`)); + expect(opened, "the packaged desktop app should expose the server profile trigger").toBe(true); + await page.waitForExpression( + `document.querySelector('[data-slot="popover-content"][data-state="open"]')?.textContent?.includes("Server profiles")`, + 30_000, + "the server profiles popover", + ); +}; + +const closeServerProfiles = async (page: PackagedDesktopPage) => { + const open = await page.evaluate( + `document.querySelector('[data-slot="popover-content"][data-state="open"]') !== null`, + ); + const clicked = + !open || + (await page.evaluate(`(() => { + const trigger = document.querySelector('[aria-label^="Select Executor server:"]'); + if (!(trigger instanceof HTMLButtonElement)) return false; + trigger.click(); + return true; + })()`)); + expect(clicked, "the server profile trigger should close its open popover").toBe(true); + await page.waitForExpression( + `document.querySelector('[data-slot="popover-content"][data-state="open"]') === null`, + 30_000, + "the server profiles popover to close", + ); +}; + +const clickServerProfileButton = async (page: PackagedDesktopPage, text: string) => { + const clicked = await page.evaluate(`(() => { + const content = document.querySelector('[data-slot="popover-content"][data-state="open"]'); + if (!(content instanceof HTMLElement)) return false; + const expected = ${JSON.stringify(text)}; + const button = Array.from(content.querySelectorAll("button")).find( + (candidate) => candidate.getClientRects().length > 0 && + candidate.textContent?.includes(expected), + ); + if (!(button instanceof HTMLButtonElement)) return false; + button.click(); + return true; + })()`); + expect(clicked, `the server profiles popover should contain a ${text} button`).toBe(true); +}; + 
+const setServerProfileFormControl = async ( + page: PackagedDesktopPage, + selector: string, + value: string, +) => { + const changed = await page.evaluate(`(() => { + const control = document.querySelector(${JSON.stringify(selector)}); + const nextValue = ${JSON.stringify(value)}; + const prototype = control instanceof HTMLSelectElement + ? HTMLSelectElement.prototype + : control instanceof HTMLInputElement + ? HTMLInputElement.prototype + : null; + const setter = prototype + ? Object.getOwnPropertyDescriptor(prototype, "value")?.set + : undefined; + if (!control || !setter) return false; + setter.call(control, nextValue); + control.dispatchEvent(new Event("input", { bubbles: true })); + control.dispatchEvent(new Event("change", { bubbles: true })); + return true; + })()`); + expect(changed, `the server profile form should expose ${selector}`).toBe(true); +}; + +const addServerProfile = async ( + page: PackagedDesktopPage, + input: { readonly origin: string; readonly name: string; readonly token: string }, +) => { + await openServerProfiles(page); + await clickServerProfileButton(page, "Custom server"); + await page.waitForExpression( + `document.querySelector('input[placeholder="https://executor.example"]') !== null`, + 30_000, + "the custom server form", + ); + await setServerProfileFormControl( + page, + 'input[placeholder="https://executor.example"]', + input.origin, + ); + await setServerProfileFormControl(page, 'input[placeholder="Remote executor"]', input.name); + await setServerProfileFormControl(page, "form select", "bearer"); + await page.waitForExpression( + `document.querySelector('form input[type="password"]') !== null`, + 30_000, + "the bearer token input", + ); + await setServerProfileFormControl(page, 'form input[type="password"]', input.token); + await clickServerProfileButton(page, "Add and use"); + await waitForServerConnectionLabel(page, input.name, 30_000); +}; + +const selectServerProfile = async (page: PackagedDesktopPage, name: string) => 
{ + await openServerProfiles(page); + await page.waitForExpression( + `document.querySelector('[data-slot="popover-content"][data-state="open"]')?.textContent?.includes(${JSON.stringify(name)})`, + 30_000, + `the ${name} profile to hydrate`, + ); + await clickServerProfileButton(page, name); + await waitForServerConnectionLabel(page, name, 30_000); +}; + +const expectServerProfileKind = async ( + page: PackagedDesktopPage, + name: string, + kind: "Local" | "Remote", +) => { + await openServerProfiles(page); + await page.waitForExpression( + `(() => { + const content = document.querySelector('[data-slot="popover-content"][data-state="open"]'); + if (!(content instanceof HTMLElement)) return false; + const button = Array.from(content.querySelectorAll("button")).find( + (candidate) => candidate.textContent?.includes(${JSON.stringify(name)}), + ); + return button?.parentElement?.textContent?.includes(${JSON.stringify(kind)}) ?? false; + })()`, + 30_000, + `the ${name} profile to be classified as ${kind}`, + ); +}; + +interface PersistedDesktopProfileProof { + readonly kind: string; + readonly key: string; + readonly origin: string; + readonly displayName: string; + readonly token: string | null; +} + +interface PersistedDesktopProfilesProof { + readonly activeKey: string | null; + readonly profiles: readonly PersistedDesktopProfileProof[]; +} + +const readPersistedDesktopProfiles = (page: PackagedDesktopPage) => + page.evaluate(`(() => { + const bridge = window.executor; + if (!bridge?.getServerProfiles) return { activeKey: null, profiles: [] }; + return bridge.getServerProfiles().then((raw) => { + const snapshot = JSON.parse(raw ?? '{"profiles":[]}'); + return { + activeKey: snapshot.activeKey ?? null, + profiles: (snapshot.profiles ?? []).map((profile) => ({ + kind: profile.kind ?? "http", + key: profile.key ?? "", + origin: profile.origin ?? "", + displayName: profile.displayName ?? "", + token: profile.auth?.kind === "bearer" ? 
profile.auth.token : null, + })), + }; + }); + })()`); + +const waitForPersistedDesktopProfiles = async ( + page: PackagedDesktopPage, + displayNames: readonly string[], + activeDisplayName?: string, +) => { + const deadline = Date.now() + 30_000; + for (;;) { + const snapshot = await readPersistedDesktopProfiles(page); + const active = snapshot.profiles.find((profile) => profile.key === snapshot.activeKey); + if ( + displayNames.every((name) => + snapshot.profiles.some((profile) => profile.displayName === name), + ) && + (activeDisplayName === undefined || active?.displayName === activeDisplayName) + ) { + return snapshot; + } + if (Date.now() >= deadline) { + throw new Error( + `Timed out waiting for persisted desktop profiles: ${displayNames.join(", ")}`, + ); + } + await new Promise((resolve) => setTimeout(resolve, 250)); + } +}; + +const expectIntegrationAccount = async ( + page: PackagedDesktopPage, + expected: string, + rejected: string, +) => { + await page.waitForText(expected, 30_000); + expect( + await page.textPresent(rejected), + `the ${expected} account must not render data from ${rejected}`, + ).toBe(false); +}; + const writeStaleActiveServerProfile = (input: { readonly home: string; readonly port: number; @@ -610,25 +467,24 @@ scenario( "Desktop packaged supervised daemon · server manifest is owner-only", { timeout: 180_000 }, Effect.promise(async () => { - requireBundle(); - const home = mkdtempSync(join(tmpdir(), "executor-pkg-manifest-mode-")); + requirePackagedDesktopBundle(); + const home = createPackagedDesktopHome("executor-pkg-manifest-mode-"); const dataDir = join(home, ".executor"); const manifestPath = join(dataDir, "server-control", "server.json"); const port = await freePort(); let daemon: ChildProcess | undefined; const previousUmask = process.umask(0o022); try { - const started = await startSupervisedDaemon( - { - ...process.env, - HOME: home, + const started = await startSupervisedDaemon({ + home, + port, + env: { 
EXECUTOR_SUPERVISED: "1", EXECUTOR_DATA_DIR: dataDir, EXECUTOR_AUTH_TOKEN: "manifest-mode-token", EXECUTOR_CLIENT: "desktop", }, - port, - ); + }); daemon = started.child; expect(started.ready, `supervised daemon became ready; stderr:\n${started.stderr}`).toBe( true, @@ -642,14 +498,22 @@ scenario( ).toBe("600"); } finally { process.umask(previousUmask); - daemon?.kill("SIGTERM"); - rmSync(home, { recursive: true, force: true }); + await stopProcess(daemon); + removePackagedDesktopHome(home); } }), ); -if (!guiAvailable() || !packagedSingleInstanceAvailable()) { - it.skip("Desktop packaged supervised attach security (needs a GUI display and no already-running Executor.app)", () => {}); +const desktopPreflight = packagedDesktopPreflight(); + +if (desktopPreflight.status === "skip") { + it.skip(`Desktop packaged supervised attach security (${desktopPreflight.reason})`, () => {}); +} else if (desktopPreflight.status === "fail") { + scenario( + "Desktop packaged supervised attach security preflight", + { timeout: 30_000 }, + Effect.die(desktopPreflight.reason), + ); } else { scenario( "Desktop packaged supervised attach · a slow live daemon does not look crashed", @@ -686,6 +550,15 @@ if (!guiAvailable() || !packagedSingleInstanceAvailable()) { yield* Effect.promise(() => runSupervisedIntegrationsLoad(runDir)); }), ); + + scenario( + "Desktop packaged server profiles · same-origin accounts stay isolated across restart", + { timeout: 360_000 }, + Effect.gen(function* () { + const runDir = yield* RunDir; + yield* Effect.promise(() => runServerProfileSwitching(runDir)); + }), + ); } const writeCliDaemonManifest = (input: { @@ -726,7 +599,7 @@ const writeCliDaemonManifest = (input: { const shellSingleQuote = (value: string): string => `'${value.replaceAll("'", "'\"'\"'")}'`; const withFailingBundledInstall = async (run: () => Promise): Promise => { - const { executor } = requireBundle(); + const { executor } = requirePackagedDesktopBundle(); const original = 
readFileSync(executor); const mode = statSync(executor).mode & 0o777; const backup = `${executor}.e2e-real`; @@ -756,7 +629,7 @@ const withFailingBundledInstall = async (run: () => Promise): Promise = }; const runInstallFailureFallsBackToManagedSidecar = async (runDir: string) => { - const home = mkdtempSync(join(tmpdir(), "executor-pkg-install-failure-fallback-")); + const home = createPackagedDesktopHome("executor-pkg-install-failure-fallback-"); const dataDir = join(home, ".executor"); const controlDir = join(dataDir, "server-control"); const token = "install-failure-fallback-token"; @@ -767,7 +640,7 @@ const runInstallFailureFallsBackToManagedSidecar = async (runDir: string) => { const sawHealthProbe = new Promise((resolve) => { resolveHealthProbe = resolve; }); - let app: PackagedApp | undefined; + let app: PackagedDesktopApp | undefined; let serverOpen = false; const server = createServer((req: IncomingMessage, res) => { @@ -813,11 +686,14 @@ const runInstallFailureFallsBackToManagedSidecar = async (runDir: string) => { }); await withFailingBundledInstall(async () => { - app = await launchPackaged(home); - const page = app.cdp; + const launched = await launchPackagedDesktop({ home }); + app = launched; + const page = launched.cdp; await sawHealthProbe; await page.waitForText("Settings", 120_000); - await page.screenshot(join(runDir, "01-fell-back-to-managed-sidecar.png")); + await launched.captureEvidence({ + rendererPath: join(runDir, "01-fell-back-to-managed-sidecar.png"), + }); const connection = await page.evaluate<{ readonly origin: string } | null>( "window.executor.getServerConnection()", @@ -834,15 +710,15 @@ const runInstallFailureFallsBackToManagedSidecar = async (runDir: string) => { ).not.toContain(`Bearer ${token}`); }); } finally { - await closePackaged(app); + await closePackagedDesktop(app); await restoreLaunchdService(launchdSnapshot); await closeServer(); - rmSync(home, { recursive: true, force: true }); + removePackagedDesktopHome(home); } 
}; const runSlowLiveDaemonProbe = async (runDir: string) => { - const home = mkdtempSync(join(tmpdir(), "executor-pkg-slow-live-daemon-")); + const home = createPackagedDesktopHome("executor-pkg-slow-live-daemon-"); const dataDir = join(home, ".executor"); const controlDir = join(dataDir, "server-control"); const manifestPath = join(controlDir, "server.json"); @@ -881,7 +757,7 @@ const runSlowLiveDaemonProbe = async (runDir: string) => { }); await new Promise((resolve) => server.listen(0, "127.0.0.1", resolve)); const port = (server.address() as net.AddressInfo).port; - let app: PackagedApp | undefined; + let app: PackagedDesktopApp | undefined; try { writeCliDaemonManifest({ @@ -892,10 +768,13 @@ const runSlowLiveDaemonProbe = async (runDir: string) => { token, }); - app = await launchPackaged(home); - const page = app.cdp; + const launched = await launchPackagedDesktop({ home }); + app = launched; + const page = launched.cdp; await page.waitForText("Fake Executor UI", 120_000); - await page.screenshot(join(runDir, "01-attached-to-fake-daemon.png")); + await launched.captureEvidence({ + rendererPath: join(runDir, "01-attached-to-fake-daemon.png"), + }); const firstHealthProbe = requests.find((request) => request.url.startsWith("/api/health")); expect( @@ -931,46 +810,48 @@ const runSlowLiveDaemonProbe = async (runDir: string) => { await page.textPresent("Fake Executor UI"), "the original renderer should stay loaded", ).toBe(true); - await page.screenshot(join(runDir, "02-still-rendering-after-slow-health.png")); + await launched.captureEvidence({ + rendererPath: join(runDir, "02-still-rendering-after-slow-health.png"), + }); } finally { - await closePackaged(app); + await closePackagedDesktop(app); await restoreLaunchdService(launchdSnapshot); await new Promise((resolve) => server.close(() => resolve())); - rmSync(home, { recursive: true, force: true }); + removePackagedDesktopHome(home); } }; const runSupervisedPortSetting = async (runDir: string) => { - const 
home = mkdtempSync(join(tmpdir(), "executor-pkg-port-setting-")); + const home = createPackagedDesktopHome("executor-pkg-port-setting-"); const dataDir = join(home, ".executor"); const launchdSnapshot = captureLaunchdService(); const oldPort = await freePort(); const newPort = await freePort(); let daemon: ChildProcess | undefined; - let app: PackagedApp | undefined; + let app: PackagedDesktopApp | undefined; try { - const started = await startSupervisedDaemon( - { - ...process.env, - HOME: home, + const started = await startSupervisedDaemon({ + home, + port: oldPort, + env: { EXECUTOR_SUPERVISED: "1", EXECUTOR_DATA_DIR: dataDir, EXECUTOR_AUTH_TOKEN: "port-setting-token", EXECUTOR_CLIENT: "desktop", }, - oldPort, - ); + }); daemon = started.child; expect(started.ready, `supervised daemon became ready; stderr:\n${started.stderr}`).toBe(true); await waitForHttp(`http://127.0.0.1:${oldPort}/`, { timeoutMs: 30_000 }); - app = await launchPackaged(home); - let page = app.cdp; + const launched = await launchPackagedDesktop({ home }); + app = launched; + let page = launched.cdp; await page.waitForText("Settings", 120_000); await openDesktopSettings(page); await assertDesktopSettingsScrolls(page); - await page.screenshot(join(runDir, "01-attached-settings.png")); + await launched.captureEvidence({ rendererPath: join(runDir, "01-attached-settings.png") }); const before = await page.evaluate<{ readonly origin: string } | null>( "window.executor.getServerConnection()", @@ -984,7 +865,7 @@ const runSupervisedPortSetting = async (runDir: string) => { await page .evaluate("window.executor.restartServer().catch(() => undefined)") .catch(() => undefined); - page = await reconnectPackagedPage(app); + page = await reconnectPackagedDesktopPage(launched); await page.waitForText("Settings", 120_000); const after = await page.evaluate<{ @@ -999,37 +880,38 @@ const runSupervisedPortSetting = async (runDir: string) => { new URL(after.connection!.origin).port, "after restart, the active 
supervised daemon should be serving on the saved port", ).toBe(String(newPort)); - await page.screenshot(join(runDir, "02-restarted-on-new-port.png")); + await launched.captureEvidence({ + rendererPath: join(runDir, "02-restarted-on-new-port.png"), + }); } finally { - await closePackaged(app); + await closePackagedDesktop(app); await stopProcess(daemon); await restoreLaunchdService(launchdSnapshot); - rmSync(home, { recursive: true, force: true }); + removePackagedDesktopHome(home); } }; const runSupervisedIntegrationsLoad = async (runDir: string) => { - const home = mkdtempSync(join(tmpdir(), "executor-pkg-integrations-load-")); + const home = createPackagedDesktopHome("executor-pkg-integrations-load-"); const dataDir = join(home, ".executor"); const launchdSnapshot = captureLaunchdService(); const port = await freePort(); let daemon: ChildProcess | undefined; - let app: PackagedApp | undefined; + let app: PackagedDesktopApp | undefined; try { writeStaleActiveServerProfile({ home, port }); - const started = await startSupervisedDaemon( - { - ...process.env, - HOME: home, + const started = await startSupervisedDaemon({ + home, + port, + hostname: "localhost", + env: { EXECUTOR_SUPERVISED: "1", EXECUTOR_DATA_DIR: dataDir, EXECUTOR_AUTH_TOKEN: "integrations-load-token", EXECUTOR_CLIENT: "desktop", }, - port, - "localhost", - ); + }); daemon = started.child; expect(started.ready, `supervised daemon became ready; stderr:\n${started.stderr}`).toBe(true); await waitForHttp(`http://localhost:${port}/`, { timeoutMs: 30_000 }); @@ -1047,8 +929,9 @@ const runSupervisedIntegrationsLoad = async (runDir: string) => { ).toBe("no-store"); await indexDocument.body?.cancel(); - app = await launchPackaged(home); - const page = app.cdp; + const launched = await launchPackagedDesktop({ home }); + app = launched; + const page = launched.cdp; const serverLabel = await waitForServerConnectionLabel(page, "Local Executor", 120_000); expect(serverLabel, "desktop must not auto-select a stale 
persisted server profile").toContain( @@ -1086,7 +969,9 @@ const runSupervisedIntegrationsLoad = async (runDir: string) => { expect(bootstrap.href, "desktop should strip bootstrap token params after load").not.toContain( "_token=", ); - await page.screenshot(join(runDir, "01-integrations-loaded.png")); + await launched.captureEvidence({ + rendererPath: join(runDir, "01-integrations-loaded.png"), + }); expect( await page.textPresent("Failed to load integrations").then((present) => (present ? 1 : 0)), "integrations should render from the attached daemon, not a cached 401/500 failure", @@ -1100,9 +985,278 @@ const runSupervisedIntegrationsLoad = async (runDir: string) => { "the packaged app is rendering data from the supervised daemon", ).toBe(String(port)); } finally { - await closePackaged(app); + await closePackagedDesktop(app); await stopProcess(daemon); await restoreLaunchdService(launchdSnapshot); - rmSync(home, { recursive: true, force: true }); + removePackagedDesktopHome(home); + } +}; + +const runServerProfileSwitching = async (runDir: string) => { + const home = createPackagedDesktopHome("executor-pkg-server-profiles-"); + const dataDir = join(home, ".executor"); + const launchdSnapshot = captureLaunchdService(); + const localPort = await freePort(); + const fixtureHost = nonLoopbackIpv4Address(); + if (!fixtureHost) { + throw new Error("Packaged desktop account switching requires a non-loopback IPv4 interface"); + } + const accountA = { + name: "Remote account A", + token: "desktop-profile-account-a", + marker: "Wire catalog alpha", + slug: "fixture-account-a", + }; + const accountB = { + name: "Remote account B", + token: "desktop-profile-account-b", + marker: "Wire catalog beta", + slug: "fixture-account-b", + }; + const requests: Array<{ + readonly method: string; + readonly url: string; + readonly authorization: string | null; + }> = []; + const integrationByAuthorization = new Map( + [accountA, accountB].map((account) => [ + `Bearer ${account.token}`, 
+ { + slug: account.slug, + name: account.marker, + description: `Bearer-specific catalog for ${account.name}`, + kind: "fixture", + canRemove: false, + canRefresh: false, + authMethods: [], + }, + ]), + ); + const fixture = createServer((req, res) => { + const method = req.method ?? "GET"; + const url = req.url ?? "/"; + const authorization = req.headers.authorization ?? null; + requests.push({ method, url, authorization }); + + res.setHeader("Access-Control-Allow-Origin", req.headers.origin ?? "*"); + res.setHeader( + "Access-Control-Allow-Headers", + req.headers["access-control-request-headers"] ?? + "authorization, content-type, x-executor-org, traceparent, baggage", + ); + res.setHeader("Access-Control-Allow-Methods", "GET, OPTIONS"); + res.setHeader("Access-Control-Allow-Private-Network", "true"); + res.setHeader("Cache-Control", "no-store"); + res.setHeader("Vary", "Origin, Access-Control-Request-Headers"); + + if (method === "OPTIONS") { + res.writeHead(204); + res.end(); + return; + } + + const pathname = new URL(url, "http://desktop-profile-fixture").pathname; + if (method !== "GET" || pathname !== "/api/integrations") { + res.writeHead(404, { "Content-Type": "application/json" }); + res.end(JSON.stringify({ message: "Not found" })); + return; + } + + const integration = authorization ? 
integrationByAuthorization.get(authorization) : undefined; + if (!integration) { + res.writeHead(401, { "Content-Type": "application/json" }); + res.end(JSON.stringify({ message: "Invalid bearer" })); + return; + } + + res.writeHead(200, { "Content-Type": "application/json" }); + res.end(JSON.stringify([integration])); + }); + let fixtureOpen = false; + let daemon: ChildProcess | undefined; + let app: PackagedDesktopApp | undefined; + + try { + await new Promise((resolve) => fixture.listen(0, fixtureHost, resolve)); + fixtureOpen = true; + const fixturePort = (fixture.address() as net.AddressInfo).port; + const fixtureOrigin = `http://${fixtureHost}:${fixturePort}`; + + const started = await startSupervisedDaemon({ + home, + port: localPort, + env: { + EXECUTOR_SUPERVISED: "1", + EXECUTOR_DATA_DIR: dataDir, + EXECUTOR_AUTH_TOKEN: "desktop-profile-local-token", + EXECUTOR_CLIENT: "desktop", + }, + }); + daemon = started.child; + expect(started.ready, `supervised daemon became ready; stderr:\n${started.stderr}`).toBe(true); + await waitForHttp(`http://127.0.0.1:${localPort}/`, { timeoutMs: 30_000 }); + + app = await launchPackagedDesktop({ home }); + let page = app.cdp; + await waitForServerConnectionLabel(page, "Local Executor", 120_000); + await page.waitForText("Integrations", 120_000); + + await addServerProfile(page, { + origin: fixtureOrigin, + name: accountA.name, + token: accountA.token, + }); + await expectIntegrationAccount(page, accountA.marker, accountB.marker); + await expectServerProfileKind(page, accountA.name, "Remote"); + await closeServerProfiles(page); + await app.captureEvidence({ + rendererPath: join(runDir, "01-account-a-catalog.png"), + }); + + await addServerProfile(page, { + origin: fixtureOrigin, + name: accountB.name, + token: accountB.token, + }); + await expectIntegrationAccount(page, accountB.marker, accountA.marker); + await closeServerProfiles(page); + await app.captureEvidence({ + rendererPath: join(runDir, 
"02-account-b-catalog.png"), + }); + + await selectServerProfile(page, accountA.name); + await expectIntegrationAccount(page, accountA.marker, accountB.marker); + await closeServerProfiles(page); + await app.captureEvidence({ + rendererPath: join(runDir, "03-account-a-restored.png"), + }); + + await selectServerProfile(page, "Local Executor"); + await page.waitForExpression( + `!document.body?.innerText.includes(${JSON.stringify(accountA.marker)}) && + !document.body?.innerText.includes(${JSON.stringify(accountB.marker)})`, + 30_000, + "the local sidecar catalog to replace remote account data", + ); + const localIntegrationsStatus = await page.evaluate(`(() => { + return window.executor.getServerConnection().then((connection) => { + if (!connection) return 0; + return fetch(new URL("/api/integrations", connection.origin)).then( + (response) => response.status, + ); + }); + })()`); + expect( + localIntegrationsStatus, + "the preserved local sidecar profile should remain usable", + ).toBe(200); + await openServerProfiles(page); + await page.waitForText(accountA.name, 30_000); + await page.waitForText(accountB.name, 30_000); + await app.captureEvidence({ + rendererPath: join(runDir, "04-local-sidecar-and-remote-profiles.png"), + }); + await closeServerProfiles(page); + + await selectServerProfile(page, accountB.name); + await expectIntegrationAccount(page, accountB.marker, accountA.marker); + await closeServerProfiles(page); + await app.captureEvidence({ + rendererPath: join(runDir, "05-account-b-before-restart.png"), + }); + + const beforeRestart = await waitForPersistedDesktopProfiles( + page, + [accountA.name, accountB.name], + accountB.name, + ); + const remoteBeforeRestart = beforeRestart.profiles.filter( + (profile) => profile.displayName === accountA.name || profile.displayName === accountB.name, + ); + expect( + beforeRestart.profiles.some((profile) => profile.kind === "desktop-sidecar"), + "the local sidecar profile should remain persisted while a remote 
account is active", + ).toBe(true); + expect(remoteBeforeRestart).toHaveLength(2); + expect(remoteBeforeRestart.every((profile) => profile.origin === fixtureOrigin)).toBe(true); + expect(new Set(remoteBeforeRestart.map((profile) => profile.key)).size).toBe(2); + expect(remoteBeforeRestart.every((profile) => profile.key.startsWith("profile:"))).toBe(true); + + await closePackagedDesktop(app); + app = undefined; + await waitForHttp(`http://127.0.0.1:${localPort}/`, { timeoutMs: 30_000 }); + + app = await launchPackagedDesktop({ home }); + page = app.cdp; + await waitForServerConnectionLabel(page, accountB.name, 120_000); + await expectIntegrationAccount(page, accountB.marker, accountA.marker); + const afterRestart = await waitForPersistedDesktopProfiles( + page, + [accountA.name, accountB.name], + accountB.name, + ); + const remoteAfterRestart = afterRestart.profiles + .filter( + (profile) => profile.displayName === accountA.name || profile.displayName === accountB.name, + ) + .sort((left, right) => left.displayName.localeCompare(right.displayName)); + expect( + afterRestart.profiles.some((profile) => profile.kind === "desktop-sidecar"), + "restoring the remote account must not remove the local sidecar profile", + ).toBe(true); + expect(remoteAfterRestart).toEqual([ + { + kind: "http", + key: remoteBeforeRestart.find((profile) => profile.displayName === accountA.name)!.key, + origin: fixtureOrigin, + displayName: accountA.name, + token: accountA.token, + }, + { + kind: "http", + key: remoteBeforeRestart.find((profile) => profile.displayName === accountB.name)!.key, + origin: fixtureOrigin, + displayName: accountB.name, + token: accountB.token, + }, + ]); + await expectServerProfileKind(page, accountB.name, "Remote"); + await page.waitForText("Local Executor", 30_000); + await app.captureEvidence({ + rendererPath: join(runDir, "06-account-b-restored-after-restart.png"), + }); + await closeServerProfiles(page); + + await selectServerProfile(page, accountA.name); + 
await expectIntegrationAccount(page, accountA.marker, accountB.marker); + await closeServerProfiles(page); + await app.captureEvidence({ + rendererPath: join(runDir, "07-account-a-after-restart.png"), + }); + + const integrationRequests = requests.filter( + (request) => + request.method === "GET" && + new URL(request.url, "http://desktop-profile-fixture").pathname === "/api/integrations", + ); + const authorizations = integrationRequests.map((request) => request.authorization); + expect(authorizations).toContain(`Bearer ${accountA.token}`); + expect(authorizations).toContain(`Bearer ${accountB.token}`); + expect( + authorizations.every( + (authorization) => + authorization === `Bearer ${accountA.token}` || + authorization === `Bearer ${accountB.token}`, + ), + "the remote fixture must never receive the local sidecar bearer", + ).toBe(true); + } finally { + await closePackagedDesktop(app); + await stopProcess(daemon); + await restoreLaunchdService(launchdSnapshot); + if (fixtureOpen) { + await new Promise((resolve) => fixture.close(() => resolve())); + } + removePackagedDesktopHome(home); } }; diff --git a/e2e/harness/boot-readiness.test.ts b/e2e/harness/boot-readiness.test.ts new file mode 100644 index 000000000..e7f4512fb --- /dev/null +++ b/e2e/harness/boot-readiness.test.ts @@ -0,0 +1,161 @@ +import { randomUUID } from "node:crypto"; +import { createServer } from "node:net"; +import { fileURLToPath } from "node:url"; + +import { describe, expect, it } from "@effect/vitest"; +import { Effect } from "effect"; + +import { + readCloudflareAccessHealth, + verifyCloudflareAccessEmulator, +} from "../src/cloudflare-access-emulator"; +import { claimPorts } from "../src/ports"; +import { bootProcesses, targetBootMode, waitForHttp } from "../setup/boot"; +import { requiredCloudflareAccessAttachUrl } from "../setup/cloudflare.globalsetup"; + +const accessEmulator = fileURLToPath( + new URL("../scripts/cloudflare-access-emulator.ts", import.meta.url), +); +const e2eDir = 
fileURLToPath(new URL("..", import.meta.url)); +const bun = process.versions.bun ? process.execPath : (process.env.E2E_BUN_BIN ?? "bun"); + +const occupiedPort = Effect.acquireRelease( + Effect.promise( + () => + new Promise<{ readonly port: number; readonly server: ReturnType }>( + (resolve) => { + const server = createServer(); + server.listen(0, "127.0.0.1", () => { + const address = server.address(); + if (!address || typeof address === "string") return; + resolve({ port: address.port, server }); + }); + }, + ), + ), + ({ server }) => + Effect.promise(() => new Promise((resolve) => server.close(() => resolve()))), +); + +describe("e2e boot mode and readiness", () => { + it("selects attach mode only from an explicit validated URL", () => { + expect(targetBootMode("E2E_FIXTURE_URL", {})).toEqual({ kind: "spawn" }); + expect( + targetBootMode("E2E_FIXTURE_URL", { + E2E_FIXTURE_URL: "https://executor.example.test/", + E2E_FIXTURE_PORT: "49999", + }), + ).toEqual({ kind: "attach", url: "https://executor.example.test" }); + expect(() => targetBootMode("E2E_FIXTURE_URL", { E2E_FIXTURE_URL: "localhost:49999" })).toThrow( + /http\(s\)/, + ); + }); + + it("requires a full Access issuer URL for Cloudflare attach mode", () => { + expect(() => + requiredCloudflareAccessAttachUrl({ E2E_CLOUDFLARE_ACCESS_TOKEN: "static-only" }), + ).toThrow(/requires E2E_CLOUDFLARE_ACCESS_URL/); + expect( + requiredCloudflareAccessAttachUrl({ + E2E_CLOUDFLARE_ACCESS_URL: "https://access.example.test/", + }), + ).toBe("https://access.example.test"); + }); + + it.effect("rejects an occupied explicitly pinned spawn port", () => + Effect.scoped( + Effect.gen(function* () { + const occupied = yield* occupiedPort; + const envVar = `E2E_HARNESS_PINNED_${randomUUID().replaceAll("-", "")}`; + process.env[envVar] = String(occupied.port); + + const result = yield* Effect.tryPromise({ + try: () => claimPorts([{ envVar, offset: 8, label: "occupied harness port" }]), + catch: (cause) => cause, + }).pipe( + 
Effect.matchEffect({ + onFailure: (error) => Effect.succeed(String(error)), + onSuccess: (claim) => + Effect.promise(() => claim.release()).pipe(Effect.as("unexpected success")), + }), + Effect.ensuring(Effect.sync(() => delete process.env[envVar])), + ); + + expect(result).toContain("is already listening"); + expect(result).toContain("use E2E__URL for attach mode"); + }), + ), + ); + + it.effect("fails readiness as soon as a spawned child exits", () => + Effect.scoped( + Effect.gen(function* () { + const procs = yield* Effect.acquireRelease( + Effect.sync(() => + bootProcesses( + [ + { + cmd: process.execPath, + args: ["--eval", "setTimeout(() => process.exit(23), 20)"], + cwd: e2eDir, + }, + ], + { label: "early-exit-test" }, + ), + ), + (booted) => Effect.promise(() => booted.teardown()), + ); + const neverReady = new Promise(() => undefined); + const error = yield* Effect.tryPromise({ + try: () => procs.waitUntilReady(neverReady), + catch: (cause) => cause, + }).pipe(Effect.flip); + expect(String(error)).toContain("stopped before readiness"); + expect(String(error)).toContain("exit 23"); + }), + ), + ); + + it.effect("identifies the exact Access emulator boot and proves its ledger", () => + Effect.scoped( + Effect.gen(function* () { + const envVar = `E2E_HARNESS_ACCESS_${randomUUID().replaceAll("-", "")}`; + const claim = yield* Effect.acquireRelease( + Effect.promise(() => + claimPorts([{ envVar, offset: 8, label: "Access emulator contract test" }]), + ), + (claimed) => Effect.promise(() => claimed.release()), + ); + const port = claim.ports[envVar]!; + const nonce = randomUUID(); + const procs = yield* Effect.acquireRelease( + Effect.sync(() => + bootProcesses( + [ + { + cmd: bun, + args: [accessEmulator, "--port", String(port), "--boot-nonce", nonce], + cwd: e2eDir, + }, + ], + { label: "access-emulator-test" }, + ), + ), + (booted) => Effect.promise(() => booted.teardown()), + ); + const baseUrl = `http://127.0.0.1:${port}`; + yield* Effect.promise(() => + 
procs.waitUntilReady(waitForHttp(`${baseUrl}/health`, { expectedStatus: 200 })), + ); + + const health = yield* Effect.promise(() => readCloudflareAccessHealth(baseUrl)); + expect(health.bootNonce).toBe(nonce); + const verified = yield* Effect.promise(() => + verifyCloudflareAccessEmulator(baseUrl, { expectedBootNonce: nonce }), + ); + expect(verified.bootNonce).toBe(nonce); + expect(verified.token.split(".")).toHaveLength(3); + }), + ), + ); +}); diff --git a/e2e/harness/evidence-hardening.test.ts b/e2e/harness/evidence-hardening.test.ts new file mode 100644 index 000000000..1b0ba63d6 --- /dev/null +++ b/e2e/harness/evidence-hardening.test.ts @@ -0,0 +1,731 @@ +import { execFile, spawnSync } from "node:child_process"; +import { randomUUID } from "node:crypto"; +import { + existsSync, + mkdirSync, + mkdtempSync, + readFileSync, + readdirSync, + rmSync, + utimesSync, + writeFileSync, +} from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; +import { fileURLToPath } from "node:url"; +import { promisify } from "node:util"; + +import { describe, expect, it } from "@effect/vitest"; +import { Effect } from "effect"; + +import { claimPorts } from "../src/ports"; +import { laneProvenanceFor, visualEvidencePublicationDecision } from "../src/evidence-provenance"; +import { + publishedArtifactFor, + sanitizePublishedJson, + sanitizePublishedValue, + syntheticVisualEvidenceDeclaration, +} from "../src/published-artifacts"; + +const execute = promisify(execFile); +const bun = process.versions.bun ? 
process.execPath : "bun"; +const sanitizer = fileURLToPath(new URL("../scripts/sanitize-evidence.ts", import.meta.url)); +const artifactModule = fileURLToPath(new URL("../src/artifact-io.ts", import.meta.url)); +const traceModule = fileURLToPath(new URL("../src/trace-harvest.ts", import.meta.url)); +const timelineModule = fileURLToPath(new URL("../src/timeline.ts", import.meta.url)); + +const writeLaneProvenance = (runDir: string, project: string, target: string): void => { + const provenance = laneProvenanceFor(project, target); + if (!provenance) throw new Error(`missing test lane provenance for ${project}/${target}`); + writeFileSync(join(runDir, "lane-provenance.json"), JSON.stringify(provenance)); +}; + +const withTemporaryDirectory = ( + use: (directory: string) => Effect.Effect, +): Effect.Effect => + Effect.acquireUseRelease( + Effect.sync(() => mkdtempSync(join(tmpdir(), "executor-evidence-test-"))), + use, + (directory) => Effect.sync(() => rmSync(directory, { recursive: true, force: true })), + ); + +describe("e2e evidence publication", () => { + it("allows only intentional review artifacts and recursively redacts JSON", () => { + expect(publishedArtifactFor("cloud/example/result.json")?.kind).toBe("json"); + expect(publishedArtifactFor("cloud/example/lane-provenance.json")?.kind).toBe("json"); + expect(publishedArtifactFor("publication.json")?.kind).toBe("json"); + expect(publishedArtifactFor("cloud/example/00-open-settings.png")?.kind).toBe("binary"); + expect( + publishedArtifactFor( + "desktop-kvm/2026-06-27T00-00-00-000Z-1234/renderer-after-settings-click.png", + )?.kind, + ).toBe("binary"); + expect(publishedArtifactFor("desktop-kvm/UnexpectedUppercase/failure.png")).toBeUndefined(); + expect(publishedArtifactFor("desktop/traffic-light/01-sidebar-header-webview.png")?.kind).toBe( + "binary", + ); + expect(publishedArtifactFor("desktop/traffic-light/01-sidebar-header-overlap.png")?.kind).toBe( + "binary", + ); + 
expect(publishedArtifactFor("cloud/example/mcporter.json")).toBeUndefined(); + expect(publishedArtifactFor("cloud/example/cli-home/server-connections.json")).toBeUndefined(); + expect(publishedArtifactFor("cloud/example/trace.zip")).toBeUndefined(); + expect(publishedArtifactFor("cloud/example/trace.zip", { includeRawTrace: true })?.mime).toBe( + "application/zip", + ); + + expect( + sanitizePublishedValue({ + accessToken: "top-secret", + email: "person@example.com", + nested: { url: "http://localhost/callback?code=secret-code&safe=yes" }, + path: "/home/example/Developer/executor", + }), + ).toEqual({ + accessToken: "[REDACTED]", + email: "[REDACTED]", + nested: { url: "http://localhost/callback?code=[REDACTED]&safe=yes" }, + path: "/home/[USER]/Developer/executor", + }); + + const result = sanitizePublishedJson( + "cloud/example/result.json", + JSON.stringify({ + error: "Authorization: Bearer secret-value", + artifacts: ["terminal.cast", "trace.zip", "mcporter.json", "00-proof.png"], + }), + ); + expect(result).not.toContain("secret-value"); + expect(JSON.parse(result)).toEqual({ + error: "Authorization: [REDACTED]", + artifacts: ["terminal.cast", "00-proof.png"], + }); + }); + + it.effect("sanitizes a publication tree in place and removes private state", () => + withTemporaryDirectory((runsDir) => + Effect.gen(function* () { + const canary = `canary-${randomUUID()}`; + const runSlug = `account-switch--${randomUUID()}`; + const runDir = join(runsDir, "cloud", runSlug); + mkdirSync(join(runDir, "cli-home"), { recursive: true }); + writeLaneProvenance(runDir, "cloud-hermetic", "cloud"); + writeFileSync(join(runsDir, "index.html"), "e2e"); + writeFileSync( + join(runDir, "result.json"), + JSON.stringify({ + scenario: "Account switch", + target: "cloud", + artifacts: ["terminal.cast", "trace.zip", "mcporter.json", "00-account-b.png"], + error: `Bearer ${canary}`, + visualEvidence: syntheticVisualEvidenceDeclaration, + }), + ); + writeFileSync( + join(runDir, 
"terminal.cast"), + `${JSON.stringify({ version: 2, title: canary })}\n${JSON.stringify([ + 0, + "o", + `Open http://127.0.0.1/?_token=${canary}`, + ])}\n`, + ); + writeFileSync(join(runDir, "executor.log"), `authorization: Bearer ${canary}\n`); + writeFileSync(join(runDir, "00-account-b.png"), Buffer.from("safe-image")); + writeFileSync(join(runDir, "trace.zip"), Buffer.from(canary)); + writeFileSync(join(runDir, "mcporter.json"), JSON.stringify({ token: canary })); + writeFileSync(join(runDir, "cli-home", "credentials.json"), canary); + + const kvmDir = join(runsDir, "desktop-kvm", "2026-06-27T00-00-00-000Z-1234"); + mkdirSync(kvmDir, { recursive: true }); + writeLaneProvenance(kvmDir, "desktop-kvm", "desktop-kvm"); + writeFileSync( + join(kvmDir, "result.json"), + JSON.stringify({ + scenario: "Desktop KVM account switching", + target: "desktop-kvm", + artifacts: ["renderer-after-settings-click.png", "session.mp4"], + visualEvidence: syntheticVisualEvidenceDeclaration, + }), + ); + writeFileSync(join(kvmDir, "renderer-after-settings-click.png"), "synthetic-kvm-image"); + writeFileSync(join(kvmDir, "session.mp4"), "synthetic-kvm-video"); + + yield* Effect.promise(() => + execute(bun, [ + sanitizer, + "--runs-dir", + runsDir, + "--canary", + canary, + "--trusted-project", + "cloud-hermetic", + "--trusted-project", + "desktop-kvm", + ]), + ); + + expect(existsSync(join(runDir, "trace.zip"))).toBe(false); + expect(existsSync(join(runDir, "mcporter.json"))).toBe(false); + expect(existsSync(join(runDir, "cli-home"))).toBe(false); + expect(existsSync(join(runDir, "00-account-b.png"))).toBe(true); + expect(existsSync(join(kvmDir, "renderer-after-settings-click.png"))).toBe(true); + expect(existsSync(join(kvmDir, "session.mp4"))).toBe(true); + const result = readFileSync(join(runDir, "result.json"), "utf8"); + expect(result).not.toContain(canary); + expect(result).not.toContain("trace.zip"); + expect(result).not.toContain("mcporter.json"); + 
expect(readFileSync(join(runDir, "terminal.cast"), "utf8")).not.toContain(canary); + expect(readFileSync(join(runDir, "executor.log"), "utf8")).not.toContain(canary); + expect(JSON.parse(readFileSync(join(runsDir, "publication.json"), "utf8"))).toMatchObject({ + schemaVersion: 1, + status: "passed", + policy: { + textAndJson: "redacted", + binaryVisuals: "unredacted-synthetic-only", + binarySecretDetection: "byte-canary-only", + }, + binaryArtifacts: [ + `cloud/${runSlug}/00-account-b.png`, + "desktop-kvm/2026-06-27T00-00-00-000Z-1234/renderer-after-settings-click.png", + "desktop-kvm/2026-06-27T00-00-00-000Z-1234/session.mp4", + ], + }); + }), + ), + ); + + it.effect("fails publication if a canary remains in an allowed binary", () => + withTemporaryDirectory((runsDir) => + Effect.sync(() => { + const canary = `binary-canary-${randomUUID()}`; + const runDir = join(runsDir, "cloud", `binary-proof--${randomUUID()}`); + mkdirSync(runDir, { recursive: true }); + writeLaneProvenance(runDir, "cloud-hermetic", "cloud"); + writeFileSync( + join(runDir, "result.json"), + JSON.stringify({ + scenario: "Binary canary", + target: "cloud", + artifacts: ["failure.png"], + visualEvidence: syntheticVisualEvidenceDeclaration, + }), + ); + writeFileSync(join(runDir, "failure.png"), Buffer.from(canary)); + const result = spawnSync( + bun, + [ + sanitizer, + "--runs-dir", + runsDir, + "--canary", + canary, + "--trusted-project", + "cloud-hermetic", + ], + { encoding: "utf8" }, + ); + expect(result.status).toBe(1); + expect(result.stderr).toContain("canary secret survived evidence sanitization"); + }), + ), + ); + + it.effect("rejects unredacted visual evidence without a synthetic-only declaration", () => + withTemporaryDirectory((runsDir) => + Effect.sync(() => { + const runDir = join(runsDir, "desktop", "unclassified-visual"); + mkdirSync(runDir, { recursive: true }); + writeFileSync( + join(runDir, "result.json"), + JSON.stringify({ scenario: "Unclassified visual", artifacts: 
["failure.png"] }), + ); + writeFileSync(join(runDir, "failure.png"), "image-with-unknown-data-source"); + + const result = spawnSync( + bun, + [sanitizer, "--runs-dir", runsDir, "--trusted-project", "desktop"], + { encoding: "utf8" }, + ); + + expect(result.status).toBe(1); + expect(result.stderr).toContain("lane provenance is missing or unreadable"); + expect(existsSync(join(runDir, "failure.png"))).toBe(false); + expect(JSON.parse(readFileSync(join(runDir, "result.json"), "utf8"))).toMatchObject({ + artifacts: [], + }); + expect(JSON.parse(readFileSync(join(runsDir, "publication.json"), "utf8"))).toMatchObject({ + status: "failed", + policy: { binaryVisuals: "unredacted-synthetic-only" }, + }); + }), + ), + ); + + it.effect("does not let a result stamp override potentially-sensitive lane provenance", () => + withTemporaryDirectory((runsDir) => + Effect.sync(() => { + const runDir = join(runsDir, "cloud", "live-provider-visual"); + mkdirSync(runDir, { recursive: true }); + const provenance = laneProvenanceFor("cloud", "cloud"); + expect(provenance).toBeDefined(); + writeLaneProvenance(runDir, "cloud", "cloud"); + const resultValue = { + scenario: "Live provider visual", + target: "cloud", + artifacts: ["failure.png"], + visualEvidence: syntheticVisualEvidenceDeclaration, + }; + writeFileSync(join(runDir, "result.json"), JSON.stringify(resultValue)); + writeFileSync(join(runDir, "failure.png"), "potentially-sensitive-image"); + + const decision = visualEvidencePublicationDecision( + resultValue, + provenance, + "cloud", + "cloud", + ); + expect(decision).toMatchObject({ + publish: false, + reason: expect.stringContaining("does not match lane classification"), + }); + expect( + visualEvidencePublicationDecision( + { + ...resultValue, + visualEvidence: { dataClassification: "potentially-sensitive" }, + }, + provenance, + "cloud", + "cloud", + ), + ).toMatchObject({ + publish: false, + reason: "lane cloud is potentially-sensitive", + }); + expect( + 
visualEvidencePublicationDecision( + resultValue, + { + schemaVersion: 1, + source: "e2e/src/project-matrix.ts", + project: "cloud", + target: "cloud", + hermetic: true, + dataClassification: "synthetic-only", + }, + "cloud", + "cloud", + ), + ).toMatchObject({ + publish: false, + reason: expect.stringContaining("does not match trusted project cloud"), + }); + expect( + visualEvidencePublicationDecision(resultValue, provenance, "selfhost", "selfhost"), + ).toMatchObject({ + publish: false, + reason: expect.stringContaining("does not match trusted project"), + }); + + const result = spawnSync( + bun, + [sanitizer, "--runs-dir", runsDir, "--trusted-project", "cloud"], + { encoding: "utf8" }, + ); + expect(result.status).toBe(1); + expect(result.stderr).toContain( + "result visual classification synthetic-only does not match lane classification potentially-sensitive", + ); + expect(existsSync(join(runDir, "failure.png"))).toBe(false); + }), + ), + ); + + it.effect("rejects a live lane that forges the hermetic project sharing its target", () => + withTemporaryDirectory((runsDir) => + Effect.sync(() => { + const runDir = join(runsDir, "cloud", "forged-hermetic-lane"); + mkdirSync(runDir, { recursive: true }); + writeLaneProvenance(runDir, "cloud-hermetic", "cloud"); + const resultValue = { + scenario: "Forged hermetic lane", + target: "cloud", + artifacts: ["failure.png"], + visualEvidence: syntheticVisualEvidenceDeclaration, + }; + writeFileSync(join(runDir, "result.json"), JSON.stringify(resultValue)); + writeFileSync(join(runDir, "failure.png"), "potentially-sensitive-image"); + const forged = laneProvenanceFor("cloud-hermetic", "cloud"); + + expect( + visualEvidencePublicationDecision(resultValue, forged, "cloud", "cloud"), + ).toMatchObject({ + publish: false, + reason: expect.stringContaining("does not match trusted project cloud"), + }); + + const result = spawnSync( + bun, + [sanitizer, "--runs-dir", runsDir, "--trusted-project", "cloud"], + { encoding: "utf8" 
}, + ); + expect(result.status).toBe(1); + expect(result.stderr).toContain( + "lane provenance does not match trusted project cloud: cloud/forged-hermetic-lane", + ); + expect(existsSync(join(runDir, "failure.png"))).toBe(false); + }), + ), + ); +}); + +describe("e2e evidence writers", () => { + it.effect( + "keeps a long synchronous owner alive with an independent heartbeat", + () => + withTemporaryDirectory((directory) => + Effect.gen(function* () { + const lockFile = join(directory, "heartbeat.json"); + const enteredFile = join(directory, "first-entered"); + const firstResult = join(directory, "first.json"); + const secondResult = join(directory, "second.json"); + const childEnv = { + ...process.env, + E2E_ARTIFACT_LOCK_STALE_MS: "80", + E2E_ARTIFACT_LOCK_TIMEOUT_MS: "3000", + }; + const worker = (resultFile: string, holdMs: number, enteredMarker?: string) => ` + import { writeFileSync } from "node:fs"; + import { withArtifactLockSync } from ${JSON.stringify(artifactModule)}; + const sleeper = new Int32Array(new SharedArrayBuffer(Int32Array.BYTES_PER_ELEMENT)); + let enteredAt = 0; + let exitedAt = 0; + withArtifactLockSync(${JSON.stringify(lockFile)}, () => { + enteredAt = Date.now(); + ${enteredMarker ? 
`writeFileSync(${JSON.stringify(enteredMarker)}, "entered");` : ""} + Atomics.wait(sleeper, 0, 0, ${holdMs}); + exitedAt = Date.now(); + }); + writeFileSync(${JSON.stringify(resultFile)}, JSON.stringify({ enteredAt, exitedAt })); + `; + + const first = execute(bun, ["--eval", worker(firstResult, 650, enteredFile)], { + env: childEnv, + }); + yield* Effect.promise(async () => { + const deadline = Date.now() + 2_000; + while (!existsSync(enteredFile)) { + if (Date.now() >= deadline) throw new Error("first lock owner never entered"); + await new Promise((resolve) => setTimeout(resolve, 10)); + } + await new Promise((resolve) => setTimeout(resolve, 180)); + }); + const second = execute(bun, ["--eval", worker(secondResult, 10)], { env: childEnv }); + yield* Effect.promise(() => Promise.all([first, second])); + + const firstInterval = JSON.parse(readFileSync(firstResult, "utf8")) as { + enteredAt: number; + exitedAt: number; + }; + const secondInterval = JSON.parse(readFileSync(secondResult, "utf8")) as { + enteredAt: number; + exitedAt: number; + }; + expect(secondInterval.enteredAt).toBeGreaterThanOrEqual(firstInterval.exitedAt); + }), + ), + { timeout: 10_000 }, + ); + + it.effect( + "does not reclaim a live owner when its heartbeat is stale", + () => + withTemporaryDirectory((directory) => + Effect.gen(function* () { + const lockFile = join(directory, "live-owner.json"); + const lockDir = `${lockFile}.lock`; + const readyFile = join(directory, "live-owner-ready"); + const ownerResult = join(directory, "live-owner-result.json"); + const contenderResult = join(directory, "live-owner-contender.json"); + const owner = execute(bun, [ + "--eval", + ` + import { mkdirSync, utimesSync, writeFileSync } from "node:fs"; + const sleeper = new Int32Array(new SharedArrayBuffer(Int32Array.BYTES_PER_ELEMENT)); + const lockDir = ${JSON.stringify(lockDir)}; + mkdirSync(lockDir); + writeFileSync( + lockDir + "/owner", + JSON.stringify({ schemaVersion: 1, token: "live-stalled-owner", 
pid: process.pid }), + ); + const heartbeat = lockDir + "/heartbeat"; + writeFileSync(heartbeat, "live-stalled-owner"); + const staleTime = new Date(Date.now() - 10_000); + utimesSync(heartbeat, staleTime, staleTime); + writeFileSync(${JSON.stringify(readyFile)}, "ready"); + Atomics.wait(sleeper, 0, 0, 500); + writeFileSync(${JSON.stringify(ownerResult)}, JSON.stringify({ exitedAt: Date.now() })); + `, + ]); + yield* Effect.promise(async () => { + const deadline = Date.now() + 2_000; + while (!existsSync(readyFile)) { + if (Date.now() >= deadline) throw new Error("live owner never became ready"); + await new Promise((resolve) => setTimeout(resolve, 10)); + } + }); + + const contender = execute( + bun, + [ + "--eval", + ` + import { writeFileSync } from "node:fs"; + import { withArtifactLockSync } from ${JSON.stringify(artifactModule)}; + let enteredAt = 0; + withArtifactLockSync(${JSON.stringify(lockFile)}, () => { + enteredAt = Date.now(); + }); + writeFileSync(${JSON.stringify(contenderResult)}, JSON.stringify({ enteredAt })); + `, + ], + { + env: { + ...process.env, + E2E_ARTIFACT_LOCK_STALE_MS: "80", + E2E_ARTIFACT_LOCK_TIMEOUT_MS: "3000", + }, + }, + ); + yield* Effect.promise(() => Promise.all([owner, contender])); + + const { exitedAt } = JSON.parse(readFileSync(ownerResult, "utf8")) as { + exitedAt: number; + }; + const { enteredAt } = JSON.parse(readFileSync(contenderResult, "utf8")) as { + enteredAt: number; + }; + expect(enteredAt).toBeGreaterThanOrEqual(exitedAt); + }), + ), + { timeout: 10_000 }, + ); + + it.effect( + "recovers one stale owner without admitting overlapping contenders", + () => + withTemporaryDirectory((directory) => + Effect.gen(function* () { + const lockFile = join(directory, "stale.json"); + const lockDir = `${lockFile}.lock`; + mkdirSync(lockDir); + writeFileSync(join(lockDir, "owner"), "stale-owner"); + const heartbeat = join(lockDir, "heartbeat"); + writeFileSync(heartbeat, "stale-owner"); + const staleTime = new Date(Date.now() 
- 10_000); + utimesSync(heartbeat, staleTime, staleTime); + + const childEnv = { + ...process.env, + E2E_ARTIFACT_LOCK_STALE_MS: "80", + E2E_ARTIFACT_LOCK_TIMEOUT_MS: "5000", + }; + const resultFiles = Array.from({ length: 8 }, (_, index) => + join(directory, `contender-${index}.json`), + ); + yield* Effect.promise(() => + Promise.all( + resultFiles.map((resultFile) => + execute( + bun, + [ + "--eval", + ` + import { writeFileSync } from "node:fs"; + import { withArtifactLockSync } from ${JSON.stringify(artifactModule)}; + const sleeper = new Int32Array(new SharedArrayBuffer(Int32Array.BYTES_PER_ELEMENT)); + let enteredAt = 0; + let exitedAt = 0; + withArtifactLockSync(${JSON.stringify(lockFile)}, () => { + enteredAt = Date.now(); + Atomics.wait(sleeper, 0, 0, 35); + exitedAt = Date.now(); + }); + writeFileSync(${JSON.stringify(resultFile)}, JSON.stringify({ enteredAt, exitedAt })); + `, + ], + { env: childEnv }, + ), + ), + ), + ); + + const intervals = resultFiles + .map( + (resultFile) => + JSON.parse(readFileSync(resultFile, "utf8")) as { + enteredAt: number; + exitedAt: number; + }, + ) + .sort((left, right) => left.enteredAt - right.enteredAt); + for (let index = 1; index < intervals.length; index += 1) { + expect(intervals[index]!.enteredAt).toBeGreaterThanOrEqual( + intervals[index - 1]!.exitedAt, + ); + } + expect( + readdirSync(directory).filter((name) => + name.startsWith("stale.json.lock.tombstone-stale-owner-"), + ), + ).toHaveLength(1); + }), + ), + { timeout: 15_000 }, + ); + + it.effect("archives an abandoned recovery fence before admitting the next owner", () => + withTemporaryDirectory((directory) => + Effect.gen(function* () { + const lockFile = join(directory, "abandoned-recovery.json"); + const lockDir = `${lockFile}.lock`; + const recoveryDir = `${lockDir}.reclaim-stale-owner`; + mkdirSync(join(recoveryDir, "lock"), { recursive: true }); + writeFileSync(join(recoveryDir, "owner"), "abandoned-reclaimer"); + const recoveryHeartbeat = 
join(recoveryDir, "heartbeat"); + writeFileSync(recoveryHeartbeat, "abandoned-reclaimer"); + writeFileSync(join(recoveryDir, "lock", "owner"), "stale-owner"); + writeFileSync(join(recoveryDir, "lock", "heartbeat"), "stale-owner"); + const staleTime = new Date(Date.now() - 10_000); + utimesSync(recoveryHeartbeat, staleTime, staleTime); + + const resultFile = join(directory, "success.json"); + yield* Effect.promise(() => + execute( + bun, + [ + "--eval", + ` + import { writeFileSync } from "node:fs"; + import { withArtifactLockSync } from ${JSON.stringify(artifactModule)}; + withArtifactLockSync(${JSON.stringify(lockFile)}, () => { + writeFileSync(${JSON.stringify(resultFile)}, JSON.stringify({ entered: true })); + }); + `, + ], + { + env: { + ...process.env, + E2E_ARTIFACT_LOCK_STALE_MS: "80", + E2E_ARTIFACT_LOCK_TIMEOUT_MS: "3000", + }, + }, + ), + ); + + expect(JSON.parse(readFileSync(resultFile, "utf8"))).toEqual({ entered: true }); + expect( + readdirSync(directory).some((name) => + name.startsWith("abandoned-recovery.json.lock.tombstone-recovery-abandoned-reclaimer-"), + ), + ).toBe(true); + }), + ), + ); + + it.effect( + "preserves every trace and navigation from concurrent worker processes", + () => + withTemporaryDirectory((runDir) => + Effect.gen(function* () { + const workers = 8; + const entriesPerWorker = 12; + const scripts = Array.from( + { length: workers }, + (_, worker) => ` + import { appendTraces } from ${JSON.stringify(traceModule)}; + import { markNavigation } from ${JSON.stringify(timelineModule)}; + const runDir = ${JSON.stringify(runDir)}; + for (let index = 0; index < ${entriesPerWorker}; index += 1) { + appendTraces(runDir, [{ + id: String(${worker}).padStart(2, "0") + String(index).padStart(30, "0"), + at: ${worker * entriesPerWorker} + index, + url: "http://localhost/api?token=worker-secret-${worker}", + }]); + } + markNavigation(runDir, "http://localhost/worker/${worker}?code=worker-secret-${worker}"); + `, + ); + yield* Effect.all( + 
scripts.map((script) => Effect.promise(() => execute(bun, ["--eval", script]))), + { concurrency: "unbounded" }, + ); + + const traces = JSON.parse(readFileSync(join(runDir, "traces.json"), "utf8")) as Array<{ + attemptId: string; + invocationId: string; + sequence: number; + url: string; + }>; + expect(traces).toHaveLength(workers * entriesPerWorker); + expect(new Set(traces.map((entry) => entry.sequence)).size).toBe(traces.length); + expect(new Set(traces.map((entry) => entry.attemptId)).size).toBe(1); + expect(new Set(traces.map((entry) => entry.invocationId)).size).toBe(workers); + expect(traces.map((entry) => entry.url).join("\n")).not.toContain("worker-secret"); + + const timeline = JSON.parse(readFileSync(join(runDir, "timeline.json"), "utf8")) as { + evidence: { attemptId: string; invocationIds: string[] }; + nav: Array<{ url: string }>; + }; + expect(timeline.nav).toHaveLength(workers); + expect(new Set(timeline.evidence.invocationIds).size).toBe(workers); + expect(timeline.nav.map((entry) => entry.url).join("\n")).not.toContain("worker-secret"); + }), + ), + { timeout: 30_000 }, + ); +}); + +describe("e2e port claims", () => { + it.effect("keeps shared block locks until the last claim and relocates offset conflicts", () => { + const suffix = randomUUID().replaceAll("-", "").toUpperCase(); + const envA = `E2E_HARNESS_${suffix}_A`; + const envB = `E2E_HARNESS_${suffix}_B`; + const envC = `E2E_HARNESS_${suffix}_C`; + const envD = `E2E_HARNESS_${suffix}_D`; + + return Effect.gen(function* () { + yield* Effect.scoped( + Effect.gen(function* () { + const a = yield* Effect.acquireRelease( + Effect.promise(() => claimPorts([{ envVar: envA, offset: 6, label: "harness-a" }])), + (claim) => Effect.promise(() => claim.release()), + ); + const b = yield* Effect.acquireRelease( + Effect.promise(() => claimPorts([{ envVar: envB, offset: 7, label: "harness-b" }])), + (claim) => Effect.promise(() => claim.release()), + ); + expect(Math.floor(a.ports[envA]! 
/ 10)).toBe(Math.floor(b.ports[envB]! / 10)); + + yield* Effect.promise(() => a.release()); + const c = yield* Effect.acquireRelease( + Effect.promise(() => claimPorts([{ envVar: envC, offset: 6, label: "harness-c" }])), + (claim) => Effect.promise(() => claim.release()), + ); + expect(Math.floor(c.ports[envC]! / 10)).toBe(Math.floor(b.ports[envB]! / 10)); + + const d = yield* Effect.acquireRelease( + Effect.promise(() => claimPorts([{ envVar: envD, offset: 7, label: "harness-d" }])), + (claim) => Effect.promise(() => claim.release()), + ); + expect(Math.floor(d.ports[envD]! / 10)).not.toBe(Math.floor(b.ports[envB]! / 10)); + }), + ); + + expect(process.env[envA]).toBeUndefined(); + expect(process.env[envB]).toBeUndefined(); + expect(process.env[envC]).toBeUndefined(); + expect(process.env[envD]).toBeUndefined(); + }); + }); +}); diff --git a/e2e/harness/evidence-merge.test.ts b/e2e/harness/evidence-merge.test.ts new file mode 100644 index 000000000..09cfe8364 --- /dev/null +++ b/e2e/harness/evidence-merge.test.ts @@ -0,0 +1,100 @@ +import { mkdtempSync, mkdirSync, readFileSync, readdirSync, rmSync, writeFileSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; + +import { describe, expect, it } from "@effect/vitest"; +import { Effect } from "effect"; + +import { mergeEvidenceArtifacts } from "../src/evidence-merge"; +import { buildManifest } from "../src/viewer/manifest"; + +const writeAttempt = (artifactDir: string, attemptId: string, marker: string, scenario: string) => { + const attemptDir = join(artifactDir, "cloud", "account-switch"); + mkdirSync(attemptDir, { recursive: true }); + writeFileSync( + join(attemptDir, "evidence.json"), + JSON.stringify({ schemaVersion: 1, attemptId, createdAt: 1, updatedAt: 1, invocations: [] }), + ); + writeFileSync( + join(attemptDir, "result.json"), + JSON.stringify({ scenario, target: "cloud", attemptId, ok: true, endedAt: 1 }), + ); + writeFileSync(join(attemptDir, `${marker}.png`), 
marker); +}; + +describe("e2e evidence aggregation", () => { + it.effect("preserves colliding attempt directories and rebuilds one manifest", () => + Effect.acquireUseRelease( + Effect.sync(() => mkdtempSync(join(tmpdir(), "executor-evidence-merge-"))), + (temporary) => + Effect.sync(() => { + const inputDir = join(temporary, "artifacts"); + const outputDir = join(temporary, "runs"); + const portable = join(inputDir, "e2e-cloud-hermetic-3"); + const live = join(inputDir, "e2e-live-cloud-3"); + writeAttempt(portable, "portable-attempt", "portable", "Portable account switch"); + writeAttempt(live, "live-attempt", "live", "Live account switch"); + mkdirSync(join(portable, "assets"), { recursive: true }); + writeFileSync(join(portable, "manifest.json"), "stale manifest"); + + const merged = mergeEvidenceArtifacts({ inputDir, outputDir, runAttempt: "3" }); + expect(merged).toMatchObject({ + artifactCount: 2, + attemptCount: 2, + collisionCount: 1, + }); + expect(merged.trustedRuns).toEqual( + expect.arrayContaining([ + { target: "cloud", slug: "account-switch", project: "cloud-hermetic" }, + { + target: "cloud", + slug: "account-switch--live-attempt", + project: "cloud", + }, + ]), + ); + + const attempts = readdirSync(join(outputDir, "cloud")).sort(); + expect(attempts).toEqual(["account-switch", "account-switch--live-attempt"]); + expect(readFileSync(join(outputDir, "cloud", attempts[0], "portable.png"), "utf8")).toBe( + "portable", + ); + expect(readFileSync(join(outputDir, "cloud", attempts[1], "live.png"), "utf8")).toBe( + "live", + ); + + buildManifest(outputDir); + const manifest = JSON.parse(readFileSync(join(outputDir, "manifest.json"), "utf8")); + expect(manifest.runs).toEqual( + expect.arrayContaining([ + expect.objectContaining({ + scenario: "Live account switch", + slug: "account-switch--live-attempt", + }), + expect.objectContaining({ + scenario: "Portable account switch", + slug: "account-switch", + }), + ]), + ); + }), + (temporary) => Effect.sync(() => 
rmSync(temporary, { recursive: true, force: true })), + ), + ); + + it.effect("fails closed when downloaded artifacts contain no attempts", () => + Effect.acquireUseRelease( + Effect.sync(() => mkdtempSync(join(tmpdir(), "executor-evidence-empty-"))), + (temporary) => + Effect.sync(() => { + const inputDir = join(temporary, "artifacts"); + const outputDir = join(temporary, "runs"); + mkdirSync(join(inputDir, "e2e-empty-3"), { recursive: true }); + expect(() => mergeEvidenceArtifacts({ inputDir, outputDir, runAttempt: "3" })).toThrow( + "found no attempt directories", + ); + }), + (temporary) => Effect.sync(() => rmSync(temporary, { recursive: true, force: true })), + ), + ); +}); diff --git a/e2e/harness/evidence-publication.test.ts b/e2e/harness/evidence-publication.test.ts new file mode 100644 index 000000000..709bcd95d --- /dev/null +++ b/e2e/harness/evidence-publication.test.ts @@ -0,0 +1,241 @@ +import { mkdtempSync, mkdirSync, readFileSync, rmSync, writeFileSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; + +import { describe, expect, it } from "@effect/vitest"; +import { Effect } from "effect"; + +import { laneProvenanceFor } from "../src/evidence-provenance"; +import { + evidenceRunUrl, + evidenceSummaryMarkdown, + evidenceViewerUrl, + latestSummaryRuns, + r2ObjectUrl, + summaryRunsFromManifest, + validateEvidenceBundle, + verifyPublishedEvidence, +} from "../src/evidence-publication"; +import { TRUSTED_RUN_LANES_SOURCE, type TrustedRunLanes } from "../src/evidence-trust"; + +const sourceRevision = "0123456789abcdef"; + +const trustedRuns = (project = "cloud-hermetic"): TrustedRunLanes => ({ + schemaVersion: 1, + source: TRUSTED_RUN_LANES_SOURCE, + runAttempt: "1", + runs: [{ target: "cloud", slug: "account-switch", project }], +}); + +const publication = (binaryArtifacts: ReadonlyArray = []) => ({ + schemaVersion: 1, + sanitizedAt: 1, + status: "passed", + sanitizer: { + source: 
"e2e/scripts/sanitize-evidence.ts", + policyVersion: 1, + sourceRevision, + }, + policy: { + unknownArtifacts: "removed", + textAndJson: "redacted", + binaryVisuals: "unredacted-synthetic-only", + binarySecretDetection: "byte-canary-only", + }, + runtime: { name: "bun", version: "1.3.11", platform: "linux", arch: "x64" }, + stats: { removed: 0, redacted: 2, retained: 1, canariesChecked: 0 }, + binaryArtifacts, + errors: [], +}); + +const writeBundle = (root: string) => { + const runDirectory = join(root, "cloud", "account-switch"); + mkdirSync(runDirectory, { recursive: true }); + writeFileSync(join(root, "index.html"), '
'); + writeFileSync( + join(root, "manifest.json"), + JSON.stringify({ + generatedAt: 1, + runs: [ + { + scenario: "Account switch", + target: "cloud", + slug: "account-switch", + ok: true, + endedAt: 2, + artifacts: [{ name: "result.json", kind: "json" }], + }, + ], + skips: [], + }), + ); + writeFileSync(join(root, "publication.json"), JSON.stringify(publication())); + writeFileSync( + join(runDirectory, "lane-provenance.json"), + JSON.stringify(laneProvenanceFor("cloud-hermetic", "cloud")), + ); + writeFileSync( + join(runDirectory, "result.json"), + JSON.stringify({ scenario: "Account switch", target: "cloud", ok: true, artifacts: [] }), + ); +}; + +describe("evidence static publication", () => { + it.effect("validates a sanitized bundle and fails closed on post-sanitize files", () => + Effect.acquireUseRelease( + Effect.sync(() => mkdtempSync(join(tmpdir(), "executor-evidence-publication-"))), + (temporary) => + Effect.sync(() => { + writeBundle(temporary); + const bundle = validateEvidenceBundle(temporary, sourceRevision, trustedRuns()); + expect(bundle.files.map((file) => file.relativePath).sort()).toEqual([ + "cloud/account-switch/lane-provenance.json", + "cloud/account-switch/result.json", + "index.html", + "manifest.json", + "publication.json", + ]); + + writeFileSync(join(temporary, "credentials.json"), "private"); + expect(() => validateEvidenceBundle(temporary, sourceRevision, trustedRuns())).toThrow( + "private artifact", + ); + }), + (temporary) => Effect.sync(() => rmSync(temporary, { recursive: true, force: true })), + ), + ); + + it.effect("rejects forged hermetic provenance against external live-lane metadata", () => + Effect.acquireUseRelease( + Effect.sync(() => mkdtempSync(join(tmpdir(), "executor-evidence-forgery-"))), + (temporary) => + Effect.sync(() => { + writeBundle(temporary); + expect(() => + validateEvidenceBundle(temporary, sourceRevision, trustedRuns("cloud")), + ).toThrow("publication lane provenance does not match external trusted 
project cloud"); + }), + (temporary) => Effect.sync(() => rmSync(temporary, { recursive: true, force: true })), + ), + ); + + it.effect("requires the sanitizer's binary inventory to match visual evidence", () => + Effect.acquireUseRelease( + Effect.sync(() => mkdtempSync(join(tmpdir(), "executor-evidence-visual-"))), + (temporary) => + Effect.sync(() => { + writeBundle(temporary); + writeFileSync(join(temporary, "cloud", "account-switch", "failure.png"), "png"); + expect(() => validateEvidenceBundle(temporary, sourceRevision, trustedRuns())).toThrow( + "binary artifact inventory", + ); + writeFileSync( + join(temporary, "publication.json"), + JSON.stringify(publication(["cloud/account-switch/failure.png"])), + ); + expect( + validateEvidenceBundle(temporary, sourceRevision, trustedRuns()).files, + ).toHaveLength(6); + }), + (temporary) => Effect.sync(() => rmSync(temporary, { recursive: true, force: true })), + ), + ); + + it.effect("reads back the public index and control manifests before surfacing a URL", () => + Effect.acquireUseRelease( + Effect.sync(() => mkdtempSync(join(tmpdir(), "executor-evidence-readback-"))), + (temporary) => + Effect.gen(function* () { + writeBundle(temporary); + const bundle = validateEvidenceBundle(temporary, sourceRevision, trustedRuns()); + const byName = new Map(bundle.files.map((file) => [file.relativePath, file])); + const requests: string[] = []; + const verification = yield* Effect.promise(() => + verifyPublishedEvidence({ + viewerUrl: "https://previews.example.test/e2e/run-1/index.html", + files: bundle.files, + attempts: 2, + retryDelayMs: 0, + fetcher: async (url) => { + requests.push(url); + const relativePath = new URL(url).pathname.split("/").at(-1) ?? 
""; + const file = byName.get(relativePath); + if (!file) return new Response("missing", { status: 404 }); + if (relativePath === "manifest.json" && requests.length === 1) { + return new Response("not ready", { status: 404 }); + } + return new Response(new Uint8Array(readFileSync(file.absolutePath)), { + headers: { "content-type": file.artifact.mime }, + }); + }, + }), + ); + expect(verification).toEqual({ verifiedFiles: 3 }); + expect(requests).toEqual([ + "https://previews.example.test/e2e/run-1/manifest.json", + "https://previews.example.test/e2e/run-1/manifest.json", + "https://previews.example.test/e2e/run-1/publication.json", + "https://previews.example.test/e2e/run-1/index.html", + ]); + }), + (temporary) => Effect.sync(() => rmSync(temporary, { recursive: true, force: true })), + ), + ); + + it("builds immutable CDN paths and canonical direct run links", () => { + const viewerUrl = evidenceViewerUrl( + "https://previews.example.test/executor/", + "e2e/repo-42/pr-7/run-100/attempt-2", + ); + expect(viewerUrl).toBe( + "https://previews.example.test/executor/e2e/repo-42/pr-7/run-100/attempt-2/index.html", + ); + expect(evidenceRunUrl(viewerUrl, "cloud", "account-switch")).toBe( + `${viewerUrl}#/run/cloud/account-switch`, + ); + expect( + r2ObjectUrl( + "https://account.r2.cloudflarestorage.com", + "executor-previews", + "e2e/repo-42/pr-7/run-100/attempt-2", + "assets/index-AbC123.js", + ), + ).toBe( + "https://account.r2.cloudflarestorage.com/executor-previews/e2e/repo-42/pr-7/run-100/attempt-2/assets/index-AbC123.js", + ); + }); + + it("emits the same latest run per scenario and target as the viewer matrix", () => { + const viewerUrl = "https://previews.example.test/e2e/run-1/index.html"; + const runs = summaryRunsFromManifest({ + runs: [ + { + scenario: "Account | switch [primary]", + target: "cloud", + slug: "old-run", + ok: false, + endedAt: 1, + }, + { + scenario: "Account | switch [primary]", + target: "cloud", + slug: "new-run", + ok: true, + endedAt: 
2, + }, + { + scenario: "Account | switch [primary]", + target: "selfhost", + slug: "selfhost-run", + ok: true, + endedAt: 1, + }, + ], + }); + expect(latestSummaryRuns(runs).map((run) => run.slug)).toEqual(["new-run", "selfhost-run"]); + const markdown = evidenceSummaryMarkdown(viewerUrl, runs); + expect(markdown).toContain("Account \\| switch \\[primary\\]"); + expect(markdown).toContain(`${viewerUrl}#/run/cloud/new-run`); + expect(markdown).not.toContain("old-run"); + }); +}); diff --git a/e2e/harness/packaged-desktop-environment.test.ts b/e2e/harness/packaged-desktop-environment.test.ts new file mode 100644 index 000000000..0f7056097 --- /dev/null +++ b/e2e/harness/packaged-desktop-environment.test.ts @@ -0,0 +1,31 @@ +import { describe, expect, it } from "@effect/vitest"; + +import { selectPackagedDesktopRuntimeEnvironment } from "../src/desktop/packaged"; + +describe("packaged desktop environment isolation", () => { + it("inherits GUI runtime state without forwarding ambient credentials", () => { + const selected = selectPackagedDesktopRuntimeEnvironment({ + PATH: "/fixture/bin", + DISPLAY: ":99", + XAUTHORITY: "/fixture/.Xauthority", + LANG: "en_US.UTF-8", + GITHUB_TOKEN: "github-secret", + ANTHROPIC_API_KEY: "anthropic-secret", + AWS_SECRET_ACCESS_KEY: "aws-secret", + HTTPS_PROXY: "https://user:password@proxy.example", + NODE_OPTIONS: "--require=/tmp/ambient-hook.js", + }); + + expect(selected).toEqual({ + PATH: "/fixture/bin", + DISPLAY: ":99", + XAUTHORITY: "/fixture/.Xauthority", + LANG: "en_US.UTF-8", + }); + expect("GITHUB_TOKEN" in selected).toBe(false); + expect("ANTHROPIC_API_KEY" in selected).toBe(false); + expect("AWS_SECRET_ACCESS_KEY" in selected).toBe(false); + expect("HTTPS_PROXY" in selected).toBe(false); + expect("NODE_OPTIONS" in selected).toBe(false); + }); +}); diff --git a/e2e/harness/test-source.test.ts b/e2e/harness/test-source.test.ts new file mode 100644 index 000000000..c9bb95117 --- /dev/null +++ b/e2e/harness/test-source.test.ts 
@@ -0,0 +1,96 @@ +import { mkdtempSync, mkdirSync, readFileSync, rmSync, writeFileSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; + +import { describe, expect, it } from "@effect/vitest"; +import { Effect } from "effect"; + +import { extractFocusedTestSource, writeFocusedTestSource } from "../src/test-source"; + +describe("focused test source evidence", () => { + it.effect("extracts a named direct KVM-style test and writes source provenance", () => + Effect.acquireUseRelease( + Effect.sync(() => mkdtempSync(join(tmpdir(), "executor-test-source-"))), + (directory) => + Effect.sync(() => { + const testFile = join(directory, "gui-acceptance.test.ts"); + const runDir = join(directory, "run"); + const directName = "Desktop KVM account switching"; + mkdirSync(runDir); + writeFileSync( + testFile, + `// Direct packaged guest acceptance. +import { expect, it } from "@effect/vitest"; + +const SCENARIO_NAME = ${JSON.stringify(directName)}; +const helper = () => "focused helper"; + +scenario("Unrelated shared scenario", {}, Effect.void); + +it(SCENARIO_NAME, async () => { + expect(helper()).toBe("focused helper"); +}); +`, + ); + + const extracted = extractFocusedTestSource(testFile, directName); + expect(extracted?.registration).toBe("it"); + expect(extracted?.source).toContain("Direct packaged guest acceptance"); + expect(extracted?.source).toContain("it(SCENARIO_NAME"); + expect(extracted?.source).toContain("focused helper"); + expect(extracted?.source).not.toContain("Unrelated shared scenario"); + expect(extracted?.source).not.toContain("@effect/vitest"); + + expect( + writeFocusedTestSource({ runDir, filePath: testFile, testName: directName }), + ).toBeDefined(); + expect(readFileSync(join(runDir, "test.ts"), "utf8")).toBe(extracted?.source); + expect( + JSON.parse(readFileSync(join(runDir, "test-source-metadata.json"), "utf8")), + ).toMatchObject({ + schemaVersion: 1, + sourcePath: "gui-acceptance.test.ts", + testName: 
directName, + registration: "it", + extractor: "typescript-named-test-v2", + }); + }), + (directory) => Effect.sync(() => rmSync(directory, { recursive: true, force: true })), + ), + ); + + it.effect("finds scenario registrations nested inside preflight branches", () => + Effect.acquireUseRelease( + Effect.sync(() => mkdtempSync(join(tmpdir(), "executor-nested-test-source-"))), + (directory) => + Effect.sync(() => { + const testFile = join(directory, "preflight.test.ts"); + const selectedName = "Packaged desktop preflight"; + writeFileSync( + testFile, + `import { scenario } from "../src/scenario"; + +const preflight = { status: "ready" }; +const SCENARIO_NAME = "Packaged desktop"; + +if (preflight.status === "skip") { + scenario(\`\${SCENARIO_NAME} preflight\`, {}, Effect.die("unavailable")); +} else { + scenario("Packaged desktop survives restart", {}, Effect.void); + scenario("Packaged desktop sibling", {}, Effect.void); +} +`, + ); + + const extracted = extractFocusedTestSource(testFile, selectedName); + expect(extracted?.registration).toBe("scenario"); + expect(extracted?.source).toContain("if (preflight.status"); + expect(extracted?.source).toContain("scenario(`${SCENARIO_NAME} preflight`"); + expect(extracted?.source).not.toContain("survives restart"); + expect(extracted?.source).not.toContain("Packaged desktop sibling"); + expect(extracted?.source).not.toContain("../src/scenario"); + }), + (directory) => Effect.sync(() => rmSync(directory, { recursive: true, force: true })), + ), + ); +}); diff --git a/e2e/harness/viewer-portable-traces.test.ts b/e2e/harness/viewer-portable-traces.test.ts new file mode 100644 index 000000000..626944d5d --- /dev/null +++ b/e2e/harness/viewer-portable-traces.test.ts @@ -0,0 +1,244 @@ +import { mkdtempSync, mkdirSync, readFileSync, rmSync, writeFileSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; + +import { describe, expect, it } from "@effect/vitest"; +import { Effect } from 
"effect"; +import { createElement } from "react"; +import { renderToStaticMarkup } from "react-dom/server"; + +import type { EvidencePublicationMetadata } from "../src/published-artifacts"; +import { buildManifest } from "../src/viewer/manifest"; +import { ArtifactNavigation, runRoute } from "../viewer/src/App"; +import PortableTraceExplorer from "../viewer/src/PortableTraceExplorer"; +import PublicationBanner, { parsePublicationMetadata } from "../viewer/src/PublicationBanner"; +import { + liveMotelViewerFromSearch, + parsePortableTraceExport, + waterfallPosition, + type PortableTraceExport, +} from "../viewer/src/portable-traces"; + +const traceId = "0123456789abcdef0123456789abcdef"; +const portableExport: PortableTraceExport = { + schemaVersion: 1, + exportedAt: 1_751_000_000_000, + traces: [ + { + traceId, + data: { + traceId, + serviceName: "executor-cloud", + rootOperationName: "POST /api/tools/call", + startedAt: "2026-06-26T00:00:00.000Z", + isRunning: false, + durationMs: 100, + spanCount: 2, + errorCount: 0, + warnings: [], + spans: [ + { + spanId: "root-span", + parentSpanId: null, + serviceName: "executor-cloud", + scopeName: "executor.http", + kind: "server", + operationName: "POST /api/tools/call", + startTime: "2026-06-26T00:00:00.000Z", + isRunning: false, + durationMs: 100, + status: "ok", + depth: 0, + tags: { "http.response.status_code": "200", "db.system": "sqlite" }, + warnings: [], + events: [ + { + name: "cache.miss", + timestamp: "2026-06-26T00:00:00.010Z", + attributes: { key: "profile" }, + }, + ], + }, + { + spanId: "child-span", + parentSpanId: "root-span", + serviceName: "executor-storage", + scopeName: null, + kind: "internal", + operationName: "load active account", + startTime: "2026-06-26T00:00:00.025Z", + isRunning: false, + durationMs: 50, + status: "ok", + depth: 1, + tags: {}, + warnings: [], + events: [], + }, + ], + }, + }, + ], + missing: [], + invalidTraceIds: [], +}; + +describe("portable trace viewer", () => { + 
it("parses persisted traces and computes a stable span waterfall", () => { + expect(parsePortableTraceExport(portableExport)).toEqual(portableExport); + + const trace = portableExport.traces[0].data; + expect(waterfallPosition(trace, trace.spans[0])).toEqual({ left: 0, width: 100 }); + expect(waterfallPosition(trace, trace.spans[1])).toEqual({ left: 25, width: 50 }); + }); + + it("renders useful trace and span detail without a live telemetry service", () => { + const html = renderToStaticMarkup( + createElement(PortableTraceExplorer, { + exportData: portableExport, + ledger: [{ id: traceId, url: "http://127.0.0.1/api/tools/call" }], + onSelectTrace: () => undefined, + }), + ); + + expect(html).toContain("POST /api/tools/call"); + expect(html).toContain("load active account"); + expect(html).toContain("db.system"); + expect(html).toContain("cache.miss"); + expect(html).toContain('aria-pressed="true"'); + expect(html).not.toContain('role="listbox"'); + expect(html).not.toContain("aria-selected"); + expect(html).toContain("Trace /api/tools/call, 100ms, 2 spans"); + expect(html).toContain("POST /api/tools/call, executor-cloud, 100ms, ok"); + expect(html).not.toContain("open live Motel"); + }); + + it("uses canonical run routes and exposes persisted evidence files directly", () => { + expect(runRoute("desktop-kvm", "2026-06-27T00-00-00-000Z-1234")).toBe( + "#/run/desktop-kvm/2026-06-27T00-00-00-000Z-1234", + ); + const html = renderToStaticMarkup( + createElement(ArtifactNavigation, { + base: "desktop-kvm/run-123", + artifacts: [ + { name: "claude-code-metadata.json", kind: "json", label: "Claude code metadata" }, + { name: "anthropic-replay-ledger.json", kind: "json", label: "Anthropic replay ledger" }, + { name: "packaged-app.log", kind: "text", label: "packaged app" }, + ], + }), + ); + expect(html).toContain("Persisted evidence"); + expect(html).toContain("desktop-kvm/run-123/claude-code-metadata.json"); + 
expect(html).toContain("desktop-kvm/run-123/anthropic-replay-ledger.json"); + expect(html).toContain("desktop-kvm/run-123/packaged-app.log"); + }); + + it("accepts only explicit loopback live Motel enhancements", () => { + expect(liveMotelViewerFromSearch("?motel=http%3A%2F%2F127.0.0.1%3A61234%2F")).toBe( + "http://127.0.0.1:61234", + ); + expect(liveMotelViewerFromSearch("?motel=https%3A%2F%2Ftelemetry.example.com")).toBeUndefined(); + expect(liveMotelViewerFromSearch("?motel=javascript%3Aalert(1)")).toBeUndefined(); + }); +}); + +describe("publication provenance banner", () => { + const metadata: EvidencePublicationMetadata = { + schemaVersion: 1, + sanitizedAt: 1_751_000_000_000, + status: "passed", + sanitizer: { + source: "e2e/scripts/sanitize-evidence.ts", + policyVersion: 1, + sourceRevision: "abc123", + }, + policy: { + unknownArtifacts: "removed", + textAndJson: "redacted", + binaryVisuals: "unredacted-synthetic-only", + binarySecretDetection: "byte-canary-only", + }, + runtime: { name: "bun", version: "1.3.0", platform: "linux", arch: "x64" }, + stats: { removed: 4, redacted: 8, retained: 3, canariesChecked: 2 }, + binaryArtifacts: ["desktop/example/01-proof.png"], + errors: [], + }; + + it("parses persisted sanitizer provenance and explains the binary limitation", () => { + expect(parsePublicationMetadata(metadata)).toEqual(metadata); + expect(parsePublicationMetadata({ ...metadata, schemaVersion: 2 })).toBeNull(); + + const sanitized = renderToStaticMarkup(createElement(PublicationBanner, { metadata })); + expect(sanitized).toContain("Sanitized evidence publication"); + expect(sanitized).toContain("remain unredacted under the synthetic-only policy"); + expect(sanitized).toContain("Byte canaries checked: 2"); + + const local = renderToStaticMarkup(createElement(PublicationBanner, { metadata: null })); + expect(local).toContain("Do not publish this directory"); + }); +}); + +describe("portable trace manifest", () => { + it.effect("indexes portable trace 
completeness for each attempt", () => + Effect.acquireUseRelease( + Effect.sync(() => mkdtempSync(join(tmpdir(), "executor-viewer-manifest-"))), + (runsDir) => + Effect.sync(() => { + const runDir = join(runsDir, "cloud", "account-switch--attempt-123"); + mkdirSync(runDir, { recursive: true }); + writeFileSync( + join(runDir, "result.json"), + JSON.stringify({ + scenario: "Account switching", + target: "cloud", + attemptId: "attempt-123", + ok: true, + durationMs: 1234, + endedAt: 1_751_000_000_000, + portableTraces: { exported: 2, missing: 1 }, + }), + ); + writeFileSync(join(runDir, "claude-code-metadata.json"), "{}"); + writeFileSync(join(runDir, "anthropic-replay-ledger.json"), "{}"); + writeFileSync(join(runDir, "account-fixture-ledger.json"), "{}"); + writeFileSync(join(runDir, "packaged-app.log"), "synthetic log"); + writeFileSync(join(runDir, "mcporter.json"), "{}"); + + buildManifest(runsDir); + + expect(JSON.parse(readFileSync(join(runsDir, "manifest.json"), "utf8"))).toMatchObject({ + runs: [ + { + scenario: "Account switching", + target: "cloud", + slug: "account-switch--attempt-123", + attemptId: "attempt-123", + portableTraceCount: 2, + portableTraceMissing: 1, + artifacts: [ + { + name: "account-fixture-ledger.json", + kind: "json", + label: "account fixture ledger", + }, + { + name: "anthropic-replay-ledger.json", + kind: "json", + label: "anthropic replay ledger", + }, + { + name: "claude-code-metadata.json", + kind: "json", + label: "Claude code metadata", + }, + { name: "packaged-app.log", kind: "text", label: "packaged app" }, + { name: "result.json", kind: "json", label: "result" }, + ], + }, + ], + }); + }), + (runsDir) => Effect.sync(() => rmSync(runsDir, { recursive: true, force: true })), + ), + ); +}); diff --git a/e2e/local/local-server.ts b/e2e/local/local-server.ts index 818b272fb..b45401a24 100644 --- a/e2e/local/local-server.ts +++ b/e2e/local/local-server.ts @@ -35,11 +35,11 @@ export interface ServerHandle { * the browser, a typed 
API client, an MCP client — anything that needs the live * server. */ -export const withLocalServer = ( +export const withLocalServer = ( cli: CliSurface, runDir: string, - body: (server: ServerHandle) => Effect.Effect, -): Effect.Effect => + body: (server: ServerHandle) => Effect.Effect, +): Effect.Effect => Effect.gen(function* () { const dataDir = mkdtempSync(join(tmpdir(), "executor-local-e2e-")); diff --git a/e2e/local/toolkits-mcp.test.ts b/e2e/local/toolkits-mcp.test.ts index 5cf12eef7..8fe6137d3 100644 --- a/e2e/local/toolkits-mcp.test.ts +++ b/e2e/local/toolkits-mcp.test.ts @@ -3,7 +3,7 @@ import { createServer, type Server } from "node:http"; import type { AddressInfo } from "node:net"; import { expect } from "@effect/vitest"; -import { Effect } from "effect"; +import { Effect, Schema } from "effect"; import { HttpApiClient } from "effect/unstable/httpapi"; import { FetchHttpClient, HttpClient, HttpClientRequest } from "effect/unstable/http"; import { Client } from "@modelcontextprotocol/sdk/client/index.js"; @@ -135,20 +135,47 @@ const makeMcp = async (url: string, token: string, name: string) => { return { client, transport }; }; -const textFromCall = (result: Awaited>): string => { - const blocks = result.content ?? 
[]; +const McpTextResult = Schema.Struct({ + content: Schema.Array( + Schema.Struct({ + type: Schema.String, + text: Schema.optional(Schema.String), + }), + ), +}); + +const decodeMcpTextResult = Schema.decodeUnknownSync(McpTextResult); + +const textFromCall = (result: unknown): string => { + const blocks = decodeMcpTextResult(result).content; const text = blocks.find((block) => block.type === "text")?.text; if (typeof text !== "string") throw new Error(`MCP call returned no text block`); return text; }; -const executeJson = async (client: Client, code: string): Promise> => { - const result = await client.callTool({ - name: "execute", - arguments: { code }, +const ExecuteResult = Schema.Struct({ + ok: Schema.optional(Schema.Boolean), + reason: Schema.optional(Schema.String), + data: Schema.optional( + Schema.Struct({ + id: Schema.optional(Schema.String), + }), + ), + paths: Schema.optional(Schema.Array(Schema.String)), +}); + +const decodeExecuteResult = Schema.decodeUnknownEffect(Schema.fromJsonString(ExecuteResult)); + +const executeJson = (client: Client, code: string) => + Effect.gen(function* () { + const result = yield* Effect.promise(() => + client.callTool({ + name: "execute", + arguments: { code }, + }), + ); + return yield* decodeExecuteResult(textFromCall(result)); }); - return JSON.parse(textFromCall(result)) as Record; -}; scenario( "Local toolkits · scoped MCP hides blocked and unselected connections", @@ -239,49 +266,42 @@ scenario( Effect.promise(() => toolkitMcp.client.close()).pipe(Effect.ignore), ); - const selectedCall = yield* Effect.promise(() => - executeJson( - toolkitMcp.client, - callPingCode({ - integration, - connection: selected, - id: "from-toolkit", - }), - ), + const selectedCall = yield* executeJson( + toolkitMcp.client, + callPingCode({ + integration, + connection: selected, + id: "from-toolkit", + }), ); expect(selectedCall.ok, `selected call: ${JSON.stringify(selectedCall)}`).toBe(true); - expect((selectedCall.data as { id?: 
unknown }).id).toBe("from-toolkit"); + expect(selectedCall.data?.id).toBe("from-toolkit"); - const visible = yield* Effect.promise(() => - executeJson(toolkitMcp.client, listVisiblePathsCode(integration)), - ); - expect(visible.paths).toContain(pingToolPath(integration, selected)); - expect(visible.paths).not.toContain(pingToolPath(integration, blocked)); - expect(visible.paths).not.toContain(pingToolPath(integration, unselected)); + const visible = yield* executeJson(toolkitMcp.client, listVisiblePathsCode(integration)); + const visiblePaths = visible.paths ?? []; + expect(visiblePaths).toContain(pingToolPath(integration, selected)); + expect(visiblePaths).not.toContain(pingToolPath(integration, blocked)); + expect(visiblePaths).not.toContain(pingToolPath(integration, unselected)); - const blockedCall = yield* Effect.promise(() => - executeJson( - toolkitMcp.client, - callPingCode({ - integration, - connection: blocked, - id: "blocked-should-not-run", - }), - ), + const blockedCall = yield* executeJson( + toolkitMcp.client, + callPingCode({ + integration, + connection: blocked, + id: "blocked-should-not-run", + }), ); expect(blockedCall.reason, `blocked call: ${JSON.stringify(blockedCall)}`).toBe( "missing", ); - const unselectedCall = yield* Effect.promise(() => - executeJson( - toolkitMcp.client, - callPingCode({ - integration, - connection: unselected, - id: "should-not-run", - }), - ), + const unselectedCall = yield* executeJson( + toolkitMcp.client, + callPingCode({ + integration, + connection: unselected, + id: "should-not-run", + }), ); expect(unselectedCall.reason, `unselected call: ${JSON.stringify(unselectedCall)}`).toBe( "missing", diff --git a/e2e/package.json b/e2e/package.json index 06370f939..f14ea087d 100644 --- a/e2e/package.json +++ b/e2e/package.json @@ -5,18 +5,36 @@ "type": "module", "scripts": { "cli": "bun scripts/cli.ts", - "test": "vitest run --project cloud && vitest run --project selfhost", + "test": "bun run test:portable", + 
"test:portable": "vitest run --project harness && vitest run --project clients && vitest run --project cloud-hermetic && vitest run --project selfhost-hermetic && vitest run --project local && vitest run --project cloudflare-hermetic", + "test:harness": "vitest run --project harness", + "test:clients": "vitest run --project clients", "test:cloud": "vitest run --project cloud", + "test:cloud:hermetic": "vitest run --project cloud-hermetic", "test:selfhost": "vitest run --project selfhost", + "test:selfhost:hermetic": "vitest run --project selfhost-hermetic", "test:selfhost-docker": "vitest run --project selfhost-docker", + "test:selfhost-docker:hermetic": "vitest run --project selfhost-docker-hermetic", "test:cloudflare": "vitest run --project cloudflare", - "test:watch": "vitest", + "test:cloudflare:hermetic": "vitest run --project cloudflare-hermetic", + "test:desktop": "vitest run --project desktop", + "test:desktop-packaged": "vitest run --project desktop-packaged", + "test:desktop-kvm": "vitest run --project desktop-kvm", + "test:local": "vitest run --project local", + "test:cli:macos": "vitest run --project cli-macos", + "test:cli:linux": "vitest run --project cli-linux", + "test:cli:windows": "vitest run --project cli-windows", + "test:live": "bun run test:live:cloud && bun run test:live:selfhost && bun run test:live:cloudflare", + "test:live:cloud": "vitest run --project cloud scenarios/microsoft-graph-default.test.ts scenarios/microsoft-graph-full.test.ts scenarios/oauth-client-handoff.test.ts", + "test:live:selfhost": "vitest run --project selfhost scenarios/microsoft-graph-default.test.ts scenarios/microsoft-graph-full.test.ts scenarios/oauth-client-handoff.test.ts", + "test:live:cloudflare": "vitest run --project cloudflare scenarios/microsoft-graph-full.test.ts", + "test:watch": "vitest --project cloud-hermetic", "ports": "bun scripts/ports.ts", "summary": "bun scripts/summary.ts", + "evidence:sanitize": "bun scripts/sanitize-evidence.ts", 
"viewer:build": "bun scripts/rebuild-viewer.ts", "serve": "bun scripts/rebuild-viewer.ts && bun scripts/serve.ts", "typecheck": "tsc --noEmit", - "test:desktop": "vitest run --project desktop", "motel": "MOTEL_OTEL_BASE_URL=http://127.0.0.1:4796 MOTEL_OTEL_DB_PATH=runs/.motel/telemetry.sqlite motel server" }, "dependencies": { diff --git a/e2e/scenarios/api-tools.test.ts b/e2e/scenarios/api-tools.test.ts index bd64c71cb..414c8aaad 100644 --- a/e2e/scenarios/api-tools.test.ts +++ b/e2e/scenarios/api-tools.test.ts @@ -1,14 +1,31 @@ -// Cross-target: the typed API surface, exactly as a consumer uses it. The -// contract is the CORE executor HttpApi (composePluginApi([])) — every target -// serves it under /api, so one scenario runs against all of them. +// Cross-target: the typed API surface, exactly as a consumer uses it. Every +// target serves the composed Executor API under /api, so one scenario runs +// against all of them. +import { randomBytes } from "node:crypto"; + import { expect } from "@effect/vitest"; import { Effect } from "effect"; import { composePluginApi } from "@executor-js/api/server"; +import { openApiHttpPlugin } from "@executor-js/plugin-openapi/api"; +import { AuthTemplateSlug, ConnectionName, IntegrationSlug } from "@executor-js/sdk/shared"; import { scenario } from "../src/scenario"; import { Api, Target } from "../src/services"; -const coreApi = composePluginApi([] as const); +const api = composePluginApi([openApiHttpPlugin()] as const); + +const pingSpec = JSON.stringify({ + openapi: "3.0.3", + info: { title: "API Tools Scenario", version: "1.0.0" }, + paths: { + "/ping": { + get: { + operationId: "ping", + responses: { "200": { description: "pong" } }, + }, + }, + }, +}); scenario( "API · typed client lists the available tools", @@ -17,21 +34,71 @@ scenario( const target = yield* Target; const { client } = yield* Api; const identity = yield* target.newIdentity(); - const api = yield* client(coreApi, identity); - const tools = yield* 
api.tools.list({ query: {} }); + const typedClient = yield* client(api, identity); + const tools = yield* typedClient.tools.list({ query: {} }); expect(tools.length, "at least one tool is exposed").toBeGreaterThan(0); }), ); scenario( - "API · a fresh identity starts with zero connections", + "API · the typed client lists the connection it created", {}, Effect.gen(function* () { const target = yield* Target; const { client } = yield* Api; const identity = yield* target.newIdentity(); - const api = yield* client(coreApi, identity); - const connections = yield* api.connections.list({ query: {} }); - expect(connections.length, "no connections leak across identities").toBe(0); + const typedClient = yield* client(api, identity); + const integration = IntegrationSlug.make(`api-tools-${randomBytes(4).toString("hex")}`); + const name = ConnectionName.make(`api${randomBytes(4).toString("hex")}`); + const template = AuthTemplateSlug.make("apiKey"); + + yield* Effect.gen(function* () { + yield* typedClient.openapi.addSpec({ + payload: { + spec: { kind: "blob", value: pingSpec }, + slug: integration, + baseUrl: "http://127.0.0.1:59999", + authenticationTemplate: [ + { + slug: template, + type: "apiKey", + headers: { + authorization: ["Bearer ", { type: "variable", name: "token" }], + }, + }, + ], + }, + }); + yield* typedClient.connections.create({ + payload: { + owner: "org", + integration, + name, + template, + value: "scenario-local-token", + }, + }); + + const connections = yield* typedClient.connections.list({ query: { integration } }); + expect( + connections.map((connection) => ({ + integration: connection.integration, + name: connection.name, + owner: connection.owner, + })), + "the list contains exactly the connection created by this scenario", + ).toEqual([{ integration, name, owner: "org" }]); + }).pipe( + Effect.ensuring( + Effect.gen(function* () { + yield* typedClient.connections + .remove({ params: { owner: "org", integration, name } }) + .pipe(Effect.ignore); + 
yield* typedClient.openapi + .removeSpec({ params: { slug: integration } }) + .pipe(Effect.ignore); + }), + ), + ); }), ); diff --git a/e2e/scenarios/auth-methods-ui.test.ts b/e2e/scenarios/auth-methods-ui.test.ts index cb751db7b..c73e2093c 100644 --- a/e2e/scenarios/auth-methods-ui.test.ts +++ b/e2e/scenarios/auth-methods-ui.test.ts @@ -1,6 +1,6 @@ // Cross-target (browser): the multi-method auth add flow, end to end through // the real web UI against a live loopback MCP test server (the target's dev -// server probes it). A no-auth server gets an API key declared at add time — +// server probes it). A no-auth server gets an API key declared at add time, // the case where the server advertises nothing but the user knows better. // The session video + per-step screenshots are the artifact. // @@ -11,10 +11,16 @@ import { randomBytes } from "node:crypto"; import { expect } from "@effect/vitest"; import { Effect } from "effect"; +import { composePluginApi } from "@executor-js/api/server"; +import { deriveMcpNamespace } from "@executor-js/plugin-mcp"; +import { mcpHttpPlugin } from "@executor-js/plugin-mcp/api"; import { makeGreetingMcpServer, serveMcpServer } from "@executor-js/plugin-mcp/testing"; +import { IntegrationSlug } from "@executor-js/sdk/shared"; import { scenario } from "../src/scenario"; -import { Browser, Target } from "../src/services"; +import { Api, Browser, Target } from "../src/services"; + +const api = composePluginApi([mcpHttpPlugin()] as const); scenario( "Auth methods · the add flow declares an API key alongside the detected method", @@ -23,57 +29,61 @@ scenario( Effect.gen(function* () { const target = yield* Target; const browser = yield* Browser; + const { client: makeApiClient } = yield* Api; // An OPEN server: the probe connects without auth, so the method list // seeds with the detected "no authentication" row. The server name is - // unique per run — the derived integration namespace must not collide + // unique per run. 
The derived integration namespace must not collide // on targets whose identities share one tenant (selfhost admin). - const server = yield* serveMcpServer(() => - makeGreetingMcpServer({ name: `open-mcp-${randomBytes(3).toString("hex")}` }), - ); + const serverName = `open-mcp-${randomBytes(3).toString("hex")}`; + const slug = IntegrationSlug.make(deriveMcpNamespace({ name: serverName })); + const server = yield* serveMcpServer(() => makeGreetingMcpServer({ name: serverName })); const identity = yield* target.newIdentity(); + const client = yield* makeApiClient(api, identity); - yield* browser.session(identity, async ({ page, step }) => { - await step("Open the add-MCP flow pointed at the server", async () => { - await page.goto(`/integrations/add/mcp?url=${encodeURIComponent(server.endpoint)}`, { - waitUntil: "networkidle", + yield* browser + .session(identity, async ({ page, step }) => { + await step("Open the add-MCP flow pointed at the server", async () => { + await page.goto(`/integrations/add/mcp?url=${encodeURIComponent(server.endpoint)}`, { + waitUntil: "networkidle", + }); + // The URL auto-probes (debounced); the method list appears once + // the probe lands. + await page.getByText("How does this server authenticate?").waitFor(); }); - // The URL auto-probes (debounced); the method list appears once - // the probe lands. - await page.getByText("How does this server authenticate?").waitFor(); - }); - await step("The probe seeded the detected method", async () => { - await page.getByText("Method 1 · Detected").waitFor(); - }); + await step("The probe seeded the detected method", async () => { + await page.getByText("Method 1 · Detected").waitFor(); + }); - await step("Declare an API key method alongside it", async () => { - await page.getByRole("button", { name: "Add method" }).click(); - await page.getByText("Method 2").waitFor(); - // The new row opens on the API key editor with the standard - // Authorization-header placement prefilled. 
- const headerName = page.getByPlaceholder("Authorization").last(); - await headerName.waitFor(); - }); + await step("Declare an API key method alongside it", async () => { + await page.getByRole("button", { name: "Add method" }).click(); + await page.getByText("Method 2").waitFor(); + // The new row opens on the API key editor with the standard + // Authorization-header placement prefilled. + const headerName = page.getByPlaceholder("Authorization").last(); + await headerName.waitFor(); + }); - await step("Add the source with both methods", async () => { - await page.getByRole("button", { name: "Add source" }).click(); - // onComplete routes to the new integration's detail hub. - await page.waitForURL(/\/integrations\/(?!add\b)[^/?]+$/, { timeout: 30_000 }); - await page.getByText("Connections").first().waitFor(); - }); + await step("Add the source with both methods", async () => { + await page.getByRole("button", { name: "Add source" }).click(); + // onComplete routes to the new integration's detail hub. 
+ await page.waitForURL(/\/integrations\/(?!add\b)[^/?]+$/, { timeout: 30_000 }); + await page.getByText("Connections").first().waitFor(); + }); - await step("The connect modal offers both methods", async () => { - await page.getByRole("button", { name: "Add connection" }).first().click(); - await page.getByRole("tab", { name: "No authentication" }).waitFor(); - await page.getByRole("tab", { name: "API key (Authorization)" }).waitFor(); - }); + await step("The connect modal offers both methods", async () => { + await page.getByRole("button", { name: "Add connection" }).first().click(); + await page.getByRole("tab", { name: "No authentication" }).waitFor(); + await page.getByRole("tab", { name: "API key (Authorization)" }).waitFor(); + }); - const tabs = await page.getByRole("tab").allInnerTexts(); - expect(tabs.join(", "), "both declared methods are selectable").toContain( - "No authentication", - ); - expect(tabs.join(", ")).toContain("API key (Authorization)"); - }); + const tabs = await page.getByRole("tab").allInnerTexts(); + expect(tabs.join(", "), "both declared methods are selectable").toContain( + "No authentication", + ); + expect(tabs.join(", ")).toContain("API key (Authorization)"); + }) + .pipe(Effect.ensuring(client.mcp.removeServer({ params: { slug } }).pipe(Effect.ignore))); }), ), ); diff --git a/e2e/scenarios/connect-handoff-session.test.ts b/e2e/scenarios/connect-handoff-session.test.ts index 70b5d0f75..32338294a 100644 --- a/e2e/scenarios/connect-handoff-session.test.ts +++ b/e2e/scenarios/connect-handoff-session.test.ts @@ -1,38 +1,66 @@ -// The connect handoff as a DEVELOPER SESSION — the way a human actually +// The connect handoff as a DEVELOPER SESSION, the way a human actually // tests this: an agent chat in a real terminal where the agent wires up the // API over MCP and drops a connect link, a browser hop to paste the key, // then back to the chat to prove the connection works with a live send. 
// // No inference, no third-party agent binary: the "agent" is the chat // theater (src/clients/chat-theater.ts) presenting REAL mcporter MCP calls -// — OAuth, execute, approval pause/resume all genuine, every tool spinner +// OAuth, execute, and approval pause/resume are genuine, with every tool spinner // on screen bracketing the actual call it narrates. The provider on the -// other side is real too (resend.emulators.dev); its request ledger is the -// final evidence. +// other side is a local, wire-level Resend emulator; its request ledger is +// the final evidence without shared hosted state or internet drift. import { randomBytes } from "node:crypto"; +import { createServer } from "node:net"; import { join } from "node:path"; import { expect } from "@effect/vitest"; import { Effect } from "effect"; +import { composePluginApi } from "@executor-js/api/server"; +import { createEmulator, type Emulator } from "@executor-js/emulate"; +import { openApiHttpPlugin } from "@executor-js/plugin-openapi/api"; import { scenario } from "../src/scenario"; -import { Browser, Cli, Mcp, RunDir, Target } from "../src/services"; +import { Api, Browser, Cli, Mcp, RunDir, Target } from "../src/services"; import { withChatTheater } from "../src/clients/chat-theater"; import type { McpSession } from "../src/surfaces/mcp"; -const EMULATOR_BASE = "https://resend.emulators.dev"; +const api = composePluginApi([openApiHttpPlugin()] as const); const unique = (prefix: string) => `${prefix}_${randomBytes(4).toString("hex")}`; +const availablePort = Effect.callback((resume) => { + const server = createServer(); + server.listen(0, "127.0.0.1", () => { + const address = server.address(); + const port = typeof address === "object" && address ? 
address.port : 0; + server.close(() => resume(Effect.succeed(port))); + }); +}); + +const resendEmulator = Effect.acquireRelease( + Effect.gen(function* () { + const port = yield* availablePort; + // The suite-owned app targets share the host network with this worker. + // An attached remote target fails while importing this URL, which is + // preferable to silently falling back to shared hosted emulator state. + return yield* Effect.promise(() => + createEmulator({ + service: "resend", + port, + baseUrl: `http://127.0.0.1:${port}`, + }), + ); + }), + (emulator: Emulator) => Effect.promise(() => emulator.close()).pipe(Effect.ignore), +); + // The emulator serves its own OpenAPI document (bearer auth, base URL in -// `servers`) — adding by URL with nothing else is exactly what an agent +// `servers`). Adding by URL with nothing else is exactly what an agent // does, and the platform derives the paste-a-token auth method from the // spec's security scheme. -const EMULATOR_SPEC_URL = `${EMULATOR_BASE}/openapi.json`; - -const addSpecCode = (slug: string) => ` +const addSpecCode = (slug: string, specUrl: string) => ` const added = await tools.executor.openapi.addSpec({ - spec: { kind: "url", url: ${JSON.stringify(EMULATOR_SPEC_URL)} }, + spec: { kind: "url", url: ${JSON.stringify(specUrl)} }, slug: ${JSON.stringify(slug)}, }); return added.ok ? { ok: true, slug: added.data.slug, toolCount: added.data.toolCount } : { ok: false, error: added.error }; @@ -64,6 +92,15 @@ const sent = await t({ return { ok: sent.ok, path, result: sent.ok ? sent.data : sent.error }; `; +const removeConnectionsCode = (slug: string) => ` +const list = await tools.executor.coreTools.connections.list({}); +const mine = (list.ok ? 
list.data.connections : []).filter((c) => c.integration === ${JSON.stringify(slug)}); +for (const c of mine) { + await tools.executor.coreTools.connections.remove({ owner: c.owner, integration: c.integration, name: c.name }); +} +return { removed: mine.length }; +`; + /** Run `execute`, auto-approving a paused execution (policy elicitation) * once, and parse the sandbox's JSON return value. */ const executeJson = (session: McpSession, code: string) => @@ -76,18 +113,6 @@ const executeJson = (session: McpSession, code: string) => return JSON.parse(result.text) as Record; }); -const mintEmulatorApiKey = Effect.promise(async () => { - const response = await fetch(`${EMULATOR_BASE}/_emulate/credentials`, { - method: "POST", - headers: { "content-type": "application/json" }, - body: JSON.stringify({ type: "api-key" }), - }); - const body = (await response.json()) as { credential?: { token?: string } }; - const token = body.credential?.token; - if (!token) throw new Error(`emulator credential mint failed: ${JSON.stringify(body)}`); - return token; -}); - scenario( "Connect · developer session: agent chat → handoff link → paste key → verified send", { timeout: 240_000 }, @@ -98,92 +123,108 @@ scenario( const browser = yield* Browser; const cli = yield* Cli; const runDir = yield* RunDir; + const { client: makeApiClient } = yield* Api; + const emulator = yield* resendEmulator; const integration = unique("resendsesh"); const emailSubject = unique("dev-session"); - const apiKey = yield* mintEmulatorApiKey; + const credential = yield* Effect.promise(() => + emulator.credentials.mint({ type: "api-key" }), + ); + const apiKey = credential.token; + if (!apiKey) return yield* Effect.die("Resend emulator returned no API key."); const identity = yield* target.newIdentity(); const session = mcp.session(identity); + const client = yield* makeApiClient(api, identity); + + yield* Effect.gen(function* () { + yield* withChatTheater( + cli, + { title: "executor agent: connect Resend", 
record: join(runDir, "terminal.cast") }, + (chat) => + Effect.gen(function* () { + // Real MCP OAuth + tool discovery happens behind this call. + yield* chat.tool( + { name: "executor (mcp)", result: (tools) => `${tools.length} tools available` }, + session.listTools(), + ); + + yield* chat.user( + "Add the Resend API to my executor and give me a link to connect my account", + ); + yield* chat.assistant("I'll register the Resend API in your Executor now."); + const added = yield* chat.tool( + { name: "execute", input: addSpecCode(integration, emulator.openapiUrl) }, + executeJson(session, addSpecCode(integration, emulator.openapiUrl)), + ); + expect(added.ok, `addSpec succeeded: ${JSON.stringify(added)}`).toBe(true); + + yield* chat.assistant("Registered. Creating your connect link…"); + const handoff = yield* chat.tool( + { name: "execute", input: createHandoffCode(integration) }, + executeJson(session, createHandoffCode(integration)), + ); + expect(handoff.ok, `createHandoff succeeded: ${JSON.stringify(handoff)}`).toBe(true); + const handoffUrl = String(handoff.url); + expect(new URL(handoffUrl).origin, "handoff targets this deployment").toBe( + new URL(target.baseUrl).origin, + ); + + yield* chat.assistant( + `Open this link to connect your Resend account:\n\n${handoffUrl}\n\nTell me once you've pasted your API key.`, + ); + + // The browser hop. The terminal session stays open while the + // "user" pastes the key; the paste is the real add-account UI. 
+ yield* chat.status("you, in the browser: opening the link and pasting the API key…"); + yield* browser.session(identity, async ({ page, step }) => { + await step("Open the connect link from the chat", async () => { + await page.goto(handoffUrl, { waitUntil: "networkidle" }); + await page + .getByRole("heading", { name: /Add connection/ }) + .waitFor({ timeout: 15_000 }); + }); + await step("Paste the Resend API key and connect", async () => { + const credential = page.getByPlaceholder(/paste the value \/ token/i); + await credential.waitFor({ timeout: 15_000 }); + await credential.fill(apiKey); + await page.getByRole("button", { name: "Add connection", exact: true }).click(); + await page + .getByRole("heading", { name: /Add connection/ }) + .waitFor({ state: "hidden", timeout: 20_000 }); + }); + }); - yield* withChatTheater( - cli, - { title: "executor agent — connect Resend", record: join(runDir, "terminal.cast") }, - (chat) => + yield* chat.user("Connected, now send a test email to prove it works"); + yield* chat.assistant("Sending a test email through your new connection…"); + const sent = yield* chat.tool( + { name: "execute", input: sendEmailCode(integration, emailSubject) }, + executeJson(session, sendEmailCode(integration, emailSubject)), + ); + expect(sent.ok, `email sent through the connection: ${JSON.stringify(sent)}`).toBe( + true, + ); + + yield* chat.assistant("Test email sent - your Resend connection works."); + }), + ); + + // Final evidence: the emulator's typed ledger saw the send from Executor. + const ledger = yield* Effect.promise(() => emulator.ledger.list()); + expect( + ledger.some((entry) => JSON.stringify(entry.request.body ?? "").includes(emailSubject)), + "the emulator request ledger recorded the test email", + ).toBe(true); + }).pipe( + // Install cleanup before registering the integration. This covers the + // shared selfhost admin as well as isolated cloud identities. 
+ Effect.ensuring( Effect.gen(function* () { - // Real MCP OAuth + tool discovery happens behind this call. - yield* chat.tool( - { name: "executor (mcp)", result: (tools) => `${tools.length} tools available` }, - session.listTools(), - ); - - yield* chat.user( - "Add the Resend API to my executor and give me a link to connect my account", - ); - yield* chat.assistant("I'll register the Resend API in your Executor now."); - const added = yield* chat.tool( - { name: "execute", input: addSpecCode(integration) }, - executeJson(session, addSpecCode(integration)), - ); - expect(added.ok, `addSpec succeeded: ${JSON.stringify(added)}`).toBe(true); - - yield* chat.assistant("Registered. Creating your connect link…"); - const handoff = yield* chat.tool( - { name: "execute", input: createHandoffCode(integration) }, - executeJson(session, createHandoffCode(integration)), - ); - expect(handoff.ok, `createHandoff succeeded: ${JSON.stringify(handoff)}`).toBe(true); - const handoffUrl = String(handoff.url); - expect(new URL(handoffUrl).origin, "handoff targets this deployment").toBe( - new URL(target.baseUrl).origin, - ); - - yield* chat.assistant( - `Open this link to connect your Resend account:\n\n${handoffUrl}\n\nTell me once you've pasted your API key.`, - ); - - // The browser hop — the terminal session stays open while the - // "user" pastes the key; the paste is the real add-account UI. 
- yield* chat.status("you, in the browser: opening the link and pasting the API key…"); - yield* browser.session(identity, async ({ page, step }) => { - await step("Open the connect link from the chat", async () => { - await page.goto(handoffUrl, { waitUntil: "networkidle" }); - await page - .getByRole("heading", { name: /Add connection/ }) - .waitFor({ timeout: 15_000 }); - }); - await step("Paste the Resend API key and connect", async () => { - const credential = page.getByPlaceholder(/paste the value \/ token/i); - await credential.waitFor({ timeout: 15_000 }); - await credential.fill(apiKey); - await page.getByRole("button", { name: "Add connection", exact: true }).click(); - await page - .getByRole("heading", { name: /Add connection/ }) - .waitFor({ state: "hidden", timeout: 20_000 }); - }); - }); - - yield* chat.user("Connected, now send a test email to prove it works"); - yield* chat.assistant("Sending a test email through your new connection…"); - const sent = yield* chat.tool( - { name: "execute", input: sendEmailCode(integration, emailSubject) }, - executeJson(session, sendEmailCode(integration, emailSubject)), - ); - expect(sent.ok, `email sent through the connection: ${JSON.stringify(sent)}`).toBe( - true, - ); - - yield* chat.assistant("Test email sent - your Resend connection works."); + yield* executeJson(session, removeConnectionsCode(integration)).pipe(Effect.ignore); + yield* client.openapi.removeSpec({ params: { slug: integration } }).pipe(Effect.ignore); }), + ), ); - - // Final evidence: the emulator's ledger saw the send from Executor. 
- const ledger = yield* Effect.promise(async () => - (await fetch(`${EMULATOR_BASE}/_emulate/ledger`)).text(), - ); - expect( - ledger.includes(emailSubject), - "the emulator request ledger recorded the test email", - ).toBe(true); }), ), ); diff --git a/e2e/scenarios/connect-handoff.test.ts b/e2e/scenarios/connect-handoff.test.ts index af29eccce..7c89b9351 100644 --- a/e2e/scenarios/connect-handoff.test.ts +++ b/e2e/scenarios/connect-handoff.test.ts @@ -1,8 +1,8 @@ // The agentic connect handoff: an agent adds an API over MCP, asks for a // handoff URL (`coreTools.connections.createHandoff`), and the user opens that // URL in a browser to paste the credential. This scenario walks the WHOLE -// path — the exact flow that failed in production with a "wrong / bad" URL — -// against a real emulated provider (resend.emulators.dev) so the failure +// path, the exact flow that failed in production with a "wrong / bad" URL, +// against a per-run local Resend emulator so the failure // point is captured with trace + screenshots instead of guessed at: // // 1. 
MCP `execute` → `openapi.addSpec` registers the emulated Resend API @@ -17,12 +17,13 @@ // the new tools and the emulator's request ledger shows the call // arriving with the pasted bearer token import { randomBytes } from "node:crypto"; +import { createServer } from "node:net"; import { expect } from "@effect/vitest"; import { Effect } from "effect"; import { AccountHttpApi } from "@executor-js/api"; import { composePluginApi } from "@executor-js/api/server"; -import { connectEmulator } from "@executor-js/emulate"; +import { createEmulator, type Emulator } from "@executor-js/emulate"; import { openApiHttpPlugin } from "@executor-js/plugin-openapi/api"; import { scenario } from "../src/scenario"; @@ -35,18 +36,14 @@ const api = composePluginApi([openApiHttpPlugin()] as const); const unique = (prefix: string) => `${prefix}_${randomBytes(4).toString("hex")}`; -const EMULATOR_BASE = "https://resend.emulators.dev"; - // The emulator serves its own OpenAPI document (bearer auth, same shape as -// real Resend — and as the Sentry spec that failed in prod). Adding it by URL +// real Resend, and as the Sentry spec that failed in prod). Adding it by URL // with no authenticationTemplate exercises exactly the agentic path: the // add-account modal must render a paste-a-token flow derived from the spec's // bare `http`/`bearer` security scheme. -const EMULATOR_SPEC_URL = `${EMULATOR_BASE}/openapi.json`; - -const addSpecCode = (slug: string) => ` +const addSpecCode = (slug: string, specUrl: string) => ` const added = await tools.executor.openapi.addSpec({ - spec: { kind: "url", url: ${JSON.stringify(EMULATOR_SPEC_URL)} }, + spec: { kind: "url", url: ${JSON.stringify(specUrl)} }, slug: ${JSON.stringify(slug)}, }); return added.ok ? { ok: true, slug: added.data.slug, toolCount: added.data.toolCount } : { ok: false, error: added.error }; @@ -61,8 +58,8 @@ const handoff = await tools.executor.coreTools.connections.createHandoff({ return handoff.ok ? 
{ ok: true, url: handoff.data.url } : { ok: false, error: handoff.error }; `; -// Selfhost scenarios share one workspace identity — leaked connections fail -// other scenarios' zero-state assertions, so remove everything this one made. +// Selfhost scenarios share one workspace identity. Remove everything this +// scenario made so later shared-admin scenarios never inherit its state. const removeConnectionsCode = (slug: string) => ` const list = await tools.executor.coreTools.connections.list({}); const mine = (list.ok ? list.data.connections : []).filter((c) => c.integration === ${JSON.stringify(slug)}); @@ -103,64 +100,87 @@ const executeJson = (session: McpSession, code: string) => return JSON.parse(result.text) as Record; }); -// The typed control-plane client — minting and ledger reads with real shapes -// instead of hand-rolled fetch + casts. -const emulator = Effect.promise(() => connectEmulator({ baseUrl: EMULATOR_BASE })); - -const mintEmulatorApiKey = Effect.gen(function* () { - const client = yield* emulator; - const credential = yield* Effect.promise(() => client.credentials.mint({ type: "api-key" })); - const token = credential.token; - if (!token) throw new Error(`emulator credential mint failed: ${JSON.stringify(credential)}`); - return token; +const availablePort = Effect.callback((resume) => { + const server = createServer(); + server.listen(0, "127.0.0.1", () => { + const address = server.address(); + const port = typeof address === "object" && address ? 
address.port : 0; + server.close(() => resume(Effect.succeed(port))); + }); }); -scenario( - "Connect · the agentic handoff URL opens this deployment's add-account flow and the pasted key works", - { timeout: 240_000 }, +const resendEmulator = Effect.acquireRelease( Effect.gen(function* () { - const target = yield* Target; - const mcp = yield* Mcp; - const browser = yield* Browser; - const { client: makeApiClient } = yield* Api; - - const integration = unique("resendhf"); - const emailSubject = unique("connect-handoff"); - const apiKey = yield* mintEmulatorApiKey; - - const identity = yield* target.newIdentity(); - const session = mcp.session(identity); - const client = yield* makeApiClient(api, identity); - - // The bound org's slug, read from the same account surface the console - // shell reads — the handoff URL must canonicalize onto exactly this. - const accountClient = yield* makeApiClient(AccountHttpApi, identity); - const me = yield* accountClient.account.me(); - const orgSlug = me.organization?.slug; - expect(orgSlug, "the bound organization advertises a URL slug").toBeTruthy(); - - yield* runScenario({ - target, - browser, - session, - identity, - integration, - emailSubject, - apiKey, - orgSlug: orgSlug!, - }).pipe( - // Best-effort cleanup even on failure: drop the created connection(s) - // over MCP, then the integration over the API. `connections.remove` is - // approval-gated, so the cleanup execute pauses per connection; - // `executeJson` auto-approves each pause so the removes actually run. - Effect.ensuring( - Effect.gen(function* () { - yield* executeJson(session, removeConnectionsCode(integration)); - yield* client.openapi.removeSpec({ params: { slug: integration } }); - }).pipe(Effect.ignore), - ), + const port = yield* availablePort; + // The suite-owned cloud and selfhost apps run on this host, and the + // production Docker lane uses host networking. 
An attached remote target + // cannot reach this URL and therefore fails at addSpec instead of passing + // against shared hosted state. + return yield* Effect.promise(() => + createEmulator({ + service: "resend", + port, + baseUrl: `http://127.0.0.1:${port}`, + }), ); }), + (emulator: Emulator) => Effect.promise(() => emulator.close()).pipe(Effect.ignore), +); + +scenario( + "Connect · the agentic handoff URL opens this deployment's add-account flow and the pasted key works", + { timeout: 240_000 }, + Effect.scoped( + Effect.gen(function* () { + const target = yield* Target; + const mcp = yield* Mcp; + const browser = yield* Browser; + const { client: makeApiClient } = yield* Api; + const emulator = yield* resendEmulator; + + const integration = unique("resendhf"); + const emailSubject = unique("connect-handoff"); + const credential = yield* Effect.promise(() => + emulator.credentials.mint({ type: "api-key" }), + ); + const apiKey = credential.token; + if (!apiKey) return yield* Effect.die("Resend emulator returned no API key."); + + const identity = yield* target.newIdentity(); + const session = mcp.session(identity); + const client = yield* makeApiClient(api, identity); + + // The bound org's slug, read from the same account surface the console + // shell reads. The handoff URL must canonicalize onto exactly this. + const accountClient = yield* makeApiClient(AccountHttpApi, identity); + const me = yield* accountClient.account.me(); + const orgSlug = me.organization?.slug; + expect(orgSlug, "the bound organization advertises a URL slug").toBeTruthy(); + + yield* runScenario({ + target, + browser, + session, + identity, + integration, + emailSubject, + apiKey, + orgSlug: orgSlug!, + emulator, + }).pipe( + // Best-effort cleanup even on failure: drop the created connection(s) + // over MCP, then the integration over the API. 
`connections.remove` is + // approval-gated, so the cleanup execute pauses per connection; + // `executeJson` auto-approves each pause so the removes actually run. + Effect.ensuring( + Effect.gen(function* () { + yield* executeJson(session, removeConnectionsCode(integration)).pipe(Effect.ignore); + yield* client.openapi.removeSpec({ params: { slug: integration } }).pipe(Effect.ignore); + }), + ), + ); + }), + ), ); const runScenario = (input: { @@ -172,13 +192,23 @@ const runScenario = (input: { readonly emailSubject: string; readonly apiKey: string; readonly orgSlug: string; + readonly emulator: Emulator; }) => Effect.gen(function* () { - const { target, browser, session, identity, integration, emailSubject, apiKey, orgSlug } = - input; + const { + target, + browser, + session, + identity, + integration, + emailSubject, + apiKey, + orgSlug, + emulator, + } = input; // 1. Agent registers the emulated provider over MCP. - const added = yield* executeJson(session, addSpecCode(integration)); + const added = yield* executeJson(session, addSpecCode(integration, emulator.openapiUrl)); expect(added.ok, `addSpec succeeded: ${JSON.stringify(added)}`).toBe(true); // 2. Agent asks for the browser handoff URL. @@ -187,7 +217,7 @@ const runScenario = (input: { const handoffUrl = String(handoff.url); // 3. The URL must target THIS deployment AND carry the bound org's slug. - // (Production returned a URL the user called "wrong/bad" — it had no slug, + // (Production returned a URL the user called "wrong/bad". It had no slug, // so a multi-org user could land in the wrong workspace. Pin both here.) 
const parsed = new URL(handoffUrl); expect(parsed.origin, `handoff URL (${handoffUrl}) targets this deployment`).toBe( @@ -227,8 +257,7 @@ const runScenario = (input: { const sent = yield* executeJson(session, sendEmailCode(integration, emailSubject)); expect(sent.ok, `email sent through the pasted connection: ${JSON.stringify(sent)}`).toBe(true); - const emulatorClient = yield* emulator; - const entries = yield* Effect.promise(() => emulatorClient.ledger.list()); + const entries = yield* Effect.promise(() => emulator.ledger.list()); const recorded = entries.find((entry) => JSON.stringify(entry.request.body ?? "").includes(emailSubject), ); diff --git a/e2e/scenarios/mcp-claude-code-real.test.ts b/e2e/scenarios/mcp-claude-code-real.test.ts new file mode 100644 index 000000000..b8b31372e --- /dev/null +++ b/e2e/scenarios/mcp-claude-code-real.test.ts @@ -0,0 +1,230 @@ +// Cross-target client compatibility with the REAL pinned Claude Code binary. +// Executor and MCP stay real. Only Anthropic Messages inference is replaced by +// a deterministic loopback replay transcript. +import { randomBytes } from "node:crypto"; + +import { expect } from "@effect/vitest"; +import { Effect } from "effect"; +import { composePluginApi } from "@executor-js/api/server"; +import { openApiHttpPlugin } from "@executor-js/plugin-openapi/api"; +import { IntegrationSlug } from "@executor-js/sdk/shared"; + +import { scenario } from "../src/scenario"; +import { Api, Billing, ClaudeCode, Mcp, RunDir, Target } from "../src/services"; +import { serveAnthropicReplayBrain } from "../src/clients/anthropic-replay-brain"; +import { expectedClaudeCodeVersion } from "../src/clients/claude-code"; +import { writeClaudeCodeEvidence } from "../src/clients/claude-code-evidence"; + +const SERVER_NAME = "executor"; +const api = composePluginApi([openApiHttpPlugin()] as const); + +const executeBrain = (code: string) => + serveAnthropicReplayBrain((context) => + context.lastToolResult + ? 
{ text: `executor-result:${context.lastToolResult}` } + : { tool: { name: "execute", input: { code } } }, + ); + +const integrationsCode = ` +const result = await tools.executor.coreTools.integrations.list({}); +if (!result.ok) return result; +return result.data.integrations.map((integration) => integration.slug); +`; + +const pingSpec = JSON.stringify({ + openapi: "3.0.3", + info: { title: "Claude account marker", version: "1.0.0" }, + paths: { + "/ping": { + get: { + operationId: "ping", + responses: { "200": { description: "pong" } }, + }, + }, + }, +}); + +scenario( + "Claude Code · the real client discovers Executor MCP and invokes execute", + { timeout: 180_000 }, + Effect.scoped( + Effect.gen(function* () { + const target = yield* Target; + const mcp = yield* Mcp; + const claude = yield* ClaudeCode; + const runDir = yield* RunDir; + const identity = yield* target.newIdentity(); + const bearer = yield* mcp.mintBearer(identity); + const home = claude.makeHome(SERVER_NAME, { + url: target.mcpUrl, + authorizationHeader: `Bearer ${bearer}`, + }); + + yield* Effect.gen(function* () { + const brain = yield* executeBrain("return 6 * 7;"); + const result = yield* claude.run(home, { + brainBaseUrl: brain.baseUrl, + prompt: "Use Executor to calculate six times seven.", + }); + yield* Effect.sync(() => + writeClaudeCodeEvidence(runDir, { + label: "execute-discovery", + executable: home.binaryPath, + expectedVersion: expectedClaudeCodeVersion(), + observedVersion: result.claudeCodeVersion, + durationMs: result.durationMs, + status: "success", + exitCode: 0, + stdout: result.stdout, + stderr: result.stderr, + structuredResult: result.result, + mcpServerName: SERVER_NAME, + mcpOrigin: target.mcpUrl, + replayOrigin: brain.baseUrl, + replayRequestPaths: brain.requests().map((request) => request.path), + replayErrors: brain.errors(), + secrets: [bearer], + }), + ); + + expect(result.result, "Claude returns Executor's real tool result").toContain("42"); + expect( + brain + 
.requests() + .some((request) => request.toolNames.some((name) => name.endsWith("__execute"))), + "Claude discovered Executor's execute tool through MCP", + ).toBe(true); + expect( + brain + .requests() + .flatMap((request) => request.messages) + .flatMap((message) => message.toolResults) + .some((toolResult) => toolResult.content.includes("42")), + "Claude returned the MCP tool result to the model boundary", + ).toBe(true); + expect(brain.errors()).toEqual([]); + }).pipe(Effect.ensuring(Effect.sync(() => claude.removeHome(home)))); + }), + ), +); + +scenario( + "Claude Code · replacing one MCP server name switches accounts without cache bleed", + { timeout: 240_000 }, + Effect.scoped( + Effect.gen(function* () { + yield* Billing; + const target = yield* Target; + const { client: makeClient } = yield* Api; + const mcp = yield* Mcp; + const claude = yield* ClaudeCode; + const runDir = yield* RunDir; + const accountA = yield* target.newIdentity(); + const accountB = yield* target.newIdentity(); + const clientA = yield* makeClient(api, accountA); + const clientB = yield* makeClient(api, accountB); + const suffix = randomBytes(4).toString("hex"); + const markerA = IntegrationSlug.make(`claude-account-a-${suffix}`); + const markerB = IntegrationSlug.make(`claude-account-b-${suffix}`); + + yield* clientA.openapi.addSpec({ + payload: { + spec: { kind: "blob", value: pingSpec }, + slug: markerA, + authenticationTemplate: [], + }, + }); + yield* clientB.openapi.addSpec({ + payload: { + spec: { kind: "blob", value: pingSpec }, + slug: markerB, + authenticationTemplate: [], + }, + }); + + const cleanup = Effect.all( + [ + clientA.openapi.removeSpec({ params: { slug: markerA } }).pipe(Effect.ignore), + clientB.openapi.removeSpec({ params: { slug: markerB } }).pipe(Effect.ignore), + ], + { concurrency: "unbounded" }, + ).pipe(Effect.asVoid); + + yield* Effect.gen(function* () { + const bearerA = yield* mcp.mintBearer(accountA); + const bearerB = yield* mcp.mintBearer(accountB); 
+ const home = claude.makeHome(SERVER_NAME, { + url: target.mcpUrl, + authorizationHeader: `Bearer ${bearerA}`, + }); + + yield* Effect.gen(function* () { + const brainA = yield* executeBrain(integrationsCode); + const first = yield* claude.run(home, { + brainBaseUrl: brainA.baseUrl, + prompt: "List the integration slugs visible to the current Executor account.", + }); + yield* Effect.sync(() => + writeClaudeCodeEvidence(runDir, { + label: "account-a-before-switch", + executable: home.binaryPath, + expectedVersion: expectedClaudeCodeVersion(), + observedVersion: first.claudeCodeVersion, + durationMs: first.durationMs, + status: "success", + exitCode: 0, + stdout: first.stdout, + stderr: first.stderr, + structuredResult: first.result, + mcpServerName: SERVER_NAME, + mcpOrigin: target.mcpUrl, + replayOrigin: brainA.baseUrl, + replayRequestPaths: brainA.requests().map((request) => request.path), + replayErrors: brainA.errors(), + secrets: [bearerA, bearerB], + }), + ); + expect(first.result, "account A sees its own marker").toContain(markerA); + expect(first.result, "account A cannot see account B's marker").not.toContain(markerB); + expect(brainA.errors()).toEqual([]); + + yield* claude.replaceServer(home, { + url: target.mcpUrl, + authorizationHeader: `Bearer ${bearerB}`, + }); + + const brainB = yield* executeBrain(integrationsCode); + const second = yield* claude.run(home, { + brainBaseUrl: brainB.baseUrl, + prompt: "List the integration slugs after switching the Executor account.", + }); + yield* Effect.sync(() => + writeClaudeCodeEvidence(runDir, { + label: "account-b-after-switch", + executable: home.binaryPath, + expectedVersion: expectedClaudeCodeVersion(), + observedVersion: second.claudeCodeVersion, + durationMs: second.durationMs, + status: "success", + exitCode: 0, + stdout: second.stdout, + stderr: second.stderr, + structuredResult: second.result, + mcpServerName: SERVER_NAME, + mcpOrigin: target.mcpUrl, + replayOrigin: brainB.baseUrl, + 
replayRequestPaths: brainB.requests().map((request) => request.path), + replayErrors: brainB.errors(), + secrets: [bearerA, bearerB], + }), + ); + expect(second.result, "account B sees its own marker").toContain(markerB); + expect(second.result, "Claude did not reuse account A's cached grant").not.toContain( + markerA, + ); + expect(brainB.errors()).toEqual([]); + }).pipe(Effect.ensuring(Effect.sync(() => claude.removeHome(home)))); + }).pipe(Effect.ensuring(cleanup)); + }), + ), +); diff --git a/e2e/scenarios/no-auth-connection.test.ts b/e2e/scenarios/no-auth-connection.test.ts index 7b47555bf..46916d375 100644 --- a/e2e/scenarios/no-auth-connection.test.ts +++ b/e2e/scenarios/no-auth-connection.test.ts @@ -1,25 +1,27 @@ // The agentic no-auth wire-up: an agent registers a public REST API over MCP // and then creates its connection PROGRAMMATICALLY through the gateway core -// tool — `coreTools.connections.create` with `template: "none"` and no +// tool, `coreTools.connections.create` with `template: "none"` and no // credential origin. This is the path that used to be impossible: the core // tool's arg schema demanded "exactly one provider credential origin", so an // agent wiring up a public, no-auth integration (public MCP server, public // REST API) was forced to bounce the user into the web UI via createHandoff, // even though the engine fully supports a zero-credential connection. // -// This scenario walks the WHOLE path against a real public no-auth API (the -// npm registry downloads endpoint, https://api.npmjs.org) so the proof is an -// actual 200 over the wire, not a stub: +// This scenario walks the WHOLE path against a deterministic wire-level +// no-auth API, so the proof is an actual 200 over the wire plus the upstream +// request ledger, without depending on the public npm registry: // // 1. MCP `execute` → `openapi.addSpec` registers a tiny no-auth spec // (no securitySchemes ⇒ the integration is no-auth) // 2. 
MCP `execute` → `coreTools.connections.create` with template "none" -// and NEITHER `from` NOR `inputs` — the call that used to fail validation +// and NEITHER `from` NOR `inputs`, the call that used to fail validation // 3. The operation is now a callable tool: invoke it and read back a 200 -// with the real download count +// with the deterministic download count // 4. Guard the relaxed-but-still-strict contract: a no-auth create that // DOES carry an origin (here an empty `inputs: {}`) is still rejected import { randomBytes } from "node:crypto"; +import { createServer, type Server } from "node:http"; +import type { AddressInfo } from "node:net"; import { expect } from "@effect/vitest"; import { Effect } from "effect"; @@ -34,34 +36,83 @@ const api = composePluginApi([openApiHttpPlugin()] as const); const unique = (prefix: string) => `${prefix}_${randomBytes(4).toString("hex")}`; -// A real public, no-auth REST API. No `components.securitySchemes` and no -// top-level `security`, so addSpec derives no auth method and the integration -// is no-auth — exactly the shape a connection on `template: "none"` targets. 
-const NPM_DOWNLOADS_SPEC = JSON.stringify({ - openapi: "3.0.3", - info: { title: "npm Registry Downloads", version: "1.0.0" }, - servers: [{ url: "https://api.npmjs.org" }], - paths: { - "/downloads/point/{period}/{package}": { - get: { - operationId: "getPackageDownloads", - summary: "Total downloads for a package over a fixed period", - parameters: [ - { name: "period", in: "path", required: true, schema: { type: "string" } }, - { name: "package", in: "path", required: true, schema: { type: "string" } }, - ], - responses: { - "200": { - description: "Download counts for the package", - content: { - "application/json": { - schema: { - type: "object", - properties: { - downloads: { type: "number" }, - start: { type: "string" }, - end: { type: "string" }, - package: { type: "string" }, +interface DownloadsApi { + readonly baseUrl: string; + readonly requests: ReadonlyArray<{ readonly method: string; readonly path: string }>; + readonly server: Server; +} + +const serveDownloadsApi = Effect.acquireRelease( + Effect.callback((resume) => { + const requests: Array<{ method: string; path: string }> = []; + const server = createServer((request, response) => { + const url = new URL(request.url ?? "/", "http://executor.test"); + requests.push({ method: request.method ?? "GET", path: url.pathname }); + if (request.method === "GET" && url.pathname === "/downloads/point/last-week/react") { + response.writeHead(200, { "content-type": "application/json" }); + response.end( + JSON.stringify({ + downloads: 4242, + start: "2026-06-15", + end: "2026-06-21", + package: "react", + }), + ); + return; + } + response.writeHead(404, { "content-type": "application/json" }); + response.end(JSON.stringify({ error: "not_found" })); + }); + server.listen(0, "127.0.0.1", () => { + const { port } = server.address() as AddressInfo; + resume( + Effect.succeed({ + // Suite-owned app targets run on this host, and the production + // Docker lane uses host networking for loopback test servers. 
+ baseUrl: `http://127.0.0.1:${port}`, + requests, + server, + }), + ); + }); + }), + ({ server }) => + Effect.sync(() => { + server.close(); + server.closeAllConnections?.(); + }), +); + +// No `components.securitySchemes` and no top-level `security`, so addSpec +// derives no auth method and the integration is no-auth, exactly the shape a +// connection on `template: "none"` targets. +const downloadsSpec = (baseUrl: string) => + JSON.stringify({ + openapi: "3.0.3", + info: { title: "Deterministic Downloads API", version: "1.0.0" }, + servers: [{ url: baseUrl }], + paths: { + "/downloads/point/{period}/{package}": { + get: { + operationId: "getPackageDownloads", + summary: "Total downloads for a package over a fixed period", + parameters: [ + { name: "period", in: "path", required: true, schema: { type: "string" } }, + { name: "package", in: "path", required: true, schema: { type: "string" } }, + ], + responses: { + "200": { + description: "Download counts for the package", + content: { + "application/json": { + schema: { + type: "object", + properties: { + downloads: { type: "number" }, + start: { type: "string" }, + end: { type: "string" }, + package: { type: "string" }, + }, }, }, }, @@ -70,14 +121,13 @@ const NPM_DOWNLOADS_SPEC = JSON.stringify({ }, }, }, - }, -}); + }); -const addSpecCode = (slug: string) => ` +const addSpecCode = (slug: string, baseUrl: string) => ` const added = await tools.executor.openapi.addSpec({ - spec: { kind: "blob", value: ${JSON.stringify(NPM_DOWNLOADS_SPEC)} }, + spec: { kind: "blob", value: ${JSON.stringify(downloadsSpec(baseUrl))} }, slug: ${JSON.stringify(slug)}, - baseUrl: "https://api.npmjs.org", + baseUrl: ${JSON.stringify(baseUrl)}, }); return added.ok ? { ok: true, slug: added.data.slug, toolCount: added.data.toolCount } : { ok: false, error: added.error }; `; @@ -93,7 +143,7 @@ const created = await tools.executor.coreTools.connections.create({ return created.ok ? 
{ ok: true, connection: created.data } : { ok: false, error: created.error }; `; -// The relaxed filter must still reject an origin on a no-auth create — an +// The relaxed filter must still reject an origin on a no-auth create. An // empty `inputs: {}` is a (degenerate) origin and a credential the connection // can't hold, so it stays a validation failure. const createNoAuthWithEmptyInputsCode = (slug: string) => ` @@ -141,66 +191,72 @@ const executeJson = (session: McpSession, code: string) => }); scenario( - "Connections · an agent creates a no-auth connection over the core tool and the public API answers 200", + "Connections · an agent creates a no-auth connection and the upstream API answers 200", { timeout: 180_000 }, - Effect.gen(function* () { - const target = yield* Target; - const mcp = yield* Mcp; - const { client: makeApiClient } = yield* Api; - - const integration = unique("npmdl"); - const identity = yield* target.newIdentity(); - const session = mcp.session(identity); - const client = yield* makeApiClient(api, identity); - - yield* Effect.gen(function* () { - // 1. Register the public no-auth API over MCP. - const added = yield* executeJson(session, addSpecCode(integration)); - expect(added.ok, `addSpec succeeded: ${JSON.stringify(added)}`).toBe(true); - expect(added.toolCount, "the spec's operation was extracted as a tool").toBe(1); - - // 2. THE FIX: create the connection with template "none" and NO origin. - // Pre-fix this failed arg validation with - // "Expected exactly one provider credential origin". - const created = yield* executeJson(session, createNoAuthConnectionCode(integration)); - expect( - created.ok, - `no-auth connection created via the core tool: ${JSON.stringify(created)}`, - ).toBe(true); - expect( - (created.connection as { template?: string } | undefined)?.template, - "the connection is saved on the no-auth template", - ).toBe("none"); - - // 3. The operation is a live tool: invoke it and read back a real 200. 
- const invoked = yield* executeJson(session, invokeDownloadsCode(integration)); - expect( - invoked.ok, - `the no-auth operation answered over the wire: ${JSON.stringify(invoked)}`, - ).toBe(true); - const downloads = (invoked.data as { downloads?: number } | undefined)?.downloads; - expect(typeof downloads, "the public API returned a download count").toBe("number"); - expect(downloads as number, "react has a non-zero weekly download count").toBeGreaterThan(0); - - // 4. The relaxation is narrow: a no-auth create that carries an origin - // (empty `inputs: {}`) is still rejected. - const rejected = yield* executeJson(session, createNoAuthWithEmptyInputsCode(integration)); - expect( - rejected.ok, - `a no-auth create with an empty inputs origin is rejected: ${JSON.stringify(rejected)}`, - ).toBe(false); - }).pipe( - // Selfhost shares one workspace identity — leaked connections fail other - // scenarios' zero-state assertions, so drop everything this run made. - // `connections.remove` is approval-gated, so the cleanup execute pauses - // per connection; `executeJson` auto-approves each pause so the removes - // actually run. - Effect.ensuring( - Effect.gen(function* () { - yield* executeJson(session, removeConnectionsCode(integration)); - yield* client.openapi.removeSpec({ params: { slug: integration } }); - }).pipe(Effect.ignore), - ), - ); - }), + Effect.scoped( + Effect.gen(function* () { + const target = yield* Target; + const mcp = yield* Mcp; + const { client: makeApiClient } = yield* Api; + const upstream = yield* serveDownloadsApi; + + const integration = unique("downloads"); + const identity = yield* target.newIdentity(); + const session = mcp.session(identity); + const client = yield* makeApiClient(api, identity); + + yield* Effect.gen(function* () { + // 1. Register the no-auth API over MCP. 
+ const added = yield* executeJson(session, addSpecCode(integration, upstream.baseUrl)); + expect(added.ok, `addSpec succeeded: ${JSON.stringify(added)}`).toBe(true); + expect(added.toolCount, "the spec's operation was extracted as a tool").toBe(1); + + // 2. THE FIX: create the connection with template "none" and NO origin. + // Pre-fix this failed arg validation with + // "Expected exactly one provider credential origin". + const created = yield* executeJson(session, createNoAuthConnectionCode(integration)); + expect( + created.ok, + `no-auth connection created via the core tool: ${JSON.stringify(created)}`, + ).toBe(true); + expect( + (created.connection as { template?: string } | undefined)?.template, + "the connection is saved on the no-auth template", + ).toBe("none"); + + // 3. The operation is a live tool: invoke it and read back a real 200. + const invoked = yield* executeJson(session, invokeDownloadsCode(integration)); + expect( + invoked.ok, + `the no-auth operation answered over the wire: ${JSON.stringify(invoked)}`, + ).toBe(true); + expect( + (invoked.data as { downloads?: number } | undefined)?.downloads, + "the deterministic API response crossed the full tool path", + ).toBe(4242); + expect( + upstream.requests, + "the upstream ledger recorded the exact no-auth request", + ).toContainEqual({ method: "GET", path: "/downloads/point/last-week/react" }); + + // 4. The relaxation is narrow: a no-auth create that carries an origin + // (empty `inputs: {}`) is still rejected. + const rejected = yield* executeJson(session, createNoAuthWithEmptyInputsCode(integration)); + expect( + rejected.ok, + `a no-auth create with an empty inputs origin is rejected: ${JSON.stringify(rejected)}`, + ).toBe(false); + }).pipe( + // Install cleanup before any product resource is created. Selfhost + // shares one workspace identity, so every connection and the spec must + // be removed even when an assertion or upstream call fails. 
+ Effect.ensuring( + Effect.gen(function* () { + yield* executeJson(session, removeConnectionsCode(integration)).pipe(Effect.ignore); + yield* client.openapi.removeSpec({ params: { slug: integration } }).pipe(Effect.ignore); + }), + ), + ); + }), + ), ); diff --git a/e2e/scenarios/oauth-callback-url.test.ts b/e2e/scenarios/oauth-callback-url.test.ts index f2012b7e3..b71a5b9be 100644 --- a/e2e/scenarios/oauth-callback-url.test.ts +++ b/e2e/scenarios/oauth-callback-url.test.ts @@ -2,7 +2,7 @@ // they register an OAuth app. Two guarantees: // // 1. Accuracy (every target): the callback the authorization-code flow sends -// to the provider is `${origin}/api/oauth/callback` — the URL the form +// to the provider is `${origin}/api/oauth/callback`, the URL the form // shows. Run on cloud + self-host so the per-platform mount prefix is // proven, not assumed. `ExecutorApp.make` derives this path from the same // `mountPrefix` that mounts the API, so omitting a per-host knob can no @@ -83,50 +83,67 @@ scenario( const identity = yield* target.newIdentity(); const client = yield* makeApiClient(api, identity); - // What the registration form shows for THIS target — the same value the + // What the registration form shows for THIS target, the same value the // React `oauthCallbackUrl()` helper resolves from `window.location`. 
const expectedCallback = new URL("/api/oauth/callback", target.baseUrl).toString(); const integration = IntegrationSlug.make(unique("cburlint")); - yield* client.openapi.addSpec({ - payload: { ...oauthIntegrationSpec(oauth), slug: integration }, - }); - const clientSlug = OAuthClientSlug.make(unique("cburlc")); - yield* client.oauth.createClient({ - payload: { - owner: "org", - slug: clientSlug, - authorizationUrl: oauth.authorizationEndpoint, - tokenUrl: oauth.tokenEndpoint, - grant: "authorization_code", - clientId: "test-client", - clientSecret: "test-secret", - }, - }); + const connection = ConnectionName.make("main"); - // start WITHOUT a redirectUri — the platform falls back to its OWN - // configured callback, which is exactly what the form would have shown. - const started = yield* client.oauth.start({ - payload: { - client: clientSlug, - clientOwner: "org", - owner: "org", - name: ConnectionName.make("main"), - integration, - template: AuthTemplateSlug.make("oauth"), - }, - }); - expect(started.status, "oauth.start hands back a redirect to the authorization server").toBe( - "redirect", - ); - const authorizationUrl = started.status === "redirect" ? started.authorizationUrl : ""; + yield* Effect.gen(function* () { + yield* client.openapi.addSpec({ + payload: { ...oauthIntegrationSpec(oauth), slug: integration }, + }); - const redirectUri = new URL(authorizationUrl).searchParams.get("redirect_uri"); - expect( - redirectUri, - "the authorization request redirects to this platform's served callback", - ).toBe(expectedCallback); + yield* client.oauth.createClient({ + payload: { + owner: "org", + slug: clientSlug, + authorizationUrl: oauth.authorizationEndpoint, + tokenUrl: oauth.tokenEndpoint, + grant: "authorization_code", + clientId: "test-client", + clientSecret: "test-secret", + }, + }); + + // start WITHOUT a redirectUri. The platform falls back to its OWN + // configured callback, which is exactly what the form would have shown. 
+ const started = yield* client.oauth.start({ + payload: { + client: clientSlug, + clientOwner: "org", + owner: "org", + name: connection, + integration, + template: AuthTemplateSlug.make("oauth"), + }, + }); + expect( + started.status, + "oauth.start hands back a redirect to the authorization server", + ).toBe("redirect"); + const authorizationUrl = started.status === "redirect" ? started.authorizationUrl : ""; + + const redirectUri = new URL(authorizationUrl).searchParams.get("redirect_uri"); + expect( + redirectUri, + "the authorization request redirects to this platform's served callback", + ).toBe(expectedCallback); + }).pipe( + Effect.ensuring( + Effect.gen(function* () { + yield* client.connections + .remove({ params: { owner: "org", integration, name: connection } }) + .pipe(Effect.ignore); + yield* client.oauth + .removeClient({ params: { slug: clientSlug }, payload: { owner: "org" } }) + .pipe(Effect.ignore); + yield* client.openapi.removeSpec({ params: { slug: integration } }).pipe(Effect.ignore); + }), + ), + ); }), ), ); @@ -147,26 +164,32 @@ scenario( // offers the "Register app" CTA (no automatic registration to short-circuit // it). 
const integration = IntegrationSlug.make(unique("cburlui")); - yield* client.openapi.addSpec({ - payload: { ...oauthIntegrationSpec(oauth), slug: integration }, - }); - - yield* browser.session(identity, async ({ page, step }) => { - await step("Open the connect modal for an OAuth integration", async () => { - await page.goto(`/integrations/${String(integration)}?addAccount=1`, { - waitUntil: "networkidle", - }); - await page.getByRole("button", { name: "Register app", exact: true }).click(); + yield* Effect.gen(function* () { + yield* client.openapi.addSpec({ + payload: { ...oauthIntegrationSpec(oauth), slug: integration }, }); - await step("The OAuth app form shows this platform's callback URL", async () => { - const callback = page.locator("#oauth-callback-url"); - await callback.waitFor(); - const shown = (await callback.textContent())?.trim(); - expect(shown, "the displayed callback URL matches the platform's served callback").toBe( - expectedCallback, - ); + yield* browser.session(identity, async ({ page, step }) => { + await step("Open the connect modal for an OAuth integration", async () => { + await page.goto(`/integrations/${String(integration)}?addAccount=1`, { + waitUntil: "networkidle", + }); + await page.getByRole("button", { name: "Register app", exact: true }).click(); + }); + + await step("The OAuth app form shows this platform's callback URL", async () => { + const callback = page.locator("#oauth-callback-url"); + await callback.waitFor(); + const shown = (await callback.textContent())?.trim(); + expect(shown, "the displayed callback URL matches the platform's served callback").toBe( + expectedCallback, + ); + }); }); - }); + }).pipe( + Effect.ensuring( + client.openapi.removeSpec({ params: { slug: integration } }).pipe(Effect.ignore), + ), + ); }).pipe(Effect.scoped), ); diff --git a/e2e/scenarios/policies.test.ts b/e2e/scenarios/policies.test.ts index 632dae2e5..18cff19dd 100644 --- a/e2e/scenarios/policies.test.ts +++ 
b/e2e/scenarios/policies.test.ts @@ -1,5 +1,7 @@ -// Cross-target: policies CRUD through the typed HttpApiClient — a created +// Cross-target: policies CRUD through the typed HttpApiClient. A created // policy comes back in the list with the shape that was sent. +import { randomBytes } from "node:crypto"; + import { expect } from "@effect/vitest"; import { Effect } from "effect"; import { composePluginApi } from "@executor-js/api/server"; @@ -17,18 +19,31 @@ scenario( const { client } = yield* Api; const identity = yield* target.newIdentity(); const api = yield* client(coreApi, identity); + const pattern = `policies-scn-${randomBytes(4).toString("hex")}.*`; - const created = yield* api.policies.create({ - payload: { owner: "org", pattern: "policies-scn.*", action: "block" }, - }); - expect(created.owner).toBe("org"); - expect(created.pattern).toBe("policies-scn.*"); - expect(created.action).toBe("block"); + yield* Effect.acquireUseRelease( + api.policies.create({ + payload: { owner: "org", pattern, action: "block" }, + }), + (created) => + Effect.gen(function* () { + expect(created.owner).toBe("org"); + expect(created.pattern).toBe(pattern); + expect(created.action).toBe("block"); - const list = yield* api.policies.list(); - const found = list.find((p) => p.id === created.id); - expect(found, "created policy appears in the list").toBeDefined(); - expect(found?.pattern, "listed entry preserves the pattern").toBe("policies-scn.*"); - expect(found?.action, "listed entry preserves the action").toBe("block"); + const list = yield* api.policies.list(); + const found = list.find((policy) => policy.id === created.id); + expect(found, "created policy appears in the list").toBeDefined(); + expect(found?.pattern, "listed entry preserves the pattern").toBe(pattern); + expect(found?.action, "listed entry preserves the action").toBe("block"); + }), + (created) => + api.policies + .remove({ + params: { policyId: created.id }, + payload: { owner: created.owner }, + }) + 
.pipe(Effect.ignore), + ); }), ); diff --git a/e2e/scenarios/restart-persistence.test.ts b/e2e/scenarios/restart-persistence.test.ts index 2990d80ab..dbf687b97 100644 --- a/e2e/scenarios/restart-persistence.test.ts +++ b/e2e/scenarios/restart-persistence.test.ts @@ -1,10 +1,10 @@ -// Cross-target (runs where the target can restart itself — today the +// Cross-target (runs where the target can restart itself, today the // production Docker artifact): writes survive a process restart. This is the // durability property a dev-server suite with a fresh data dir can never // catch by accident, and the one the selfhost WAL split-brain broke: the // executor's libSQL connection wrote to a WAL that was unlinked during boot // while Better Auth's connection created a fresh one, so every executor-core -// write (integrations, connections, tools) silently vanished on restart — +// write (integrations, connections, tools) silently vanished on restart, // surfacing to users as "my reconnected Google account has zero Gmail tools". import { randomBytes } from "node:crypto"; @@ -46,40 +46,45 @@ scenario( const before = yield* client(api, identity); const slug = `restart-persist-${randomBytes(4).toString("hex")}`; - const added = yield* before.openapi.addSpec({ - payload: { - spec: { kind: "blob", value: pingSpec }, - slug, - authenticationTemplate: [], - }, - }); - expect(added.toolCount, "the spec registered with tools").toBeGreaterThan(0); + yield* Effect.gen(function* () { + const added = yield* before.openapi.addSpec({ + payload: { + spec: { kind: "blob", value: pingSpec }, + slug, + authenticationTemplate: [], + }, + }); + expect(added.toolCount, "the spec registered with tools").toBeGreaterThan(0); - // The write is visible before the restart — so a post-restart absence is - // a durability failure, not a registration failure. 
- const integrationsBefore = yield* before.integrations.list(); - expect( - integrationsBefore.map((i) => String(i.slug)), - "the integration is listed before the restart", - ).toContain(slug); + // The write is visible before the restart, so a post-restart absence is + // a durability failure, not a registration failure. + const integrationsBefore = yield* before.integrations.list(); + expect( + integrationsBefore.map((integration) => String(integration.slug)), + "the integration is listed before the restart", + ).toContain(slug); - yield* restart(); + yield* restart(); - // Sessions are DB-backed; sign in fresh anyway so this scenario only - // asserts on the executor-core rows, not on auth-session survival. - const after = yield* client(api, yield* target.newIdentity()); + // Sessions are DB-backed; sign in fresh anyway so this scenario only + // asserts on the executor-core rows, not on auth-session survival. + const after = yield* client(api, yield* target.newIdentity()); - yield* Effect.ensuring( - Effect.gen(function* () { - const integrationsAfter = yield* after.integrations.list(); - expect( - integrationsAfter.map((i) => String(i.slug)), - "the integration survived the restart", - ).toContain(slug); - }), - // Shared bootstrap-admin instance — never leak the integration, even - // when the survival assertion fails. - after.openapi.removeSpec({ params: { slug } }).pipe(Effect.ignore), + const integrationsAfter = yield* after.integrations.list(); + expect( + integrationsAfter.map((integration) => String(integration.slug)), + "the integration survived the restart", + ).toContain(slug); + }).pipe( + Effect.ensuring( + // Shared bootstrap-admin instance: never leak the integration, even + // when registration, restart, or the survival assertion fails. 
+ Effect.gen(function* () { + const cleanupIdentity = yield* target.newIdentity(); + const cleanupClient = yield* client(api, cleanupIdentity); + yield* cleanupClient.openapi.removeSpec({ params: { slug } }); + }).pipe(Effect.ignore), + ), ); }), ); diff --git a/e2e/scenarios/toolkits-mcp.test.ts b/e2e/scenarios/toolkits-mcp.test.ts index 7ebc682f9..ba3ca6c35 100644 --- a/e2e/scenarios/toolkits-mcp.test.ts +++ b/e2e/scenarios/toolkits-mcp.test.ts @@ -170,8 +170,6 @@ scenario( "metadata still advertises authorization servers", ).toBe(true); - if (target.name === "cloudflare") return; - const challenged = yield* Effect.promise(() => fetch(mcpUrl, { method: "POST", diff --git a/e2e/scripts/boot-stack.ts b/e2e/scripts/boot-stack.ts index 20d210f1f..9683bcb85 100644 --- a/e2e/scripts/boot-stack.ts +++ b/e2e/scripts/boot-stack.ts @@ -57,8 +57,22 @@ const procs = bootProcesses( { label: "record-stack" }, ); -await waitForHttp(baseUrl); -await waitForHttp(`${baseUrl}/api/auth/login`, { expectRedirect: true }); +await procs.waitUntilReady( + (async () => { + await waitForHttp(baseUrl); + await waitForHttp(`${baseUrl}/api/auth/login`, { + expectRedirect: true, + validateResponse: (response) => { + const location = response.headers.get("location"); + return ( + location !== null && + URL.canParse(location, workos.url) && + new URL(location, workos.url).origin === new URL(workos.url).origin + ); + }, + }); + })(), +); console.log(`ready: ${baseUrl}`); const teardown = async () => { diff --git a/e2e/scripts/cleanup-linux-kvm.ts b/e2e/scripts/cleanup-linux-kvm.ts new file mode 100644 index 000000000..4f3e52858 --- /dev/null +++ b/e2e/scripts/cleanup-linux-kvm.ts @@ -0,0 +1,125 @@ +// Exact-resource recovery for a cancelled desktop KVM job. The driver writes +// this ledger before creating its work directory or invoking virt-install. 
+// Cleanup validates scope, work root, libvirt URI, and per-process markers, +// then addresses only the recorded host children, domain, and work directory. + +import { existsSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { dirname, resolve } from "node:path"; +import { fileURLToPath } from "node:url"; + +import { + cleanupLibvirtLinuxKvmFromLedger, + sweepStaleLibvirtLinuxKvm, +} from "../src/vm/linux-kvm-libvirt"; + +export const cleanupLinuxKvmLedger = async (input: { + readonly ledgerPath: string | undefined; + readonly expectedRepositoryScope: string | undefined; + readonly expectedRunScope: string | undefined; + readonly expectedLedgerDirectory?: string; + readonly expectedWorkRoot?: string; + readonly expectedLibvirtUri?: string; + readonly ledgerExists?: (path: string) => boolean; +}) => { + if (!input.ledgerPath) { + throw new Error("cleanup-linux-kvm requires a ledger path or E2E_KVM_CLEANUP_LEDGER"); + } + const ledgerExists = input.ledgerExists ?? existsSync; + if (!ledgerExists(input.ledgerPath)) { + return { status: "missing", ledgerPath: input.ledgerPath } as const; + } + if (!input.expectedRunScope) { + throw new Error("cleanup-linux-kvm requires E2E_KVM_RUN_SCOPE when a ledger exists"); + } + if (!input.expectedRepositoryScope) { + throw new Error("cleanup-linux-kvm requires E2E_KVM_REPOSITORY_SCOPE when a ledger exists"); + } + if (!input.expectedLedgerDirectory) { + throw new Error("cleanup-linux-kvm requires E2E_KVM_LEDGER_DIR when a ledger exists"); + } + const ledgerPath = resolve(input.ledgerPath); + const expectedLedgerDirectory = resolve(input.expectedLedgerDirectory); + if (dirname(ledgerPath) !== expectedLedgerDirectory) { + throw new Error( + `cleanup-linux-kvm ledger is outside ${expectedLedgerDirectory}: ${ledgerPath}`, + ); + } + const cleaned = await cleanupLibvirtLinuxKvmFromLedger(ledgerPath, { + expectedRepositoryScope: input.expectedRepositoryScope, + expectedRunScope: input.expectedRunScope, + 
expectedWorkRoot: input.expectedWorkRoot || tmpdir(), + expectedLibvirtUri: input.expectedLibvirtUri || "qemu:///system", + }); + return { + status: "cleaned", + ledgerPath, + domainName: cleaned.domainName, + } as const; +}; + +export const sweepLinuxKvmRepository = (input: { + readonly ledgerDirectory: string | undefined; + readonly repositoryScope: string | undefined; + readonly staleTtlMs: string | undefined; + readonly currentLedgerPath?: string; + readonly expectedWorkRoot?: string; + readonly expectedLibvirtUri?: string; +}) => { + if (!input.ledgerDirectory) { + throw new Error("cleanup-linux-kvm sweep requires E2E_KVM_LEDGER_DIR"); + } + if (!input.repositoryScope) { + throw new Error("cleanup-linux-kvm sweep requires E2E_KVM_REPOSITORY_SCOPE"); + } + const ttlMs = Number(input.staleTtlMs); + if (!input.staleTtlMs || !Number.isSafeInteger(ttlMs) || ttlMs <= 0) { + throw new Error("cleanup-linux-kvm sweep requires a positive E2E_KVM_STALE_TTL_MS"); + } + return sweepStaleLibvirtLinuxKvm({ + ledgerDirectory: input.ledgerDirectory, + repositoryScope: input.repositoryScope, + ttlMs, + currentLedgerPath: input.currentLedgerPath, + expectedWorkRoot: input.expectedWorkRoot || tmpdir(), + expectedLibvirtUri: input.expectedLibvirtUri || "qemu:///system", + }); +}; + +const main = async () => { + if (process.argv[2] === "sweep") { + const result = await sweepLinuxKvmRepository({ + ledgerDirectory: process.env.E2E_KVM_LEDGER_DIR, + repositoryScope: process.env.E2E_KVM_REPOSITORY_SCOPE, + staleTtlMs: process.env.E2E_KVM_STALE_TTL_MS, + currentLedgerPath: process.env.E2E_KVM_CLEANUP_LEDGER, + expectedWorkRoot: process.env.E2E_KVM_WORK_ROOT, + expectedLibvirtUri: process.env.E2E_LIBVIRT_URI, + }); + console.log( + `cleanup-linux-kvm: scanned=${result.scanned} cleaned=${result.cleaned.length} fresh=${result.preservedFresh.length} active=${result.preservedActive.length} current=${result.preservedCurrent.length}`, + ); + return; + } + const result = await 
cleanupLinuxKvmLedger({ + ledgerPath: process.argv[2] || process.env.E2E_KVM_CLEANUP_LEDGER, + expectedLedgerDirectory: process.env.E2E_KVM_LEDGER_DIR, + expectedRepositoryScope: process.env.E2E_KVM_REPOSITORY_SCOPE, + expectedRunScope: process.env.E2E_KVM_RUN_SCOPE, + expectedWorkRoot: process.env.E2E_KVM_WORK_ROOT, + expectedLibvirtUri: process.env.E2E_LIBVIRT_URI, + }); + if (result.status === "missing") { + console.log(`cleanup-linux-kvm: no ledger at ${result.ledgerPath}`); + } else { + console.log(`cleanup-linux-kvm: removed ${result.domainName}`); + } +}; + +const invokedPath = process.argv[1] ? resolve(process.argv[1]) : ""; +if (invokedPath === fileURLToPath(import.meta.url)) { + void main().catch((error) => { + console.error(error); + process.exitCode = 1; + }); +} diff --git a/e2e/scripts/cleanup-vms.ts b/e2e/scripts/cleanup-vms.ts new file mode 100644 index 000000000..bdee421ef --- /dev/null +++ b/e2e/scripts/cleanup-vms.ts @@ -0,0 +1,51 @@ +import { cleanupCurrentEc2Resources, sweepExpiredEc2Resources } from "../src/vm/ec2-lifecycle"; +import { cleanupCurrentTartResources, sweepExpiredTartResources } from "../src/vm/tart-lifecycle"; + +const usage = + "usage: bun e2e/scripts/cleanup-vms.ts tart|ec2 [--sweep-expired --minimum-age-hours N]"; + +const optionValue = (args: readonly string[], option: string) => { + const index = args.indexOf(option); + if (index === -1) return undefined; + return args[index + 1]; +}; + +const main = async () => { + const [provider, ...options] = process.argv.slice(2); + if (provider === "tart") { + if (!options.includes("--sweep-expired")) { + if (options.length > 0) throw new Error(usage); + const result = await cleanupCurrentTartResources(); + console.log(`deleted ${result.deleted} tart VM(s) for scope ${result.scope}`); + return; + } + + const rawMinimumAge = optionValue(options, "--minimum-age-hours"); + if (!rawMinimumAge || options.length !== 3) throw new Error(usage); + const result = await 
sweepExpiredTartResources({ + minimumAgeHours: Number(rawMinimumAge), + }); + console.log(`deleted ${result.deleted} expired tart VM(s) owned by ${result.repository}`); + return; + } + + if (provider === "ec2") { + if (!options.includes("--sweep-expired")) { + if (options.length > 0) throw new Error(usage); + const result = await cleanupCurrentEc2Resources(); + console.log(`deleted ${result.deleted} EC2 resource(s) for scope ${result.scope}`); + return; + } + + const rawMinimumAge = optionValue(options, "--minimum-age-hours"); + if (!rawMinimumAge || options.length !== 3) throw new Error(usage); + const minimumAgeHours = Number(rawMinimumAge); + const result = await sweepExpiredEc2Resources({ minimumAgeHours }); + console.log(`deleted ${result.deleted} expired EC2 resource(s) owned by ${result.repository}`); + return; + } + + throw new Error(usage); +}; + +await main(); diff --git a/e2e/scripts/cloudflare-access-emulator.ts b/e2e/scripts/cloudflare-access-emulator.ts new file mode 100644 index 000000000..714e21e2a --- /dev/null +++ b/e2e/scripts/cloudflare-access-emulator.ts @@ -0,0 +1,267 @@ +import { generateKeyPairSync, sign } from "node:crypto"; +import { createServer, type IncomingMessage, type ServerResponse } from "node:http"; + +import { + E2E_CLOUDFLARE_ACCESS_AUDIENCE, + type CloudflareAccessLedgerEntry, + type CloudflareAccessTokenRequest, +} from "../src/cloudflare-access-emulator"; + +// @executor-js/emulate has generic OIDC providers, including Okta, but none +// implements Cloudflare Access. Okta can issue human ID tokens and serve JWKS, +// but it cannot emit Access's application-token wire shape (`type: app`, array +// `aud`, empty service `sub`, and `common_name`) or the +// `/cdn-cgi/access/certs` endpoint. A single scoped issuer here proves both +// human and service assertions against the exact origin-facing contract. 
+const MAX_BODY_BYTES = 16 * 1024; +const MAX_LEDGER_ENTRIES = 200; + +const argument = (name: string) => { + const index = process.argv.indexOf(name); + return index < 0 ? undefined : process.argv[index + 1]; +}; + +const port = Number(argument("--port")); +const audience = argument("--audience") ?? E2E_CLOUDFLARE_ACCESS_AUDIENCE; +const bootNonce = argument("--boot-nonce"); +if (!Number.isInteger(port) || port <= 0 || port > 65_535) { + console.error("cloudflare-access-emulator: --port must be a port in 1-65535"); + process.exit(2); +} +if (!bootNonce || !/^[a-zA-Z0-9-]{8,128}$/.test(bootNonce)) { + console.error("cloudflare-access-emulator: --boot-nonce must identify this boot"); + process.exit(2); +} + +const issuer = `http://127.0.0.1:${port}`; +const keyId = `executor-e2e-access-${bootNonce}`; +const { privateKey, publicKey } = generateKeyPairSync("rsa", { + modulusLength: 2048, + publicExponent: 0x10001, +}); +const exportedPublicKey = publicKey.export({ format: "jwk" }); +const jwk = { + kty: exportedPublicKey.kty, + n: exportedPublicKey.n, + e: exportedPublicKey.e, + alg: "RS256", + use: "sig", + kid: keyId, +}; + +// This process is one ephemeral, loopback-only fixture. The ledger intentionally +// records only route metadata and token kind, never headers, claims, or JWTs. +const ledger: CloudflareAccessLedgerEntry[] = []; +let nextLedgerId = 1; + +const record = ( + request: IncomingMessage, + path: string, + status: number, + operation: string, + tokenKind?: "human" | "service", +) => { + ledger.push({ + id: nextLedgerId++, + timestamp: new Date().toISOString(), + method: request.method ?? "UNKNOWN", + path, + status, + operation, + ...(tokenKind ? 
{ tokenKind } : {}), + }); + if (ledger.length > MAX_LEDGER_ENTRIES) ledger.splice(0, ledger.length - MAX_LEDGER_ENTRIES); +}; + +const sendJson = ( + request: IncomingMessage, + response: ServerResponse, + path: string, + status: number, + operation: string, + body: unknown, + options: { + readonly headers?: Readonly>; + readonly tokenKind?: "human" | "service"; + } = {}, +) => { + record(request, path, status, operation, options.tokenKind); + response.writeHead(status, { + "content-type": "application/json", + ...options.headers, + }); + response.end(JSON.stringify(body)); +}; + +const isRecord = (value: unknown): value is Record => + typeof value === "object" && value !== null && !Array.isArray(value); + +const optionalString = (value: unknown) => (typeof value === "string" ? value : undefined); + +const optionalStringArray = (value: unknown) => + Array.isArray(value) && value.every((item) => typeof item === "string") ? value : undefined; + +const optionalExpiry = (value: unknown) => + typeof value === "number" && Number.isInteger(value) && value >= -3_600 && value <= 3_600 + ? value + : undefined; + +const tokenRequest = (value: unknown): CloudflareAccessTokenRequest | undefined => { + if (!isRecord(value)) return undefined; + const requestedAudience = optionalString(value.audience); + const expiresInSeconds = optionalExpiry(value.expiresInSeconds); + if (value.expiresInSeconds !== undefined && expiresInSeconds === undefined) return undefined; + + if ( + value.kind === "human" && + typeof value.subject === "string" && + value.subject.length > 0 && + typeof value.email === "string" && + value.email.length > 0 + ) { + const groups = optionalStringArray(value.groups); + if (value.groups !== undefined && groups === undefined) return undefined; + return { + kind: "human", + subject: value.subject, + email: value.email, + ...(optionalString(value.name) ? { name: optionalString(value.name) } : {}), + ...(groups ? { groups } : {}), + ...(requestedAudience ? 
{ audience: requestedAudience } : {}), + ...(expiresInSeconds === undefined ? {} : { expiresInSeconds }), + }; + } + + if ( + value.kind === "service" && + typeof value.commonName === "string" && + value.commonName.length > 0 + ) { + return { + kind: "service", + commonName: value.commonName, + ...(requestedAudience ? { audience: requestedAudience } : {}), + ...(expiresInSeconds === undefined ? {} : { expiresInSeconds }), + }; + } + return undefined; +}; + +const encode = (value: unknown) => Buffer.from(JSON.stringify(value)).toString("base64url"); + +const issue = (request: CloudflareAccessTokenRequest) => { + const now = Math.floor(Date.now() / 1_000); + const claims: Record = { + type: "app", + aud: [request.audience ?? audience], + iss: issuer, + iat: now, + exp: now + (request.expiresInSeconds ?? 300), + ...(request.kind === "human" + ? { + sub: request.subject, + email: request.email, + name: request.name ?? request.email, + groups: request.groups ?? ["member"], + } + : { sub: "", common_name: request.commonName }), + }; + const header = encode({ alg: "RS256", kid: keyId, typ: "JWT" }); + const payload = encode(claims); + const signingInput = `${header}.${payload}`; + const signature = sign("RSA-SHA256", Buffer.from(signingInput), privateKey).toString("base64url"); + return { token: `${signingInput}.${signature}` }; +}; + +const server = createServer((request, response) => { + const url = new URL(request.url ?? 
"/", issuer); + if (request.method === "GET" && url.pathname === "/health") { + sendJson(request, response, url.pathname, 200, "health.read", { ok: true, bootNonce }); + return; + } + if (request.method === "GET" && url.pathname === "/cdn-cgi/access/certs") { + sendJson( + request, + response, + url.pathname, + 200, + "jwks.read", + { keys: [jwk] }, + { + headers: { "cache-control": "public, max-age=60" }, + }, + ); + return; + } + if (request.method === "GET" && url.pathname === "/_e2e/ledger") { + sendJson( + request, + response, + url.pathname, + 200, + "ledger.read", + { entries: [...ledger] }, + { + headers: { "cache-control": "no-store" }, + }, + ); + return; + } + if (request.method !== "POST" || url.pathname !== "/_e2e/issue") { + sendJson(request, response, url.pathname, 404, "route.not-found", { error: "not found" }); + return; + } + + const chunks: Buffer[] = []; + let size = 0; + let tooLarge = false; + request.on("data", (chunk: Buffer) => { + size += chunk.byteLength; + if (size > MAX_BODY_BYTES) { + tooLarge = true; + return; + } + chunks.push(chunk); + }); + request.on("end", () => { + if (tooLarge) { + sendJson(request, response, url.pathname, 413, "token.issue.rejected", { + error: "request body too large", + }); + return; + } + let value: unknown; + // oxlint-disable-next-line executor/no-try-catch-or-throw -- boundary: malformed control-plane JSON becomes an HTTP 400 + try { + value = JSON.parse(Buffer.concat(chunks).toString("utf8")); + } catch { + sendJson(request, response, url.pathname, 400, "token.issue.rejected", { + error: "invalid JSON", + }); + return; + } + const parsed = tokenRequest(value); + if (!parsed) { + sendJson(request, response, url.pathname, 400, "token.issue.rejected", { + error: "invalid token request", + }); + return; + } + sendJson(request, response, url.pathname, 200, "token.issue", issue(parsed), { + headers: { "cache-control": "no-store" }, + tokenKind: parsed.kind, + }); + }); +}); + +server.on("error", (error) => { 
+ console.error(`cloudflare-access-emulator: ${error.message}`); + process.exit(1); +}); +server.listen(port, "127.0.0.1", () => { + console.log(`cloudflare-access-emulator: ${issuer}`); +}); + +const close = () => server.close(() => process.exit(0)); +process.on("SIGINT", close); +process.on("SIGTERM", close); diff --git a/e2e/scripts/evidence-publication-summary.ts b/e2e/scripts/evidence-publication-summary.ts new file mode 100644 index 000000000..e8d0fbe7f --- /dev/null +++ b/e2e/scripts/evidence-publication-summary.ts @@ -0,0 +1,30 @@ +import { readFileSync } from "node:fs"; +import { resolve } from "node:path"; + +import { evidenceSummaryMarkdown, summaryRunsFromManifest } from "../src/evidence-publication"; + +const argumentValue = (name: string) => { + const args = process.argv.slice(2); + const equals = args.find((argument) => argument.startsWith(`${name}=`)); + if (equals) return equals.slice(name.length + 1); + const index = args.indexOf(name); + return index >= 0 ? args[index + 1] : undefined; +}; + +const manifestPath = argumentValue("--manifest"); +const viewerUrl = argumentValue("--viewer-url"); + +if (!manifestPath || !viewerUrl) { + console.error( + "usage: bun e2e/scripts/evidence-publication-summary.ts --manifest --viewer-url ", + ); + process.exitCode = 1; +} else { + try { + const manifest: unknown = JSON.parse(readFileSync(resolve(manifestPath), "utf8")); + process.stdout.write(evidenceSummaryMarkdown(viewerUrl, summaryRunsFromManifest(manifest))); + } catch (error) { + console.error(`evidence-publication-summary: ${String(error)}`); + process.exitCode = 1; + } +} diff --git a/e2e/scripts/merge-evidence.ts b/e2e/scripts/merge-evidence.ts new file mode 100644 index 000000000..82805563e --- /dev/null +++ b/e2e/scripts/merge-evidence.ts @@ -0,0 +1,56 @@ +import { resolve, sep } from "node:path"; + +import { writeJsonAtomicSync } from "../src/artifact-io"; +import { mergeEvidenceArtifacts } from "../src/evidence-merge"; +import { 
TRUSTED_RUN_LANES_SOURCE } from "../src/evidence-trust"; + +const argumentValue = (name: string) => { + const args = process.argv.slice(2); + const equals = args.find((argument) => argument.startsWith(`${name}=`)); + if (equals) return equals.slice(name.length + 1); + const index = args.indexOf(name); + return index >= 0 ? args[index + 1] : undefined; +}; + +const inputDir = argumentValue("--input-dir"); +const outputDir = argumentValue("--output-dir"); +const runAttempt = argumentValue("--run-attempt"); +const trustedRunsOutput = argumentValue("--trusted-runs-output"); + +if (!inputDir || !outputDir || !runAttempt || !trustedRunsOutput) { + console.error( + "usage: bun e2e/scripts/merge-evidence.ts --input-dir --output-dir --run-attempt --trusted-runs-output ", + ); + process.exitCode = 1; +} else { + try { + const resolvedInput = resolve(inputDir); + const resolvedOutput = resolve(outputDir); + const resolvedTrust = resolve(trustedRunsOutput); + if ( + resolvedTrust === resolvedInput || + resolvedTrust.startsWith(`${resolvedInput}${sep}`) || + resolvedTrust === resolvedOutput || + resolvedTrust.startsWith(`${resolvedOutput}${sep}`) + ) { + throw new Error("trusted run metadata must be outside downloaded artifacts and merged runs"); + } + const result = mergeEvidenceArtifacts({ + inputDir: resolvedInput, + outputDir: resolvedOutput, + runAttempt, + }); + writeJsonAtomicSync(resolvedTrust, { + schemaVersion: 1, + source: TRUSTED_RUN_LANES_SOURCE, + runAttempt, + runs: result.trustedRuns, + }); + console.log( + `evidence merge: ${result.attemptCount} attempts from ${result.artifactCount} artifacts (${result.collisionCount} collisions preserved)`, + ); + } catch (error) { + console.error(`evidence merge: ${String(error)}`); + process.exitCode = 1; + } +} diff --git a/e2e/scripts/preflight-vm-lane.ts b/e2e/scripts/preflight-vm-lane.ts new file mode 100644 index 000000000..ebbe079d1 --- /dev/null +++ b/e2e/scripts/preflight-vm-lane.ts @@ -0,0 +1,149 @@ +import { 
execFile } from "node:child_process"; +import { constants } from "node:fs"; +import { access } from "node:fs/promises"; +import { delimiter, join } from "node:path"; +import { promisify } from "node:util"; + +import { projectDefinition, type E2eCapability } from "../src/project-matrix"; +import { preflightLinuxKvm } from "../src/vm/linux-kvm"; + +const execFileP = promisify(execFile); + +const requireEnvironment = (name: string) => { + const value = process.env[name]; + if (!value) throw new Error(`${name} is required for this requested VM lane`); + return value; +}; + +const executablePath = async (command: string) => { + const candidates = command.includes("/") + ? [command] + : (process.env.PATH ?? "") + .split(delimiter) + .filter(Boolean) + .map((directory) => join(directory, command)); + for (const candidate of candidates) { + try { + await access(candidate, constants.X_OK); + return candidate; + } catch { + // Keep searching PATH. + } + } + throw new Error(`required executable is unavailable: ${command}`); +}; + +const execute = async (command: string, args: ReadonlyArray) => { + const executable = await executablePath(command); + try { + const result = await execFileP(executable, [...args], { + encoding: "utf8", + maxBuffer: 16 * 1024 * 1024, + }); + return `${result.stdout}\n${result.stderr}`.trim(); + } catch (error) { + throw new Error(`required capability probe failed: ${command} ${args.join(" ")}`, { + cause: error, + }); + } +}; + +const assertProjectContract = ( + projectName: string, + expectedCapabilities: ReadonlyArray, +) => { + const project = projectDefinition(projectName); + if (!project) throw new Error(`unknown VM project: ${projectName}`); + if (project.tier !== "heavy-vm" || !project.hermetic) { + throw new Error(`${projectName} must remain a hermetic heavy-vm project`); + } + const missing = expectedCapabilities.filter( + (capability) => !project.requiredCapabilities.some((required) => required === capability), + ); + if (missing.length > 
0) { + throw new Error(`${projectName} does not require expected capabilities: ${missing.join(", ")}`); + } +}; + +const preflightTart = async (os: "macos" | "linux") => { + assertProjectContract(`cli-${os}`, ["api", "restart"]); + requireEnvironment("E2E_VM_RUN_SCOPE"); + if (process.platform !== "darwin" || process.arch !== "arm64") { + throw new Error(`tart ${os} requires a darwin arm64 runner`); + } + + const tart = process.env.E2E_TART_BIN ?? "/opt/homebrew/bin/tart"; + const sshpass = process.env.E2E_SSHPASS_BIN ?? "/opt/homebrew/bin/sshpass"; + const baseImage = + os === "macos" + ? (process.env.E2E_TART_MACOS_BASE ?? "executor-macos-base") + : (process.env.E2E_TART_LINUX_BASE ?? "executor-linux-base"); + await Promise.all([ + execute(tart, ["--version"]), + execute(sshpass, ["-V"]), + executablePath("ssh"), + executablePath("scp"), + ]); + const images = await execute(tart, ["list"]); + if (!images.split(/\s+/).includes(baseImage)) { + throw new Error(`required tart base image is unavailable: ${baseImage}`); + } +}; + +const preflightEc2 = async () => { + assertProjectContract("cli-windows", ["api", "restart"]); + requireEnvironment("E2E_VM_RUN_SCOPE"); + requireEnvironment("AWS_ACCESS_KEY_ID"); + requireEnvironment("AWS_SECRET_ACCESS_KEY"); + const aws = process.env.E2E_AWS_BIN ?? "aws"; + const region = process.env.E2E_EC2_REGION ?? 
"us-west-2"; + await Promise.all([ + executablePath("curl"), + executablePath("scp"), + executablePath("ssh"), + executablePath("ssh-keygen"), + ]); + await execute(aws, ["--region", region, "sts", "get-caller-identity", "--output", "json"]); + const defaultVpc = await execute(aws, [ + "--region", + region, + "ec2", + "describe-vpcs", + "--filters", + "Name=isDefault,Values=true", + "--query", + "Vpcs[0].VpcId", + "--output", + "text", + ]); + if (!defaultVpc || defaultVpc === "None") { + throw new Error(`EC2 ${region} has no default VPC for the Windows VM lane`); + } +}; + +const main = async () => { + const [lane, guest] = process.argv.slice(2); + if (process.env.E2E_REQUIRED_CAPABILITY_MODE !== "required") { + throw new Error("requested VM lanes must set E2E_REQUIRED_CAPABILITY_MODE=required"); + } + if (lane === "linux-kvm") { + assertProjectContract("desktop-kvm", ["desktop-gui"]); + requireEnvironment("E2E_KVM_CLEANUP_LEDGER"); + requireEnvironment("E2E_KVM_RUN_SCOPE"); + await preflightLinuxKvm({ requirement: "required" }); + } else if (lane === "tart" && (guest === "macos" || guest === "linux")) { + await preflightTart(guest); + } else if (lane === "ec2" && guest === "windows") { + await preflightEc2(); + } else { + throw new Error( + "usage: bun e2e/scripts/preflight-vm-lane.ts linux-kvm | tart | ec2 windows", + ); + } + console.log(`VM capability preflight passed: ${lane}${guest ? 
` ${guest}` : ""}`); +}; + +main().catch((error: unknown) => { + console.error(`VM capability preflight failed: ${String(error)}`); + process.exitCode = 1; +}); diff --git a/e2e/scripts/publish-evidence-r2.ts b/e2e/scripts/publish-evidence-r2.ts new file mode 100644 index 000000000..0f5c6d530 --- /dev/null +++ b/e2e/scripts/publish-evidence-r2.ts @@ -0,0 +1,180 @@ +import { spawn } from "node:child_process"; +import { appendFileSync } from "node:fs"; + +import { + evidenceViewerUrl, + r2ObjectUrl, + validateEvidenceBundle, + verifyPublishedEvidence, +} from "../src/evidence-publication"; +import { loadTrustedRunLanes } from "../src/evidence-trust"; + +const CACHE_CONTROL = "public, max-age=31536000, immutable"; +const UPLOAD_CONCURRENCY = 6; +const SECRET_ENVIRONMENT_KEYS = new Set([ + "AWS_ACCESS_KEY_ID", + "AWS_SECRET_ACCESS_KEY", + "AWS_SESSION_TOKEN", + "R2_ACCESS_KEY_ID", + "R2_SECRET_ACCESS_KEY", +]); + +const argumentValue = (name: string) => { + const args = process.argv.slice(2); + const equals = args.find((argument) => argument.startsWith(`${name}=`)); + if (equals) return equals.slice(name.length + 1); + const index = args.indexOf(name); + return index >= 0 ? 
args[index + 1] : undefined; +}; + +const requiredArgument = (name: string) => { + const value = argumentValue(name); + if (!value) throw new Error(`publish-evidence-r2: ${name} is required`); + return value; +}; + +const requiredEnvironment = (name: string) => { + const value = process.env[name]; + if (!value) throw new Error(`publish-evidence-r2: ${name} is required`); + return value; +}; + +const curlConfigValue = (value: string) => { + if (value.includes("\r") || value.includes("\n") || value.includes("\0")) { + throw new Error("publish-evidence-r2: R2 credentials contain an unsupported character"); + } + return value.replace(/\\/g, "\\\\").replace(/"/g, '\\"'); +}; + +const curlEnvironment = () => { + const environment: NodeJS.ProcessEnv = {}; + for (const [key, value] of Object.entries(process.env)) { + if (!SECRET_ENVIRONMENT_KEYS.has(key) && value !== undefined) environment[key] = value; + } + return environment; +}; + +const runCurl = (args: ReadonlyArray, configuration: string) => + new Promise<{ readonly ok: true } | { readonly ok: false; readonly error: Error }>((resolve) => { + const child = spawn("curl", [...args], { + env: curlEnvironment(), + stdio: ["pipe", "pipe", "pipe"], + }); + let stderr = ""; + child.stderr.setEncoding("utf8"); + child.stderr.on("data", (chunk: string) => { + stderr = `${stderr}${chunk}`.slice(-16_384); + }); + child.stdout.resume(); + child.once("error", (error) => resolve({ ok: false, error })); + child.once("close", (code) => { + if (code === 0) resolve({ ok: true }); + else { + resolve({ + ok: false, + error: new Error(`curl exited with code ${code}: ${stderr.trim()}`), + }); + } + }); + child.stdin.on("error", () => undefined); + child.stdin.end(configuration); + }); + +const uploadFiles = async ( + files: ReadonlyArray["files"][number]>, + upload: (file: ReturnType["files"][number]) => Promise, +) => { + let cursor = 0; + await Promise.all( + Array.from({ length: Math.min(UPLOAD_CONCURRENCY, files.length) }, async () => 
{ + for (;;) { + const index = cursor; + cursor += 1; + const file = files[index]; + if (!file) return; + await upload(file); + } + }), + ); +}; + +const main = async () => { + const runsDir = requiredArgument("--runs-dir"); + const bucket = requiredArgument("--bucket"); + const prefix = requiredArgument("--prefix"); + const endpoint = requiredArgument("--endpoint"); + const publicBaseUrl = requiredArgument("--public-base-url"); + const sourceRevision = requiredArgument("--source-revision"); + const trustedLanesFile = requiredArgument("--trusted-lanes"); + const accessKeyId = requiredEnvironment("R2_ACCESS_KEY_ID"); + const secretAccessKey = requiredEnvironment("R2_SECRET_ACCESS_KEY"); + const trustedRuns = loadTrustedRunLanes(trustedLanesFile, runsDir); + const bundle = validateEvidenceBundle(runsDir, sourceRevision, trustedRuns); + const viewerUrl = evidenceViewerUrl(publicBaseUrl, prefix); + const curlConfiguration = `user = "${curlConfigValue(accessKeyId)}:${curlConfigValue(secretAccessKey)}"\n`; + const sortedFiles = [...bundle.files].sort((left, right) => + left.relativePath.localeCompare(right.relativePath), + ); + const controlFiles = new Set(["index.html", "manifest.json", "publication.json"]); + const payloadFiles = sortedFiles.filter((file) => !controlFiles.has(file.relativePath)); + const finalFiles = ["manifest.json", "publication.json", "index.html"].map((relativePath) => { + const file = sortedFiles.find((candidate) => candidate.relativePath === relativePath); + if (!file) throw new Error(`publish-evidence-r2: missing ${relativePath}`); + return file; + }); + + let uploaded = 0; + const upload = async (file: (typeof sortedFiles)[number]) => { + const objectUrl = r2ObjectUrl(endpoint, bucket, prefix, file.relativePath); + const result = await runCurl( + [ + "--config", + "-", + "--fail-with-body", + "--silent", + "--show-error", + "--retry", + "3", + "--retry-all-errors", + "--connect-timeout", + "30", + "--request", + "PUT", + "--upload-file", + 
file.absolutePath, + "--aws-sigv4", + "aws:amz:auto:s3", + "--header", + `Content-Type: ${file.artifact.mime}`, + "--header", + `Cache-Control: ${CACHE_CONTROL}`, + objectUrl, + ], + curlConfiguration, + ); + if (!result.ok) throw result.error; + uploaded += 1; + if (uploaded % 25 === 0 || uploaded === sortedFiles.length) { + console.log(`publish-evidence-r2: uploaded ${uploaded}/${sortedFiles.length} files`); + } + }; + + await uploadFiles(payloadFiles, upload); + for (const file of finalFiles) await upload(file); + + await verifyPublishedEvidence({ viewerUrl, files: bundle.files }); + + const outputFile = process.env.GITHUB_OUTPUT; + if (outputFile) { + appendFileSync( + outputFile, + `viewer_url=${viewerUrl}\nobject_prefix=${prefix}\nuploaded_files=${uploaded}\n`, + ); + } + console.log(`publish-evidence-r2: verified ${viewerUrl}`); +}; + +main().catch((error) => { + console.error(`publish-evidence-r2: ${String(error)}`); + process.exitCode = 1; +}); diff --git a/e2e/scripts/sanitize-evidence.ts b/e2e/scripts/sanitize-evidence.ts new file mode 100644 index 000000000..b247c967c --- /dev/null +++ b/e2e/scripts/sanitize-evidence.ts @@ -0,0 +1,458 @@ +// Prepare e2e/runs for CI artifact publication. The pass is intentionally +// destructive: private configs and credential stores are removed, text and +// JSON evidence is redacted in place, and unknown files are denied by the +// same allowlist used by the viewer server. 
+import { + closeSync, + existsSync, + lstatSync, + openSync, + readFileSync, + readdirSync, + readSync, + rmSync, +} from "node:fs"; +import { dirname, join, relative, resolve, sep } from "node:path"; +import { fileURLToPath } from "node:url"; + +import { writeJsonAtomicSync, writeTextAtomicSync } from "../src/artifact-io"; +import { + isPublishedDirectory, + publishedArtifactFor, + sanitizePublishedCast, + sanitizePublishedJson, + sanitizePublishedText, + type EvidencePublicationMetadata, +} from "../src/published-artifacts"; +import { + LANE_PROVENANCE_FILE, + parseLaneProvenance, + visualEvidencePublicationDecision, + type VisualEvidencePublicationDecision, +} from "../src/evidence-provenance"; +import { + loadTrustedRunLanes, + trustedRunLaneKey, + trustedRunLaneMap, + type TrustedRunLane, + type TrustedRunLanes, +} from "../src/evidence-trust"; +import { projectDefinition } from "../src/project-matrix"; +import { buildManifest } from "../src/viewer/manifest"; + +interface CommandOptions { + readonly runsDir: string; + readonly canaries: ReadonlyArray; + readonly trustedProjectsByTarget: ReadonlyMap; + readonly trustedRuns?: TrustedRunLanes; + readonly trustedRunsByKey?: ReadonlyMap; +} + +const argumentsFor = (): CommandOptions => { + const args = process.argv.slice(2); + let runsDir = fileURLToPath(new URL("../runs/", import.meta.url)); + let trustedLanesFile: string | undefined; + const trustedProjects: string[] = []; + const canaries = [process.env.E2E_EVIDENCE_CANARY, process.env.E2E_EVIDENCE_CANARY_SECRET].filter( + (value): value is string => typeof value === "string" && value.length > 0, + ); + + for (let index = 0; index < args.length; index += 1) { + const argument = args[index]; + if (argument === "--runs-dir") { + const value = args[index + 1]; + if (!value) throw new Error("sanitize-evidence: --runs-dir needs a path"); + runsDir = resolve(value); + index += 1; + } else if (argument?.startsWith("--runs-dir=")) { + runsDir = 
resolve(argument.slice("--runs-dir=".length)); + } else if (argument === "--canary") { + const value = args[index + 1]; + if (!value) throw new Error("sanitize-evidence: --canary needs a value"); + canaries.push(value); + index += 1; + } else if (argument?.startsWith("--canary=")) { + canaries.push(argument.slice("--canary=".length)); + } else if (argument === "--trusted-project") { + const value = args[index + 1]; + if (!value) throw new Error("sanitize-evidence: --trusted-project needs a project name"); + trustedProjects.push(value); + index += 1; + } else if (argument?.startsWith("--trusted-project=")) { + trustedProjects.push(argument.slice("--trusted-project=".length)); + } else if (argument === "--trusted-lanes") { + const value = args[index + 1]; + if (!value) throw new Error("sanitize-evidence: --trusted-lanes needs a path"); + trustedLanesFile = value; + index += 1; + } else if (argument?.startsWith("--trusted-lanes=")) { + trustedLanesFile = argument.slice("--trusted-lanes=".length); + } else { + throw new Error(`sanitize-evidence: unknown argument ${argument}`); + } + } + + if (trustedLanesFile && trustedProjects.length > 0) { + throw new Error("sanitize-evidence: choose trusted lanes or trusted projects, not both"); + } + const trustedProjectsByTarget = new Map(); + for (const projectName of new Set(trustedProjects)) { + const project = projectDefinition(projectName); + if (!project) throw new Error(`sanitize-evidence: unknown trusted project ${projectName}`); + const existing = trustedProjectsByTarget.get(project.target); + if (existing && existing !== projectName) { + throw new Error( + `sanitize-evidence: trusted projects ${existing} and ${projectName} share target ${project.target}`, + ); + } + trustedProjectsByTarget.set(project.target, projectName); + } + const trustedRuns = trustedLanesFile ? 
loadTrustedRunLanes(trustedLanesFile, runsDir) : undefined; + return { + runsDir, + canaries: [...new Set(canaries)].filter((value) => value.length >= 4), + trustedProjectsByTarget, + ...(trustedRuns ? { trustedRuns, trustedRunsByKey: trustedRunLaneMap(trustedRuns) } : {}), + }; +}; + +const portablePath = (root: string, file: string): string => + relative(root, file).split(sep).join("/"); + +interface SanitizeStats { + removed: number; + redacted: number; + retained: number; + binaryArtifacts: string[]; + errors: string[]; +} + +interface EvidenceAttemptDirectory { + readonly target: string; + readonly slug: string; + readonly directory: string; +} + +const evidenceAttemptDirectories = (root: string): EvidenceAttemptDirectory[] => { + const attempts: EvidenceAttemptDirectory[] = []; + for (const target of readdirSync(root, { withFileTypes: true })) { + if ( + !target.isDirectory() || + target.name === "assets" || + target.name === "trace-viewer" || + !isPublishedDirectory(target.name) + ) { + continue; + } + const targetDirectory = join(root, target.name); + for (const slug of readdirSync(targetDirectory, { withFileTypes: true })) { + const relativePath = `${target.name}/${slug.name}`; + if (!slug.isDirectory() || !isPublishedDirectory(relativePath)) continue; + attempts.push({ + target: target.name, + slug: slug.name, + directory: join(targetDirectory, slug.name), + }); + } + } + return attempts; +}; + +const trustedProjectForAttempt = (options: CommandOptions, target: string, slug: string) => + options.trustedRunsByKey + ? 
options.trustedRunsByKey.get(trustedRunLaneKey(target, slug))?.project + : options.trustedProjectsByTarget.get(target); + +const validateTrustedLaneBindings = (options: CommandOptions, errors: string[]): void => { + const attempts = evidenceAttemptDirectories(options.runsDir); + const actualKeys = new Set( + attempts.map((attempt) => trustedRunLaneKey(attempt.target, attempt.slug)), + ); + if (options.trustedRuns) { + for (const trusted of options.trustedRuns.runs) { + if (!actualKeys.has(trustedRunLaneKey(trusted.target, trusted.slug))) { + errors.push(`trusted lane has no evidence directory: ${trusted.target}/${trusted.slug}`); + } + } + } + + for (const attempt of attempts) { + const relativePath = `${attempt.target}/${attempt.slug}`; + const trustedProject = trustedProjectForAttempt(options, attempt.target, attempt.slug); + if (!trustedProject) { + errors.push(`evidence lane has no external trusted project: ${relativePath}`); + continue; + } + try { + const provenance: unknown = JSON.parse( + readFileSync(join(attempt.directory, LANE_PROVENANCE_FILE), "utf8"), + ); + if (!parseLaneProvenance(provenance, trustedProject, attempt.target)) { + errors.push( + `lane provenance does not match trusted project ${trustedProject}: ${relativePath}`, + ); + } + } catch { + errors.push(`lane provenance is missing or unreadable: ${relativePath}`); + } + } +}; + +const visualEvidenceDecision = ( + root: string, + file: string, + options: CommandOptions, +): VisualEvidencePublicationDecision => { + try { + const result: unknown = JSON.parse(readFileSync(join(dirname(file), "result.json"), "utf8")); + const provenance: unknown = JSON.parse( + readFileSync(join(dirname(file), LANE_PROVENANCE_FILE), "utf8"), + ); + const [target = "", slug = ""] = portablePath(root, file).split("/"); + const trustedProject = trustedProjectForAttempt(options, target, slug) ?? 
""; + return visualEvidencePublicationDecision(result, provenance, target, trustedProject); + } catch { + return { publish: false, reason: "lane provenance is missing or unreadable" }; + } +}; + +const sanitizeFile = ( + root: string, + file: string, + canaries: ReadonlyArray, + stats: SanitizeStats, + options: CommandOptions, +): void => { + const relativePath = portablePath(root, file); + const artifact = publishedArtifactFor(relativePath); + if (!artifact) { + rmSync(file, { force: true }); + stats.removed += 1; + return; + } + + if (artifact.kind === "json" || artifact.kind === "text") { + try { + const contents = readFileSync(file, "utf8"); + const publication = relativePath.endsWith("/result.json") + ? { + availableArtifacts: new Set( + readdirSync(dirname(file), { withFileTypes: true }) + .filter((entry) => entry.isFile()) + .map((entry) => entry.name), + ), + } + : {}; + const sanitized = + artifact.kind === "json" + ? sanitizePublishedJson(relativePath, contents, publication, { secrets: canaries }) + : relativePath.endsWith("/terminal.cast") + ? 
sanitizePublishedCast(contents, { secrets: canaries }) + : sanitizePublishedText(contents, { secrets: canaries }); + writeTextAtomicSync(file, sanitized); + stats.redacted += 1; + return; + } catch (error) { + rmSync(file, { force: true }); + stats.removed += 1; + stats.errors.push( + `removed unreadable publication artifact ${relativePath}: ${String(error)}`, + ); + return; + } + } + + if (artifact.unredactedVisual) { + const decision = visualEvidenceDecision(root, file, options); + if (!decision.publish) { + rmSync(file, { force: true }); + stats.removed += 1; + stats.errors.push(`removed unauthorized visual evidence ${relativePath}: ${decision.reason}`); + return; + } + stats.binaryArtifacts.push(relativePath); + } + stats.retained += 1; +}; + +const sanitizeDirectory = ( + root: string, + directory: string, + canaries: ReadonlyArray, + stats: SanitizeStats, + options: CommandOptions, +): void => { + const entries = readdirSync(directory, { withFileTypes: true }).sort((left, right) => { + if (left.name === "result.json") return 1; + if (right.name === "result.json") return -1; + return left.name.localeCompare(right.name); + }); + for (const entry of entries) { + const file = join(directory, entry.name); + const relativePath = portablePath(root, file); + const metadata = lstatSync(file); + if (metadata.isSymbolicLink()) { + rmSync(file, { recursive: true, force: true }); + stats.removed += 1; + continue; + } + if (metadata.isDirectory()) { + if (entry.name.endsWith(".lock")) { + stats.errors.push(`active evidence lock prevents publication: ${relativePath}`); + continue; + } + if (!isPublishedDirectory(relativePath)) { + rmSync(file, { recursive: true, force: true }); + stats.removed += 1; + continue; + } + sanitizeDirectory(root, file, canaries, stats, options); + continue; + } + if (metadata.isFile()) sanitizeFile(root, file, canaries, stats, options); + else { + rmSync(file, { force: true }); + stats.removed += 1; + } + } +}; + +const fileContains = (file: 
string, canary: Buffer): boolean => { + const handle = openSync(file, "r"); + const chunkSize = 64 * 1024; + const chunk = Buffer.allocUnsafe(chunkSize); + let carry = Buffer.alloc(0); + try { + for (;;) { + const bytes = readSync(handle, chunk, 0, chunk.length, null); + if (bytes === 0) return false; + const combined = Buffer.concat([carry, chunk.subarray(0, bytes)]); + if (combined.includes(canary)) return true; + const overlap = Math.max(0, canary.length - 1); + carry = overlap === 0 ? Buffer.alloc(0) : combined.subarray(-overlap); + } + } finally { + closeSync(handle); + } +}; + +const verifyDirectory = ( + root: string, + directory: string, + canaries: ReadonlyArray, + errors: string[], +): void => { + for (const entry of readdirSync(directory, { withFileTypes: true })) { + const file = join(directory, entry.name); + const relativePath = portablePath(root, file); + const metadata = lstatSync(file); + if (metadata.isSymbolicLink()) { + errors.push(`symlink survived evidence sanitization: ${relativePath}`); + continue; + } + if (metadata.isDirectory()) { + if (!isPublishedDirectory(relativePath)) { + errors.push(`private directory survived evidence sanitization: ${relativePath}`); + } else { + verifyDirectory(root, file, canaries, errors); + } + continue; + } + if (!metadata.isFile() || !publishedArtifactFor(relativePath)) { + errors.push(`private artifact survived evidence sanitization: ${relativePath}`); + continue; + } + for (const canary of canaries) { + if (fileContains(file, Buffer.from(canary))) { + errors.push(`canary secret survived evidence sanitization: ${relativePath}`); + } + } + } +}; + +const selfCheck = (): void => { + const canary = "executor-evidence-sanitizer-self-check"; + const sample = JSON.stringify({ + authorization: `Bearer ${canary}`, + url: `http://127.0.0.1/?_token=${canary}`, + artifacts: ["terminal.cast", "mcporter.json", "trace.zip"], + }); + const sanitized = sanitizePublishedJson( + "cloud/self-check/result.json", + sample, + {}, 
+ { secrets: [canary] }, + ); + if (sanitized.includes(canary) || sanitized.includes("mcporter.json")) { + throw new Error("sanitize-evidence: sanitizer self-check failed"); + } +}; + +const main = (): void => { + selfCheck(); + const options = argumentsFor(); + if (!existsSync(options.runsDir)) { + console.log(`sanitize-evidence: ${options.runsDir} does not exist; nothing to publish`); + return; + } + + const stats: SanitizeStats = { + removed: 0, + redacted: 0, + retained: 0, + binaryArtifacts: [], + errors: [], + }; + validateTrustedLaneBindings(options, stats.errors); + sanitizeDirectory(options.runsDir, options.runsDir, options.canaries, stats, options); + buildManifest(options.runsDir); + verifyDirectory(options.runsDir, options.runsDir, options.canaries, stats.errors); + + const sourceRevision = process.env.GITHUB_SHA; + const metadata: EvidencePublicationMetadata = { + schemaVersion: 1, + sanitizedAt: Date.now(), + status: stats.errors.length === 0 ? "passed" : "failed", + sanitizer: { + source: "e2e/scripts/sanitize-evidence.ts", + policyVersion: 1, + ...(sourceRevision ? { sourceRevision } : {}), + }, + policy: { + unknownArtifacts: "removed", + textAndJson: "redacted", + binaryVisuals: "unredacted-synthetic-only", + binarySecretDetection: "byte-canary-only", + }, + runtime: { + name: process.versions.bun ? "bun" : "node", + version: process.versions.bun ?? 
process.version, + platform: process.platform, + arch: process.arch, + }, + stats: { + removed: stats.removed, + redacted: stats.redacted, + retained: stats.retained, + canariesChecked: options.canaries.length, + }, + binaryArtifacts: stats.binaryArtifacts.sort(), + errors: stats.errors.map((error) => + sanitizePublishedText(error, { secrets: options.canaries }), + ), + }; + writeJsonAtomicSync(join(options.runsDir, "publication.json"), metadata); + + console.log( + `sanitize-evidence: removed ${stats.removed}, redacted ${stats.redacted}, retained ${stats.retained}`, + ); + for (const error of stats.errors) console.error(`sanitize-evidence: ${error}`); + if (stats.errors.length > 0) process.exitCode = 1; +}; + +try { + main(); +} catch (error) { + console.error(`sanitize-evidence: ${String(error)}`); + process.exitCode = 1; +} diff --git a/e2e/scripts/serve.ts b/e2e/scripts/serve.ts index a1f53eb97..22257b5c8 100644 --- a/e2e/scripts/serve.ts +++ b/e2e/scripts/serve.ts @@ -1,115 +1,263 @@ -// Static server for runs/ — the review URL. Supports range requests so the -// session videos seek/stream, gzips text assets, and marks vite's hashed -// /assets/ as immutable so Monaco/React chunks download once, ever. -// `bun e2e/scripts/serve.ts` → prints the bound URL (default port 8901, but -// it walks forward to the next free port if that's taken, so two worktrees — -// or a leaked previous viewer — never wedge each other). `PORT=…` pins a port -// explicitly and fails loudly if it's busy (the strictPort rule from -// src/ports.ts). The SPA itself is port- and mount-agnostic (relative assets + -// hash routing), so any port the server lands on just works in the browser. -import { createReadStream, existsSync, statSync } from "node:fs"; -import { createServer } from "node:http"; +// Static server for sanitized e2e evidence. Only explicit publication +// artifacts are reachable. 
Token-bearing CLI homes, MCP configs, telemetry +// databases, temp files, and arbitrary run-directory contents stay private. +import { + createReadStream, + existsSync, + lstatSync, + readFileSync, + realpathSync, + statSync, +} from "node:fs"; +import { createServer, type ServerResponse } from "node:http"; import type { AddressInfo } from "node:net"; -import { extname, join, normalize } from "node:path"; +import { dirname, join, relative, resolve, sep } from "node:path"; +import { Readable } from "node:stream"; import { fileURLToPath } from "node:url"; import { createGzip } from "node:zlib"; +import { + publishedArtifactFor, + sanitizePublishedCast, + sanitizePublishedJson, + sanitizePublishedText, + type PublishedArtifact, +} from "../src/published-artifacts"; +import { + LANE_PROVENANCE_FILE, + visualEvidencePublicationDecision, +} from "../src/evidence-provenance"; + const ROOT = fileURLToPath(new URL("../runs/", import.meta.url)); -// Explicit PORT pins (and fails visibly if busy); otherwise 8901 is just a -// starting preference we walk forward from. const PINNED = process.env.PORT !== undefined; const PREFERRED = Number(process.env.PORT ?? 8901); +const INCLUDE_RAW_TRACE = process.env.E2E_VIEWER_INCLUDE_RAW_TRACE === "1"; +const TRUSTED_PROJECT = process.env.E2E_PROJECT ?? process.env.E2E_TARGET ?? 
""; +const MAX_SANITIZED_BYTES = 20 * 1024 * 1024; +const COMPRESSIBLE = new Set(["static", "json", "text"]); + +const notFound = (response: ServerResponse): void => { + response.writeHead(404, { "content-type": "text/plain; charset=utf-8" }).end("not found"); +}; -const MIME: Record = { - ".html": "text/html; charset=utf-8", - ".js": "text/javascript", - ".css": "text/css", - ".map": "application/json", - ".svg": "image/svg+xml", - ".json": "application/json", - ".ts": "text/plain; charset=utf-8", - ".png": "image/png", - ".webm": "video/webm", - ".mp4": "video/mp4", - ".zip": "application/zip", +const unsafePath = (file: string): boolean => { + const root = realpathSync(ROOT); + const actual = realpathSync(file); + return actual !== root && !actual.startsWith(`${root}${sep}`); +}; + +const requestedPath = (requestUrl: string): string | undefined => { + let decoded: string; + try { + decoded = decodeURIComponent(new URL(requestUrl, "http://viewer.invalid").pathname); + } catch { + return undefined; + } + if (decoded.includes("\\") || decoded.includes("\0")) return undefined; + const normalized = decoded.replace(/^\/+|\/+$/g, ""); + if (normalized === "") return "index.html"; + if (normalized === "trace-viewer") return "trace-viewer/index.html"; + const resolved = resolve(ROOT, normalized); + if (resolved !== resolve(ROOT) && !resolved.startsWith(`${resolve(ROOT)}${sep}`)) + return undefined; + return relative(ROOT, resolved).split(sep).join("/"); +}; + +const securityHeaders = (response: ServerResponse): void => { + response.setHeader("cross-origin-resource-policy", "same-origin"); + response.setHeader("referrer-policy", "no-referrer"); + response.setHeader("x-content-type-options", "nosniff"); +}; + +const sanitizedContents = ( + relativePath: string, + artifact: PublishedArtifact, + file: string, +): Buffer | undefined => { + const size = statSync(file).size; + if (size > MAX_SANITIZED_BYTES) return undefined; + const raw = readFileSync(file, "utf8"); + if 
(artifact.kind === "json") { + return Buffer.from( + sanitizePublishedJson(relativePath, raw, { includeRawTrace: INCLUDE_RAW_TRACE }), + ); + } + if (relativePath.endsWith("/terminal.cast")) { + return Buffer.from(sanitizePublishedCast(raw)); + } + return Buffer.from(sanitizePublishedText(raw)); }; -const COMPRESSIBLE = new Set([".html", ".js", ".css", ".map", ".svg", ".json", ".ts"]); - -const server = createServer((req, res) => { - const url = new URL(req.url ?? "/", "http://x"); - let path = normalize(decodeURIComponent(url.pathname)).replace(/^([/\\])+/, ""); - if (path === "" || path === ".") path = "index.html"; - let file = join(ROOT, path); - // Directory request → its index.html (the page itself fixes a missing - // trailing slash client-side; a server redirect would drop the /runs mount). - if (file.startsWith(ROOT) && existsSync(file) && statSync(file).isDirectory()) { - file = join(file, "index.html"); +const visualEvidenceIsSynthetic = (file: string, target: string): boolean => { + try { + const result: unknown = JSON.parse(readFileSync(join(dirname(file), "result.json"), "utf8")); + const provenance: unknown = JSON.parse( + readFileSync(join(dirname(file), LANE_PROVENANCE_FILE), "utf8"), + ); + return visualEvidencePublicationDecision(result, provenance, target, TRUSTED_PROJECT).publish; + } catch { + return false; } - if (!file.startsWith(ROOT) || !existsSync(file) || !statSync(file).isFile()) { - res.writeHead(404).end("not found"); +}; + +const sendBuffer = ( + response: ServerResponse, + requestMethod: string | undefined, + artifact: PublishedArtifact, + contents: Buffer, + acceptsGzip: boolean, +): void => { + if (acceptsGzip && COMPRESSIBLE.has(artifact.kind)) { + response.writeHead(200, { + "content-type": artifact.mime, + "content-encoding": "gzip", + vary: "accept-encoding", + }); + if (requestMethod === "HEAD") response.end(); + else Readable.from([contents]).pipe(createGzip()).pipe(response); return; } + response.writeHead(200, { + 
"content-type": artifact.mime, + "content-length": contents.byteLength, + }); + response.end(requestMethod === "HEAD" ? undefined : contents); +}; + +const sendFile = ( + response: ServerResponse, + requestMethod: string | undefined, + rangeHeader: string | undefined, + artifact: PublishedArtifact, + file: string, +): void => { const size = statSync(file).size; - const ext = extname(file); - const type = MIME[ext] ?? "application/octet-stream"; - // trace.playwright.dev fetches trace.zip from the user's browser — allow it. - res.setHeader("access-control-allow-origin", "*"); - // Vite content-hashes /assets/ filenames → cache forever. Everything else - // (run data, index.html) must revalidate so fresh runs show up. - res.setHeader( - "cache-control", - path.startsWith("assets/") ? "public, max-age=31536000, immutable" : "no-cache", - ); - const range = /bytes=(\d+)-(\d*)/.exec(req.headers.range ?? ""); + const range = /^bytes=(\d+)-(\d*)$/.exec(rangeHeader ?? ""); if (range) { const start = Number(range[1]); - const end = range[2] ? Number(range[2]) : size - 1; - res.writeHead(206, { - "content-type": type, + const requestedEnd = range[2] ? Number(range[2]) : size - 1; + if (!Number.isSafeInteger(start) || !Number.isSafeInteger(requestedEnd) || start >= size) { + response.writeHead(416, { "content-range": `bytes */${size}` }).end(); + return; + } + const end = Math.min(requestedEnd, size - 1); + if (end < start) { + response.writeHead(416, { "content-range": `bytes */${size}` }).end(); + return; + } + response.writeHead(206, { + "content-type": artifact.mime, "content-range": `bytes ${start}-${end}/${size}`, "accept-ranges": "bytes", "content-length": end - start + 1, }); - createReadStream(file, { start, end }).pipe(res); + if (requestMethod === "HEAD") response.end(); + else createReadStream(file, { start, end }).pipe(response); return; } - const wantsGzip = - COMPRESSIBLE.has(ext) && /\bgzip\b/.test(String(req.headers["accept-encoding"] ?? 
"")); - if (wantsGzip) { - res.writeHead(200, { - "content-type": type, - "content-encoding": "gzip", - vary: "accept-encoding", - }); - createReadStream(file).pipe(createGzip()).pipe(res); + response.writeHead(200, { + "content-type": artifact.mime, + "content-length": size, + "accept-ranges": "bytes", + }); + if (requestMethod === "HEAD") response.end(); + else createReadStream(file).pipe(response); +}; + +const server = createServer((request, response) => { + securityHeaders(response); + if (request.method !== "GET" && request.method !== "HEAD") { + response.writeHead(405, { allow: "GET, HEAD" }).end(); return; } - res.writeHead(200, { "content-type": type, "content-length": size, "accept-ranges": "bytes" }); - createReadStream(file).pipe(res); + + const path = requestedPath(request.url ?? "/"); + const artifact = path + ? publishedArtifactFor(path, { includeRawTrace: INCLUDE_RAW_TRACE }) + : undefined; + if (!path || !artifact) { + notFound(response); + return; + } + + const file = join(ROOT, ...path.split("/")); + if ( + !existsSync(file) || + lstatSync(file).isSymbolicLink() || + !statSync(file).isFile() || + unsafePath(file) + ) { + notFound(response); + return; + } + + const immutable = path.startsWith("assets/") || path.startsWith("trace-viewer/"); + response.setHeader( + "cache-control", + immutable ? "public, max-age=31536000, immutable" : "private, no-store", + ); + + if (artifact.kind === "json" || artifact.kind === "text") { + try { + const contents = sanitizedContents(path, artifact, file); + if (!contents) { + response + .writeHead(413, { "content-type": "text/plain; charset=utf-8" }) + .end("artifact too large to sanitize"); + return; + } + sendBuffer( + response, + request.method, + artifact, + contents, + /\bgzip\b/.test(String(request.headers["accept-encoding"] ?? 
"")), + ); + } catch { + response + .writeHead(422, { "content-type": "text/plain; charset=utf-8" }) + .end("artifact could not be sanitized"); + } + return; + } + + if (artifact.unredactedVisual && !visualEvidenceIsSynthetic(file, path.split("/")[0] ?? "")) { + response + .writeHead(403, { "content-type": "text/plain; charset=utf-8" }) + .end("visual evidence lacks matching synthetic-only lane provenance"); + return; + } + + sendFile(response, request.method, request.headers.range, artifact, file); }); -// Host omitted → bind every interface (reachable over the tailnet). On a busy -// port: a pinned PORT is a hard error (predictable, matches --strictPort); an -// unpinned default walks forward to the next free port instead of crashing. +// Host omitted intentionally: the viewer remains reachable over the tailnet. +// A pinned port fails loudly; an unpinned preference walks to a free port. const MAX_WALK = 50; const listen = (port: number, attempt = 0): void => { - server.once("error", (err: NodeJS.ErrnoException) => { - if (err.code !== "EADDRINUSE") throw err; + const onError = (error: NodeJS.ErrnoException) => { + if (error.code !== "EADDRINUSE") throw error; if (PINNED) { - console.error(`e2e viewer: PORT=${port} is in use — free it or pick another port.`); + console.error(`e2e viewer: PORT=${port} is in use; free it or pick another port.`); process.exit(1); } if (attempt >= MAX_WALK) { console.error(`e2e viewer: no free port found in ${PREFERRED}..${PREFERRED + MAX_WALK}.`); process.exit(1); } - console.warn(`e2e viewer: port ${port} in use, trying ${port + 1}…`); + console.warn(`e2e viewer: port ${port} in use, trying ${port + 1}`); listen(port + 1, attempt + 1); - }); + }; + server.once("error", onError); server.listen(port, () => { + server.off("error", onError); const actual = (server.address() as AddressInfo).port; - console.log(`e2e viewer → http://localhost:${actual}/`); + console.log(`e2e viewer: http://localhost:${actual}/`); + if (!INCLUDE_RAW_TRACE) 
{ + console.log( + "e2e viewer: raw trace.zip files are private; set E2E_VIEWER_INCLUDE_RAW_TRACE=1 for trusted local use", + ); + } }); }; diff --git a/e2e/selfhost/auth-methods-ui.test.ts b/e2e/selfhost/auth-methods-ui.test.ts index ae75bbb15..b0946d697 100644 --- a/e2e/selfhost/auth-methods-ui.test.ts +++ b/e2e/selfhost/auth-methods-ui.test.ts @@ -1,4 +1,4 @@ -// Selfhost-only (browser): the multi-method auth UX beyond the no-auth case — +// Selfhost-only (browser): the multi-method auth UX beyond the no-auth case. // an OAuth-DETECTED server gets an API key declared alongside at add time, and // the connect modal's "+ method" adds a custom API key to an OAuth integration // without displacing it. Selfhost-only because cloud has no browser identity @@ -11,6 +11,7 @@ import { randomBytes } from "node:crypto"; import { expect } from "@effect/vitest"; import { Effect } from "effect"; import { composePluginApi } from "@executor-js/api/server"; +import { deriveMcpNamespace } from "@executor-js/plugin-mcp"; import { mcpHttpPlugin } from "@executor-js/plugin-mcp/api"; import { makeGreetingMcpServer, @@ -32,52 +33,58 @@ scenario( Effect.gen(function* () { const target = yield* Target; const browser = yield* Browser; + const { client: makeApiClient } = yield* Api; // An OAuth-PROTECTED server: the probe gets a 401 with protected- // resource metadata pointing at the test OAuth issuer, so the method // list seeds with the detected OAuth row. 
+ const serverName = `oauth-mcp-${randomBytes(3).toString("hex")}`; + const slug = IntegrationSlug.make(deriveMcpNamespace({ name: serverName })); const server = yield* serveMcpServerWithOAuth( () => makeGreetingMcpServer({ - name: `oauth-mcp-${randomBytes(3).toString("hex")}`, + name: serverName, }), { path: "/mcp" }, ); const identity = yield* target.newIdentity(); + const client = yield* makeApiClient(api, identity); - yield* browser.session(identity, async ({ page, step }) => { - await step("Open the add-MCP flow pointed at the server", async () => { - await page.goto(`/integrations/add/mcp?url=${encodeURIComponent(server.endpoint)}`, { - waitUntil: "networkidle", + yield* browser + .session(identity, async ({ page, step }) => { + await step("Open the add-MCP flow pointed at the server", async () => { + await page.goto(`/integrations/add/mcp?url=${encodeURIComponent(server.endpoint)}`, { + waitUntil: "networkidle", + }); + await page.getByText("How does this server authenticate?").waitFor(); }); - await page.getByText("How does this server authenticate?").waitFor(); - }); - await step("The probe detected OAuth", async () => { - await page.getByText("Method 1 · Detected").waitFor(); - // The OAuth editor declares discovery-at-connect, not pasted URLs. - await page.getByText("OAuth metadata is discovered from this server").waitFor(); - }); + await step("The probe detected OAuth", async () => { + await page.getByText("Method 1 · Detected").waitFor(); + // The OAuth editor declares discovery-at-connect, not pasted URLs. 
+ await page.getByText("OAuth metadata is discovered from this server").waitFor(); + }); - await step("Declare an API key method alongside OAuth", async () => { - await page.getByRole("button", { name: "Add method" }).click(); - await page.getByText("Method 2").waitFor(); - await page.getByPlaceholder("Authorization").last().waitFor(); - }); + await step("Declare an API key method alongside OAuth", async () => { + await page.getByRole("button", { name: "Add method" }).click(); + await page.getByText("Method 2").waitFor(); + await page.getByPlaceholder("Authorization").last().waitFor(); + }); - await step("Add the source with both methods", async () => { - await page.getByRole("button", { name: "Add source" }).click(); - await page.waitForURL(/\/integrations\/(?!add\b)[^/?]+$/, { - timeout: 30_000, + await step("Add the source with both methods", async () => { + await page.getByRole("button", { name: "Add source" }).click(); + await page.waitForURL(/\/integrations\/(?!add\b)[^/?]+$/, { + timeout: 30_000, + }); + await page.getByText("Connections").first().waitFor(); }); - await page.getByText("Connections").first().waitFor(); - }); - await step("The connect modal offers OAuth and the API key", async () => { - await page.getByRole("button", { name: "Add connection" }).first().click(); - await page.getByRole("tab", { name: "OAuth" }).waitFor(); - await page.getByRole("tab", { name: "API key (Authorization)" }).waitFor(); - }); - }); + await step("The connect modal offers OAuth and the API key", async () => { + await page.getByRole("button", { name: "Add connection" }).first().click(); + await page.getByRole("tab", { name: "OAuth" }).waitFor(); + await page.getByRole("tab", { name: "API key (Authorization)" }).waitFor(); + }); + }) + .pipe(Effect.ensuring(client.mcp.removeServer({ params: { slug } }).pipe(Effect.ignore))); }), ).pipe(Effect.provide(OAuthTestServer.layer())), ); @@ -90,7 +97,7 @@ scenario( const target = yield* Target; const browser = yield* Browser; 
const { client: makeApiClient } = yield* Api; - // A server that only accepts the bearer key — the connection created + // A server that only accepts the bearer key. The connection created // through the custom method must render it on the wire. const token = `e2e-modal-key-${randomBytes(6).toString("hex")}`; const server = yield* serveMcpServer(() => makeGreetingMcpServer(), { @@ -104,21 +111,21 @@ scenario( const client = yield* makeApiClient(api, identity); const slug = `mcp-modal-key-${randomBytes(3).toString("hex")}`; - // The integration as the add flow would have left it: OAuth only. - yield* client.mcp.addServer({ - payload: { - transport: "remote", - name: "OAuth-only MCP", - endpoint: server.endpoint, - slug, - authenticationTemplate: [{ kind: "oauth2" }], - }, - }); - - // Remove the integration (and the connection it creates) afterward — - // selfhost identities share one tenant, so a leaked connection would - // break the "fresh identity has zero connections" scenario. + // Remove the integration (and the connection it creates) afterward. + // Selfhost identities share one tenant, so a leaked connection would + // pollute later shared-admin scenarios. yield* Effect.gen(function* () { + // The integration as the add flow would have left it: OAuth only. 
+ yield* client.mcp.addServer({ + payload: { + transport: "remote", + name: "OAuth-only MCP", + endpoint: server.endpoint, + slug, + authenticationTemplate: [{ kind: "oauth2" }], + }, + }); + yield* browser.session(identity, async ({ page, step }) => { await step("Open the integration's connect modal", async () => { await page.goto(`/integrations/${slug}`, { @@ -191,26 +198,26 @@ scenario( const client = yield* makeApiClient(api, identity); const slug = `mcp-two-input-${randomBytes(3).toString("hex")}`; - yield* client.mcp.addServer({ - payload: { - transport: "remote", - name: "Two-input MCP", - endpoint: server.endpoint, - slug, - authenticationTemplate: [ - { - slug: "token_and_team", - type: "apiKey", - headers: { - Authorization: ["Bearer ", { type: "variable", name: "api_token" }], + yield* Effect.gen(function* () { + yield* client.mcp.addServer({ + payload: { + transport: "remote", + name: "Two-input MCP", + endpoint: server.endpoint, + slug, + authenticationTemplate: [ + { + slug: "token_and_team", + type: "apiKey", + headers: { + Authorization: ["Bearer ", { type: "variable", name: "api_token" }], + }, + queryParams: { team_id: [{ type: "variable", name: "team_id" }] }, }, - queryParams: { team_id: [{ type: "variable", name: "team_id" }] }, - }, - ], - }, - }); + ], + }, + }); - yield* Effect.gen(function* () { yield* browser.session(identity, async ({ page, step }) => { await step("Open the integration's connect modal", async () => { await page.goto(`/integrations/${slug}`, { @@ -233,7 +240,7 @@ scenario( }); }); - // Wire proof: the discovery dial rendered BOTH inputs — the bearer + // Wire proof: the discovery dial rendered BOTH inputs: the bearer // header (the server rejects anything else) and the team-id query. 
const requests = yield* server.requests; expect( diff --git a/e2e/selfhost/cli-device-login.test.ts b/e2e/selfhost/cli-device-login.test.ts index b80da531f..4b69ab6bd 100644 --- a/e2e/selfhost/cli-device-login.test.ts +++ b/e2e/selfhost/cli-device-login.test.ts @@ -7,7 +7,8 @@ // "Authorize device". The terminal then runs `whoami` and `tools sources`; a // clean exit of that chain proves the Better Auth device token is accepted as a // Bearer on the protected /api/* plane. -import { readFileSync } from "node:fs"; +import { mkdtempSync, rmSync } from "node:fs"; +import { tmpdir } from "node:os"; import { dirname, join, resolve } from "node:path"; import { fileURLToPath } from "node:url"; @@ -45,26 +46,44 @@ scenario( const cli = yield* Cli; const browser = yield* Browser; const runDir = yield* RunDir; - const dataDir = join(runDir, "cli-home"); + // This directory contains live OAuth credentials. Keep it outside the + // viewer-served runs tree and remove it when the scenario finishes. + const dataDir = mkdtempSync(join(tmpdir(), "executor-e2e-cli-selfhost-")); + yield* Effect.addFinalizer(() => + Effect.sync(() => rmSync(dataDir, { recursive: true, force: true })), + ); // A signed-in identity (its session cookie authorizes the /device page). 
const identity = yield* target.newIdentity(); - const env = { ...process.env, EXECUTOR_DATA_DIR: dataDir }; - for (const key of ["EXECUTOR_API_KEY", "EXECUTOR_AUTH_TOKEN", "EXECUTOR_AUTH_PASSWORD"]) { - delete (env as Record)[key]; - } + const env: Record = { + ...process.env, + EXECUTOR_DATA_DIR: dataDir, + }; + delete env.EXECUTOR_API_KEY; + delete env.EXECUTOR_AUTH_TOKEN; + delete env.EXECUTOR_AUTH_PASSWORD; - let resolveUrl!: (url: string) => void; - const verificationUrl = new Promise((r) => { - resolveUrl = r; + let resolveFirstUrl!: (url: string) => void; + const firstVerificationUrl = new Promise((resolveUrl) => { + resolveFirstUrl = resolveUrl; + }); + let resolveSecondUrl!: (url: string) => void; + const secondVerificationUrl = new Promise((resolveUrl) => { + resolveSecondUrl = resolveUrl; }); const cli_ = `bun run ${CLI_ENTRY}`; const journey = `${cli_} login --base-url ${SELFHOST_BASE_URL} --no-browser --name selfhost && ` + `${cli_} whoami --server selfhost && ` + - `${cli_} tools sources --server selfhost`; + `${cli_} tools sources --server selfhost && ` + + `${cli_} logout --server selfhost && ` + + `${cli_} whoami --server selfhost && ` + + `echo SECOND_LOGIN_START && ` + + `${cli_} login --base-url ${SELFHOST_BASE_URL} --no-browser && ` + + `${cli_} tools sources --server selfhost && ` + + `${cli_} server list`; const terminal = cli.session( ["bash", "-c", journey], @@ -72,8 +91,21 @@ scenario( await session.screen.waitForText(/user_code=/, { timeoutMs: 60_000 }); const match = (await session.screen.text()).match(/(https?:\/\/\S*user_code=\S+)/); if (!match) throw new Error("verification URL not found on screen"); - resolveUrl(match[1]); + resolveFirstUrl(match[1]); await session.screen.waitForText("Logged in to", { timeoutMs: 60_000 }); + + const secondLogin = await session.screen.waitUntil( + (current) => { + const marker = current.text.lastIndexOf("SECOND_LOGIN_START"); + return marker >= 0 && 
/https?:\/\/\S*user_code=\S+/.test(current.text.slice(marker)); + }, + { timeoutMs: 60_000 }, + ); + const marker = secondLogin.text.lastIndexOf("SECOND_LOGIN_START"); + const secondMatch = secondLogin.text.slice(marker).match(/(https?:\/\/\S*user_code=\S+)/); + if (!secondMatch) throw new Error("second verification URL not found on screen"); + resolveSecondUrl(secondMatch[1]); + const exit = await session.waitForExit({ timeoutMs: 60_000 }); if (exit.reason !== "exited" || exit.exit.code !== 0) { throw new Error( @@ -93,16 +125,27 @@ scenario( // The browser leg, approve on the self-host /device page (session cookie // from the identity authorizes it). Recorded to session.mp4. const browserApproval = Effect.gen(function* () { - const url = yield* Effect.promise(() => verificationUrl); + const firstUrl = yield* Effect.promise(() => firstVerificationUrl); yield* browser.session(identity, async ({ page, step }) => { - await step("Open the device verification page", async () => { - await page.goto(url, { waitUntil: "domcontentloaded" }); + await step("Open the first device verification page", async () => { + await page.goto(firstUrl, { waitUntil: "domcontentloaded" }); // The Authorize button appears once the page binds the signed-in user. 
await page .getByRole("button", { name: /Authorize device/i }) .waitFor({ timeout: 20_000 }); }); - await step("Authorize the device", async () => { + await step("Authorize the first login", async () => { + await page.getByRole("button", { name: /Authorize device/i }).click(); + await page.getByText(/Device approved/i).waitFor({ timeout: 15_000 }); + }); + const secondUrl = await secondVerificationUrl; + await step("Open the re-login device verification page", async () => { + await page.goto(secondUrl, { waitUntil: "domcontentloaded" }); + await page + .getByRole("button", { name: /Authorize device/i }) + .waitFor({ timeout: 20_000 }); + }); + await step("Authorize the re-login", async () => { await page.getByRole("button", { name: /Authorize device/i }).click(); await page.getByText(/Device approved/i).waitFor({ timeout: 15_000 }); }); @@ -112,23 +155,19 @@ scenario( yield* Effect.promise(() => enterFocus(runDir, "terminal")); }); - // Reaching here means the whole `&&` chain exited 0, including the - // authenticated `tools sources` /api call. - yield* Effect.all([terminal, browserApproval], { concurrency: "unbounded" }); - - // The stored profile carries an oauth device-login credential, not a key. - const store = JSON.parse(readFileSync(join(dataDir, "server-connections.json"), "utf8")) as { - defaultProfile: string | null; - profiles: Array<{ - name: string; - connection: { auth?: { kind: string; accessToken?: string } }; - }>; - }; - expect(store.defaultProfile, "the login became the default profile").toBe("selfhost"); - const profile = store.profiles.find((p) => p.name === "selfhost"); - expect(profile?.connection.auth?.kind, "credential is an oauth device token").toBe("oauth"); - expect(typeof profile?.connection.auth?.accessToken, "an access token is stored").toBe( - "string", + // Reaching here means the whole chain exited 0, including protected calls + // before logout and after re-login. 
+ const [finalScreen] = yield* Effect.all([terminal, browserApproval], { + concurrency: "unbounded", + }); + expect(finalScreen, "named logout cleared the selected local credential").toContain( + "Not logged in (no stored credentials).", + ); + expect(finalScreen, "re-login reused the named profile").toMatch( + /Logged in to \S+ \(profile "selfhost", now the default\)\./, + ); + expect(finalScreen, "the public profile list reports restored authentication").toMatch( + /\* selfhost\s+http\s+\S+\s+\S+\s+stored-auth/, ); }), ), diff --git a/e2e/selfhost/oauth-callback-unauthenticated.test.ts b/e2e/selfhost/oauth-callback-unauthenticated.test.ts index b44fcdb77..5b32b2130 100644 --- a/e2e/selfhost/oauth-callback-unauthenticated.test.ts +++ b/e2e/selfhost/oauth-callback-unauthenticated.test.ts @@ -64,83 +64,103 @@ scenario( const client = yield* makeApiClient(api, identity); const integration = IntegrationSlug.make(unique("selfhostsignedoutcb")); - yield* client.openapi.addSpec({ - payload: { ...oauthIntegrationSpec(oauth), slug: integration }, - }); - const clientSlug = OAuthClientSlug.make(unique("selfhostsignedoutc")); - yield* client.oauth.createClient({ - payload: { - owner: "org", - slug: clientSlug, - authorizationUrl: oauth.authorizationEndpoint, - tokenUrl: oauth.tokenEndpoint, - grant: "authorization_code", - clientId: "test-client", - clientSecret: "test-secret", - }, - }); + const connection = ConnectionName.make(unique("main")); - const started = yield* client.oauth.start({ - payload: { - client: clientSlug, - clientOwner: "org", - owner: "org", - name: ConnectionName.make("main"), - integration, - template: AuthTemplateSlug.make("oauth"), - }, - }); - expect(started.status, "oauth.start begins at the provider").toBe("redirect"); - const authorizationUrl = started.status === "redirect" ? 
started.authorizationUrl : ""; + yield* Effect.gen(function* () { + yield* client.openapi.addSpec({ + payload: { ...oauthIntegrationSpec(oauth), slug: integration }, + }); - const authorize = yield* Effect.promise(() => fetch(authorizationUrl, { redirect: "manual" })); - expect(authorize.status, "the provider asks the user to log in").toBe(302); - const consent = yield* Effect.promise(() => - fetch(authorize.headers.get("location") ?? "", { - method: "POST", - redirect: "manual", - headers: { - authorization: `Basic ${Buffer.from("alice:password").toString("base64")}`, + yield* client.oauth.createClient({ + payload: { + owner: "org", + slug: clientSlug, + authorizationUrl: oauth.authorizationEndpoint, + tokenUrl: oauth.tokenEndpoint, + grant: "authorization_code", + clientId: "test-client", + clientSecret: "test-secret", }, - }), - ); - expect(consent.status, "provider consent redirects back to Executor").toBe(302); - const callback = new URL(consent.headers.get("location") ?? ""); - const callbackPath = `${callback.pathname}${callback.search}`; + }); - yield* browser.session({ label: "anonymous" }, async ({ page, step }) => { - await step("Provider sends a signed-out browser to the OAuth callback", async () => { - const response = await page.goto(callbackPath, { waitUntil: "networkidle" }); - expect(response?.status(), "the callback redirects into the login flow").toBe(200); - await page.getByText("Sign in to your instance").waitFor(); + const started = yield* client.oauth.start({ + payload: { + client: clientSlug, + clientOwner: "org", + owner: "org", + name: connection, + integration, + template: AuthTemplateSlug.make("oauth"), + }, }); + expect(started.status, "oauth.start begins at the provider").toBe("redirect"); + const authorizationUrl = started.status === "redirect" ? 
started.authorizationUrl : ""; - const loginUrl = new URL(page.url()); - expect(loginUrl.pathname, "the signed-out callback lands on the sign-in page").toBe("/login"); - expect( - loginUrl.searchParams.get("returnTo"), - "login preserves the callback so it can resume after sign-in", - ).toBe(callbackPath); + const authorize = yield* Effect.promise(() => + fetch(authorizationUrl, { redirect: "manual" }), + ); + expect(authorize.status, "the provider asks the user to log in").toBe(302); + const consent = yield* Effect.promise(() => + fetch(authorize.headers.get("location") ?? "", { + method: "POST", + redirect: "manual", + headers: { + authorization: `Basic ${Buffer.from("alice:password").toString("base64")}`, + }, + }), + ); + expect(consent.status, "provider consent redirects back to Executor").toBe(302); + const callback = new URL(consent.headers.get("location") ?? ""); + const callbackPath = `${callback.pathname}${callback.search}`; - await step("Sign in resumes the original OAuth callback", async () => { - await page.getByLabel("Email").fill(identity.credentials!.email); - await page.getByLabel("Password").fill(identity.credentials!.password); - await page.getByRole("button", { name: "Sign in" }).click(); - await page.waitForURL((url) => url.pathname === "/api/oauth/callback", { - timeout: 30_000, + yield* browser.session({ label: "anonymous" }, async ({ page, step }) => { + await step("Provider sends a signed-out browser to the OAuth callback", async () => { + const response = await page.goto(callbackPath, { waitUntil: "networkidle" }); + expect(response?.status(), "the callback redirects into the login flow").toBe(200); + await page.getByText("Sign in to your instance").waitFor(); }); - await page.waitForFunction(() => document.body.innerText.includes("Connected"), null, { - timeout: 30_000, + + const loginUrl = new URL(page.url()); + expect(loginUrl.pathname, "the signed-out callback lands on the sign-in page").toBe( + "/login", + ); + expect( + 
loginUrl.searchParams.get("returnTo"), + "login preserves the callback so it can resume after sign-in", + ).toBe(callbackPath); + + await step("Sign in resumes the original OAuth callback", async () => { + await page.getByLabel("Email").fill(identity.credentials!.email); + await page.getByLabel("Password").fill(identity.credentials!.password); + await page.getByRole("button", { name: "Sign in" }).click(); + await page.waitForURL((url) => url.pathname === "/api/oauth/callback", { + timeout: 30_000, + }); + await page.waitForFunction(() => document.body.innerText.includes("Connected"), null, { + timeout: 30_000, + }); }); - }); - const body = (await page.locator("body").textContent())?.trim() ?? ""; - expect(new URL(page.url()).pathname, "the login returnTo lands back on the callback").toBe( - "/api/oauth/callback", - ); - expect(body, "the callback completes after the sign-in recovery").toContain("Connected"); - expect(body, "the raw protected API response is not shown").not.toContain("Unauthorized"); - }); + const body = (await page.locator("body").textContent())?.trim() ?? 
""; + expect(new URL(page.url()).pathname, "the login returnTo lands back on the callback").toBe( + "/api/oauth/callback", + ); + expect(body, "the callback completes after the sign-in recovery").toContain("Connected"); + expect(body, "the raw protected API response is not shown").not.toContain("Unauthorized"); + }); + }).pipe( + Effect.ensuring( + Effect.gen(function* () { + yield* client.connections + .remove({ params: { owner: "org", integration, name: connection } }) + .pipe(Effect.ignore); + yield* client.oauth + .removeClient({ params: { slug: clientSlug }, payload: { owner: "org" } }) + .pipe(Effect.ignore); + yield* client.openapi.removeSpec({ params: { slug: integration } }).pipe(Effect.ignore); + }), + ), + ); }).pipe(Effect.scoped), ); diff --git a/e2e/selfhost/posthog-mcp-oauth.test.ts b/e2e/selfhost/posthog-mcp-oauth.test.ts index a80b11f95..baef89995 100644 --- a/e2e/selfhost/posthog-mcp-oauth.test.ts +++ b/e2e/selfhost/posthog-mcp-oauth.test.ts @@ -1,8 +1,9 @@ -// Selfhost browser regression for the reported PostHog MCP OAuth dead-end. A -// real Executor instance adds https://mcp.posthog.com/mcp, then starts the -// connection flow. The product guarantee: clicking Connect opens PostHog's -// OAuth authorization page through dynamic client registration, not the -// bring-your-own OAuth app picker with "Automatic setup unavailable". +// Selfhost browser regression for the reported PostHog MCP OAuth dead-end. +// The deterministic upstream reproduces PostHog's protected-resource +// discovery and dynamic-registration shape while Executor, its MCP plugin, +// the OAuth client, and the browser flow all remain real. The product +// guarantee: clicking Connect starts provider authorization through dynamic +// client registration, not the bring-your-own OAuth app picker. 
import { randomBytes } from "node:crypto"; import { expect } from "@effect/vitest"; @@ -10,12 +11,13 @@ import { Effect } from "effect"; import { composePluginApi } from "@executor-js/api/server"; import { deriveMcpNamespace } from "@executor-js/plugin-mcp"; import { mcpHttpPlugin } from "@executor-js/plugin-mcp/api"; +import { makeGreetingMcpServer, serveMcpServerWithOAuth } from "@executor-js/plugin-mcp/testing"; +import { OAuthTestServer } from "@executor-js/sdk/testing"; import { IntegrationSlug } from "@executor-js/sdk/shared"; import { scenario } from "../src/scenario"; import { Api, Browser, Target } from "../src/services"; -const POSTHOG_MCP_URL = "https://mcp.posthog.com/mcp"; const api = composePluginApi([mcpHttpPlugin()] as const); scenario( @@ -26,6 +28,11 @@ scenario( const target = yield* Target; const browser = yield* Browser; const { client: makeApiClient } = yield* Api; + const oauth = yield* OAuthTestServer; + const upstream = yield* serveMcpServerWithOAuth( + () => makeGreetingMcpServer({ name: "posthog-shaped-mcp" }), + { path: "/mcp" }, + ); const identity = yield* target.newIdentity(); const client = yield* makeApiClient(api, identity); const displayName = `PostHog MCP ${randomBytes(3).toString("hex")}`; @@ -35,7 +42,7 @@ scenario( yield* browser.session(identity, async ({ page, step }) => { await step("Open the add-MCP flow pointed at PostHog", async () => { const addUrl = new URL("/integrations/add/mcp", target.baseUrl); - addUrl.searchParams.set("url", POSTHOG_MCP_URL); + addUrl.searchParams.set("url", upstream.endpoint); await page.goto(addUrl.toString(), { waitUntil: "networkidle" }); await page.getByText("How does this server authenticate?").waitFor({ timeout: 30_000 }); await page.getByText("Method 1 · Detected").waitFor(); @@ -59,26 +66,39 @@ scenario( const popupPromise = page.waitForEvent("popup", { timeout: 30_000 }); await page.getByRole("button", { name: "Connect", exact: true }).click(); const popup = await popupPromise; - await 
popup.waitForURL(/^https:\/\/oauth\.posthog\.com\/oauth\/authorize\//, { - timeout: 30_000, - }); + await popup.waitForURL( + (url) => url.origin === oauth.issuerUrl && url.pathname === "/login", + { + timeout: 30_000, + }, + ); await popup.waitForLoadState("domcontentloaded", { timeout: 30_000 }); const authorizeUrl = new URL(popup.url()); - expect(authorizeUrl.origin, "OAuth opened PostHog's authorization host").toBe( - "https://oauth.posthog.com", - ); - expect(authorizeUrl.pathname, "OAuth opened the authorize endpoint").toBe( - "/oauth/authorize/", + expect(authorizeUrl.origin, "OAuth opened the discovered authorization host").toBe( + oauth.issuerUrl, ); - expect( - authorizeUrl.searchParams.get("resource"), - "resource targets the MCP endpoint", - ).toBe(POSTHOG_MCP_URL); + expect(authorizeUrl.pathname, "the provider rendered its login page").toBe("/login"); await popup.close(); }); }); - }).pipe(Effect.ensuring(client.mcp.removeServer({ params: { slug } }).pipe(Effect.ignore))); + + const requests = yield* oauth.requests; + expect( + requests.map((request) => `${request.method} ${request.path}`), + "Executor dynamically registered an OAuth client", + ).toContain("POST /register"); + const authorize = requests.find( + (request) => request.method === "GET" && request.path === "/authorize", + ); + expect( + authorize?.query.resource, + "the authorization request is bound to the MCP resource", + ).toBe(upstream.endpoint); + }).pipe( + // Install cleanup before the browser can create the integration. + Effect.ensuring(client.mcp.removeServer({ params: { slug } }).pipe(Effect.ignore)), + ); }), - ), + ).pipe(Effect.provide(OAuthTestServer.layer())), ); diff --git a/e2e/setup/boot.ts b/e2e/setup/boot.ts index 2b6b13731..ebdc391ba 100644 --- a/e2e/setup/boot.ts +++ b/e2e/setup/boot.ts @@ -9,8 +9,43 @@ export interface BootedProcesses { readonly teardown: () => Promise; /** Process-group leader pids — what an external `down` must signal. 
*/ readonly pids: ReadonlyArray; + /** Race a readiness probe against any child stopping before readiness. */ + readonly waitUntilReady:
(readiness: Promise) => Promise; } +export type TargetBootMode = + | { readonly kind: "spawn" } + | { readonly kind: "attach"; readonly url: string }; + +/** + * Attaching is selected only by an explicit target URL. Port overrides still + * mean "spawn on this port" and must pass the normal collision checks. + */ +export const targetBootMode = ( + urlEnvVar: string, + env: Readonly> = process.env, +): TargetBootMode => { + const configured = env[urlEnvVar]?.trim(); + if (!configured) return { kind: "spawn" }; + if (!URL.canParse(configured)) { + throw new Error(`e2e: ${urlEnvVar} must be an absolute http(s) URL`); + } + const parsed = new URL(configured); + if (!["http:", "https:"].includes(parsed.protocol) || parsed.username || parsed.password) { + throw new Error(`e2e: ${urlEnvVar} must be an http(s) URL without embedded credentials`); + } + return { kind: "attach", url: parsed.toString().replace(/\/$/, "") }; +}; + +/** Resolve readiness unless a monitored process reports a concrete failure. */ +export const waitForReadiness = (readiness: Promise, failure: Promise): Promise => + Promise.race([ + readiness, + failure.then((error) => { + throw error; + }), + ]); + export const bootProcesses = ( procs: ReadonlyArray<{ readonly cmd: string; @@ -23,6 +58,7 @@ export const bootProcesses = ( options: { readonly label: string }, ): BootedProcesses => { const children: ChildProcess[] = []; + const stopped: Array> = []; let tearingDown = false; for (const proc of procs) { const log = proc.logFile ? openSync(proc.logFile, "a") : undefined; @@ -36,13 +72,32 @@ export const bootProcesses = ( // kill and squat the port into the NEXT invocation's waitForHttp. 
detached: true, }); - child.on("exit", (code) => { - if (code !== 0 && code !== null && !tearingDown) { - console.error(`[e2e:${options.label}] ${proc.cmd} exited with ${code}`); + stopped.push( + new Promise((resolve) => { + child.once("error", (error) => + resolve( + new Error(`[e2e:${options.label}] ${proc.cmd} failed to start: ${error.message}`), + ), + ); + child.once("exit", (code, signal) => + resolve( + new Error( + `[e2e:${options.label}] ${proc.cmd} stopped before readiness (${signal ? `signal ${signal}` : `exit ${code ?? "unknown"}`})`, + ), + ), + ); + }), + ); + child.on("exit", (code, signal) => { + if (!tearingDown) { + console.error( + `[e2e:${options.label}] ${proc.cmd} stopped (${signal ? `signal ${signal}` : `exit ${code ?? "unknown"}`})`, + ); } }); children.push(child); } + const firstStop = Promise.race(stopped); // Signal the process GROUP (negative pid); fall back to the direct child // when the group is already gone. @@ -61,6 +116,7 @@ export const bootProcesses = ( : new Promise((resolve) => child.once("exit", () => resolve())); return { + waitUntilReady: (readiness) => waitForReadiness(readiness, firstStop), teardown: async () => { tearingDown = true; const allExited = Promise.all(children.map(exited)); @@ -82,17 +138,35 @@ export const bootProcesses = ( export const waitForHttp = async ( url: string, - options: { readonly timeoutMs?: number; readonly expectRedirect?: boolean } = {}, + options: { + readonly timeoutMs?: number; + readonly expectRedirect?: boolean; + readonly expectedStatus?: number; + readonly headers?: HeadersInit; + readonly validateResponse?: (response: Response) => boolean | Promise; + } = {}, ): Promise => { const deadline = Date.now() + (options.timeoutMs ?? 
90_000); let lastError: unknown; while (Date.now() < deadline) { try { - const response = await fetch(url, { redirect: "manual" }); + const response = await fetch(url, { redirect: "manual", headers: options.headers }); // During a cold vite compile /api/* falls back to the SPA's 200 HTML — // expectRedirect waits for the real handler (302) instead. - if (options.expectRedirect ? response.status === 302 : response.status < 500) return; - lastError = new Error(`status ${response.status}`); + const ready = + options.expectedStatus !== undefined + ? response.status === options.expectedStatus + : options.expectRedirect + ? response.status === 302 + : response.status < 500; + const valid = + ready && (options.validateResponse ? await options.validateResponse(response) : true); + if (valid) return; + lastError = new Error( + ready + ? `response validation failed with status ${response.status}` + : `status ${response.status}`, + ); } catch (error) { lastError = error; } diff --git a/e2e/setup/cloud.boot.ts b/e2e/setup/cloud.boot.ts index 24e7cd9c9..596fa773e 100644 --- a/e2e/setup/cloud.boot.ts +++ b/e2e/setup/cloud.boot.ts @@ -131,9 +131,24 @@ export const bootCloud = async (options: CloudBootOptions): Promise try { const local = `http://127.0.0.1:${options.cloudPort}`; - await waitForHttp(local); - // The API plane is ready when login actually redirects to AuthKit. - await waitForHttp(`${local}/api/auth/login`, { expectRedirect: true }); + await procs.waitUntilReady( + (async () => { + await waitForHttp(local); + // The API plane is ready only when login redirects to this boot's + // AuthKit emulator, not merely when something answers the app port. 
+ await waitForHttp(`${local}/api/auth/login`, { + expectRedirect: true, + validateResponse: (response) => { + const location = response.headers.get("location"); + return ( + location !== null && + URL.canParse(location, workosUrl) && + new URL(location, workosUrl).origin === new URL(workosUrl).origin + ); + }, + }); + })(), + ); } catch (error) { await teardown(); throw error; diff --git a/e2e/setup/cloud.globalsetup.ts b/e2e/setup/cloud.globalsetup.ts index b947cec40..ead20e23c 100644 --- a/e2e/setup/cloud.globalsetup.ts +++ b/e2e/setup/cloud.globalsetup.ts @@ -5,13 +5,15 @@ // stack instead. import { claimPorts } from "../src/ports"; import { E2E_COOKIE_PASSWORD, E2E_WORKOS_CLIENT_ID } from "../targets/cloud"; -import { waitForHttp } from "./boot"; +import { targetBootMode, waitForHttp } from "./boot"; import { bootCloud } from "./cloud.boot"; import { bootMotel, motelExporterEnv } from "./motel"; export default async function setup(): Promise<(() => Promise) | void> { - if (process.env.E2E_CLOUD_URL) { - await waitForHttp(process.env.E2E_CLOUD_URL); + const mode = targetBootMode("E2E_CLOUD_URL"); + if (mode.kind === "attach") { + process.env.E2E_CLOUD_URL = mode.url; + await waitForHttp(mode.url); return; } @@ -24,10 +26,15 @@ export default async function setup(): Promise<(() => Promise) | void> { { envVar: "E2E_CLOUD_DB_PORT", offset: 1, label: "cloud dev-db (PGlite)" }, { envVar: "E2E_WORKOS_EMULATOR_PORT", offset: 2, label: "WorkOS emulator" }, { envVar: "E2E_AUTUMN_EMULATOR_PORT", offset: 3, label: "Autumn emulator" }, + { envVar: "E2E_MOTEL_PORT", offset: 7, label: "Motel telemetry store" }, ]); // Suite-owned trace store — every run captures distributed traces. - const motel = await bootMotel(); + const motel = await bootMotel(ports.E2E_MOTEL_PORT!, { + required: + process.env.E2E_REQUIRED_CAPABILITY_MODE === "required" && + (process.env.E2E_REQUIRED_CAPABILITIES ?? 
"").split(",").includes("telemetry"), + }); // Publish to the test workers (they inherit this process's env): scenarios // that assert on exported spans yield the Telemetry service, which exists // only when this is set. No motel → those scenarios skip, never fail. diff --git a/e2e/setup/cloudflare.boot.ts b/e2e/setup/cloudflare.boot.ts index df58a54b7..74039795b 100644 --- a/e2e/setup/cloudflare.boot.ts +++ b/e2e/setup/cloudflare.boot.ts @@ -1,23 +1,35 @@ // The Cloudflare host boot recipe: the REAL worker on workerd via `wrangler dev` -// (Miniflare) with a local D1 + R2 and dev-auth on. Shared by the vitest -// globalsetup (ephemeral) and, like the other hosts, available to a dev CLI. +// (Miniflare) with a local D1 + R2. A loopback Cloudflare Access issuer signs +// real human and service-token assertions, so the worker's production JWT/JWKS +// boundary stays enabled in hermetic runs. Shared by the vitest globalsetup. // // The browser scenarios drive the console `/resume` page, which the worker // serves as Static Assets from `dist/` — so the SPA is built first (vite build, // a couple of seconds) before wrangler serves it. 
import { execFile } from "node:child_process"; +import { randomUUID } from "node:crypto"; import { fileURLToPath } from "node:url"; import { promisify } from "node:util"; +import { + accessAssertionHeaders, + E2E_CLOUDFLARE_ACCESS_AUDIENCE, + verifyCloudflareAccessEmulator, +} from "../src/cloudflare-access-emulator"; import { bootProcesses, waitForHttp, type BootedProcesses } from "./boot"; export const cloudflareDir = fileURLToPath(new URL("../../apps/host-cloudflare/", import.meta.url)); const wranglerBin = fileURLToPath( new URL("../../apps/host-cloudflare/node_modules/.bin/wrangler", import.meta.url), ); +const accessEmulator = fileURLToPath( + new URL("../scripts/cloudflare-access-emulator.ts", import.meta.url), +); +const e2eDir = fileURLToPath(new URL("../", import.meta.url)); export interface CloudflareBootOptions { readonly port: number; + readonly accessPort: number; readonly logFile?: string; /** Skip the SPA build when `dist/` is already current (fast local iteration). */ readonly skipBuild?: boolean; @@ -28,13 +40,28 @@ export const bootCloudflare = async (options: CloudflareBootOptions): Promise { + await waitForHttp(`${accessBaseUrl}/health`, { expectedStatus: 200 }); + const verified = await verifyCloudflareAccessEmulator(accessBaseUrl, { + expectedBootNonce: bootNonce, + }); + // A 200 here proves the worker fetched this boot's emulator JWKS and + // accepted the configured issuer and audience. An unrelated listener + // or anonymous 401 cannot false-pass boot. 
+ await waitForHttp(`http://127.0.0.1:${options.port}/api/account/me`, { + timeoutMs: 120_000, + expectedStatus: 200, + headers: accessAssertionHeaders(verified.token), + }); + })(), + ); } catch (error) { await procs.teardown(); throw error; diff --git a/e2e/setup/cloudflare.globalsetup.ts b/e2e/setup/cloudflare.globalsetup.ts index dbb3b7f8a..6827cd6e0 100644 --- a/e2e/setup/cloudflare.globalsetup.ts +++ b/e2e/setup/cloudflare.globalsetup.ts @@ -1,24 +1,55 @@ // Boot the Cloudflare target: claim this checkout's port atomically, then run -// the shared boot recipe (cloudflare.boot.ts). Set E2E_CLOUDFLARE_URL to attach -// to an already-running instance instead. +// the shared boot recipe (cloudflare.boot.ts). Attach mode requires both +// E2E_CLOUDFLARE_URL and E2E_CLOUDFLARE_ACCESS_URL because the full auth suite +// needs the issuer's token-minting and ledger capabilities. +import { + accessAssertionHeaders, + verifyCloudflareAccessEmulator, +} from "../src/cloudflare-access-emulator"; import { claimPorts } from "../src/ports"; -import { waitForHttp } from "./boot"; +import { targetBootMode, waitForHttp } from "./boot"; import { bootCloudflare } from "./cloudflare.boot"; +export const requiredCloudflareAccessAttachUrl = ( + env: Readonly> = process.env, +) => { + const accessMode = targetBootMode("E2E_CLOUDFLARE_ACCESS_URL", env); + if (accessMode.kind !== "attach") { + throw new Error( + "e2e: Cloudflare attach mode requires E2E_CLOUDFLARE_ACCESS_URL with the test issuer, token minting, and ledger endpoints; a static token alone cannot run the full auth suite", + ); + } + return accessMode.url; +}; + export default async function setup(): Promise<(() => Promise) | void> { - if (process.env.E2E_CLOUDFLARE_URL) { - await waitForHttp(`${process.env.E2E_CLOUDFLARE_URL}/api/account/me`); + const mode = targetBootMode("E2E_CLOUDFLARE_URL"); + if (mode.kind === "attach") { + const accessUrl = requiredCloudflareAccessAttachUrl(); + process.env.E2E_CLOUDFLARE_URL = 
mode.url; + process.env.E2E_CLOUDFLARE_ACCESS_URL = accessUrl; + const verified = await verifyCloudflareAccessEmulator(accessUrl); + await waitForHttp(`${mode.url}/api/account/me`, { + expectedStatus: 200, + headers: accessAssertionHeaders(verified.token), + }); return; } const { ports, release } = await claimPorts([ - { envVar: "E2E_CLOUDFLARE_PORT", offset: 5, label: "cloudflare wrangler dev" }, + { envVar: "E2E_CLOUDFLARE_PORT", offset: 6, label: "cloudflare wrangler dev" }, + { + envVar: "E2E_CLOUDFLARE_ACCESS_PORT", + offset: 7, + label: "cloudflare Access issuer", + }, ]); const port = ports.E2E_CLOUDFLARE_PORT!; + const accessPort = ports.E2E_CLOUDFLARE_ACCESS_PORT!; let procs; try { - procs = await bootCloudflare({ port }); + procs = await bootCloudflare({ port, accessPort }); } catch (error) { await release(); throw error; diff --git a/e2e/setup/desktop-kvm.globalsetup.ts b/e2e/setup/desktop-kvm.globalsetup.ts new file mode 100644 index 000000000..6931baf9a --- /dev/null +++ b/e2e/setup/desktop-kvm.globalsetup.ts @@ -0,0 +1,149 @@ +// Required, opt-in Linux desktop lane. The host builds the real packaged app, +// then a disposable libvirt/QEMU guest runs it on a QXL-backed Xorg display. +// remote-viewer projects the guest's SPICE framebuffer onto a dedicated host X +// display and ffmpeg records those pixels for the entire acceptance journey. 
+ +import { execFileSync } from "node:child_process"; +import { mkdirSync, realpathSync } from "node:fs"; +import { basename, dirname, join } from "node:path"; +import { fileURLToPath } from "node:url"; + +import { requirePackagedDesktopBundle } from "../src/desktop/packaged"; +import { libvirtLinuxKvmDesktop } from "../src/vm/linux-kvm-libvirt"; +import type { LinuxKvmDesktopHandle } from "../src/vm/linux-kvm"; +import setupPackagedDesktop from "./desktop-packaged.globalsetup"; + +const e2eRoot = fileURLToPath(new URL("../", import.meta.url)); +const optionalEnv = (name: string) => process.env[name] || undefined; +const expectedBunVersion = () => optionalEnv("E2E_BUN_VERSION") ?? "1.3.11"; +const expectedClaudeVersion = () => optionalEnv("E2E_CLAUDE_CODE_VERSION") ?? "2.1.195"; +const shellQuote = (value: string) => `'${value.replaceAll("'", `'"'"'`)}'`; + +const executablePath = (environmentName: string, command: string) => { + const configured = optionalEnv(environmentName); + const path = configured ?? execFileSync("which", [command], { encoding: "utf8" }).trim(); + if (!path) throw new Error(`${command} is required for the desktop KVM guest payload`); + return realpathSync(path); +}; + +const firstVersionToken = (binary: string, args: ReadonlyArray) => + /^(\S+)/.exec(execFileSync(binary, [...args], { encoding: "utf8" }).trim())?.[1]; + +export default async function setup() { + const baseImagePath = process.env.E2E_KVM_BASE_IMAGE ?? ""; + const guestDisplay = optionalEnv("E2E_KVM_GUEST_DISPLAY") ?? 
":0"; + const provider = libvirtLinuxKvmDesktop({ + baseImagePath, + baseImageFormat: optionalEnv("E2E_KVM_BASE_FORMAT"), + cleanupLedgerPath: optionalEnv("E2E_KVM_CLEANUP_LEDGER"), + guestDisplay, + guestUser: optionalEnv("E2E_KVM_GUEST_USER"), + libvirtNetwork: optionalEnv("E2E_LIBVIRT_NETWORK"), + libvirtUri: optionalEnv("E2E_LIBVIRT_URI"), + osVariant: optionalEnv("E2E_KVM_OS_VARIANT"), + repositoryScope: optionalEnv("E2E_KVM_REPOSITORY_SCOPE"), + runScope: optionalEnv("E2E_KVM_RUN_SCOPE"), + workRoot: optionalEnv("E2E_KVM_WORK_ROOT"), + }); + + await provider.preflight("required"); + const bunPath = executablePath("E2E_BUN_BIN", "bun"); + const claudePath = executablePath("E2E_CLAUDE_CODE_BIN", "claude"); + const bunVersion = firstVersionToken(bunPath, ["--version"]); + const claudeVersion = firstVersionToken(claudePath, ["--version"]); + if (bunVersion !== expectedBunVersion()) { + throw new Error(`Bun ${expectedBunVersion()} is required, found ${bunVersion ?? "unknown"}`); + } + if (claudeVersion !== expectedClaudeVersion()) { + throw new Error( + `Claude Code ${expectedClaudeVersion()} is required, found ${claudeVersion ?? 
"unknown"}`, + ); + } + setupPackagedDesktop(); + const bundle = requirePackagedDesktopBundle(); + const artifactDir = join( + e2eRoot, + "runs", + "desktop-kvm", + `${new Date().toISOString().replace(/[:.]/g, "-").toLowerCase()}-${process.pid}`, + ); + mkdirSync(artifactDir, { recursive: true }); + + let vm: LinuxKvmDesktopHandle | undefined; + try { + vm = await provider.provision(); + const remoteRoot = `/home/${vm.sshUser}/executor-desktop-e2e`; + const remoteHome = `/home/${vm.sshUser}/executor-desktop-home`; + const remoteTools = `/home/${vm.sshUser}/executor-kvm-tools`; + const remoteApp = `${remoteRoot}/${basename(bundle.app)}`; + const remoteBun = `${remoteTools}/bun`; + const remoteClaude = `${remoteTools}/claude`; + const remoteGuestRuntime = `${remoteTools}/guest-runtime.ts`; + const guestRuntimeSource = fileURLToPath( + new URL("../desktop-kvm/guest-runtime.ts", import.meta.url), + ); + await vm.run( + `rm -rf '${remoteRoot}' '${remoteHome}' '${remoteTools}' && mkdir -p '${remoteHome}' '${remoteTools}'`, + ); + await vm.push(dirname(bundle.app), remoteRoot); + await vm.push(bunPath, remoteBun); + await vm.push(claudePath, remoteClaude); + await vm.push(guestRuntimeSource, remoteGuestRuntime); + const prepared = await vm.run( + `find '${remoteRoot}' -type f \\( -name executor -o -name executor-sidecar -o -name executor-desktop \\) -exec chmod +x {} + && chmod 755 '${remoteBun}' '${remoteClaude}' && chmod 600 '${remoteGuestRuntime}' && test -x '${remoteApp}' && test -x '${remoteBun}' && test -x '${remoteClaude}'`, + ); + if (prepared.code !== 0) { + throw new Error(`packaged desktop upload failed: ${prepared.stderr || prepared.stdout}`); + } + const guestVersionProbe = await vm.run( + `${shellQuote(remoteBun)} --version && ${shellQuote(remoteClaude)} --version`, + ); + const [guestBunVersion, guestClaudeVersionLine] = guestVersionProbe.stdout + .trim() + .split(/\r?\n/); + const guestClaudeVersion = /^(\S+)/.exec(guestClaudeVersionLine ?? 
"")?.[1]; + if ( + guestVersionProbe.code !== 0 || + guestBunVersion !== expectedBunVersion() || + guestClaudeVersion !== expectedClaudeVersion() + ) { + throw new Error( + `guest client probe failed: expected Bun ${expectedBunVersion()} and Claude Code ${expectedClaudeVersion()}, got ${guestBunVersion ?? "unknown"} and ${guestClaudeVersion ?? "unknown"}\n${guestVersionProbe.stderr}`, + ); + } + + const recordingPath = join(artifactDir, "session.mp4"); + await vm.display.startRecording(recordingPath); + const cdpForward = await vm.forward(9_222); + + process.env.E2E_KVM_ARTIFACT_DIR = artifactDir; + process.env.E2E_KVM_CDP_PORT = String(cdpForward.localPort); + process.env.E2E_KVM_GUEST_DISPLAY = guestDisplay; + process.env.E2E_KVM_GUEST_HOST = vm.host; + process.env.E2E_KVM_GUEST_USER = vm.sshUser; + process.env.E2E_KVM_RECORDING_PATH = recordingPath; + process.env.E2E_KVM_REMOTE_APP = remoteApp; + process.env.E2E_KVM_REMOTE_BUN = remoteBun; + process.env.E2E_KVM_REMOTE_CLAUDE = remoteClaude; + process.env.E2E_KVM_REMOTE_GUEST_RUNTIME = remoteGuestRuntime; + process.env.E2E_KVM_REMOTE_HOME = remoteHome; + process.env.E2E_KVM_SSH_KEY = vm.sshKeyPath; + process.env.E2E_KVM_CLAUDE_CODE_VERSION = expectedClaudeVersion(); + } catch (error) { + if (vm) { + try { + await vm.discard(); + } catch (cleanupError) { + throw new AggregateError( + [error, cleanupError], + "desktop KVM setup failed and guest cleanup was incomplete", + ); + } + } + throw error; + } + + return async () => { + await vm?.discard(); + }; +} diff --git a/e2e/setup/motel.ts b/e2e/setup/motel.ts index ad14e8ec4..0e39b3405 100644 --- a/e2e/setup/motel.ts +++ b/e2e/setup/motel.ts @@ -2,17 +2,15 @@ // dev stack (same pattern as the WorkOS/Autumn emulators) so EVERY run // captures distributed traces — hermetically, in CI too, with no dependence // on a machine-global daemon whose health or leftover data could leak into -// results. 
DB lives under runs/.motel so the suite's evidence stays with -// the suite; wiped per boot like the target's dev DB. -import { mkdirSync, rmSync } from "node:fs"; +// results. The raw telemetry database stays outside the publishable runs tree; +// scenario artifacts contain the portable, sanitized trace export. +import { mkdtempSync, rmSync } from "node:fs"; +import { tmpdir } from "node:os"; import { join } from "node:path"; import { fileURLToPath } from "node:url"; import { bootProcesses, waitForHttp, type BootedProcesses } from "./boot"; -export const MOTEL_PORT = 4796; -export const MOTEL_URL = `http://127.0.0.1:${MOTEL_PORT}`; - const e2eDir = fileURLToPath(new URL("..", import.meta.url)); export interface SuiteMotel { @@ -20,13 +18,15 @@ export interface SuiteMotel { readonly teardown: () => Promise; } -/** Boot the suite's motel server. Never fails the suite: if the binary or - * the port is unavailable, tracing is simply off (null) and targets skip - * the exporter env. */ -export const bootMotel = async (): Promise => { - const dataDir = join(e2eDir, "runs", ".motel"); - rmSync(dataDir, { recursive: true, force: true }); - mkdirSync(dataDir, { recursive: true }); +/** Boot the suite's motel server on a port claimed with the rest of the target. + * Optional local runs can continue without it. Required trace lanes fail the + * setup instead of turning a missing dependency into a green skip. 
*/ +export const bootMotel = async ( + port: number, + options: { readonly required: boolean }, +): Promise => { + const url = `http://127.0.0.1:${port}`; + const dataDir = mkdtempSync(join(tmpdir(), "executor-e2e-motel-")); let procs: BootedProcesses | null = null; // oxlint-disable-next-line executor/no-try-catch-or-throw -- boundary: optional infrastructure; a motel-less host still runs the suite @@ -38,19 +38,27 @@ export const bootMotel = async (): Promise => { args: ["motel", "server"], cwd: e2eDir, env: { - MOTEL_OTEL_BASE_URL: MOTEL_URL, + MOTEL_OTEL_BASE_URL: url, MOTEL_OTEL_DB_PATH: join(dataDir, "telemetry.sqlite"), }, }, ], { label: "motel" }, ); - await waitForHttp(`${MOTEL_URL}/api/health`); - console.log(`[e2e] traces → suite motel at ${MOTEL_URL}`); - return { url: MOTEL_URL, teardown: procs.teardown }; + await procs.waitUntilReady(waitForHttp(`${url}/api/health`)); + console.log(`[e2e] traces at suite motel ${url}`); + return { + url, + teardown: async () => { + await procs?.teardown(); + rmSync(dataDir, { recursive: true, force: true }); + }, + }; } catch (error) { - console.warn(`[e2e] motel unavailable, tracing off: ${String(error)}`); await procs?.teardown(); + rmSync(dataDir, { recursive: true, force: true }); + if (options.required) throw error; + console.warn(`[e2e] optional motel unavailable, tracing off: ${String(error)}`); return null; } }; diff --git a/e2e/setup/selfhost-docker.boot.ts b/e2e/setup/selfhost-docker.boot.ts index 67f49d0fb..566c47207 100644 --- a/e2e/setup/selfhost-docker.boot.ts +++ b/e2e/setup/selfhost-docker.boot.ts @@ -19,7 +19,7 @@ import { appendFileSync } from "node:fs"; import { fileURLToPath } from "node:url"; import { promisify } from "node:util"; -import { waitForHttp, type BootedProcesses } from "./boot"; +import { waitForHttp, waitForReadiness, type BootedProcesses } from "./boot"; const exec = promisify(execFile); @@ -82,7 +82,7 @@ export interface RunContainerOptions { * — a restart starts a genuinely new 
container, so it MUST run the exact * same way the boot did. */ -export const runSelfhostContainer = async (options: RunContainerOptions): Promise => { +export const runSelfhostContainer = async (options: RunContainerOptions) => { const name = selfhostDockerContainerName(options.port); const volume = selfhostDockerVolumeName(options.port); const args = [ @@ -111,12 +111,52 @@ export const runSelfhostContainer = async (options: RunContainerOptions): Promis options.image, ]; log(options.logFile, `docker ${args.join(" ")}`); - await exec("docker", args).catch((error: { stderr?: string }) => { + const started = await exec("docker", args).catch((error: { stderr?: string }) => { throw new Error(`selfhost-docker: docker run failed: ${String(error.stderr ?? error)}`); }); + const containerId = started.stdout.trim(); + if (!containerId) throw new Error("selfhost-docker: docker run returned no container id"); + const stopped = exec("docker", ["wait", name]).then( + ({ stdout }) => + new Error( + `selfhost-docker: container ${containerId.slice(0, 12)} stopped before readiness (exit ${stdout.trim() || "unknown"})`, + ), + (error: { stderr?: string }) => + new Error( + `selfhost-docker: could not monitor container ${containerId.slice(0, 12)}: ${String(error.stderr ?? error)}`, + ), + ); + + const assertRunning = async () => { + const inspected = await exec("docker", [ + "inspect", + "--format", + "{{.Id}} {{.State.Running}}", + name, + ]); + const [actualId, running] = inspected.stdout.trim().split(/\s+/); + if (actualId !== containerId || running !== "true") { + throw new Error( + `selfhost-docker: expected running container ${containerId.slice(0, 12)}, got ${actualId?.slice(0, 12) ?? "none"} (${running ?? 
"missing"})`, + ); + } + }; try { - await waitForHttp(`${options.webBaseUrl}/api/health`, { timeoutMs: 120_000 }); + await waitForReadiness( + waitForHttp(`${options.webBaseUrl}/api/health`, { + timeoutMs: 120_000, + expectedStatus: 200, + validateResponse: async (response) => { + const body: unknown = await response.json(); + return ( + typeof body === "object" && body !== null && "status" in body && body.status === "ok" + ); + }, + }), + stopped, + ); + await assertRunning(); } catch (error) { const { stdout } = await exec("docker", ["logs", "--tail", "100", name]).catch(() => ({ stdout: "(docker logs unavailable)", @@ -125,6 +165,7 @@ export const runSelfhostContainer = async (options: RunContainerOptions): Promis await exec("docker", ["rm", "-f", name]).catch(() => {}); throw error; } + return { containerId, stopped, assertRunning }; }; /** @@ -154,7 +195,7 @@ export const bootSelfhostDocker = async ( await exec("docker", ["rm", "-f", name]).catch(() => {}); await exec("docker", ["volume", "rm", "-f", volume]).catch(() => {}); - await runSelfhostContainer({ image, ...options }); + const container = await runSelfhostContainer({ image, ...options }); // The target's restart() runs in a different process (test worker, not // globalsetup) and must re-run the same image. 
claimPorts-style env @@ -162,6 +203,11 @@ export const bootSelfhostDocker = async ( process.env.E2E_SELFHOST_DOCKER_RESOLVED_IMAGE = image; return { + waitUntilReady: async (readiness: Promise) => { + const value = await waitForReadiness(readiness, container.stopped); + await container.assertRunning(); + return value; + }, teardown: async () => { await stopSelfhostContainer(options.port, options.logFile); await exec("docker", ["volume", "rm", "-f", volume]).catch(() => {}); diff --git a/e2e/setup/selfhost-docker.globalsetup.ts b/e2e/setup/selfhost-docker.globalsetup.ts index efeff81cb..15b02d3e9 100644 --- a/e2e/setup/selfhost-docker.globalsetup.ts +++ b/e2e/setup/selfhost-docker.globalsetup.ts @@ -8,12 +8,22 @@ import { fileURLToPath } from "node:url"; import { claimPorts } from "../src/ports"; import { SELFHOST_ADMIN } from "../targets/selfhost"; -import { waitForHttp } from "./boot"; +import { targetBootMode, waitForHttp } from "./boot"; import { bootSelfhostDocker } from "./selfhost-docker.boot"; export default async function setup(): Promise<(() => Promise) | void> { - if (process.env.E2E_SELFHOST_DOCKER_URL) { - await waitForHttp(process.env.E2E_SELFHOST_DOCKER_URL); + const mode = targetBootMode("E2E_SELFHOST_DOCKER_URL"); + if (mode.kind === "attach") { + process.env.E2E_SELFHOST_DOCKER_URL = mode.url; + await waitForHttp(`${mode.url}/api/health`, { + expectedStatus: 200, + validateResponse: async (response) => { + const body: unknown = await response.json(); + return ( + typeof body === "object" && body !== null && "status" in body && body.status === "ok" + ); + }, + }); return; } diff --git a/e2e/setup/selfhost.boot.ts b/e2e/setup/selfhost.boot.ts index 6f0d743d7..d7f1e6176 100644 --- a/e2e/setup/selfhost.boot.ts +++ b/e2e/setup/selfhost.boot.ts @@ -62,7 +62,17 @@ export const bootSelfhost = async (options: SelfhostBootOptions): Promise { + const body: unknown = await response.json(); + return ( + typeof body === "object" && body !== null && "status" in 
body && body.status === "ok" + ); + }, + }), + ); } catch (error) { await procs.teardown(); throw error; diff --git a/e2e/setup/selfhost.globalsetup.ts b/e2e/setup/selfhost.globalsetup.ts index 6627f160e..0e5e6c9cd 100644 --- a/e2e/setup/selfhost.globalsetup.ts +++ b/e2e/setup/selfhost.globalsetup.ts @@ -4,12 +4,22 @@ // instance (with E2E_SELFHOST_ADMIN_EMAIL/PASSWORD matching it). import { claimPorts } from "../src/ports"; import { SELFHOST_ADMIN } from "../targets/selfhost"; -import { waitForHttp } from "./boot"; +import { targetBootMode, waitForHttp } from "./boot"; import { bootSelfhost } from "./selfhost.boot"; export default async function setup(): Promise<(() => Promise) | void> { - if (process.env.E2E_SELFHOST_URL) { - await waitForHttp(process.env.E2E_SELFHOST_URL); + const mode = targetBootMode("E2E_SELFHOST_URL"); + if (mode.kind === "attach") { + process.env.E2E_SELFHOST_URL = mode.url; + await waitForHttp(`${mode.url}/api/health`, { + expectedStatus: 200, + validateResponse: async (response) => { + const body: unknown = await response.json(); + return ( + typeof body === "object" && body !== null && "status" in body && body.status === "ok" + ); + }, + }); return; } diff --git a/e2e/src/artifact-io.ts b/e2e/src/artifact-io.ts new file mode 100644 index 000000000..6941575f1 --- /dev/null +++ b/e2e/src/artifact-io.ts @@ -0,0 +1,585 @@ +import { randomUUID } from "node:crypto"; +import { + closeSync, + existsSync, + mkdirSync, + openSync, + readFileSync, + readdirSync, + renameSync, + rmSync, + statSync, + writeFileSync, +} from "node:fs"; +import { basename, dirname, join } from "node:path"; +import { Worker } from "node:worker_threads"; + +const positiveIntegerFromEnv = (name: string, fallback: number): number => { + const parsed = Number.parseInt(process.env[name] ?? "", 10); + return Number.isSafeInteger(parsed) && parsed > 0 ? 
parsed : fallback; +}; + +const LOCK_TIMEOUT_MS = positiveIntegerFromEnv("E2E_ARTIFACT_LOCK_TIMEOUT_MS", 10_000); +const STALE_LOCK_MS = positiveIntegerFromEnv("E2E_ARTIFACT_LOCK_STALE_MS", 30_000); +const HEARTBEAT_INTERVAL_MS = Math.max(20, Math.min(1_000, Math.floor(STALE_LOCK_MS / 4))); +const TOMBSTONE_RETENTION_MS = Math.max(60_000, STALE_LOCK_MS, LOCK_TIMEOUT_MS * 4); +const LOCK_RETRY_MS = 10; +const sleeper = new Int32Array(new SharedArrayBuffer(Int32Array.BYTES_PER_ELEMENT)); +const HEARTBEAT_WORKER_SOURCE = String.raw` + const { parentPort, workerData } = require("node:worker_threads"); + const { readFileSync, utimesSync } = require("node:fs"); + const locks = new Map(); + const beat = (key, lock) => { + try { + if (readFileSync(lock.ownerFile, "utf8") !== lock.owner) { + locks.delete(key); + return; + } + const now = new Date(); + utimesSync(lock.heartbeatFile, now, now); + } catch { + locks.delete(key); + } + }; + parentPort.on("message", (message) => { + if (message.type === "add") { + locks.set(message.key, message); + beat(message.key, message); + } else if (message.type === "remove" && locks.get(message.key)?.owner === message.owner) { + locks.delete(message.key); + } + }); + setInterval(() => { + for (const [key, lock] of locks) beat(key, lock); + }, workerData.intervalMs); +`; +let sharedHeartbeatWorker: Worker | undefined; + +const invocation = { + id: randomUUID(), + startedAt: Date.now(), + runtime: { + name: process.versions.bun ? "bun" : "node", + version: process.versions.bun ?? 
process.version, + platform: process.platform, + arch: process.arch, + }, +} as const; + +export interface EvidenceInvocation { + readonly id: string; + readonly startedAt: number; + readonly runtime: { + readonly name: string; + readonly version: string; + readonly platform: NodeJS.Platform; + readonly arch: string; + }; +} + +export interface EvidenceContext { + readonly schemaVersion: 1; + readonly attemptId: string; + readonly createdAt: number; + readonly updatedAt: number; + readonly invocations: ReadonlyArray; +} + +export interface EvidenceReference { + readonly attemptId: string; + readonly invocationId: string; +} + +const errorCode = (error: unknown): string | undefined => { + if (typeof error !== "object" || error === null || !("code" in error)) return undefined; + return typeof error.code === "string" ? error.code : undefined; +}; + +interface LockOwner { + readonly schemaVersion: 1; + readonly token: string; + readonly pid: number; + readonly processStartIdentity?: string; +} + +const linuxProcessStartIdentity = (pid: number) => { + if (process.platform !== "linux") return undefined; + try { + const stat = readFileSync(`/proc/${pid}/stat`, "utf8"); + const commandEnd = stat.lastIndexOf(")"); + if (commandEnd === -1) return undefined; + // After the command, index 0 is field 3 (state), so field 22 + // (process start time in clock ticks) is index 19. + return stat + .slice(commandEnd + 1) + .trim() + .split(/\s+/)[19]; + } catch { + return undefined; + } +}; + +const processStartIdentity = linuxProcessStartIdentity(process.pid); + +const lockOwner = (token: string): LockOwner => ({ + schemaVersion: 1, + token, + pid: process.pid, + ...(processStartIdentity ? 
{ processStartIdentity } : {}), +}); + +const parseLockOwner = (value: string | undefined): LockOwner | undefined => { + if (!value) return undefined; + try { + const parsed: unknown = JSON.parse(value); + if (typeof parsed !== "object" || parsed === null) return undefined; + if (!("schemaVersion" in parsed) || parsed.schemaVersion !== 1) return undefined; + if (!("token" in parsed) || typeof parsed.token !== "string" || parsed.token === "") { + return undefined; + } + if ( + !("pid" in parsed) || + typeof parsed.pid !== "number" || + !Number.isSafeInteger(parsed.pid) || + parsed.pid <= 0 + ) { + return undefined; + } + const parsedProcessStartIdentity = + "processStartIdentity" in parsed ? parsed.processStartIdentity : undefined; + if ( + parsedProcessStartIdentity !== undefined && + typeof parsedProcessStartIdentity !== "string" + ) { + return undefined; + } + return { + schemaVersion: 1, + token: parsed.token, + pid: parsed.pid, + ...(typeof parsedProcessStartIdentity === "string" + ? { processStartIdentity: parsedProcessStartIdentity } + : {}), + }; + } catch { + return undefined; + } +}; + +const lockOwnerIsAlive = (value: string | undefined): boolean => { + const owner = parseLockOwner(value); + if (!owner) return false; + try { + process.kill(owner.pid, 0); + } catch (error) { + // EPERM still proves that a process owns this PID. Unknown failures are + // conservative too: timing out is safer than admitting two writers. 
+ if (errorCode(error) === "ESRCH") return false; + return true; + } + if (!owner.processStartIdentity) return true; + const currentIdentity = linuxProcessStartIdentity(owner.pid); + return currentIdentity === undefined || currentIdentity === owner.processStartIdentity; +}; + +const pause = (): void => { + Atomics.wait(sleeper, 0, 0, LOCK_RETRY_MS); +}; + +interface LockSnapshot { + readonly owner: string | undefined; + readonly heartbeatMtimeMs: number; +} + +const lockSnapshot = (lockDir: string): LockSnapshot | undefined => { + try { + let owner: string | undefined; + try { + owner = readFileSync(join(lockDir, "owner"), "utf8"); + } catch { + owner = undefined; + } + let heartbeatMtimeMs: number; + try { + heartbeatMtimeMs = statSync(join(lockDir, "heartbeat")).mtimeMs; + } catch { + heartbeatMtimeMs = statSync(lockDir).mtimeMs; + } + return { owner, heartbeatMtimeMs }; + } catch { + return undefined; + } +}; + +const removeLockIfOwned = (lockDir: string, owner: string): void => { + if (lockSnapshot(lockDir)?.owner === owner) { + rmSync(lockDir, { recursive: true, force: true }); + } +}; + +const heartbeatIsStale = (snapshot: LockSnapshot): boolean => + Date.now() - snapshot.heartbeatMtimeMs > STALE_LOCK_MS; + +const lockIsReclaimable = (snapshot: LockSnapshot): boolean => + heartbeatIsStale(snapshot) && !lockOwnerIsAlive(snapshot.owner); + +const snapshotIdentity = (snapshot: LockSnapshot): string => { + const owner = parseLockOwner(snapshot.owner); + return ( + owner?.token ?? + snapshot.owner ?? 
+ `unknown-${Math.floor(snapshot.heartbeatMtimeMs)}` + ).replace(/[^a-zA-Z0-9-]/g, "_"); +}; + +const recoveryPrefix = (lockDir: string): string => `${basename(lockDir)}.reclaim-`; +const tombstonePrefix = (lockDir: string): string => `${basename(lockDir)}.tombstone-`; + +const archiveStaleRecovery = ( + lockDir: string, + recoveryDir: string, + observed: LockSnapshot, +): boolean => { + if (!lockIsReclaimable(observed)) return false; + const current = lockSnapshot(recoveryDir); + if ( + !current || + current.owner !== observed.owner || + current.heartbeatMtimeMs !== observed.heartbeatMtimeMs || + !lockIsReclaimable(current) + ) { + return false; + } + + const identity = snapshotIdentity(observed); + const tombstone = `${lockDir}.tombstone-recovery-${identity}-${randomUUID()}`; + try { + renameSync(recoveryDir, tombstone); + } catch (error) { + if (errorCode(error) === "ENOENT") return false; + throw error; + } + + const moved = lockSnapshot(tombstone); + if (moved?.owner !== observed.owner) { + // A replacement fence moved instead of the one we observed. Restore it + // so its owner remains visible to every contender. 
+ renameSync(tombstone, recoveryDir); + return false; + } + return true; +}; + +const recoveryInProgress = (lockDir: string): boolean => { + let entries: string[]; + try { + entries = readdirSync(dirname(lockDir)); + } catch { + return false; + } + let active = false; + for (const name of entries) { + if (!name.startsWith(recoveryPrefix(lockDir))) continue; + const recoveryDir = join(dirname(lockDir), name); + const observed = lockSnapshot(recoveryDir); + if (!observed || !archiveStaleRecovery(lockDir, recoveryDir, observed)) active = true; + } + return active; +}; + +const cleanupOldTombstones = (lockDir: string): void => { + const parent = dirname(lockDir); + try { + for (const name of readdirSync(parent)) { + if (!name.startsWith(tombstonePrefix(lockDir))) continue; + const tombstone = join(parent, name); + try { + if (Date.now() - statSync(tombstone).mtimeMs > TOMBSTONE_RETENTION_MS) { + rmSync(tombstone, { recursive: true, force: true }); + } + } catch { + // Another contender may already be cleaning this completed tombstone. + } + } + } catch { + // The parent can disappear during temporary-directory cleanup. + } +}; + +/** + * Fence stale recovery with a deterministic owner-specific directory. The + * stale lock is moved under that fence atomically, then the fence becomes a + * retained tombstone. New owners that race the recovery see the active fence + * and retry before entering their critical section. 
+ */ +const reclaimStaleLock = (lockDir: string, observed: LockSnapshot): boolean => { + if (!lockIsReclaimable(observed)) return false; + const identity = snapshotIdentity(observed); + const reclaimerToken = randomUUID(); + const reclaimer = JSON.stringify(lockOwner(reclaimerToken)); + const recoveryDir = `${lockDir}.reclaim-${identity}`; + let recoveryHeartbeat: LockHeartbeat | undefined; + try { + mkdirSync(recoveryDir, { mode: 0o700 }); + writeFileSync(join(recoveryDir, "owner"), reclaimer, { mode: 0o600, flag: "wx" }); + recoveryHeartbeat = startLockHeartbeat(recoveryDir, reclaimer); + } catch (error) { + removeLockIfOwned(recoveryDir, reclaimer); + if (errorCode(error) === "EEXIST") return false; + throw error; + } + + const movedLock = join(recoveryDir, "lock"); + try { + const current = lockSnapshot(lockDir); + if ( + !current || + current.owner !== observed.owner || + current.heartbeatMtimeMs !== observed.heartbeatMtimeMs || + !lockIsReclaimable(current) + ) { + return false; + } + + try { + renameSync(lockDir, movedLock); + } catch (error) { + if (errorCode(error) === "ENOENT") return false; + throw error; + } + + const moved = lockSnapshot(movedLock); + if (moved?.owner !== observed.owner) { + // A new owner slipped between the final observation and rename. It has + // not entered its action because this recovery fence is still visible. 
+ renameSync(movedLock, lockDir); + return false; + } + + const tombstone = `${lockDir}.tombstone-${identity}-${reclaimerToken}`; + renameSync(recoveryDir, tombstone); + return true; + } finally { + recoveryHeartbeat?.stop(); + if (existsSync(recoveryDir)) { + let recoveryOwner: string | undefined; + try { + recoveryOwner = readFileSync(join(recoveryDir, "owner"), "utf8"); + } catch { + recoveryOwner = undefined; + } + if (recoveryOwner === reclaimer) { + rmSync(recoveryDir, { recursive: true, force: true }); + } + } + } +}; + +interface LockHeartbeat { + readonly stop: () => void; +} + +const heartbeatWorker = (): Worker => { + if (sharedHeartbeatWorker) return sharedHeartbeatWorker; + const worker = new Worker(HEARTBEAT_WORKER_SOURCE, { + eval: true, + workerData: { intervalMs: HEARTBEAT_INTERVAL_MS }, + }); + worker.on("error", () => { + if (sharedHeartbeatWorker === worker) sharedHeartbeatWorker = undefined; + }); + worker.on("exit", () => { + if (sharedHeartbeatWorker === worker) sharedHeartbeatWorker = undefined; + }); + worker.unref(); + sharedHeartbeatWorker = worker; + return worker; +}; + +const startLockHeartbeat = (lockDir: string, owner: string): LockHeartbeat => { + const heartbeatFile = join(lockDir, "heartbeat"); + writeFileSync(heartbeatFile, owner, { mode: 0o600 }); + const worker = heartbeatWorker(); + worker.postMessage({ + type: "add", + key: lockDir, + owner, + ownerFile: join(lockDir, "owner"), + heartbeatFile, + }); + return { + stop: () => worker.postMessage({ type: "remove", key: lockDir, owner }), + }; +}; + +/** + * Serialize a short read-modify-write transaction across processes. Directory + * creation is atomic on the filesystems used by the e2e hosts, including + * Windows, unlike a probe followed by creating a normal lock file. 
+ */ +export const withArtifactLockSync = (file: string, action: () => A): A => { + const lockDir = `${file}.lock`; + const owner = JSON.stringify(lockOwner(randomUUID())); + const deadline = Date.now() + LOCK_TIMEOUT_MS; + let heartbeat: LockHeartbeat | undefined; + cleanupOldTombstones(lockDir); + + for (;;) { + if (Date.now() >= deadline) throw new Error(`e2e evidence lock timed out: ${file}`); + if (recoveryInProgress(lockDir)) { + pause(); + continue; + } + try { + mkdirSync(lockDir, { mode: 0o700 }); + } catch (error) { + if (errorCode(error) !== "EEXIST") throw error; + const observed = lockSnapshot(lockDir); + if (observed && reclaimStaleLock(lockDir, observed)) { + continue; + } + pause(); + continue; + } + + try { + // Exclusive creation prevents a displaced claimant from overwriting a + // replacement owner's identity if its empty directory was reclaimed. + writeFileSync(join(lockDir, "owner"), owner, { mode: 0o600, flag: "wx" }); + heartbeat = startLockHeartbeat(lockDir, owner); + } catch (error) { + removeLockIfOwned(lockDir, owner); + if (errorCode(error) === "EEXIST" || errorCode(error) === "ENOENT") { + pause(); + continue; + } + throw error; + } + + // Recovery can move the directory between mkdir, owner creation, and + // heartbeat startup. Enter only while the canonical path still names us. + if (recoveryInProgress(lockDir) || lockSnapshot(lockDir)?.owner !== owner) { + heartbeat.stop(); + heartbeat = undefined; + removeLockIfOwned(lockDir, owner); + pause(); + continue; + } + break; + } + + try { + return action(); + } finally { + heartbeat?.stop(); + // A stale-lock recovery could have replaced us. Never remove a lock now + // owned by another writer. + removeLockIfOwned(lockDir, owner); + } +}; + +/** Write a complete file then atomically publish it with a same-dir rename. 
*/ +export const writeTextAtomicSync = (file: string, contents: string): void => { + mkdirSync(dirname(file), { recursive: true }); + const temporary = join(dirname(file), `.${basename(file)}.${process.pid}.${randomUUID()}.tmp`); + let handle: number | undefined; + try { + handle = openSync(temporary, "wx", 0o600); + writeFileSync(handle, contents, "utf8"); + closeSync(handle); + handle = undefined; + renameSync(temporary, file); + } finally { + if (handle !== undefined) closeSync(handle); + rmSync(temporary, { force: true }); + } +}; + +export const writeJsonAtomicSync = (file: string, value: unknown): void => + writeTextAtomicSync(file, JSON.stringify(value, null, 1)); + +const isEvidenceInvocation = (value: unknown): value is EvidenceInvocation => { + if (typeof value !== "object" || value === null) return false; + if (!("id" in value) || typeof value.id !== "string") return false; + if (!("startedAt" in value) || typeof value.startedAt !== "number") return false; + if (!("runtime" in value) || typeof value.runtime !== "object" || value.runtime === null) { + return false; + } + const runtime = value.runtime; + return ( + "name" in runtime && + typeof runtime.name === "string" && + "version" in runtime && + typeof runtime.version === "string" && + "platform" in runtime && + typeof runtime.platform === "string" && + "arch" in runtime && + typeof runtime.arch === "string" + ); +}; + +const isEvidenceContext = (value: unknown): value is EvidenceContext => { + if (typeof value !== "object" || value === null) return false; + return ( + "schemaVersion" in value && + value.schemaVersion === 1 && + "attemptId" in value && + typeof value.attemptId === "string" && + "createdAt" in value && + typeof value.createdAt === "number" && + "updatedAt" in value && + typeof value.updatedAt === "number" && + "invocations" in value && + Array.isArray(value.invocations) && + value.invocations.every(isEvidenceInvocation) + ); +}; + +/** + * Create or join metadata for one 
attempt-specific run directory. The + * persisted UUID lets several worker processes contribute correlated evidence + * without mixing retries. + */ +export const evidenceContextFor = ( + runDir: string, + requestedAttemptId?: string, +): EvidenceContext => { + const file = join(runDir, "evidence.json"); + return withArtifactLockSync(file, () => { + let existing: EvidenceContext | undefined; + if (existsSync(file)) { + const parsed: unknown = JSON.parse(readFileSync(file, "utf8")); + if (!isEvidenceContext(parsed)) { + throw new Error(`invalid e2e evidence metadata: ${file}`); + } + existing = parsed; + } + if (existing && requestedAttemptId && existing.attemptId !== requestedAttemptId) { + throw new Error( + `e2e evidence attempt mismatch: ${requestedAttemptId} != ${existing.attemptId}`, + ); + } + + const now = Date.now(); + const invocations = existing?.invocations.some((entry) => entry.id === invocation.id) + ? existing.invocations + : [...(existing?.invocations ?? []), invocation]; + const context: EvidenceContext = { + schemaVersion: 1, + attemptId: existing?.attemptId ?? requestedAttemptId ?? randomUUID(), + createdAt: existing?.createdAt ?? 
now, + updatedAt: now, + invocations, + }; + writeJsonAtomicSync(file, context); + return context; + }); +}; + +export const evidenceReferenceFor = ( + runDir: string, + requestedAttemptId?: string, +): EvidenceReference => ({ + attemptId: evidenceContextFor(runDir, requestedAttemptId).attemptId, + invocationId: invocation.id, +}); diff --git a/e2e/src/clients/anthropic-replay-brain.test.ts b/e2e/src/clients/anthropic-replay-brain.test.ts new file mode 100644 index 000000000..51404abaf --- /dev/null +++ b/e2e/src/clients/anthropic-replay-brain.test.ts @@ -0,0 +1,107 @@ +import { request } from "node:http"; + +import { expect, it } from "@effect/vitest"; +import { Effect } from "effect"; + +import { serveAnthropicReplayBrain } from "./anthropic-replay-brain"; + +const postJson = (url: string, body: unknown) => + Effect.callback<{ readonly status: number; readonly body: string }>((resume) => { + const target = new URL(url); + const payload = JSON.stringify(body); + const req = request( + target, + { + method: "POST", + headers: { + "content-type": "application/json", + "content-length": Buffer.byteLength(payload), + }, + }, + (response) => { + let responseBody = ""; + response.on("data", (piece: Buffer) => { + responseBody += piece.toString("utf8"); + }); + response.on("end", () => + resume( + Effect.succeed({ + status: response.statusCode ?? 0, + body: responseBody, + }), + ), + ); + }, + ); + req.on("error", (cause) => resume(Effect.die(cause))); + req.end(payload); + return Effect.sync(() => req.destroy()); + }); + +it.effect("serves transcript-driven Anthropic tool use and tool-result continuation", () => + Effect.gen(function* () { + const brain = yield* serveAnthropicReplayBrain((context) => + context.lastToolResult + ? 
{ text: `observed:${context.lastToolResult}` } + : { tool: { name: "echo", input: { value: "hello" } } }, + ); + + const first = yield* postJson(`${brain.baseUrl}/v1/messages?beta=true`, { + model: "claude-sonnet-4-6", + stream: true, + messages: [{ role: "user", content: "Use the echo tool." }], + tools: [ + { + name: "mcp__executor__echo", + description: "Echo a value", + input_schema: { type: "object" }, + }, + ], + }); + expect(first.status).toBe(200); + expect(first.body).toContain('"type":"tool_use"'); + expect(first.body).toContain('"name":"mcp__executor__echo"'); + expect(first.body).toContain('"stop_reason":"tool_use"'); + + const second = yield* postJson(`${brain.baseUrl}/v1/messages?beta=true`, { + model: "claude-sonnet-4-6", + stream: true, + messages: [ + { role: "user", content: "Use the echo tool." }, + { + role: "assistant", + content: [ + { + type: "tool_use", + id: "toolu_replay_0", + name: "mcp__executor__echo", + input: { value: "hello" }, + }, + ], + }, + { + role: "user", + content: [ + { + type: "tool_result", + tool_use_id: "toolu_replay_0", + content: [{ type: "text", text: "echoed:hello" }], + }, + ], + }, + ], + tools: [{ name: "mcp__executor__echo", input_schema: { type: "object" } }], + }); + expect(second.status).toBe(200); + expect(second.body).toContain("observed:echoed:hello"); + expect(second.body).toContain('"stop_reason":"end_turn"'); + + const requests = brain.requests(); + expect(requests).toHaveLength(2); + expect(requests[0]?.toolNames).toEqual(["mcp__executor__echo"]); + expect(requests[1]?.messages.at(-1)?.toolResults).toEqual([ + { toolUseId: "toolu_replay_0", content: "echoed:hello", isError: false }, + ]); + expect(brain.errors()).toEqual([]); + }), +); diff --git a/e2e/src/clients/anthropic-replay-brain.ts b/e2e/src/clients/anthropic-replay-brain.ts new file mode 100644 index 000000000..fdf802a3e --- /dev/null +++ b/e2e/src/clients/anthropic-replay-brain.ts @@ -0,0 +1,326 @@ +// Deterministic Anthropic Messages wire 
fixture for driving a REAL Claude Code +// binary without paid inference. Claude owns MCP discovery, tool selection, +// invocation, and result round-trips. This server only replaces the model +// boundary with a transcript-driven state machine. +import { createServer, type ServerResponse } from "node:http"; +import type { AddressInfo } from "node:net"; + +import { Effect, Scope } from "effect"; + +export interface AnthropicToolResult { + readonly toolUseId: string; + readonly content: string; + readonly isError: boolean; +} + +export interface AnthropicReplayMessage { + readonly role: string; + readonly text: string; + readonly toolResults: ReadonlyArray; +} + +export interface AnthropicReplayRequest { + readonly path: string; + readonly model: string; + readonly messages: ReadonlyArray; + readonly toolNames: ReadonlyArray; + readonly stream: boolean; +} + +export interface AnthropicReplayContext { + readonly requestIndex: number; + readonly messages: ReadonlyArray; + readonly lastRole: string; + readonly lastUser: string; + readonly lastToolResult: string | undefined; + readonly toolResults: ReadonlyArray; + readonly toolNames: ReadonlyArray; +} + +export interface AnthropicReplayResponse { + readonly text?: string; + readonly tool?: { + /** Exact offered tool name or a suffix such as `execute` or `echo`. */ + readonly name: string; + readonly input: unknown; + }; +} + +export interface AnthropicReplayBrain { + /** Loopback origin assigned to ANTHROPIC_BASE_URL. 
*/ + readonly baseUrl: string; + readonly requests: () => ReadonlyArray; + readonly errors: () => ReadonlyArray; +} + +const isUnknownRecord = (value: unknown): value is Record => + typeof value === "object" && value !== null && !Array.isArray(value); + +const contentText = (content: unknown): string => { + if (typeof content === "string") return content; + if (!Array.isArray(content)) return ""; + return content + .map((part) => { + if (!isUnknownRecord(part)) return ""; + if (part.type === "text" && typeof part.text === "string") return part.text; + return ""; + }) + .join(""); +}; + +const toolResultsFrom = (content: unknown): ReadonlyArray => { + if (!Array.isArray(content)) return []; + return content.flatMap((part) => { + if (!isUnknownRecord(part) || part.type !== "tool_result") return []; + return [ + { + toolUseId: typeof part.tool_use_id === "string" ? part.tool_use_id : "", + content: contentText(part.content), + isError: part.is_error === true, + }, + ]; + }); +}; + +const messagesFrom = (body: Record): ReadonlyArray => { + if (!Array.isArray(body.messages)) return []; + return body.messages.flatMap((message) => { + if (!isUnknownRecord(message)) return []; + return [ + { + role: typeof message.role === "string" ? 
message.role : "", + text: contentText(message.content), + toolResults: toolResultsFrom(message.content), + }, + ]; + }); +}; + +const toolNamesFrom = (body: Record): ReadonlyArray => { + if (!Array.isArray(body.tools)) return []; + return body.tools.flatMap((tool) => { + if (!isUnknownRecord(tool) || typeof tool.name !== "string") return []; + return [tool.name]; + }); +}; + +const writeJson = (response: ServerResponse, status: number, body: unknown) => { + response.writeHead(status, { "content-type": "application/json" }); + response.end(JSON.stringify(body)); +}; + +const writeEvent = (response: ServerResponse, event: string, data: unknown) => { + response.write(`event: ${event}\n`); + response.write(`data: ${JSON.stringify(data)}\n\n`); +}; + +const resolveToolName = (wanted: string, offered: ReadonlyArray) => + offered.find((name) => name === wanted) ?? + offered.find((name) => name.endsWith(`__${wanted}`)) ?? + offered.find((name) => name.endsWith(wanted)); + +const writeMessagesResponse = ( + response: ServerResponse, + requestIndex: number, + model: string, + scripted: AnthropicReplayResponse, + toolNames: ReadonlyArray, + errors: string[], +) => { + const messageId = `msg_replay_${requestIndex}`; + const resolvedToolName = scripted.tool + ? 
resolveToolName(scripted.tool.name, toolNames) + : undefined; + if (scripted.tool && !resolvedToolName) { + errors.push( + `request ${requestIndex}: no offered tool matches "${scripted.tool.name}" (offered: ${toolNames.join(", ")})`, + ); + } + + response.writeHead(200, { + "content-type": "text/event-stream", + "cache-control": "no-cache", + connection: "keep-alive", + }); + writeEvent(response, "message_start", { + type: "message_start", + message: { + id: messageId, + type: "message", + role: "assistant", + model, + content: [], + stop_reason: null, + stop_sequence: null, + usage: { input_tokens: 1, output_tokens: 0 }, + }, + }); + + let blockIndex = 0; + if (scripted.text) { + writeEvent(response, "content_block_start", { + type: "content_block_start", + index: blockIndex, + content_block: { type: "text", text: "" }, + }); + for (const piece of scripted.text.match(/.{1,32}/gs) ?? []) { + writeEvent(response, "content_block_delta", { + type: "content_block_delta", + index: blockIndex, + delta: { type: "text_delta", text: piece }, + }); + } + writeEvent(response, "content_block_stop", { + type: "content_block_stop", + index: blockIndex, + }); + blockIndex += 1; + } + + if (scripted.tool && resolvedToolName) { + const toolUseId = `toolu_replay_${requestIndex}`; + writeEvent(response, "content_block_start", { + type: "content_block_start", + index: blockIndex, + content_block: { + type: "tool_use", + id: toolUseId, + name: resolvedToolName, + input: {}, + }, + }); + writeEvent(response, "content_block_delta", { + type: "content_block_delta", + index: blockIndex, + delta: { + type: "input_json_delta", + partial_json: JSON.stringify(scripted.tool.input ?? {}), + }, + }); + writeEvent(response, "content_block_stop", { + type: "content_block_stop", + index: blockIndex, + }); + } + + writeEvent(response, "message_delta", { + type: "message_delta", + delta: { + stop_reason: resolvedToolName ? 
"tool_use" : "end_turn", + stop_sequence: null, + }, + usage: { output_tokens: 1 }, + }); + writeEvent(response, "message_stop", { type: "message_stop" }); + response.end(); +}; + +/** + * Serve an Anthropic Messages endpoint for the surrounding Effect scope. + * The callback receives normalized conversation state on every model turn. + */ +export const serveAnthropicReplayBrain = ( + respond: (context: AnthropicReplayContext) => AnthropicReplayResponse, +): Effect.Effect => + Effect.acquireRelease( + Effect.callback<{ + readonly server: ReturnType; + readonly brain: AnthropicReplayBrain; + }>((resume) => { + const served: AnthropicReplayRequest[] = []; + const errors: string[] = []; + const server = createServer((request, response) => { + const requestUrl = new URL(request.url ?? "/", "http://127.0.0.1"); + if (request.method !== "POST") { + writeJson(response, 405, { error: { type: "method_not_allowed" } }); + return; + } + if (requestUrl.pathname === "/v1/messages/count_tokens") { + writeJson(response, 200, { input_tokens: 1 }); + return; + } + if (requestUrl.pathname !== "/v1/messages") { + errors.push(`unexpected request path: ${request.method} ${requestUrl.pathname}`); + writeJson(response, 404, { error: { type: "not_found" } }); + return; + } + + let raw = ""; + request.on("data", (piece: Buffer) => { + raw += piece.toString("utf8"); + }); + request.on("end", () => { + let decoded: unknown; + // oxlint-disable-next-line executor/no-try-catch-or-throw -- boundary: malformed wire JSON becomes a recorded fixture error and a 400 response + try { + decoded = JSON.parse(raw || "{}"); + } catch (cause) { + errors.push(`request JSON decode failed: ${String(cause)}`); + writeJson(response, 400, { error: { type: "invalid_request_error" } }); + return; + } + if (!isUnknownRecord(decoded)) { + errors.push("request body was not a JSON object"); + writeJson(response, 400, { error: { type: "invalid_request_error" } }); + return; + } + + const messages = 
messagesFrom(decoded); + const toolNames = toolNamesFrom(decoded); + const toolResults = messages.flatMap((message) => message.toolResults); + const requestIndex = served.length; + const model = typeof decoded.model === "string" ? decoded.model : "replay-model"; + served.push({ + path: `${requestUrl.pathname}${requestUrl.search}`, + model, + messages, + toolNames, + stream: decoded.stream === true, + }); + + const lastMessage = messages.at(-1); + const lastHuman = [...messages] + .reverse() + .find( + (message) => + message.role === "user" && message.toolResults.length === 0 && message.text !== "", + ); + let scripted: AnthropicReplayResponse; + // oxlint-disable-next-line executor/no-try-catch-or-throw -- boundary: a throwing transcript script is surfaced in fixture errors, never as a hung Claude process + try { + scripted = respond({ + requestIndex, + messages, + lastRole: lastMessage?.toolResults.length ? "tool" : (lastMessage?.role ?? ""), + lastUser: lastHuman?.text ?? "", + lastToolResult: toolResults.at(-1)?.content, + toolResults, + toolNames, + }); + } catch (cause) { + errors.push(`respond() threw on request ${requestIndex}: ${String(cause)}`); + scripted = { text: "(anthropic replay script error)" }; + } + writeMessagesResponse(response, requestIndex, model, scripted, toolNames, errors); + }); + }); + + server.listen(0, "127.0.0.1", () => { + const { port } = server.address() as AddressInfo; + resume( + Effect.succeed({ + server, + brain: { + baseUrl: `http://127.0.0.1:${port}`, + requests: () => served, + errors: () => errors, + }, + }), + ); + }); + }), + ({ server }) => + Effect.callback((resume) => { + server.close(() => resume(Effect.void)); + }), + ).pipe(Effect.map(({ brain }) => brain)); diff --git a/e2e/src/clients/claude-code-evidence.test.ts b/e2e/src/clients/claude-code-evidence.test.ts new file mode 100644 index 000000000..7e63a7382 --- /dev/null +++ b/e2e/src/clients/claude-code-evidence.test.ts @@ -0,0 +1,96 @@ +import { mkdtempSync, 
readFileSync, rmSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; + +import { expect, it } from "@effect/vitest"; +import { Effect } from "effect"; + +import { publishedArtifactFor } from "../published-artifacts"; +import { + CLAUDE_CODE_EVIDENCE_FILE, + makeClaudeCodeInvocationEvidence, + readClaudeCodeEvidence, + writeClaudeCodeEvidence, +} from "./claude-code-evidence"; + +const evidenceInput = (label: string, marker: string) => ({ + label, + executable: "/home/alice/.local/bin/claude", + expectedVersion: "2.1.195", + observedVersion: "2.1.195", + durationMs: 321, + status: "success" as const, + exitCode: 0, + stdout: `result=${marker} authorization: Bearer account-secret`, + stderr: "api_key=executor-e2e-replay-key", + structuredResult: { + marker, + authorization: "Bearer account-secret", + }, + mcpServerName: "executor", + mcpOrigin: "https://executor.example.test/mcp?token=account-secret", + replayOrigin: "http://127.0.0.1:43123", + replayRequestPaths: ["/v1/messages?token=account-secret", "/v1/messages"], + replayErrors: [] as ReadonlyArray, + secrets: ["account-secret"], +}); + +it.effect("writes ordered, sanitized Claude Code invocation evidence for an account switch", () => + Effect.acquireUseRelease( + Effect.sync(() => mkdtempSync(join(tmpdir(), "executor-claude-evidence-"))), + (runDir) => + Effect.sync(() => { + writeClaudeCodeEvidence(runDir, evidenceInput("account-a-before-switch", "account-a")); + writeClaudeCodeEvidence(runDir, evidenceInput("account-b-after-switch", "account-b")); + + const serialized = readFileSync(join(runDir, CLAUDE_CODE_EVIDENCE_FILE), "utf8"); + const document = readClaudeCodeEvidence(runDir); + expect(document.schemaVersion).toBe(1); + expect(document.client).toBe("claude-code"); + expect(publishedArtifactFor(`cloud/example-run/${CLAUDE_CODE_EVIDENCE_FILE}`)).toEqual({ + kind: "json", + mime: "application/json; charset=utf-8", + }); + expect(document.invocations.map((entry) => 
entry.label)).toEqual([ + "account-a-before-switch", + "account-b-after-switch", + ]); + expect(document.invocations.map((entry) => entry.output.structuredResult)).toEqual([ + { marker: "account-a", authorization: "[REDACTED]" }, + { marker: "account-b", authorization: "[REDACTED]" }, + ]); + expect(document.invocations[0]?.executable).toEqual({ + name: "claude", + path: "/home/[USER]/.local/bin/claude", + }); + expect(document.invocations[0]?.replay).toEqual({ + origin: "http://127.0.0.1:43123/", + requestCount: 2, + requestPaths: ["/v1/messages?token=[REDACTED]", "/v1/messages"], + errors: [], + }); + expect(document.invocations.map((entry) => entry.exit)).toEqual([ + { status: "success", code: 0 }, + { status: "success", code: 0 }, + ]); + expect(document.invocations.map((entry) => entry.durationMs)).toEqual([321, 321]); + expect(document.invocations.map((entry) => entry.inferenceBoundary)).toEqual([ + "loopback-replay", + "loopback-replay", + ]); + expect(serialized).not.toContain("account-secret"); + expect(serialized).not.toContain("executor-e2e-replay-key"); + expect(serialized).not.toContain("/home/alice"); + }), + (runDir) => Effect.sync(() => rmSync(runDir, { recursive: true, force: true })), + ), +); + +it("rejects non-loopback inference evidence", () => { + expect(() => + makeClaudeCodeInvocationEvidence({ + ...evidenceInput("paid-boundary", "should-not-write"), + replayOrigin: "https://api.anthropic.com", + }), + ).toThrow("requires a loopback replay origin"); +}); diff --git a/e2e/src/clients/claude-code-evidence.ts b/e2e/src/clients/claude-code-evidence.ts new file mode 100644 index 000000000..daef9dc29 --- /dev/null +++ b/e2e/src/clients/claude-code-evidence.ts @@ -0,0 +1,226 @@ +import { randomUUID } from "node:crypto"; +import { existsSync, readFileSync } from "node:fs"; +import { basename, join } from "node:path"; + +import { withArtifactLockSync, writeJsonAtomicSync } from "../artifact-io"; +import { + sanitizePublishedText, + 
sanitizePublishedUrl, + sanitizePublishedValue, +} from "../published-artifacts"; + +export const CLAUDE_CODE_EVIDENCE_FILE = "claude-code-metadata.json"; + +const REPLAY_API_KEY = "executor-e2e-replay-key"; + +export interface ClaudeCodeEvidenceInput { + readonly label: string; + readonly executable: string; + readonly expectedVersion: string; + readonly observedVersion: string | undefined; + readonly durationMs: number; + readonly status: "success" | "failure"; + readonly exitCode: number | null; + readonly stdout: string; + readonly stderr: string; + readonly structuredResult?: unknown; + readonly mcpServerName: string; + readonly mcpOrigin: string; + readonly replayOrigin: string; + readonly replayRequestPaths: ReadonlyArray; + readonly replayErrors: ReadonlyArray; + readonly secrets?: ReadonlyArray; +} + +export interface ClaudeCodeInvocationEvidence { + readonly invocationId: string; + readonly label: string; + readonly executable: { + readonly name: string; + readonly path: string; + }; + readonly version: { + readonly expected: string; + readonly observed: string | null; + }; + readonly durationMs: number; + readonly exit: { + readonly status: "success" | "failure"; + readonly code: number | null; + }; + readonly output: { + readonly stdout: string; + readonly stderr: string; + readonly structuredResult?: unknown; + }; + readonly mcp: { + readonly serverName: string; + readonly origin: string; + }; + readonly replay: { + readonly origin: string; + readonly requestCount: number; + readonly requestPaths: ReadonlyArray; + readonly errors: ReadonlyArray; + }; + readonly inferenceBoundary: "loopback-replay"; +} + +export interface ClaudeCodeEvidenceDocument { + readonly schemaVersion: 1; + readonly client: "claude-code"; + readonly invocations: ReadonlyArray; +} + +const normalizedOrigin = (value: string) => new URL(value).origin; + +const loopbackReplayOrigin = (value: string) => { + const url = new URL(value); + if ( + url.protocol !== "http:" || + 
(url.hostname !== "127.0.0.1" && url.hostname !== "localhost" && url.hostname !== "[::1]") + ) { + throw new Error(`Claude Code evidence requires a loopback replay origin: ${value}`); + } + return url.origin; +}; + +const isUnknownRecord = (value: unknown): value is Record => + typeof value === "object" && value !== null && !Array.isArray(value); + +const isStringArray = (value: unknown): value is ReadonlyArray => + Array.isArray(value) && value.every((entry) => typeof entry === "string"); + +const isInvocationEvidence = (value: unknown): value is ClaudeCodeInvocationEvidence => { + if (!isUnknownRecord(value)) return false; + const executable = value.executable; + const version = value.version; + const exit = value.exit; + const output = value.output; + const mcp = value.mcp; + const replay = value.replay; + const validExit = + isUnknownRecord(exit) && + ((exit.status === "success" && exit.code === 0) || + (exit.status === "failure" && (typeof exit.code === "number" || exit.code === null))); + return ( + typeof value.invocationId === "string" && + typeof value.label === "string" && + isUnknownRecord(executable) && + typeof executable.name === "string" && + typeof executable.path === "string" && + isUnknownRecord(version) && + typeof version.expected === "string" && + (typeof version.observed === "string" || version.observed === null) && + typeof value.durationMs === "number" && + Number.isFinite(value.durationMs) && + value.durationMs >= 0 && + validExit && + isUnknownRecord(output) && + typeof output.stdout === "string" && + typeof output.stderr === "string" && + isUnknownRecord(mcp) && + typeof mcp.serverName === "string" && + typeof mcp.origin === "string" && + isUnknownRecord(replay) && + typeof replay.origin === "string" && + typeof replay.requestCount === "number" && + isStringArray(replay.requestPaths) && + replay.requestCount === replay.requestPaths.length && + isStringArray(replay.errors) && + value.inferenceBoundary === "loopback-replay" + ); +}; + 
+const isEvidenceDocument = (value: unknown): value is ClaudeCodeEvidenceDocument => + isUnknownRecord(value) && + value.schemaVersion === 1 && + value.client === "claude-code" && + Array.isArray(value.invocations) && + value.invocations.every(isInvocationEvidence); + +export const readClaudeCodeEvidence = (runDir: string) => { + const file = join(runDir, CLAUDE_CODE_EVIDENCE_FILE); + const parsed: unknown = JSON.parse(readFileSync(file, "utf8")); + if (!isEvidenceDocument(parsed)) { + throw new Error(`Invalid Claude Code evidence document: ${file}`); + } + return parsed; +}; + +export const makeClaudeCodeInvocationEvidence = ( + input: ClaudeCodeEvidenceInput, +): ClaudeCodeInvocationEvidence => { + if (input.durationMs < 0 || !Number.isFinite(input.durationMs)) { + throw new Error(`Claude Code evidence has invalid duration: ${input.durationMs}`); + } + if (input.status === "success" && input.exitCode !== 0) { + throw new Error("Successful Claude Code evidence must have exit code 0"); + } + if (input.status === "failure" && input.exitCode === 0) { + throw new Error("Failed Claude Code evidence cannot have exit code 0"); + } + + const sanitization = { secrets: [REPLAY_API_KEY, ...(input.secrets ?? [])] }; + const replayOrigin = loopbackReplayOrigin(input.replayOrigin); + return { + invocationId: randomUUID(), + label: sanitizePublishedText(input.label, sanitization), + executable: { + name: sanitizePublishedText(basename(input.executable), sanitization), + path: sanitizePublishedText(input.executable, sanitization), + }, + version: { + expected: sanitizePublishedText(input.expectedVersion, sanitization), + observed: + input.observedVersion === undefined + ? 
null + : sanitizePublishedText(input.observedVersion, sanitization), + }, + durationMs: input.durationMs, + exit: { status: input.status, code: input.exitCode }, + output: { + stdout: sanitizePublishedText(input.stdout, sanitization), + stderr: sanitizePublishedText(input.stderr, sanitization), + ...(input.structuredResult === undefined + ? {} + : { structuredResult: sanitizePublishedValue(input.structuredResult, sanitization) }), + }, + mcp: { + serverName: sanitizePublishedText(input.mcpServerName, sanitization), + origin: sanitizePublishedUrl(normalizedOrigin(input.mcpOrigin), sanitization), + }, + replay: { + origin: sanitizePublishedUrl(replayOrigin, sanitization), + requestCount: input.replayRequestPaths.length, + requestPaths: input.replayRequestPaths.map((path) => + sanitizePublishedText(path, sanitization), + ), + errors: input.replayErrors.map((error) => sanitizePublishedText(error, sanitization)), + }, + inferenceBoundary: "loopback-replay", + }; +}; + +/** + * Append one real-client invocation to the attempt's publishable evidence. + * The replay origin gate and captured request ledger are the proof that the + * client used deterministic loopback inference instead of a paid provider. + */ +export const writeClaudeCodeEvidence = (runDir: string, input: ClaudeCodeEvidenceInput) => { + const file = join(runDir, CLAUDE_CODE_EVIDENCE_FILE); + return withArtifactLockSync(file, () => { + let existing: ClaudeCodeEvidenceDocument | undefined; + if (existsSync(file)) { + existing = readClaudeCodeEvidence(runDir); + } + const invocation = makeClaudeCodeInvocationEvidence(input); + const document: ClaudeCodeEvidenceDocument = { + schemaVersion: 1, + client: "claude-code", + invocations: [...(existing?.invocations ?? 
[]), invocation], + }; + writeJsonAtomicSync(file, document); + return invocation; + }); +}; diff --git a/e2e/src/clients/claude-code.test.ts b/e2e/src/clients/claude-code.test.ts new file mode 100644 index 000000000..7ed8b4e0e --- /dev/null +++ b/e2e/src/clients/claude-code.test.ts @@ -0,0 +1,115 @@ +import { expect, it } from "@effect/vitest"; +import { Effect } from "effect"; +import { makeEchoMcpServer, serveMcpServer } from "@executor-js/plugin-mcp/testing"; + +import { serveAnthropicReplayBrain } from "./anthropic-replay-brain"; +import { + hasClaudeCode, + isClaudeCodeRequired, + makeClaudeCodeHome, + readClaudeCodeMcpConfig, + removeClaudeCodeHome, + replaceClaudeCodeServer, + runClaudeCode, +} from "./claude-code"; + +const scopedClaudeHome = ( + serverName: string, + server: { readonly url: string; readonly authorizationHeader?: string }, +) => + Effect.acquireRelease( + Effect.sync(() => makeClaudeCodeHome(serverName, server)), + (home) => Effect.sync(() => removeClaudeCodeHome(home)), + ); + +const scriptedEchoBrain = () => + serveAnthropicReplayBrain((context) => + context.lastToolResult + ? 
{ text: `finished:${context.lastToolResult}` } + : { tool: { name: "echo", input: { value: "account-switch" } } }, + ); + +it.effect("isolates Claude state and replaces one server name without retaining credentials", () => + Effect.gen(function* () { + const home = yield* scopedClaudeHome("executor", { + url: "http://127.0.0.1:41001/mcp", + authorizationHeader: "Bearer first-account-secret", + }); + + expect(home.rootDir).not.toContain("/e2e/runs/"); + expect(home.homeDir).not.toBe(process.env.HOME); + expect(home.configDir).toBe(home.env.CLAUDE_CONFIG_DIR); + expect(home.env).not.toHaveProperty("ANTHROPIC_AUTH_TOKEN"); + expect(home.env).not.toHaveProperty("CLAUDE_CODE_OAUTH_TOKEN"); + expect(home.env).not.toHaveProperty("HTTP_PROXY"); + expect(home.env).not.toHaveProperty("HTTPS_PROXY"); + + yield* replaceClaudeCodeServer( + home, + { + url: "http://127.0.0.1:41002/mcp", + authorizationHeader: "Bearer second-account-secret", + }, + { clearOAuthCredentials: false }, + ); + const config = JSON.stringify(readClaudeCodeMcpConfig(home)); + expect(config).toContain("http://127.0.0.1:41002/mcp"); + expect(config).toContain("second-account-secret"); + expect(config).not.toContain("http://127.0.0.1:41001/mcp"); + expect(config).not.toContain("first-account-secret"); + }), +); + +const claudeAvailable = hasClaudeCode(); + +it.effect.skipIf(!claudeAvailable && !isClaudeCodeRequired())( + "the real Claude Code binary discovers, invokes, and replaces an MCP account", + () => + Effect.gen(function* () { + expect( + claudeAvailable, + "Claude Code is required but its pinned native binary is unavailable", + ).toBe(true); + + const firstServer = yield* serveMcpServer(() => + makeEchoMcpServer({ text: (value) => `account-a:${value}` }), + ); + const secondServer = yield* serveMcpServer(() => + makeEchoMcpServer({ text: (value) => `account-b:${value}` }), + ); + const home = yield* scopedClaudeHome("executor", { url: firstServer.url }); + + const firstBrain = yield* 
scriptedEchoBrain(); + const first = yield* runClaudeCode(home, { + brainBaseUrl: firstBrain.baseUrl, + prompt: "Call the configured echo tool once.", + }); + expect(first.result).toContain("account-a:account-switch"); + expect(first.claudeCodeVersion).toBe("2.1.195"); + // Claude reports a catalog-price estimate even when every model request + // terminates at the loopback replay server. The driver's loopback-only + // URL gate, fake API key, and captured requests are the no-inference proof. + expect(firstBrain.requests().length).toBeGreaterThan(0); + expect( + firstBrain.requests().some((request) => request.toolNames.includes("mcp__executor__echo")), + ).toBe(true); + expect(firstBrain.errors()).toEqual([]); + + yield* replaceClaudeCodeServer(home, { url: secondServer.url }); + + const secondBrain = yield* scriptedEchoBrain(); + const second = yield* runClaudeCode(home, { + brainBaseUrl: secondBrain.baseUrl, + prompt: "Call the configured echo tool once after the account switch.", + }); + expect(second.result).toContain("account-b:account-switch"); + expect(second.result).not.toContain("account-a:account-switch"); + expect(secondBrain.errors()).toEqual([]); + + const firstRequests = yield* firstServer.requests; + const secondRequests = yield* secondServer.requests; + expect(firstRequests.some((request) => request.method === "POST")).toBe(true); + expect(secondRequests.some((request) => request.method === "POST")).toBe(true); + }), + 120_000, +); diff --git a/e2e/src/clients/claude-code.ts b/e2e/src/clients/claude-code.ts new file mode 100644 index 000000000..eb33db071 --- /dev/null +++ b/e2e/src/clients/claude-code.ts @@ -0,0 +1,366 @@ +// Hermetic driver for the REAL Claude Code native binary. The process gets a +// throwaway HOME and CLAUDE_CONFIG_DIR, explicit MCP config, and a loopback-only +// Anthropic replay endpoint. No ambient login or paid Anthropic credential can +// cross this boundary. 
+import { execFile, spawnSync } from "node:child_process"; +import { mkdirSync, mkdtempSync, readFileSync, rmSync, writeFileSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; + +import { Data, Effect } from "effect"; + +export const DEFAULT_CLAUDE_CODE_BINARY = "claude"; +export const DEFAULT_CLAUDE_CODE_VERSION = "2.1.195"; +export const CLAUDE_CODE_REQUIRED_ENV = "E2E_CLAUDE_CODE_REQUIRED"; +export const CLAUDE_CODE_VERSION_ENV = "E2E_CLAUDE_CODE_VERSION"; + +export interface ClaudeCodeServer { + readonly url: string; + readonly authorizationHeader?: string; +} + +export interface ClaudeCodeHome { + readonly rootDir: string; + readonly homeDir: string; + readonly configDir: string; + readonly projectDir: string; + readonly mcpConfigPath: string; + readonly serverName: string; + readonly binaryPath: string; + readonly version: string | undefined; + readonly env: Readonly>; +} + +export interface ClaudeCodeRunInput { + readonly brainBaseUrl: string; + readonly prompt: string; + readonly model?: string; + readonly timeoutMs?: number; +} + +export interface ClaudeCodeRunResult { + readonly result: string; + readonly raw: unknown; + readonly stdout: string; + readonly stderr: string; + readonly durationMs: number; + readonly reportedDurationMs: number | undefined; + readonly totalCostUsd: number | undefined; + readonly claudeCodeVersion: string; +} + +export class ClaudeCodeInvocationError extends Data.TaggedError("ClaudeCodeInvocationError")<{ + readonly message: string; + readonly cause?: unknown; + readonly stdout?: string; + readonly stderr?: string; +}> {} + +const isUnknownRecord = (value: unknown): value is Record => + typeof value === "object" && value !== null && !Array.isArray(value); + +const inheritedEnvironment = () => { + const names = [ + "PATH", + "SHELL", + "LANG", + "LC_ALL", + "TERM", + "SystemRoot", + "WINDIR", + "ComSpec", + "PATHEXT", + ] as const; + return Object.fromEntries( + names.flatMap((name) => { 
+ const value = process.env[name]; + return value === undefined ? [] : [[name, value] as const]; + }), + ); +}; + +const controlledEnvironment = ( + rootDir: string, + homeDir: string, + configDir: string, +): Readonly> => { + const tempDir = join(rootDir, "tmp"); + const xdgDir = join(rootDir, "xdg"); + for (const dir of [tempDir, xdgDir]) mkdirSync(dir, { recursive: true }); + return { + ...inheritedEnvironment(), + HOME: homeDir, + USERPROFILE: homeDir, + CLAUDE_CONFIG_DIR: configDir, + XDG_CONFIG_HOME: join(xdgDir, "config"), + XDG_DATA_HOME: join(xdgDir, "data"), + XDG_STATE_HOME: join(xdgDir, "state"), + XDG_CACHE_HOME: join(xdgDir, "cache"), + TMPDIR: tempDir, + TEMP: tempDir, + TMP: tempDir, + CI: "1", + NO_PROXY: "127.0.0.1,localhost,::1", + no_proxy: "127.0.0.1,localhost,::1", + CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC: "1", + CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS: "1", + CLAUDE_CODE_ATTRIBUTION_HEADER: "0", + DISABLE_AUTOUPDATER: "1", + DISABLE_UPDATES: "1", + DISABLE_TELEMETRY: "1", + DISABLE_ERROR_REPORTING: "1", + }; +}; + +const mcpConfig = (serverName: string, server: ClaudeCodeServer) => ({ + mcpServers: { + [serverName]: { + type: "http", + url: server.url, + ...(server.authorizationHeader + ? { headers: { Authorization: server.authorizationHeader } } + : {}), + }, + }, +}); + +const writeMcpConfig = (home: ClaudeCodeHome, server: ClaudeCodeServer) => { + writeFileSync( + home.mcpConfigPath, + `${JSON.stringify(mcpConfig(home.serverName, server), null, 2)}\n`, + { + mode: 0o600, + }, + ); +}; + +export const claudeCodeBinaryPath = () => + process.env.E2E_CLAUDE_CODE_BIN ?? DEFAULT_CLAUDE_CODE_BINARY; + +export const expectedClaudeCodeVersion = () => + process.env[CLAUDE_CODE_VERSION_ENV] ?? 
DEFAULT_CLAUDE_CODE_VERSION; + +export const isClaudeCodeRequired = () => process.env[CLAUDE_CODE_REQUIRED_ENV] === "1"; + +export const installedClaudeCodeVersion = (binaryPath = claudeCodeBinaryPath()) => { + const probe = spawnSync(binaryPath, ["--version"], { + env: controlledEnvironmentForProbe(), + encoding: "utf8", + timeout: 10_000, + }); + if (probe.error || probe.status !== 0) return undefined; + return /^(\S+)/.exec(probe.stdout.trim())?.[1]; +}; + +export const hasClaudeCode = () => installedClaudeCodeVersion() === expectedClaudeCodeVersion(); + +const controlledEnvironmentForProbe = () => { + const homeDir = join(tmpdir(), "executor-e2e-claude-version-probe"); + return { + ...inheritedEnvironment(), + HOME: homeDir, + USERPROFILE: homeDir, + CLAUDE_CONFIG_DIR: join(homeDir, "config"), + XDG_CONFIG_HOME: join(homeDir, "xdg-config"), + XDG_DATA_HOME: join(homeDir, "xdg-data"), + XDG_STATE_HOME: join(homeDir, "xdg-state"), + XDG_CACHE_HOME: join(homeDir, "xdg-cache"), + NO_PROXY: "127.0.0.1,localhost,::1", + no_proxy: "127.0.0.1,localhost,::1", + CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC: "1", + DISABLE_AUTOUPDATER: "1", + DISABLE_UPDATES: "1", + }; +}; + +export const makeClaudeCodeHome = ( + serverName: string, + server: ClaudeCodeServer, +): ClaudeCodeHome => { + const rootDir = mkdtempSync(join(tmpdir(), "executor-e2e-claude-")); + const homeDir = join(rootDir, "home"); + const configDir = join(rootDir, "claude-config"); + const projectDir = join(rootDir, "project"); + const mcpConfigPath = join(rootDir, "mcp.json"); + for (const dir of [homeDir, configDir, projectDir]) mkdirSync(dir, { recursive: true }); + const home: ClaudeCodeHome = { + rootDir, + homeDir, + configDir, + projectDir, + mcpConfigPath, + serverName, + binaryPath: claudeCodeBinaryPath(), + version: installedClaudeCodeVersion(), + env: controlledEnvironment(rootDir, homeDir, configDir), + }; + writeMcpConfig(home, server); + return home; +}; + +export const readClaudeCodeMcpConfig = 
(home: ClaudeCodeHome): unknown => + JSON.parse(readFileSync(home.mcpConfigPath, "utf8")); + +export const removeClaudeCodeHome = (home: ClaudeCodeHome) => { + rmSync(home.rootDir, { recursive: true, force: true }); +}; + +const invokeClaudeCode = ( + home: ClaudeCodeHome, + args: ReadonlyArray, + env: Readonly>, + timeoutMs: number, +) => + Effect.callback<{ readonly stdout: string; readonly stderr: string }, ClaudeCodeInvocationError>( + (resume) => { + const child = execFile( + home.binaryPath, + [...args], + { + cwd: home.projectDir, + env: { ...env }, + encoding: "utf8", + timeout: timeoutMs, + maxBuffer: 32 * 1024 * 1024, + }, + (cause, stdout, stderr) => { + if (cause) { + resume( + Effect.fail( + new ClaudeCodeInvocationError({ + message: `Claude Code exited unsuccessfully: ${cause.message}`, + cause, + stdout, + stderr, + }), + ), + ); + return; + } + resume(Effect.succeed({ stdout, stderr })); + }, + ); + return Effect.sync(() => child.kill("SIGKILL")); + }, + ); + +const loopbackReplayUrl = (value: string) => + Effect.try({ + try: () => new URL(value), + catch: (cause) => + new ClaudeCodeInvocationError({ + message: `Invalid Anthropic replay URL: ${value}`, + cause, + }), + }).pipe( + Effect.filterOrFail( + (url) => + url.protocol === "http:" && + (url.hostname === "127.0.0.1" || url.hostname === "localhost" || url.hostname === "[::1]"), + () => + new ClaudeCodeInvocationError({ + message: `Refusing non-loopback Anthropic replay URL: ${value}`, + }), + ), + ); + +export const runClaudeCode = (home: ClaudeCodeHome, input: ClaudeCodeRunInput) => + Effect.gen(function* () { + const expectedVersion = expectedClaudeCodeVersion(); + const observedVersion = installedClaudeCodeVersion(home.binaryPath); + if (observedVersion !== expectedVersion) { + return yield* new ClaudeCodeInvocationError({ + message: `Claude Code ${expectedVersion} is required, found ${observedVersion ?? 
"no runnable binary"}`, + }); + } + const replayUrl = yield* loopbackReplayUrl(input.brainBaseUrl); + const startedAt = Date.now(); + const invocation = yield* invokeClaudeCode( + home, + [ + "--bare", + "--mcp-config", + home.mcpConfigPath, + "--strict-mcp-config", + "--print", + "--output-format", + "json", + "--no-session-persistence", + "--disable-slash-commands", + "--no-chrome", + "--model", + input.model ?? "claude-sonnet-4-6", + "--tools", + "", + "--allowed-tools", + `mcp__${home.serverName}__*`, + "--permission-mode", + "dontAsk", + "--system-prompt", + "Follow the user request using only the explicitly configured MCP tools.", + input.prompt, + ], + { + ...home.env, + ANTHROPIC_BASE_URL: replayUrl.origin, + ANTHROPIC_API_KEY: "executor-e2e-replay-key", + }, + input.timeoutMs ?? 90_000, + ); + const raw = yield* Effect.try({ + try: () => JSON.parse(invocation.stdout.trim()), + catch: (cause) => + new ClaudeCodeInvocationError({ + message: "Claude Code returned non-JSON output", + cause, + stdout: invocation.stdout, + stderr: invocation.stderr, + }), + }); + if (!isUnknownRecord(raw) || typeof raw.result !== "string") { + return yield* new ClaudeCodeInvocationError({ + message: "Claude Code JSON output did not contain a string result", + stdout: invocation.stdout, + stderr: invocation.stderr, + }); + } + return { + result: raw.result, + raw, + stdout: invocation.stdout, + stderr: invocation.stderr, + durationMs: Date.now() - startedAt, + reportedDurationMs: typeof raw.duration_ms === "number" ? raw.duration_ms : undefined, + totalCostUsd: typeof raw.total_cost_usd === "number" ? 
raw.total_cost_usd : undefined, + claudeCodeVersion: observedVersion, + } satisfies ClaudeCodeRunResult; + }); + +export const runClaudeCodeMcp = ( + home: ClaudeCodeHome, + args: ReadonlyArray, + timeoutMs = 30_000, +) => + invokeClaudeCode( + home, + ["--bare", "--mcp-config", home.mcpConfigPath, "--strict-mcp-config", "mcp", ...args], + home.env, + timeoutMs, + ); + +/** + * Reuse one Claude MCP server name for another account or endpoint. Clear any + * OAuth grant under that name before replacing the explicit config so a client + * process can never silently retain the previous account. + */ +export const replaceClaudeCodeServer = ( + home: ClaudeCodeHome, + server: ClaudeCodeServer, + options: { readonly clearOAuthCredentials?: boolean } = {}, +) => + Effect.gen(function* () { + if (options.clearOAuthCredentials !== false) { + yield* runClaudeCodeMcp(home, ["logout", home.serverName]).pipe(Effect.ignore); + } + yield* Effect.sync(() => writeMcpConfig(home, server)); + }); diff --git a/e2e/src/clients/portable-traces.test.ts b/e2e/src/clients/portable-traces.test.ts new file mode 100644 index 000000000..2b385c1bb --- /dev/null +++ b/e2e/src/clients/portable-traces.test.ts @@ -0,0 +1,71 @@ +import { mkdtempSync, readFileSync, rmSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; + +import { expect, it } from "@effect/vitest"; +import { Effect, Layer } from "effect"; +import { HttpClient, HttpClientRequest, HttpClientResponse } from "effect/unstable/http"; + +import { writeJsonAtomicSync } from "../artifact-io"; +import { exportPortableTraces } from "../portable-traces"; + +const TRACE_ID = "0123456789abcdef0123456789abcdef"; + +const temporaryRun = Effect.acquireRelease( + Effect.sync(() => mkdtempSync(join(tmpdir(), "executor-portable-traces-"))), + (directory) => Effect.sync(() => rmSync(directory, { recursive: true, force: true })), +); + +it.live("exports eventually available Motel traces and redacts secrets", () => + 
Effect.gen(function* () { + const runDir = yield* temporaryRun; + let requests = 0; + const httpClient = Layer.succeed( + HttpClient.HttpClient, + HttpClient.make((request: HttpClientRequest.HttpClientRequest) => + Effect.sync(() => { + requests += 1; + const body = + requests === 1 + ? { error: "not ready" } + : { + data: { + traceId: TRACE_ID, + tags: { + authorization: "Bearer secret-token", + route: "/api/tools?access_token=secret-token", + }, + }, + }; + return HttpClientResponse.fromWeb( + request, + new Response(JSON.stringify(body), { + status: requests === 1 ? 404 : 200, + headers: { "content-type": "application/json" }, + }), + ); + }), + ), + ); + writeJsonAtomicSync(join(runDir, "traces.json"), [ + { id: TRACE_ID }, + { id: TRACE_ID }, + { id: "invalid" }, + ]); + + const exported = yield* exportPortableTraces(runDir, "http://motel.invalid").pipe( + Effect.provide(httpClient), + ); + const artifact = readFileSync(join(runDir, "otel-traces.json"), "utf8"); + expect(exported, artifact).toEqual({ + file: "otel-traces.json", + exported: 1, + missing: 0, + invalid: 1, + }); + + expect(artifact).toContain(TRACE_ID); + expect(artifact).toContain("[REDACTED]"); + expect(artifact).not.toContain("secret-token"); + }), +); diff --git a/e2e/src/cloudflare-access-emulator.ts b/e2e/src/cloudflare-access-emulator.ts new file mode 100644 index 000000000..ebf0eee66 --- /dev/null +++ b/e2e/src/cloudflare-access-emulator.ts @@ -0,0 +1,163 @@ +import { randomUUID } from "node:crypto"; + +export const E2E_CLOUDFLARE_ACCESS_AUDIENCE = "executor-e2e-cloudflare-access"; + +export type CloudflareAccessTokenRequest = + | { + readonly kind: "human"; + readonly subject: string; + readonly email: string; + readonly name?: string; + readonly groups?: ReadonlyArray; + readonly audience?: string; + readonly expiresInSeconds?: number; + } + | { + readonly kind: "service"; + readonly commonName: string; + readonly audience?: string; + readonly expiresInSeconds?: number; + }; + +export 
interface CloudflareAccessLedgerEntry { + readonly id: number; + readonly timestamp: string; + readonly method: string; + readonly path: string; + readonly status: number; + readonly operation: string; + readonly tokenKind?: "human" | "service"; +} + +export interface CloudflareAccessHealth { + readonly ok: true; + readonly bootNonce: string; +} + +const isTokenResponse = (value: unknown): value is { readonly token: string } => + typeof value === "object" && + value !== null && + "token" in value && + typeof value.token === "string"; + +const isHealthResponse = (value: unknown): value is CloudflareAccessHealth => + typeof value === "object" && + value !== null && + "ok" in value && + value.ok === true && + "bootNonce" in value && + typeof value.bootNonce === "string" && + value.bootNonce.length > 0; + +const isLedgerEntry = (value: unknown): value is CloudflareAccessLedgerEntry => + typeof value === "object" && + value !== null && + "id" in value && + typeof value.id === "number" && + "timestamp" in value && + typeof value.timestamp === "string" && + "method" in value && + typeof value.method === "string" && + "path" in value && + typeof value.path === "string" && + "status" in value && + typeof value.status === "number" && + "operation" in value && + typeof value.operation === "string" && + (!("tokenKind" in value) || value.tokenKind === "human" || value.tokenKind === "service"); + +const isLedgerResponse = ( + value: unknown, +): value is { readonly entries: ReadonlyArray } => + typeof value === "object" && + value !== null && + "entries" in value && + Array.isArray(value.entries) && + value.entries.every(isLedgerEntry); + +export const issueCloudflareAccessToken = async ( + baseUrl: string, + request: CloudflareAccessTokenRequest, +) => { + const response = await fetch(new URL("/_e2e/issue", baseUrl), { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify(request), + }); + if (!response.ok) { + throw new Error( + 
`Cloudflare Access emulator refused token issue (${response.status}): ${await response.text()}`, + ); + } + const body: unknown = await response.json(); + if (!isTokenResponse(body)) { + throw new Error("Cloudflare Access emulator returned no token"); + } + return body.token; +}; + +export const accessAssertionHeaders = (token: string) => ({ + "Cf-Access-Jwt-Assertion": token, +}); + +export const readCloudflareAccessHealth = async (baseUrl: string) => { + const response = await fetch(new URL("/health", baseUrl)); + if (!response.ok) { + throw new Error(`Cloudflare Access emulator health failed (${response.status})`); + } + const body: unknown = await response.json(); + if (!isHealthResponse(body)) { + throw new Error("Cloudflare Access emulator returned malformed health identity"); + } + return body; +}; + +export const readCloudflareAccessLedger = async (baseUrl: string) => { + const response = await fetch(new URL("/_e2e/ledger", baseUrl)); + if (!response.ok) { + throw new Error(`Cloudflare Access emulator ledger failed (${response.status})`); + } + const body: unknown = await response.json(); + if (!isLedgerResponse(body)) { + throw new Error("Cloudflare Access emulator returned a malformed ledger"); + } + return body.entries; +}; + +/** + * Prove the supplied attach dependency is the full test issuer, not merely an + * arbitrary OIDC endpoint: it must identify its boot, mint a token, and record + * that mint in its typed ledger. 
+ */ +export const verifyCloudflareAccessEmulator = async ( + baseUrl: string, + options: { readonly expectedBootNonce?: string } = {}, +) => { + const health = await readCloudflareAccessHealth(baseUrl); + if (options.expectedBootNonce && health.bootNonce !== options.expectedBootNonce) { + throw new Error( + `Cloudflare Access emulator boot identity mismatch: expected ${options.expectedBootNonce}, received ${health.bootNonce}`, + ); + } + const before = await readCloudflareAccessLedger(baseUrl); + const afterId = before.reduce((maximum, entry) => Math.max(maximum, entry.id), 0); + const marker = randomUUID(); + const token = await issueCloudflareAccessToken(baseUrl, { + kind: "human", + subject: `e2e-emulator-check-${marker}`, + email: "admin@e2e.test", + name: "E2E emulator capability check", + }); + const after = await readCloudflareAccessLedger(baseUrl); + const recorded = after.some( + (entry) => + entry.id > afterId && + entry.operation === "token.issue" && + entry.tokenKind === "human" && + entry.status === 200, + ); + if (!recorded) { + throw new Error("Cloudflare Access emulator did not record its capability-check token"); + } + return { bootNonce: health.bootNonce, token }; +}; diff --git a/e2e/src/desktop/packaged.ts b/e2e/src/desktop/packaged.ts new file mode 100644 index 000000000..d5eae1a09 --- /dev/null +++ b/e2e/src/desktop/packaged.ts @@ -0,0 +1,663 @@ +import { type ChildProcess, execFile, execFileSync, spawn } from "node:child_process"; +import { chmodSync, mkdirSync, mkdtempSync, rmSync, writeFileSync } from "node:fs"; +import net from "node:net"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; + +const OUTPUT_LIMIT = 64 * 1024; + +const appendOutput = (current: string, chunk: Buffer) => + (current + chunk.toString()).slice(-OUTPUT_LIMIT); + +const envFlagEnabled = (value: string | undefined) => + value !== undefined && value !== "" && value !== "0" && value.toLowerCase() !== "false"; + +const 
PACKAGED_DESKTOP_RUNTIME_ENV_KEYS = [ + "PATH", + "Path", + "PATHEXT", + "SHELL", + "LANG", + "LANGUAGE", + "LC_ALL", + "LC_CTYPE", + "TZ", + "TERM", + "DISPLAY", + "WAYLAND_DISPLAY", + "XAUTHORITY", + "XDG_RUNTIME_DIR", + "XDG_SESSION_TYPE", + "XDG_CURRENT_DESKTOP", + "DESKTOP_SESSION", + "DBUS_SESSION_BUS_ADDRESS", + "GDK_BACKEND", + "GTK_MODULES", + "NO_AT_BRIDGE", + "LIBGL_ALWAYS_SOFTWARE", + "LIBGL_DRIVERS_PATH", + "MESA_LOADER_DRIVER_OVERRIDE", + "GBM_BACKEND", + "VK_ICD_FILENAMES", + "LD_LIBRARY_PATH", + "DYLD_LIBRARY_PATH", + "DYLD_FALLBACK_LIBRARY_PATH", + "CHROME_DEVEL_SANDBOX", + "ELECTRON_ENABLE_LOGGING", + "ELECTRON_OZONE_PLATFORM_HINT", + "OZONE_PLATFORM", + "DO_NOT_TRACK", + "SystemRoot", + "SYSTEMROOT", + "WINDIR", + "ComSpec", + "COMSPEC", + "SystemDrive", + "SYSTEMDRIVE", + "ProgramData", + "PROGRAMDATA", + "ProgramFiles", + "ProgramFiles(x86)", + "ProgramW6432", + "PROCESSOR_ARCHITECTURE", + "PROCESSOR_IDENTIFIER", + "NUMBER_OF_PROCESSORS", + "OS", + "SESSIONNAME", +] as const; + +export const selectPackagedDesktopRuntimeEnvironment = (environment: NodeJS.ProcessEnv) => { + const selected: NodeJS.ProcessEnv = {}; + for (const key of PACKAGED_DESKTOP_RUNTIME_ENV_KEYS) { + const value = environment[key]; + if (value !== undefined) selected[key] = value; + } + return selected; +}; + +export interface PackagedDesktopBundle { + readonly app: string; + readonly executor: string; +} + +export const requirePackagedDesktopBundle = (): PackagedDesktopBundle => { + const app = process.env.E2E_DESKTOP_APP_EXE; + const executor = process.env.E2E_DESKTOP_EXECUTOR_BIN; + if (!app || !executor) { + throw new Error( + "E2E_DESKTOP_APP_EXE / E2E_DESKTOP_EXECUTOR_BIN not set, did desktop-packaged.globalsetup run?", + ); + } + return { app, executor }; +}; + +export const createPackagedDesktopHome = (prefix: string) => mkdtempSync(join(tmpdir(), prefix)); + +export const removePackagedDesktopHome = (home: string) => + rmSync(home, { recursive: true, force: true }); 
+ +export const packagedDesktopSettingsDir = (home: string) => + join(home, ".executor-desktop-settings"); + +export const packagedDesktopEnvironment = ( + home: string, + overrides: NodeJS.ProcessEnv = {}, +): NodeJS.ProcessEnv => { + const inheritedRuntime = selectPackagedDesktopRuntimeEnvironment(process.env); + const appData = join(home, "AppData", "Roaming"); + const localAppData = join(home, "AppData", "Local"); + const xdgConfig = join(home, ".config"); + const xdgData = join(home, ".local", "share"); + const xdgCache = join(home, ".cache"); + const xdgState = join(home, ".local", "state"); + const isolatedXdgRuntime = join(home, ".xdg-runtime"); + const xdgRuntime = + inheritedRuntime.WAYLAND_DISPLAY && inheritedRuntime.XDG_RUNTIME_DIR + ? inheritedRuntime.XDG_RUNTIME_DIR + : isolatedXdgRuntime; + const temp = join(home, ".tmp"); + const settings = packagedDesktopSettingsDir(home); + + for (const directory of [ + home, + appData, + localAppData, + xdgConfig, + xdgData, + xdgCache, + xdgState, + temp, + settings, + ]) { + mkdirSync(directory, { recursive: true }); + } + if (xdgRuntime === isolatedXdgRuntime) { + mkdirSync(xdgRuntime, { recursive: true, mode: 0o700 }); + chmodSync(xdgRuntime, 0o700); + } + + return { + ...inheritedRuntime, + ...overrides, + HOME: home, + USERPROFILE: home, + APPDATA: appData, + LOCALAPPDATA: localAppData, + XDG_CONFIG_HOME: xdgConfig, + XDG_DATA_HOME: xdgData, + XDG_CACHE_HOME: xdgCache, + XDG_STATE_HOME: xdgState, + XDG_RUNTIME_DIR: xdgRuntime, + TMPDIR: temp, + TEMP: temp, + TMP: temp, + EXECUTOR_DESKTOP_SETTINGS_DIR: settings, + }; +}; + +const windowsGuiAvailable = () => { + // A Windows service runs in session zero, where Electron cannot create a user-visible window. + // Checking the actual process session avoids treating every Windows runner as interactive. 
+ // oxlint-disable-next-line executor/no-try-catch-or-throw -- boundary: probing the Windows session manager; absence means no GUI + try { + const sessionId = Number.parseInt( + execFileSync( + "powershell.exe", + [ + "-NoLogo", + "-NoProfile", + "-NonInteractive", + "-Command", + "[System.Diagnostics.Process]::GetCurrentProcess().SessionId", + ], + { encoding: "utf8", windowsHide: true }, + ).trim(), + 10, + ); + return ( + Number.isInteger(sessionId) && + sessionId > 0 && + process.env.SESSIONNAME?.toLowerCase() !== "services" + ); + } catch { + return false; + } +}; + +const guiAvailable = () => { + if (process.platform === "darwin") { + // oxlint-disable-next-line executor/no-try-catch-or-throw -- boundary: probing the session manager; absence means no GUI + try { + return execFileSync("launchctl", ["managername"], { encoding: "utf8" }).trim() === "Aqua"; + } catch { + return false; + } + } + if (process.platform === "linux") { + return Boolean(process.env.DISPLAY || process.env.WAYLAND_DISPLAY); + } + if (process.platform === "win32") return windowsGuiAvailable(); + return false; +}; + +const packagedSingleInstanceAvailable = () => { + const app = process.env.E2E_DESKTOP_APP_EXE; + if (process.platform !== "darwin" || !app) return true; + // oxlint-disable-next-line executor/no-try-catch-or-throw -- boundary: pgrep reports no match with a nonzero exit code + try { + const lines = execFileSync("pgrep", ["-fl", "Executor.app/Contents/MacOS/Executor"], { + encoding: "utf8", + }) + .split("\n") + .filter(Boolean); + return !lines.some((line) => !line.includes(app)); + } catch { + return true; + } +}; + +export type PackagedDesktopPreflight = + | { readonly status: "ready" } + | { readonly status: "skip" | "fail"; readonly reason: string }; + +export const packagedDesktopPreflight = (): PackagedDesktopPreflight => { + const capabilityMode = process.env.E2E_REQUIRED_CAPABILITY_MODE; + const required = + capabilityMode === "required" || + 
envFlagEnabled(process.env.E2E_DESKTOP_GUI_REQUIRED) || + (envFlagEnabled(process.env.CI) && capabilityMode !== "allow-skips"); + const reason = !guiAvailable() + ? `no interactive GUI session is available on ${process.platform}` + : !packagedSingleInstanceAvailable() + ? "another packaged Executor.app instance already owns the single-instance lock" + : null; + if (!reason) return { status: "ready" }; + return { status: required ? "fail" : "skip", reason }; +}; + +interface CdpResponse { + readonly id: number; + readonly result?: T; + readonly error?: { readonly message?: string; readonly data?: string }; +} + +interface CdpEvaluateResult { + readonly result: { readonly value?: unknown }; + readonly exceptionDetails?: unknown; +} + +interface CdpTarget { + readonly type: string; + readonly url: string; + readonly webSocketDebuggerUrl?: string; +} + +export class PackagedDesktopPage { + private nextId = 1; + private readonly pending = new Map< + number, + { + readonly resolve: (value: unknown) => void; + readonly reject: (error: Error) => void; + } + >(); + + private constructor(private readonly socket: WebSocket) { + socket.addEventListener("message", (event) => { + const data = event.data; + if (typeof data !== "string") return; + const message = JSON.parse(data) as CdpResponse; + if (!message.id) return; + const pending = this.pending.get(message.id); + if (!pending) return; + this.pending.delete(message.id); + if (message.error) { + pending.reject( + new Error( + [message.error.message ?? 
"CDP command failed", message.error.data] + .filter(Boolean) + .join("\n"), + ), + ); + return; + } + pending.resolve(message.result); + }); + socket.addEventListener("close", () => { + for (const [, pending] of this.pending) { + pending.reject(new Error("CDP socket closed")); + } + this.pending.clear(); + }); + } + + static connect = (url: string): Promise => + new Promise((resolve, reject) => { + const socket = new WebSocket(url); + const timer = setTimeout(() => { + socket.close(); + // oxlint-disable-next-line executor/no-promise-reject, executor/no-error-constructor -- boundary: WebSocket connection promise adapter + reject(new Error(`Timed out connecting to page CDP target ${url}`)); + }, 30_000); + socket.addEventListener( + "open", + () => { + clearTimeout(timer); + resolve(new PackagedDesktopPage(socket)); + }, + { once: true }, + ); + socket.addEventListener( + "error", + () => { + clearTimeout(timer); + // oxlint-disable-next-line executor/no-promise-reject, executor/no-error-constructor -- boundary: WebSocket connection promise adapter + reject(new Error(`Failed to connect to page CDP target ${url}`)); + }, + { once: true }, + ); + }); + + command = async (method: string, params: Record = {}): Promise => { + const id = this.nextId; + this.nextId += 1; + const result = new Promise((resolve, reject) => { + this.pending.set(id, { + resolve: (value) => resolve(value as T), + reject, + }); + }); + this.socket.send(JSON.stringify({ id, method, params })); + return result; + }; + + evaluate = async (expression: string): Promise => { + const result = await this.command("Runtime.evaluate", { + expression, + awaitPromise: true, + returnByValue: true, + }); + if (result.exceptionDetails) { + throw new Error(`CDP evaluation failed: ${JSON.stringify(result.exceptionDetails)}`); + } + return result.result.value as T; + }; + + waitForText = async (text: string, timeoutMs: number): Promise => { + const expression = 
`document.body?.innerText.includes(${JSON.stringify(text)}) ?? false`; + await this.waitForExpression(expression, timeoutMs, `text: ${text}`); + }; + + waitForExpression = async ( + expression: string, + timeoutMs: number, + description: string, + ): Promise => { + const deadline = Date.now() + timeoutMs; + for (;;) { + if (await this.evaluate(`Boolean(${expression})`).catch(() => false)) return; + if (Date.now() >= deadline) throw new Error(`Timed out waiting for ${description}`); + await new Promise((resolve) => setTimeout(resolve, 250)); + } + }; + + textPresent = (text: string) => + this.evaluate(`document.body?.innerText.includes(${JSON.stringify(text)}) ?? false`); + + setViewport = async (width: number, height: number): Promise => { + await this.command("Emulation.setDeviceMetricsOverride", { + width, + height, + deviceScaleFactor: 1, + mobile: false, + }); + }; + + wheel = async (x: number, y: number, deltaY: number): Promise => { + await this.command("Input.dispatchMouseEvent", { + type: "mouseWheel", + x, + y, + deltaX: 0, + deltaY, + }); + }; + + screenshot = async (path: string): Promise => { + const result = await this.command<{ readonly data: string }>("Page.captureScreenshot", { + format: "png", + fromSurface: true, + }); + writeFileSync(path, Buffer.from(result.data, "base64")); + }; + + close = () => this.socket.close(); +} + +export interface PackagedDesktopEvidenceCapture { + readonly rendererPath: string; + readonly osPixelPath?: string; +} + +export interface PackagedDesktopEvidenceHooks { + readonly beforeCapture?: ( + capture: PackagedDesktopEvidenceCapture, + app: PackagedDesktopApp, + ) => Promise | void; + readonly captureOsPixels?: (path: string, app: PackagedDesktopApp) => Promise; + readonly afterCapture?: ( + capture: PackagedDesktopEvidenceCapture, + app: PackagedDesktopApp, + ) => Promise | void; +} + +export interface PackagedDesktopLaunchOptions { + readonly home: string; + readonly args?: readonly string[]; + readonly env?: 
NodeJS.ProcessEnv; + readonly evidence?: PackagedDesktopEvidenceHooks; +} + +export interface PackagedDesktopApp { + readonly child: ChildProcess; + readonly debugPort: string; + cdp: PackagedDesktopPage; + readonly stdout: () => string; + readonly stderr: () => string; + readonly output: () => string; + readonly captureEvidence: (capture: PackagedDesktopEvidenceCapture) => Promise; + readonly close: () => Promise; +} + +const waitForPageWebSocket = async (debugPort: string) => { + const deadline = Date.now() + 120_000; + for (;;) { + const targets = (await fetch(`http://127.0.0.1:${debugPort}/json/list`) + .then((response) => (response.ok ? response.json() : [])) + .catch(() => [])) as ReadonlyArray; + const page = targets.find( + (target) => + target.type === "page" && + target.webSocketDebuggerUrl && + !target.url.startsWith("devtools://"), + ); + if (page?.webSocketDebuggerUrl) return page.webSocketDebuggerUrl; + if (Date.now() >= deadline) { + throw new Error("Timed out waiting for packaged app page CDP target"); + } + await new Promise((resolve) => setTimeout(resolve, 100)); + } +}; + +export const stopProcess = async (child: ChildProcess | undefined): Promise => { + if (!child || child.exitCode !== null || child.signalCode !== null) return; + await new Promise((resolve) => { + let settled = false; + const settle = () => { + if (settled) return; + settled = true; + resolve(); + }; + const forceStop = () => { + if (process.platform === "win32" && child.pid) { + execFile( + "taskkill", + ["/PID", String(child.pid), "/T", "/F"], + { windowsHide: true }, + settle, + ); + } else { + child.kill("SIGKILL"); + setTimeout(settle, 1_000); + } + }; + const timeout = setTimeout(forceStop, 5_000); + child.once("exit", () => { + clearTimeout(timeout); + settle(); + }); + if (process.platform === "win32" && child.pid) { + execFile("taskkill", ["/PID", String(child.pid), "/T"], { windowsHide: true }, () => {}); + } else { + child.kill("SIGTERM"); + } + }); +}; + +export 
const closePackagedDesktop = async (app: PackagedDesktopApp | undefined) => { + if (!app) return; + app.cdp.close(); + await stopProcess(app.child); +}; + +export const launchPackagedDesktop = async ( + options: PackagedDesktopLaunchOptions, +): Promise => { + const { app: executable } = requirePackagedDesktopBundle(); + const evidence = options.evidence ?? {}; + let stdout = ""; + let stderr = ""; + let settled = false; + const child = spawn(executable, ["--remote-debugging-port=0", ...(options.args ?? [])], { + env: packagedDesktopEnvironment(options.home, options.env), + stdio: ["ignore", "pipe", "pipe"], + }); + child.stdout?.on("data", (chunk: Buffer) => { + stdout = appendOutput(stdout, chunk); + }); + child.stderr?.on("data", (chunk: Buffer) => { + stderr = appendOutput(stderr, chunk); + }); + const output = () => [stdout, stderr].filter(Boolean).join("\n"); + + // oxlint-disable-next-line executor/no-try-catch-or-throw -- boundary: a failed launch must reap the Electron process + try { + const browserCdpUrl = await new Promise((resolve, reject) => { + let timer: ReturnType; + const settle = (fn: () => void) => { + if (settled) return; + settled = true; + clearTimeout(timer); + fn(); + }; + timer = setTimeout( + () => + settle(() => { + // oxlint-disable-next-line executor/no-promise-reject, executor/no-error-constructor -- boundary: packaged-app launch promise adapter + reject(new Error(`Timed out waiting for packaged app CDP URL\n${output()}`)); + }), + 120_000, + ); + const detectCdpUrl = () => { + const match = output().match(/DevTools listening on (ws:\/\/[^\s]+)/); + if (match) settle(() => resolve(match[1]!)); + }; + child.stdout?.on("data", detectCdpUrl); + child.stderr?.on("data", detectCdpUrl); + // oxlint-disable-next-line executor/no-promise-reject -- boundary: packaged-app launch promise adapter + child.once("error", (error) => settle(() => reject(error))); + child.once("exit", (code, signal) => + settle(() => + // oxlint-disable-next-line 
executor/no-promise-reject, executor/no-error-constructor -- boundary: packaged-app launch promise adapter + reject( + new Error( + `Packaged app exited before CDP (code=${code} signal=${signal})\n${output()}`, + ), + ), + ), + ); + }); + + const debugPort = new URL(browserCdpUrl).port; + const pageCdpUrl = await waitForPageWebSocket(debugPort); + const cdp = await PackagedDesktopPage.connect(pageCdpUrl); + await cdp.command("Runtime.enable"); + await cdp.command("Page.enable"); + + const packagedApp: PackagedDesktopApp = { + child, + cdp, + debugPort, + stdout: () => stdout, + stderr: () => stderr, + output, + captureEvidence: async (capture) => { + await evidence.beforeCapture?.(capture, packagedApp); + await packagedApp.cdp.screenshot(capture.rendererPath); + if (capture.osPixelPath) { + if (!evidence.captureOsPixels) { + throw new Error("OS-pixel evidence was requested without a captureOsPixels hook"); + } + await evidence.captureOsPixels(capture.osPixelPath, packagedApp); + } + await evidence.afterCapture?.(capture, packagedApp); + }, + close: () => closePackagedDesktop(packagedApp), + }; + return packagedApp; + } catch (error) { + await stopProcess(child); + throw error; + } +}; + +export const reconnectPackagedDesktopPage = async (app: PackagedDesktopApp) => { + app.cdp.close(); + const pageCdpUrl = await waitForPageWebSocket(app.debugPort); + const cdp = await PackagedDesktopPage.connect(pageCdpUrl); + await cdp.command("Runtime.enable"); + await cdp.command("Page.enable"); + app.cdp = cdp; + return cdp; +}; + +export const freePort = () => + new Promise((resolve, reject) => { + const server = net.createServer(); + server.on("error", reject); + server.listen(0, "127.0.0.1", () => { + const port = (server.address() as net.AddressInfo).port; + server.close(() => resolve(port)); + }); + }); + +export interface SupervisedDaemonStart { + readonly child: ChildProcess; + readonly ready: boolean; + readonly stdout: string; + readonly stderr: string; +} + +export 
const startSupervisedDaemon = (options: { + readonly home: string; + readonly port: number; + readonly hostname?: string; + readonly env?: NodeJS.ProcessEnv; +}): Promise => + new Promise((resolve) => { + const { executor } = requirePackagedDesktopBundle(); + const child = spawn( + executor, + [ + "daemon", + "run", + "--foreground", + "--port", + String(options.port), + "--hostname", + options.hostname ?? "127.0.0.1", + ], + { + env: packagedDesktopEnvironment(options.home, options.env), + stdio: ["ignore", "pipe", "pipe"], + }, + ); + let stdout = ""; + let stderr = ""; + let settled = false; + const settle = (ready: boolean) => { + if (settled) return; + settled = true; + resolve({ child, ready, stdout, stderr }); + }; + const timer = setTimeout(() => settle(false), 60_000); + child.stdout.on("data", (chunk: Buffer) => { + stdout = appendOutput(stdout, chunk); + if (/Daemon ready on http:\/\//.test(chunk.toString())) { + clearTimeout(timer); + settle(true); + } + }); + child.stderr.on("data", (chunk: Buffer) => { + stderr = appendOutput(stderr, chunk); + }); + child.on("error", (error) => { + clearTimeout(timer); + stderr = appendOutput(stderr, Buffer.from(error.message)); + settle(false); + }); + child.on("exit", () => { + clearTimeout(timer); + settle(false); + }); + }); diff --git a/e2e/src/evidence-merge.ts b/e2e/src/evidence-merge.ts new file mode 100644 index 000000000..ffc06c2a3 --- /dev/null +++ b/e2e/src/evidence-merge.ts @@ -0,0 +1,171 @@ +import { cpSync, existsSync, lstatSync, mkdirSync, readdirSync, readFileSync } from "node:fs"; +import { join, parse, resolve, sep } from "node:path"; + +import { trustedProjectForArtifactTarget, type TrustedRunLane } from "./evidence-trust"; + +const GENERATED_DIRECTORIES = new Set(["assets", "trace-viewer"]); + +export interface EvidenceMergeOptions { + readonly inputDir: string; + readonly outputDir: string; + readonly runAttempt: string; +} + +export interface EvidenceMergeEntry { + readonly artifact: string; + 
readonly target: string; + readonly sourceSlug: string; + readonly mergedSlug: string; +} + +export interface EvidenceMergeResult { + readonly artifactCount: number; + readonly attemptCount: number; + readonly collisionCount: number; + readonly entries: ReadonlyArray; + readonly trustedRuns: ReadonlyArray; +} + +const isWithin = (parent: string, child: string) => child.startsWith(`${parent}${sep}`); + +const safeSuffix = (value: string) => { + const sanitized = value.replace(/[^a-zA-Z0-9_-]+/g, "-").replace(/^-+|-+$/g, ""); + return sanitized.slice(0, 80) || "artifact"; +}; + +const attemptIdFor = (directory: string) => { + const file = join(directory, "evidence.json"); + if (!existsSync(file)) return undefined; + try { + const parsed: unknown = JSON.parse(readFileSync(file, "utf8")); + if ( + typeof parsed === "object" && + parsed !== null && + "attemptId" in parsed && + typeof parsed.attemptId === "string" && + parsed.attemptId.length > 0 + ) { + return safeSuffix(parsed.attemptId); + } + } catch { + // The rebuilt manifest will ignore malformed evidence metadata. A source + // artifact still gets a collision-safe name derived from its artifact. + } + return undefined; +}; + +const assertCopyableTree = (directory: string) => { + for (const entry of readdirSync(directory, { withFileTypes: true })) { + const path = join(directory, entry.name); + if (entry.isSymbolicLink() || lstatSync(path).isSymbolicLink()) { + throw new Error(`evidence merge refuses symbolic link: ${path}`); + } + if (entry.isDirectory()) assertCopyableTree(path); + } +}; + +const destinationSlug = ( + destinationTarget: string, + sourceSlug: string, + sourceArtifact: string, + sourceDirectory: string, +) => { + if (!existsSync(join(destinationTarget, sourceSlug))) return sourceSlug; + + const suffix = attemptIdFor(sourceDirectory) ?? 
safeSuffix(sourceArtifact); + const sourcePrefix = sourceSlug.slice(0, Math.max(1, 180 - suffix.length)); + const base = `${sourcePrefix}--${suffix}`; + if (!existsSync(join(destinationTarget, base))) return base; + + for (let index = 2; index < 10_000; index += 1) { + const candidate = `${base}-${index}`; + if (!existsSync(join(destinationTarget, candidate))) return candidate; + } + throw new Error(`evidence merge exhausted collision suffixes for ${sourceSlug}`); +}; + +/** + * Merge independently uploaded `runs/` trees without allowing one job to + * overwrite another job's attempt directory. Generated viewer files are + * intentionally discarded because the aggregate job rebuilds them once. + */ +export const mergeEvidenceArtifacts = (options: EvidenceMergeOptions): EvidenceMergeResult => { + const inputDir = resolve(options.inputDir); + const outputDir = resolve(options.outputDir); + if (inputDir === outputDir || isWithin(inputDir, outputDir) || isWithin(outputDir, inputDir)) { + throw new Error("evidence merge input and output directories must not overlap"); + } + if (parse(outputDir).root === outputDir) { + throw new Error("evidence merge output must not be a filesystem root"); + } + if (!existsSync(inputDir)) throw new Error(`evidence merge input does not exist: ${inputDir}`); + + if (existsSync(outputDir) && readdirSync(outputDir).length > 0) { + throw new Error(`evidence merge output must be empty: ${outputDir}`); + } + mkdirSync(outputDir, { recursive: true }); + + const artifacts = readdirSync(inputDir, { withFileTypes: true }) + .filter((entry) => entry.isDirectory()) + .sort((left, right) => left.name.localeCompare(right.name)); + const entries: EvidenceMergeEntry[] = []; + const trustedRuns: TrustedRunLane[] = []; + let collisionCount = 0; + + for (const artifact of artifacts) { + const artifactDir = join(inputDir, artifact.name); + const targets = readdirSync(artifactDir, { withFileTypes: true }) + .filter((entry) => entry.isDirectory() && 
!GENERATED_DIRECTORIES.has(entry.name)) + .sort((left, right) => left.name.localeCompare(right.name)); + + for (const target of targets) { + const sourceTarget = join(artifactDir, target.name); + const destinationTarget = join(outputDir, target.name); + const trustedProject = trustedProjectForArtifactTarget( + artifact.name, + options.runAttempt, + target.name, + ); + mkdirSync(destinationTarget, { recursive: true }); + const attempts = readdirSync(sourceTarget, { withFileTypes: true }) + .filter((entry) => entry.isDirectory()) + .sort((left, right) => left.name.localeCompare(right.name)); + + for (const attempt of attempts) { + const sourceDirectory = join(sourceTarget, attempt.name); + assertCopyableTree(sourceDirectory); + const mergedSlug = destinationSlug( + destinationTarget, + attempt.name, + artifact.name, + sourceDirectory, + ); + if (mergedSlug !== attempt.name) collisionCount += 1; + cpSync(sourceDirectory, join(destinationTarget, mergedSlug), { + recursive: true, + force: false, + errorOnExist: true, + }); + entries.push({ + artifact: artifact.name, + target: target.name, + sourceSlug: attempt.name, + mergedSlug, + }); + trustedRuns.push({ target: target.name, slug: mergedSlug, project: trustedProject }); + } + } + } + + if (entries.length === 0) { + throw new Error(`evidence merge found no attempt directories in ${inputDir}`); + } + + return { + artifactCount: artifacts.length, + attemptCount: entries.length, + collisionCount, + entries, + trustedRuns, + }; +}; diff --git a/e2e/src/evidence-provenance.ts b/e2e/src/evidence-provenance.ts new file mode 100644 index 000000000..6bd0fa295 --- /dev/null +++ b/e2e/src/evidence-provenance.ts @@ -0,0 +1,149 @@ +import { join } from "node:path"; + +import { writeJsonAtomicSync } from "./artifact-io"; +import { + projectDefinition, + visualDataClassificationForProject, + type VisualDataClassification, +} from "./project-matrix"; + +export const LANE_PROVENANCE_FILE = "lane-provenance.json"; + +export interface 
LaneProvenance { + readonly schemaVersion: 1; + readonly source: "e2e/src/project-matrix.ts"; + readonly project: string; + readonly target: string; + readonly hermetic: boolean; + readonly dataClassification: VisualDataClassification; +} + +export interface VisualEvidenceDeclaration { + readonly dataClassification: VisualDataClassification; +} + +const record = (value: unknown): value is Record => + typeof value === "object" && value !== null && !Array.isArray(value); + +export const laneProvenanceFor = ( + projectName: string, + target: string, +): LaneProvenance | undefined => { + const project = projectDefinition(projectName); + const dataClassification = visualDataClassificationForProject(projectName); + if (!project || !dataClassification || project.target !== target) return undefined; + return { + schemaVersion: 1, + source: "e2e/src/project-matrix.ts", + project: project.name, + target: project.target, + hermetic: project.hermetic, + dataClassification, + }; +}; + +export const laneProvenanceForEnvironment = ( + target: string, + env: Readonly> = process.env, +) => laneProvenanceFor(env.E2E_PROJECT ?? env.E2E_TARGET ?? 
"", target); + +export const parseLaneProvenance = ( + value: unknown, + trustedProject: string, + expectedTarget: string, +): LaneProvenance | undefined => { + if (!record(value)) return undefined; + if ( + value.schemaVersion !== 1 || + value.source !== "e2e/src/project-matrix.ts" || + typeof value.project !== "string" || + typeof value.target !== "string" || + typeof value.hermetic !== "boolean" || + (value.dataClassification !== "synthetic-only" && + value.dataClassification !== "potentially-sensitive") + ) { + return undefined; + } + const expected = laneProvenanceFor(trustedProject, expectedTarget); + if ( + !expected || + value.project !== expected.project || + value.target !== expected.target || + expected.hermetic !== value.hermetic || + expected.dataClassification !== value.dataClassification + ) { + return undefined; + } + return expected; +}; + +export const writeRunLaneProvenance = ( + runDir: string, + target: string, + env: Readonly> = process.env, +) => { + const provenance = laneProvenanceForEnvironment(target, env); + if (!provenance) return undefined; + writeJsonAtomicSync(join(runDir, LANE_PROVENANCE_FILE), provenance); + return provenance; +}; + +const visualEvidenceDeclaration = (value: unknown): VisualEvidenceDeclaration | undefined => { + if (!record(value) || !record(value.visualEvidence)) return undefined; + const dataClassification = value.visualEvidence.dataClassification; + if (dataClassification !== "synthetic-only" && dataClassification !== "potentially-sensitive") { + return undefined; + } + return { dataClassification }; +}; + +export type VisualEvidencePublicationDecision = + | { readonly publish: true; readonly provenance: LaneProvenance } + | { readonly publish: false; readonly reason: string }; + +export const visualEvidencePublicationDecision = ( + result: unknown, + laneProvenance: unknown, + expectedTarget: string, + trustedProject: string, +): VisualEvidencePublicationDecision => { + if (!trustedProject) { + return { 
publish: false, reason: "trusted lane project is missing" }; + } + const provenance = parseLaneProvenance(laneProvenance, trustedProject, expectedTarget); + if (!provenance) { + return { + publish: false, + reason: `lane provenance does not match trusted project ${trustedProject} for target ${expectedTarget}`, + }; + } + if (provenance.target !== expectedTarget) { + return { + publish: false, + reason: `lane target ${provenance.target} does not match run target ${expectedTarget}`, + }; + } + if (!record(result) || result.target !== provenance.target) { + return { + publish: false, + reason: "result target is missing or does not match lane provenance", + }; + } + const declaration = visualEvidenceDeclaration(result); + if (!declaration) { + return { publish: false, reason: "result visual classification is missing or invalid" }; + } + if (declaration.dataClassification !== provenance.dataClassification) { + return { + publish: false, + reason: `result visual classification ${declaration.dataClassification} does not match lane classification ${provenance.dataClassification}`, + }; + } + if (provenance.dataClassification !== "synthetic-only") { + return { + publish: false, + reason: `lane ${provenance.project} is ${provenance.dataClassification}`, + }; + } + return { publish: true, provenance }; +}; diff --git a/e2e/src/evidence-publication.ts b/e2e/src/evidence-publication.ts new file mode 100644 index 000000000..a143580e5 --- /dev/null +++ b/e2e/src/evidence-publication.ts @@ -0,0 +1,444 @@ +import { lstatSync, readFileSync, readdirSync, type Stats } from "node:fs"; +import { join, relative, resolve, sep } from "node:path"; + +import { + isPublishedDirectory, + publishedArtifactFor, + type PublishedArtifact, +} from "./published-artifacts"; +import { parseLaneProvenance } from "./evidence-provenance"; +import { trustedRunLaneKey, trustedRunLaneMap, type TrustedRunLanes } from "./evidence-trust"; + +const PREFIX_SEGMENT = /^[a-z0-9][a-z0-9._-]*$/; +const 
/**
 * Report whether `value` contains a C0 control character (U+0000 to U+001F)
 * or DEL (U+007F).
 *
 * Scanning UTF-16 code units is sufficient: surrogate halves are all at or
 * above 0xD800, so they can never fall into the control range.
 */
const hasControlCharacter = (value: string) => {
  for (let index = 0; index < value.length; index += 1) {
    const unit = value.charCodeAt(index);
    if (unit < 32 || unit === 127) return true;
  }
  return false;
};
is missing sanitizer policy metadata"); + } + if ( + value.schemaVersion !== 1 || + value.status !== "passed" || + sanitizer.source !== "e2e/scripts/sanitize-evidence.ts" || + sanitizer.policyVersion !== 1 || + typeof sanitizer.sourceRevision !== "string" || + policy.unknownArtifacts !== "removed" || + policy.textAndJson !== "redacted" || + policy.binaryVisuals !== "unredacted-synthetic-only" || + policy.binarySecretDetection !== "byte-canary-only" || + !stringArray(value.binaryArtifacts) || + !stringArray(value.errors) || + value.errors.length > 0 + ) { + throw new Error("publication.json does not describe a passing supported sanitizer policy"); + } + return { + schemaVersion: 1, + status: "passed", + sanitizer: { + source: "e2e/scripts/sanitize-evidence.ts", + policyVersion: 1, + sourceRevision: sanitizer.sourceRevision, + }, + policy: { + unknownArtifacts: "removed", + textAndJson: "redacted", + binaryVisuals: "unredacted-synthetic-only", + binarySecretDetection: "byte-canary-only", + }, + binaryArtifacts: value.binaryArtifacts, + errors: value.errors, + } satisfies PublicationGate; +}; + +const portablePath = (root: string, file: string) => relative(root, file).split(sep).join("/"); + +const collectBundleFiles = (root: string, directory: string, files: EvidenceBundleFile[]): void => { + for (const entry of readdirSync(directory, { withFileTypes: true })) { + const file = join(directory, entry.name); + const relativePath = portablePath(root, file); + const stats = lstatSync(file); + if (stats.isSymbolicLink()) { + throw new Error(`publication bundle contains a symlink: ${relativePath}`); + } + if (stats.isDirectory()) { + if (!isPublishedDirectory(relativePath)) { + throw new Error(`publication bundle contains a private directory: ${relativePath}`); + } + collectBundleFiles(root, file, files); + continue; + } + const artifact = stats.isFile() ? 
publishedArtifactFor(relativePath) : undefined; + if (!artifact) { + throw new Error(`publication bundle contains a private artifact: ${relativePath}`); + } + files.push({ absolutePath: file, relativePath, artifact, stats }); + } +}; + +const parseJsonFile = (file: string) => { + const value: unknown = JSON.parse(readFileSync(file, "utf8")); + return value; +}; + +const safeRunPath = (target: string, slug: string) => + publishedArtifactFor(`${target}/${slug}/result.json`) !== undefined; + +export const summaryRunsFromManifest = (value: unknown) => { + if (!isRecord(value) || !Array.isArray(value.runs)) { + throw new Error("manifest.json must contain a runs array"); + } + return value.runs.map((entry, index): EvidenceSummaryRun => { + if ( + !isRecord(entry) || + typeof entry.scenario !== "string" || + entry.scenario.length === 0 || + hasControlCharacter(entry.scenario) || + typeof entry.target !== "string" || + typeof entry.slug !== "string" || + typeof entry.ok !== "boolean" || + !safeRunPath(entry.target, entry.slug) || + (entry.endedAt !== undefined && + (typeof entry.endedAt !== "number" || !Number.isFinite(entry.endedAt))) + ) { + throw new Error(`manifest.json contains an invalid run at index ${index}`); + } + return { + scenario: entry.scenario, + target: entry.target, + slug: entry.slug, + ok: entry.ok, + ...(typeof entry.endedAt === "number" ? { endedAt: entry.endedAt } : {}), + }; + }); +}; + +export const latestSummaryRuns = (runs: ReadonlyArray) => { + const latest = new Map(); + for (const run of runs) { + const key = `${run.scenario}\u0000${run.target}`; + const current = latest.get(key); + if (!current || (run.endedAt ?? 0) > (current.endedAt ?? 
/**
 * Parse `input` and require a bare HTTPS URL: no userinfo, no query, and no
 * fragment.
 *
 * @param input - The URL text to validate.
 * @param label - Human-readable name of the setting, used in the error message.
 * @returns The parsed `URL`.
 * @throws TypeError when `input` is not a parseable absolute URL (from `new URL`).
 * @throws Error when the URL is not HTTPS or carries credentials, a query, or a
 *   fragment, including an empty one such as a trailing `?` or `#`.
 */
const validatedHttpsUrl = (input: string, label: string) => {
  const url = new URL(input);
  if (
    url.protocol !== "https:" ||
    url.username !== "" ||
    url.password !== "" ||
    url.search !== "" ||
    url.hash !== "" ||
    // `search` and `hash` read as "" for a bare trailing "?" or "#", yet the
    // delimiter is still serialized. With userinfo already rejected, a literal
    // "?" or "#" in the href can only be such a delimiter, because the path
    // keeps those characters percent-encoded.
    url.href.includes("?") ||
    url.href.includes("#")
  ) {
    throw new Error(`${label} must be an HTTPS URL without credentials, query, or fragment`);
  }
  return url;
};
+ if ( + pathSegments.length === 0 || + pathSegments.some((segment) => !OBJECT_SEGMENT.test(segment)) || + !publishedArtifactFor(relativePath) + ) { + throw new Error(`R2 object path is not publication-allowlisted: ${relativePath}`); + } + url.pathname = `/${bucket}/${normalizedPrefix}/${pathSegments.join("/")}`; + return url.toString(); +}; + +export const validateEvidenceBundle = ( + runsDir: string, + sourceRevision: string, + trustedRuns: TrustedRunLanes, +) => { + if (!sourceRevision) throw new Error("source revision is required for evidence publication"); + const root = resolve(runsDir); + const files: EvidenceBundleFile[] = []; + collectBundleFiles(root, root, files); + const byPath = new Map(files.map((file) => [file.relativePath, file])); + for (const required of ["index.html", "manifest.json", "publication.json"]) { + if (!byPath.has(required)) throw new Error(`publication bundle is missing ${required}`); + } + + const publication = parsePublicationGate(parseJsonFile(join(root, "publication.json"))); + if (publication.sanitizer.sourceRevision !== sourceRevision) { + throw new Error("publication sanitizer revision does not match the workflow revision"); + } + + const actualAttempts = new Map(); + for (const file of files) { + const [target, slug, name, extra] = file.relativePath.split("/"); + if ( + !target || + !slug || + !name || + extra !== undefined || + target === "assets" || + target === "trace-viewer" + ) { + continue; + } + actualAttempts.set(trustedRunLaneKey(target, slug), { target, slug }); + } + const trustedAttempts = trustedRunLaneMap(trustedRuns); + if (actualAttempts.size !== trustedAttempts.size) { + throw new Error("publication evidence directories do not match external trusted lane metadata"); + } + for (const [key, attempt] of actualAttempts) { + const trusted = trustedAttempts.get(key); + if (!trusted) { + throw new Error( + `publication evidence has no external trusted lane: ${attempt.target}/${attempt.slug}`, + ); + } + const 
provenanceFile = byPath.get(`${attempt.target}/${attempt.slug}/lane-provenance.json`); + if ( + !provenanceFile || + !parseLaneProvenance( + parseJsonFile(provenanceFile.absolutePath), + trusted.project, + attempt.target, + ) + ) { + throw new Error( + `publication lane provenance does not match external trusted project ${trusted.project}: ${attempt.target}/${attempt.slug}`, + ); + } + const resultFile = byPath.get(`${attempt.target}/${attempt.slug}/result.json`); + const skippedFile = byPath.get(`${attempt.target}/${attempt.slug}/skipped.json`); + if ((resultFile ? 1 : 0) + (skippedFile ? 1 : 0) !== 1) { + throw new Error( + `publication evidence needs exactly one result or skip marker: ${attempt.target}/${attempt.slug}`, + ); + } + const marker = parseJsonFile((resultFile ?? skippedFile)?.absolutePath ?? ""); + if (!isRecord(marker) || marker.target !== attempt.target) { + throw new Error( + `publication evidence marker target does not match its directory: ${attempt.target}/${attempt.slug}`, + ); + } + } + + const visualArtifacts = files + .filter((file) => file.artifact.unredactedVisual) + .map((file) => file.relativePath) + .sort(); + const declaredVisualArtifacts = [...new Set(publication.binaryArtifacts)].sort(); + if ( + visualArtifacts.length !== declaredVisualArtifacts.length || + visualArtifacts.some((file, index) => file !== declaredVisualArtifacts[index]) + ) { + throw new Error("publication binary artifact inventory does not match the bundle"); + } + + const runs = summaryRunsFromManifest(parseJsonFile(join(root, "manifest.json"))); + for (const run of runs) { + const resultPath = `${run.target}/${run.slug}/result.json`; + if (!byPath.has(resultPath)) { + throw new Error(`manifest run is missing its publication result: ${resultPath}`); + } + } + return { root, files, publication, runs }; +}; + +const delay = (milliseconds: number) => + new Promise((resolve) => setTimeout(resolve, milliseconds)); + +const publicControlFileUrl = (viewerUrl: string, 
/**
 * Fetch `url` and confirm the public copy matches the local bundle file.
 *
 * Each attempt requires an OK status, a content-type whose media type equals
 * the artifact's declared MIME type (parameters such as `charset` are ignored),
 * and a body that is byte-identical to the file on disk. Failed attempts are
 * retried with a linearly growing delay of `retryDelayMs * attemptNumber`.
 *
 * @param url - Public URL expected to serve the file.
 * @param file - Bundle entry providing the local path and expected MIME type.
 * @param fetcher - Fetch implementation used for the request.
 * @param attempts - Total number of tries before giving up.
 * @param retryDelayMs - Base delay between tries, in milliseconds.
 * @throws Error naming `url` and the last failure once every attempt has failed.
 */
const verifyPublicFile = async (
  url: string,
  file: EvidenceBundleFile,
  fetcher: (url: string, init: RequestInit) => Promise<Response>,
  attempts: number,
  retryDelayMs: number,
) => {
  const expected = readFileSync(file.absolutePath);
  // Media type and subtype are case-insensitive, so compare in lower case; the
  // expected side never changes between attempts and is computed once.
  const expectedType = file.artifact.mime.split(";", 1)[0]?.trim().toLowerCase();
  let lastError: unknown;
  for (let attempt = 0; attempt < attempts; attempt += 1) {
    try {
      const response = await fetcher(url, { headers: { "cache-control": "no-cache" } });
      if (!response.ok) throw new Error(`public read returned HTTP ${response.status}`);
      const contentType = response.headers
        .get("content-type")
        ?.split(";", 1)[0]
        ?.trim()
        .toLowerCase();
      if (contentType !== expectedType) {
        throw new Error(`public read returned content-type ${contentType || "missing"}`);
      }
      const actual = Buffer.from(await response.arrayBuffer());
      if (!actual.equals(expected)) throw new Error("public read did not match the uploaded file");
      return;
    } catch (error) {
      lastError = error;
      // No delay after the final attempt: the failure is reported immediately.
      if (attempt < attempts - 1) await delay(retryDelayMs * (attempt + 1));
    }
  }
  throw new Error(`public verification failed for ${url}: ${String(lastError)}`);
};
/** HTML entity names for the characters that must not reach the renderer raw. */
const MARKDOWN_ENTITY_NAMES = new Map([
  ["&", "amp"],
  ["<", "lt"],
  [">", "gt"],
]);

/**
 * Escape untrusted text for use inside a Markdown table cell.
 *
 * `&`, `<` and `>` become HTML entities so the text cannot open markup, while
 * backslash, pipe and square brackets are backslash-escaped so it cannot end
 * the cell or start a link. Everything happens in one pass, so characters
 * introduced by an escape are never escaped a second time.
 *
 * @param value - Raw text such as a scenario or target name.
 * @returns The escaped text.
 */
const markdownText = (value: string) =>
  value.replace(/[&<>\\|[\]]/g, (character) => {
    const entity = MARKDOWN_ENTITY_NAMES.get(character);
    return entity === undefined ? `\\${character}` : `&${entity};`;
  });
/**
 * Resolve the trusted project names bound to a CI artifact name.
 *
 * Artifact names have the form `e2e-<lane>-<runAttempt>`; the lane is looked up
 * in `ARTIFACT_PROJECTS`.
 *
 * @param artifact - The uploaded artifact name.
 * @param runAttempt - The workflow run attempt, as a positive integer string.
 * @returns The lane's project list, or `undefined` when the name does not
 *   follow the expected form or the lane is unknown.
 * @throws Error when `runAttempt` is not a positive integer string.
 */
export const trustedProjectsForArtifact = (artifact: string, runAttempt: string) => {
  if (!validRunAttempt(runAttempt))
    throw new Error("trusted run attempt must be a positive integer");
  const suffix = `-${runAttempt}`;
  if (!artifact.startsWith("e2e-") || !artifact.endsWith(suffix)) return undefined;
  const lane = artifact.slice("e2e-".length, -suffix.length);
  // Only the lane's own keys count. A plain lookup would resolve inherited
  // names such as "constructor" or "__proto__" to non-array values.
  if (!Object.prototype.hasOwnProperty.call(ARTIFACT_PROJECTS, lane)) return undefined;
  return ARTIFACT_PROJECTS[lane as keyof typeof ARTIFACT_PROJECTS];
};
/**
 * Report whether `child` lies strictly inside the directory `parent`.
 *
 * This is a textual prefix test, so both paths are expected to be absolute and
 * already normalized in the same way. A path is never within itself.
 *
 * @param parent - Directory path.
 * @param child - Path to test.
 * @returns `true` when `child` starts with `parent` followed by a separator.
 */
const isWithin = (parent: string, child: string) => {
  // A filesystem root already ends with the separator; appending another one
  // would build a prefix such as "//" that no real path starts with.
  const prefix = parent.endsWith(sep) ? parent : `${parent}${sep}`;
  return child !== parent && child.startsWith(prefix);
};
HttpClient.HttpClient; + const response = yield* client.execute( + HttpClientRequest.get(`${motelUrl}/api/traces/${encodeURIComponent(traceId)}`), + ); + if (response.status !== 200) { + yield* response.text.pipe(Effect.catch(() => Effect.succeed(""))); + return yield* Effect.fail({ + _tag: "MotelTraceNotReady", + traceId, + status: response.status, + } as const); + } + return yield* response.json.pipe(Effect.flatMap(decodeMotelTraceResponse)); + }); + +const fetchTrace = (motelUrl: string, traceId: string) => + Effect.gen(function* () { + for (let attempt = 0; attempt < TRACE_POLL_ATTEMPTS; attempt += 1) { + const result = yield* Effect.result(fetchTraceOnce(motelUrl, traceId)); + if (result._tag === "Success") return result.success; + yield* Effect.sleep("500 millis"); + } + return yield* fetchTraceOnce(motelUrl, traceId); + }); + +export interface PortableTraceExport { + readonly file?: string; + readonly exported: number; + readonly missing: number; + readonly invalid: number; +} + +/** + * Copy every trace named by the run ledger out of the suite-owned Motel store. + * The store is deleted by global teardown, so this sanitized export is the + * portable evidence reviewers can inspect after CI has finished. 
+ */ +export const exportPortableTraces = (runDir: string, motelUrl: string) => + Effect.gen(function* () { + const ledgerFile = join(runDir, "traces.json"); + if (!existsSync(ledgerFile)) { + return { exported: 0, missing: 0, invalid: 0 } satisfies PortableTraceExport; + } + + const ledger = decodeTraceLedger(JSON.parse(readFileSync(ledgerFile, "utf8"))); + const ids = [...new Set(ledger.map((entry) => entry.id))]; + const validIds = ids.filter((id) => TRACE_ID.test(id)); + const invalidIds = ids.filter((id) => !TRACE_ID.test(id)); + const fetched = yield* Effect.all( + validIds.map((traceId) => + fetchTrace(motelUrl, traceId).pipe( + Effect.map(({ data }) => ({ traceId, data, found: true }) as const), + Effect.catchCause((cause) => + Effect.succeed({ + traceId, + found: false, + error: sanitizePublishedText(String(Cause.squash(cause))), + } as const), + ), + ), + ), + { concurrency: 8 }, + ); + const traces = fetched + .filter((entry) => entry.found) + .map((entry) => ({ traceId: entry.traceId, data: entry.data })); + const missing = fetched + .filter((entry) => !entry.found) + .map((entry) => ({ traceId: entry.traceId, error: entry.error })); + const file = join(runDir, "otel-traces.json"); + writeJsonAtomicSync( + file, + sanitizePublishedValue({ + schemaVersion: 1, + exportedAt: Date.now(), + traces, + missing, + invalidTraceIds: invalidIds, + }), + ); + return { + file: "otel-traces.json", + exported: traces.length, + missing: missing.length, + invalid: invalidIds.length, + } satisfies PortableTraceExport; + }); diff --git a/e2e/src/ports.ts b/e2e/src/ports.ts index a2e4c05d6..5233a6683 100644 --- a/e2e/src/ports.ts +++ b/e2e/src/ports.ts @@ -2,7 +2,7 @@ // /tmp rig) hashes its repo root into a PREFERRED block of e2e ports, so // concurrent suites normally never fight over a shared default. 
The hash is // only a preference, not a guarantee (28 checkouts over 400 blocks is -// birthday-paradox territory) — the globalsetups call `claimPorts`, which +// birthday-paradox territory), the globalsetups call `claimPorts`, which // probes the preferred block and walks forward to the next fully-free one, // then publishes the claimed ports via the E2E_*_PORT env vars so vitest's // test workers (spawned after globalsetup) compute the same URLs. The @@ -12,6 +12,7 @@ // instead of one clear bind error. Individual E2E_*_PORT env vars still // override everything, and E2E__URL still attaches to a running // instance. +import { randomUUID } from "node:crypto"; import { connect, createServer, type Server } from "node:net"; import { resolve } from "node:path"; import { fileURLToPath } from "node:url"; @@ -19,7 +20,7 @@ import { fileURLToPath } from "node:url"; /** The repo root identifies the checkout (stable regardless of process cwd). */ export const repoRoot = resolve(fileURLToPath(new URL("../..", import.meta.url))); -// FNV-1a — tiny, deterministic, and the same value in every process of this +// FNV-1a is tiny, deterministic, and the same value in every process of this // checkout (globalsetup and test workers must agree on the ports). const hash = (text: string): number => { let h = 2166136261; @@ -45,9 +46,9 @@ export const e2ePort = (envVar: string, offset: number): number => { return fromEnv ? 
Number(fromEnv) : portBlock + offset; }; -const isListening = (port: number): Promise => +const isListeningOn = (port: number, host: string): Promise => new Promise((done) => { - const socket = connect({ port, host: "127.0.0.1" }); + const socket = connect({ port, host }); socket.once("connect", () => { socket.destroy(); done(true); @@ -59,6 +60,14 @@ const isListening = (port: number): Promise => }); }); +const isListening = async (port: number): Promise => { + const listening = await Promise.all([ + isListeningOn(port, "127.0.0.1"), + isListeningOn(port, "::1"), + ]); + return listening.some(Boolean); +}; + export interface PortClaim { readonly envVar: string; readonly offset: number; @@ -67,81 +76,252 @@ export interface PortClaim { export interface ClaimedPorts { readonly ports: Record; - /** Releases the block's lock port — call from the suite teardown. */ + /** Releases this claim and closes an otherwise-unused block lock. */ readonly release: () => Promise; } +interface PortReservation { + readonly id: string; + readonly envVar: string; + readonly offset: number; + readonly label: string; +} + +interface HeldBlock { + readonly server: Server; + readonly reservations: Map; + readonly claimIds: Set; +} + // Binding is atomic where probing is not: holding the block's lock port for // the suite's lifetime means two suites racing for the same block can never // both win (the second bind EADDRINUSEs and walks on). 
// Tail of the in-process operation chain. It is always re-pointed at a
// promise that swallows the previous outcome, so it never rejects.
let operationQueue: Promise<void> = Promise.resolve();

/**
 * Serialize claims and releases so in-process global setups cannot race.
 *
 * `operation` starts only after every previously queued operation has
 * settled, whether it succeeded or failed. The returned promise carries the
 * operation's own result or rejection; a failure does not block later work.
 */
const serialize = <T>(operation: () => Promise<T>): Promise<T> => {
  const start = (): Promise<T> => operation();
  const settleQuietly = (): void => undefined;

  const pending = operationQueue.then(start, start);
  // Keep the queue itself rejection-free so the next caller always runs.
  operationQueue = pending.then(settleQuietly, settleQuietly);
  return pending;
};
Starts at * this checkout's preferred block and walks forward block-by-block until it - * can atomically lock a block whose requested ports are all free — so two + * can atomically lock a block whose requested ports are all free, so two * checkouts whose hashes collide (or a leaked server squatting the preferred * block) degrade to "boot one block over" instead of attaching to a foreign - * server. Explicit env overrides win and are never probed or locked: if you - * pin a port and it's busy, vite's --strictPort fails visibly. A target - * re-claiming inside an already-locked process (cloud + selfhost projects in - * one vitest run) shares the block via disjoint offsets. + * server. Explicit env overrides still mean this suite will spawn on that + * exact port, so they are probed and fail immediately when occupied. Attaching + * to an existing instance is a separate E2E__URL mode and never calls + * this function. A target re-claiming inside an already-locked process (cloud + * + selfhost projects in one vitest run) shares the block via disjoint offsets. 
*/ -export const claimPorts = async (claims: ReadonlyArray): Promise => { +const claimPortsUnlocked = async (claims: ReadonlyArray): Promise => { + validateClaims(claims); + const claimId = randomUUID(); const ports: Record = {}; + const previousEnv = new Map(); const unpinned = claims.filter((claim) => { + if (publishedEnvVars.has(claim.envVar)) { + throw new Error(`e2e: ${claim.envVar} is already owned by an active port claim`); + } const pinned = process.env[claim.envVar]; - if (pinned) ports[claim.envVar] = Number(pinned); - return !pinned; + previousEnv.set(claim.envVar, pinned); + if (pinned === undefined || pinned === "") return true; + const port = Number(pinned); + if (!validPort(port)) { + throw new Error(`e2e: ${claim.envVar} must be a port in 1-65535, got ${pinned}`); + } + ports[claim.envVar] = port; + return false; }); - if (unpinned.length === 0) return { ports, release: async () => {} }; + if (new Set(Object.values(ports)).size !== Object.values(ports).length) { + throw new Error("e2e: two explicitly pinned claims use the same port"); + } + for (const [envVar, port] of Object.entries(ports)) { + if (activePinnedPorts.has(port)) { + throw new Error(`e2e: ${envVar}=${port} conflicts with another active pinned claim`); + } + } + const busyPinned = ( + await Promise.all( + Object.entries(ports).map(async ([envVar, port]) => ({ + envVar, + port, + busy: await isListening(port), + })), + ) + ).filter((entry) => entry.busy); + if (busyPinned.length > 0) { + throw new Error( + `e2e: explicitly pinned spawn ${busyPinned.map(({ envVar, port }) => `${envVar}=${port}`).join(", ")} is already listening; use E2E__URL for attach mode`, + ); + } + if (unpinned.length === 0) { + for (const port of Object.values(ports)) activePinnedPorts.set(port, claimId); + let releaseRequested = false; + return { + ports, + release: () => { + if (releaseRequested) return Promise.resolve(); + releaseRequested = true; + return serialize(async () => { + for (const port of 
Object.values(ports)) { + if (activePinnedPorts.get(port) === claimId) activePinnedPorts.delete(port); + } + }); + }, + }; + } + + const pinnedPorts = new Set([...Object.values(ports), ...activePinnedPorts.keys()]); for (let attempt = 0; attempt < BLOCK_COUNT; attempt++) { const block = BLOCK_BASE + ((portBlock - BLOCK_BASE + attempt * BLOCK_SIZE) % (BLOCK_COUNT * BLOCK_SIZE)); + if (pinnedPorts.has(block + LOCK_OFFSET)) { + console.warn( + `[e2e] port block ${block} lock conflicts with an explicitly pinned port; trying next block`, + ); + continue; + } // This process may already hold the block's lock (the other target's // globalsetup in the same vitest run); reuse it instead of re-locking. - let lock = heldLocks.get(block); - if (!lock) { - lock = await tryLockBlock(block); - if (!lock) { + let held = heldBlocks.get(block); + const reused = held !== undefined; + if (!held) { + const server = await tryLockBlock(block); + if (!server) { console.warn(`[e2e] port block ${block} is locked by another suite; trying next block`); continue; } - heldLocks.set(block, lock); + held = { server, reservations: new Map(), claimIds: new Set() }; + heldBlocks.set(block, held); } - const busy = await Promise.all(unpinned.map((claim) => isListening(block + claim.offset))); + + const reserved = unpinned.filter((claim) => held.reservations.has(claim.offset)); + if (reserved.length > 0) { + const conflicts = reserved.map((claim) => { + const owner = held.reservations.get(claim.offset); + return `${block + claim.offset} (${claim.label}, already ${owner?.label ?? "reserved"})`; + }); + console.warn( + `[e2e] port block ${block} has in-process offset conflicts: ${conflicts.join(", ")}; trying next block`, + ); + continue; + } + + const busy = await Promise.all( + unpinned.map((claim) => + pinnedPorts.has(block + claim.offset) + ? 
Promise.resolve(true) + : isListening(block + claim.offset), + ), + ); if (busy.some(Boolean)) { const taken = unpinned .filter((_, index) => busy[index]) .map((claim) => `${block + claim.offset} (${claim.label})`); console.warn( - `[e2e] port block ${block} has squatters — ${taken.join(", ")}; trying next block`, + `[e2e] port block ${block} has squatters: ${taken.join(", ")}; trying next block`, ); - continue; // Keep the lock: a half-busy block is still ours, just unusable now. + if (!reused) { + heldBlocks.delete(block); + await closeServer(held.server); + } + continue; } + + for (const port of Object.values(ports)) activePinnedPorts.set(port, claimId); for (const claim of unpinned) { const port = block + claim.offset; ports[claim.envVar] = port; // Workers spawn after globalsetup, so they inherit these and agree. process.env[claim.envVar] = String(port); + publishedEnvVars.set(claim.envVar, claimId); + held.reservations.set(claim.offset, { ...claim, id: claimId }); } + held.claimIds.add(claimId); + let releaseRequested = false; return { ports, - release: async () => { - const held = heldLocks.get(block); - if (!held) return; - heldLocks.delete(block); - await new Promise((done) => held.close(() => done())); + release: () => { + if (releaseRequested) return Promise.resolve(); + releaseRequested = true; + return serialize(async () => { + const current = heldBlocks.get(block); + for (const port of Object.values(ports)) { + if (activePinnedPorts.get(port) === claimId) activePinnedPorts.delete(port); + } + for (const claim of unpinned) { + if (publishedEnvVars.get(claim.envVar) === claimId) { + publishedEnvVars.delete(claim.envVar); + } + const published = ports[claim.envVar]; + if (process.env[claim.envVar] === String(published)) { + const previous = previousEnv.get(claim.envVar); + if (previous === undefined) delete process.env[claim.envVar]; + else process.env[claim.envVar] = previous; + } + } + if (!current || !current.claimIds.delete(claimId)) return; + for 
(const [offset, reservation] of current.reservations) { + if (reservation.id === claimId) current.reservations.delete(offset); + } + if (current.claimIds.size > 0) return; + heldBlocks.delete(block); + await closeServer(current.server); + }); }, }; } - throw new Error("e2e: no free port block found — the 42000-45999 range is exhausted?"); + throw new Error("e2e: no free port block found; the 42000-45999 range is exhausted?"); }; -const heldLocks = new Map(); +export const claimPorts = (claims: ReadonlyArray): Promise => + serialize(() => claimPortsUnlocked(claims)); + +const heldBlocks = new Map(); +const publishedEnvVars = new Map(); +const activePinnedPorts = new Map(); diff --git a/e2e/src/project-matrix.ts b/e2e/src/project-matrix.ts new file mode 100644 index 000000000..35224cc26 --- /dev/null +++ b/e2e/src/project-matrix.ts @@ -0,0 +1,353 @@ +/** + * One registry for every Vitest project. A project names both the deployed + * target and the execution policy used for that run. Keeping those separate + * lets CI run a hermetic subset without inventing a second target factory. 
/** Capabilities a project may require from the environment it runs in. */
export type E2eCapability =
  | "api"
  | "billing"
  | "browser"
  | "claude-code"
  | "desktop-gui"
  | "mcp-oauth"
  | "opencode"
  | "restart"
  | "telemetry"
  | "ttl-control";

/** Whether a missing required capability may be skipped or must fail. */
export type CapabilityRequirementMode = "allow-skips" | "required";

/** Publication class of visual evidence produced by a lane. */
export type VisualDataClassification = "synthetic-only" | "potentially-sensitive";

/**
 * Shape of one entry in the project registry.
 *
 * The element types below were reconstructed from how the registry entries
 * use each field: string globs and paths, capability names, and string
 * environment values.
 */
interface E2eProjectDefinition {
  readonly name: string;
  readonly target: string;
  /** Test file patterns that belong to the project. */
  readonly include: ReadonlyArray<string>;
  /** Test files removed from `include` for this project. */
  readonly exclude?: ReadonlyArray<string>;
  /** Global setup module paths. */
  readonly globalSetup: ReadonlyArray<string>;
  readonly requiredCapabilities: ReadonlyArray<E2eCapability>;
  readonly fileParallelism: boolean;
  // NOTE(review): presumably milliseconds, as in Vitest's options; confirm.
  readonly testTimeout: number;
  readonly hookTimeout: number;
  /** Extra environment variables for the project. */
  readonly env?: Readonly<Record<string, string>>;
  readonly tier: "portable" | "native-desktop" | "heavy-vm" | "manual";
  readonly hermetic: boolean;
}
+ */ +export const LIVE_DRIFT_SCENARIOS = [ + "scenarios/microsoft-graph-default.test.ts", + "scenarios/microsoft-graph-full.test.ts", + "scenarios/oauth-client-handoff.test.ts", +] as const; + +const cloudCapabilities = [ + "api", + "billing", + "browser", + "claude-code", + "mcp-oauth", + "opencode", + "telemetry", + "ttl-control", +] as const satisfies ReadonlyArray; +const selfhostCapabilities = [ + "api", + "browser", + "claude-code", + "mcp-oauth", + "opencode", +] as const satisfies ReadonlyArray; +const selfhostDockerCapabilities = [ + "api", + "browser", + "claude-code", + "mcp-oauth", + "opencode", + "restart", +] as const satisfies ReadonlyArray; +const cloudflareCapabilities = [ + "api", + "browser", + "mcp-oauth", +] as const satisfies ReadonlyArray; + +export const E2E_PROJECTS = [ + { + name: "harness", + target: "harness", + include: ["harness/**/*.test.ts"], + globalSetup: [], + requiredCapabilities: [], + fileParallelism: true, + testTimeout: 120_000, + hookTimeout: 120_000, + tier: "portable", + hermetic: true, + }, + { + name: "clients", + target: "clients", + include: ["src/clients/**/*.test.ts"], + globalSetup: [], + requiredCapabilities: ["claude-code"], + fileParallelism: true, + testTimeout: 120_000, + hookTimeout: 120_000, + env: { E2E_CLAUDE_CODE_VERSION: "2.1.195" }, + tier: "portable", + hermetic: true, + }, + { + name: "cloud", + target: "cloud", + include: [SHARED_SCENARIOS, "cloud/**/*.test.ts"], + globalSetup: ["./setup/cloud.globalsetup.ts"], + requiredCapabilities: cloudCapabilities, + fileParallelism: false, + testTimeout: 180_000, + hookTimeout: 120_000, + tier: "manual", + hermetic: false, + }, + { + name: "cloud-hermetic", + target: "cloud", + include: [SHARED_SCENARIOS, "cloud/**/*.test.ts"], + exclude: LIVE_DRIFT_SCENARIOS, + globalSetup: ["./setup/cloud.globalsetup.ts"], + requiredCapabilities: cloudCapabilities, + fileParallelism: false, + testTimeout: 180_000, + hookTimeout: 120_000, + tier: "portable", + hermetic: true, 
+ }, + { + name: "selfhost", + target: "selfhost", + include: [SHARED_SCENARIOS, "selfhost/**/*.test.ts"], + globalSetup: ["./setup/selfhost.globalsetup.ts"], + requiredCapabilities: selfhostCapabilities, + fileParallelism: false, + testTimeout: 180_000, + hookTimeout: 120_000, + tier: "manual", + hermetic: false, + }, + { + name: "selfhost-hermetic", + target: "selfhost", + include: [SHARED_SCENARIOS, "selfhost/**/*.test.ts"], + exclude: LIVE_DRIFT_SCENARIOS, + globalSetup: ["./setup/selfhost.globalsetup.ts"], + requiredCapabilities: selfhostCapabilities, + fileParallelism: false, + testTimeout: 180_000, + hookTimeout: 120_000, + tier: "portable", + hermetic: true, + }, + { + name: "selfhost-docker", + target: "selfhost-docker", + include: [SHARED_SCENARIOS, "selfhost/**/*.test.ts"], + globalSetup: ["./setup/selfhost-docker.globalsetup.ts"], + requiredCapabilities: selfhostDockerCapabilities, + fileParallelism: false, + testTimeout: 180_000, + hookTimeout: 120_000, + tier: "manual", + hermetic: false, + }, + { + name: "selfhost-docker-hermetic", + target: "selfhost-docker", + include: [SHARED_SCENARIOS, "selfhost/**/*.test.ts"], + exclude: LIVE_DRIFT_SCENARIOS, + globalSetup: ["./setup/selfhost-docker.globalsetup.ts"], + requiredCapabilities: selfhostDockerCapabilities, + fileParallelism: false, + testTimeout: 180_000, + hookTimeout: 120_000, + tier: "portable", + hermetic: true, + }, + { + name: "cloudflare", + target: "cloudflare", + include: [ + "scenarios/browser-approval.test.ts", + "scenarios/microsoft-graph-full.test.ts", + "scenarios/toolkits-mcp.test.ts", + "cloudflare/**/*.test.ts", + ], + globalSetup: ["./setup/cloudflare.globalsetup.ts"], + requiredCapabilities: cloudflareCapabilities, + fileParallelism: false, + testTimeout: 180_000, + hookTimeout: 120_000, + tier: "manual", + hermetic: false, + }, + { + name: "cloudflare-hermetic", + target: "cloudflare", + include: [ + "scenarios/browser-approval.test.ts", + "scenarios/toolkits-mcp.test.ts", + 
"cloudflare/**/*.test.ts", + ], + globalSetup: ["./setup/cloudflare.globalsetup.ts"], + requiredCapabilities: cloudflareCapabilities, + fileParallelism: false, + testTimeout: 180_000, + hookTimeout: 120_000, + tier: "portable", + hermetic: true, + }, + { + name: "desktop", + target: "desktop", + include: ["desktop/**/*.test.ts"], + globalSetup: ["./setup/desktop.globalsetup.ts"], + requiredCapabilities: [], + fileParallelism: false, + testTimeout: 300_000, + hookTimeout: 120_000, + tier: "native-desktop", + hermetic: true, + }, + { + name: "desktop-packaged", + target: "desktop-packaged", + include: ["desktop-packaged/**/*.test.ts"], + globalSetup: ["./setup/desktop-packaged.globalsetup.ts"], + requiredCapabilities: [], + fileParallelism: false, + testTimeout: 360_000, + hookTimeout: 600_000, + tier: "native-desktop", + hermetic: true, + }, + { + name: "desktop-kvm", + target: "desktop-kvm", + include: ["desktop-kvm/**/*.test.ts"], + globalSetup: ["./setup/desktop-kvm.globalsetup.ts"], + requiredCapabilities: ["desktop-gui"], + fileParallelism: false, + testTimeout: 360_000, + hookTimeout: 900_000, + env: { E2E_DESKTOP_GUI_REQUIRED: "1" }, + tier: "heavy-vm", + hermetic: true, + }, + { + name: "local", + target: "local", + include: ["local/**/*.test.ts"], + globalSetup: [], + requiredCapabilities: ["browser"], + fileParallelism: true, + testTimeout: 180_000, + hookTimeout: 120_000, + tier: "portable", + hermetic: true, + }, + { + name: "cli-macos", + target: "cli-macos", + include: ["scenarios/restart-persistence.test.ts", "cli/**/*.test.ts"], + globalSetup: ["./setup/cli-macos.globalsetup.ts"], + requiredCapabilities: ["api", "restart"], + fileParallelism: false, + testTimeout: 300_000, + hookTimeout: 900_000, + env: { E2E_VM_OS: "macos" }, + tier: "heavy-vm", + hermetic: true, + }, + { + name: "cli-linux", + target: "cli-linux", + include: ["scenarios/restart-persistence.test.ts", "cli/**/*.test.ts"], + globalSetup: ["./setup/cli-linux.globalsetup.ts"], + 
/**
 * Reads the capability requirement mode from `E2E_REQUIRED_CAPABILITY_MODE`.
 *
 * The value is compared case-insensitively after trimming whitespace. An
 * unset or empty variable means "allow-skips". Any other unrecognized value
 * is rejected instead of silently downgrading to "allow-skips", so a typo in
 * CI cannot turn a required capability back into a green skip.
 *
 * @param env Environment to read; defaults to `process.env`.
 * @returns "required" or "allow-skips".
 * @throws Error when the variable holds a non-empty value that is neither
 *   "required" nor "allow-skips".
 */
export const capabilityRequirementMode = (
  env: Readonly<Record<string, string | undefined>> = process.env,
): CapabilityRequirementMode => {
  const raw = env.E2E_REQUIRED_CAPABILITY_MODE;
  const mode = raw?.trim().toLowerCase() ?? "";
  if (mode === "required") return "required";
  if (mode === "" || mode === "allow-skips") return "allow-skips";
  throw new Error(
    `e2e: E2E_REQUIRED_CAPABILITY_MODE must be "required" or "allow-skips", got ${JSON.stringify(raw)}`,
  );
};
/**
 * Resolves the capability policy for the project named by the environment.
 *
 * The project name comes from `E2E_PROJECT`, falling back to `E2E_TARGET`
 * and then to the empty string. An empty variable is treated the same as an
 * unset one, so an exported-but-blank `E2E_PROJECT` does not mask a usable
 * `E2E_TARGET`.
 *
 * @param env Environment to read; defaults to `process.env`.
 * @returns The resolved project name, the requirement mode, and the
 *   capabilities that project requires (empty for an unknown project).
 */
export const currentProjectPolicy = (
  env: Readonly<Record<string, string | undefined>> = process.env,
) => {
  // `||` on purpose: "" must fall through to the next candidate.
  const projectName = env.E2E_PROJECT || env.E2E_TARGET || "";
  return {
    projectName,
    mode: capabilityRequirementMode(env),
    requiredCapabilities: requiredCapabilitiesFor(projectName),
  };
};
/**
 * Returns the lowercased extension of `name`, including the leading dot, or
 * "" when there is none.
 *
 * A dot in the first position marks a hidden file, not an extension, so
 * ".html" has no extension. This matches `path.extname` and keeps a dotfile
 * from being classified by a suffix it does not really have.
 */
const extensionOf = (name: string): string => {
  const dot = name.lastIndexOf(".");
  // -1: no dot at all; 0: the only dot is the hidden-file prefix.
  if (dot <= 0) return "";
  return name.slice(dot).toLowerCase();
};
// File names inside a run directory that are always published as JSON.
const RUN_JSON_NAMES: ReadonlySet<string> = new Set([
  "result.json",
  "skipped.json",
  "traces.json",
  "timeline.json",
  "evidence.json",
  "lane-provenance.json",
]);
const RUN_JSON_SUFFIXED = /^[a-z0-9][a-z0-9-]*(?:-events|-ledger|-metadata|-traces)\.json$/;
const RUN_LOG = /^[a-z0-9][a-z0-9-]*\.log$/;
const RUN_NUMBERED_SCREENSHOT = /^\d{2,4}-[a-z0-9][a-z0-9-]*\.png$/;

/**
 * Classifies one file name inside a run directory.
 *
 * Returns the publication kind and MIME type for allowlisted names and
 * `undefined` for everything else. `trace.zip` is only published when
 * `options.includeRawTrace` is set. Screenshots and videos are flagged as
 * unredacted visuals.
 */
const runArtifact = (name: string, options: PublicationOptions): PublishedArtifact | undefined => {
  const text = (mime: string): PublishedArtifact => ({ kind: "text", mime });
  const visual = (mime: string): PublishedArtifact => ({
    kind: "binary",
    mime,
    unredactedVisual: true,
  });

  if (RUN_JSON_NAMES.has(name) || RUN_JSON_SUFFIXED.test(name)) {
    return { kind: "json", mime: "application/json; charset=utf-8" };
  }

  switch (name) {
    case "test.ts":
      return text("text/plain; charset=utf-8");
    case "terminal.cast":
      return text("application/x-asciicast; charset=utf-8");
    case "failure.png":
    case "renderer-after-settings-click.png":
      return visual("image/png");
    case "session.mp4":
    case "film.mp4":
      return visual("video/mp4");
    case "session.webm":
      return visual("video/webm");
    case "trace.zip":
      return options.includeRawTrace ? { kind: "binary", mime: "application/zip" } : undefined;
    default:
      break;
  }

  if (RUN_LOG.test(name)) return text("text/plain; charset=utf-8");
  if (RUN_NUMBERED_SCREENSHOT.test(name)) return visual("image/png");
  return undefined;
};
/**
 * Classify one path relative to e2e/runs. Only allowlisted locations are
 * publishable; every other path yields `undefined` and stays private.
 *
 * Accepted layouts:
 *  - `index.html`, `manifest.json`, `publication.json` at the root;
 *  - `assets/<file>` with a static extension;
 *  - `trace-viewer/<file>` or `trace-viewer/<dir>/<file>` with a static
 *    extension;
 *  - `<target>/<run>/<file>` where the file is a recognized run artifact.
 */
export const publishedArtifactFor = (
  relativePath: string,
  options: PublicationOptions = {},
): PublishedArtifact | undefined => {
  const segments = relativePath.split("/").filter(Boolean);
  if (!segments.every((segment) => SAFE_SEGMENT.test(segment))) return undefined;

  const [head, second, third] = segments;

  if (segments.length === 1) {
    switch (head) {
      case "index.html":
        return staticArtifact(head);
      case "manifest.json":
      case "publication.json":
        return { kind: "json", mime: "application/json; charset=utf-8" };
      default:
        return undefined;
    }
  }

  if (head === "assets") {
    return segments.length === 2 ? staticArtifact(second ?? "") : undefined;
  }

  if (head === "trace-viewer") {
    // Every segment already passed SAFE_SEGMENT above, so only depth matters.
    if (segments.length > 3) return undefined;
    return staticArtifact(segments[segments.length - 1] ?? "");
  }

  if (segments.length !== 3 || !head || !second || !third) return undefined;
  const isRunPath = TARGET_SEGMENT.test(head) && RUN_SEGMENT.test(second);
  return isRunPath ? runArtifact(third, options) : undefined;
};
/**
 * Replaces every occurrence of each known secret in `text` with
 * "[REDACTED]".
 *
 * Secrets shorter than four characters are ignored. Secrets are applied
 * longest first: when one secret is a prefix or substring of another,
 * replacing the shorter one first would break the longer match and leave its
 * remaining characters in the output. The `secrets` array is not mutated.
 *
 * @param text Text to scrub.
 * @param secrets Literal values to remove.
 * @returns The scrubbed text.
 */
const replaceKnownSecrets = (text: string, secrets: ReadonlyArray<string>): string => {
  const longestFirst = [...new Set(secrets)]
    .filter((secret) => secret.length >= 4)
    .sort((a, b) => b.length - a.length);
  return longestFirst.reduce(
    (sanitized, secret) => sanitized.split(secret).join("[REDACTED]"),
    text,
  );
};
/**
 * Reusable recursive redactor for OTLP exports and other JSON evidence.
 *
 * - Strings are passed through `sanitizePublishedText`.
 * - Arrays are sanitized element by element.
 * - Objects are rebuilt: the value under a sensitive key becomes
 *   "[REDACTED]", other values are sanitized recursively.
 * - Every other value (numbers, booleans, null, undefined) is returned as is.
 *
 * Object keys are sanitized as text too. Otherwise a known secret, URL
 * credential, or home-directory path used as a key would be published
 * verbatim. Sensitivity is decided on the original key. If two keys sanitize
 * to the same text, the later entry wins.
 */
export const sanitizePublishedValue = (
  value: unknown,
  options: SanitizationOptions = {},
): unknown => {
  if (typeof value === "string") return sanitizePublishedText(value, options);
  if (Array.isArray(value)) return value.map((entry) => sanitizePublishedValue(entry, options));
  if (typeof value !== "object" || value === null) return value;
  return Object.fromEntries(
    Object.entries(value).map(([key, entry]) => [
      sanitizePublishedText(key, options),
      sensitiveKey(key) ? "[REDACTED]" : sanitizePublishedValue(entry, options),
    ]),
  );
};
/**
 * Asciinema is JSON Lines, so redact values without corrupting its framing.
 *
 * Each non-empty line is parsed as JSON, sanitized as a value, and
 * re-serialized. A line that cannot be processed that way is sanitized as
 * plain text instead. Empty lines and the line count are preserved.
 */
export const sanitizePublishedCast = (
  contents: string,
  options: SanitizationOptions = {},
): string => {
  const sanitizedLines: string[] = [];
  for (const line of contents.split("\n")) {
    if (line === "") {
      sanitizedLines.push(line);
      continue;
    }
    let sanitized: string;
    try {
      sanitized = JSON.stringify(sanitizePublishedValue(JSON.parse(line), options));
    } catch {
      // Not a JSON frame: fall back to text redaction.
      sanitized = sanitizePublishedText(line, options);
    }
    sanitizedLines.push(sanitized);
  }
  return sanitizedLines.join("\n");
};
import { execFileSync } from "node:child_process"; -import { existsSync, mkdirSync, readFileSync, readdirSync, rmSync, writeFileSync } from "node:fs"; +import { randomUUID } from "node:crypto"; +import { existsSync, mkdirSync, readdirSync } from "node:fs"; import { join } from "node:path"; import { fileURLToPath } from "node:url"; @@ -25,11 +26,23 @@ import { makeBrowserSurface } from "./surfaces/browser"; import { makeCliSurface } from "./surfaces/cli"; import { makeMcpSurface } from "./surfaces/mcp"; import { makeTelemetrySurface } from "./surfaces/telemetry"; +import { + hasClaudeCode, + makeClaudeCodeHome, + removeClaudeCodeHome, + replaceClaudeCodeServer, + runClaudeCode, +} from "./clients/claude-code"; import { completeOAuthConsent, hasOpenCode, makeOpenCodeHome, warmUp } from "./clients/opencode"; +import { evidenceReferenceFor, writeJsonAtomicSync } from "./artifact-io"; +import { writeRunLaneProvenance } from "./evidence-provenance"; +import { currentProjectPolicy, isCapabilityRequired } from "./project-matrix"; +import { exportPortableTraces } from "./portable-traces"; import { Api, Billing, Browser, + ClaudeCode, Cli, Mcp, OpenCode, @@ -39,6 +52,7 @@ import { Telemetry, TtlControl, } from "./services"; +import { writeFocusedTestSource } from "./test-source"; import { buildManifest } from "./viewer/manifest"; export const RUNS_DIR = fileURLToPath(new URL("../runs/", import.meta.url)); @@ -63,6 +77,7 @@ type AllServices = | Mcp | Billing | OpenCode + | ClaudeCode | TtlControl | Restart | Telemetry; @@ -73,7 +88,8 @@ type AllServices = * one fails with Effect's missing-service defect, which the runner turns * into the skip. 
*/ -const contextFor = (target: TargetShape, dir: string): Context.Context => { +const contextFor = (target: TargetShape, dir: string) => { + let mcpSurface: ReturnType | undefined; let context = Context.empty().pipe( Context.add(Target, target), Context.add(RunDir, dir), @@ -82,7 +98,10 @@ const contextFor = (target: TargetShape, dir: string): Context.Context, ): void => { const target = resolveTarget(); - const dir = join(RUNS_DIR, target.name, slugify(name)); - const context = contextFor(target, dir); + const slug = slugify(name); const testFile = captureTestFile(); it.live( name, (testCtx) => Effect.gen(function* () { - // A run's directory is the run — never mix artifacts across attempts. - rmSync(dir, { recursive: true, force: true }); + const attemptId = randomUUID(); + const dir = join(RUNS_DIR, target.name, `${slug}--${attemptId}`); mkdirSync(dir, { recursive: true }); + const laneProvenance = writeRunLaneProvenance(dir, target.name); + const evidence = evidenceReferenceFor(dir, attemptId); + const { context, cleanup } = contextFor(target, dir); const startedAt = Date.now(); const exit = yield* Effect.exit( - body.pipe(Effect.provideContext(context)) as Effect.Effect< - void, - unknown, - HttpClient.HttpClient - >, + ( + body.pipe(Effect.provideContext(context)) as Effect.Effect< + void, + unknown, + HttpClient.HttpClient + > + ).pipe(Effect.ensuring(cleanup)), ); const endedAt = Date.now(); + const portableTraces = process.env.E2E_MOTEL_URL + ? yield* exportPortableTraces(dir, process.env.E2E_MOTEL_URL) + : undefined; // Yielding a service this target can't provide is the skip signal. const missing = exit._tag === "Failure" ? 
missingServices(exit.cause) : []; - if (missing.length > 0) { - rmSync(dir, { recursive: true, force: true }); - mkdirSync(dir, { recursive: true }); - writeFileSync( - join(dir, "skipped.json"), - JSON.stringify({ scenario: name, target: target.name, missing }, null, 1), - ); + const policy = currentProjectPolicy(); + const requiredMissing = missing.filter((capability) => + isCapabilityRequired(policy.projectName, capability), + ); + if (missing.length > 0 && requiredMissing.length === 0) { + writeJsonAtomicSync(join(dir, "skipped.json"), { + scenario: name, + target: target.name, + missing, + ...evidence, + }); buildManifest(RUNS_DIR); return yield* Effect.sync(() => testCtx.skip(`needs ${missing.join(", ")} — not on ${target.name}`), @@ -146,27 +187,15 @@ export const scenario = ( } const error = exit._tag === "Failure" ? failureMessage(exit.cause) : undefined; - // The test source is the review artifact — ship this scenario's code - // (imports + sibling scenarios stripped) alongside the run. - const source = testFile ? extractScenarioSource(testFile, name) : undefined; - if (source) writeFileSync(join(dir, "test.ts"), source); - writeFileSync( - join(dir, "result.json"), - JSON.stringify( - { - scenario: name, - target: target.name, - ok: exit._tag === "Success", - startedAt, - endedAt, - durationMs: endedAt - startedAt, - ...(error ? { error } : {}), - artifacts: readdirSync(dir).filter((f) => f !== "result.json"), - }, - null, - 1, - ), - ); + const evidenceError = + portableTraces && + isCapabilityRequired(policy.projectName, "telemetry") && + (portableTraces.missing > 0 || portableTraces.invalid > 0) + ? `portable trace export incomplete: ${portableTraces.missing} missing, ${portableTraces.invalid} invalid` + : undefined; + // The test source is the review artifact. Ship the named registration + // with imports and sibling tests removed, plus extraction provenance. 
+ if (testFile) writeFocusedTestSource({ runDir: dir, filePath: testFile, testName: name }); // A run with both recordings is ONE developer session — splice them // into film.mp4 (scripts/film.ts cuts on the focus timeline) so the // viewer plays a single recording, not parts. Best-effort: missing @@ -190,10 +219,37 @@ export const scenario = ( } }); } + writeJsonAtomicSync(join(dir, "result.json"), { + scenario: name, + target: target.name, + ok: exit._tag === "Success" && evidenceError === undefined, + startedAt, + endedAt, + durationMs: endedAt - startedAt, + ...evidence, + ...(laneProvenance + ? { + project: laneProvenance.project, + visualEvidence: { + dataClassification: laneProvenance.dataClassification, + }, + } + : {}), + ...(requiredMissing.length > 0 ? { missingRequiredCapabilities: requiredMissing } : {}), + ...(portableTraces ? { portableTraces } : {}), + ...((error ?? evidenceError) ? { error: error ?? evidenceError } : {}), + artifacts: readdirSync(dir).filter((f) => f !== "result.json"), + }); buildManifest(RUNS_DIR); if (exit._tag === "Failure") { return yield* Effect.failCause(exit.cause); } + if (evidenceError) { + return yield* Effect.fail({ + _tag: "PortableTraceEvidenceIncomplete", + message: evidenceError, + } as const); + } }).pipe(Effect.provide(FetchHttpClient.layer)), options.timeout ?? 120_000, ); @@ -221,47 +277,3 @@ const captureTestFile = (): string | undefined => { } return undefined; }; - -/** - * This scenario's code as a reader sees it: the file minus import statements - * and minus every OTHER scenario() block (module-level helpers stay — they're - * part of understanding the test). Falls back to undefined on any surprise so - * a parsing edge case can never fail a run. 
- */ -const extractScenarioSource = (filePath: string, name: string): string | undefined => { - try { - const source = readFileSync(filePath, "utf8").replace(/^import[\s\S]*?;[^\S\n]*$/gm, ""); - const needle = "scenario("; - const blocks: Array<{ start: number; end: number; mine: boolean }> = []; - let index = 0; - while ((index = source.indexOf(needle, index)) !== -1) { - let depth = 0; - let end = -1; - for (let i = index + needle.length - 1; i < source.length; i++) { - if (source[i] === "(") depth++; - else if (source[i] === ")") { - depth--; - if (depth === 0) { - end = source[i + 1] === ";" ? i + 2 : i + 1; - break; - } - } - } - if (end === -1) return undefined; // unbalanced — bail to be safe - blocks.push({ - start: index, - end, - mine: source.slice(index, end).includes(`"${name}"`), - }); - index = end; - } - if (!blocks.some((b) => b.mine)) return undefined; - let out = source; - for (const block of [...blocks].reverse()) { - if (!block.mine) out = out.slice(0, block.start) + out.slice(block.end); - } - return `${out.replace(/\n{3,}/g, "\n\n").trim()}\n`; - } catch { - return undefined; - } -}; diff --git a/e2e/src/services.ts b/e2e/src/services.ts index 0c1bf6e27..8dd9f563d 100644 --- a/e2e/src/services.ts +++ b/e2e/src/services.ts @@ -12,6 +12,12 @@ import type { BrowserSurface } from "./surfaces/browser"; import type { CliSurface } from "./surfaces/cli"; import type { McpSurface } from "./surfaces/mcp"; import type { TelemetrySurface } from "./surfaces/telemetry"; +import type { + makeClaudeCodeHome, + removeClaudeCodeHome, + replaceClaudeCodeServer, + runClaudeCode, +} from "./clients/claude-code"; import type { completeOAuthConsent, makeOpenCodeHome, warmUp } from "./clients/opencode"; /** The target under test (always provided). */ @@ -47,6 +53,17 @@ export interface OpenCodeClient { } export class OpenCode extends Context.Service()("e2e/opencode") {} +/** The real Claude Code binary with isolated state and replayed model inference. 
*/ +export interface ClaudeCodeClient { + readonly makeHome: typeof makeClaudeCodeHome; + readonly run: typeof runClaudeCode; + readonly replaceServer: typeof replaceClaudeCodeServer; + readonly removeHome: typeof removeClaudeCodeHome; +} +export class ClaudeCode extends Context.Service()( + "e2e/claude-code", +) {} + /** Compress (or restore, with null) the authorization server's access-token TTL. */ export class TtlControl extends Context.Service< TtlControl, diff --git a/e2e/src/surfaces/browser.ts b/e2e/src/surfaces/browser.ts index 2cc24903e..90d351b07 100644 --- a/e2e/src/surfaces/browser.ts +++ b/e2e/src/surfaces/browser.ts @@ -63,12 +63,29 @@ export const makeBrowserSurface = (dir: string, target: Target): BrowserSurface ? { slowMo } : {}, ); + // Cloudflare Access adds its signed assertion as an origin-facing + // header. Browser identities carry that header directly in hermetic + // runs. Cookie headers remain handled by addCookies below because + // Chromium owns the Cookie header. + const identityHeaders = Object.fromEntries( + Object.entries(identity.headers ?? {}).filter( + ([name]) => name.toLowerCase() !== "cookie", + ), + ); const context = await browser.newContext({ colorScheme: "dark", viewport: { width: 1280, height: 800 }, recordVideo: { dir: videoTmp, size: { width: 1280, height: 800 } }, baseURL: target.baseUrl, }); + if (Object.keys(identityHeaders).length > 0) { + const targetOrigin = new URL(target.baseUrl).origin; + await context.route(`${targetOrigin}/**`, (route) => + route.continue({ + headers: { ...route.request().headers(), ...identityHeaders }, + }), + ); + } await context.tracing.start({ screenshots: true, snapshots: true, diff --git a/e2e/src/surfaces/mcp.ts b/e2e/src/surfaces/mcp.ts index 99360a76b..a950c7f1e 100644 --- a/e2e/src/surfaces/mcp.ts +++ b/e2e/src/surfaces/mcp.ts @@ -4,7 +4,8 @@ // methods are Effects; // mcporter itself is promise-native underneath. Assertions are vitest's job. 
import { createHash, randomBytes, randomUUID } from "node:crypto"; -import { mkdtempSync, writeFileSync } from "node:fs"; +import { AsyncLocalStorage } from "node:async_hooks"; +import { mkdtempSync, rmSync, writeFileSync } from "node:fs"; import { tmpdir } from "node:os"; import { join } from "node:path"; @@ -28,8 +29,14 @@ import type { Identity, Target } from "../target"; // duration, status, and source: "terminal". // --------------------------------------------------------------------------- -let traceFetchInstalled = false; -let traceSink: { mcpUrl: string; runDir: string } | null = null; +interface TraceSink { + readonly mcpUrl: string; + readonly runDir: string; +} + +const traceSink = new AsyncLocalStorage(); +let traceFetchUsers = 0; +let originalFetch: typeof globalThis.fetch | undefined; /** JSON-RPC body → a human label: tool name for tools/call, else method. */ const rpcLabel = (body: unknown): string | undefined => { @@ -48,17 +55,23 @@ const rpcLabel = (body: unknown): string | undefined => { } }; -const installTraceparentFetch = (mcpUrl: string, runDir: string): void => { - traceSink = { mcpUrl, runDir }; - if (traceFetchInstalled) return; - traceFetchInstalled = true; +const acquireTraceparentFetch = () => { + traceFetchUsers += 1; + const release = () => { + traceFetchUsers -= 1; + if (traceFetchUsers !== 0 || !originalFetch) return; + globalThis.fetch = originalFetch; + originalFetch = undefined; + }; + if (originalFetch) return release; const original = globalThis.fetch; + originalFetch = original; globalThis.fetch = async (input, init) => { const url = typeof input === "string" ? input : input instanceof URL ? input.href : input.url; const method = ( init?.method ?? (input instanceof Request ? 
input.method : "GET") ).toUpperCase(); - const sink = traceSink; + const sink = traceSink.getStore(); if (!sink || method !== "POST" || !url.startsWith(sink.mcpUrl)) { return original(input, init); } @@ -89,8 +102,12 @@ const installTraceparentFetch = (mcpUrl: string, runDir: string): void => { throw error; } }; + return release; }; +const withTraceSink = (sink: TraceSink | undefined, operation: () => Promise) => + sink ? traceSink.run(sink, operation) : operation(); + export interface McpCallResult { readonly raw: unknown; readonly text: string; @@ -172,7 +189,9 @@ export interface McpSurface { * client *behavior* (scope choices, refresh, token storage) is never * modeled here; that's what driving the real client binaries is for. */ - readonly mintBearer: (email: string) => Effect.Effect; + readonly mintBearer: (identity: Identity | string) => Effect.Effect; + /** Close every client/runtime and remove its private credential/config home. */ + readonly close: () => Effect.Effect; } const textOf = (result: unknown): string => { @@ -201,11 +220,12 @@ const jsonFrom = async (response: Response, label: string): Promise => { return JSON.parse(text) as T; }; -const mintBearerFlow = async (target: Target, email: string): Promise => { - const consent = target.mcpConsent?.({ - label: email, - credentials: { email, password: "" }, - }); +const mintBearerFlow = async (target: Target, identity: Identity | string): Promise => { + const consentIdentity: Identity = + typeof identity === "string" + ? 
{ label: identity, credentials: { email: identity, password: "" } } + : identity; + const consent = target.mcpConsent?.(consentIdentity); if (!consent) throw new Error(`target ${target.name} has no mcpConsent strategy`); const mcpPath = new URL(target.mcpUrl).pathname; @@ -282,67 +302,179 @@ const mintBearerFlow = async (target: Target, email: string): Promise => return token.access_token; }; -export const makeMcpSurface = (target: Target, runDir?: string): McpSurface => ({ - url: target.mcpUrl, - mintBearer: (email) => Effect.promise(() => mintBearerFlow(target, email)), - session: (identity, options) => { - const mcpUrl = options?.url ?? target.mcpUrl; - if (runDir) installTraceparentFetch(mcpUrl, runDir); - // mcporter caches OAuth tokens (and the DCR client) per server NAME, so a - // constant name would let a later session reuse an earlier identity's token - // — landing in the wrong org. A unique name per session keeps each - // identity's OAuth isolated. The traceparent ledger keys off the URL, not - // this name, so it is unaffected. - const serverName = `${target.name}-${randomUUID().slice(0, 8)}`; - // `browser` mode is selected per the ecosystem convention — an - // `?elicitation_mode=` query on the MCP endpoint — so a paused execution - // yields an approvalUrl instead of letting the model resume inline. - const sessionUrl = options?.elicitationMode - ? `${mcpUrl}?elicitation_mode=${options.elicitationMode}` - : mcpUrl; - - if (target.name === "cloudflare") { - let clientPromise: Promise | undefined; - const client = () => { - if (!clientPromise) { - clientPromise = (async () => { - const directClient = new Client( - { name: serverName, version: "1.0.0" }, - { capabilities: {} }, - ); - const transport = new StreamableHTTPClientTransport(new URL(sessionUrl), { - requestInit: { headers: identity.headers ?? 
{} }, - }); - await directClient.connect(transport); - return directClient; - })(); +export const makeMcpSurface = (target: Target, runDir?: string): McpSurface => { + const cleanups = new Set<() => Promise>(); + const releaseTraceFetch = runDir ? acquireTraceparentFetch() : undefined; + let closed = false; + + return { + url: target.mcpUrl, + mintBearer: (identity) => Effect.promise(() => mintBearerFlow(target, identity)), + close: () => + Effect.promise(async () => { + if (closed) return; + closed = true; + const pending = [...cleanups]; + cleanups.clear(); + await Promise.allSettled(pending.map((cleanup) => cleanup())); + releaseTraceFetch?.(); + }), + session: (identity, options) => { + const mcpUrl = options?.url ?? target.mcpUrl; + const sessionTraceSink = runDir ? { mcpUrl, runDir } : undefined; + // mcporter caches OAuth tokens (and the DCR client) per server NAME, so a + // constant name would let a later session reuse an earlier identity's token + // and land in the wrong org. A unique name per session keeps each + // identity's OAuth isolated. The traceparent ledger keys off the URL, not + // this name, so it is unaffected. + const serverName = `${target.name}-${randomUUID().slice(0, 8)}`; + // `browser` mode is selected per the ecosystem convention: an + // `?elicitation_mode=` query on the MCP endpoint, so a paused execution + // yields an approvalUrl instead of letting the model resume inline. + const sessionUrl = options?.elicitationMode + ? 
`${mcpUrl}?elicitation_mode=${options.elicitationMode}` + : mcpUrl; + + if (target.name === "cloudflare") { + let clientPromise: Promise | undefined; + const cleanup = async () => { + if (clientPromise) await (await clientPromise).close(); + }; + cleanups.add(cleanup); + const client = () => { + if (!clientPromise) { + clientPromise = (async () => { + const directClient = new Client( + { name: serverName, version: "1.0.0" }, + { capabilities: {} }, + ); + const transport = new StreamableHTTPClientTransport(new URL(sessionUrl), { + requestInit: { headers: identity.headers ?? {} }, + }); + await directClient.connect(transport); + return directClient; + })(); + } + return clientPromise; + }; + + const listTools = () => + Effect.promise(() => + withTraceSink(sessionTraceSink, async () => { + const listed = await (await client()).listTools(); + return listed.tools.map((tool) => tool.name); + }), + ); + + const call = (name: string, args: Record = {}) => + Effect.promise(() => + withTraceSink(sessionTraceSink, async () => { + const raw = await (await client()).callTool({ name, arguments: args }); + const isError = Boolean((raw as { isError?: boolean })?.isError); + return { raw, text: textOf(raw), ok: !isError }; + }), + ); + + return { + listTools, + describeTools: () => + Effect.promise(() => + withTraceSink(sessionTraceSink, async () => { + const listed = await (await client()).listTools(); + return listed.tools.map((tool) => ({ + name: tool.name, + description: tool.description ?? 
"", + })); + }), + ), + call, + approvePaused: (text, content = {}) => + Effect.suspend(() => { + const match = /\bexecutionId:\s*(\S+)/.exec(text); + if (!match) + return Effect.die(new Error("approvePaused: executionId not found in text")); + return call("resume", { + executionId: match[1], + action: "accept", + content: JSON.stringify(content), + }); + }), + awaitResume: (executionId) => call("resume", { executionId }), + }; + } + + let runtimePromise: Promise | undefined; + let runtimeDir: string | undefined; + let connected = false; + + const cleanup = async () => { + if (runtimePromise) await (await runtimePromise).close(); + if (runtimeDir) rmSync(runtimeDir, { recursive: true, force: true }); + }; + cleanups.add(cleanup); + + const consent = target.mcpConsent?.(identity); + const callOptions = { + autoAuthorize: true, + oauthSessionOptions: consent ? { consentStrategy: consent } : {}, + }; + + const runtime = () => { + if (!runtimePromise) { + const dir = mkdtempSync(join(tmpdir(), "executor-e2e-mcp-")); + runtimeDir = dir; + writeFileSync( + join(dir, "mcporter.json"), + JSON.stringify({ + mcpServers: { [serverName]: { url: sessionUrl } }, + }), + ); + runtimePromise = createRuntime({ + configPath: join(dir, "mcporter.json"), + }); } - return clientPromise; + return runtimePromise; }; const listTools = () => - Effect.promise(async () => { - const listed = await (await client()).listTools(); - return listed.tools.map((tool) => tool.name); - }); - - const call = (name: string, args: Record = {}) => - Effect.promise(async (): Promise => { - const raw = await (await client()).callTool({ name, arguments: args }); - const isError = Boolean((raw as { isError?: boolean })?.isError); - return { raw, text: textOf(raw), ok: !isError }; - }); + Effect.promise(() => + withTraceSink(sessionTraceSink, async () => { + const defs = await (await runtime()).listTools(serverName, callOptions); + connected = true; + return defs.map((tool: { name: string }) => tool.name); + }), 
+ ); - return { - listTools, - describeTools: () => - Effect.promise(async (): Promise> => { - const listed = await (await client()).listTools(); - return listed.tools.map((tool) => ({ + const describeTools = () => + Effect.promise(() => + withTraceSink(sessionTraceSink, async () => { + const defs = await (await runtime()).listTools(serverName, callOptions); + connected = true; + return defs.map((tool: { name: string; description?: string }) => ({ name: tool.name, description: tool.description ?? "", })); }), + ); + + const call = (name: string, args: Record = {}) => + Effect.promise(() => + withTraceSink(sessionTraceSink, async () => { + if (!connected) { + await (await runtime()).listTools(serverName, callOptions); + connected = true; + } + const raw = await ( + await runtime() + ).callTool(serverName, name, { args, ...callOptions }); + const isError = Boolean((raw as { isError?: boolean })?.isError); + return { raw, text: textOf(raw), ok: !isError }; + }), + ); + + return { + listTools, + describeTools, call, approvePaused: (text, content = {}) => Effect.suspend(() => { @@ -355,80 +487,10 @@ export const makeMcpSurface = (target: Target, runDir?: string): McpSurface => ( content: JSON.stringify(content), }); }), + // No action argument: in browser mode `resume` blocks until the human's + // decision arrives via the console, then returns the resumed result. awaitResume: (executionId) => call("resume", { executionId }), }; - } - - let runtimePromise: Promise | undefined; - let connected = false; - - const consent = target.mcpConsent?.(identity); - const callOptions = { - autoAuthorize: true, - oauthSessionOptions: consent ? 
{ consentStrategy: consent } : {}, - }; - - const runtime = () => { - if (!runtimePromise) { - const dir = mkdtempSync(join(tmpdir(), "executor-e2e-mcp-")); - writeFileSync( - join(dir, "mcporter.json"), - JSON.stringify({ - mcpServers: { [serverName]: { url: sessionUrl } }, - }), - ); - runtimePromise = createRuntime({ - configPath: join(dir, "mcporter.json"), - }); - } - return runtimePromise; - }; - - const listTools = () => - Effect.promise(async () => { - const defs = await (await runtime()).listTools(serverName, callOptions); - connected = true; - return defs.map((tool: { name: string }) => tool.name); - }); - - const describeTools = () => - Effect.promise(async (): Promise> => { - const defs = await (await runtime()).listTools(serverName, callOptions); - connected = true; - return defs.map((tool: { name: string; description?: string }) => ({ - name: tool.name, - description: tool.description ?? "", - })); - }); - - const call = (name: string, args: Record = {}) => - Effect.promise(async (): Promise => { - if (!connected) { - await (await runtime()).listTools(serverName, callOptions); - connected = true; - } - const raw = await (await runtime()).callTool(serverName, name, { args, ...callOptions }); - const isError = Boolean((raw as { isError?: boolean })?.isError); - return { raw, text: textOf(raw), ok: !isError }; - }); - - return { - listTools, - describeTools, - call, - approvePaused: (text, content = {}) => - Effect.suspend(() => { - const match = /\bexecutionId:\s*(\S+)/.exec(text); - if (!match) return Effect.die(new Error("approvePaused: executionId not found in text")); - return call("resume", { - executionId: match[1], - action: "accept", - content: JSON.stringify(content), - }); - }), - // No action argument: in browser mode `resume` blocks until the human's - // decision arrives via the console, then returns the resumed result. 
- awaitResume: (executionId) => call("resume", { executionId }), - }; - }, -}); + }, + }; +}; diff --git a/e2e/src/test-source.ts b/e2e/src/test-source.ts new file mode 100644 index 000000000..0dbd1308d --- /dev/null +++ b/e2e/src/test-source.ts @@ -0,0 +1,154 @@ +import { basename, join, relative, sep } from "node:path"; +import { fileURLToPath } from "node:url"; + +import ts from "typescript"; + +import { writeJsonAtomicSync, writeTextAtomicSync } from "./artifact-io"; +import { writeRunLaneProvenance } from "./evidence-provenance"; + +const E2E_ROOT = fileURLToPath(new URL("../", import.meta.url)); + +const registrationName = (expression: ts.LeftHandSideExpression) => { + if (ts.isIdentifier(expression) && (expression.text === "scenario" || expression.text === "it")) { + return expression.text; + } + if ( + ts.isPropertyAccessExpression(expression) && + ts.isIdentifier(expression.expression) && + expression.expression.text === "it" && + (expression.name.text === "live" || expression.name.text === "effect") + ) { + return `it.${expression.name.text}`; + } + return undefined; +}; + +const resolvedString = ( + expression: ts.Expression, + bindings: ReadonlyMap, +): string | undefined => { + if (ts.isStringLiteral(expression) || ts.isNoSubstitutionTemplateLiteral(expression)) { + return expression.text; + } + if (ts.isIdentifier(expression)) return bindings.get(expression.text); + if (!ts.isTemplateExpression(expression)) return undefined; + + let value = expression.head.text; + for (const span of expression.templateSpans) { + const interpolation = resolvedString(span.expression, bindings); + if (interpolation === undefined) return undefined; + value += interpolation + span.literal.text; + } + return value; +}; + +const stringBindings = (sourceFile: ts.SourceFile) => { + const bindings = new Map(); + const visit = (node: ts.Node): void => { + if (ts.isVariableStatement(node)) { + for (const declaration of node.declarationList.declarations) { + if 
(!ts.isIdentifier(declaration.name) || !declaration.initializer) continue; + const value = resolvedString(declaration.initializer, bindings); + if (value !== undefined && !bindings.has(declaration.name.text)) { + bindings.set(declaration.name.text, value); + } + } + } + ts.forEachChild(node, visit); + }; + visit(sourceFile); + return bindings; +}; + +const registrationsIn = (sourceFile: ts.SourceFile, bindings: ReadonlyMap) => { + const registrations: Array>> = []; + const visit = (node: ts.Node): void => { + if (ts.isExpressionStatement(node)) { + const registered = registeredTest(node, bindings); + if (registered) registrations.push(registered); + } + ts.forEachChild(node, visit); + }; + visit(sourceFile); + return registrations; +}; + +const registeredTest = (statement: ts.Statement, bindings: ReadonlyMap) => { + if (!ts.isExpressionStatement(statement) || !ts.isCallExpression(statement.expression)) { + return undefined; + } + const registration = registrationName(statement.expression.expression); + if (!registration) return undefined; + const nameArgument = statement.expression.arguments[0]; + const testName = nameArgument ? resolvedString(nameArgument, bindings) : undefined; + return testName ? 
{ statement, registration, testName } : undefined; +}; + +const removeRanges = (source: string, ranges: ReadonlyArray<{ start: number; end: number }>) => { + let focused = source; + for (const range of [...ranges].sort((left, right) => right.start - left.start)) { + focused = focused.slice(0, range.start) + focused.slice(range.end); + } + return focused.replace(/\n{3,}/g, "\n\n").trim(); +}; + +export const extractFocusedTestSource = (filePath: string, testName: string) => { + const source = ts.sys.readFile(filePath); + if (source === undefined) return undefined; + const sourceFile = ts.createSourceFile( + filePath, + source, + ts.ScriptTarget.Latest, + true, + ts.ScriptKind.TS, + ); + const bindings = stringBindings(sourceFile); + const registrations = registrationsIn(sourceFile, bindings); + const selected = registrations.find((entry) => entry.testName === testName); + if (!selected) return undefined; + + const ranges = [ + ...sourceFile.statements.flatMap((statement) => + ts.isImportDeclaration(statement) + ? [{ start: statement.getStart(sourceFile), end: statement.end }] + : [], + ), + ...registrations + .filter((registered) => registered !== selected) + .map((registered) => ({ + start: registered.statement.getStart(sourceFile), + end: registered.statement.end, + })), + ]; + const focused = removeRanges(source, ranges); + return focused === "" + ? undefined + : { source: `${focused}\n`, registration: selected.registration }; +}; + +export const writeFocusedTestSource = ({ + runDir, + filePath, + testName, +}: { + readonly runDir: string; + readonly filePath: string; + readonly testName: string; +}) => { + writeRunLaneProvenance(runDir, process.env.E2E_TARGET ?? ""); + const focused = extractFocusedTestSource(filePath, testName); + if (!focused) return undefined; + const candidatePath = relative(E2E_ROOT, filePath).split(sep).join("/"); + const sourcePath = candidatePath.startsWith("../") ? 
basename(filePath) : candidatePath; + const metadata = { + schemaVersion: 1, + sourcePath, + testName, + registration: focused.registration, + extractor: "typescript-named-test-v2", + capturedAt: Date.now(), + } as const; + writeTextAtomicSync(join(runDir, "test.ts"), focused.source); + writeJsonAtomicSync(join(runDir, "test-source-metadata.json"), metadata); + return metadata; +}; diff --git a/e2e/src/timeline.ts b/e2e/src/timeline.ts index cf87d4630..b1621512e 100644 --- a/e2e/src/timeline.ts +++ b/e2e/src/timeline.ts @@ -1,22 +1,35 @@ // The run's focus timeline: which window the scenario was acting on, when. // -// Focus is DERIVED, never declared — driving a Playwright page focuses the +// Focus is DERIVED, never declared: driving a Playwright page focuses the // browser window; pushing a chat/terminal event focuses the terminal. The // surfaces call markFocus as a side effect of normal operations, so any // scenario gets a faithful "where was the developer looking" track for // free, and scripts/film.ts can cut the session recordings exactly where // the action moved. Anchors map wall-clock to each recording's own clock. -import { existsSync, readFileSync, writeFileSync } from "node:fs"; +import { existsSync, readFileSync } from "node:fs"; import { join } from "node:path"; +import { + evidenceReferenceFor, + withArtifactLockSync, + writeJsonAtomicSync, + type EvidenceReference, +} from "./artifact-io"; +import { sanitizePublishedUrl } from "./published-artifacts"; + export type TimelineWindow = "terminal" | "browser"; export interface Timeline { + /** Retry and worker-process identity for correlating this file with evidence.json. */ + readonly evidence?: EvidenceReference & { + readonly invocationIds: ReadonlyArray; + readonly updatedAt: number; + }; /** Wall-clock ms when each recording's clock started. */ readonly anchors: { terminal?: number; browser?: number }; /** Focus transitions (first event per contiguous run of a window). 
*/ readonly focus: Array<{ at: number; window: TimelineWindow }>; - /** Main-frame navigations — lets the viewer render a live URL bar. */ + /** Main-frame navigations let the viewer render a live URL bar. */ readonly nav?: Array<{ at: number; url: string }>; } @@ -28,39 +41,59 @@ const read = (runDir: string): Timeline => { return JSON.parse(readFileSync(file, "utf8")) as Timeline; }; -const write = (runDir: string, timeline: Timeline) => - writeFileSync(fileFor(runDir), JSON.stringify(timeline, null, 1)); +const update = (runDir: string, mutate: (timeline: Timeline) => Timeline): Timeline => { + const evidence = evidenceReferenceFor(runDir); + const file = fileFor(runDir); + return withArtifactLockSync(file, () => { + const current = read(runDir); + const invocationIds = new Set(current.evidence?.invocationIds ?? []); + invocationIds.add(evidence.invocationId); + const updated = mutate({ + ...current, + evidence: { + ...evidence, + invocationIds: [...invocationIds], + updatedAt: Date.now(), + }, + }); + writeJsonAtomicSync(file, updated); + return updated; + }); +}; /** Record that `window`'s recording clock starts now. */ export const markRecordingStart = (runDir: string, window: TimelineWindow): void => { - const timeline = read(runDir); - write(runDir, { + update(runDir, (timeline) => ({ ...timeline, anchors: { ...timeline.anchors, [window]: Date.now() }, - }); + })); }; /** Record that the scenario is acting on `window` (deduped per run). */ export const markFocus = (runDir: string, window: TimelineWindow): void => { - const timeline = read(runDir); - if (timeline.focus.at(-1)?.window === window) return; - timeline.focus.push({ at: Date.now(), window }); - write(runDir, timeline); + update(runDir, (timeline) => + timeline.focus.at(-1)?.window === window + ? timeline + : { ...timeline, focus: [...timeline.focus, { at: Date.now(), window }] }, + ); }; /** Record a main-frame navigation (deduped against the previous URL). 
*/ export const markNavigation = (runDir: string, url: string): void => { - const timeline = read(runDir); - const nav = timeline.nav ?? []; - if (nav.at(-1)?.url === url) return; - write(runDir, { ...timeline, nav: [...nav, { at: Date.now(), url }] }); + const sanitizedUrl = sanitizePublishedUrl(url); + update(runDir, (timeline) => { + const nav = timeline.nav ?? []; + return nav.at(-1)?.url === sanitizedUrl + ? timeline + : { ...timeline, nav: [...nav, { at: Date.now(), url: sanitizedUrl }] }; + }); }; export const readTimeline = (runDir: string): Timeline | null => existsSync(fileFor(runDir)) ? read(runDir) : null; // --------------------------------------------------------------------------- -// Human dwells — pacing for a watchable recording, owned by the framework. +// Human dwells: pacing for a watchable recording, owned by the framework. // // A scenario should never hand-code `waitForTimeout` to make a film readable; // that's the recording's concern, not the scenario's. A dwell ("beat") is a @@ -70,7 +103,7 @@ export const readTimeline = (runDir: string): Timeline | null => // splice reads like a person moving between apps. // // Beats apply ONLY when filming (E2E_FILM, also implied by the desk's E2E_DESK) -// — fast verification/CI runs, where nobody is watching, pay nothing. +// Fast verification/CI runs, where nobody is watching, pay nothing. // --------------------------------------------------------------------------- const FILM_BEAT_MS = 1500; @@ -79,7 +112,7 @@ const FILM_BEAT_MS = 1500; export const isFilming = (): boolean => process.env.E2E_FILM === "1" || process.env.E2E_DESK === "1"; -/** Hold for the viewer — a no-op unless this run is being filmed. */ +/** Hold for the viewer, a no-op unless this run is being filmed. 
*/ export const beat = async (ms: number = FILM_BEAT_MS): Promise => { if (!isFilming()) return; await new Promise((tick) => setTimeout(tick, ms)); @@ -87,7 +120,7 @@ export const beat = async (ms: number = FILM_BEAT_MS): Promise => { /** * Focus `window`, lingering a beat on the OUTGOING window first when this is a - * real focus change and we're filming — "look before you tab away". The first + * real focus change and we're filming: "look before you tab away". The first * focus of a run never beats (nothing to linger on). */ export const enterFocus = async ( diff --git a/e2e/src/trace-harvest.ts b/e2e/src/trace-harvest.ts index 79f1b3738..2c2a8346c 100644 --- a/e2e/src/trace-harvest.ts +++ b/e2e/src/trace-harvest.ts @@ -2,35 +2,88 @@ // session made against the target, with the trace id that names its // click→server→DB waterfall in the OTLP store the run exported to. // -// Two writers share it — the browser surface (ids harvested off the wire, +// Two writers share it: the browser surface (ids harvested off the wire, // the web app sends traceparent itself) and the MCP surface (ids MINTED // here, since mcporter's plain fetch sends none; the server joins whatever // traceparent arrives). Append is read-merge-write so neither clobbers the // other; entries stay sorted by wall clock. -import { existsSync, readFileSync, writeFileSync } from "node:fs"; +import { existsSync, readFileSync } from "node:fs"; import { join } from "node:path"; +import { evidenceReferenceFor, withArtifactLockSync, writeJsonAtomicSync } from "./artifact-io"; +import { sanitizePublishedText, sanitizePublishedUrl } from "./published-artifacts"; + export interface TraceEntry { readonly id: string; readonly at: number; readonly url: string; readonly ms?: number; readonly status?: number; - /** Which window made the request — the viewer's rail tags rows with it. */ + /** Which window made the request; the viewer's rail tags rows with it. 
*/ readonly source?: "terminal" | "browser"; /** Readable name when the URL alone says nothing (MCP: every call POSTs * the same endpoint; the JSON-RPC method/tool is the real identity). */ readonly label?: string; + /** Retry and worker-process identity for cross-file evidence correlation. */ + readonly attemptId?: string; + readonly invocationId?: string; + /** Stable order assigned while holding the run's cross-process ledger lock. */ + readonly sequence?: number; } const fileFor = (runDir: string) => join(runDir, "traces.json"); +const optionalNumber = (value: unknown): boolean => + value === undefined || typeof value === "number"; +const optionalString = (value: unknown): boolean => + value === undefined || typeof value === "string"; + +const isTraceEntry = (value: unknown): value is TraceEntry => { + if (typeof value !== "object" || value === null) return false; + return ( + "id" in value && + typeof value.id === "string" && + "at" in value && + typeof value.at === "number" && + "url" in value && + typeof value.url === "string" && + (!("ms" in value) || optionalNumber(value.ms)) && + (!("status" in value) || optionalNumber(value.status)) && + (!("source" in value) || + value.source === undefined || + value.source === "terminal" || + value.source === "browser") && + (!("label" in value) || optionalString(value.label)) && + (!("attemptId" in value) || optionalString(value.attemptId)) && + (!("invocationId" in value) || optionalString(value.invocationId)) && + (!("sequence" in value) || optionalNumber(value.sequence)) + ); +}; + export const appendTraces = (runDir: string, entries: ReadonlyArray): void => { if (entries.length === 0) return; const file = fileFor(runDir); - const existing: TraceEntry[] = existsSync(file) - ? 
(JSON.parse(readFileSync(file, "utf8")) as TraceEntry[]) - : []; - const merged = [...existing, ...entries].sort((a, b) => a.at - b.at); - writeFileSync(file, JSON.stringify(merged, null, 1)); + const evidence = evidenceReferenceFor(runDir); + withArtifactLockSync(file, () => { + const parsed: unknown = existsSync(file) ? JSON.parse(readFileSync(file, "utf8")) : []; + if (!Array.isArray(parsed) || !parsed.every(isTraceEntry)) { + throw new Error(`invalid e2e trace ledger: ${file}`); + } + const existing = parsed; + const nextSequence = + existing.reduce((highest, entry) => Math.max(highest, entry.sequence ?? -1), -1) + 1; + const appended = entries.map( + (entry, index): TraceEntry => ({ + ...entry, + url: sanitizePublishedUrl(entry.url), + ...(entry.label === undefined ? {} : { label: sanitizePublishedText(entry.label) }), + ...evidence, + sequence: nextSequence + index, + }), + ); + const merged = [...existing, ...appended].sort( + (left, right) => left.at - right.at || (left.sequence ?? 0) - (right.sequence ?? 0), + ); + writeJsonAtomicSync(file, merged); + }); }; diff --git a/e2e/src/viewer/manifest.ts b/e2e/src/viewer/manifest.ts index 641f8ade5..950e72b13 100644 --- a/e2e/src/viewer/manifest.ts +++ b/e2e/src/viewer/manifest.ts @@ -1,15 +1,17 @@ -// Writes runs/manifest.json — the machine-readable inventory the matrix +// Writes runs/manifest.json, the machine-readable inventory the matrix // renders (scenario × target + per-run status). Rebuilt after every scenario. 
-import { - existsSync, - readFileSync, - readdirSync, - renameSync, - writeFileSync, - type Dirent, -} from "node:fs"; +import { existsSync, readFileSync, readdirSync, type Dirent } from "node:fs"; import { join } from "node:path"; +import { withArtifactLockSync, writeJsonAtomicSync } from "../artifact-io"; +import { publishedArtifactFor, type PublishedArtifactKind } from "../published-artifacts"; + +export interface ManifestArtifact { + readonly name: string; + readonly kind: PublishedArtifactKind; + readonly label?: string; +} + export interface ManifestRun { readonly scenario: string; readonly target: string; @@ -17,6 +19,10 @@ export interface ManifestRun { readonly ok: boolean; readonly durationMs?: number; readonly endedAt?: number; + readonly attemptId?: string; + readonly portableTraceCount?: number; + readonly portableTraceMissing?: number; + readonly artifacts: ReadonlyArray; } export interface ManifestSkip { @@ -25,54 +31,86 @@ export interface ManifestSkip { readonly missing: ReadonlyArray; } +const artifactLabel = (name: string): string => + name + .replace(/\.[^.]+$/, "") + .split("-") + .filter(Boolean) + .map((part) => (part === "claude" ? "Claude" : part === "mcp" ? "MCP" : part)) + .join(" "); + +const artifactsFor = (target: string, slug: string, dir: string): ManifestArtifact[] => { + try { + return readdirSync(dir, { withFileTypes: true }) + .flatMap((entry) => { + if (!entry.isFile()) return []; + const artifact = publishedArtifactFor(`${target}/${slug}/${entry.name}`); + return artifact + ? 
[{ name: entry.name, kind: artifact.kind, label: artifactLabel(entry.name) }] + : []; + }) + .sort((left, right) => left.name.localeCompare(right.name)); + } catch { + return []; + } +}; + export const buildManifest = (runsDir: string): void => { - const runs: ManifestRun[] = []; - const skips: ManifestSkip[] = []; + const manifestFile = join(runsDir, "manifest.json"); + withArtifactLockSync(manifestFile, () => { + const runs: ManifestRun[] = []; + const skips: ManifestSkip[] = []; - for (const target of readdirSync(runsDir, { withFileTypes: true })) { - if (!target.isDirectory() || target.name === "assets") continue; - // Both vitest projects build the manifest concurrently while runs are - // being (re)written — tolerate dirs vanishing mid-scan. - let slugs: Dirent[]; - try { - slugs = readdirSync(join(runsDir, target.name), { withFileTypes: true }); - } catch { - continue; - } - for (const slug of slugs) { - if (!slug.isDirectory()) continue; - const dir = join(runsDir, target.name, slug.name); - const resultPath = join(dir, "result.json"); - if (existsSync(resultPath)) { - try { - const result = JSON.parse(readFileSync(resultPath, "utf8")); - runs.push({ - scenario: result.scenario, - target: target.name, - slug: slug.name, - ok: result.ok, - durationMs: result.durationMs, - endedAt: result.endedAt, - }); - continue; - } catch { - // unreadable result — fall through to the skip marker - } + for (const target of readdirSync(runsDir, { withFileTypes: true })) { + if (!target.isDirectory() || target.name === "assets") continue; + // Both vitest projects build the manifest concurrently while runs are + // being (re)written, so tolerate dirs vanishing mid-scan. 
+ let slugs: Dirent[]; + try { + slugs = readdirSync(join(runsDir, target.name), { withFileTypes: true }); + } catch { + continue; } - const skipPath = join(dir, "skipped.json"); - if (existsSync(skipPath)) { - try { - const skip = JSON.parse(readFileSync(skipPath, "utf8")); - skips.push({ scenario: skip.scenario, target: target.name, missing: skip.missing }); - } catch { - // ignore + for (const slug of slugs) { + if (!slug.isDirectory()) continue; + const dir = join(runsDir, target.name, slug.name); + const resultPath = join(dir, "result.json"); + if (existsSync(resultPath)) { + try { + const result = JSON.parse(readFileSync(resultPath, "utf8")); + runs.push({ + scenario: result.scenario, + target: target.name, + slug: slug.name, + ok: result.ok, + durationMs: result.durationMs, + endedAt: result.endedAt, + ...(typeof result.attemptId === "string" ? { attemptId: result.attemptId } : {}), + ...(typeof result.portableTraces?.exported === "number" + ? { portableTraceCount: result.portableTraces.exported } + : {}), + ...(typeof result.portableTraces?.missing === "number" + ? { portableTraceMissing: result.portableTraces.missing } + : {}), + artifacts: artifactsFor(target.name, slug.name, dir), + }); + continue; + } catch { + // Unreadable result, fall through to the skip marker. + } + } + const skipPath = join(dir, "skipped.json"); + if (existsSync(skipPath)) { + try { + const skip = JSON.parse(readFileSync(skipPath, "utf8")); + skips.push({ scenario: skip.scenario, target: target.name, missing: skip.missing }); + } catch { + // Ignore incomplete skip markers. + } } } } - } - // Write-then-rename so a concurrent reader/writer never sees a torn file. 
- const tmp = join(runsDir, `.manifest-${process.pid}.tmp`); - writeFileSync(tmp, JSON.stringify({ generatedAt: Date.now(), runs, skips }, null, 1)); - renameSync(tmp, join(runsDir, "manifest.json")); + writeJsonAtomicSync(manifestFile, { generatedAt: Date.now(), runs, skips }); + }); }; diff --git a/e2e/src/vm/ec2-lifecycle.ts b/e2e/src/vm/ec2-lifecycle.ts new file mode 100644 index 000000000..d9a72ee12 --- /dev/null +++ b/e2e/src/vm/ec2-lifecycle.ts @@ -0,0 +1,274 @@ +import { execFile } from "node:child_process"; +import { promisify } from "node:util"; + +import { Schema } from "effect"; + +import { requireEc2CleanupOwner, type VmRunMetadata } from "./run-scope"; +import { sleep } from "./types"; + +const execFileP = promisify(execFile); + +export const EC2_MANAGED_TAG = "executor-e2e:managed"; +export const EC2_REPOSITORY_TAG = "executor-e2e:repository"; +export const EC2_RUN_ID_TAG = "executor-e2e:run-id"; +export const EC2_RUN_ATTEMPT_TAG = "executor-e2e:run-attempt"; +export const EC2_RUN_SCOPE_TAG = "executor-e2e:run-scope"; +export const EC2_CREATED_AT_TAG = "executor-e2e:created-at"; +export const EC2_EXPIRES_AT_TAG = "executor-e2e:expires-at"; + +const AwsTag = Schema.Struct({ + Key: Schema.String, + Value: Schema.String, +}); +const AwsInstance = Schema.Struct({ + InstanceId: Schema.String, + Tags: Schema.Array(AwsTag), +}); +const AwsInstances = Schema.Struct({ + Reservations: Schema.Array(Schema.Struct({ Instances: Schema.Array(AwsInstance) })), +}); +const AwsKeyPair = Schema.Struct({ + KeyPairId: Schema.String, + Tags: Schema.Array(AwsTag), +}); +const AwsKeyPairs = Schema.Struct({ + KeyPairs: Schema.Array(AwsKeyPair), +}); +const AwsSecurityGroup = Schema.Struct({ + GroupId: Schema.String, + Tags: Schema.Array(AwsTag), +}); +const AwsSecurityGroups = Schema.Struct({ + SecurityGroups: Schema.Array(AwsSecurityGroup), +}); +const decodeAwsInstances = Schema.decodeUnknownSync(Schema.fromJsonString(AwsInstances)); +const decodeAwsKeyPairs = 
Schema.decodeUnknownSync(Schema.fromJsonString(AwsKeyPairs)); +const decodeAwsSecurityGroups = Schema.decodeUnknownSync(Schema.fromJsonString(AwsSecurityGroups)); + +export type Ec2ResourceTag = typeof AwsTag.Type; +export type Ec2ResourceKind = "instance" | "key-pair" | "security-group"; + +export interface TaggedEc2Resource { + readonly id: string; + readonly kind: Ec2ResourceKind; + readonly tags: Readonly>; +} + +export type Ec2AwsRunner = (args: readonly string[]) => Promise; + +const region = (environment: Readonly>) => + environment.E2E_EC2_REGION?.trim() || "us-west-2"; + +const defaultAwsRunner = + (environment: Readonly>): Ec2AwsRunner => + async (args) => { + const executable = environment.E2E_AWS_BIN?.trim() || "aws"; + const { stdout } = await execFileP( + executable, + ["--region", region(environment), "--output", "json", ...args], + { maxBuffer: 64 * 1024 * 1024 }, + ); + return stdout; + }; + +export const ec2ResourceTags = ( + metadata: VmRunMetadata, + name: string, +): readonly Ec2ResourceTag[] => [ + { Key: "Name", Value: name }, + { Key: "purpose", Value: "e2e" }, + { Key: EC2_MANAGED_TAG, Value: "true" }, + { Key: EC2_REPOSITORY_TAG, Value: metadata.repository }, + { Key: EC2_RUN_ID_TAG, Value: metadata.runId }, + { Key: EC2_RUN_ATTEMPT_TAG, Value: metadata.runAttempt }, + { Key: EC2_RUN_SCOPE_TAG, Value: metadata.scope }, + { Key: EC2_CREATED_AT_TAG, Value: metadata.createdAt }, + { Key: EC2_EXPIRES_AT_TAG, Value: metadata.expiresAt }, +]; + +export const ec2TagSpecifications = ( + resourceType: Ec2ResourceKind, + tags: readonly Ec2ResourceTag[], +) => JSON.stringify([{ ResourceType: resourceType, Tags: tags }]); + +const tagsByKey = (tags: readonly Ec2ResourceTag[]) => + Object.fromEntries(tags.map((tag) => [tag.Key, tag.Value])); + +export const decodeTaggedEc2Instances = (json: string) => + decodeAwsInstances(json).Reservations.flatMap((reservation) => + reservation.Instances.map((instance) => ({ + id: instance.InstanceId, + kind: 
"instance" as const, + tags: tagsByKey(instance.Tags), + })), + ); + +export const decodeTaggedEc2KeyPairs = (json: string) => + decodeAwsKeyPairs(json).KeyPairs.map((keyPair) => ({ + id: keyPair.KeyPairId, + kind: "key-pair" as const, + tags: tagsByKey(keyPair.Tags), + })); + +export const decodeTaggedEc2SecurityGroups = (json: string) => + decodeAwsSecurityGroups(json).SecurityGroups.map((securityGroup) => ({ + id: securityGroup.GroupId, + kind: "security-group" as const, + tags: tagsByKey(securityGroup.Tags), + })); + +const hasManagedRepository = (resource: TaggedEc2Resource, repository: string) => + resource.tags[EC2_MANAGED_TAG] === "true" && resource.tags[EC2_REPOSITORY_TAG] === repository; + +export const selectCurrentEc2Resources = ( + resources: readonly TaggedEc2Resource[], + owner: ReturnType, +) => + resources.filter( + (resource) => + hasManagedRepository(resource, owner.repository) && + resource.tags[EC2_RUN_ID_TAG] === owner.runId && + resource.tags[EC2_RUN_ATTEMPT_TAG] === owner.runAttempt && + resource.tags[EC2_RUN_SCOPE_TAG] === owner.scope, + ); + +export const selectExpiredEc2Resources = ( + resources: readonly TaggedEc2Resource[], + repository: string, + minimumAgeHours: number, + now = Date.now(), +) => { + if (!Number.isFinite(minimumAgeHours) || minimumAgeHours <= 0) { + throw new Error("minimumAgeHours must be greater than zero"); + } + const minimumAgeMs = minimumAgeHours * 60 * 60 * 1_000; + return resources.filter((resource) => { + if (!hasManagedRepository(resource, repository)) return false; + const createdAt = Date.parse(resource.tags[EC2_CREATED_AT_TAG] ?? ""); + const expiresAt = Date.parse(resource.tags[EC2_EXPIRES_AT_TAG] ?? 
""); + if (!Number.isFinite(createdAt) || !Number.isFinite(expiresAt)) return false; + if (createdAt > expiresAt || createdAt > now) return false; + return expiresAt <= now && now - createdAt >= minimumAgeMs; + }); +}; + +const listManagedResources = async ( + repository: string, + runner: Ec2AwsRunner, + exactTags: ReadonlyArray<{ readonly key: string; readonly value: string }> = [], +) => { + const filters = [ + { Name: `tag:${EC2_MANAGED_TAG}`, Values: ["true"] }, + { Name: `tag:${EC2_REPOSITORY_TAG}`, Values: [repository] }, + ...exactTags.map(({ key, value }) => ({ Name: `tag:${key}`, Values: [value] })), + ]; + const filterJson = JSON.stringify(filters); + const [instances, keyPairs, securityGroups] = await Promise.all([ + runner(["ec2", "describe-instances", "--filters", filterJson]), + runner(["ec2", "describe-key-pairs", "--filters", filterJson]), + runner(["ec2", "describe-security-groups", "--filters", filterJson]), + ]); + return [ + ...decodeTaggedEc2Instances(instances), + ...decodeTaggedEc2KeyPairs(keyPairs), + ...decodeTaggedEc2SecurityGroups(securityGroups), + ]; +}; + +const deleteSecurityGroup = async ( + id: string, + runner: Ec2AwsRunner, + wait: (ms: number) => Promise, +) => { + let lastFailure: unknown; + for (let attempt = 0; attempt < 6; attempt += 1) { + try { + await runner(["ec2", "delete-security-group", "--group-id", id]); + return; + } catch (error) { + lastFailure = error; + if (attempt < 5) await wait(2_000); + } + } + throw lastFailure; +}; + +const deleteResources = async ( + resources: readonly TaggedEc2Resource[], + runner: Ec2AwsRunner, + wait: (ms: number) => Promise, +) => { + const failures: unknown[] = []; + const instanceIds = resources + .filter((resource) => resource.kind === "instance") + .map((resource) => resource.id); + if (instanceIds.length > 0) { + try { + await runner(["ec2", "terminate-instances", "--instance-ids", ...instanceIds]); + await runner(["ec2", "wait", "instance-terminated", "--instance-ids", 
...instanceIds]); + } catch (error) { + failures.push(new AggregateError([error], "EC2 instance cleanup failed")); + } + } + + for (const resource of resources.filter(({ kind }) => kind === "security-group")) { + try { + await deleteSecurityGroup(resource.id, runner, wait); + } catch (error) { + failures.push( + new AggregateError([error], `EC2 security-group cleanup failed: ${resource.id}`), + ); + } + } + + for (const resource of resources.filter(({ kind }) => kind === "key-pair")) { + try { + await runner(["ec2", "delete-key-pair", "--key-pair-id", resource.id]); + } catch (error) { + failures.push(new AggregateError([error], `EC2 key-pair cleanup failed: ${resource.id}`)); + } + } + + if (failures.length > 0) { + throw new AggregateError(failures, "EC2 cleanup was incomplete"); + } +}; + +export const cleanupCurrentEc2Resources = async (options?: { + readonly environment?: Readonly>; + readonly runner?: Ec2AwsRunner; + readonly wait?: (ms: number) => Promise; +}) => { + const environment = options?.environment ?? process.env; + const owner = requireEc2CleanupOwner(environment); + const runner = options?.runner ?? defaultAwsRunner(environment); + const resources = await listManagedResources(owner.repository, runner, [ + { key: EC2_RUN_ID_TAG, value: owner.runId }, + { key: EC2_RUN_ATTEMPT_TAG, value: owner.runAttempt }, + { key: EC2_RUN_SCOPE_TAG, value: owner.scope }, + ]); + const selected = selectCurrentEc2Resources(resources, owner); + await deleteResources(selected, runner, options?.wait ?? sleep); + return { deleted: selected.length, scope: owner.scope }; +}; + +export const sweepExpiredEc2Resources = async (options: { + readonly minimumAgeHours: number; + readonly environment?: Readonly>; + readonly now?: number; + readonly runner?: Ec2AwsRunner; + readonly wait?: (ms: number) => Promise; +}) => { + const environment = options.environment ?? process.env; + const owner = requireEc2CleanupOwner(environment); + const runner = options.runner ?? 
defaultAwsRunner(environment); + const resources = await listManagedResources(owner.repository, runner); + const selected = selectExpiredEc2Resources( + resources, + owner.repository, + options.minimumAgeHours, + options.now, + ); + await deleteResources(selected, runner, options.wait ?? sleep); + return { deleted: selected.length, repository: owner.repository }; +}; diff --git a/e2e/src/vm/ec2.ts b/e2e/src/vm/ec2.ts index fdbe7dce8..d32148eb8 100644 --- a/e2e/src/vm/ec2.ts +++ b/e2e/src/vm/ec2.ts @@ -11,8 +11,9 @@ // SSH reachability — an orderly shutdown keeps the daemon serving for several // seconds, so "SSH answered" alone can false-pass a reboot that never happened. -import { execFile } from "node:child_process"; -import { chmodSync, mkdtempSync, writeFileSync } from "node:fs"; +import { execFile, spawn } from "node:child_process"; +import { randomUUID } from "node:crypto"; +import { chmodSync, mkdtempSync, rmSync, writeFileSync } from "node:fs"; import net from "node:net"; import { tmpdir } from "node:os"; import { join } from "node:path"; @@ -26,13 +27,55 @@ import { type VmOs, type VmProvider, } from "./types"; +import { ec2ResourceTags, ec2TagSpecifications, type Ec2ResourceTag } from "./ec2-lifecycle"; +import { resolveVmRunMetadata } from "./run-scope"; const execFileP = promisify(execFile); +const AWS = process.env.E2E_AWS_BIN ?? "aws"; const REGION = process.env.E2E_EC2_REGION ?? "us-west-2"; const INSTANCE_TYPE = process.env.E2E_EC2_INSTANCE_TYPE ?? 
"t3.medium"; const TAG = "executor-e2e"; +type AsyncFinalizer = () => Promise | void; + +export const createEc2FinalizerStack = () => { + const finalizers: Array<{ readonly label: string; readonly run: AsyncFinalizer }> = []; + let finished = false; + + const add = (label: string, run: AsyncFinalizer) => { + if (finished) throw new Error(`cannot register ${label} after EC2 cleanup`); + finalizers.push({ label, run }); + }; + + const run = async () => { + if (finished) return; + finished = true; + const failures: unknown[] = []; + for (const finalizer of finalizers.reverse()) { + try { + await finalizer.run(); + } catch (error) { + failures.push(new AggregateError([error], `EC2 cleanup failed: ${finalizer.label}`)); + } + } + if (failures.length > 0) throw new AggregateError(failures, "EC2 cleanup was incomplete"); + }; + + return { add, run }; +}; + +export const ec2ResourceNames = ( + seed = `${process.pid}-${Date.now()}-${randomUUID().slice(0, 8)}`, +) => { + const safeSeed = seed.replace(/[^a-zA-Z0-9-]/g, "-").slice(0, 96); + return { + instance: `${TAG}-${safeSeed}`, + keyPair: `${TAG}-key-${safeSeed}`, + securityGroup: `${TAG}-sg-${safeSeed}`, + }; +}; + const SSH_OPTS = [ "-o", "StrictHostKeyChecking=no", @@ -67,7 +110,7 @@ export const ec2RebootGuest = async ( }; const aws = async (args: ReadonlyArray): Promise => { - const { stdout } = await execFileP("aws", ["--region", REGION, "--output", "text", ...args], { + const { stdout } = await execFileP(AWS, ["--region", REGION, "--output", "text", ...args], { maxBuffer: 64 * 1024 * 1024, }); return stdout.trim(); @@ -118,8 +161,8 @@ const latestAmi = async (os: VmOs): Promise => { ]); }; -const defaultSubnet = async (): Promise => { - const vpc = await aws([ +const defaultNetwork = async () => { + const vpcId = await aws([ "ec2", "describe-vpcs", "--filters", @@ -131,60 +174,68 @@ const defaultSubnet = async (): Promise => { "ec2", "describe-subnets", "--filters", - `Name=vpc-id,Values=${vpc}`, + 
`Name=vpc-id,Values=${vpcId}`, "Name=default-for-az,Values=true", "--query", "Subnets[0].SubnetId", ]); - return subnet && subnet !== "None" - ? subnet - : aws([ - "ec2", - "describe-subnets", - "--filters", - `Name=vpc-id,Values=${vpc}`, - "--query", - "Subnets[0].SubnetId", - ]); + const subnetId = + subnet && subnet !== "None" + ? subnet + : await aws([ + "ec2", + "describe-subnets", + "--filters", + `Name=vpc-id,Values=${vpcId}`, + "--query", + "Subnets[0].SubnetId", + ]); + return { subnetId, vpcId }; }; -/** Create (idempotently) a security group allowing inbound SSH from this host. */ -const ensureSecurityGroup = async (myIp: string): Promise => { - const name = `${TAG}-sg`; - let sg = await aws([ +/** Create a security group used only by one provisioned guest. */ +const createSecurityGroup = (vpcId: string, name: string, tags: readonly Ec2ResourceTag[]) => + aws([ "ec2", - "describe-security-groups", - "--filters", - `Name=group-name,Values=${name}`, + "create-security-group", + "--group-name", + name, + "--description", + "executor e2e ephemeral guest SSH", + "--vpc-id", + vpcId, + "--tag-specifications", + ec2TagSpecifications("security-group", tags), "--query", - "SecurityGroups[0].GroupId", - ]).catch(() => ""); - if (!sg || sg === "None") { - sg = await aws([ - "ec2", - "create-security-group", - "--group-name", - name, - "--description", - "executor e2e ephemeral guests (SSH from CI host)", - "--query", - "GroupId", - ]); - } - // Authorize this host's IP for SSH; ignore "already exists". 
- await aws([ + "GroupId", + ]); + +const authorizeSecurityGroup = (myIp: string, securityGroupId: string) => + aws([ "ec2", "authorize-security-group-ingress", "--group-id", - sg, + securityGroupId, "--protocol", "tcp", "--port", "22", "--cidr", `${myIp}/32`, - ]).catch(() => undefined); - return sg; + ]); + +const deleteSecurityGroup = async (securityGroupId: string) => { + let lastFailure: unknown; + for (let attempt = 0; attempt < 6; attempt++) { + try { + await aws(["ec2", "delete-security-group", "--group-id", securityGroupId]); + return; + } catch (error) { + lastFailure = error; + if (attempt < 5) await sleep(2_000); + } + } + throw lastFailure; }; /** PowerShell user-data: enable OpenSSH, default the shell to PowerShell, and @@ -206,6 +257,53 @@ const windowsUserData = (publicKey: string): string => const linuxUserData = (publicKey: string): string => ["#cloud-config", "ssh_authorized_keys:", ` - ${publicKey}`].join("\n"); +const rootDeviceName = (ami: string) => + aws(["ec2", "describe-images", "--image-ids", ami, "--query", "Images[0].RootDeviceName"]); + +export const ec2RunInstancesArgs = (options: { + readonly ami: string; + readonly instanceType: string; + readonly keyPairName: string; + readonly rootDeviceName: string; + readonly securityGroupId: string; + readonly subnetId: string; + readonly tags: readonly Ec2ResourceTag[]; + readonly userDataFile: string; +}) => [ + "ec2", + "run-instances", + "--image-id", + options.ami, + "--instance-type", + options.instanceType, + "--count", + "1", + "--key-name", + options.keyPairName, + "--security-group-ids", + options.securityGroupId, + "--subnet-id", + options.subnetId, + "--associate-public-ip-address", + "--instance-initiated-shutdown-behavior", + "terminate", + "--metadata-options", + "HttpTokens=required,HttpEndpoint=enabled,HttpPutResponseHopLimit=1,InstanceMetadataTags=disabled", + "--block-device-mappings", + JSON.stringify([ + { + DeviceName: options.rootDeviceName, + Ebs: { DeleteOnTermination: 
true, Encrypted: true, VolumeType: "gp3" }, + }, + ]), + "--user-data", + `file://${options.userDataFile}`, + "--tag-specifications", + ec2TagSpecifications("instance", options.tags), + "--query", + "Instances[0].InstanceId", +]; + const freePort = (): Promise => new Promise((resolve, reject) => { const srv = net.createServer(); @@ -238,185 +336,244 @@ const waitLocalPort = async (port: number, attempts = 40): Promise => { export const ec2Vm = (os: VmOs, arch: VmArch = "x64"): VmProvider => ({ os, provision: async () => { - const user = guestUser(os); - // A throwaway SSH keypair, authorized via user-data (no EC2 key pair needed — - // we drive over OpenSSH key auth, not the Windows password). - const keyDir = mkdtempSync(join(tmpdir(), "executor-ec2-")); - const keyPath = join(keyDir, "id"); - await execFileP("ssh-keygen", ["-t", "ed25519", "-N", "", "-q", "-f", keyPath]); - chmodSync(keyPath, 0o600); - const publicKey = (await execFileP("ssh-keygen", ["-y", "-f", keyPath])).stdout.trim(); - - const [myIp, ami, subnet] = await Promise.all([egressIp(), latestAmi(os), defaultSubnet()]); - const sg = await ensureSecurityGroup(myIp); - const userData = os === "windows" ? 
windowsUserData(publicKey) : linuxUserData(publicKey); - const userDataFile = join(keyDir, "user-data.txt"); - writeFileSync(userDataFile, userData); - - const instanceId = await aws([ - "ec2", - "run-instances", - "--image-id", - ami, - "--instance-type", - INSTANCE_TYPE, - "--count", - "1", - "--security-group-ids", - sg, - "--subnet-id", - subnet, - "--associate-public-ip-address", - "--instance-initiated-shutdown-behavior", - "terminate", - "--user-data", - `file://${userDataFile}`, - "--tag-specifications", - `ResourceType=instance,Tags=[{Key=Name,Value=${TAG}-${os}},{Key=purpose,Value=e2e}]`, - "--query", - "Instances[0].InstanceId", - ]); + const finalizers = createEc2FinalizerStack(); + try { + const user = guestUser(os); + const metadata = resolveVmRunMetadata(); + const names = ec2ResourceNames( + `${metadata.scopeSlug}-${process.pid}-${Date.now()}-${randomUUID().slice(0, 8)}`, + ); + const keyDir = mkdtempSync(join(tmpdir(), "executor-ec2-")); + finalizers.add("local key directory", () => rmSync(keyDir, { force: true, recursive: true })); - let ip = ""; - const tunnelClosers: Array<() => void> = []; + const [myIp, ami, network] = await Promise.all([egressIp(), latestAmi(os), defaultNetwork()]); - const ssh = async (command: string): Promise => { - try { - const { stdout, stderr } = await execFileP( - "ssh", - ["-i", keyPath, ...SSH_OPTS, `${user}@${ip}`, command], - { maxBuffer: 64 * 1024 * 1024 }, - ); - return { stdout, stderr, code: 0 }; - } catch (err) { - const e = err as { stdout?: string; stderr?: string; code?: number }; - return { - stdout: e.stdout ?? "", - stderr: e.stderr ?? "", - code: typeof e.code === "number" ? 
e.code : 1, - }; - } - }; + const keyMaterial = await aws([ + "ec2", + "create-key-pair", + "--key-name", + names.keyPair, + "--key-type", + "rsa", + "--key-format", + "pem", + "--tag-specifications", + ec2TagSpecifications("key-pair", ec2ResourceTags(metadata, names.keyPair)), + "--query", + "KeyMaterial", + ]); + finalizers.add("EC2 key pair", () => + aws(["ec2", "delete-key-pair", "--key-name", names.keyPair]).then(() => undefined), + ); - const waitSshUp = async (attempts: number): Promise => { - for (let i = 0; i < attempts; i++) { - if ((await ssh(os === "windows" ? "echo ok" : "true")).code === 0) return true; - await sleep(5000); - } - return false; - }; + const keyPath = join(keyDir, "id.pem"); + writeFileSync(keyPath, `${keyMaterial}\n`, { mode: 0o600 }); + chmodSync(keyPath, 0o600); + const publicKey = (await execFileP("ssh-keygen", ["-y", "-f", keyPath])).stdout.trim(); + + const securityGroupId = await createSecurityGroup( + network.vpcId, + names.securityGroup, + ec2ResourceTags(metadata, names.securityGroup), + ); + finalizers.add("EC2 security group", () => deleteSecurityGroup(securityGroupId)); + await authorizeSecurityGroup(myIp, securityGroupId); - const waitSshDown = async (attempts = 40): Promise => { - for (let i = 0; i < attempts; i++) { - if ((await ssh("echo up").catch(() => ({ code: 1 }) as SshResult)).code !== 0) return; - await sleep(3000); + const userData = os === "windows" ? windowsUserData(publicKey) : linuxUserData(publicKey); + const userDataFile = join(keyDir, "user-data.txt"); + writeFileSync(userDataFile, userData); + const rootDevice = await rootDeviceName(ami); + if (!rootDevice || rootDevice === "None") { + throw new Error(`ec2 ${os}: AMI ${ami} has no root device mapping`); } - // never observed down — caller's boot-time check is the backstop. - }; - - const bootTime = async (): Promise => - os === "windows" - ? 
( - await ssh("(Get-CimInstance Win32_OperatingSystem).LastBootUpTime.ToString('o')") - ).stdout.trim() - : (await ssh("cat /proc/sys/kernel/random/boot_id")).stdout.trim(); - - const handle: VmHandle = { - os, - arch, - sshKeyPath: keyPath, - get host() { - return ip; - }, - ssh, - push: async (localPath, remotePath) => { - await execFileP("scp", [ - "-i", - keyPath, - "-r", - ...SSH_OPTS, - localPath, - `${user}@${ip}:${remotePath}`, - ]); - }, - reboot: async () => { - const before = await bootTime(); - await ssh(os === "windows" ? "Restart-Computer -Force" : "sudo reboot").catch( - () => undefined, - ); - await waitSshDown(); - if (!(await waitSshUp(60))) throw new Error(`ec2 ${os}: SSH did not return after reboot`); - const after = await bootTime(); - if (before && after && before === after) { - throw new Error( - `ec2 ${os}: boot time unchanged after reboot — the guest never actually rebooted`, - ); - } - }, - tunnel: async (guestPort) => { - const localPort = await freePort(); - let closed = false; - let child: ReturnType | undefined; - const { spawn } = await import("node:child_process"); - const spawnOnce = (): void => { - child = spawn( + + const instanceId = await aws( + ec2RunInstancesArgs({ + ami, + instanceType: INSTANCE_TYPE, + keyPairName: names.keyPair, + rootDeviceName: rootDevice, + securityGroupId, + subnetId: network.subnetId, + tags: ec2ResourceTags(metadata, `${names.instance}-${os}`), + userDataFile, + }), + ); + finalizers.add("EC2 instance", async () => { + await aws(["ec2", "terminate-instances", "--instance-ids", instanceId]); + await aws(["ec2", "wait", "instance-terminated", "--instance-ids", instanceId]); + }); + + let ip = ""; + const tunnelClosers = new Set<() => void>(); + finalizers.add("SSH tunnels", () => { + for (const close of tunnelClosers) close(); + tunnelClosers.clear(); + }); + + const ssh = async (command: string): Promise => { + try { + const { stdout, stderr } = await execFileP( "ssh", - [ - "-i", - keyPath, - 
...SSH_OPTS, - "-N", - "-L", - `${localPort}:127.0.0.1:${guestPort}`, - `${user}@${ip}`, - ], - { stdio: "ignore" }, + ["-i", keyPath, ...SSH_OPTS, `${user}@${ip}`, command], + { maxBuffer: 64 * 1024 * 1024 }, ); - child.on("exit", () => { - if (!closed) setTimeout(spawnOnce, 2000); - }); - }; - spawnOnce(); - const close = (): void => { - closed = true; - child?.kill(); - }; - tunnelClosers.push(close); - await waitLocalPort(localPort); - return { localPort, close }; - }, - discard: async () => { - for (const close of tunnelClosers) close(); - await aws(["ec2", "terminate-instances", "--instance-ids", instanceId]).catch( - () => undefined, - ); - }, - }; + return { stdout, stderr, code: 0 }; + } catch (err) { + const e = err as { stdout?: string; stderr?: string; code?: number }; + return { + stdout: e.stdout ?? "", + stderr: e.stderr ?? "", + code: typeof e.code === "number" ? e.code : 1, + }; + } + }; - // Wait for a public IP, then for OpenSSH (Windows boot + FoD install ≈ 2-4 min). - for (let i = 0; i < 60; i++) { - const got = await aws([ - "ec2", - "describe-instances", - "--instance-ids", - instanceId, - "--query", - "Reservations[0].Instances[0].PublicIpAddress", - ]).catch(() => ""); - if (got && got !== "None") { - ip = got; - break; + const waitSshUp = async (attempts: number): Promise => { + for (let i = 0; i < attempts; i++) { + if ((await ssh(os === "windows" ? "echo ok" : "true")).code === 0) return true; + await sleep(5000); + } + return false; + }; + + const waitSshDown = async (attempts = 40): Promise => { + for (let i = 0; i < attempts; i++) { + if ((await ssh("echo up")).code !== 0) return; + await sleep(3000); + } + // never observed down, the boot-time check is the backstop. + }; + + const bootTime = async (): Promise => + os === "windows" + ? 
( + await ssh("(Get-CimInstance Win32_OperatingSystem).LastBootUpTime.ToString('o')") + ).stdout.trim() + : (await ssh("cat /proc/sys/kernel/random/boot_id")).stdout.trim(); + + const handle: VmHandle = { + os, + arch, + sshKeyPath: keyPath, + get host() { + return ip; + }, + ssh, + push: async (localPath, remotePath) => { + await execFileP("scp", [ + "-i", + keyPath, + "-r", + ...SSH_OPTS, + localPath, + `${user}@${ip}:${remotePath}`, + ]); + }, + reboot: async () => { + const before = await bootTime(); + await ssh(os === "windows" ? "Restart-Computer -Force" : "sudo reboot"); + await waitSshDown(); + if (!(await waitSshUp(60))) throw new Error(`ec2 ${os}: SSH did not return after reboot`); + const after = await bootTime(); + if (before && after && before === after) { + throw new Error( + `ec2 ${os}: boot time unchanged after reboot, the guest never actually rebooted`, + ); + } + }, + tunnel: async (guestPort) => { + const localPort = await freePort(); + let child: ReturnType | undefined; + let reconnectTimer: ReturnType | undefined; + let closed = false; + + const spawnOnce = () => { + if (closed || child) return; + const spawned = spawn( + "ssh", + [ + "-i", + keyPath, + ...SSH_OPTS, + "-N", + "-L", + `${localPort}:127.0.0.1:${guestPort}`, + `${user}@${ip}`, + ], + { stdio: "ignore" }, + ); + child = spawned; + let settled = false; + const onStopped = () => { + if (settled) return; + settled = true; + if (child === spawned) child = undefined; + if (!closed && !reconnectTimer) { + reconnectTimer = setTimeout(() => { + reconnectTimer = undefined; + spawnOnce(); + }, 2_000); + } + }; + spawned.on("error", onStopped); + spawned.on("exit", onStopped); + }; + + const close = () => { + if (closed) return; + closed = true; + if (reconnectTimer) clearTimeout(reconnectTimer); + reconnectTimer = undefined; + const active = child; + child = undefined; + active?.kill(); + tunnelClosers.delete(close); + }; + + tunnelClosers.add(close); + spawnOnce(); + try { + await 
waitLocalPort(localPort); + } catch (error) { + close(); + throw error; + } + return { localPort, close }; + }, + discard: finalizers.run, + }; + + // Wait for a public IP, then for OpenSSH. Windows can need several minutes + // while its first-boot feature installation enables the SSH server. + for (let i = 0; i < 60; i++) { + const got = await aws([ + "ec2", + "describe-instances", + "--instance-ids", + instanceId, + "--query", + "Reservations[0].Instances[0].PublicIpAddress", + ]).catch(() => ""); + if (got && got !== "None") { + ip = got; + break; + } + await sleep(5000); } - await sleep(5000); - } - if (!ip) { - await handle.discard(); - throw new Error(`ec2 ${os}: no public IP within 300s`); - } - if (!(await waitSshUp(60))) { - await handle.discard(); - throw new Error(`ec2 ${os}: SSH never came up`); + if (!ip) throw new Error(`ec2 ${os}: no public IP within 300s`); + if (!(await waitSshUp(60))) throw new Error(`ec2 ${os}: SSH never came up`); + return handle; + } catch (error) { + try { + await finalizers.run(); + } catch (cleanupError) { + throw new AggregateError( + [error, cleanupError], + `ec2 ${os}: provisioning failed and cleanup was incomplete`, + ); + } + throw error; } - return handle; }, }); diff --git a/e2e/src/vm/linux-kvm-libvirt.ts b/e2e/src/vm/linux-kvm-libvirt.ts new file mode 100644 index 000000000..4909732f9 --- /dev/null +++ b/e2e/src/vm/linux-kvm-libvirt.ts @@ -0,0 +1,1267 @@ +// Concrete libvirt/QEMU implementation of the Linux desktop VM contract. +// The base image is a prepared x86_64 Linux desktop image with cloud-init, +// Xorg, xinit, openbox, xdpyinfo, xdotool, SSH, and Electron runtime libraries. Each run +// uses a disposable QCOW2 overlay and a unique cloud-init identity. 
+ +import { execFile, spawn, type ChildProcess } from "node:child_process"; +import { randomUUID } from "node:crypto"; +import { + chmodSync, + existsSync, + lstatSync, + mkdirSync, + readFileSync, + readdirSync, + renameSync, + rmSync, + statSync, + writeFileSync, +} from "node:fs"; +import net from "node:net"; +import { tmpdir } from "node:os"; +import { basename, dirname, join, resolve } from "node:path"; +import { promisify } from "node:util"; + +import { + createLinuxKvmDesktopProvider, + type LinuxKvmDesktopDriver, + type LinuxKvmDesktopHandle, + type LinuxKvmDisplayRecording, + type LinuxKvmGuestConnection, + type LinuxKvmGuestCommandResult, + type LinuxKvmPreflightRuntime, + type LinuxKvmToolchain, + resolveLinuxKvmToolchain, +} from "./linux-kvm"; + +const execFileP = promisify(execFile); + +const SSH_OPTIONS = [ + "-o", + "BatchMode=yes", + "-o", + "StrictHostKeyChecking=no", + "-o", + "UserKnownHostsFile=/dev/null", + "-o", + "ConnectTimeout=10", + "-o", + "ServerAliveInterval=10", + "-o", + "LogLevel=ERROR", +] as const; + +type AsyncFinalizer = () => Promise | void; + +export const createLinuxKvmFinalizerStack = () => { + const finalizers: Array<{ readonly label: string; readonly run: AsyncFinalizer }> = []; + let finished = false; + + const add = (label: string, run: AsyncFinalizer) => { + if (finished) throw new Error(`cannot register ${label} after Linux KVM cleanup`); + finalizers.push({ label, run }); + }; + + const run = async () => { + if (finished) return; + finished = true; + const failures: unknown[] = []; + for (const finalizer of finalizers.reverse()) { + try { + await finalizer.run(); + } catch (error) { + failures.push(new AggregateError([error], `Linux KVM cleanup failed: ${finalizer.label}`)); + } + } + if (failures.length > 0) { + throw new AggregateError(failures, "Linux KVM cleanup was incomplete"); + } + }; + + return { add, run }; +}; + +export interface LibvirtDomainArgsOptions { + readonly domainName: string; + readonly 
libvirtNetwork: string; + readonly libvirtUri: string; + readonly memoryMiB: number; + readonly osVariant: string; + readonly overlayPath: string; + readonly seedPath: string; + readonly vcpus: number; +} + +export const libvirtDomainArgs = (options: LibvirtDomainArgsOptions) => [ + "--connect", + options.libvirtUri, + "--name", + options.domainName, + "--memory", + String(options.memoryMiB), + "--vcpus", + String(options.vcpus), + "--cpu", + "host-passthrough", + "--import", + "--noautoconsole", + "--boot", + "hd", + "--disk", + `path=${options.overlayPath},format=qcow2,bus=virtio,cache=none,discard=unmap`, + "--disk", + `path=${options.seedPath},device=cdrom,readonly=on`, + "--network", + `network=${options.libvirtNetwork},model=virtio`, + "--graphics", + "spice,listen=127.0.0.1", + "--video", + "qxl", + "--channel", + "spicevmc", + "--rng", + "/dev/urandom", + "--os-variant", + options.osVariant, +]; + +export const linuxKvmCloudInit = (options: { + readonly domainName: string; + readonly guestDisplay: string; + readonly guestUser: string; + readonly publicKey: string; +}) => { + if (!/^[a-z_][a-z0-9_-]*$/.test(options.guestUser)) { + throw new Error(`invalid Linux KVM guest user: ${options.guestUser}`); + } + if (options.publicKey.includes("\n")) throw new Error("SSH public key must be one line"); + + const userData = `#cloud-config +hostname: ${options.domainName} +manage_etc_hosts: true +ssh_pwauth: false +disable_root: true +users: + - name: ${options.guestUser} + groups: [adm, sudo, video, render] + shell: /bin/bash + sudo: ALL=(ALL) NOPASSWD:ALL + ssh_authorized_keys: + - ${options.publicKey} +write_files: + - path: /etc/X11/Xwrapper.config + permissions: "0644" + content: | + allowed_users=anybody + needs_root_rights=yes + - path: /usr/local/bin/executor-e2e-session + permissions: "0755" + content: | + #!/bin/sh + exec /usr/bin/dbus-run-session -- /usr/bin/openbox-session + - path: /etc/systemd/system/executor-e2e-gui.service + permissions: "0644" + 
content: | + [Unit] + Description=Executor E2E graphical session + After=systemd-user-sessions.service + Conflicts=display-manager.service + + [Service] + Type=simple + User=${options.guestUser} + Group=${options.guestUser} + PAMName=login + TTYPath=/dev/tty7 + StandardInput=tty-force + Environment=HOME=/home/${options.guestUser} + Environment=DISPLAY=${options.guestDisplay} + WorkingDirectory=/home/${options.guestUser} + ExecStart=/usr/bin/xinit /usr/local/bin/executor-e2e-session -- /usr/bin/Xorg ${options.guestDisplay} vt7 -keeptty -nolisten tcp -noreset -ac + Restart=on-failure + RestartSec=2 + + [Install] + WantedBy=multi-user.target +runcmd: + - [sh, -c, "systemctl disable --now display-manager.service 2>/dev/null || true"] + - [systemctl, daemon-reload] + - [systemctl, enable, executor-e2e-gui.service] + - [systemctl, start, --no-block, executor-e2e-gui.service] +`; + const metaData = `instance-id: ${options.domainName} +local-hostname: ${options.domainName} +`; + return { metaData, userData }; +}; + +export interface LibvirtLinuxKvmOptions { + readonly baseImagePath: string; + readonly baseImageFormat?: string; + readonly cleanupLedgerPath?: string; + readonly guestDisplay?: string; + readonly guestUser?: string; + readonly libvirtNetwork?: string; + readonly libvirtUri?: string; + readonly memoryMiB?: number; + readonly osVariant?: string; + readonly recordingFrameRate?: number; + readonly recordingSize?: string; + readonly repositoryScope?: string; + readonly runScope?: string; + readonly toolchain?: Partial; + readonly vcpus?: number; + readonly workRoot?: string; + readonly preflightRuntime?: LinuxKvmPreflightRuntime; +} + +export interface LinuxKvmCleanupLedger { + readonly version: 2; + readonly createdAt: string; + readonly repositoryScope: string; + readonly runScope: string; + readonly domainName: string; + readonly libvirtUri: string; + readonly workRoot: string; + readonly workDir: string; + readonly hostProcesses: ReadonlyArray; + readonly 
owner: LinuxKvmOwnerIdentity; +} + +export interface LinuxKvmOwnerIdentity { + readonly pid: number; + readonly bootId: string; + readonly startTicks: string; +} + +export interface LinuxKvmCleanupHostProcess { + readonly pid: number; + readonly role: "xvfb" | "openbox" | "remote-viewer" | "ffmpeg" | "ssh-forward"; + readonly marker: string; +} + +export interface LinuxKvmCleanupRuntime { + domainExists(libvirtUri: string, domainName: string): Promise; + hostProcessMatches(pid: number, marker: string): Promise; + terminateHostProcess(pid: number): Promise; + virsh(libvirtUri: string, args: ReadonlyArray): Promise; + removeDirectory(path: string): void; + removeLedger(path: string): void; +} + +const safeRunScope = (value: string) => + value + .toLowerCase() + .replace(/[^a-z0-9-]+/g, "-") + .replace(/^-+|-+$/g, "") + .slice(0, 64); + +const requiredSafeScope = (value: string, label: string) => { + const normalized = safeRunScope(value); + if (!normalized) throw new Error(`${label} has no safe characters`); + return normalized; +}; + +export const linuxKvmRunScope = ( + environment: Readonly> = process.env, +) => { + const explicit = environment.E2E_KVM_RUN_SCOPE; + if (explicit) { + return requiredSafeScope(explicit, "E2E_KVM_RUN_SCOPE"); + } + const githubScope = [ + environment.GITHUB_RUN_ID, + environment.GITHUB_RUN_ATTEMPT, + environment.GITHUB_JOB, + ] + .filter((value): value is string => Boolean(value)) + .join("-"); + return safeRunScope(githubScope || `local-${process.pid}`); +}; + +export const linuxKvmRepositoryScope = ( + runScope: string, + value = process.env.E2E_KVM_REPOSITORY_SCOPE, +) => requiredSafeScope(value || runScope, "E2E_KVM_REPOSITORY_SCOPE"); + +const ensureRepositoryLedgerDirectory = (directory: string, repositoryScope: string) => { + if (basename(directory) !== repositoryScope) { + throw new Error( + `Linux KVM ledger directory must end in repository scope ${repositoryScope}: ${directory}`, + ); + } + mkdirSync(directory, { recursive: 
true, mode: 0o700 }); + const stat = lstatSync(directory); + if (!stat.isDirectory() || stat.isSymbolicLink()) { + throw new Error(`Linux KVM ledger directory is not a real directory: ${directory}`); + } + chmodSync(directory, 0o700); +}; + +const writeCleanupLedger = (path: string, ledger: LinuxKvmCleanupLedger) => { + mkdirSync(dirname(path), { recursive: true }); + const temporary = `${path}.${process.pid}.tmp`; + writeFileSync(temporary, `${JSON.stringify(ledger, null, 2)}\n`, { mode: 0o600 }); + renameSync(temporary, path); + chmodSync(path, 0o600); +}; + +const isUnknownRecord = (value: unknown): value is Record => + typeof value === "object" && value !== null && !Array.isArray(value); + +const processStartTicks = (stat: string) => { + const commandEnd = stat.lastIndexOf(")"); + if (commandEnd < 0) throw new Error("Linux process stat did not contain a command boundary"); + const fieldsAfterCommand = stat + .slice(commandEnd + 1) + .trim() + .split(/\s+/); + const startTicks = fieldsAfterCommand[19]; + if (!startTicks || !/^\d+$/.test(startTicks)) { + throw new Error("Linux process stat did not contain start ticks"); + } + return startTicks; +}; + +export const linuxKvmOwnerIdentity = (pid = process.pid): LinuxKvmOwnerIdentity => { + const bootId = readFileSync("/proc/sys/kernel/random/boot_id", "utf8").trim(); + if (!bootId) throw new Error("Linux boot identity is unavailable"); + return { + pid, + bootId, + startTicks: processStartTicks(readFileSync(`/proc/${pid}/stat`, "utf8")), + }; +}; + +export const linuxKvmOwnerIdentityMatches = ( + expected: LinuxKvmOwnerIdentity, + observed: LinuxKvmOwnerIdentity, +) => + expected.pid === observed.pid && + expected.bootId === observed.bootId && + expected.startTicks === observed.startTicks; + +export type LinuxKvmOwnerStatus = "alive" | "dead" | "unknown"; + +export const linuxKvmOwnerStatus = (owner: LinuxKvmOwnerIdentity): LinuxKvmOwnerStatus => { + // oxlint-disable-next-line executor/no-try-catch-or-throw -- 
boundary: /proc distinguishes dead owners from unreadable liveness state + try { + return linuxKvmOwnerIdentityMatches(owner, linuxKvmOwnerIdentity(owner.pid)) ? "alive" : "dead"; + } catch (cause) { + return isUnknownRecord(cause) && cause.code === "ENOENT" ? "dead" : "unknown"; + } +}; + +const isLinuxKvmHostProcessRole = (value: unknown): value is LinuxKvmCleanupHostProcess["role"] => + value === "xvfb" || + value === "openbox" || + value === "remote-viewer" || + value === "ffmpeg" || + value === "ssh-forward"; + +export const readLinuxKvmCleanupLedger = (path: string): LinuxKvmCleanupLedger => { + const ledgerStat = lstatSync(path); + if (!ledgerStat.isFile() || ledgerStat.isSymbolicLink()) { + throw new Error(`Linux KVM cleanup ledger is not a real file: ${path}`); + } + const decoded: unknown = JSON.parse(readFileSync(path, "utf8")); + if ( + !isUnknownRecord(decoded) || + decoded.version !== 2 || + typeof decoded.createdAt !== "string" || + typeof decoded.repositoryScope !== "string" || + typeof decoded.runScope !== "string" || + typeof decoded.domainName !== "string" || + typeof decoded.libvirtUri !== "string" || + typeof decoded.workRoot !== "string" || + typeof decoded.workDir !== "string" || + !isUnknownRecord(decoded.owner) || + typeof decoded.owner.pid !== "number" || + !Number.isSafeInteger(decoded.owner.pid) || + decoded.owner.pid <= 0 || + typeof decoded.owner.bootId !== "string" || + decoded.owner.bootId.length === 0 || + typeof decoded.owner.startTicks !== "string" || + !/^\d+$/.test(decoded.owner.startTicks) + ) { + throw new Error(`invalid Linux KVM cleanup ledger: ${path}`); + } + const workRoot = resolve(decoded.workRoot); + const workDir = resolve(decoded.workDir); + const createdAtMs = Date.parse(decoded.createdAt); + if (!Number.isFinite(createdAtMs) || new Date(createdAtMs).toISOString() !== decoded.createdAt) { + throw new Error(`invalid Linux KVM cleanup timestamp: ${decoded.createdAt}`); + } + if 
(!/^[a-z0-9][a-z0-9-]*$/.test(decoded.repositoryScope)) { + throw new Error(`invalid Linux KVM repository scope: ${decoded.repositoryScope}`); + } + if (!/^[a-z0-9][a-z0-9-]*$/.test(decoded.runScope)) { + throw new Error(`invalid Linux KVM cleanup scope: ${decoded.runScope}`); + } + if ( + decoded.runScope !== decoded.repositoryScope && + !decoded.runScope.startsWith(`${decoded.repositoryScope}-`) + ) { + throw new Error( + `cleanup ledger run scope is outside repository ${decoded.repositoryScope}: ${decoded.runScope}`, + ); + } + if (!decoded.domainName.startsWith(`executor-e2e-desktop-${decoded.runScope}-`)) { + throw new Error(`cleanup ledger domain is outside its run scope: ${decoded.domainName}`); + } + if (dirname(workDir) !== workRoot || !basename(workDir).startsWith("executor-kvm-")) { + throw new Error(`cleanup ledger work directory is outside its root: ${workDir}`); + } + if (!Array.isArray(decoded.hostProcesses)) { + throw new Error(`invalid Linux KVM host process ledger: ${path}`); + } + const hostProcesses = decoded.hostProcesses.map((processEntry) => { + if ( + !isUnknownRecord(processEntry) || + typeof processEntry.pid !== "number" || + !Number.isSafeInteger(processEntry.pid) || + processEntry.pid <= 0 || + !isLinuxKvmHostProcessRole(processEntry.role) || + typeof processEntry.marker !== "string" || + processEntry.marker !== + `executor-e2e-kvm:${decoded.runScope}:${decoded.domainName}:${processEntry.role}` + ) { + throw new Error(`invalid Linux KVM host process entry: ${path}`); + } + return { + pid: processEntry.pid, + role: processEntry.role, + marker: processEntry.marker, + }; + }); + return { + version: 2, + createdAt: decoded.createdAt, + repositoryScope: decoded.repositoryScope, + runScope: decoded.runScope, + domainName: decoded.domainName, + libvirtUri: decoded.libvirtUri, + workRoot, + workDir, + hostProcesses, + owner: { + pid: decoded.owner.pid, + bootId: decoded.owner.bootId, + startTicks: decoded.owner.startTicks, + }, + }; +}; + +const 
processIsRunning = (pid: number) => { + // oxlint-disable-next-line executor/no-try-catch-or-throw -- boundary: POSIX process existence is exposed through throwing process.kill + try { + process.kill(pid, 0); + return true; + } catch { + return false; + } +}; + +const signalProcess = (pid: number, signal: NodeJS.Signals) => { + // oxlint-disable-next-line executor/no-try-catch-or-throw -- boundary: POSIX signals race with natural child-process exit + try { + process.kill(pid, signal); + return true; + } catch (cause) { + if (isUnknownRecord(cause) && cause.code === "ESRCH") return false; + throw cause; + } +}; + +const defaultCleanupRuntime: LinuxKvmCleanupRuntime = { + domainExists: async (libvirtUri, domainName) => { + const { stdout } = await execFileP(resolveLinuxKvmToolchain().virsh, [ + "--connect", + libvirtUri, + "list", + "--all", + "--name", + ]); + return stdout.split(/\r?\n/).includes(domainName); + }, + hostProcessMatches: async (pid, marker) => { + // oxlint-disable-next-line executor/no-try-catch-or-throw -- boundary: /proc entries disappear asynchronously when recorded child processes exit + try { + return readFileSync(`/proc/${pid}/environ`, "utf8") + .split("\0") + .includes(`E2E_KVM_PROCESS_MARKER=${marker}`); + } catch (cause) { + if (isUnknownRecord(cause) && cause.code === "ENOENT") return false; + throw cause; + } + }, + terminateHostProcess: async (pid) => { + if (!processIsRunning(pid)) return; + if (!signalProcess(pid, "SIGTERM")) return; + for (let attempt = 0; attempt < 50; attempt++) { + if (!processIsRunning(pid)) return; + await new Promise((resolveWait) => setTimeout(resolveWait, 100)); + } + signalProcess(pid, "SIGKILL"); + }, + virsh: async (libvirtUri, args) => { + await execFileP(resolveLinuxKvmToolchain().virsh, ["--connect", libvirtUri, ...args], { + maxBuffer: 64 * 1024 * 1024, + }); + }, + removeDirectory: (path) => rmSync(path, { force: true, recursive: true }), + removeLedger: (path) => rmSync(path, { force: true }), +}; + 
+export const cleanupLibvirtLinuxKvmFromLedger = async ( + ledgerPath: string, + options: { + readonly expectedLibvirtUri?: string; + readonly expectedRepositoryScope?: string; + readonly expectedRunScope?: string; + readonly expectedWorkRoot?: string; + readonly runtime?: LinuxKvmCleanupRuntime; + } = {}, +) => { + const ledger = readLinuxKvmCleanupLedger(ledgerPath); + const expectedRepositoryScope = options.expectedRepositoryScope + ? requiredSafeScope(options.expectedRepositoryScope, "expected repository scope") + : undefined; + if (expectedRepositoryScope && ledger.repositoryScope !== expectedRepositoryScope) { + throw new Error( + `refusing Linux KVM cleanup for repository ${ledger.repositoryScope}; expected ${expectedRepositoryScope}`, + ); + } + const expectedRunScope = options.expectedRunScope + ? linuxKvmRunScope({ E2E_KVM_RUN_SCOPE: options.expectedRunScope }) + : undefined; + if (expectedRunScope && ledger.runScope !== expectedRunScope) { + throw new Error( + `refusing Linux KVM cleanup for scope ${ledger.runScope}; expected ${expectedRunScope}`, + ); + } + const expectedWorkRoot = options.expectedWorkRoot ? resolve(options.expectedWorkRoot) : undefined; + if (expectedWorkRoot && ledger.workRoot !== expectedWorkRoot) { + throw new Error( + `refusing Linux KVM cleanup for work root ${ledger.workRoot}; expected ${expectedWorkRoot}`, + ); + } + if (options.expectedLibvirtUri && ledger.libvirtUri !== options.expectedLibvirtUri) { + throw new Error( + `refusing Linux KVM cleanup for libvirt URI ${ledger.libvirtUri}; expected ${options.expectedLibvirtUri}`, + ); + } + const runtime = options.runtime ?? 
defaultCleanupRuntime; + for (const hostProcess of ledger.hostProcesses) { + if (await runtime.hostProcessMatches(hostProcess.pid, hostProcess.marker)) { + await runtime.terminateHostProcess(hostProcess.pid); + if (await runtime.hostProcessMatches(hostProcess.pid, hostProcess.marker)) { + throw new Error( + `host process survived Linux KVM cleanup: ${hostProcess.role} pid=${hostProcess.pid}`, + ); + } + } + } + if (await runtime.domainExists(ledger.libvirtUri, ledger.domainName)) { + await runtime.virsh(ledger.libvirtUri, ["destroy", ledger.domainName]).catch(() => undefined); + try { + await runtime.virsh(ledger.libvirtUri, ["undefine", ledger.domainName, "--nvram"]); + } catch { + await runtime.virsh(ledger.libvirtUri, ["undefine", ledger.domainName]); + } + if (await runtime.domainExists(ledger.libvirtUri, ledger.domainName)) { + throw new Error(`libvirt domain survived cleanup: ${ledger.domainName}`); + } + } + runtime.removeDirectory(ledger.workDir); + runtime.removeLedger(ledgerPath); + return ledger; +}; + +export interface LinuxKvmStaleSweepRuntime { + now(): number; + listLedgerPaths(directory: string): ReadonlyArray; + ownerStatus(owner: LinuxKvmOwnerIdentity): LinuxKvmOwnerStatus; +} + +const defaultStaleSweepRuntime: LinuxKvmStaleSweepRuntime = { + now: () => Date.now(), + listLedgerPaths: (directory) => + readdirSync(directory, { withFileTypes: true }) + .filter((entry) => entry.name.endsWith(".json")) + .map((entry) => { + if (!entry.isFile()) { + throw new Error(`refusing non-file Linux KVM cleanup ledger: ${entry.name}`); + } + return join(directory, entry.name); + }) + .sort(), + ownerStatus: linuxKvmOwnerStatus, +}; + +export const sweepStaleLibvirtLinuxKvm = async (options: { + readonly ledgerDirectory: string; + readonly repositoryScope: string; + readonly ttlMs: number; + readonly currentLedgerPath?: string; + readonly expectedWorkRoot: string; + readonly expectedLibvirtUri: string; + readonly runtime?: LinuxKvmStaleSweepRuntime; + readonly 
cleanupRuntime?: LinuxKvmCleanupRuntime; +}) => { + if (!Number.isSafeInteger(options.ttlMs) || options.ttlMs <= 0) { + throw new Error(`Linux KVM stale TTL must be a positive integer: ${options.ttlMs}`); + } + const repositoryScope = requiredSafeScope(options.repositoryScope, "Linux KVM repository scope"); + const ledgerDirectory = resolve(options.ledgerDirectory); + if (basename(ledgerDirectory) !== repositoryScope) { + throw new Error( + `Linux KVM ledger directory must end in repository scope ${repositoryScope}: ${ledgerDirectory}`, + ); + } + ensureRepositoryLedgerDirectory(ledgerDirectory, repositoryScope); + const currentLedgerPath = options.currentLedgerPath + ? resolve(options.currentLedgerPath) + : undefined; + if (currentLedgerPath && dirname(currentLedgerPath) !== ledgerDirectory) { + throw new Error(`current Linux KVM ledger is outside ${ledgerDirectory}: ${currentLedgerPath}`); + } + const expectedWorkRoot = resolve(options.expectedWorkRoot); + const runtime = options.runtime ?? 
defaultStaleSweepRuntime; + const now = runtime.now(); + if (!Number.isFinite(now)) throw new Error("Linux KVM stale sweep clock is invalid"); + + const inspected = runtime.listLedgerPaths(ledgerDirectory).map((candidatePath) => { + const ledgerPath = resolve(candidatePath); + if (dirname(ledgerPath) !== ledgerDirectory) { + throw new Error(`Linux KVM stale sweep candidate escaped its directory: ${ledgerPath}`); + } + const ledger = readLinuxKvmCleanupLedger(ledgerPath); + if (ledger.repositoryScope !== repositoryScope) { + throw new Error( + `Linux KVM stale ledger belongs to repository ${ledger.repositoryScope}, not ${repositoryScope}: ${ledgerPath}`, + ); + } + if (ledger.workRoot !== expectedWorkRoot) { + throw new Error( + `Linux KVM stale ledger uses work root ${ledger.workRoot}, not ${expectedWorkRoot}: ${ledgerPath}`, + ); + } + if (ledger.libvirtUri !== options.expectedLibvirtUri) { + throw new Error( + `Linux KVM stale ledger uses libvirt URI ${ledger.libvirtUri}, not ${options.expectedLibvirtUri}: ${ledgerPath}`, + ); + } + const ageMs = now - Date.parse(ledger.createdAt); + if (ledgerPath === currentLedgerPath) + return { disposition: "current" as const, ledgerPath, ledger }; + if (ageMs < options.ttlMs) return { disposition: "fresh" as const, ledgerPath, ledger }; + const ownerStatus = runtime.ownerStatus(ledger.owner); + if (ownerStatus === "unknown") { + throw new Error(`Linux KVM stale ledger owner status is unknown: ${ledgerPath}`); + } + return { + disposition: ownerStatus === "alive" ? 
("active" as const) : ("stale" as const), + ledgerPath, + ledger, + }; + }); + + const cleaned: string[] = []; + for (const candidate of inspected) { + if (candidate.disposition !== "stale") continue; + const unchanged = readLinuxKvmCleanupLedger(candidate.ledgerPath); + if (JSON.stringify(unchanged) !== JSON.stringify(candidate.ledger)) { + throw new Error(`Linux KVM stale ledger changed during sweep: ${candidate.ledgerPath}`); + } + if (runtime.ownerStatus(unchanged.owner) !== "dead") { + throw new Error(`Linux KVM stale ledger owner changed during sweep: ${candidate.ledgerPath}`); + } + await cleanupLibvirtLinuxKvmFromLedger(candidate.ledgerPath, { + expectedRepositoryScope: repositoryScope, + expectedRunScope: unchanged.runScope, + expectedWorkRoot, + expectedLibvirtUri: options.expectedLibvirtUri, + runtime: options.cleanupRuntime, + }); + cleaned.push(candidate.ledgerPath); + } + + const pathsFor = (disposition: "current" | "fresh" | "active") => + inspected + .filter((candidate) => candidate.disposition === disposition) + .map((candidate) => candidate.ledgerPath); + return { + scanned: inspected.length, + cleaned, + preservedCurrent: pathsFor("current"), + preservedFresh: pathsFor("fresh"), + preservedActive: pathsFor("active"), + }; +}; + +interface GuestConnectionOptions { + readonly host: string; + readonly keyPath: string; + readonly tools: LinuxKvmToolchain; + readonly user: string; +} + +const commandFailure = (error: unknown): LinuxKvmGuestCommandResult => { + const failure = isUnknownRecord(error) ? error : {}; + return { + stdout: typeof failure.stdout === "string" ? failure.stdout : "", + stderr: typeof failure.stderr === "string" ? failure.stderr : "", + code: typeof failure.code === "number" ? 
failure.code : 1, + }; +}; + +export const connectLinuxKvmGuest = ( + options: Omit & { + readonly toolchain?: Partial; + }, +): LinuxKvmGuestConnection => { + const tools = resolveLinuxKvmToolchain(options.toolchain); + return { + run: async (command) => { + try { + const { stdout, stderr } = await execFileP( + tools.ssh, + ["-i", options.keyPath, ...SSH_OPTIONS, `${options.user}@${options.host}`, command], + { maxBuffer: 64 * 1024 * 1024 }, + ); + return { stdout, stderr, code: 0 }; + } catch (error) { + return commandFailure(error); + } + }, + push: async (localPath, remotePath) => { + await execFileP(tools.scp, [ + "-i", + options.keyPath, + "-r", + ...SSH_OPTIONS, + localPath, + `${options.user}@${options.host}:${remotePath}`, + ]); + }, + }; +}; + +const sleep = (ms: number) => new Promise((resolve) => setTimeout(resolve, ms)); + +const stopChild = async ( + child: ChildProcess | undefined, + signal: NodeJS.Signals = "SIGTERM", + timeoutMs = 5_000, +) => { + if (!child || child.exitCode !== null || child.signalCode !== null) return; + await new Promise((resolve) => { + let settled = false; + const settle = () => { + if (settled) return; + settled = true; + resolve(); + }; + const timeout = setTimeout(() => { + child.kill("SIGKILL"); + setTimeout(settle, 500); + }, timeoutMs); + child.once("exit", () => { + clearTimeout(timeout); + settle(); + }); + child.kill(signal); + }); +}; + +const freePort = () => + new Promise((resolve, reject) => { + const server = net.createServer(); + server.on("error", reject); + server.listen(0, "127.0.0.1", () => { + const address = server.address(); + if (!address || typeof address === "string") { + server.close(); + // oxlint-disable-next-line executor/no-promise-reject, executor/no-error-constructor -- boundary: node:net listen callbacks cannot return an Effect failure + reject(new Error("temporary SSH forward did not publish a TCP address")); + return; + } + const port = address.port; + server.close(() => resolve(port)); + 
/**
 * Picks a pseudo-random X display number in the range 200..20199.
 *
 * The entropy comes from the first six hex digits of a fresh UUID. The value
 * is not checked against displays already in use.
 */
const displayNumber = () => {
  const entropy = Number.parseInt(randomUUID().slice(0, 6), 16);
  const offset = entropy % 20_000;
  return 200 + offset;
};
untrackOpenbox?.(); + await stopChild(xvfb); + untrackXvfb?.(); + options.activeRecordings.delete(recording); + if (!existsSync(options.outputPath) || statSync(options.outputPath).size === 0) { + throw new Error(`SPICE recording was not written to ${options.outputPath}`); + } + }; + + const recording: LinuxKvmDisplayRecording = { + container: "mp4", + outputPath: options.outputPath, + stop, + }; + + try { + xvfb = spawn( + options.tools.xvfb, + [display, "-screen", "0", `${options.size}x24`, "-nolisten", "tcp"], + { env: processEnvironment("xvfb"), stdio: "ignore" }, + ); + untrackXvfb = options.trackHostProcess("xvfb", xvfb); + for (let attempt = 0; attempt < 100 && !existsSync(displaySocket); attempt++) { + if (xvfb.exitCode !== null || xvfb.signalCode !== null) { + throw new Error("Xvfb exited before the SPICE recording display was ready"); + } + await sleep(100); + } + if (!existsSync(displaySocket)) throw new Error(`Xvfb did not create ${displaySocket}`); + + openbox = spawn(options.tools.openbox, ["--sm-disable"], { + env: processEnvironment("openbox"), + stdio: "ignore", + }); + untrackOpenbox = options.trackHostProcess("openbox", openbox); + viewer = spawn(options.tools.remoteViewer, ["--kiosk", "--full-screen", options.endpoint], { + env: processEnvironment("remote-viewer"), + stdio: "ignore", + }); + untrackViewer = options.trackHostProcess("remote-viewer", viewer); + await sleep(2_000); + if (viewer.exitCode !== null || viewer.signalCode !== null) { + throw new Error(`remote-viewer could not open ${options.endpoint}`); + } + + ffmpeg = spawn( + options.tools.ffmpeg, + [ + "-loglevel", + "warning", + "-f", + "x11grab", + "-framerate", + String(options.frameRate), + "-video_size", + options.size, + "-i", + display, + "-c:v", + "libx264", + "-preset", + "veryfast", + "-crf", + "24", + "-pix_fmt", + "yuv420p", + "-y", + options.outputPath, + ], + { env: processEnvironment("ffmpeg"), stdio: "ignore" }, + ); + untrackFfmpeg = options.trackHostProcess("ffmpeg", 
/**
 * Extracts the first usable guest IPv4 address from `virsh domifaddr` output.
 *
 * Addresses are recognised in `a.b.c.d/prefix` form. Candidates that cannot
 * be used to reach the guest from the host are skipped:
 * - loopback (127.0.0.0/8): the provisioning loop in this file queries the
 *   `agent` source first, and a guest-agent listing normally reports `lo`
 *   ahead of the real interface;
 * - link-local (169.254.0.0/16) and the unspecified 0.0.0.0/8 block;
 * - anything with an octet above 255.
 *
 * @param output - Raw stdout of `virsh domifaddr`.
 * @returns The dotted-quad address without its prefix length, or `undefined`
 *   when no usable address is present.
 */
const parseDomainAddress = (output: string): string | undefined => {
  for (const match of output.matchAll(/\b((?:\d{1,3}\.){3}\d{1,3})\/\d+\b/g)) {
    const address = match[1];
    if (address === undefined) continue;
    const octets = address.split(".").map(Number);
    if (octets.some((octet) => octet > 255)) continue;
    const [first, second] = octets;
    if (first === 0 || first === 127) continue;
    if (first === 169 && second === 254) continue;
    return address;
  }
  return undefined;
};
linuxKvmRunScope({ E2E_KVM_RUN_SCOPE: options.runScope }) + : linuxKvmRunScope(); + const repositoryScope = linuxKvmRepositoryScope( + runScope, + options.repositoryScope || process.env.E2E_KVM_REPOSITORY_SCOPE, + ); + if (runScope !== repositoryScope && !runScope.startsWith(`${repositoryScope}-`)) { + throw new Error( + `Linux KVM run scope ${runScope} is outside repository scope ${repositoryScope}`, + ); + } + const resourceId = randomUUID().slice(0, 8); + const domainName = `executor-e2e-desktop-${runScope}-${process.pid}-${resourceId}`; + const workRoot = resolve(options.workRoot || tmpdir()); + const configuredCleanupLedgerPath = + options.cleanupLedgerPath || process.env.E2E_KVM_CLEANUP_LEDGER; + const cleanupLedgerPath = configuredCleanupLedgerPath + ? resolve(configuredCleanupLedgerPath) + : undefined; + if (cleanupLedgerPath) { + ensureRepositoryLedgerDirectory(dirname(cleanupLedgerPath), repositoryScope); + } + mkdirSync(workRoot, { recursive: true }); + const workDir = join(workRoot, `executor-kvm-${process.pid}-${resourceId}`); + let cleanupLedger: LinuxKvmCleanupLedger | undefined; + let cleanupLedgerActive = false; + if (cleanupLedgerPath) { + if (existsSync(cleanupLedgerPath)) { + throw new Error( + `refusing to overwrite existing Linux KVM recovery ledger: ${cleanupLedgerPath}`, + ); + } + cleanupLedger = { + version: 2, + createdAt: new Date().toISOString(), + repositoryScope, + runScope, + domainName, + libvirtUri, + workRoot, + workDir, + hostProcesses: [], + owner: linuxKvmOwnerIdentity(), + }; + writeCleanupLedger(cleanupLedgerPath, cleanupLedger); + cleanupLedgerActive = true; + } + const hostProcesses = new Map(); + const hostProcessMarker = (role: LinuxKvmCleanupHostProcess["role"]) => + `executor-e2e-kvm:${runScope}:${domainName}:${role}`; + const persistHostProcesses = () => { + if (!cleanupLedgerActive || !cleanupLedgerPath || !cleanupLedger) return; + cleanupLedger = { ...cleanupLedger, hostProcesses: [...hostProcesses.values()] }; + 
writeCleanupLedger(cleanupLedgerPath, cleanupLedger); + }; + const trackHostProcess = (role: LinuxKvmCleanupHostProcess["role"], child: ChildProcess) => { + const pid = child.pid; + if (!pid) throw new Error(`could not track Linux KVM ${role} process`); + const processEntry: LinuxKvmCleanupHostProcess = { + pid, + role, + marker: hostProcessMarker(role), + }; + hostProcesses.set(pid, processEntry); + persistHostProcesses(); + let tracked = true; + const untrack = () => { + if (!tracked) return; + tracked = false; + hostProcesses.delete(pid); + persistHostProcesses(); + }; + child.once("exit", untrack); + return untrack; + }; + cleanup.add("working directory", () => rmSync(workDir, { force: true, recursive: true })); + + const virsh = (args: ReadonlyArray) => + execFileP(tools.virsh, ["--connect", libvirtUri, ...args], { + maxBuffer: 64 * 1024 * 1024, + }); + + cleanup.add("libvirt domain", async () => { + const { stdout: domainNames } = await virsh(["list", "--all", "--name"]); + const exists = domainNames.split(/\r?\n/).includes(domainName); + if (!exists) return; + await virsh(["destroy", domainName]).catch(() => undefined); + await virsh(["undefine", domainName, "--nvram"]).catch(() => virsh(["undefine", domainName])); + }); + + const forwardChildren = new Set(); + cleanup.add("SSH forwards", async () => { + await Promise.all([...forwardChildren].map((child) => stopChild(child))); + forwardChildren.clear(); + }); + + const activeRecordings = new Set(); + cleanup.add("SPICE recordings", async () => { + const failures: unknown[] = []; + for (const recording of [...activeRecordings]) { + try { + await recording.stop(); + } catch (error) { + failures.push(error); + } + } + if (failures.length > 0) { + throw new AggregateError(failures, "one or more SPICE recordings failed to finalize"); + } + }); + + let cleanupFailed = false; + const discard = async () => { + if (cleanupFailed) { + throw new Error( + `Linux KVM cleanup previously failed; recovery ledger retained at 
${cleanupLedgerPath ?? "(not configured)"}`, + ); + } + try { + await cleanup.run(); + } catch (error) { + cleanupFailed = true; + throw error; + } + cleanupLedgerActive = false; + if (cleanupLedgerPath) rmSync(cleanupLedgerPath, { force: true }); + }; + + try { + mkdirSync(workDir, { mode: 0o755 }); + const keyPath = join(workDir, "id_ed25519"); + await execFileP("ssh-keygen", ["-t", "ed25519", "-N", "", "-q", "-f", keyPath]); + chmodSync(keyPath, 0o600); + const publicKey = (await execFileP("ssh-keygen", ["-y", "-f", keyPath])).stdout.trim(); + const cloudInit = linuxKvmCloudInit({ domainName, guestDisplay, guestUser, publicKey }); + const userDataPath = join(workDir, "user-data.yaml"); + const metaDataPath = join(workDir, "meta-data.yaml"); + const seedPath = join(workDir, "seed.iso"); + const overlayPath = join(workDir, "guest.qcow2"); + writeFileSync(userDataPath, cloudInit.userData); + writeFileSync(metaDataPath, cloudInit.metaData); + + await execFileP(tools.qemuImg, [ + "create", + "-f", + "qcow2", + "-F", + options.baseImageFormat ?? "qcow2", + "-b", + options.baseImagePath, + overlayPath, + ]); + await execFileP(tools.cloudLocalDs, [seedPath, userDataPath, metaDataPath]); + // qemu:///system domains run as libvirt's service account, not the CI + // runner that created these disposable files. + chmodSync(overlayPath, 0o666); + chmodSync(seedPath, 0o644); + await execFileP( + tools.virtInstall, + libvirtDomainArgs({ + domainName, + libvirtNetwork, + libvirtUri, + memoryMiB: options.memoryMiB ?? 4_096, + osVariant: options.osVariant ?? "generic", + overlayPath, + seedPath, + vcpus: options.vcpus ?? 4, + }), + { maxBuffer: 64 * 1024 * 1024 }, + ); + + let host = ""; + for (let attempt = 0; attempt < 150 && !host; attempt++) { + for (const source of ["agent", "lease"] as const) { + const result = await virsh(["domifaddr", domainName, "--source", source]).catch(() => ({ + stdout: "", + })); + host = parseDomainAddress(result.stdout) ?? 
""; + if (host) break; + } + if (!host) await sleep(2_000); + } + if (!host) throw new Error(`libvirt did not report an address for ${domainName}`); + + const guest = connectLinuxKvmGuest({ host, keyPath, user: guestUser, toolchain: tools }); + let sshReady = false; + for (let attempt = 0; attempt < 120 && !sshReady; attempt++) { + sshReady = (await guest.run("true")).code === 0; + if (!sshReady) await sleep(2_000); + } + if (!sshReady) throw new Error(`SSH did not become ready for ${domainName}`); + + let guiReady = false; + let guiFailure = ""; + for (let attempt = 0; attempt < 150 && !guiReady; attempt++) { + const result = await guest.run(`DISPLAY=${guestDisplay} xdpyinfo >/dev/null 2>&1`); + guiReady = result.code === 0; + guiFailure = result.stderr || result.stdout; + if (!guiReady) await sleep(2_000); + } + if (!guiReady) { + const service = await guest.run( + "sudo systemctl status executor-e2e-gui.service --no-pager || true", + ); + throw new Error( + `guest Xorg session ${guestDisplay} did not become ready\n${guiFailure}\n${service.stdout}\n${service.stderr}`, + ); + } + + const endpoint = (await virsh(["domdisplay", domainName, "--type", "spice"])).stdout.trim(); + if (!endpoint.startsWith("spice://")) { + throw new Error(`libvirt returned an invalid SPICE endpoint: ${endpoint}`); + } + + const handle: LinuxKvmDesktopHandle = { + kind: "desktop-gui", + os: "linux", + arch: "x64", + host, + sshKeyPath: keyPath, + sshUser: guestUser, + display: { + protocol: "spice", + endpoint, + startRecording: (outputPath) => + createDisplayRecording({ + activeRecordings, + endpoint, + frameRate: options.recordingFrameRate ?? 24, + hostProcessMarker, + outputPath, + size: options.recordingSize ?? 
"1440x900", + trackHostProcess, + tools, + }), + }, + run: guest.run, + push: guest.push, + forward: async (guestPort) => { + const localPort = await freePort(); + const child = spawn( + tools.ssh, + [ + "-i", + keyPath, + ...SSH_OPTIONS, + "-N", + "-L", + `${localPort}:127.0.0.1:${guestPort}`, + `${guestUser}@${host}`, + ], + { + env: { + ...process.env, + E2E_KVM_PROCESS_MARKER: hostProcessMarker("ssh-forward"), + }, + stdio: "ignore", + }, + ); + const untrack = trackHostProcess("ssh-forward", child); + forwardChildren.add(child); + let closed = false; + const close = () => { + if (closed) return; + closed = true; + child.kill(); + }; + child.once("error", close); + child.once("exit", () => { + forwardChildren.delete(child); + untrack(); + }); + try { + await waitForLocalPort(localPort); + } catch (error) { + await stopChild(child); + forwardChildren.delete(child); + untrack(); + throw error; + } + return { localPort, close }; + }, + discard, + }; + return handle; + } catch (error) { + try { + await discard(); + } catch (cleanupError) { + throw new AggregateError( + [error, cleanupError], + "Linux KVM provisioning failed and cleanup was incomplete", + ); + } + throw error; + } + }, +}); + +export const libvirtLinuxKvmDesktop = (options: LibvirtLinuxKvmOptions) => + createLinuxKvmDesktopProvider(createLibvirtLinuxKvmDriver(options), { + baseImagePath: options.baseImagePath, + libvirtNetwork: options.libvirtNetwork, + libvirtUri: options.libvirtUri, + runtime: options.preflightRuntime, + toolchain: options.toolchain, + }); diff --git a/e2e/src/vm/linux-kvm.ts b/e2e/src/vm/linux-kvm.ts new file mode 100644 index 000000000..54772c397 --- /dev/null +++ b/e2e/src/vm/linux-kvm.ts @@ -0,0 +1,251 @@ +// Linux KVM substrate for headed desktop scenarios. This contract is separate +// from VmProvider, which models supervised CLI daemons reached through SSH. 
/** Executable names or paths for every host tool the Linux KVM provider runs. */
export interface LinuxKvmToolchain {
  readonly cloudLocalDs: string;
  readonly ffmpeg: string;
  readonly openbox: string;
  readonly qemu: string;
  readonly qemuImg: string;
  readonly remoteViewer: string;
  readonly scp: string;
  readonly ssh: string;
  readonly virsh: string;
  readonly virtInstall: string;
  readonly xvfb: string;
}

/**
 * Resolves the host toolchain used by the Linux KVM provider.
 *
 * Precedence per tool: a defined entry in `overrides`, then the matching
 * `E2E_*_BIN` environment variable, then the conventional binary name.
 *
 * An environment variable that is empty or whitespace-only is treated as
 * unset, so an unexpanded CI variable cannot turn a tool into `""`. An
 * override whose value is `undefined` is ignored rather than erasing the
 * resolved value.
 *
 * @param overrides - Per-tool overrides; omitted or `undefined` entries fall
 *   through to the environment and defaults.
 * @returns A fully populated toolchain.
 */
export const resolveLinuxKvmToolchain = (
  overrides: Partial<LinuxKvmToolchain> = {},
): LinuxKvmToolchain => {
  const fromEnvironment = (variable: string, fallback: string) =>
    process.env[variable]?.trim() || fallback;

  const tools: Record<keyof LinuxKvmToolchain, string> = {
    cloudLocalDs: fromEnvironment("E2E_CLOUD_LOCALDS_BIN", "cloud-localds"),
    ffmpeg: fromEnvironment("E2E_FFMPEG_BIN", "ffmpeg"),
    openbox: fromEnvironment("E2E_OPENBOX_BIN", "openbox"),
    qemu: fromEnvironment("E2E_QEMU_BIN", "qemu-system-x86_64"),
    qemuImg: fromEnvironment("E2E_QEMU_IMG_BIN", "qemu-img"),
    remoteViewer: fromEnvironment("E2E_REMOTE_VIEWER_BIN", "remote-viewer"),
    scp: fromEnvironment("E2E_SCP_BIN", "scp"),
    ssh: fromEnvironment("E2E_SSH_BIN", "ssh"),
    virsh: fromEnvironment("E2E_VIRSH_BIN", "virsh"),
    virtInstall: fromEnvironment("E2E_VIRT_INSTALL_BIN", "virt-install"),
    xvfb: fromEnvironment("E2E_XVFB_BIN", "Xvfb"),
  };

  for (const name of Object.keys(tools) as Array<keyof LinuxKvmToolchain>) {
    const override = overrides[name];
    if (override !== undefined) tools[name] = override;
  }
  return tools;
};
"optional"; + const runtime = options.runtime ?? defaultRuntime; + const baseImagePath = options.baseImagePath || process.env.E2E_KVM_BASE_IMAGE; + const tools = resolveLinuxKvmToolchain(options.toolchain); + const libvirtUri = options.libvirtUri || process.env.E2E_LIBVIRT_URI || "qemu:///system"; + const libvirtNetwork = options.libvirtNetwork || process.env.E2E_LIBVIRT_NETWORK || "default"; + + const checks = await Promise.all([ + probe("kvm-device", "/dev/kvm is readable and writable", async () => { + await runtime.access("/dev/kvm", constants.R_OK | constants.W_OK); + }), + probe("base-image", "the prepared desktop base image is readable", async () => { + if (!baseImagePath) throw new Error("E2E_KVM_BASE_IMAGE is not set"); + await runtime.access(baseImagePath, constants.R_OK); + return baseImagePath; + }), + probe("qemu", `${tools.qemu} and ${tools.qemuImg} are executable`, async () => { + const result = await runtime.exec(tools.qemu, ["--version"]); + await runtime.exec(tools.qemuImg, ["--version"]); + return firstLine(result.stdout || result.stderr); + }), + probe("libvirt", `${libvirtUri} is reachable`, async () => { + await runtime.exec(tools.virtInstall, ["--version"]); + await runtime.exec(tools.virsh, ["--version"]); + const result = await runtime.exec(tools.virsh, ["--connect", libvirtUri, "uri"]); + await runtime.exec(tools.virsh, ["--connect", libvirtUri, "net-info", libvirtNetwork]); + return firstLine(result.stdout || result.stderr); + }), + probe("cloud-init", `${tools.cloudLocalDs} is executable`, async () => { + const result = await runtime.exec(tools.cloudLocalDs, ["--help"]); + return firstLine(result.stdout || result.stderr); + }), + probe("guest-transport", `${tools.ssh} is executable`, async () => { + const result = await runtime.exec(tools.ssh, ["-V"]); + return firstLine(result.stdout || result.stderr); + }), + probe("display-recorder", "SPICE display capture tools are executable", async () => { + const result = await 
runtime.exec(tools.ffmpeg, ["-version"]); + await runtime.exec(tools.xvfb, ["-help"]); + await runtime.exec(tools.openbox, ["--version"]); + await runtime.exec(tools.remoteViewer, ["--version"]); + return firstLine(result.stdout || result.stderr); + }), + ]); + + const missing = checks.filter((check) => !check.available); + const status = missing.length === 0 ? "available" : "unavailable"; + const summary = + status === "available" + ? "Linux KVM desktop provider is available" + : `Linux KVM desktop provider is unavailable: ${missing + .map((check) => `${check.name} (${check.detail})`) + .join(", ")}`; + const availability: LinuxKvmAvailability = { + status, + checks, + capabilities: LINUX_KVM_DESKTOP_CAPABILITIES, + summary, + }; + + if (status === "unavailable" && requirement === "required") { + throw new LinuxKvmUnavailableError(availability); + } + if (status === "unavailable") runtime.report(`${summary} [optional]`); + return availability; +}; + +export interface LinuxKvmDisplayRecording { + readonly container: "mp4"; + readonly outputPath: string; + stop(): Promise; +} + +export interface LinuxKvmDisplaySession { + readonly protocol: "spice"; + readonly endpoint: string; + startRecording(outputPath: string): Promise; +} + +export interface LinuxKvmGuestCommandResult { + readonly stdout: string; + readonly stderr: string; + readonly code: number; +} + +export interface LinuxKvmPortForward { + readonly localPort: number; + close(): void; +} + +export interface LinuxKvmGuestConnection { + run(command: string): Promise; + push(localPath: string, remotePath: string): Promise; +} + +export interface LinuxKvmDesktopHandle extends LinuxKvmGuestConnection { + readonly kind: "desktop-gui"; + readonly os: "linux"; + readonly arch: "x64"; + readonly host: string; + readonly sshKeyPath: string; + readonly sshUser: string; + readonly display: LinuxKvmDisplaySession; + forward(guestPort: number): Promise; + discard(): Promise; +} + +export interface LinuxKvmDesktopDriver { 
+ provision(): Promise; +} + +export interface LinuxKvmDesktopProvider { + readonly kind: "desktop-gui"; + readonly capabilities: typeof LINUX_KVM_DESKTOP_CAPABILITIES; + preflight(requirement?: LinuxKvmRequirement): Promise; + provision(): Promise; +} + +export const createLinuxKvmDesktopProvider = ( + driver: LinuxKvmDesktopDriver, + preflightOptions: Omit = {}, +): LinuxKvmDesktopProvider => ({ + kind: "desktop-gui", + capabilities: LINUX_KVM_DESKTOP_CAPABILITIES, + preflight: (requirement = "optional") => preflightLinuxKvm({ ...preflightOptions, requirement }), + provision: async () => { + await preflightLinuxKvm({ ...preflightOptions, requirement: "required" }); + return driver.provision(); + }, +}); diff --git a/e2e/src/vm/run-scope.ts b/e2e/src/vm/run-scope.ts new file mode 100644 index 000000000..352cbb249 --- /dev/null +++ b/e2e/src/vm/run-scope.ts @@ -0,0 +1,73 @@ +import { createHash, randomUUID } from "node:crypto"; + +const DEFAULT_TTL_HOURS = 6; +const MAX_TTL_HOURS = 7 * 24; + +type Environment = Readonly>; + +const nonempty = (value: string | undefined) => { + const trimmed = value?.trim(); + return trimmed ? 
/**
 * Builds a resource-name-safe slug for a VM run scope.
 *
 * The readable part keeps only `[a-zA-Z0-9-]`, collapses runs of dashes,
 * strips leading and trailing dashes, and is capped at 36 characters. A
 * 12-hex-digit SHA-256 prefix of the original scope is appended, so scopes
 * with the same readable part still produce different slugs.
 *
 * @param scope - Raw run scope, e.g. a CI run identifier.
 * @returns `<readable>-<digest>`, with `scope` as the readable part when
 *   nothing printable survives sanitising.
 */
export const vmRunScopeSlug = (scope: string) => {
  const readable = scope
    .replace(/[^a-zA-Z0-9-]/g, "-")
    .replace(/-+/g, "-")
    .replace(/^-|-$/g, "")
    .slice(0, 36)
    // Truncation can cut right after a separator; trim again so the slug
    // never contains "--" before the digest.
    .replace(/-$/, "");
  const digest = createHash("sha256").update(scope).digest("hex").slice(0, 12);
  return `${readable || "scope"}-${digest}`;
};
"local", + createdAt: createdAt.toISOString(), + expiresAt: expiresAt.toISOString(), + }; +}; + +export type VmRunMetadata = ReturnType; + +export const requireVmCleanupScope = (environment: Environment = process.env) => { + const scope = nonempty(environment.E2E_VM_RUN_SCOPE); + if (!scope) throw new Error("E2E_VM_RUN_SCOPE is required for VM cleanup"); + return { scope, scopeSlug: vmRunScopeSlug(scope) }; +}; + +export const requireEc2CleanupOwner = (environment: Environment = process.env) => { + const { scope, scopeSlug } = requireVmCleanupScope(environment); + const repository = nonempty(environment.GITHUB_REPOSITORY); + const runId = nonempty(environment.GITHUB_RUN_ID); + const runAttempt = nonempty(environment.GITHUB_RUN_ATTEMPT); + if (!repository || !runId || !runAttempt) { + throw new Error( + "EC2 cleanup requires GITHUB_REPOSITORY, GITHUB_RUN_ID, and GITHUB_RUN_ATTEMPT", + ); + } + return { repository, runAttempt, runId, scope, scopeSlug }; +}; diff --git a/e2e/src/vm/tart-lifecycle.ts b/e2e/src/vm/tart-lifecycle.ts new file mode 100644 index 000000000..674d001a1 --- /dev/null +++ b/e2e/src/vm/tart-lifecycle.ts @@ -0,0 +1,207 @@ +import { execFile } from "node:child_process"; +import { promisify } from "node:util"; + +import { Schema } from "effect"; + +import { vmRunScopeSlug } from "./run-scope"; +import { + readTartOwnership, + removeTartOwnership, + requireTartCleanupOwner, + selectCurrentTartOwnership, + selectExpiredTartOwnership, + type TartOwnershipLedger, +} from "./tart-ownership"; +import { sleep } from "./types"; + +const execFileP = promisify(execFile); + +const TartVmListEntry = Schema.Struct({ + Name: Schema.String, + Running: Schema.Boolean, + State: Schema.String, +}); +const decodeTartVmList = Schema.decodeUnknownSync( + Schema.fromJsonString(Schema.Array(TartVmListEntry)), +); +type TartVmListEntry = typeof TartVmListEntry.Type; + +export type TartCommandRunner = (args: readonly string[]) => Promise; + +export interface TartRunProcess 
{ + readonly exitCode: number | null; + readonly signalCode: NodeJS.Signals | null; + kill(signal?: NodeJS.Signals): boolean; +} + +export const tartScopePrefix = (scope: string) => `executor-e2e-${vmRunScopeSlug(scope)}-`; + +export const tartResourceName = (scope: string, os: "linux" | "macos", unique: string) => + `${tartScopePrefix(scope)}${os}-${unique.replace(/[^a-zA-Z0-9-]/g, "-").slice(0, 32)}`; + +const hasExited = (child: TartRunProcess) => child.exitCode !== null || child.signalCode !== null; + +export const terminateTartRunProcess = async ( + child: TartRunProcess, + options?: { + readonly pollAttempts?: number; + readonly pollIntervalMs?: number; + readonly wait?: (ms: number) => Promise; + }, +) => { + if (hasExited(child)) return; + const attempts = options?.pollAttempts ?? 100; + const intervalMs = options?.pollIntervalMs ?? 100; + const wait = options?.wait ?? sleep; + + const waitForExit = async () => { + for (let attempt = 0; attempt < attempts; attempt += 1) { + if (hasExited(child)) return true; + await wait(intervalMs); + } + return hasExited(child); + }; + + const signaled = child.kill("SIGINT"); + if (!signaled && !hasExited(child)) { + throw new Error("tart run process rejected SIGINT"); + } + if (await waitForExit()) return; + + const killed = child.kill("SIGKILL"); + if (!killed && !hasExited(child)) { + throw new Error("tart run process rejected SIGKILL"); + } + if (!(await waitForExit())) { + throw new Error("tart run process did not exit after SIGKILL"); + } +}; + +const defaultTartRunner = + (environment: Readonly>): TartCommandRunner => + async (args) => { + const executable = environment.E2E_TART_BIN?.trim() || "/opt/homebrew/bin/tart"; + const { stdout } = await execFileP(executable, [...args], { + maxBuffer: 16 * 1024 * 1024, + }); + return stdout; + }; + +export const listTartVms = async (runner: TartCommandRunner) => + decodeTartVmList(await runner(["list", "--source", "local", "--format", "json"])); + +export const 
/**
 * Guards cleanup against tart VMs whose listing is unknown or contradictory.
 *
 * Accepted combinations are `running` with `Running === true`, and `stopped`
 * or `suspended` with `Running === false`. Anything else throws.
 *
 * @param entry - One row of the tart VM listing.
 * @throws Error naming the VM and its reported state when the row is not one
 *   of the accepted combinations.
 */
const assertKnownTartState = (entry: TartVmListEntry) => {
  const { Name, Running, State } = entry;
  switch (State) {
    case "running":
      if (Running) return;
      break;
    case "stopped":
    case "suspended":
      if (!Running) return;
      break;
    default:
      break;
  }
  throw new Error(
    `refusing to clean tart VM with unknown or inconsistent state: ${Name} (${State})`,
  );
};
ledgersRemoved += 1; + } catch (error) { + failures.push( + new AggregateError([error], `failed to remove tart ownership: ${ledger.record.vmName}`), + ); + } + } + } + + if (failures.length > 0) { + throw new AggregateError(failures, "tart cleanup was incomplete"); + } + return { deleted, ledgersRemoved }; +}; + +export const cleanupCurrentTartResources = async (options?: { + readonly environment?: Readonly>; + readonly runner?: TartCommandRunner; +}) => { + const environment = options?.environment ?? process.env; + const owner = requireTartCleanupOwner(environment); + const runner = options?.runner ?? defaultTartRunner(environment); + const ledgers = selectCurrentTartOwnership(readTartOwnership(environment), owner); + return { ...(await cleanupOwnedTartResources(ledgers, runner)), scope: owner.scope }; +}; + +export const sweepExpiredTartResources = async (options: { + readonly minimumAgeHours: number; + readonly environment?: Readonly>; + readonly now?: number; + readonly runner?: TartCommandRunner; +}) => { + const environment = options.environment ?? process.env; + const owner = requireTartCleanupOwner(environment); + const runner = options.runner ?? 
defaultTartRunner(environment); + const ledgers = selectExpiredTartOwnership( + readTartOwnership(environment), + owner, + options.minimumAgeHours, + options.now, + ); + return { + ...(await cleanupOwnedTartResources(ledgers, runner)), + repository: owner.repository, + }; +}; diff --git a/e2e/src/vm/tart-ownership.ts b/e2e/src/vm/tart-ownership.ts new file mode 100644 index 000000000..ca8b8b063 --- /dev/null +++ b/e2e/src/vm/tart-ownership.ts @@ -0,0 +1,184 @@ +import { createHash, randomUUID } from "node:crypto"; +import { + existsSync, + mkdirSync, + readFileSync, + readdirSync, + renameSync, + rmSync, + writeFileSync, +} from "node:fs"; +import { homedir } from "node:os"; +import { basename, dirname, join, resolve } from "node:path"; + +import { Schema } from "effect"; + +import { type VmRunMetadata, vmRunScopeSlug } from "./run-scope"; + +const MANAGED_BY = "executor-e2e-tart-v1"; +const LEDGER_FILE = /^[a-f0-9]{64}\.json$/; + +export const TartOwnership = Schema.Struct({ + schemaVersion: Schema.Literal(1), + managedBy: Schema.Literal(MANAGED_BY), + repository: Schema.String, + runId: Schema.String, + runAttempt: Schema.String, + runScope: Schema.String, + os: Schema.Literals(["linux", "macos"]), + vmName: Schema.String, + createdAt: Schema.String, + expiresAt: Schema.String, +}); + +export type TartOwnership = typeof TartOwnership.Type; + +export interface TartOwnershipLedger { + readonly path: string; + readonly record: TartOwnership; +} + +type Environment = Readonly>; + +const nonempty = (value: string | undefined) => { + const trimmed = value?.trim(); + return trimmed ? trimmed : undefined; +}; + +export const tartOwnershipRoot = (environment: Environment = process.env) => + resolve(nonempty(environment.E2E_TART_STATE_ROOT) ?? 
join(homedir(), ".executor-e2e", "tart")); + +const ledgerFilename = (vmName: string) => + `${createHash("sha256").update(vmName).digest("hex")}.json`; + +export const tartOwnershipPath = (root: string, vmName: string) => + join(resolve(root), ledgerFilename(vmName)); + +const assertValidOwnership = (record: TartOwnership, path?: string) => { + const required = [record.repository, record.runId, record.runAttempt, record.runScope]; + if (required.some((value) => value.trim().length === 0)) { + throw new Error(`tart ownership contains an empty identity field: ${record.vmName}`); + } + const createdAt = Date.parse(record.createdAt); + const expiresAt = Date.parse(record.expiresAt); + if (!Number.isFinite(createdAt) || !Number.isFinite(expiresAt) || createdAt > expiresAt) { + throw new Error(`tart ownership has an invalid lifetime: ${record.vmName}`); + } + const expectedPrefix = `executor-e2e-${vmRunScopeSlug(record.runScope)}-${record.os}-`; + if (!record.vmName.startsWith(expectedPrefix)) { + throw new Error(`tart ownership has an invalid VM name: ${record.vmName}`); + } + if (path && basename(path) !== ledgerFilename(record.vmName)) { + throw new Error(`tart ownership filename does not match VM name: ${basename(path)}`); + } +}; + +export const createTartOwnership = ( + metadata: VmRunMetadata, + os: "linux" | "macos", + vmName: string, +): TartOwnership => ({ + schemaVersion: 1, + managedBy: MANAGED_BY, + repository: metadata.repository, + runId: metadata.runId, + runAttempt: metadata.runAttempt, + runScope: metadata.scope, + os, + vmName, + createdAt: metadata.createdAt, + expiresAt: metadata.expiresAt, +}); + +export const writeTartOwnership = ( + record: TartOwnership, + environment: Environment = process.env, +) => { + assertValidOwnership(record); + const root = tartOwnershipRoot(environment); + mkdirSync(root, { mode: 0o700, recursive: true }); + const path = tartOwnershipPath(root, record.vmName); + const temporaryPath = 
`${path}.${process.pid}.${randomUUID()}.tmp`; + writeFileSync(temporaryPath, `${JSON.stringify(record, null, 2)}\n`, { + encoding: "utf8", + flag: "wx", + mode: 0o600, + }); + renameSync(temporaryPath, path); + return { path, record } satisfies TartOwnershipLedger; +}; + +const decodeOwnership = Schema.decodeUnknownSync(Schema.fromJsonString(TartOwnership)); + +export const readTartOwnership = (environment: Environment = process.env) => { + const root = tartOwnershipRoot(environment); + if (!existsSync(root)) return []; + const ledgers: TartOwnershipLedger[] = []; + for (const entry of readdirSync(root, { withFileTypes: true }).sort((a, b) => + a.name.localeCompare(b.name), + )) { + if (!entry.name.endsWith(".json")) continue; + if (!entry.isFile() || !LEDGER_FILE.test(entry.name)) { + throw new Error(`unsafe tart ownership entry: ${entry.name}`); + } + const path = join(root, entry.name); + const record = decodeOwnership(readFileSync(path, "utf8")); + assertValidOwnership(record, path); + ledgers.push({ path, record }); + } + return ledgers; +}; + +export const removeTartOwnership = (ledger: TartOwnershipLedger) => { + const expectedPath = tartOwnershipPath(dirname(ledger.path), ledger.record.vmName); + if (ledger.path !== expectedPath) { + throw new Error(`refusing to remove mismatched tart ownership path: ${ledger.path}`); + } + rmSync(ledger.path, { force: true }); +}; + +export const requireTartCleanupOwner = (environment: Environment = process.env) => { + const scope = nonempty(environment.E2E_VM_RUN_SCOPE); + const repository = nonempty(environment.GITHUB_REPOSITORY); + const runId = nonempty(environment.GITHUB_RUN_ID); + const runAttempt = nonempty(environment.GITHUB_RUN_ATTEMPT); + if (!scope || !repository || !runId || !runAttempt) { + throw new Error( + "Tart cleanup requires E2E_VM_RUN_SCOPE, GITHUB_REPOSITORY, GITHUB_RUN_ID, and GITHUB_RUN_ATTEMPT", + ); + } + return { repository, runAttempt, runId, scope }; +}; + +export const 
selectCurrentTartOwnership = ( + ledgers: readonly TartOwnershipLedger[], + owner: ReturnType, +) => + ledgers.filter( + ({ record }) => + record.repository === owner.repository && + record.runId === owner.runId && + record.runAttempt === owner.runAttempt && + record.runScope === owner.scope, + ); + +export const selectExpiredTartOwnership = ( + ledgers: readonly TartOwnershipLedger[], + owner: ReturnType, + minimumAgeHours: number, + now = Date.now(), +) => { + if (!Number.isFinite(minimumAgeHours) || minimumAgeHours <= 0) { + throw new Error("minimumAgeHours must be greater than zero"); + } + const minimumAgeMs = minimumAgeHours * 60 * 60 * 1_000; + return ledgers.filter(({ record }) => { + if (record.repository !== owner.repository) return false; + assertValidOwnership(record); + if (record.runId === owner.runId && record.runAttempt === owner.runAttempt) return false; + const createdAt = Date.parse(record.createdAt); + const expiresAt = Date.parse(record.expiresAt); + if (createdAt > now) throw new Error(`tart ownership is dated in the future: ${record.vmName}`); + return expiresAt <= now && now - createdAt >= minimumAgeMs; + }); +}; diff --git a/e2e/src/vm/tart.ts b/e2e/src/vm/tart.ts index 5ca696420..0805a26b6 100644 --- a/e2e/src/vm/tart.ts +++ b/e2e/src/vm/tart.ts @@ -1,8 +1,8 @@ // tart provider: macOS + Linux guests on an Apple-Silicon host (the Mini). -// Mirrors the by-hand reboot harness — clone a base image, boot headless, drive +// Mirrors the by-hand reboot harness: clone a base image, boot headless, drive // over sshpass, reboot the guest OS for real, tear down the clone. 
-import { execFile, spawn, type ChildProcess } from "node:child_process"; +import { execFile, spawn } from "node:child_process"; import net from "node:net"; import { promisify } from "node:util"; @@ -14,6 +14,9 @@ import { type VmHandle, type VmProvider, } from "./types"; +import { resolveVmRunMetadata } from "./run-scope"; +import { deleteTartVmAndVerify, tartResourceName, terminateTartRunProcess } from "./tart-lifecycle"; +import { createTartOwnership, removeTartOwnership, writeTartOwnership } from "./tart-ownership"; const execFileP = promisify(execFile); @@ -34,10 +37,97 @@ const SSH_OPTS = [ const GUEST_USER = "admin"; const GUEST_PASS = "admin"; +export interface ReconnectingChild { + on(event: "error" | "exit", listener: () => void): unknown; + kill(): unknown; +} + +/** + * Owns one reconnecting child process. Pausing or closing invalidates an + * in-flight async spawn, kills the active child, and clears its retry timer. + */ +export const createReconnectingProcess = ( + spawnChild: () => Promise | ReconnectingChild, + reconnectDelayMs = 2_000, +) => { + let child: ReconnectingChild | undefined; + let reconnectTimer: ReturnType | undefined; + let generation = 0; + let starting = false; + let paused = true; + let closed = false; + + const clearReconnectTimer = () => { + if (reconnectTimer) clearTimeout(reconnectTimer); + reconnectTimer = undefined; + }; + + const scheduleReconnect = () => { + if (closed || paused || reconnectTimer) return; + reconnectTimer = setTimeout(() => { + reconnectTimer = undefined; + void spawnOnce(); + }, reconnectDelayMs); + }; + + const spawnOnce = async () => { + if (closed || paused || starting || child) return; + const attempt = ++generation; + starting = true; + try { + const spawned = await spawnChild(); + if (closed || paused || attempt !== generation) { + spawned.kill(); + return; + } + + child = spawned; + let settled = false; + const onStopped = () => { + if (settled) return; + settled = true; + if (child === spawned) 
child = undefined; + scheduleReconnect(); + }; + spawned.on("error", onStopped); + spawned.on("exit", onStopped); + } catch { + scheduleReconnect(); + } finally { + if (attempt === generation) starting = false; + } + }; + + const pause = () => { + if (closed) return; + paused = true; + generation += 1; + starting = false; + clearReconnectTimer(); + const active = child; + child = undefined; + active?.kill(); + }; + + const resume = () => { + if (closed || !paused) return; + paused = false; + void spawnOnce(); + }; + + const close = () => { + if (closed) return; + pause(); + closed = true; + }; + + return { close, pause, resume }; +}; + /** * Reboot a tart guest by address, with no live handle. `restart()` runs in a * vitest worker (separate process from the globalsetup that owns the VM), so it - * re-derives the guest address from env and triggers the reboot statelessly — + * re-derives the guest address from env and triggers the reboot statelessly, * the reconnecting tunnel and a health poll confirm recovery. 
*/ export const sshRebootGuest = async (ip: string): Promise => { @@ -90,21 +180,36 @@ const waitLocalPort = async (port: number, attempts = 40): Promise => { export const tartVm = (os: "macos" | "linux", arch: VmArch = "arm64"): VmProvider => ({ os, provision: async () => { - const name = `executor-e2e-${os}-${process.pid}-${Math.floor(performance.now())}`; + const metadata = resolveVmRunMetadata(); + const name = tartResourceName( + metadata.scope, + os, + `${process.pid}-${Math.floor(performance.now())}`, + ); + const ownership = writeTartOwnership(createTartOwnership(metadata, os, name)); await execFileP(TART, ["clone", baseImage(os), name]); const runProc = spawn(TART, ["run", name, "--no-graphics"], { stdio: "ignore" }); + const tartRunner = async (args: readonly string[]) => { + const { stdout } = await execFileP(TART, [...args]); + return stdout; + }; - const tunnelClosers: Array<() => void> = []; + const tunnels = new Set>(); let ip = ""; + const discoverIp = async () => { + const { stdout } = await execFileP(TART, ["ip", name]); + const discovered = stdout.trim(); + if (!discovered) throw new Error(`tart ${os}: IP is not available`); + ip = discovered; + return discovered; + }; + const fetchIp = async (): Promise => { for (let i = 0; i < 90; i++) { try { - const { stdout } = await execFileP(TART, ["ip", name]); - if (stdout.trim()) { - ip = stdout.trim(); - return true; - } + await discoverIp(); + return true; } catch { /* not booted yet */ } @@ -143,6 +248,32 @@ export const tartVm = (os: "macos" | "linux", arch: VmArch = "arm64"): VmProvide return false; }; + let discardPromise: Promise | undefined; + const discard = () => { + discardPromise ??= (async () => { + for (const tunnel of tunnels) tunnel.close(); + tunnels.clear(); + const failures: unknown[] = []; + try { + await terminateTartRunProcess(runProc); + } catch (error) { + failures.push(new AggregateError([error], `failed to stop tart run process: ${name}`)); + } + try { + await 
deleteTartVmAndVerify(name, tartRunner); + removeTartOwnership(ownership); + } catch (error) { + failures.push( + new AggregateError([error], `failed to delete tart VM or ownership: ${name}`), + ); + } + if (failures.length > 0) { + throw new AggregateError(failures, `tart ${os}: discard was incomplete`); + } + })(); + return discardPromise; + }; + const handle: VmHandle = { os, arch, @@ -162,20 +293,21 @@ export const tartVm = (os: "macos" | "linux", arch: VmArch = "arm64"): VmProvide ]); }, reboot: async () => { + for (const tunnel of tunnels) tunnel.pause(); await ssh("sudo reboot").catch(() => undefined); // connection drops mid-call + ip = ""; await sleep(5000); if (!(await fetchIp())) throw new Error(`tart ${os}: no IP after reboot`); if (!(await waitSsh(120))) throw new Error(`tart ${os}: SSH did not return after reboot`); + for (const tunnel of tunnels) tunnel.resume(); }, tunnel: async (guestPort) => { const localPort = await freePort(); - // Reconnecting forward: when the guest reboots the ssh exits, so respawn - // it until closed. `restart()` health-polls through this local port, so - // it only goes green once the daemon AND the forward are back. - let closed = false; - let child: ChildProcess | undefined; - const spawnOnce = (): void => { - child = spawn( + // Resolve the address before every spawn. A DHCP address can change + // while restart() runs in a worker that has no live VM handle. 
+ const controller = createReconnectingProcess(async () => { + const currentIp = await discoverIp(); + return spawn( SSHPASS, [ "-p", @@ -185,30 +317,28 @@ export const tartVm = (os: "macos" | "linux", arch: VmArch = "arm64"): VmProvide "-N", "-L", `${localPort}:127.0.0.1:${guestPort}`, - `${GUEST_USER}@${ip}`, + `${GUEST_USER}@${currentIp}`, ], { stdio: "ignore" }, ); - child.on("exit", () => { - if (!closed) setTimeout(spawnOnce, 2000); - }); - }; - spawnOnce(); - const close = (): void => { - closed = true; - child?.kill(); + }); + tunnels.add(controller); + controller.resume(); + try { + await waitLocalPort(localPort); + } catch (error) { + controller.close(); + tunnels.delete(controller); + throw error; + } + const close = () => { + controller.close(); + tunnels.delete(controller); }; - tunnelClosers.push(close); - await waitLocalPort(localPort); const tunnel: Tunnel = { localPort, close }; return tunnel; }, - discard: async () => { - for (const close of tunnelClosers) close(); - runProc.kill(); - await sleep(1500); - await execFileP(TART, ["delete", name]).catch(() => undefined); - }, + discard, }; if (!(await fetchIp())) { diff --git a/e2e/targets/cloudflare.ts b/e2e/targets/cloudflare.ts index abce7ddee..e37168387 100644 --- a/e2e/targets/cloudflare.ts +++ b/e2e/targets/cloudflare.ts @@ -1,30 +1,54 @@ // The Cloudflare self-host app (apps/host-cloudflare) as a target: the REAL // worker on workerd via Miniflare (wrangler `unstable_dev`) with a local D1 + -// R2, booted in setup/cloudflare.globalsetup.ts. Dev-auth is on, so every -// request is the fixed dev admin — no per-identity login and no MCP OAuth (the -// /mcp endpoint accepts the dev principal directly). Single-tenant, like -// self-host; per-test isolation is the next step here. +// R2, booted in setup/cloudflare.globalsetup.ts. A loopback Access issuer signs +// application JWTs and serves the team JWKS, so every surface exercises the +// production Access verifier without a Cloudflare account. 
+import { randomUUID } from "node:crypto"; + import { Effect } from "effect"; +import { + accessAssertionHeaders, + issueCloudflareAccessToken, +} from "../src/cloudflare-access-emulator"; import { e2ePort } from "../src/ports"; -import type { Identity, Target } from "../src/target"; +import type { Target } from "../src/target"; -// Offsets 0-4 are taken by cloud (0-3) and self-host (4); Cloudflare claims 5. -export const CLOUDFLARE_PORT = e2ePort("E2E_CLOUDFLARE_PORT", 5); +// Offsets 0-5 are taken by cloud (0-3), self-host (4), and its Docker image (5). +// This target owns the worker at 6 and its Access issuer at 7. +export const CLOUDFLARE_PORT = e2ePort("E2E_CLOUDFLARE_PORT", 6); export const CLOUDFLARE_BASE_URL = process.env.E2E_CLOUDFLARE_URL ?? `http://127.0.0.1:${CLOUDFLARE_PORT}`; +export const CLOUDFLARE_ACCESS_PORT = e2ePort("E2E_CLOUDFLARE_ACCESS_PORT", 7); +export const CLOUDFLARE_ACCESS_BASE_URL = + process.env.E2E_CLOUDFLARE_ACCESS_URL ?? `http://127.0.0.1:${CLOUDFLARE_ACCESS_PORT}`; + +export const makeCloudflareAccessIdentity = async () => { + const id = randomUUID(); + const email = "admin@e2e.test"; + const token = + process.env.E2E_CLOUDFLARE_ACCESS_TOKEN ?? + (await issueCloudflareAccessToken(CLOUDFLARE_ACCESS_BASE_URL, { + kind: "human", + subject: `user-${id}`, + email, + name: `Access user ${id.slice(0, 8)}`, + groups: ["member"], + })); + return { + label: email, + headers: accessAssertionHeaders(token), + cookies: [{ name: "CF_Authorization", value: token }], + }; +}; export const cloudflareTarget = (): Target => ({ name: "cloudflare", baseUrl: CLOUDFLARE_BASE_URL, mcpUrl: `${CLOUDFLARE_BASE_URL}/mcp`, - // No "billing" and no setAccessTokenTtl (Cloudflare Access is the IdP; not - // test-adjustable). "mcp-oauth" advertises that the MCP surface exists — but - // dev-auth means it needs no consent flow, so `mcpConsent` is omitted and the - // MCP client connects as the dev admin directly. 
+ // No "billing" and no setAccessTokenTtl (Cloudflare Access is the IdP). + // "mcp-oauth" advertises that the protected MCP surface exists. Access has + // already authenticated the assertion, so there is no app OAuth consent. capabilities: new Set(["api", "browser", "mcp-oauth"]), - // Dev-auth: one fixed admin. Empty `headers` makes the API surface send no - // auth (and skip the Better Auth sign-in path) — the worker resolves every - // request to the dev admin. No cookie is needed for the browser either. - newIdentity: () => Effect.succeed({ label: "dev-admin", headers: {} } satisfies Identity), + newIdentity: () => Effect.promise(makeCloudflareAccessIdentity), }); diff --git a/e2e/targets/selfhost-docker.ts b/e2e/targets/selfhost-docker.ts index 1c3d9dc30..51802a2a9 100644 --- a/e2e/targets/selfhost-docker.ts +++ b/e2e/targets/selfhost-docker.ts @@ -11,7 +11,7 @@ import { cookieConsentStrategy } from "@executor-js/mcporter"; import { e2ePort } from "../src/ports"; import type { Identity, Target } from "../src/target"; import { runSelfhostContainer, stopSelfhostContainer } from "../setup/selfhost-docker.boot"; -import { SELFHOST_ADMIN, signInSession } from "./selfhost"; +import { createInvitedSelfhostIdentity, SELFHOST_ADMIN } from "./selfhost"; export const SELFHOST_DOCKER_PORT = e2ePort("E2E_SELFHOST_DOCKER_PORT", 5); export const SELFHOST_DOCKER_BASE_URL = @@ -23,18 +23,7 @@ export const selfhostDockerTarget = (): Target => ({ mcpUrl: `${SELFHOST_DOCKER_BASE_URL}/mcp`, capabilities: new Set(["api", "browser", "mcp-oauth"]), newIdentity: () => - Effect.promise(async (): Promise => { - const { cookieHeader, cookies } = await signInSession( - SELFHOST_DOCKER_BASE_URL, - SELFHOST_ADMIN, - ); - return { - label: SELFHOST_ADMIN.email, - credentials: SELFHOST_ADMIN, - headers: { cookie: cookieHeader }, - cookies, - }; - }), + Effect.promise(() => createInvitedSelfhostIdentity(SELFHOST_DOCKER_BASE_URL, SELFHOST_ADMIN)), mcpConsent: (identity: Identity) => 
cookieConsentStrategy({ appBaseUrl: SELFHOST_DOCKER_BASE_URL, diff --git a/e2e/targets/selfhost.ts b/e2e/targets/selfhost.ts index f88fd8763..3b2d43237 100644 --- a/e2e/targets/selfhost.ts +++ b/e2e/targets/selfhost.ts @@ -2,6 +2,8 @@ // on a throwaway data dir, with Better Auth + the bootstrap admin. MCP OAuth is // headless via `forcedMcpConsent` below. Boot lives in // setup/selfhost.globalsetup.ts. +import { randomUUID } from "node:crypto"; + import { Effect } from "effect"; import { e2ePort } from "../src/ports"; @@ -43,6 +45,60 @@ export const signInSession = async ( return { cookieHeader: pairs.join("; "), cookies }; }; +/** Create a distinct administrator through the same invite and signup flow a + * real second operator uses. This keeps per-scenario browser, API, CLI, and MCP + * credential caches independent while retaining permission to create the + * workspace resources exercised by the shared scenario suite. */ +export const createInvitedSelfhostIdentity = async ( + baseUrl: string, + bootstrapAdmin: { readonly email: string; readonly password: string }, +) => { + const origin = new URL(baseUrl).origin; + const adminSession = await signInSession(baseUrl, bootstrapAdmin); + const invite = await fetch(new URL("/api/admin/invites", baseUrl), { + method: "POST", + headers: { + "content-type": "application/json", + cookie: adminSession.cookieHeader, + origin, + }, + body: JSON.stringify({ role: "admin", label: "e2e scenario identity" }), + }); + if (!invite.ok) { + throw new Error(`selfhost: invite creation failed (${invite.status}): ${await invite.text()}`); + } + const inviteBody = (await invite.json()) as { readonly code?: unknown }; + if (typeof inviteBody.code !== "string") { + throw new Error("selfhost: invite creation returned no code"); + } + + const label = `user-${randomUUID().slice(0, 8)}`; + const credentials = { + email: `${label}@e2e.test`, + password: `e2e-${randomUUID()}-password`, + }; + const signup = await fetch(new 
URL("/api/auth/sign-up/email", baseUrl), { + method: "POST", + headers: { "content-type": "application/json", origin }, + body: JSON.stringify({ + ...credentials, + name: label, + inviteCode: inviteBody.code, + }), + }); + if (!signup.ok) { + throw new Error(`selfhost: invited signup failed (${signup.status}): ${await signup.text()}`); + } + + const session = await signInSession(baseUrl, credentials); + return { + label: credentials.email, + credentials, + headers: { cookie: session.cookieHeader }, + cookies: session.cookies, + } satisfies Identity; +}; + // Headless MCP OAuth consent. The self-host serving layer forces // `prompt=consent` on every MCP authorize (src/auth/force-mcp-consent), so an // authenticated authorize no longer redirects straight to the callback with a @@ -105,22 +161,11 @@ export const selfhostTarget = (): Target => ({ mcpUrl: `${SELFHOST_BASE_URL}/mcp`, // No "billing" (no limits) and no setAccessTokenTtl yet (Better Auth is the // authorization server; its token TTL isn't test-adjustable, so token-expiry - // scenarios skip here). Identity is the bootstrap admin for now — - // single-tenant; per-test invite-signup isolation is the next step here, so - // browser scenarios must prefix the resources they create. + // scenarios skip here). Every scenario receives a distinct invited admin in + // the single workspace, so account credentials and caches cannot bleed. capabilities: new Set(["api", "browser", "mcp-oauth"]), newIdentity: () => - Effect.promise(async (): Promise => { - // Sign in once and carry the session in both shapes: `headers` for the - // API surface, `cookies` for an injectable logged-in browser context. 
- const { cookieHeader, cookies } = await signInSession(SELFHOST_BASE_URL, SELFHOST_ADMIN); - return { - label: SELFHOST_ADMIN.email, - credentials: SELFHOST_ADMIN, - headers: { cookie: cookieHeader }, - cookies, - }; - }), + Effect.promise(() => createInvitedSelfhostIdentity(SELFHOST_BASE_URL, SELFHOST_ADMIN)), mcpConsent: (identity: Identity) => forcedMcpConsent(SELFHOST_BASE_URL, { email: identity.credentials?.email ?? SELFHOST_ADMIN.email, diff --git a/e2e/tsconfig.json b/e2e/tsconfig.json index 38d1e7965..f621cb096 100644 --- a/e2e/tsconfig.json +++ b/e2e/tsconfig.json @@ -10,5 +10,22 @@ "jsx": "react-jsx", "types": ["node"] }, - "include": ["src", "cloud", "scenarios", "selfhost", "setup", "targets", "scripts", "viewer/src"] + "include": [ + "src", + "cli", + "cloud", + "cloudflare", + "desktop", + "desktop-kvm", + "desktop-packaged", + "harness", + "local", + "scenarios", + "selfhost", + "setup", + "targets", + "scripts", + "viewer", + "vitest.config.ts" + ] } diff --git a/e2e/viewer/src/App.tsx b/e2e/viewer/src/App.tsx index 5370284fd..3ab3d8b5d 100644 --- a/e2e/viewer/src/App.tsx +++ b/e2e/viewer/src/App.tsx @@ -1,10 +1,19 @@ import React, { Suspense, useEffect, useState } from "react"; +import type { EvidencePublicationMetadata } from "../../src/published-artifacts"; +import type { ManifestArtifact, ManifestRun } from "../../src/viewer/manifest"; +import PublicationBanner, { parsePublicationMetadata } from "./PublicationBanner"; import type { SessionTimeline } from "./SessionPlayer"; +import { + liveMotelViewerFromSearch, + parsePortableTraceExport, + type PortableTraceExport, +} from "./portable-traces"; const TestSource = React.lazy(() => import("./TestSource")); const TerminalCast = React.lazy(() => import("./TerminalCast")); const SessionPlayer = React.lazy(() => import("./SessionPlayer")); +const PortableTraceExplorer = React.lazy(() => import("./PortableTraceExplorer")); // --------------------------------------------------------------------------- 
// The matrix (scenario × target health) plus a per-run artifact page. The @@ -13,15 +22,6 @@ const SessionPlayer = React.lazy(() => import("./SessionPlayer")); // session video, screenshots, failure output) for any run. // --------------------------------------------------------------------------- -interface ManifestRun { - scenario: string; - target: string; - slug: string; - ok: boolean; - durationMs?: number; - endedAt?: number; -} - interface Manifest { generatedAt: number; runs: ManifestRun[]; @@ -39,6 +39,18 @@ interface RunResult { artifacts: string[]; } +export const runRoute = (target: string, slug: string): string => + `#/run/${encodeURIComponent(target)}/${encodeURIComponent(slug)}`; + +const decodedRoutePart = (value: string | undefined) => { + if (!value) return undefined; + try { + return decodeURIComponent(value); + } catch { + return undefined; + } +}; + const useRoute = () => { const [hash, setHash] = useState(window.location.hash); useEffect(() => { @@ -47,19 +59,37 @@ const useRoute = () => { return () => window.removeEventListener("hashchange", onChange); }, []); const parts = hash.replace(/^#\/?/, "").split("/").filter(Boolean); - return parts.length >= 2 ? { target: parts[0], slug: parts[1] } : null; + const routeParts = parts[0] === "run" ? parts.slice(1) : parts; + const target = decodedRoutePart(routeParts[0]); + const slug = decodedRoutePart(routeParts[1]); + return target && slug ? { target, slug } : null; }; export const App = () => { const route = useRoute(); - return route ? : ; + const [publication, setPublication] = useState(); + useEffect(() => { + fetch("publication.json") + .then((response) => (response.ok ? response.json() : null)) + .then((value) => setPublication(parsePublicationMetadata(value))) + .catch(() => setPublication(null)); + }, []); + return route ? 
( + + ) : ( + + ); }; // --------------------------------------------------------------------------- // Matrix // --------------------------------------------------------------------------- -const Matrix = () => { +const Matrix = ({ + publication, +}: { + readonly publication: EvidencePublicationMetadata | null | undefined; +}) => { const [manifest, setManifest] = useState(null); const [error, setError] = useState(null); useEffect(() => { @@ -85,10 +115,11 @@ const Matrix = () => { return (
-

Executor e2e — every scenario, on every deployment

+ +

Executor e2e: every scenario, on every deployment

Click a result for that run's artifacts (Playwright trace, video, screenshots, failure - output). “—” = capability not on that target. + output). "n/a" means the capability is unavailable on that target.

@@ -110,19 +141,25 @@ const Matrix = () => { ); } return ( ); })} @@ -143,28 +180,73 @@ const Matrix = () => { // mixed-content fetch of trace.zip. Same-origin avoids all of it. // --------------------------------------------------------------------------- -// The suite's motel (local OTLP store, booted by the global setup on a -// fixed port). Every run exports distributed traces there; the run page -// links its harvested trace ids straight into motel's per-trace waterfall. -const MOTEL_VIEWER = "http://127.0.0.1:4796"; - interface RunTraceRef { id: string; at: number; url: string; + ms?: number; + status?: number; + source?: "terminal" | "browser"; + label?: string; } type RunTab = "session" | "browser" | "terminal" | "source"; -const RunView = ({ target, slug }: { target: string; slug: string }) => { - const base = `${target}/${slug}`; +const artifactUrl = (base: string, name: string): string => `${base}/${encodeURIComponent(name)}`; + +export const ArtifactNavigation = ({ + base, + artifacts, +}: { + readonly base: string; + readonly artifacts: ReadonlyArray; +}) => { + if (artifacts.length === 0) return null; + return ( +
+

+ Persisted evidence +

+ +
+ ); +}; + +const RunView = ({ + target, + slug, + publication, +}: { + target: string; + slug: string; + publication: EvidencePublicationMetadata | null | undefined; +}) => { + const base = `${encodeURIComponent(target)}/${encodeURIComponent(slug)}`; const [result, setResult] = useState(null); + const [manifestRun, setManifestRun] = useState(null); const [error, setError] = useState(null); const [tab, setTab] = useState(null); const [traces, setTraces] = useState([]); const [timeline, setTimeline] = useState(null); + const [portableTraces, setPortableTraces] = useState(null); + const [selectedTraceId, setSelectedTraceId] = useState(); + const liveMotelViewer = liveMotelViewerFromSearch(window.location.search); + const portableTraceIds = new Set(portableTraces?.traces.map((entry) => entry.traceId) ?? []); useEffect(() => { + setPortableTraces(null); + setSelectedTraceId(undefined); + setManifestRun(null); fetch(`${base}/result.json`) .then((r) => r.json()) .then(setResult) @@ -177,7 +259,19 @@ const RunView = ({ target, slug }: { target: string; slug: string }) => { .then((r) => (r.ok ? r.json() : null)) .then(setTimeline) .catch(() => setTimeline(null)); - }, [base]); + fetch(`${base}/otel-traces.json`) + .then((r) => (r.ok ? r.json() : null)) + .then((value) => setPortableTraces(parsePortableTraceExport(value))) + .catch(() => setPortableTraces(null)); + fetch("manifest.json") + .then((r) => (r.ok ? r.json() : null)) + .then((manifest: Manifest | null) => + setManifestRun( + manifest?.runs.find((run) => run.target === target && run.slug === slug) ?? null, + ), + ) + .catch(() => setManifestRun(null)); + }, [base, slug, target]); if (error) return
failed to load run: {error}
; if (!result) return
loading…
; @@ -218,6 +312,7 @@ const RunView = ({ target, slug }: { target: string; slug: string }) => { return (
+
← all runs @@ -226,6 +321,16 @@ const RunView = ({ target, slug }: { target: string; slug: string }) => { ⊙ open trace )} + {portableTraces && ( + + portable traces + + )} result.json @@ -239,11 +344,13 @@ const RunView = ({ target, slug }: { target: string; slug: string }) => { {new Date(result.endedAt).toLocaleString()}

{result.error &&
{result.error}
} + {available.length > 1 && (
{available.map((entry) => (
{run.ok ? "✓ passed" : "✗ FAILED"} {run.durationMs != null && ( {(run.durationMs / 1000).toFixed(1)}s )} + {run.portableTraceCount != null && run.portableTraceCount > 0 && ( + · {run.portableTraceCount} traces + )} + {run.portableTraceMissing != null && run.portableTraceMissing > 0 && ( + · {run.portableTraceMissing} missing + )} - {skipFor(scenario, target) ? "—" : "·"} + {skipFor(scenario, target) ? "n/a" : "·"}
@@ -353,14 +478,18 @@ const RunView = ({ target, slug }: { target: string; slug: string }) => { ))} diff --git a/e2e/viewer/src/PortableTraceExplorer.tsx b/e2e/viewer/src/PortableTraceExplorer.tsx new file mode 100644 index 000000000..609c84ae0 --- /dev/null +++ b/e2e/viewer/src/PortableTraceExplorer.tsx @@ -0,0 +1,247 @@ +import { useMemo, useState } from "react"; + +import { + formatTraceDuration, + waterfallPosition, + type PortableTraceExport, + type PortableTraceSpan, +} from "./portable-traces"; + +interface TraceLedgerRef { + readonly id: string; + readonly url: string; + readonly label?: string; +} + +const displayPath = (value: string): string => value.replace(/^https?:\/\/[^/]+/, "") || value; + +const SpanDetails = ({ span }: { span: PortableTraceSpan }) => { + const tags = Object.entries(span.tags); + return ( +
+
+ {span.operationName} + {span.spanId} +
+
+
service
+
{span.serviceName}
+ {span.scopeName && ( + <> +
scope
+
{span.scopeName}
+ + )} + {span.kind && ( + <> +
kind
+
{span.kind}
+ + )} +
duration
+
{formatTraceDuration(span.durationMs)}
+
status
+
{span.status}
+
+ {tags.length > 0 && ( +
+

attributes

+
+ {tags.map(([key, value]) => ( +
+
{key}
+
{value}
+
+ ))} +
+
+ )} + {span.warnings.length > 0 && ( +
+

warnings

+
    + {span.warnings.map((warning, index) => ( +
  • {warning}
  • + ))} +
+
+ )} + {span.events.length > 0 && ( +
+

events

+
    + {span.events.map((event, index) => ( +
  • + {event.name} + {event.timestamp || "timestamp unavailable"} + {Object.keys(event.attributes).length > 0 && ( + {JSON.stringify(event.attributes)} + )} +
  • + ))} +
+
+ )} +
+ ); +}; + +export const PortableTraceExplorer = ({ + exportData, + ledger, + selectedTraceId, + onSelectTrace, + liveMotelViewer, +}: { + exportData: PortableTraceExport; + ledger: ReadonlyArray; + selectedTraceId?: string; + onSelectTrace: (traceId: string) => void; + liveMotelViewer?: string; +}) => { + const [selectedSpanId, setSelectedSpanId] = useState(); + const ledgerById = useMemo( + () => new Map(ledger.map((entry) => [entry.id, entry] as const)), + [ledger], + ); + const selectedEntry = + exportData.traces.find((entry) => entry.traceId === selectedTraceId) ?? exportData.traces[0]; + const trace = selectedEntry?.data; + const selectedSpan = + trace?.spans.find((span) => span.spanId === selectedSpanId) ?? trace?.spans[0]; + + if (!trace || !selectedEntry) { + return ( +
+

Portable distributed traces

+

The exporter produced no complete traces for this run.

+
+ ); + } + + return ( +
+
+
+

Portable distributed traces

+

+ Self-contained span trees captured before telemetry teardown. Select a span for its + attributes, events, and warnings. +

+
+ {liveMotelViewer && ( + + open live Motel + + )} +
+ +
+ + +
+
+
+ {trace.rootOperationName} + {trace.serviceName} +
+
+ {formatTraceDuration(trace.durationMs)} + {trace.spanCount} spans + 0 ? "error-text" : "ok-text"}> + {trace.errorCount > 0 ? `${trace.errorCount} errors` : "ok"} + +
+
+ + {trace.warnings.length > 0 && ( +
    + {trace.warnings.map((warning, index) => ( +
  • {warning}
  • + ))} +
+ )} + +
+ {trace.spans.map((span) => { + const position = waterfallPosition(trace, span); + const active = span.spanId === selectedSpan?.spanId; + return ( + + ); + })} +
+ + {selectedSpan && } +
+
+ + {(exportData.missing.length > 0 || exportData.invalidTraceIds.length > 0) && ( +

+ Export incomplete: {exportData.missing.length} missing,{" "} + {exportData.invalidTraceIds.length} invalid trace IDs. +

+ )} +
+ ); +}; + +export default PortableTraceExplorer; diff --git a/e2e/viewer/src/PublicationBanner.tsx b/e2e/viewer/src/PublicationBanner.tsx new file mode 100644 index 000000000..3451105e9 --- /dev/null +++ b/e2e/viewer/src/PublicationBanner.tsx @@ -0,0 +1,105 @@ +import type { EvidencePublicationMetadata } from "../../src/published-artifacts"; + +const record = (value: unknown): value is Record => + typeof value === "object" && value !== null && !Array.isArray(value); + +const finiteNumber = (value: unknown): value is number => + typeof value === "number" && Number.isFinite(value); + +const strings = (value: unknown) => + Array.isArray(value) ? value.filter((entry): entry is string => typeof entry === "string") : []; + +export const parsePublicationMetadata = (value: unknown): EvidencePublicationMetadata | null => { + if (!record(value) || value.schemaVersion !== 1 || !finiteNumber(value.sanitizedAt)) return null; + if (value.status !== "passed" && value.status !== "failed") return null; + if (!record(value.sanitizer) || !record(value.policy) || !record(value.runtime)) return null; + if (!record(value.stats)) return null; + if ( + value.sanitizer.source !== "e2e/scripts/sanitize-evidence.ts" || + value.sanitizer.policyVersion !== 1 || + value.policy.unknownArtifacts !== "removed" || + value.policy.textAndJson !== "redacted" || + value.policy.binaryVisuals !== "unredacted-synthetic-only" || + value.policy.binarySecretDetection !== "byte-canary-only" + ) { + return null; + } + const { runtime, stats } = value; + if ( + typeof runtime.name !== "string" || + typeof runtime.version !== "string" || + typeof runtime.platform !== "string" || + typeof runtime.arch !== "string" || + !finiteNumber(stats.removed) || + !finiteNumber(stats.redacted) || + !finiteNumber(stats.retained) || + !finiteNumber(stats.canariesChecked) + ) { + return null; + } + const sourceRevision = value.sanitizer.sourceRevision; + return { + schemaVersion: 1, + sanitizedAt: value.sanitizedAt, + status: 
value.status, + sanitizer: { + source: "e2e/scripts/sanitize-evidence.ts", + policyVersion: 1, + ...(typeof sourceRevision === "string" ? { sourceRevision } : {}), + }, + policy: { + unknownArtifacts: "removed", + textAndJson: "redacted", + binaryVisuals: "unredacted-synthetic-only", + binarySecretDetection: "byte-canary-only", + }, + runtime: { + name: runtime.name, + version: runtime.version, + platform: runtime.platform, + arch: runtime.arch, + }, + stats: { + removed: stats.removed, + redacted: stats.redacted, + retained: stats.retained, + canariesChecked: stats.canariesChecked, + }, + binaryArtifacts: strings(value.binaryArtifacts), + errors: strings(value.errors), + }; +}; + +export const PublicationBanner = ({ + metadata, +}: { + readonly metadata: EvidencePublicationMetadata | null | undefined; +}) => { + if (metadata === undefined) return null; + if (metadata === null) { + return ( + + ); + } + const visualCount = metadata.binaryArtifacts.length; + return ( + + ); +}; + +export default PublicationBanner; diff --git a/e2e/viewer/src/SessionPlayer.tsx b/e2e/viewer/src/SessionPlayer.tsx index c285ae144..0b999d3e3 100644 --- a/e2e/viewer/src/SessionPlayer.tsx +++ b/e2e/viewer/src/SessionPlayer.tsx @@ -1,5 +1,5 @@ -// One synced playback head over BOTH of a run's real recordings — the -// terminal cast and the browser video — driven by the run's focus timeline +// One synced playback head over BOTH of a run's real recordings: the +// terminal cast and the browser video, driven by the run's focus timeline // (timeline.json). Where film.mp4 bakes the cuts into pixels, this player // performs them live: the active act decides which recording is on screen, // a synthetic window chrome floats above it (terminal title bar, or a @@ -10,7 +10,7 @@ // The master clock is WALL CLOCK: focus entries are wall-contiguous, so // session-time t maps to exactly one act, and each recording's own clock // is recovered through its anchor (timeline.anchors). 
No idle compression -// anywhere — cast time must equal wall time for the cuts to land. +// anywhere. Cast time must equal wall time for the cuts to land. import { useEffect, useMemo, useRef, useState } from "react"; import * as AsciinemaPlayer from "asciinema-player"; import "asciinema-player/dist/bundle/asciinema-player.css"; @@ -25,7 +25,7 @@ export interface SessionTraceRef { id: string; at: number; url: string; - /** Request duration (ms) — recorded by the surfaces at run time. */ + /** Request duration (ms), recorded by the surfaces at run time. */ ms?: number; status?: number; /** Which window made the request: terminal (MCP/CLI) or browser. */ @@ -64,14 +64,18 @@ export const SessionPlayer = ({ timeline, traces, playwrightTraceUrl, - motelViewer, + liveMotelViewer, + onInspectTrace, + inspectableTraceIds, }: { castUrl: string; videoUrl: string; timeline: SessionTimeline; traces: SessionTraceRef[]; playwrightTraceUrl: string | null; - motelViewer: string; + liveMotelViewer?: string; + onInspectTrace?: (traceId: string) => void; + inspectableTraceIds?: ReadonlySet; }) => { const sessionStart = timeline.focus[0]?.at ?? 0; const terminalAnchor = timeline.anchors.terminal ?? sessionStart; @@ -127,7 +131,7 @@ export const SessionPlayer = ({ 1000, ); - // Mount the cast player once (no idle compression — sync needs real time). + // Mount the cast player once (no idle compression, sync needs real time). useEffect(() => { if (!castMount.current) return; const player = AsciinemaPlayer.create(castUrl, castMount.current, { @@ -252,7 +256,7 @@ export const SessionPlayer = ({ .filter((trace) => trace.t >= 0), [traces, sessionStart], ); - // Duration bars are scaled to the slowest request — the question the rail + // Duration bars are scaled to the slowest request. The question the rail // answers is "which of these was the slow one", not absolute ms. const slowest = Math.max(...traceMarks.map((mark) => mark.ms ?? 0), 1); @@ -270,7 +274,7 @@ export const SessionPlayer = ({
- {/* Synthetic window chrome — the recordings are chromeless, so the + {/* Synthetic window chrome: the recordings are chromeless, so the viewer restores what a developer would actually see: a terminal title bar, or a browser URL bar with the address the page is on. */}
@@ -295,7 +299,7 @@ export const SessionPlayer = ({ )} ) : ( - terminal — agent chat + terminal: agent chat )}
@@ -370,17 +374,18 @@ export const SessionPlayer = ({ {/* The trace rail: every API request the session made, beside the video it happened in, with a duration bar scaled to the slowest - request — "why did that take so long" is answered at a glance. - Rows seek the player; ids open motel's waterfall. */} + request. "Why did that take so long" is answered at a glance. + Rows seek the player; ids open the portable waterfall. */} {traceMarks.length > 0 && (
traces - click = seek · id = waterfall + click = seek · id = portable waterfall
{traceMarks.map((mark, index) => { const isNow = index === nowIndex; const slow = (mark.ms ?? 0) >= 1000; + const inspectTrace = inspectableTraceIds?.has(mark.id) ? onInspectTrace : undefined; return (
- event.stopPropagation()} - > - {mark.id.slice(0, 7)} - + {inspectTrace ? ( + + ) : ( + {mark.id.slice(0, 7)} + )} + {liveMotelViewer && ( + event.stopPropagation()} + title="Open this trace in the live local Motel" + > + ↗ + + )}
); diff --git a/e2e/viewer/src/TestSource.tsx b/e2e/viewer/src/TestSource.tsx index 3de0f22c1..606a2a68c 100644 --- a/e2e/viewer/src/TestSource.tsx +++ b/e2e/viewer/src/TestSource.tsx @@ -1,7 +1,7 @@ // Read-only Monaco showing the run's test source (the scenario's code with // imports + sibling tests stripped, written by the runner as test.ts). -// Uses Monaco CORE + the monarch TypeScript colorizer only — no language -// service, no ts.worker — a read-only pane needs highlighting, not IntelliSense +// Uses Monaco CORE + the monarch TypeScript colorizer only, with no language +// service or ts.worker. A read-only pane needs highlighting, not IntelliSense // (the full build is ~12 MB of workers). Lazy-loaded so the matrix stays light. import React, { useEffect, useRef, useState } from "react"; import * as monaco from "monaco-editor/esm/vs/editor/editor.api"; @@ -15,9 +15,39 @@ declare global { } self.MonacoEnvironment = { getWorker: () => new EditorWorker() }; -export default function TestSource({ url }: { url: string }) { +interface SourceMetadata { + readonly sourcePath: string; + readonly testName: string; + readonly registration: string; + readonly extractor: string; +} + +const parseSourceMetadata = (value: unknown): SourceMetadata | null => { + if (typeof value !== "object" || value === null) return null; + if ( + !("sourcePath" in value) || + typeof value.sourcePath !== "string" || + !("testName" in value) || + typeof value.testName !== "string" || + !("registration" in value) || + typeof value.registration !== "string" || + !("extractor" in value) || + typeof value.extractor !== "string" + ) { + return null; + } + return { + sourcePath: value.sourcePath, + testName: value.testName, + registration: value.registration, + extractor: value.extractor, + }; +}; + +export default function TestSource({ url, metadataUrl }: { url: string; metadataUrl?: string }) { const container = useRef(null); const [failed, setFailed] = useState(false); + const [metadata, 
setMetadata] = useState(null); useEffect(() => { let editor: monaco.editor.IStandaloneCodeEditor | undefined; @@ -57,6 +87,27 @@ export default function TestSource({ url }: { url: string }) { }; }, [url]); + useEffect(() => { + if (!metadataUrl) { + setMetadata(null); + return; + } + fetch(metadataUrl) + .then((response) => (response.ok ? response.json() : null)) + .then((value) => setMetadata(parseSourceMetadata(value))) + .catch(() => setMetadata(null)); + }, [metadataUrl]); + if (failed) return null; - return
; + return ( + <> + {metadata && ( +

+ Focused from {metadata.sourcePath} via {metadata.registration} ( + {metadata.extractor}) +

+ )} +
+ + ); } diff --git a/e2e/viewer/src/portable-traces.ts b/e2e/viewer/src/portable-traces.ts new file mode 100644 index 000000000..ace525153 --- /dev/null +++ b/e2e/viewer/src/portable-traces.ts @@ -0,0 +1,190 @@ +export interface PortableSpanEvent { + readonly name: string; + readonly timestamp: string; + readonly attributes: Readonly>; +} + +export interface PortableTraceSpan { + readonly spanId: string; + readonly parentSpanId: string | null; + readonly serviceName: string; + readonly scopeName: string | null; + readonly kind: string | null; + readonly operationName: string; + readonly startTime: string; + readonly isRunning: boolean; + readonly durationMs: number; + readonly status: "ok" | "error"; + readonly depth: number; + readonly tags: Readonly>; + readonly warnings: ReadonlyArray; + readonly events: ReadonlyArray; +} + +export interface PortableTrace { + readonly traceId: string; + readonly serviceName: string; + readonly rootOperationName: string; + readonly startedAt: string; + readonly isRunning: boolean; + readonly durationMs: number; + readonly spanCount: number; + readonly errorCount: number; + readonly warnings: ReadonlyArray; + readonly spans: ReadonlyArray; +} + +export interface PortableTraceExport { + readonly schemaVersion: 1; + readonly exportedAt: number; + readonly traces: ReadonlyArray<{ readonly traceId: string; readonly data: PortableTrace }>; + readonly missing: ReadonlyArray<{ readonly traceId: string; readonly error: string }>; + readonly invalidTraceIds: ReadonlyArray; +} + +export interface WaterfallPosition { + readonly left: number; + readonly width: number; +} + +const record = (value: unknown): value is Record => + typeof value === "object" && value !== null && !Array.isArray(value); + +const text = (value: unknown, fallback = ""): string => + typeof value === "string" ? value : fallback; + +const number = (value: unknown, fallback = 0): number => + typeof value === "number" && Number.isFinite(value) ? 
value : fallback; + +const boolean = (value: unknown): boolean => value === true; + +const nullableText = (value: unknown): string | null => + value === null ? null : typeof value === "string" ? value : null; + +const textArray = (value: unknown): string[] => + Array.isArray(value) ? value.filter((entry): entry is string => typeof entry === "string") : []; + +const textRecord = (value: unknown): Record => { + if (!record(value)) return {}; + return Object.fromEntries( + Object.entries(value).filter( + (entry): entry is [string, string] => typeof entry[1] === "string", + ), + ); +}; + +const eventFrom = (value: unknown): PortableSpanEvent | undefined => { + if (!record(value) || typeof value.name !== "string") return undefined; + return { + name: value.name, + timestamp: text(value.timestamp), + attributes: textRecord(value.attributes), + }; +}; + +const spanFrom = (value: unknown): PortableTraceSpan | undefined => { + if (!record(value) || typeof value.spanId !== "string") return undefined; + return { + spanId: value.spanId, + parentSpanId: nullableText(value.parentSpanId), + serviceName: text(value.serviceName, "unknown service"), + scopeName: nullableText(value.scopeName), + kind: nullableText(value.kind), + operationName: text(value.operationName, "unnamed span"), + startTime: text(value.startTime), + isRunning: boolean(value.isRunning), + durationMs: Math.max(0, number(value.durationMs)), + status: value.status === "error" ? "error" : "ok", + depth: Math.max(0, Math.floor(number(value.depth))), + tags: textRecord(value.tags), + warnings: textArray(value.warnings), + events: Array.isArray(value.events) + ? value.events.flatMap((entry) => { + const event = eventFrom(entry); + return event ? 
[event] : []; + }) + : [], + }; +}; + +const traceFrom = (value: unknown): PortableTrace | undefined => { + if (!record(value) || typeof value.traceId !== "string" || !Array.isArray(value.spans)) { + return undefined; + } + const spans = value.spans.flatMap((entry) => { + const span = spanFrom(entry); + return span ? [span] : []; + }); + return { + traceId: value.traceId, + serviceName: text(value.serviceName, "unknown service"), + rootOperationName: text(value.rootOperationName, "unnamed trace"), + startedAt: text(value.startedAt), + isRunning: boolean(value.isRunning), + durationMs: Math.max(0, number(value.durationMs)), + spanCount: Math.max(spans.length, Math.floor(number(value.spanCount, spans.length))), + errorCount: Math.max(0, Math.floor(number(value.errorCount))), + warnings: textArray(value.warnings), + spans, + }; +}; + +export const parsePortableTraceExport = (value: unknown): PortableTraceExport | null => { + if (!record(value) || value.schemaVersion !== 1 || !Array.isArray(value.traces)) return null; + const traces = value.traces.flatMap((entry) => { + if (!record(entry) || typeof entry.traceId !== "string") return []; + const data = traceFrom(entry.data); + return data ? [{ traceId: entry.traceId, data }] : []; + }); + const missing = Array.isArray(value.missing) + ? value.missing.flatMap((entry) => + record(entry) && typeof entry.traceId === "string" + ? 
[{ traceId: entry.traceId, error: text(entry.error, "trace unavailable") }] + : [], + ) + : []; + return { + schemaVersion: 1, + exportedAt: number(value.exportedAt), + traces, + missing, + invalidTraceIds: textArray(value.invalidTraceIds), + }; +}; + +export const waterfallPosition = ( + trace: PortableTrace, + span: PortableTraceSpan, +): WaterfallPosition => { + const traceStart = Date.parse(trace.startedAt); + const spanStart = Date.parse(span.startTime); + const duration = Math.max(trace.durationMs, 0); + if (!Number.isFinite(traceStart) || !Number.isFinite(spanStart) || duration === 0) { + return { left: 0, width: 100 }; + } + const left = Math.min(99.4, Math.max(0, ((spanStart - traceStart) / duration) * 100)); + const available = Math.max(0, 100 - left); + const width = Math.min(available, Math.max(0.6, (span.durationMs / duration) * 100)); + return { left, width }; +}; + +/** Optional live enhancement. Portable traces remain the primary evidence. */ +export const liveMotelViewerFromSearch = (search: string): string | undefined => { + const candidate = new URLSearchParams(search).get("motel"); + if (!candidate) return undefined; + try { + const url = new URL(candidate); + const loopback = + url.hostname === "localhost" || url.hostname === "127.0.0.1" || url.hostname === "[::1]"; + if (!loopback || (url.protocol !== "http:" && url.protocol !== "https:")) return undefined; + return url.toString().replace(/\/$/, ""); + } catch { + return undefined; + } +}; + +export const formatTraceDuration = (durationMs: number): string => { + if (durationMs >= 1_000) return `${(durationMs / 1_000).toFixed(2)}s`; + if (durationMs >= 1) return `${durationMs.toFixed(durationMs < 10 ? 
1 : 0)}ms`; + return `${Math.round(durationMs * 1_000)}µs`; +}; diff --git a/e2e/viewer/src/styles.css b/e2e/viewer/src/styles.css index d340fe361..6c2025262 100644 --- a/e2e/viewer/src/styles.css +++ b/e2e/viewer/src/styles.css @@ -83,6 +83,39 @@ a.watch.no { } /* run page */ +.publication-banner { + display: grid; + gap: 2px; + margin-bottom: 14px; + padding: 9px 11px; + border: 1px solid #2d333b; + border-radius: 8px; + background: #101722; + color: #8b98a9; + font-size: 12px; +} +.publication-banner strong { + color: #d7dce5; + font-size: 12.5px; +} +.publication-banner.passed { + border-color: #2ea04366; + background: #102019; +} +.publication-banner.failed { + border-color: #f8514966; + background: #241111; +} +.publication-banner.failed strong { + color: #ff7b72; +} +.publication-banner.local { + border-color: #d2992266; + background: #211a0d; +} +.publication-banner.local strong { + color: #d29922; +} .topbar { display: flex; justify-content: space-between; @@ -139,6 +172,47 @@ a.watch.no { font-size: 12.5px; margin-top: 4px; } +.artifact-navigation ul { + display: grid; + grid-template-columns: repeat(auto-fit, minmax(240px, 1fr)); + gap: 7px; + margin: 0; + padding: 0; + list-style: none; +} +.artifact-navigation a { + display: grid; + grid-template-columns: minmax(0, 1fr) max-content; + gap: 2px 10px; + padding: 8px 10px; + border: 1px solid #21262d; + border-radius: 7px; + background: #0f1620; +} +.artifact-navigation a:hover { + border-color: #388bfd; +} +.artifact-navigation a > span { + color: #d7dce5; + font-size: 12.5px; + text-transform: capitalize; +} +.artifact-navigation code { + grid-column: 1; + color: #6b7785; + overflow: hidden; + text-overflow: ellipsis; + white-space: nowrap; + font-size: 10.5px; +} +.artifact-navigation small { + grid-column: 2; + grid-row: 1 / span 2; + align-self: center; + color: #596779; + font-size: 10px; + text-transform: uppercase; +} /* test source (monaco) */ .section { @@ -154,6 +228,14 @@ a.watch.no { 
border-radius: 8px; overflow: hidden; } +.source-provenance { + margin: 0 0 6px; + color: #6b7785; + font-size: 11.5px; +} +.source-provenance code { + color: #8b98a9; +} /* run-page tabs (video / source) */ .tabs { @@ -187,7 +269,7 @@ a.watch.no { border: 1px solid #2a2a2a; } -/* session player — synthetic window chrome + synced two-recording stage */ +/* Session player: synthetic window chrome and synced two-recording stage. */ .player { margin-top: 0.8rem; } @@ -262,7 +344,7 @@ a.watch.no { white-space: nowrap; } /* Fixed 16:10 stage (the browser records at 1280x800): the size never - changes when the act cuts between terminal and browser — both + changes when the act cuts between terminal and browser. Both recordings letterbox into the same box, like one screen. */ .stage { border: 1px solid #21262d; @@ -495,10 +577,21 @@ a.watch.no { Menlo, monospace; color: #3d4651; + border: 0; + padding: 0; + background: transparent; + cursor: pointer; } .trace-id:hover { color: #58a6ff; } +.trace-live { + color: #3d4651; + font-size: 11px; +} +.trace-live:hover { + color: #58a6ff; +} /* trace source tag: terminal (MCP/CLI) vs browser */ .trace-src { @@ -516,3 +609,335 @@ a.watch.no { color: #79c0ff; background: #1d3a5f66; } + +/* Portable OTLP trace explorer */ +.portable-traces { + margin-top: 1.4rem; + scroll-margin-top: 18px; +} +.portable-trace-title { + display: flex; + align-items: end; + justify-content: space-between; + gap: 16px; +} +.portable-trace-title .section, +.portable-trace-title .hint { + margin-bottom: 0; +} +.portable-trace-grid { + display: grid; + grid-template-columns: minmax(210px, 280px) minmax(0, 1fr); + gap: 12px; + margin-top: 10px; +} +.portable-trace-list, +.portable-waterfall { + border: 1px solid #21262d; + border-radius: 9px; + background: #0d1219; + overflow: hidden; +} +.portable-trace-list { + align-self: start; +} +.portable-trace-list button { + display: flex; + flex-direction: column; + gap: 2px; + width: 100%; + color: #8b98a9; + 
background: transparent; + border: 0; + border-bottom: 1px solid #161b22; + padding: 9px 11px; + text-align: left; + cursor: pointer; +} +.portable-trace-list button:hover, +.portable-trace-list button.active { + color: #d7dce5; + background: #14222e; +} +.portable-trace-list button.active { + box-shadow: inset 2px 0 0 #388bfd; +} +.portable-trace-list button > span { + width: 100%; + white-space: nowrap; + overflow: hidden; + text-overflow: ellipsis; + font-size: 12.5px; +} +.portable-trace-list small, +.portable-trace-list code { + color: #596779; + font-size: 10.5px; +} +.portable-waterfall > header { + display: flex; + justify-content: space-between; + gap: 16px; + padding: 10px 12px; + border-bottom: 1px solid #21262d; + background: #161b22; +} +.portable-waterfall > header > div:first-child { + display: flex; + min-width: 0; + flex-direction: column; +} +.portable-waterfall > header strong { + overflow: hidden; + text-overflow: ellipsis; + white-space: nowrap; +} +.portable-waterfall > header span { + color: #6b7785; + font-size: 11px; +} +.portable-trace-summary { + display: flex; + align-items: center; + gap: 10px; + white-space: nowrap; +} +.portable-span-list > button { + display: grid; + grid-template-columns: minmax(180px, 34%) minmax(120px, 1fr) 70px; + align-items: center; + gap: 10px; + width: 100%; + min-height: 34px; + color: #8b98a9; + background: transparent; + border: 0; + border-bottom: 1px solid #141a22; + padding: 5px 12px; + text-align: left; + cursor: pointer; +} +.portable-span-list > button:hover, +.portable-span-list > button.active { + background: #101722; +} +.portable-span-list > button.active { + box-shadow: inset 2px 0 0 #388bfd; +} +.portable-span-list > button.error { + color: #ff7b72; +} +.portable-span-name { + display: flex; + align-items: center; + gap: 6px; + min-width: 0; +} +.portable-span-name > span { + overflow: hidden; + text-overflow: ellipsis; + white-space: nowrap; +} +.portable-span-name > i { + width: 12px; + 
color: #58a6ff; + font-style: normal; + flex: none; +} +.portable-span-name > small { + color: #3d4651; + overflow: hidden; + text-overflow: ellipsis; + white-space: nowrap; +} +.portable-span-track { + position: relative; + display: block; + height: 8px; + border-radius: 4px; + background: #161b22; + overflow: hidden; +} +.portable-span-track > i { + position: absolute; + top: 0; + height: 100%; + min-width: 2px; + border-radius: 3px; + background: #2ea043; +} +.portable-span-track > i.error { + background: #f85149; +} +.portable-span-duration { + color: #6b7785; + font: + 11px ui-monospace, + SFMono-Regular, + Menlo, + monospace; + text-align: right; +} +.portable-span-details { + padding: 12px; + border-top: 1px solid #21262d; + background: #0b0f17; +} +.portable-detail-heading { + display: flex; + justify-content: space-between; + gap: 12px; +} +.portable-detail-heading > span { + color: #3d4651; + font: + 10.5px ui-monospace, + SFMono-Regular, + Menlo, + monospace; +} +.portable-span-details > dl { + display: grid; + grid-template-columns: max-content minmax(0, 1fr); + gap: 3px 12px; + margin: 10px 0 0; + font-size: 12px; +} +.portable-span-details dt, +.portable-tags dt { + color: #6b7785; +} +.portable-span-details dd, +.portable-tags dd { + margin: 0; + overflow-wrap: anywhere; +} +.portable-detail-section { + margin-top: 12px; +} +.portable-detail-section h4 { + margin: 0 0 5px; + color: #6b7785; + font-size: 10.5px; + letter-spacing: 0.04em; + text-transform: uppercase; +} +.portable-tags { + display: grid; + gap: 4px; + margin: 0; + max-height: 230px; + overflow: auto; +} +.portable-tags > div { + display: grid; + grid-template-columns: minmax(110px, 30%) minmax(0, 1fr); + gap: 10px; + padding-bottom: 3px; + border-bottom: 1px solid #141a22; + font: + 11px ui-monospace, + SFMono-Regular, + Menlo, + monospace; +} +.portable-warnings, +.portable-events { + margin: 0; + padding-left: 18px; + color: #ff7b72; + font-size: 11.5px; +} +.portable-events { + 
color: #8b98a9; +} +.portable-events li { + display: grid; + grid-template-columns: minmax(120px, max-content) minmax(0, 1fr); + gap: 4px 10px; +} +.portable-events code { + grid-column: 1 / -1; + white-space: pre-wrap; + overflow-wrap: anywhere; +} +.trace-warnings { + padding: 8px 28px; + border-bottom: 1px solid #21262d; +} +.portable-export-warning { + color: #d29922; + font-size: 12px; +} + +@media (max-width: 880px) { + .player-split { + flex-direction: column; + } + .trace-rail { + width: auto; + max-height: 360px; + } + .portable-trace-grid { + grid-template-columns: 1fr; + } + .portable-trace-list { + display: grid; + grid-template-columns: repeat(auto-fit, minmax(190px, 1fr)); + } + .portable-span-list > button { + grid-template-columns: minmax(150px, 42%) minmax(80px, 1fr) 62px; + } +} + +@media (max-width: 560px) { + .page { + padding: 14px; + } + .topbar, + .portable-trace-title, + .portable-waterfall > header { + align-items: stretch; + flex-direction: column; + } + .topbar > span { + display: flex; + flex-wrap: wrap; + gap: 6px; + } + .tool-link { + margin-left: 0; + } + .portable-trace-list { + display: block; + } + .portable-trace-summary { + flex-wrap: wrap; + white-space: normal; + } + .portable-span-list > button { + grid-template-areas: + "name duration" + "track track"; + grid-template-columns: minmax(0, 1fr) max-content; + gap: 5px 8px; + padding: 7px 9px; + } + .portable-span-name { + grid-area: name; + } + .portable-span-track { + grid-area: track; + } + .portable-span-duration { + grid-area: duration; + } + .portable-detail-heading { + align-items: flex-start; + flex-direction: column; + } + .portable-tags > div, + .portable-events li { + grid-template-columns: minmax(0, 1fr); + } +} diff --git a/e2e/vitest.config.ts b/e2e/vitest.config.ts index 6f008b459..38d503390 100644 --- a/e2e/vitest.config.ts +++ b/e2e/vitest.config.ts @@ -1,109 +1,29 @@ import { defineConfig } from "vitest/config"; -// One project per target. 
Same scenario files, different running instance: -// `vitest run --project cloud` / `--project selfhost` (or both, the default). -// Each project's globalsetup boots that app's OWN dev server (or attaches to -// E2E__URL). Scenarios are isolated by fresh identities, not resets. -const project = (name: string, overrides: Record = {}) => ({ - test: { - name, - include: ["scenarios/**/*.test.ts", `${name}/**/*.test.ts`], - env: { E2E_TARGET: name }, - globalSetup: [`./setup/${name}.globalsetup.ts`], - testTimeout: 180_000, - hookTimeout: 120_000, - ...overrides, - }, -}); +import { capabilityRequirementMode, E2E_PROJECTS } from "./src/project-matrix"; -export default defineConfig({ +// Project names select execution policy. E2E_TARGET selects the deployed app. +// Hermetic and live-drift projects can therefore share the exact same target +// factory and global setup without duplicating boot logic. +const projects = E2E_PROJECTS.map((project) => ({ test: { - projects: [ - // PGlite's socket server is effectively single-connection; parallel test - // files (each fanning out per-request postgres sockets) crash it. Run - // files serially — swap PGlite for real Postgres if wall-clock matters. - project("cloud", { fileParallelism: false }), - // selfhost identities are the shared bootstrap admin for now — run files - // serially until per-test invite-signup isolation lands. - project("selfhost", { fileParallelism: false }), - // The same app as the PRODUCTION Docker artifact (the image users - // deploy: production build, bun serve.ts, /data volume) instead of the - // dev server. Runs the cross-target scenarios AND the selfhost/** - // scenarios — it is the same single-tenant app, so they all apply. - // Needs a docker daemon with host-networking support (Engine ≥ 26 on - // Docker Desktop); not part of the default `npm run test` chain — run - // with `npm run test:selfhost-docker` (release gate + CI for the - // publish workflow). 
- project("selfhost-docker", { - include: ["scenarios/**/*.test.ts", "selfhost/**/*.test.ts"], - fileParallelism: false, - }), - // The Cloudflare self-host worker (workerd via wrangler dev, dev-auth). - // Scoped to the cross-target scenarios wired for this host; the rest of - // scenarios/** is not yet validated against the worker. The full-graph - // scenario is included on purpose: workerd's 128MB isolate is the exact - // limit the streaming compile + content-addressed serve path defends, so - // it is the one host where that regression must be proven. Shares - // self-host's single-admin model. - project("cloudflare", { - include: [ - "scenarios/browser-approval.test.ts", - "scenarios/microsoft-graph-full.test.ts", - "scenarios/toolkits-mcp.test.ts", - "cloudflare/**/*.test.ts", - ], - fileParallelism: false, - }), - // The Electron desktop app. Only desktop/** scenarios — the desktop - // target provides none of the standard surfaces (each scenario - // launches its own app via Playwright's electron driver), so running - // the cross-target suite here would just emit a page of skips. Needs - // a display; not part of the default `npm run test` chain. - project("desktop", { - include: ["desktop/**/*.test.ts"], - fileParallelism: false, - testTimeout: 300_000, - }), - // The PACKAGED desktop app: the real electron-builder bundle, where - // app.isPackaged is true — the ONLY target that exercises the supervised- - // daemon attach path (ensureSupervisedConnection) and the bundled executor. - // Its globalsetup builds the bundle (slow), so it's separate from - // `desktop` to keep the fast dev-electron suite off the package build. - // Needs a display; not part of the default `npm run test` chain — run with - // `vitest run --project desktop-packaged`. - project("desktop-packaged", { - include: ["desktop-packaged/**/*.test.ts"], - fileParallelism: false, - testTimeout: 360_000, - hookTimeout: 600_000, - }), - // The single-user local app. 
Each scenario launches its OWN `executor - // web` via the CLI on a throwaway data dir + an OS-assigned port, so - // there is no shared instance and scenarios are independent — file - // parallelism is ON. No globalSetup (nothing shared to boot). Only - // local/** scenarios. Not part of the default `npm run test` chain; run - // with `vitest run --project local`. - project("local", { - include: ["local/**/*.test.ts"], - globalSetup: [], - fileParallelism: true, - testTimeout: 180_000, - }), - // The supervised CLI daemon inside a guest VM, one project per OS. The - // globalsetup provisions a VM, `executor service install`s the daemon, and - // tunnels it; restart() reboots the guest for REAL, so restart-persistence - // proves the boot-time auto-start path. Needs tart (macOS/Linux) or an EC2 - // credential (Windows); not part of the default `npm run test` chain — run - // with `vitest run --project cli-macos` (etc.) on the Mini. - ...(["macos", "linux", "windows"] as const).map((os) => - project(`cli-${os}`, { - include: ["scenarios/restart-persistence.test.ts", "cli/**/*.test.ts"], - env: { E2E_TARGET: `cli-${os}`, E2E_VM_OS: os }, - fileParallelism: false, - testTimeout: 300_000, - hookTimeout: 900_000, - }), - ), - ], + name: project.name, + include: [...project.include], + ...("exclude" in project ? { exclude: [...project.exclude] } : {}), + env: { + E2E_TARGET: project.target, + E2E_PROJECT: project.name, + E2E_PROJECT_TIER: project.tier, + E2E_PROJECT_HERMETIC: String(project.hermetic), + E2E_REQUIRED_CAPABILITY_MODE: capabilityRequirementMode(), + E2E_REQUIRED_CAPABILITIES: project.requiredCapabilities.join(","), + ...("env" in project ? 
project.env : {}), + }, + globalSetup: [...project.globalSetup], + fileParallelism: project.fileParallelism, + testTimeout: project.testTimeout, + hookTimeout: project.hookTimeout, }, -}); +})); + +export default defineConfig({ test: { projects } }); diff --git a/packages/app/src/web/server-connection-menu.tsx b/packages/app/src/web/server-connection-menu.tsx index ed4c46363..ebcce337e 100644 --- a/packages/app/src/web/server-connection-menu.tsx +++ b/packages/app/src/web/server-connection-menu.tsx @@ -11,7 +11,9 @@ import { } from "@executor-js/react/api/server-connection"; import { EXECUTOR_SERVER_PROFILES_STORAGE_KEY, + createExecutorServerProfileKey, getActiveExecutorServerProfile, + mergeExecutorDesktopSidecarProfile, normalizeExecutorServerProfilesSnapshot, parseExecutorServerProfilesSnapshot, readExecutorServerProfiles, @@ -195,7 +197,12 @@ const withoutLoopbackProfiles = ( ): ExecutorServerProfilesSnapshot => normalizeExecutorServerProfilesSnapshot({ activeKey: snapshot.activeKey, - profiles: snapshot.profiles.filter((profile) => !isLoopbackConnection(profile)), + profiles: snapshot.profiles.filter( + (profile) => + profile.kind === "desktop-sidecar" || + profile.key.startsWith("profile:") || + !isLoopbackConnection(profile), + ), }); const draftAuth = (draft: DraftProfile): ExecutorServerAuth | undefined => { @@ -223,6 +230,7 @@ export function ServerConnectionMenu(props: ServerConnectionMenuProps = {}) { const connection = useExecutorServerConnection(); const setServerConnection = useSetExecutorServerConnection(); const hydratedRef = useRef(false); + const snapshotRevisionRef = useRef(0); const [hydrated, setHydrated] = useState(false); const [snapshot, setSnapshot] = useState(() => ({ activeKey: connection.key, @@ -233,6 +241,7 @@ export function ServerConnectionMenu(props: ServerConnectionMenuProps = {}) { const [showCustomServer, setShowCustomServer] = useState(false); const persistSnapshot = useCallback((next: ExecutorServerProfilesSnapshot) => { + 
snapshotRevisionRef.current += 1; setSnapshot(next); writeStoredProfiles(next); }, []); @@ -240,34 +249,41 @@ export function ServerConnectionMenu(props: ServerConnectionMenuProps = {}) { useEffect(() => { if (hydratedRef.current) return; hydratedRef.current = true; + const hydrationRevision = snapshotRevisionRef.current; let cancelled = false; void readStoredProfiles().then((stored) => { void (async () => { if (cancelled) return; - const desktopBridge = hasDesktopServerConnectionBridge(); const desktopConnection = (await readDesktopServerConnection()?.then( (value) => value, () => null, )) ?? null; - const storedActive = getActiveExecutorServerProfile(stored); + if (cancelled) return; + if (snapshotRevisionRef.current !== hydrationRevision) { + setHydrated(true); + return; + } const current = desktopConnection ? normalizeExecutorServerConnection(desktopConnection) : connection; const baseStored = desktopConnection ? withoutLoopbackProfiles(stored) : stored; - const shouldKeepCurrent = - desktopBridge || - storedActive === null || - (hasBearerAuth(current) && - storedActive !== null && - sameLoopbackServer(current, storedActive)); - const next = snapshotWithCurrent(baseStored, current, shouldKeepCurrent); + const storedActive = getActiveExecutorServerProfile(baseStored); + const next = desktopConnection + ? 
mergeExecutorDesktopSidecarProfile(baseStored, current) + : snapshotWithCurrent( + baseStored, + current, + storedActive === null || + (hasBearerAuth(current) && + storedActive !== null && + sameLoopbackServer(current, storedActive)), + ); + const active = getActiveExecutorServerProfile(next); persistSnapshot(next); - if (desktopConnection) { - setServerConnection(desktopConnection); - } else if (!shouldKeepCurrent && storedActive && storedActive.key !== connection.key) { - setServerConnection(storedActive); + if (active && active.key !== connection.key) { + setServerConnection(active); } setHydrated(true); })(); @@ -320,6 +336,7 @@ export function ServerConnectionMenu(props: ServerConnectionMenuProps = {}) { const auth = draftAuth(draft); const input: ExecutorServerConnectionInput = { kind: "http", + key: createExecutorServerProfileKey(), origin, ...(draft.displayName.trim() ? { displayName: draft.displayName.trim() } : {}), ...(auth ? { auth } : {}), diff --git a/packages/react/src/api/server-connection.test.ts b/packages/react/src/api/server-connection.test.ts index 2a7062829..d08234454 100644 --- a/packages/react/src/api/server-connection.test.ts +++ b/packages/react/src/api/server-connection.test.ts @@ -7,6 +7,7 @@ import { normalizeExecutorServerOrigin, originFromApiBaseUrl, resolveBrowserExecutorServerConnection, + resolveExecutorServerConnectionBridgeHydration, } from "./server-connection"; describe("Executor server connection", () => { @@ -63,4 +64,27 @@ describe("Executor server connection", () => { expect(connection.origin).toBe("http://localhost:4788"); expect(getExecutorServerAuthorizationHeader(connection)).toBeNull(); }); + + it("does not let a late desktop bridge read replace an explicit profile selection", () => { + const initial = normalizeExecutorServerConnection({ origin: "http://127.0.0.1:4788" }); + const remote = normalizeExecutorServerConnection({ + key: "profile:remote-account", + origin: "https://executor.example", + displayName: "Remote 
account", + auth: { kind: "bearer", token: "token_remote" }, + }); + const bridge = { + kind: "desktop-sidecar" as const, + key: "desktop-sidecar", + origin: "http://127.0.0.1:4788", + displayName: "Local Executor", + }; + + expect( + resolveExecutorServerConnectionBridgeHydration({ initial, current: initial, bridge }).kind, + ).toBe("desktop-sidecar"); + expect( + resolveExecutorServerConnectionBridgeHydration({ initial, current: remote, bridge }), + ).toBe(remote); + }); }); diff --git a/packages/react/src/api/server-connection.tsx b/packages/react/src/api/server-connection.tsx index 00c303282..820cfbb90 100644 --- a/packages/react/src/api/server-connection.tsx +++ b/packages/react/src/api/server-connection.tsx @@ -58,6 +58,15 @@ export const resolveBrowserExecutorServerConnection = (input: { }); }; +export const resolveExecutorServerConnectionBridgeHydration = (input: { + readonly initial: ExecutorServerConnection; + readonly current: ExecutorServerConnection; + readonly bridge: ExecutorServerConnectionInput; +}): ExecutorServerConnection => + input.current.key === input.initial.key + ? 
normalizeExecutorServerConnection(input.bridge) + : input.current; + const resolveInitialExecutorServerConnection = (): ExecutorServerConnection => { const browserWindow = globalThis.window; if (!browserWindow) { @@ -134,9 +143,6 @@ interface ExecutorServerConnectionContextValue { const ExecutorServerConnectionContext = React.createContext(null); -const hasDesktopServerConnectionBridge = (): boolean => - typeof globalThis.window?.executor?.getServerConnection === "function"; - export function ExecutorServerConnectionProvider( props: React.PropsWithChildren<{ readonly connection?: ExecutorServerConnectionInput; @@ -152,7 +158,6 @@ export function ExecutorServerConnectionProvider( const [connection, setConnection] = React.useState(initialConnection); const setActiveConnection = React.useCallback((input: ExecutorServerConnectionInput): void => { const next = normalizeExecutorServerConnection(input); - if (hasDesktopServerConnectionBridge() && next.kind !== "desktop-sidecar") return; activeConnection = next; setConnection(next); }, []); @@ -174,10 +179,15 @@ export function ExecutorServerConnectionProvider( void bridge.getServerConnection().then( (input) => { if (cancelled || !input) return; - const next = normalizeExecutorServerConnection(input); - setConnection(() => { + setConnection((current) => { // Electron loads the UI from a local URL before the async bridge - // answers. Once it does, the bridge is the authoritative app server. + // answers. Apply that bootstrap value only while no profile selection + // has replaced the initial connection. 
+ const next = resolveExecutorServerConnectionBridgeHydration({ + initial: initialConnection, + current, + bridge: input, + }); activeConnection = next; return next; }); @@ -188,7 +198,7 @@ export function ExecutorServerConnectionProvider( return () => { cancelled = true; }; - }, [props.connection]); + }, [initialConnection, props.connection]); activeConnection = connection; const value = React.useMemo( diff --git a/packages/react/src/api/server-profiles.test.ts b/packages/react/src/api/server-profiles.test.ts index 415eb3577..693a181d2 100644 --- a/packages/react/src/api/server-profiles.test.ts +++ b/packages/react/src/api/server-profiles.test.ts @@ -1,7 +1,9 @@ import { describe, expect, it } from "@effect/vitest"; import { + createExecutorServerProfileKey, getActiveExecutorServerProfile, + mergeExecutorDesktopSidecarProfile, parseExecutorServerProfilesSnapshot, readExecutorServerProfiles, removeExecutorServerProfile, @@ -90,4 +92,110 @@ describe("Executor server profiles", () => { const removed = removeExecutorServerProfile(roundTripped, "http:http://127.0.0.1:4788"); expect(removed.activeKey).toBe("http:https://executor.example"); }); + + it("keeps same-origin accounts isolated across profile switches", () => { + const firstKey = createExecutorServerProfileKey(); + const secondKey = createExecutorServerProfileKey(); + expect(firstKey).not.toBe(secondKey); + + const local = upsertExecutorServerProfile( + { activeKey: null, profiles: [] }, + { + kind: "desktop-sidecar", + key: "desktop-sidecar", + origin: "http://127.0.0.1:4788", + displayName: "Desktop", + auth: { kind: "bearer", token: "token_desktop" }, + }, + ); + const first = upsertExecutorServerProfile(local!, { + key: firstKey, + origin: "https://executor.example", + displayName: "Account A", + auth: { kind: "bearer", token: "token_account_a" }, + }); + const second = upsertExecutorServerProfile(first!, { + key: secondKey, + origin: "https://executor.example", + displayName: "Account B", + auth: { kind: 
"bearer", token: "token_account_b" }, + }); + + expect(second?.profiles).toHaveLength(3); + expect(second?.profiles.map((profile) => profile.key)).toEqual([ + "desktop-sidecar", + firstKey, + secondKey, + ]); + + const selectedFirst = selectExecutorServerProfile(second!, firstKey); + expect(getActiveExecutorServerProfile(selectedFirst)?.auth).toEqual({ + kind: "bearer", + token: "token_account_a", + }); + + const selectedSecond = selectExecutorServerProfile(selectedFirst, secondKey); + expect(getActiveExecutorServerProfile(selectedSecond)?.auth).toEqual({ + kind: "bearer", + token: "token_account_b", + }); + + const selectedLocal = selectExecutorServerProfile(selectedSecond, "desktop-sidecar"); + expect(getActiveExecutorServerProfile(selectedLocal)?.kind).toBe("desktop-sidecar"); + + const selectedFirstAgain = selectExecutorServerProfile(selectedLocal, firstKey); + expect(getActiveExecutorServerProfile(selectedFirstAgain)?.auth).toEqual({ + kind: "bearer", + token: "token_account_a", + }); + + const roundTripped = parseExecutorServerProfilesSnapshot( + serializeExecutorServerProfilesSnapshot(selectedFirstAgain), + ); + expect(roundTripped.profiles.map((profile) => profile.key)).toEqual([ + "desktop-sidecar", + firstKey, + secondKey, + ]); + expect(roundTripped.activeKey).toBe(firstKey); + }); + + it("merges a refreshed desktop sidecar without replacing the active remote profile", () => { + const remoteKey = createExecutorServerProfileKey(); + const local = mergeExecutorDesktopSidecarProfile( + { activeKey: null, profiles: [] }, + { + kind: "desktop-sidecar", + key: "desktop-sidecar", + origin: "http://127.0.0.1:4788", + displayName: "Old sidecar", + }, + ); + expect(local.activeKey).toBe("desktop-sidecar"); + const stored = upsertExecutorServerProfile(local, { + key: remoteKey, + origin: "https://executor.example", + displayName: "Persisted remote", + auth: { kind: "bearer", token: "token_remote" }, + })!; + + const merged = 
mergeExecutorDesktopSidecarProfile(stored, { + kind: "desktop-sidecar", + key: "desktop-sidecar", + origin: "http://127.0.0.1:4799", + displayName: "Current sidecar", + }); + + expect(merged.activeKey).toBe(remoteKey); + expect(getActiveExecutorServerProfile(merged)?.displayName).toBe("Persisted remote"); + expect(merged.profiles.find((profile) => profile.kind === "desktop-sidecar")?.origin).toBe( + "http://127.0.0.1:4799", + ); + + const restored = parseExecutorServerProfilesSnapshot( + serializeExecutorServerProfilesSnapshot(merged), + ); + expect(restored.activeKey).toBe(remoteKey); + expect(getActiveExecutorServerProfile(restored)?.displayName).toBe("Persisted remote"); + }); }); diff --git a/packages/react/src/api/server-profiles.tsx b/packages/react/src/api/server-profiles.tsx index 3e40bdd6f..48239902a 100644 --- a/packages/react/src/api/server-profiles.tsx +++ b/packages/react/src/api/server-profiles.tsx @@ -7,6 +7,8 @@ import { export const EXECUTOR_SERVER_PROFILES_STORAGE_KEY = "executor.serverConnections.v1"; +export const createExecutorServerProfileKey = () => `profile:${globalThis.crypto.randomUUID()}`; + export interface ExecutorServerProfilesSnapshot { readonly activeKey: string | null; readonly profiles: readonly ExecutorServerConnection[]; @@ -169,6 +171,18 @@ export const upsertExecutorServerProfile = ( }); }; +export const mergeExecutorDesktopSidecarProfile = ( + snapshot: ExecutorServerProfilesSnapshot, + sidecar: ExecutorServerConnectionInput, +): ExecutorServerProfilesSnapshot => { + if (sidecar.kind !== "desktop-sidecar") return snapshot; + return ( + upsertExecutorServerProfile(snapshot, sidecar, { + makeActive: getActiveExecutorServerProfile(snapshot) === null, + }) ?? 
snapshot + ); +}; + export const selectExecutorServerProfile = ( snapshot: ExecutorServerProfilesSnapshot, key: string, diff --git a/packages/react/src/pages/org.test.ts b/packages/react/src/pages/org.test.ts new file mode 100644 index 000000000..fa5be5ca1 --- /dev/null +++ b/packages/react/src/pages/org.test.ts @@ -0,0 +1,85 @@ +import { describe, expect, it } from "@effect/vitest"; +import * as AsyncResult from "effect/unstable/reactivity/AsyncResult"; +import * as Cause from "effect/Cause"; + +import { canManageOrganizationRole, resolveOrgPageAccess, resolveOrgPageAccessResult } from "./org"; + +describe("canManageOrganizationRole", () => { + it("allows the roles accepted by the account providers", () => { + expect(canManageOrganizationRole("admin")).toBe(true); + expect(canManageOrganizationRole("owner")).toBe(true); + }); + + it("hides management controls from non-administrative roles", () => { + expect(canManageOrganizationRole("member")).toBe(false); + expect(canManageOrganizationRole("viewer")).toBe(false); + expect(canManageOrganizationRole(null)).toBe(false); + }); +}); + +describe("resolveOrgPageAccessResult", () => { + it("maps the request lifecycle without treating loading or failure as read-only", () => { + const failure = AsyncResult.failure(Cause.fail("offline")); + + expect(resolveOrgPageAccessResult(AsyncResult.initial())).toEqual({ + status: "loading", + canManageOrganization: false, + }); + expect(resolveOrgPageAccessResult(failure)).toEqual({ + status: "failed", + canManageOrganization: false, + }); + expect(resolveOrgPageAccessResult(AsyncResult.waiting(failure))).toEqual({ + status: "loading", + canManageOrganization: false, + }); + }); + + it("uses the current member role only after the request succeeds", () => { + expect( + resolveOrgPageAccessResult( + AsyncResult.success({ + members: [ + { isCurrentUser: false, role: "admin" }, + { isCurrentUser: true, role: "member" }, + ], + }), + ), + ).toEqual({ status: "denied", 
canManageOrganization: false }); + }); +}); + +describe("resolveOrgPageAccess", () => { + it("keeps permission loading distinct from a denied role", () => { + expect(resolveOrgPageAccess({ status: "loading" })).toEqual({ + status: "loading", + canManageOrganization: false, + }); + expect(resolveOrgPageAccess({ status: "resolved", role: "member" })).toEqual({ + status: "denied", + canManageOrganization: false, + }); + }); + + it("allows administrators only after their role resolves", () => { + expect(resolveOrgPageAccess({ status: "resolved", role: "admin" })).toEqual({ + status: "allowed", + canManageOrganization: true, + }); + expect(resolveOrgPageAccess({ status: "resolved", role: "owner" })).toEqual({ + status: "allowed", + canManageOrganization: true, + }); + }); + + it("keeps request failure and a missing current membership explicit", () => { + expect(resolveOrgPageAccess({ status: "failed" })).toEqual({ + status: "failed", + canManageOrganization: false, + }); + expect(resolveOrgPageAccess({ status: "resolved", role: null })).toEqual({ + status: "failed", + canManageOrganization: false, + }); + }); +}); diff --git a/packages/react/src/pages/org.tsx b/packages/react/src/pages/org.tsx index 8bf7031b5..a68f789bb 100644 --- a/packages/react/src/pages/org.tsx +++ b/packages/react/src/pages/org.tsx @@ -1,6 +1,6 @@ -import { useReducer, useState } from "react"; +import { useEffect, useReducer, useState } from "react"; import { Exit, Match } from "effect"; -import { useAtomValue, useAtomSet } from "@effect/atom-react"; +import { useAtomRefresh, useAtomSet, useAtomValue } from "@effect/atom-react"; import * as AsyncResult from "effect/unstable/reactivity/AsyncResult"; import { toast } from "sonner"; import { trackEvent } from "../api/analytics"; @@ -16,8 +16,11 @@ import { } from "../components/dialog"; import { Button } from "../components/button"; import { Badge } from "../components/badge"; +import { Alert, AlertDescription, AlertTitle } from "../components/alert"; 
+import { Info, InfoDescription, InfoTitle } from "../components/info"; import { Input } from "../components/input"; import { Label } from "../components/label"; +import { Skeleton } from "../components/skeleton"; import { Select, SelectContent, @@ -65,6 +68,74 @@ type MemberData = { type RoleData = { slug: string; name: string }; +type OrganizationNameDraft = { + readonly organizationId: string | null; + readonly sourceName: string; + readonly value: string; +}; + +export type OrgPageAccess = + | { readonly status: "loading"; readonly canManageOrganization: false } + | { readonly status: "allowed"; readonly canManageOrganization: true } + | { readonly status: "denied"; readonly canManageOrganization: false } + | { readonly status: "failed"; readonly canManageOrganization: false }; + +type OrgPageAccessSource = + | { readonly status: "loading" } + | { readonly status: "failed" } + | { readonly status: "resolved"; readonly role: string | null | undefined }; + +export const canManageOrganizationRole = (role: string | null | undefined) => + role === "admin" || role === "owner"; + +export const resolveOrgPageAccess = (source: OrgPageAccessSource) => { + if (source.status === "loading") { + return { status: "loading", canManageOrganization: false } satisfies OrgPageAccess; + } + if (source.status === "failed" || source.role == null) { + return { status: "failed", canManageOrganization: false } satisfies OrgPageAccess; + } + if (canManageOrganizationRole(source.role)) { + return { status: "allowed", canManageOrganization: true } satisfies OrgPageAccess; + } + return { status: "denied", canManageOrganization: false } satisfies OrgPageAccess; +}; + +export const resolveOrgPageAccessResult = ( + result: + | AsyncResult.Initial + | AsyncResult.Failure + | AsyncResult.Success< + { + readonly members: ReadonlyArray>; + }, + unknown + >, +) => { + if (AsyncResult.isWaiting(result) && AsyncResult.isFailure(result)) { + return resolveOrgPageAccess({ status: "loading" }); + } + if 
(AsyncResult.isInitial(result)) { + return resolveOrgPageAccess({ status: "loading" }); + } + if (AsyncResult.isFailure(result)) { + return resolveOrgPageAccess({ status: "failed" }); + } + return resolveOrgPageAccess({ + status: "resolved", + role: result.value.members.find((member) => member.isCurrentUser)?.role, + }); +}; + +const organizationNameDraft = ( + organizationId: string | null, + sourceName: string, +): OrganizationNameDraft => ({ + organizationId, + sourceName, + value: sourceName, +}); + type InviteState = { email: string; roleSlug: string; @@ -120,20 +191,95 @@ function formatLastActive(lastActiveAt: string | null): string { return date.toLocaleDateString(undefined, { month: "short", day: "numeric" }); } -export function OrgPage(props: { domainsSection?: React.ReactNode }) { +function OrganizationPermissionNotice(props: { access: OrgPageAccess; onRetry: () => void }) { + if (props.access.status === "denied") { + return ( + + Read-only organization access + + An organization administrator manages names, domains, invitations, and member roles. + + + ); + } + + if (props.access.status === "failed") { + return ( + + Organization permissions unavailable + +

+ Management controls are unavailable because your organization permissions could not be + determined. +

+ +
+
+ ); + } + + return null; +} + +function OrganizationMembersSkeleton() { + return ( +
+ {[1, 2, 3].map((index) => ( + + ))} +
+ ); +} + +export function OrgPage(props: { + domainsSection?: React.ReactNode | ((access: OrgPageAccess) => React.ReactNode); +}) { const auth = useAuth(); + const organizationId = auth.status === "authenticated" ? (auth.organization?.id ?? null) : null; const organizationName = auth.status === "authenticated" ? (auth.organization?.name ?? "Organization") : "Organization"; const membersResult = useAtomValue(orgMembersAtom); const rolesResult = useAtomValue(orgRolesAtom); + const refreshMembers = useAtomRefresh(orgMembersAtom); const doRemove = useAtomSet(removeMember, { mode: "promiseExit" }); const doUpdateRole = useAtomSet(updateMemberRole, { mode: "promiseExit" }); const doUpdateOrgName = useAtomSet(updateOrgName, { mode: "promiseExit" }); const [inviteOpen, setInviteOpen] = useState(false); - const [editName, setEditName] = useState(organizationName); - const [savingName, setSavingName] = useState(false); + const [nameDraft, setNameDraft] = useState(() => + organizationNameDraft(organizationId, organizationName), + ); + const [savingOrganizationId, setSavingOrganizationId] = useState(null); const [search, setSearch] = useState(""); + // A URL-driven organization switch can replace auth while this page remains + // mounted. Associate the draft with its organization so stale text from the + // previous workspace is never rendered or submitted during that transition. + const activeNameDraft = + nameDraft.organizationId === organizationId + ? nameDraft + : organizationNameDraft(organizationId, organizationName); + const editName = activeNameDraft.value; + const savingName = savingOrganizationId === organizationId; + + useEffect(() => { + setNameDraft((current) => { + if (current.organizationId !== organizationId) { + return organizationNameDraft(organizationId, organizationName); + } + if (current.sourceName === organizationName) return current; + return { + organizationId, + sourceName: organizationName, + value: current.value === current.sourceName ? 
organizationName : current.value, + }; + }); + }, [organizationId, organizationName]); + + const access = resolveOrgPageAccessResult(membersResult); + const canManageOrganization = access.canManageOrganization; + const roles = AsyncResult.match(rolesResult, { onInitial: () => [] as readonly RoleData[], onFailure: () => [] as readonly RoleData[], @@ -164,12 +310,14 @@ export function OrgPage(props: { domainsSection?: React.ReactNode }) { }; const handleSaveName = async () => { + if (!canManageOrganization || !organizationId) return; const trimmed = editName.trim(); if (!trimmed || trimmed === organizationName) { - setEditName(organizationName); + setNameDraft(organizationNameDraft(organizationId, organizationName)); return; } - setSavingName(true); + const targetOrganizationId = organizationId; + setSavingOrganizationId(targetOrganizationId); const exit = await doUpdateOrgName({ payload: { name: trimmed }, reactivityKeys: orgInfoWriteKeys, @@ -179,11 +327,20 @@ export function OrgPage(props: { domainsSection?: React.ReactNode }) { toast.success("Organization name updated"); } else { toast.error("Failed to update organization name"); - setEditName(organizationName); + setNameDraft((current) => + current.organizationId === targetOrganizationId + ? organizationNameDraft(targetOrganizationId, organizationName) + : current, + ); } - setSavingName(false); + setSavingOrganizationId((current) => (current === targetOrganizationId ? null : current)); }; + const domainsSection = + typeof props.domainsSection === "function" + ? props.domainsSection(access) + : props.domainsSection; + return (
@@ -191,30 +348,54 @@ export function OrgPage(props: { domainsSection?: React.ReactNode }) {

Organization

+ +
-
-
- - setEditName((e.target as HTMLInputElement).value)} - onKeyDown={(e) => { - if (e.key === "Enter") handleSaveName(); - }} - className="mt-1.5 h-9 text-sm" - /> + + {access.status === "loading" ? ( +
+ Checking organization permissions +
- {editName.trim() !== organizationName && editName.trim() !== "" && ( - - )} -
+ ) : canManageOrganization ? ( +
+
+ + setNameDraft({ + organizationId, + sourceName: organizationName, + value: (e.target as HTMLInputElement).value, + }) + } + onKeyDown={(e) => { + if (e.key === "Enter") void handleSaveName(); + }} + className="h-9 text-sm" + /> +
+ {editName.trim() !== organizationName && editName.trim() !== "" && ( + + )} +
+ ) : ( +

{organizationName}

+ )}
- {props.domainsSection && props.domainsSection} + {domainsSection}
@@ -224,9 +405,13 @@ export function OrgPage(props: { domainsSection?: React.ReactNode }) { People with access to this Executor instance.

- + {access.status === "loading" ? ( + + ) : canManageOrganization ? ( + + ) : null}
- {AsyncResult.match(membersResult, { - onInitial: () => ( -
- {[1, 2, 3].map((i) => ( -
- ))} -
- ), - onFailure: () => ( -
-

Failed to load members

-
- ), - onSuccess: ({ value }) => { - const members = value.members; - const filtered = search - ? members.filter( - (m: MemberData) => - m.email.toLowerCase().includes(search.toLowerCase()) || - (m.name?.toLowerCase().includes(search.toLowerCase()) ?? false), - ) - : members; - - if (filtered.length === 0) { + {access.status === "loading" ? ( + + ) : ( + AsyncResult.match(membersResult, { + onInitial: () => , + onFailure: () => ( +
+

Failed to load members

+
+ ), + onSuccess: ({ value }) => { + const members = value.members; + const filtered = search + ? members.filter( + (m: MemberData) => + m.email.toLowerCase().includes(search.toLowerCase()) || + (m.name?.toLowerCase().includes(search.toLowerCase()) ?? false), + ) + : members; + + if (filtered.length === 0) { + return ( +

+ {search ? "No matching members" : "No members yet"} +

+ ); + } + return ( -

- {search ? "No matching members" : "No members yet"} -

- ); - } +
+ {filtered.map((member: MemberData) => ( +
+ {member.avatarUrl ? ( + + ) : ( +
+ {member.name + ? member.name + .split(" ") + .map((n: string) => n[0]) + .join("") + .slice(0, 2) + .toUpperCase() + : member.email[0]!.toUpperCase()} +
+ )} - return ( -
- {filtered.map((member: MemberData) => ( -
- {member.avatarUrl ? ( - - ) : ( -
- {member.name - ? member.name - .split(" ") - .map((n: string) => n[0]) - .join("") - .slice(0, 2) - .toUpperCase() - : member.email[0]!.toUpperCase()} -
- )} - -
-
-

- {member.name ?? member.email} -

- {member.isCurrentUser && ( - You - )} - {member.status === "pending" && ( - - Invited - +
+
+

+ {member.name ?? member.email} +

+ {member.isCurrentUser && ( + You + )} + {member.status === "pending" && ( + + Invited + + )} +
+ {member.name && ( +

+ {member.email} +

)}
- {member.name && ( -

- {member.email} -

+ +

+ {member.role} +

+ +

+ {formatLastActive(member.lastActiveAt)} +

+ + {canManageOrganization && !member.isCurrentUser ? ( + + + + + + {roles.length > 0 && ( + <> + + + Change role + + + {roles.map((role: RoleData) => ( + + handleChangeRole(member.id, role.slug, role.name) + } + > + {role.name} + + ))} + + + + + )} + handleRemove(member.id, member.name ?? member.email)} + > + Remove member + + + + ) : ( +
)}
- -

- {member.role} -

- -

- {formatLastActive(member.lastActiveAt)} -

- - {!member.isCurrentUser ? ( - - - - - - {roles.length > 0 && ( - <> - - - Change role - - - {roles.map((role: RoleData) => ( - - handleChangeRole(member.id, role.slug, role.name) - } - > - {role.name} - - ))} - - - - - )} - handleRemove(member.id, member.name ?? member.email)} - > - Remove member - - - - ) : ( -
- )} -
- ))} -
- ); - }, - })} + ))} +
+ ); + }, + }) + )} - + {canManageOrganization && ( + + )}
); diff --git a/tests/ec2-vm.test.ts b/tests/ec2-vm.test.ts new file mode 100644 index 000000000..8872a4e26 --- /dev/null +++ b/tests/ec2-vm.test.ts @@ -0,0 +1,248 @@ +import { describe, expect, it } from "@effect/vitest"; +import { Data, Effect } from "effect"; + +import { createEc2FinalizerStack, ec2ResourceNames, ec2RunInstancesArgs } from "../e2e/src/vm/ec2"; +import { + EC2_CREATED_AT_TAG, + EC2_EXPIRES_AT_TAG, + EC2_MANAGED_TAG, + EC2_REPOSITORY_TAG, + EC2_RUN_ATTEMPT_TAG, + EC2_RUN_ID_TAG, + EC2_RUN_SCOPE_TAG, + cleanupCurrentEc2Resources, + ec2ResourceTags, + selectCurrentEc2Resources, + selectExpiredEc2Resources, + type TaggedEc2Resource, +} from "../e2e/src/vm/ec2-lifecycle"; +import { requireEc2CleanupOwner, resolveVmRunMetadata } from "../e2e/src/vm/run-scope"; + +class SimulatedCleanupFailure extends Data.TaggedError("SimulatedCleanupFailure")<{ + readonly resource: string; +}> {} + +describe("EC2 VM resources", () => { + it("runs cleanup in dependency-safe reverse order exactly once", async () => { + const order: string[] = []; + const finalizers = createEc2FinalizerStack(); + finalizers.add("local key directory", () => { + order.push("local"); + }); + finalizers.add("EC2 key pair", () => { + order.push("key"); + }); + finalizers.add("EC2 security group", () => { + order.push("security-group"); + }); + finalizers.add("EC2 instance", () => { + order.push("instance"); + }); + + await finalizers.run(); + await finalizers.run(); + + expect(order).toEqual(["instance", "security-group", "key", "local"]); + }); + + it("continues cleanup after an individual finalizer fails", async () => { + const order: string[] = []; + const finalizers = createEc2FinalizerStack(); + finalizers.add("local key directory", () => { + order.push("local"); + }); + finalizers.add("EC2 key pair", () => { + order.push("key"); + return Effect.runPromise( + Effect.fail(new SimulatedCleanupFailure({ resource: "EC2 key pair" })), + ); + }); + finalizers.add("EC2 instance", () => { + 
order.push("instance"); + }); + + await expect(finalizers.run()).rejects.toThrow("EC2 cleanup was incomplete"); + expect(order).toEqual(["instance", "key", "local"]); + }); + + it("hardens instance metadata and encrypts the disposable root volume", () => { + const metadata = resolveVmRunMetadata( + { + E2E_VM_RUN_SCOPE: "run-123-attempt-2-windows", + GITHUB_REPOSITORY: "example/executor", + GITHUB_RUN_ATTEMPT: "2", + GITHUB_RUN_ID: "123", + }, + Date.parse("2026-06-26T00:00:00.000Z"), + ); + const args = ec2RunInstancesArgs({ + ami: "ami-test", + instanceType: "t3.medium", + keyPairName: "executor-e2e-key-run", + rootDeviceName: "/dev/sda1", + securityGroupId: "sg-test", + subnetId: "subnet-test", + tags: ec2ResourceTags(metadata, "executor-e2e-run"), + userDataFile: "/tmp/user-data.txt", + }); + + expect(args).toContain("--key-name"); + expect(args).toContain("executor-e2e-key-run"); + expect(args).toContain( + "HttpTokens=required,HttpEndpoint=enabled,HttpPutResponseHopLimit=1,InstanceMetadataTags=disabled", + ); + expect(args).toContain( + '[{"DeviceName":"/dev/sda1","Ebs":{"DeleteOnTermination":true,"Encrypted":true,"VolumeType":"gp3"}}]', + ); + const tagSpecification = args.at(args.indexOf("--tag-specifications") + 1) ?? 
""; + expect(tagSpecification).toContain('"Key":"executor-e2e:run-scope"'); + expect(tagSpecification).toContain('"Value":"run-123-attempt-2-windows"'); + expect(tagSpecification).toContain('"Key":"executor-e2e:created-at"'); + expect(tagSpecification).toContain('"Key":"executor-e2e:expires-at"'); + expect(metadata.expiresAt).toBe("2026-06-26T06:00:00.000Z"); + }); + + it("derives distinct per-run key and security-group names", () => { + expect(ec2ResourceNames("run-one")).toEqual({ + instance: "executor-e2e-run-one", + keyPair: "executor-e2e-key-run-one", + securityGroup: "executor-e2e-sg-run-one", + }); + expect(ec2ResourceNames("run-two")).toEqual({ + instance: "executor-e2e-run-two", + keyPair: "executor-e2e-key-run-two", + securityGroup: "executor-e2e-sg-run-two", + }); + }); + + it("selects only the exact current run and matrix scope", () => { + const environment = { + E2E_VM_RUN_SCOPE: "windows-leg", + GITHUB_REPOSITORY: "example/executor", + GITHUB_RUN_ATTEMPT: "2", + GITHUB_RUN_ID: "123", + }; + const owner = requireEc2CleanupOwner(environment); + const owned = taggedResource("instance", "i-owned", { + [EC2_MANAGED_TAG]: "true", + [EC2_REPOSITORY_TAG]: "example/executor", + [EC2_RUN_ATTEMPT_TAG]: "2", + [EC2_RUN_ID_TAG]: "123", + [EC2_RUN_SCOPE_TAG]: "windows-leg", + }); + const sibling = taggedResource("instance", "i-sibling", { + ...owned.tags, + [EC2_RUN_SCOPE_TAG]: "linux-leg", + }); + const earlierAttempt = taggedResource("security-group", "sg-old", { + ...owned.tags, + [EC2_RUN_ATTEMPT_TAG]: "1", + }); + + expect(selectCurrentEc2Resources([owned, sibling, earlierAttempt], owner)).toEqual([owned]); + }); + + it("sweeps only expired, old-enough resources carrying exact repository ownership", () => { + const now = Date.parse("2026-06-26T12:00:00.000Z"); + const tags = { + [EC2_MANAGED_TAG]: "true", + [EC2_REPOSITORY_TAG]: "example/executor", + [EC2_CREATED_AT_TAG]: "2026-06-26T00:00:00.000Z", + [EC2_EXPIRES_AT_TAG]: "2026-06-26T06:00:00.000Z", + }; + 
const expired = taggedResource("instance", "i-expired", tags); + const young = taggedResource("instance", "i-young", { + ...tags, + [EC2_CREATED_AT_TAG]: "2026-06-26T11:00:00.000Z", + }); + const unexpired = taggedResource("key-pair", "key-future", { + ...tags, + [EC2_EXPIRES_AT_TAG]: "2026-06-26T18:00:00.000Z", + }); + const anotherRepository = taggedResource("security-group", "sg-other", { + ...tags, + [EC2_REPOSITORY_TAG]: "someone-else/executor", + }); + const unmanaged = taggedResource("instance", "i-unmanaged", { + ...tags, + [EC2_MANAGED_TAG]: "false", + }); + const invalidDeadline = taggedResource("instance", "i-invalid", { + ...tags, + [EC2_EXPIRES_AT_TAG]: "not-a-date", + }); + + expect( + selectExpiredEc2Resources( + [expired, young, unexpired, anotherRepository, unmanaged, invalidDeadline], + "example/executor", + 6, + now, + ), + ).toEqual([expired]); + }); + + it("runs exact-scope EC2 cleanup in dependency order", async () => { + const environment = { + E2E_VM_RUN_SCOPE: "windows-leg", + GITHUB_REPOSITORY: "example/executor", + GITHUB_RUN_ATTEMPT: "2", + GITHUB_RUN_ID: "123", + }; + const tags = [ + { Key: EC2_MANAGED_TAG, Value: "true" }, + { Key: EC2_REPOSITORY_TAG, Value: "example/executor" }, + { Key: EC2_RUN_ATTEMPT_TAG, Value: "2" }, + { Key: EC2_RUN_ID_TAG, Value: "123" }, + { Key: EC2_RUN_SCOPE_TAG, Value: "windows-leg" }, + ]; + const calls: string[][] = []; + const runner = async (args: readonly string[]) => { + calls.push([...args]); + if (args[1] === "describe-instances") { + return JSON.stringify({ + Reservations: [{ Instances: [{ InstanceId: "i-owned", Tags: tags }] }], + }); + } + if (args[1] === "describe-key-pairs") { + return JSON.stringify({ KeyPairs: [{ KeyPairId: "key-owned", Tags: tags }] }); + } + if (args[1] === "describe-security-groups") { + return JSON.stringify({ SecurityGroups: [{ GroupId: "sg-owned", Tags: tags }] }); + } + return "{}"; + }; + + const result = await cleanupCurrentEc2Resources({ environment, runner, wait: 
async () => {} }); + + expect(result).toEqual({ deleted: 3, scope: "windows-leg" }); + expect(calls.map((args) => args.slice(0, 2))).toEqual([ + ["ec2", "describe-instances"], + ["ec2", "describe-key-pairs"], + ["ec2", "describe-security-groups"], + ["ec2", "terminate-instances"], + ["ec2", "wait"], + ["ec2", "delete-security-group"], + ["ec2", "delete-key-pair"], + ]); + }); + + it("fails closed when EC2 cleanup ownership is incomplete", async () => { + await expect( + cleanupCurrentEc2Resources({ + environment: { E2E_VM_RUN_SCOPE: "windows-leg" }, + runner: async () => "{}", + }), + ).rejects.toThrow("EC2 cleanup requires"); + }); +}); + +const taggedResource = ( + kind: TaggedEc2Resource["kind"], + id: string, + tags: Readonly>, +) => ({ + id, + kind, + tags, +}); diff --git a/tests/linux-kvm-vm.test.ts b/tests/linux-kvm-vm.test.ts new file mode 100644 index 000000000..006f2d284 --- /dev/null +++ b/tests/linux-kvm-vm.test.ts @@ -0,0 +1,859 @@ +import { + existsSync, + mkdirSync, + mkdtempSync, + readFileSync, + readdirSync, + rmSync, + writeFileSync, +} from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; + +import { describe, expect, it, vi } from "@effect/vitest"; +import { Data, Effect } from "effect"; + +import { + KVM_ACCOUNT_FIXTURES, + KVM_CLAUDE_EXECUTE_CODE, + createKvmAccountFixture, + createKvmReplayBrain, + isLoopbackHttpUrl, + runKvmGuestClaude, +} from "../e2e/desktop-kvm/guest-runtime"; +import { + LINUX_KVM_DESKTOP_CAPABILITIES, + LinuxKvmUnavailableError, + createLinuxKvmDesktopProvider, + preflightLinuxKvm, + type LinuxKvmPreflightRuntime, +} from "../e2e/src/vm/linux-kvm"; +import { + cleanupLibvirtLinuxKvmFromLedger, + createLinuxKvmFinalizerStack, + libvirtDomainArgs, + linuxKvmCloudInit, + linuxKvmOwnerIdentity, + linuxKvmOwnerIdentityMatches, + linuxKvmOwnerStatus, + linuxKvmRunScope, + sweepStaleLibvirtLinuxKvm, + type LinuxKvmCleanupHostProcess, + type LinuxKvmCleanupRuntime, + type LinuxKvmOwnerIdentity, 
+ type LinuxKvmStaleSweepRuntime, +} from "../e2e/src/vm/linux-kvm-libvirt"; +import { cleanupLinuxKvmLedger, sweepLinuxKvmRepository } from "../e2e/scripts/cleanup-linux-kvm"; +import { projectDefinition } from "../e2e/src/project-matrix"; + +class SimulatedPreflightFailure extends Data.TaggedError("SimulatedPreflightFailure")<{ + readonly dependency: string; +}> {} + +const failProbe = (dependency: string) => + Effect.runPromise(Effect.fail(new SimulatedPreflightFailure({ dependency }))); + +const availableRuntime = (report = vi.fn()): LinuxKvmPreflightRuntime => ({ + access: async () => undefined, + exec: async (command) => ({ stdout: `${command} available`, stderr: "" }), + report, +}); + +const unavailableRuntime = (report = vi.fn()): LinuxKvmPreflightRuntime => ({ + access: () => failProbe("/dev/kvm"), + exec: async (command) => { + if (command === "ffmpeg") return { stdout: "ffmpeg available", stderr: "" }; + return failProbe(command); + }, + report, +}); + +const listenOnLoopback = (server: ReturnType) => + new Promise((resolve, reject) => { + server.once("error", reject); + server.listen(0, "127.0.0.1", () => { + server.off("error", reject); + const address = server.address(); + if (!address || typeof address === "string") { + server.close(); + // oxlint-disable-next-line executor/no-promise-reject, executor/no-error-constructor -- boundary: test fixture adapts node:http listen into a promise + reject(new Error("test fixture did not publish a TCP address")); + return; + } + resolve(address.port); + }); + }); + +const closeServer = (server: ReturnType) => + new Promise((resolve) => server.close(() => resolve())); + +const writeTestCleanupLedger = ( + path: string, + input: { + readonly runScope: string; + readonly domainName: string; + readonly libvirtUri: string; + readonly workRoot: string; + readonly workDir: string; + readonly hostProcesses?: ReadonlyArray; + readonly createdAt?: string; + readonly repositoryScope?: string; + readonly owner?: { + 
readonly pid: number; + readonly bootId: string; + readonly startTicks: string; + }; + }, +) => + writeFileSync( + path, + `${JSON.stringify({ + version: 2, + createdAt: input.createdAt ?? "2026-06-27T00:00:00.000Z", + repositoryScope: input.repositoryScope ?? input.runScope, + owner: input.owner ?? { pid: 999, bootId: "boot-test", startTicks: "1" }, + hostProcesses: input.hostProcesses ?? [], + ...input, + })}\n`, + ); + +const listJsonLedgers = (directory: string) => + readdirSync(directory, { withFileTypes: true }) + .filter((entry) => entry.isFile() && entry.name.endsWith(".json")) + .map((entry) => join(directory, entry.name)) + .sort(); + +describe("Linux KVM desktop preflight", () => { + it("reports the complete GUI and recording substrate as available", async () => { + const report = vi.fn(); + const availability = await preflightLinuxKvm({ + baseImagePath: "/images/executor-desktop.qcow2", + runtime: availableRuntime(report), + }); + + expect(availability.status).toBe("available"); + expect(availability.checks).toHaveLength(7); + expect(availability.capabilities).toBe(LINUX_KVM_DESKTOP_CAPABILITIES); + expect(report).not.toHaveBeenCalled(); + }); + + it("returns and reports an unavailable optional local substrate", async () => { + const report = vi.fn(); + const availability = await preflightLinuxKvm({ + requirement: "optional", + runtime: unavailableRuntime(report), + }); + + expect(availability).toMatchObject({ + status: "unavailable", + capabilities: { + workload: "desktop-gui", + display: { interactive: true, protocol: "spice" }, + recording: { container: "mp4", required: true, source: "guest-display" }, + }, + }); + expect(report).toHaveBeenCalledWith(expect.stringContaining("[optional]")); + }); + + it("fails required mode before a VM driver can boot a guest", async () => { + const report = vi.fn(); + const preflight = preflightLinuxKvm({ + requirement: "required", + runtime: unavailableRuntime(report), + }); + + await 
expect(preflight).rejects.toBeInstanceOf(LinuxKvmUnavailableError); + expect(report).not.toHaveBeenCalled(); + }); + + it("gates desktop provisioning on required preflight", async () => { + const provision = vi.fn(() => failProbe("VM driver must not run")); + const provider = createLinuxKvmDesktopProvider( + { provision }, + { runtime: unavailableRuntime() }, + ); + + await expect(provider.provision()).rejects.toBeInstanceOf(LinuxKvmUnavailableError); + expect(provision).not.toHaveBeenCalled(); + }); +}); + +describe("Linux KVM libvirt driver", () => { + it("registers an opt-in required heavy-VM project", () => { + expect(projectDefinition("desktop-kvm")).toMatchObject({ + target: "desktop-kvm", + include: ["desktop-kvm/**/*.test.ts"], + globalSetup: ["./setup/desktop-kvm.globalsetup.ts"], + requiredCapabilities: ["desktop-gui"], + tier: "heavy-vm", + hermetic: true, + }); + }); + + it("creates a QXL/SPICE domain from disposable overlay and cloud-init disks", () => { + const args = libvirtDomainArgs({ + domainName: "executor-e2e-desktop-test", + libvirtNetwork: "default", + libvirtUri: "qemu:///system", + memoryMiB: 4_096, + osVariant: "generic", + overlayPath: "/tmp/guest.qcow2", + seedPath: "/tmp/seed.iso", + vcpus: 4, + }); + + expect(args).toContain("spice,listen=127.0.0.1"); + expect(args).toContain("qxl"); + expect(args).toContain( + "path=/tmp/guest.qcow2,format=qcow2,bus=virtio,cache=none,discard=unmap", + ); + expect(args).toContain("path=/tmp/seed.iso,device=cdrom,readonly=on"); + }); + + it("seeds an isolated guest user and real Xorg service", () => { + const cloudInit = linuxKvmCloudInit({ + domainName: "executor-e2e-desktop-test", + guestDisplay: ":0", + guestUser: "executor", + publicKey: "ssh-ed25519 AAAATEST executor-e2e", + }); + + expect(cloudInit.userData).toContain("ssh-ed25519 AAAATEST executor-e2e"); + expect(cloudInit.userData).toContain("executor-e2e-gui.service"); + expect(cloudInit.userData).toContain("/usr/bin/Xorg :0 vt7"); + 
expect(cloudInit.userData).toContain("-noreset -ac"); + expect(cloudInit.userData).toContain("display-manager.service"); + }); + + it("guarantees LIFO discard even when one cleanup action fails", async () => { + const order: string[] = []; + const finalizers = createLinuxKvmFinalizerStack(); + finalizers.add("work directory", () => { + order.push("work-directory"); + }); + finalizers.add("domain", () => { + order.push("domain"); + return failProbe("domain cleanup"); + }); + finalizers.add("recording", () => { + order.push("recording"); + }); + + await expect(finalizers.run()).rejects.toThrow("Linux KVM cleanup was incomplete"); + expect(order).toEqual(["recording", "domain", "work-directory"]); + await expect(finalizers.run()).resolves.toBeUndefined(); + }); + + it("normalizes one explicit CI run scope for both provisioning and cleanup", () => { + expect(linuxKvmRunScope({ E2E_KVM_RUN_SCOPE: "Run 123 / Attempt 2 / KVM" })).toBe( + "run-123-attempt-2-kvm", + ); + expect(() => linuxKvmRunScope({ E2E_KVM_RUN_SCOPE: "///" })).toThrow("has no safe characters"); + }); + + it("cleans only the exact ledger domain and work directory", async () => { + const workRoot = mkdtempSync(join(tmpdir(), "executor-kvm-cleanup-test-")); + const workDir = join(workRoot, "executor-kvm-exact"); + const ledgerPath = join(workRoot, "cleanup.json"); + const domainName = "executor-e2e-desktop-run-123-99-deadbeef"; + const hostProcess: LinuxKvmCleanupHostProcess = { + pid: 43_210, + role: "ffmpeg", + marker: `executor-e2e-kvm:run-123:${domainName}:ffmpeg`, + }; + mkdirSync(workDir); + writeTestCleanupLedger(ledgerPath, { + runScope: "run-123", + domainName, + libvirtUri: "qemu:///system", + workRoot, + workDir, + hostProcesses: [hostProcess], + }); + const domainExists = vi.fn().mockResolvedValueOnce(true).mockResolvedValueOnce(false); + const hostProcessMatches = vi.fn().mockResolvedValueOnce(true).mockResolvedValueOnce(false); + const terminateHostProcess = vi.fn(async () => undefined); + 
const virsh = vi.fn(async () => undefined); + const removedDirectories: string[] = []; + const removedLedgers: string[] = []; + const runtime = { + domainExists, + hostProcessMatches, + terminateHostProcess, + virsh, + removeDirectory: (path: string) => { + removedDirectories.push(path); + rmSync(path, { force: true, recursive: true }); + }, + removeLedger: (path: string) => { + removedLedgers.push(path); + rmSync(path, { force: true }); + }, + } satisfies LinuxKvmCleanupRuntime; + + // oxlint-disable-next-line executor/no-try-catch-or-throw -- boundary: plain test fixture cleanup must run after assertions + try { + const cleaned = await cleanupLibvirtLinuxKvmFromLedger(ledgerPath, { + expectedRepositoryScope: "run-123", + expectedRunScope: "run-123", + expectedWorkRoot: workRoot, + expectedLibvirtUri: "qemu:///system", + runtime, + }); + + expect(cleaned.domainName).toBe(domainName); + expect(hostProcessMatches).toHaveBeenNthCalledWith(1, hostProcess.pid, hostProcess.marker); + expect(hostProcessMatches).toHaveBeenNthCalledWith(2, hostProcess.pid, hostProcess.marker); + expect(terminateHostProcess).toHaveBeenCalledWith(hostProcess.pid); + expect(domainExists).toHaveBeenNthCalledWith(1, "qemu:///system", domainName); + expect(domainExists).toHaveBeenNthCalledWith(2, "qemu:///system", domainName); + expect(virsh).toHaveBeenNthCalledWith(1, "qemu:///system", ["destroy", domainName]); + expect(virsh).toHaveBeenNthCalledWith(2, "qemu:///system", [ + "undefine", + domainName, + "--nvram", + ]); + expect(removedDirectories).toEqual([workDir]); + expect(removedLedgers).toEqual([ledgerPath]); + expect(existsSync(workDir)).toBe(false); + expect(existsSync(ledgerPath)).toBe(false); + } finally { + rmSync(workRoot, { force: true, recursive: true }); + } + }); + + it("refuses a cleanup ledger from any other run scope before touching libvirt", async () => { + const workRoot = mkdtempSync(join(tmpdir(), "executor-kvm-cleanup-scope-test-")); + const workDir = join(workRoot, 
"executor-kvm-exact"); + const ledgerPath = join(workRoot, "cleanup.json"); + mkdirSync(workDir); + writeTestCleanupLedger(ledgerPath, { + runScope: "run-123", + domainName: "executor-e2e-desktop-run-123-99-deadbeef", + libvirtUri: "qemu:///system", + workRoot, + workDir, + }); + const runtime = { + domainExists: vi.fn(async () => true), + hostProcessMatches: vi.fn(async () => false), + terminateHostProcess: vi.fn(async () => undefined), + virsh: vi.fn(async () => undefined), + removeDirectory: vi.fn(), + removeLedger: vi.fn(), + } satisfies LinuxKvmCleanupRuntime; + + // oxlint-disable-next-line executor/no-try-catch-or-throw -- boundary: plain test fixture cleanup must run after assertions + try { + await expect( + cleanupLibvirtLinuxKvmFromLedger(ledgerPath, { + expectedRepositoryScope: "run-123", + expectedRunScope: "run-124", + expectedWorkRoot: workRoot, + expectedLibvirtUri: "qemu:///system", + runtime, + }), + ).rejects.toThrow("expected run-124"); + expect(runtime.domainExists).not.toHaveBeenCalled(); + expect(runtime.hostProcessMatches).not.toHaveBeenCalled(); + expect(runtime.terminateHostProcess).not.toHaveBeenCalled(); + expect(runtime.virsh).not.toHaveBeenCalled(); + expect(runtime.removeDirectory).not.toHaveBeenCalled(); + expect(runtime.removeLedger).not.toHaveBeenCalled(); + expect(existsSync(ledgerPath)).toBe(true); + expect(existsSync(workDir)).toBe(true); + } finally { + rmSync(workRoot, { force: true, recursive: true }); + } + }); + + it("recovers a partial provision where the ledger exists but no domain was created", async () => { + const workRoot = mkdtempSync(join(tmpdir(), "executor-kvm-partial-test-")); + const workDir = join(workRoot, "executor-kvm-partial"); + const ledgerPath = join(workRoot, "cleanup.json"); + const domainName = "executor-e2e-desktop-run-partial-99-deadbeef"; + const exitedHostProcess: LinuxKvmCleanupHostProcess = { + pid: 43_211, + role: "xvfb", + marker: `executor-e2e-kvm:run-partial:${domainName}:xvfb`, + }; + 
mkdirSync(workDir); + writeTestCleanupLedger(ledgerPath, { + runScope: "run-partial", + domainName, + libvirtUri: "qemu:///system", + workRoot, + workDir, + hostProcesses: [exitedHostProcess], + }); + const runtime = { + domainExists: vi.fn(async () => false), + hostProcessMatches: vi.fn(async () => false), + terminateHostProcess: vi.fn(async () => undefined), + virsh: vi.fn(async () => undefined), + removeDirectory: (path: string) => rmSync(path, { force: true, recursive: true }), + removeLedger: (path: string) => rmSync(path, { force: true }), + } satisfies LinuxKvmCleanupRuntime; + + // oxlint-disable-next-line executor/no-try-catch-or-throw -- boundary: plain test fixture cleanup must run after assertions + try { + await cleanupLibvirtLinuxKvmFromLedger(ledgerPath, { + expectedRepositoryScope: "run-partial", + expectedRunScope: "run-partial", + expectedWorkRoot: workRoot, + expectedLibvirtUri: "qemu:///system", + runtime, + }); + expect(runtime.domainExists).toHaveBeenCalledWith("qemu:///system", domainName); + expect(runtime.hostProcessMatches).toHaveBeenCalledWith( + exitedHostProcess.pid, + exitedHostProcess.marker, + ); + expect(runtime.terminateHostProcess).not.toHaveBeenCalled(); + expect(runtime.virsh).not.toHaveBeenCalled(); + expect(existsSync(workDir)).toBe(false); + expect(existsSync(ledgerPath)).toBe(false); + } finally { + rmSync(workRoot, { force: true, recursive: true }); + } + }); + + it("treats a missing cancellation ledger as an explicit clean no-op", async () => { + await expect( + cleanupLinuxKvmLedger({ + ledgerPath: "/tmp/executor-kvm-definitely-missing-ledger.json", + expectedRepositoryScope: "run-missing", + expectedRunScope: "run-missing", + ledgerExists: () => false, + }), + ).resolves.toEqual({ + status: "missing", + ledgerPath: "/tmp/executor-kvm-definitely-missing-ledger.json", + }); + }); +}); + +describe("Linux KVM stale runner recovery", () => { + it("requires an explicit repository scope and positive TTL before scanning", () => 
{ + expect(() => + sweepLinuxKvmRepository({ + ledgerDirectory: "/var/tmp/executor-kvm-ledgers/repo-42", + repositoryScope: undefined, + staleTtlMs: "21600000", + }), + ).toThrow("requires E2E_KVM_REPOSITORY_SCOPE"); + expect(() => + sweepLinuxKvmRepository({ + ledgerDirectory: "/var/tmp/executor-kvm-ledgers/repo-42", + repositoryScope: "repo-42", + staleTtlMs: "0", + }), + ).toThrow("requires a positive E2E_KVM_STALE_TTL_MS"); + }); + + it("cleans only an expired dead owner and preserves fresh, active, current, and other repositories", async () => { + const root = mkdtempSync(join(tmpdir(), "executor-kvm-stale-sweep-test-")); + const repositoryScope = "repo-42"; + const ledgerDirectory = join(root, repositoryScope); + const otherLedgerDirectory = join(root, "repo-99"); + const workRoot = join(root, "work"); + mkdirSync(ledgerDirectory); + mkdirSync(otherLedgerDirectory); + mkdirSync(workRoot); + const makeLedger = (input: { + readonly filename: string; + readonly runScope: string; + readonly createdAt: string; + readonly owner: LinuxKvmOwnerIdentity; + readonly hostProcess?: LinuxKvmCleanupHostProcess; + }) => { + const ledgerPath = join(ledgerDirectory, input.filename); + const workDir = join(workRoot, `executor-kvm-${input.runScope}`); + const domainName = `executor-e2e-desktop-${input.runScope}-99-deadbeef`; + mkdirSync(workDir); + writeTestCleanupLedger(ledgerPath, { + repositoryScope, + runScope: input.runScope, + createdAt: input.createdAt, + owner: input.owner, + domainName, + libvirtUri: "qemu:///system", + workRoot, + workDir, + hostProcesses: input.hostProcess ? 
[input.hostProcess] : [], + }); + return { ledgerPath, workDir, domainName }; + }; + const staleRunScope = `${repositoryScope}-run-stale`; + const staleDomain = `executor-e2e-desktop-${staleRunScope}-99-deadbeef`; + const staleHostProcess: LinuxKvmCleanupHostProcess = { + pid: 70_001, + role: "ffmpeg", + marker: `executor-e2e-kvm:${staleRunScope}:${staleDomain}:ffmpeg`, + }; + const stale = makeLedger({ + filename: "01-stale.json", + runScope: staleRunScope, + createdAt: "2026-06-27T06:00:00.000Z", + owner: { pid: 101, bootId: "boot-old", startTicks: "10" }, + hostProcess: staleHostProcess, + }); + const fresh = makeLedger({ + filename: "02-fresh.json", + runScope: `${repositoryScope}-run-fresh`, + createdAt: "2026-06-27T10:00:00.000Z", + owner: { pid: 102, bootId: "boot-old", startTicks: "20" }, + }); + const active = makeLedger({ + filename: "03-active.json", + runScope: `${repositoryScope}-run-active`, + createdAt: "2026-06-27T00:00:00.000Z", + owner: { pid: 103, bootId: "boot-current", startTicks: "30" }, + }); + const current = makeLedger({ + filename: "04-current.json", + runScope: `${repositoryScope}-run-current`, + createdAt: "2026-06-27T00:00:00.000Z", + owner: { pid: 104, bootId: "boot-old", startTicks: "40" }, + }); + const otherWorkDir = join(workRoot, "executor-kvm-other-repository"); + const otherLedgerPath = join(otherLedgerDirectory, "01-other.json"); + mkdirSync(otherWorkDir); + writeTestCleanupLedger(otherLedgerPath, { + repositoryScope: "repo-99", + runScope: "repo-99-run-old", + createdAt: "2026-06-27T00:00:00.000Z", + owner: { pid: 105, bootId: "boot-old", startTicks: "50" }, + domainName: "executor-e2e-desktop-repo-99-run-old-99-deadbeef", + libvirtUri: "qemu:///system", + workRoot, + workDir: otherWorkDir, + }); + const ownerStatus = vi.fn((owner: LinuxKvmOwnerIdentity) => + owner.pid === 103 ? 
("alive" as const) : ("dead" as const), + ); + const sweepRuntime = { + now: () => Date.parse("2026-06-27T12:00:00.000Z"), + listLedgerPaths: listJsonLedgers, + ownerStatus, + } satisfies LinuxKvmStaleSweepRuntime; + const cleanupRuntime = { + domainExists: vi.fn().mockResolvedValueOnce(true).mockResolvedValueOnce(false), + hostProcessMatches: vi.fn().mockResolvedValueOnce(true).mockResolvedValueOnce(false), + terminateHostProcess: vi.fn(async () => undefined), + virsh: vi.fn(async () => undefined), + removeDirectory: (path: string) => rmSync(path, { force: true, recursive: true }), + removeLedger: (path: string) => rmSync(path, { force: true }), + } satisfies LinuxKvmCleanupRuntime; + + // oxlint-disable-next-line executor/no-try-catch-or-throw -- boundary: plain test fixture cleanup must run after assertions + try { + const result = await sweepStaleLibvirtLinuxKvm({ + ledgerDirectory, + repositoryScope, + ttlMs: 6 * 60 * 60 * 1_000, + currentLedgerPath: current.ledgerPath, + expectedWorkRoot: workRoot, + expectedLibvirtUri: "qemu:///system", + runtime: sweepRuntime, + cleanupRuntime, + }); + + expect(result).toEqual({ + scanned: 4, + cleaned: [stale.ledgerPath], + preservedCurrent: [current.ledgerPath], + preservedFresh: [fresh.ledgerPath], + preservedActive: [active.ledgerPath], + }); + expect(cleanupRuntime.hostProcessMatches).toHaveBeenNthCalledWith( + 1, + staleHostProcess.pid, + staleHostProcess.marker, + ); + expect(cleanupRuntime.hostProcessMatches).toHaveBeenNthCalledWith( + 2, + staleHostProcess.pid, + staleHostProcess.marker, + ); + expect(cleanupRuntime.terminateHostProcess).toHaveBeenCalledWith(staleHostProcess.pid); + expect(cleanupRuntime.virsh).toHaveBeenCalledWith("qemu:///system", [ + "destroy", + stale.domainName, + ]); + expect(existsSync(stale.ledgerPath)).toBe(false); + expect(existsSync(stale.workDir)).toBe(false); + expect(existsSync(fresh.ledgerPath)).toBe(true); + expect(existsSync(fresh.workDir)).toBe(true); + 
expect(existsSync(active.ledgerPath)).toBe(true); + expect(existsSync(active.workDir)).toBe(true); + expect(existsSync(current.ledgerPath)).toBe(true); + expect(existsSync(current.workDir)).toBe(true); + expect(existsSync(otherLedgerPath)).toBe(true); + expect(existsSync(otherWorkDir)).toBe(true); + } finally { + rmSync(root, { force: true, recursive: true }); + } + }); + + it("validates every repository ledger before cleaning any stale resource", async () => { + const root = mkdtempSync(join(tmpdir(), "executor-kvm-stale-malformed-test-")); + const repositoryScope = "repo-42"; + const ledgerDirectory = join(root, repositoryScope); + const workRoot = join(root, "work"); + const workDir = join(workRoot, "executor-kvm-valid"); + const validLedgerPath = join(ledgerDirectory, "01-valid.json"); + mkdirSync(ledgerDirectory); + mkdirSync(workDir, { recursive: true }); + writeTestCleanupLedger(validLedgerPath, { + repositoryScope, + runScope: `${repositoryScope}-run-valid`, + createdAt: "2026-06-27T00:00:00.000Z", + owner: { pid: 201, bootId: "boot-old", startTicks: "10" }, + domainName: `executor-e2e-desktop-${repositoryScope}-run-valid-99-deadbeef`, + libvirtUri: "qemu:///system", + workRoot, + workDir, + }); + const invalidLedgerPath = join(ledgerDirectory, "02-invalid.json"); + writeFileSync(invalidLedgerPath, "{\n"); + const cleanupRuntime = { + domainExists: vi.fn(async () => false), + hostProcessMatches: vi.fn(async () => false), + terminateHostProcess: vi.fn(async () => undefined), + virsh: vi.fn(async () => undefined), + removeDirectory: vi.fn(), + removeLedger: vi.fn(), + } satisfies LinuxKvmCleanupRuntime; + const sweep = () => + sweepStaleLibvirtLinuxKvm({ + ledgerDirectory, + repositoryScope, + ttlMs: 1, + expectedWorkRoot: workRoot, + expectedLibvirtUri: "qemu:///system", + runtime: { + now: () => Date.parse("2026-06-27T12:00:00.000Z"), + listLedgerPaths: listJsonLedgers, + ownerStatus: () => "dead", + }, + cleanupRuntime, + }); + + // 
oxlint-disable-next-line executor/no-try-catch-or-throw -- boundary: plain test fixture cleanup must run after assertions + try { + await expect(sweep()).rejects.toThrow(); + expect(cleanupRuntime.domainExists).not.toHaveBeenCalled(); + expect(cleanupRuntime.removeDirectory).not.toHaveBeenCalled(); + expect(cleanupRuntime.removeLedger).not.toHaveBeenCalled(); + expect(existsSync(validLedgerPath)).toBe(true); + expect(existsSync(workDir)).toBe(true); + + writeFileSync(invalidLedgerPath, '{"version":999}\n'); + await expect(sweep()).rejects.toThrow("invalid Linux KVM cleanup ledger"); + expect(cleanupRuntime.domainExists).not.toHaveBeenCalled(); + expect(cleanupRuntime.removeDirectory).not.toHaveBeenCalled(); + expect(cleanupRuntime.removeLedger).not.toHaveBeenCalled(); + expect(existsSync(validLedgerPath)).toBe(true); + expect(existsSync(workDir)).toBe(true); + + writeFileSync( + invalidLedgerPath, + `${JSON.stringify({ + version: 2, + createdAt: "2026-06-27T00:00:00.000Z", + repositoryScope, + runScope: `${repositoryScope}-run-incomplete`, + domainName: `executor-e2e-desktop-${repositoryScope}-run-incomplete-99-deadbeef`, + libvirtUri: "qemu:///system", + workRoot, + workDir: join(workRoot, "executor-kvm-incomplete"), + owner: { pid: 202, bootId: "boot-old", startTicks: "20" }, + })}\n`, + ); + await expect(sweep()).rejects.toThrow("invalid Linux KVM host process ledger"); + expect(cleanupRuntime.domainExists).not.toHaveBeenCalled(); + expect(cleanupRuntime.removeDirectory).not.toHaveBeenCalled(); + expect(cleanupRuntime.removeLedger).not.toHaveBeenCalled(); + expect(existsSync(validLedgerPath)).toBe(true); + expect(existsSync(workDir)).toBe(true); + } finally { + rmSync(root, { force: true, recursive: true }); + } + }); + + it("fails safe when an expired ledger owner cannot be classified", async () => { + const root = mkdtempSync(join(tmpdir(), "executor-kvm-owner-unknown-test-")); + const repositoryScope = "repo-42"; + const ledgerDirectory = join(root, 
repositoryScope); + const workRoot = join(root, "work"); + const workDir = join(workRoot, "executor-kvm-owner-unknown"); + const ledgerPath = join(ledgerDirectory, "01-owner-unknown.json"); + mkdirSync(ledgerDirectory); + mkdirSync(workDir, { recursive: true }); + writeTestCleanupLedger(ledgerPath, { + repositoryScope, + runScope: `${repositoryScope}-run-owner-unknown`, + createdAt: "2026-06-27T00:00:00.000Z", + owner: { pid: 301, bootId: "boot-old", startTicks: "10" }, + domainName: `executor-e2e-desktop-${repositoryScope}-run-owner-unknown-99-deadbeef`, + libvirtUri: "qemu:///system", + workRoot, + workDir, + }); + const cleanupRuntime = { + domainExists: vi.fn(async () => false), + hostProcessMatches: vi.fn(async () => false), + terminateHostProcess: vi.fn(async () => undefined), + virsh: vi.fn(async () => undefined), + removeDirectory: vi.fn(), + removeLedger: vi.fn(), + } satisfies LinuxKvmCleanupRuntime; + + // oxlint-disable-next-line executor/no-try-catch-or-throw -- boundary: plain test fixture cleanup must run after assertions + try { + await expect( + sweepStaleLibvirtLinuxKvm({ + ledgerDirectory, + repositoryScope, + ttlMs: 1, + expectedWorkRoot: workRoot, + expectedLibvirtUri: "qemu:///system", + runtime: { + now: () => Date.parse("2026-06-27T12:00:00.000Z"), + listLedgerPaths: listJsonLedgers, + ownerStatus: () => "unknown", + }, + cleanupRuntime, + }), + ).rejects.toThrow("owner status is unknown"); + expect(cleanupRuntime.domainExists).not.toHaveBeenCalled(); + expect(cleanupRuntime.removeDirectory).not.toHaveBeenCalled(); + expect(existsSync(ledgerPath)).toBe(true); + expect(existsSync(workDir)).toBe(true); + } finally { + rmSync(root, { force: true, recursive: true }); + } + }); + + it("treats a reused PID as dead unless boot and start identities also match", () => { + const self = linuxKvmOwnerIdentity(); + expect(linuxKvmOwnerStatus(self)).toBe("alive"); + expect(linuxKvmOwnerStatus({ ...self, startTicks: `${self.startTicks}0` })).toBe("dead"); 
+ expect(linuxKvmOwnerStatus({ ...self, bootId: `${self.bootId}-previous` })).toBe("dead"); + const expected = { pid: 401, bootId: "boot-a", startTicks: "10" }; + expect(linuxKvmOwnerIdentityMatches(expected, expected)).toBe(true); + expect(linuxKvmOwnerIdentityMatches(expected, { ...expected, startTicks: "11" })).toBe(false); + expect(linuxKvmOwnerIdentityMatches(expected, { ...expected, bootId: "boot-b" })).toBe(false); + }); +}); + +describe("Linux KVM guest acceptance payload", () => { + it("serves bearer-isolated account catalogs from one origin", async () => { + const stateDir = mkdtempSync(join(tmpdir(), "executor-kvm-account-fixture-test-")); + const ledgerPath = join(stateDir, "account-ledger.json"); + const server = createKvmAccountFixture(ledgerPath); + + // oxlint-disable-next-line executor/no-try-catch-or-throw -- boundary: node:http test fixture must close after assertions + try { + const port = await listenOnLoopback(server); + const origin = `http://127.0.0.1:${port}`; + const [accountA, accountB] = KVM_ACCOUNT_FIXTURES; + const request = (token: string) => + fetch(`${origin}/api/integrations`, { + headers: { authorization: `Bearer ${token}` }, + }); + const responseA = await request(accountA.token); + const responseB = await request(accountB.token); + const rejected = await request("not-an-account"); + + expect(responseA.status).toBe(200); + expect(await responseA.json()).toEqual([ + expect.objectContaining({ slug: accountA.slug, name: accountA.marker }), + ]); + expect(responseB.status).toBe(200); + expect(await responseB.json()).toEqual([ + expect.objectContaining({ slug: accountB.slug, name: accountB.marker }), + ]); + expect(rejected.status).toBe(401); + expect(readFileSync(ledgerPath, "utf8")).toContain(`Bearer ${accountA.token}`); + expect(readFileSync(ledgerPath, "utf8")).toContain(`Bearer ${accountB.token}`); + } finally { + await closeServer(server); + rmSync(stateDir, { force: true, recursive: true }); + } + }); + + it("drives execute 
discovery and returns its tool result through loopback replay", async () => { + const stateDir = mkdtempSync(join(tmpdir(), "executor-kvm-replay-test-")); + const ledgerPath = join(stateDir, "replay-ledger.json"); + const server = createKvmReplayBrain(ledgerPath); + + // oxlint-disable-next-line executor/no-try-catch-or-throw -- boundary: node:http test fixture must close after assertions + try { + const port = await listenOnLoopback(server); + const origin = `http://127.0.0.1:${port}`; + const first = await fetch(`${origin}/v1/messages`, { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify({ + model: "replay-model", + stream: true, + tools: [{ name: "mcp__executor__execute" }], + messages: [{ role: "user", content: "calculate six times seven" }], + }), + }); + const firstTranscript = await first.text(); + expect(firstTranscript).toContain("mcp__executor__execute"); + expect(firstTranscript).toContain(KVM_CLAUDE_EXECUTE_CODE); + + const second = await fetch(`${origin}/v1/messages`, { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify({ + model: "replay-model", + stream: true, + tools: [{ name: "mcp__executor__execute" }], + messages: [ + { + role: "user", + content: [ + { + type: "tool_result", + tool_use_id: "toolu_kvm_replay_0", + content: "42", + }, + ], + }, + ], + }), + }); + expect(await second.text()).toContain("executor-result:42"); + expect(readFileSync(ledgerPath, "utf8")).toContain("mcp__executor__execute"); + expect(readFileSync(ledgerPath, "utf8")).toContain('"content": "42"'); + } finally { + await closeServer(server); + rmSync(stateDir, { force: true, recursive: true }); + } + }); + + it("refuses any paid-inference or non-local MCP boundary before starting Claude", async () => { + expect(isLoopbackHttpUrl("http://127.0.0.1:4000")).toBe(true); + expect(isLoopbackHttpUrl("http://localhost:4000/mcp")).toBe(true); + 
expect(isLoopbackHttpUrl("https://api.anthropic.com")).toBe(false); + expect(isLoopbackHttpUrl("http://192.0.2.1:4000/mcp")).toBe(false); + + await expect( + runKvmGuestClaude({ + binaryPath: "/never/invoked/claude", + expectedVersion: "2.1.195", + homeDir: "/never/created/home", + mcpUrl: "http://127.0.0.1:3000/mcp", + authorizationHeader: "Bearer synthetic", + brainBaseUrl: "https://api.anthropic.com", + outputPath: "/never/created/result.json", + }), + ).rejects.toThrow("refusing non-loopback Anthropic replay URL"); + expect(existsSync("/never/created/home")).toBe(false); + }); +}); diff --git a/tests/tart-vm.test.ts b/tests/tart-vm.test.ts new file mode 100644 index 000000000..251c354df --- /dev/null +++ b/tests/tart-vm.test.ts @@ -0,0 +1,385 @@ +import { mkdirSync, mkdtempSync, rmSync, writeFileSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; + +import { afterEach, describe, expect, it, vi } from "@effect/vitest"; +import { Data, Effect } from "effect"; + +import { createReconnectingProcess, type ReconnectingChild } from "../e2e/src/vm/tart"; +import { + cleanupCurrentTartResources, + deleteTartVmAndVerify, + tartResourceName, + tartScopePrefix, + sweepExpiredTartResources, + terminateTartRunProcess, + type TartRunProcess, +} from "../e2e/src/vm/tart-lifecycle"; +import { + createTartOwnership, + readTartOwnership, + writeTartOwnership, +} from "../e2e/src/vm/tart-ownership"; +import { resolveVmRunMetadata } from "../e2e/src/vm/run-scope"; + +class FakeChild implements ReconnectingChild { + readonly listeners = { + error: new Set<() => void>(), + exit: new Set<() => void>(), + }; + killed = false; + + on(event: "error" | "exit", listener: () => void) { + this.listeners[event].add(listener); + } + + kill() { + this.killed = true; + } + + emit(event: "error" | "exit") { + for (const listener of this.listeners[event]) listener(); + } +} + +class FakeTartRunProcess implements TartRunProcess { + exitCode: number | null = 
null; + signalCode: NodeJS.Signals | null = null; + readonly signals: NodeJS.Signals[] = []; + + constructor(private readonly exitOn: NodeJS.Signals | null) {} + + kill(signal: NodeJS.Signals = "SIGTERM") { + this.signals.push(signal); + if (signal === this.exitOn) this.signalCode = signal; + return true; + } +} + +class SimulatedTartDeleteFailure extends Data.TaggedError("SimulatedTartDeleteFailure")<{ + readonly name: string; +}> {} + +const flushMicrotasks = async () => { + await Promise.resolve(); + await Promise.resolve(); +}; + +const temporaryRoots: string[] = []; +const temporaryRoot = () => { + const root = mkdtempSync(join(tmpdir(), "executor-tart-ownership-test-")); + temporaryRoots.push(root); + return root; +}; + +const cleanupEnvironment = (scope: string, root: string) => ({ + E2E_TART_STATE_ROOT: root, + E2E_VM_RUN_SCOPE: scope, + GITHUB_REPOSITORY: "example/executor", + GITHUB_RUN_ID: "123", + GITHUB_RUN_ATTEMPT: "2", +}); + +const writeOwnership = ( + environment: ReturnType, + os: "linux" | "macos", + unique: string, + now = Date.parse("2026-06-26T00:00:00.000Z"), +) => { + const metadata = resolveVmRunMetadata(environment, now); + const name = tartResourceName(metadata.scope, os, unique); + return writeTartOwnership(createTartOwnership(metadata, os, name), environment); +}; + +afterEach(() => { + vi.useRealTimers(); + for (const root of temporaryRoots.splice(0)) rmSync(root, { force: true, recursive: true }); +}); + +describe("Tart reconnecting process", () => { + it("clears retry timers while paused and after close", async () => { + vi.useFakeTimers(); + const children: FakeChild[] = []; + const controller = createReconnectingProcess(() => { + const child = new FakeChild(); + children.push(child); + return child; + }); + + controller.resume(); + await flushMicrotasks(); + children[0]?.emit("exit"); + expect(vi.getTimerCount()).toBe(1); + + controller.pause(); + expect(vi.getTimerCount()).toBe(0); + await vi.advanceTimersByTimeAsync(2_000); + 
expect(children).toHaveLength(1); + + controller.resume(); + await flushMicrotasks(); + expect(children).toHaveLength(2); + children[1]?.emit("error"); + expect(vi.getTimerCount()).toBe(1); + + controller.close(); + expect(vi.getTimerCount()).toBe(0); + await vi.advanceTimersByTimeAsync(2_000); + expect(children).toHaveLength(2); + }); + + it("kills an async child that resolves after the controller is paused", async () => { + let resolveSpawn: ((child: FakeChild) => void) | undefined; + const pendingChild = new Promise((resolve) => { + resolveSpawn = resolve; + }); + const controller = createReconnectingProcess(() => pendingChild); + const child = new FakeChild(); + + controller.resume(); + controller.pause(); + resolveSpawn?.(child); + await flushMicrotasks(); + + expect(child.killed).toBe(true); + }); +}); + +describe("Tart VM cleanup", () => { + it("derives collision-resistant names from the exact matrix scope", () => { + const windowsSibling = tartResourceName("run-123-windows", "linux", "1"); + const linuxSibling = tartResourceName("run-123-linux", "linux", "1"); + + expect(windowsSibling).toMatch(new RegExp(`^${tartScopePrefix("run-123-windows")}`)); + expect(linuxSibling).toMatch(new RegExp(`^${tartScopePrefix("run-123-linux")}`)); + expect(windowsSibling).not.toBe(linuxSibling); + }); + + it("waits for graceful process exit and escalates to SIGKILL when necessary", async () => { + const graceful = new FakeTartRunProcess("SIGINT"); + const forced = new FakeTartRunProcess("SIGKILL"); + + await terminateTartRunProcess(graceful, { pollAttempts: 1, wait: async () => {} }); + await terminateTartRunProcess(forced, { pollAttempts: 1, wait: async () => {} }); + + expect(graceful.signals).toEqual(["SIGINT"]); + expect(forced.signals).toEqual(["SIGINT", "SIGKILL"]); + }); + + it("reports a tart run process that remains alive after SIGKILL", async () => { + const stubborn = new FakeTartRunProcess(null); + + await expect( + terminateTartRunProcess(stubborn, { 
pollAttempts: 1, wait: async () => {} }), + ).rejects.toThrow("did not exit after SIGKILL"); + expect(stubborn.signals).toEqual(["SIGINT", "SIGKILL"]); + }); + + it("cleans only VMs carrying exact managed ownership for the current scope", async () => { + const scope = "run-123-attempt-2-linux"; + const root = temporaryRoot(); + const environment = cleanupEnvironment(scope, root); + const ownedLedger = writeOwnership(environment, "linux", "owned"); + const siblingScope = "run-123-attempt-2-macos"; + const siblingEnvironment = { ...environment, E2E_VM_RUN_SCOPE: siblingScope }; + const siblingLedger = writeOwnership(siblingEnvironment, "macos", "sibling"); + const unmanaged = tartResourceName(scope, "linux", "unmanaged"); + let entries = [ + { Name: ownedLedger.record.vmName, Running: true, State: "running" }, + { Name: siblingLedger.record.vmName, Running: true, State: "running" }, + { Name: unmanaged, Running: false, State: "stopped" }, + ]; + const calls: string[][] = []; + const runner = async (args: readonly string[]) => { + calls.push([...args]); + if (args[0] === "list") return JSON.stringify(entries); + if (args[0] === "stop" && args.at(-1) === ownedLedger.record.vmName) { + entries = entries.map((entry) => + entry.Name === ownedLedger.record.vmName + ? 
{ ...entry, Running: false, State: "stopped" } + : entry, + ); + } + if (args[0] === "delete" && args[1] === ownedLedger.record.vmName) { + entries = entries.filter((entry) => entry.Name !== ownedLedger.record.vmName); + } + return ""; + }; + + const result = await cleanupCurrentTartResources({ + environment, + runner, + }); + + expect(result).toEqual({ deleted: 1, ledgersRemoved: 1, scope }); + expect(entries).toEqual([ + { Name: siblingLedger.record.vmName, Running: true, State: "running" }, + { Name: unmanaged, Running: false, State: "stopped" }, + ]); + expect(readTartOwnership(environment).map(({ record }) => record.vmName)).toEqual([ + siblingLedger.record.vmName, + ]); + expect(calls.filter((args) => args[0] === "delete")).toEqual([ + ["delete", ownedLedger.record.vmName], + ]); + }); + + it("surfaces deletion failures and leftover scoped VMs", async () => { + const scope = "run-456-macos"; + const root = temporaryRoot(); + const environment = cleanupEnvironment(scope, root); + const owned = writeOwnership(environment, "macos", "owned"); + const runner = async (args: readonly string[]) => { + if (args[0] === "list") { + return JSON.stringify([{ Name: owned.record.vmName, Running: false, State: "stopped" }]); + } + if (args[0] === "delete") { + return Effect.runPromise( + Effect.fail(new SimulatedTartDeleteFailure({ name: owned.record.vmName })), + ); + } + return ""; + }; + + await expect( + cleanupCurrentTartResources({ + environment, + runner, + }), + ).rejects.toThrow("tart cleanup was incomplete"); + expect(readTartOwnership(environment).map(({ record }) => record.vmName)).toEqual([ + owned.record.vmName, + ]); + }); + + it("sweeps only expired managed VMs from this repository and preserves the current run", async () => { + const root = temporaryRoot(); + const environment = cleanupEnvironment("current-linux", root); + const staleEnvironment = { + ...environment, + E2E_VM_RUN_SCOPE: "stale-linux", + GITHUB_RUN_ID: "122", + GITHUB_RUN_ATTEMPT: "1", + }; + 
const youngEnvironment = { + ...environment, + E2E_VM_RUN_SCOPE: "young-linux", + GITHUB_RUN_ID: "121", + GITHUB_RUN_ATTEMPT: "1", + }; + const otherRepositoryEnvironment = { + ...staleEnvironment, + E2E_VM_RUN_SCOPE: "other-repository-linux", + GITHUB_REPOSITORY: "someone-else/executor", + }; + const stale = writeOwnership(staleEnvironment, "linux", "stale"); + const current = writeOwnership(environment, "linux", "current"); + const young = writeOwnership( + youngEnvironment, + "linux", + "young", + Date.parse("2026-06-26T10:00:00.000Z"), + ); + const otherRepository = writeOwnership(otherRepositoryEnvironment, "linux", "other"); + const unmanaged = tartResourceName("unmanaged-old-scope", "linux", "unmanaged"); + let entries = [stale, current, young, otherRepository].map(({ record }) => ({ + Name: record.vmName, + Running: false, + State: "stopped", + })); + entries.push({ Name: unmanaged, Running: false, State: "stopped" }); + const deleted: string[] = []; + const runner = async (args: readonly string[]) => { + if (args[0] === "list") return JSON.stringify(entries); + if (args[0] === "delete" && args[1]) { + deleted.push(args[1]); + entries = entries.filter((entry) => entry.Name !== args[1]); + } + return ""; + }; + + const result = await sweepExpiredTartResources({ + environment, + minimumAgeHours: 6, + now: Date.parse("2026-06-26T12:00:00.000Z"), + runner, + }); + + expect(result).toEqual({ + deleted: 1, + ledgersRemoved: 1, + repository: "example/executor", + }); + expect(deleted).toEqual([stale.record.vmName]); + expect(entries.map(({ Name }) => Name).sort()).toEqual( + [current.record.vmName, young.record.vmName, otherRepository.record.vmName, unmanaged].sort(), + ); + expect( + readTartOwnership(environment) + .map(({ record }) => record.vmName) + .sort(), + ).toEqual([current.record.vmName, young.record.vmName, otherRepository.record.vmName].sort()); + }); + + it("fails before listing VMs when a managed ownership ledger is malformed", async () => { + const 
root = temporaryRoot(); + const environment = cleanupEnvironment("current-linux", root); + mkdirSync(root, { recursive: true }); + writeFileSync(join(root, `${"0".repeat(64)}.json`), "not-json", "utf8"); + const calls: string[][] = []; + + await expect( + sweepExpiredTartResources({ + environment, + minimumAgeHours: 6, + runner: async (args) => { + calls.push([...args]); + return "[]"; + }, + }), + ).rejects.toThrow(); + expect(calls).toEqual([]); + }); + + it("fails before deletion when tart reports an unknown managed VM state", async () => { + const root = temporaryRoot(); + const environment = cleanupEnvironment("current-linux", root); + const staleEnvironment = { + ...environment, + E2E_VM_RUN_SCOPE: "stale-linux", + GITHUB_RUN_ID: "122", + GITHUB_RUN_ATTEMPT: "1", + }; + const stale = writeOwnership(staleEnvironment, "linux", "stale"); + const calls: string[][] = []; + const runner = async (args: readonly string[]) => { + calls.push([...args]); + return args[0] === "list" + ? JSON.stringify([{ Name: stale.record.vmName, Running: false, State: "migrating" }]) + : ""; + }; + + await expect( + sweepExpiredTartResources({ + environment, + minimumAgeHours: 6, + now: Date.parse("2026-06-26T12:00:00.000Z"), + runner, + }), + ).rejects.toThrow("unknown or inconsistent state"); + expect(calls).toEqual([["list", "--source", "local", "--format", "json"]]); + expect(readTartOwnership(environment).map(({ record }) => record.vmName)).toEqual([ + stale.record.vmName, + ]); + }); + + it("rejects a successful delete command when readback still finds the VM", async () => { + const name = tartResourceName("run-789-linux", "linux", "owned"); + const runner = async (args: readonly string[]) => + args[0] === "list" ? JSON.stringify([{ Name: name, Running: false, State: "stopped" }]) : ""; + + await expect(deleteTartVmAndVerify(name, runner)).rejects.toThrow( + "tart VM still exists after deletion", + ); + }); +});
{trace.url.replace(/^https?:\/\/[^/]+/, "")} - - {trace.id.slice(0, 8)}… - + {liveMotelViewer ? ( + + {trace.id.slice(0, 8)}… + + ) : ( + {trace.id.slice(0, 8)}… + )}