From 60ee246a7a22e0e92000c1240798f6a202690db2 Mon Sep 17 00:00:00 2001 From: ScriptedAlchemy Date: Thu, 25 Jun 2026 10:35:51 +0000 Subject: [PATCH 1/4] docs: document self-improving automation loops --- CONTRIBUTING.md | 9 + README.md | 16 + docs/DASHBOARD-API-AUDIT.md | 2 +- docs/KIRO-INTEGRATION.md | 8 +- docs/SELF-IMPROVING-LOOPS-CONTRACTS.md | 67 +++ docs/dashboard-port-handoff.md | 29 +- docs/dashboard.md | 29 +- ...4-lsp-code-diagnostics-dashboard-design.md | 553 ++++++++++++++++++ 8 files changed, 688 insertions(+), 25 deletions(-) create mode 100644 docs/SELF-IMPROVING-LOOPS-CONTRACTS.md create mode 100644 docs/superpowers/specs/2026-06-24-lsp-code-diagnostics-dashboard-design.md diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index e07639b8..509ab12d 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -121,12 +121,21 @@ refactor: simplify reference resolver lookup Keep the first line under 72 characters. Add a body explaining *why* if the change isn't obvious. +Install the local `commit-msg` hook once per checkout: + +```bash +scripts/install-git-hooks.sh +``` + CI validates commit subjects with: ```bash scripts/check-conventional-commits.sh origin/master..HEAD ``` +Run the same command locally before pushing to lint every non-merge commit in a +branch range. Merge commits are skipped to match CI behavior. + ## Pull Requests - Target `master` for bug fixes and stable features. diff --git a/README.md b/README.md index 64254c8e..da319a1c 100644 --- a/README.md +++ b/README.md @@ -152,6 +152,22 @@ Each agent gets the integration that matches its host: All changes are idempotent -- safe to run again after upgrading. After agent setup, you'll be offered a global git post-commit hook. +Managed skills generated by the self-improvement loop are exported through the +host surface that each provider can load safely: + +| Host | Managed-skill surface | +|---|---| +| Cursor | Native skill overlay under the Cursor plugin's managed skill directory. 
| +| Codex | Native skill overlay inside the Codex plugin bundle/cache; Codex sessions load approved skills after the plugin is installed or refreshed. | +| Claude Code, Kimi, OpenCode, Copilot/Vibe, generic Agents targets | Prompt-index block that points the host at `tracedecay_skill_list` and `tracedecay_skill_view` instead of copying full skill bodies into global prompts. | +| Kiro | Dedicated steering index at `~/.kiro/steering/tracedecay-managed-skills.md`, referenced by the tracedecay-managed Kiro agent. | +| Hermes | Read-only bridge only. Hermes owns its native skill store; TraceDecay does not export or mutate Hermes skills. | + +The curation backend is provider-agnostic. Hermes remains a compatibility bridge +and reference source, while TraceDecay's automation config can delegate +intelligence to hosts such as the Codex app server without requiring Hermes to +run. + For project-scoped setup, run from the repository root: ```bash diff --git a/docs/DASHBOARD-API-AUDIT.md b/docs/DASHBOARD-API-AUDIT.md index dd81cee7..163ba6f1 100644 --- a/docs/DASHBOARD-API-AUDIT.md +++ b/docs/DASHBOARD-API-AUDIT.md @@ -114,7 +114,7 @@ in several handlers and omitted from the table for brevity. | 3 | GET | `/api/plugins/holographic/projection` | `projection` | `q`, `limit`(25/`PROJECTION_POINT_CAP`=2000) | `{exists, dim, limit, method, points, error}` | `vector_facts` decodes HRR blobs → PCA on `spawn_blocking`. **Cached** by `(query,limit,VectorStateFingerprint)`. | | 4 | GET | `/api/plugins/holographic/similarity` | `similarity` | `min_similarity`, `limit`(25/`SIMILARITY_PAIR_CAP`) | `{exists, dim, count, limit, threshold, min_similarity, total_pairs, score_distribution, pairs, error}` | O(n²·d) pairwise phase-cosine on `spawn_blocking`. **Cached** by fingerprint. Emits `threshold` AND `min_similarity` for shape compat. | | 5 | GET | `/api/plugins/holographic/curation/status` | `curation_status` | — | curator status stub | Reads `curate_preview`. 
Mostly static (`paused:false`, `mode:"similarity_dedup"`). | -| 6 | GET | `/api/plugins/holographic/curation/activity` | `curation_activity` | `limit` | `{events:[], count:0, limit, error}` | **Stub** — no live event stream. | +| 6 | GET | `/api/plugins/holographic/curation/activity` | `curation_activity` | `limit` | `{events, count, limit, error}` | In-memory deterministic curation activity stream capped to the newest events. Preview/apply, agent-plan, and queued automation paths emit phases such as `queued`, `evidence`, `backend`, `validation`, `apply`, `report`, `finish`, `failure`, and `rejection`. | | 7 | GET | `/api/plugins/holographic/curation/preview` | `curation_preview` | — | `{report, saved_at, stale, stale_reason, error}` | Reads saved dry-run preview; recomputes `memory_facts` fingerprint to flag staleness. | | 8 | POST | `/api/plugins/holographic/curate` | `curate` | body `{dry_run:bool}` (default true) | `{ran, dry_run, actions, hygiene_candidates, counts, applied_counts, llm_calls, coverage, provider, mode}` | `build_delete_plan` (similarity dedup). dry_run saves preview to state + disk; apply **hard-deletes** losers via `MemoryStore::remove_fact`, records oplog summary. | | 9 | POST | `/api/plugins/holographic/curate/apply` | `curate_apply` | body `{ops:[{op:"delete"\|"merge", ...}]}` | `{results, counts:{deleted, merged, errors}}` | Generic ops endpoint. `delete`→`MemoryStore::remove_fact`; `merge`→`MemoryStore::merge_facts` (optional content rewrite + hard-delete losers). Per-op failures reported inline (HTTP 200). | diff --git a/docs/KIRO-INTEGRATION.md b/docs/KIRO-INTEGRATION.md index 9103bc70..6e8ce163 100644 --- a/docs/KIRO-INTEGRATION.md +++ b/docs/KIRO-INTEGRATION.md @@ -16,6 +16,7 @@ so does not overwrite a user's existing custom default-agent choice. |---|---| | `~/.kiro/settings/mcp.json` | Registers the global `tracedecay` MCP server with `command`, `args: ["serve"]`, and `disabled: false`. 
Approval policy is left to the managed Kiro agent. | | `~/.kiro/steering/tracedecay.md` | Adds global Kiro steering that tells normal Kiro sessions to prefer tracedecay MCP tools for codebase research. | +| `~/.kiro/steering/tracedecay-managed-skills.md` | Adds a tracedecay-managed skill index for approved managed skills. The index points Kiro at `tracedecay_skill_list` and `tracedecay_skill_view`; full skill bodies remain in TraceDecay's managed skill store. | | `~/.kiro/agents/tracedecay.json` | Adds the tracedecay-managed Kiro agent with `tools: ["*"]`, `allowedTools: ["@builtin", "@tracedecay"]`, hooks for delegation guardrails, post-write sync, and an absolute `resources` entry for `~/.kiro/steering/tracedecay.md`. The agent leaves `prompt` unset so Kiro's default prompt is used. | | `~/.kiro/settings/cli.json` | Sets `chat.defaultAgent` to `tracedecay` when the setting is absent or still points at Kiro's built-in default. | @@ -25,9 +26,10 @@ tracedecay also does not point `chat.defaultAgent` at that user-managed file. If `chat.defaultAgent` already names another custom agent, install leaves that choice unchanged and prints a warning. -Uninstall removes only the `tracedecay.md` steering block, the global MCP server entry, -the tracedecay-owned agent file, and `chat.defaultAgent` when it points at that -owned agent. User-authored steering after the installed block remains in place. +Uninstall removes only the `tracedecay.md` steering block, the managed-skill +index, the global MCP server entry, the tracedecay-owned agent file, and +`chat.defaultAgent` when it points at that owned agent. User-authored steering +after the installed block remains in place. 
## Tool approval defaults diff --git a/docs/SELF-IMPROVING-LOOPS-CONTRACTS.md b/docs/SELF-IMPROVING-LOOPS-CONTRACTS.md new file mode 100644 index 00000000..5576316a --- /dev/null +++ b/docs/SELF-IMPROVING-LOOPS-CONTRACTS.md @@ -0,0 +1,67 @@ +# Self-Improving Loop Contracts + +This document is the durable contract for TraceDecay-owned self-improvement loops. Hermes is a reference implementation and compatibility bridge; it is not required to run TraceDecay curation, managed skills, scheduler jobs, or artifact generation. The first standalone backend is the Codex app-server adapter, and the same contracts are intended to support other delegated or CLI hosts later. + +## Host Matrix + +| Host | TraceDecay-owned behavior | Host-owned behavior | Skill delivery | +| --- | --- | --- | --- | +| Cursor | Config, ledgers, curation validation, managed skill storage, telemetry sidecars, native overlay export | Native host loading and any host-local transcript signals | Approved managed `SKILL.md` packages under the generated plugin overlay | +| Codex | Config, ledgers, curation validation, managed skill storage, telemetry sidecars, native overlay export, shareable plugin artifact generation | Native plugin discovery, app-server execution when selected as backend | Approved managed `SKILL.md` packages under the Codex plugin overlay or plugin artifact | +| Hermes | Read-only bridge over profile skills, pending approvals, usage, curator/write-approval state, and hosted proposals | Auxiliary LLM calls, background review, `skill_manage`, write approvals, skill mutations, curator decisions | Hermes profile skills remain Hermes-owned | +| Claude Code | Config, ledgers, managed-skill index generation, MCP skill body serving | Prompt-file loading and any host-local execution | Compact `CLAUDE.md` prompt index plus `tracedecay_skill_view` | +| OpenCode | Config, ledgers, managed-skill index generation, MCP skill body serving | Prompt-file loading and any host-local execution | Compact 
`AGENTS.md` prompt index plus `tracedecay_skill_view` | +| Kimi | Config, ledgers, managed-skill index generation, MCP skill body serving | Prompt-file loading and any host-local execution | Compact prompt index plus `tracedecay_skill_view` | +| Kiro | Config, ledgers, managed-agent prompt-index content, MCP skill body serving | Managed-agent file ownership and host execution | Existing managed-agent path with prompt index plus `tracedecay_skill_view` | +| Prompt-only agents | Config, ledgers, prompt-index generation, MCP skill body serving | Prompt ingestion and execution | Compact prompt index plus `tracedecay_skill_view` | + +## Standalone And Delegated Modes + +`standalone` means TraceDecay owns backend calls, evidence collection, validation, run ledger writes, approval staging, dashboard review payloads, and optional scheduler execution. Backend output can propose changes, but TraceDecay validates every proposed mutation before it can be applied. + +`delegated_host` means the host owns intelligence and mutation decisions. TraceDecay exposes contracts and storage views, validates proposed operations when asked, and records bridge-visible evidence. It must not call its own backend for memory curation, session reflection, or skill writing in this mode. Legacy `hermes_hosted` config is only an alias for `delegated_host`. + +## Curation Operation Contract + +Curation proposals are advisory until TraceDecay validation accepts them. Every proposal must identify the reviewed evidence item it targets, include a supported operation kind, provide a confidence/reason, and pass the existing evidence guard before any apply policy is considered. + +Timestamp semantics follow the Hermes memory-curator rule: + +1. Prove same subject first. +2. Prove same atomic claim second. +3. Prefer semantic freshness fields such as `asserted_at`, `effective_at`, `observed_at`, `occurred_at`, or `created_at`. +4. Treat maintenance `updated_at` as storage metadata, not truth freshness. +5. 
Use deterministic tie-breakers only after the subject, claim, and semantic timestamp checks are resolved. + +## Managed Skill Contract + +TraceDecay-owned managed skills live under the profile `agent_managed/skills` store and static bundled skills stay immutable. Managed skill metadata includes id, title, summary, category, targets, lifecycle state, pinned flag, checksum, timestamps, and provenance. Support files are restricted to `references`, `templates`, `scripts`, and `assets`. + +Agent-authored or backend-authored changes enter pending approval first. Activation, disable, archive, restore, and staged updates are explicit lifecycle operations. Pinned, user-authored, shipped, and Hermes-owned skills are protected from automatic mutation. + +## Telemetry And Recommendations + +Skill telemetry is a sidecar ledger, not frontmatter. The ledger tracks view/use/patch counts, last timestamps, created_by, state, pinned, targets, and provenance. TraceDecay may normalize its own analytics into this ledger. In delegated Hermes mode, TraceDecay reads Hermes usage/provenance data as bridge evidence and does not write Hermes state. + +Archive/prune recommendations are explainable review recommendations only. They cannot auto-delete skills. Skill improvement recommendations must cite repeated corrections, failed workflows, underused tool evidence, or validation artifacts before proposing a patch. + +## Local Skill Versus Plugin Artifact + +Use a local managed skill when the workflow is personal, project-specific, unstable, or still pending validation. + +Use a managed overlay when an approved skill should be available to a local native host without changing shipped TraceDecay skills. + +Generate a Codex plugin artifact when an approved workflow is stable, shareable, and should travel with plugin metadata, native `skills/`, optional `.mcp.json`, optional hooks, and marketplace metadata. 
+ +## Improvement Artifacts + +Every automation run that reaches backend validation should be able to produce a review chain: + +- traces +- feedback +- generated evals +- validation gate +- optimizer diagnosis +- Codex handoff + +The handoff is the durable output for broader code or behavior changes. It must preserve approval gates and list validation requirements before any generated recommendation is applied. diff --git a/docs/dashboard-port-handoff.md b/docs/dashboard-port-handoff.md index 84716d12..ceaa6257 100644 --- a/docs/dashboard-port-handoff.md +++ b/docs/dashboard-port-handoff.md @@ -54,7 +54,7 @@ that reuses it, never a fork. └──────────────┬─────────────────────────────┘ register via window.__HERMES_PLUGINS__ / SDK ┌───────────────────┴────────────────────────┐ - │ standalone │ hermes-hosted + │ standalone │ delegated-host ┌──────────▼─────────────┐ ┌────────────▼─────────────────┐ │ shell/dist/shell.js │ │ hermes-wrapper dist/index.js │ │ (bundles React 19, │ │ (uses host SDK; evaluates │ @@ -138,7 +138,7 @@ Hermes `~/.hermes/memory_store.db` (`facts`/`entities`/`memory_banks`). | `GET /similarity` | pure-python `mean(cos(p_i−p_j))` + lexical overlap + classification | same math in Rust (`SIMILARITY_FACT_CAP` 500, identical thresholds) | working | | `GET /archive` / `POST /archive/{id}/restore` | `facts.state='archived'` / provider restore | **removed by design** — tracedecay curation hard-DELETEs losing facts; there is no archive state and no restore. The UI's Archive tab was removed accordingly. 
| n/a | | `GET /curation/status` | hermes curator state files | Returns `enabled:true`, `mode: similarity_dedup`, last preview timestamp | **working** | -| `GET /curation/activity` | curator activity events | Always empty (no LLM/agent event stream) | working | +| `GET /curation/activity` | curator activity events | Structured TraceDecay curation activity events from preview/apply, agent-plan, and automation paths (`queued`, `evidence`, `backend`, `validation`, `apply`, `report`, `finish`, `failure`, or `rejection` as applicable) | working | | `GET /curation/preview` | saved dry-run file | Last `dry_run=true` result, persisted to a `.tracedecay/dashboard/curation_preview.json` sidecar (survives restarts); stale when fact count changes | **working** | | `POST /curate` | `agent.memory_curator.run_memory_curation` | Similarity-based dedup: proposes/applies `delete` actions for `likely_duplicate` pairs; `dry_run=true` returns plan, `dry_run=false` hard-deletes losers via `MemoryStore::remove_fact` | **working** | | `POST /curate/apply` | (new, no Hermes equivalent) | Generic curation-ops apply API: `{"ops": [{"op":"delete",...} \| {"op":"merge",...}]}` with per-op results; the contract for external (LLM) planners | **working** | @@ -307,12 +307,15 @@ planner builds against. ### Capabilities -`GET /api/capabilities` returns `"curation": true, "llm_curation": false` -(standalone). The Hermes wrapper flips `llm_curation` when it adds an -LLM-backed planner that proposes merge/retag-style ops and applies them via -`/curate/apply`. The UI's CurationPanel consumes the same ops shape either -way (its Archive tab was removed; `delete` ops render as high-risk actions -with a permanent-deletion warning). +`GET /api/capabilities` returns `"curation": true` plus automation metadata. +TraceDecay reports `automation.mode` as `"disabled"`, `"standalone_backend"`, +or `"delegated_host"`. In standalone mode, a configured backend can set +`features.llm_curation` true. 
In delegated-host mode, planning remains +host-owned and the host submits proposed ops via `/curate/apply`; Hermes is one +compatibility bridge for this provider-neutral contract. The UI's +CurationPanel consumes the same ops shape either way (its Archive tab was +removed; `delete` ops render as high-risk actions with a permanent-deletion +warning). ### Hermes live render + holographic_plus retirement (2026-06-10) @@ -514,9 +517,11 @@ conservatism backstop, and callers can pass a higher `threshold` / ## What's stubbed / known gaps -1. **Curation activity stream**: `GET /curation/activity` always returns an empty - event list. The holographic_plus backend streams structured phases from a live - LLM agent run; the similarity-dedup implementation has no equivalent events. +1. **Curation activity stream**: RESOLVED — `GET /curation/activity` returns the + in-memory structured activity log for preview/apply, standalone agent-plan, + and queued automation paths. Events use phases such as `queued`, `evidence`, + `backend`, `validation`, `apply`, `report`, `finish`, `failure`, and + `rejection` as applicable. 2. **Rich curation ops**: the built-in planner only proposes `delete`. The apply API additionally executes `merge` (content rewrite + loser deletion), but `supersede`, `retag`, and `entity_*` ops from holographic_plus are not @@ -640,7 +645,7 @@ cd /home/zack/projects/tracedecay cargo build --bin tracedecay ./target/debug/tracedecay dashboard # http://127.0.0.1:7341/ -# 3. Hermes-hosted (wrapper spawns the server automatically) +# 3. 
Delegated host (wrapper spawns the server automatically) TRACEDECAY_BIN=/home/zack/projects/tracedecay/target/debug/tracedecay \ TRACEDECAY_DASHBOARD_PROJECT=/home/zack/projects/tracedecay \ hermes dashboard # → "TraceDecay" tab (named "Hermes Intelligence" at port time) diff --git a/docs/dashboard.md b/docs/dashboard.md index 0c4e6f19..1e6939ac 100644 --- a/docs/dashboard.md +++ b/docs/dashboard.md @@ -504,7 +504,16 @@ Returns feature flags and server configuration. Used by the UI and wrappers to d "lcm": true, "graph": true, "curation": true, - "llm_curation": false + "automation": true, + "llm_curation": true, + "managed_skills": true + }, + "automation": { + "enabled": true, + "mode": "standalone_backend", + "backend": "codex_app_server", + "host_mode": "standalone", + "availability": {"available": true, "reason": ""} }, "dashboards": ["holographic", "hermes-lcm", "graph"] } @@ -516,7 +525,9 @@ Returns feature flags and server configuration. Used by the UI and wrappers to d - `features.memory`: Whether the project database is available - `features.lcm`: Whether the LCM session store is available - `features.curation`: Whether similarity-dedup curation tools are enabled -- `features.llm_curation`: Whether an LLM-backed curation planner is available. Always `false` in standalone; the Hermes wrapper flips this when it adds an LLM planner that generates ops for `POST /curate/apply` +- `features.automation`: Whether TraceDecay automation is enabled with a supported backend +- `features.llm_curation`: Whether TraceDecay's standalone backend can run LLM-backed curation. Delegated hosts keep planning host-owned and submit ops through `POST /curate/apply`. +- `automation.mode`: `"disabled"`, `"standalone_backend"`, or `"delegated_host"`; `delegated_host` is provider-neutral and may be used by Hermes, Codex app-server orchestration, Claude Code CLI, Cursor Agent CLI, or another host that owns the intelligence layer. 
--- @@ -768,10 +779,10 @@ Same structure with `applied_counts` showing what was actually deleted and #### `POST /api/plugins/holographic/curate/apply` -Generic curation-ops apply endpoint. This is the contract external planners -(e.g. an LLM-backed Hermes wrapper, advertised via `features.llm_curation`) -build against. Per-op failures are reported per-op in `results`; the request -only fails wholesale (400) on a malformed body. +Generic curation-ops apply endpoint. This is the contract standalone automation +backends and delegated host planners build against. Per-op failures are +reported per-op in `results`; the request only fails wholesale (400) on a +malformed body. **Request Body:** ```json @@ -1070,12 +1081,12 @@ fetch('/api/capabilities') | `features.graph` | Code-graph API is available | Show Code Graph tab | | `features.savings` | Savings & Cost API is available | Show Savings & Cost tab | | `features.curation` | Similarity-dedup curation tools are available | Show Curation panel, enable curate actions | -| `features.llm_curation` | An LLM-backed curation planner is available (Hermes wrapper only) | Enable LLM plan actions that target `POST /curate/apply` | +| `features.llm_curation` | An LLM-backed curation planner is available through TraceDecay standalone automation or a delegated host wrapper | Enable LLM plan actions that target `POST /curate/apply` | There is no archive flag: curation deletes are permanent, and no archive or restore endpoints exist. Always check the capability flags rather than -assuming availability — they may change based on database state and host -(standalone vs Hermes). +assuming availability — they may change based on database state and host mode +(standalone backend vs delegated host). 
--- diff --git a/docs/superpowers/specs/2026-06-24-lsp-code-diagnostics-dashboard-design.md b/docs/superpowers/specs/2026-06-24-lsp-code-diagnostics-dashboard-design.md new file mode 100644 index 00000000..0731926b --- /dev/null +++ b/docs/superpowers/specs/2026-06-24-lsp-code-diagnostics-dashboard-design.md @@ -0,0 +1,553 @@ +# LSP Code Diagnostics Dashboard Design + +## Summary + +Phase 1 adds a dedicated **Code Diagnostics** dashboard surface powered by a TraceDecay-owned, LSP-first diagnostics broker. The broker starts supported language servers when available, keeps them warm while the dashboard process is alive, caches diagnostics, and exposes status/results only through dashboard APIs. Hooks, prompt hints, MCP auto-context, and model-visible summaries are explicitly out of Phase 1 and are documented as Phase 2 extension points. + +Phase 1 is a general LSP diagnostics platform, not a Rust-only feature. It ships a broker and adapter registry that can host every language server TraceDecay knows how to start. Rust is an important validation path because `rust-analyzer` highlights the cold `cargo check` problem, but the architecture and dashboard controls are language-generic. + +Built-in Phase 1 adapters should cover the practical, low-friction language servers first: + +- Rust via `rust-analyzer` +- TypeScript and JavaScript via `typescript-language-server` +- Python via `pyright-langserver` +- Go via `gopls` +- C, C++, and Objective-C via `clangd` +- Zig via `zls` +- Lua via `lua-language-server` +- PHP via `intelephense` + +The registry must also allow project-configured custom adapters so languages without a built-in adapter can still participate when a user knows the server command, language id, file extensions, and root markers. + +Each language can be enabled or disabled from the dashboard. Disabling a language stops its LSP worker, clears its pending refresh work, and leaves it visible as disabled in the engine status table. 
+ +## Goals + +- Prefer warm LSP diagnostics over repeated batch tool invocations. +- Surface code diagnostics in the dashboard only. +- Let users enable or disable LSP diagnostics per language from the dashboard. +- Support a broad language-server registry, not a Rust-specialized path. +- Allow project-configured custom LSP adapters for languages beyond built-ins. +- Keep the design fail-open: missing LSP binaries, broken initialization, or server crashes should not break the dashboard. +- Preserve the current `tracedecay_diagnostics` MCP tool as an explicit one-shot diagnostics path. +- Leave hook/model-context surfacing designed but unimplemented. + +## Non-Goals + +- Do not inject diagnostics into Codex/Cursor/Kiro hooks. +- Do not add model-visible hints or prompt context. +- Do not attach to editor-owned LSP instances in Phase 1. +- Do not replace the existing `tracedecay_diagnostics` MCP tool. +- Do not run batch compiler checks automatically as the dashboard default when LSP is unavailable. +- Do not make LSP diagnostics an autostart system daemon. + +## Product Surface + +The dashboard gains a new dedicated **Code Diagnostics** tab or plugin area, separate from the existing Savings & Cost `Diagnostics` view. The existing diagnostics panel reports TraceDecay hook/tool/prompt telemetry, so compiler and type diagnostics should not be mixed into it. + +The Code Diagnostics UI includes: + +- Summary stats: total errors, total warnings, pending refreshes, last refresh age. 
+- Engine status table: + - language + - adapter/server binary + - enabled/disabled toggle + - state: unavailable, disabled, starting, indexing, ready, refreshing, crashed + - last error + - last diagnostic update time +- File-grouped diagnostics table: + - file + - line range + - severity + - code/rule + - message + - language/driver + - enclosing TraceDecay node when available +- Controls: + - Refresh all enabled languages + - Refresh one language + - Enable/disable one language + - Restart one language server + - Enable/disable idle whole-project backfill + +Dashboard toggles are persisted in the active project store so the setting survives dashboard restarts and is branch/worktree scoped with the rest of the active TraceDecay store. + +## Backend Architecture + +Add a diagnostics broker module under `src/diagnostics/lsp/`: + +```text +src/diagnostics/lsp/ + mod.rs public broker types and module exports + broker.rs per-project orchestration, cache, refresh queue, status + client.rs stdio JSON-RPC LSP client + protocol.rs minimal LSP request/notification/diagnostic structs + adapters.rs adapter trait, built-in adapters, custom adapter loader + settings.rs project-persisted language enablement +``` + +The broker is owned by dashboard state, not by hidden autostart infrastructure. When `tracedecay dashboard` starts, it builds one `DiagnosticBroker` for the active project. The broker lazily starts language servers when the Code Diagnostics UI asks for status, diagnostics, or refresh. This avoids surprising background work for users who never open the tab. + +The broker stores: + +- project root +- active store/dashboard sidecar root +- per-language settings +- per-language LSP client handle +- per-language engine state +- cached diagnostics +- refresh queue state +- idle backfill queue state +- last refresh timestamps and errors + +## Adapter Registry and Language Coverage + +The LSP broker uses an adapter registry. 
Each adapter is a declarative unit that answers: + +- which TraceDecay languages it handles +- LSP language id for each file type +- binary names to probe +- root markers and manifests +- spawn command and arguments +- initialization options/settings +- supported file extensions +- whether diagnostics are push, pull, or both + +Built-in adapters: + +| TraceDecay language(s) | LSP server | Binary | Root/manifest signal | Notes | +| --- | --- | --- | --- | --- | +| Rust | rust-analyzer | `rust-analyzer` | `Cargo.toml` | Primary validation path for warm diagnostics. | +| TypeScript, JavaScript | typescript-language-server | `typescript-language-server` | `tsconfig.json`, `jsconfig.json`, or indexed TS/JS files | Handles `.ts`, `.tsx`, `.js`, `.jsx`. | +| Python | pyright-langserver | `pyright-langserver` | `pyrightconfig.json`, `pyproject.toml`, or indexed Python files | Prefer project config when present to reduce import noise. | +| Go | gopls | `gopls` | `go.mod` or indexed Go files | Falls back to workspace root when no module exists. | +| C, C++, Objective-C | clangd | `clangd` | `compile_commands.json` optional | One shared server adapter handles C-family languages. | +| Zig | zls | `zls` | `build.zig` optional | Useful even for single-file projects. | +| Lua | lua-language-server | `lua-language-server` | `.luarc.json` optional | Workspace scan can be expensive; lazy start matters. | +| PHP | intelephense | `intelephense` | `composer.json` optional | Definition/diagnostics support varies by configuration. | + +Custom adapters are configured in `tracedecay.toml` or the active project store: + +```toml +[[lsp.custom]] +language = "ruby" +language_id = "ruby" +command = "ruby-lsp" +args = [] +extensions = ["rb"] +root_markers = ["Gemfile", ".ruby-version"] +diagnostics = "push" +``` + +The dashboard should list built-in and custom adapters together. 
For unsupported languages, it should show an “Add custom LSP adapter” affordance rather than pretending TraceDecay has no path forward. + +## External Implementation Notes + +The design should copy proven LSP-client shapes from existing projects rather than invent protocol machinery from scratch. + +Relevant findings: + +- The Codex repository had a closed PR for a `rust-analyzer-lsp-timing` sample skill. The PR added helper scripts for one long-lived `rust-analyzer` process, timing from `textDocument/didChange` to `textDocument/publishDiagnostics`, and a tiny UNIX-socket control wrapper. The useful design lessons are persistent process reuse, explicit health probes, workspace-keyed control state, text-document version tracking, and waiting for publish-diagnostics events instead of rerunning cargo manually. +- The same Codex commit (`7b230fc`) shows a minimal stdio client shape: spawn `rust-analyzer`, send `Content-Length` framed JSON-RPC, issue `initialize`/`initialized`, send `didOpen` and full-document `didChange`, and collect `textDocument/publishDiagnostics`. +- `codive-lsp` documents an agent-oriented Rust LSP architecture with modules for server definitions/spawning, JSON-RPC client, lazy facade/caching, file-extension language mapping, and support for rust-analyzer, TypeScript, Pyright, and gopls. +- `tokio-lsp` is a lightweight async-first Rust LSP client crate with transport abstraction and serde-based typed messages. +- `lsp-types` provides shared Rust structs for LSP messages and should be preferred over hand-written request/diagnostic structs where it fits. +- `bacon-ls` is not a replacement for rust-analyzer, but it is a useful Rust diagnostics reference. It exposes `textDocument/diagnostic` and `workspace/diagnostic`, supports partial diagnostic publishes during long cargo runs, cancellation of running checks, manual retrigger commands, and backends for direct cargo or already-running Bacon. 
+ +Phase 1 should use these notes as implementation guidance: + +- Use `lsp-types` for protocol data structures unless a required server extension is missing. +- Keep the transport small and explicit: stdio JSON-RPC with `Content-Length` framing, one reader task, one writer path, and a pending request map. +- Track `textDocument` versions per opened file. Send full-document `didChange` first; incremental range changes can wait until there is evidence the full-text path is too expensive. +- Treat `textDocument/publishDiagnostics` as the primary diagnostic source. Add `textDocument/diagnostic` or `workspace/diagnostic` only after capability detection proves a server supports pull diagnostics. +- Keep health/control state inside the dashboard-owned broker rather than a separate autostart daemon. A reconnectable local socket is a useful future shape, but Phase 1 does not need it. +- For Rust, measure and expose diagnostic latency in status fields. The Codex sample was explicitly about timing edit-to-diagnostic latency, and that signal will help tune debounce and refresh behavior. + +## LSP Lifecycle + +For each supported language, an adapter provides: + +- language id used by LSP +- binary names to detect +- project manifest detection +- spawn command +- initialization options +- optional workspace folders +- file extensions to open +- diagnostic capability support + +Phase 1 starts with stdio LSP servers launched by TraceDecay. It does not attempt to attach to Cursor, VS Code, or other editor-owned LSP sessions because the LSP lifecycle is client-owned and not exposed through a standard cross-editor discovery API. + +The active lifecycle is: + +1. Dashboard asks for Code Diagnostics. +2. Broker loads per-language settings. +3. For each enabled language, broker detects whether the LSP binary and project shape are available. +4. On refresh, broker starts missing enabled clients. +5. Broker sends `initialize` and `initialized`. +6. 
Broker opens relevant project files using `textDocument/didOpen`. +7. Broker collects diagnostics from `textDocument/publishDiagnostics`. +8. When supported, broker can also issue `textDocument/diagnostic` or `workspace/diagnostic`. +9. Broker maps diagnostics to project-relative files and enriches them with enclosing graph nodes. +10. Dashboard reads the cached snapshot. + +The passive lifecycle is: + +1. Once the Code Diagnostics dashboard has been opened, the broker may begin idle backfill for enabled languages. +2. The broker builds a per-language queue from TraceDecay's indexed files. +3. The broker opens files in small batches while the dashboard process is otherwise idle. +4. Each LSP server publishes diagnostics for files it can analyze. +5. The broker updates coverage counters so the UI can distinguish "whole project covered" from "only recently opened/refreshed files covered." + +Server crashes are converted into engine status and last error fields. The UI remains usable, and other languages keep running. + +## Refresh Model + +Phase 1 uses explicit dashboard refresh and dashboard-owned idle backfill, not edit hooks. + +Refresh requests are debounced per language: + +- If a refresh is already running, a new request marks the language as pending. +- When the current refresh finishes, one pending refresh may run. +- Repeated clicks do not create unbounded work. + +Refresh scopes: + +- `all`: refresh every enabled language. +- `language`: refresh one enabled language. + +The broker should avoid a full project file walk on every refresh when possible. It can use TraceDecay’s indexed file list and language/file-extension mapping to find candidate files. If the index is stale, the dashboard should show the index freshness status instead of silently forcing a sync. + +## Idle Whole-Project Backfill + +Phase 1 should passively collect diagnostics for files the user has not touched, but only through dashboard-owned idle work. 
This gives the dashboard a project-wide type-error view without making hooks or prompt submission slower. + +Idle backfill behavior: + +- Starts only after the Code Diagnostics dashboard surface is opened or the user explicitly enables Code Diagnostics for the project. +- Runs only for enabled languages. +- Uses TraceDecay's indexed file list to avoid a fresh filesystem walk. +- Processes files in small batches per language. +- Yields to explicit refresh/restart/toggle requests. +- Pauses when an LSP server reports indexing/busy status or when refresh work is active. +- Stops immediately when a language is disabled. +- Records progress per language: queued files, opened files, files with diagnostics, last completed sweep. + +Backfill modes: + +- `off`: no passive project sweep. +- `idle`: default. Backfill only when the dashboard process is idle and no explicit refresh is active. + +The Phase 1 default should be `idle`. This gives users broad coverage without surprising CPU use. A repeating `continuous` sweep can be considered later, but it is not part of Phase 1. The dashboard should expose the Phase 1 setting and show current backfill progress. + +Whole-project coverage is best-effort. Some LSP servers publish diagnostics for the entire workspace after initialization; others only publish for opened files. The broker should support both: + +- For servers that support `workspace/diagnostic`, request workspace diagnostics and cache the result. +- For servers that support only push diagnostics, open files in bounded batches and wait for `textDocument/publishDiagnostics`. +- For servers that only diagnose visible/open files reliably, mark coverage as partial rather than pretending the project is fully checked. 
+ +## Diagnostics Cache + +Define a normalized diagnostic record shared by the broker and dashboard API: + +```rust +pub struct CodeDiagnostic { + pub language: String, + pub source: String, + pub file: String, + pub line_start: u32, + pub line_end: u32, + pub character_start: Option, + pub character_end: Option, + pub severity: String, + pub code: String, + pub message: String, + pub enclosing: Option, +} +``` + +Define per-language engine state: + +```rust +pub struct DiagnosticEngineStatus { + pub language: String, + pub server: String, + pub enabled: bool, + pub available: bool, + pub state: String, + pub diagnostic_count: usize, + pub error_count: usize, + pub warning_count: usize, + pub indexed_file_count: usize, + pub covered_file_count: usize, + pub backfill_state: String, + pub backfill_queued: usize, + pub backfill_completed: usize, + pub last_started_at: Option, + pub last_updated_at: Option, + pub last_backfill_completed_at: Option, + pub last_error: Option, +} +``` + +The first implementation can keep the cache in memory while the dashboard runs. Persisting the latest snapshot to a dashboard sidecar table is allowed in Phase 1 if it is useful for reload behavior, but the UI must clearly distinguish cached/stale data from fresh data. + +## Dashboard API + +Add a new API module, for example `src/dashboard/code_diagnostics_api.rs`, mounted under `/api/plugins/code-diagnostics`. + +Endpoints: + +- `GET /api/plugins/code-diagnostics/overview` + - Returns enabled languages, engine statuses, totals, and last update metadata. +- `GET /api/plugins/code-diagnostics/diagnostics` + - Returns cached diagnostics with optional query params: + - `language` + - `severity` + - `file` + - `limit` + - `offset` +- `POST /api/plugins/code-diagnostics/refresh` + - Body: `{ "language": "rust" }` or `{ "language": "all" }` + - Enqueues refresh and returns current status immediately. 
+- `POST /api/plugins/code-diagnostics/settings` + - Body: `{ "language": "rust", "enabled": false }` or `{ "idle_backfill": "idle" }` + - Persists language and backfill settings. Disabling a language shuts down its client. +- `POST /api/plugins/code-diagnostics/restart` + - Body: `{ "language": "rust" }` + - Restarts an enabled language client and enqueues refresh. + +The API should return plain JSON and never expose diagnostics to hooks or MCP model-context paths. + +## Dashboard UI + +Add a dedicated frontend package or panel matching existing dashboard plugin patterns. The UI should be quiet, operational, and scan-friendly: + +- Top summary band with counts and freshness. +- Engine table with compact language controls. +- Diagnostics table grouped by file. +- Filters for language and severity. +- Refresh/restart controls using existing button and table primitives. +- Idle backfill control with progress for project-wide coverage. + +The UI should treat disabled and unavailable languages differently: + +- Disabled: user intentionally turned it off. +- Unavailable: enabled, but binary or project manifest is missing. +- Crashed: server started but failed. + +Toggle behavior: + +- Turning a language off immediately calls the settings endpoint and stops the server. +- Turning it on calls the settings endpoint, then enqueues a refresh. +- The UI should not block while refresh runs; it shows starting/indexing/refreshing status. +- The idle backfill control changes only broker/dashboard behavior. It must not install hooks or alter model-facing context. + +## Built-In Adapter Details + +All built-in adapters share the same broker/client/cache path. The sections below call out language-specific behavior, but none of them should fork the architecture. + +## Rust Adapter + +The Rust adapter uses `rust-analyzer`. + +Detection: + +- Project has `Cargo.toml`. +- `rust-analyzer` is available on `PATH`. + +Initialization: + +- Root URI is the project root. 
+- Configure Cargo target dir to avoid contention with the user’s interactive builds when rust-analyzer supports that setting. +- Prefer diagnostics from rust-analyzer’s normal diagnostic publication path. + +Rust-specific notes: + +- rust-analyzer may run its own background cargo check/flycheck. That is acceptable because it is warm, debounced, and integrated with the LSP session. +- The current `cargo check` diagnostics driver remains available for explicit fresh MCP checks. + +## TypeScript Adapter + +The TypeScript adapter uses `typescript-language-server`. + +Detection: + +- Project has `tsconfig.json` or TypeScript/JavaScript files. +- `typescript-language-server` is available on `PATH`. + +Initialization: + +- Root URI is the project root. +- Open indexed `.ts`, `.tsx`, `.js`, and `.jsx` files as needed. + +TypeScript-specific notes: + +- Phase 1 should not silently fall back to `tsc --watch`. +- If the language server is unavailable, dashboard status says unavailable. + +## Python Adapter + +The Python adapter uses `pyright-langserver`. + +Detection: + +- Project has `pyrightconfig.json` or `pyproject.toml`. +- `pyright-langserver` is available on `PATH`. + +Initialization: + +- Root URI is the project root. +- Open indexed `.py` files as needed. + +Python-specific notes: + +- Phase 1 uses the LSP server path rather than `pyright --watch`. +- The current `pyright --outputjson` batch driver remains available through explicit MCP diagnostics. + +## Go Adapter + +The Go adapter uses `gopls`. + +Detection: + +- Project has `go.mod` or indexed `.go` files. +- `gopls` is available on `PATH`. + +Initialization: + +- Root URI is the module root when `go.mod` exists, otherwise the project root. +- Open indexed `.go` files as needed. + +## C-Family Adapter + +The C-family adapter uses `clangd` for C, C++, and Objective-C. + +Detection: + +- Project has indexed `.c`, `.h`, `.cc`, `.cpp`, `.cxx`, `.hpp`, `.m`, or `.mm` files. +- `clangd` is available on `PATH`. 
+ +Initialization: + +- Root URI is the project root. +- Prefer `compile_commands.json` when present. +- Surface degraded status when `compile_commands.json` is missing and diagnostics may be incomplete. + +## Zig Adapter + +The Zig adapter uses `zls`. + +Detection: + +- Project has `build.zig` or indexed `.zig` files. +- `zls` is available on `PATH`. + +Initialization: + +- Root URI is the project root. +- Open indexed `.zig` files as needed. + +## Lua Adapter + +The Lua adapter uses `lua-language-server`. + +Detection: + +- Project has indexed `.lua` files. +- `lua-language-server` is available on `PATH`. + +Initialization: + +- Root URI is the project root. +- Respect `.luarc.json` or `.luarc.jsonc` when present. +- Avoid eager full-workspace file opening; start with files requested by refresh. + +## PHP Adapter + +The PHP adapter uses `intelephense`. + +Detection: + +- Project has `composer.json` or indexed `.php` files. +- `intelephense` is available on `PATH`. + +Initialization: + +- Root URI is the project root. +- Open indexed `.php` files as needed. + +## Error Handling + +All LSP diagnostics work is fail-open: + +- Missing server binary: language state becomes unavailable. +- Initialization timeout: language state becomes crashed with last error. +- Server exits unexpectedly: language state becomes crashed, diagnostics remain cached but stale. +- Malformed LSP diagnostics: malformed entries are dropped and counted in engine status. +- Graph enrichment failure: diagnostics are still returned with `enclosing: null`. + +The dashboard should never panic or fail to load because one language server is broken. + +## Testing Strategy + +Backend tests: + +- Adapter registry returns all built-in adapters and merges project custom adapters. +- Custom adapter config rejects missing language id, command, or extensions with a clear error. +- Settings persistence toggles a language on/off. +- Idle backfill setting persists as `off` or `idle`. 
+- Disabling a language stops its engine and prevents refresh enqueue. +- Refresh request for unavailable language returns status without spawning work. +- Idle backfill uses indexed files and does not perform a fresh filesystem walk. +- Idle backfill opens files in bounded batches and yields to explicit refresh. +- Idle backfill stops when a language is disabled. +- LSP diagnostic normalization maps severity, code, file, ranges, and message. +- Enclosing node enrichment handles known and unknown files. +- API overview returns per-language states and totals. +- API diagnostics supports language/severity/file filters. + +Frontend tests: + +- Adapter list renders built-in and custom languages together. +- Engine status table distinguishes disabled, unavailable, ready, refreshing, and crashed. +- Toggle off calls settings endpoint and removes refresh controls for that language. +- Toggle on calls settings endpoint and refresh endpoint. +- Idle backfill control calls settings endpoint and shows queue/progress status. +- Diagnostics table groups by file and renders line/severity/code/message. +- Empty state explains when no enabled LSP engines are available. + +Manual verification: + +- Start dashboard in a Rust project with `rust-analyzer` installed. +- Open Code Diagnostics. +- Enable Rust. +- Refresh. +- Confirm diagnostics appear or “no diagnostics” is shown with ready status. +- Enable idle backfill. +- Confirm coverage progress increases for files not manually refreshed. +- Disable Rust. +- Confirm rust-analyzer process stops and dashboard status changes to disabled. +- Repeat one refresh in a TypeScript or Python project when the matching language server is installed. +- Add a local custom adapter entry for a non-built-in language and confirm it appears in the engine table. + +## Phase 2 Design, Not Implemented + +Phase 2 can reuse the broker cache and settings, but must be a separate implementation step. 
+ +Possible Phase 2 additions: + +- Edit hooks enqueue language/file refreshes after code changes. +- Prompt/session hooks inject a compact diagnostic summary into model context. +- MCP `tracedecay_diagnostics` accepts `mode: "cached" | "fresh" | "wait"`. +- Tool hints can suggest Code Diagnostics when a user asks about type errors. + +Phase 1 must not wire any of these into hooks or model-facing output. + +## Open Decisions Resolved + +- Dashboard placement: a dedicated Code Diagnostics surface, not the existing Savings diagnostics panel. +- Runtime model: TraceDecay-owned LSP clients, not editor-owned LSP reuse. +- Refresh model: explicit dashboard refresh plus dashboard-owned idle whole-project backfill. +- Per-language control: dashboard enable/disable persisted per active project store. +- Batch fallback: explicit MCP/manual fallback only, not automatic dashboard fallback. From 683b6798ec61febf219ae7f8588688064e62a845 Mon Sep 17 00:00:00 2001 From: ScriptedAlchemy Date: Thu, 25 Jun 2026 11:05:36 +0000 Subject: [PATCH 2/4] docs: align managed skill mutation contract --- docs/SELF-IMPROVING-LOOPS-CONTRACTS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/SELF-IMPROVING-LOOPS-CONTRACTS.md b/docs/SELF-IMPROVING-LOOPS-CONTRACTS.md index 5576316a..9a64b56d 100644 --- a/docs/SELF-IMPROVING-LOOPS-CONTRACTS.md +++ b/docs/SELF-IMPROVING-LOOPS-CONTRACTS.md @@ -37,7 +37,7 @@ Timestamp semantics follow the Hermes memory-curator rule: TraceDecay-owned managed skills live under the profile `agent_managed/skills` store and static bundled skills stay immutable. Managed skill metadata includes id, title, summary, category, targets, lifecycle state, pinned flag, checksum, timestamps, and provenance. Support files are restricted to `references`, `templates`, `scripts`, and `assets`. -Agent-authored or backend-authored changes enter pending approval first. Activation, disable, archive, restore, and staged updates are explicit lifecycle operations. 
Pinned, user-authored, shipped, and Hermes-owned skills are protected from automatic mutation. +Agent-authored or backend-authored changes enter pending approval first. Activation, disable, archive, restore, and staged updates are explicit lifecycle operations. Pinned and user-authored skills are excluded from automatic archive or patch recommendations; shipped and Hermes-owned skills remain outside TraceDecay-owned mutation surfaces. ## Telemetry And Recommendations From a400b84aadc6d43cebecf4cc607796448afef092 Mon Sep 17 00:00:00 2001 From: ScriptedAlchemy Date: Thu, 25 Jun 2026 11:11:33 +0000 Subject: [PATCH 3/4] docs: simplify PR 131 wording --- README.md | 5 ++-- docs/SELF-IMPROVING-LOOPS-CONTRACTS.md | 2 +- docs/dashboard-port-handoff.md | 4 +-- docs/dashboard.md | 10 +++---- ...4-lsp-code-diagnostics-dashboard-design.md | 28 +++++++++---------- 5 files changed, 24 insertions(+), 25 deletions(-) diff --git a/README.md b/README.md index da319a1c..ae1e299e 100644 --- a/README.md +++ b/README.md @@ -164,9 +164,8 @@ host surface that each provider can load safely: | Hermes | Read-only bridge only. Hermes owns its native skill store; TraceDecay does not export or mutate Hermes skills. | The curation backend is provider-agnostic. Hermes remains a compatibility bridge -and reference source, while TraceDecay's automation config can delegate -intelligence to hosts such as the Codex app server without requiring Hermes to -run. +and reference source; TraceDecay automation can delegate intelligence to hosts +such as the Codex app server without requiring Hermes to run. For project-scoped setup, run from the repository root: diff --git a/docs/SELF-IMPROVING-LOOPS-CONTRACTS.md b/docs/SELF-IMPROVING-LOOPS-CONTRACTS.md index 9a64b56d..e082056a 100644 --- a/docs/SELF-IMPROVING-LOOPS-CONTRACTS.md +++ b/docs/SELF-IMPROVING-LOOPS-CONTRACTS.md @@ -1,6 +1,6 @@ # Self-Improving Loop Contracts -This document is the durable contract for TraceDecay-owned self-improvement loops. 
Hermes is a reference implementation and compatibility bridge; it is not required to run TraceDecay curation, managed skills, scheduler jobs, or artifact generation. The first standalone backend is the Codex app-server adapter, and the same contracts are intended to support other delegated or CLI hosts later. +This is the durable contract for TraceDecay-owned self-improvement loops. Hermes is a reference implementation and compatibility bridge, not a requirement for curation, managed skills, scheduler jobs, or artifact generation. The first standalone backend is the Codex app-server adapter, and the same contracts are intended to support other delegated or CLI hosts later. ## Host Matrix diff --git a/docs/dashboard-port-handoff.md b/docs/dashboard-port-handoff.md index ceaa6257..3685b249 100644 --- a/docs/dashboard-port-handoff.md +++ b/docs/dashboard-port-handoff.md @@ -302,8 +302,8 @@ each op is one of: Response: `{"results": [per-op result], "counts": {"deleted", "merged", "errors"}}`. Ids are validated per-op; partial failures are reported per-op (status stays 200), never as a whole-request 500. A 400 is returned only for a -malformed body. This is the contract the Hermes wrapper's future LLM curation -planner builds against. +malformed body. Standalone automation backends and delegated host planners use +this contract. ### Capabilities diff --git a/docs/dashboard.md b/docs/dashboard.md index 1e6939ac..db99649a 100644 --- a/docs/dashboard.md +++ b/docs/dashboard.md @@ -526,8 +526,8 @@ Returns feature flags and server configuration. Used by the UI and wrappers to d - `features.lcm`: Whether the LCM session store is available - `features.curation`: Whether similarity-dedup curation tools are enabled - `features.automation`: Whether TraceDecay automation is enabled with a supported backend -- `features.llm_curation`: Whether TraceDecay's standalone backend can run LLM-backed curation. 
Delegated hosts keep planning host-owned and submit ops through `POST /curate/apply`. -- `automation.mode`: `"disabled"`, `"standalone_backend"`, or `"delegated_host"`; `delegated_host` is provider-neutral and may be used by Hermes, Codex app-server orchestration, CloudCode CLI, Cursor Agent CLI, or another host that owns the intelligence layer. +- `features.llm_curation`: Whether TraceDecay can run LLM-backed curation through standalone automation. Delegated hosts keep planning host-owned and submit ops through `POST /curate/apply`. +- `automation.mode`: `"disabled"`, `"standalone_backend"`, or `"delegated_host"`; `delegated_host` is provider-neutral and may be used by Hermes, Codex app-server orchestration, Claude Code CLI, Cursor Agent CLI, or another host that owns the intelligence layer. --- @@ -779,8 +779,8 @@ Same structure with `applied_counts` showing what was actually deleted and #### `POST /api/plugins/holographic/curate/apply` -Generic curation-ops apply endpoint. This is the contract standalone automation -backends and delegated host planners build against. Per-op failures are +Generic curation-ops apply endpoint. Standalone automation backends and +delegated host planners use this contract. Per-op failures are reported per-op in `results`; the request only fails wholesale (400) on a malformed body. 
@@ -1081,7 +1081,7 @@ fetch('/api/capabilities') | `features.graph` | Code-graph API is available | Show Code Graph tab | | `features.savings` | Savings & Cost API is available | Show Savings & Cost tab | | `features.curation` | Similarity-dedup curation tools are available | Show Curation panel, enable curate actions | -| `features.llm_curation` | An LLM-backed curation planner is available through TraceDecay standalone automation or a delegated host wrapper | Enable LLM plan actions that target `POST /curate/apply` | +| `features.llm_curation` | An LLM-backed curation planner is available through standalone automation or a delegated host wrapper | Enable LLM plan actions that target `POST /curate/apply` | There is no archive flag: curation deletes are permanent, and no archive or restore endpoints exist. Always check the capability flags rather than diff --git a/docs/superpowers/specs/2026-06-24-lsp-code-diagnostics-dashboard-design.md b/docs/superpowers/specs/2026-06-24-lsp-code-diagnostics-dashboard-design.md index 0731926b..5a3d6ba9 100644 --- a/docs/superpowers/specs/2026-06-24-lsp-code-diagnostics-dashboard-design.md +++ b/docs/superpowers/specs/2026-06-24-lsp-code-diagnostics-dashboard-design.md @@ -2,11 +2,11 @@ ## Summary -Phase 1 adds a dedicated **Code Diagnostics** dashboard surface powered by a TraceDecay-owned, LSP-first diagnostics broker. The broker starts supported language servers when available, keeps them warm while the dashboard process is alive, caches diagnostics, and exposes status/results only through dashboard APIs. Hooks, prompt hints, MCP auto-context, and model-visible summaries are explicitly out of Phase 1 and are documented as Phase 2 extension points. +Phase 1 adds a dedicated **Code Diagnostics** dashboard surface powered by a TraceDecay-owned, LSP-first diagnostics broker. 
The broker starts supported language servers when available, keeps them warm while the dashboard process is alive, caches diagnostics, and exposes status/results only through dashboard APIs. Hooks, prompt hints, MCP auto-context, and model-visible summaries stay out of Phase 1 and remain Phase 2 extension points. -Phase 1 is a general LSP diagnostics platform, not a Rust-only feature. It ships a broker and adapter registry that can host every language server TraceDecay knows how to start. Rust is an important validation path because `rust-analyzer` highlights the cold `cargo check` problem, but the architecture and dashboard controls are language-generic. +The platform is language-generic, not Rust-only. Rust is an important validation path because `rust-analyzer` highlights the cold `cargo check` problem, but the broker, adapter registry, and dashboard controls must work for every supported language server. -Built-in Phase 1 adapters should cover the practical, low-friction language servers first: +Built-in adapters should cover the practical, low-friction language servers first: - Rust via `rust-analyzer` - TypeScript and JavaScript via `typescript-language-server` @@ -43,7 +43,7 @@ Each language can be enabled or disabled from the dashboard. Disabled languages ## Product Surface -The dashboard gains a new dedicated **Code Diagnostics** tab or plugin area, separate from the existing Savings & Cost `Diagnostics` view. The existing diagnostics panel reports TraceDecay hook/tool/prompt telemetry, so compiler and type diagnostics should not be mixed into it. +The dashboard gains a dedicated **Code Diagnostics** tab or plugin area. It stays separate from the existing Savings & Cost `Diagnostics` view, which reports TraceDecay hook/tool/prompt telemetry rather than compiler or type diagnostics. 
The Code Diagnostics UI includes: @@ -86,7 +86,7 @@ src/diagnostics/lsp/ settings.rs project-persisted language enablement ``` -The broker is owned by dashboard state, not by hidden autostart infrastructure. When `tracedecay dashboard` starts, it builds one `DiagnosticBroker` for the active project. The broker lazily starts language servers when the Code Diagnostics UI asks for status, diagnostics, or refresh. This avoids surprising background work for users who never open the tab. +Dashboard state owns the broker; hidden autostart infrastructure does not. When `tracedecay dashboard` starts, it builds one `DiagnosticBroker` for the active project. The broker starts language servers lazily when the Code Diagnostics UI asks for status, diagnostics, or refresh, so users who never open the tab do not get surprise background work. The broker stores: @@ -102,7 +102,7 @@ The broker stores: ## Adapter Registry and Language Coverage -The LSP broker uses an adapter registry. Each adapter is a declarative unit that answers: +The LSP broker uses an adapter registry. Each adapter declares: - which TraceDecay languages it handles - LSP language id for each file type @@ -139,11 +139,11 @@ root_markers = ["Gemfile", ".ruby-version"] diagnostics = "push" ``` -The dashboard should list built-in and custom adapters together. For unsupported languages, it should show an “Add custom LSP adapter” affordance rather than pretending TraceDecay has no path forward. +The dashboard should list built-in and custom adapters together. Unsupported languages should show an “Add custom LSP adapter” affordance rather than imply TraceDecay has no path forward. ## External Implementation Notes -The design should copy proven LSP-client shapes from existing projects rather than invent protocol machinery from scratch. +The design should reuse proven LSP-client shapes from existing projects rather than invent protocol machinery from scratch. 
Relevant findings: @@ -176,7 +176,7 @@ For each supported language, an adapter provides: - file extensions to open - diagnostic capability support -Phase 1 starts with stdio LSP servers launched by TraceDecay. It does not attempt to attach to Cursor, VS Code, or other editor-owned LSP sessions because the LSP lifecycle is client-owned and not exposed through a standard cross-editor discovery API. +TraceDecay starts stdio LSP servers itself in Phase 1. It does not attach to Cursor, VS Code, or other editor-owned LSP sessions because the LSP lifecycle is client-owned and not exposed through a standard cross-editor discovery API. The active lifecycle is: @@ -220,7 +220,7 @@ The broker should avoid a full project file walk on every refresh when possible. ## Idle Whole-Project Backfill -Phase 1 should passively collect diagnostics for files the user has not touched, but only through dashboard-owned idle work. This gives the dashboard a project-wide type-error view without making hooks or prompt submission slower. +Phase 1 passively collects diagnostics for files the user has not touched, but only through dashboard-owned idle work. This gives the dashboard a project-wide type-error view without slowing hooks or prompt submission. Idle backfill behavior: @@ -238,7 +238,7 @@ Backfill modes: - `off`: no passive project sweep. - `idle`: default. Backfill only when the dashboard process is idle and no explicit refresh is active. -The Phase 1 default should be `idle`. This gives users broad coverage without surprising CPU use. A repeating `continuous` sweep can be considered later, but it is not part of Phase 1. The dashboard should expose the Phase 1 setting and show current backfill progress. +The Phase 1 default is `idle`, which gives broad coverage without surprising CPU use. A repeating `continuous` sweep can be considered later, but it is not part of Phase 1. The dashboard should expose the setting and show current backfill progress. 
Whole-project coverage is best-effort. Some LSP servers publish diagnostics for the entire workspace after initialization; others only publish for opened files. The broker should support both: @@ -290,7 +290,7 @@ pub struct DiagnosticEngineStatus { } ``` -The first implementation can keep the cache in memory while the dashboard runs. Persisting the latest snapshot to a dashboard sidecar table is allowed in Phase 1 if it is useful for reload behavior, but the UI must clearly distinguish cached/stale data from fresh data. +The first implementation can keep the cache in memory while the dashboard runs. Persisting the latest snapshot to a dashboard sidecar table is allowed in Phase 1 for reload behavior, but the UI must clearly distinguish cached/stale data from fresh data. ## Dashboard API @@ -330,7 +330,7 @@ Add a dedicated frontend package or panel matching existing dashboard plugin pat - Refresh/restart controls using existing button and table primitives. - Idle backfill control with progress for project-wide coverage. -The UI should treat disabled and unavailable languages differently: +The UI should distinguish disabled and unavailable languages: - Disabled: user intentionally turned it off. - Unavailable: enabled, but binary or project manifest is missing. @@ -487,7 +487,7 @@ All LSP diagnostics work is fail-open: - Malformed LSP diagnostics: malformed entries are dropped and counted in engine status. - Graph enrichment failure: diagnostics are still returned with `enclosing: null`. -The dashboard should never panic or fail to load because one language server is broken. +One broken language server must never panic the dashboard or prevent it from loading. 
## Testing Strategy From d9e7dd15ff04ba74f500d27f48c8a5e940aa186a Mon Sep 17 00:00:00 2001 From: ScriptedAlchemy Date: Fri, 26 Jun 2026 07:26:47 +0000 Subject: [PATCH 4/4] fix: avoid lazy sync for read-only MCP tools --- src/mcp/server.rs | 104 +++++++++++++++++++++++++++++++--------------- 1 file changed, 70 insertions(+), 34 deletions(-) diff --git a/src/mcp/server.rs b/src/mcp/server.rs index e7a48e96..dd0d28e5 100644 --- a/src/mcp/server.rs +++ b/src/mcp/server.rs @@ -220,6 +220,18 @@ fn format_per_file_staleness_banner( lines.join("\n") } +fn needs_lazy_sync_before_dispatch(tool_name: &str) -> bool { + matches!( + tool_name, + "tracedecay_ast_grep_rewrite" + | "tracedecay_insert_at" + | "tracedecay_insert_at_symbol" + | "tracedecay_multi_str_replace" + | "tracedecay_replace_symbol" + | "tracedecay_str_replace" + ) +} + /// Read the on-disk mtime (UNIX seconds) for `relative_path` joined onto /// `project_root`. Returns `None` when the file is missing or stat fails. fn file_mtime_secs(project_root: &std::path::Path, relative_path: &str) -> Option { @@ -486,10 +498,10 @@ pub struct McpServer { impl McpServer { /// Creates a new MCP server backed by the given code graph. /// - /// Index freshness is maintained by a lazy staleness check - /// ([`maybe_sync_if_stale`](Self::maybe_sync_if_stale)) invoked at the - /// start of every `tools/call` and gated by a 30 s cooldown — there - /// is no background watcher task. This replaces the + /// Index freshness for source-editing tools is maintained by a lazy + /// staleness check ([`maybe_sync_if_stale`](Self::maybe_sync_if_stale)) + /// gated by a 30 s cooldown — there is no background watcher task. 
This + /// replaces the /// `notify-debouncer-full` watcher removed in v6.x (#80), which was /// the source of severe CPU and memory pressure on large monorepos /// where nested ignored directories (`apps/*/node_modules`, @@ -566,27 +578,13 @@ impl McpServer { timings_enabled: AtomicBool::new(false), last_staleness_check_at: AtomicI64::new(0), worktree_mismatch, - startup_catch_up_done: AtomicBool::new(false), - transcript_ingest_done: Arc::new(AtomicBool::new(false)), + startup_catch_up_done: AtomicBool::new(true), + transcript_ingest_done: Arc::new(AtomicBool::new(true)), ledger_writes_started: Arc::new(AtomicU64::new(0)), ledger_writes_finished: Arc::new(AtomicU64::new(0)), ledger_write_notify: Arc::new(tokio::sync::Notify::new()), }); - // Catch-up sync (#414): pick up changes made while the server - // was down — terminal `git pull`, IDE edits before the agent - // launched, files touched by another tool. Detached + weak so - // it never extends the server's lifetime; non-blocking so MCP - // `initialize` doesn't wait on the walk. - { - let weak = Arc::downgrade(&server); - tokio::spawn(async move { - if let Some(s) = weak.upgrade() { - s.run_startup_catch_up_sync().await; - } - }); - } - tokio::task::spawn_blocking(move || { let _ = cleanup_expired_response_handles( &response_handle_project_root, @@ -758,23 +756,27 @@ impl McpServer { } } - /// Catch-up sync run once at startup (#414). Bypasses the 30 s - /// cooldown in [`Self::maybe_sync_if_stale`] so changes made while - /// the server was down — a terminal `git pull`, IDE edits before - /// the agent launched, files touched by another tool — are - /// reconciled by the time the first MCP tool call arrives. The - /// staleness-check stamp is updated on the way out so the first - /// tool call doesn't re-walk the tree. + /// Catch-up sync helper for tests and explicit callers. 
Bypasses the 30 s + /// cooldown in [`Self::maybe_sync_if_stale`] so changes made while the + /// server was down — a terminal `git pull`, IDE edits before the agent + /// launched, files touched by another tool — can be reconciled before + /// assertions or source-editing work. The staleness-check stamp is updated + /// on the way out so the next lazy sync doesn't immediately re-walk the + /// tree. /// /// The completion flag is flipped on every exit path (including /// errors) so [`Self::wait_for_startup_catch_up`] never hangs. pub async fn run_startup_catch_up_sync(&self) { + self.startup_catch_up_done.store(false, Ordering::Release); + self.transcript_ingest_done.store(false, Ordering::Release); + let cg = self.cg_snapshot().await; let stale = cg.find_stale_files().await; if !stale.is_empty() { if let Err(e) = cg.sync_if_stale_silent(&stale).await { eprintln!("[tracedecay] startup catch-up sync failed: {e}"); self.startup_catch_up_done.store(true, Ordering::Release); + self.transcript_ingest_done.store(true, Ordering::Release); return; } } @@ -860,12 +862,12 @@ impl McpServer { .duration_since(std::time::UNIX_EPOCH) .unwrap_or_default() .as_secs() as i64; + let previous = self.last_staleness_check_at.load(Ordering::Acquire); let last_sync = cg.last_sync_timestamp().await; - if now.saturating_sub(last_sync) < 30 { + if previous != 0 && now.saturating_sub(last_sync) < 30 { return; } - let previous = self.last_staleness_check_at.load(Ordering::Acquire); if now.saturating_sub(previous) < 30 { return; } @@ -1664,10 +1666,14 @@ impl McpServer { // this call reads the right index. Cheap no-op check when no drift. let cg = self.reopen_if_branch_drifted().await; - // Notification-free freshness: walk the tree and resync any stale - // files, gated by a 30 s cooldown. Replaces the embedded watcher - // (see McpServer::new). No-op on the hot path most of the time. 
- self.maybe_sync_if_stale().await; + // Notification-free freshness is useful before tools that edit source + // files in the index. Read-only graph queries should not block behind + // a full project walk; on very large indexes (especially when + // node_modules was intentionally included) that turns diagnostics and + // search into sync operations. + if needs_lazy_sync_before_dispatch(tool_name) { + self.maybe_sync_if_stale().await; + } self.stats.tool_calls.fetch_add(1, Ordering::Relaxed); eprintln!("[tracedecay] tool call: {tool_name}"); @@ -2133,7 +2139,7 @@ fn json_rpc_request_id_string(id: &Value) -> Option { #[cfg(test)] #[allow(clippy::unwrap_used)] mod staleness_banner_tests { - use super::{format_per_file_staleness_banner, humanize_age}; + use super::{format_per_file_staleness_banner, humanize_age, needs_lazy_sync_before_dispatch}; use std::fs; use tempfile::tempdir; @@ -2177,4 +2183,34 @@ mod staleness_banner_tests { // sync and tool response). Age falls back to 0s. assert!(banner.contains("does/not/exist.rs")); } + + #[test] + fn read_only_tools_skip_lazy_sync_before_dispatch() { + for tool in [ + "tracedecay_active_project", + "tracedecay_context", + "tracedecay_files", + "tracedecay_runtime", + "tracedecay_search", + "tracedecay_status", + "tracedecay_storage_status", + ] { + assert!( + !needs_lazy_sync_before_dispatch(tool), + "{tool} should stay available when lazy sync is stuck" + ); + } + + for tool in [ + "tracedecay_insert_at", + "tracedecay_multi_str_replace", + "tracedecay_replace_symbol", + "tracedecay_str_replace", + ] { + assert!( + needs_lazy_sync_before_dispatch(tool), + "{tool} should still get the normal lazy freshness check" + ); + } + } }