/** Path segment that marks agent-artifact pack content (`.../agent-artifacts/packs/...`). */
const AGENT_ARTIFACT_PACK_SEGMENT = /\/agent-artifacts\/packs\//;

/**
 * Reports whether `uri` points into an agent-artifact pack, i.e. contains the
 * `/agent-artifacts/packs/` path segment. Pack manifests and the prompt
 * fragments bundled under them are tooling rather than recallable knowledge,
 * and they lexically match almost any query, so recall filters them out.
 * Shared skills under `/agent-artifacts/skills/` do not match this check.
 *
 * @param uri - Resource URI to classify.
 * @returns `true` when the URI contains the pack path segment.
 */
export function isAgentArtifactPackUri(uri: string): boolean {
  return AGENT_ARTIFACT_PACK_SEGMENT.test(uri);
}

/**
 * Single exclusion predicate for recall: a URI is excluded when it is a
 * summary sidecar or agent-artifact pack content. Both the semantic and the
 * exact-match passes call this one function so their filters stay identical.
 *
 * @param uri - Resource URI to classify.
 * @returns `true` when the URI must not surface in recall results.
 */
export function isExcludedRecallUri(uri: string): boolean {
  if (isSummarySidecarUri(uri)) {
    return true;
  }
  return isAgentArtifactPackUri(uri);
}
The CLI prints a `cmd: ...` banner before the JSON, so @@ -929,7 +967,7 @@ export function grepUrisFromJson(output: string): readonly string[] { } const uris: string[] = []; for (const match of matches) { - if (isJsonObject(match) && typeof match.uri === 'string' && !isSummarySidecarUri(match.uri)) { + if (isJsonObject(match) && typeof match.uri === 'string' && !isExcludedRecallUri(match.uri)) { uris.push(match.uri); } } @@ -1020,7 +1058,7 @@ export function parseRecallHits(output: string, options: ParseRecallHitsOptions continue; } for (const item of items) { - if (!isJsonObject(item) || typeof item.uri !== 'string' || isSummarySidecarUri(item.uri)) { + if (!isJsonObject(item) || typeof item.uri !== 'string' || isExcludedRecallUri(item.uri)) { continue; } if (options.includeArchived !== true && isArchivedMemoryUri(item.uri)) { @@ -1117,6 +1155,12 @@ function dedupeByContent(hits: readonly RecallHit[]): readonly RecallHit[] { * a resource. */ export function categoryForUri(uri: string): RecallCategory { + // Shared agent artifacts live under `.../memories/.../agent-artifacts/` but + // are tooling, not personal knowledge — keep them out of the leading memory + // band. (Pack machinery is dropped entirely upstream; only skills reach here.) + if (uri.includes('/agent-artifacts/')) { + return 'skills'; + } if (uri.includes('/memories/')) { return 'memories'; } @@ -1133,23 +1177,93 @@ function contextTypeForCategory(category: RecallCategory): string { return category === 'skills' ? 'skill' : 'resource'; } +/** + * Extra weight given to an exact term that also appears in the document's slug + * (its memory topic or resource filename). A slug match is a title-level signal + * — the document is *about* that term — whereas a bare body match can be an + * incidental mention (a branch name in a CI note, "spec" inside "the author's + * spec doc"). The bonus lets a document whose topic names the query terms lead + * its category even when those terms are common corpus-wide. 
/**
 * Multiplier applied to an exact term's contribution when the term also names
 * the document's slug. A slug match is a title-level signal (the document is
 * about that term), whereas a body-only match can be an incidental mention.
 */
const RECALL_EXACT_SLUG_BONUS = 4;

/**
 * Derives the document slug from a URI: the last path segment, lowercased,
 * with the anchor removed by `stripAnchor` and one trailing `.ext` stripped.
 *
 * @param uri - Resource URI, optionally carrying a chunk anchor.
 * @returns The lowercased final path segment without its extension.
 */
function uriSlug(uri: string): string {
  // The extension is stripped before slicing; the pattern is anchored to the
  // end of the string and cannot cross a `/`, so only the last segment is affected.
  const withoutExtension = stripAnchor(uri).replace(/\.[a-z0-9]+$/i, '');
  return withoutExtension.slice(withoutExtension.lastIndexOf('/') + 1).toLowerCase();
}

/**
 * Counts, for every exact term, how many of the matched documents contain it.
 * Terms are lowercased and de-duplicated per document, so one document adds at
 * most 1 to a term's count however many times (or in whatever casing) the term
 * is listed. The counts are the inverse-frequency input for ranking: a term
 * seen in many documents is discounted, a term seen in few keeps its weight.
 *
 * @param matches - Exact-match records, each listing the terms that matched one document.
 * @returns Map from lowercased term to the number of match records containing it.
 */
function exactTermDocumentFrequency(matches: readonly ExactMatch[]): Map<string, number> {
  const frequency = new Map<string, number>();
  for (const match of matches) {
    const distinctTerms = new Set(match.terms.map(rawTerm => rawTerm.toLowerCase()));
    for (const term of distinctTerms) {
      frequency.set(term, (frequency.get(term) ?? 0) + 1);
    }
  }
  return frequency;
}

/**
 * Reports whether `slug` contains `term` as a whole token rather than as an
 * incidental substring. A token boundary is the start or end of the slug or
 * any character outside `[a-z0-9]`, so `spec` matches `mobile-alerting-spec`
 * but not `design-respec-notes`, while a hyphenated term such as `valencia-v1`
 * still matches `coda-valencia-v1-notes`.
 *
 * Both arguments are expected to be lowercased already; the boundary class is
 * lowercase-only.
 *
 * @param slug - Lowercased document slug.
 * @param term - Lowercased exact term.
 * @returns `true` when the term appears in the slug on token boundaries.
 */
function slugNamesTerm(slug: string, term: string): boolean {
  // An empty term would reduce the pattern to "boundary followed by boundary",
  // which matches an empty slug or a doubled separator — never a real name match.
  if (term.length === 0) {
    return false;
  }
  // NOTE(review): escapeRegExp is presumed to escape regex metacharacters in
  // `term` so it is matched literally — confirm against its definition.
  return new RegExp(`(^|[^a-z0-9])${escapeRegExp(term)}([^a-z0-9]|$)`).test(slug);
}
/**
 * Combined exact-match strength for a hit, used as the intra-category sort key.
 *
 * Each distinct matched term (compared case-insensitively) contributes its
 * inverse document frequency, `1 / df`, so a term found in a single document
 * contributes 1 and a term found in many contributes a value approaching 0.
 * The contribution is multiplied by `RECALL_EXACT_SLUG_BONUS` when the term
 * names the document's slug. A hit with no exact terms scores 0.
 *
 * @param hit - Recall hit whose `exactTerms` and `uri` are read.
 * @param documentFrequency - Lowercased term → number of matched documents containing it.
 * @returns The summed, slug-weighted inverse frequencies of the hit's distinct terms.
 */
function exactMatchStrength(hit: RecallHit, documentFrequency: ReadonlyMap<string, number>): number {
  if (!hit.exactTerms?.length) {
    return 0;
  }
  const slug = uriSlug(hit.uri);
  // Normalize and de-duplicate first: the frequency map counts each lowercased
  // term once per document, so a case-variant duplicate here must not add its
  // weight twice.
  const distinctTerms = new Set(hit.exactTerms.map(rawTerm => rawTerm.toLowerCase()));
  let strength = 0;
  for (const term of distinctTerms) {
    // `?? 1` is defensive: a term missing from the map is treated as appearing
    // in one document rather than dividing by undefined.
    const rarity = 1 / (documentFrequency.get(term) ?? 1);
    strength += rarity * (slugNamesTerm(slug, term) ? RECALL_EXACT_SLUG_BONUS : 1);
  }
  return strength;
}
`exactRecallTerms` only keeps distinctive - * tokens, so a literal match is high precision; surfacing it over a fuzzy - * neighbour is the desired behaviour. + * Ranking by weighted strength rather than raw term count is deliberate: a + * distinctive exact match (a rare term, or one that names the document's topic) + * still outranks an unmatched semantic hit in its category — the intended + * high-precision behaviour — but a document matched only by common words in its + * body no longer floods the top, because inverse-document-frequency drives those + * terms toward zero. The intra-category key is `exactMatchStrength + score`, so + * a strong exact match (strength ≥ 1) leads regardless of semantic score while a + * weak common-word-only promotion (strength ≪ 1) sits below a genuine semantic + * hit instead of over-correcting into keyword flooding. */ export function applyExactMatchBoost( hits: readonly RecallHit[], @@ -1177,10 +1291,20 @@ export function applyExactMatchBoost( uri, }; }); - return [...annotated, ...promoted].sort( + const documentFrequency = exactTermDocumentFrequency(exactMatches); + const merged = [...annotated, ...promoted]; + // Hoist the blended relevance (exact strength + semantic score) into an O(n) + // pre-pass, keyed by document URI (anchors stripped, matching termsByUri), so + // the comparator stays a cheap lookup and every hit resolves. + const combinedRelevanceByUri = new Map(); + for (const hit of merged) { + combinedRelevanceByUri.set(stripAnchor(hit.uri), exactMatchStrength(hit, documentFrequency) + hit.score); + } + return merged.sort( (left, right) => recallCategoryRank(left.category) - recallCategoryRank(right.category) || - (right.exactTerms?.length ?? 0) - (left.exactTerms?.length ?? 0) || + (combinedRelevanceByUri.get(stripAnchor(right.uri)) ?? 0) - + (combinedRelevanceByUri.get(stripAnchor(left.uri)) ?? 
0) || right.score - left.score, ); } @@ -1189,11 +1313,26 @@ export function formatRecallHits(hits: readonly RecallHit[], maxHits: number): s return renderRecallHits(hits.slice(0, maxHits), Math.max(0, hits.length - maxHits)); } +/** + * Leading note shown when every hit in the window is a keyword-only (score 0) + * promotion — i.e. no semantic pass matched above the recall threshold. It marks + * the difference between "here is what the corpus knows about this" and "nothing + * semantically matched; these merely contain the words", so an agent does not + * mistake keyword noise for coverage of an absent topic. + */ +export const RECALL_LOW_CONFIDENCE_NOTE = + '⚠ No semantically-relevant matches — the results below only contain the query words (the corpus may not cover this topic).'; + /** * Render an already-decided shown window into the numbered recall list. Keeping * the slice out of here lets `buildRecallSections` compute the shown set once and * feed both the rendering and the exact-tail "already shown" filter from the same * list. `overflow` is the count of hits beyond the window, for the trailing note. + * + * A promoted exact-only hit (score 0) is labelled `keyword-only:` rather than + * `exact:` so it is visibly distinct from a semantic hit that a term also + * corroborated; when the whole window is keyword-only, a low-confidence note + * leads the list. */ function renderRecallHits(shown: readonly RecallHit[], overflow: number): string | undefined { if (shown.length === 0) { @@ -1201,14 +1340,16 @@ function renderRecallHits(shown: readonly RecallHit[], overflow: number): string } const lines = shown.flatMap((hit, index) => { const scorePart = hit.score > 0 ? `score ${hit.score.toFixed(2)}` : undefined; - const exactPart = hit.exactTerms?.length ? `exact: ${hit.exactTerms.join(', ')}` : undefined; + const exactLabel = hit.score > 0 ? 'exact' : 'keyword-only'; + const exactPart = hit.exactTerms?.length ? 
`${exactLabel}: ${hit.exactTerms.join(', ')}` : undefined; const head = `${index + 1}. ${[hit.contextType, scorePart, exactPart].filter(Boolean).join(' · ')} · ${hit.uri}`; return hit.snippet ? [head, ` ${hit.snippet}`] : [head]; }); if (overflow > 0) { lines.push(`(+${overflow} more — refine the query or read a URI above)`); } - return lines.join('\n'); + const noSemanticMatch = shown.every(hit => hit.score === 0); + return (noSemanticMatch ? [RECALL_LOW_CONFIDENCE_NOTE, ...lines] : lines).join('\n'); } export interface RecallSections { diff --git a/test/unit/utils.test.ts b/test/unit/utils.test.ts index 267ebf4..774c849 100644 --- a/test/unit/utils.test.ts +++ b/test/unit/utils.test.ts @@ -21,8 +21,10 @@ import { getGlobBase, globToRegExp, grepUrisFromJson, + isAgentArtifactPackUri, mergeRecallHits, parseRecallHits, + RECALL_LOW_CONFIDENCE_NOTE, type RecallHit, hasGlob, isExecutable, @@ -346,6 +348,13 @@ describe('exactRecallTerms', () => { 'mobile-checkbox-clipped-table-cell', ]); }); + + it('drops expanded generic function words so they cannot re-flood recall', () => { + // A sentence built only from stop words yields no exact grep terms. + expect(exactRecallTerms('what have they which does that when were')).toEqual([]); + // A distinctive token still survives among the same filler. 
+ expect(exactRecallTerms('what does the sharding do')).toEqual(['sharding']); + }); }); describe('recallQueryRequestsWorkspaceContext', () => { @@ -586,6 +595,23 @@ describe('grepUrisFromJson', () => { expect(grepUrisFromJson(output)).toEqual(['viking://a/real.md']); }); + it('drops agent-artifact pack machinery so a review pack does not flood exact matches', () => { + const output = JSON.stringify({ + ok: true, + result: { + matches: [ + { + line: 1, + uri: 'viking://user/me/memories/shared/default/agent-artifacts/packs/claude/r/r.pack.json', + content: 'x', + }, + {line: 2, uri: 'viking://user/me/memories/durable/projects/x/real.md', content: 'y'}, + ], + }, + }); + expect(grepUrisFromJson(output)).toEqual(['viking://user/me/memories/durable/projects/x/real.md']); + }); + it('returns [] on malformed output', () => { expect(grepUrisFromJson('cmd: ov grep\nnot json')).toEqual([]); expect(grepUrisFromJson('')).toEqual([]); @@ -657,6 +683,60 @@ describe('parseRecallHits / mergeRecallHits / formatRecallHits', () => { ]); }); + it('drops agent-artifact pack machinery from semantic hits', () => { + const hits = parseRecallHits( + json({ + ok: true, + result: { + memories: [ + { + context_type: 'memory', + uri: 'viking://user/me/memories/shared/default/agent-artifacts/packs/claude/r/r.pack.json', + score: 0.7, + abstract: 'pack', + }, + { + context_type: 'memory', + uri: 'viking://user/me/memories/durable/projects/x/real.md', + score: 0.6, + abstract: 'real', + }, + ], + }, + }), + ); + expect(hits.map(hit => hit.uri)).toEqual(['viking://user/me/memories/durable/projects/x/real.md']); + }); + + it('leads with a low-confidence note when the window is entirely keyword-only', () => { + // No semantic pass matched; every shown hit is a promoted exact-only doc. 
+ const {semanticSection} = buildRecallSections( + [], + [{terms: ['kubernetes'], uri: 'viking://user/me/memories/durable/projects/x/unrelated.md'}], + 12, + ); + expect(semanticSection?.split('\n')[0]).toBe(RECALL_LOW_CONFIDENCE_NOTE); + expect(semanticSection).toContain('keyword-only: kubernetes'); + }); + + it('omits the low-confidence note when a semantic hit is present', () => { + const {semanticSection} = buildRecallSections( + [parseRecallHits(json({ok: true, result: {memories: [{uri: 'viking://real.md', score: 0.7, abstract: 'x'}]}}))], + [{terms: ['kubernetes'], uri: 'viking://user/me/memories/durable/projects/x/unrelated.md'}], + 12, + ); + expect(semanticSection).not.toContain(RECALL_LOW_CONFIDENCE_NOTE); + // Positively assert the mixed state so the test cannot pass by dropping the + // semantic hit or rendering an empty section. + expect(semanticSection).toContain('viking://real.md'); + expect(semanticSection).toContain('keyword-only: kubernetes'); + }); + + it('emits no note and no section for an empty result', () => { + const {semanticSection} = buildRecallSections([], [], 12); + expect(semanticSection).toBeUndefined(); + }); + it('omits archived lifecycle memories by default', () => { const hits = parseRecallHits( json({ @@ -928,6 +1008,33 @@ describe('categoryForUri', () => { expect(categoryForUri('viking://resources/repos/coda/CLAUDE.md')).toBe('resources'); expect(categoryForUri('viking://resources/repos/coda/.claude/skills/x/SKILL.md')).toBe('resources'); }); + + it('keeps shared agent artifacts out of the memory band (routes them to skills)', () => { + expect( + categoryForUri('viking://user/me/memories/shared/default/agent-artifacts/skills/claude/reviewer/SKILL.md'), + ).toBe('skills'); + }); +}); + +describe('isAgentArtifactPackUri', () => { + it('flags pack machinery but not shared skills or plain memories', () => { + expect( + isAgentArtifactPackUri( + 
'viking://user/me/memories/shared/default/agent-artifacts/packs/claude/reviewer/reviewer.pack.json', + ), + ).toBe(true); + expect( + isAgentArtifactPackUri( + 'viking://user/me/memories/shared/default/agent-artifacts/packs/claude/reviewer/files/prompts/f.md', + ), + ).toBe(true); + expect( + isAgentArtifactPackUri( + 'viking://user/me/memories/shared/default/agent-artifacts/skills/claude/reviewer/SKILL.md', + ), + ).toBe(false); + expect(isAgentArtifactPackUri('viking://user/me/memories/durable/projects/x/y.md')).toBe(false); + }); }); describe('applyExactMatchBoost', () => { @@ -972,7 +1079,7 @@ describe('applyExactMatchBoost', () => { expect(ranked[0]?.uri).toBe('viking://resources/repos/coda/CLAUDE.md'); }); - it('orders by category, then exact-term count, then score', () => { + it('orders by category, then blended exact strength + score', () => { const ranked = applyExactMatchBoost( [ hit({category: 'memories', contextType: 'memory', uri: 'viking://user/me/memories/m.md', score: 0.5}), @@ -991,7 +1098,7 @@ describe('applyExactMatchBoost', () => { ]); }); - it('breaks ties by score when exact-term counts are equal', () => { + it('breaks ties by score when exact strength is equal', () => { const ranked = applyExactMatchBoost( [hit({uri: 'viking://lo', score: 0.5}), hit({uri: 'viking://hi', score: 0.8})], [ @@ -1013,11 +1120,72 @@ describe('applyExactMatchBoost', () => { ); const lines = (formatRecallHits(ranked, 5) ?? '').split('\n'); const promotedIndex = lines.findIndex(line => line.includes('CLAUDE.md')); - // Promoted exact-only line carries no "score" token and is not followed by a - // wrapped (3-space indented) snippet line, since its snippet is empty. 
- expect(lines[promotedIndex]).toContain('resource · exact: y, z · viking://resources/repos/coda/CLAUDE.md'); + // Promoted exact-only line is labelled keyword-only (no semantic match), + // carries no "score" token, and is not followed by a wrapped (3-space + // indented) snippet line, since its snippet is empty. + expect(lines[promotedIndex]).toContain('resource · keyword-only: y, z · viking://resources/repos/coda/CLAUDE.md'); expect(lines[promotedIndex]).not.toContain('score'); expect(lines[promotedIndex + 1] ?? '').not.toMatch(/^ {3}/); + // A semantic hit that a term also matched keeps the "exact:" label. expect(lines.join('\n')).toContain('resource · score 0.62 · exact: x · viking://sem'); }); + + it('weights a rare exact term above a common one (inverse document frequency)', () => { + // "common" matches three documents, "rare" matches one. The rare-term + // document leads even though "common" appears in more of the result set. + const ranked = applyExactMatchBoost( + [], + [ + {terms: ['common'], uri: 'viking://user/me/memories/durable/projects/x/c1.md'}, + {terms: ['common'], uri: 'viking://user/me/memories/durable/projects/x/c2.md'}, + {terms: ['common'], uri: 'viking://user/me/memories/durable/projects/x/c3.md'}, + {terms: ['rare'], uri: 'viking://user/me/memories/durable/projects/x/r.md'}, + ], + ); + expect(ranked[0]?.uri).toBe('viking://user/me/memories/durable/projects/x/r.md'); + }); + + it('boosts an exact term that names the document slug over a body-only match', () => { + // Same term, same document frequency — the memory whose topic slug names the + // term wins over one that only mentions it in the body (an incidental match). 
+ const ranked = applyExactMatchBoost( + [], + [ + {terms: ['observability'], uri: 'viking://user/me/memories/durable/projects/x/mobile-observability-spec.md'}, + {terms: ['observability'], uri: 'viking://user/me/memories/durable/projects/x/desktop-layout-review.md'}, + ], + ); + expect(ranked[0]?.uri).toContain('mobile-observability-spec'); + }); + + it('gives the slug bonus only on a token boundary, not an incidental substring', () => { + // "spec" names the slug token in one doc but is only a substring of "respec" + // in the other; same term and df, so the whole-token match must win. + const ranked = applyExactMatchBoost( + [], + [ + {terms: ['spec'], uri: 'viking://user/me/memories/durable/projects/x/mobile-alerting-spec.md'}, + {terms: ['spec'], uri: 'viking://user/me/memories/durable/projects/x/design-respec-notes.md'}, + ], + ); + expect(ranked[0]?.uri).toContain('mobile-alerting-spec'); + expect(ranked[1]?.uri).toContain('design-respec-notes'); + }); + + it('keeps a common-word-only promotion below a genuine semantic hit', () => { + // The semantic hit carries no exact term but scores 0.6; the promoted docs + // match only a corpus-common term (df 5 → strength 0.2), so they no longer + // outrank the real semantic hit — this is the anti-flooding guarantee. + const ranked = applyExactMatchBoost( + [hit({category: 'memories', contextType: 'memory', uri: 'viking://user/me/memories/semantic.md', score: 0.6})], + [ + {terms: ['common'], uri: 'viking://user/me/memories/durable/projects/x/p1.md'}, + {terms: ['common'], uri: 'viking://user/me/memories/durable/projects/x/p2.md'}, + {terms: ['common'], uri: 'viking://user/me/memories/durable/projects/x/p3.md'}, + {terms: ['common'], uri: 'viking://user/me/memories/durable/projects/x/p4.md'}, + {terms: ['common'], uri: 'viking://user/me/memories/durable/projects/x/p5.md'}, + ], + ); + expect(ranked[0]?.uri).toBe('viking://user/me/memories/semantic.md'); + }); });