From edaa969e798ddce328db59b66fd5ac9554352e51 Mon Sep 17 00:00:00 2001 From: Yvette Carlisle Date: Fri, 3 Jul 2026 00:44:10 -0400 Subject: [PATCH] {"schema":"decodex/commit/1","summary":"Implement Source Library and Memory Authority lifecycle model","authority":"XY-1152"} --- .github/workflows/language.yml | 2 +- README.md | 2 +- docs/log.md | 9 +++ docs/runbook/getting_started.md | 2 +- docs/spec/agent_memory_knowledge_system_v1.md | 2 +- docs/spec/production_corpus_manifest_v1.md | 4 +- docs/spec/system_doc_source_ref_v1.md | 10 +++- docs/spec/system_elf_memory_service_v2.md | 13 ++++- makefiles/check.toml | 4 +- packages/elf-service/src/docs.rs | 6 +- packages/elf-service/src/docs/api.rs | 5 +- packages/elf-service/src/docs/api/put.rs | 25 ++++++++ packages/elf-service/src/docs/service.rs | 4 +- packages/elf-service/src/docs/service/put.rs | 1 + packages/elf-service/src/docs/service/read.rs | 5 +- .../elf-service/src/docs/source_capture.rs | 58 ++++++++++++++++++- .../src/docs/tests_source_capture.rs | 35 +++++++++++ .../elf-service/src/docs/types/capture.rs | 1 + packages/elf-service/src/history.rs | 20 +++++++ .../src/memory_corrections/tests.rs | 16 +++++ .../src/memory_corrections/validation.rs | 16 +++++ .../src/search/tests_retrieval_merge.rs | 47 ++++++++++++++- packages/elf-storage/src/docs/documents.rs | 6 +- scripts/check-docs.py | 8 ++- 24 files changed, 277 insertions(+), 24 deletions(-) diff --git a/.github/workflows/language.yml b/.github/workflows/language.yml index 1e1e1567..f86a2368 100644 --- a/.github/workflows/language.yml +++ b/.github/workflows/language.yml @@ -78,4 +78,4 @@ jobs: tool: taplo - name: Run repository checks - run: cargo make check + run: cargo make checks diff --git a/README.md b/README.md index f79564b3..93ab6b98 100644 --- a/README.md +++ b/README.md @@ -638,7 +638,7 @@ comparability gates and no universal leaderboard claim. 
```sh cargo make fmt -cargo make check +cargo make checks cargo make test-rust ``` diff --git a/docs/log.md b/docs/log.md index 2b7b01c7..1d398c4f 100644 --- a/docs/log.md +++ b/docs/log.md @@ -161,3 +161,12 @@ logs. rather than generic Knowledge OS scope. - Updated docs routing so the retained spec path now resolves by the source-backed project memory product name and explicit Context Pack/recall boundary language. +- Updated Source Library and Memory Authority lifecycle contracts for XY-1152 so + captured sources carry active/current lifecycle metadata, deleted sources retain + tombstone metadata, memory corrections carry supersede/delete/restore lifecycle + state, and stale vector candidates are revalidated against current authority. +- Renamed the full repository gate task to `cargo make checks` for XY-1152 so the + Makefile command surface matches the registered Decodex verify command. +- Preserved historical benchmark evidence rows that recorded the former singular + check command, and constrained docs task validation to allow only those exact + legacy evidence references. diff --git a/docs/runbook/getting_started.md b/docs/runbook/getting_started.md index 07d21cbd..1b6d4753 100644 --- a/docs/runbook/getting_started.md +++ b/docs/runbook/getting_started.md @@ -177,7 +177,7 @@ Use `cargo make` tasks from repository root. ```sh cargo make fmt -cargo make check +cargo make checks cargo make test-rust cargo make test-rust-integration cargo make test-e2e diff --git a/docs/spec/agent_memory_knowledge_system_v1.md b/docs/spec/agent_memory_knowledge_system_v1.md index 29d09031..f875151c 100644 --- a/docs/spec/agent_memory_knowledge_system_v1.md +++ b/docs/spec/agent_memory_knowledge_system_v1.md @@ -321,7 +321,7 @@ Repository-native validation is authoritative. docs are validation-ready. - Before a PR handoff or any push that refreshes a PR head, run the registered Decodex workflow gate: `cargo make fmt`, `cargo make lint-fix`, then - `cargo make check`. 
+ `cargo make checks`. - If a phase changes commands, schemas, config, runtime behavior, status semantics, or benchmark claims, update the owning docs and include drift evidence as required by `docs/policy.md`. diff --git a/docs/spec/production_corpus_manifest_v1.md b/docs/spec/production_corpus_manifest_v1.md index e341265d..5d6714d5 100644 --- a/docs/spec/production_corpus_manifest_v1.md +++ b/docs/spec/production_corpus_manifest_v1.md @@ -100,7 +100,7 @@ evidence ID. It must not silently fall back to the checked-in synthetic corpus. "evidence_id": "issue-xy123-resume", "category": "issue", "title": "XY-123 Resume State", - "text": "XY-123 resumes on branch y/example with command `cargo make check`." + "text": "XY-123 resumes on branch y/example with command `cargo make checks`." } ], "queries": [ @@ -110,7 +110,7 @@ evidence ID. It must not silently fall back to the checked-in synthetic corpus. "query": "How do I resume XY-123?", "expected_evidence_ids": ["issue-xy123-resume"], "allowed_alternate_evidence_ids": [], - "expected_terms": ["XY-123", "cargo make check"] + "expected_terms": ["XY-123", "cargo make checks"] } ] } diff --git a/docs/spec/system_doc_source_ref_v1.md b/docs/spec/system_doc_source_ref_v1.md index d3b5a321..38eeabef 100644 --- a/docs/spec/system_doc_source_ref_v1.md +++ b/docs/spec/system_doc_source_ref_v1.md @@ -200,6 +200,9 @@ Normalized capture output: - `docs_put` MUST return `source_capture.schema = "doc_source_capture/v1"`. - `source_capture.source_record_id` MUST equal the stored `doc_documents.doc_id`. +- `source_capture.lifecycle` MUST use `elf.source_lifecycle/v1` with + `status = "active"`, `freshness = "current"`, the capture actor, transition + timestamp, and `reason_code = "SOURCE_CAPTURED"`. - `source_capture.origin` MUST be the canonical source origin used for operator inspection and deduplication. Source Library `canonical_uri` takes precedence over legacy URL, URI, thread, search, or repo-derived origins. 
@@ -254,7 +257,7 @@ Persisted normalized `source_ref`: - The stored `doc_documents.source_ref` MUST retain the caller-provided `doc_source_ref/v1` fields and add normalized capture fields: - `source_record_id`, `origin`, `captured_at`, `content_hash`, + `source_record_id`, `lifecycle`, `origin`, `captured_at`, `content_hash`, `visibility_scope`, `source_type`, and `source_spans`. - When policy spans exist, stored `doc_documents.source_ref` MUST include `policy_spans`. @@ -270,6 +273,11 @@ Delete, export, and private-span boundary: refs non-recallable. Derived pages may retain stored stale text until rebuild, but page search MUST suppress snippets whose source refs no longer resolve to active readable document or chunk rows. +- Source Library delete MUST preserve a tombstone lifecycle in + `doc_documents.source_ref.lifecycle` with `status = "deleted"`, + `freshness = "tombstoned"`, `reason_code = "SOURCE_LIBRARY_DELETE"`, + `deleted_at`, actor metadata, and a `tombstone_ref`. The tombstone is audit + evidence only and MUST NOT make deleted source spans recallable. - `doc_source_span/v1` entries with `status = "excluded"` or `status = "redacted"` are audit evidence for write-policy handling. They MUST NOT be treated as captured source evidence for derived page search, memory promotion, graph facts, or export diff --git a/docs/spec/system_elf_memory_service_v2.md b/docs/spec/system_elf_memory_service_v2.md index 6221b16b..b7d19aea 100644 --- a/docs/spec/system_elf_memory_service_v2.md +++ b/docs/spec/system_elf_memory_service_v2.md @@ -1160,14 +1160,15 @@ Body: Behavior: - `supersede` sets the note status to `deprecated`, writes a `DEPRECATE` `memory_note_versions` row, stores `elf.memory_correction/v1` source-ref evidence, - and enqueues an indexing `DELETE` so normal recall suppresses the note. + includes `elf.memory_authority_lifecycle/v1` superseded metadata, and enqueues an + indexing `DELETE` so normal recall suppresses the note. 
- `delete` sets the note status to `deleted`, writes a `DELETE` `memory_note_versions` row, stores `elf.memory_correction/v1` source-ref evidence, - and enqueues an indexing `DELETE`. + includes tombstone lifecycle metadata, and enqueues an indexing `DELETE`. - `restore` restores the latest prior active snapshot from a `DELETE` or `DEPRECATE` version, or the supplied `restore_version_id`, writes a `RESTORE` `memory_note_versions` row, stores `elf.memory_correction/v1` source-ref evidence, - and enqueues an indexing `UPSERT`. + includes rollback lifecycle metadata, and enqueues an indexing `UPSERT`. - Correction actions require a non-empty reason and non-empty JSON object `source_ref`. They do not mutate raw source notes, docs, events, traces, graph facts, or source pointers. @@ -1845,6 +1846,9 @@ Behavior: writes doc chunks, and enqueues doc-index `UPSERT` jobs for derived Qdrant points. - The request may include write-policy redactions or exclusions; excluded spans are retained as policy metadata but are not captured source spans. +- The normalized document `source_ref` carries `elf.source_lifecycle/v1` metadata + with active/current state at capture time. The lifecycle object is source audit + metadata; it does not promote the source into Memory Ledger authority. - This endpoint must not create Memory Ledger notes, graph facts, knowledge pages, search traces, or recall hits. @@ -1874,6 +1878,9 @@ Response: Behavior: - Marks the Source Library document `deleted` when the caller owns the document and the document scope is writable. +- Updates the stored document `source_ref.lifecycle` to + `elf.source_lifecycle/v1` with `status = "deleted"`, `freshness = "tombstoned"`, + a delete actor, `deleted_at`, and a tombstone reference. - Enqueues a doc-index `DELETE` job for every persisted document chunk so the worker removes derived Qdrant doc-vector points. - Repeating delete on an already deleted document returns `op = NONE`. 
diff --git a/makefiles/check.toml b/makefiles/check.toml index c6ab6569..0e8b4980 100644 --- a/makefiles/check.toml +++ b/makefiles/check.toml @@ -1,6 +1,6 @@ -# Rust workspace tasks: Check. +# Rust workspace tasks: Checks. -[tasks.check] +[tasks.checks] clear = true workspace = false dependencies = [ diff --git a/packages/elf-service/src/docs.rs b/packages/elf-service/src/docs.rs index e641bcb0..b37942a4 100644 --- a/packages/elf-service/src/docs.rs +++ b/packages/elf-service/src/docs.rs @@ -16,7 +16,8 @@ pub use api::{ DocsExcerptsGetRequest, DocsGetRequest, DocsGetResponse, DocsPutRequest, DocsPutResponse, DocsSearchL0Item, DocsSearchL0ItemHashes, DocsSearchL0ItemLocator, DocsSearchL0ItemPointer, DocsSearchL0ItemReference, DocsSearchL0ItemState, DocsSearchL0Request, DocsSearchL0Response, - DocsSourceCaptureSummary, DocsSourceSpanRef, TextPositionSelector, TextQuoteSelector, + DocsSourceCaptureSummary, DocsSourceLifecycle, DocsSourceSpanRef, TextPositionSelector, + TextQuoteSelector, }; use std::{ @@ -65,7 +66,8 @@ use search_support::{ }; use source_capture::{ build_doc_chunk_rows, build_source_capture_summary, doc_chunk_id_for, - normalize_source_ref_for_capture, source_record_id_for, source_span_id, + normalize_source_ref_for_capture, source_record_id_for, source_ref_with_deleted_lifecycle, + source_span_id, }; use types::{ ByteChunk, DEFAULT_DOC_MAX_BYTES, DEFAULT_L0_MAX_BYTES, DEFAULT_L1_MAX_BYTES, diff --git a/packages/elf-service/src/docs/api.rs b/packages/elf-service/src/docs/api.rs index 6842b954..83f9e301 100644 --- a/packages/elf-service/src/docs/api.rs +++ b/packages/elf-service/src/docs/api.rs @@ -11,7 +11,10 @@ pub use self::{ excerpts::{ DocsExcerptLocator, DocsExcerptResponse, DocsExcerptVerification, DocsExcerptsGetRequest, }, - put::{DocsPutRequest, DocsPutResponse, DocsSourceCaptureSummary, DocsSourceSpanRef}, + put::{ + DocsPutRequest, DocsPutResponse, DocsSourceCaptureSummary, DocsSourceLifecycle, + DocsSourceSpanRef, + }, 
read::{DocsDeleteRequest, DocsDeleteResponse, DocsGetRequest, DocsGetResponse}, search_l0::{ DocsSearchL0Item, DocsSearchL0ItemHashes, DocsSearchL0ItemLocator, DocsSearchL0ItemPointer, diff --git a/packages/elf-service/src/docs/api/put.rs b/packages/elf-service/src/docs/api/put.rs index 6c593f3b..46eac337 100644 --- a/packages/elf-service/src/docs/api/put.rs +++ b/packages/elf-service/src/docs/api/put.rs @@ -53,6 +53,8 @@ pub struct DocsSourceCaptureSummary { pub schema: String, /// Stable source record identifier. This is also the stored `doc_id`. pub source_record_id: Uuid, + /// Source-record lifecycle and freshness state at capture time. + pub lifecycle: DocsSourceLifecycle, /// Canonical source origin used for operator inspection and deduplication. pub origin: String, /// RFC3339 timestamp when ELF captured the source. @@ -73,6 +75,29 @@ pub struct DocsSourceCaptureSummary { pub policy_spans: Vec<DocsSourceSpanRef>, } +/// Source Library lifecycle metadata persisted with normalized source refs. +#[derive(Clone, Debug, Serialize)] +pub struct DocsSourceLifecycle { + /// Schema identifier for this lifecycle object. + pub schema: String, + /// Authoritative Source Library row status. + pub status: String, + /// Freshness label used by recall and audit surfaces. + pub freshness: String, + /// Actor that created the current lifecycle transition. + pub actor_agent_id: String, + /// Transition timestamp. + pub ts: String, + /// Machine-readable lifecycle reason. + pub reason_code: String, + #[serde(skip_serializing_if = "Option::is_none")] + /// Timestamp when the source was tombstoned or deleted. + pub deleted_at: Option<String>, + #[serde(skip_serializing_if = "Option::is_none")] + /// Review or delete metadata that proves the tombstone. + pub tombstone_ref: Option<Value>, +} + /// Stable reference to one captured or policy-affected source span. 
#[derive(Clone, Debug, Serialize)] pub struct DocsSourceSpanRef { diff --git a/packages/elf-service/src/docs/service.rs b/packages/elf-service/src/docs/service.rs index 7c241bc5..2f9d85ea 100644 --- a/packages/elf-service/src/docs/service.rs +++ b/packages/elf-service/src/docs/service.rs @@ -20,8 +20,8 @@ use crate::{ load_doc_search_rows, load_docs_excerpt_context, load_tokenizer, normalize_source_ref_for_capture, record_result_projection_stage, resolve_doc_chunking_profile, run_doc_fusion_query, slice, source_record_id_for, - split_tokens_by_offsets, validate_docs_excerpts_get, validate_docs_put, - validate_docs_search_l0, + source_ref_with_deleted_lifecycle, split_tokens_by_offsets, validate_docs_excerpts_get, + validate_docs_put, validate_docs_search_l0, }, search, }; diff --git a/packages/elf-service/src/docs/service/put.rs b/packages/elf-service/src/docs/service/put.rs index 3be81c50..dd5284d4 100644 --- a/packages/elf-service/src/docs/service/put.rs +++ b/packages/elf-service/src/docs/service/put.rs @@ -53,6 +53,7 @@ impl ElfService { source_ref: source_ref_map, doc_type, scope: scope.as_str(), + actor_agent_id: agent_id.as_str(), title: title.as_deref(), content_hash: content_hash.as_str(), raw_content_hash: raw_content_hash.as_str(), diff --git a/packages/elf-service/src/docs/service/read.rs b/packages/elf-service/src/docs/service/read.rs index dd65978f..f7762936 100644 --- a/packages/elf-service/src/docs/service/read.rs +++ b/packages/elf-service/src/docs/service/read.rs @@ -176,8 +176,11 @@ FOR UPDATE", } let chunks = docs::list_doc_chunks(&mut *tx, row.doc_id).await?; + let tombstoned_source_ref = + service::source_ref_with_deleted_lifecycle(&row.source_ref, agent_id, now)?; - docs::mark_doc_deleted(&mut *tx, tenant_id, row.doc_id, now).await?; + docs::mark_doc_deleted(&mut *tx, tenant_id, row.doc_id, &tombstoned_source_ref, now) + .await?; for chunk in &chunks { doc_outbox::enqueue_doc_outbox( diff --git 
a/packages/elf-service/src/docs/source_capture.rs b/packages/elf-service/src/docs/source_capture.rs index c4852061..6da012bd 100644 --- a/packages/elf-service/src/docs/source_capture.rs +++ b/packages/elf-service/src/docs/source_capture.rs @@ -2,8 +2,8 @@ mod helpers; use crate::docs::{ ByteChunk, DOC_SOURCE_CAPTURE_SCHEMA_V1, DOC_SOURCE_SPAN_SCHEMA_V1, DocChunk, DocType, - DocsSourceCaptureSummary, DocsSourceSpanRef, Error, Map, OffsetDateTime, Result, - SourceCaptureSummaryInput, Uuid, Value, + DocsSourceCaptureSummary, DocsSourceLifecycle, DocsSourceSpanRef, Error, Map, OffsetDateTime, + Result, SourceCaptureSummaryInput, Uuid, Value, }; pub(super) fn build_doc_chunk_rows( @@ -77,6 +77,7 @@ pub(super) fn build_source_capture_summary( source_ref, doc_type, scope, + actor_agent_id, title, content_hash, raw_content_hash, @@ -109,10 +110,21 @@ pub(super) fn build_source_capture_summary( }) .collect(); let policy_spans = helpers::source_policy_spans(raw_content_hash, write_policy_audit); + let captured_at_for_lifecycle = captured_at.clone(); Ok(DocsSourceCaptureSummary { schema: DOC_SOURCE_CAPTURE_SCHEMA_V1.to_string(), source_record_id: doc_id, + lifecycle: DocsSourceLifecycle { + schema: "elf.source_lifecycle/v1".to_string(), + status: "active".to_string(), + freshness: "current".to_string(), + actor_agent_id: actor_agent_id.to_string(), + ts: captured_at_for_lifecycle, + reason_code: "SOURCE_CAPTURED".to_string(), + deleted_at: None, + tombstone_ref: None, + }, origin: helpers::source_origin(source_ref, doc_type), captured_at, content_hash: content_hash.to_string(), @@ -136,6 +148,12 @@ pub(super) fn normalize_source_ref_for_capture( "source_record_id".to_string(), Value::String(source_capture.source_record_id.to_string()), ); + source_ref.insert( + "lifecycle".to_string(), + serde_json::to_value(&source_capture.lifecycle).map_err(|_| Error::InvalidRequest { + message: "Failed to serialize source lifecycle.".to_string(), + })?, + ); 
source_ref.insert("origin".to_string(), Value::String(source_capture.origin.clone())); source_ref.insert("captured_at".to_string(), Value::String(source_capture.captured_at.clone())); source_ref @@ -164,3 +182,39 @@ pub(super) fn normalize_source_ref_for_capture( Ok(Value::Object(source_ref)) } + +pub(super) fn source_ref_with_deleted_lifecycle( + source_ref: &Value, + actor_agent_id: &str, + now: OffsetDateTime, +) -> Result<Value> { + let ts = helpers::format_timestamp(now)?; + let tombstone_ref = serde_json::json!({ + "schema": "elf.source_tombstone/v1", + "reason_code": "SOURCE_LIBRARY_DELETE", + "actor_agent_id": actor_agent_id, + "ts": ts, + }); + let lifecycle = DocsSourceLifecycle { + schema: "elf.source_lifecycle/v1".to_string(), + status: "deleted".to_string(), + freshness: "tombstoned".to_string(), + actor_agent_id: actor_agent_id.to_string(), + ts: ts.clone(), + reason_code: "SOURCE_LIBRARY_DELETE".to_string(), + deleted_at: Some(ts), + tombstone_ref: Some(tombstone_ref), + }; + let mut source_ref = source_ref.as_object().cloned().ok_or_else(|| Error::InvalidRequest { + message: "source_ref must be a JSON object.".to_string(), + })?; + + source_ref.insert( + "lifecycle".to_string(), + serde_json::to_value(lifecycle).map_err(|_| Error::InvalidRequest { + message: "Failed to serialize source lifecycle.".to_string(), + })?, + ); + + Ok(Value::Object(source_ref)) +} diff --git a/packages/elf-service/src/docs/tests_source_capture.rs b/packages/elf-service/src/docs/tests_source_capture.rs index 83d3820e..22574aea 100644 --- a/packages/elf-service/src/docs/tests_source_capture.rs +++ b/packages/elf-service/src/docs/tests_source_capture.rs @@ -57,6 +57,7 @@ fn source_capture_metadata_uses_stable_record_and_span_ids() { source_ref, doc_type: DocType::Knowledge, scope: "project_shared", + actor_agent_id: "owner", title: Some("Saved article"), content_hash, raw_content_hash: "raw-content-hash", @@ -69,6 +70,10 @@ fn 
source_capture_metadata_uses_stable_record_and_span_ids() { assert_eq!(doc_id, repeated_doc_id); assert_eq!(capture.schema, "doc_source_capture/v1"); assert_eq!(capture.source_record_id, doc_id); + assert_eq!(capture.lifecycle.schema, "elf.source_lifecycle/v1"); + assert_eq!(capture.lifecycle.status, "active"); + assert_eq!(capture.lifecycle.freshness, "current"); + assert_eq!(capture.lifecycle.actor_agent_id, "owner"); assert_eq!(capture.origin, "https://example.com/research/source-library"); assert_eq!(capture.captured_at, "2026-02-25T12:10:00Z"); assert_eq!(capture.content_hash, content_hash); @@ -120,6 +125,7 @@ fn normalized_source_ref_records_policy_span_reasons() { source_ref: source_ref_map, doc_type: DocType::Knowledge, scope: "project_shared", + actor_agent_id: "owner", title: None, content_hash: "stored-hash", raw_content_hash: "raw-hash", @@ -137,6 +143,8 @@ fn normalized_source_ref_records_policy_span_reasons() { assert_eq!(capture.policy_spans[1].status, "redacted"); assert_eq!(capture.policy_spans[1].reason_code.as_deref(), Some("WRITE_POLICY_REDACTION")); assert_eq!(normalized["source_record_id"], doc_id.to_string()); + assert_eq!(normalized["lifecycle"]["status"], "active"); + assert_eq!(normalized["lifecycle"]["freshness"], "current"); assert_eq!(normalized["origin"], "file:///tmp/source.txt"); assert_eq!(normalized["captured_at"], "2026-02-25T12:15:00Z"); assert_eq!(normalized["content_hash"], "stored-hash"); @@ -146,6 +154,33 @@ fn normalized_source_ref_records_policy_span_reasons() { assert_eq!(normalized["policy_spans"][1]["reason_code"], "WRITE_POLICY_REDACTION"); } +#[test] +fn deleted_source_ref_records_tombstone_lifecycle() { + let now = OffsetDateTime::parse("2026-02-25T12:15:00Z", &Rfc3339) + .expect("Expected test timestamp to parse."); + let source_ref = serde_json::json!({ + "schema": "doc_source_ref/v1", + "source_record_id": "11111111-1111-4111-8111-111111111111", + "origin": "file:///tmp/source.txt", + "lifecycle": { + "schema": 
"elf.source_lifecycle/v1", + "status": "active", + "freshness": "current" + } + }); + let deleted = docs::source_ref_with_deleted_lifecycle(&source_ref, "agent-a", now) + .expect("Expected source_ref tombstone metadata."); + + assert_eq!(deleted["source_record_id"], source_ref["source_record_id"]); + assert_eq!(deleted["lifecycle"]["schema"], "elf.source_lifecycle/v1"); + assert_eq!(deleted["lifecycle"]["status"], "deleted"); + assert_eq!(deleted["lifecycle"]["freshness"], "tombstoned"); + assert_eq!(deleted["lifecycle"]["actor_agent_id"], "agent-a"); + assert_eq!(deleted["lifecycle"]["reason_code"], "SOURCE_LIBRARY_DELETE"); + assert_eq!(deleted["lifecycle"]["deleted_at"], "2026-02-25T12:15:00Z"); + assert_eq!(deleted["lifecycle"]["tombstone_ref"]["reason_code"], "SOURCE_LIBRARY_DELETE"); +} + #[test] fn docs_l0_pointer_carries_hashes_and_position_locator() { let now = OffsetDateTime::parse("2026-02-25T12:00:00Z", &Rfc3339) diff --git a/packages/elf-service/src/docs/types/capture.rs b/packages/elf-service/src/docs/types/capture.rs index 3bf0ce9b..426b4133 100644 --- a/packages/elf-service/src/docs/types/capture.rs +++ b/packages/elf-service/src/docs/types/capture.rs @@ -11,6 +11,7 @@ pub(in crate::docs) struct SourceCaptureSummaryInput<'a> { pub(in crate::docs) source_ref: &'a Map, pub(in crate::docs) doc_type: DocType, pub(in crate::docs) scope: &'a str, + pub(in crate::docs) actor_agent_id: &'a str, pub(in crate::docs) title: Option<&'a str>, pub(in crate::docs) content_hash: &'a str, pub(in crate::docs) raw_content_hash: &'a str, diff --git a/packages/elf-service/src/history.rs b/packages/elf-service/src/history.rs index 4691d392..70fada7f 100644 --- a/packages/elf-service/src/history.rs +++ b/packages/elf-service/src/history.rs @@ -34,11 +34,31 @@ pub(crate) fn note_snapshot(note: &MemoryNote) -> Value { "expires_at": note.expires_at, "embedding_version": note.embedding_version, "source_ref": note.source_ref, + "freshness": memory_freshness(note), "hit_count": 
note.hit_count, "last_hit_at": note.last_hit_at, }) } +pub(crate) fn memory_freshness(note: &MemoryNote) -> Value { + let (status, reason_code) = match note.status.as_str() { + "active" if note.expires_at.is_some() => ("current", "TTL_ACTIVE"), + "active" => ("current", "ACTIVE_MEMORY"), + "deprecated" => ("superseded", "MEMORY_SUPERSEDED"), + "deleted" => ("tombstoned", "MEMORY_DELETED"), + other => (other, "UNKNOWN_MEMORY_STATUS"), + }; + + serde_json::json!({ + "schema": "elf.memory_freshness/v1", + "status": status, + "reason_code": reason_code, + "lifecycle_status": note.status, + "updated_at": note.updated_at, + "expires_at": note.expires_at, + }) +} + pub(crate) async fn insert_version<'e, E>(executor: E, args: InsertVersionArgs<'_>) -> Result where E: PgExecutor<'e>, diff --git a/packages/elf-service/src/memory_corrections/tests.rs b/packages/elf-service/src/memory_corrections/tests.rs index 1d8ce283..772e2753 100644 --- a/packages/elf-service/src/memory_corrections/tests.rs +++ b/packages/elf-service/src/memory_corrections/tests.rs @@ -110,6 +110,22 @@ fn correction_source_ref_preserves_prior_and_review_evidence() { assert_eq!(correction["schema"], "elf.memory_correction/v1"); assert_eq!(correction["action"], "supersede"); + assert_eq!(correction["lifecycle"]["schema"], "elf.memory_authority_lifecycle/v1"); + assert_eq!(correction["lifecycle"]["status"], "deprecated"); + assert_eq!(correction["lifecycle"]["freshness"], "superseded"); + assert_eq!(correction["lifecycle"]["reason_code"], "MEMORY_SUPERSEDED_BY_CORRECTION"); assert_eq!(correction["prior_source_ref"]["schema"], "prior"); assert_eq!(correction["correction_source_ref"]["schema"], "review"); } + +#[test] +fn memory_snapshots_include_freshness_state_for_lifecycle_readback() { + let active = crate::note_snapshot(&note("active")); + let superseded = crate::note_snapshot(&note("deprecated")); + let tombstoned = crate::note_snapshot(&note("deleted")); + + assert_eq!(active["freshness"]["schema"], 
"elf.memory_freshness/v1"); + assert_eq!(active["freshness"]["status"], "current"); + assert_eq!(superseded["freshness"]["status"], "superseded"); + assert_eq!(tombstoned["freshness"]["status"], "tombstoned"); +} diff --git a/packages/elf-service/src/memory_corrections/validation.rs b/packages/elf-service/src/memory_corrections/validation.rs index 46307324..d10d0c1b 100644 --- a/packages/elf-service/src/memory_corrections/validation.rs +++ b/packages/elf-service/src/memory_corrections/validation.rs @@ -84,6 +84,13 @@ pub(super) fn correction_source_ref_for( now: OffsetDateTime, restore_version_id: Option, ) -> Value { + let (status, freshness, reason_code) = match action { + MemoryCorrectionAction::Supersede => + ("deprecated", "superseded", "MEMORY_SUPERSEDED_BY_CORRECTION"), + MemoryCorrectionAction::Delete => ("deleted", "tombstoned", "MEMORY_DELETED_BY_CORRECTION"), + MemoryCorrectionAction::Restore => ("active", "current", "MEMORY_RESTORED_BY_ROLLBACK"), + }; + serde_json::json!({ "schema": "elf.memory_correction/v1", "action": action.as_str(), @@ -91,6 +98,15 @@ pub(super) fn correction_source_ref_for( "actor_agent_id": actor_agent_id, "ts": now, "restore_version_id": restore_version_id, + "lifecycle": { + "schema": "elf.memory_authority_lifecycle/v1", + "status": status, + "freshness": freshness, + "reason_code": reason_code, + "actor_agent_id": actor_agent_id, + "ts": now, + "restore_version_id": restore_version_id, + }, "prior_source_ref": prior_snapshot.get("source_ref").cloned().unwrap_or_else(empty_object), "prior_snapshot": prior_snapshot, "correction_source_ref": correction_source_ref, diff --git a/packages/elf-service/src/search/tests_retrieval_merge.rs b/packages/elf-service/src/search/tests_retrieval_merge.rs index f129ea53..3dd4139a 100644 --- a/packages/elf-service/src/search/tests_retrieval_merge.rs +++ b/packages/elf-service/src/search/tests_retrieval_merge.rs @@ -1,5 +1,7 @@ +use std::collections::HashMap; + use crate::search::{ - 
ChunkCandidate, RetrievalSourceCandidates, RetrievalSourceKind, Uuid, ranking, + ChunkCandidate, NoteMeta, RetrievalSourceCandidates, RetrievalSourceKind, Uuid, ranking, }; fn test_chunk_candidate(note_id: Uuid, retrieval_rank: u32) -> ChunkCandidate { @@ -26,6 +28,24 @@ fn default_retrieval_sources_policy() -> ranking::ResolvedRetrievalSourcesPolicy } } +fn note_meta(note_id: Uuid, updated_at: time::OffsetDateTime) -> NoteMeta { + NoteMeta { + note_id, + note_type: "fact".to_string(), + key: None, + scope: "project_shared".to_string(), + agent_id: "agent-a".to_string(), + importance: 0.7, + confidence: 0.9, + updated_at, + expires_at: None, + source_ref: serde_json::json!({}), + embedding_version: "provider:model:1".to_string(), + hit_count: 0, + last_hit_at: None, + } +} + #[test] fn merge_retrieval_candidates_keeps_structured_hits_under_full_fusion_capacity() { let mut fusion = Vec::new(); @@ -55,6 +75,31 @@ fn merge_retrieval_candidates_keeps_structured_hits_under_full_fusion_capacity() ); } +#[test] +fn candidate_revalidation_rejects_stale_index_payloads() { + let note_id = Uuid::new_v4(); + let current_updated_at = + time::OffsetDateTime::from_unix_timestamp(1_700_000_001).expect("Expected timestamp."); + let stale_updated_at = + time::OffsetDateTime::from_unix_timestamp(1_700_000_000).expect("Expected timestamp."); + let note_meta = HashMap::from([(note_id, note_meta(note_id, current_updated_at))]); + let mut candidate = test_chunk_candidate(note_id, 1); + + candidate.embedding_version = Some("provider:model:1".to_string()); + candidate.updated_at = Some(current_updated_at); + + assert!(ranking::candidate_matches_note(&note_meta, &candidate)); + + candidate.updated_at = Some(stale_updated_at); + + assert!(!ranking::candidate_matches_note(&note_meta, &candidate)); + + candidate.updated_at = Some(current_updated_at); + candidate.embedding_version = Some("old-provider:model:1".to_string()); + + assert!(!ranking::candidate_matches_note(&note_meta, &candidate)); +} + 
#[test] fn merge_retrieval_candidates_prefers_dual_source_signal_on_tie() { let shared_note_id = Uuid::new_v4(); diff --git a/packages/elf-storage/src/docs/documents.rs b/packages/elf-storage/src/docs/documents.rs index 68049760..13ba986b 100644 --- a/packages/elf-storage/src/docs/documents.rs +++ b/packages/elf-storage/src/docs/documents.rs @@ -112,6 +112,7 @@ pub async fn mark_doc_deleted<'e, E>( executor: E, tenant_id: &str, doc_id: Uuid, + source_ref: &Value, now: OffsetDateTime, ) -> Result<()> where @@ -120,9 +121,10 @@ where sqlx::query( "\ UPDATE doc_documents -SET status = 'deleted', updated_at = $1 -WHERE tenant_id = $2 AND doc_id = $3", +SET status = 'deleted', source_ref = $1, updated_at = $2 +WHERE tenant_id = $3 AND doc_id = $4", ) + .bind(source_ref) .bind(now) .bind(tenant_id) .bind(doc_id) diff --git a/scripts/check-docs.py b/scripts/check-docs.py index ba14f1f8..f85d6cea 100755 --- a/scripts/check-docs.py +++ b/scripts/check-docs.py @@ -11,6 +11,10 @@ TASK_RE = re.compile(r"^\[tasks\.([^\]]+)\]", re.MULTILINE) CARGO_MAKE_RE = re.compile(r"\bcargo\s+make\s+([A-Za-z0-9][A-Za-z0-9_:-]*)") MARKDOWN_LINK_RE = re.compile(r"!?\[[^\]\n]*\]\(([^)\n]+)\)") +LEGACY_CARGO_MAKE_TASK_REFS = { + Path("docs/evidence/benchmarking/2026-06-20-dreaming-review-queue-report.md"): {"check"}, + Path("docs/evidence/benchmarking/2026-06-20-graph-topic-map-report.md"): {"check"}, +} def read_text(path: Path) -> str: @@ -90,11 +94,13 @@ def is_external_or_anchor(target: str) -> bool: def check_cargo_make_references(tasks: set[str]) -> list[str]: errors: list[str] = [] for path in iter_reference_files(): + rel_path = path.relative_to(ROOT) for line_number, line in enumerate(read_text(path).splitlines(), start=1): for match in CARGO_MAKE_RE.finditer(line): task = match.group(1) + if task in LEGACY_CARGO_MAKE_TASK_REFS.get(rel_path, set()): + continue if task not in tasks: - rel_path = path.relative_to(ROOT) errors.append(f"{rel_path}:{line_number}: unknown cargo make task 
`{task}`") return errors