From 0880bb3068ec4c706c571711aa2ac5fb1bb19635 Mon Sep 17 00:00:00 2001 From: Yvette Carlisle Date: Wed, 1 Jul 2026 00:32:40 -0400 Subject: [PATCH] {"schema":"decodex/commit/1","summary":"Batch modularize oversized Rust test files","authority":"manual"} --- .../competitor_strength_live.rs | 374 +-------------- .../json_boundaries.rs | 278 +++++++++++ .../text_boundaries.rs | 95 ++++ .../trace_replay_reports.rs | 395 +--------------- .../trace_replay_reports_graph_topic_map.rs | 44 ++ .../trace_replay_reports_qmd_trace_replay.rs | 305 ++++++++++++ .../trace_replay_reports_source_scan.rs | 52 +++ .../src/memory_policy/evaluation.rs | 122 +++++ .../elf-domain/src/memory_policy/support.rs | 287 ++++++++++++ .../elf-domain/src/memory_policy/tests.rs | 410 +--------------- .../acceptance/knowledge_pages/helpers.rs | 440 +----------------- .../knowledge_pages/helpers/assertions.rs | 34 ++ .../knowledge_pages/helpers/request.rs | 23 + .../knowledge_pages/helpers/setup.rs | 43 ++ .../knowledge_pages/helpers/source_inserts.rs | 341 ++++++++++++++ 15 files changed, 1648 insertions(+), 1595 deletions(-) create mode 100644 apps/elf-eval/tests/real_world_job_benchmark/competitor_strength_live/json_boundaries.rs create mode 100644 apps/elf-eval/tests/real_world_job_benchmark/competitor_strength_live/text_boundaries.rs create mode 100644 apps/elf-eval/tests/real_world_job_benchmark/trace_replay_reports_graph_topic_map.rs create mode 100644 apps/elf-eval/tests/real_world_job_benchmark/trace_replay_reports_qmd_trace_replay.rs create mode 100644 apps/elf-eval/tests/real_world_job_benchmark/trace_replay_reports_source_scan.rs create mode 100644 packages/elf-domain/src/memory_policy/evaluation.rs create mode 100644 packages/elf-domain/src/memory_policy/support.rs create mode 100644 packages/elf-service/tests/acceptance/knowledge_pages/helpers/assertions.rs create mode 100644 packages/elf-service/tests/acceptance/knowledge_pages/helpers/request.rs create mode 100644 
packages/elf-service/tests/acceptance/knowledge_pages/helpers/setup.rs create mode 100644 packages/elf-service/tests/acceptance/knowledge_pages/helpers/source_inserts.rs diff --git a/apps/elf-eval/tests/real_world_job_benchmark/competitor_strength_live.rs b/apps/elf-eval/tests/real_world_job_benchmark/competitor_strength_live.rs index 3e35242a..1ddcef64 100644 --- a/apps/elf-eval/tests/real_world_job_benchmark/competitor_strength_live.rs +++ b/apps/elf-eval/tests/real_world_job_benchmark/competitor_strength_live.rs @@ -1,3 +1,6 @@ +mod json_boundaries; +mod text_boundaries; + use std::fs; use color_eyre::Result; @@ -26,380 +29,21 @@ fn current_benchmark_reports_preserve_live_sweep_boundaries() -> Result<()> { support::temporal_history_competitor_gap_json_path()?, )?)?; - assert_current_report_text_boundaries( + text_boundaries::assert_current_report_text_boundaries( &measurement_audit, &competitor_matrix, &iteration_direction, &external_manifest, &comparison_external_projects, ); + text_boundaries::assert_measurement_audit_adapter_status_counts(&measurement_audit); + json_boundaries::assert_measurement_audit_json(&measurement_audit_json)?; + json_boundaries::assert_retrieval_debug_profile_json(&retrieval_debug_profile); + json_boundaries::assert_competitor_strength_matrix_json(&competitor_matrix_json)?; + json_boundaries::assert_temporal_history_json(&temporal_history)?; assert!(competitor_matrix.contains("claude-mem work_resume remains `not_encoded`")); assert!(!competitor_matrix.contains("claude-mem `wrong_result`, OpenViking work_resume")); - let qmd_live = support::find_by_field( - support::array_at(&measurement_audit_json, "/live_real_world_adapters")?, - "/adapter", - "qmd live CLI adapter", - )?; - - assert_eq!(qmd_live.pointer("/pass").and_then(Value::as_u64), Some(17)); - assert_eq!(qmd_live.pointer("/wrong_result").and_then(Value::as_u64), Some(6)); - assert_eq!(qmd_live.pointer("/expected_evidence_matched").and_then(Value::as_u64), Some(38)); - 
assert_eq!(qmd_live.pointer("/evidence_covered_count").and_then(Value::as_u64), Some(45)); - - let memory_evolution = support::find_by_field( - support::array_at(&measurement_audit_json, "/live_suite_breakdown")?, - "/suite", - "memory_evolution", - )?; - - assert_eq!( - memory_evolution.pointer("/elf_status_counts/wrong_result").and_then(Value::as_u64), - Some(5) - ); - assert_eq!( - memory_evolution.pointer("/qmd_status_counts/wrong_result").and_then(Value::as_u64), - Some(6) - ); - assert_eq!( - retrieval_debug_profile - .pointer("/live_real_world_full_sweep_context/qmd/pass") - .and_then(Value::as_u64), - Some(17) - ); - assert_eq!( - retrieval_debug_profile - .pointer("/live_real_world_full_sweep_context/qmd/wrong_result") - .and_then(Value::as_u64), - Some(6) - ); - - assert_competitor_strength_matrix_json(&competitor_matrix_json)?; - - let openmemory_command = support::find_by_field( - support::array_at(&temporal_history, "/commands")?, - "/command", - "cargo make openmemory-ui-export-readback", - )?; - - assert!( - openmemory_command - .pointer("/artifact") - .and_then(Value::as_str) - .is_some_and(|artifact| artifact.contains("tmp/live-baseline/mem0-checks.json") - && artifact.contains("tmp/live-baseline/mem0-openmemory-ui-export.json")) - ); - - Ok(()) -} - -fn assert_current_report_text_boundaries( - measurement_audit: &str, - competitor_matrix: &str, - iteration_direction: &str, - external_manifest: &str, - comparison_external_projects: &str, -) { - assert!( - measurement_audit.contains( - "| `memory_evolution` | `6` | `pass:1`, `wrong_result:5` | `wrong_result:6` |" - ) - ); - assert!( - measurement_audit - .contains("qmd live fails 6/6 jobs after missing the delete/TTL tombstone evidence") - ); - assert!(measurement_audit.contains("Basic local smoke and local OSS history/readback pass")); - assert!(measurement_audit.contains("claude-mem hook/viewer capture is `blocked`")); - assert!(!measurement_audit.contains("claude-mem hook/viewer capture remains 
untested")); - assert!(!measurement_audit.contains("blocked or untested")); - - assert_measurement_audit_adapter_status_counts(measurement_audit); - - assert!( - competitor_matrix - .contains("broader live suites remain `wrong_result`, `blocked`, or `not_encoded`") - ); - assert!(competitor_matrix.contains( - "Overall adapter-status counts: 4 `pass`,\n6 `wrong_result`, 1 `lifecycle_fail`, 7 `blocked`, and 5 `not_encoded`." - )); - assert!(!competitor_matrix.contains("5 `blocked`, and 7 `not_encoded`")); - assert!( - competitor_matrix - .contains("mem0/OpenMemory local OSS entity-scoped personalization now passes") - ); - assert!(competitor_matrix.contains("scoped preference behavior is a measured tie")); - assert!( - !competitor_matrix.contains("mem0/OpenMemory and Letta personalization are `not_encoded`") - ); - assert!(external_manifest.contains( - "The record is a full-suite sweep, not a full-suite pass; wrong_result, blocked, and not_encoded states remain visible." - )); - assert!(external_manifest.contains( - "The qmd live real-world sweep covers the current encoded fixture corpus; expanded retrieval-debug strength suites still need their own materialized adapter run." 
- )); - assert!( - comparison_external_projects - .contains("Benchmark-grounded for scoped local OSS same-corpus retrieval") - ); - assert!( - comparison_external_projects - .contains("Benchmark-grounded for local same-corpus retrieval, reindex/update/delete") - ); - assert!(iteration_direction.contains("| Jobs | `55` |")); - assert!(iteration_direction.contains("| Encoded suites | `15` |")); - assert!(iteration_direction.contains("| Pass | `49` |")); - assert!(iteration_direction.contains("| Evidence coverage | `123/123` |")); - assert!(iteration_direction.contains("| Expected evidence recall | `115/115` |")); - - for stale_phrase in [ - "same live sweep shape as ELF", - "ELF and qmd live fail 5/6 jobs", - "both systems currently fail 5/6 live memory-evolution jobs", - "wrong_result, incomplete, blocked, and not_encoded states remain visible", - "broader live suites remain `wrong_result`, `incomplete`, or `not_encoded`", - "The qmd live real-world slice covers representative jobs only", - "| Jobs | `40` |", - "| Encoded suites | `11` |", - "| Jobs | `50` |", - "| Encoded suites | `14` |", - "| Pass | `38` |", - "| Pass | `45` |", - "| Evidence coverage | `115/115` |", - "| Expected evidence recall | `107/107` |", - "history/UI/hosted/graph behavior remains", - "current local adapter is incomplete/wrong-result", - "current adapter is incomplete/invalid-result", - ] { - assert!(!measurement_audit.contains(stale_phrase)); - assert!(!competitor_matrix.contains(stale_phrase)); - assert!(!iteration_direction.contains(stale_phrase)); - assert!(!external_manifest.contains(stale_phrase)); - assert!(!comparison_external_projects.contains(stale_phrase)); - } -} - -fn assert_competitor_strength_matrix_json(matrix: &Value) -> Result<()> { - let projects = support::array_at(matrix, "/project_matrix")?; - let scenarios = support::array_at(matrix, "/scenario_matrix")?; - - assert_competitor_strength_matrix_manifest_counts(matrix); - 
assert_competitor_strength_matrix_project_json(projects)?; - assert_competitor_strength_matrix_scenario_json(scenarios)?; - - Ok(()) -} - -fn assert_competitor_strength_matrix_project_json(projects: &[Value]) -> Result<()> { - let qmd = support::find_by_field(projects, "/project", "qmd")?; - let mem0 = support::find_by_field(projects, "/project", "mem0/OpenMemory")?; - let claude_mem = support::find_by_field(projects, "/project", "claude-mem")?; - let openviking = support::find_by_field(projects, "/project", "OpenViking")?; - - assert_eq!( - qmd.pointer("/current_evidence_class").and_then(Value::as_str), - Some("live_real_world") - ); - assert_eq!(qmd.pointer("/measured_status").and_then(Value::as_str), Some("wrong_result")); - assert_eq!( - qmd.pointer("/unsupported_or_blocked_status/state").and_then(Value::as_str), - Some("not_encoded") - ); - assert!(qmd.pointer("/benchmark_before_claim").and_then(Value::as_str).is_some_and(|claim| { - claim.contains("Keep qmd deep retrieval/debug profiling separate") - && claim.contains("narrow operator-debug live slice") - })); - assert!( - qmd.pointer("/borrow_if_stronger") - .and_then(Value::as_str) - .is_some_and(|claim| claim.contains("transparent local knobs")) - ); - assert_eq!(mem0.pointer("/measured_status").and_then(Value::as_str), Some("pass")); - assert_eq!( - mem0.pointer("/unsupported_or_blocked_status/state").and_then(Value::as_str), - Some("blocked") - ); - assert_eq!( - mem0.pointer("/unsupported_or_blocked_status/typed_reason").and_then(Value::as_str), - Some("openmemory_export_helper_setup_blocked") - ); - assert!( - mem0.pointer("/benchmark_before_claim") - .and_then(Value::as_str) - .is_some_and(|claim| claim.contains("OpenMemory product app import/export")) - ); - assert!( - claude_mem - .pointer("/unsupported_or_blocked_status/details") - .and_then(Value::as_str) - .is_some_and(|details| details.contains("rerun/inspection targets") - && details.contains("tmp/live-baseline/claude-mem-checks.json")) - ); - 
assert_eq!( - openviking.pointer("/current_evidence_class").and_then(Value::as_str), - Some("live_baseline_only") - ); - assert_eq!( - openviking.pointer("/measured_status").and_then(Value::as_str), - Some("wrong_result") - ); - assert_eq!( - openviking.pointer("/unsupported_or_blocked_status/state").and_then(Value::as_str), - Some("blocked") - ); - assert!( - openviking - .pointer("/unsupported_or_blocked_status/details") - .and_then(Value::as_str) - .is_some_and(|details| details.contains("encoded as blocked fixtures")) - ); - assert!( - openviking - .pointer("/benchmark_before_claim") - .and_then(Value::as_str) - .is_some_and(|claim| claim.contains("evidence-bearing same-corpus output pass")) - ); - Ok(()) } - -fn assert_competitor_strength_matrix_scenario_json(scenarios: &[Value]) -> Result<()> { - let retrieval_debug = support::find_by_field(scenarios, "/scenario_id", "retrieval_debug")?; - let work_resume = support::find_by_field(scenarios, "/scenario_id", "work_resume")?; - let operator_debug = support::find_by_field(scenarios, "/scenario_id", "operator_debugging")?; - let context_trajectory = - support::find_by_field(scenarios, "/scenario_id", "context_trajectory")?; - let consolidation = support::find_by_field(scenarios, "/scenario_id", "consolidation")?; - - assert!( - retrieval_debug - .pointer("/current_state") - .and_then(Value::as_str) - .is_some_and(|state| state.contains("Measured tie on encoded retrieval answers")) - ); - assert!(retrieval_debug.pointer("/current_state").and_then(Value::as_str).is_some_and( - |state| state.contains("qmd remains stronger on local debug ergonomics not fully scored") - )); - assert!( - work_resume - .pointer("/current_competitor_evidence") - .and_then(Value::as_str) - .is_some_and(|claim| claim.contains("claude-mem work_resume remains not_encoded") - && !claim.contains("claude-mem is wrong_result")) - ); - assert!( - operator_debug - .pointer("/current_elf_evidence") - .and_then(Value::as_str) - .is_some_and(|claim| 
claim.contains("narrow live_real_world operator-debug slice")) - ); - assert!( - operator_debug - .pointer("/current_competitor_evidence") - .and_then(Value::as_str) - .is_some_and(|claim| claim.contains("qmd now has a narrow live_real_world")) - ); - assert!( - operator_debug - .pointer("/next_measurement") - .and_then(Value::as_str) - .is_some_and(|claim| claim.contains("OpenMemory and claude-mem UI/export")) - ); - assert!( - consolidation - .pointer("/current_elf_evidence") - .and_then(Value::as_str) - .is_some_and(|claim| claim.contains("XY-934 adds live_real_world") - && claim.contains("zero source mutations")) - ); - assert!( - consolidation - .pointer("/current_competitor_evidence") - .and_then(Value::as_str) - .is_some_and(|claim| claim.contains("qmd remains not_encoded") - && claim.contains("product references only")) - ); - - let personalization = support::find_by_field(scenarios, "/scenario_id", "personalization")?; - - assert_personalization_matrix_record(personalization); - - assert!( - context_trajectory - .pointer("/current_state") - .and_then(Value::as_str) - .is_some_and(|state| state.contains("not a measured live winner")) - ); - assert!( - context_trajectory - .pointer("/next_measurement") - .and_then(Value::as_str) - .is_some_and(|measurement| measurement.contains("evidence-bearing retrieval pass")) - ); - - Ok(()) -} - -fn assert_personalization_matrix_record(personalization: &Value) { - assert!( - personalization - .pointer("/current_competitor_evidence") - .and_then(Value::as_str) - .is_some_and(|claim| claim - .contains("mem0/OpenMemory local OSS entity-scoped personalization now passes") - && claim.contains("Letta personalization is research_gate not_encoded")) - ); - assert!( - personalization - .pointer("/current_state") - .and_then(Value::as_str) - .is_some_and(|state| state.contains("scoped personalization is a tie")) - ); -} - -fn assert_competitor_strength_matrix_manifest_counts(matrix: &Value) { - assert_eq!( - 
matrix.pointer("/manifest_summary/adapter_records").and_then(Value::as_u64), - Some(23) - ); - assert_eq!( - matrix - .pointer("/manifest_summary/evidence_class_counts/live_real_world") - .and_then(Value::as_u64), - Some(5) - ); - assert_eq!( - matrix.pointer("/manifest_summary/overall_status_counts/pass").and_then(Value::as_u64), - Some(4) - ); - assert_eq!( - matrix.pointer("/manifest_summary/overall_status_counts/blocked").and_then(Value::as_u64), - Some(7) - ); - assert_eq!( - matrix - .pointer("/manifest_summary/overall_status_counts/not_encoded") - .and_then(Value::as_u64), - Some(5) - ); - assert_eq!( - matrix - .pointer("/manifest_summary/overall_status_counts/wrong_result") - .and_then(Value::as_u64), - Some(6) - ); -} - -fn assert_measurement_audit_adapter_status_counts(markdown: &str) { - for expected in [ - "| `blocked` | `7` |", - "| `not_encoded` | `5` |", - "The generated JSON report emits `external_project_count: 16`", - ] { - assert!(markdown.contains(expected), "missing measurement audit text: {expected}"); - } - for stale in ["| `blocked` | `6` |", "| `not_encoded` | `6` |"] { - assert!(!markdown.contains(stale), "stale measurement audit text: {stale}"); - } -} diff --git a/apps/elf-eval/tests/real_world_job_benchmark/competitor_strength_live/json_boundaries.rs b/apps/elf-eval/tests/real_world_job_benchmark/competitor_strength_live/json_boundaries.rs new file mode 100644 index 00000000..cd65e437 --- /dev/null +++ b/apps/elf-eval/tests/real_world_job_benchmark/competitor_strength_live/json_boundaries.rs @@ -0,0 +1,278 @@ +use color_eyre::Result; +use serde_json::Value; + +use crate::support; + +pub(crate) fn assert_measurement_audit_json(measurement_audit_json: &Value) -> Result<()> { + let qmd_live = support::find_by_field( + support::array_at(measurement_audit_json, "/live_real_world_adapters")?, + "/adapter", + "qmd live CLI adapter", + )?; + + assert_eq!(qmd_live.pointer("/pass").and_then(Value::as_u64), Some(17)); + 
assert_eq!(qmd_live.pointer("/wrong_result").and_then(Value::as_u64), Some(6)); + assert_eq!(qmd_live.pointer("/expected_evidence_matched").and_then(Value::as_u64), Some(38)); + assert_eq!(qmd_live.pointer("/evidence_covered_count").and_then(Value::as_u64), Some(45)); + + let memory_evolution = support::find_by_field( + support::array_at(measurement_audit_json, "/live_suite_breakdown")?, + "/suite", + "memory_evolution", + )?; + + assert_eq!( + memory_evolution.pointer("/elf_status_counts/wrong_result").and_then(Value::as_u64), + Some(5) + ); + assert_eq!( + memory_evolution.pointer("/qmd_status_counts/wrong_result").and_then(Value::as_u64), + Some(6) + ); + + Ok(()) +} + +pub(crate) fn assert_retrieval_debug_profile_json(retrieval_debug_profile: &Value) { + assert_eq!( + retrieval_debug_profile + .pointer("/live_real_world_full_sweep_context/qmd/pass") + .and_then(Value::as_u64), + Some(17) + ); + assert_eq!( + retrieval_debug_profile + .pointer("/live_real_world_full_sweep_context/qmd/wrong_result") + .and_then(Value::as_u64), + Some(6) + ); +} + +pub(crate) fn assert_temporal_history_json(temporal_history: &Value) -> Result<()> { + let openmemory_command = support::find_by_field( + support::array_at(temporal_history, "/commands")?, + "/command", + "cargo make openmemory-ui-export-readback", + )?; + + assert!( + openmemory_command + .pointer("/artifact") + .and_then(Value::as_str) + .is_some_and(|artifact| artifact.contains("tmp/live-baseline/mem0-checks.json") + && artifact.contains("tmp/live-baseline/mem0-openmemory-ui-export.json")) + ); + + Ok(()) +} + +pub(crate) fn assert_competitor_strength_matrix_json(matrix: &Value) -> Result<()> { + let projects = support::array_at(matrix, "/project_matrix")?; + let scenarios = support::array_at(matrix, "/scenario_matrix")?; + + assert_competitor_strength_matrix_manifest_counts(matrix); + assert_competitor_strength_matrix_project_json(projects)?; + assert_competitor_strength_matrix_scenario_json(scenarios)?; + + Ok(()) 
+} + +fn assert_competitor_strength_matrix_project_json(projects: &[Value]) -> Result<()> { + let qmd = support::find_by_field(projects, "/project", "qmd")?; + let mem0 = support::find_by_field(projects, "/project", "mem0/OpenMemory")?; + let claude_mem = support::find_by_field(projects, "/project", "claude-mem")?; + let openviking = support::find_by_field(projects, "/project", "OpenViking")?; + + assert_eq!( + qmd.pointer("/current_evidence_class").and_then(Value::as_str), + Some("live_real_world") + ); + assert_eq!(qmd.pointer("/measured_status").and_then(Value::as_str), Some("wrong_result")); + assert_eq!( + qmd.pointer("/unsupported_or_blocked_status/state").and_then(Value::as_str), + Some("not_encoded") + ); + assert!(qmd.pointer("/benchmark_before_claim").and_then(Value::as_str).is_some_and(|claim| { + claim.contains("Keep qmd deep retrieval/debug profiling separate") + && claim.contains("narrow operator-debug live slice") + })); + assert!( + qmd.pointer("/borrow_if_stronger") + .and_then(Value::as_str) + .is_some_and(|claim| claim.contains("transparent local knobs")) + ); + assert_eq!(mem0.pointer("/measured_status").and_then(Value::as_str), Some("pass")); + assert_eq!( + mem0.pointer("/unsupported_or_blocked_status/state").and_then(Value::as_str), + Some("blocked") + ); + assert_eq!( + mem0.pointer("/unsupported_or_blocked_status/typed_reason").and_then(Value::as_str), + Some("openmemory_export_helper_setup_blocked") + ); + assert!( + mem0.pointer("/benchmark_before_claim") + .and_then(Value::as_str) + .is_some_and(|claim| claim.contains("OpenMemory product app import/export")) + ); + assert!( + claude_mem + .pointer("/unsupported_or_blocked_status/details") + .and_then(Value::as_str) + .is_some_and(|details| details.contains("rerun/inspection targets") + && details.contains("tmp/live-baseline/claude-mem-checks.json")) + ); + assert_eq!( + openviking.pointer("/current_evidence_class").and_then(Value::as_str), + Some("live_baseline_only") + ); + assert_eq!( 
+ openviking.pointer("/measured_status").and_then(Value::as_str), + Some("wrong_result") + ); + assert_eq!( + openviking.pointer("/unsupported_or_blocked_status/state").and_then(Value::as_str), + Some("blocked") + ); + assert!( + openviking + .pointer("/unsupported_or_blocked_status/details") + .and_then(Value::as_str) + .is_some_and(|details| details.contains("encoded as blocked fixtures")) + ); + assert!( + openviking + .pointer("/benchmark_before_claim") + .and_then(Value::as_str) + .is_some_and(|claim| claim.contains("evidence-bearing same-corpus output pass")) + ); + + Ok(()) +} + +fn assert_competitor_strength_matrix_scenario_json(scenarios: &[Value]) -> Result<()> { + let retrieval_debug = support::find_by_field(scenarios, "/scenario_id", "retrieval_debug")?; + let work_resume = support::find_by_field(scenarios, "/scenario_id", "work_resume")?; + let operator_debug = support::find_by_field(scenarios, "/scenario_id", "operator_debugging")?; + let context_trajectory = + support::find_by_field(scenarios, "/scenario_id", "context_trajectory")?; + let consolidation = support::find_by_field(scenarios, "/scenario_id", "consolidation")?; + + assert!( + retrieval_debug + .pointer("/current_state") + .and_then(Value::as_str) + .is_some_and(|state| state.contains("Measured tie on encoded retrieval answers")) + ); + assert!(retrieval_debug.pointer("/current_state").and_then(Value::as_str).is_some_and( + |state| state.contains("qmd remains stronger on local debug ergonomics not fully scored") + )); + assert!( + work_resume + .pointer("/current_competitor_evidence") + .and_then(Value::as_str) + .is_some_and(|claim| claim.contains("claude-mem work_resume remains not_encoded") + && !claim.contains("claude-mem is wrong_result")) + ); + assert!( + operator_debug + .pointer("/current_elf_evidence") + .and_then(Value::as_str) + .is_some_and(|claim| claim.contains("narrow live_real_world operator-debug slice")) + ); + assert!( + operator_debug + 
.pointer("/current_competitor_evidence") + .and_then(Value::as_str) + .is_some_and(|claim| claim.contains("qmd now has a narrow live_real_world")) + ); + assert!( + operator_debug + .pointer("/next_measurement") + .and_then(Value::as_str) + .is_some_and(|claim| claim.contains("OpenMemory and claude-mem UI/export")) + ); + assert!( + consolidation + .pointer("/current_elf_evidence") + .and_then(Value::as_str) + .is_some_and(|claim| claim.contains("XY-934 adds live_real_world") + && claim.contains("zero source mutations")) + ); + assert!( + consolidation + .pointer("/current_competitor_evidence") + .and_then(Value::as_str) + .is_some_and(|claim| claim.contains("qmd remains not_encoded") + && claim.contains("product references only")) + ); + + let personalization = support::find_by_field(scenarios, "/scenario_id", "personalization")?; + + assert_personalization_matrix_record(personalization); + + assert!( + context_trajectory + .pointer("/current_state") + .and_then(Value::as_str) + .is_some_and(|state| state.contains("not a measured live winner")) + ); + assert!( + context_trajectory + .pointer("/next_measurement") + .and_then(Value::as_str) + .is_some_and(|measurement| measurement.contains("evidence-bearing retrieval pass")) + ); + + Ok(()) +} + +fn assert_personalization_matrix_record(personalization: &Value) { + assert!( + personalization + .pointer("/current_competitor_evidence") + .and_then(Value::as_str) + .is_some_and(|claim| claim + .contains("mem0/OpenMemory local OSS entity-scoped personalization now passes") + && claim.contains("Letta personalization is research_gate not_encoded")) + ); + assert!( + personalization + .pointer("/current_state") + .and_then(Value::as_str) + .is_some_and(|state| state.contains("scoped personalization is a tie")) + ); +} + +fn assert_competitor_strength_matrix_manifest_counts(matrix: &Value) { + assert_eq!( + matrix.pointer("/manifest_summary/adapter_records").and_then(Value::as_u64), + Some(23) + ); + assert_eq!( + matrix + 
.pointer("/manifest_summary/evidence_class_counts/live_real_world") + .and_then(Value::as_u64), + Some(5) + ); + assert_eq!( + matrix.pointer("/manifest_summary/overall_status_counts/pass").and_then(Value::as_u64), + Some(4) + ); + assert_eq!( + matrix.pointer("/manifest_summary/overall_status_counts/blocked").and_then(Value::as_u64), + Some(7) + ); + assert_eq!( + matrix + .pointer("/manifest_summary/overall_status_counts/not_encoded") + .and_then(Value::as_u64), + Some(5) + ); + assert_eq!( + matrix + .pointer("/manifest_summary/overall_status_counts/wrong_result") + .and_then(Value::as_u64), + Some(6) + ); +} diff --git a/apps/elf-eval/tests/real_world_job_benchmark/competitor_strength_live/text_boundaries.rs b/apps/elf-eval/tests/real_world_job_benchmark/competitor_strength_live/text_boundaries.rs new file mode 100644 index 00000000..4e09a7ee --- /dev/null +++ b/apps/elf-eval/tests/real_world_job_benchmark/competitor_strength_live/text_boundaries.rs @@ -0,0 +1,95 @@ +pub(crate) fn assert_current_report_text_boundaries( + measurement_audit: &str, + competitor_matrix: &str, + iteration_direction: &str, + external_manifest: &str, + comparison_external_projects: &str, +) { + assert!( + measurement_audit.contains( + "| `memory_evolution` | `6` | `pass:1`, `wrong_result:5` | `wrong_result:6` |" + ) + ); + assert!( + measurement_audit + .contains("qmd live fails 6/6 jobs after missing the delete/TTL tombstone evidence") + ); + assert!(measurement_audit.contains("Basic local smoke and local OSS history/readback pass")); + assert!(measurement_audit.contains("claude-mem hook/viewer capture is `blocked`")); + assert!(!measurement_audit.contains("claude-mem hook/viewer capture remains untested")); + assert!(!measurement_audit.contains("blocked or untested")); + assert!( + competitor_matrix + .contains("broader live suites remain `wrong_result`, `blocked`, or `not_encoded`") + ); + assert!(competitor_matrix.contains( + "Overall adapter-status counts: 4 `pass`,\n6 
`wrong_result`, 1 `lifecycle_fail`, 7 `blocked`, and 5 `not_encoded`." + )); + assert!(!competitor_matrix.contains("5 `blocked`, and 7 `not_encoded`")); + assert!( + competitor_matrix + .contains("mem0/OpenMemory local OSS entity-scoped personalization now passes") + ); + assert!(competitor_matrix.contains("scoped preference behavior is a measured tie")); + assert!( + !competitor_matrix.contains("mem0/OpenMemory and Letta personalization are `not_encoded`") + ); + assert!(external_manifest.contains( + "The record is a full-suite sweep, not a full-suite pass; wrong_result, blocked, and not_encoded states remain visible." + )); + assert!(external_manifest.contains( + "The qmd live real-world sweep covers the current encoded fixture corpus; expanded retrieval-debug strength suites still need their own materialized adapter run." + )); + assert!( + comparison_external_projects + .contains("Benchmark-grounded for scoped local OSS same-corpus retrieval") + ); + assert!( + comparison_external_projects + .contains("Benchmark-grounded for local same-corpus retrieval, reindex/update/delete") + ); + assert!(iteration_direction.contains("| Jobs | `55` |")); + assert!(iteration_direction.contains("| Encoded suites | `15` |")); + assert!(iteration_direction.contains("| Pass | `49` |")); + assert!(iteration_direction.contains("| Evidence coverage | `123/123` |")); + assert!(iteration_direction.contains("| Expected evidence recall | `115/115` |")); + + for stale_phrase in [ + "same live sweep shape as ELF", + "ELF and qmd live fail 5/6 jobs", + "both systems currently fail 5/6 live memory-evolution jobs", + "wrong_result, incomplete, blocked, and not_encoded states remain visible", + "broader live suites remain `wrong_result`, `incomplete`, or `not_encoded`", + "The qmd live real-world slice covers representative jobs only", + "| Jobs | `40` |", + "| Encoded suites | `11` |", + "| Jobs | `50` |", + "| Encoded suites | `14` |", + "| Pass | `38` |", + "| Pass | `45` |", + "| Evidence 
coverage | `115/115` |", + "| Expected evidence recall | `107/107` |", + "history/UI/hosted/graph behavior remains", + "current local adapter is incomplete/wrong-result", + "current adapter is incomplete/invalid-result", + ] { + assert!(!measurement_audit.contains(stale_phrase)); + assert!(!competitor_matrix.contains(stale_phrase)); + assert!(!iteration_direction.contains(stale_phrase)); + assert!(!external_manifest.contains(stale_phrase)); + assert!(!comparison_external_projects.contains(stale_phrase)); + } +} + +pub(crate) fn assert_measurement_audit_adapter_status_counts(markdown: &str) { + for expected in [ + "| `blocked` | `7` |", + "| `not_encoded` | `5` |", + "The generated JSON report emits `external_project_count: 16`", + ] { + assert!(markdown.contains(expected), "missing measurement audit text: {expected}"); + } + for stale in ["| `blocked` | `6` |", "| `not_encoded` | `6` |"] { + assert!(!markdown.contains(stale), "stale measurement audit text: {stale}"); + } +} diff --git a/apps/elf-eval/tests/real_world_job_benchmark/trace_replay_reports.rs b/apps/elf-eval/tests/real_world_job_benchmark/trace_replay_reports.rs index bbb9717a..8b5a9ea2 100644 --- a/apps/elf-eval/tests/real_world_job_benchmark/trace_replay_reports.rs +++ b/apps/elf-eval/tests/real_world_job_benchmark/trace_replay_reports.rs @@ -1,392 +1,3 @@ -use std::{fs, path::Path}; - -use color_eyre::Result; -use serde_json::Value; - -use crate::support; - -fn graph_report_service_sources(workspace: &Path) -> Result { - let mut source = - fs::read_to_string(workspace.join("packages/elf-service/src/graph_report.rs"))?; - - append_rust_sources( - workspace.join("packages/elf-service/src/graph_report").as_path(), - &mut source, - )?; - - Ok(source) -} - -fn mcp_server_sources(workspace: &Path) -> Result { - let mut source = fs::read_to_string(workspace.join("apps/elf-mcp/src/app/server.rs"))?; - - append_rust_sources(workspace.join("apps/elf-mcp/src/app/server").as_path(), &mut source)?; - - Ok(source) 
-} - -fn api_route_sources(workspace: &Path) -> Result { - let mut source = fs::read_to_string(workspace.join("apps/elf-api/src/routes.rs"))?; - - append_rust_sources(workspace.join("apps/elf-api/src/routes").as_path(), &mut source)?; - - Ok(source) -} - -fn append_rust_sources(dir: &Path, source: &mut String) -> Result<()> { - let mut entries = Vec::new(); - - for entry in fs::read_dir(dir)? { - entries.push(entry?.path()); - } - - entries.sort(); - - for path in entries { - if path.is_dir() { - append_rust_sources(path.as_path(), source)?; - } else if path.extension().and_then(|ext| ext.to_str()) == Some("rs") { - source.push('\n'); - source.push_str(fs::read_to_string(path)?.as_str()); - } - } - - Ok(()) -} - -#[test] -fn graph_topic_map_report_wires_source_backed_graph_lite_readback() -> Result<()> { - let markdown = fs::read_to_string(support::graph_topic_map_report_markdown_path()?)?; - let benchmarking_index = fs::read_to_string(support::benchmarking_index_path()?)?; - let readme = fs::read_to_string(support::readme_path()?)?; - let workspace = support::workspace_root()?; - let graph_report_service = graph_report_service_sources(&workspace)?; - let api_routes = api_route_sources(&workspace)?; - let mcp_server = mcp_server_sources(&workspace)?; - let graph_spec = fs::read_to_string( - support::workspace_root()?.join("docs/spec/system_graph_memory_postgres_v1.md"), - )?; - - assert!(markdown.contains("Graph Topic-Map Report - June 20, 2026")); - assert!(markdown.contains("elf.graph_report/v1")); - assert!(markdown.contains("sourced")); - assert!(markdown.contains("inferred")); - assert!(markdown.contains("ambiguous")); - assert!(markdown.contains("stale")); - assert!(markdown.contains("superseded")); - assert!(markdown.contains("valid_from")); - assert!(markdown.contains("valid_to")); - assert!(markdown.contains("valid_at")); - assert!(markdown.contains("invalid_at")); - assert!(graph_report_service.contains("ELF_GRAPH_REPORT_SCHEMA_V1")); - 
assert!(graph_report_service.contains("GraphReportSummary")); - assert!(graph_report_service.contains("build_topic_map")); - assert!(api_routes.contains("/v2/graph/report")); - assert!(mcp_server.contains("elf_graph_report")); - assert!(graph_spec.contains("elf.graph_report/v1")); - assert!(graph_spec.contains("Graphiti/Zep `valid_at` and `invalid_at`")); - assert!(benchmarking_index.contains("2026-06-20-graph-topic-map-report.md")); - assert!(readme.contains("Graph Topic-Map Report - June 20, 2026")); - assert!(readme.contains("Graph topic-map reports after XY-1020")); - - Ok(()) -} - -#[test] -fn qmd_trace_replay_diagnostics_report_preserves_claim_boundaries() -> Result<()> { - let report = serde_json::from_str::(&fs::read_to_string( - support::trace_replay_diagnostics_report_path()?, - )?)?; - let markdown = fs::read_to_string(support::trace_replay_diagnostics_markdown_path()?)?; - let readme = fs::read_to_string(support::readme_path()?)?; - let benchmarking_index = fs::read_to_string(support::benchmarking_index_path()?)?; - let adoption_report = fs::read_to_string(support::competitor_strength_adoption_report_path()?)?; - let adoption_json = serde_json::from_str::(&fs::read_to_string( - support::competitor_strength_adoption_report_json_path()?, - )?)?; - - assert_trace_replay_diagnostics_json(&report)?; - assert_trace_replay_diagnostics_markdown(&markdown); - - assert!(readme.contains("ELF/qmd Trace Replay Diagnostics Report - June 11, 2026")); - assert!(benchmarking_index.contains("2026-06-11-elf-qmd-trace-replay-diagnostics-report.md")); - assert!(benchmarking_index.contains("qmd top-10/replay artifact")); - assert!(benchmarking_index.contains("ELF trace/admin surfaces")); - assert!(adoption_report.contains("| Retrieval quality and local debug UX | `loss` |")); - assert!(adoption_report.contains("Letta scenario rows remain")); - assert!(adoption_report.contains("blocked or `not_tested`")); - - assert_trace_replay_viewer_blocker_boundaries( - &readme, - 
&markdown, - &adoption_report, - &report, - &adoption_json, - )?; - - assert!( - adoption_report - .contains("Do not claim qmd's trace/replay artifact win is a broad qmd-over-ELF") - ); - assert!(support::array_at(&adoption_json, "/adoption_decision/remaining_caveats")?.iter().any( - |caveat| { - caveat.as_str().is_some_and(|text| { - text.contains("Letta scenario rows remain blocked or not_tested") - }) - } - )); - - assert_trace_replay_adoption_json(&adoption_json)?; - - Ok(()) -} - -fn assert_trace_replay_diagnostics_json(report: &Value) -> Result<()> { - assert_eq!( - report.pointer("/schema").and_then(Value::as_str), - Some("elf.trace_replay_diagnostics_report/v1") - ); - assert_eq!(report.pointer("/authority").and_then(Value::as_str), Some("XY-923")); - assert_eq!( - support::string_array_at(report, "/outcome_terms")?, - ["win", "tie", "loss", "not_tested", "blocked", "non_goal"].map(str::to_owned) - ); - assert_eq!( - report.pointer("/summary/retrieval_correctness").and_then(Value::as_str), - Some("tie") - ); - assert_eq!(report.pointer("/summary/outcome_counts/loss").and_then(Value::as_u64), Some(2)); - assert_eq!( - report.pointer("/summary/outcome_counts/not_tested").and_then(Value::as_u64), - Some(4) - ); - assert_eq!(report.pointer("/summary/outcome_counts/win").and_then(Value::as_u64), Some(4)); - assert_eq!(report.pointer("/summary/outcome_counts/tie").and_then(Value::as_u64), Some(5)); - assert_eq!(report.pointer("/summary/outcome_counts/non_goal").and_then(Value::as_u64), Some(1)); - - assert_trace_replay_diagnostics_scenarios(report) -} - -fn assert_trace_replay_diagnostics_scenarios(report: &Value) -> Result<()> { - let scenarios = support::array_at(report, "/scenario_outcomes")?; - let retrieval = - support::find_by_field(scenarios, "/scenario_id", "retrieval_correctness_guardrail")?; - let top10 = - support::find_by_field(scenarios, "/scenario_id", "default_top10_candidate_artifact")?; - let replay = support::find_by_field(scenarios, 
"/scenario_id", "replay_command_locality")?; - let trace_surface = support::find_by_field( - scenarios, - "/scenario_id", - "trace_admin_replay_surface_availability", - )?; - let operator_trace = - support::find_by_field(scenarios, "/scenario_id", "operator_debug_trace_hydration")?; - let operator_replay = support::find_by_field( - scenarios, - "/scenario_id", - "operator_debug_replay_command_availability", - )?; - let operator_candidate = support::find_by_field( - scenarios, - "/scenario_id", - "operator_debug_candidate_drop_visibility", - )?; - let operator_repair = - support::find_by_field(scenarios, "/scenario_id", "operator_debug_repair_action_clarity")?; - let operator_selected = support::find_by_field( - scenarios, - "/scenario_id", - "operator_debug_selected_but_not_narrated", - )?; - let expansion = - support::find_by_field(scenarios, "/scenario_id", "query_expansion_attribution")?; - let dense_sparse = - support::find_by_field(scenarios, "/scenario_id", "dense_sparse_channel_attribution")?; - let fusion = support::find_by_field(scenarios, "/scenario_id", "fusion_attribution")?; - let rerank = support::find_by_field(scenarios, "/scenario_id", "rerank_attribution")?; - let candidate_drop = - support::find_by_field(scenarios, "/scenario_id", "candidate_drop_diagnostics")?; - let selected = support::find_by_field( - scenarios, - "/scenario_id", - "selected_but_not_narrated_wrong_results", - )?; - let tombstone = - support::find_by_field(scenarios, "/scenario_id", "evidence_absent_tombstone_diagnostics")?; - - assert_eq!(scenarios.len(), 16); - assert_eq!(retrieval.pointer("/outcome").and_then(Value::as_str), Some("tie")); - assert_eq!(top10.pointer("/outcome").and_then(Value::as_str), Some("loss")); - assert_eq!(replay.pointer("/outcome").and_then(Value::as_str), Some("loss")); - assert_eq!(trace_surface.pointer("/outcome").and_then(Value::as_str), Some("tie")); - assert_eq!( - operator_trace.pointer("/evidence_class").and_then(Value::as_str), - 
Some("live_real_world") - ); - assert_eq!(operator_trace.pointer("/result_type").and_then(Value::as_str), Some("pass")); - assert_eq!(operator_trace.pointer("/outcome").and_then(Value::as_str), Some("win")); - assert_eq!(operator_replay.pointer("/outcome").and_then(Value::as_str), Some("tie")); - assert_eq!(operator_candidate.pointer("/outcome").and_then(Value::as_str), Some("win")); - assert!(support::array_contains_str( - operator_candidate, - "/typed_non_pass_states", - "retrieved_but_dropped" - )?); - assert_eq!(operator_repair.pointer("/outcome").and_then(Value::as_str), Some("tie")); - assert_eq!(operator_selected.pointer("/outcome").and_then(Value::as_str), Some("win")); - assert!(support::array_contains_str( - operator_selected, - "/typed_non_pass_states", - "selected_but_not_narrated" - )?); - assert_eq!(expansion.pointer("/outcome").and_then(Value::as_str), Some("not_tested")); - assert_eq!(dense_sparse.pointer("/outcome").and_then(Value::as_str), Some("not_tested")); - assert_eq!(fusion.pointer("/outcome").and_then(Value::as_str), Some("not_tested")); - assert_eq!(rerank.pointer("/result_type").and_then(Value::as_str), Some("non_goal")); - assert_eq!(rerank.pointer("/outcome").and_then(Value::as_str), Some("non_goal")); - assert_eq!(candidate_drop.pointer("/outcome").and_then(Value::as_str), Some("not_tested")); - assert!(support::array_contains_str( - candidate_drop, - "/typed_non_pass_states", - "retrieved_but_dropped" - )?); - assert_eq!(selected.pointer("/result_type").and_then(Value::as_str), Some("wrong_result")); - assert!(support::array_contains_str( - selected, - "/typed_non_pass_states", - "selected_but_not_narrated" - )?); - assert_eq!(tombstone.pointer("/outcome").and_then(Value::as_str), Some("win")); - assert_eq!(tombstone.pointer("/qmd_status").and_then(Value::as_str), Some("wrong_result")); - assert!(support::array_contains_str( - report, - "/wrong_result_diagnostics/qmd_missing_evidence", - "delete-tombstone" - )?); - 
assert!(support::array_contains_str( - report, - "/claim_boundaries", - "qmd currently wins the default local-debug artifact surface: top-10 rows plus short CLI replay." - )?); - assert!(support::array_contains_str( - report, - "/claim_boundaries", - "ELF narrowly wins the live operator-debug trace hydration and candidate-drop visibility slice against qmd; qmd still ties replay-command and repair-action clarity." - )?); - assert!(support::array_contains_str( - report, - "/claim_boundaries", - "Do not claim qmd beats ELF as a memory system overall." - )?); - - Ok(()) -} - -fn assert_trace_replay_diagnostics_markdown(markdown: &str) { - assert!(markdown.contains("Retrieval correctness is still tied")); - assert!(markdown.contains("| Default top-10 candidate artifact |")); - assert!(markdown.contains("| Replay command locality |")); - assert!( - markdown - .contains("| Operator-debug trace hydration | `live_real_world` | `pass` | `win` |") - ); - assert!(markdown.contains( - "| Operator-debug replay command availability | `live_real_world` | `pass` | `tie` |" - )); - assert!(markdown.contains( - "| Operator-debug candidate-drop visibility | `live_real_world` | `pass` | `win` |" - )); - assert!(markdown.contains("| Rerank attribution | `live_baseline_only` | `non_goal` |")); - assert!(markdown.contains("| Candidate-drop diagnostics | `research_gate` | `not_encoded` |")); - assert!(markdown.contains("`retrieved_but_dropped` | Defined globally as `not_tested`")); - assert!(markdown.contains("npx tsx src/cli/qmd.ts query")); - assert!(markdown.contains("cargo run -p elf-eval -- --config-a")); - assert!(markdown.contains("cargo make real-world-job-operator-ux-live-adapters")); - assert!(markdown.contains("Do not claim qmd beats ELF as a memory system overall")); - assert!(markdown.contains("Do not score rerank superiority from a qmd `--no-rerank` run")); -} - -fn assert_trace_replay_viewer_blocker_boundaries( - readme: &str, - markdown: &str, - adoption_report: &str, - 
report: &Value, - adoption_json: &Value, -) -> Result<()> { - let checked_surfaces = [ - support::collapse_whitespace(readme), - support::collapse_whitespace(markdown), - support::collapse_whitespace(adoption_report), - report.to_string(), - adoption_json.to_string(), - ]; - - for surface in checked_surfaces { - assert!(!surface.contains("blocked or not encoded")); - } - - assert!( - support::collapse_whitespace(readme) - .contains("claude-mem viewer flows remain blocked until Docker-contained") - ); - assert!( - support::collapse_whitespace(markdown) - .contains("claude-mem UI repair paths remain blocked until Docker-contained") - ); - assert!( - support::collapse_whitespace(adoption_report) - .contains("claude-mem viewer workflows remain blocked until Docker-contained") - ); - - Ok(()) -} - -fn assert_trace_replay_adoption_json(adoption: &Value) -> Result<()> { - let local_debug = support::find_by_field( - support::array_at(adoption, "/scenario_outcomes")?, - "/scenario_id", - "local_debug_replay_ux", - )?; - let operator_debug = support::find_by_field( - support::array_at(adoption, "/scenario_outcomes")?, - "/scenario_id", - "operator_debugging_viewer_ux", - )?; - - assert_eq!(local_debug.pointer("/outcome").and_then(Value::as_str), Some("loss")); - assert!( - local_debug - .pointer("/measured_claim") - .and_then(Value::as_str) - .is_some_and(|claim| claim.contains("qmd stronger on immediate top-10")) - ); - assert!(support::array_contains_str( - local_debug, - "/command_artifacts", - "docs/evidence/benchmarking/2026-06-11-elf-qmd-trace-replay-diagnostics-report.md" - )?); - assert!(support::array_contains_str( - adoption, - "/claim_boundaries/not_allowed", - "Do not claim qmd's trace/replay artifact win is a broad qmd-over-ELF memory-system or retrieval-quality win." 
- )?); - assert_eq!(operator_debug.pointer("/outcome").and_then(Value::as_str), Some("win")); - assert!( - operator_debug - .pointer("/measured_claim") - .and_then(Value::as_str) - .is_some_and(|claim| claim.contains("narrow live operator-debug win over qmd")) - ); - assert!(support::array_contains_str( - operator_debug, - "/command_artifacts", - "tmp/real-world-job/operator-ux-live-adapters/summary.json" - )?); - assert!(support::array_contains_str( - adoption, - "/claim_boundaries/not_allowed", - "Do not claim ELF broadly beats OpenMemory or claude-mem viewer UX from the narrow ELF/qmd operator-debug slice." - )?); - - Ok(()) -} +mod trace_replay_reports_graph_topic_map; +mod trace_replay_reports_qmd_trace_replay; +mod trace_replay_reports_source_scan; diff --git a/apps/elf-eval/tests/real_world_job_benchmark/trace_replay_reports_graph_topic_map.rs b/apps/elf-eval/tests/real_world_job_benchmark/trace_replay_reports_graph_topic_map.rs new file mode 100644 index 00000000..2a3c55d7 --- /dev/null +++ b/apps/elf-eval/tests/real_world_job_benchmark/trace_replay_reports_graph_topic_map.rs @@ -0,0 +1,44 @@ +use std::fs; + +use color_eyre::Result; + +use crate::{support, trace_replay_reports::trace_replay_reports_source_scan}; + +#[test] +fn graph_topic_map_report_wires_source_backed_graph_lite_readback() -> Result<()> { + let markdown = fs::read_to_string(support::graph_topic_map_report_markdown_path()?)?; + let benchmarking_index = fs::read_to_string(support::benchmarking_index_path()?)?; + let readme = fs::read_to_string(support::readme_path()?)?; + let workspace = support::workspace_root()?; + let graph_report_service = + trace_replay_reports_source_scan::graph_report_service_sources(&workspace)?; + let api_routes = trace_replay_reports_source_scan::api_route_sources(&workspace)?; + let mcp_server = trace_replay_reports_source_scan::mcp_server_sources(&workspace)?; + let graph_spec = fs::read_to_string( + 
support::workspace_root()?.join("docs/spec/system_graph_memory_postgres_v1.md"), + )?; + + assert!(markdown.contains("Graph Topic-Map Report - June 20, 2026")); + assert!(markdown.contains("elf.graph_report/v1")); + assert!(markdown.contains("sourced")); + assert!(markdown.contains("inferred")); + assert!(markdown.contains("ambiguous")); + assert!(markdown.contains("stale")); + assert!(markdown.contains("superseded")); + assert!(markdown.contains("valid_from")); + assert!(markdown.contains("valid_to")); + assert!(markdown.contains("valid_at")); + assert!(markdown.contains("invalid_at")); + assert!(graph_report_service.contains("ELF_GRAPH_REPORT_SCHEMA_V1")); + assert!(graph_report_service.contains("GraphReportSummary")); + assert!(graph_report_service.contains("build_topic_map")); + assert!(api_routes.contains("/v2/graph/report")); + assert!(mcp_server.contains("elf_graph_report")); + assert!(graph_spec.contains("elf.graph_report/v1")); + assert!(graph_spec.contains("Graphiti/Zep `valid_at` and `invalid_at`")); + assert!(benchmarking_index.contains("2026-06-20-graph-topic-map-report.md")); + assert!(readme.contains("Graph Topic-Map Report - June 20, 2026")); + assert!(readme.contains("Graph topic-map reports after XY-1020")); + + Ok(()) +} diff --git a/apps/elf-eval/tests/real_world_job_benchmark/trace_replay_reports_qmd_trace_replay.rs b/apps/elf-eval/tests/real_world_job_benchmark/trace_replay_reports_qmd_trace_replay.rs new file mode 100644 index 00000000..9ca9b1ff --- /dev/null +++ b/apps/elf-eval/tests/real_world_job_benchmark/trace_replay_reports_qmd_trace_replay.rs @@ -0,0 +1,305 @@ +use std::fs; + +use color_eyre::Result; +use serde_json::Value; + +use crate::support; + +#[test] +fn qmd_trace_replay_diagnostics_report_preserves_claim_boundaries() -> Result<()> { + let report = serde_json::from_str::(&fs::read_to_string( + support::trace_replay_diagnostics_report_path()?, + )?)?; + let markdown = 
fs::read_to_string(support::trace_replay_diagnostics_markdown_path()?)?; + let readme = fs::read_to_string(support::readme_path()?)?; + let benchmarking_index = fs::read_to_string(support::benchmarking_index_path()?)?; + let adoption_report = fs::read_to_string(support::competitor_strength_adoption_report_path()?)?; + let adoption_json = serde_json::from_str::(&fs::read_to_string( + support::competitor_strength_adoption_report_json_path()?, + )?)?; + + assert_trace_replay_diagnostics_json(&report)?; + assert_trace_replay_diagnostics_markdown(&markdown); + + assert!(readme.contains("ELF/qmd Trace Replay Diagnostics Report - June 11, 2026")); + assert!(benchmarking_index.contains("2026-06-11-elf-qmd-trace-replay-diagnostics-report.md")); + assert!(benchmarking_index.contains("qmd top-10/replay artifact")); + assert!(benchmarking_index.contains("ELF trace/admin surfaces")); + assert!(adoption_report.contains("| Retrieval quality and local debug UX | `loss` |")); + assert!(adoption_report.contains("Letta scenario rows remain")); + assert!(adoption_report.contains("blocked or `not_tested`")); + + assert_trace_replay_viewer_blocker_boundaries( + &readme, + &markdown, + &adoption_report, + &report, + &adoption_json, + )?; + + assert!( + adoption_report + .contains("Do not claim qmd's trace/replay artifact win is a broad qmd-over-ELF") + ); + assert!(support::array_at(&adoption_json, "/adoption_decision/remaining_caveats")?.iter().any( + |caveat| { + caveat.as_str().is_some_and(|text| { + text.contains("Letta scenario rows remain blocked or not_tested") + }) + } + )); + + assert_trace_replay_adoption_json(&adoption_json)?; + + Ok(()) +} + +fn assert_trace_replay_diagnostics_json(report: &Value) -> Result<()> { + assert_eq!( + report.pointer("/schema").and_then(Value::as_str), + Some("elf.trace_replay_diagnostics_report/v1") + ); + assert_eq!(report.pointer("/authority").and_then(Value::as_str), Some("XY-923")); + assert_eq!( + support::string_array_at(report, 
"/outcome_terms")?, + ["win", "tie", "loss", "not_tested", "blocked", "non_goal"].map(str::to_owned) + ); + assert_eq!( + report.pointer("/summary/retrieval_correctness").and_then(Value::as_str), + Some("tie") + ); + assert_eq!(report.pointer("/summary/outcome_counts/loss").and_then(Value::as_u64), Some(2)); + assert_eq!( + report.pointer("/summary/outcome_counts/not_tested").and_then(Value::as_u64), + Some(4) + ); + assert_eq!(report.pointer("/summary/outcome_counts/win").and_then(Value::as_u64), Some(4)); + assert_eq!(report.pointer("/summary/outcome_counts/tie").and_then(Value::as_u64), Some(5)); + assert_eq!(report.pointer("/summary/outcome_counts/non_goal").and_then(Value::as_u64), Some(1)); + + assert_trace_replay_diagnostics_scenarios(report) +} + +fn assert_trace_replay_diagnostics_scenarios(report: &Value) -> Result<()> { + let scenarios = support::array_at(report, "/scenario_outcomes")?; + let retrieval = + support::find_by_field(scenarios, "/scenario_id", "retrieval_correctness_guardrail")?; + let top10 = + support::find_by_field(scenarios, "/scenario_id", "default_top10_candidate_artifact")?; + let replay = support::find_by_field(scenarios, "/scenario_id", "replay_command_locality")?; + let trace_surface = support::find_by_field( + scenarios, + "/scenario_id", + "trace_admin_replay_surface_availability", + )?; + let operator_trace = + support::find_by_field(scenarios, "/scenario_id", "operator_debug_trace_hydration")?; + let operator_replay = support::find_by_field( + scenarios, + "/scenario_id", + "operator_debug_replay_command_availability", + )?; + let operator_candidate = support::find_by_field( + scenarios, + "/scenario_id", + "operator_debug_candidate_drop_visibility", + )?; + let operator_repair = + support::find_by_field(scenarios, "/scenario_id", "operator_debug_repair_action_clarity")?; + let operator_selected = support::find_by_field( + scenarios, + "/scenario_id", + "operator_debug_selected_but_not_narrated", + )?; + let expansion = + 
support::find_by_field(scenarios, "/scenario_id", "query_expansion_attribution")?; + let dense_sparse = + support::find_by_field(scenarios, "/scenario_id", "dense_sparse_channel_attribution")?; + let fusion = support::find_by_field(scenarios, "/scenario_id", "fusion_attribution")?; + let rerank = support::find_by_field(scenarios, "/scenario_id", "rerank_attribution")?; + let candidate_drop = + support::find_by_field(scenarios, "/scenario_id", "candidate_drop_diagnostics")?; + let selected = support::find_by_field( + scenarios, + "/scenario_id", + "selected_but_not_narrated_wrong_results", + )?; + let tombstone = + support::find_by_field(scenarios, "/scenario_id", "evidence_absent_tombstone_diagnostics")?; + + assert_eq!(scenarios.len(), 16); + assert_eq!(retrieval.pointer("/outcome").and_then(Value::as_str), Some("tie")); + assert_eq!(top10.pointer("/outcome").and_then(Value::as_str), Some("loss")); + assert_eq!(replay.pointer("/outcome").and_then(Value::as_str), Some("loss")); + assert_eq!(trace_surface.pointer("/outcome").and_then(Value::as_str), Some("tie")); + assert_eq!( + operator_trace.pointer("/evidence_class").and_then(Value::as_str), + Some("live_real_world") + ); + assert_eq!(operator_trace.pointer("/result_type").and_then(Value::as_str), Some("pass")); + assert_eq!(operator_trace.pointer("/outcome").and_then(Value::as_str), Some("win")); + assert_eq!(operator_replay.pointer("/outcome").and_then(Value::as_str), Some("tie")); + assert_eq!(operator_candidate.pointer("/outcome").and_then(Value::as_str), Some("win")); + assert!(support::array_contains_str( + operator_candidate, + "/typed_non_pass_states", + "retrieved_but_dropped" + )?); + assert_eq!(operator_repair.pointer("/outcome").and_then(Value::as_str), Some("tie")); + assert_eq!(operator_selected.pointer("/outcome").and_then(Value::as_str), Some("win")); + assert!(support::array_contains_str( + operator_selected, + "/typed_non_pass_states", + "selected_but_not_narrated" + )?); + 
assert_eq!(expansion.pointer("/outcome").and_then(Value::as_str), Some("not_tested")); + assert_eq!(dense_sparse.pointer("/outcome").and_then(Value::as_str), Some("not_tested")); + assert_eq!(fusion.pointer("/outcome").and_then(Value::as_str), Some("not_tested")); + assert_eq!(rerank.pointer("/result_type").and_then(Value::as_str), Some("non_goal")); + assert_eq!(rerank.pointer("/outcome").and_then(Value::as_str), Some("non_goal")); + assert_eq!(candidate_drop.pointer("/outcome").and_then(Value::as_str), Some("not_tested")); + assert!(support::array_contains_str( + candidate_drop, + "/typed_non_pass_states", + "retrieved_but_dropped" + )?); + assert_eq!(selected.pointer("/result_type").and_then(Value::as_str), Some("wrong_result")); + assert!(support::array_contains_str( + selected, + "/typed_non_pass_states", + "selected_but_not_narrated" + )?); + assert_eq!(tombstone.pointer("/outcome").and_then(Value::as_str), Some("win")); + assert_eq!(tombstone.pointer("/qmd_status").and_then(Value::as_str), Some("wrong_result")); + assert!(support::array_contains_str( + report, + "/wrong_result_diagnostics/qmd_missing_evidence", + "delete-tombstone" + )?); + assert!(support::array_contains_str( + report, + "/claim_boundaries", + "qmd currently wins the default local-debug artifact surface: top-10 rows plus short CLI replay." + )?); + assert!(support::array_contains_str( + report, + "/claim_boundaries", + "ELF narrowly wins the live operator-debug trace hydration and candidate-drop visibility slice against qmd; qmd still ties replay-command and repair-action clarity." + )?); + assert!(support::array_contains_str( + report, + "/claim_boundaries", + "Do not claim qmd beats ELF as a memory system overall." 
+ )?); + + Ok(()) +} + +fn assert_trace_replay_diagnostics_markdown(markdown: &str) { + assert!(markdown.contains("Retrieval correctness is still tied")); + assert!(markdown.contains("| Default top-10 candidate artifact |")); + assert!(markdown.contains("| Replay command locality |")); + assert!( + markdown + .contains("| Operator-debug trace hydration | `live_real_world` | `pass` | `win` |") + ); + assert!(markdown.contains( + "| Operator-debug replay command availability | `live_real_world` | `pass` | `tie` |" + )); + assert!(markdown.contains( + "| Operator-debug candidate-drop visibility | `live_real_world` | `pass` | `win` |" + )); + assert!(markdown.contains("| Rerank attribution | `live_baseline_only` | `non_goal` |")); + assert!(markdown.contains("| Candidate-drop diagnostics | `research_gate` | `not_encoded` |")); + assert!(markdown.contains("`retrieved_but_dropped` | Defined globally as `not_tested`")); + assert!(markdown.contains("npx tsx src/cli/qmd.ts query")); + assert!(markdown.contains("cargo run -p elf-eval -- --config-a")); + assert!(markdown.contains("cargo make real-world-job-operator-ux-live-adapters")); + assert!(markdown.contains("Do not claim qmd beats ELF as a memory system overall")); + assert!(markdown.contains("Do not score rerank superiority from a qmd `--no-rerank` run")); +} + +fn assert_trace_replay_viewer_blocker_boundaries( + readme: &str, + markdown: &str, + adoption_report: &str, + report: &Value, + adoption_json: &Value, +) -> Result<()> { + let checked_surfaces = [ + support::collapse_whitespace(readme), + support::collapse_whitespace(markdown), + support::collapse_whitespace(adoption_report), + report.to_string(), + adoption_json.to_string(), + ]; + + for surface in checked_surfaces { + assert!(!surface.contains("blocked or not encoded")); + } + + assert!( + support::collapse_whitespace(readme) + .contains("claude-mem viewer flows remain blocked until Docker-contained") + ); + assert!( + support::collapse_whitespace(markdown) 
+ .contains("claude-mem UI repair paths remain blocked until Docker-contained") + ); + assert!( + support::collapse_whitespace(adoption_report) + .contains("claude-mem viewer workflows remain blocked until Docker-contained") + ); + + Ok(()) +} + +fn assert_trace_replay_adoption_json(adoption: &Value) -> Result<()> { + let local_debug = support::find_by_field( + support::array_at(adoption, "/scenario_outcomes")?, + "/scenario_id", + "local_debug_replay_ux", + )?; + let operator_debug = support::find_by_field( + support::array_at(adoption, "/scenario_outcomes")?, + "/scenario_id", + "operator_debugging_viewer_ux", + )?; + + assert_eq!(local_debug.pointer("/outcome").and_then(Value::as_str), Some("loss")); + assert!( + local_debug + .pointer("/measured_claim") + .and_then(Value::as_str) + .is_some_and(|claim| claim.contains("qmd stronger on immediate top-10")) + ); + assert!(support::array_contains_str( + local_debug, + "/command_artifacts", + "docs/evidence/benchmarking/2026-06-11-elf-qmd-trace-replay-diagnostics-report.md" + )?); + assert!(support::array_contains_str( + adoption, + "/claim_boundaries/not_allowed", + "Do not claim qmd's trace/replay artifact win is a broad qmd-over-ELF memory-system or retrieval-quality win." + )?); + assert_eq!(operator_debug.pointer("/outcome").and_then(Value::as_str), Some("win")); + assert!( + operator_debug + .pointer("/measured_claim") + .and_then(Value::as_str) + .is_some_and(|claim| claim.contains("narrow live operator-debug win over qmd")) + ); + assert!(support::array_contains_str( + operator_debug, + "/command_artifacts", + "tmp/real-world-job/operator-ux-live-adapters/summary.json" + )?); + assert!(support::array_contains_str( + adoption, + "/claim_boundaries/not_allowed", + "Do not claim ELF broadly beats OpenMemory or claude-mem viewer UX from the narrow ELF/qmd operator-debug slice." 
+ )?); + + Ok(()) +} diff --git a/apps/elf-eval/tests/real_world_job_benchmark/trace_replay_reports_source_scan.rs b/apps/elf-eval/tests/real_world_job_benchmark/trace_replay_reports_source_scan.rs new file mode 100644 index 00000000..dd4a3dcb --- /dev/null +++ b/apps/elf-eval/tests/real_world_job_benchmark/trace_replay_reports_source_scan.rs @@ -0,0 +1,52 @@ +use std::{fs, path::Path}; + +use color_eyre::Result; + +pub(crate) fn graph_report_service_sources(workspace: &Path) -> Result<String> { + let mut source = + fs::read_to_string(workspace.join("packages/elf-service/src/graph_report.rs"))?; + + append_rust_sources( + workspace.join("packages/elf-service/src/graph_report").as_path(), + &mut source, + )?; + + Ok(source) +} + +pub(crate) fn mcp_server_sources(workspace: &Path) -> Result<String> { + let mut source = fs::read_to_string(workspace.join("apps/elf-mcp/src/app/server.rs"))?; + + append_rust_sources(workspace.join("apps/elf-mcp/src/app/server").as_path(), &mut source)?; + + Ok(source) +} + +pub(crate) fn api_route_sources(workspace: &Path) -> Result<String> { + let mut source = fs::read_to_string(workspace.join("apps/elf-api/src/routes.rs"))?; + + append_rust_sources(workspace.join("apps/elf-api/src/routes").as_path(), &mut source)?; + + Ok(source) +} + +fn append_rust_sources(dir: &Path, source: &mut String) -> Result<()> { + let mut entries = Vec::new(); + + for entry in fs::read_dir(dir)?
{ + entries.push(entry?.path()); + } + + entries.sort(); + + for path in entries { + if path.is_dir() { + append_rust_sources(path.as_path(), source)?; + } else if path.extension().and_then(|ext| ext.to_str()) == Some("rs") { + source.push('\n'); + source.push_str(fs::read_to_string(path)?.as_str()); + } + } + + Ok(()) +} diff --git a/packages/elf-domain/src/memory_policy/evaluation.rs b/packages/elf-domain/src/memory_policy/evaluation.rs new file mode 100644 index 00000000..e7dedf79 --- /dev/null +++ b/packages/elf-domain/src/memory_policy/evaluation.rs @@ -0,0 +1,122 @@ +use crate::memory_policy::{self, MemoryPolicyDecision, MemoryPolicyEvaluation, tests::support}; +use elf_config::{MemoryPolicy, MemoryPolicyRule}; + +#[test] +fn policy_precedence_prefers_note_type_and_scope_over_note_type_only() { + let cfg = support::test_config(MemoryPolicy { + rules: vec![ + MemoryPolicyRule { + note_type: Some("fact".to_string()), + scope: None, + min_confidence: Some(0.05), + min_importance: None, + }, + MemoryPolicyRule { + note_type: Some("fact".to_string()), + scope: Some("agent_private".to_string()), + min_confidence: Some(0.95), + min_importance: None, + }, + MemoryPolicyRule { + note_type: None, + scope: Some("agent_private".to_string()), + min_confidence: Some(0.40), + min_importance: None, + }, + ], + }); + let MemoryPolicyEvaluation { decision, matched_rule } = memory_policy::evaluate_memory_policy( + &cfg, + "fact", + "agent_private", + 0.5, + 0.5, + MemoryPolicyDecision::Remember, + ); + + assert_eq!(decision, MemoryPolicyDecision::Ignore); + + let rule = matched_rule.expect("expected policy match"); + + assert_eq!(rule.note_type.as_deref(), Some("fact")); + assert_eq!(rule.scope.as_deref(), Some("agent_private")); + assert_eq!(rule.min_confidence, Some(0.95)); + assert_eq!(rule.min_importance, None); +} + +#[test] +fn evaluate_downgrades_base_remember_update_only() { + let cfg = support::test_config(MemoryPolicy { + rules: vec![MemoryPolicyRule { + note_type: 
Some("fact".to_string()), + scope: Some("agent_private".to_string()), + min_confidence: Some(0.9), + min_importance: Some(0.5), + }], + }); + let remember = memory_policy::evaluate_memory_policy( + &cfg, + "fact", + "agent_private", + 0.95, + 0.4, + MemoryPolicyDecision::Remember, + ); + + assert_eq!(remember.decision, MemoryPolicyDecision::Ignore); + + let update = memory_policy::evaluate_memory_policy( + &cfg, + "fact", + "agent_private", + f64::NAN, + f64::NAN, + MemoryPolicyDecision::Update, + ); + + assert_eq!(update.decision, MemoryPolicyDecision::Ignore); + + let ignore = memory_policy::evaluate_memory_policy( + &cfg, + "fact", + "agent_private", + 0.1, + 0.1, + MemoryPolicyDecision::Ignore, + ); + + assert_eq!(ignore.decision, MemoryPolicyDecision::Ignore); + + let reject = memory_policy::evaluate_memory_policy( + &cfg, + "fact", + "agent_private", + 0.1, + 0.1, + MemoryPolicyDecision::Reject, + ); + + assert_eq!(reject.decision, MemoryPolicyDecision::Reject); +} + +#[test] +fn evaluate_without_matching_threshold_leaves_base_unchanged() { + let cfg = support::test_config(MemoryPolicy { + rules: vec![MemoryPolicyRule { + note_type: Some("fact".to_string()), + scope: Some("agent_private".to_string()), + min_confidence: None, + min_importance: None, + }], + }); + let output = memory_policy::evaluate_memory_policy( + &cfg, + "fact", + "agent_private", + 0.0, + 0.0, + MemoryPolicyDecision::Remember, + ); + + assert_eq!(output.decision, MemoryPolicyDecision::Remember); +} diff --git a/packages/elf-domain/src/memory_policy/support.rs b/packages/elf-domain/src/memory_policy/support.rs new file mode 100644 index 00000000..c1023260 --- /dev/null +++ b/packages/elf-domain/src/memory_policy/support.rs @@ -0,0 +1,287 @@ +use elf_config::{ + Chunking, Config, EmbeddingProviderConfig, Lifecycle, LlmProviderConfig, Memory, MemoryPolicy, + MemoryPolicyRule, Postgres, ProviderConfig, Providers, Qdrant, Ranking, RankingBlend, + RankingBlendSegment, RankingDeterministic, 
RankingDeterministicDecay, RankingDeterministicHits, + RankingDeterministicLexical, RankingDiversity, RankingRetrievalSources, ReadProfiles, + ScopePrecedence, ScopeWriteAllowed, Scopes, Search, SearchCache, SearchDynamic, + SearchExpansion, SearchExplain, SearchGraphContext, SearchPrefilter, SearchRecursive, Security, + Service, Storage, TtlDays, +}; + +pub(crate) fn test_config(policy: MemoryPolicy) -> Config { + let mut cfg = test_default_config(); + + cfg.memory.policy = policy; + + cfg +} + +fn test_default_config() -> Config { + Config { + service: test_service_config(), + storage: test_storage_config(), + providers: test_providers_config(), + scopes: test_scopes_config(), + memory: test_memory_config(), + search: test_search_config(), + ranking: test_ranking_config(), + lifecycle: test_lifecycle_config(), + security: test_security_config(), + chunking: test_chunking_config(), + context: None, + mcp: None, + } +} + +fn test_service_config() -> Service { + Service { + http_bind: "127.0.0.1:8080".to_string(), + mcp_bind: "127.0.0.1:8082".to_string(), + admin_bind: "127.0.0.1:8081".to_string(), + log_level: "info".to_string(), + } +} + +fn test_storage_config() -> Storage { + Storage { + postgres: Postgres { + dsn: "postgres://user:pass@localhost/db".to_string(), + pool_max_conns: 1, + }, + qdrant: Qdrant { + url: "http://localhost".to_string(), + collection: "mem_notes_v2".to_string(), + docs_collection: "doc_chunks_v1".to_string(), + vector_dim: 4_096, + }, + } +} + +fn test_providers_config() -> Providers { + Providers { + embedding: test_embedding_provider_config(), + rerank: test_rerank_provider_config(), + llm_extractor: test_llm_extractor_provider_config(), + } +} + +fn test_embedding_provider_config() -> EmbeddingProviderConfig { + EmbeddingProviderConfig { + provider_id: "p".to_string(), + api_base: "http://localhost".to_string(), + api_key: "key".to_string(), + path: "/".to_string(), + model: "m".to_string(), + dimensions: 3, + timeout_ms: 1_000, + 
default_headers: Default::default(), + } +} + +fn test_rerank_provider_config() -> ProviderConfig { + ProviderConfig { + provider_id: "p".to_string(), + api_base: "http://localhost".to_string(), + api_key: "key".to_string(), + path: "/".to_string(), + model: "m".to_string(), + timeout_ms: 1_000, + default_headers: Default::default(), + } +} + +fn test_llm_extractor_provider_config() -> LlmProviderConfig { + LlmProviderConfig { + provider_id: "p".to_string(), + api_base: "http://localhost".to_string(), + api_key: "key".to_string(), + path: "/".to_string(), + model: "m".to_string(), + temperature: 0.1, + timeout_ms: 1_000, + default_headers: Default::default(), + } +} + +fn test_scopes_config() -> Scopes { + Scopes { + allowed: vec!["agent_private".to_string()], + read_profiles: test_read_profiles_config(), + precedence: ScopePrecedence { agent_private: 30, project_shared: 20, org_shared: 10 }, + write_allowed: ScopeWriteAllowed { + agent_private: true, + project_shared: true, + org_shared: true, + }, + } +} + +fn test_read_profiles_config() -> ReadProfiles { + ReadProfiles { + private_only: vec!["agent_private".to_string()], + private_plus_project: vec!["agent_private".to_string()], + all_scopes: vec!["agent_private".to_string()], + } +} + +fn test_memory_config() -> Memory { + Memory { + max_notes_per_add_event: 3, + max_note_chars: 240, + dup_sim_threshold: 0.92, + update_sim_threshold: 0.85, + candidate_k: 60, + top_k: 12, + policy: MemoryPolicy { + rules: vec![ + MemoryPolicyRule { + note_type: Some("fact".to_string()), + scope: Some("agent_private".to_string()), + min_confidence: Some(0.9), + min_importance: Some(0.1), + }, + MemoryPolicyRule { + note_type: Some("preference".to_string()), + scope: Some("agent_private".to_string()), + min_confidence: Some(0.75), + min_importance: None, + }, + MemoryPolicyRule { + note_type: Some("preference".to_string()), + scope: None, + min_confidence: Some(0.6), + min_importance: None, + }, + MemoryPolicyRule { + note_type: 
None, + scope: None, + min_confidence: None, + min_importance: None, + }, + ], + }, + } +} + +fn test_search_config() -> Search { + Search { + expansion: SearchExpansion { + mode: "off".to_string(), + max_queries: 4, + include_original: true, + }, + dynamic: SearchDynamic { min_candidates: 10, min_top_score: 0.12 }, + prefilter: SearchPrefilter { max_candidates: 0 }, + cache: SearchCache { + enabled: true, + expansion_ttl_days: 7, + rerank_ttl_days: 7, + max_payload_bytes: Some(262_144), + }, + explain: SearchExplain { + retention_days: 7, + capture_candidates: false, + candidate_retention_days: 2, + write_mode: "outbox".to_string(), + }, + recursive: SearchRecursive { + enabled: false, + max_depth: 2, + max_children_per_node: 4, + max_nodes_per_scope: 32, + max_total_nodes: 256, + }, + graph_context: SearchGraphContext { + enabled: false, + max_facts_per_item: 16, + max_evidence_notes_per_fact: 16, + }, + } +} + +fn test_ranking_config() -> Ranking { + Ranking { + recency_tau_days: 60.0, + tie_breaker_weight: 0.1, + deterministic: test_ranking_deterministic_config(), + blend: RankingBlend { + enabled: true, + rerank_normalization: "rank".to_string(), + retrieval_normalization: "rank".to_string(), + segments: vec![ + RankingBlendSegment { max_retrieval_rank: 3, retrieval_weight: 0.8 }, + RankingBlendSegment { max_retrieval_rank: 10, retrieval_weight: 0.5 }, + RankingBlendSegment { max_retrieval_rank: 1_000_000, retrieval_weight: 0.2 }, + ], + }, + diversity: RankingDiversity { + enabled: true, + sim_threshold: 0.88, + mmr_lambda: 0.7, + max_skips: 64, + }, + retrieval_sources: RankingRetrievalSources { + fusion_weight: 1.0, + structured_field_weight: 1.0, + fusion_priority: 1, + structured_field_priority: 0, + }, + } +} + +fn test_ranking_deterministic_config() -> RankingDeterministic { + RankingDeterministic { + enabled: false, + lexical: RankingDeterministicLexical { + enabled: false, + weight: 0.05, + min_ratio: 0.3, + max_query_terms: 16, + max_text_terms: 
1_024, + }, + hits: RankingDeterministicHits { + enabled: false, + weight: 0.05, + half_saturation: 8.0, + last_hit_tau_days: 14.0, + }, + decay: RankingDeterministicDecay { enabled: false, weight: 0.05, tau_days: 30.0 }, + } +} + +fn test_lifecycle_config() -> Lifecycle { + Lifecycle { + ttl_days: TtlDays { + plan: 14, + fact: 180, + preference: 0, + constraint: 0, + decision: 0, + profile: 0, + }, + purge_deleted_after_days: 30, + purge_deprecated_after_days: 180, + } +} + +fn test_security_config() -> Security { + Security { + bind_localhost_only: true, + reject_non_english: true, + redact_secrets_on_write: true, + evidence_min_quotes: 1, + evidence_max_quotes: 2, + evidence_max_quote_chars: 320, + auth_mode: "off".to_string(), + auth_keys: vec![], + } +} + +fn test_chunking_config() -> Chunking { + Chunking { + enabled: true, + max_tokens: 512, + overlap_tokens: 128, + tokenizer_repo: "REPLACE_ME".to_string(), + } +} diff --git a/packages/elf-domain/src/memory_policy/tests.rs b/packages/elf-domain/src/memory_policy/tests.rs index 989d175a..5addc9d8 100644 --- a/packages/elf-domain/src/memory_policy/tests.rs +++ b/packages/elf-domain/src/memory_policy/tests.rs @@ -1,408 +1,2 @@ -use crate::memory_policy::{self, MemoryPolicyDecision, MemoryPolicyEvaluation}; -use elf_config::{ - Chunking, Config, EmbeddingProviderConfig, Lifecycle, LlmProviderConfig, Memory, MemoryPolicy, - MemoryPolicyRule, Postgres, ProviderConfig, Providers, Qdrant, Ranking, RankingBlend, - RankingBlendSegment, RankingDeterministic, RankingDeterministicDecay, RankingDeterministicHits, - RankingDeterministicLexical, RankingDiversity, RankingRetrievalSources, ReadProfiles, - ScopePrecedence, ScopeWriteAllowed, Scopes, Search, SearchCache, SearchDynamic, - SearchExpansion, SearchExplain, SearchGraphContext, SearchPrefilter, SearchRecursive, Security, - Service, Storage, TtlDays, -}; - -fn test_config(policy: MemoryPolicy) -> Config { - let mut cfg = test_default_config(); - - cfg.memory.policy = 
policy; - - cfg -} - -fn test_default_config() -> Config { - Config { - service: test_service_config(), - storage: test_storage_config(), - providers: test_providers_config(), - scopes: test_scopes_config(), - memory: test_memory_config(), - search: test_search_config(), - ranking: test_ranking_config(), - lifecycle: test_lifecycle_config(), - security: test_security_config(), - chunking: test_chunking_config(), - context: None, - mcp: None, - } -} - -fn test_service_config() -> Service { - Service { - http_bind: "127.0.0.1:8080".to_string(), - mcp_bind: "127.0.0.1:8082".to_string(), - admin_bind: "127.0.0.1:8081".to_string(), - log_level: "info".to_string(), - } -} - -fn test_storage_config() -> Storage { - Storage { - postgres: Postgres { - dsn: "postgres://user:pass@localhost/db".to_string(), - pool_max_conns: 1, - }, - qdrant: Qdrant { - url: "http://localhost".to_string(), - collection: "mem_notes_v2".to_string(), - docs_collection: "doc_chunks_v1".to_string(), - vector_dim: 4_096, - }, - } -} - -fn test_providers_config() -> Providers { - Providers { - embedding: test_embedding_provider_config(), - rerank: test_rerank_provider_config(), - llm_extractor: test_llm_extractor_provider_config(), - } -} - -fn test_embedding_provider_config() -> EmbeddingProviderConfig { - EmbeddingProviderConfig { - provider_id: "p".to_string(), - api_base: "http://localhost".to_string(), - api_key: "key".to_string(), - path: "/".to_string(), - model: "m".to_string(), - dimensions: 3, - timeout_ms: 1_000, - default_headers: Default::default(), - } -} - -fn test_rerank_provider_config() -> ProviderConfig { - ProviderConfig { - provider_id: "p".to_string(), - api_base: "http://localhost".to_string(), - api_key: "key".to_string(), - path: "/".to_string(), - model: "m".to_string(), - timeout_ms: 1_000, - default_headers: Default::default(), - } -} - -fn test_llm_extractor_provider_config() -> LlmProviderConfig { - LlmProviderConfig { - provider_id: "p".to_string(), - api_base: 
"http://localhost".to_string(), - api_key: "key".to_string(), - path: "/".to_string(), - model: "m".to_string(), - temperature: 0.1, - timeout_ms: 1_000, - default_headers: Default::default(), - } -} - -fn test_scopes_config() -> Scopes { - Scopes { - allowed: vec!["agent_private".to_string()], - read_profiles: test_read_profiles_config(), - precedence: ScopePrecedence { agent_private: 30, project_shared: 20, org_shared: 10 }, - write_allowed: ScopeWriteAllowed { - agent_private: true, - project_shared: true, - org_shared: true, - }, - } -} - -fn test_read_profiles_config() -> ReadProfiles { - ReadProfiles { - private_only: vec!["agent_private".to_string()], - private_plus_project: vec!["agent_private".to_string()], - all_scopes: vec!["agent_private".to_string()], - } -} - -fn test_memory_config() -> Memory { - Memory { - max_notes_per_add_event: 3, - max_note_chars: 240, - dup_sim_threshold: 0.92, - update_sim_threshold: 0.85, - candidate_k: 60, - top_k: 12, - policy: MemoryPolicy { - rules: vec![ - MemoryPolicyRule { - note_type: Some("fact".to_string()), - scope: Some("agent_private".to_string()), - min_confidence: Some(0.9), - min_importance: Some(0.1), - }, - MemoryPolicyRule { - note_type: Some("preference".to_string()), - scope: Some("agent_private".to_string()), - min_confidence: Some(0.75), - min_importance: None, - }, - MemoryPolicyRule { - note_type: Some("preference".to_string()), - scope: None, - min_confidence: Some(0.6), - min_importance: None, - }, - MemoryPolicyRule { - note_type: None, - scope: None, - min_confidence: None, - min_importance: None, - }, - ], - }, - } -} - -fn test_search_config() -> Search { - Search { - expansion: SearchExpansion { - mode: "off".to_string(), - max_queries: 4, - include_original: true, - }, - dynamic: SearchDynamic { min_candidates: 10, min_top_score: 0.12 }, - prefilter: SearchPrefilter { max_candidates: 0 }, - cache: SearchCache { - enabled: true, - expansion_ttl_days: 7, - rerank_ttl_days: 7, - 
max_payload_bytes: Some(262_144), - }, - explain: SearchExplain { - retention_days: 7, - capture_candidates: false, - candidate_retention_days: 2, - write_mode: "outbox".to_string(), - }, - recursive: SearchRecursive { - enabled: false, - max_depth: 2, - max_children_per_node: 4, - max_nodes_per_scope: 32, - max_total_nodes: 256, - }, - graph_context: SearchGraphContext { - enabled: false, - max_facts_per_item: 16, - max_evidence_notes_per_fact: 16, - }, - } -} - -fn test_ranking_config() -> Ranking { - Ranking { - recency_tau_days: 60.0, - tie_breaker_weight: 0.1, - deterministic: test_ranking_deterministic_config(), - blend: RankingBlend { - enabled: true, - rerank_normalization: "rank".to_string(), - retrieval_normalization: "rank".to_string(), - segments: vec![ - RankingBlendSegment { max_retrieval_rank: 3, retrieval_weight: 0.8 }, - RankingBlendSegment { max_retrieval_rank: 10, retrieval_weight: 0.5 }, - RankingBlendSegment { max_retrieval_rank: 1_000_000, retrieval_weight: 0.2 }, - ], - }, - diversity: RankingDiversity { - enabled: true, - sim_threshold: 0.88, - mmr_lambda: 0.7, - max_skips: 64, - }, - retrieval_sources: RankingRetrievalSources { - fusion_weight: 1.0, - structured_field_weight: 1.0, - fusion_priority: 1, - structured_field_priority: 0, - }, - } -} - -fn test_ranking_deterministic_config() -> RankingDeterministic { - RankingDeterministic { - enabled: false, - lexical: RankingDeterministicLexical { - enabled: false, - weight: 0.05, - min_ratio: 0.3, - max_query_terms: 16, - max_text_terms: 1_024, - }, - hits: RankingDeterministicHits { - enabled: false, - weight: 0.05, - half_saturation: 8.0, - last_hit_tau_days: 14.0, - }, - decay: RankingDeterministicDecay { enabled: false, weight: 0.05, tau_days: 30.0 }, - } -} - -fn test_lifecycle_config() -> Lifecycle { - Lifecycle { - ttl_days: TtlDays { - plan: 14, - fact: 180, - preference: 0, - constraint: 0, - decision: 0, - profile: 0, - }, - purge_deleted_after_days: 30, - 
purge_deprecated_after_days: 180, - } -} - -fn test_security_config() -> Security { - Security { - bind_localhost_only: true, - reject_non_english: true, - redact_secrets_on_write: true, - evidence_min_quotes: 1, - evidence_max_quotes: 2, - evidence_max_quote_chars: 320, - auth_mode: "off".to_string(), - auth_keys: vec![], - } -} - -fn test_chunking_config() -> Chunking { - Chunking { - enabled: true, - max_tokens: 512, - overlap_tokens: 128, - tokenizer_repo: "REPLACE_ME".to_string(), - } -} - -#[test] -fn policy_precedence_prefers_note_type_and_scope_over_note_type_only() { - let cfg = test_config(MemoryPolicy { - rules: vec![ - MemoryPolicyRule { - note_type: Some("fact".to_string()), - scope: None, - min_confidence: Some(0.05), - min_importance: None, - }, - MemoryPolicyRule { - note_type: Some("fact".to_string()), - scope: Some("agent_private".to_string()), - min_confidence: Some(0.95), - min_importance: None, - }, - MemoryPolicyRule { - note_type: None, - scope: Some("agent_private".to_string()), - min_confidence: Some(0.40), - min_importance: None, - }, - ], - }); - let MemoryPolicyEvaluation { decision, matched_rule } = memory_policy::evaluate_memory_policy( - &cfg, - "fact", - "agent_private", - 0.5, - 0.5, - MemoryPolicyDecision::Remember, - ); - - assert_eq!(decision, MemoryPolicyDecision::Ignore); - - let rule = matched_rule.expect("expected policy match"); - - assert_eq!(rule.note_type.as_deref(), Some("fact")); - assert_eq!(rule.scope.as_deref(), Some("agent_private")); - assert_eq!(rule.min_confidence, Some(0.95)); - assert_eq!(rule.min_importance, None); -} - -#[test] -fn evaluate_downgrades_base_remember_update_only() { - let cfg = test_config(MemoryPolicy { - rules: vec![MemoryPolicyRule { - note_type: Some("fact".to_string()), - scope: Some("agent_private".to_string()), - min_confidence: Some(0.9), - min_importance: Some(0.5), - }], - }); - let remember = memory_policy::evaluate_memory_policy( - &cfg, - "fact", - "agent_private", - 0.95, - 0.4, - 
MemoryPolicyDecision::Remember, - ); - - assert_eq!(remember.decision, MemoryPolicyDecision::Ignore); - - let update = memory_policy::evaluate_memory_policy( - &cfg, - "fact", - "agent_private", - f64::NAN, - f64::NAN, - MemoryPolicyDecision::Update, - ); - - assert_eq!(update.decision, MemoryPolicyDecision::Ignore); - - let ignore = memory_policy::evaluate_memory_policy( - &cfg, - "fact", - "agent_private", - 0.1, - 0.1, - MemoryPolicyDecision::Ignore, - ); - - assert_eq!(ignore.decision, MemoryPolicyDecision::Ignore); - - let reject = memory_policy::evaluate_memory_policy( - &cfg, - "fact", - "agent_private", - 0.1, - 0.1, - MemoryPolicyDecision::Reject, - ); - - assert_eq!(reject.decision, MemoryPolicyDecision::Reject); -} - -#[test] -fn evaluate_without_matching_threshold_leaves_base_unchanged() { - let cfg = test_config(MemoryPolicy { - rules: vec![MemoryPolicyRule { - note_type: Some("fact".to_string()), - scope: Some("agent_private".to_string()), - min_confidence: None, - min_importance: None, - }], - }); - let output = memory_policy::evaluate_memory_policy( - &cfg, - "fact", - "agent_private", - 0.0, - 0.0, - MemoryPolicyDecision::Remember, - ); - - assert_eq!(output.decision, MemoryPolicyDecision::Remember); -} +mod evaluation; +mod support; diff --git a/packages/elf-service/tests/acceptance/knowledge_pages/helpers.rs b/packages/elf-service/tests/acceptance/knowledge_pages/helpers.rs index c86345ad..e7577214 100644 --- a/packages/elf-service/tests/acceptance/knowledge_pages/helpers.rs +++ b/packages/elf-service/tests/acceptance/knowledge_pages/helpers.rs @@ -1,14 +1,16 @@ -use std::sync::{Arc, atomic::AtomicUsize}; +mod assertions; +mod request; +mod setup; +mod source_inserts; + +pub(crate) use self::{ + assertions::assert_first_rebuild, request::knowledge_foundation_request, setup::setup_service, + source_inserts::insert_rebuild_sources, +}; -use time::OffsetDateTime; use uuid::Uuid; -use crate::acceptance::{self, SpyExtractor, StubEmbedding, 
StubRerank}; -use elf_domain::knowledge::KnowledgePageKind; -use elf_service::{ - AddNoteInput, AddNoteRequest, ElfService, KnowledgePageRebuildRequest, - KnowledgePageRebuildResponse, Providers, -}; +use elf_service::ElfService; use elf_testkit::TestDatabase; pub(crate) const TENANT_ID: &str = "tenant_knowledge"; @@ -29,425 +31,3 @@ pub(crate) struct KnowledgeSourceIds { pub(crate) fact_id: Uuid, pub(crate) proposal_id: Uuid, } - -pub(crate) fn knowledge_foundation_request(ids: KnowledgeSourceIds) -> KnowledgePageRebuildRequest { - KnowledgePageRebuildRequest { - tenant_id: TENANT_ID.to_string(), - project_id: PROJECT_ID.to_string(), - agent_id: AGENT_ID.to_string(), - page_kind: KnowledgePageKind::Project, - page_key: "knowledge-foundation".to_string(), - title: Some("Knowledge Foundation".to_string()), - doc_ids: vec![ids.doc_id], - doc_chunk_ids: vec![ids.chunk_id], - note_ids: vec![ids.note_id], - event_ids: vec![ids.event_id], - relation_ids: vec![ids.fact_id], - proposal_ids: vec![ids.proposal_id], - provider_metadata: serde_json::json!({}), - } -} - -pub(crate) fn assert_first_rebuild(first: &KnowledgePageRebuildResponse) { - assert_eq!(first.page.sections.len(), 6); - assert_eq!(first.page.source_refs.len(), 6); - assert!(first.page.sections.iter().all(|section| { - section.citations.as_array().is_some_and(|citations| !citations.is_empty()) - })); - assert!(first.page.source_refs.iter().any(|source_ref| source_ref.source_kind == "doc")); - assert!(first.page.source_refs.iter().any(|source_ref| source_ref.source_kind == "doc_chunk")); - assert_eq!(first.page.page.source_coverage["coverage_complete"], true); - assert_eq!(first.page.page.rebuild_metadata["deterministic"], true); - assert_eq!( - first.page.page.rebuild_metadata["generated_by"]["runtime"], - "ElfService::knowledge_page_rebuild" - ); - assert_eq!( - first.page.page.rebuild_metadata["memory_candidate_policy"]["direct_memory_ledger_mutation_allowed"], - false - ); - assert_eq!( - 
first.page.page.rebuild_metadata["version_identity"]["schema"], - "elf.knowledge_page.version_identity/v1" - ); - assert_eq!( - first - .page - .page - .previous_version_diff - .as_ref() - .expect("initial rebuild should expose no-previous diff")["available"], - false - ); -} - -pub(crate) async fn setup_service(test_name: &str) -> Option { - let Some(test_db) = acceptance::test_db().await else { - eprintln!("Skipping {test_name}; set ELF_PG_DSN to run this test."); - - return None; - }; - let Some(qdrant_url) = acceptance::test_qdrant_url() else { - eprintln!("Skipping {test_name}; set ELF_QDRANT_URL to run this test."); - - return None; - }; - let collection = test_db.collection_name("elf_acceptance"); - let docs_collection = test_db.collection_name("elf_acceptance_docs"); - let cfg = acceptance::test_config( - test_db.dsn().to_string(), - qdrant_url, - 4_096, - collection, - docs_collection, - ); - let extractor = SpyExtractor { - calls: Arc::new(AtomicUsize::new(0)), - payload: serde_json::json!({ "notes": [] }), - }; - let providers = Providers::new( - Arc::new(StubEmbedding { vector_dim: 4_096 }), - Arc::new(StubRerank), - Arc::new(extractor), - ); - let service = - acceptance::build_service(cfg, providers).await.expect("Failed to build service."); - - acceptance::reset_db(&service.db.pool).await.expect("Failed to reset test database."); - - Some(KnowledgeFixture { service, _test_db: test_db }) -} - -pub(crate) async fn insert_source_note(service: &ElfService, key: &str, text: &str) -> Uuid { - let response = service - .add_note(AddNoteRequest { - tenant_id: TENANT_ID.to_string(), - project_id: PROJECT_ID.to_string(), - agent_id: AGENT_ID.to_string(), - scope: "agent_private".to_string(), - notes: vec![AddNoteInput { - r#type: "fact".to_string(), - key: Some(key.to_string()), - text: text.to_string(), - structured: None, - importance: 0.7, - confidence: 0.9, - ttl_days: None, - source_ref: serde_json::json!({ "schema": "acceptance/v1", "key": key }), - 
write_policy: None, - }], - }) - .await - .expect("add_note should persist source note"); - - response.results[0].note_id.expect("source note id should be present") -} - -pub(crate) async fn insert_event_audit(service: &ElfService, note_id: Uuid) -> Uuid { - let decision_id = Uuid::new_v4(); - - sqlx::query( - "\ -INSERT INTO memory_ingest_decisions ( - decision_id, - tenant_id, - project_id, - agent_id, - scope, - pipeline, - note_type, - note_key, - note_id, - base_decision, - policy_decision, - note_op, - reason_code, - details, - ts -) -VALUES ($1,$2,$3,$4,'agent_private','add_event','fact','knowledge_event',$5,'remember','remember','ADD',NULL,$6,$7)", - ) - .bind(decision_id) - .bind(TENANT_ID) - .bind(PROJECT_ID) - .bind(AGENT_ID) - .bind(note_id) - .bind(serde_json::json!({ "fixture": "knowledge_page_event_audit" })) - .bind(OffsetDateTime::UNIX_EPOCH) - .execute(&service.db.pool) - .await - .expect("event audit should be inserted"); - - decision_id -} - -pub(crate) async fn insert_source_document(service: &ElfService) -> (Uuid, Uuid) { - let doc_id = Uuid::new_v4(); - let chunk_id = Uuid::new_v4(); - let content = "The Knowledge Workspace compiles Source Library spans into cited derived pages."; - let content_hash = blake3::hash(content.as_bytes()).to_hex().to_string(); - let chunk_hash = blake3::hash(content.as_bytes()).to_hex().to_string(); - let source_ref = serde_json::json!({ - "schema": "doc_source_ref/v1", - "doc_type": "knowledge", - "uri": "docs://knowledge/workspace/source-span-fixture", - "source_record_id": doc_id, - "content_hash": content_hash, - "source_spans": [ - { - "schema": "doc_source_span/v1", - "span_id": Uuid::new_v4(), - "chunk_id": chunk_id, - "status": "captured", - "start_offset": 0, - "end_offset": content.len(), - "content_hash": content_hash, - "chunk_hash": chunk_hash - } - ] - }); - - sqlx::query( - "\ -INSERT INTO doc_documents ( - doc_id, - tenant_id, - project_id, - agent_id, - scope, - doc_type, - status, - title, - 
source_ref, - content, - content_bytes, - content_hash, - created_at, - updated_at -) -VALUES ($1,$2,$3,$4,'project_shared','knowledge','active','Knowledge Workspace Source',$5,$6,$7,$8,$9,$9)", - ) - .bind(doc_id) - .bind(TENANT_ID) - .bind(PROJECT_ID) - .bind(AGENT_ID) - .bind(source_ref) - .bind(content) - .bind(i32::try_from(content.len()).expect("fixture content length should fit i32")) - .bind(content_hash) - .bind(OffsetDateTime::UNIX_EPOCH) - .execute(&service.db.pool) - .await - .expect("source document should be inserted"); - sqlx::query( - "\ -INSERT INTO doc_chunks ( - chunk_id, - doc_id, - chunk_index, - start_offset, - end_offset, - chunk_text, - chunk_hash, - created_at -) -VALUES ($1,$2,0,0,$3,$4,$5,$6)", - ) - .bind(chunk_id) - .bind(doc_id) - .bind(i32::try_from(content.len()).expect("fixture content length should fit i32")) - .bind(content) - .bind(chunk_hash) - .bind(OffsetDateTime::UNIX_EPOCH) - .execute(&service.db.pool) - .await - .expect("source document chunk should be inserted"); - - (doc_id, chunk_id) -} - -pub(crate) async fn insert_relation(service: &ElfService, note_id: Uuid) -> Uuid { - let subject_id = Uuid::new_v4(); - let fact_id = Uuid::new_v4(); - let evidence_id = Uuid::new_v4(); - - sqlx::query( - "\ -INSERT INTO graph_entities ( - entity_id, - tenant_id, - project_id, - canonical, - canonical_norm, - kind, - created_at, - updated_at -) -VALUES ($1,$2,$3,'ELF knowledge pages','elf knowledge pages','concept',$4,$4)", - ) - .bind(subject_id) - .bind(TENANT_ID) - .bind(PROJECT_ID) - .bind(OffsetDateTime::UNIX_EPOCH) - .execute(&service.db.pool) - .await - .expect("graph entity should be inserted"); - sqlx::query( - "\ -INSERT INTO graph_facts ( - fact_id, - tenant_id, - project_id, - agent_id, - scope, - subject_entity_id, - predicate, - predicate_id, - object_entity_id, - object_value, - valid_from, - valid_to, - created_at, - updated_at -) -VALUES ($1,$2,$3,$4,'project_shared',$5,'compile from',NULL,NULL,'authoritative source 
memory',$6,NULL,$6,$6)", - ) - .bind(fact_id) - .bind(TENANT_ID) - .bind(PROJECT_ID) - .bind(AGENT_ID) - .bind(subject_id) - .bind(OffsetDateTime::UNIX_EPOCH) - .execute(&service.db.pool) - .await - .expect("graph fact should be inserted"); - sqlx::query( - "\ -INSERT INTO graph_fact_evidence (evidence_id, fact_id, note_id, created_at) -VALUES ($1,$2,$3,$4)", - ) - .bind(evidence_id) - .bind(fact_id) - .bind(note_id) - .bind(OffsetDateTime::UNIX_EPOCH) - .execute(&service.db.pool) - .await - .expect("graph fact evidence should be inserted"); - - fact_id -} - -pub(crate) async fn insert_applied_proposal(service: &ElfService, note_id: Uuid) -> Uuid { - let run_id = Uuid::new_v4(); - let proposal_id = Uuid::new_v4(); - let source_refs = serde_json::json!([ - { - "kind": "note", - "id": note_id, - "snapshot": { - "status": "active", - "updated_at": "1970-01-01T00:00:00Z", - "metadata": { "fixture": "knowledge_pages" }, - "source_ref": {} - } - } - ]); - let lineage = serde_json::json!({ "source_refs": source_refs }); - - sqlx::query( - "\ -INSERT INTO consolidation_runs ( - run_id, - tenant_id, - project_id, - agent_id, - contract_schema, - job_kind, - status, - input_refs, - source_snapshot, - lineage, - error, - created_at, - updated_at, - completed_at -) -VALUES ($1,$2,$3,$4,'elf.consolidation/v1','manual','completed',$5,$6,$7,'{}'::jsonb,$8,$8,$8)", - ) - .bind(run_id) - .bind(TENANT_ID) - .bind(PROJECT_ID) - .bind(AGENT_ID) - .bind(&source_refs) - .bind(serde_json::json!({ "source_count": 1 })) - .bind(&lineage) - .bind(OffsetDateTime::UNIX_EPOCH) - .execute(&service.db.pool) - .await - .expect("consolidation run should be inserted"); - sqlx::query( - "\ -INSERT INTO consolidation_proposals ( - proposal_id, - run_id, - tenant_id, - project_id, - agent_id, - contract_schema, - proposal_kind, - apply_intent, - review_state, - source_refs, - source_snapshot, - lineage, - diff, - confidence, - unsupported_claim_flags, - contradiction_markers, - staleness_markers, - 
target_ref, - proposed_payload, - reviewer_agent_id, - review_comment, - reviewed_at, - created_at, - updated_at -) -VALUES ($1,$2,$3,$4,$5,'elf.consolidation/v1','knowledge_page','create_derived_knowledge_page','applied',$6,$7,$8,$9,0.9,'[]'::jsonb,'[]'::jsonb,'[]'::jsonb,'{}'::jsonb,$10,$5,'Apply derived page proposal.',$11,$11,$11)", - ) - .bind(proposal_id) - .bind(run_id) - .bind(TENANT_ID) - .bind(PROJECT_ID) - .bind(AGENT_ID) - .bind(&source_refs) - .bind(serde_json::json!({ "source_count": 1 })) - .bind(&lineage) - .bind(serde_json::json!({ - "summary": "Create a derived knowledge page from cited source memory.", - "before": {}, - "after": { "page_key": "knowledge-foundation" } - })) - .bind(serde_json::json!({ "page_key": "knowledge-foundation" })) - .bind(OffsetDateTime::UNIX_EPOCH) - .execute(&service.db.pool) - .await - .expect("consolidation proposal should be inserted"); - - proposal_id -} - -pub(crate) async fn insert_rebuild_sources(service: &ElfService) -> KnowledgeSourceIds { - let note_id = insert_source_note( - service, - "knowledge_pages_foundation", - "Fact: Derived knowledge pages are rebuilt from authoritative source memory and keep citations.", - ) - .await; - let event_id = insert_event_audit(service, note_id).await; - let (doc_id, chunk_id) = insert_source_document(service).await; - let fact_id = insert_relation(service, note_id).await; - let proposal_id = insert_applied_proposal(service, note_id).await; - - KnowledgeSourceIds { note_id, event_id, doc_id, chunk_id, fact_id, proposal_id } -} diff --git a/packages/elf-service/tests/acceptance/knowledge_pages/helpers/assertions.rs b/packages/elf-service/tests/acceptance/knowledge_pages/helpers/assertions.rs new file mode 100644 index 00000000..ba48b683 --- /dev/null +++ b/packages/elf-service/tests/acceptance/knowledge_pages/helpers/assertions.rs @@ -0,0 +1,34 @@ +use elf_service::KnowledgePageRebuildResponse; + +pub(crate) fn assert_first_rebuild(first: &KnowledgePageRebuildResponse) { + 
assert_eq!(first.page.sections.len(), 6); + assert_eq!(first.page.source_refs.len(), 6); + assert!(first.page.sections.iter().all(|section| { + section.citations.as_array().is_some_and(|citations| !citations.is_empty()) + })); + assert!(first.page.source_refs.iter().any(|source_ref| source_ref.source_kind == "doc")); + assert!(first.page.source_refs.iter().any(|source_ref| source_ref.source_kind == "doc_chunk")); + assert_eq!(first.page.page.source_coverage["coverage_complete"], true); + assert_eq!(first.page.page.rebuild_metadata["deterministic"], true); + assert_eq!( + first.page.page.rebuild_metadata["generated_by"]["runtime"], + "ElfService::knowledge_page_rebuild" + ); + assert_eq!( + first.page.page.rebuild_metadata["memory_candidate_policy"]["direct_memory_ledger_mutation_allowed"], + false + ); + assert_eq!( + first.page.page.rebuild_metadata["version_identity"]["schema"], + "elf.knowledge_page.version_identity/v1" + ); + assert_eq!( + first + .page + .page + .previous_version_diff + .as_ref() + .expect("initial rebuild should expose no-previous diff")["available"], + false + ); +} diff --git a/packages/elf-service/tests/acceptance/knowledge_pages/helpers/request.rs b/packages/elf-service/tests/acceptance/knowledge_pages/helpers/request.rs new file mode 100644 index 00000000..772ea4e6 --- /dev/null +++ b/packages/elf-service/tests/acceptance/knowledge_pages/helpers/request.rs @@ -0,0 +1,23 @@ +use crate::acceptance::knowledge_pages::helpers::{ + AGENT_ID, KnowledgeSourceIds, PROJECT_ID, TENANT_ID, +}; +use elf_domain::knowledge::KnowledgePageKind; +use elf_service::KnowledgePageRebuildRequest; + +pub(crate) fn knowledge_foundation_request(ids: KnowledgeSourceIds) -> KnowledgePageRebuildRequest { + KnowledgePageRebuildRequest { + tenant_id: TENANT_ID.to_string(), + project_id: PROJECT_ID.to_string(), + agent_id: AGENT_ID.to_string(), + page_kind: KnowledgePageKind::Project, + page_key: "knowledge-foundation".to_string(), + title: Some("Knowledge 
Foundation".to_string()), + doc_ids: vec![ids.doc_id], + doc_chunk_ids: vec![ids.chunk_id], + note_ids: vec![ids.note_id], + event_ids: vec![ids.event_id], + relation_ids: vec![ids.fact_id], + proposal_ids: vec![ids.proposal_id], + provider_metadata: serde_json::json!({}), + } +} diff --git a/packages/elf-service/tests/acceptance/knowledge_pages/helpers/setup.rs b/packages/elf-service/tests/acceptance/knowledge_pages/helpers/setup.rs new file mode 100644 index 00000000..cd597f05 --- /dev/null +++ b/packages/elf-service/tests/acceptance/knowledge_pages/helpers/setup.rs @@ -0,0 +1,43 @@ +use std::sync::{Arc, atomic::AtomicUsize}; + +use crate::acceptance::{ + self, SpyExtractor, StubEmbedding, StubRerank, knowledge_pages::helpers::KnowledgeFixture, +}; +use elf_service::Providers; + +pub(crate) async fn setup_service(test_name: &str) -> Option { + let Some(test_db) = acceptance::test_db().await else { + eprintln!("Skipping {test_name}; set ELF_PG_DSN to run this test."); + + return None; + }; + let Some(qdrant_url) = acceptance::test_qdrant_url() else { + eprintln!("Skipping {test_name}; set ELF_QDRANT_URL to run this test."); + + return None; + }; + let collection = test_db.collection_name("elf_acceptance"); + let docs_collection = test_db.collection_name("elf_acceptance_docs"); + let cfg = acceptance::test_config( + test_db.dsn().to_string(), + qdrant_url, + 4_096, + collection, + docs_collection, + ); + let extractor = SpyExtractor { + calls: Arc::new(AtomicUsize::new(0)), + payload: serde_json::json!({ "notes": [] }), + }; + let providers = Providers::new( + Arc::new(StubEmbedding { vector_dim: 4_096 }), + Arc::new(StubRerank), + Arc::new(extractor), + ); + let service = + acceptance::build_service(cfg, providers).await.expect("Failed to build service."); + + acceptance::reset_db(&service.db.pool).await.expect("Failed to reset test database."); + + Some(KnowledgeFixture { service, _test_db: test_db }) +} diff --git 
a/packages/elf-service/tests/acceptance/knowledge_pages/helpers/source_inserts.rs b/packages/elf-service/tests/acceptance/knowledge_pages/helpers/source_inserts.rs new file mode 100644 index 00000000..8977becf --- /dev/null +++ b/packages/elf-service/tests/acceptance/knowledge_pages/helpers/source_inserts.rs @@ -0,0 +1,341 @@ +use time::OffsetDateTime; +use uuid::Uuid; + +use crate::acceptance::knowledge_pages::helpers::{ + AGENT_ID, KnowledgeSourceIds, PROJECT_ID, TENANT_ID, +}; +use elf_service::{AddNoteInput, AddNoteRequest, ElfService}; + +/** Inserts one fixture source of each kind and returns their ids. The note goes in first because the event audit row, the relation evidence and the proposal each take its id; the document and its chunk do not use the note. `event_id` is the decision id returned by `insert_event_audit`. */ pub(crate) async fn insert_rebuild_sources(service: &ElfService) -> KnowledgeSourceIds { + let note_id = insert_source_note( + service, + "knowledge_pages_foundation", + "Fact: Derived knowledge pages are rebuilt from authoritative source memory and keep citations.", + ) + .await; + let event_id = insert_event_audit(service, note_id).await; + let (doc_id, chunk_id) = insert_source_document(service).await; + let fact_id = insert_relation(service, note_id).await; + let proposal_id = insert_applied_proposal(service, note_id).await; + + KnowledgeSourceIds { note_id, event_id, doc_id, chunk_id, fact_id, proposal_id } +} + +/** Adds a single `fact` note in the `agent_private` scope through `service.add_note`, using `key` both as the note key and inside its `source_ref`, and returns the note id of the first result. Panics if `add_note` fails, if the results are empty (it indexes `results[0]`), or if that result carries no note id. */ async fn insert_source_note(service: &ElfService, key: &str, text: &str) -> Uuid { + let response = service + .add_note(AddNoteRequest { + tenant_id: TENANT_ID.to_string(), + project_id: PROJECT_ID.to_string(), + agent_id: AGENT_ID.to_string(), + scope: "agent_private".to_string(), + notes: vec![AddNoteInput { + r#type: "fact".to_string(), + key: Some(key.to_string()), + text: text.to_string(), + structured: None, + importance: 0.7, + confidence: 0.9, + ttl_days: None, + source_ref: serde_json::json!({ "schema": "acceptance/v1", "key": key }), + write_policy: None, + }], + }) + .await + .expect("add_note should persist source note"); + + response.results[0].note_id.expect("source note id should be present") +} + +/** Writes one row into memory_ingest_decisions that references `note_id` (pipeline `add_event`, base and policy decision `remember`, op `ADD`, NULL reason code, timestamp UNIX_EPOCH) and returns the freshly generated decision id. Panics if the insert fails. */ async fn insert_event_audit(service: &ElfService, note_id: Uuid) -> Uuid { + let decision_id =
Uuid::new_v4(); + + sqlx::query( + "\ +INSERT INTO memory_ingest_decisions ( + decision_id, + tenant_id, + project_id, + agent_id, + scope, + pipeline, + note_type, + note_key, + note_id, + base_decision, + policy_decision, + note_op, + reason_code, + details, + ts +) +VALUES ($1,$2,$3,$4,'agent_private','add_event','fact','knowledge_event',$5,'remember','remember','ADD',NULL,$6,$7)", + ) + .bind(decision_id) + .bind(TENANT_ID) + .bind(PROJECT_ID) + .bind(AGENT_ID) + .bind(note_id) + .bind(serde_json::json!({ "fixture": "knowledge_page_event_audit" })) + .bind(OffsetDateTime::UNIX_EPOCH) + .execute(&service.db.pool) + .await + .expect("event audit should be inserted"); + + decision_id +} + +/** Inserts one doc_documents row plus a single doc_chunks row (index 0) spanning the whole content, and returns (doc_id, chunk_id). `content_hash` and `chunk_hash` are both the BLAKE3 hex digest of the full content, because the only chunk covers offsets 0..content.len(). The `source_ref` JSON carries one `captured` source span pointing at that chunk. Panics if either insert fails or the content length does not fit in i32. */ async fn insert_source_document(service: &ElfService) -> (Uuid, Uuid) { + let doc_id = Uuid::new_v4(); + let chunk_id = Uuid::new_v4(); + let content = "The Knowledge Workspace compiles Source Library spans into cited derived pages."; + let content_hash = blake3::hash(content.as_bytes()).to_hex().to_string(); + let chunk_hash = blake3::hash(content.as_bytes()).to_hex().to_string(); + let source_ref = serde_json::json!({ + "schema": "doc_source_ref/v1", + "doc_type": "knowledge", + "uri": "docs://knowledge/workspace/source-span-fixture", + "source_record_id": doc_id, + "content_hash": content_hash, + "source_spans": [ + { + "schema": "doc_source_span/v1", + "span_id": Uuid::new_v4(), + "chunk_id": chunk_id, + "status": "captured", + "start_offset": 0, + "end_offset": content.len(), + "content_hash": content_hash, + "chunk_hash": chunk_hash + } + ] + }); + + sqlx::query( + "\ +INSERT INTO doc_documents ( + doc_id, + tenant_id, + project_id, + agent_id, + scope, + doc_type, + status, + title, + source_ref, + content, + content_bytes, + content_hash, + created_at, + updated_at +) +VALUES ($1,$2,$3,$4,'project_shared','knowledge','active','Knowledge Workspace Source',$5,$6,$7,$8,$9,$9)", + ) + .bind(doc_id) + .bind(TENANT_ID) + .bind(PROJECT_ID) + .bind(AGENT_ID) + .bind(source_ref)
+ .bind(content) + .bind(i32::try_from(content.len()).expect("fixture content length should fit i32")) + .bind(content_hash) + .bind(OffsetDateTime::UNIX_EPOCH) + .execute(&service.db.pool) + .await + .expect("source document should be inserted"); + sqlx::query( + "\ +INSERT INTO doc_chunks ( + chunk_id, + doc_id, + chunk_index, + start_offset, + end_offset, + chunk_text, + chunk_hash, + created_at +) +VALUES ($1,$2,0,0,$3,$4,$5,$6)", + ) + .bind(chunk_id) + .bind(doc_id) + .bind(i32::try_from(content.len()).expect("fixture content length should fit i32")) + .bind(content) + .bind(chunk_hash) + .bind(OffsetDateTime::UNIX_EPOCH) + .execute(&service.db.pool) + .await + .expect("source document chunk should be inserted"); + + (doc_id, chunk_id) +} + +/** Inserts a graph entity (`ELF knowledge pages`), a graph fact with that entity as subject (predicate `compile from`, object value `authoritative source memory`, no object entity), and an evidence row tying the fact to `note_id`; returns the fact id. The three inserts run in that order. NOTE(review): presumably ordered to satisfy foreign keys - confirm against the schema. Panics if any insert fails. */ async fn insert_relation(service: &ElfService, note_id: Uuid) -> Uuid { + let subject_id = Uuid::new_v4(); + let fact_id = Uuid::new_v4(); + let evidence_id = Uuid::new_v4(); + + sqlx::query( + "\ +INSERT INTO graph_entities ( + entity_id, + tenant_id, + project_id, + canonical, + canonical_norm, + kind, + created_at, + updated_at +) +VALUES ($1,$2,$3,'ELF knowledge pages','elf knowledge pages','concept',$4,$4)", + ) + .bind(subject_id) + .bind(TENANT_ID) + .bind(PROJECT_ID) + .bind(OffsetDateTime::UNIX_EPOCH) + .execute(&service.db.pool) + .await + .expect("graph entity should be inserted"); + sqlx::query( + "\ +INSERT INTO graph_facts ( + fact_id, + tenant_id, + project_id, + agent_id, + scope, + subject_entity_id, + predicate, + predicate_id, + object_entity_id, + object_value, + valid_from, + valid_to, + created_at, + updated_at +) +VALUES ($1,$2,$3,$4,'project_shared',$5,'compile from',NULL,NULL,'authoritative source memory',$6,NULL,$6,$6)", + ) + .bind(fact_id) + .bind(TENANT_ID) + .bind(PROJECT_ID) + .bind(AGENT_ID) + .bind(subject_id) + .bind(OffsetDateTime::UNIX_EPOCH) + .execute(&service.db.pool) + .await + .expect("graph fact should be inserted"); + sqlx::query( + "\ +INSERT INTO graph_fact_evidence
(evidence_id, fact_id, note_id, created_at) +VALUES ($1,$2,$3,$4)", + ) + .bind(evidence_id) + .bind(fact_id) + .bind(note_id) + .bind(OffsetDateTime::UNIX_EPOCH) + .execute(&service.db.pool) + .await + .expect("graph fact evidence should be inserted"); + + fact_id +} + +/** Inserts a `completed` manual consolidation run and one `applied` knowledge_page proposal attached to it, both citing `note_id` through the same `source_refs` and `lineage` JSON, and returns the proposal id. In the proposal insert, placeholder $5 (AGENT_ID) fills both agent_id and reviewer_agent_id, and $11 (UNIX_EPOCH) fills reviewed_at, created_at and updated_at. Panics if either insert fails. */ async fn insert_applied_proposal(service: &ElfService, note_id: Uuid) -> Uuid { + let run_id = Uuid::new_v4(); + let proposal_id = Uuid::new_v4(); + let source_refs = serde_json::json!([ + { + "kind": "note", + "id": note_id, + "snapshot": { + "status": "active", + "updated_at": "1970-01-01T00:00:00Z", + "metadata": { "fixture": "knowledge_pages" }, + "source_ref": {} + } + } + ]); + let lineage = serde_json::json!({ "source_refs": source_refs }); + + sqlx::query( + "\ +INSERT INTO consolidation_runs ( + run_id, + tenant_id, + project_id, + agent_id, + contract_schema, + job_kind, + status, + input_refs, + source_snapshot, + lineage, + error, + created_at, + updated_at, + completed_at +) +VALUES ($1,$2,$3,$4,'elf.consolidation/v1','manual','completed',$5,$6,$7,'{}'::jsonb,$8,$8,$8)", + ) + .bind(run_id) + .bind(TENANT_ID) + .bind(PROJECT_ID) + .bind(AGENT_ID) + .bind(&source_refs) + .bind(serde_json::json!({ "source_count": 1 })) + .bind(&lineage) + .bind(OffsetDateTime::UNIX_EPOCH) + .execute(&service.db.pool) + .await + .expect("consolidation run should be inserted"); + sqlx::query( + "\ +INSERT INTO consolidation_proposals ( + proposal_id, + run_id, + tenant_id, + project_id, + agent_id, + contract_schema, + proposal_kind, + apply_intent, + review_state, + source_refs, + source_snapshot, + lineage, + diff, + confidence, + unsupported_claim_flags, + contradiction_markers, + staleness_markers, + target_ref, + proposed_payload, + reviewer_agent_id, + review_comment, + reviewed_at, + created_at, + updated_at +) +VALUES ($1,$2,$3,$4,$5,'elf.consolidation/v1','knowledge_page','create_derived_knowledge_page','applied',$6,$7,$8,$9,0.9,'[]'::jsonb,'[]'::jsonb,'[]'::jsonb,'{}'::jsonb,$10,$5,'Apply derived
page proposal.',$11,$11,$11)", + ) + .bind(proposal_id) + .bind(run_id) + .bind(TENANT_ID) + .bind(PROJECT_ID) + .bind(AGENT_ID) + .bind(&source_refs) + .bind(serde_json::json!({ "source_count": 1 })) + .bind(&lineage) + .bind(serde_json::json!({ + "summary": "Create a derived knowledge page from cited source memory.", + "before": {}, + "after": { "page_key": "knowledge-foundation" } + })) + .bind(serde_json::json!({ "page_key": "knowledge-foundation" })) + .bind(OffsetDateTime::UNIX_EPOCH) + .execute(&service.db.pool) + .await + .expect("consolidation proposal should be inserted"); + + proposal_id +}