diff --git a/apps/elf-eval/tests/real_world_job_benchmark/competitor_strength.rs b/apps/elf-eval/tests/real_world_job_benchmark/competitor_strength.rs index 0740d8ff..0b38fd11 100644 --- a/apps/elf-eval/tests/real_world_job_benchmark/competitor_strength.rs +++ b/apps/elf-eval/tests/real_world_job_benchmark/competitor_strength.rs @@ -1,864 +1,2 @@ -use std::fs; - -use color_eyre::{Result, eyre}; -use serde_json::Value; - -use crate::support; - -#[test] -fn qmd_openviking_strength_profile_report_preserves_claim_boundaries() -> Result<()> { - let report = serde_json::from_str::(&fs::read_to_string( - support::strength_profile_report_path()?, - )?)?; - let markdown = fs::read_to_string(support::strength_profile_markdown_path()?)?; - let readme = fs::read_to_string(support::readme_path()?)?; - let benchmarking_index = fs::read_to_string(support::benchmarking_index_path()?)?; - let iteration_direction = fs::read_to_string(support::iteration_direction_report_path()?)?; - - assert_strength_profile_summary(&report); - assert_strength_profile_terms(&report)?; - assert_qmd_strength_profile(&report)?; - assert_qmd_wrong_result_diagnosis(&report)?; - assert_openviking_strength_profile(&report)?; - assert_strength_profile_json_claim_boundaries(&report)?; - assert_strength_profile_markdown_boundaries(&markdown); - assert_operator_facing_strength_profile_boundaries( - &readme, - &benchmarking_index, - &iteration_direction, - ); - - Ok(()) -} - -#[test] -fn current_benchmark_reports_preserve_live_sweep_boundaries() -> Result<()> { - let measurement_audit = fs::read_to_string(support::measurement_coverage_audit_path()?)?; - let measurement_audit_json = serde_json::from_str::(&fs::read_to_string( - support::measurement_coverage_audit_json_path()?, - )?)?; - let competitor_matrix = fs::read_to_string(support::competitor_strength_matrix_path()?)?; - let competitor_matrix_json = serde_json::from_str::(&fs::read_to_string( - support::competitor_strength_matrix_json_path()?, - )?)?; - let iteration_direction = fs::read_to_string(support::iteration_direction_report_path()?)?; - let external_manifest = fs::read_to_string(support::external_adapter_manifest_path())?; - let comparison_external_projects = - fs::read_to_string(support::comparison_external_projects_path()?)?; - let retrieval_debug_profile = serde_json::from_str::(&fs::read_to_string( - support::retrieval_debug_profile_json_path()?, - )?)?; - let temporal_history = serde_json::from_str::(&fs::read_to_string( - support::temporal_history_competitor_gap_json_path()?, - )?)?; - - assert_current_report_text_boundaries( - &measurement_audit, - &competitor_matrix, - &iteration_direction, - &external_manifest, - &comparison_external_projects, - ); - - assert!(competitor_matrix.contains("claude-mem work_resume remains `not_encoded`")); - assert!(!competitor_matrix.contains("claude-mem `wrong_result`, OpenViking work_resume")); - - let qmd_live = support::find_by_field( - support::array_at(&measurement_audit_json, "/live_real_world_adapters")?, - "/adapter", - "qmd live CLI adapter", - )?; - - assert_eq!(qmd_live.pointer("/pass").and_then(Value::as_u64), Some(17)); - assert_eq!(qmd_live.pointer("/wrong_result").and_then(Value::as_u64), Some(6)); - assert_eq!(qmd_live.pointer("/expected_evidence_matched").and_then(Value::as_u64), Some(38)); - assert_eq!(qmd_live.pointer("/evidence_covered_count").and_then(Value::as_u64), Some(45)); - - let memory_evolution = support::find_by_field( - support::array_at(&measurement_audit_json, "/live_suite_breakdown")?, - "/suite", - "memory_evolution", - )?; - - assert_eq!( - memory_evolution.pointer("/elf_status_counts/wrong_result").and_then(Value::as_u64), - Some(5) - ); - assert_eq!( - memory_evolution.pointer("/qmd_status_counts/wrong_result").and_then(Value::as_u64), - Some(6) - ); - assert_eq!( - retrieval_debug_profile - .pointer("/live_real_world_full_sweep_context/qmd/pass") - .and_then(Value::as_u64), - Some(17) - ); - assert_eq!( - retrieval_debug_profile - .pointer("/live_real_world_full_sweep_context/qmd/wrong_result") - .and_then(Value::as_u64), - Some(6) - ); - - assert_competitor_strength_matrix_json(&competitor_matrix_json)?; - - let openmemory_command = support::find_by_field( - support::array_at(&temporal_history, "/commands")?, - "/command", - "cargo make openmemory-ui-export-readback", - )?; - - assert!( - openmemory_command - .pointer("/artifact") - .and_then(Value::as_str) - .is_some_and(|artifact| artifact.contains("tmp/live-baseline/mem0-checks.json") - && artifact.contains("tmp/live-baseline/mem0-openmemory-ui-export.json")) - ); - - Ok(()) -} - -fn assert_current_report_text_boundaries( - measurement_audit: &str, - competitor_matrix: &str, - iteration_direction: &str, - external_manifest: &str, - comparison_external_projects: &str, -) { - assert!( - measurement_audit.contains( - "| `memory_evolution` | `6` | `pass:1`, `wrong_result:5` | `wrong_result:6` |" - ) - ); - assert!( - measurement_audit - .contains("qmd live fails 6/6 jobs after missing the delete/TTL tombstone evidence") - ); - assert!(measurement_audit.contains("Basic local smoke and local OSS history/readback pass")); - assert!(measurement_audit.contains("claude-mem hook/viewer capture is `blocked`")); - assert!(!measurement_audit.contains("claude-mem hook/viewer capture remains untested")); - assert!(!measurement_audit.contains("blocked or untested")); - - assert_measurement_audit_adapter_status_counts(measurement_audit); - - assert!( - competitor_matrix - .contains("broader live suites remain `wrong_result`, `blocked`, or `not_encoded`") - ); - assert!(competitor_matrix.contains( - "Overall adapter-status counts: 4 `pass`,\n6 `wrong_result`, 1 `lifecycle_fail`, 7 `blocked`, and 5 `not_encoded`." - )); - assert!(!competitor_matrix.contains("5 `blocked`, and 7 `not_encoded`")); - assert!( - competitor_matrix - .contains("mem0/OpenMemory local OSS entity-scoped personalization now passes") - ); - assert!(competitor_matrix.contains("scoped preference behavior is a measured tie")); - assert!( - !competitor_matrix.contains("mem0/OpenMemory and Letta personalization are `not_encoded`") - ); - assert!(external_manifest.contains( - "The record is a full-suite sweep, not a full-suite pass; wrong_result, blocked, and not_encoded states remain visible." - )); - assert!(external_manifest.contains( - "The qmd live real-world sweep covers the current encoded fixture corpus; expanded retrieval-debug strength suites still need their own materialized adapter run." - )); - assert!( - comparison_external_projects - .contains("Benchmark-grounded for scoped local OSS same-corpus retrieval") - ); - assert!( - comparison_external_projects - .contains("Benchmark-grounded for local same-corpus retrieval, reindex/update/delete") - ); - assert!(iteration_direction.contains("| Jobs | `55` |")); - assert!(iteration_direction.contains("| Encoded suites | `15` |")); - assert!(iteration_direction.contains("| Pass | `49` |")); - assert!(iteration_direction.contains("| Evidence coverage | `123/123` |")); - assert!(iteration_direction.contains("| Expected evidence recall | `115/115` |")); - - for stale_phrase in [ - "same live sweep shape as ELF", - "ELF and qmd live fail 5/6 jobs", - "both systems currently fail 5/6 live memory-evolution jobs", - "wrong_result, incomplete, blocked, and not_encoded states remain visible", - "broader live suites remain `wrong_result`, `incomplete`, or `not_encoded`", - "The qmd live real-world slice covers representative jobs only", - "| Jobs | `40` |", - "| Encoded suites | `11` |", - "| Jobs | `50` |", - "| Encoded suites | `14` |", - "| Pass | `38` |", - "| Pass | `45` |", - "| Evidence coverage | `115/115` |", - "| Expected evidence recall | `107/107` |", - "history/UI/hosted/graph behavior remains", - "current local adapter is incomplete/wrong-result", - "current adapter is incomplete/invalid-result", - ] { - assert!(!measurement_audit.contains(stale_phrase)); - assert!(!competitor_matrix.contains(stale_phrase)); - assert!(!iteration_direction.contains(stale_phrase)); - assert!(!external_manifest.contains(stale_phrase)); - assert!(!comparison_external_projects.contains(stale_phrase)); - } -} - -fn assert_competitor_strength_matrix_json(matrix: &Value) -> Result<()> { - let projects = support::array_at(matrix, "/project_matrix")?; - let scenarios = support::array_at(matrix, "/scenario_matrix")?; - - assert_competitor_strength_matrix_manifest_counts(matrix); - assert_competitor_strength_matrix_project_json(projects)?; - assert_competitor_strength_matrix_scenario_json(scenarios)?; - - Ok(()) -} - -fn assert_competitor_strength_matrix_project_json(projects: &[Value]) -> Result<()> { - let qmd = support::find_by_field(projects, "/project", "qmd")?; - let mem0 = support::find_by_field(projects, "/project", "mem0/OpenMemory")?; - let claude_mem = support::find_by_field(projects, "/project", "claude-mem")?; - let openviking = support::find_by_field(projects, "/project", "OpenViking")?; - - assert_eq!( - qmd.pointer("/current_evidence_class").and_then(Value::as_str), - Some("live_real_world") - ); - assert_eq!(qmd.pointer("/measured_status").and_then(Value::as_str), Some("wrong_result")); - assert_eq!( - qmd.pointer("/unsupported_or_blocked_status/state").and_then(Value::as_str), - Some("not_encoded") - ); - assert!(qmd.pointer("/benchmark_before_claim").and_then(Value::as_str).is_some_and(|claim| { - claim.contains("Keep qmd deep retrieval/debug profiling separate") - && claim.contains("narrow operator-debug live slice") - })); - assert!( - qmd.pointer("/borrow_if_stronger") - .and_then(Value::as_str) - .is_some_and(|claim| claim.contains("transparent local knobs")) - ); - assert_eq!(mem0.pointer("/measured_status").and_then(Value::as_str), Some("pass")); - assert_eq!( - mem0.pointer("/unsupported_or_blocked_status/state").and_then(Value::as_str), - Some("blocked") - ); - assert_eq!( - mem0.pointer("/unsupported_or_blocked_status/typed_reason").and_then(Value::as_str), - Some("openmemory_export_helper_setup_blocked") - ); - assert!( - mem0.pointer("/benchmark_before_claim") - .and_then(Value::as_str) - .is_some_and(|claim| claim.contains("OpenMemory product app import/export")) - ); - assert!( - claude_mem - .pointer("/unsupported_or_blocked_status/details") - .and_then(Value::as_str) - .is_some_and(|details| details.contains("rerun/inspection targets") - && details.contains("tmp/live-baseline/claude-mem-checks.json")) - ); - assert_eq!( - openviking.pointer("/current_evidence_class").and_then(Value::as_str), - Some("live_baseline_only") - ); - assert_eq!( - openviking.pointer("/measured_status").and_then(Value::as_str), - Some("wrong_result") - ); - assert_eq!( - openviking.pointer("/unsupported_or_blocked_status/state").and_then(Value::as_str), - Some("blocked") - ); - assert!( - openviking - .pointer("/unsupported_or_blocked_status/details") - .and_then(Value::as_str) - .is_some_and(|details| details.contains("encoded as blocked fixtures")) - ); - assert!( - openviking - .pointer("/benchmark_before_claim") - .and_then(Value::as_str) - .is_some_and(|claim| claim.contains("evidence-bearing same-corpus output pass")) - ); - - Ok(()) -} - -fn assert_competitor_strength_matrix_scenario_json(scenarios: &[Value]) -> Result<()> { - let retrieval_debug = support::find_by_field(scenarios, "/scenario_id", "retrieval_debug")?; - let work_resume = support::find_by_field(scenarios, "/scenario_id", "work_resume")?; - let operator_debug = support::find_by_field(scenarios, "/scenario_id", "operator_debugging")?; - let context_trajectory = - support::find_by_field(scenarios, "/scenario_id", "context_trajectory")?; - let consolidation = support::find_by_field(scenarios, "/scenario_id", "consolidation")?; - - assert!( - retrieval_debug - .pointer("/current_state") - .and_then(Value::as_str) - .is_some_and(|state| state.contains("Measured tie on encoded retrieval answers")) - ); - assert!(retrieval_debug.pointer("/current_state").and_then(Value::as_str).is_some_and( - |state| state.contains("qmd remains stronger on local debug ergonomics not fully scored") - )); - assert!( - work_resume - .pointer("/current_competitor_evidence") - .and_then(Value::as_str) - .is_some_and(|claim| claim.contains("claude-mem work_resume remains not_encoded") - && !claim.contains("claude-mem is wrong_result")) - ); - assert!( - operator_debug - .pointer("/current_elf_evidence") - .and_then(Value::as_str) - .is_some_and(|claim| claim.contains("narrow live_real_world operator-debug slice")) - ); - assert!( - operator_debug - .pointer("/current_competitor_evidence") - .and_then(Value::as_str) - .is_some_and(|claim| claim.contains("qmd now has a narrow live_real_world")) - ); - assert!( - operator_debug - .pointer("/next_measurement") - .and_then(Value::as_str) - .is_some_and(|claim| claim.contains("OpenMemory and claude-mem UI/export")) - ); - assert!( - consolidation - .pointer("/current_elf_evidence") - .and_then(Value::as_str) - .is_some_and(|claim| claim.contains("XY-934 adds live_real_world") - && claim.contains("zero source mutations")) - ); - assert!( - consolidation - .pointer("/current_competitor_evidence") - .and_then(Value::as_str) - .is_some_and(|claim| claim.contains("qmd remains not_encoded") - && claim.contains("product references only")) - ); - - let personalization = support::find_by_field(scenarios, "/scenario_id", "personalization")?; - - assert_personalization_matrix_record(personalization); - - assert!( - context_trajectory - .pointer("/current_state") - .and_then(Value::as_str) - .is_some_and(|state| state.contains("not a measured live winner")) - ); - assert!( - context_trajectory - .pointer("/next_measurement") - .and_then(Value::as_str) - .is_some_and(|measurement| measurement.contains("evidence-bearing retrieval pass")) - ); - - Ok(()) -} - -fn assert_personalization_matrix_record(personalization: &Value) { - assert!( - personalization - .pointer("/current_competitor_evidence") - .and_then(Value::as_str) - .is_some_and(|claim| claim - .contains("mem0/OpenMemory local OSS entity-scoped personalization now passes") - && claim.contains("Letta personalization is research_gate not_encoded")) - ); - assert!( - personalization - .pointer("/current_state") - .and_then(Value::as_str) - .is_some_and(|state| state.contains("scoped personalization is a tie")) - ); -} - -fn assert_competitor_strength_matrix_manifest_counts(matrix: &Value) { - assert_eq!( - matrix.pointer("/manifest_summary/adapter_records").and_then(Value::as_u64), - Some(23) - ); - assert_eq!( - matrix - .pointer("/manifest_summary/evidence_class_counts/live_real_world") - .and_then(Value::as_u64), - Some(5) - ); - assert_eq!( - matrix.pointer("/manifest_summary/overall_status_counts/pass").and_then(Value::as_u64), - Some(4) - ); - assert_eq!( - matrix.pointer("/manifest_summary/overall_status_counts/blocked").and_then(Value::as_u64), - Some(7) - ); - assert_eq!( - matrix - .pointer("/manifest_summary/overall_status_counts/not_encoded") - .and_then(Value::as_u64), - Some(5) - ); - assert_eq!( - matrix - .pointer("/manifest_summary/overall_status_counts/wrong_result") - .and_then(Value::as_u64), - Some(6) - ); -} - -fn assert_strength_profile_summary(report: &Value) { - assert_eq!( - report.pointer("/schema").and_then(Value::as_str), - Some("elf.competitor_strength_profile_report/v1") - ); - assert_eq!( - report.pointer("/summary/qmd/retrieval_quality").and_then(Value::as_str), - Some("tie") - ); - assert_eq!( - report.pointer("/summary/qmd/local_query_transparency").and_then(Value::as_str), - Some("not_tested") - ); - assert_eq!( - report.pointer("/summary/qmd/local_replayability").and_then(Value::as_str), - Some("not_tested") - ); - assert_eq!( - report.pointer("/summary/qmd/overall_outcome").and_then(Value::as_str), - Some("not_tested") - ); - assert_eq!( - report.pointer("/summary/openviking/overall_outcome").and_then(Value::as_str), - Some("not_tested") - ); - assert_eq!( - report - .pointer("/qmd_strength_profile/win_tie_loss_summary/elf_win") - .and_then(Value::as_u64), - Some(0) - ); - assert_eq!( - report.pointer("/qmd_strength_profile/win_tie_loss_summary/tie").and_then(Value::as_u64), - Some(3) - ); - assert_eq!( - report - .pointer("/qmd_strength_profile/win_tie_loss_summary/elf_loss") - .and_then(Value::as_u64), - Some(0) - ); - assert_eq!( - report - .pointer("/qmd_strength_profile/win_tie_loss_summary/not_tested") - .and_then(Value::as_u64), - Some(5) - ); - assert_eq!( - report - .pointer("/openviking_context_trajectory_profile/win_tie_loss_summary/not_tested") - .and_then(Value::as_u64), - Some(5) - ); - assert_eq!( - report - .pointer("/openviking_context_trajectory_profile/win_tie_loss_summary/elf_win") - .and_then(Value::as_u64), - Some(1) - ); -} - -fn assert_strength_profile_terms(report: &Value) -> Result<()> { - let result_terms = support::array_at(report, "/result_type_terms")?; - let coverage_terms = support::array_at(report, "/coverage_status_terms")?; - let outcome_terms = support::array_at(report, "/outcome_terms")?; - let actual_result_terms = support::string_array_at(report, "/result_type_terms")?; - let actual_coverage_terms = support::string_array_at(report, "/coverage_status_terms")?; - - assert_eq!( - actual_result_terms, - [ - "pass", - "wrong_result", - "blocked", - "incomplete", - "lifecycle_fail", - "not_encoded", - "unsupported_claim", - ] - .map(str::to_owned) - ); - assert_eq!( - actual_coverage_terms, - [ - "pass", - "wrong_result", - "blocked", - "incomplete", - "lifecycle_fail", - "not_encoded", - "unsupported", - "unsupported_claim", - ] - .map(str::to_owned) - ); - assert!(!result_terms.iter().any(|term| term.as_str() == Some("unsupported"))); - assert!(!result_terms.iter().any(|term| term.as_str() == Some("partial"))); - assert!(!coverage_terms.iter().any(|term| term.as_str() == Some("partial"))); - assert!(result_terms.iter().any(|term| term.as_str() == Some("unsupported_claim"))); - assert!(coverage_terms.iter().any(|term| term.as_str() == Some("unsupported"))); - - assert_value_in_terms(report, "/summary/qmd/overall_outcome", outcome_terms)?; - assert_value_in_terms(report, "/summary/openviking/overall_outcome", outcome_terms)?; - - for scenario in support::array_at(report, "/qmd_strength_profile/scenario_outcomes")? { - assert_value_in_terms(scenario, "/result_type", result_terms)?; - assert_value_in_terms(scenario, "/elf_status", coverage_terms)?; - assert_value_in_terms(scenario, "/qmd_status", coverage_terms)?; - } - for scenario in - support::array_at(report, "/openviking_context_trajectory_profile/scenario_outcomes")? - { - assert_value_in_terms(scenario, "/result_type", result_terms)?; - assert_value_in_terms(scenario, "/openviking_status", coverage_terms)?; - assert_value_in_terms(scenario, "/elf_equivalent_status", coverage_terms)?; - } - - Ok(()) -} - -fn assert_value_in_terms(value: &Value, pointer: &str, terms: &[Value]) -> Result<()> { - let actual = value - .pointer(pointer) - .and_then(Value::as_str) - .ok_or_else(|| eyre::eyre!("missing string at {pointer}"))?; - - assert!( - terms.iter().any(|term| term.as_str() == Some(actual)), - "{actual} at {pointer} is not declared in the report term list" - ); - - Ok(()) -} - -fn assert_qmd_strength_profile(report: &Value) -> Result<()> { - let qmd_scenarios = support::array_at(report, "/qmd_strength_profile/scenario_outcomes")?; - let local_transparency = - support::find_by_field(qmd_scenarios, "/scenario_id", "qmd-local-query-transparency")?; - let retrieval = support::find_by_field(qmd_scenarios, "/scenario_id", "qmd-retrieval-quality")?; - let rerank_controls = support::find_by_field( - qmd_scenarios, - "/scenario_id", - "qmd-expansion-fusion-rerank-controls", - )?; - let stale_isolation = - support::find_by_field(qmd_scenarios, "/scenario_id", "qmd-stale-context-isolation")?; - let lifecycle = - support::find_by_field(qmd_scenarios, "/scenario_id", "qmd-update-delete-cold-start")?; - let operator_debug = - support::find_by_field(qmd_scenarios, "/scenario_id", "qmd-operator-debug-evidence")?; - let replayability = - support::find_by_field(qmd_scenarios, "/scenario_id", "qmd-local-replayability")?; - let wrong_result = - support::find_by_field(qmd_scenarios, "/scenario_id", "qmd-wrong-result-diagnosis")?; - - assert_eq!(qmd_scenarios.len(), 8); - assert_eq!(retrieval.pointer("/elf_outcome").and_then(Value::as_str), Some("tie")); - assert_eq!( - local_transparency.pointer("/elf_outcome").and_then(Value::as_str), - Some("not_tested") - ); - assert_eq!( - local_transparency.pointer("/result_type").and_then(Value::as_str), - Some("not_encoded") - ); - assert_eq!( - rerank_controls.pointer("/result_type").and_then(Value::as_str), - Some("not_encoded") - ); - assert_eq!(stale_isolation.pointer("/result_type").and_then(Value::as_str), Some("pass")); - assert_eq!(stale_isolation.pointer("/elf_outcome").and_then(Value::as_str), Some("tie")); - assert_eq!(lifecycle.pointer("/result_type").and_then(Value::as_str), Some("pass")); - assert_eq!(lifecycle.pointer("/elf_outcome").and_then(Value::as_str), Some("tie")); - assert_eq!(operator_debug.pointer("/result_type").and_then(Value::as_str), Some("not_encoded")); - assert_eq!(operator_debug.pointer("/elf_outcome").and_then(Value::as_str), Some("not_tested")); - assert_eq!(replayability.pointer("/result_type").and_then(Value::as_str), Some("not_encoded")); - assert_eq!(replayability.pointer("/elf_outcome").and_then(Value::as_str), Some("not_tested")); - assert_eq!( - wrong_result.pointer("/evidence_class").and_then(Value::as_str), - Some("research_gate") - ); - assert_eq!(wrong_result.pointer("/result_type").and_then(Value::as_str), Some("not_encoded")); - - Ok(()) -} - -fn assert_qmd_wrong_result_diagnosis(report: &Value) -> Result<()> { - let taxonomy = - support::array_at(report, "/qmd_strength_profile/wrong_result_diagnosis/taxonomy")?; - let absent = support::find_by_field(taxonomy, "/class", "evidence_absent")?; - let dropped = support::find_by_field(taxonomy, "/class", "retrieved_but_dropped")?; - let narrated = support::find_by_field(taxonomy, "/class", "selected_but_not_narrated")?; - let lifecycle = - support::find_by_field(taxonomy, "/class", "contradicted_by_lifecycle_evidence")?; - - assert_eq!(absent.pointer("/coverage").and_then(Value::as_str), Some("observed")); - assert_eq!( - dropped.pointer("/coverage").and_then(Value::as_str), - Some("not_observed_candidate_trace_missing") - ); - assert_eq!(narrated.pointer("/coverage").and_then(Value::as_str), Some("observed")); - assert_eq!(lifecycle.pointer("/coverage").and_then(Value::as_str), Some("observed")); - - let qmd_diagnosis_jobs = - support::array_at(report, "/qmd_strength_profile/wrong_result_diagnosis/jobs")?; - let delete_job = - support::find_by_field(qmd_diagnosis_jobs, "/job_id", "memory-evolution-delete-ttl-001")?; - - assert_eq!(qmd_diagnosis_jobs.len(), 6); - assert_eq!(delete_job.pointer("/qmd_status").and_then(Value::as_str), Some("wrong_result")); - assert!(support::array_contains_str(delete_job, "/missing_evidence", "delete-tombstone")?); - assert!( - delete_job - .pointer("/diagnosis") - .and_then(Value::as_str) - .is_some_and(|diagnosis| diagnosis.contains("typed wrong_result")) - ); - - Ok(()) -} - -fn assert_openviking_strength_profile(report: &Value) -> Result<()> { - let openviking_scenarios = - support::array_at(report, "/openviking_context_trajectory_profile/scenario_outcomes")?; - let trajectory = support::find_by_field( - openviking_scenarios, - "/scenario_id", - "openviking-staged-retrieval-trajectory", - )?; - let precondition = support::find_by_field( - openviking_scenarios, - "/scenario_id", - "openviking-evidence-bearing-retrieval-precondition", - )?; - let local_embed_setup = support::find_by_field( - openviking_scenarios, - "/scenario_id", - "openviking-local-embed-setup", - )?; - let missed_terms = support::find_by_field( - openviking_scenarios, - "/scenario_id", - "openviking-missed-expected-terms-evidence", - )?; - let hierarchy = support::find_by_field( - openviking_scenarios, - "/scenario_id", - "openviking-hierarchy-selection", - )?; - let recursive_expansion = support::find_by_field( - openviking_scenarios, - "/scenario_id", - "openviking-recursive-context-expansion", - )?; - - assert_eq!(openviking_scenarios.len(), 6); - assert_eq!( - trajectory.pointer("/evidence_class").and_then(Value::as_str), - Some("fixture_backed") - ); - assert_eq!(trajectory.pointer("/result_type").and_then(Value::as_str), Some("blocked")); - assert_eq!(trajectory.pointer("/openviking_status").and_then(Value::as_str), Some("blocked")); - assert_eq!(local_embed_setup.pointer("/result_type").and_then(Value::as_str), Some("pass")); - assert_eq!( - local_embed_setup.pointer("/elf_outcome").and_then(Value::as_str), - Some("not_tested") - ); - assert_eq!(local_embed_setup.pointer("/typed_blocker"), Some(&Value::Null)); - assert_eq!(precondition.pointer("/result_type").and_then(Value::as_str), Some("wrong_result")); - assert_eq!(precondition.pointer("/elf_outcome").and_then(Value::as_str), Some("elf_win")); - assert_eq!( - precondition.pointer("/typed_blocker").and_then(Value::as_str), - Some("output_missed_expected_terms") - ); - assert_eq!(missed_terms.pointer("/result_type").and_then(Value::as_str), Some("wrong_result")); - assert_eq!(missed_terms.pointer("/elf_outcome").and_then(Value::as_str), Some("not_tested")); - assert_eq!(hierarchy.pointer("/result_type").and_then(Value::as_str), Some("blocked")); - assert_eq!(hierarchy.pointer("/elf_outcome").and_then(Value::as_str), Some("not_tested")); - assert_eq!( - recursive_expansion.pointer("/result_type").and_then(Value::as_str), - Some("blocked") - ); - assert_eq!( - recursive_expansion.pointer("/elf_outcome").and_then(Value::as_str), - Some("not_tested") - ); - - Ok(()) -} - -fn assert_strength_profile_json_claim_boundaries(report: &Value) -> Result<()> { - assert!(support::array_contains_str( - report, - "/claim_boundaries", - "ELF does not broadly beat qmd; it ties encoded retrieval and lifecycle correctness, keeps qmd query transparency as not_tested for comparative scoring, and leaves replayability not_tested." - )?); - assert!(support::array_contains_str( - report, - "/claim_boundaries", - "qmd expansion, fusion, and rerank superiority remains not_tested because the current qmd paths use --no-rerank and do not score internals." - )?); - assert!(support::array_contains_str( - report, - "/claim_boundaries", - "ELF does not beat OpenViking on context trajectory; OpenViking trajectory strengths remain blocked/not_tested behind a wrong_result same-corpus output precondition and missing staged artifacts." - )?); - assert!(support::array_contains_str( - report, - "/claim_boundaries", - "Research_gate and blocked fixture records are follow-up gates, not pass evidence." - )?); - assert!(support::array_contains_str( - report, - "/claim_boundaries", - "Missing equivalent surfaces are encoded as unsupported, blocked, or not_encoded rather than fake losses." - )?); - - Ok(()) -} - -fn assert_strength_profile_markdown_boundaries(markdown: &str) { - assert!( - markdown.contains( - "| Wrong-result diagnosis | `research_gate` | `not_encoded` | `not_tested` |" - ) - ); - assert!( - markdown.contains("ELF ties qmd on the current encoded retrieval-correctness surfaces") - ); - assert!(markdown.contains("qmd remains the local retrieval-debug UX reference")); - assert!(markdown.contains("not scored as comparative ELF wins or losses")); - assert!(markdown.contains("ELF currently wins only the equivalent OpenViking same-corpus")); - assert!(markdown.contains("Do not claim ELF broadly beats qmd")); - assert!(markdown.contains( - "Do not claim ELF beats OpenViking on staged retrieval, hierarchy, or recursive" - )); - assert!(markdown.contains( - "Do not turn `research_gate`, `blocked`, `not_encoded`, or `unsupported` surfaces" - )); - assert!(markdown.contains("no pass evidence is claimed")); - assert!(markdown.contains("typed `wrong_result` state")); -} - -fn assert_operator_facing_strength_profile_boundaries( - readme: &str, - benchmarking_index: &str, - iteration_direction: &str, -) { - assert!(readme.contains("Full-suite live real-world adapter sweep after XY-926")); - assert!(readme.contains("all 55 checked-in jobs across 13 suites")); - assert!(readme.contains("ELF now live-scores capture/write-policy")); - assert!(readme.contains("consolidation proposal review")); - assert!(readme.contains("knowledge-page rebuild/lint")); - assert!(readme.contains("operator-debugging fixtures")); - assert!(!readme.contains("memory-evolution wrong results")); - assert!(readme.contains("Live temporal reconciliation after XY-905")); - assert!(readme.contains("now reports ELF live `memory_evolution` as 6/6 pass")); - assert!(readme.contains("broad qmd, Graphiti/Zep, mem0/OpenMemory, Letta")); - assert!(readme.contains("production-ops operator boundaries")); - assert!(readme.contains("core/archival live adapter gap")); - assert!( - support::collapse_whitespace(readme).contains("blocked context-trajectory measurement") - ); - assert!( - readme - .contains("consolidation, knowledge, capture, and core/archival typed non-pass states") - ); - assert!(readme.contains("operator-debug trace hydration")); - assert!(readme.contains("qmd remains the local retrieval-debug UX reference")); - assert!(readme.contains("broad ELF-over-qmd")); - assert!(readme.contains("qmd and OpenViking Strength-Profile Report - June 11, 2026")); - assert!(benchmarking_index.contains("2026-06-11-qmd-openviking-strength-profile-report.md")); - assert!( - benchmarking_index.contains("separates qmd retrieval quality from debug/replay ergonomics") - ); - assert!(benchmarking_index.contains("preserves XY-928 OpenViking")); - assert!( - benchmarking_index - .contains("context-trajectory surfaces as blocked/not-tested until scored staged") - ); - assert!( - iteration_direction - .contains("ELF and qmd are tied on the encoded live retrieval, work-resume, and") - ); - assert!(iteration_direction.contains("ELF does not yet beat qmd's local retrieval-debug")); - - assert_iteration_direction_current_measurement_counts(iteration_direction); - - assert!(iteration_direction.contains( - "ELF beats OpenViking on context trajectory. The scenario is encoded as blocked" - )); - assert!( - iteration_direction - .contains("Do not promote a reference project into a win/loss claim until") - ); -} - -fn assert_measurement_audit_adapter_status_counts(markdown: &str) { - for expected in [ - "| `blocked` | `7` |", - "| `not_encoded` | `5` |", - "The generated JSON report emits `external_project_count: 16`", - ] { - assert!(markdown.contains(expected), "missing measurement audit text: {expected}"); - } - for stale in ["| `blocked` | `6` |", "| `not_encoded` | `6` |"] { - assert!(!markdown.contains(stale), "stale measurement audit text: {stale}"); - } -} - -fn assert_iteration_direction_current_measurement_counts(markdown: &str) { - for expected in [ - "| Jobs | `55` |", - "| Encoded suites | `15` |", - "| Blocked | `6` |", - "| Mean score | `0.891` |", - "| Evidence coverage | `123/123` |", - "| Source-ref coverage | `123/123` |", - "| Quote coverage | `123/123` |", - "| Expected evidence recall | `115/115` |", - "| `blocked` | `7` |", - "| `not_encoded` | `5` |", - "`live_baseline_only`, `fixture_backed`, and `research_gate`", - "`blocked` for fixture-backed trajectory gates", - ] { - assert!(markdown.contains(expected), "missing iteration-direction text: {expected}"); - } - for stale in [ - "| Jobs | `40` |", - "| Encoded suites | `11` |", - "| Jobs | `50` |", - "| Encoded suites | `14` |", - "| Mean score | `0.950` |", - "| Mean score | `0.900` |", - "| Evidence coverage | `88/88` |", - "| Evidence coverage | `115/115` |", - "| Expected evidence recall | `80/80` |", - "| Expected evidence recall | `107/107` |", - "| `blocked` | `5` |", - "| `not_encoded` | `7` |", - "`live_baseline_only` plus `research_gate`", - ] { - assert!(!markdown.contains(stale), "stale iteration-direction text: {stale}"); - } -} +mod competitor_strength_live; +mod competitor_strength_profile; diff --git a/apps/elf-eval/tests/real_world_job_benchmark/competitor_strength_live.rs b/apps/elf-eval/tests/real_world_job_benchmark/competitor_strength_live.rs new file mode 100644 index 00000000..3e35242a --- /dev/null +++ b/apps/elf-eval/tests/real_world_job_benchmark/competitor_strength_live.rs @@ -0,0 +1,405 @@ +use std::fs; + +use color_eyre::Result; +use serde_json::Value; + +use crate::support; + +#[test] +fn current_benchmark_reports_preserve_live_sweep_boundaries() -> Result<()> { + let measurement_audit = fs::read_to_string(support::measurement_coverage_audit_path()?)?; + let measurement_audit_json = serde_json::from_str::(&fs::read_to_string( + support::measurement_coverage_audit_json_path()?, + )?)?; + let competitor_matrix = fs::read_to_string(support::competitor_strength_matrix_path()?)?; + let competitor_matrix_json = serde_json::from_str::(&fs::read_to_string( + support::competitor_strength_matrix_json_path()?, + )?)?; + let iteration_direction = fs::read_to_string(support::iteration_direction_report_path()?)?; + let external_manifest = fs::read_to_string(support::external_adapter_manifest_path())?; + let comparison_external_projects = + fs::read_to_string(support::comparison_external_projects_path()?)?; + let retrieval_debug_profile = serde_json::from_str::(&fs::read_to_string( + support::retrieval_debug_profile_json_path()?, + )?)?; + let temporal_history = serde_json::from_str::(&fs::read_to_string( + support::temporal_history_competitor_gap_json_path()?, + )?)?; + + assert_current_report_text_boundaries( + &measurement_audit, + &competitor_matrix, + &iteration_direction, + &external_manifest, + &comparison_external_projects, + ); + + assert!(competitor_matrix.contains("claude-mem work_resume remains `not_encoded`")); + assert!(!competitor_matrix.contains("claude-mem `wrong_result`, OpenViking work_resume")); + + let qmd_live = support::find_by_field( + support::array_at(&measurement_audit_json, "/live_real_world_adapters")?, + "/adapter", + "qmd live CLI adapter", + )?; + + assert_eq!(qmd_live.pointer("/pass").and_then(Value::as_u64), Some(17)); + assert_eq!(qmd_live.pointer("/wrong_result").and_then(Value::as_u64), Some(6)); + assert_eq!(qmd_live.pointer("/expected_evidence_matched").and_then(Value::as_u64), Some(38)); + assert_eq!(qmd_live.pointer("/evidence_covered_count").and_then(Value::as_u64), Some(45)); + + let memory_evolution = support::find_by_field( + support::array_at(&measurement_audit_json, "/live_suite_breakdown")?, + "/suite", + "memory_evolution", + )?; + + assert_eq!( + memory_evolution.pointer("/elf_status_counts/wrong_result").and_then(Value::as_u64), + Some(5) + ); + assert_eq!( + memory_evolution.pointer("/qmd_status_counts/wrong_result").and_then(Value::as_u64), + Some(6) + ); + assert_eq!( + retrieval_debug_profile + .pointer("/live_real_world_full_sweep_context/qmd/pass") + .and_then(Value::as_u64), + Some(17) + ); + assert_eq!( + retrieval_debug_profile + .pointer("/live_real_world_full_sweep_context/qmd/wrong_result") + .and_then(Value::as_u64), + Some(6) + ); + + assert_competitor_strength_matrix_json(&competitor_matrix_json)?; + + let openmemory_command = support::find_by_field( + support::array_at(&temporal_history, "/commands")?, + "/command", + "cargo make openmemory-ui-export-readback", + )?; + + assert!( + openmemory_command + .pointer("/artifact") + .and_then(Value::as_str) + .is_some_and(|artifact| artifact.contains("tmp/live-baseline/mem0-checks.json") + && artifact.contains("tmp/live-baseline/mem0-openmemory-ui-export.json")) + ); + + Ok(()) +} + +fn assert_current_report_text_boundaries( + measurement_audit: &str, + competitor_matrix: &str, + iteration_direction: &str, + external_manifest: &str, + comparison_external_projects: &str, +) { + assert!( + measurement_audit.contains( + "| `memory_evolution` | `6` | `pass:1`, `wrong_result:5` | `wrong_result:6` |" + ) + ); + assert!( + measurement_audit + .contains("qmd live fails 6/6 jobs after missing the delete/TTL tombstone evidence") + ); + assert!(measurement_audit.contains("Basic local smoke and local OSS history/readback pass")); + assert!(measurement_audit.contains("claude-mem hook/viewer capture is `blocked`")); + assert!(!measurement_audit.contains("claude-mem hook/viewer capture remains untested")); + assert!(!measurement_audit.contains("blocked or untested")); + + assert_measurement_audit_adapter_status_counts(measurement_audit); + + assert!( + competitor_matrix + .contains("broader live suites remain `wrong_result`, `blocked`, or `not_encoded`") + ); + assert!(competitor_matrix.contains( + "Overall adapter-status counts: 4 `pass`,\n6 `wrong_result`, 1 `lifecycle_fail`, 7 `blocked`, and 5 `not_encoded`." + )); + assert!(!competitor_matrix.contains("5 `blocked`, and 7 `not_encoded`")); + assert!( + competitor_matrix + .contains("mem0/OpenMemory local OSS entity-scoped personalization now passes") + ); + assert!(competitor_matrix.contains("scoped preference behavior is a measured tie")); + assert!( + !competitor_matrix.contains("mem0/OpenMemory and Letta personalization are `not_encoded`") + ); + assert!(external_manifest.contains( + "The record is a full-suite sweep, not a full-suite pass; wrong_result, blocked, and not_encoded states remain visible." + )); + assert!(external_manifest.contains( + "The qmd live real-world sweep covers the current encoded fixture corpus; expanded retrieval-debug strength suites still need their own materialized adapter run." + )); + assert!( + comparison_external_projects + .contains("Benchmark-grounded for scoped local OSS same-corpus retrieval") + ); + assert!( + comparison_external_projects + .contains("Benchmark-grounded for local same-corpus retrieval, reindex/update/delete") + ); + assert!(iteration_direction.contains("| Jobs | `55` |")); + assert!(iteration_direction.contains("| Encoded suites | `15` |")); + assert!(iteration_direction.contains("| Pass | `49` |")); + assert!(iteration_direction.contains("| Evidence coverage | `123/123` |")); + assert!(iteration_direction.contains("| Expected evidence recall | `115/115` |")); + + for stale_phrase in [ + "same live sweep shape as ELF", + "ELF and qmd live fail 5/6 jobs", + "both systems currently fail 5/6 live memory-evolution jobs", + "wrong_result, incomplete, blocked, and not_encoded states remain visible", + "broader live suites remain `wrong_result`, `incomplete`, or `not_encoded`", + "The qmd live real-world slice covers representative jobs only", + "| Jobs | `40` |", + "| Encoded suites | `11` |", + "| Jobs | `50` |", + "| Encoded suites | `14` |", + "| Pass | `38` |", + "| Pass | `45` |", + "| Evidence coverage | `115/115` |", + "| Expected evidence recall | `107/107` |", + "history/UI/hosted/graph behavior remains", + "current local adapter is incomplete/wrong-result", + "current adapter is incomplete/invalid-result", + ] { + assert!(!measurement_audit.contains(stale_phrase)); + assert!(!competitor_matrix.contains(stale_phrase)); + assert!(!iteration_direction.contains(stale_phrase)); + assert!(!external_manifest.contains(stale_phrase)); + assert!(!comparison_external_projects.contains(stale_phrase)); + } +} + +fn assert_competitor_strength_matrix_json(matrix: &Value) -> Result<()> { + let projects = support::array_at(matrix, "/project_matrix")?; + let scenarios = support::array_at(matrix, "/scenario_matrix")?; + + assert_competitor_strength_matrix_manifest_counts(matrix); + assert_competitor_strength_matrix_project_json(projects)?; + assert_competitor_strength_matrix_scenario_json(scenarios)?; + + Ok(()) +} + +fn assert_competitor_strength_matrix_project_json(projects: &[Value]) -> Result<()> { + let qmd = support::find_by_field(projects, "/project", "qmd")?; + let mem0 = support::find_by_field(projects, "/project", "mem0/OpenMemory")?; + let claude_mem = support::find_by_field(projects, "/project", "claude-mem")?; + let openviking = support::find_by_field(projects, "/project", "OpenViking")?; + + assert_eq!( + qmd.pointer("/current_evidence_class").and_then(Value::as_str), + Some("live_real_world") + ); + assert_eq!(qmd.pointer("/measured_status").and_then(Value::as_str), Some("wrong_result")); + assert_eq!( + qmd.pointer("/unsupported_or_blocked_status/state").and_then(Value::as_str), + Some("not_encoded") + ); + assert!(qmd.pointer("/benchmark_before_claim").and_then(Value::as_str).is_some_and(|claim| { + claim.contains("Keep qmd deep retrieval/debug profiling separate") + && claim.contains("narrow operator-debug live slice") + })); + assert!( + qmd.pointer("/borrow_if_stronger") + .and_then(Value::as_str) + .is_some_and(|claim| claim.contains("transparent local knobs")) + ); + assert_eq!(mem0.pointer("/measured_status").and_then(Value::as_str), Some("pass")); + assert_eq!( + mem0.pointer("/unsupported_or_blocked_status/state").and_then(Value::as_str), + Some("blocked") + ); + assert_eq!( + mem0.pointer("/unsupported_or_blocked_status/typed_reason").and_then(Value::as_str), + Some("openmemory_export_helper_setup_blocked") + ); + assert!( + mem0.pointer("/benchmark_before_claim") + .and_then(Value::as_str) + .is_some_and(|claim| claim.contains("OpenMemory product app import/export")) + ); + assert!( + claude_mem + .pointer("/unsupported_or_blocked_status/details") + .and_then(Value::as_str) + .is_some_and(|details| details.contains("rerun/inspection targets") + && details.contains("tmp/live-baseline/claude-mem-checks.json")) + ); + assert_eq!( + openviking.pointer("/current_evidence_class").and_then(Value::as_str), + Some("live_baseline_only") + ); + assert_eq!( + openviking.pointer("/measured_status").and_then(Value::as_str), + Some("wrong_result") + ); + assert_eq!( + openviking.pointer("/unsupported_or_blocked_status/state").and_then(Value::as_str), + Some("blocked") + ); + assert!( + openviking + .pointer("/unsupported_or_blocked_status/details") + .and_then(Value::as_str) + .is_some_and(|details| details.contains("encoded as blocked fixtures")) + ); + assert!( + openviking + .pointer("/benchmark_before_claim") + .and_then(Value::as_str) + .is_some_and(|claim| claim.contains("evidence-bearing same-corpus output pass")) + ); + + Ok(()) +} + +fn assert_competitor_strength_matrix_scenario_json(scenarios: &[Value]) -> Result<()> { + let retrieval_debug = support::find_by_field(scenarios, "/scenario_id", "retrieval_debug")?; + let work_resume = support::find_by_field(scenarios, "/scenario_id", "work_resume")?; + let operator_debug = support::find_by_field(scenarios, "/scenario_id", "operator_debugging")?; + let context_trajectory = + support::find_by_field(scenarios, "/scenario_id", "context_trajectory")?; + let consolidation = support::find_by_field(scenarios, "/scenario_id", "consolidation")?; + + assert!( + retrieval_debug + .pointer("/current_state") + .and_then(Value::as_str) + .is_some_and(|state| state.contains("Measured tie on encoded retrieval answers")) + ); + assert!(retrieval_debug.pointer("/current_state").and_then(Value::as_str).is_some_and( + |state| state.contains("qmd remains stronger on local debug ergonomics not fully scored") + )); + assert!( + work_resume + .pointer("/current_competitor_evidence") + .and_then(Value::as_str) + .is_some_and(|claim| claim.contains("claude-mem work_resume remains not_encoded") + && !claim.contains("claude-mem is wrong_result")) + ); + assert!( + operator_debug + .pointer("/current_elf_evidence") + .and_then(Value::as_str) + .is_some_and(|claim| claim.contains("narrow live_real_world operator-debug slice")) + ); + assert!( + operator_debug + .pointer("/current_competitor_evidence") + .and_then(Value::as_str) + .is_some_and(|claim| claim.contains("qmd now has a narrow live_real_world")) + ); + assert!( + operator_debug + .pointer("/next_measurement") + .and_then(Value::as_str) + .is_some_and(|claim| claim.contains("OpenMemory and claude-mem UI/export")) + ); + assert!( + consolidation + .pointer("/current_elf_evidence") + .and_then(Value::as_str) + .is_some_and(|claim| claim.contains("XY-934 adds live_real_world") + && claim.contains("zero source mutations")) + ); + assert!( + consolidation + .pointer("/current_competitor_evidence") + .and_then(Value::as_str) + .is_some_and(|claim| claim.contains("qmd remains not_encoded") + && claim.contains("product references only")) + ); + + let personalization = support::find_by_field(scenarios, "/scenario_id", "personalization")?; + + assert_personalization_matrix_record(personalization); + + assert!( + context_trajectory + .pointer("/current_state") + .and_then(Value::as_str) + .is_some_and(|state| state.contains("not a measured live winner")) + ); + assert!( + context_trajectory + .pointer("/next_measurement") + .and_then(Value::as_str) + .is_some_and(|measurement| measurement.contains("evidence-bearing retrieval pass")) + ); + + Ok(()) +} + +fn assert_personalization_matrix_record(personalization: &Value) { + assert!( + personalization + .pointer("/current_competitor_evidence") + .and_then(Value::as_str) + .is_some_and(|claim| claim + .contains("mem0/OpenMemory local OSS entity-scoped personalization now passes") + && claim.contains("Letta personalization is research_gate not_encoded")) + ); + assert!( + personalization + .pointer("/current_state") + .and_then(Value::as_str) + .is_some_and(|state| state.contains("scoped personalization is a tie")) + ); +} + +fn assert_competitor_strength_matrix_manifest_counts(matrix: &Value) { + assert_eq!( + matrix.pointer("/manifest_summary/adapter_records").and_then(Value::as_u64), + Some(23) + ); + assert_eq!( + matrix + .pointer("/manifest_summary/evidence_class_counts/live_real_world") + .and_then(Value::as_u64), + Some(5) + ); + assert_eq!( + matrix.pointer("/manifest_summary/overall_status_counts/pass").and_then(Value::as_u64), + Some(4) + ); + assert_eq!( + matrix.pointer("/manifest_summary/overall_status_counts/blocked").and_then(Value::as_u64), + Some(7) + ); + assert_eq!( + matrix + .pointer("/manifest_summary/overall_status_counts/not_encoded") + .and_then(Value::as_u64), + Some(5) + ); + assert_eq!( + matrix + .pointer("/manifest_summary/overall_status_counts/wrong_result") + .and_then(Value::as_u64), + Some(6) + ); +} + +fn assert_measurement_audit_adapter_status_counts(markdown: &str) { + for expected in [ + "| `blocked` | `7` |", + "| `not_encoded` | `5` |", + "The generated JSON report emits `external_project_count: 16`", + ] { + assert!(markdown.contains(expected), "missing measurement audit text: {expected}"); + } + for stale in ["| `blocked` | `6` |", "| `not_encoded` | `6` |"] { + assert!(!markdown.contains(stale), "stale measurement audit text: {stale}"); + } +} diff --git a/apps/elf-eval/tests/real_world_job_benchmark/competitor_strength_profile.rs b/apps/elf-eval/tests/real_world_job_benchmark/competitor_strength_profile.rs new file mode 100644 index 00000000..2829183c --- /dev/null +++ b/apps/elf-eval/tests/real_world_job_benchmark/competitor_strength_profile.rs @@ -0,0 +1,465 @@ +use std::fs; + +use color_eyre::{Result, eyre}; +use serde_json::Value; + +use crate::support; + +#[test] +fn qmd_openviking_strength_profile_report_preserves_claim_boundaries() -> Result<()> { + let report = serde_json::from_str::(&fs::read_to_string( + support::strength_profile_report_path()?, + )?)?; + let markdown = fs::read_to_string(support::strength_profile_markdown_path()?)?; + let readme = fs::read_to_string(support::readme_path()?)?; + let benchmarking_index = fs::read_to_string(support::benchmarking_index_path()?)?; + let iteration_direction = fs::read_to_string(support::iteration_direction_report_path()?)?; + + assert_strength_profile_summary(&report); + assert_strength_profile_terms(&report)?; + assert_qmd_strength_profile(&report)?; + assert_qmd_wrong_result_diagnosis(&report)?; + assert_openviking_strength_profile(&report)?; + assert_strength_profile_json_claim_boundaries(&report)?; + assert_strength_profile_markdown_boundaries(&markdown); + assert_operator_facing_strength_profile_boundaries( + &readme, + &benchmarking_index, + &iteration_direction, + ); + + Ok(()) +} + +fn assert_strength_profile_summary(report: &Value) { + assert_eq!( + report.pointer("/schema").and_then(Value::as_str), + Some("elf.competitor_strength_profile_report/v1") + ); + assert_eq!( + report.pointer("/summary/qmd/retrieval_quality").and_then(Value::as_str), + Some("tie") + ); + assert_eq!( + report.pointer("/summary/qmd/local_query_transparency").and_then(Value::as_str), + Some("not_tested") + ); + assert_eq!( + report.pointer("/summary/qmd/local_replayability").and_then(Value::as_str), + Some("not_tested") + ); + assert_eq!( + report.pointer("/summary/qmd/overall_outcome").and_then(Value::as_str), + Some("not_tested") + ); + assert_eq!( + report.pointer("/summary/openviking/overall_outcome").and_then(Value::as_str), + Some("not_tested") + ); + assert_eq!( + report + .pointer("/qmd_strength_profile/win_tie_loss_summary/elf_win") + .and_then(Value::as_u64), + Some(0) + ); + assert_eq!( + report.pointer("/qmd_strength_profile/win_tie_loss_summary/tie").and_then(Value::as_u64), + Some(3) + ); + assert_eq!( + report + .pointer("/qmd_strength_profile/win_tie_loss_summary/elf_loss") + .and_then(Value::as_u64), + Some(0) + ); + assert_eq!( + report + .pointer("/qmd_strength_profile/win_tie_loss_summary/not_tested") + .and_then(Value::as_u64), + Some(5) + ); + assert_eq!( + report + .pointer("/openviking_context_trajectory_profile/win_tie_loss_summary/not_tested") + .and_then(Value::as_u64), + Some(5) + ); + assert_eq!( + report + .pointer("/openviking_context_trajectory_profile/win_tie_loss_summary/elf_win") + .and_then(Value::as_u64), + Some(1) + ); +} + +fn assert_strength_profile_terms(report: &Value) -> Result<()> { + let result_terms = support::array_at(report, "/result_type_terms")?; + let coverage_terms = support::array_at(report, "/coverage_status_terms")?; + let outcome_terms = support::array_at(report, "/outcome_terms")?; + let actual_result_terms = support::string_array_at(report, "/result_type_terms")?; + let actual_coverage_terms = support::string_array_at(report, "/coverage_status_terms")?; + + assert_eq!( + actual_result_terms, + [ + "pass", + "wrong_result", + "blocked", + "incomplete", + "lifecycle_fail", + "not_encoded", + "unsupported_claim", + ] + .map(str::to_owned) + ); + assert_eq!( + actual_coverage_terms, + [ + "pass", + "wrong_result", + "blocked", + "incomplete", + "lifecycle_fail", + "not_encoded", + "unsupported", + "unsupported_claim", + ] + .map(str::to_owned) + ); + assert!(!result_terms.iter().any(|term| term.as_str() == Some("unsupported"))); + assert!(!result_terms.iter().any(|term| term.as_str() == Some("partial"))); + assert!(!coverage_terms.iter().any(|term| term.as_str() == Some("partial"))); + assert!(result_terms.iter().any(|term| term.as_str() == Some("unsupported_claim"))); + assert!(coverage_terms.iter().any(|term| term.as_str() == Some("unsupported"))); + + assert_value_in_terms(report, "/summary/qmd/overall_outcome", outcome_terms)?; + assert_value_in_terms(report, "/summary/openviking/overall_outcome", outcome_terms)?; + + for scenario in support::array_at(report, "/qmd_strength_profile/scenario_outcomes")? { + assert_value_in_terms(scenario, "/result_type", result_terms)?; + assert_value_in_terms(scenario, "/elf_status", coverage_terms)?; + assert_value_in_terms(scenario, "/qmd_status", coverage_terms)?; + } + for scenario in + support::array_at(report, "/openviking_context_trajectory_profile/scenario_outcomes")? + { + assert_value_in_terms(scenario, "/result_type", result_terms)?; + assert_value_in_terms(scenario, "/openviking_status", coverage_terms)?; + assert_value_in_terms(scenario, "/elf_equivalent_status", coverage_terms)?; + } + + Ok(()) +} + +fn assert_value_in_terms(value: &Value, pointer: &str, terms: &[Value]) -> Result<()> { + let actual = value + .pointer(pointer) + .and_then(Value::as_str) + .ok_or_else(|| eyre::eyre!("missing string at {pointer}"))?; + + assert!( + terms.iter().any(|term| term.as_str() == Some(actual)), + "{actual} at {pointer} is not declared in the report term list" + ); + + Ok(()) +} + +fn assert_qmd_strength_profile(report: &Value) -> Result<()> { + let qmd_scenarios = support::array_at(report, "/qmd_strength_profile/scenario_outcomes")?; + let local_transparency = + support::find_by_field(qmd_scenarios, "/scenario_id", "qmd-local-query-transparency")?; + let retrieval = support::find_by_field(qmd_scenarios, "/scenario_id", "qmd-retrieval-quality")?; + let rerank_controls = support::find_by_field( + qmd_scenarios, + "/scenario_id", + "qmd-expansion-fusion-rerank-controls", + )?; + let stale_isolation = + support::find_by_field(qmd_scenarios, "/scenario_id", "qmd-stale-context-isolation")?; + let lifecycle = + support::find_by_field(qmd_scenarios, "/scenario_id", "qmd-update-delete-cold-start")?; + let operator_debug = + support::find_by_field(qmd_scenarios, "/scenario_id", "qmd-operator-debug-evidence")?; + let replayability = + support::find_by_field(qmd_scenarios, "/scenario_id", "qmd-local-replayability")?; + let wrong_result = + support::find_by_field(qmd_scenarios, "/scenario_id", "qmd-wrong-result-diagnosis")?; + + assert_eq!(qmd_scenarios.len(), 8); + assert_eq!(retrieval.pointer("/elf_outcome").and_then(Value::as_str), Some("tie")); + assert_eq!( + local_transparency.pointer("/elf_outcome").and_then(Value::as_str), + Some("not_tested") + ); + assert_eq!( + local_transparency.pointer("/result_type").and_then(Value::as_str), + Some("not_encoded") + ); + assert_eq!( + rerank_controls.pointer("/result_type").and_then(Value::as_str), + Some("not_encoded") + ); + assert_eq!(stale_isolation.pointer("/result_type").and_then(Value::as_str), Some("pass")); + assert_eq!(stale_isolation.pointer("/elf_outcome").and_then(Value::as_str), Some("tie")); + assert_eq!(lifecycle.pointer("/result_type").and_then(Value::as_str), Some("pass")); + assert_eq!(lifecycle.pointer("/elf_outcome").and_then(Value::as_str), Some("tie")); + assert_eq!(operator_debug.pointer("/result_type").and_then(Value::as_str), Some("not_encoded")); + assert_eq!(operator_debug.pointer("/elf_outcome").and_then(Value::as_str), Some("not_tested")); + assert_eq!(replayability.pointer("/result_type").and_then(Value::as_str), Some("not_encoded")); + assert_eq!(replayability.pointer("/elf_outcome").and_then(Value::as_str), Some("not_tested")); + assert_eq!( + wrong_result.pointer("/evidence_class").and_then(Value::as_str), + Some("research_gate") + ); + assert_eq!(wrong_result.pointer("/result_type").and_then(Value::as_str), Some("not_encoded")); + + Ok(()) +} + +fn assert_qmd_wrong_result_diagnosis(report: &Value) -> Result<()> { + let taxonomy = + support::array_at(report, "/qmd_strength_profile/wrong_result_diagnosis/taxonomy")?; + let absent = support::find_by_field(taxonomy, "/class", "evidence_absent")?; + let dropped = support::find_by_field(taxonomy, "/class", "retrieved_but_dropped")?; + let narrated = support::find_by_field(taxonomy, "/class", "selected_but_not_narrated")?; + let lifecycle = + support::find_by_field(taxonomy, "/class", "contradicted_by_lifecycle_evidence")?; + + assert_eq!(absent.pointer("/coverage").and_then(Value::as_str), Some("observed")); + assert_eq!( + dropped.pointer("/coverage").and_then(Value::as_str), + Some("not_observed_candidate_trace_missing") + ); + assert_eq!(narrated.pointer("/coverage").and_then(Value::as_str), Some("observed")); + assert_eq!(lifecycle.pointer("/coverage").and_then(Value::as_str), Some("observed")); + + let qmd_diagnosis_jobs = + support::array_at(report, "/qmd_strength_profile/wrong_result_diagnosis/jobs")?; + let delete_job = + support::find_by_field(qmd_diagnosis_jobs, "/job_id", "memory-evolution-delete-ttl-001")?; + + assert_eq!(qmd_diagnosis_jobs.len(), 6); + assert_eq!(delete_job.pointer("/qmd_status").and_then(Value::as_str), Some("wrong_result")); + assert!(support::array_contains_str(delete_job, "/missing_evidence", "delete-tombstone")?); + assert!( + delete_job + .pointer("/diagnosis") + .and_then(Value::as_str) + .is_some_and(|diagnosis| diagnosis.contains("typed wrong_result")) + ); + + Ok(()) +} + +fn assert_openviking_strength_profile(report: &Value) -> Result<()> { + let openviking_scenarios = + support::array_at(report, "/openviking_context_trajectory_profile/scenario_outcomes")?; + let trajectory = support::find_by_field( + openviking_scenarios, + "/scenario_id", + "openviking-staged-retrieval-trajectory", + )?; + let precondition = support::find_by_field( + openviking_scenarios, + "/scenario_id", + "openviking-evidence-bearing-retrieval-precondition", + )?; + let local_embed_setup = support::find_by_field( + openviking_scenarios, + "/scenario_id", + "openviking-local-embed-setup", + )?; + let missed_terms = support::find_by_field( + openviking_scenarios, + "/scenario_id", + "openviking-missed-expected-terms-evidence", + )?; + let hierarchy = support::find_by_field( + openviking_scenarios, + "/scenario_id", + "openviking-hierarchy-selection", + )?; + let recursive_expansion = support::find_by_field( + openviking_scenarios, + "/scenario_id", + "openviking-recursive-context-expansion", + )?; + + assert_eq!(openviking_scenarios.len(), 6); + assert_eq!( + trajectory.pointer("/evidence_class").and_then(Value::as_str), + Some("fixture_backed") + ); + assert_eq!(trajectory.pointer("/result_type").and_then(Value::as_str), Some("blocked")); + assert_eq!(trajectory.pointer("/openviking_status").and_then(Value::as_str), Some("blocked")); + assert_eq!(local_embed_setup.pointer("/result_type").and_then(Value::as_str), Some("pass")); + assert_eq!( + local_embed_setup.pointer("/elf_outcome").and_then(Value::as_str), + Some("not_tested") + ); + assert_eq!(local_embed_setup.pointer("/typed_blocker"), Some(&Value::Null)); + assert_eq!(precondition.pointer("/result_type").and_then(Value::as_str), Some("wrong_result")); + assert_eq!(precondition.pointer("/elf_outcome").and_then(Value::as_str), Some("elf_win")); + assert_eq!( + precondition.pointer("/typed_blocker").and_then(Value::as_str), + Some("output_missed_expected_terms") + ); + assert_eq!(missed_terms.pointer("/result_type").and_then(Value::as_str), Some("wrong_result")); + assert_eq!(missed_terms.pointer("/elf_outcome").and_then(Value::as_str), Some("not_tested")); + assert_eq!(hierarchy.pointer("/result_type").and_then(Value::as_str), Some("blocked")); + assert_eq!(hierarchy.pointer("/elf_outcome").and_then(Value::as_str), Some("not_tested")); + assert_eq!( + recursive_expansion.pointer("/result_type").and_then(Value::as_str), + Some("blocked") + ); + assert_eq!( + recursive_expansion.pointer("/elf_outcome").and_then(Value::as_str), + Some("not_tested") + ); + + Ok(()) +} + +fn assert_strength_profile_json_claim_boundaries(report: &Value) -> Result<()> { + assert!(support::array_contains_str( + report, + "/claim_boundaries", + "ELF does not broadly beat qmd; it ties encoded retrieval and lifecycle correctness, keeps qmd query transparency as not_tested for comparative scoring, and leaves replayability not_tested." + )?); + assert!(support::array_contains_str( + report, + "/claim_boundaries", + "qmd expansion, fusion, and rerank superiority remains not_tested because the current qmd paths use --no-rerank and do not score internals." + )?); + assert!(support::array_contains_str( + report, + "/claim_boundaries", + "ELF does not beat OpenViking on context trajectory; OpenViking trajectory strengths remain blocked/not_tested behind a wrong_result same-corpus output precondition and missing staged artifacts." + )?); + assert!(support::array_contains_str( + report, + "/claim_boundaries", + "Research_gate and blocked fixture records are follow-up gates, not pass evidence." + )?); + assert!(support::array_contains_str( + report, + "/claim_boundaries", + "Missing equivalent surfaces are encoded as unsupported, blocked, or not_encoded rather than fake losses." + )?); + + Ok(()) +} + +fn assert_strength_profile_markdown_boundaries(markdown: &str) { + assert!( + markdown.contains( + "| Wrong-result diagnosis | `research_gate` | `not_encoded` | `not_tested` |" + ) + ); + assert!( + markdown.contains("ELF ties qmd on the current encoded retrieval-correctness surfaces") + ); + assert!(markdown.contains("qmd remains the local retrieval-debug UX reference")); + assert!(markdown.contains("not scored as comparative ELF wins or losses")); + assert!(markdown.contains("ELF currently wins only the equivalent OpenViking same-corpus")); + assert!(markdown.contains("Do not claim ELF broadly beats qmd")); + assert!(markdown.contains( + "Do not claim ELF beats OpenViking on staged retrieval, hierarchy, or recursive" + )); + assert!(markdown.contains( + "Do not turn `research_gate`, `blocked`, `not_encoded`, or `unsupported` surfaces" + )); + assert!(markdown.contains("no pass evidence is claimed")); + assert!(markdown.contains("typed `wrong_result` state")); +} + +fn assert_operator_facing_strength_profile_boundaries( + readme: &str, + benchmarking_index: &str, + iteration_direction: &str, +) { + assert!(readme.contains("Full-suite live real-world adapter sweep after XY-926")); + assert!(readme.contains("all 55 checked-in jobs across 13 suites")); + assert!(readme.contains("ELF now live-scores capture/write-policy")); + assert!(readme.contains("consolidation proposal review")); + assert!(readme.contains("knowledge-page rebuild/lint")); + assert!(readme.contains("operator-debugging fixtures")); + assert!(!readme.contains("memory-evolution wrong results")); + assert!(readme.contains("Live temporal reconciliation after XY-905")); + assert!(readme.contains("now reports ELF live `memory_evolution` as 6/6 pass")); + assert!(readme.contains("broad qmd, Graphiti/Zep, mem0/OpenMemory, Letta")); + assert!(readme.contains("production-ops operator boundaries")); + assert!(readme.contains("core/archival live adapter gap")); + assert!( + support::collapse_whitespace(readme).contains("blocked context-trajectory measurement") + ); + assert!( + readme + .contains("consolidation, knowledge, capture, and core/archival typed non-pass states") + ); + assert!(readme.contains("operator-debug trace hydration")); + assert!(readme.contains("qmd remains the local retrieval-debug UX reference")); + assert!(readme.contains("broad ELF-over-qmd")); + assert!(readme.contains("qmd and OpenViking Strength-Profile Report - June 11, 2026")); + assert!(benchmarking_index.contains("2026-06-11-qmd-openviking-strength-profile-report.md")); + assert!( + benchmarking_index.contains("separates qmd retrieval quality from debug/replay ergonomics") + ); + assert!(benchmarking_index.contains("preserves XY-928 OpenViking")); + assert!( + benchmarking_index + .contains("context-trajectory surfaces as blocked/not-tested until scored staged") + ); + assert!( + iteration_direction + .contains("ELF and qmd are tied on the encoded live retrieval, work-resume, and") + ); + assert!(iteration_direction.contains("ELF does not yet beat qmd's local retrieval-debug")); + + assert_iteration_direction_current_measurement_counts(iteration_direction); + + assert!(iteration_direction.contains( + "ELF beats OpenViking on context trajectory. The scenario is encoded as blocked" + )); + assert!( + iteration_direction + .contains("Do not promote a reference project into a win/loss claim until") + ); +} + +fn assert_iteration_direction_current_measurement_counts(markdown: &str) { + for expected in [ + "| Jobs | `55` |", + "| Encoded suites | `15` |", + "| Blocked | `6` |", + "| Mean score | `0.891` |", + "| Evidence coverage | `123/123` |", + "| Source-ref coverage | `123/123` |", + "| Quote coverage | `123/123` |", + "| Expected evidence recall | `115/115` |", + "| `blocked` | `7` |", + "| `not_encoded` | `5` |", + "`live_baseline_only`, `fixture_backed`, and `research_gate`", + "`blocked` for fixture-backed trajectory gates", + ] { + assert!(markdown.contains(expected), "missing iteration-direction text: {expected}"); + } + for stale in [ + "| Jobs | `40` |", + "| Encoded suites | `11` |", + "| Jobs | `50` |", + "| Encoded suites | `14` |", + "| Mean score | `0.950` |", + "| Mean score | `0.900` |", + "| Evidence coverage | `88/88` |", + "| Evidence coverage | `115/115` |", + "| Expected evidence recall | `80/80` |", + "| Expected evidence recall | `107/107` |", + "| `blocked` | `5` |", + "| `not_encoded` | `7` |", + "`live_baseline_only` plus `research_gate`", + ] { + assert!(!markdown.contains(stale), "stale iteration-direction text: {stale}"); + } +} diff --git a/packages/elf-service/src/docs/tests.rs b/packages/elf-service/src/docs/tests.rs index b6b66950..1df94cd9 100644 --- a/packages/elf-service/src/docs/tests.rs +++ b/packages/elf-service/src/docs/tests.rs @@ -1,17 +1,9 @@ -use ahash::AHashMap; -use qdrant_client::qdrant::{ - DatetimeRange, Filter, condition::ConditionOneOf, r#match::MatchValue, -}; -use time::{OffsetDateTime, format_description::well_known::Rfc3339}; -use tokenizers::{Tokenizer, models::wordlevel::WordLevel, pre_tokenizers::whitespace::Whitespace}; -use uuid::Uuid; +mod tests_core; +mod tests_put_validation; +mod tests_search_validation; +mod tests_source_capture; -use crate::docs::{ - self, DocSearchRow, DocType, DocsPutRequest, DocsSearchL0Filters, DocsSearchL0Request, - DocsSparseMode, Error, SourceCaptureSummaryInput, -}; -use elf_domain::writegate::{WritePolicy, WritePolicyAudit, WriteRedactionResult, WriteSpan}; -use elf_storage::models::DocChunk; +use crate::docs::DocsSearchL0Request; const TENANT_ID: &str = "tenant"; const PROJECT_ID: &str = "project"; @@ -40,1101 +32,3 @@ fn test_request_with_query(query: &str) -> DocsSearchL0Request { explain: None, } } - -fn first_datetime_range(filter: &Filter, key: &str) -> Option { - for condition in &filter.must { - if let Some(ConditionOneOf::Field(field)) = condition.condition_one_of.as_ref() { - if field.key != key { - continue; - } - - if let Some(range) = field.datetime_range.as_ref() { - return Some(*range); - } - } - } - - None -} - -fn first_match_value(filter: &Filter, key: &str) -> Option { - for condition in &filter.must { - if let Some(ConditionOneOf::Field(field)) = condition.condition_one_of.as_ref() { - if field.key != key { - continue; - } - - if let Some(r#match) = field.r#match.as_ref() { - let Some(match_value) = r#match.match_value.as_ref() else { - continue; - }; - - return match match_value { - MatchValue::Keyword(value) => Some(value.clone()), - _ => None, - }; - } - } - } - - None -} - -fn test_tokenizer() -> Tokenizer { - let mut vocab = AHashMap::new(); - - vocab.insert("alpha".to_string(), 1_u32); - vocab.insert("beta".to_string(), 2_u32); - vocab.insert("charlie".to_string(), 3_u32); - vocab.insert("delta".to_string(), 4_u32); - vocab.insert("".to_string(), 0_u32); - - let model = WordLevel::builder() - .vocab(vocab) - .unk_token("".to_string()) - .build() - .expect("Failed to build test tokenizer."); - let mut tokenizer = Tokenizer::new(model); - - tokenizer.with_pre_tokenizer(Some(Whitespace)); - - tokenizer -} - -#[test] -fn doc_type_parses_and_serializes() { - let encoded = - serde_json::to_string(&DocType::Knowledge).expect("Expected DocType serialization."); - let parsed = - serde_json::from_str::("\"knowledge\"").expect("Expected parse to succeed."); - let invalid: Result = serde_json::from_str("\"invalid\""); - - assert_eq!(encoded, "\"knowledge\""); - assert_eq!(parsed, DocType::Knowledge); - assert!(invalid.is_err()); -} - -#[test] -fn docs_search_l0_requires_chat_doc_type_for_thread_id() { - let err = docs::validate_docs_search_l0(&DocsSearchL0Request { - tenant_id: TENANT_ID.to_string(), - project_id: PROJECT_ID.to_string(), - caller_agent_id: "agent".to_string(), - read_profile: "private_plus_project".to_string(), - query: "thread".to_string(), - scope: None, - status: None, - doc_type: Some("search".to_string()), - sparse_mode: None, - domain: None, - repo: None, - agent_id: None, - thread_id: Some("thread-1".to_string()), - updated_after: None, - updated_before: None, - ts_gte: None, - ts_lte: None, - top_k: None, - candidate_k: None, - explain: None, - }) - .expect_err("Expected thread_id to require doc_type=chat."); - - match err { - Error::InvalidRequest { message } => assert!(message.contains("thread_id requires")), - other => panic!("Unexpected error: {other:?}"), - } - - docs::validate_docs_search_l0(&DocsSearchL0Request { - tenant_id: TENANT_ID.to_string(), - project_id: PROJECT_ID.to_string(), - caller_agent_id: "agent".to_string(), - read_profile: "private_plus_project".to_string(), - query: "thread".to_string(), - scope: None, - status: None, - doc_type: Some("chat".to_string()), - sparse_mode: None, - domain: None, - repo: None, - agent_id: None, - thread_id: Some("thread-1".to_string()), - updated_after: None, - updated_before: None, - ts_gte: None, - ts_lte: None, - top_k: None, - candidate_k: None, - explain: None, - }) - .expect("Expected thread_id filter to be accepted for chat."); -} - -#[test] -fn validate_docs_put_rejects_invalid_doc_type() { - let err = docs::validate_docs_put(&DocsPutRequest { - tenant_id: "t".to_string(), - project_id: "p".to_string(), - agent_id: "a".to_string(), - scope: "project_shared".to_string(), - doc_type: None, - title: None, - write_policy: None, - source_ref: serde_json::json!({ - "schema": "doc_source_ref/v1", - "doc_type": "invalid", - "ts": "2026-02-25T12:00:00Z", - }), - content: "Hello world.".to_string(), - }) - .expect_err("Expected invalid doc_type to be rejected."); - - match err { - Error::InvalidRequest { message } => assert!(message.contains("doc_type")), - other => panic!("Unexpected error: {other:?}"), - } -} - -#[test] -fn resolve_doc_chunking_profile_is_deterministic_by_doc_type() { - let small = docs::resolve_doc_chunking_profile(DocType::Chat); - - assert_eq!(small.max_tokens, 1_024); - assert_eq!(small.overlap_tokens, 128); - - let default = docs::resolve_doc_chunking_profile(DocType::Knowledge); - - assert_eq!(default.max_tokens, 2_048); - assert_eq!(default.overlap_tokens, 256); -} - -#[test] -fn validate_docs_search_l0_defaults_status_and_filters_dates() { - let filters = docs::validate_docs_search_l0(&test_request_with_query("hello world")) - .expect("valid request"); - - assert_eq!(filters.status, "active"); - - let bad_dates = DocsSearchL0Request { - updated_after: Some("2026-02-25T12:00:00Z".to_string()), - updated_before: Some("2026-02-25T11:00:00Z".to_string()), - sparse_mode: None, - domain: None, - repo: None, - ..test_request_with_query("status") - }; - let err = docs::validate_docs_search_l0(&bad_dates) - .expect_err("Expected bad date order to be rejected."); - - match err { - Error::InvalidRequest { message } => { - assert!(message.contains("earlier")); - }, - other => panic!("Unexpected error: {other:?}"), - } -} - -#[test] -fn validate_docs_search_l0_rejects_invalid_status() { - let err = docs::validate_docs_search_l0(&DocsSearchL0Request { - tenant_id: TENANT_ID.to_string(), - project_id: PROJECT_ID.to_string(), - caller_agent_id: "agent".to_string(), - read_profile: "private_plus_project".to_string(), - query: "status".to_string(), - scope: None, - status: Some("archived".to_string()), - doc_type: None, - sparse_mode: None, - domain: None, - repo: None, - agent_id: None, - thread_id: None, - updated_after: None, - updated_before: None, - ts_gte: None, - ts_lte: None, - top_k: None, - candidate_k: None, - explain: None, - }) - .expect_err("Expected invalid status to be rejected."); - - match err { - Error::InvalidRequest { message } => assert!(message.contains("status")), - other => panic!("Unexpected error: {other:?}"), - } -} - -#[test] -fn validate_docs_search_l0_rejects_invalid_datetime_format() { - let err = docs::validate_docs_search_l0(&DocsSearchL0Request { - tenant_id: TENANT_ID.to_string(), - project_id: PROJECT_ID.to_string(), - caller_agent_id: "agent".to_string(), - read_profile: "private_plus_project".to_string(), - query: "status".to_string(), - scope: None, - status: None, - doc_type: None, - sparse_mode: None, - domain: None, - repo: None, - agent_id: None, - thread_id: None, - updated_after: Some("2026-02-25T12:00:00".to_string()), - updated_before: None, - ts_gte: None, - ts_lte: None, - top_k: None, - candidate_k: None, - explain: None, - }) - .expect_err("Expected invalid RFC3339 datetime to be rejected."); - - match err { - Error::InvalidRequest { message } => assert!(message.contains("RFC3339")), - other => panic!("Unexpected error: {other:?}"), - } -} - -#[test] -fn build_doc_search_filter_applies_status_and_requested_filters() { - let filters = DocsSearchL0Filters { - scope: Some("project_shared".to_string()), - status: "deleted".to_string(), - doc_type: Some(DocType::Chat), - sparse_mode: DocsSparseMode::Auto, - domain: None, - repo: None, - agent_id: Some("owner".to_string()), - thread_id: Some("thread-7".to_string()), - updated_after: Some( - OffsetDateTime::parse("2026-02-20T00:00:00Z", &Rfc3339).expect("Invalid timestamp."), - ), - updated_before: Some( - OffsetDateTime::parse("2026-02-28T00:00:00Z", &Rfc3339).expect("Invalid timestamp."), - ), - ts_gte: Some( - OffsetDateTime::parse("2026-01-01T00:00:00Z", &Rfc3339).expect("Invalid timestamp."), - ), - ts_lte: Some( - OffsetDateTime::parse("2026-12-31T00:00:00Z", &Rfc3339).expect("Invalid timestamp."), - ), - }; - let filter = super::build_doc_search_filter( - TENANT_ID, - PROJECT_ID, - "requester", - &["agent_private".to_string(), "project_shared".to_string()], - &filters, - ); - - assert_eq!(first_match_value(&filter, "tenant_id").as_deref(), Some("tenant")); - assert_eq!(first_match_value(&filter, "status").as_deref(), Some("deleted")); - assert_eq!(first_match_value(&filter, "scope").as_deref(), Some("project_shared")); - assert_eq!(first_match_value(&filter, "doc_type").as_deref(), Some("chat")); - assert_eq!(first_match_value(&filter, "agent_id").as_deref(), Some("owner")); - assert_eq!(first_match_value(&filter, "thread_id").as_deref(), Some("thread-7")); - assert_eq!(first_match_value(&filter, "domain").as_deref(), None); - assert_eq!(first_match_value(&filter, "repo").as_deref(), None); - - let datetime_range = first_datetime_range(&filter, "updated_at") - .expect("Expected datetime filter for updated_at."); - let after = - OffsetDateTime::parse("2026-02-20T00:00:00Z", &Rfc3339).expect("Invalid timestamp."); - let before = - OffsetDateTime::parse("2026-02-28T00:00:00Z", &Rfc3339).expect("Invalid timestamp."); - let lt = datetime_range.lt.as_ref().expect("Expected datetime filter .lt value."); - let gt = datetime_range.gt.as_ref().expect("Expected datetime filter .gt value."); - - assert_eq!(lt.seconds, before.unix_timestamp()); - assert_eq!(lt.nanos, before.nanosecond() as i32); - assert_eq!(gt.seconds, after.unix_timestamp()); - assert_eq!(gt.nanos, after.nanosecond() as i32); - assert!(datetime_range.gte.is_none()); - assert!(datetime_range.lte.is_none()); - - let doc_ts_range = - first_datetime_range(&filter, "doc_ts").expect("Expected datetime filter for doc_ts."); - let gte = doc_ts_range.gte.as_ref().expect("Expected datetime filter .gte value."); - let lte = doc_ts_range.lte.as_ref().expect("Expected datetime filter .lte value."); - let doc_ts_gte = - OffsetDateTime::parse("2026-01-01T00:00:00Z", &Rfc3339).expect("Invalid timestamp."); - let doc_ts_lte = - OffsetDateTime::parse("2026-12-31T00:00:00Z", &Rfc3339).expect("Invalid timestamp."); - - assert_eq!(gte.seconds, doc_ts_gte.unix_timestamp()); - assert_eq!(gte.nanos, doc_ts_gte.nanosecond() as i32); - assert_eq!(lte.seconds, doc_ts_lte.unix_timestamp()); - assert_eq!(lte.nanos, doc_ts_lte.nanosecond() as i32); - assert!(doc_ts_range.gt.is_none()); - assert!(doc_ts_range.lt.is_none()); -} - -#[test] -fn validate_docs_search_l0_rejects_invalid_doc_ts_order() { - let err = docs::validate_docs_search_l0(&DocsSearchL0Request { - tenant_id: TENANT_ID.to_string(), - project_id: PROJECT_ID.to_string(), - caller_agent_id: "agent".to_string(), - read_profile: "private_plus_project".to_string(), - query: "status".to_string(), - scope: None, - status: None, - doc_type: None, - sparse_mode: None, - domain: None, - repo: None, - agent_id: None, - thread_id: None, - updated_after: None, - updated_before: None, - ts_gte: Some("2026-02-25T12:00:00Z".to_string()), - ts_lte: Some("2026-02-25T11:00:00Z".to_string()), - top_k: None, - candidate_k: None, - explain: None, - }) - .expect_err("Expected bad doc_ts order to be rejected."); - - match err { - Error::InvalidRequest { message } => { - assert!(message.contains("earlier")); - }, - other => panic!("Unexpected error: {other:?}"), - } -} - -#[test] -fn validate_docs_search_l0_rejects_invalid_sparse_mode() { - let err = docs::validate_docs_search_l0(&DocsSearchL0Request { - tenant_id: TENANT_ID.to_string(), - project_id: PROJECT_ID.to_string(), - caller_agent_id: "agent".to_string(), - read_profile: "private_plus_project".to_string(), - query: "status".to_string(), - scope: None, - status: None, - doc_type: None, - sparse_mode: Some("invalid".to_string()), - domain: None, - repo: None, - agent_id: None, - thread_id: None, - updated_after: None, - updated_before: None, - ts_gte: None, - ts_lte: None, - top_k: None, - candidate_k: None, - explain: None, - }) - .expect_err("Expected invalid sparse mode to be rejected."); - - match err { - Error::InvalidRequest { message } => { - assert!(message.contains("sparse_mode")); - }, - other => panic!("Unexpected error: {other:?}"), - } -} - -#[test] -fn validate_docs_search_l0_rejects_domain_without_doc_type_search() { - let err = docs::validate_docs_search_l0(&DocsSearchL0Request { - tenant_id: TENANT_ID.to_string(), - project_id: PROJECT_ID.to_string(), - caller_agent_id: "agent".to_string(), - read_profile: "private_plus_project".to_string(), - query: "status".to_string(), - scope: None, - status: None, - doc_type: None, - sparse_mode: None, - domain: Some("example.com".to_string()), - repo: None, - agent_id: None, - thread_id: None, - updated_after: None, - updated_before: None, - ts_gte: None, - ts_lte: None, - top_k: None, - candidate_k: None, - explain: None, - }) - .expect_err("Expected domain without doc_type=search to be rejected."); - - match err { - Error::InvalidRequest { message } => { - assert!(message.contains("doc_type=search")); - }, - other => panic!("Unexpected error: {other:?}"), - } -} - -#[test] -fn validate_docs_search_l0_rejects_repo_without_doc_type_dev() { - let err = docs::validate_docs_search_l0(&DocsSearchL0Request { - tenant_id: TENANT_ID.to_string(), - project_id: PROJECT_ID.to_string(), - caller_agent_id: "agent".to_string(), - read_profile: "private_plus_project".to_string(), - query: "status".to_string(), - scope: None, - status: None, - doc_type: None, - sparse_mode: None, - domain: None, - repo: Some("hack-ink/ELF".to_string()), - agent_id: None, - thread_id: None, - updated_after: None, - updated_before: None, - ts_gte: None, - ts_lte: None, - top_k: None, - candidate_k: None, - explain: None, - }) - .expect_err("Expected repo without doc_type=dev to be rejected."); - - match err { - Error::InvalidRequest { message } => { - assert!(message.contains("doc_type=dev")); - }, - other => panic!("Unexpected error: {other:?}"), - } -} - -#[test] -fn validate_docs_search_l0_default_sparse_mode() { - let filters = - docs::validate_docs_search_l0(&test_request_with_query("status")).expect("valid request"); - - assert!(matches!(filters.sparse_mode, DocsSparseMode::Auto)); -} - -#[test] -fn should_enable_sparse_auto_uses_symbol_cues() { - assert!(super::should_enable_sparse_auto("https://example.com/search?q=abc")); - assert!(!super::should_enable_sparse_auto("how to debug a timeout")); -} - -#[test] -fn excerpt_level_max_supports_l0_and_rejects_unknown_level() { - assert_eq!( - super::excerpt_level_max("L0").expect("Expected L0 to be supported."), - super::DEFAULT_L0_MAX_BYTES - ); - assert!(super::excerpt_level_max("L3").is_err()); -} - -#[test] -fn validate_docs_put_rejects_missing_source_ref() { - let err = docs::validate_docs_put(&DocsPutRequest { - tenant_id: "t".to_string(), - project_id: "p".to_string(), - agent_id: "a".to_string(), - scope: "project_shared".to_string(), - doc_type: Some(DocType::Knowledge.as_str().to_string()), - title: None, - write_policy: None, - source_ref: serde_json::json!({"schema":"doc_source_ref/v1", "doc_type":"knowledge"}), - content: "Hello world.".to_string(), - }) - .expect_err("Expected missing source_ref.ts to be rejected."); - - match err { - Error::InvalidRequest { message } => assert!(message.contains("source_ref[\"ts\"]")), - other => panic!("Unexpected error: {other:?}"), - } -} - -#[test] -fn validate_docs_put_rejects_non_object_source_ref() { - let err = docs::validate_docs_put(&DocsPutRequest { - tenant_id: "t".to_string(), - project_id: "p".to_string(), - agent_id: "a".to_string(), - scope: "project_shared".to_string(), - doc_type: None, - title: None, - write_policy: None, - source_ref: serde_json::json!("legacy-shape"), - content: "Hello world.".to_string(), - }) - .expect_err("Expected non-object source_ref to be rejected."); - - match err { - Error::InvalidRequest { message } => { - assert!(message.contains("source_ref must be a JSON object")) - }, - other => panic!("Unexpected error: {other:?}"), - } -} - -#[test] -fn validate_docs_put_rejects_mismatched_request_and_source_ref_doc_type() { - let err = docs::validate_docs_put(&DocsPutRequest { - tenant_id: "t".to_string(), - project_id: "p".to_string(), - agent_id: "a".to_string(), - scope: "project_shared".to_string(), - doc_type: Some(DocType::Chat.as_str().to_string()), - title: None, - write_policy: None, - source_ref: serde_json::json!({ - "schema": "doc_source_ref/v1", - "doc_type": "knowledge", - "ts": "2026-02-25T12:00:00Z", - }), - content: "Hello world.".to_string(), - }) - .expect_err("Expected mismatched doc_type to be rejected."); - - match err { - Error::InvalidRequest { message } => assert!(message.contains("match")), - other => panic!("Unexpected error: {other:?}"), - } -} - -#[test] -fn validate_docs_put_rejects_wrong_source_ref_schema() { - let err = docs::validate_docs_put(&DocsPutRequest { - tenant_id: "t".to_string(), - project_id: "p".to_string(), - agent_id: "a".to_string(), - scope: "project_shared".to_string(), - doc_type: None, - title: None, - write_policy: None, - source_ref: serde_json::json!({ - "schema": "note_source_ref/v1", - "doc_type": "knowledge", - "ts": "2026-02-25T12:00:00Z", - }), - content: "Hello world.".to_string(), - }) - .expect_err("Expected wrong source_ref.schema to be rejected."); - - match err { - Error::InvalidRequest { message } => assert!(message.contains("doc_source_ref/v1")), - other => panic!("Unexpected error: {other:?}"), - } -} - -#[test] -fn validate_docs_put_rejects_chat_source_ref_with_missing_thread_metadata() { - let err = docs::validate_docs_put(&DocsPutRequest { - tenant_id: "t".to_string(), - project_id: "p".to_string(), - agent_id: "a".to_string(), - scope: "project_shared".to_string(), - doc_type: Some(DocType::Chat.as_str().to_string()), - title: None, - write_policy: None, - source_ref: serde_json::json!({ - "schema": "doc_source_ref/v1", - "doc_type": "chat", - "ts": "2026-02-25T12:00:00Z", - }), - content: "Hello world.".to_string(), - }) - .expect_err("Expected chat source_ref to require thread_id/role."); - - match err { - Error::InvalidRequest { message } => assert!(message.contains("thread_id")), - other => panic!("Unexpected error: {other:?}"), - } -} - -#[test] -fn validate_docs_put_rejects_search_source_ref_with_missing_domain() { - let err = docs::validate_docs_put(&DocsPutRequest { - tenant_id: "t".to_string(), - project_id: "p".to_string(), - agent_id: "a".to_string(), - scope: "project_shared".to_string(), - doc_type: Some(DocType::Search.as_str().to_string()), - title: None, - write_policy: None, - source_ref: serde_json::json!({ - "schema": "doc_source_ref/v1", - "doc_type": "search", - "ts": "2026-02-25T12:00:00Z", - "query": "test", - "url": "https://example.com", - }), - content: "Hello world.".to_string(), - }) - .expect_err("Expected search source_ref to require domain."); - - match err { - Error::InvalidRequest { message } => assert!(message.contains("domain")), - other => panic!("Unexpected error: {other:?}"), - } -} - -#[test] -fn validate_docs_put_rejects_dev_source_ref_with_multiple_identifiers() { - let err = docs::validate_docs_put(&DocsPutRequest { - tenant_id: "t".to_string(), - project_id: "p".to_string(), - agent_id: "a".to_string(), - scope: "project_shared".to_string(), - doc_type: Some(DocType::Dev.as_str().to_string()), - title: None, - write_policy: None, - source_ref: serde_json::json!({ - "schema": "doc_source_ref/v1", - "doc_type": "dev", - "ts": "2026-02-25T12:00:00Z", - "repo": "hack-ink/ELF", - "commit_sha": "9f0a3f4c4eb58bfcf4a5f4f9d0c7be0e13c2f8d19", - "issue_number": 123, - }), - content: "Hello world.".to_string(), - }) - .expect_err("Expected dev source_ref to enforce exactly one identifier field."); - - match err { - Error::InvalidRequest { message } => { - assert!(message.contains("exactly one of commit_sha, pr_number, or issue_number")) - }, - other => panic!("Unexpected error: {other:?}"), - } -} - -#[test] -fn validate_docs_put_uses_source_ref_doc_type_when_request_doc_type_is_absent() { - let resolved_doc_type = docs::validate_docs_put(&DocsPutRequest { - tenant_id: "t".to_string(), - project_id: "p".to_string(), - agent_id: "a".to_string(), - scope: "project_shared".to_string(), - doc_type: None, - title: None, - write_policy: None, - source_ref: serde_json::json!({ - "schema": "doc_source_ref/v1", - "doc_type": "chat", - "ts": "2026-02-25T12:00:00Z", - "thread_id": "thread-1", - "role": "assistant" - }), - content: "Hello world.".to_string(), - }) - .expect("Expected valid source_ref to resolve doc_type."); - - assert_eq!(resolved_doc_type.doc_type, DocType::Chat); -} - -#[test] -fn validate_docs_put_accepts_source_library_article_metadata() { - let validated = docs::validate_docs_put(&DocsPutRequest { - tenant_id: "t".to_string(), - project_id: "p".to_string(), - agent_id: "a".to_string(), - scope: "project_shared".to_string(), - doc_type: Some(DocType::Knowledge.as_str().to_string()), - title: Some("Saved article".to_string()), - write_policy: None, - source_ref: serde_json::json!({ - "schema": "doc_source_ref/v1", - "doc_type": "knowledge", - "ts": "2026-02-25T12:00:00Z", - "source_kind": "article", - "canonical_uri": "https://example.com/research/source-library", - "captured_at": "2026-02-25T12:10:00Z", - "source_created_at": "2026-02-24T09:00:00Z", - "trust_label": "public_web", - "author": "Example Author", - "handle": "example-author", - "excerpt_locator": { - "quote": { - "exact": "Source libraries preserve long-form evidence." - }, - "position": { - "start": 0, - "end": 48 - } - } - }), - content: - "Source libraries preserve long-form evidence. Agents can hydrate exact excerpts later." - .to_string(), - }) - .expect("Expected source library metadata to be accepted."); - - assert_eq!(validated.doc_type, DocType::Knowledge); -} - -#[test] -fn source_capture_metadata_uses_stable_record_and_span_ids() { - let now = OffsetDateTime::parse("2026-02-25T12:15:00Z", &Rfc3339) - .expect("Expected test timestamp to parse."); - let source_ref = serde_json::json!({ - "schema": "doc_source_ref/v1", - "doc_type": "knowledge", - "ts": "2026-02-25T12:00:00Z", - "source_kind": "article", - "canonical_uri": "https://example.com/research/source-library", - "captured_at": "2026-02-25T12:10:00Z", - "trust_label": "public_web", - }); - let source_ref = source_ref.as_object().expect("Expected source_ref object."); - let content_hash = "doc-content-hash"; - let doc_id = super::source_record_id_for( - TENANT_ID, - PROJECT_ID, - "owner", - "project_shared", - DocType::Knowledge, - source_ref, - content_hash, - ); - let repeated_doc_id = super::source_record_id_for( - TENANT_ID, - PROJECT_ID, - "owner", - "project_shared", - DocType::Knowledge, - source_ref, - content_hash, - ); - let chunk_id = super::doc_chunk_id_for(doc_id, 0); - let chunk = DocChunk { - chunk_id, - doc_id, - chunk_index: 0, - start_offset: 0, - end_offset: 42, - chunk_text: "Source libraries preserve long-form evidence.".to_string(), - chunk_hash: "chunk-content-hash".to_string(), - created_at: now, - }; - let capture = super::build_source_capture_summary(SourceCaptureSummaryInput { - doc_id, - source_ref, - doc_type: DocType::Knowledge, - scope: "project_shared", - title: Some("Saved article"), - content_hash, - raw_content_hash: "raw-content-hash", - now, - chunks: &[chunk], - write_policy_audit: None, - }) - .expect("Expected source capture summary."); - - assert_eq!(doc_id, repeated_doc_id); - assert_eq!(capture.schema, "doc_source_capture/v1"); - assert_eq!(capture.source_record_id, doc_id); - assert_eq!(capture.origin, "https://example.com/research/source-library"); - assert_eq!(capture.captured_at, "2026-02-25T12:10:00Z"); - assert_eq!(capture.content_hash, content_hash); - assert_eq!(capture.visibility_scope, "project_shared"); - assert_eq!(capture.title.as_deref(), Some("Saved article")); - assert_eq!(capture.source_type, "article"); - assert_eq!(capture.source_spans.len(), 1); - assert_eq!(capture.source_spans[0].schema, "doc_source_span/v1"); - assert_eq!(capture.source_spans[0].chunk_id, Some(chunk_id)); - assert_eq!(capture.source_spans[0].status, "captured"); - assert_eq!(capture.source_spans[0].reason_code, None); - assert_eq!(capture.source_spans[0].start_offset, 0); - assert_eq!(capture.source_spans[0].end_offset, 42); - assert_eq!( - capture.source_spans[0].span_id, - super::source_span_id(content_hash, 0, 42, "captured") - ); -} - -#[test] -fn normalized_source_ref_records_policy_span_reasons() { - let now = OffsetDateTime::parse("2026-02-25T12:15:00Z", &Rfc3339) - .expect("Expected test timestamp to parse."); - let source_ref = serde_json::json!({ - "schema": "doc_source_ref/v1", - "doc_type": "knowledge", - "ts": "2026-02-25T12:00:00Z", - "uri": "file:///tmp/source.txt", - }); - let source_ref_map = source_ref.as_object().expect("Expected source_ref object."); - let audit = WritePolicyAudit { - exclusions: vec![WriteSpan { start: 6, end: 12 }], - redactions: vec![WriteRedactionResult { - span: WriteSpan { start: 20, end: 30 }, - replacement: "[redacted]".to_string(), - }], - }; - let doc_id = super::source_record_id_for( - TENANT_ID, - PROJECT_ID, - "owner", - "project_shared", - DocType::Knowledge, - source_ref_map, - "stored-hash", - ); - let capture = super::build_source_capture_summary(SourceCaptureSummaryInput { - doc_id, - source_ref: source_ref_map, - doc_type: DocType::Knowledge, - scope: "project_shared", - title: None, - content_hash: "stored-hash", - raw_content_hash: "raw-hash", - now, - chunks: &[], - write_policy_audit: Some(&audit), - }) - .expect("Expected source capture summary."); - let normalized = super::normalize_source_ref_for_capture(source_ref, &capture) - .expect("Expected normalized source_ref"); - - assert_eq!(capture.policy_spans.len(), 2); - assert_eq!(capture.policy_spans[0].status, "excluded"); - assert_eq!(capture.policy_spans[0].reason_code.as_deref(), Some("WRITE_POLICY_EXCLUSION")); - assert_eq!(capture.policy_spans[1].status, "redacted"); - assert_eq!(capture.policy_spans[1].reason_code.as_deref(), Some("WRITE_POLICY_REDACTION")); - assert_eq!(normalized["source_record_id"], doc_id.to_string()); - assert_eq!(normalized["origin"], "file:///tmp/source.txt"); - assert_eq!(normalized["captured_at"], "2026-02-25T12:15:00Z"); - assert_eq!(normalized["content_hash"], "stored-hash"); - assert_eq!(normalized["visibility_scope"], "project_shared"); - assert_eq!(normalized["source_type"], "knowledge"); - assert_eq!(normalized["policy_spans"][0]["reason_code"], "WRITE_POLICY_EXCLUSION"); - assert_eq!(normalized["policy_spans"][1]["reason_code"], "WRITE_POLICY_REDACTION"); -} - -#[test] -fn validate_docs_put_rejects_incomplete_source_library_metadata() { - let err = docs::validate_docs_put(&DocsPutRequest { - tenant_id: "t".to_string(), - project_id: "p".to_string(), - agent_id: "a".to_string(), - scope: "project_shared".to_string(), - doc_type: Some(DocType::Knowledge.as_str().to_string()), - title: Some("Saved article".to_string()), - write_policy: None, - source_ref: serde_json::json!({ - "schema": "doc_source_ref/v1", - "doc_type": "knowledge", - "ts": "2026-02-25T12:00:00Z", - "source_kind": "article", - "captured_at": "2026-02-25T12:10:00Z", - "trust_label": "public_web" - }), - content: "Source libraries preserve long-form evidence.".to_string(), - }) - .expect_err("Expected canonical_uri to be required for source library metadata."); - - match err { - Error::InvalidRequest { message } => assert!(message.contains("canonical_uri")), - other => panic!("Unexpected error: {other:?}"), - } - - let err = docs::validate_docs_put(&DocsPutRequest { - tenant_id: "t".to_string(), - project_id: "p".to_string(), - agent_id: "a".to_string(), - scope: "project_shared".to_string(), - doc_type: Some(DocType::Knowledge.as_str().to_string()), - title: Some("Saved thread".to_string()), - write_policy: None, - source_ref: serde_json::json!({ - "schema": "doc_source_ref/v1", - "doc_type": "knowledge", - "ts": "2026-02-25T12:00:00Z", - "source_kind": "social_thread", - "canonical_uri": "https://example.com/thread/123", - "captured_at": "2026-02-25T12:10:00Z", - "trust_label": "public_web" - }), - content: "The thread says source libraries need social captures.".to_string(), - }) - .expect_err("Expected social_thread source_kind to require chat doc_type."); - - match err { - Error::InvalidRequest { message } => assert!(message.contains("requires doc_type=chat")), - other => panic!("Unexpected error: {other:?}"), - } -} - -#[test] -fn docs_l0_pointer_carries_hashes_and_position_locator() { - let now = OffsetDateTime::parse("2026-02-25T12:00:00Z", &Rfc3339) - .expect("Expected test timestamp to parse."); - let row = DocSearchRow { - chunk_id: Uuid::parse_str("11111111-1111-4111-8111-111111111111") - .expect("Expected chunk UUID."), - doc_id: Uuid::parse_str("22222222-2222-4222-8222-222222222222") - .expect("Expected doc UUID."), - scope: "project_shared".to_string(), - doc_type: "knowledge".to_string(), - project_id: "project".to_string(), - agent_id: "agent".to_string(), - updated_at: now, - content_hash: "doc-hash".to_string(), - chunk_hash: "chunk-hash".to_string(), - start_offset: 12, - end_offset: 64, - chunk_text: "Source libraries preserve long-form evidence.".to_string(), - }; - let pointer = super::build_docs_l0_pointer(&row, row.chunk_id); - - assert_eq!(pointer.schema, "source_ref/v1"); - assert_eq!(pointer.resolver, "elf_doc_ext/v1"); - assert_eq!(pointer.hashes.content_hash, "doc-hash"); - assert_eq!(pointer.hashes.chunk_hash, "chunk-hash"); - assert_eq!(pointer.reference.source_record_id, row.doc_id); - assert_eq!(pointer.reference.source_span_id, pointer.locator.span_id); - assert_eq!(pointer.locator.position.start, 12); - assert_eq!(pointer.locator.position.end, 64); - assert_eq!(pointer.locator.span_id, super::source_span_id("doc-hash", 12, 64, "captured")); - assert_eq!(pointer.state.content_hash, pointer.hashes.content_hash); - assert_eq!(pointer.state.chunk_hash, pointer.hashes.chunk_hash); -} - -#[test] -fn validate_docs_put_applies_write_policy_and_includes_audit() { - let validated = docs::validate_docs_put(&DocsPutRequest { - tenant_id: "t".to_string(), - project_id: "p".to_string(), - agent_id: "a".to_string(), - scope: "project_shared".to_string(), - doc_type: Some(DocType::Knowledge.as_str().to_string()), - title: None, - write_policy: Some(WritePolicy { - exclusions: vec![WriteSpan { start: 6, end: 35 }], - redactions: vec![], - }), - source_ref: serde_json::json!({ - "schema": "doc_source_ref/v1", - "doc_type": "knowledge", - "ts": "2026-02-25T12:00:00Z", - }), - content: "Hello sk-abcdefghijklmnopqrstuvwxyz!".to_string(), - }) - .expect("Expected valid write policy transformation."); - let expected_audit = WritePolicyAudit { - exclusions: vec![WriteSpan { start: 6, end: 35 }], - ..Default::default() - }; - - assert_eq!(validated.content, "Hello !".to_string()); - assert_eq!(validated.write_policy_audit.unwrap_or_default(), expected_audit); -} - -#[test] -fn validate_docs_put_rejects_secret_after_write_policy() { - let err = docs::validate_docs_put(&DocsPutRequest { - tenant_id: "t".to_string(), - project_id: "p".to_string(), - agent_id: "a".to_string(), - scope: "project_shared".to_string(), - doc_type: Some(DocType::Knowledge.as_str().to_string()), - title: None, - write_policy: Some(WritePolicy { exclusions: vec![], redactions: vec![] }), - source_ref: serde_json::json!({ - "schema": "doc_source_ref/v1", - "doc_type": "knowledge", - "ts": "2026-02-25T12:00:00Z", - }), - content: "Hello sk-abcdefghijklmnopqrstuvwxyz!".to_string(), - }) - .expect_err("Expected secret-bearing content to be rejected."); - - match err { - Error::InvalidRequest { message } => assert!(message.contains("contains secrets")), - other => panic!("Unexpected error: {other:?}"), - } -} - -#[test] -fn validate_docs_put_allows_doc_source_ref_v1_and_rejects_free_text() { - docs::validate_docs_put(&DocsPutRequest { - tenant_id: "t".to_string(), - project_id: "p".to_string(), - agent_id: "a".to_string(), - scope: "project_shared".to_string(), - doc_type: None, - title: Some("English title".to_string()), - write_policy: None, - source_ref: serde_json::json!({ - "schema": "doc_source_ref/v1", - "doc_type": "knowledge", - "ts": "2026-02-25T12:00:00Z", - "notes": "English only." - }), - content: "English content.".to_string(), - }) - .expect("Expected doc_source_ref/v1 source_ref to be accepted."); - - let err = docs::validate_docs_put(&DocsPutRequest { - source_ref: serde_json::json!({ - "schema": "doc_source_ref/v1", - "doc_type": "knowledge", - "ts": "2026-02-25T12:00:00Z", - "notes": "\u{4f60}\u{597d}\u{4e16}\u{754c}" - }), - tenant_id: "t".to_string(), - project_id: "p".to_string(), - agent_id: "a".to_string(), - scope: "project_shared".to_string(), - doc_type: None, - title: Some("English title".to_string()), - write_policy: None, - content: "English content.".to_string(), - }) - .expect_err("Expected non-English free-text in source_ref."); - - match err { - Error::NonEnglishInput { field } => assert_eq!(field, "$.source_ref[\"notes\"]"), - other => panic!("Unexpected error: {other:?}"), - } - - let err = docs::validate_docs_put(&DocsPutRequest { - source_ref: serde_json::json!({ - "schema": "doc_source_ref/v1", - "doc_type": "knowledge", - "ts": "2026-02-25T12:00:00Z", - "ref": "\u{4f60}\u{597d}\u{4e16}\u{754c}" - }), - tenant_id: "t".to_string(), - project_id: "p".to_string(), - agent_id: "a".to_string(), - scope: "project_shared".to_string(), - doc_type: None, - title: Some("English title".to_string()), - write_policy: None, - content: "English content.".to_string(), - }) - .expect_err("Expected identifier lane with non-Latin text to be rejected."); - - match err { - Error::NonEnglishInput { field } => assert_eq!(field, "$.source_ref[\"ref\"]"), - other => panic!("Unexpected error: {other:?}"), - } -} - -#[test] -fn split_tokens_by_offsets_preserves_original_substring_offsets() { - let tokenizer = test_tokenizer(); - let chunks = super::split_tokens_by_offsets("alpha bravo charlie delta", 2, 1, 10, &tokenizer) - .expect("Expected token chunking to succeed."); - - assert_eq!(chunks.len(), 3); - assert_eq!(chunks[0].start_offset, 0); - assert_eq!(chunks[0].end_offset, 11); - assert_eq!(chunks[1].start_offset, 6); - assert_eq!(chunks[1].end_offset, 19); - assert_eq!(chunks[2].start_offset, 12); - assert_eq!(chunks[2].end_offset, 25); - - for chunk in &chunks { - assert_eq!(chunk.text, "alpha bravo charlie delta"[chunk.start_offset..chunk.end_offset]); - } -} diff --git a/packages/elf-service/src/docs/tests_core.rs b/packages/elf-service/src/docs/tests_core.rs new file mode 100644 index 00000000..ad7a1bd4 --- /dev/null +++ b/packages/elf-service/src/docs/tests_core.rs @@ -0,0 +1,79 @@ +use ahash::AHashMap; +use tokenizers::{Tokenizer, models::wordlevel::WordLevel, pre_tokenizers::whitespace::Whitespace}; + +use crate::docs::{self, DocType}; + +fn test_tokenizer() -> Tokenizer { + let mut vocab = AHashMap::new(); + + vocab.insert("alpha".to_string(), 1_u32); + vocab.insert("beta".to_string(), 2_u32); + vocab.insert("charlie".to_string(), 3_u32); + vocab.insert("delta".to_string(), 4_u32); + vocab.insert("".to_string(), 0_u32); + + let model = WordLevel::builder() + .vocab(vocab) + .unk_token("".to_string()) + .build() + .expect("Failed to build test tokenizer."); + let mut tokenizer = Tokenizer::new(model); + + tokenizer.with_pre_tokenizer(Some(Whitespace)); + + tokenizer +} + +#[test] +fn doc_type_parses_and_serializes() { + let encoded = + serde_json::to_string(&DocType::Knowledge).expect("Expected DocType serialization."); + let parsed = + serde_json::from_str::("\"knowledge\"").expect("Expected parse to succeed."); + let invalid: Result = serde_json::from_str("\"invalid\""); + + assert_eq!(encoded, "\"knowledge\""); + assert_eq!(parsed, DocType::Knowledge); + assert!(invalid.is_err()); +} + +#[test] +fn resolve_doc_chunking_profile_is_deterministic_by_doc_type() { + let small = docs::resolve_doc_chunking_profile(DocType::Chat); + + assert_eq!(small.max_tokens, 1_024); + assert_eq!(small.overlap_tokens, 128); + + let default = docs::resolve_doc_chunking_profile(DocType::Knowledge); + + assert_eq!(default.max_tokens, 2_048); + assert_eq!(default.overlap_tokens, 256); +} + +#[test] +fn excerpt_level_max_supports_l0_and_rejects_unknown_level() { + assert_eq!( + docs::excerpt_level_max("L0").expect("Expected L0 to be supported."), + docs::DEFAULT_L0_MAX_BYTES + ); + assert!(docs::excerpt_level_max("L3").is_err()); +} + +#[test] +fn split_tokens_by_offsets_preserves_original_substring_offsets() { + let tokenizer = test_tokenizer(); + let chunks = docs::split_tokens_by_offsets("alpha bravo charlie delta", 2, 1, 10, &tokenizer) + .expect("Expected token chunking to succeed."); + + assert_eq!(chunks.len(), 3); + assert_eq!(chunks[0].start_offset, 0); + assert_eq!(chunks[0].end_offset, 11); + assert_eq!(chunks[1].start_offset, 6); + assert_eq!(chunks[1].end_offset, 19); + assert_eq!(chunks[2].start_offset, 12); + assert_eq!(chunks[2].end_offset, 25); + + for chunk in &chunks { + assert_eq!(chunk.text, "alpha bravo charlie delta"[chunk.start_offset..chunk.end_offset]); + } +} diff --git a/packages/elf-service/src/docs/tests_put_validation.rs b/packages/elf-service/src/docs/tests_put_validation.rs new file mode 100644 index 00000000..124e16e6 --- /dev/null +++ b/packages/elf-service/src/docs/tests_put_validation.rs @@ -0,0 +1,443 @@ +use crate::docs::{self, DocType, DocsPutRequest, Error}; +use elf_domain::writegate::{WritePolicy, WritePolicyAudit, WriteSpan}; + +#[test] +fn validate_docs_put_rejects_invalid_doc_type() { + let err = docs::validate_docs_put(&DocsPutRequest { + tenant_id: "t".to_string(), + project_id: "p".to_string(), + agent_id: "a".to_string(), + scope: "project_shared".to_string(), + doc_type: None, + title: None, + write_policy: None, + source_ref: serde_json::json!({ + "schema": "doc_source_ref/v1", + "doc_type": "invalid", + "ts": "2026-02-25T12:00:00Z", + }), + content: "Hello world.".to_string(), + }) + .expect_err("Expected invalid doc_type to be rejected."); + + match err { + Error::InvalidRequest { message } => assert!(message.contains("doc_type")), + other => panic!("Unexpected error: {other:?}"), + } +} + +#[test] +fn validate_docs_put_rejects_missing_source_ref() { + let err = docs::validate_docs_put(&DocsPutRequest { + tenant_id: "t".to_string(), + project_id: "p".to_string(), + agent_id: "a".to_string(), + scope: "project_shared".to_string(), + doc_type: Some(DocType::Knowledge.as_str().to_string()), + title: None, + write_policy: None, + source_ref: serde_json::json!({"schema":"doc_source_ref/v1", "doc_type":"knowledge"}), + content: "Hello world.".to_string(), + }) + .expect_err("Expected missing source_ref.ts to be rejected."); + + match err { + Error::InvalidRequest { message } => assert!(message.contains("source_ref[\"ts\"]")), + other => panic!("Unexpected error: {other:?}"), + } +} + +#[test] +fn validate_docs_put_rejects_non_object_source_ref() { + let err = docs::validate_docs_put(&DocsPutRequest { + tenant_id: "t".to_string(), + project_id: "p".to_string(), + agent_id: "a".to_string(), + scope: "project_shared".to_string(), + doc_type: None, + title: None, + write_policy: None, + source_ref: serde_json::json!("legacy-shape"), + content: "Hello world.".to_string(), + }) + .expect_err("Expected non-object source_ref to be rejected."); + + match err { + Error::InvalidRequest { message } => { + assert!(message.contains("source_ref must be a JSON object")) + }, + other => panic!("Unexpected error: {other:?}"), + } +} + +#[test] +fn validate_docs_put_rejects_mismatched_request_and_source_ref_doc_type() { + let err = docs::validate_docs_put(&DocsPutRequest { + tenant_id: "t".to_string(), + project_id: "p".to_string(), + agent_id: "a".to_string(), + scope: "project_shared".to_string(), + doc_type: Some(DocType::Chat.as_str().to_string()), + title: None, + write_policy: None, + source_ref: serde_json::json!({ + "schema": "doc_source_ref/v1", + "doc_type": "knowledge", + "ts": "2026-02-25T12:00:00Z", + }), + content: "Hello world.".to_string(), + }) + .expect_err("Expected mismatched doc_type to be rejected."); + + match err { + Error::InvalidRequest { message } => assert!(message.contains("match")), + other => panic!("Unexpected error: {other:?}"), + } +} + +#[test] +fn validate_docs_put_rejects_wrong_source_ref_schema() { + let err = docs::validate_docs_put(&DocsPutRequest { + tenant_id: "t".to_string(), + project_id: "p".to_string(), + agent_id: "a".to_string(), + scope: "project_shared".to_string(), + doc_type: None, + title: None, + write_policy: None, + source_ref: serde_json::json!({ + "schema": "note_source_ref/v1", + "doc_type": "knowledge", + "ts": "2026-02-25T12:00:00Z", + }), + content: "Hello world.".to_string(), + }) + .expect_err("Expected wrong source_ref.schema to be rejected."); + + match err { + Error::InvalidRequest { message } => assert!(message.contains("doc_source_ref/v1")), + other => panic!("Unexpected error: {other:?}"), + } +} + +#[test] +fn validate_docs_put_rejects_chat_source_ref_with_missing_thread_metadata() { + let err = docs::validate_docs_put(&DocsPutRequest { + tenant_id: "t".to_string(), + project_id: "p".to_string(), + agent_id: "a".to_string(), + scope: "project_shared".to_string(), + doc_type: Some(DocType::Chat.as_str().to_string()), + title: None, + write_policy: None, + source_ref: serde_json::json!({ + "schema": "doc_source_ref/v1", + "doc_type": "chat", + "ts": "2026-02-25T12:00:00Z", + }), + content: "Hello world.".to_string(), + }) + .expect_err("Expected chat source_ref to require thread_id/role."); + + match err { + Error::InvalidRequest { message } => assert!(message.contains("thread_id")), + other => panic!("Unexpected error: {other:?}"), + } +} + +#[test] +fn validate_docs_put_rejects_search_source_ref_with_missing_domain() { + let err = docs::validate_docs_put(&DocsPutRequest { + tenant_id: "t".to_string(), + project_id: "p".to_string(), + agent_id: "a".to_string(), + scope: "project_shared".to_string(), + doc_type: Some(DocType::Search.as_str().to_string()), + title: None, + write_policy: None, + source_ref: serde_json::json!({ + "schema": "doc_source_ref/v1", + "doc_type": "search", + "ts": "2026-02-25T12:00:00Z", + "query": "test", + "url": "https://example.com", + }), + content: "Hello world.".to_string(), + }) + .expect_err("Expected search source_ref to require domain."); + + match err { + Error::InvalidRequest { message } => assert!(message.contains("domain")), + other => panic!("Unexpected error: {other:?}"), + } +} + +#[test] +fn validate_docs_put_rejects_dev_source_ref_with_multiple_identifiers() { + let err = docs::validate_docs_put(&DocsPutRequest { + tenant_id: "t".to_string(), + project_id: "p".to_string(), + agent_id: "a".to_string(), + scope: "project_shared".to_string(), + doc_type: Some(DocType::Dev.as_str().to_string()), + title: None, + write_policy: None, + source_ref: serde_json::json!({ + "schema": "doc_source_ref/v1", + "doc_type": "dev", + "ts": "2026-02-25T12:00:00Z", + "repo": "hack-ink/ELF", + "commit_sha": "9f0a3f4c4eb58bfcf4a5f4f9d0c7be0e13c2f8d19", + "issue_number": 123, + }), + content: "Hello world.".to_string(), + }) + .expect_err("Expected dev source_ref to enforce exactly one identifier field."); + + match err { + Error::InvalidRequest { message } => { + assert!(message.contains("exactly one of commit_sha, pr_number, or issue_number")) + }, + other => panic!("Unexpected error: {other:?}"), + } +} + +#[test] +fn validate_docs_put_uses_source_ref_doc_type_when_request_doc_type_is_absent() { + let resolved_doc_type = docs::validate_docs_put(&DocsPutRequest { + tenant_id: "t".to_string(), + project_id: "p".to_string(), + agent_id: "a".to_string(), + scope: "project_shared".to_string(), + doc_type: None, + title: None, + write_policy: None, + source_ref: serde_json::json!({ + "schema": "doc_source_ref/v1", + "doc_type": "chat", + "ts": "2026-02-25T12:00:00Z", + "thread_id": "thread-1", + "role": "assistant" + }), + content: "Hello world.".to_string(), + }) + .expect("Expected valid source_ref to resolve doc_type."); + + assert_eq!(resolved_doc_type.doc_type, DocType::Chat); +} + +#[test] +fn validate_docs_put_accepts_source_library_article_metadata() { + let validated = docs::validate_docs_put(&DocsPutRequest { + tenant_id: "t".to_string(), + project_id: "p".to_string(), + agent_id: "a".to_string(), + scope: "project_shared".to_string(), + doc_type: Some(DocType::Knowledge.as_str().to_string()), + title: Some("Saved article".to_string()), + write_policy: None, + source_ref: serde_json::json!({ + "schema": "doc_source_ref/v1", + "doc_type": "knowledge", + "ts": "2026-02-25T12:00:00Z", + "source_kind": "article", + "canonical_uri": "https://example.com/research/source-library", + "captured_at": "2026-02-25T12:10:00Z", + "source_created_at": "2026-02-24T09:00:00Z", + "trust_label": "public_web", + "author": "Example Author", + "handle": "example-author", + "excerpt_locator": { + "quote": { + "exact": "Source libraries preserve long-form evidence." + }, + "position": { + "start": 0, + "end": 48 + } + } + }), + content: + "Source libraries preserve long-form evidence. Agents can hydrate exact excerpts later." + .to_string(), + }) + .expect("Expected source library metadata to be accepted."); + + assert_eq!(validated.doc_type, DocType::Knowledge); +} + +#[test] +fn validate_docs_put_rejects_incomplete_source_library_metadata() { + let err = docs::validate_docs_put(&DocsPutRequest { + tenant_id: "t".to_string(), + project_id: "p".to_string(), + agent_id: "a".to_string(), + scope: "project_shared".to_string(), + doc_type: Some(DocType::Knowledge.as_str().to_string()), + title: Some("Saved article".to_string()), + write_policy: None, + source_ref: serde_json::json!({ + "schema": "doc_source_ref/v1", + "doc_type": "knowledge", + "ts": "2026-02-25T12:00:00Z", + "source_kind": "article", + "captured_at": "2026-02-25T12:10:00Z", + "trust_label": "public_web" + }), + content: "Source libraries preserve long-form evidence.".to_string(), + }) + .expect_err("Expected canonical_uri to be required for source library metadata."); + + match err { + Error::InvalidRequest { message } => assert!(message.contains("canonical_uri")), + other => panic!("Unexpected error: {other:?}"), + } + + let err = docs::validate_docs_put(&DocsPutRequest { + tenant_id: "t".to_string(), + project_id: "p".to_string(), + agent_id: "a".to_string(), + scope: "project_shared".to_string(), + doc_type: Some(DocType::Knowledge.as_str().to_string()), + title: Some("Saved thread".to_string()), + write_policy: None, + source_ref: serde_json::json!({ + "schema": "doc_source_ref/v1", + "doc_type": "knowledge", + "ts": "2026-02-25T12:00:00Z", + "source_kind": "social_thread", + "canonical_uri": "https://example.com/thread/123", + "captured_at": "2026-02-25T12:10:00Z", + "trust_label": "public_web" + }), + content: "The thread says source libraries need social captures.".to_string(), + }) + .expect_err("Expected social_thread source_kind to require chat doc_type."); + + match err { + Error::InvalidRequest { message } => assert!(message.contains("requires doc_type=chat")), + other => panic!("Unexpected error: {other:?}"), + } +} + +#[test] +fn validate_docs_put_applies_write_policy_and_includes_audit() { + let validated = docs::validate_docs_put(&DocsPutRequest { + tenant_id: "t".to_string(), + project_id: "p".to_string(), + agent_id: "a".to_string(), + scope: "project_shared".to_string(), + doc_type: Some(DocType::Knowledge.as_str().to_string()), + title: None, + write_policy: Some(WritePolicy { + exclusions: vec![WriteSpan { start: 6, end: 35 }], + redactions: vec![], + }), + source_ref: serde_json::json!({ + "schema": "doc_source_ref/v1", + "doc_type": "knowledge", + "ts": "2026-02-25T12:00:00Z", + }), + content: "Hello sk-abcdefghijklmnopqrstuvwxyz!".to_string(), + }) + .expect("Expected valid write policy transformation."); + let expected_audit = WritePolicyAudit { + exclusions: vec![WriteSpan { start: 6, end: 35 }], + ..Default::default() + }; + + assert_eq!(validated.content, "Hello !".to_string()); + assert_eq!(validated.write_policy_audit.unwrap_or_default(), expected_audit); +} + +#[test] +fn validate_docs_put_rejects_secret_after_write_policy() { + let err = docs::validate_docs_put(&DocsPutRequest { + tenant_id: "t".to_string(), + project_id: "p".to_string(), + agent_id: "a".to_string(), + scope: "project_shared".to_string(), + doc_type: Some(DocType::Knowledge.as_str().to_string()), + title: None, + write_policy: Some(WritePolicy { exclusions: vec![], redactions: vec![] }), + source_ref: serde_json::json!({ + "schema": "doc_source_ref/v1", + "doc_type": "knowledge", + "ts": "2026-02-25T12:00:00Z", + }), + content: "Hello sk-abcdefghijklmnopqrstuvwxyz!".to_string(), + }) + .expect_err("Expected secret-bearing content to be rejected."); + + match err { + Error::InvalidRequest { message } => assert!(message.contains("contains secrets")), + other => panic!("Unexpected error: {other:?}"), + } +} + +#[test] +fn validate_docs_put_allows_doc_source_ref_v1_and_rejects_free_text() { + docs::validate_docs_put(&DocsPutRequest { + tenant_id: "t".to_string(), + project_id: "p".to_string(), + agent_id: "a".to_string(), + scope: "project_shared".to_string(), + doc_type: None, + title: Some("English title".to_string()), + write_policy: None, + source_ref: serde_json::json!({ + "schema": "doc_source_ref/v1", + "doc_type": "knowledge", + "ts": "2026-02-25T12:00:00Z", + "notes": "English only." + }), + content: "English content.".to_string(), + }) + .expect("Expected doc_source_ref/v1 source_ref to be accepted."); + + let err = docs::validate_docs_put(&DocsPutRequest { + source_ref: serde_json::json!({ + "schema": "doc_source_ref/v1", + "doc_type": "knowledge", + "ts": "2026-02-25T12:00:00Z", + "notes": "\u{4f60}\u{597d}\u{4e16}\u{754c}" + }), + tenant_id: "t".to_string(), + project_id: "p".to_string(), + agent_id: "a".to_string(), + scope: "project_shared".to_string(), + doc_type: None, + title: Some("English title".to_string()), + write_policy: None, + content: "English content.".to_string(), + }) + .expect_err("Expected non-English free-text in source_ref."); + + match err { + Error::NonEnglishInput { field } => assert_eq!(field, "$.source_ref[\"notes\"]"), + other => panic!("Unexpected error: {other:?}"), + } + + let err = docs::validate_docs_put(&DocsPutRequest { + source_ref: serde_json::json!({ + "schema": "doc_source_ref/v1", + "doc_type": "knowledge", + "ts": "2026-02-25T12:00:00Z", + "ref": "\u{4f60}\u{597d}\u{4e16}\u{754c}" + }), + tenant_id: "t".to_string(), + project_id: "p".to_string(), + agent_id: "a".to_string(), + scope: "project_shared".to_string(), + doc_type: None, + title: Some("English title".to_string()), + write_policy: None, + content: "English content.".to_string(), + }) + .expect_err("Expected identifier lane with non-Latin text to be rejected."); + + match err { + Error::NonEnglishInput { field } => assert_eq!(field, "$.source_ref[\"ref\"]"), + other => panic!("Unexpected error: {other:?}"), + } +} diff --git a/packages/elf-service/src/docs/tests_search_validation.rs b/packages/elf-service/src/docs/tests_search_validation.rs new file mode 100644 index 00000000..500eab4a --- /dev/null +++ b/packages/elf-service/src/docs/tests_search_validation.rs @@ -0,0 +1,418 @@ +use qdrant_client::qdrant::{ + DatetimeRange, Filter, condition::ConditionOneOf, r#match::MatchValue, +}; +use time::{OffsetDateTime, format_description::well_known::Rfc3339}; + +use crate::docs::{ + self, DocType, DocsSearchL0Filters, DocsSearchL0Request, DocsSparseMode, Error, + tests::{self, PROJECT_ID, TENANT_ID}, +}; + +fn first_datetime_range(filter: &Filter, key: &str) -> Option { + for condition in &filter.must { + if let Some(ConditionOneOf::Field(field)) = condition.condition_one_of.as_ref() { + if field.key != key { + continue; + } + + if let Some(range) = field.datetime_range.as_ref() { + return Some(*range); + } + } + } + + None +} + +fn first_match_value(filter: &Filter, key: &str) -> Option { + for condition in &filter.must { + if let Some(ConditionOneOf::Field(field)) = condition.condition_one_of.as_ref() { + if field.key != key { + continue; + } + + if let Some(r#match) = field.r#match.as_ref() { + let Some(match_value) = r#match.match_value.as_ref() else { + continue; + }; + + return match match_value { + MatchValue::Keyword(value) => Some(value.clone()), + _ => None, + }; + } + } + } + + None +} + +#[test] +fn docs_search_l0_requires_chat_doc_type_for_thread_id() { + let err = docs::validate_docs_search_l0(&DocsSearchL0Request { + tenant_id: TENANT_ID.to_string(), + project_id: PROJECT_ID.to_string(), + caller_agent_id: "agent".to_string(), + read_profile: "private_plus_project".to_string(), + query: "thread".to_string(), + scope: None, + status: None, + doc_type: Some("search".to_string()), + sparse_mode: None, + domain: None, + repo: None, + agent_id: None, + thread_id: Some("thread-1".to_string()), + updated_after: None, + updated_before: None, + ts_gte: None, + ts_lte: None, + top_k: None, + candidate_k: None, + explain: None, + }) + .expect_err("Expected thread_id to require doc_type=chat."); + + match err { + Error::InvalidRequest { message } => assert!(message.contains("thread_id requires")), + other => panic!("Unexpected error: {other:?}"), + } + + docs::validate_docs_search_l0(&DocsSearchL0Request { + tenant_id: TENANT_ID.to_string(), + project_id: PROJECT_ID.to_string(), + caller_agent_id: "agent".to_string(), + read_profile: "private_plus_project".to_string(), + query: "thread".to_string(), + scope: None, + status: None, + doc_type: Some("chat".to_string()), + sparse_mode: None, + domain: None, + repo: None, + agent_id: None, + thread_id: Some("thread-1".to_string()), + updated_after: None, + updated_before: None, + ts_gte: None, + ts_lte: None, + top_k: None, + candidate_k: None, + explain: None, + }) + .expect("Expected thread_id filter to be accepted for chat."); +} + +#[test] +fn validate_docs_search_l0_defaults_status_and_filters_dates() { + let filters = docs::validate_docs_search_l0(&tests::test_request_with_query("hello world")) + .expect("valid request"); + + assert_eq!(filters.status, "active"); + + let bad_dates = DocsSearchL0Request { + updated_after: Some("2026-02-25T12:00:00Z".to_string()), + updated_before: Some("2026-02-25T11:00:00Z".to_string()), + sparse_mode: None, + domain: None, + repo: None, + ..tests::test_request_with_query("status") + }; + let err = docs::validate_docs_search_l0(&bad_dates) + .expect_err("Expected bad date order to be rejected."); + + match err { + Error::InvalidRequest { message } => { + assert!(message.contains("earlier")); + }, + other => panic!("Unexpected error: {other:?}"), + } +} + +#[test] +fn validate_docs_search_l0_rejects_invalid_status() { + let err = docs::validate_docs_search_l0(&DocsSearchL0Request { + tenant_id: TENANT_ID.to_string(), + project_id: PROJECT_ID.to_string(), + caller_agent_id: "agent".to_string(), + read_profile: "private_plus_project".to_string(), + query: "status".to_string(), + scope: None, + status: Some("archived".to_string()), + doc_type: None, + sparse_mode: None, + domain: None, + repo: None, + agent_id: None, + thread_id: None, + updated_after: None, + updated_before: None, + ts_gte: None, + ts_lte: None, + top_k: None, + candidate_k: None, + explain: None, + }) + .expect_err("Expected invalid status to be rejected."); + + match err { + Error::InvalidRequest { message } => assert!(message.contains("status")), + other => panic!("Unexpected error: {other:?}"), + } +} + +#[test] +fn validate_docs_search_l0_rejects_invalid_datetime_format() { + let err = docs::validate_docs_search_l0(&DocsSearchL0Request { + tenant_id: TENANT_ID.to_string(), + project_id: PROJECT_ID.to_string(), + caller_agent_id: "agent".to_string(), + read_profile: "private_plus_project".to_string(), + query: "status".to_string(), + scope: None, + status: None, + doc_type: None, + sparse_mode: None, + domain: None, + repo: None, + agent_id: None, + thread_id: None, + updated_after: Some("2026-02-25T12:00:00".to_string()), + updated_before: None, + ts_gte: None, + ts_lte: None, + top_k: None, + candidate_k: None, + explain: None, + }) + .expect_err("Expected invalid RFC3339 datetime to be rejected."); + + match err { + Error::InvalidRequest { message } => assert!(message.contains("RFC3339")), + other => panic!("Unexpected error: {other:?}"), + } +} + +#[test] +fn build_doc_search_filter_applies_status_and_requested_filters() { + let filters = DocsSearchL0Filters { + scope: Some("project_shared".to_string()), + status: "deleted".to_string(), + doc_type: Some(DocType::Chat), + sparse_mode: DocsSparseMode::Auto, + domain: None, + repo: None, + agent_id: Some("owner".to_string()), + thread_id: Some("thread-7".to_string()), + updated_after: Some( + OffsetDateTime::parse("2026-02-20T00:00:00Z", &Rfc3339).expect("Invalid timestamp."), + ), + updated_before: Some( + OffsetDateTime::parse("2026-02-28T00:00:00Z", &Rfc3339).expect("Invalid timestamp."), + ), + ts_gte: Some( + OffsetDateTime::parse("2026-01-01T00:00:00Z", &Rfc3339).expect("Invalid timestamp."), + ), + ts_lte: Some( + OffsetDateTime::parse("2026-12-31T00:00:00Z", &Rfc3339).expect("Invalid timestamp."), + ), + }; + let filter = docs::build_doc_search_filter( + TENANT_ID, + PROJECT_ID, + "requester", + &["agent_private".to_string(), "project_shared".to_string()], + &filters, + ); + + assert_eq!(first_match_value(&filter, "tenant_id").as_deref(), Some("tenant")); + assert_eq!(first_match_value(&filter, "status").as_deref(), Some("deleted")); + assert_eq!(first_match_value(&filter, "scope").as_deref(), Some("project_shared")); + assert_eq!(first_match_value(&filter, "doc_type").as_deref(), Some("chat")); + assert_eq!(first_match_value(&filter, "agent_id").as_deref(), Some("owner")); + assert_eq!(first_match_value(&filter, "thread_id").as_deref(), Some("thread-7")); + assert_eq!(first_match_value(&filter, "domain").as_deref(), None); + assert_eq!(first_match_value(&filter, "repo").as_deref(), None); + + let datetime_range = first_datetime_range(&filter, "updated_at") + .expect("Expected datetime filter for updated_at."); + let after = + OffsetDateTime::parse("2026-02-20T00:00:00Z", &Rfc3339).expect("Invalid timestamp."); + let before = + OffsetDateTime::parse("2026-02-28T00:00:00Z", &Rfc3339).expect("Invalid timestamp."); + let lt = datetime_range.lt.as_ref().expect("Expected datetime filter .lt value."); + let gt = datetime_range.gt.as_ref().expect("Expected datetime filter .gt value."); + + assert_eq!(lt.seconds, before.unix_timestamp()); + assert_eq!(lt.nanos, before.nanosecond() as i32); + assert_eq!(gt.seconds, after.unix_timestamp()); + assert_eq!(gt.nanos, after.nanosecond() as i32); + assert!(datetime_range.gte.is_none()); + assert!(datetime_range.lte.is_none()); + + let doc_ts_range = + first_datetime_range(&filter, "doc_ts").expect("Expected datetime filter for doc_ts."); + let gte = doc_ts_range.gte.as_ref().expect("Expected datetime filter .gte value."); + let lte = doc_ts_range.lte.as_ref().expect("Expected datetime filter .lte value."); + let doc_ts_gte = + OffsetDateTime::parse("2026-01-01T00:00:00Z", &Rfc3339).expect("Invalid timestamp."); + let doc_ts_lte = + OffsetDateTime::parse("2026-12-31T00:00:00Z", &Rfc3339).expect("Invalid timestamp."); + + assert_eq!(gte.seconds, doc_ts_gte.unix_timestamp()); + assert_eq!(gte.nanos, doc_ts_gte.nanosecond() as i32); + assert_eq!(lte.seconds, doc_ts_lte.unix_timestamp()); + assert_eq!(lte.nanos, doc_ts_lte.nanosecond() as i32); + assert!(doc_ts_range.gt.is_none()); + assert!(doc_ts_range.lt.is_none()); +} + +#[test] +fn validate_docs_search_l0_rejects_invalid_doc_ts_order() { + let err = docs::validate_docs_search_l0(&DocsSearchL0Request { + tenant_id: TENANT_ID.to_string(), + project_id: PROJECT_ID.to_string(), + caller_agent_id: "agent".to_string(), + read_profile: "private_plus_project".to_string(), + query: "status".to_string(), + scope: None, + status: None, + doc_type: None, + sparse_mode: None, + domain: None, + repo: None, + agent_id: None, + thread_id: None, + updated_after: None, + updated_before: None, + ts_gte: Some("2026-02-25T12:00:00Z".to_string()), + ts_lte: Some("2026-02-25T11:00:00Z".to_string()), + top_k: None, + candidate_k: None, + explain: None, + }) + .expect_err("Expected bad doc_ts order to be rejected."); + + match err { + Error::InvalidRequest { message } => { + assert!(message.contains("earlier")); + }, + other => panic!("Unexpected error: {other:?}"), + } +} + +#[test] +fn validate_docs_search_l0_rejects_invalid_sparse_mode() { + let err = docs::validate_docs_search_l0(&DocsSearchL0Request { + tenant_id: TENANT_ID.to_string(), + project_id: PROJECT_ID.to_string(), + caller_agent_id: "agent".to_string(), + read_profile: "private_plus_project".to_string(), + query: "status".to_string(), + scope: None, + status: None, + doc_type: None, + sparse_mode: Some("invalid".to_string()), + domain: None, + repo: None, + agent_id: None, + thread_id: None, + updated_after: None, + updated_before: None, + ts_gte: None, + ts_lte: None, + top_k: None, + candidate_k: None, + explain: None, + }) + .expect_err("Expected invalid sparse mode to be rejected."); + + match err { + Error::InvalidRequest { message } => { + assert!(message.contains("sparse_mode")); + }, + other => panic!("Unexpected error: {other:?}"), + } +} + +#[test] +fn validate_docs_search_l0_rejects_domain_without_doc_type_search() { + let err = docs::validate_docs_search_l0(&DocsSearchL0Request { + tenant_id: TENANT_ID.to_string(), + project_id: PROJECT_ID.to_string(), + caller_agent_id: "agent".to_string(), + read_profile: "private_plus_project".to_string(), + query: "status".to_string(), + scope: None, + status: None, + doc_type: None, + sparse_mode: None, + domain: Some("example.com".to_string()), + repo: None, + agent_id: None, + thread_id: None, + updated_after: None, + updated_before: None, + ts_gte: None, + ts_lte: None, + top_k: None, + candidate_k: None, + explain: None, + }) + .expect_err("Expected domain without doc_type=search to be rejected."); + + match err { + Error::InvalidRequest { message } => { + assert!(message.contains("doc_type=search")); + }, + other => panic!("Unexpected error: {other:?}"), + } +} + +#[test] +fn validate_docs_search_l0_rejects_repo_without_doc_type_dev() { + let err = docs::validate_docs_search_l0(&DocsSearchL0Request { + tenant_id: TENANT_ID.to_string(), + project_id: PROJECT_ID.to_string(), + caller_agent_id: "agent".to_string(), + read_profile: "private_plus_project".to_string(), + query: "status".to_string(), + scope: None, + status: None, + doc_type: None, + sparse_mode: None, + domain: None, + repo: Some("hack-ink/ELF".to_string()), + agent_id: None, + thread_id: None, + updated_after: None, + updated_before: None, + ts_gte: None, + ts_lte: None, + top_k: None, + candidate_k: None, + explain: None, + }) + .expect_err("Expected repo without doc_type=dev to be rejected."); + + match err { + Error::InvalidRequest { message } => { + assert!(message.contains("doc_type=dev")); + }, + other => panic!("Unexpected error: {other:?}"), + } +} + +#[test] +fn validate_docs_search_l0_default_sparse_mode() { + let filters = docs::validate_docs_search_l0(&tests::test_request_with_query("status")) + .expect("valid request"); + + assert!(matches!(filters.sparse_mode, DocsSparseMode::Auto)); +} + +#[test] +fn should_enable_sparse_auto_uses_symbol_cues() { + assert!(docs::should_enable_sparse_auto("https://example.com/search?q=abc")); + assert!(!docs::should_enable_sparse_auto("how to debug a timeout")); +} diff --git a/packages/elf-service/src/docs/tests_source_capture.rs b/packages/elf-service/src/docs/tests_source_capture.rs new file mode 100644 index 00000000..83d3820e --- /dev/null +++ b/packages/elf-service/src/docs/tests_source_capture.rs @@ -0,0 +1,182 @@ +use time::{OffsetDateTime, format_description::well_known::Rfc3339}; +use uuid::Uuid; + +use crate::docs::{ + self, DocSearchRow, DocType, SourceCaptureSummaryInput, + tests::{PROJECT_ID, TENANT_ID}, +}; +use elf_domain::writegate::{WritePolicyAudit, WriteRedactionResult, WriteSpan}; +use elf_storage::models::DocChunk; + +#[test] +fn source_capture_metadata_uses_stable_record_and_span_ids() { + let now = OffsetDateTime::parse("2026-02-25T12:15:00Z", &Rfc3339) + .expect("Expected test timestamp to parse."); + let source_ref = serde_json::json!({ + "schema": "doc_source_ref/v1", + "doc_type": "knowledge", + "ts": "2026-02-25T12:00:00Z", + "source_kind": "article", + "canonical_uri": "https://example.com/research/source-library", + "captured_at": "2026-02-25T12:10:00Z", + "trust_label": "public_web", + }); + let source_ref = source_ref.as_object().expect("Expected source_ref object."); + let content_hash = "doc-content-hash"; + let doc_id = docs::source_record_id_for( + TENANT_ID, + PROJECT_ID, + "owner", + "project_shared", + DocType::Knowledge, + source_ref, + content_hash, + ); + let repeated_doc_id = docs::source_record_id_for( + TENANT_ID, + PROJECT_ID, + "owner", + "project_shared", + DocType::Knowledge, + source_ref, + content_hash, + ); + let chunk_id = docs::doc_chunk_id_for(doc_id, 0); + let chunk = DocChunk { + chunk_id, + doc_id, + chunk_index: 0, + start_offset: 0, + end_offset: 42, + chunk_text: "Source libraries preserve long-form evidence.".to_string(), + chunk_hash: "chunk-content-hash".to_string(), + created_at: now, + }; + let capture = docs::build_source_capture_summary(SourceCaptureSummaryInput { + doc_id, + source_ref, + doc_type: DocType::Knowledge, + scope: "project_shared", + title: Some("Saved article"), + content_hash, + raw_content_hash: "raw-content-hash", + now, + chunks: &[chunk], + write_policy_audit: None, + }) + .expect("Expected source capture summary."); + + assert_eq!(doc_id, repeated_doc_id); + assert_eq!(capture.schema, "doc_source_capture/v1"); + assert_eq!(capture.source_record_id, doc_id); + assert_eq!(capture.origin, "https://example.com/research/source-library"); + assert_eq!(capture.captured_at, "2026-02-25T12:10:00Z"); + assert_eq!(capture.content_hash, content_hash); + assert_eq!(capture.visibility_scope, "project_shared"); + assert_eq!(capture.title.as_deref(), Some("Saved article")); + assert_eq!(capture.source_type, "article"); + assert_eq!(capture.source_spans.len(), 1); + assert_eq!(capture.source_spans[0].schema, "doc_source_span/v1"); + assert_eq!(capture.source_spans[0].chunk_id, Some(chunk_id)); + assert_eq!(capture.source_spans[0].status, "captured"); + assert_eq!(capture.source_spans[0].reason_code, None); + assert_eq!(capture.source_spans[0].start_offset, 0); + assert_eq!(capture.source_spans[0].end_offset, 42); + assert_eq!( + capture.source_spans[0].span_id, + docs::source_span_id(content_hash, 0, 42, "captured") + ); +} + +#[test] +fn normalized_source_ref_records_policy_span_reasons() { + let now = OffsetDateTime::parse("2026-02-25T12:15:00Z", &Rfc3339) + .expect("Expected test timestamp to parse."); + let source_ref = serde_json::json!({ + "schema": "doc_source_ref/v1", + "doc_type": "knowledge", + "ts": "2026-02-25T12:00:00Z", + "uri": "file:///tmp/source.txt", + }); + let source_ref_map = source_ref.as_object().expect("Expected source_ref object."); + let audit = WritePolicyAudit { + exclusions: vec![WriteSpan { start: 6, end: 12 }], + redactions: vec![WriteRedactionResult { + span: WriteSpan { start: 20, end: 30 }, + replacement: "[redacted]".to_string(), + }], + }; + let doc_id = docs::source_record_id_for( + TENANT_ID, + PROJECT_ID, + "owner", + "project_shared", + DocType::Knowledge, + source_ref_map, + "stored-hash", + ); + let capture = docs::build_source_capture_summary(SourceCaptureSummaryInput { + doc_id, + source_ref: source_ref_map, + doc_type: DocType::Knowledge, + scope: "project_shared", + title: None, + content_hash: "stored-hash", + raw_content_hash: "raw-hash", + now, + chunks: &[], + write_policy_audit: Some(&audit), + }) + .expect("Expected source capture summary."); + let normalized = docs::normalize_source_ref_for_capture(source_ref, &capture) + .expect("Expected normalized source_ref"); + + assert_eq!(capture.policy_spans.len(), 2); + assert_eq!(capture.policy_spans[0].status, "excluded"); + assert_eq!(capture.policy_spans[0].reason_code.as_deref(), Some("WRITE_POLICY_EXCLUSION")); + assert_eq!(capture.policy_spans[1].status, "redacted"); + assert_eq!(capture.policy_spans[1].reason_code.as_deref(), Some("WRITE_POLICY_REDACTION")); + assert_eq!(normalized["source_record_id"], doc_id.to_string()); + assert_eq!(normalized["origin"], "file:///tmp/source.txt"); + assert_eq!(normalized["captured_at"], "2026-02-25T12:15:00Z"); + assert_eq!(normalized["content_hash"], "stored-hash"); + assert_eq!(normalized["visibility_scope"], "project_shared"); + assert_eq!(normalized["source_type"], "knowledge"); + assert_eq!(normalized["policy_spans"][0]["reason_code"], "WRITE_POLICY_EXCLUSION"); + assert_eq!(normalized["policy_spans"][1]["reason_code"], "WRITE_POLICY_REDACTION"); +} + +#[test] +fn docs_l0_pointer_carries_hashes_and_position_locator() { + let now = OffsetDateTime::parse("2026-02-25T12:00:00Z", &Rfc3339) + .expect("Expected test timestamp to parse."); + let row = DocSearchRow { + chunk_id: Uuid::parse_str("11111111-1111-4111-8111-111111111111") + .expect("Expected chunk UUID."), + doc_id: Uuid::parse_str("22222222-2222-4222-8222-222222222222") + .expect("Expected doc UUID."), + scope: "project_shared".to_string(), + doc_type: "knowledge".to_string(), + project_id: "project".to_string(), + agent_id: "agent".to_string(), + updated_at: now, + content_hash: "doc-hash".to_string(), + chunk_hash: "chunk-hash".to_string(), + start_offset: 12, + end_offset: 64, + chunk_text: "Source libraries preserve long-form evidence.".to_string(), + }; + let pointer = docs::build_docs_l0_pointer(&row, row.chunk_id); + + assert_eq!(pointer.schema, "source_ref/v1"); + assert_eq!(pointer.resolver, "elf_doc_ext/v1"); + assert_eq!(pointer.hashes.content_hash, "doc-hash"); + assert_eq!(pointer.hashes.chunk_hash, "chunk-hash"); + assert_eq!(pointer.reference.source_record_id, row.doc_id); + assert_eq!(pointer.reference.source_span_id, pointer.locator.span_id); + assert_eq!(pointer.locator.position.start, 12); + assert_eq!(pointer.locator.position.end, 64); + assert_eq!(pointer.locator.span_id, docs::source_span_id("doc-hash", 12, 64, "captured")); + assert_eq!(pointer.state.content_hash, pointer.hashes.content_hash); + assert_eq!(pointer.state.chunk_hash, pointer.hashes.chunk_hash); +}