diff --git a/apps/elf-eval/tests/real_world_job_benchmark/consolidation_knowledge.rs b/apps/elf-eval/tests/real_world_job_benchmark/consolidation_knowledge.rs index 3dcc727a..c4307a5d 100644 --- a/apps/elf-eval/tests/real_world_job_benchmark/consolidation_knowledge.rs +++ b/apps/elf-eval/tests/real_world_job_benchmark/consolidation_knowledge.rs @@ -1,764 +1,5 @@ -use std::{env, fs, path::Path, process}; +pub(crate) mod consolidation_knowledge_tests_helpers; -use color_eyre::{Result, eyre}; -use serde_json::Value; - -use crate::support; - -fn real_world_live_adapter_sources(workspace: &Path) -> Result { - let mut source = fs::read_to_string( - workspace.join("apps/elf-eval/src/bin/real_world_live_adapter/main.rs"), - )?; - - append_rust_sources( - workspace.join("apps/elf-eval/src/bin/real_world_live_adapter").as_path(), - &mut source, - )?; - - Ok(source) -} - -fn real_world_job_benchmark_sources(workspace: &Path) -> Result { - let mut source = fs::read_to_string( - workspace.join("apps/elf-eval/src/bin/real_world_job_benchmark/main.rs"), - )?; - - append_rust_sources( - workspace.join("apps/elf-eval/src/bin/real_world_job_benchmark").as_path(), - &mut source, - )?; - - Ok(source) -} - -fn append_rust_sources(dir: &Path, source: &mut String) -> Result<()> { - let mut entries = Vec::new(); - - for entry in fs::read_dir(dir)? { - entries.push(entry?.path()); - } - - entries.sort(); - - for path in entries { - if path.is_dir() { - append_rust_sources(path.as_path(), source)?; - } else if path.extension().and_then(|ext| ext.to_str()) == Some("rs") { - source.push('\n'); - source.push_str(fs::read_to_string(path)?.as_str()); - } - } - - Ok(()) -} - -#[test] -fn declared_not_encoded_consolidation_jobs_do_not_require_fake_proposals() -> Result<()> { - let fixture_path = - support::consolidation_fixture_dir().join("contradiction_report_discard.json"); - let mut fixture = serde_json::from_str::(&fs::read_to_string(fixture_path)?)?; - - fixture - .pointer_mut("/corpus/adapter_response") - .and_then(Value::as_object_mut) - .ok_or_else(|| eyre::eyre!("missing adapter_response object"))? - .remove("consolidation"); - - let encoding = serde_json::json!({ - "status": "not_encoded", - "reason": "The qmd live adapter retrieves evidence-linked answers but does not generate or review consolidation proposals." - }); - - fixture - .as_object_mut() - .ok_or_else(|| eyre::eyre!("fixture is not an object"))? - .insert("encoding".to_string(), encoding); - - let temp_dir = - env::temp_dir().join(format!("elf-real-world-not-encoded-consolidation-{}", process::id())); - - fs::create_dir_all(&temp_dir)?; - fs::write( - temp_dir.join("not_encoded_consolidation.json"), - serde_json::to_vec_pretty(&fixture)?, - )?; - - let report = support::run_json_report_from(temp_dir)?; - let jobs = support::array_at(&report, "/jobs")?; - let job = - support::find_by_field(jobs, "/job_id", "consolidation-contradiction-report-discard-001")?; - - assert_eq!(job.pointer("/status").and_then(Value::as_str), Some("not_encoded")); - assert_eq!(report.pointer("/summary/not_encoded").and_then(Value::as_u64), Some(1)); - - Ok(()) -} - -#[test] -fn capture_write_policy_live_report_preserves_competitor_boundaries() -> Result<()> { - let report = serde_json::from_str::(&fs::read_to_string( - support::capture_write_policy_live_report_path()?, - )?)?; - let markdown = fs::read_to_string(support::capture_write_policy_live_markdown_path()?)?; - let benchmarking_index = fs::read_to_string(support::benchmarking_index_path()?)?; - let readme = fs::read_to_string(support::readme_path()?)?; - - assert_eq!( - report.pointer("/schema").and_then(Value::as_str), - Some("elf.capture_write_policy_live_report/v1") - ); - assert_eq!(report.pointer("/authority").and_then(Value::as_str), Some("XY-933")); - assert_eq!( - report - .pointer("/live_capture_results/elf_live_real_world/suite_status") - .and_then(Value::as_str), - Some("pass") - ); - assert_eq!( - report - .pointer("/live_capture_results/elf_live_real_world/encoded_job_count") - .and_then(Value::as_u64), - Some(4) - ); - assert_eq!( - report - .pointer("/live_capture_results/elf_live_real_world/redaction_leak_count") - .and_then(Value::as_u64), - Some(0) - ); - assert_eq!( - report - .pointer("/live_capture_results/qmd_live_real_world/suite_status") - .and_then(Value::as_str), - Some("not_encoded") - ); - - let jobs = support::array_at(&report, "/jobs")?; - let source_binding = support::find_by_field(jobs, "/job_id", "capture-source-id-binding-001")?; - let source_binding_refs = support::array_at(source_binding, "/runtime_source_refs")?; - let release_summary_ref = - support::find_by_field(source_binding_refs, "/evidence_id", "source-id-release-summary")?; - - assert!(support::array_contains_str( - source_binding, - "/source_ids", - "capture:issue-comment-42" - )?); - assert_eq!( - release_summary_ref.pointer("/source_id").and_then(Value::as_str), - Some("capture:issue-comment-42") - ); - assert_eq!( - release_summary_ref.pointer("/evidence_binding").and_then(Value::as_str), - Some("source_ref") - ); - - let write_policy = - support::find_by_field(jobs, "/job_id", "capture-write-policy-redaction-001")?; - - assert_eq!( - write_policy.pointer("/write_policy_redaction_count").and_then(Value::as_u64), - Some(1) - ); - assert_eq!( - write_policy - .pointer("/runtime_source_refs/0/write_policy_applied") - .and_then(Value::as_bool), - Some(true) - ); - - let boundary = support::find_by_field(jobs, "/job_id", "capture-integration-boundaries-001")?; - - assert!(support::array_contains_str(boundary, "/excluded_evidence_ids", "private-span-trap")?); - assert!(!support::array_contains_str(boundary, "/stored_evidence_ids", "private-span-trap")?); - assert!( - support::array_at(boundary, "/runtime_source_refs")? - .iter() - .all(|item| item.pointer("/evidence_id").and_then(Value::as_str) - != Some("private-span-trap")) - ); - - let positions = support::array_at(&report, "/competitor_positions")?; - let qmd = support::find_by_field(positions, "/project", "qmd")?; - let agentmemory = support::find_by_field(positions, "/project", "agentmemory")?; - let claude_mem = support::find_by_field(positions, "/project", "claude-mem")?; - - assert_eq!(qmd.pointer("/position").and_then(Value::as_str), Some("untested")); - assert!(qmd.pointer("/reason").and_then(Value::as_str).is_some_and(|reason| { - reason.contains("typed not_encoded") && reason.contains("ELF self-check") - })); - assert_eq!(agentmemory.pointer("/position").and_then(Value::as_str), Some("blocked")); - assert!(agentmemory.pointer("/reason").and_then(Value::as_str).is_some_and(|reason| { - reason.contains("process-local StateKV Map") && reason.contains("in-memory index") - })); - assert_eq!(claude_mem.pointer("/position").and_then(Value::as_str), Some("blocked")); - assert!( - claude_mem - .pointer("/reason") - .and_then(Value::as_str) - .is_some_and(|reason| reason.contains("hooks, timeline, observations") - && reason.contains("Docker-contained hook/viewer runner")) - ); - - assert_capture_write_policy_docs(&markdown, &benchmarking_index, &readme); - - Ok(()) -} - -fn assert_capture_write_policy_docs(markdown: &str, benchmarking_index: &str, readme: &str) { - assert!(markdown.contains("ELF now has live capture/write-policy self-check evidence")); - assert!(markdown.contains("not an ELF-over-qmd win")); - assert!(markdown.contains("| claude-mem capture/viewer flows | `blocked` |")); - assert!(!markdown.contains("claude-mem capture breadth is untested")); - assert!(markdown.contains("runtime `source_ref` metadata returned by search")); - assert!(markdown.contains("Do not claim ELF broadly beats agentmemory or claude-mem")); - assert!(benchmarking_index.contains("2026-06-11-capture-write-policy-live-report.md")); - assert!(readme.contains("Capture/Write-Policy Live Report - June 11, 2026")); - assert!(readme.contains("mem0/OpenMemory")); - assert!(readme.contains("and memsearch now pass their scoped local baseline")); - assert!( - support::collapse_whitespace(readme) - .contains("claude-mem hook/viewer capture remains blocked until Docker-contained") - ); -} - -#[test] -fn live_consolidation_report_preserves_reviewable_output_boundaries() -> Result<()> { - let workspace = support::workspace_root()?; - let report = serde_json::from_str::(&fs::read_to_string( - support::live_consolidation_proposal_scoring_report_path()?, - )?)?; - let markdown = - fs::read_to_string(support::live_consolidation_proposal_scoring_markdown_path()?)?; - let benchmarking_index = fs::read_to_string(support::benchmarking_index_path()?)?; - let readme = fs::read_to_string(support::readme_path()?)?; - let benchmark_runbook = fs::read_to_string( - workspace - .join("docs") - .join("runbook") - .join("benchmarking") - .join("real_world_agent_memory_benchmark.md"), - )?; - let makefile = fs::read_to_string(workspace.join("Makefile.toml"))?; - let live_script = - fs::read_to_string(workspace.join("scripts/real-world-consolidation-live-adapter.sh"))?; - let live_adapter = real_world_live_adapter_sources(&workspace)?; - - assert_eq!( - report.pointer("/schema").and_then(Value::as_str), - Some("elf.live_consolidation_proposal_scoring_report/v1") - ); - assert_eq!(report.pointer("/authority").and_then(Value::as_str), Some("XY-934")); - assert_eq!( - report - .pointer("/live_consolidation_results/elf_live_real_world/suite_status") - .and_then(Value::as_str), - Some("pass") - ); - assert_eq!( - report - .pointer("/live_consolidation_results/elf_live_real_world/encoded_job_count") - .and_then(Value::as_u64), - Some(4) - ); - assert_eq!( - report - .pointer("/live_consolidation_results/elf_live_real_world/proposal_count") - .and_then(Value::as_u64), - Some(4) - ); - assert_eq!( - report - .pointer("/live_consolidation_results/elf_live_real_world/source_mutation_count") - .and_then(Value::as_u64), - Some(0) - ); - assert_eq!( - report - .pointer("/live_consolidation_results/elf_live_real_world/review_event_count") - .and_then(Value::as_u64), - Some(6) - ); - assert_eq!( - report - .pointer("/live_consolidation_results/qmd_live_real_world/suite_status") - .and_then(Value::as_str), - Some("not_encoded") - ); - - let jobs = support::array_at(&report, "/jobs")?; - let project_summary = - support::find_by_field(jobs, "/job_id", "consolidation-project-summary-apply-001")?; - let preference = - support::find_by_field(jobs, "/job_id", "consolidation-preference-candidate-defer-001")?; - let contradiction = - support::find_by_field(jobs, "/job_id", "consolidation-contradiction-report-discard-001")?; - - assert_eq!( - project_summary.pointer("/final_review_state").and_then(Value::as_str), - Some("applied") - ); - assert_eq!(project_summary.pointer("/review_event_count").and_then(Value::as_u64), Some(2)); - assert_eq!(preference.pointer("/final_review_state").and_then(Value::as_str), Some("archived")); - assert_eq!( - contradiction.pointer("/final_review_state").and_then(Value::as_str), - Some("rejected") - ); - assert_eq!( - contradiction.pointer("/unsupported_claim_flag_count").and_then(Value::as_u64), - Some(1) - ); - assert_eq!(contradiction.pointer("/source_lineage_count").and_then(Value::as_u64), Some(3)); - - let positions = support::array_at(&report, "/reference_positions")?; - let qmd = support::find_by_field(positions, "/project", "qmd")?; - let managed = support::find_by_field(positions, "/project", "managed_dreaming_memory_systems")?; - let always_on = - support::find_by_field(positions, "/project", "always_on_memory_agent_patterns")?; - - assert_eq!(qmd.pointer("/position").and_then(Value::as_str), Some("untested")); - assert_eq!(managed.pointer("/position").and_then(Value::as_str), Some("product_reference")); - assert_eq!(always_on.pointer("/position").and_then(Value::as_str), Some("product_reference")); - assert!(markdown.contains("ELF now has service-backed live consolidation proposal scoring")); - assert!(markdown.contains("This is not scheduled production consolidation")); - assert!(markdown.contains("Source mutations")); - assert!(markdown.contains("Do not mix knowledge-page rebuild/lint scoring")); - assert!( - benchmarking_index.contains("2026-06-16-live-consolidation-proposal-scoring-report.md") - ); - assert!(readme.contains("Live Consolidation Proposal Scoring Report - June 16, 2026")); - assert!(readme.contains("real-world-memory-live-consolidation")); - assert!(benchmark_runbook.contains("Current live consolidation increment")); - assert!(benchmark_runbook.contains("tmp/real-world-memory/live-consolidation/summary.json")); - assert!(makefile.contains("[tasks.real-world-memory-live-consolidation]")); - assert!(makefile.contains("scripts/real-world-docker.sh")); - - let docker_script = fs::read_to_string(workspace.join("scripts/real-world-docker.sh"))?; - - assert_live_consolidation_scripts(&docker_script, &live_script, &live_adapter); - - Ok(()) -} - -fn assert_live_consolidation_scripts(docker_script: &str, live_script: &str, live_adapter: &str) { - assert!(docker_script.contains("scripts/real-world-consolidation-live-adapter.sh")); - assert!(live_script.contains("elf.real_world_consolidation_live_adapter_sweep/v1")); - assert!(live_script.contains("real_world_live_adapter -- elf")); - assert!(!live_script.contains("real_world_live_adapter -- qmd")); - assert!(live_adapter.contains("fn materialize_elf_consolidation(")); - assert!(live_adapter.contains("ConsolidationProposalReviewRequest")); -} - -#[test] -fn live_knowledge_page_rebuild_lint_has_dedicated_docker_task() -> Result<()> { - let workspace = support::workspace_root()?; - let makefile = fs::read_to_string(workspace.join("Makefile.toml"))?; - let docker_script = fs::read_to_string(workspace.join("scripts/real-world-docker.sh"))?; - let live_script = - fs::read_to_string(workspace.join("scripts/real-world-knowledge-live-adapter.sh"))?; - let live_adapter = real_world_live_adapter_sources(&workspace)?; - let knowledge_spec = fs::read_to_string( - workspace.join("docs").join("spec").join("system_knowledge_pages_v1.md"), - )?; - let version_diff_report = fs::read_to_string( - workspace - .join("docs") - .join("evidence") - .join("benchmarking") - .join("2026-06-20-knowledge-workspace-version-diff-report.md"), - )?; - let benchmark_runbook = fs::read_to_string( - workspace - .join("docs") - .join("runbook") - .join("benchmarking") - .join("real_world_agent_memory_benchmark.md"), - )?; - let live_runbook = fs::read_to_string( - workspace - .join("docs") - .join("runbook") - .join("benchmarking") - .join("live_baseline_benchmark.md"), - )?; - let benchmarking_index = fs::read_to_string(support::benchmarking_index_path()?)?; - let readme = fs::read_to_string(support::readme_path()?)?; - - assert!(makefile.contains("[tasks.real-world-memory-live-knowledge]")); - assert!(makefile.contains("scripts/real-world-docker.sh")); - assert!(makefile.contains("memory-live-knowledge")); - assert!(docker_script.contains("memory-live-knowledge)")); - assert!(docker_script.contains("-e ELF_KNOWLEDGE_LIVE_REPORT_DIR")); - assert!(docker_script.contains("-e ELF_KNOWLEDGE_LIVE_FIXTURES")); - assert!(docker_script.contains("scripts/real-world-knowledge-live-adapter.sh")); - assert!(live_script.contains("elf.real_world_knowledge_live_adapter_sweep/v1")); - assert!(live_script.contains("apps/elf-eval/fixtures/real_world_memory/knowledge")); - assert!(live_script.contains("tmp/real-world-memory/live-knowledge")); - assert!(live_script.contains("real-world-memory-live-knowledge")); - assert!(live_script.contains("ElfService knowledge_page_rebuild")); - assert!(live_script.contains("knowledge_page_lint")); - assert!(live_script.contains("knowledge_pages_search")); - assert!(live_script.contains("pages remain derived benchmark artifacts")); - assert!(live_adapter.contains("\"page_version_diff\"")); - assert!(live_adapter.contains("version_diff_available")); - assert!(live_adapter.contains("fn materialize_elf_knowledge(")); - assert!(live_adapter.contains("KnowledgePageRebuildRequest")); - assert!(live_adapter.contains("KnowledgePageLintRequest")); - assert!(live_adapter.contains("KnowledgePageSearchRequest")); - assert!(real_world_job_benchmark_sources(&workspace)?.contains("version_diff_coverage")); - assert!(knowledge_spec.contains("elf.knowledge_page.version_diff/v1")); - assert!( - version_diff_report.contains("Knowledge Workspace Version-Diff Report - June 20, 2026") - ); - assert!(version_diff_report.contains("version_diff_coverage = 1.000")); - assert!(benchmark_runbook.contains("Current live knowledge-page rebuild/lint increment")); - assert!(benchmark_runbook.contains("cargo make real-world-memory-live-knowledge")); - assert!(benchmark_runbook.contains("tmp/real-world-memory/live-knowledge/summary.json")); - assert!(live_runbook.contains("cargo make real-world-memory-live-knowledge")); - assert!(benchmarking_index.contains("2026-06-20-live-knowledge-page-rebuild-lint-report.md")); - assert!(benchmarking_index.contains("2026-06-20-knowledge-workspace-version-diff-report.md")); - assert!(readme.contains("Live Knowledge-Page Rebuild/Lint Report - June 20, 2026")); - assert!(readme.contains("Knowledge Workspace Version-Diff Report - June 20, 2026")); - - Ok(()) -} - -#[test] -fn runner_discovers_nested_fixture_layout() -> Result<()> { - let report = support::run_json_report_from(support::fixture_root())?; - - assert_eq!(report.pointer("/summary/job_count").and_then(Value::as_u64), Some(82)); - - Ok(()) -} - -#[test] -fn operator_debug_fixture_reports_trace_links_and_failure_details() -> Result<()> { - let report = support::run_json_report_from(support::operator_debug_fixture_dir())?; - - assert_eq!(report.pointer("/summary/job_count").and_then(Value::as_u64), Some(7)); - assert_eq!( - report.pointer("/summary/operator_debug_job_count").and_then(Value::as_u64), - Some(7) - ); - assert_eq!(report.pointer("/summary/raw_sql_needed_count").and_then(Value::as_u64), Some(0)); - assert_eq!(report.pointer("/summary/trace_incomplete_count").and_then(Value::as_u64), Some(0)); - assert_eq!(report.pointer("/summary/operator_ux_gap_count").and_then(Value::as_u64), Some(0)); - assert_eq!(report.pointer("/summary/pass").and_then(Value::as_u64), Some(7)); - assert_eq!(report.pointer("/summary/unsupported_claim").and_then(Value::as_u64), Some(0)); - assert_eq!(report.pointer("/summary/unsupported_claim_count").and_then(Value::as_u64), Some(0)); - assert_eq!( - report.pointer("/summary/trace_explainability_count").and_then(Value::as_u64), - Some(3) - ); - - let jobs = support::array_at(&report, "/jobs")?; - let dropped = support::find_by_field(jobs, "/job_id", "operator-debug-dropped-evidence-001")?; - let selected = - support::find_by_field(jobs, "/job_id", "operator-debug-selected-not-narrated-001")?; - let compact = - support::find_by_field(jobs, "/job_id", "operator-debug-qmd-style-compact-replay-001")?; - - assert_eq!(dropped.pointer("/status").and_then(Value::as_str), Some("pass")); - assert_eq!( - dropped.pointer("/operator_debug/raw_sql_needed").and_then(Value::as_bool), - Some(false) - ); - assert_eq!( - dropped.pointer("/operator_debug/dropped_candidate_visibility").and_then(Value::as_str), - Some("visible in Retrieval Funnel and Replay Candidates") - ); - assert_eq!( - dropped.pointer("/operator_debug/viewer_url").and_then(Value::as_str), - Some("/viewer?trace_id=11111111-1111-4111-8111-111111111111") - ); - assert_eq!( - dropped.pointer("/trace_explainability/failure_stage").and_then(Value::as_str), - Some("filter.read_profile") - ); - assert!(support::array_contains_str( - dropped, - "/trace_explainability/stages/1/dropped_evidence", - "trace-dropped-expected" - )?); - assert!(support::array_contains_str( - dropped, - "/trace_explainability/stages/1/distractor_evidence", - "trace-dropped-decoy" - )?); - assert!(support::array_contains_str(dropped, "/produced_evidence", "trace-dropped-expected")?); - assert_eq!(selected.pointer("/status").and_then(Value::as_str), Some("pass")); - assert_eq!( - selected.pointer("/trace_explainability/failure_stage").and_then(Value::as_str), - Some("selection.narration") - ); - assert_eq!( - selected.pointer("/operator_debug/failure_mode").and_then(Value::as_str), - Some("selected_but_not_narrated") - ); - assert_eq!(compact.pointer("/status").and_then(Value::as_str), Some("pass")); - assert_eq!( - compact.pointer("/operator_debug/failure_mode").and_then(Value::as_str), - Some("qmd_style_compact_replay") - ); - assert_eq!( - compact.pointer("/operator_debug/replay_command_available").and_then(Value::as_bool), - Some(true) - ); - assert_eq!( - compact.pointer("/trace_explainability/failure_stage").and_then(Value::as_str), - Some("recall_debug.compact_replay") - ); - assert!(support::array_contains_str( - compact, - "/trace_explainability/stages/4/kept_evidence", - "compact-replay-artifact" - )?); - assert!(support::array_contains_str( - compact, - "/produced_evidence", - "qmd-short-replay-reference" - )?); - - Ok(()) -} - -#[test] -fn consolidation_fixtures_report_reviewable_proposal_metrics() -> Result<()> { - let report = support::run_json_report_from(support::consolidation_fixture_dir())?; - - assert_eq!(report.pointer("/summary/job_count").and_then(Value::as_u64), Some(4)); - assert_eq!(report.pointer("/summary/pass").and_then(Value::as_u64), Some(4)); - assert_eq!( - report.pointer("/summary/consolidation/proposal_count").and_then(Value::as_u64), - Some(4) - ); - assert_eq!( - report.pointer("/summary/consolidation/source_mutation_count").and_then(Value::as_u64), - Some(0) - ); - assert_eq!( - report - .pointer("/summary/consolidation/proposal_unsupported_claim_count") - .and_then(Value::as_u64), - Some(1) - ); - assert_eq!( - report.pointer("/summary/consolidation/executable_gap_count").and_then(Value::as_u64), - Some(0) - ); - assert_eq!( - report.pointer("/summary/consolidation/lineage_completeness").and_then(Value::as_f64), - Some(1.0) - ); - assert_eq!( - report.pointer("/summary/consolidation/review_action_correctness").and_then(Value::as_f64), - Some(1.0) - ); - - let jobs = support::array_at(&report, "/jobs")?; - let project_summary = - support::find_by_field(jobs, "/job_id", "consolidation-project-summary-apply-001")?; - let contradiction = - support::find_by_field(jobs, "/job_id", "consolidation-contradiction-report-discard-001")?; - - assert_eq!( - project_summary - .pointer("/consolidation/proposals/0/actual_review_action") - .and_then(Value::as_str), - Some("apply") - ); - assert_eq!( - contradiction - .pointer("/consolidation/proposals/0/actual_review_action") - .and_then(Value::as_str), - Some("discard") - ); - assert_eq!( - contradiction - .pointer("/consolidation/proposals/0/unsupported_claim_count") - .and_then(Value::as_u64), - Some(1) - ); - - let suites = support::array_at(&report, "/suites")?; - let consolidation_suite = support::find_by_field(suites, "/suite_id", "consolidation")?; - - assert_eq!(consolidation_suite.pointer("/status").and_then(Value::as_str), Some("pass")); - - Ok(()) -} - -#[test] -fn knowledge_fixtures_report_page_metrics() -> Result<()> { - let report = support::run_json_report_from(support::knowledge_fixture_dir())?; - - assert_eq!(report.pointer("/summary/job_count").and_then(Value::as_u64), Some(3)); - assert_eq!(report.pointer("/summary/pass").and_then(Value::as_u64), Some(3)); - assert_eq!(report.pointer("/summary/unsupported_claim_count").and_then(Value::as_u64), Some(0)); - assert_eq!(report.pointer("/summary/wrong_result_count").and_then(Value::as_u64), Some(0)); - assert_eq!(report.pointer("/summary/knowledge/page_count").and_then(Value::as_u64), Some(5)); - assert_eq!( - report.pointer("/summary/knowledge/section_count").and_then(Value::as_u64), - Some(13) - ); - assert_eq!( - report.pointer("/summary/knowledge/citation_coverage").and_then(Value::as_f64), - Some(0.923) - ); - assert_eq!( - report.pointer("/summary/knowledge/stale_claim_detection").and_then(Value::as_f64), - Some(1.0) - ); - assert_eq!( - report.pointer("/summary/knowledge/rebuild_determinism").and_then(Value::as_f64), - Some(1.0) - ); - assert_eq!( - report.pointer("/summary/knowledge/backlink_count").and_then(Value::as_u64), - Some(11) - ); - assert_eq!( - report.pointer("/summary/knowledge/pages_with_backlinks").and_then(Value::as_u64), - Some(5) - ); - assert_eq!( - report.pointer("/summary/knowledge/backlink_coverage").and_then(Value::as_f64), - Some(1.0) - ); - assert_eq!( - report.pointer("/summary/knowledge/page_usefulness").and_then(Value::as_f64), - Some(0.979) - ); - assert_eq!( - report.pointer("/summary/knowledge/pages_with_version_diff").and_then(Value::as_u64), - Some(1) - ); - assert_eq!( - report.pointer("/summary/knowledge/unsupported_summary_count").and_then(Value::as_u64), - Some(1) - ); - assert_eq!( - report.pointer("/summary/knowledge/allowed_variance_count").and_then(Value::as_u64), - Some(1) - ); - - let suites = support::array_at(&report, "/suites")?; - let knowledge_suite = support::find_by_field(suites, "/suite_id", "knowledge_compilation")?; - - assert_eq!(knowledge_suite.pointer("/status").and_then(Value::as_str), Some("pass")); - assert_eq!(knowledge_suite.pointer("/encoded_job_count").and_then(Value::as_u64), Some(3)); - - let jobs = support::array_at(&report, "/jobs")?; - let project_page_job = support::find_by_field(jobs, "/job_id", "knowledge-project-page-001")?; - let watch_rebuild_job = support::find_by_field(jobs, "/job_id", "knowledge-watch-rebuild-003")?; - - assert_eq!( - project_page_job.pointer("/knowledge/unsupported_summary_count").and_then(Value::as_u64), - Some(1) - ); - assert_eq!( - project_page_job.pointer("/knowledge/untraced_section_count").and_then(Value::as_u64), - Some(0) - ); - assert_eq!( - watch_rebuild_job.pointer("/knowledge/pages_with_version_diff").and_then(Value::as_u64), - Some(1) - ); - assert!( - watch_rebuild_job - .pointer("/produced_answer") - .and_then(Value::as_str) - .is_some_and(|answer| answer - .contains("PageIndex/OpenKB adapter claim as lint evidence") - && answer.contains("leaves source documents plus Memory Notes unmodified")) - ); - - Ok(()) -} - -#[test] -fn project_decisions_fixtures_report_decision_policy_cases() -> Result<()> { - let report = support::run_json_report_from(support::project_decisions_fixture_dir())?; - - assert_eq!(report.pointer("/summary/job_count").and_then(Value::as_u64), Some(5)); - assert_eq!(report.pointer("/summary/pass").and_then(Value::as_u64), Some(5)); - assert_eq!(report.pointer("/summary/wrong_result").and_then(Value::as_u64), Some(0)); - assert_eq!(report.pointer("/summary/not_encoded").and_then(Value::as_u64), Some(0)); - assert_eq!( - report.pointer("/summary/conflict_detection_count").and_then(Value::as_u64), - Some(2) - ); - assert_eq!( - report.pointer("/summary/update_rationale_available_count").and_then(Value::as_u64), - Some(5) - ); - assert_eq!( - report.pointer("/summary/expected_evidence_recall").and_then(Value::as_f64), - Some(1.0) - ); - - let suites = support::array_at(&report, "/suites")?; - let project_decisions = support::find_by_field(suites, "/suite_id", "project_decisions")?; - - assert_eq!(project_decisions.pointer("/status").and_then(Value::as_str), Some("pass")); - assert_eq!(project_decisions.pointer("/encoded_job_count").and_then(Value::as_u64), Some(5)); - assert_eq!( - project_decisions.pointer("/update_rationale_available_count").and_then(Value::as_u64), - Some(5) - ); - - let jobs = support::array_at(&report, "/jobs")?; - let accepted = - support::find_by_field(jobs, "/job_id", "project-decision-accepted-typed-failures-001")?; - let reversal = - support::find_by_field(jobs, "/job_id", "project-decision-reversal-live-baseline-001")?; - let validation = - support::find_by_field(jobs, "/job_id", "project-decision-current-validation-gate-001")?; - let tradeoff = - support::find_by_field(jobs, "/job_id", "project-decision-tradeoff-fixture-backed-001")?; - let caveat = - support::find_by_field(jobs, "/job_id", "project-decision-private-manifest-caveat-001")?; - - assert_eq!(accepted.pointer("/answer_type").and_then(Value::as_str), Some("decision_record")); - assert_eq!( - accepted.pointer("/expected_evidence").and_then(Value::as_array).map(Vec::len), - Some(2) - ); - assert_eq!( - reversal.pointer("/evolution/historical_evidence/0").and_then(Value::as_str), - Some("live-baseline-suite-win-old") - ); - assert_eq!( - validation.pointer("/evolution/current_evidence/0").and_then(Value::as_str), - Some("validation-gate-current-decodex") - ); - assert_eq!(tradeoff.pointer("/requires_caveat").and_then(Value::as_bool), Some(true)); - assert_eq!(caveat.pointer("/can_answer_unknown").and_then(Value::as_bool), Some(true)); - - for job in jobs { - let expected_evidence = support::array_at(job, "/expected_evidence")?; - - assert!( - !expected_evidence.is_empty(), - "project decision job {} must declare required evidence", - job.pointer("/job_id").and_then(Value::as_str).unwrap_or("") - ); - } - for entry in fs::read_dir(support::project_decisions_fixture_dir())? { - let path = entry?.path(); - - if path.extension().and_then(|ext| ext.to_str()) != Some("json") { - continue; - } - - let fixture = serde_json::from_str::(&fs::read_to_string(path)?)?; - let required_evidence = support::array_at(&fixture, "/required_evidence")?; - let negative_traps = support::array_at(&fixture, "/negative_traps")?; - - assert!(!required_evidence.is_empty()); - assert!(!negative_traps.is_empty()); - } - - Ok(()) -} +mod consolidation_knowledge_tests_capture; +mod consolidation_knowledge_tests_fixtures; +mod consolidation_knowledge_tests_live; diff --git a/apps/elf-eval/tests/real_world_job_benchmark/consolidation_knowledge_tests_capture.rs b/apps/elf-eval/tests/real_world_job_benchmark/consolidation_knowledge_tests_capture.rs new file mode 100644 index 00000000..37fc4f9d --- /dev/null +++ b/apps/elf-eval/tests/real_world_job_benchmark/consolidation_knowledge_tests_capture.rs @@ -0,0 +1,176 @@ +use std::{env, fs, process}; + +use color_eyre::{Result, eyre}; +use serde_json::Value; + +use crate::support; + +#[test] +fn declared_not_encoded_consolidation_jobs_do_not_require_fake_proposals() -> Result<()> { + let fixture_path = + support::consolidation_fixture_dir().join("contradiction_report_discard.json"); + let mut fixture = serde_json::from_str::(&fs::read_to_string(fixture_path)?)?; + + fixture + .pointer_mut("/corpus/adapter_response") + .and_then(Value::as_object_mut) + .ok_or_else(|| eyre::eyre!("missing adapter_response object"))? + .remove("consolidation"); + + let encoding = serde_json::json!({ + "status": "not_encoded", + "reason": "The qmd live adapter retrieves evidence-linked answers but does not generate or review consolidation proposals." + }); + + fixture + .as_object_mut() + .ok_or_else(|| eyre::eyre!("fixture is not an object"))? + .insert("encoding".to_string(), encoding); + + let temp_dir = + env::temp_dir().join(format!("elf-real-world-not-encoded-consolidation-{}", process::id())); + + fs::create_dir_all(&temp_dir)?; + fs::write( + temp_dir.join("not_encoded_consolidation.json"), + serde_json::to_vec_pretty(&fixture)?, + )?; + + let report = support::run_json_report_from(temp_dir)?; + let jobs = support::array_at(&report, "/jobs")?; + let job = + support::find_by_field(jobs, "/job_id", "consolidation-contradiction-report-discard-001")?; + + assert_eq!(job.pointer("/status").and_then(Value::as_str), Some("not_encoded")); + assert_eq!(report.pointer("/summary/not_encoded").and_then(Value::as_u64), Some(1)); + + Ok(()) +} + +#[test] +fn capture_write_policy_live_report_preserves_competitor_boundaries() -> Result<()> { + let report = serde_json::from_str::(&fs::read_to_string( + support::capture_write_policy_live_report_path()?, + )?)?; + let markdown = fs::read_to_string(support::capture_write_policy_live_markdown_path()?)?; + let benchmarking_index = fs::read_to_string(support::benchmarking_index_path()?)?; + let readme = fs::read_to_string(support::readme_path()?)?; + + assert_eq!( + report.pointer("/schema").and_then(Value::as_str), + Some("elf.capture_write_policy_live_report/v1") + ); + assert_eq!(report.pointer("/authority").and_then(Value::as_str), Some("XY-933")); + assert_eq!( + report + .pointer("/live_capture_results/elf_live_real_world/suite_status") + .and_then(Value::as_str), + Some("pass") + ); + assert_eq!( + report + .pointer("/live_capture_results/elf_live_real_world/encoded_job_count") + .and_then(Value::as_u64), + Some(4) + ); + assert_eq!( + report + .pointer("/live_capture_results/elf_live_real_world/redaction_leak_count") + .and_then(Value::as_u64), + Some(0) + ); + assert_eq!( + report + .pointer("/live_capture_results/qmd_live_real_world/suite_status") + .and_then(Value::as_str), + Some("not_encoded") + ); + + let jobs = support::array_at(&report, "/jobs")?; + let source_binding = support::find_by_field(jobs, "/job_id", "capture-source-id-binding-001")?; + let source_binding_refs = support::array_at(source_binding, "/runtime_source_refs")?; + let release_summary_ref = + support::find_by_field(source_binding_refs, "/evidence_id", "source-id-release-summary")?; + + assert!(support::array_contains_str( + source_binding, + "/source_ids", + "capture:issue-comment-42" + )?); + assert_eq!( + release_summary_ref.pointer("/source_id").and_then(Value::as_str), + Some("capture:issue-comment-42") + ); + assert_eq!( + release_summary_ref.pointer("/evidence_binding").and_then(Value::as_str), + Some("source_ref") + ); + + let write_policy = + support::find_by_field(jobs, "/job_id", "capture-write-policy-redaction-001")?; + + assert_eq!( + write_policy.pointer("/write_policy_redaction_count").and_then(Value::as_u64), + Some(1) + ); + assert_eq!( + write_policy + .pointer("/runtime_source_refs/0/write_policy_applied") + .and_then(Value::as_bool), + Some(true) + ); + + let boundary = support::find_by_field(jobs, "/job_id", "capture-integration-boundaries-001")?; + + assert!(support::array_contains_str(boundary, "/excluded_evidence_ids", "private-span-trap")?); + assert!(!support::array_contains_str(boundary, "/stored_evidence_ids", "private-span-trap")?); + assert!( + support::array_at(boundary, "/runtime_source_refs")? + .iter() + .all(|item| item.pointer("/evidence_id").and_then(Value::as_str) + != Some("private-span-trap")) + ); + + let positions = support::array_at(&report, "/competitor_positions")?; + let qmd = support::find_by_field(positions, "/project", "qmd")?; + let agentmemory = support::find_by_field(positions, "/project", "agentmemory")?; + let claude_mem = support::find_by_field(positions, "/project", "claude-mem")?; + + assert_eq!(qmd.pointer("/position").and_then(Value::as_str), Some("untested")); + assert!(qmd.pointer("/reason").and_then(Value::as_str).is_some_and(|reason| { + reason.contains("typed not_encoded") && reason.contains("ELF self-check") + })); + assert_eq!(agentmemory.pointer("/position").and_then(Value::as_str), Some("blocked")); + assert!(agentmemory.pointer("/reason").and_then(Value::as_str).is_some_and(|reason| { + reason.contains("process-local StateKV Map") && reason.contains("in-memory index") + })); + assert_eq!(claude_mem.pointer("/position").and_then(Value::as_str), Some("blocked")); + assert!( + claude_mem + .pointer("/reason") + .and_then(Value::as_str) + .is_some_and(|reason| reason.contains("hooks, timeline, observations") + && reason.contains("Docker-contained hook/viewer runner")) + ); + + assert_capture_write_policy_docs(&markdown, &benchmarking_index, &readme); + + Ok(()) +} + +fn assert_capture_write_policy_docs(markdown: &str, benchmarking_index: &str, readme: &str) { + assert!(markdown.contains("ELF now has live capture/write-policy self-check evidence")); + assert!(markdown.contains("not an ELF-over-qmd win")); + assert!(markdown.contains("| claude-mem capture/viewer flows | `blocked` |")); + assert!(!markdown.contains("claude-mem capture breadth is untested")); + assert!(markdown.contains("runtime `source_ref` metadata returned by search")); + assert!(markdown.contains("Do not claim ELF broadly beats agentmemory or claude-mem")); + assert!(benchmarking_index.contains("2026-06-11-capture-write-policy-live-report.md")); + assert!(readme.contains("Capture/Write-Policy Live Report - June 11, 2026")); + assert!(readme.contains("mem0/OpenMemory")); + assert!(readme.contains("and memsearch now pass their scoped local baseline")); + assert!( + support::collapse_whitespace(readme) + .contains("claude-mem hook/viewer capture remains blocked until Docker-contained") + ); +} diff --git a/apps/elf-eval/tests/real_world_job_benchmark/consolidation_knowledge_tests_fixtures.rs b/apps/elf-eval/tests/real_world_job_benchmark/consolidation_knowledge_tests_fixtures.rs new file mode 100644 index 00000000..932c2332 --- /dev/null +++ b/apps/elf-eval/tests/real_world_job_benchmark/consolidation_knowledge_tests_fixtures.rs @@ -0,0 +1,346 @@ +use std::fs; + +use color_eyre::Result; +use serde_json::Value; + +use crate::support; + +#[test] +fn runner_discovers_nested_fixture_layout() -> Result<()> { + let report = support::run_json_report_from(support::fixture_root())?; + + assert_eq!(report.pointer("/summary/job_count").and_then(Value::as_u64), Some(82)); + + Ok(()) +} + +#[test] +fn operator_debug_fixture_reports_trace_links_and_failure_details() -> Result<()> { + let report = support::run_json_report_from(support::operator_debug_fixture_dir())?; + + assert_eq!(report.pointer("/summary/job_count").and_then(Value::as_u64), Some(7)); + assert_eq!( + report.pointer("/summary/operator_debug_job_count").and_then(Value::as_u64), + Some(7) + ); + assert_eq!(report.pointer("/summary/raw_sql_needed_count").and_then(Value::as_u64), Some(0)); + assert_eq!(report.pointer("/summary/trace_incomplete_count").and_then(Value::as_u64), Some(0)); + assert_eq!(report.pointer("/summary/operator_ux_gap_count").and_then(Value::as_u64), Some(0)); + assert_eq!(report.pointer("/summary/pass").and_then(Value::as_u64), Some(7)); + assert_eq!(report.pointer("/summary/unsupported_claim").and_then(Value::as_u64), Some(0)); + assert_eq!(report.pointer("/summary/unsupported_claim_count").and_then(Value::as_u64), Some(0)); + assert_eq!( + report.pointer("/summary/trace_explainability_count").and_then(Value::as_u64), + Some(3) + ); + + let jobs = support::array_at(&report, "/jobs")?; + let dropped = support::find_by_field(jobs, "/job_id", "operator-debug-dropped-evidence-001")?; + let selected = + support::find_by_field(jobs, "/job_id", "operator-debug-selected-not-narrated-001")?; + let compact = + support::find_by_field(jobs, "/job_id", "operator-debug-qmd-style-compact-replay-001")?; + + assert_eq!(dropped.pointer("/status").and_then(Value::as_str), Some("pass")); + assert_eq!( + dropped.pointer("/operator_debug/raw_sql_needed").and_then(Value::as_bool), + Some(false) + ); + assert_eq!( + dropped.pointer("/operator_debug/dropped_candidate_visibility").and_then(Value::as_str), + Some("visible in Retrieval Funnel and Replay Candidates") + ); + assert_eq!( + dropped.pointer("/operator_debug/viewer_url").and_then(Value::as_str), + Some("/viewer?trace_id=11111111-1111-4111-8111-111111111111") + ); + assert_eq!( + dropped.pointer("/trace_explainability/failure_stage").and_then(Value::as_str), + Some("filter.read_profile") + ); + assert!(support::array_contains_str( + dropped, + "/trace_explainability/stages/1/dropped_evidence", + "trace-dropped-expected" + )?); + assert!(support::array_contains_str( + dropped, + "/trace_explainability/stages/1/distractor_evidence", + "trace-dropped-decoy" + )?); + assert!(support::array_contains_str(dropped, "/produced_evidence", "trace-dropped-expected")?); + assert_eq!(selected.pointer("/status").and_then(Value::as_str), Some("pass")); + assert_eq!( + selected.pointer("/trace_explainability/failure_stage").and_then(Value::as_str), + Some("selection.narration") + ); + assert_eq!( + selected.pointer("/operator_debug/failure_mode").and_then(Value::as_str), + Some("selected_but_not_narrated") + ); + assert_eq!(compact.pointer("/status").and_then(Value::as_str), Some("pass")); + assert_eq!( + compact.pointer("/operator_debug/failure_mode").and_then(Value::as_str), + Some("qmd_style_compact_replay") + ); + assert_eq!( + compact.pointer("/operator_debug/replay_command_available").and_then(Value::as_bool), + Some(true) + ); + assert_eq!( + compact.pointer("/trace_explainability/failure_stage").and_then(Value::as_str), + Some("recall_debug.compact_replay") + ); + assert!(support::array_contains_str( + compact, + "/trace_explainability/stages/4/kept_evidence", + "compact-replay-artifact" + )?); + assert!(support::array_contains_str( + compact, + "/produced_evidence", + "qmd-short-replay-reference" + )?); + + Ok(()) +} + +#[test] +fn consolidation_fixtures_report_reviewable_proposal_metrics() -> Result<()> { + let report = support::run_json_report_from(support::consolidation_fixture_dir())?; + + assert_eq!(report.pointer("/summary/job_count").and_then(Value::as_u64), Some(4)); + assert_eq!(report.pointer("/summary/pass").and_then(Value::as_u64), Some(4)); + assert_eq!( + report.pointer("/summary/consolidation/proposal_count").and_then(Value::as_u64), + Some(4) + ); + assert_eq!( + report.pointer("/summary/consolidation/source_mutation_count").and_then(Value::as_u64), + Some(0) + ); + assert_eq!( + report + .pointer("/summary/consolidation/proposal_unsupported_claim_count") + .and_then(Value::as_u64), + Some(1) + ); + assert_eq!( + report.pointer("/summary/consolidation/executable_gap_count").and_then(Value::as_u64), + Some(0) + ); + assert_eq!( + report.pointer("/summary/consolidation/lineage_completeness").and_then(Value::as_f64), + Some(1.0) + ); + assert_eq!( + report.pointer("/summary/consolidation/review_action_correctness").and_then(Value::as_f64), + Some(1.0) + ); + + let jobs = support::array_at(&report, "/jobs")?; + let project_summary = + support::find_by_field(jobs, "/job_id", "consolidation-project-summary-apply-001")?; + let contradiction = + support::find_by_field(jobs, "/job_id", "consolidation-contradiction-report-discard-001")?; + + assert_eq!( + project_summary + .pointer("/consolidation/proposals/0/actual_review_action") + .and_then(Value::as_str), + Some("apply") + ); + assert_eq!( + contradiction + .pointer("/consolidation/proposals/0/actual_review_action") + .and_then(Value::as_str), + Some("discard") + ); + assert_eq!( + contradiction + .pointer("/consolidation/proposals/0/unsupported_claim_count") + .and_then(Value::as_u64), + Some(1) + ); + + let suites = support::array_at(&report, "/suites")?; + let consolidation_suite = support::find_by_field(suites, "/suite_id", "consolidation")?; + + assert_eq!(consolidation_suite.pointer("/status").and_then(Value::as_str), Some("pass")); + + Ok(()) +} + +#[test] +fn knowledge_fixtures_report_page_metrics() -> Result<()> { + let report = support::run_json_report_from(support::knowledge_fixture_dir())?; + + assert_eq!(report.pointer("/summary/job_count").and_then(Value::as_u64), Some(3)); + assert_eq!(report.pointer("/summary/pass").and_then(Value::as_u64), Some(3)); + assert_eq!(report.pointer("/summary/unsupported_claim_count").and_then(Value::as_u64), Some(0)); + assert_eq!(report.pointer("/summary/wrong_result_count").and_then(Value::as_u64), Some(0)); + assert_eq!(report.pointer("/summary/knowledge/page_count").and_then(Value::as_u64), Some(5)); + assert_eq!( + report.pointer("/summary/knowledge/section_count").and_then(Value::as_u64), + Some(13) + ); + assert_eq!( + report.pointer("/summary/knowledge/citation_coverage").and_then(Value::as_f64), + Some(0.923) + ); + assert_eq!( + report.pointer("/summary/knowledge/stale_claim_detection").and_then(Value::as_f64), + Some(1.0) + ); + assert_eq!( + report.pointer("/summary/knowledge/rebuild_determinism").and_then(Value::as_f64), + Some(1.0) + ); + assert_eq!( + report.pointer("/summary/knowledge/backlink_count").and_then(Value::as_u64), + Some(11) + ); + assert_eq!( + report.pointer("/summary/knowledge/pages_with_backlinks").and_then(Value::as_u64), + Some(5) + ); + assert_eq!( + report.pointer("/summary/knowledge/backlink_coverage").and_then(Value::as_f64), + Some(1.0) + ); + assert_eq!( + report.pointer("/summary/knowledge/page_usefulness").and_then(Value::as_f64), + Some(0.979) + ); + assert_eq!( + report.pointer("/summary/knowledge/pages_with_version_diff").and_then(Value::as_u64), + Some(1) + ); + assert_eq!( + report.pointer("/summary/knowledge/unsupported_summary_count").and_then(Value::as_u64), + Some(1) + ); + assert_eq!( + report.pointer("/summary/knowledge/allowed_variance_count").and_then(Value::as_u64), + Some(1) + ); + + let suites = support::array_at(&report, "/suites")?; + let knowledge_suite = support::find_by_field(suites, "/suite_id", "knowledge_compilation")?; + + assert_eq!(knowledge_suite.pointer("/status").and_then(Value::as_str), Some("pass")); + assert_eq!(knowledge_suite.pointer("/encoded_job_count").and_then(Value::as_u64), Some(3)); + + let jobs = support::array_at(&report, "/jobs")?; + let project_page_job = support::find_by_field(jobs, "/job_id", "knowledge-project-page-001")?; + let watch_rebuild_job = support::find_by_field(jobs, "/job_id", "knowledge-watch-rebuild-003")?; + + assert_eq!( + project_page_job.pointer("/knowledge/unsupported_summary_count").and_then(Value::as_u64), + Some(1) + ); + assert_eq!( + project_page_job.pointer("/knowledge/untraced_section_count").and_then(Value::as_u64), + Some(0) + ); + assert_eq!( + watch_rebuild_job.pointer("/knowledge/pages_with_version_diff").and_then(Value::as_u64), + Some(1) + ); + assert!( + watch_rebuild_job + .pointer("/produced_answer") + .and_then(Value::as_str) + .is_some_and(|answer| answer + .contains("PageIndex/OpenKB adapter claim as lint evidence") + && answer.contains("leaves source documents plus Memory Notes unmodified")) + ); + + Ok(()) +} + +#[test] +fn project_decisions_fixtures_report_decision_policy_cases() -> Result<()> { + let report = support::run_json_report_from(support::project_decisions_fixture_dir())?; + + assert_eq!(report.pointer("/summary/job_count").and_then(Value::as_u64), Some(5)); + assert_eq!(report.pointer("/summary/pass").and_then(Value::as_u64), Some(5)); + assert_eq!(report.pointer("/summary/wrong_result").and_then(Value::as_u64), Some(0)); + assert_eq!(report.pointer("/summary/not_encoded").and_then(Value::as_u64), Some(0)); + assert_eq!( + report.pointer("/summary/conflict_detection_count").and_then(Value::as_u64), + Some(2) + ); + assert_eq!( + report.pointer("/summary/update_rationale_available_count").and_then(Value::as_u64), + Some(5) + ); + assert_eq!( + report.pointer("/summary/expected_evidence_recall").and_then(Value::as_f64), + Some(1.0) + ); + + let suites = support::array_at(&report, "/suites")?; + let project_decisions = support::find_by_field(suites, "/suite_id", "project_decisions")?; + + assert_eq!(project_decisions.pointer("/status").and_then(Value::as_str), Some("pass")); + assert_eq!(project_decisions.pointer("/encoded_job_count").and_then(Value::as_u64), Some(5)); + assert_eq!( + project_decisions.pointer("/update_rationale_available_count").and_then(Value::as_u64), + Some(5) + ); + + let jobs = support::array_at(&report, "/jobs")?; + let accepted = + support::find_by_field(jobs, "/job_id", "project-decision-accepted-typed-failures-001")?; + let reversal = + support::find_by_field(jobs, "/job_id", "project-decision-reversal-live-baseline-001")?; + let validation = + support::find_by_field(jobs, "/job_id", "project-decision-current-validation-gate-001")?; + let tradeoff = + support::find_by_field(jobs, "/job_id", "project-decision-tradeoff-fixture-backed-001")?; + let caveat = + support::find_by_field(jobs, "/job_id", "project-decision-private-manifest-caveat-001")?; + + assert_eq!(accepted.pointer("/answer_type").and_then(Value::as_str), Some("decision_record")); + assert_eq!( + accepted.pointer("/expected_evidence").and_then(Value::as_array).map(Vec::len), + Some(2) + ); + assert_eq!( + reversal.pointer("/evolution/historical_evidence/0").and_then(Value::as_str), + Some("live-baseline-suite-win-old") + ); + assert_eq!( + validation.pointer("/evolution/current_evidence/0").and_then(Value::as_str), + Some("validation-gate-current-decodex") + ); + assert_eq!(tradeoff.pointer("/requires_caveat").and_then(Value::as_bool), Some(true)); + assert_eq!(caveat.pointer("/can_answer_unknown").and_then(Value::as_bool), Some(true)); + + for job in jobs { + let expected_evidence = support::array_at(job, "/expected_evidence")?; + + assert!( + !expected_evidence.is_empty(), + "project decision job {} must declare required evidence", + job.pointer("/job_id").and_then(Value::as_str).unwrap_or("") + ); + } + for entry in fs::read_dir(support::project_decisions_fixture_dir())? { + let path = entry?.path(); + + if path.extension().and_then(|ext| ext.to_str()) != Some("json") { + continue; + } + + let fixture = serde_json::from_str::(&fs::read_to_string(path)?)?; + let required_evidence = support::array_at(&fixture, "/required_evidence")?; + let negative_traps = support::array_at(&fixture, "/negative_traps")?; + + assert!(!required_evidence.is_empty()); + assert!(!negative_traps.is_empty()); + } + + Ok(()) +} diff --git a/apps/elf-eval/tests/real_world_job_benchmark/consolidation_knowledge_tests_helpers.rs b/apps/elf-eval/tests/real_world_job_benchmark/consolidation_knowledge_tests_helpers.rs new file mode 100644 index 00000000..b9be63a9 --- /dev/null +++ b/apps/elf-eval/tests/real_world_job_benchmark/consolidation_knowledge_tests_helpers.rs @@ -0,0 +1,49 @@ +use std::{fs, path::Path}; + +use color_eyre::Result; +pub(super) fn real_world_live_adapter_sources(workspace: &Path) -> Result { + let mut source = fs::read_to_string( + workspace.join("apps/elf-eval/src/bin/real_world_live_adapter/main.rs"), + )?; + + append_rust_sources( + workspace.join("apps/elf-eval/src/bin/real_world_live_adapter").as_path(), + &mut source, + )?; + + Ok(source) +} + +pub(super) fn real_world_job_benchmark_sources(workspace: &Path) -> Result { + let mut source = fs::read_to_string( + workspace.join("apps/elf-eval/src/bin/real_world_job_benchmark/main.rs"), + )?; + + append_rust_sources( + workspace.join("apps/elf-eval/src/bin/real_world_job_benchmark").as_path(), + &mut source, + )?; + + Ok(source) +} + +pub(super) fn append_rust_sources(dir: &Path, source: &mut String) -> Result<()> { + let mut entries = Vec::new(); + + for entry in fs::read_dir(dir)? { + entries.push(entry?.path()); + } + + entries.sort(); + + for path in entries { + if path.is_dir() { + append_rust_sources(path.as_path(), source)?; + } else if path.extension().and_then(|ext| ext.to_str()) == Some("rs") { + source.push('\n'); + source.push_str(fs::read_to_string(path)?.as_str()); + } + } + + Ok(()) +} diff --git a/apps/elf-eval/tests/real_world_job_benchmark/consolidation_knowledge_tests_live.rs b/apps/elf-eval/tests/real_world_job_benchmark/consolidation_knowledge_tests_live.rs new file mode 100644 index 00000000..77af72d6 --- /dev/null +++ b/apps/elf-eval/tests/real_world_job_benchmark/consolidation_knowledge_tests_live.rs @@ -0,0 +1,212 @@ +use std::fs; + +use color_eyre::Result; +use serde_json::Value; + +use crate::{consolidation_knowledge::consolidation_knowledge_tests_helpers, support}; + +#[test] +fn live_consolidation_report_preserves_reviewable_output_boundaries() -> Result<()> { + let workspace = support::workspace_root()?; + let report = serde_json::from_str::(&fs::read_to_string( + support::live_consolidation_proposal_scoring_report_path()?, + )?)?; + let markdown = + fs::read_to_string(support::live_consolidation_proposal_scoring_markdown_path()?)?; + let benchmarking_index = fs::read_to_string(support::benchmarking_index_path()?)?; + let readme = fs::read_to_string(support::readme_path()?)?; + let benchmark_runbook = fs::read_to_string( + workspace + .join("docs") + .join("runbook") + .join("benchmarking") + .join("real_world_agent_memory_benchmark.md"), + )?; + let makefile = fs::read_to_string(workspace.join("Makefile.toml"))?; + let live_script = + fs::read_to_string(workspace.join("scripts/real-world-consolidation-live-adapter.sh"))?; + let live_adapter = + consolidation_knowledge_tests_helpers::real_world_live_adapter_sources(&workspace)?; + + assert_eq!( + report.pointer("/schema").and_then(Value::as_str), + Some("elf.live_consolidation_proposal_scoring_report/v1") + ); + assert_eq!(report.pointer("/authority").and_then(Value::as_str), Some("XY-934")); + assert_eq!( + report + .pointer("/live_consolidation_results/elf_live_real_world/suite_status") + .and_then(Value::as_str), + Some("pass") + ); + assert_eq!( + report + .pointer("/live_consolidation_results/elf_live_real_world/encoded_job_count") + .and_then(Value::as_u64), + Some(4) + ); + assert_eq!( + report + .pointer("/live_consolidation_results/elf_live_real_world/proposal_count") + .and_then(Value::as_u64), + Some(4) + ); + assert_eq!( + report + .pointer("/live_consolidation_results/elf_live_real_world/source_mutation_count") + .and_then(Value::as_u64), + Some(0) + ); + assert_eq!( + report + .pointer("/live_consolidation_results/elf_live_real_world/review_event_count") + .and_then(Value::as_u64), + Some(6) + ); + assert_eq!( + report + .pointer("/live_consolidation_results/qmd_live_real_world/suite_status") + .and_then(Value::as_str), + Some("not_encoded") + ); + + let jobs = support::array_at(&report, "/jobs")?; + let project_summary = + support::find_by_field(jobs, "/job_id", "consolidation-project-summary-apply-001")?; + let preference = + support::find_by_field(jobs, "/job_id", "consolidation-preference-candidate-defer-001")?; + let contradiction = + support::find_by_field(jobs, "/job_id", "consolidation-contradiction-report-discard-001")?; + + assert_eq!( + project_summary.pointer("/final_review_state").and_then(Value::as_str), + Some("applied") + ); + assert_eq!(project_summary.pointer("/review_event_count").and_then(Value::as_u64), Some(2)); + assert_eq!(preference.pointer("/final_review_state").and_then(Value::as_str), Some("archived")); + assert_eq!( + contradiction.pointer("/final_review_state").and_then(Value::as_str), + Some("rejected") + ); + assert_eq!( + contradiction.pointer("/unsupported_claim_flag_count").and_then(Value::as_u64), + Some(1) + ); + assert_eq!(contradiction.pointer("/source_lineage_count").and_then(Value::as_u64), Some(3)); + + let positions = support::array_at(&report, "/reference_positions")?; + let qmd = support::find_by_field(positions, "/project", "qmd")?; + let managed = support::find_by_field(positions, "/project", "managed_dreaming_memory_systems")?; + let always_on = + support::find_by_field(positions, "/project", "always_on_memory_agent_patterns")?; + + assert_eq!(qmd.pointer("/position").and_then(Value::as_str), Some("untested")); + assert_eq!(managed.pointer("/position").and_then(Value::as_str), Some("product_reference")); + assert_eq!(always_on.pointer("/position").and_then(Value::as_str), Some("product_reference")); + assert!(markdown.contains("ELF now has service-backed live consolidation proposal scoring")); + assert!(markdown.contains("This is not scheduled production consolidation")); + assert!(markdown.contains("Source mutations")); + assert!(markdown.contains("Do not mix knowledge-page rebuild/lint scoring")); + assert!( + benchmarking_index.contains("2026-06-16-live-consolidation-proposal-scoring-report.md") + ); + assert!(readme.contains("Live Consolidation Proposal Scoring Report - June 16, 2026")); + assert!(readme.contains("real-world-memory-live-consolidation")); + assert!(benchmark_runbook.contains("Current live consolidation increment")); + assert!(benchmark_runbook.contains("tmp/real-world-memory/live-consolidation/summary.json")); + assert!(makefile.contains("[tasks.real-world-memory-live-consolidation]")); + assert!(makefile.contains("scripts/real-world-docker.sh")); + + let docker_script = fs::read_to_string(workspace.join("scripts/real-world-docker.sh"))?; + + assert_live_consolidation_scripts(&docker_script, &live_script, &live_adapter); + + Ok(()) +} + +fn assert_live_consolidation_scripts(docker_script: &str, live_script: &str, live_adapter: &str) { + assert!(docker_script.contains("scripts/real-world-consolidation-live-adapter.sh")); + assert!(live_script.contains("elf.real_world_consolidation_live_adapter_sweep/v1")); + assert!(live_script.contains("real_world_live_adapter -- elf")); + assert!(!live_script.contains("real_world_live_adapter -- qmd")); + assert!(live_adapter.contains("fn materialize_elf_consolidation(")); + assert!(live_adapter.contains("ConsolidationProposalReviewRequest")); +} + +#[test] +fn live_knowledge_page_rebuild_lint_has_dedicated_docker_task() -> Result<()> { + let workspace = support::workspace_root()?; + let makefile = fs::read_to_string(workspace.join("Makefile.toml"))?; + let docker_script = fs::read_to_string(workspace.join("scripts/real-world-docker.sh"))?; + let live_script = + fs::read_to_string(workspace.join("scripts/real-world-knowledge-live-adapter.sh"))?; + let live_adapter = + consolidation_knowledge_tests_helpers::real_world_live_adapter_sources(&workspace)?; + let knowledge_spec = fs::read_to_string( + workspace.join("docs").join("spec").join("system_knowledge_pages_v1.md"), + )?; + let version_diff_report = fs::read_to_string( + workspace + .join("docs") + .join("evidence") + .join("benchmarking") + .join("2026-06-20-knowledge-workspace-version-diff-report.md"), + )?; + let benchmark_runbook = fs::read_to_string( + workspace + .join("docs") + .join("runbook") + .join("benchmarking") + .join("real_world_agent_memory_benchmark.md"), + )?; + let live_runbook = fs::read_to_string( + workspace + .join("docs") + .join("runbook") + .join("benchmarking") + .join("live_baseline_benchmark.md"), + )?; + let benchmarking_index = fs::read_to_string(support::benchmarking_index_path()?)?; + let readme = fs::read_to_string(support::readme_path()?)?; + + assert!(makefile.contains("[tasks.real-world-memory-live-knowledge]")); + assert!(makefile.contains("scripts/real-world-docker.sh")); + assert!(makefile.contains("memory-live-knowledge")); + assert!(docker_script.contains("memory-live-knowledge)")); + assert!(docker_script.contains("-e ELF_KNOWLEDGE_LIVE_REPORT_DIR")); + assert!(docker_script.contains("-e ELF_KNOWLEDGE_LIVE_FIXTURES")); + assert!(docker_script.contains("scripts/real-world-knowledge-live-adapter.sh")); + assert!(live_script.contains("elf.real_world_knowledge_live_adapter_sweep/v1")); + assert!(live_script.contains("apps/elf-eval/fixtures/real_world_memory/knowledge")); + assert!(live_script.contains("tmp/real-world-memory/live-knowledge")); + assert!(live_script.contains("real-world-memory-live-knowledge")); + assert!(live_script.contains("ElfService knowledge_page_rebuild")); + assert!(live_script.contains("knowledge_page_lint")); + assert!(live_script.contains("knowledge_pages_search")); + assert!(live_script.contains("pages remain derived benchmark artifacts")); + assert!(live_adapter.contains("\"page_version_diff\"")); + assert!(live_adapter.contains("version_diff_available")); + assert!(live_adapter.contains("fn materialize_elf_knowledge(")); + assert!(live_adapter.contains("KnowledgePageRebuildRequest")); + assert!(live_adapter.contains("KnowledgePageLintRequest")); + assert!(live_adapter.contains("KnowledgePageSearchRequest")); + assert!( + consolidation_knowledge_tests_helpers::real_world_job_benchmark_sources(&workspace)? + .contains("version_diff_coverage") + ); + assert!(knowledge_spec.contains("elf.knowledge_page.version_diff/v1")); + assert!( + version_diff_report.contains("Knowledge Workspace Version-Diff Report - June 20, 2026") + ); + assert!(version_diff_report.contains("version_diff_coverage = 1.000")); + assert!(benchmark_runbook.contains("Current live knowledge-page rebuild/lint increment")); + assert!(benchmark_runbook.contains("cargo make real-world-memory-live-knowledge")); + assert!(benchmark_runbook.contains("tmp/real-world-memory/live-knowledge/summary.json")); + assert!(live_runbook.contains("cargo make real-world-memory-live-knowledge")); + assert!(benchmarking_index.contains("2026-06-20-live-knowledge-page-rebuild-lint-report.md")); + assert!(benchmarking_index.contains("2026-06-20-knowledge-workspace-version-diff-report.md")); + assert!(readme.contains("Live Knowledge-Page Rebuild/Lint Report - June 20, 2026")); + assert!(readme.contains("Knowledge Workspace Version-Diff Report - June 20, 2026")); + + Ok(()) +} diff --git a/packages/elf-service/tests/acceptance/chunk_search.rs b/packages/elf-service/tests/acceptance/chunk_search.rs index ee454473..fe3db601 100644 --- a/packages/elf-service/tests/acceptance/chunk_search.rs +++ b/packages/elf-service/tests/acceptance/chunk_search.rs @@ -1,674 +1,6 @@ +pub(crate) mod tests_helpers; + mod filter_impact; mod payload_levels; mod relation_context; - -use std::{ - collections::HashMap, - sync::{Arc, atomic::AtomicUsize}, -}; - -use qdrant_client::{ - Payload, - qdrant::{Document, PointStruct, UpsertPointsBuilder, Vector}, -}; -use serde_json::Value; -use sqlx::PgExecutor; -use time::OffsetDateTime; -use uuid::Uuid; - -use crate::acceptance::{self, SpyExtractor, StubEmbedding, StubRerank}; -use elf_config::ProviderConfig; -use elf_service::{ - BoxFuture, ElfService, Providers, RerankProvider, Result, SearchDetailsRequest, SearchRequest, - SearchTimelineRequest, -}; -use elf_storage::qdrant::{BM25_MODEL, BM25_VECTOR_NAME, DENSE_VECTOR_NAME}; -use elf_testkit::TestDatabase; - -struct TestContext { - service: ElfService, - test_db: TestDatabase, - embedding_version: String, -} - -struct KeywordRerank { - keyword: &'static str, -} -impl RerankProvider for KeywordRerank { - fn rerank<'a>( - &'a self, - _cfg: &'a ProviderConfig, - _query: &'a str, - docs: &'a [String], - ) -> BoxFuture<'a, Result>> { - let keyword = self.keyword; - - Box::pin(async move { - Ok(docs.iter().map(|doc| if doc.contains(keyword) { 1.0 } else { 0.1 }).collect()) - }) - } -} - -fn build_providers(rerank: R) -> Providers -where - R: RerankProvider + Send + Sync + 'static, -{ - Providers::new( - Arc::new(StubEmbedding { vector_dim: 4_096 }), - Arc::new(rerank), - Arc::new(SpyExtractor { - calls: Arc::new(AtomicUsize::new(0)), - payload: serde_json::json!({ "notes": [] }), - }), - ) -} - -fn build_payload( - note_id: Uuid, - chunk_id: Uuid, - chunk_index: i32, - start_offset: i32, - end_offset: i32, -) -> Payload { - let mut payload = Payload::new(); - - payload.insert("note_id", note_id.to_string()); - payload.insert("chunk_id", chunk_id.to_string()); - payload.insert("chunk_index", Value::from(chunk_index)); - payload.insert("start_offset", Value::from(start_offset)); - payload.insert("end_offset", Value::from(end_offset)); - payload.insert("tenant_id", "t"); - payload.insert("project_id", "p"); - payload.insert("agent_id", "a"); - payload.insert("scope", "agent_private"); - payload.insert("status", "active"); - - payload -} - -fn build_vectors(text: &str) -> HashMap { - let mut vectors = HashMap::new(); - - vectors.insert(DENSE_VECTOR_NAME.to_string(), Vector::from(vec![0.0_f32; 4_096])); - vectors.insert( - BM25_VECTOR_NAME.to_string(), - Vector::from(Document::new(text.to_string(), BM25_MODEL)), - ); - - vectors -} - -async fn setup_context(test_name: &str, providers: Providers) -> Option { - let Some(test_db) = acceptance::test_db().await else { - eprintln!("Skipping {test_name}; set ELF_PG_DSN to run this test."); - - return None; - }; - let Some(qdrant_url) = acceptance::test_qdrant_url() else { - eprintln!("Skipping {test_name}; set ELF_QDRANT_URL to run this test."); - - return None; - }; - let collection = test_db.collection_name("elf_acceptance"); - let docs_collection = test_db.collection_name("elf_acceptance_docs"); - let cfg = acceptance::test_config( - test_db.dsn().to_string(), - qdrant_url, - 4_096, - collection, - docs_collection, - ); - let service = - acceptance::build_service(cfg, providers).await.expect("Failed to build service."); - - acceptance::reset_db(&service.db.pool).await.expect("Failed to reset test database."); - - reset_collection(&service).await; - - let embedding_version = format!( - "{}:{}:{}", - service.cfg.providers.embedding.provider_id, - service.cfg.providers.embedding.model, - service.cfg.storage.qdrant.vector_dim - ); - - Some(TestContext { service, test_db, embedding_version }) -} - -async fn reset_collection(service: &ElfService) { - acceptance::reset_qdrant_collection( - &service.qdrant.client, - &service.qdrant.collection, - service.qdrant.vector_dim, - ) - .await - .expect("Failed to reset Qdrant collection."); -} - -async fn insert_note<'e, E>(executor: E, note_id: Uuid, note_text: &str, embedding_version: &str) -where - E: PgExecutor<'e>, -{ - insert_note_with_importance_and_source_ref( - executor, - note_id, - note_text, - embedding_version, - 0.4_f32, - 0.9_f32, - "agent_private", - serde_json::json!({}), - ) - .await; -} - -async fn insert_note_with_importance<'e, E>( - executor: E, - note_id: Uuid, - note_text: &str, - embedding_version: &str, - importance: f32, - confidence: f32, - scope: &str, -) where - E: PgExecutor<'e>, -{ - insert_note_with_importance_and_source_ref( - executor, - note_id, - note_text, - embedding_version, - importance, - confidence, - scope, - serde_json::json!({}), - ) - .await; -} - -#[allow(clippy::too_many_arguments)] -async fn insert_note_with_importance_and_source_ref<'e, E>( - executor: E, - note_id: Uuid, - note_text: &str, - embedding_version: &str, - importance: f32, - confidence: f32, - scope: &str, - source_ref: Value, -) where - E: PgExecutor<'e>, -{ - let now = OffsetDateTime::now_utc(); - - sqlx::query( - "\ -INSERT INTO memory_notes ( - note_id, - tenant_id, - project_id, - agent_id, - scope, - type, - key, - text, - importance, - confidence, - status, - created_at, - updated_at, - expires_at, - embedding_version, - source_ref, - hit_count, - last_hit_at -) -VALUES ( - $1, - $2, - $3, - $4, - $5, - $6, - $7, - $8, - $9, - $10, - $11, - $12, - $13, - $14, - $15, - $16, - $17, - $18 -)", - ) - .bind(note_id) - .bind("t") - .bind("p") - .bind("a") - .bind(scope) - .bind("fact") - .bind(Option::::None) - .bind(note_text) - .bind(importance) - .bind(confidence) - .bind("active") - .bind(now) - .bind(now) - .bind(Option::::None) - .bind(embedding_version) - .bind(source_ref) - .bind(0_i64) - .bind(Option::::None) - .execute(executor) - .await - .expect("Failed to insert memory note."); -} - -#[allow(clippy::too_many_arguments)] -async fn insert_summary_field_row<'e, E>(executor: E, field_id: Uuid, note_id: Uuid, summary: &str) -where - E: PgExecutor<'e>, -{ - sqlx::query( - "\ -INSERT INTO memory_note_fields (field_id, note_id, field_kind, item_index, text) -VALUES ($1, $2, $3, $4, $5)", - ) - .bind(field_id) - .bind(note_id) - .bind("summary") - .bind(0_i32) - .bind(summary) - .execute(executor) - .await - .expect("Failed to insert note summary field."); -} - -#[allow(clippy::too_many_arguments)] -async fn insert_chunk<'e, E>( - executor: E, - chunk_id: Uuid, - note_id: Uuid, - chunk_index: i32, - start_offset: i32, - end_offset: i32, - text: &str, - embedding_version: &str, -) where - E: PgExecutor<'e>, -{ - sqlx::query( - "\ -INSERT INTO memory_note_chunks ( - chunk_id, - note_id, - chunk_index, - start_offset, - end_offset, - text, - embedding_version -) -VALUES ($1, $2, $3, $4, $5, $6, $7)", - ) - .bind(chunk_id) - .bind(note_id) - .bind(chunk_index) - .bind(start_offset) - .bind(end_offset) - .bind(text) - .bind(embedding_version) - .execute(executor) - .await - .expect("Failed to insert chunk metadata."); -} - -async fn upsert_point( - service: &ElfService, - chunk_id: Uuid, - note_id: Uuid, - chunk_index: i32, - start_offset: i32, - end_offset: i32, - text: &str, -) { - let payload = build_payload(note_id, chunk_id, chunk_index, start_offset, end_offset); - let vectors = build_vectors(text); - let point = PointStruct::new(chunk_id.to_string(), vectors, payload); - - service - .qdrant - .client - .upsert_points( - UpsertPointsBuilder::new(service.qdrant.collection.clone(), vec![point]).wait(true), - ) - .await - .expect("Failed to upsert Qdrant point."); -} - -#[tokio::test] -#[ignore = "Requires external Postgres and Qdrant. Set ELF_PG_DSN and ELF_QDRANT_URL to run."] -async fn search_returns_chunk_items() { - let providers = build_providers(StubRerank); - let Some(context) = setup_context("search_returns_chunk_items", providers).await else { - return; - }; - let note_id = Uuid::new_v4(); - let chunk_id = Uuid::new_v4(); - let note_text = "First sentence. Second sentence."; - - insert_note(&context.service.db.pool, note_id, note_text, &context.embedding_version).await; - insert_chunk( - &context.service.db.pool, - chunk_id, - note_id, - 0, - 0, - note_text.len() as i32, - note_text, - &context.embedding_version, - ) - .await; - upsert_point(&context.service, chunk_id, note_id, 0, 0, note_text.len() as i32, note_text) - .await; - - let response = context - .service - .search_raw(SearchRequest { - tenant_id: "t".to_string(), - project_id: "p".to_string(), - agent_id: "a".to_string(), - token_id: None, - read_profile: "private_only".to_string(), - payload_level: Default::default(), - query: "First".to_string(), - top_k: Some(5), - candidate_k: Some(10), - filter: None, - record_hits: Some(false), - ranking: None, - }) - .await - .expect("Search failed."); - let item = response.items.first().expect("Expected search result."); - - assert_eq!(item.chunk_id, chunk_id); - assert!(!item.snippet.is_empty()); - - context.test_db.cleanup().await.expect("Failed to cleanup test database."); -} - -#[tokio::test] -#[ignore = "Requires external Postgres and Qdrant. Set ELF_PG_DSN and ELF_QDRANT_URL to run."] -async fn search_stitches_adjacent_chunks() { - let providers = build_providers(StubRerank); - let Some(context) = setup_context("search_stitches_adjacent_chunks", providers).await else { - return; - }; - let note_id = Uuid::new_v4(); - let chunk_texts = ["First sentence. ", "Second sentence. ", "Third sentence."]; - let note_text = chunk_texts.concat(); - - insert_note(&context.service.db.pool, note_id, ¬e_text, &context.embedding_version).await; - - let mut offset = 0_i32; - let mut chunk_ids = Vec::new(); - - for (index, chunk_text) in chunk_texts.iter().enumerate() { - let chunk_id = Uuid::new_v4(); - let start = offset; - let end = start + chunk_text.len() as i32; - - insert_chunk( - &context.service.db.pool, - chunk_id, - note_id, - index as i32, - start, - end, - chunk_text, - &context.embedding_version, - ) - .await; - - chunk_ids.push((chunk_id, start, end, *chunk_text)); - - offset = end; - } - - let (chunk_id, start, end, text) = chunk_ids[1]; - - upsert_point(&context.service, chunk_id, note_id, 1, start, end, text).await; - - let response = context - .service - .search_raw(SearchRequest { - tenant_id: "t".to_string(), - project_id: "p".to_string(), - agent_id: "a".to_string(), - token_id: None, - read_profile: "private_only".to_string(), - payload_level: Default::default(), - query: "Second".to_string(), - top_k: Some(5), - candidate_k: Some(10), - filter: None, - record_hits: Some(false), - ranking: None, - }) - .await - .expect("Search failed."); - let item = response.items.first().expect("Expected search result."); - - assert_eq!(item.chunk_id, chunk_id); - assert!(item.snippet.contains("First sentence.")); - assert!(item.snippet.contains("Second sentence.")); - assert!(item.snippet.contains("Third sentence.")); - - context.test_db.cleanup().await.expect("Failed to cleanup test database."); -} - -#[tokio::test] -#[ignore = "Requires external Postgres and Qdrant. Set ELF_PG_DSN and ELF_QDRANT_URL to run."] -async fn search_skips_missing_chunk_metadata() { - let providers = build_providers(StubRerank); - let Some(context) = setup_context("search_skips_missing_chunk_metadata", providers).await - else { - return; - }; - let note_id = Uuid::new_v4(); - let chunk_id = Uuid::new_v4(); - let note_text = "Missing chunk metadata."; - - insert_note(&context.service.db.pool, note_id, note_text, &context.embedding_version).await; - upsert_point(&context.service, chunk_id, note_id, 0, 0, note_text.len() as i32, note_text) - .await; - - let response = context - .service - .search_raw(SearchRequest { - tenant_id: "t".to_string(), - project_id: "p".to_string(), - agent_id: "a".to_string(), - token_id: None, - read_profile: "private_only".to_string(), - payload_level: Default::default(), - query: "Missing".to_string(), - top_k: Some(5), - candidate_k: Some(10), - filter: None, - record_hits: Some(false), - ranking: None, - }) - .await - .expect("Search failed."); - - assert!(response.items.is_empty()); - - context.test_db.cleanup().await.expect("Failed to cleanup test database."); -} - -#[tokio::test] -#[ignore = "Requires external Postgres and Qdrant. Set ELF_PG_DSN and ELF_QDRANT_URL to run."] -async fn progressive_search_returns_index_timeline_and_details() { - let providers = build_providers(StubRerank); - let Some(context) = - setup_context("progressive_search_returns_index_timeline_and_details", providers).await - else { - return; - }; - let note_id = Uuid::new_v4(); - let chunk_id = Uuid::new_v4(); - let note_text = "Progressive retrieval works best with staged expansion."; - - insert_note(&context.service.db.pool, note_id, note_text, &context.embedding_version).await; - insert_chunk( - &context.service.db.pool, - chunk_id, - note_id, - 0, - 0, - note_text.len() as i32, - note_text, - &context.embedding_version, - ) - .await; - upsert_point(&context.service, chunk_id, note_id, 0, 0, note_text.len() as i32, note_text) - .await; - - let index = context - .service - .search(SearchRequest { - tenant_id: "t".to_string(), - project_id: "p".to_string(), - agent_id: "a".to_string(), - token_id: None, - read_profile: "private_only".to_string(), - payload_level: Default::default(), - query: "Progressive".to_string(), - top_k: Some(5), - candidate_k: Some(10), - filter: None, - record_hits: Some(false), - ranking: None, - }) - .await - .expect("Search index failed."); - - assert!(!index.items.is_empty()); - - let timeline = context - .service - .search_timeline(SearchTimelineRequest { - tenant_id: "t".to_string(), - project_id: "p".to_string(), - agent_id: "a".to_string(), - search_session_id: index.search_session_id, - payload_level: Default::default(), - group_by: None, - }) - .await - .expect("Search timeline failed."); - - assert!(!timeline.groups.is_empty()); - - let details = context - .service - .search_details(SearchDetailsRequest { - tenant_id: "t".to_string(), - project_id: "p".to_string(), - agent_id: "a".to_string(), - search_session_id: index.search_session_id, - payload_level: Default::default(), - note_ids: vec![note_id], - record_hits: Some(false), - }) - .await - .expect("Search details failed."); - let returned = details - .results - .first() - .and_then(|result| result.note.as_ref()) - .expect("Expected note details."); - - assert_eq!(returned.note_id, note_id); - assert_eq!(returned.text, note_text); - - context.test_db.cleanup().await.expect("Failed to cleanup test database."); -} - -#[tokio::test] -#[ignore = "Requires external Postgres and Qdrant. Set ELF_PG_DSN and ELF_QDRANT_URL to run."] -async fn search_dedupes_note_results() { - let providers = build_providers(KeywordRerank { keyword: "preferred" }); - let Some(context) = setup_context("search_dedupes_note_results", providers).await else { - return; - }; - let note_id = Uuid::new_v4(); - let chunk_texts = ["preferred alpha. ", "bridge chunk. ", "other alpha."]; - let note_text = chunk_texts.concat(); - - insert_note(&context.service.db.pool, note_id, ¬e_text, &context.embedding_version).await; - - let mut offset = 0_i32; - let mut chunk_ids = Vec::new(); - - for (index, chunk_text) in chunk_texts.iter().enumerate() { - let chunk_id = Uuid::new_v4(); - let start = offset; - let end = start + chunk_text.len() as i32; - - insert_chunk( - &context.service.db.pool, - chunk_id, - note_id, - index as i32, - start, - end, - chunk_text, - &context.embedding_version, - ) - .await; - - chunk_ids.push((chunk_id, start, end, *chunk_text)); - - offset = end; - } - - let (chunk_id_a, start_a, end_a, text_a) = chunk_ids[0]; - let (chunk_id_c, start_c, end_c, text_c) = chunk_ids[2]; - - upsert_point(&context.service, chunk_id_a, note_id, 0, start_a, end_a, text_a).await; - upsert_point(&context.service, chunk_id_c, note_id, 2, start_c, end_c, text_c).await; - - let response = context - .service - .search_raw(SearchRequest { - tenant_id: "t".to_string(), - project_id: "p".to_string(), - agent_id: "a".to_string(), - token_id: None, - read_profile: "private_only".to_string(), - payload_level: Default::default(), - query: "alpha".to_string(), - top_k: Some(5), - candidate_k: Some(10), - filter: None, - record_hits: Some(false), - ranking: None, - }) - .await - .expect("Search failed."); - let item = response.items.first().expect("Expected search result."); - - assert_eq!(response.items.len(), 1); - assert_eq!(item.note_id, note_id); - assert!( - item.chunk_id == chunk_id_a || item.chunk_id == chunk_id_c, - "Expected deduped result chunk_id to be one of the ingested chunks." - ); - - context.test_db.cleanup().await.expect("Failed to cleanup test database."); -} +mod tests_core; diff --git a/packages/elf-service/tests/acceptance/chunk_search/filter_impact.rs b/packages/elf-service/tests/acceptance/chunk_search/filter_impact.rs index 5788e8d3..37430805 100644 --- a/packages/elf-service/tests/acceptance/chunk_search/filter_impact.rs +++ b/packages/elf-service/tests/acceptance/chunk_search/filter_impact.rs @@ -3,7 +3,7 @@ use uuid::Uuid; use crate::acceptance::{ StubRerank, - chunk_search::{self, TestContext}, + chunk_search::tests_helpers::{self, TestContext}, }; use elf_service::{SearchRequest, TraceTrajectoryGetRequest}; @@ -16,7 +16,7 @@ async fn seed_filter_impact_notes( low_note_text: &str, high_note_text: &str, ) { - chunk_search::insert_note_with_importance( + tests_helpers::insert_note_with_importance( &context.service.db.pool, low_note_id, low_note_text, @@ -26,7 +26,7 @@ async fn seed_filter_impact_notes( "agent_private", ) .await; - chunk_search::insert_note_with_importance( + tests_helpers::insert_note_with_importance( &context.service.db.pool, high_note_id, high_note_text, @@ -36,7 +36,7 @@ async fn seed_filter_impact_notes( "agent_private", ) .await; - chunk_search::insert_chunk( + tests_helpers::insert_chunk( &context.service.db.pool, low_chunk_id, low_note_id, @@ -47,7 +47,7 @@ async fn seed_filter_impact_notes( &context.embedding_version, ) .await; - chunk_search::insert_chunk( + tests_helpers::insert_chunk( &context.service.db.pool, high_chunk_id, high_note_id, @@ -58,7 +58,7 @@ async fn seed_filter_impact_notes( &context.embedding_version, ) .await; - chunk_search::upsert_point( + tests_helpers::upsert_point( &context.service, low_chunk_id, low_note_id, @@ -68,7 +68,7 @@ async fn seed_filter_impact_notes( low_note_text, ) .await; - chunk_search::upsert_point( + tests_helpers::upsert_point( &context.service, high_chunk_id, high_note_id, @@ -106,14 +106,14 @@ async fn load_filter_impact_from_trace(context: &TestContext, trace_id: Uuid) -> #[tokio::test] #[ignore = "Requires external Postgres and Qdrant. Set ELF_PG_DSN and ELF_QDRANT_URL to run this test."] async fn search_filter_affects_candidate_set_and_records_filter_impact() { - let provider = chunk_search::build_providers(StubRerank); + let provider = tests_helpers::build_providers(StubRerank); let low_note_text = "alpha low confidence note"; let high_note_text = "alpha high confidence note"; let low_note_id = Uuid::new_v4(); let high_note_id = Uuid::new_v4(); let low_chunk_id = Uuid::new_v4(); let high_chunk_id = Uuid::new_v4(); - let mut context = match chunk_search::setup_context( + let mut context = match tests_helpers::setup_context( "search_filter_affects_candidate_set_and_records_filter_impact", provider, ) diff --git a/packages/elf-service/tests/acceptance/chunk_search/payload_levels.rs b/packages/elf-service/tests/acceptance/chunk_search/payload_levels.rs index be9fa341..00bad23f 100644 --- a/packages/elf-service/tests/acceptance/chunk_search/payload_levels.rs +++ b/packages/elf-service/tests/acceptance/chunk_search/payload_levels.rs @@ -3,7 +3,7 @@ use uuid::Uuid; use crate::acceptance::{ StubRerank, - chunk_search::{self, TestContext}, + chunk_search::tests_helpers::{self, TestContext}, }; use elf_service::{NoteFetchResponse, PayloadLevel, SearchDetailsRequest, SearchRequest}; @@ -96,9 +96,9 @@ async fn fetch_search_detail_note_for_level( #[tokio::test] #[ignore = "Requires external Postgres and Qdrant. Set ELF_PG_DSN and ELF_QDRANT_URL to run."] async fn search_raw_payload_level_shapes_source_ref() { - let providers = chunk_search::build_providers(StubRerank); + let providers = tests_helpers::build_providers(StubRerank); let Some(context) = - chunk_search::setup_context("search_raw_payload_level_shapes_source_ref", providers).await + tests_helpers::setup_context("search_raw_payload_level_shapes_source_ref", providers).await else { return; }; @@ -116,7 +116,7 @@ async fn search_raw_payload_level_shapes_source_ref() { } }); - chunk_search::insert_note_with_importance_and_source_ref( + tests_helpers::insert_note_with_importance_and_source_ref( &context.service.db.pool, note_id, note_text, @@ -127,7 +127,7 @@ async fn search_raw_payload_level_shapes_source_ref() { source_ref.clone(), ) .await; - chunk_search::insert_chunk( + tests_helpers::insert_chunk( &context.service.db.pool, chunk_id, note_id, @@ -138,7 +138,7 @@ async fn search_raw_payload_level_shapes_source_ref() { &context.embedding_version, ) .await; - chunk_search::upsert_point( + tests_helpers::upsert_point( &context.service, chunk_id, note_id, @@ -163,8 +163,8 @@ async fn search_raw_payload_level_shapes_source_ref() { #[tokio::test] #[ignore = "Requires external Postgres and Qdrant. Set ELF_PG_DSN and ELF_QDRANT_URL to run."] async fn search_details_payload_level_shapes_text_and_fields() { - let providers = chunk_search::build_providers(StubRerank); - let Some(context) = chunk_search::setup_context( + let providers = tests_helpers::build_providers(StubRerank); + let Some(context) = tests_helpers::setup_context( "search_details_payload_level_shapes_text_and_fields", providers, ) @@ -191,7 +191,7 @@ async fn search_details_payload_level_shapes_text_and_fields() { assert!(note_text.len() > max_note_chars); - chunk_search::insert_note_with_importance_and_source_ref( + tests_helpers::insert_note_with_importance_and_source_ref( &context.service.db.pool, note_id, note_text.as_str(), @@ -202,14 +202,14 @@ async fn search_details_payload_level_shapes_text_and_fields() { source_ref.clone(), ) .await; - chunk_search::insert_summary_field_row( + tests_helpers::insert_summary_field_row( &context.service.db.pool, field_id, note_id, structured_summary, ) .await; - chunk_search::insert_chunk( + tests_helpers::insert_chunk( &context.service.db.pool, chunk_id, note_id, @@ -220,7 +220,7 @@ async fn search_details_payload_level_shapes_text_and_fields() { &context.embedding_version, ) .await; - chunk_search::upsert_point( + tests_helpers::upsert_point( &context.service, chunk_id, note_id, diff --git a/packages/elf-service/tests/acceptance/chunk_search/relation_context.rs b/packages/elf-service/tests/acceptance/chunk_search/relation_context.rs index c22554c7..4027a3d3 100644 --- a/packages/elf-service/tests/acceptance/chunk_search/relation_context.rs +++ b/packages/elf-service/tests/acceptance/chunk_search/relation_context.rs @@ -4,7 +4,7 @@ use uuid::Uuid; use crate::acceptance::{ self, StubRerank, - chunk_search::{self, TestContext}, + chunk_search::tests_helpers::{self, TestContext}, }; use elf_service::{ElfService, Providers, RelationTemporalStatus, SearchRequest}; @@ -171,7 +171,7 @@ async fn setup_graph_context_test( acceptance::build_service(cfg, providers).await.expect("Failed to build service."); acceptance::reset_db(&service.db.pool).await.expect("Failed to reset test database."); - chunk_search::reset_collection(&service).await; + tests_helpers::reset_collection(&service).await; let embedding_version = format!( "{}:{}:{}", @@ -201,15 +201,15 @@ async fn seed_relation_context_fixture( let note_1_evidence_created_at = now - Duration::seconds(30); let note_2_evidence_created_at = now - Duration::seconds(10); - chunk_search::insert_note(&service.db.pool, note_id, chunk_text, embedding_version).await; - chunk_search::insert_note( + tests_helpers::insert_note(&service.db.pool, note_id, chunk_text, embedding_version).await; + tests_helpers::insert_note( &service.db.pool, note_id_2, "Second note for evidence ordering.", embedding_version, ) .await; - chunk_search::insert_chunk( + tests_helpers::insert_chunk( &service.db.pool, chunk_id, note_id, @@ -220,7 +220,7 @@ async fn seed_relation_context_fixture( embedding_version, ) .await; - chunk_search::upsert_point( + tests_helpers::upsert_point( service, chunk_id, note_id, @@ -283,7 +283,7 @@ async fn seed_relation_context_fixture( #[tokio::test] #[ignore = "Requires external Postgres and Qdrant. Set ELF_PG_DSN and ELF_QDRANT_URL to run."] async fn search_raw_quick_includes_relation_context_and_respects_fact_bounds() { - let providers = chunk_search::build_providers(StubRerank); + let providers = tests_helpers::build_providers(StubRerank); let Some(context) = setup_graph_context_test( "search_raw_quick_includes_relation_context_and_respects_fact_bounds", providers, @@ -339,7 +339,7 @@ async fn search_raw_quick_includes_relation_context_and_respects_fact_bounds() { #[tokio::test] #[ignore = "Requires external Postgres and Qdrant. Set ELF_PG_DSN and ELF_QDRANT_URL to run."] async fn search_raw_quick_marks_historical_relation_context() { - let providers = chunk_search::build_providers(StubRerank); + let providers = tests_helpers::build_providers(StubRerank); let Some(context) = setup_graph_context_test( "search_raw_quick_marks_historical_relation_context", providers, diff --git a/packages/elf-service/tests/acceptance/chunk_search/tests_core.rs b/packages/elf-service/tests/acceptance/chunk_search/tests_core.rs new file mode 100644 index 00000000..fec0e63b --- /dev/null +++ b/packages/elf-service/tests/acceptance/chunk_search/tests_core.rs @@ -0,0 +1,397 @@ +use uuid::Uuid; + +use crate::acceptance::{ + StubRerank, + chunk_search::tests_helpers::{self, KeywordRerank}, +}; +use elf_service::{SearchDetailsRequest, SearchRequest, SearchTimelineRequest}; + +#[tokio::test] +#[ignore = "Requires external Postgres and Qdrant. Set ELF_PG_DSN and ELF_QDRANT_URL to run."] +async fn search_returns_chunk_items() { + let providers = tests_helpers::build_providers(StubRerank); + let Some(context) = tests_helpers::setup_context("search_returns_chunk_items", providers).await + else { + return; + }; + let note_id = Uuid::new_v4(); + let chunk_id = Uuid::new_v4(); + let note_text = "First sentence. Second sentence."; + + tests_helpers::insert_note( + &context.service.db.pool, + note_id, + note_text, + &context.embedding_version, + ) + .await; + tests_helpers::insert_chunk( + &context.service.db.pool, + chunk_id, + note_id, + 0, + 0, + note_text.len() as i32, + note_text, + &context.embedding_version, + ) + .await; + tests_helpers::upsert_point( + &context.service, + chunk_id, + note_id, + 0, + 0, + note_text.len() as i32, + note_text, + ) + .await; + + let response = context + .service + .search_raw(SearchRequest { + tenant_id: "t".to_string(), + project_id: "p".to_string(), + agent_id: "a".to_string(), + token_id: None, + read_profile: "private_only".to_string(), + payload_level: Default::default(), + query: "First".to_string(), + top_k: Some(5), + candidate_k: Some(10), + filter: None, + record_hits: Some(false), + ranking: None, + }) + .await + .expect("Search failed."); + let item = response.items.first().expect("Expected search result."); + + assert_eq!(item.chunk_id, chunk_id); + assert!(!item.snippet.is_empty()); + + context.test_db.cleanup().await.expect("Failed to cleanup test database."); +} + +#[tokio::test] +#[ignore = "Requires external Postgres and Qdrant. Set ELF_PG_DSN and ELF_QDRANT_URL to run."] +async fn search_stitches_adjacent_chunks() { + let providers = tests_helpers::build_providers(StubRerank); + let Some(context) = + tests_helpers::setup_context("search_stitches_adjacent_chunks", providers).await + else { + return; + }; + let note_id = Uuid::new_v4(); + let chunk_texts = ["First sentence. ", "Second sentence. ", "Third sentence."]; + let note_text = chunk_texts.concat(); + + tests_helpers::insert_note( + &context.service.db.pool, + note_id, + ¬e_text, + &context.embedding_version, + ) + .await; + + let mut offset = 0_i32; + let mut chunk_ids = Vec::new(); + + for (index, chunk_text) in chunk_texts.iter().enumerate() { + let chunk_id = Uuid::new_v4(); + let start = offset; + let end = start + chunk_text.len() as i32; + + tests_helpers::insert_chunk( + &context.service.db.pool, + chunk_id, + note_id, + index as i32, + start, + end, + chunk_text, + &context.embedding_version, + ) + .await; + + chunk_ids.push((chunk_id, start, end, *chunk_text)); + + offset = end; + } + + let (chunk_id, start, end, text) = chunk_ids[1]; + + tests_helpers::upsert_point(&context.service, chunk_id, note_id, 1, start, end, text).await; + + let response = context + .service + .search_raw(SearchRequest { + tenant_id: "t".to_string(), + project_id: "p".to_string(), + agent_id: "a".to_string(), + token_id: None, + read_profile: "private_only".to_string(), + payload_level: Default::default(), + query: "Second".to_string(), + top_k: Some(5), + candidate_k: Some(10), + filter: None, + record_hits: Some(false), + ranking: None, + }) + .await + .expect("Search failed."); + let item = response.items.first().expect("Expected search result."); + + assert_eq!(item.chunk_id, chunk_id); + assert!(item.snippet.contains("First sentence.")); + assert!(item.snippet.contains("Second sentence.")); + assert!(item.snippet.contains("Third sentence.")); + + context.test_db.cleanup().await.expect("Failed to cleanup test database."); +} + +#[tokio::test] +#[ignore = "Requires external Postgres and Qdrant. Set ELF_PG_DSN and ELF_QDRANT_URL to run."] +async fn search_skips_missing_chunk_metadata() { + let providers = tests_helpers::build_providers(StubRerank); + let Some(context) = + tests_helpers::setup_context("search_skips_missing_chunk_metadata", providers).await + else { + return; + }; + let note_id = Uuid::new_v4(); + let chunk_id = Uuid::new_v4(); + let note_text = "Missing chunk metadata."; + + tests_helpers::insert_note( + &context.service.db.pool, + note_id, + note_text, + &context.embedding_version, + ) + .await; + tests_helpers::upsert_point( + &context.service, + chunk_id, + note_id, + 0, + 0, + note_text.len() as i32, + note_text, + ) + .await; + + let response = context + .service + .search_raw(SearchRequest { + tenant_id: "t".to_string(), + project_id: "p".to_string(), + agent_id: "a".to_string(), + token_id: None, + read_profile: "private_only".to_string(), + payload_level: Default::default(), + query: "Missing".to_string(), + top_k: Some(5), + candidate_k: Some(10), + filter: None, + record_hits: Some(false), + ranking: None, + }) + .await + .expect("Search failed."); + + assert!(response.items.is_empty()); + + context.test_db.cleanup().await.expect("Failed to cleanup test database."); +} + +#[tokio::test] +#[ignore = "Requires external Postgres and Qdrant. Set ELF_PG_DSN and ELF_QDRANT_URL to run."] +async fn progressive_search_returns_index_timeline_and_details() { + let providers = tests_helpers::build_providers(StubRerank); + let Some(context) = tests_helpers::setup_context( + "progressive_search_returns_index_timeline_and_details", + providers, + ) + .await + else { + return; + }; + let note_id = Uuid::new_v4(); + let chunk_id = Uuid::new_v4(); + let note_text = "Progressive retrieval works best with staged expansion."; + + tests_helpers::insert_note( + &context.service.db.pool, + note_id, + note_text, + &context.embedding_version, + ) + .await; + tests_helpers::insert_chunk( + &context.service.db.pool, + chunk_id, + note_id, + 0, + 0, + note_text.len() as i32, + note_text, + &context.embedding_version, + ) + .await; + tests_helpers::upsert_point( + &context.service, + chunk_id, + note_id, + 0, + 0, + note_text.len() as i32, + note_text, + ) + .await; + + let index = context + .service + .search(SearchRequest { + tenant_id: "t".to_string(), + project_id: "p".to_string(), + agent_id: "a".to_string(), + token_id: None, + read_profile: "private_only".to_string(), + payload_level: Default::default(), + query: "Progressive".to_string(), + top_k: Some(5), + candidate_k: Some(10), + filter: None, + record_hits: Some(false), + ranking: None, + }) + .await + .expect("Search index failed."); + + assert!(!index.items.is_empty()); + + let timeline = context + .service + .search_timeline(SearchTimelineRequest { + tenant_id: "t".to_string(), + project_id: "p".to_string(), + agent_id: "a".to_string(), + search_session_id: index.search_session_id, + payload_level: Default::default(), + group_by: None, + }) + .await + .expect("Search timeline failed."); + + assert!(!timeline.groups.is_empty()); + + let details = context + .service + .search_details(SearchDetailsRequest { + tenant_id: "t".to_string(), + project_id: "p".to_string(), + agent_id: "a".to_string(), + search_session_id: index.search_session_id, + payload_level: Default::default(), + note_ids: vec![note_id], + record_hits: Some(false), + }) + .await + .expect("Search details failed."); + let returned = details + .results + .first() + .and_then(|result| result.note.as_ref()) + .expect("Expected note details."); + + assert_eq!(returned.note_id, note_id); + assert_eq!(returned.text, note_text); + + context.test_db.cleanup().await.expect("Failed to cleanup test database."); +} + +#[tokio::test] +#[ignore = "Requires external Postgres and Qdrant. Set ELF_PG_DSN and ELF_QDRANT_URL to run."] +async fn search_dedupes_note_results() { + let providers = tests_helpers::build_providers(KeywordRerank { keyword: "preferred" }); + let Some(context) = + tests_helpers::setup_context("search_dedupes_note_results", providers).await + else { + return; + }; + let note_id = Uuid::new_v4(); + let chunk_texts = ["preferred alpha. ", "bridge chunk. ", "other alpha."]; + let note_text = chunk_texts.concat(); + + tests_helpers::insert_note( + &context.service.db.pool, + note_id, + ¬e_text, + &context.embedding_version, + ) + .await; + + let mut offset = 0_i32; + let mut chunk_ids = Vec::new(); + + for (index, chunk_text) in chunk_texts.iter().enumerate() { + let chunk_id = Uuid::new_v4(); + let start = offset; + let end = start + chunk_text.len() as i32; + + tests_helpers::insert_chunk( + &context.service.db.pool, + chunk_id, + note_id, + index as i32, + start, + end, + chunk_text, + &context.embedding_version, + ) + .await; + + chunk_ids.push((chunk_id, start, end, *chunk_text)); + + offset = end; + } + + let (chunk_id_a, start_a, end_a, text_a) = chunk_ids[0]; + let (chunk_id_c, start_c, end_c, text_c) = chunk_ids[2]; + + tests_helpers::upsert_point(&context.service, chunk_id_a, note_id, 0, start_a, end_a, text_a) + .await; + tests_helpers::upsert_point(&context.service, chunk_id_c, note_id, 2, start_c, end_c, text_c) + .await; + + let response = context + .service + .search_raw(SearchRequest { + tenant_id: "t".to_string(), + project_id: "p".to_string(), + agent_id: "a".to_string(), + token_id: None, + read_profile: "private_only".to_string(), + payload_level: Default::default(), + query: "alpha".to_string(), + top_k: Some(5), + candidate_k: Some(10), + filter: None, + record_hits: Some(false), + ranking: None, + }) + .await + .expect("Search failed."); + let item = response.items.first().expect("Expected search result."); + + assert_eq!(response.items.len(), 1); + assert_eq!(item.note_id, note_id); + assert!( + item.chunk_id == chunk_id_a || item.chunk_id == chunk_id_c, + "Expected deduped result chunk_id to be one of the ingested chunks." + ); + + context.test_db.cleanup().await.expect("Failed to cleanup test database."); +} diff --git a/packages/elf-service/tests/acceptance/chunk_search/tests_helpers.rs b/packages/elf-service/tests/acceptance/chunk_search/tests_helpers.rs new file mode 100644 index 00000000..eaba97a7 --- /dev/null +++ b/packages/elf-service/tests/acceptance/chunk_search/tests_helpers.rs @@ -0,0 +1,350 @@ +use std::{ + collections::HashMap, + sync::{Arc, atomic::AtomicUsize}, +}; + +use qdrant_client::{ + Payload, + qdrant::{Document, PointStruct, UpsertPointsBuilder, Vector}, +}; +use serde_json::Value; +use sqlx::PgExecutor; +use time::OffsetDateTime; +use uuid::Uuid; + +use crate::acceptance::{self, SpyExtractor, StubEmbedding}; +use elf_config::ProviderConfig; +use elf_service::{BoxFuture, ElfService, Providers, RerankProvider, Result}; +use elf_storage::qdrant::{BM25_MODEL, BM25_VECTOR_NAME, DENSE_VECTOR_NAME}; +use elf_testkit::TestDatabase; + +pub(super) struct TestContext { + pub(super) service: ElfService, + pub(super) test_db: TestDatabase, + pub(super) embedding_version: String, +} + +pub(super) struct KeywordRerank { + pub(super) keyword: &'static str, +} +impl RerankProvider for KeywordRerank { + fn rerank<'a>( + &'a self, + _cfg: &'a ProviderConfig, + _query: &'a str, + docs: &'a [String], + ) -> BoxFuture<'a, Result>> { + let keyword = self.keyword; + + Box::pin(async move { + Ok(docs.iter().map(|doc| if doc.contains(keyword) { 1.0 } else { 0.1 }).collect()) + }) + } +} + +pub(super) fn build_providers(rerank: R) -> Providers +where + R: RerankProvider + Send + Sync + 'static, +{ + Providers::new( + Arc::new(StubEmbedding { vector_dim: 4_096 }), + Arc::new(rerank), + Arc::new(SpyExtractor { + calls: Arc::new(AtomicUsize::new(0)), + payload: serde_json::json!({ "notes": [] }), + }), + ) +} + +pub(super) fn build_payload( + note_id: Uuid, + chunk_id: Uuid, + chunk_index: i32, + start_offset: i32, + end_offset: i32, +) -> Payload { + let mut payload = Payload::new(); + + payload.insert("note_id", note_id.to_string()); + payload.insert("chunk_id", chunk_id.to_string()); + payload.insert("chunk_index", Value::from(chunk_index)); + payload.insert("start_offset", Value::from(start_offset)); + payload.insert("end_offset", Value::from(end_offset)); + payload.insert("tenant_id", "t"); + payload.insert("project_id", "p"); + payload.insert("agent_id", "a"); + payload.insert("scope", "agent_private"); + payload.insert("status", "active"); + + payload +} + +pub(super) fn build_vectors(text: &str) -> HashMap { + let mut vectors = HashMap::new(); + + vectors.insert(DENSE_VECTOR_NAME.to_string(), Vector::from(vec![0.0_f32; 4_096])); + vectors.insert( + BM25_VECTOR_NAME.to_string(), + Vector::from(Document::new(text.to_string(), BM25_MODEL)), + ); + + vectors +} + +pub(super) async fn setup_context(test_name: &str, providers: Providers) -> Option { + let Some(test_db) = acceptance::test_db().await else { + eprintln!("Skipping {test_name}; set ELF_PG_DSN to run this test."); + + return None; + }; + let Some(qdrant_url) = acceptance::test_qdrant_url() else { + eprintln!("Skipping {test_name}; set ELF_QDRANT_URL to run this test."); + + return None; + }; + let collection = test_db.collection_name("elf_acceptance"); + let docs_collection = test_db.collection_name("elf_acceptance_docs"); + let cfg = acceptance::test_config( + test_db.dsn().to_string(), + qdrant_url, + 4_096, + collection, + docs_collection, + ); + let service = + acceptance::build_service(cfg, providers).await.expect("Failed to build service."); + + acceptance::reset_db(&service.db.pool).await.expect("Failed to reset test database."); + + reset_collection(&service).await; + + let embedding_version = format!( + "{}:{}:{}", + service.cfg.providers.embedding.provider_id, + service.cfg.providers.embedding.model, + service.cfg.storage.qdrant.vector_dim + ); + + Some(TestContext { service, test_db, embedding_version }) +} + +pub(super) async fn reset_collection(service: &ElfService) { + acceptance::reset_qdrant_collection( + &service.qdrant.client, + &service.qdrant.collection, + service.qdrant.vector_dim, + ) + .await + .expect("Failed to reset Qdrant collection."); +} + +pub(super) async fn insert_note<'e, E>( + executor: E, + note_id: Uuid, + note_text: &str, + embedding_version: &str, +) where + E: PgExecutor<'e>, +{ + insert_note_with_importance_and_source_ref( + executor, + note_id, + note_text, + embedding_version, + 0.4_f32, + 0.9_f32, + "agent_private", + serde_json::json!({}), + ) + .await; +} + +pub(super) async fn insert_note_with_importance<'e, E>( + executor: E, + note_id: Uuid, + note_text: &str, + embedding_version: &str, + importance: f32, + confidence: f32, + scope: &str, +) where + E: PgExecutor<'e>, +{ + insert_note_with_importance_and_source_ref( + executor, + note_id, + note_text, + embedding_version, + importance, + confidence, + scope, + serde_json::json!({}), + ) + .await; +} + +#[allow(clippy::too_many_arguments)] +pub(super) async fn insert_note_with_importance_and_source_ref<'e, E>( + executor: E, + note_id: Uuid, + note_text: &str, + embedding_version: &str, + importance: f32, + confidence: f32, + scope: &str, + source_ref: Value, +) where + E: PgExecutor<'e>, +{ + let now = OffsetDateTime::now_utc(); + + sqlx::query( + "\ +INSERT INTO memory_notes ( + note_id, + tenant_id, + project_id, + agent_id, + scope, + type, + key, + text, + importance, + confidence, + status, + created_at, + updated_at, + expires_at, + embedding_version, + source_ref, + hit_count, + last_hit_at +) +VALUES ( + $1, + $2, + $3, + $4, + $5, + $6, + $7, + $8, + $9, + $10, + $11, + $12, + $13, + $14, + $15, + $16, + $17, + $18 +)", + ) + .bind(note_id) + .bind("t") + .bind("p") + .bind("a") + .bind(scope) + .bind("fact") + .bind(Option::::None) + .bind(note_text) + .bind(importance) + .bind(confidence) + .bind("active") + .bind(now) + .bind(now) + .bind(Option::::None) + .bind(embedding_version) + .bind(source_ref) + .bind(0_i64) + .bind(Option::::None) + .execute(executor) + .await + .expect("Failed to insert memory note."); +} + +#[allow(clippy::too_many_arguments)] +pub(super) async fn insert_summary_field_row<'e, E>( + executor: E, + field_id: Uuid, + note_id: Uuid, + summary: &str, +) where + E: PgExecutor<'e>, +{ + sqlx::query( + "\ +INSERT INTO memory_note_fields (field_id, note_id, field_kind, item_index, text) +VALUES ($1, $2, $3, $4, $5)", + ) + .bind(field_id) + .bind(note_id) + .bind("summary") + .bind(0_i32) + .bind(summary) + .execute(executor) + .await + .expect("Failed to insert note summary field."); +} + +#[allow(clippy::too_many_arguments)] +pub(super) async fn insert_chunk<'e, E>( + executor: E, + chunk_id: Uuid, + note_id: Uuid, + chunk_index: i32, + start_offset: i32, + end_offset: i32, + text: &str, + embedding_version: &str, +) where + E: PgExecutor<'e>, +{ + sqlx::query( + "\ +INSERT INTO memory_note_chunks ( + chunk_id, + note_id, + chunk_index, + start_offset, + end_offset, + text, + embedding_version +) +VALUES ($1, $2, $3, $4, $5, $6, $7)", + ) + .bind(chunk_id) + .bind(note_id) + .bind(chunk_index) + .bind(start_offset) + .bind(end_offset) + .bind(text) + .bind(embedding_version) + .execute(executor) + .await + .expect("Failed to insert chunk metadata."); +} + +pub(super) async fn upsert_point( + service: &ElfService, + chunk_id: Uuid, + note_id: Uuid, + chunk_index: i32, + start_offset: i32, + end_offset: i32, + text: &str, +) { + let payload = build_payload(note_id, chunk_id, chunk_index, start_offset, end_offset); + let vectors = build_vectors(text); + let point = PointStruct::new(chunk_id.to_string(), vectors, payload); + + service + .qdrant + .client + .upsert_points( + UpsertPointsBuilder::new(service.qdrant.collection.clone(), vec![point]).wait(true), + ) + .await + .expect("Failed to upsert Qdrant point."); +} diff --git a/packages/elf-service/tests/acceptance/consolidation.rs b/packages/elf-service/tests/acceptance/consolidation.rs index d986ac2a..ec00964d 100644 --- a/packages/elf-service/tests/acceptance/consolidation.rs +++ b/packages/elf-service/tests/acceptance/consolidation.rs @@ -1,656 +1,5 @@ -use std::sync::{Arc, atomic::AtomicUsize}; +pub(crate) mod tests_helpers; -use time::OffsetDateTime; -use uuid::Uuid; - -use crate::acceptance::{self, SpyExtractor, StubEmbedding, StubRerank}; -use elf_chunking::ChunkingConfig; -use elf_domain::consolidation::{ - ConsolidationApplyIntent, ConsolidationInputRef, ConsolidationLineage, ConsolidationMarker, - ConsolidationMarkerSeverity, ConsolidationMarkers, ConsolidationProposalDiff, - ConsolidationReviewAction, ConsolidationSourceKind, ConsolidationSourceSnapshot, - ConsolidationUnsupportedClaimFlag, -}; -use elf_service::{ - AddNoteInput, AddNoteRequest, ConsolidationProposalGetRequest, ConsolidationProposalInput, - ConsolidationProposalReviewRequest, ConsolidationProposalsListRequest, - ConsolidationProposalsListResponse, ConsolidationRunCreateRequest, - ConsolidationRunCreateResponse, ConsolidationRunGetRequest, ElfService, ListRequest, - MemoryCorrectionAction, MemoryCorrectionRequest, MemoryCorrectionResponse, - MemoryHistoryGetRequest, Providers, -}; -use elf_storage::{db::Db, qdrant::QdrantStore}; -use elf_testkit::TestDatabase; -use elf_worker::worker::{self, WorkerState}; - -const TENANT_ID: &str = "tenant_consolidation"; -const PROJECT_ID: &str = "project_consolidation"; -const AGENT_ID: &str = "agent_consolidation"; -const REVIEWER_ID: &str = "reviewer_consolidation"; - -struct ConsolidationFixture { - service: ElfService, - _test_db: TestDatabase, -} - -fn source_ref(note_id: Uuid) -> ConsolidationInputRef { - ConsolidationInputRef { - kind: ConsolidationSourceKind::Note, - id: note_id, - snapshot: ConsolidationSourceSnapshot { - status: Some("active".to_string()), - updated_at: Some(OffsetDateTime::UNIX_EPOCH), - content_hash: Some("blake3:acceptance-source".to_string()), - embedding_version: Some("test:test:4096".to_string()), - trace_version: None, - source_ref: serde_json::json!({ "schema": "acceptance/v1" }), - metadata: serde_json::json!({ "fixture": "consolidation" }), - }, - } -} - -fn lineage(source: &ConsolidationInputRef) -> ConsolidationLineage { - ConsolidationLineage { - source_refs: vec![source.clone()], - parent_run_id: None, - parent_proposal_ids: Vec::new(), - } -} - -fn proposal_input(source: &ConsolidationInputRef, kind: &str) -> ConsolidationProposalInput { - proposal_input_with_payload( - source, - kind, - serde_json::json!({ - "type": "fact", - "text": "Fact: Consolidation proposals are derived and reviewable." - }), - ) -} - -fn proposal_input_with_payload( - source: &ConsolidationInputRef, - kind: &str, - proposed_payload: serde_json::Value, -) -> ConsolidationProposalInput { - ConsolidationProposalInput { - proposal_kind: kind.to_string(), - apply_intent: ConsolidationApplyIntent::CreateDerivedNote, - source_refs: vec![source.clone()], - source_snapshot: serde_json::json!({ "source_count": 1 }), - lineage: lineage(source), - confidence: 0.82, - unsupported_claim_flags: vec![ConsolidationUnsupportedClaimFlag { - claim_id: Some("unsupported-claim".to_string()), - message: "The source does not prove that source notes may be rewritten.".to_string(), - source: Some(source.clone()), - }], - markers: ConsolidationMarkers { - contradictions: vec![ConsolidationMarker { - severity: ConsolidationMarkerSeverity::High, - message: "Stale rewrite evidence conflicts with the proposal-only rule." - .to_string(), - source: Some(source.clone()), - }], - staleness: Vec::new(), - }, - diff: ConsolidationProposalDiff { - summary: "Create a reviewed derived note without changing source evidence.".to_string(), - before: serde_json::json!({}), - after: serde_json::json!({ - "target": "derived_note", - "text": "Fact: Consolidation proposals are derived and reviewable." - }), - }, - target_ref: serde_json::json!({}), - proposed_payload, - } -} - -fn proposal_id_by_kind(response: &ConsolidationProposalsListResponse, proposal_kind: &str) -> Uuid { - response - .proposals - .iter() - .find(|proposal| proposal.proposal_kind == proposal_kind) - .map(|proposal| proposal.proposal_id) - .expect("proposal kind should be present") -} - -async fn setup_service(test_name: &str) -> Option { - let Some(test_db) = acceptance::test_db().await else { - eprintln!("Skipping {test_name}; set ELF_PG_DSN to run this test."); - - return None; - }; - let Some(qdrant_url) = acceptance::test_qdrant_url() else { - eprintln!("Skipping {test_name}; set ELF_QDRANT_URL to run this test."); - - return None; - }; - let collection = test_db.collection_name("elf_acceptance"); - let docs_collection = test_db.collection_name("elf_acceptance_docs"); - let cfg = acceptance::test_config( - test_db.dsn().to_string(), - qdrant_url, - 4_096, - collection, - docs_collection, - ); - let extractor = SpyExtractor { - calls: Arc::new(AtomicUsize::new(0)), - payload: serde_json::json!({ "notes": [] }), - }; - let providers = Providers::new( - Arc::new(StubEmbedding { vector_dim: 4_096 }), - Arc::new(StubRerank), - Arc::new(extractor), - ); - let service = - acceptance::build_service(cfg, providers).await.expect("Failed to build service."); - - acceptance::reset_db(&service.db.pool).await.expect("Failed to reset test database."); - - Some(ConsolidationFixture { service, _test_db: test_db }) -} - -async fn insert_source_note(service: &ElfService, key: &str, text: &str) -> Uuid { - let response = service - .add_note(AddNoteRequest { - tenant_id: TENANT_ID.to_string(), - project_id: PROJECT_ID.to_string(), - agent_id: AGENT_ID.to_string(), - scope: "agent_private".to_string(), - notes: vec![AddNoteInput { - r#type: "fact".to_string(), - key: Some(key.to_string()), - text: text.to_string(), - structured: None, - importance: 0.7, - confidence: 0.9, - ttl_days: None, - source_ref: serde_json::json!({ "schema": "acceptance/v1", "key": key }), - write_policy: None, - }], - }) - .await - .expect("add_note should persist source note"); - - response.results[0].note_id.expect("source note id should be present") -} - -async fn create_run_with_proposals( - service: &ElfService, - source: &ConsolidationInputRef, - proposals: Vec, -) -> ConsolidationRunCreateResponse { - service - .consolidation_run_create(ConsolidationRunCreateRequest { - tenant_id: TENANT_ID.to_string(), - project_id: PROJECT_ID.to_string(), - agent_id: AGENT_ID.to_string(), - job_kind: "manual".to_string(), - input_refs: vec![source.clone()], - source_snapshot: serde_json::json!({ "source_count": 1 }), - lineage: lineage(source), - proposals, - }) - .await - .expect("consolidation run should be created") -} - -async fn process_consolidation_worker(service: &ElfService) { - let tokenizer = elf_chunking::load_tokenizer(&service.cfg.chunking.tokenizer_repo) - .expect("worker tokenizer should load"); - let mut embedding = acceptance::dummy_embedding_provider(); - - embedding.dimensions = service.cfg.storage.qdrant.vector_dim; - - let worker_state = WorkerState { - db: Db::connect(&service.cfg.storage.postgres).await.expect("Failed to connect worker DB."), - qdrant: QdrantStore::new(&service.cfg.storage.qdrant) - .expect("Failed to build Qdrant store."), - docs_qdrant: QdrantStore::new_with_collection( - &service.cfg.storage.qdrant, - &service.cfg.storage.qdrant.docs_collection, - ) - .expect("Failed to build docs Qdrant store."), - embedding, - chunking: ChunkingConfig { - max_tokens: service.cfg.chunking.max_tokens, - overlap_tokens: service.cfg.chunking.overlap_tokens, - }, - tokenizer, - }; - - worker::process_once(&worker_state).await.expect("consolidation worker should process once"); -} - -async fn materialized_proposals( - service: &ElfService, - run_id: Uuid, -) -> ConsolidationProposalsListResponse { - service - .consolidation_proposals_list(ConsolidationProposalsListRequest { - tenant_id: TENANT_ID.to_string(), - project_id: PROJECT_ID.to_string(), - run_id: Some(run_id), - review_state: None, - limit: None, - }) - .await - .expect("consolidation proposals should be listed") -} - -async fn promote_reviewed_memory(service: &ElfService) -> Uuid { - let note_id = insert_source_note( - service, - "memory_authority_source", - "Fact: Reviewed memories require source-linked approval.", - ) - .await; - let source = source_ref(note_id); - let created = - create_run_with_proposals(service, &source, vec![proposal_input(&source, "derived_note")]) - .await; - - process_consolidation_worker(service).await; - - let materialized = materialized_proposals(service, created.run.run_id).await; - let proposal_id = materialized.proposals[0].proposal_id; - let reviewed = service - .consolidation_proposal_review(ConsolidationProposalReviewRequest { - tenant_id: TENANT_ID.to_string(), - project_id: PROJECT_ID.to_string(), - reviewer_agent_id: AGENT_ID.to_string(), - proposal_id, - review_action: ConsolidationReviewAction::Apply, - review_comment: Some("Approve memory authority candidate.".to_string()), - }) - .await - .expect("review action should promote memory"); - - reviewed - .target_ref - .get("id") - .and_then(serde_json::Value::as_str) - .and_then(|value| Uuid::parse_str(value).ok()) - .expect("applied proposal should point at promoted note") -} - -async fn active_list_contains(service: &ElfService, note_id: Uuid) -> bool { - service - .list(ListRequest { - tenant_id: TENANT_ID.to_string(), - project_id: PROJECT_ID.to_string(), - agent_id: Some(AGENT_ID.to_string()), - scope: Some("agent_private".to_string()), - status: None, - r#type: None, - }) - .await - .expect("active notes should list") - .items - .iter() - .any(|item| item.note_id == note_id) -} - -async fn apply_memory_correction( - service: &ElfService, - note_id: Uuid, - action: MemoryCorrectionAction, - reason: &str, - source: &str, - restore_version_id: Option, -) -> MemoryCorrectionResponse { - service - .memory_correction_apply(MemoryCorrectionRequest { - tenant_id: TENANT_ID.to_string(), - project_id: PROJECT_ID.to_string(), - actor_agent_id: AGENT_ID.to_string(), - note_id, - action, - reason: reason.to_string(), - source_ref: serde_json::json!({ - "schema": "acceptance/review", - "source": source - }), - restore_version_id, - }) - .await - .expect("memory correction should persist") -} - -async fn memory_history_event_types(service: &ElfService, note_id: Uuid) -> Vec { - service - .memory_history_get(MemoryHistoryGetRequest { - tenant_id: TENANT_ID.to_string(), - project_id: PROJECT_ID.to_string(), - note_id, - }) - .await - .expect("promoted memory history should be readable") - .events - .into_iter() - .map(|event| event.event_type) - .collect() -} - -#[tokio::test] -#[ignore = "Requires external Postgres and Qdrant. Set ELF_PG_DSN and ELF_QDRANT_URL to run this test."] -async fn apply_action_is_audited_without_source_rewrite() { - let Some(fixture) = setup_service("apply_action_is_audited_without_source_rewrite").await - else { - return; - }; - let service = &fixture.service; - let source_text = - "Fact: Current consolidation output is derived and never rewrites source notes."; - let note_id = insert_source_note(service, "consolidation_source_rule", source_text).await; - let source = source_ref(note_id); - let created = - create_run_with_proposals(service, &source, vec![proposal_input(&source, "derived_note")]) - .await; - - assert_eq!(created.run.status, "pending"); - assert!(created.proposals.is_empty()); - - process_consolidation_worker(service).await; - - let completed = service - .consolidation_run_get(ConsolidationRunGetRequest { - tenant_id: TENANT_ID.to_string(), - project_id: PROJECT_ID.to_string(), - run_id: created.run.run_id, - }) - .await - .expect("consolidation run should remain readable"); - let materialized = materialized_proposals(service, created.run.run_id).await; - let proposal = &materialized.proposals[0]; - let job_status: String = - sqlx::query_scalar("SELECT status FROM consolidation_run_jobs WHERE job_id = $1") - .bind(created.job_id) - .fetch_one(&service.db.pool) - .await - .expect("consolidation job should be queryable"); - - assert_eq!(completed.status, "completed"); - assert_eq!(job_status, "DONE"); - assert_eq!(materialized.proposals.len(), 1); - assert_eq!(proposal.review_state, "proposed"); - assert_eq!(proposal.unsupported_claim_flags.as_array().map(Vec::len), Some(1)); - assert_eq!(proposal.contradiction_markers.as_array().map(Vec::len), Some(1)); - - let reviewed = service - .consolidation_proposal_review(ConsolidationProposalReviewRequest { - tenant_id: TENANT_ID.to_string(), - project_id: PROJECT_ID.to_string(), - reviewer_agent_id: REVIEWER_ID.to_string(), - proposal_id: proposal.proposal_id, - review_action: ConsolidationReviewAction::Apply, - review_comment: Some("Apply reviewed derived proposal.".to_string()), - }) - .await - .expect("review action should apply"); - - assert_eq!(reviewed.review_state, "applied"); - assert_eq!(reviewed.review_events.len(), 2); - assert_eq!(reviewed.review_events[0].action, "approve"); - assert_eq!(reviewed.review_events[0].from_review_state, "proposed"); - assert_eq!(reviewed.review_events[0].to_review_state, "approved"); - assert_eq!(reviewed.review_events[1].action, "apply"); - assert_eq!(reviewed.review_events[1].from_review_state, "approved"); - assert_eq!(reviewed.review_events[1].to_review_state, "applied"); - - let promoted_note_id = reviewed - .target_ref - .get("id") - .and_then(serde_json::Value::as_str) - .and_then(|value| Uuid::parse_str(value).ok()) - .expect("applied proposal should point at promoted note"); - let promoted_source_ref: serde_json::Value = - sqlx::query_scalar("SELECT source_ref FROM memory_notes WHERE note_id = $1") - .bind(promoted_note_id) - .fetch_one(&service.db.pool) - .await - .expect("promoted memory source ref should be queryable"); - let promoted_status: String = - sqlx::query_scalar("SELECT status FROM memory_notes WHERE note_id = $1") - .bind(promoted_note_id) - .fetch_one(&service.db.pool) - .await - .expect("promoted memory status should be queryable"); - let promoted_agent_id: String = - sqlx::query_scalar("SELECT agent_id FROM memory_notes WHERE note_id = $1") - .bind(promoted_note_id) - .fetch_one(&service.db.pool) - .await - .expect("promoted memory owner should be queryable"); - - assert_eq!(promoted_status, "active"); - assert_eq!(promoted_agent_id, AGENT_ID); - assert_eq!(promoted_source_ref["schema"], "elf.memory_promotion/v1"); - assert_eq!( - promoted_source_ref["proposal_id"].as_str().map(str::to_string), - Some(proposal.proposal_id.to_string()) - ); - assert_eq!(promoted_source_ref["review"]["reviewer_agent_id"], REVIEWER_ID); - - let stored_text: String = - sqlx::query_scalar("SELECT text FROM memory_notes WHERE note_id = $1") - .bind(note_id) - .fetch_one(&service.db.pool) - .await - .expect("source note should still exist"); - let version_count: i64 = - sqlx::query_scalar("SELECT count(*) FROM memory_note_versions WHERE note_id = $1") - .bind(note_id) - .fetch_one(&service.db.pool) - .await - .expect("source note versions should be queryable"); - - assert_eq!(stored_text, source_text); - assert_eq!(version_count, 1); -} - -#[tokio::test] -#[ignore = "Requires external Postgres and Qdrant. Set ELF_PG_DSN and ELF_QDRANT_URL to run this test."] -async fn apply_project_shared_memory_creates_owner_grant() { - let Some(fixture) = setup_service("apply_project_shared_memory_creates_owner_grant").await - else { - return; - }; - let service = &fixture.service; - let note_id = insert_source_note( - service, - "consolidation_project_shared_source", - "Fact: Shared memory promotions must preserve project grant semantics.", - ) - .await; - let source = source_ref(note_id); - let proposal = proposal_input_with_payload( - &source, - "derived_note", - serde_json::json!({ - "type": "fact", - "scope": "project_shared", - "text": "Fact: Project-shared promoted memories keep project grants." - }), - ); - let created = create_run_with_proposals(service, &source, vec![proposal]).await; - - process_consolidation_worker(service).await; - - let materialized = materialized_proposals(service, created.run.run_id).await; - let reviewed = service - .consolidation_proposal_review(ConsolidationProposalReviewRequest { - tenant_id: TENANT_ID.to_string(), - project_id: PROJECT_ID.to_string(), - reviewer_agent_id: REVIEWER_ID.to_string(), - proposal_id: materialized.proposals[0].proposal_id, - review_action: ConsolidationReviewAction::Apply, - review_comment: Some("Apply reviewed project-shared memory.".to_string()), - }) - .await - .expect("project-shared review action should promote memory"); - let promoted_note_id = reviewed - .target_ref - .get("id") - .and_then(serde_json::Value::as_str) - .and_then(|value| Uuid::parse_str(value).ok()) - .expect("applied proposal should point at promoted note"); - let promoted: (String, String, String) = - sqlx::query_as("SELECT project_id, agent_id, scope FROM memory_notes WHERE note_id = $1") - .bind(promoted_note_id) - .fetch_one(&service.db.pool) - .await - .expect("promoted memory should be queryable"); - let grant_count: i64 = sqlx::query_scalar( - "\ -SELECT count(*) -FROM memory_space_grants -WHERE tenant_id = $1 - AND project_id = $2 - AND scope = 'project_shared' - AND space_owner_agent_id = $3 - AND grantee_kind = 'project' - AND revoked_at IS NULL", - ) - .bind(TENANT_ID) - .bind(PROJECT_ID) - .bind(AGENT_ID) - .fetch_one(&service.db.pool) - .await - .expect("project grant should be queryable"); - - assert_eq!( - promoted, - (PROJECT_ID.to_string(), AGENT_ID.to_string(), "project_shared".to_string()) - ); - assert_eq!(grant_count, 1); -} - -#[tokio::test] -#[ignore = "Requires external Postgres and Qdrant. Set ELF_PG_DSN and ELF_QDRANT_URL to run this test."] -async fn promoted_memory_corrections_suppress_and_restore_recall() { - let Some(fixture) = - setup_service("promoted_memory_corrections_suppress_and_restore_recall").await - else { - return; - }; - let service = &fixture.service; - let promoted_note_id = promote_reviewed_memory(service).await; - let superseded = apply_memory_correction( - service, - promoted_note_id, - MemoryCorrectionAction::Supersede, - "Newer reviewed source supersedes the derived memory.", - "supersede", - None, - ) - .await; - - assert_eq!(superseded.status, "deprecated"); - assert!(!active_list_contains(service, promoted_note_id).await); - - let restored = apply_memory_correction( - service, - promoted_note_id, - MemoryCorrectionAction::Restore, - "Rollback to prior approved memory after reviewer audit.", - "restore", - superseded.version_id, - ) - .await; - - assert_eq!(restored.status, "active"); - assert!(active_list_contains(service, promoted_note_id).await); - - let deleted = apply_memory_correction( - service, - promoted_note_id, - MemoryCorrectionAction::Delete, - "Reviewer removed the restored memory from normal recall.", - "delete", - None, - ) - .await; - - assert_eq!(deleted.status, "deleted"); - assert!(!active_list_contains(service, promoted_note_id).await); - - let event_types = memory_history_event_types(service, promoted_note_id).await; - - for expected in ["add", "derived", "applied", "superseded", "restored", "delete"] { - assert!(event_types.iter().any(|event_type| event_type == expected)); - } -} - -#[tokio::test] -#[ignore = "Requires external Postgres and Qdrant. Set ELF_PG_DSN and ELF_QDRANT_URL to run this test."] -async fn discard_and_defer_actions_remain_auditable() { - let Some(fixture) = setup_service("discard_and_defer_actions_remain_auditable").await else { - return; - }; - let service = &fixture.service; - let note_id = insert_source_note( - service, - "consolidation_review_actions", - "Fact: Discarded and deferred proposals remain auditable.", - ) - .await; - let source = source_ref(note_id); - let created = create_run_with_proposals( - service, - &source, - vec![ - proposal_input(&source, "contradiction_report"), - proposal_input(&source, "preference_candidate"), - ], - ) - .await; - - process_consolidation_worker(service).await; - - let materialized = materialized_proposals(service, created.run.run_id).await; - let discarded_id = proposal_id_by_kind(&materialized, "contradiction_report"); - let deferred_id = proposal_id_by_kind(&materialized, "preference_candidate"); - let discarded = service - .consolidation_proposal_review(ConsolidationProposalReviewRequest { - tenant_id: TENANT_ID.to_string(), - project_id: PROJECT_ID.to_string(), - reviewer_agent_id: AGENT_ID.to_string(), - proposal_id: discarded_id, - review_action: ConsolidationReviewAction::Discard, - review_comment: Some("Discard stale synthesis.".to_string()), - }) - .await - .expect("discard should be allowed"); - let deferred = service - .consolidation_proposal_review(ConsolidationProposalReviewRequest { - tenant_id: TENANT_ID.to_string(), - project_id: PROJECT_ID.to_string(), - reviewer_agent_id: AGENT_ID.to_string(), - proposal_id: deferred_id, - review_action: ConsolidationReviewAction::Defer, - review_comment: Some("Defer until more evidence is available.".to_string()), - }) - .await - .expect("defer should be allowed"); - let deferred_readback = service - .consolidation_proposal_get(ConsolidationProposalGetRequest { - tenant_id: TENANT_ID.to_string(), - project_id: PROJECT_ID.to_string(), - proposal_id: deferred_id, - }) - .await - .expect("deferred proposal should remain readable"); - - assert_eq!(discarded.review_state, "rejected"); - assert_eq!(discarded.review_events.len(), 1); - assert_eq!(discarded.review_events[0].action, "discard"); - assert_eq!(deferred.review_state, "archived"); - assert_eq!(deferred.review_events.len(), 1); - assert_eq!(deferred.review_events[0].action, "defer"); - assert_eq!(deferred_readback.review_events.len(), 1); - assert_eq!(deferred_readback.review_events[0].to_review_state, "archived"); -} +mod tests_apply; +mod tests_corrections; +mod tests_review; diff --git a/packages/elf-service/tests/acceptance/consolidation/tests_apply.rs b/packages/elf-service/tests/acceptance/consolidation/tests_apply.rs new file mode 100644 index 00000000..3be2107c --- /dev/null +++ b/packages/elf-service/tests/acceptance/consolidation/tests_apply.rs @@ -0,0 +1,207 @@ +use uuid::Uuid; + +use crate::acceptance::consolidation::tests_helpers::{ + self, AGENT_ID, PROJECT_ID, REVIEWER_ID, TENANT_ID, +}; +use elf_domain::consolidation::ConsolidationReviewAction; +use elf_service::{ConsolidationProposalReviewRequest, ConsolidationRunGetRequest}; + +#[tokio::test] +#[ignore = "Requires external Postgres and Qdrant. Set ELF_PG_DSN and ELF_QDRANT_URL to run this test."] +async fn apply_action_is_audited_without_source_rewrite() { + let Some(fixture) = + tests_helpers::setup_service("apply_action_is_audited_without_source_rewrite").await + else { + return; + }; + let service = &fixture.service; + let source_text = + "Fact: Current consolidation output is derived and never rewrites source notes."; + let note_id = + tests_helpers::insert_source_note(service, "consolidation_source_rule", source_text).await; + let source = tests_helpers::source_ref(note_id); + let created = tests_helpers::create_run_with_proposals( + service, + &source, + vec![tests_helpers::proposal_input(&source, "derived_note")], + ) + .await; + + assert_eq!(created.run.status, "pending"); + assert!(created.proposals.is_empty()); + + tests_helpers::process_consolidation_worker(service).await; + + let completed = service + .consolidation_run_get(ConsolidationRunGetRequest { + tenant_id: TENANT_ID.to_string(), + project_id: PROJECT_ID.to_string(), + run_id: created.run.run_id, + }) + .await + .expect("consolidation run should remain readable"); + let materialized = tests_helpers::materialized_proposals(service, created.run.run_id).await; + let proposal = &materialized.proposals[0]; + let job_status: String = + sqlx::query_scalar("SELECT status FROM consolidation_run_jobs WHERE job_id = $1") + .bind(created.job_id) + .fetch_one(&service.db.pool) + .await + .expect("consolidation job should be queryable"); + + assert_eq!(completed.status, "completed"); + assert_eq!(job_status, "DONE"); + assert_eq!(materialized.proposals.len(), 1); + assert_eq!(proposal.review_state, "proposed"); + assert_eq!(proposal.unsupported_claim_flags.as_array().map(Vec::len), Some(1)); + assert_eq!(proposal.contradiction_markers.as_array().map(Vec::len), Some(1)); + + let reviewed = service + .consolidation_proposal_review(ConsolidationProposalReviewRequest { + tenant_id: TENANT_ID.to_string(), + project_id: PROJECT_ID.to_string(), + reviewer_agent_id: REVIEWER_ID.to_string(), + proposal_id: proposal.proposal_id, + review_action: ConsolidationReviewAction::Apply, + review_comment: Some("Apply reviewed derived proposal.".to_string()), + }) + .await + .expect("review action should apply"); + + assert_eq!(reviewed.review_state, "applied"); + assert_eq!(reviewed.review_events.len(), 2); + assert_eq!(reviewed.review_events[0].action, "approve"); + assert_eq!(reviewed.review_events[0].from_review_state, "proposed"); + assert_eq!(reviewed.review_events[0].to_review_state, "approved"); + assert_eq!(reviewed.review_events[1].action, "apply"); + assert_eq!(reviewed.review_events[1].from_review_state, "approved"); + assert_eq!(reviewed.review_events[1].to_review_state, "applied"); + + let promoted_note_id = reviewed + .target_ref + .get("id") + .and_then(serde_json::Value::as_str) + .and_then(|value| Uuid::parse_str(value).ok()) + .expect("applied proposal should point at promoted note"); + let promoted_source_ref: serde_json::Value = + sqlx::query_scalar("SELECT source_ref FROM memory_notes WHERE note_id = $1") + .bind(promoted_note_id) + .fetch_one(&service.db.pool) + .await + .expect("promoted memory source ref should be queryable"); + let promoted_status: String = + sqlx::query_scalar("SELECT status FROM memory_notes WHERE note_id = $1") + .bind(promoted_note_id) + .fetch_one(&service.db.pool) + .await + .expect("promoted memory status should be queryable"); + let promoted_agent_id: String = + sqlx::query_scalar("SELECT agent_id FROM memory_notes WHERE note_id = $1") + .bind(promoted_note_id) + .fetch_one(&service.db.pool) + .await + .expect("promoted memory owner should be queryable"); + + assert_eq!(promoted_status, "active"); + assert_eq!(promoted_agent_id, AGENT_ID); + assert_eq!(promoted_source_ref["schema"], "elf.memory_promotion/v1"); + assert_eq!( + promoted_source_ref["proposal_id"].as_str().map(str::to_string), + Some(proposal.proposal_id.to_string()) + ); + assert_eq!(promoted_source_ref["review"]["reviewer_agent_id"], REVIEWER_ID); + + let stored_text: String = + sqlx::query_scalar("SELECT text FROM memory_notes WHERE note_id = $1") + .bind(note_id) + .fetch_one(&service.db.pool) + .await + .expect("source note should still exist"); + let version_count: i64 = + sqlx::query_scalar("SELECT count(*) FROM memory_note_versions WHERE note_id = $1") + .bind(note_id) + .fetch_one(&service.db.pool) + .await + .expect("source note versions should be queryable"); + + assert_eq!(stored_text, source_text); + assert_eq!(version_count, 1); +} + +#[tokio::test] +#[ignore = "Requires external Postgres and Qdrant. Set ELF_PG_DSN and ELF_QDRANT_URL to run this test."] +async fn apply_project_shared_memory_creates_owner_grant() { + let Some(fixture) = + tests_helpers::setup_service("apply_project_shared_memory_creates_owner_grant").await + else { + return; + }; + let service = &fixture.service; + let note_id = tests_helpers::insert_source_note( + service, + "consolidation_project_shared_source", + "Fact: Shared memory promotions must preserve project grant semantics.", + ) + .await; + let source = tests_helpers::source_ref(note_id); + let proposal = tests_helpers::proposal_input_with_payload( + &source, + "derived_note", + serde_json::json!({ + "type": "fact", + "scope": "project_shared", + "text": "Fact: Project-shared promoted memories keep project grants." + }), + ); + let created = tests_helpers::create_run_with_proposals(service, &source, vec![proposal]).await; + + tests_helpers::process_consolidation_worker(service).await; + + let materialized = tests_helpers::materialized_proposals(service, created.run.run_id).await; + let reviewed = service + .consolidation_proposal_review(ConsolidationProposalReviewRequest { + tenant_id: TENANT_ID.to_string(), + project_id: PROJECT_ID.to_string(), + reviewer_agent_id: REVIEWER_ID.to_string(), + proposal_id: materialized.proposals[0].proposal_id, + review_action: ConsolidationReviewAction::Apply, + review_comment: Some("Apply reviewed project-shared memory.".to_string()), + }) + .await + .expect("project-shared review action should promote memory"); + let promoted_note_id = reviewed + .target_ref + .get("id") + .and_then(serde_json::Value::as_str) + .and_then(|value| Uuid::parse_str(value).ok()) + .expect("applied proposal should point at promoted note"); + let promoted: (String, String, String) = + sqlx::query_as("SELECT project_id, agent_id, scope FROM memory_notes WHERE note_id = $1") + .bind(promoted_note_id) + .fetch_one(&service.db.pool) + .await + .expect("promoted memory should be queryable"); + let grant_count: i64 = sqlx::query_scalar( + "\ +SELECT count(*) +FROM memory_space_grants +WHERE tenant_id = $1 + AND project_id = $2 + AND scope = 'project_shared' + AND space_owner_agent_id = $3 + AND grantee_kind = 'project' + AND revoked_at IS NULL", + ) + .bind(TENANT_ID) + .bind(PROJECT_ID) + .bind(AGENT_ID) + .fetch_one(&service.db.pool) + .await + .expect("project grant should be queryable"); + + assert_eq!( + promoted, + (PROJECT_ID.to_string(), AGENT_ID.to_string(), "project_shared".to_string()) + ); + assert_eq!(grant_count, 1); +} diff --git a/packages/elf-service/tests/acceptance/consolidation/tests_corrections.rs b/packages/elf-service/tests/acceptance/consolidation/tests_corrections.rs new file mode 100644 index 00000000..dfee80cd --- /dev/null +++ b/packages/elf-service/tests/acceptance/consolidation/tests_corrections.rs @@ -0,0 +1,59 @@ +use crate::acceptance::consolidation::tests_helpers; +use elf_service::MemoryCorrectionAction; + +#[tokio::test] +#[ignore = "Requires external Postgres and Qdrant. Set ELF_PG_DSN and ELF_QDRANT_URL to run this test."] +async fn promoted_memory_corrections_suppress_and_restore_recall() { + let Some(fixture) = + tests_helpers::setup_service("promoted_memory_corrections_suppress_and_restore_recall") + .await + else { + return; + }; + let service = &fixture.service; + let promoted_note_id = tests_helpers::promote_reviewed_memory(service).await; + let superseded = tests_helpers::apply_memory_correction( + service, + promoted_note_id, + MemoryCorrectionAction::Supersede, + "Newer reviewed source supersedes the derived memory.", + "supersede", + None, + ) + .await; + + assert_eq!(superseded.status, "deprecated"); + assert!(!tests_helpers::active_list_contains(service, promoted_note_id).await); + + let restored = tests_helpers::apply_memory_correction( + service, + promoted_note_id, + MemoryCorrectionAction::Restore, + "Rollback to prior approved memory after reviewer audit.", + "restore", + superseded.version_id, + ) + .await; + + assert_eq!(restored.status, "active"); + assert!(tests_helpers::active_list_contains(service, promoted_note_id).await); + + let deleted = tests_helpers::apply_memory_correction( + service, + promoted_note_id, + MemoryCorrectionAction::Delete, + "Reviewer removed the restored memory from normal recall.", + "delete", + None, + ) + .await; + + assert_eq!(deleted.status, "deleted"); + assert!(!tests_helpers::active_list_contains(service, promoted_note_id).await); + + let event_types = tests_helpers::memory_history_event_types(service, promoted_note_id).await; + + for expected in ["add", "derived", "applied", "superseded", "restored", "delete"] { + assert!(event_types.iter().any(|event_type| event_type == expected)); + } +} diff --git a/packages/elf-service/tests/acceptance/consolidation/tests_helpers.rs b/packages/elf-service/tests/acceptance/consolidation/tests_helpers.rs new file mode 100644 index 00000000..83ef23e5 --- /dev/null +++ b/packages/elf-service/tests/acceptance/consolidation/tests_helpers.rs @@ -0,0 +1,341 @@ +use std::sync::{Arc, atomic::AtomicUsize}; + +use time::OffsetDateTime; +use uuid::Uuid; + +use crate::acceptance::{self, SpyExtractor, StubEmbedding, StubRerank}; +use elf_chunking::ChunkingConfig; +use elf_domain::consolidation::{ + ConsolidationApplyIntent, ConsolidationInputRef, ConsolidationLineage, ConsolidationMarker, + ConsolidationMarkerSeverity, ConsolidationMarkers, ConsolidationProposalDiff, + ConsolidationReviewAction, ConsolidationSourceKind, ConsolidationSourceSnapshot, + ConsolidationUnsupportedClaimFlag, +}; +use elf_service::{ + AddNoteInput, AddNoteRequest, ConsolidationProposalInput, ConsolidationProposalReviewRequest, + ConsolidationProposalsListRequest, ConsolidationProposalsListResponse, + ConsolidationRunCreateRequest, ConsolidationRunCreateResponse, ElfService, ListRequest, + MemoryCorrectionAction, MemoryCorrectionRequest, MemoryCorrectionResponse, + MemoryHistoryGetRequest, Providers, +}; +use elf_storage::{db::Db, qdrant::QdrantStore}; +use elf_testkit::TestDatabase; +use elf_worker::worker::{self, WorkerState}; + +pub(super) const TENANT_ID: &str = "tenant_consolidation"; +pub(super) const PROJECT_ID: &str = "project_consolidation"; +pub(super) const AGENT_ID: &str = "agent_consolidation"; +pub(super) const REVIEWER_ID: &str = "reviewer_consolidation"; + +pub(super) struct ConsolidationFixture { + pub(super) service: ElfService, + _test_db: TestDatabase, +} + +pub(super) fn source_ref(note_id: Uuid) -> ConsolidationInputRef { + ConsolidationInputRef { + kind: ConsolidationSourceKind::Note, + id: note_id, + snapshot: ConsolidationSourceSnapshot { + status: Some("active".to_string()), + updated_at: Some(OffsetDateTime::UNIX_EPOCH), + content_hash: Some("blake3:acceptance-source".to_string()), + embedding_version: Some("test:test:4096".to_string()), + trace_version: None, + source_ref: serde_json::json!({ "schema": "acceptance/v1" }), + metadata: serde_json::json!({ "fixture": "consolidation" }), + }, + } +} + +pub(super) fn lineage(source: &ConsolidationInputRef) -> ConsolidationLineage { + ConsolidationLineage { + source_refs: vec![source.clone()], + parent_run_id: None, + parent_proposal_ids: Vec::new(), + } +} + +pub(super) fn proposal_input( + source: &ConsolidationInputRef, + kind: &str, +) -> ConsolidationProposalInput { + proposal_input_with_payload( + source, + kind, + serde_json::json!({ + "type": "fact", + "text": "Fact: Consolidation proposals are derived and reviewable." + }), + ) +} + +pub(super) fn proposal_input_with_payload( + source: &ConsolidationInputRef, + kind: &str, + proposed_payload: serde_json::Value, +) -> ConsolidationProposalInput { + ConsolidationProposalInput { + proposal_kind: kind.to_string(), + apply_intent: ConsolidationApplyIntent::CreateDerivedNote, + source_refs: vec![source.clone()], + source_snapshot: serde_json::json!({ "source_count": 1 }), + lineage: lineage(source), + confidence: 0.82, + unsupported_claim_flags: vec![ConsolidationUnsupportedClaimFlag { + claim_id: Some("unsupported-claim".to_string()), + message: "The source does not prove that source notes may be rewritten.".to_string(), + source: Some(source.clone()), + }], + markers: ConsolidationMarkers { + contradictions: vec![ConsolidationMarker { + severity: ConsolidationMarkerSeverity::High, + message: "Stale rewrite evidence conflicts with the proposal-only rule." + .to_string(), + source: Some(source.clone()), + }], + staleness: Vec::new(), + }, + diff: ConsolidationProposalDiff { + summary: "Create a reviewed derived note without changing source evidence.".to_string(), + before: serde_json::json!({}), + after: serde_json::json!({ + "target": "derived_note", + "text": "Fact: Consolidation proposals are derived and reviewable." + }), + }, + target_ref: serde_json::json!({}), + proposed_payload, + } +} + +pub(super) fn proposal_id_by_kind( + response: &ConsolidationProposalsListResponse, + proposal_kind: &str, +) -> Uuid { + response + .proposals + .iter() + .find(|proposal| proposal.proposal_kind == proposal_kind) + .map(|proposal| proposal.proposal_id) + .expect("proposal kind should be present") +} + +pub(super) async fn setup_service(test_name: &str) -> Option { + let Some(test_db) = acceptance::test_db().await else { + eprintln!("Skipping {test_name}; set ELF_PG_DSN to run this test."); + + return None; + }; + let Some(qdrant_url) = acceptance::test_qdrant_url() else { + eprintln!("Skipping {test_name}; set ELF_QDRANT_URL to run this test."); + + return None; + }; + let collection = test_db.collection_name("elf_acceptance"); + let docs_collection = test_db.collection_name("elf_acceptance_docs"); + let cfg = acceptance::test_config( + test_db.dsn().to_string(), + qdrant_url, + 4_096, + collection, + docs_collection, + ); + let extractor = SpyExtractor { + calls: Arc::new(AtomicUsize::new(0)), + payload: serde_json::json!({ "notes": [] }), + }; + let providers = Providers::new( + Arc::new(StubEmbedding { vector_dim: 4_096 }), + Arc::new(StubRerank), + Arc::new(extractor), + ); + let service = + acceptance::build_service(cfg, providers).await.expect("Failed to build service."); + + acceptance::reset_db(&service.db.pool).await.expect("Failed to reset test database."); + + Some(ConsolidationFixture { service, _test_db: test_db }) +} + +pub(super) async fn insert_source_note(service: &ElfService, key: &str, text: &str) -> Uuid { + let response = service + .add_note(AddNoteRequest { + tenant_id: TENANT_ID.to_string(), + project_id: PROJECT_ID.to_string(), + agent_id: AGENT_ID.to_string(), + scope: "agent_private".to_string(), + notes: vec![AddNoteInput { + r#type: "fact".to_string(), + key: Some(key.to_string()), + text: text.to_string(), + structured: None, + importance: 0.7, + confidence: 0.9, + ttl_days: None, + source_ref: serde_json::json!({ "schema": "acceptance/v1", "key": key }), + write_policy: None, + }], + }) + .await + .expect("add_note should persist source note"); + + response.results[0].note_id.expect("source note id should be present") +} + +pub(super) async fn create_run_with_proposals( + service: &ElfService, + source: &ConsolidationInputRef, + proposals: Vec, +) -> ConsolidationRunCreateResponse { + service + .consolidation_run_create(ConsolidationRunCreateRequest { + tenant_id: TENANT_ID.to_string(), + project_id: PROJECT_ID.to_string(), + agent_id: AGENT_ID.to_string(), + job_kind: "manual".to_string(), + input_refs: vec![source.clone()], + source_snapshot: serde_json::json!({ "source_count": 1 }), + lineage: lineage(source), + proposals, + }) + .await + .expect("consolidation run should be created") +} + +pub(super) async fn process_consolidation_worker(service: &ElfService) { + let tokenizer = elf_chunking::load_tokenizer(&service.cfg.chunking.tokenizer_repo) + .expect("worker tokenizer should load"); + let mut embedding = acceptance::dummy_embedding_provider(); + + embedding.dimensions = service.cfg.storage.qdrant.vector_dim; + + let worker_state = WorkerState { + db: Db::connect(&service.cfg.storage.postgres).await.expect("Failed to connect worker DB."), + qdrant: QdrantStore::new(&service.cfg.storage.qdrant) + .expect("Failed to build Qdrant store."), + docs_qdrant: QdrantStore::new_with_collection( + &service.cfg.storage.qdrant, + &service.cfg.storage.qdrant.docs_collection, + ) + .expect("Failed to build docs Qdrant store."), + embedding, + chunking: ChunkingConfig { + max_tokens: service.cfg.chunking.max_tokens, + overlap_tokens: service.cfg.chunking.overlap_tokens, + }, + tokenizer, + }; + + worker::process_once(&worker_state).await.expect("consolidation worker should process once"); +} + +pub(super) async fn materialized_proposals( + service: &ElfService, + run_id: Uuid, +) -> ConsolidationProposalsListResponse { + service + .consolidation_proposals_list(ConsolidationProposalsListRequest { + tenant_id: TENANT_ID.to_string(), + project_id: PROJECT_ID.to_string(), + run_id: Some(run_id), + review_state: None, + limit: None, + }) + .await + .expect("consolidation proposals should be listed") +} + +pub(super) async fn promote_reviewed_memory(service: &ElfService) -> Uuid { + let note_id = insert_source_note( + service, + "memory_authority_source", + "Fact: Reviewed memories require source-linked approval.", + ) + .await; + let source = source_ref(note_id); + let created = + create_run_with_proposals(service, &source, vec![proposal_input(&source, "derived_note")]) + .await; + + process_consolidation_worker(service).await; + + let materialized = materialized_proposals(service, created.run.run_id).await; + let proposal_id = materialized.proposals[0].proposal_id; + let reviewed = service + .consolidation_proposal_review(ConsolidationProposalReviewRequest { + tenant_id: TENANT_ID.to_string(), + project_id: PROJECT_ID.to_string(), + reviewer_agent_id: AGENT_ID.to_string(), + proposal_id, + review_action: ConsolidationReviewAction::Apply, + review_comment: Some("Approve memory authority candidate.".to_string()), + }) + .await + .expect("review action should promote memory"); + + reviewed + .target_ref + .get("id") + .and_then(serde_json::Value::as_str) + .and_then(|value| Uuid::parse_str(value).ok()) + .expect("applied proposal should point at promoted note") +} + +pub(super) async fn active_list_contains(service: &ElfService, note_id: Uuid) -> bool { + service + .list(ListRequest { + tenant_id: TENANT_ID.to_string(), + project_id: PROJECT_ID.to_string(), + agent_id: Some(AGENT_ID.to_string()), + scope: Some("agent_private".to_string()), + status: None, + r#type: None, + }) + .await + .expect("active notes should list") + .items + .iter() + .any(|item| item.note_id == note_id) +} + +pub(super) async fn apply_memory_correction( + service: &ElfService, + note_id: Uuid, + action: MemoryCorrectionAction, + reason: &str, + source: &str, + restore_version_id: Option, +) -> MemoryCorrectionResponse { + service + .memory_correction_apply(MemoryCorrectionRequest { + tenant_id: TENANT_ID.to_string(), + project_id: PROJECT_ID.to_string(), + actor_agent_id: AGENT_ID.to_string(), + note_id, + action, + reason: reason.to_string(), + source_ref: serde_json::json!({ + "schema": "acceptance/review", + "source": source + }), + restore_version_id, + }) + .await + .expect("memory correction should persist") +} + +pub(super) async fn memory_history_event_types(service: &ElfService, note_id: Uuid) -> Vec { + service + .memory_history_get(MemoryHistoryGetRequest { + tenant_id: TENANT_ID.to_string(), + project_id: PROJECT_ID.to_string(), + note_id, + }) + .await + .expect("promoted memory history should be readable") + .events + .into_iter() + .map(|event| event.event_type) + .collect() +} diff --git a/packages/elf-service/tests/acceptance/consolidation/tests_review.rs b/packages/elf-service/tests/acceptance/consolidation/tests_review.rs new file mode 100644 index 00000000..27b5b22b --- /dev/null +++ b/packages/elf-service/tests/acceptance/consolidation/tests_review.rs @@ -0,0 +1,75 @@ +use crate::acceptance::consolidation::tests_helpers::{self, AGENT_ID, PROJECT_ID, TENANT_ID}; +use elf_domain::consolidation::ConsolidationReviewAction; +use elf_service::{ConsolidationProposalGetRequest, ConsolidationProposalReviewRequest}; + +#[tokio::test] +#[ignore = "Requires external Postgres and Qdrant. Set ELF_PG_DSN and ELF_QDRANT_URL to run this test."] +async fn discard_and_defer_actions_remain_auditable() { + let Some(fixture) = + tests_helpers::setup_service("discard_and_defer_actions_remain_auditable").await + else { + return; + }; + let service = &fixture.service; + let note_id = tests_helpers::insert_source_note( + service, + "consolidation_review_actions", + "Fact: Discarded and deferred proposals remain auditable.", + ) + .await; + let source = tests_helpers::source_ref(note_id); + let created = tests_helpers::create_run_with_proposals( + service, + &source, + vec![ + tests_helpers::proposal_input(&source, "contradiction_report"), + tests_helpers::proposal_input(&source, "preference_candidate"), + ], + ) + .await; + + tests_helpers::process_consolidation_worker(service).await; + + let materialized = tests_helpers::materialized_proposals(service, created.run.run_id).await; + let discarded_id = tests_helpers::proposal_id_by_kind(&materialized, "contradiction_report"); + let deferred_id = tests_helpers::proposal_id_by_kind(&materialized, "preference_candidate"); + let discarded = service + .consolidation_proposal_review(ConsolidationProposalReviewRequest { + tenant_id: TENANT_ID.to_string(), + project_id: PROJECT_ID.to_string(), + reviewer_agent_id: AGENT_ID.to_string(), + proposal_id: discarded_id, + review_action: ConsolidationReviewAction::Discard, + review_comment: Some("Discard stale synthesis.".to_string()), + }) + .await + .expect("discard should be allowed"); + let deferred = service + .consolidation_proposal_review(ConsolidationProposalReviewRequest { + tenant_id: TENANT_ID.to_string(), + project_id: PROJECT_ID.to_string(), + reviewer_agent_id: AGENT_ID.to_string(), + proposal_id: deferred_id, + review_action: ConsolidationReviewAction::Defer, + review_comment: Some("Defer until more evidence is available.".to_string()), + }) + .await + .expect("defer should be allowed"); + let deferred_readback = service + .consolidation_proposal_get(ConsolidationProposalGetRequest { + tenant_id: TENANT_ID.to_string(), + project_id: PROJECT_ID.to_string(), + proposal_id: deferred_id, + }) + .await + .expect("deferred proposal should remain readable"); + + assert_eq!(discarded.review_state, "rejected"); + assert_eq!(discarded.review_events.len(), 1); + assert_eq!(discarded.review_events[0].action, "discard"); + assert_eq!(deferred.review_state, "archived"); + assert_eq!(deferred.review_events.len(), 1); + assert_eq!(deferred.review_events[0].action, "defer"); + assert_eq!(deferred_readback.review_events.len(), 1); + assert_eq!(deferred_readback.review_events[0].to_review_state, "archived"); +}