diff --git a/apps/elf-mcp/src/app/server/tests.rs b/apps/elf-mcp/src/app/server/tests.rs index 49f8d7de..ec106b56 100644 --- a/apps/elf-mcp/src/app/server/tests.rs +++ b/apps/elf-mcp/src/app/server/tests.rs @@ -1,412 +1,15 @@ -use std::{ - collections::HashMap, - sync::{Arc, Mutex}, - time::Duration, -}; +mod forwarding; +mod schemas; +mod tool_definitions; -use axum::{ - Json, Router, - extract::State, - http::{HeaderMap, Method, Uri}, - routing, -}; -use serde_json::Map; -use tokio::{ - net::TcpListener, - sync::{ - oneshot, - oneshot::{Receiver, Sender}, - }, - time, -}; +use axum::http::HeaderMap; use crate::app::{ McpAuthState, - server::{ElfContextHeaders, ElfMcp, HEADER_AUTHORIZATION, HttpMethod}, + server::{ElfContextHeaders, ElfMcp, HEADER_AUTHORIZATION}, }; use elf_config::McpContext; -type RequestRecorder = Arc>>>; - -const ALL_TOOL_DEFINITIONS: [ToolDefinition; 37] = [ - ToolDefinition::new( - "elf_notes_ingest", - HttpMethod::Post, - "/v2/notes/ingest", - "Ingest deterministic notes into ELF. This tool never calls an LLM.", - ), - ToolDefinition::new( - "elf_graph_query", - HttpMethod::Post, - "/v2/graph/query", - "Query graph entities and relations by structured criteria.", - ), - ToolDefinition::new( - "elf_graph_report", - HttpMethod::Post, - "/v2/graph/report", - "Build a source-backed graph topic map with current, historical, future, inferred, ambiguous, stale, and superseded fact markers.", - ), - ToolDefinition::new( - "elf_events_ingest", - HttpMethod::Post, - "/v2/events/ingest", - "Ingest an event by extracting evidence-bound notes using the configured LLM extractor.", - ), - ToolDefinition::new( - "elf_searches_create", - HttpMethod::Post, - "/v2/searches", - "Create a search session using quick-find or planned-search mode. Response includes optional trajectory_summary.", - ), - ToolDefinition::new( - "elf_core_blocks_get", - HttpMethod::Get, - "/v2/core-blocks", - "Fetch core memory blocks explicitly attached to the configured agent and read profile.", - ), - ToolDefinition::new( - "elf_entity_memory_get", - HttpMethod::Get, - "/v2/entity-memory", - "Fetch an entity-scoped memory view across attached core blocks and graph-linked archival notes.", - ), - ToolDefinition::new( - "elf_dreaming_review_queue", - HttpMethod::Get, - "/v2/admin/dreaming/review-queue", - "List source-backed Dreaming review queue proposals with variants, affected refs, lint flags, policy gates, and review audit.", - ), - ToolDefinition::new( - "elf_recall_debug_panel", - HttpMethod::Post, - "/v2/recall-debug/panel", - "Build an agent-facing cross-layer recall/debug panel and deterministic recall_trace over memory traces, source documents, knowledge pages, graph facts, and Dreaming proposals.", - ), - ToolDefinition::new( - "elf_work_journal_entry_create", - HttpMethod::Post, - "/v2/work-journal/entries", - "Capture one source-adjacent Work Journal entry with source refs, redaction, next-step, rejected-option, and promotion-boundary metadata.", - ), - ToolDefinition::new( - "elf_work_journal_entry_get", - HttpMethod::Get, - "/v2/work-journal/entries/{entry_id}", - "Fetch one readable Work Journal entry by entry_id.", - ), - ToolDefinition::new( - "elf_work_journal_session_readback", - HttpMethod::Post, - "/v2/work-journal/readback", - "Read newest Work Journal entries for a session and return a where_stopped projection with journal evidence.", - ), - ToolDefinition::new( - "elf_searches_get", - HttpMethod::Get, - "/v2/searches/{search_id}", - "Fetch a search session index view by search_id, including optional trajectory_summary.", - ), - ToolDefinition::new( - "elf_searches_timeline", - HttpMethod::Get, - "/v2/searches/{search_id}/timeline", - "Build a timeline view from a search session.", - ), - ToolDefinition::new( - "elf_searches_notes", - HttpMethod::Post, - "/v2/searches/{search_id}/notes", - "Fetch note details for selected note_ids from a search session. l0/l1 strip evidence/source_ref/structured; l2 returns full detail.", - ), - ToolDefinition::new( - "elf_notes_list", - HttpMethod::Get, - "/v2/notes", - "List notes in a tenant and project with optional filters.", - ), - ToolDefinition::new( - "elf_notes_get", - HttpMethod::Get, - "/v2/notes/{note_id}", - "Fetch a single note by note_id.", - ), - ToolDefinition::new( - "elf_notes_patch", - HttpMethod::Patch, - "/v2/notes/{note_id}", - "Patch a note by note_id. Only provided fields are updated.", - ), - ToolDefinition::new( - "elf_notes_delete", - HttpMethod::Delete, - "/v2/notes/{note_id}", - "Delete a note by note_id.", - ), - ToolDefinition::new( - "elf_notes_publish", - HttpMethod::Post, - "/v2/notes/{note_id}/publish", - "Publish a note from agent_private into a shared space (team_shared or org_shared).", - ), - ToolDefinition::new( - "elf_notes_unpublish", - HttpMethod::Post, - "/v2/notes/{note_id}/unpublish", - "Unpublish a shared note back into agent_private scope.", - ), - ToolDefinition::new( - "elf_space_grants_list", - HttpMethod::Get, - "/v2/spaces/{space}/grants", - "List sharing grants for a space (team_shared or org_shared).", - ), - ToolDefinition::new( - "elf_space_grant_upsert", - HttpMethod::Post, - "/v2/spaces/{space}/grants", - "Upsert a sharing grant for a space (team_shared or org_shared).", - ), - ToolDefinition::new( - "elf_space_grant_revoke", - HttpMethod::Post, - "/v2/spaces/{space}/grants/revoke", - "Revoke a sharing grant for a space (team_shared or org_shared).", - ), - ToolDefinition::new( - "elf_admin_traces_recent_list", - HttpMethod::Get, - "/v2/admin/traces/recent", - "List recent traces by tenant/project with optional cursor and filters.", - ), - ToolDefinition::new( - "elf_admin_trace_get", - HttpMethod::Get, - "/v2/admin/traces/{trace_id}", - "Fetch trace metadata, items, and optional trajectory summary by trace_id.", - ), - ToolDefinition::new( - "elf_admin_trajectory_get", - HttpMethod::Get, - "/v2/admin/trajectories/{trace_id}", - "Fetch trace trajectory and stage payload by trace_id.", - ), - ToolDefinition::new( - "elf_admin_trace_item_get", - HttpMethod::Get, - "/v2/admin/trace-items/{item_id}", - "Fetch a trace item explain payload by item_id.", - ), - ToolDefinition::new( - "elf_admin_note_provenance_get", - HttpMethod::Get, - "/v2/admin/notes/{note_id}/provenance", - "Fetch provenance bundle for a note.", - ), - ToolDefinition::new( - "elf_admin_memory_history_get", - HttpMethod::Get, - "/v2/admin/notes/{note_id}/history", - "Fetch chronological memory history for a note.", - ), - ToolDefinition::new( - "elf_admin_trace_bundle_get", - HttpMethod::Get, - "/v2/admin/traces/{trace_id}/bundle", - "Fetch trace bundle for replay and diagnostics by trace_id.", - ), - ToolDefinition::new( - "elf_admin_events_ingestion_profiles_list", - HttpMethod::Get, - "/v2/admin/events/ingestion-profiles", - "List latest ingestion profiles for add_event.", - ), - ToolDefinition::new( - "elf_admin_events_ingestion_profiles_create", - HttpMethod::Post, - "/v2/admin/events/ingestion-profiles", - "Create a new ingestion profile version for add_event.", - ), - ToolDefinition::new( - "elf_admin_events_ingestion_profile_get", - HttpMethod::Get, - "/v2/admin/events/ingestion-profiles/{profile_id}", - "Get a single ingestion profile by id/version for add_event.", - ), - ToolDefinition::new( - "elf_admin_events_ingestion_profile_versions_list", - HttpMethod::Get, - "/v2/admin/events/ingestion-profiles/{profile_id}/versions", - "List all versions of one ingestion profile for add_event.", - ), - ToolDefinition::new( - "elf_admin_events_ingestion_profile_default_get", - HttpMethod::Get, - "/v2/admin/events/ingestion-profiles/default", - "Get the active default ingestion profile for add_event.", - ), - ToolDefinition::new( - "elf_admin_events_ingestion_profile_default_set", - HttpMethod::Put, - "/v2/admin/events/ingestion-profiles/default", - "Set the default ingestion profile for add_event.", - ), -]; - -#[derive(Clone, Copy, Debug, Eq, PartialEq)] -struct ToolDefinition { - name: &'static str, - method: HttpMethod, - path: &'static str, - description: &'static str, - streaming: bool, -} -impl ToolDefinition { - const fn new( - name: &'static str, - method: HttpMethod, - path: &'static str, - description: &'static str, - ) -> Self { - Self { name, method, path, description, streaming: true } - } -} - -struct RecordedRequest { - method: Method, - path: String, - body: serde_json::Value, -} - -fn build_tools() -> HashMap<&'static str, ToolDefinition> { - ALL_TOOL_DEFINITIONS.into_iter().map(|tool| (tool.name, tool)).collect() -} - -#[test] -fn registers_all_tools() { - let tools = build_tools(); - let expected = [ - "elf_notes_ingest", - "elf_graph_query", - "elf_graph_report", - "elf_events_ingest", - "elf_core_blocks_get", - "elf_entity_memory_get", - "elf_searches_create", - "elf_searches_get", - "elf_searches_timeline", - "elf_searches_notes", - "elf_notes_list", - "elf_notes_get", - "elf_notes_patch", - "elf_notes_delete", - "elf_notes_publish", - "elf_notes_unpublish", - "elf_space_grants_list", - "elf_space_grant_upsert", - "elf_space_grant_revoke", - "elf_admin_traces_recent_list", - "elf_dreaming_review_queue", - "elf_recall_debug_panel", - "elf_work_journal_entry_create", - "elf_work_journal_entry_get", - "elf_work_journal_session_readback", - "elf_admin_trace_get", - "elf_admin_trajectory_get", - "elf_admin_trace_item_get", - "elf_admin_note_provenance_get", - "elf_admin_memory_history_get", - "elf_admin_trace_bundle_get", - "elf_admin_events_ingestion_profiles_list", - "elf_admin_events_ingestion_profiles_create", - "elf_admin_events_ingestion_profile_get", - "elf_admin_events_ingestion_profile_versions_list", - "elf_admin_events_ingestion_profile_default_get", - "elf_admin_events_ingestion_profile_default_set", - ]; - - for name in expected { - assert!(tools.contains_key(name), "Missing tool registration: {name}."); - } - - assert_eq!(tools.len(), expected.len(), "Unexpected tool count for MCP registration."); -} - -#[test] -fn notes_ingest_schema_includes_structured_entities_relations() { - let schema = super::notes_ingest_schema(); - let notes = schema - .get("properties") - .and_then(serde_json::Value::as_object) - .expect("notes ingest schema is missing properties.") - .get("notes") - .and_then(serde_json::Value::as_object) - .expect("notes schema is missing notes."); - let note_items = notes - .get("items") - .and_then(serde_json::Value::as_object) - .expect("notes schema is missing items."); - let note_properties = note_items - .get("properties") - .and_then(serde_json::Value::as_object) - .expect("notes schema is missing note item properties."); - let structured = note_properties - .get("structured") - .and_then(serde_json::Value::as_object) - .expect("notes schema is missing structured."); - let structured_type = structured - .get("type") - .and_then(serde_json::Value::as_array) - .expect("structured.type is not an array."); - - assert!( - structured_type.contains(&serde_json::Value::String("object".to_string())) - && structured_type.contains(&serde_json::Value::String("null".to_string())) - ); - - let structured_properties = structured - .get("properties") - .and_then(serde_json::Value::as_object) - .expect("structured schema is missing properties."); - - assert!(structured_properties.contains_key("entities")); - assert!(structured_properties.contains_key("relations")); - - let relation_object = structured_properties - .get("relations") - .and_then(serde_json::Value::as_object) - .and_then(|relations| relations.get("items")) - .and_then(serde_json::Value::as_object) - .and_then(|items| items.get("properties")) - .and_then(serde_json::Value::as_object) - .expect("relations schema is missing properties.") - .get("object") - .and_then(serde_json::Value::as_object) - .expect("relation schema is missing object."); - let one_of = relation_object - .get("oneOf") - .and_then(serde_json::Value::as_array) - .expect("relation object is missing oneOf."); - - assert_eq!(one_of.len(), 2, "relation object should have entity/value oneOf variants."); - assert!(one_of.iter().any(|variant| { - variant.as_object().is_some_and(|branch| { - branch - .get("required") - .and_then(serde_json::Value::as_array) - .is_some_and(|required| required.iter().any(|value| value == "entity")) - }) - })); - assert!(one_of.iter().any(|variant| { - variant.as_object().is_some_and(|branch| { - branch - .get("required") - .and_then(serde_json::Value::as_array) - .is_some_and(|required| required.iter().any(|value| value == "value")) - }) - })); -} - #[test] fn admin_paths_use_admin_api_base() { let context = McpContext { @@ -429,48 +32,6 @@ fn admin_paths_use_admin_api_base() { assert_eq!(mcp.api_base_for_path("/v2/recall-debug/panel"), "http://127.0.0.1:9000"); } -#[test] -fn recall_debug_tool_uses_public_agent_route() { - let tools = build_tools(); - let tool = tools.get("elf_recall_debug_panel").expect("Missing recall debug panel tool."); - - assert_eq!(tool.path, "/v2/recall-debug/panel"); - assert!(tool.description.contains("recall_trace")); -} - -#[test] -fn recall_debug_panel_schema_rejects_context_override_fields() { - let schema = super::recall_debug_panel_schema(); - let properties = schema - .get("properties") - .and_then(serde_json::Value::as_object) - .expect("recall debug panel schema is missing properties."); - - assert_eq!(schema.get("additionalProperties"), Some(&serde_json::Value::Bool(false))); - - for key in ["tenant_id", "project_id", "agent_id", "read_profile"] { - assert!(!properties.contains_key(key), "{key} must not be a tool param."); - } - for key in ["graph_subject", "graph_predicate"] { - let one_of = properties - .get(key) - .and_then(serde_json::Value::as_object) - .and_then(|schema| schema.get("oneOf")) - .and_then(serde_json::Value::as_array) - .expect("selector schema is missing oneOf."); - - for branch in one_of.iter().filter_map(serde_json::Value::as_object) { - if branch.get("type").and_then(serde_json::Value::as_str) == Some("object") { - assert_eq!( - branch.get("additionalProperties"), - Some(&serde_json::Value::Bool(false)), - "{key} selector object branches must be closed." - ); - } - } - } -} - #[test] fn off_mode_allows_requests_without_auth_header() { let headers = HeaderMap::new(); @@ -501,305 +62,3 @@ fn static_keys_mode_rejects_non_bearer_schemes() { &McpAuthState::StaticKeys { bearer_token: "token-a".to_string() } )); } - -#[test] -fn docs_search_l0_schema_includes_filter_fields() { - let schema = super::docs_search_l0_schema(); - let properties = schema - .get("properties") - .and_then(serde_json::Value::as_object) - .expect("docs_search_l0 schema is missing properties."); - let required = ["query"]; - let expected = [ - "scope", - "status", - "doc_type", - "agent_id", - "thread_id", - "updated_after", - "updated_before", - "ts_gte", - "ts_lte", - "sparse_mode", - "domain", - "repo", - "explain", - ]; - - for field in required { - assert!( - schema - .get("required") - .and_then(serde_json::Value::as_array) - .is_some_and(|fields| { fields.iter().any(|value| value.as_str() == Some(field)) }), - "Missing required field {field}." - ); - } - for field in expected { - assert!(properties.contains_key(field), "Missing schema field: {field}."); - } - - assert_eq!( - properties.get("status").and_then(serde_json::Value::as_object).and_then(|status| { - status.get("enum").and_then(serde_json::Value::as_array).map(|vals| vals.to_vec()) - }), - Some(vec![ - serde_json::Value::String("active".to_string()), - serde_json::Value::String("deleted".to_string()), - serde_json::Value::Null, - ]) - ); - assert_eq!( - properties.get("sparse_mode").and_then(serde_json::Value::as_object).and_then(|field| { - field.get("enum").and_then(serde_json::Value::as_array).map(|vals| vals.to_vec()) - }), - Some(vec![ - serde_json::Value::String("auto".to_string()), - serde_json::Value::String("on".to_string()), - serde_json::Value::String("off".to_string()), - serde_json::Value::Null, - ]) - ); -} - -#[test] -fn docs_put_schema_includes_required_fields_and_write_policy() { - let schema = super::docs_put_schema(); - let properties = schema - .get("properties") - .and_then(serde_json::Value::as_object) - .expect("docs_put schema is missing properties."); - let required = ["scope", "content", "source_ref"]; - let expected = ["scope", "doc_type", "title", "source_ref", "write_policy", "content"]; - - for field in required { - assert!( - schema - .get("required") - .and_then(serde_json::Value::as_array) - .is_some_and(|fields| { fields.iter().any(|value| value.as_str() == Some(field)) }), - "Missing required field {field}." - ); - } - for field in expected { - assert!(properties.contains_key(field), "Missing schema field: {field}."); - } - - let write_policy = properties.get("write_policy").and_then(serde_json::Value::as_object); - let source_ref_properties = properties - .get("source_ref") - .and_then(|value| value.get("properties")) - .and_then(serde_json::Value::as_object) - .expect("docs_put source_ref schema is missing properties."); - - assert!( - write_policy.is_some_and(|field| { - field.get("type").and_then(serde_json::Value::as_array).is_some_and(|types| { - types.contains(&serde_json::Value::String("object".to_string())) - && types.contains(&serde_json::Value::String("null".to_string())) - }) - }), - "Missing write_policy object/null type in docs_put schema." - ); - - for field in ["source_kind", "canonical_uri", "captured_at", "trust_label", "excerpt_locator"] { - assert!(source_ref_properties.contains_key(field), "Missing source_ref field: {field}."); - } -} - -#[test] -fn work_journal_schemas_include_families_and_source_refs() { - let create_schema = super::work_journal_entry_create_schema(); - let create_properties = create_schema - .get("properties") - .and_then(serde_json::Value::as_object) - .expect("work_journal_entry_create schema is missing properties."); - let readback_schema = super::work_journal_session_readback_schema(); - let readback_properties = readback_schema - .get("properties") - .and_then(serde_json::Value::as_object) - .expect("work_journal_session_readback schema is missing properties."); - - for field in ["scope", "session_id", "family", "body", "source_refs"] { - assert!( - create_schema - .get("required") - .and_then(serde_json::Value::as_array) - .is_some_and(|fields| { fields.iter().any(|value| value.as_str() == Some(field)) }), - "Missing Work Journal required field {field}." - ); - } - - assert!(create_properties.contains_key("write_policy")); - assert!(create_properties.contains_key("promotion_boundary")); - assert!(readback_properties.contains_key("session_id")); - assert!(readback_properties.contains_key("families")); -} - -#[test] -fn docs_excerpts_get_schema_includes_l0_level_and_optional_explain() { - let schema = super::docs_excerpts_get_schema(); - let properties = schema - .get("properties") - .and_then(serde_json::Value::as_object) - .expect("docs_excerpts_get schema is missing properties."); - let level_values = properties - .get("level") - .and_then(|level| level.get("enum")) - .and_then(|values| values.as_array()) - .expect("docs_excerpts_get level schema is missing enum."); - - assert!(level_values.contains(&serde_json::Value::String("L0".to_string()))); - assert!(properties.contains_key("explain")); -} - -#[test] -fn payload_level_schema_for_search_tools_is_l0_l1_l2() { - for schema in [ - super::searches_create_schema(), - super::searches_get_schema(), - super::searches_timeline_schema(), - super::searches_notes_schema(), - ] { - let properties = schema - .get("properties") - .and_then(serde_json::Value::as_object) - .expect("Search schema is missing properties."); - let payload_level = properties - .get("payload_level") - .and_then(serde_json::Value::as_object) - .expect("payload_level field is missing from search schema."); - let payload_level_values = payload_level - .get("enum") - .and_then(serde_json::Value::as_array) - .expect("payload_level enum is missing."); - - assert_eq!(payload_level_values.len(), 4, "Unexpected payload_level enum length."); - assert!(payload_level_values.iter().any(|value| value.as_str() == Some("l0"))); - assert!(payload_level_values.iter().any(|value| value.as_str() == Some("l1"))); - assert!(payload_level_values.iter().any(|value| value.as_str() == Some("l2"))); - assert!(payload_level_values.iter().any(|value| value.is_null())); - } -} - -#[test] -fn searches_notes_tool_description_mentions_payload_level_shapes() { - let tools = build_tools(); - let tool = - tools.get("elf_searches_notes").expect("Missing elf_searches_notes tool definition."); - let description = tool.description.to_lowercase(); - - assert_eq!(tool.path, "/v2/searches/{search_id}/notes"); - assert!(description.contains("l0")); - assert!(description.contains("l1")); - assert!(description.contains("l2")); - assert!(description.contains("source_ref")); - assert!(description.contains("structured")); -} - -#[tokio::test] -async fn recall_debug_panel_rejects_context_override_params() { - let context = McpContext { - tenant_id: "tenant-a".to_string(), - project_id: "project-a".to_string(), - agent_id: "agent-a".to_string(), - read_profile: "private_plus_project".to_string(), - }; - let mcp = ElfMcp::new( - "http://127.0.0.1:1".to_string(), - "http://127.0.0.1:1".to_string(), - ElfContextHeaders::new(&context), - McpAuthState::Off, - ); - let params = Map::from_iter([( - "tenant_id".to_string(), - serde_json::Value::String("tenant-override".to_string()), - )]); - let result = mcp.elf_recall_debug_panel(params).await; - let err = result.expect_err("context override params must fail before forwarding."); - - assert!(format!("{err:?}").contains("tenant_id")); -} - -#[tokio::test] -async fn default_ingestion_profile_set_uses_put_admin_default_path() { - let (admin_base, received) = spawn_recording_admin_server().await; - let context = McpContext { - tenant_id: "tenant-a".to_string(), - project_id: "project-a".to_string(), - agent_id: "agent-a".to_string(), - read_profile: "private_plus_project".to_string(), - }; - let mcp = ElfMcp::new( - "http://127.0.0.1:9000".to_string(), - admin_base, - ElfContextHeaders::new(&context), - McpAuthState::Off, - ); - let params = Map::from_iter([ - ("profile_id".to_string(), serde_json::Value::String("profile-a".to_string())), - ("version".to_string(), serde_json::Value::Number(2.into())), - ]); - let result = mcp.elf_admin_events_ingestion_profile_default_set(params).await; - - assert!(result.is_ok(), "default setter should forward successfully: {result:?}"); - - let request = receive_recorded_request(received).await; - - assert_eq!(request.method, Method::PUT); - assert_eq!(request.path, "/v2/admin/events/ingestion-profiles/default"); - assert_eq!( - request.body.get("profile_id").and_then(serde_json::Value::as_str), - Some("profile-a") - ); - assert_eq!(request.body.get("version").and_then(serde_json::Value::as_i64), Some(2)); -} - -async fn spawn_recording_admin_server() -> (String, Receiver) { - let (tx, rx) = oneshot::channel(); - let app = Router::new() - .route("/v2/admin/events/ingestion-profiles/default", routing::any(record_request)) - .with_state(Arc::new(Mutex::new(Some(tx)))); - let listener = match TcpListener::bind("127.0.0.1:0").await { - Ok(listener) => listener, - Err(err) => panic!("Failed to bind MCP recording admin server: {err}."), - }; - let addr = match listener.local_addr() { - Ok(addr) => addr, - Err(err) => panic!("Failed to read MCP recording admin server address: {err}."), - }; - - tokio::spawn(async move { - if let Err(err) = axum::serve(listener, app).await { - panic!("MCP recording admin server failed: {err}."); - } - }); - - (format!("http://{addr}"), rx) -} - -async fn record_request( - State(recorder): State, - method: Method, - uri: Uri, - Json(body): Json, -) -> Json { - let mut sender = match recorder.lock() { - Ok(sender) => sender, - Err(err) => panic!("MCP recording admin server mutex was poisoned: {err}."), - }; - - if let Some(tx) = sender.take() { - let _ = tx.send(RecordedRequest { method, path: uri.path().to_string(), body }); - } - - Json(serde_json::json!({ "ok": true })) -} - -async fn receive_recorded_request(received: Receiver) -> RecordedRequest { - match time::timeout(Duration::from_secs(3), received).await { - Ok(Ok(request)) => request, - Ok(Err(err)) => panic!("MCP recording admin server closed before recording: {err}."), - Err(err) => panic!("Timed out waiting for MCP recording admin server: {err}."), - } -} diff --git a/apps/elf-mcp/src/app/server/tests/forwarding.rs b/apps/elf-mcp/src/app/server/tests/forwarding.rs new file mode 100644 index 00000000..55d8a36c --- /dev/null +++ b/apps/elf-mcp/src/app/server/tests/forwarding.rs @@ -0,0 +1,136 @@ +use std::{ + sync::{Arc, Mutex}, + time::Duration, +}; + +use axum::{ + Json, Router, + extract::State, + http::{Method, Uri}, + routing, +}; +use serde_json::{Map, Value}; +use tokio::{ + net::TcpListener, + sync::{ + oneshot, + oneshot::{Receiver, Sender}, + }, + time, +}; + +use crate::app::{ + McpAuthState, + server::{ElfContextHeaders, ElfMcp}, +}; +use elf_config::McpContext; + +type RequestRecorder = Arc>>>; + +struct RecordedRequest { + method: Method, + path: String, + body: Value, +} + +#[tokio::test] +async fn recall_debug_panel_rejects_context_override_params() { + let context = McpContext { + tenant_id: "tenant-a".to_string(), + project_id: "project-a".to_string(), + agent_id: "agent-a".to_string(), + read_profile: "private_plus_project".to_string(), + }; + let mcp = ElfMcp::new( + "http://127.0.0.1:1".to_string(), + "http://127.0.0.1:1".to_string(), + ElfContextHeaders::new(&context), + McpAuthState::Off, + ); + let params = + Map::from_iter([("tenant_id".to_string(), Value::String("tenant-override".to_string()))]); + let result = mcp.elf_recall_debug_panel(params).await; + let err = result.expect_err("context override params must fail before forwarding."); + + assert!(format!("{err:?}").contains("tenant_id")); +} + +#[tokio::test] +async fn default_ingestion_profile_set_uses_put_admin_default_path() { + let (admin_base, received) = spawn_recording_admin_server().await; + let context = McpContext { + tenant_id: "tenant-a".to_string(), + project_id: "project-a".to_string(), + agent_id: "agent-a".to_string(), + read_profile: "private_plus_project".to_string(), + }; + let mcp = ElfMcp::new( + "http://127.0.0.1:9000".to_string(), + admin_base, + ElfContextHeaders::new(&context), + McpAuthState::Off, + ); + let params = Map::from_iter([ + ("profile_id".to_string(), Value::String("profile-a".to_string())), + ("version".to_string(), Value::Number(2.into())), + ]); + let result = mcp.elf_admin_events_ingestion_profile_default_set(params).await; + + assert!(result.is_ok(), "default setter should forward successfully: {result:?}"); + + let request = receive_recorded_request(received).await; + + assert_eq!(request.method, Method::PUT); + assert_eq!(request.path, "/v2/admin/events/ingestion-profiles/default"); + assert_eq!(request.body.get("profile_id").and_then(Value::as_str), Some("profile-a")); + assert_eq!(request.body.get("version").and_then(Value::as_i64), Some(2)); +} + +async fn spawn_recording_admin_server() -> (String, Receiver) { + let (tx, rx) = oneshot::channel(); + let app = Router::new() + .route("/v2/admin/events/ingestion-profiles/default", routing::any(record_request)) + .with_state(Arc::new(Mutex::new(Some(tx)))); + let listener = match TcpListener::bind("127.0.0.1:0").await { + Ok(listener) => listener, + Err(err) => panic!("Failed to bind MCP recording admin server: {err}."), + }; + let addr = match listener.local_addr() { + Ok(addr) => addr, + Err(err) => panic!("Failed to read MCP recording admin server address: {err}."), + }; + + tokio::spawn(async move { + if let Err(err) = axum::serve(listener, app).await { + panic!("MCP recording admin server failed: {err}."); + } + }); + + (format!("http://{addr}"), rx) +} + +async fn record_request( + State(recorder): State, + method: Method, + uri: Uri, + Json(body): Json, +) -> Json { + let mut sender = match recorder.lock() { + Ok(sender) => sender, + Err(err) => panic!("MCP recording admin server mutex was poisoned: {err}."), + }; + + if let Some(tx) = sender.take() { + let _ = tx.send(RecordedRequest { method, path: uri.path().to_string(), body }); + } + + Json(serde_json::json!({ "ok": true })) +} + +async fn receive_recorded_request(received: Receiver) -> RecordedRequest { + match time::timeout(Duration::from_secs(3), received).await { + Ok(Ok(request)) => request, + Ok(Err(err)) => panic!("MCP recording admin server closed before recording: {err}."), + Err(err) => panic!("Timed out waiting for MCP recording admin server: {err}."), + } +} diff --git a/apps/elf-mcp/src/app/server/tests/schemas.rs b/apps/elf-mcp/src/app/server/tests/schemas.rs new file mode 100644 index 00000000..f3d1b9f3 --- /dev/null +++ b/apps/elf-mcp/src/app/server/tests/schemas.rs @@ -0,0 +1,287 @@ +use serde_json::Value; + +use crate::app::server; + +#[test] +fn notes_ingest_schema_includes_structured_entities_relations() { + let schema = server::notes_ingest_schema(); + let notes = schema + .get("properties") + .and_then(Value::as_object) + .expect("notes ingest schema is missing properties.") + .get("notes") + .and_then(Value::as_object) + .expect("notes schema is missing notes."); + let note_items = + notes.get("items").and_then(Value::as_object).expect("notes schema is missing items."); + let note_properties = note_items + .get("properties") + .and_then(Value::as_object) + .expect("notes schema is missing note item properties."); + let structured = note_properties + .get("structured") + .and_then(Value::as_object) + .expect("notes schema is missing structured."); + let structured_type = + structured.get("type").and_then(Value::as_array).expect("structured.type is not an array."); + + assert!( + structured_type.contains(&Value::String("object".to_string())) + && structured_type.contains(&Value::String("null".to_string())) + ); + + let structured_properties = structured + .get("properties") + .and_then(Value::as_object) + .expect("structured schema is missing properties."); + + assert!(structured_properties.contains_key("entities")); + assert!(structured_properties.contains_key("relations")); + + let relation_object = structured_properties + .get("relations") + .and_then(Value::as_object) + .and_then(|relations| relations.get("items")) + .and_then(Value::as_object) + .and_then(|items| items.get("properties")) + .and_then(Value::as_object) + .expect("relations schema is missing properties.") + .get("object") + .and_then(Value::as_object) + .expect("relation schema is missing object."); + let one_of = relation_object + .get("oneOf") + .and_then(Value::as_array) + .expect("relation object is missing oneOf."); + + assert_eq!(one_of.len(), 2, "relation object should have entity/value oneOf variants."); + assert!(one_of.iter().any(|variant| { + variant.as_object().is_some_and(|branch| { + branch + .get("required") + .and_then(Value::as_array) + .is_some_and(|required| required.iter().any(|value| value == "entity")) + }) + })); + assert!(one_of.iter().any(|variant| { + variant.as_object().is_some_and(|branch| { + branch + .get("required") + .and_then(Value::as_array) + .is_some_and(|required| required.iter().any(|value| value == "value")) + }) + })); +} + +#[test] +fn recall_debug_panel_schema_rejects_context_override_fields() { + let schema = server::recall_debug_panel_schema(); + let properties = schema + .get("properties") + .and_then(Value::as_object) + .expect("recall debug panel schema is missing properties."); + + assert_eq!(schema.get("additionalProperties"), Some(&Value::Bool(false))); + + for key in ["tenant_id", "project_id", "agent_id", "read_profile"] { + assert!(!properties.contains_key(key), "{key} must not be a tool param."); + } + for key in ["graph_subject", "graph_predicate"] { + let one_of = properties + .get(key) + .and_then(Value::as_object) + .and_then(|schema| schema.get("oneOf")) + .and_then(Value::as_array) + .expect("selector schema is missing oneOf."); + + for branch in one_of.iter().filter_map(Value::as_object) { + if branch.get("type").and_then(Value::as_str) == Some("object") { + assert_eq!( + branch.get("additionalProperties"), + Some(&Value::Bool(false)), + "{key} selector object branches must be closed." + ); + } + } + } +} + +#[test] +fn docs_search_l0_schema_includes_filter_fields() { + let schema = server::docs_search_l0_schema(); + let properties = schema + .get("properties") + .and_then(Value::as_object) + .expect("docs_search_l0 schema is missing properties."); + let required = ["query"]; + let expected = [ + "scope", + "status", + "doc_type", + "agent_id", + "thread_id", + "updated_after", + "updated_before", + "ts_gte", + "ts_lte", + "sparse_mode", + "domain", + "repo", + "explain", + ]; + + for field in required { + assert!( + schema + .get("required") + .and_then(Value::as_array) + .is_some_and(|fields| { fields.iter().any(|value| value.as_str() == Some(field)) }), + "Missing required field {field}." + ); + } + for field in expected { + assert!(properties.contains_key(field), "Missing schema field: {field}."); + } + + assert_eq!( + properties.get("status").and_then(Value::as_object).and_then(|status| { + status.get("enum").and_then(Value::as_array).map(|vals| vals.to_vec()) + }), + Some(vec![ + Value::String("active".to_string()), + Value::String("deleted".to_string()), + Value::Null, + ]) + ); + assert_eq!( + properties.get("sparse_mode").and_then(Value::as_object).and_then(|field| { + field.get("enum").and_then(Value::as_array).map(|vals| vals.to_vec()) + }), + Some(vec![ + Value::String("auto".to_string()), + Value::String("on".to_string()), + Value::String("off".to_string()), + Value::Null, + ]) + ); +} + +#[test] +fn docs_put_schema_includes_required_fields_and_write_policy() { + let schema = server::docs_put_schema(); + let properties = schema + .get("properties") + .and_then(Value::as_object) + .expect("docs_put schema is missing properties."); + let required = ["scope", "content", "source_ref"]; + let expected = ["scope", "doc_type", "title", "source_ref", "write_policy", "content"]; + + for field in required { + assert!( + schema + .get("required") + .and_then(Value::as_array) + .is_some_and(|fields| { fields.iter().any(|value| value.as_str() == Some(field)) }), + "Missing required field {field}." + ); + } + for field in expected { + assert!(properties.contains_key(field), "Missing schema field: {field}."); + } + + let write_policy = properties.get("write_policy").and_then(Value::as_object); + let source_ref_properties = properties + .get("source_ref") + .and_then(|value| value.get("properties")) + .and_then(Value::as_object) + .expect("docs_put source_ref schema is missing properties."); + + assert!( + write_policy.is_some_and(|field| { + field.get("type").and_then(Value::as_array).is_some_and(|types| { + types.contains(&Value::String("object".to_string())) + && types.contains(&Value::String("null".to_string())) + }) + }), + "Missing write_policy object/null type in docs_put schema." + ); + + for field in ["source_kind", "canonical_uri", "captured_at", "trust_label", "excerpt_locator"] { + assert!(source_ref_properties.contains_key(field), "Missing source_ref field: {field}."); + } +} + +#[test] +fn work_journal_schemas_include_families_and_source_refs() { + let create_schema = server::work_journal_entry_create_schema(); + let create_properties = create_schema + .get("properties") + .and_then(Value::as_object) + .expect("work_journal_entry_create schema is missing properties."); + let readback_schema = server::work_journal_session_readback_schema(); + let readback_properties = readback_schema + .get("properties") + .and_then(Value::as_object) + .expect("work_journal_session_readback schema is missing properties."); + + for field in ["scope", "session_id", "family", "body", "source_refs"] { + assert!( + create_schema + .get("required") + .and_then(Value::as_array) + .is_some_and(|fields| { fields.iter().any(|value| value.as_str() == Some(field)) }), + "Missing Work Journal required field {field}." + ); + } + + assert!(create_properties.contains_key("write_policy")); + assert!(create_properties.contains_key("promotion_boundary")); + assert!(readback_properties.contains_key("session_id")); + assert!(readback_properties.contains_key("families")); +} + +#[test] +fn docs_excerpts_get_schema_includes_l0_level_and_optional_explain() { + let schema = server::docs_excerpts_get_schema(); + let properties = schema + .get("properties") + .and_then(Value::as_object) + .expect("docs_excerpts_get schema is missing properties."); + let level_values = properties + .get("level") + .and_then(|level| level.get("enum")) + .and_then(|values| values.as_array()) + .expect("docs_excerpts_get level schema is missing enum."); + + assert!(level_values.contains(&Value::String("L0".to_string()))); + assert!(properties.contains_key("explain")); +} + +#[test] +fn payload_level_schema_for_search_tools_is_l0_l1_l2() { + for schema in [ + server::searches_create_schema(), + server::searches_get_schema(), + server::searches_timeline_schema(), + server::searches_notes_schema(), + ] { + let properties = schema + .get("properties") + .and_then(Value::as_object) + .expect("Search schema is missing properties."); + let payload_level = properties + .get("payload_level") + .and_then(Value::as_object) + .expect("payload_level field is missing from search schema."); + let payload_level_values = payload_level + .get("enum") + .and_then(Value::as_array) + .expect("payload_level enum is missing."); + + assert_eq!(payload_level_values.len(), 4, "Unexpected payload_level enum length."); + assert!(payload_level_values.iter().any(|value| value.as_str() == Some("l0"))); + assert!(payload_level_values.iter().any(|value| value.as_str() == Some("l1"))); + assert!(payload_level_values.iter().any(|value| value.as_str() == Some("l2"))); + assert!(payload_level_values.iter().any(|value| value.is_null())); + } +} diff --git a/apps/elf-mcp/src/app/server/tests/tool_definitions.rs b/apps/elf-mcp/src/app/server/tests/tool_definitions.rs new file mode 100644 index 00000000..e63a36fc --- /dev/null +++ b/apps/elf-mcp/src/app/server/tests/tool_definitions.rs @@ -0,0 +1,325 @@ +use std::collections::HashMap; + +use crate::app::server::HttpMethod; + +const ALL_TOOL_DEFINITIONS: [ToolDefinition; 37] = [ + ToolDefinition::new( + "elf_notes_ingest", + HttpMethod::Post, + "/v2/notes/ingest", + "Ingest deterministic notes into ELF. This tool never calls an LLM.", + ), + ToolDefinition::new( + "elf_graph_query", + HttpMethod::Post, + "/v2/graph/query", + "Query graph entities and relations by structured criteria.", + ), + ToolDefinition::new( + "elf_graph_report", + HttpMethod::Post, + "/v2/graph/report", + "Build a source-backed graph topic map with current, historical, future, inferred, ambiguous, stale, and superseded fact markers.", + ), + ToolDefinition::new( + "elf_events_ingest", + HttpMethod::Post, + "/v2/events/ingest", + "Ingest an event by extracting evidence-bound notes using the configured LLM extractor.", + ), + ToolDefinition::new( + "elf_searches_create", + HttpMethod::Post, + "/v2/searches", + "Create a search session using quick-find or planned-search mode. Response includes optional trajectory_summary.", + ), + ToolDefinition::new( + "elf_core_blocks_get", + HttpMethod::Get, + "/v2/core-blocks", + "Fetch core memory blocks explicitly attached to the configured agent and read profile.", + ), + ToolDefinition::new( + "elf_entity_memory_get", + HttpMethod::Get, + "/v2/entity-memory", + "Fetch an entity-scoped memory view across attached core blocks and graph-linked archival notes.", + ), + ToolDefinition::new( + "elf_dreaming_review_queue", + HttpMethod::Get, + "/v2/admin/dreaming/review-queue", + "List source-backed Dreaming review queue proposals with variants, affected refs, lint flags, policy gates, and review audit.", + ), + ToolDefinition::new( + "elf_recall_debug_panel", + HttpMethod::Post, + "/v2/recall-debug/panel", + "Build an agent-facing cross-layer recall/debug panel and deterministic recall_trace over memory traces, source documents, knowledge pages, graph facts, and Dreaming proposals.", + ), + ToolDefinition::new( + "elf_work_journal_entry_create", + HttpMethod::Post, + "/v2/work-journal/entries", + "Capture one source-adjacent Work Journal entry with source refs, redaction, next-step, rejected-option, and promotion-boundary metadata.", + ), + ToolDefinition::new( + "elf_work_journal_entry_get", + HttpMethod::Get, + "/v2/work-journal/entries/{entry_id}", + "Fetch one readable Work Journal entry by entry_id.", + ), + ToolDefinition::new( + "elf_work_journal_session_readback", + HttpMethod::Post, + "/v2/work-journal/readback", + "Read newest Work Journal entries for a session and return a where_stopped projection with journal evidence.", + ), + ToolDefinition::new( + "elf_searches_get", + HttpMethod::Get, + "/v2/searches/{search_id}", + "Fetch a search session index view by search_id, including optional trajectory_summary.", + ), + ToolDefinition::new( + "elf_searches_timeline", + HttpMethod::Get, + "/v2/searches/{search_id}/timeline", + "Build a timeline view from a search session.", + ), + ToolDefinition::new( + "elf_searches_notes", + HttpMethod::Post, + "/v2/searches/{search_id}/notes", + "Fetch note details for selected note_ids from a search session. l0/l1 strip evidence/source_ref/structured; l2 returns full detail.", + ), + ToolDefinition::new( + "elf_notes_list", + HttpMethod::Get, + "/v2/notes", + "List notes in a tenant and project with optional filters.", + ), + ToolDefinition::new( + "elf_notes_get", + HttpMethod::Get, + "/v2/notes/{note_id}", + "Fetch a single note by note_id.", + ), + ToolDefinition::new( + "elf_notes_patch", + HttpMethod::Patch, + "/v2/notes/{note_id}", + "Patch a note by note_id. Only provided fields are updated.", + ), + ToolDefinition::new( + "elf_notes_delete", + HttpMethod::Delete, + "/v2/notes/{note_id}", + "Delete a note by note_id.", + ), + ToolDefinition::new( + "elf_notes_publish", + HttpMethod::Post, + "/v2/notes/{note_id}/publish", + "Publish a note from agent_private into a shared space (team_shared or org_shared).", + ), + ToolDefinition::new( + "elf_notes_unpublish", + HttpMethod::Post, + "/v2/notes/{note_id}/unpublish", + "Unpublish a shared note back into agent_private scope.", + ), + ToolDefinition::new( + "elf_space_grants_list", + HttpMethod::Get, + "/v2/spaces/{space}/grants", + "List sharing grants for a space (team_shared or org_shared).", + ), + ToolDefinition::new( + "elf_space_grant_upsert", + HttpMethod::Post, + "/v2/spaces/{space}/grants", + "Upsert a sharing grant for a space (team_shared or org_shared).", + ), + ToolDefinition::new( + "elf_space_grant_revoke", + HttpMethod::Post, + "/v2/spaces/{space}/grants/revoke", + "Revoke a sharing grant for a space (team_shared or org_shared).", + ), + ToolDefinition::new( + "elf_admin_traces_recent_list", + HttpMethod::Get, + "/v2/admin/traces/recent", + "List recent traces by tenant/project with optional cursor and filters.", + ), + ToolDefinition::new( + "elf_admin_trace_get", + HttpMethod::Get, + "/v2/admin/traces/{trace_id}", + "Fetch trace metadata, items, and optional trajectory summary by trace_id.", + ), + ToolDefinition::new( + "elf_admin_trajectory_get", + HttpMethod::Get, + "/v2/admin/trajectories/{trace_id}", + "Fetch trace trajectory and stage payload by trace_id.", + ), + ToolDefinition::new( + "elf_admin_trace_item_get", + HttpMethod::Get, + "/v2/admin/trace-items/{item_id}", + "Fetch a trace item explain payload by item_id.", + ), + ToolDefinition::new( + "elf_admin_note_provenance_get", + HttpMethod::Get, + "/v2/admin/notes/{note_id}/provenance", + "Fetch provenance bundle for a note.", + ), + ToolDefinition::new( + "elf_admin_memory_history_get", + HttpMethod::Get, + "/v2/admin/notes/{note_id}/history", + "Fetch chronological memory history for a note.", + ), + ToolDefinition::new( + "elf_admin_trace_bundle_get", + HttpMethod::Get, + "/v2/admin/traces/{trace_id}/bundle", + "Fetch trace bundle for replay and diagnostics by trace_id.", + ), + ToolDefinition::new( + "elf_admin_events_ingestion_profiles_list", + HttpMethod::Get, + "/v2/admin/events/ingestion-profiles", + "List latest ingestion profiles for add_event.", + ), + ToolDefinition::new( + "elf_admin_events_ingestion_profiles_create", + HttpMethod::Post, + "/v2/admin/events/ingestion-profiles", + "Create a new ingestion profile version for add_event.", + ), + ToolDefinition::new( + "elf_admin_events_ingestion_profile_get", + HttpMethod::Get, + "/v2/admin/events/ingestion-profiles/{profile_id}", + "Get a single ingestion profile by id/version for add_event.", + ), + ToolDefinition::new( + "elf_admin_events_ingestion_profile_versions_list", + HttpMethod::Get, + "/v2/admin/events/ingestion-profiles/{profile_id}/versions", + "List all versions of one ingestion profile for add_event.", + ), + ToolDefinition::new( + "elf_admin_events_ingestion_profile_default_get", + HttpMethod::Get, + "/v2/admin/events/ingestion-profiles/default", + "Get the active default ingestion profile for add_event.", + ), + ToolDefinition::new( + "elf_admin_events_ingestion_profile_default_set", + HttpMethod::Put, + "/v2/admin/events/ingestion-profiles/default", + "Set the default ingestion profile for add_event.", + ), +]; + +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +struct ToolDefinition { + name: &'static str, + method: HttpMethod, + path: &'static str, + description: &'static str, + streaming: bool, +} +impl ToolDefinition { + const fn new( + name: &'static str, + method: HttpMethod, + path: &'static str, + description: &'static str, + ) -> Self { + Self { name, method, path, description, streaming: true } + } +} + +fn build_tools() -> HashMap<&'static str, ToolDefinition> { + ALL_TOOL_DEFINITIONS.into_iter().map(|tool| (tool.name, tool)).collect() +} + +#[test] +fn registers_all_tools() { + let tools = build_tools(); + let expected = [ + "elf_notes_ingest", + "elf_graph_query", + "elf_graph_report", + "elf_events_ingest", + "elf_core_blocks_get", + "elf_entity_memory_get", + "elf_searches_create", + "elf_searches_get", + "elf_searches_timeline", + "elf_searches_notes", + "elf_notes_list", + "elf_notes_get", + "elf_notes_patch", + "elf_notes_delete", + "elf_notes_publish", + "elf_notes_unpublish", + "elf_space_grants_list", + "elf_space_grant_upsert", + "elf_space_grant_revoke", + "elf_admin_traces_recent_list", + "elf_dreaming_review_queue", + "elf_recall_debug_panel", + "elf_work_journal_entry_create", + "elf_work_journal_entry_get", + "elf_work_journal_session_readback", + "elf_admin_trace_get", + "elf_admin_trajectory_get", + "elf_admin_trace_item_get", + "elf_admin_note_provenance_get", + "elf_admin_memory_history_get", + "elf_admin_trace_bundle_get", + "elf_admin_events_ingestion_profiles_list", + "elf_admin_events_ingestion_profiles_create", + "elf_admin_events_ingestion_profile_get", + "elf_admin_events_ingestion_profile_versions_list", + "elf_admin_events_ingestion_profile_default_get", + "elf_admin_events_ingestion_profile_default_set", + ]; + + for name in expected { + assert!(tools.contains_key(name), "Missing tool registration: {name}."); + } + + assert_eq!(tools.len(), expected.len(), "Unexpected tool count for MCP registration."); +} + +#[test] +fn recall_debug_tool_uses_public_agent_route() { + let tools = build_tools(); + let tool = tools.get("elf_recall_debug_panel").expect("Missing recall debug panel tool."); + + assert_eq!(tool.path, "/v2/recall-debug/panel"); + assert!(tool.description.contains("recall_trace")); +} + +#[test] +fn searches_notes_tool_description_mentions_payload_level_shapes() { + let tools = build_tools(); + let tool = + tools.get("elf_searches_notes").expect("Missing elf_searches_notes tool definition."); + let description = tool.description.to_lowercase(); + + assert_eq!(tool.path, "/v2/searches/{search_id}/notes"); + assert!(description.contains("l0")); + assert!(description.contains("l1")); + assert!(description.contains("l2")); + assert!(description.contains("source_ref")); + assert!(description.contains("structured")); +} diff --git a/packages/elf-service/src/search/tests.rs b/packages/elf-service/src/search/tests.rs index 6420b4d3..83bb5388 100644 --- a/packages/elf-service/src/search/tests.rs +++ b/packages/elf-service/src/search/tests.rs @@ -1,852 +1,7 @@ -use std::path::PathBuf; - -use serde_json::Value; - -use crate::{ - ElfService, - search::{ - self, BlendRankingOverride, ChunkCandidate, ChunkMeta, ChunkSnippet, HashMap, NoteMeta, - OffsetDateTime, RankingRequestOverride, RerankCacheCandidate, RerankCacheItem, - RerankCachePayload, RetrievalSourceCandidates, RetrievalSourceKind, - RetrievalSourcesRankingOverride, ScoredChunk, TraceReplayCandidate, TraceReplayContext, - Uuid, - ranking::{self, ResolvedDiversityPolicy}, - }, -}; -use elf_config::{Config, SearchDynamic}; - -#[test] -fn dense_embedding_input_includes_project_context_suffix() { - let input = - ranking::build_dense_embedding_input("Find payments code.", Some("This is a billing API.")); - - assert!(input.starts_with("Find payments code.\n\nProject context:\n")); - assert!(input.contains("This is a billing API.")); -} - -#[test] -fn dense_embedding_input_skips_empty_project_context() { - let input = ranking::build_dense_embedding_input("Find payments code.", Some(" ")); - - assert_eq!(input, "Find payments code."); -} - -#[test] -fn normalize_queries_includes_original_and_dedupes() { - let queries = vec!["alpha".to_string(), "beta".to_string(), "alpha".to_string()]; - let normalized = ranking::normalize_queries(queries, "alpha", true, 4); - - assert_eq!(normalized, vec!["alpha".to_string(), "beta".to_string()]); -} - -#[test] -fn normalize_queries_respects_max_queries() { - let queries = - vec!["one".to_string(), "two".to_string(), "three".to_string(), "four".to_string()]; - let normalized = ranking::normalize_queries(queries, "zero", true, 3); - - assert_eq!(normalized.len(), 3); -} - -#[test] -fn dynamic_trigger_checks_candidates_and_score() { - let cfg = SearchDynamic { min_candidates: 10, min_top_score: 0.2 }; - - assert!(ranking::should_expand_dynamic(5, 0.9, &cfg)); - assert!(ranking::should_expand_dynamic(20, 0.1, &cfg)); - assert!(!ranking::should_expand_dynamic(20, 0.9, &cfg)); -} - -#[test] -fn rank_normalize_maps_rank_to_unit_interval() { - assert!((ranking::rank_normalize(1, 1) - 1.0).abs() < 1e-6); - assert!((ranking::rank_normalize(1, 5) - 1.0).abs() < 1e-6); - assert!((ranking::rank_normalize(3, 5) - 0.5).abs() < 1e-6); - assert!((ranking::rank_normalize(5, 5) - 0.0).abs() < 1e-6); - assert!((ranking::rank_normalize(0, 5) - 0.0).abs() < 1e-6); -} - -#[test] -fn build_trace_audit_includes_token_id_when_present() { - let audit = search::build_trace_audit("agent-a", Some("tok-123")); - - assert_eq!(audit.get("actor_id"), Some(&Value::from("agent-a"))); - assert_eq!(audit.get("token_id"), Some(&Value::from("tok-123"))); -} - -#[test] -fn build_trace_audit_omits_token_id_when_empty() { - let audit = search::build_trace_audit("agent-a", Some(" ")); - - assert_eq!(audit.get("actor_id"), Some(&Value::from("agent-a"))); - assert!(audit.get("token_id").is_none()); -} - -#[test] -fn relation_context_rows_without_evidence_are_suppressed() { - let now = OffsetDateTime::from_unix_timestamp(100).expect("valid timestamp"); - let note_id = Uuid::from_u128(1); - let contexts = - ElfService::group_relation_context_rows(vec![search::SearchRelationContextRow { - note_id, - fact_id: Uuid::from_u128(2), - scope: "project_shared".to_string(), - subject_canonical: Some("Alice".to_string()), - subject_kind: Some("person".to_string()), - predicate: "prefers".to_string(), - object_entity_id: None, - object_canonical: None, - object_kind: None, - object_value: Some("source-bound recall".to_string()), - valid_from: now, - valid_to: None, - is_current: true, - evidence_note_ids: Vec::new(), - }]); - - assert!(!contexts.contains_key(¬e_id)); -} - -#[test] -fn relation_context_sql_enforces_shared_grant_keys() { - assert!( - search::RELATION_CONTEXT_SQL - .contains("concat(gf.scope, ':', gf.agent_id) = ANY($10::text[])") - ); - assert!( - search::RELATION_CONTEXT_SQL.contains( - "concat(evidence_note.scope, ':', evidence_note.agent_id) = ANY($10::text[])" - ) - ); -} - -fn test_chunk_candidate(note_id: Uuid, retrieval_rank: u32) -> ChunkCandidate { - ChunkCandidate { - chunk_id: Uuid::new_v4(), - note_id, - chunk_index: 0, - retrieval_rank, - retrieval_score: None, - scope: None, - updated_at: None, - embedding_version: Some("v1".to_string()), - } -} - -fn default_retrieval_sources_policy() -> ranking::ResolvedRetrievalSourcesPolicy { - ranking::ResolvedRetrievalSourcesPolicy { - fusion_weight: 1.0, - structured_field_weight: 1.0, - recursive_weight: 0.0, - fusion_priority: 1, - structured_field_priority: 0, - recursive_priority: 0, - } -} - -#[test] -fn merge_retrieval_candidates_keeps_structured_hits_under_full_fusion_capacity() { - let mut fusion = Vec::new(); - - for rank in 1..=10 { - fusion.push(test_chunk_candidate(Uuid::new_v4(), rank)); - } - - let structured = vec![test_chunk_candidate(Uuid::new_v4(), 1)]; - let structured_chunk_id = structured[0].chunk_id; - let merged = ranking::merge_retrieval_candidates( - vec![ - RetrievalSourceCandidates { source: RetrievalSourceKind::Fusion, candidates: fusion }, - RetrievalSourceCandidates { - source: RetrievalSourceKind::StructuredField, - candidates: structured, - }, - ], - &default_retrieval_sources_policy(), - 10, - ); - let merged_chunk_ids: Vec = merged.iter().map(|candidate| candidate.chunk_id).collect(); - - assert!( - merged_chunk_ids.contains(&structured_chunk_id), - "Structured candidate was dropped by retrieval fusion." - ); -} - -#[test] -fn merge_retrieval_candidates_prefers_dual_source_signal_on_tie() { - let shared_note_id = Uuid::new_v4(); - let shared_chunk_id = Uuid::new_v4(); - let fusion_only_note_id = Uuid::new_v4(); - let fusion_only_chunk_id = Uuid::new_v4(); - let fusion = vec![ - ChunkCandidate { - chunk_id: shared_chunk_id, - note_id: shared_note_id, - chunk_index: 0, - retrieval_rank: 9, - retrieval_score: None, - scope: None, - updated_at: None, - embedding_version: Some("v1".to_string()), - }, - ChunkCandidate { - chunk_id: fusion_only_chunk_id, - note_id: fusion_only_note_id, - chunk_index: 0, - retrieval_rank: 1, - retrieval_score: None, - scope: None, - updated_at: None, - embedding_version: Some("v1".to_string()), - }, - ]; - let structured = vec![ChunkCandidate { - chunk_id: shared_chunk_id, - note_id: shared_note_id, - chunk_index: 0, - retrieval_rank: 1, - retrieval_score: None, - scope: None, - updated_at: None, - embedding_version: Some("v1".to_string()), - }]; - let merged = ranking::merge_retrieval_candidates( - vec![ - RetrievalSourceCandidates { source: RetrievalSourceKind::Fusion, candidates: fusion }, - RetrievalSourceCandidates { - source: RetrievalSourceKind::StructuredField, - candidates: structured, - }, - ], - &default_retrieval_sources_policy(), - 1, - ); - let first = merged.first().expect("Expected merged candidate."); - - assert_eq!(first.chunk_id, shared_chunk_id); -} - -#[test] -fn merge_retrieval_candidates_uses_configured_source_priority_on_tie() { - let fusion_chunk_id = Uuid::from_u128(1); - let recursive_chunk_id = Uuid::from_u128(2); - let mut fusion_candidate = test_chunk_candidate(Uuid::new_v4(), 1); - let mut recursive_candidate = test_chunk_candidate(Uuid::new_v4(), 1); - - fusion_candidate.chunk_id = fusion_chunk_id; - recursive_candidate.chunk_id = recursive_chunk_id; - - let policy = ranking::ResolvedRetrievalSourcesPolicy { - fusion_weight: 1.0, - structured_field_weight: 0.0, - recursive_weight: 1.0, - fusion_priority: 10, - structured_field_priority: 20, - recursive_priority: 0, - }; - let merged = ranking::merge_retrieval_candidates( - vec![ - RetrievalSourceCandidates { - source: RetrievalSourceKind::Fusion, - candidates: vec![fusion_candidate], - }, - RetrievalSourceCandidates { - source: RetrievalSourceKind::Recursive, - candidates: vec![recursive_candidate], - }, - ], - &policy, - 2, - ); - - assert_eq!(merged[0].chunk_id, recursive_chunk_id); - assert_eq!(merged[1].chunk_id, fusion_chunk_id); -} - -#[test] -fn retrieval_weight_for_rank_uses_first_matching_segment_or_last() { - let segments = vec![ - ranking::BlendSegment { max_retrieval_rank: 3, retrieval_weight: 0.7 }, - ranking::BlendSegment { max_retrieval_rank: 10, retrieval_weight: 0.2 }, - ]; - - assert!((ranking::retrieval_weight_for_rank(1, &segments) - 0.7).abs() < 1e-6); - assert!((ranking::retrieval_weight_for_rank(3, &segments) - 0.7).abs() < 1e-6); - assert!((ranking::retrieval_weight_for_rank(4, &segments) - 0.2).abs() < 1e-6); - assert!((ranking::retrieval_weight_for_rank(999, &segments) - 0.2).abs() < 1e-6); -} - -#[test] -fn blend_math_is_linear_and_additive() { - let segments = vec![ - ranking::BlendSegment { max_retrieval_rank: 2, retrieval_weight: 0.7 }, - ranking::BlendSegment { max_retrieval_rank: 10, retrieval_weight: 0.2 }, - ]; - let retrieval_rank = 3; - let rerank_rank = 2; - let retrieval_norm = ranking::rank_normalize(retrieval_rank, 10); - let rerank_norm = ranking::rank_normalize(rerank_rank, 4); - let blend_retrieval_weight = ranking::retrieval_weight_for_rank(retrieval_rank, &segments); - - assert!((blend_retrieval_weight - 0.2).abs() < 1e-6); - assert!((retrieval_norm - (7.0 / 9.0)).abs() < 1e-6); - assert!((rerank_norm - (2.0 / 3.0)).abs() < 1e-6); - - let retrieval_term = blend_retrieval_weight * retrieval_norm; - let rerank_term = (1.0 - blend_retrieval_weight) * rerank_norm; - let tie_breaker_score = 0.1; - let scope_context_boost = 0.0; - let final_score = retrieval_term + rerank_term + tie_breaker_score + scope_context_boost; - let expected = (0.2 * (7.0 / 9.0)) + (0.8 * (2.0 / 3.0)) + 0.1; - - assert!((final_score - expected).abs() < 1e-6, "Unexpected final_score: {final_score}"); -} - -#[test] -fn expansion_cache_key_changes_with_max_queries() { - let key_a = ranking::build_expansion_cache_key("alpha", 4, true, "llm", "model", 0.1_f32) - .expect("Expected cache key."); - let key_b = ranking::build_expansion_cache_key("alpha", 5, true, "llm", "model", 0.1_f32) - .expect("Expected cache key."); - - assert_ne!(key_a, key_b); -} - -#[test] -fn rerank_cache_key_changes_with_updated_at() { - let ts_a = OffsetDateTime::from_unix_timestamp(1).expect("Valid timestamp."); - let ts_b = OffsetDateTime::from_unix_timestamp(2).expect("Valid timestamp."); - let chunk_id = Uuid::new_v4(); - let key_a = ranking::build_rerank_cache_key("q", "rerank", "model", &[(chunk_id, ts_a)]) - .expect("Expected cache key."); - let key_b = ranking::build_rerank_cache_key("q", "rerank", "model", &[(chunk_id, ts_b)]) - .expect("Expected cache key."); - - assert_ne!(key_a, key_b); -} - -#[test] -fn rerank_cache_payload_rejects_mismatched_counts() { - let payload = RerankCachePayload { - items: vec![RerankCacheItem { - chunk_id: Uuid::new_v4(), - updated_at: OffsetDateTime::from_unix_timestamp(1).expect("Valid timestamp."), - score: 0.5, - }], - }; - let candidates = vec![RerankCacheCandidate { - chunk_id: Uuid::new_v4(), - updated_at: OffsetDateTime::from_unix_timestamp(1).expect("Valid timestamp."), - }]; - - assert!(ranking::build_cached_scores(&payload, &candidates).is_none()); -} - -#[test] -fn cache_key_prefix_is_stable() { - let prefix = ranking::cache_key_prefix("abcd1234efgh5678"); - - assert_eq!(prefix, "abcd1234efgh"); -} - -#[test] -fn lexical_overlap_ratio_is_deterministic_and_bounded() { - let query_tokens = vec!["deploy".to_string(), "steps".to_string()]; - let ratio = ranking::lexical_overlap_ratio(&query_tokens, "Deploy steps for staging.", 128); - - assert!((ratio - 1.0).abs() < 1e-6, "Unexpected ratio: {ratio}"); - - let ratio = ranking::lexical_overlap_ratio(&query_tokens, "Deploy only.", 128); - - assert!((ratio - 0.5).abs() < 1e-6, "Unexpected ratio: {ratio}"); - assert!((0.0..=1.0).contains(&ratio), "Ratio must be in [0, 1]."); -} - -#[test] -fn deterministic_ranking_terms_do_not_apply_when_disabled() { - let mut cfg = parse_example_config(); - - cfg.ranking.deterministic.enabled = false; - cfg.ranking.deterministic.lexical.enabled = true; - cfg.ranking.deterministic.hits.enabled = true; - cfg.ranking.deterministic.decay.enabled = true; - - let now = OffsetDateTime::from_unix_timestamp(1_000_000).expect("Valid timestamp."); - let note = NoteMeta { - note_id: Uuid::new_v4(), - note_type: "fact".to_string(), - key: None, - scope: "project_shared".to_string(), - agent_id: "agent-a".to_string(), - importance: 0.1, - confidence: 0.9, - updated_at: now, - expires_at: None, - source_ref: serde_json::json!({}), - embedding_version: "v1".to_string(), - hit_count: 8, - last_hit_at: Some(now), - }; - let chunk = - ChunkMeta { chunk_id: Uuid::new_v4(), chunk_index: 0, start_offset: 0, end_offset: 10 }; - let item = ChunkSnippet { - note, - chunk, - snippet: "deploy steps".to_string(), - retrieval_rank: 1, - retrieval_score: None, - }; - let mut scored = ScoredChunk { - item, - final_score: 1.0, - rerank_score: 0.5, - rerank_rank: 1, - rerank_norm: 1.0, - retrieval_norm: 1.0, - blend_retrieval_weight: 0.5, - retrieval_term: 0.5, - rerank_term: 0.5, - tie_breaker_score: 0.0, - scope_context_boost: 0.0, - age_days: 30.0, - importance: 0.1, - deterministic_lexical_overlap_ratio: 0.0, - deterministic_lexical_bonus: 0.0, - deterministic_hit_count: 0, - deterministic_last_hit_age_days: None, - deterministic_hit_boost: 0.0, - deterministic_decay_penalty: 0.0, - }; - let terms = ranking::compute_deterministic_ranking_terms( - &cfg, - &ranking::tokenize_query( - "deploy steps", - cfg.ranking.deterministic.lexical.max_query_terms as usize, - ), - scored.item.snippet.as_str(), - scored.item.note.hit_count, - scored.item.note.last_hit_at, - scored.age_days, - now, - ); - - scored.final_score += terms.lexical_bonus + terms.hit_boost + terms.decay_penalty; - scored.deterministic_lexical_overlap_ratio = terms.lexical_overlap_ratio; - scored.deterministic_lexical_bonus = terms.lexical_bonus; - scored.deterministic_hit_count = terms.hit_count; - scored.deterministic_last_hit_age_days = terms.last_hit_age_days; - scored.deterministic_hit_boost = terms.hit_boost; - scored.deterministic_decay_penalty = terms.decay_penalty; - - assert!((scored.final_score - 1.0).abs() < 1e-6, "Score must not change."); - assert!((scored.deterministic_lexical_bonus - 0.0).abs() < 1e-6); - assert!((scored.deterministic_hit_boost - 0.0).abs() < 1e-6); - assert!((scored.deterministic_decay_penalty - 0.0).abs() < 1e-6); -} - -#[test] -fn deterministic_ranking_terms_apply_and_are_bounded() { - let mut cfg = parse_example_config(); - - cfg.ranking.deterministic.enabled = true; - cfg.ranking.deterministic.lexical.enabled = true; - cfg.ranking.deterministic.hits.enabled = true; - cfg.ranking.deterministic.decay.enabled = true; - - let now = OffsetDateTime::from_unix_timestamp(1_000_000).expect("Valid timestamp."); - let note = NoteMeta { - note_id: Uuid::new_v4(), - note_type: "fact".to_string(), - key: None, - scope: "project_shared".to_string(), - agent_id: "agent-a".to_string(), - importance: 0.1, - confidence: 0.9, - updated_at: now, - expires_at: None, - source_ref: serde_json::json!({}), - embedding_version: "v1".to_string(), - hit_count: 8, - last_hit_at: Some(now), - }; - let chunk = - ChunkMeta { chunk_id: Uuid::new_v4(), chunk_index: 0, start_offset: 0, end_offset: 10 }; - let item = ChunkSnippet { - note, - chunk, - snippet: "deploy steps".to_string(), - retrieval_rank: 1, - retrieval_score: None, - }; - let mut scored = ScoredChunk { - item, - final_score: 1.0, - rerank_score: 0.5, - rerank_rank: 1, - rerank_norm: 1.0, - retrieval_norm: 1.0, - blend_retrieval_weight: 0.5, - retrieval_term: 0.5, - rerank_term: 0.5, - tie_breaker_score: 0.0, - scope_context_boost: 0.0, - age_days: 30.0, - importance: 0.1, - deterministic_lexical_overlap_ratio: 0.0, - deterministic_lexical_bonus: 0.0, - deterministic_hit_count: 0, - deterministic_last_hit_age_days: None, - deterministic_hit_boost: 0.0, - deterministic_decay_penalty: 0.0, - }; - let terms = ranking::compute_deterministic_ranking_terms( - &cfg, - &ranking::tokenize_query( - "deploy steps", - cfg.ranking.deterministic.lexical.max_query_terms as usize, - ), - scored.item.snippet.as_str(), - scored.item.note.hit_count, - scored.item.note.last_hit_at, - scored.age_days, - now, - ); - - scored.final_score += terms.lexical_bonus + terms.hit_boost + terms.decay_penalty; - scored.deterministic_lexical_overlap_ratio = terms.lexical_overlap_ratio; - scored.deterministic_lexical_bonus = terms.lexical_bonus; - scored.deterministic_hit_count = terms.hit_count; - scored.deterministic_last_hit_age_days = terms.last_hit_age_days; - scored.deterministic_hit_boost = terms.hit_boost; - scored.deterministic_decay_penalty = terms.decay_penalty; - - assert!(scored.final_score.is_finite(), "Score must be finite."); - assert!((0.0..=1.0).contains(&scored.deterministic_lexical_overlap_ratio)); - assert!(scored.deterministic_lexical_bonus >= 0.0); - assert!(scored.deterministic_hit_boost >= 0.0); - assert!(scored.deterministic_decay_penalty <= 0.0); - - let expected_lex = cfg.ranking.deterministic.lexical.weight; - - assert!((scored.deterministic_lexical_bonus - expected_lex).abs() < 1e-6); - - let expected_hit = cfg.ranking.deterministic.hits.weight * 0.5; - - assert!((scored.deterministic_hit_boost - expected_hit).abs() < 1e-6); -} - -fn test_scored_chunk(note_id: Uuid, retrieval_rank: u32, now: OffsetDateTime) -> ScoredChunk { - let note = NoteMeta { - note_id, - note_type: "fact".to_string(), - key: None, - scope: "project_shared".to_string(), - agent_id: "agent-a".to_string(), - importance: 0.1, - confidence: 0.9, - updated_at: now, - expires_at: None, - source_ref: serde_json::json!({}), - embedding_version: "v1".to_string(), - hit_count: 0, - last_hit_at: None, - }; - let chunk = ChunkMeta { - chunk_id: Uuid::new_v4(), - chunk_index: i32::try_from(retrieval_rank.saturating_sub(1)).unwrap_or(0), - start_offset: 0, - end_offset: 16, - }; - let item = ChunkSnippet { - note, - chunk, - snippet: format!("snippet-{retrieval_rank}"), - retrieval_rank, - retrieval_score: None, - }; - - ScoredChunk { - item, - final_score: 0.0, - rerank_score: 0.0, - rerank_rank: retrieval_rank, - rerank_norm: 0.0, - retrieval_norm: 0.0, - blend_retrieval_weight: 0.5, - retrieval_term: 0.0, - rerank_term: 0.0, - tie_breaker_score: 0.0, - scope_context_boost: 0.0, - age_days: 0.0, - importance: 0.1, - deterministic_lexical_overlap_ratio: 0.0, - deterministic_lexical_bonus: 0.0, - deterministic_hit_count: 0, - deterministic_last_hit_age_days: None, - deterministic_hit_boost: 0.0, - deterministic_decay_penalty: 0.0, - } -} - -#[test] -fn diversity_selection_skips_high_similarity_when_alternative_exists() { - let now = OffsetDateTime::from_unix_timestamp(0).expect("Valid timestamp."); - let note_a = Uuid::new_v4(); - let note_b = Uuid::new_v4(); - let note_c = Uuid::new_v4(); - let candidates = vec![ - test_scored_chunk(note_a, 1, now), - test_scored_chunk(note_b, 2, now), - test_scored_chunk(note_c, 3, now), - ]; - let mut vectors = HashMap::new(); - - vectors.insert(note_a, vec![1.0, 0.0]); - vectors.insert(note_b, vec![0.99, 0.01]); - vectors.insert(note_c, vec![0.0, 1.0]); - - let policy = ResolvedDiversityPolicy { - enabled: true, - sim_threshold: 0.9, - mmr_lambda: 0.7, - max_skips: 64, - }; - let (selected, decisions) = ranking::select_diverse_results(candidates, 2, &policy, &vectors); - let selected_ids: Vec = selected.iter().map(|item| item.item.note.note_id).collect(); - - assert_eq!(selected_ids, vec![note_a, note_c]); - assert_eq!( - decisions.get(¬e_b).and_then(|decision| decision.skipped_reason.as_deref()), - Some("similarity_threshold") - ); -} - -#[test] -fn diversity_selection_backfills_when_max_skips_is_reached() { - let now = OffsetDateTime::from_unix_timestamp(0).expect("Valid timestamp."); - let note_a = Uuid::new_v4(); - let note_b = Uuid::new_v4(); - let candidates = vec![test_scored_chunk(note_a, 1, now), test_scored_chunk(note_b, 2, now)]; - let mut vectors = HashMap::new(); - - vectors.insert(note_a, vec![1.0, 0.0]); - vectors.insert(note_b, vec![0.99, 0.01]); - - let policy = ResolvedDiversityPolicy { - enabled: true, - sim_threshold: 0.9, - mmr_lambda: 0.7, - max_skips: 0, - }; - let (selected, decisions) = ranking::select_diverse_results(candidates, 2, &policy, &vectors); - let selected_ids: Vec = selected.iter().map(|item| item.item.note.note_id).collect(); - let selected_reason = decisions.get(¬e_b).map(|decision| decision.selected_reason.as_str()); - - assert_eq!(selected_ids, vec![note_a, note_b]); - assert_eq!(selected_reason, Some("max_skips_backfill")); -} - -#[test] -fn replay_diversity_decisions_prefer_selected_entry_for_same_note() { - let now = OffsetDateTime::from_unix_timestamp(0).expect("Valid timestamp."); - let note_id = Uuid::new_v4(); - let first = TraceReplayCandidate { - note_id, - chunk_id: Uuid::new_v4(), - chunk_index: 0, - snippet: "first".to_string(), - retrieval_rank: 2, - retrieval_score: None, - rerank_score: 0.2, - note_scope: "project_shared".to_string(), - note_importance: 0.1, - note_updated_at: now, - note_hit_count: 0, - note_last_hit_at: None, - diversity_selected: Some(false), - diversity_selected_rank: None, - diversity_selected_reason: Some("not_selected".to_string()), - diversity_skipped_reason: Some("lower_mmr".to_string()), - diversity_nearest_selected_note_id: None, - diversity_similarity: Some(0.95), - diversity_mmr_score: Some(0.12), - diversity_missing_embedding: Some(false), - }; - let second = TraceReplayCandidate { - note_id, - chunk_id: Uuid::new_v4(), - chunk_index: 1, - snippet: "second".to_string(), - retrieval_rank: 1, - retrieval_score: None, - rerank_score: 0.3, - note_scope: "project_shared".to_string(), - note_importance: 0.1, - note_updated_at: now, - note_hit_count: 0, - note_last_hit_at: None, - diversity_selected: Some(true), - diversity_selected_rank: Some(2), - diversity_selected_reason: Some("mmr".to_string()), - diversity_skipped_reason: None, - diversity_nearest_selected_note_id: None, - diversity_similarity: Some(0.35), - diversity_mmr_score: Some(0.44), - diversity_missing_embedding: Some(false), - }; - let decisions = ranking::extract_replay_diversity_decisions(&[first, second]); - let decision = decisions.get(¬e_id).expect("Expected merged decision."); - - assert!(decision.selected); - assert_eq!(decision.selected_rank, Some(2)); - assert_eq!(decision.selected_reason, "mmr"); -} - -fn parse_example_config() -> Config { - let root_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("../.."); - let path = root_dir.join("elf.example.toml"); - - elf_config::load(&path).expect("elf.example.toml must remain parseable and valid.") -} - -#[test] -fn ranking_policy_id_is_stable_and_has_expected_format() { - let cfg = parse_example_config(); - let id_a = search::ranking_policy_id(&cfg, None).expect("Expected policy id."); - let id_b = search::ranking_policy_id(&cfg, None).expect("Expected policy id."); - - assert_eq!(id_a, id_b); - assert!(id_a.starts_with("ranking_v2:"), "Unexpected policy id: {id_a}"); - assert_eq!(id_a.len(), "ranking_v2:".len() + 12, "Unexpected policy id: {id_a}"); -} - -#[test] -fn ranking_policy_id_changes_with_override() { - let cfg = parse_example_config(); - let base = search::ranking_policy_id(&cfg, None).expect("Expected base policy id."); - let override_ = RankingRequestOverride { - blend: Some(BlendRankingOverride { - enabled: Some(false), - rerank_normalization: None, - retrieval_normalization: None, - segments: None, - }), - diversity: None, - retrieval_sources: None, - }; - let overridden = - search::ranking_policy_id(&cfg, Some(&override_)).expect("Expected overridden policy id."); - - assert_ne!(base, overridden); -} - -#[test] -fn ranking_policy_id_changes_with_retrieval_source_override() { - let cfg = parse_example_config(); - let base = search::ranking_policy_id(&cfg, None).expect("Expected base policy id."); - let override_ = RankingRequestOverride { - blend: None, - diversity: None, - retrieval_sources: Some(RetrievalSourcesRankingOverride { - fusion_weight: Some(0.75), - structured_field_weight: Some(1.25), - recursive_weight: Some(0.0), - fusion_priority: Some(2), - structured_field_priority: Some(1), - recursive_priority: Some(0), - }), - }; - let overridden = - search::ranking_policy_id(&cfg, Some(&override_)).expect("Expected overridden policy id."); - - assert_ne!(base, overridden); -} - -#[test] -fn replay_ranking_policy_id_matches_ranking_policy_id() { - let cfg = parse_example_config(); - let expected = search::ranking_policy_id(&cfg, None).expect("Expected policy id."); - let now = OffsetDateTime::from_unix_timestamp(0).expect("Valid timestamp."); - let trace = TraceReplayContext { - trace_id: Uuid::new_v4(), - query: "deployment steps".to_string(), - candidate_count: 3, - top_k: 2, - created_at: now, - }; - let candidates = vec![ - TraceReplayCandidate { - note_id: Uuid::new_v4(), - chunk_id: Uuid::new_v4(), - chunk_index: 0, - snippet: "deployment steps".to_string(), - retrieval_rank: 1, - retrieval_score: None, - rerank_score: 0.1, - note_scope: "project_shared".to_string(), - note_importance: 0.1, - note_updated_at: now, - note_hit_count: 0, - note_last_hit_at: None, - diversity_selected: None, - diversity_selected_rank: None, - diversity_selected_reason: None, - diversity_skipped_reason: None, - diversity_nearest_selected_note_id: None, - diversity_similarity: None, - diversity_mmr_score: None, - diversity_missing_embedding: None, - }, - TraceReplayCandidate { - note_id: Uuid::new_v4(), - chunk_id: Uuid::new_v4(), - chunk_index: 0, - snippet: "deployment steps".to_string(), - retrieval_rank: 2, - retrieval_score: None, - rerank_score: 0.9, - note_scope: "project_shared".to_string(), - note_importance: 0.1, - note_updated_at: now, - note_hit_count: 0, - note_last_hit_at: None, - diversity_selected: None, - diversity_selected_rank: None, - diversity_selected_reason: None, - diversity_skipped_reason: None, - diversity_nearest_selected_note_id: None, - diversity_similarity: None, - diversity_mmr_score: None, - diversity_missing_embedding: None, - }, - TraceReplayCandidate { - note_id: Uuid::new_v4(), - chunk_id: Uuid::new_v4(), - chunk_index: 0, - snippet: "deployment steps".to_string(), - retrieval_rank: 3, - retrieval_score: None, - rerank_score: 0.2, - note_scope: "org_shared".to_string(), - note_importance: 0.1, - note_updated_at: now, - note_hit_count: 0, - note_last_hit_at: None, - diversity_selected: None, - diversity_selected_rank: None, - diversity_selected_reason: None, - diversity_skipped_reason: None, - diversity_nearest_selected_note_id: None, - diversity_similarity: None, - diversity_mmr_score: None, - diversity_missing_embedding: None, - }, - ]; - let out = search::replay_ranking_from_candidates(&cfg, &trace, None, &candidates, 2) - .expect("Expected replay output."); - - for item in out { - assert_eq!(item.explain.ranking.policy_id, expected); - } -} +mod tests_cache_keys; +mod tests_deterministic; +mod tests_diversity; +mod tests_policy_id; +mod tests_query_basics; +mod tests_relation_context; +mod tests_retrieval_merge; diff --git a/packages/elf-service/src/search/tests_cache_keys.rs b/packages/elf-service/src/search/tests_cache_keys.rs new file mode 100644 index 00000000..76a1cc5b --- /dev/null +++ b/packages/elf-service/src/search/tests_cache_keys.rs @@ -0,0 +1,50 @@ +use crate::search::{ + OffsetDateTime, RerankCacheCandidate, RerankCacheItem, RerankCachePayload, Uuid, ranking, +}; + +#[test] +fn expansion_cache_key_changes_with_max_queries() { + let key_a = ranking::build_expansion_cache_key("alpha", 4, true, "llm", "model", 0.1_f32) + .expect("Expected cache key."); + let key_b = ranking::build_expansion_cache_key("alpha", 5, true, "llm", "model", 0.1_f32) + .expect("Expected cache key."); + + assert_ne!(key_a, key_b); +} + +#[test] +fn rerank_cache_key_changes_with_updated_at() { + let ts_a = OffsetDateTime::from_unix_timestamp(1).expect("Valid timestamp."); + let ts_b = OffsetDateTime::from_unix_timestamp(2).expect("Valid timestamp."); + let chunk_id = Uuid::new_v4(); + let key_a = ranking::build_rerank_cache_key("q", "rerank", "model", &[(chunk_id, ts_a)]) + .expect("Expected cache key."); + let key_b = ranking::build_rerank_cache_key("q", "rerank", "model", &[(chunk_id, ts_b)]) + .expect("Expected cache key."); + + assert_ne!(key_a, key_b); +} + +#[test] +fn rerank_cache_payload_rejects_mismatched_counts() { + let payload = RerankCachePayload { + items: vec![RerankCacheItem { + chunk_id: Uuid::new_v4(), + updated_at: OffsetDateTime::from_unix_timestamp(1).expect("Valid timestamp."), + score: 0.5, + }], + }; + let candidates = vec![RerankCacheCandidate { + chunk_id: Uuid::new_v4(), + updated_at: OffsetDateTime::from_unix_timestamp(1).expect("Valid timestamp."), + }]; + + assert!(ranking::build_cached_scores(&payload, &candidates).is_none()); +} + +#[test] +fn cache_key_prefix_is_stable() { + let prefix = ranking::cache_key_prefix("abcd1234efgh5678"); + + assert_eq!(prefix, "abcd1234efgh"); +} diff --git a/packages/elf-service/src/search/tests_deterministic.rs b/packages/elf-service/src/search/tests_deterministic.rs new file mode 100644 index 00000000..37d1b1c5 --- /dev/null +++ b/packages/elf-service/src/search/tests_deterministic.rs @@ -0,0 +1,199 @@ +use std::path::PathBuf; + +use crate::search::{ + ChunkMeta, ChunkSnippet, NoteMeta, OffsetDateTime, ScoredChunk, Uuid, ranking, +}; +use elf_config::Config; + +fn parse_example_config() -> Config { + let root_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("../.."); + let path = root_dir.join("elf.example.toml"); + + elf_config::load(&path).expect("elf.example.toml must remain parseable and valid.") +} + +#[test] +fn lexical_overlap_ratio_is_deterministic_and_bounded() { + let query_tokens = vec!["deploy".to_string(), "steps".to_string()]; + let ratio = ranking::lexical_overlap_ratio(&query_tokens, "Deploy steps for staging.", 128); + + assert!((ratio - 1.0).abs() < 1e-6, "Unexpected ratio: {ratio}"); + + let ratio = ranking::lexical_overlap_ratio(&query_tokens, "Deploy only.", 128); + + assert!((ratio - 0.5).abs() < 1e-6, "Unexpected ratio: {ratio}"); + assert!((0.0..=1.0).contains(&ratio), "Ratio must be in [0, 1]."); +} + +#[test] +fn deterministic_ranking_terms_do_not_apply_when_disabled() { + let mut cfg = parse_example_config(); + + cfg.ranking.deterministic.enabled = false; + cfg.ranking.deterministic.lexical.enabled = true; + cfg.ranking.deterministic.hits.enabled = true; + cfg.ranking.deterministic.decay.enabled = true; + + let now = OffsetDateTime::from_unix_timestamp(1_000_000).expect("Valid timestamp."); + let note = NoteMeta { + note_id: Uuid::new_v4(), + note_type: "fact".to_string(), + key: None, + scope: "project_shared".to_string(), + agent_id: "agent-a".to_string(), + importance: 0.1, + confidence: 0.9, + updated_at: now, + expires_at: None, + source_ref: serde_json::json!({}), + embedding_version: "v1".to_string(), + hit_count: 8, + last_hit_at: Some(now), + }; + let chunk = + ChunkMeta { chunk_id: Uuid::new_v4(), chunk_index: 0, start_offset: 0, end_offset: 10 }; + let item = ChunkSnippet { + note, + chunk, + snippet: "deploy steps".to_string(), + retrieval_rank: 1, + retrieval_score: None, + }; + let mut scored = ScoredChunk { + item, + final_score: 1.0, + rerank_score: 0.5, + rerank_rank: 1, + rerank_norm: 1.0, + retrieval_norm: 1.0, + blend_retrieval_weight: 0.5, + retrieval_term: 0.5, + rerank_term: 0.5, + tie_breaker_score: 0.0, + scope_context_boost: 0.0, + age_days: 30.0, + importance: 0.1, + deterministic_lexical_overlap_ratio: 0.0, + deterministic_lexical_bonus: 0.0, + deterministic_hit_count: 0, + deterministic_last_hit_age_days: None, + deterministic_hit_boost: 0.0, + deterministic_decay_penalty: 0.0, + }; + let terms = ranking::compute_deterministic_ranking_terms( + &cfg, + &ranking::tokenize_query( + "deploy steps", + cfg.ranking.deterministic.lexical.max_query_terms as usize, + ), + scored.item.snippet.as_str(), + scored.item.note.hit_count, + scored.item.note.last_hit_at, + scored.age_days, + now, + ); + + scored.final_score += terms.lexical_bonus + terms.hit_boost + terms.decay_penalty; + scored.deterministic_lexical_overlap_ratio = terms.lexical_overlap_ratio; + scored.deterministic_lexical_bonus = terms.lexical_bonus; + scored.deterministic_hit_count = terms.hit_count; + scored.deterministic_last_hit_age_days = terms.last_hit_age_days; + scored.deterministic_hit_boost = terms.hit_boost; + scored.deterministic_decay_penalty = terms.decay_penalty; + + assert!((scored.final_score - 1.0).abs() < 1e-6, "Score must not change."); + assert!((scored.deterministic_lexical_bonus - 0.0).abs() < 1e-6); + assert!((scored.deterministic_hit_boost - 0.0).abs() < 1e-6); + assert!((scored.deterministic_decay_penalty - 0.0).abs() < 1e-6); +} + +#[test] +fn deterministic_ranking_terms_apply_and_are_bounded() { + let mut cfg = parse_example_config(); + + cfg.ranking.deterministic.enabled = true; + cfg.ranking.deterministic.lexical.enabled = true; + cfg.ranking.deterministic.hits.enabled = true; + cfg.ranking.deterministic.decay.enabled = true; + + let now = OffsetDateTime::from_unix_timestamp(1_000_000).expect("Valid timestamp."); + let note = NoteMeta { + note_id: Uuid::new_v4(), + note_type: "fact".to_string(), + key: None, + scope: "project_shared".to_string(), + agent_id: "agent-a".to_string(), + importance: 0.1, + confidence: 0.9, + updated_at: now, + expires_at: None, + source_ref: serde_json::json!({}), + embedding_version: "v1".to_string(), + hit_count: 8, + last_hit_at: Some(now), + }; + let chunk = + ChunkMeta { chunk_id: Uuid::new_v4(), chunk_index: 0, start_offset: 0, end_offset: 10 }; + let item = ChunkSnippet { + note, + chunk, + snippet: "deploy steps".to_string(), + retrieval_rank: 1, + retrieval_score: None, + }; + let mut scored = ScoredChunk { + item, + final_score: 1.0, + rerank_score: 0.5, + rerank_rank: 1, + rerank_norm: 1.0, + retrieval_norm: 1.0, + blend_retrieval_weight: 0.5, + retrieval_term: 0.5, + rerank_term: 0.5, + tie_breaker_score: 0.0, + scope_context_boost: 0.0, + age_days: 30.0, + importance: 0.1, + deterministic_lexical_overlap_ratio: 0.0, + deterministic_lexical_bonus: 0.0, + deterministic_hit_count: 0, + deterministic_last_hit_age_days: None, + deterministic_hit_boost: 0.0, + deterministic_decay_penalty: 0.0, + }; + let terms = ranking::compute_deterministic_ranking_terms( + &cfg, + &ranking::tokenize_query( + "deploy steps", + cfg.ranking.deterministic.lexical.max_query_terms as usize, + ), + scored.item.snippet.as_str(), + scored.item.note.hit_count, + scored.item.note.last_hit_at, + scored.age_days, + now, + ); + + scored.final_score += terms.lexical_bonus + terms.hit_boost + terms.decay_penalty; + scored.deterministic_lexical_overlap_ratio = terms.lexical_overlap_ratio; + scored.deterministic_lexical_bonus = terms.lexical_bonus; + scored.deterministic_hit_count = terms.hit_count; + scored.deterministic_last_hit_age_days = terms.last_hit_age_days; + scored.deterministic_hit_boost = terms.hit_boost; + scored.deterministic_decay_penalty = terms.decay_penalty; + + assert!(scored.final_score.is_finite(), "Score must be finite."); + assert!((0.0..=1.0).contains(&scored.deterministic_lexical_overlap_ratio)); + assert!(scored.deterministic_lexical_bonus >= 0.0); + assert!(scored.deterministic_hit_boost >= 0.0); + assert!(scored.deterministic_decay_penalty <= 0.0); + + let expected_lex = cfg.ranking.deterministic.lexical.weight; + + assert!((scored.deterministic_lexical_bonus - expected_lex).abs() < 1e-6); + + let expected_hit = cfg.ranking.deterministic.hits.weight * 0.5; + + assert!((scored.deterministic_hit_boost - expected_hit).abs() < 1e-6); +} diff --git a/packages/elf-service/src/search/tests_diversity.rs b/packages/elf-service/src/search/tests_diversity.rs new file mode 100644 index 00000000..37391bae --- /dev/null +++ b/packages/elf-service/src/search/tests_diversity.rs @@ -0,0 +1,172 @@ +use crate::search::{ + ChunkMeta, ChunkSnippet, HashMap, NoteMeta, OffsetDateTime, ScoredChunk, TraceReplayCandidate, + Uuid, + ranking::{self, ResolvedDiversityPolicy}, +}; + +fn test_scored_chunk(note_id: Uuid, retrieval_rank: u32, now: OffsetDateTime) -> ScoredChunk { + let note = NoteMeta { + note_id, + note_type: "fact".to_string(), + key: None, + scope: "project_shared".to_string(), + agent_id: "agent-a".to_string(), + importance: 0.1, + confidence: 0.9, + updated_at: now, + expires_at: None, + source_ref: serde_json::json!({}), + embedding_version: "v1".to_string(), + hit_count: 0, + last_hit_at: None, + }; + let chunk = ChunkMeta { + chunk_id: Uuid::new_v4(), + chunk_index: i32::try_from(retrieval_rank.saturating_sub(1)).unwrap_or(0), + start_offset: 0, + end_offset: 16, + }; + let item = ChunkSnippet { + note, + chunk, + snippet: format!("snippet-{retrieval_rank}"), + retrieval_rank, + retrieval_score: None, + }; + + ScoredChunk { + item, + final_score: 0.0, + rerank_score: 0.0, + rerank_rank: retrieval_rank, + rerank_norm: 0.0, + retrieval_norm: 0.0, + blend_retrieval_weight: 0.5, + retrieval_term: 0.0, + rerank_term: 0.0, + tie_breaker_score: 0.0, + scope_context_boost: 0.0, + age_days: 0.0, + importance: 0.1, + deterministic_lexical_overlap_ratio: 0.0, + deterministic_lexical_bonus: 0.0, + deterministic_hit_count: 0, + deterministic_last_hit_age_days: None, + deterministic_hit_boost: 0.0, + deterministic_decay_penalty: 0.0, + } +} + +#[test] +fn diversity_selection_skips_high_similarity_when_alternative_exists() { + let now = OffsetDateTime::from_unix_timestamp(0).expect("Valid timestamp."); + let note_a = Uuid::new_v4(); + let note_b = Uuid::new_v4(); + let note_c = Uuid::new_v4(); + let candidates = vec![ + test_scored_chunk(note_a, 1, now), + test_scored_chunk(note_b, 2, now), + test_scored_chunk(note_c, 3, now), + ]; + let mut vectors = HashMap::new(); + + vectors.insert(note_a, vec![1.0, 0.0]); + vectors.insert(note_b, vec![0.99, 0.01]); + vectors.insert(note_c, vec![0.0, 1.0]); + + let policy = ResolvedDiversityPolicy { + enabled: true, + sim_threshold: 0.9, + mmr_lambda: 0.7, + max_skips: 64, + }; + let (selected, decisions) = ranking::select_diverse_results(candidates, 2, &policy, &vectors); + let selected_ids: Vec = selected.iter().map(|item| item.item.note.note_id).collect(); + + assert_eq!(selected_ids, vec![note_a, note_c]); + assert_eq!( + decisions.get(¬e_b).and_then(|decision| decision.skipped_reason.as_deref()), + Some("similarity_threshold") + ); +} + +#[test] +fn diversity_selection_backfills_when_max_skips_is_reached() { + let now = OffsetDateTime::from_unix_timestamp(0).expect("Valid timestamp."); + let note_a = Uuid::new_v4(); + let note_b = Uuid::new_v4(); + let candidates = vec![test_scored_chunk(note_a, 1, now), test_scored_chunk(note_b, 2, now)]; + let mut vectors = HashMap::new(); + + vectors.insert(note_a, vec![1.0, 0.0]); + vectors.insert(note_b, vec![0.99, 0.01]); + + let policy = ResolvedDiversityPolicy { + enabled: true, + sim_threshold: 0.9, + mmr_lambda: 0.7, + max_skips: 0, + }; + let (selected, decisions) = ranking::select_diverse_results(candidates, 2, &policy, &vectors); + let selected_ids: Vec = selected.iter().map(|item| item.item.note.note_id).collect(); + let selected_reason = decisions.get(¬e_b).map(|decision| decision.selected_reason.as_str()); + + assert_eq!(selected_ids, vec![note_a, note_b]); + assert_eq!(selected_reason, Some("max_skips_backfill")); +} + +#[test] +fn replay_diversity_decisions_prefer_selected_entry_for_same_note() { + let now = OffsetDateTime::from_unix_timestamp(0).expect("Valid timestamp."); + let note_id = Uuid::new_v4(); + let first = TraceReplayCandidate { + note_id, + chunk_id: Uuid::new_v4(), + chunk_index: 0, + snippet: "first".to_string(), + retrieval_rank: 2, + retrieval_score: None, + rerank_score: 0.2, + note_scope: "project_shared".to_string(), + note_importance: 0.1, + note_updated_at: now, + note_hit_count: 0, + note_last_hit_at: None, + diversity_selected: Some(false), + diversity_selected_rank: None, + diversity_selected_reason: Some("not_selected".to_string()), + diversity_skipped_reason: Some("lower_mmr".to_string()), + diversity_nearest_selected_note_id: None, + diversity_similarity: Some(0.95), + diversity_mmr_score: Some(0.12), + diversity_missing_embedding: Some(false), + }; + let second = TraceReplayCandidate { + note_id, + chunk_id: Uuid::new_v4(), + chunk_index: 1, + snippet: "second".to_string(), + retrieval_rank: 1, + retrieval_score: None, + rerank_score: 0.3, + note_scope: "project_shared".to_string(), + note_importance: 0.1, + note_updated_at: now, + note_hit_count: 0, + note_last_hit_at: None, + diversity_selected: Some(true), + diversity_selected_rank: Some(2), + diversity_selected_reason: Some("mmr".to_string()), + diversity_skipped_reason: None, + diversity_nearest_selected_note_id: None, + diversity_similarity: Some(0.35), + diversity_mmr_score: Some(0.44), + diversity_missing_embedding: Some(false), + }; + let decisions = ranking::extract_replay_diversity_decisions(&[first, second]); + let decision = decisions.get(¬e_id).expect("Expected merged decision."); + + assert!(decision.selected); + assert_eq!(decision.selected_rank, Some(2)); + assert_eq!(decision.selected_reason, "mmr"); +} diff --git a/packages/elf-service/src/search/tests_policy_id.rs b/packages/elf-service/src/search/tests_policy_id.rs new file mode 100644 index 00000000..722181a6 --- /dev/null +++ b/packages/elf-service/src/search/tests_policy_id.rs @@ -0,0 +1,155 @@ +use std::path::PathBuf; + +use crate::search::{ + self, BlendRankingOverride, OffsetDateTime, RankingRequestOverride, + RetrievalSourcesRankingOverride, TraceReplayCandidate, TraceReplayContext, Uuid, +}; +use elf_config::Config; + +fn parse_example_config() -> Config { + let root_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("../.."); + let path = root_dir.join("elf.example.toml"); + + elf_config::load(&path).expect("elf.example.toml must remain parseable and valid.") +} + +#[test] +fn ranking_policy_id_is_stable_and_has_expected_format() { + let cfg = parse_example_config(); + let id_a = search::ranking_policy_id(&cfg, None).expect("Expected policy id."); + let id_b = search::ranking_policy_id(&cfg, None).expect("Expected policy id."); + + assert_eq!(id_a, id_b); + assert!(id_a.starts_with("ranking_v2:"), "Unexpected policy id: {id_a}"); + assert_eq!(id_a.len(), "ranking_v2:".len() + 12, "Unexpected policy id: {id_a}"); +} + +#[test] +fn ranking_policy_id_changes_with_override() { + let cfg = parse_example_config(); + let base = search::ranking_policy_id(&cfg, None).expect("Expected base policy id."); + let override_ = RankingRequestOverride { + blend: Some(BlendRankingOverride { + enabled: Some(false), + rerank_normalization: None, + retrieval_normalization: None, + segments: None, + }), + diversity: None, + retrieval_sources: None, + }; + let overridden = + search::ranking_policy_id(&cfg, Some(&override_)).expect("Expected overridden policy id."); + + assert_ne!(base, overridden); +} + +#[test] +fn ranking_policy_id_changes_with_retrieval_source_override() { + let cfg = parse_example_config(); + let base = search::ranking_policy_id(&cfg, None).expect("Expected base policy id."); + let override_ = RankingRequestOverride { + blend: None, + diversity: None, + retrieval_sources: Some(RetrievalSourcesRankingOverride { + fusion_weight: Some(0.75), + structured_field_weight: Some(1.25), + recursive_weight: Some(0.0), + fusion_priority: Some(2), + structured_field_priority: Some(1), + recursive_priority: Some(0), + }), + }; + let overridden = + search::ranking_policy_id(&cfg, Some(&override_)).expect("Expected overridden policy id."); + + assert_ne!(base, overridden); +} + +#[test] +fn replay_ranking_policy_id_matches_ranking_policy_id() { + let cfg = parse_example_config(); + let expected = search::ranking_policy_id(&cfg, None).expect("Expected policy id."); + let now = OffsetDateTime::from_unix_timestamp(0).expect("Valid timestamp."); + let trace = TraceReplayContext { + trace_id: Uuid::new_v4(), + query: "deployment steps".to_string(), + candidate_count: 3, + top_k: 2, + created_at: now, + }; + let candidates = vec![ + TraceReplayCandidate { + note_id: Uuid::new_v4(), + chunk_id: Uuid::new_v4(), + chunk_index: 0, + snippet: "deployment steps".to_string(), + retrieval_rank: 1, + retrieval_score: None, + rerank_score: 0.1, + note_scope: "project_shared".to_string(), + note_importance: 0.1, + note_updated_at: now, + note_hit_count: 0, + note_last_hit_at: None, + diversity_selected: None, + diversity_selected_rank: None, + diversity_selected_reason: None, + diversity_skipped_reason: None, + diversity_nearest_selected_note_id: None, + diversity_similarity: None, + diversity_mmr_score: None, + diversity_missing_embedding: None, + }, + TraceReplayCandidate { + note_id: Uuid::new_v4(), + chunk_id: Uuid::new_v4(), + chunk_index: 0, + snippet: "deployment steps".to_string(), + retrieval_rank: 2, + retrieval_score: None, + rerank_score: 0.9, + note_scope: "project_shared".to_string(), + note_importance: 0.1, + note_updated_at: now, + note_hit_count: 0, + note_last_hit_at: None, + diversity_selected: None, + diversity_selected_rank: None, + diversity_selected_reason: None, + diversity_skipped_reason: None, + diversity_nearest_selected_note_id: None, + diversity_similarity: None, + diversity_mmr_score: None, + diversity_missing_embedding: None, + }, + TraceReplayCandidate { + note_id: Uuid::new_v4(), + chunk_id: Uuid::new_v4(), + chunk_index: 0, + snippet: "deployment steps".to_string(), + retrieval_rank: 3, + retrieval_score: None, + rerank_score: 0.2, + note_scope: "org_shared".to_string(), + note_importance: 0.1, + note_updated_at: now, + note_hit_count: 0, + note_last_hit_at: None, + diversity_selected: None, + diversity_selected_rank: None, + diversity_selected_reason: None, + diversity_skipped_reason: None, + diversity_nearest_selected_note_id: None, + diversity_similarity: None, + diversity_mmr_score: None, + diversity_missing_embedding: None, + }, + ]; + let out = search::replay_ranking_from_candidates(&cfg, &trace, None, &candidates, 2) + .expect("Expected replay output."); + + for item in out { + assert_eq!(item.explain.ranking.policy_id, expected); + } +} diff --git a/packages/elf-service/src/search/tests_query_basics.rs b/packages/elf-service/src/search/tests_query_basics.rs new file mode 100644 index 00000000..b061a040 --- /dev/null +++ b/packages/elf-service/src/search/tests_query_basics.rs @@ -0,0 +1,71 @@ +use serde_json::Value; + +use crate::search::{self, ranking}; +use elf_config::SearchDynamic; + +#[test] +fn dense_embedding_input_includes_project_context_suffix() { + let input = + ranking::build_dense_embedding_input("Find payments code.", Some("This is a billing API.")); + + assert!(input.starts_with("Find payments code.\n\nProject context:\n")); + assert!(input.contains("This is a billing API.")); +} + +#[test] +fn dense_embedding_input_skips_empty_project_context() { + let input = ranking::build_dense_embedding_input("Find payments code.", Some(" ")); + + assert_eq!(input, "Find payments code."); +} + +#[test] +fn normalize_queries_includes_original_and_dedupes() { + let queries = vec!["alpha".to_string(), "beta".to_string(), "alpha".to_string()]; + let normalized = ranking::normalize_queries(queries, "alpha", true, 4); + + assert_eq!(normalized, vec!["alpha".to_string(), "beta".to_string()]); +} + +#[test] +fn normalize_queries_respects_max_queries() { + let queries = + vec!["one".to_string(), "two".to_string(), "three".to_string(), "four".to_string()]; + let normalized = ranking::normalize_queries(queries, "zero", true, 3); + + assert_eq!(normalized.len(), 3); +} + +#[test] +fn dynamic_trigger_checks_candidates_and_score() { + let cfg = SearchDynamic { min_candidates: 10, min_top_score: 0.2 }; + + assert!(ranking::should_expand_dynamic(5, 0.9, &cfg)); + assert!(ranking::should_expand_dynamic(20, 0.1, &cfg)); + assert!(!ranking::should_expand_dynamic(20, 0.9, &cfg)); +} + +#[test] +fn rank_normalize_maps_rank_to_unit_interval() { + assert!((ranking::rank_normalize(1, 1) - 1.0).abs() < 1e-6); + assert!((ranking::rank_normalize(1, 5) - 1.0).abs() < 1e-6); + assert!((ranking::rank_normalize(3, 5) - 0.5).abs() < 1e-6); + assert!((ranking::rank_normalize(5, 5) - 0.0).abs() < 1e-6); + assert!((ranking::rank_normalize(0, 5) - 0.0).abs() < 1e-6); +} + +#[test] +fn build_trace_audit_includes_token_id_when_present() { + let audit = search::build_trace_audit("agent-a", Some("tok-123")); + + assert_eq!(audit.get("actor_id"), Some(&Value::from("agent-a"))); + assert_eq!(audit.get("token_id"), Some(&Value::from("tok-123"))); +} + +#[test] +fn build_trace_audit_omits_token_id_when_empty() { + let audit = search::build_trace_audit("agent-a", Some(" ")); + + assert_eq!(audit.get("actor_id"), Some(&Value::from("agent-a"))); + assert!(audit.get("token_id").is_none()); +} diff --git a/packages/elf-service/src/search/tests_relation_context.rs b/packages/elf-service/src/search/tests_relation_context.rs new file mode 100644 index 00000000..1579c2cb --- /dev/null +++ b/packages/elf-service/src/search/tests_relation_context.rs @@ -0,0 +1,42 @@ +use crate::{ + ElfService, + search::{OffsetDateTime, Uuid}, +}; + +#[test] +fn relation_context_rows_without_evidence_are_suppressed() { + let now = OffsetDateTime::from_unix_timestamp(100).expect("valid timestamp"); + let note_id = Uuid::from_u128(1); + let contexts = + ElfService::group_relation_context_rows(vec![crate::search::SearchRelationContextRow { + note_id, + fact_id: Uuid::from_u128(2), + scope: "project_shared".to_string(), + subject_canonical: Some("Alice".to_string()), + subject_kind: Some("person".to_string()), + predicate: "prefers".to_string(), + object_entity_id: None, + object_canonical: None, + object_kind: None, + object_value: Some("source-bound recall".to_string()), + valid_from: now, + valid_to: None, + is_current: true, + evidence_note_ids: Vec::new(), + }]); + + assert!(!contexts.contains_key(¬e_id)); +} + +#[test] +fn relation_context_sql_enforces_shared_grant_keys() { + assert!( + crate::search::RELATION_CONTEXT_SQL + .contains("concat(gf.scope, ':', gf.agent_id) = ANY($10::text[])") + ); + assert!( + crate::search::RELATION_CONTEXT_SQL.contains( + "concat(evidence_note.scope, ':', evidence_note.agent_id) = ANY($10::text[])" + ) + ); +} diff --git a/packages/elf-service/src/search/tests_retrieval_merge.rs b/packages/elf-service/src/search/tests_retrieval_merge.rs new file mode 100644 index 00000000..f129ea53 --- /dev/null +++ b/packages/elf-service/src/search/tests_retrieval_merge.rs @@ -0,0 +1,186 @@ +use crate::search::{ + ChunkCandidate, RetrievalSourceCandidates, RetrievalSourceKind, Uuid, ranking, +}; + +fn test_chunk_candidate(note_id: Uuid, retrieval_rank: u32) -> ChunkCandidate { + ChunkCandidate { + chunk_id: Uuid::new_v4(), + note_id, + chunk_index: 0, + retrieval_rank, + retrieval_score: None, + scope: None, + updated_at: None, + embedding_version: Some("v1".to_string()), + } +} + +fn default_retrieval_sources_policy() -> ranking::ResolvedRetrievalSourcesPolicy { + ranking::ResolvedRetrievalSourcesPolicy { + fusion_weight: 1.0, + structured_field_weight: 1.0, + recursive_weight: 0.0, + fusion_priority: 1, + structured_field_priority: 0, + recursive_priority: 0, + } +} + +#[test] +fn merge_retrieval_candidates_keeps_structured_hits_under_full_fusion_capacity() { + let mut fusion = Vec::new(); + + for rank in 1..=10 { + fusion.push(test_chunk_candidate(Uuid::new_v4(), rank)); + } + + let structured = vec![test_chunk_candidate(Uuid::new_v4(), 1)]; + let structured_chunk_id = structured[0].chunk_id; + let merged = ranking::merge_retrieval_candidates( + vec![ + RetrievalSourceCandidates { source: RetrievalSourceKind::Fusion, candidates: fusion }, + RetrievalSourceCandidates { + source: RetrievalSourceKind::StructuredField, + candidates: structured, + }, + ], + &default_retrieval_sources_policy(), + 10, + ); + let merged_chunk_ids: Vec = merged.iter().map(|candidate| candidate.chunk_id).collect(); + + assert!( + merged_chunk_ids.contains(&structured_chunk_id), + "Structured candidate was dropped by retrieval fusion." + ); +} + +#[test] +fn merge_retrieval_candidates_prefers_dual_source_signal_on_tie() { + let shared_note_id = Uuid::new_v4(); + let shared_chunk_id = Uuid::new_v4(); + let fusion_only_note_id = Uuid::new_v4(); + let fusion_only_chunk_id = Uuid::new_v4(); + let fusion = vec![ + ChunkCandidate { + chunk_id: shared_chunk_id, + note_id: shared_note_id, + chunk_index: 0, + retrieval_rank: 9, + retrieval_score: None, + scope: None, + updated_at: None, + embedding_version: Some("v1".to_string()), + }, + ChunkCandidate { + chunk_id: fusion_only_chunk_id, + note_id: fusion_only_note_id, + chunk_index: 0, + retrieval_rank: 1, + retrieval_score: None, + scope: None, + updated_at: None, + embedding_version: Some("v1".to_string()), + }, + ]; + let structured = vec![ChunkCandidate { + chunk_id: shared_chunk_id, + note_id: shared_note_id, + chunk_index: 0, + retrieval_rank: 1, + retrieval_score: None, + scope: None, + updated_at: None, + embedding_version: Some("v1".to_string()), + }]; + let merged = ranking::merge_retrieval_candidates( + vec![ + RetrievalSourceCandidates { source: RetrievalSourceKind::Fusion, candidates: fusion }, + RetrievalSourceCandidates { + source: RetrievalSourceKind::StructuredField, + candidates: structured, + }, + ], + &default_retrieval_sources_policy(), + 1, + ); + let first = merged.first().expect("Expected merged candidate."); + + assert_eq!(first.chunk_id, shared_chunk_id); +} + +#[test] +fn merge_retrieval_candidates_uses_configured_source_priority_on_tie() { + let fusion_chunk_id = Uuid::from_u128(1); + let recursive_chunk_id = Uuid::from_u128(2); + let mut fusion_candidate = test_chunk_candidate(Uuid::new_v4(), 1); + let mut recursive_candidate = test_chunk_candidate(Uuid::new_v4(), 1); + + fusion_candidate.chunk_id = fusion_chunk_id; + recursive_candidate.chunk_id = recursive_chunk_id; + + let policy = ranking::ResolvedRetrievalSourcesPolicy { + fusion_weight: 1.0, + structured_field_weight: 0.0, + recursive_weight: 1.0, + fusion_priority: 10, + structured_field_priority: 20, + recursive_priority: 0, + }; + let merged = ranking::merge_retrieval_candidates( + vec![ + RetrievalSourceCandidates { + source: RetrievalSourceKind::Fusion, + candidates: vec![fusion_candidate], + }, + RetrievalSourceCandidates { + source: RetrievalSourceKind::Recursive, + candidates: vec![recursive_candidate], + }, + ], + &policy, + 2, + ); + + assert_eq!(merged[0].chunk_id, recursive_chunk_id); + assert_eq!(merged[1].chunk_id, fusion_chunk_id); +} + +#[test] +fn retrieval_weight_for_rank_uses_first_matching_segment_or_last() { + let segments = vec![ + ranking::BlendSegment { max_retrieval_rank: 3, retrieval_weight: 0.7 }, + ranking::BlendSegment { max_retrieval_rank: 10, retrieval_weight: 0.2 }, + ]; + + assert!((ranking::retrieval_weight_for_rank(1, &segments) - 0.7).abs() < 1e-6); + assert!((ranking::retrieval_weight_for_rank(3, &segments) - 0.7).abs() < 1e-6); + assert!((ranking::retrieval_weight_for_rank(4, &segments) - 0.2).abs() < 1e-6); + assert!((ranking::retrieval_weight_for_rank(999, &segments) - 0.2).abs() < 1e-6); +} + +#[test] +fn blend_math_is_linear_and_additive() { + let segments = vec![ + ranking::BlendSegment { max_retrieval_rank: 2, retrieval_weight: 0.7 }, + ranking::BlendSegment { max_retrieval_rank: 10, retrieval_weight: 0.2 }, + ]; + let retrieval_rank = 3; + let rerank_rank = 2; + let retrieval_norm = ranking::rank_normalize(retrieval_rank, 10); + let rerank_norm = ranking::rank_normalize(rerank_rank, 4); + let blend_retrieval_weight = ranking::retrieval_weight_for_rank(retrieval_rank, &segments); + + assert!((blend_retrieval_weight - 0.2).abs() < 1e-6); + assert!((retrieval_norm - (7.0 / 9.0)).abs() < 1e-6); + assert!((rerank_norm - (2.0 / 3.0)).abs() < 1e-6); + + let retrieval_term = blend_retrieval_weight * retrieval_norm; + let rerank_term = (1.0 - blend_retrieval_weight) * rerank_norm; + let tie_breaker_score = 0.1; + let scope_context_boost = 0.0; + let final_score = retrieval_term + rerank_term + tie_breaker_score + scope_context_boost; + let expected = (0.2 * (7.0 / 9.0)) + (0.8 * (2.0 / 3.0)) + 0.1; + + assert!((final_score - expected).abs() < 1e-6, "Unexpected final_score: {final_score}"); +} diff --git a/packages/elf-service/tests/acceptance/docs_extension_v1.rs b/packages/elf-service/tests/acceptance/docs_extension_v1.rs index 7a6f321c..f770b6a6 100644 --- a/packages/elf-service/tests/acceptance/docs_extension_v1.rs +++ b/packages/elf-service/tests/acceptance/docs_extension_v1.rs @@ -1,5 +1,9 @@ +mod excerpts; +mod indexing; +mod l0_search; mod lifecycle; mod search_filters; +mod validation_rejections; use std::{ collections::HashSet, @@ -29,9 +33,8 @@ use uuid::Uuid; use crate::acceptance::{self, SpyExtractor, StubEmbedding, StubRerank, chunking::ChunkingConfig}; use elf_config::EmbeddingProviderConfig; use elf_service::{ - AddNoteInput, AddNoteRequest, DocsExcerptsGetRequest, DocsGetRequest, DocsPutRequest, - DocsPutResponse, DocsSearchL0Request, ElfService, Error, PayloadLevel, Providers, - SearchRequest, TextQuoteSelector, docs::DocRetrievalTrajectory, + DocsExcerptsGetRequest, DocsGetRequest, DocsPutRequest, DocsPutResponse, DocsSearchL0Request, + ElfService, Providers, TextQuoteSelector, docs::DocRetrievalTrajectory, }; use elf_storage::{db::Db, qdrant::QdrantStore}; use elf_testkit::TestDatabase; @@ -317,276 +320,6 @@ async fn cleanup_docs_filter_fixture( test_db.cleanup().await.expect("Failed to cleanup test database."); } -#[tokio::test] -#[ignore = "Requires external Postgres and Qdrant. Set ELF_PG_DSN and ELF_QDRANT_URL (or ELF_QDRANT_GRPC_URL) to run."] -async fn docs_put_rejects_non_english_source_ref() { - let Some(test_db) = acceptance::test_db().await else { - eprintln!("Skipping docs_extension_v1; set ELF_PG_DSN to run this test."); - - return; - }; - let Some(qdrant_url) = acceptance::test_qdrant_url() else { - eprintln!( - "Skipping docs_extension_v1; set ELF_QDRANT_URL (or ELF_QDRANT_GRPC_URL) to run this test." - ); - - return; - }; - let collection = test_db.collection_name("elf_acceptance"); - let docs_collection = test_db.collection_name("elf_acceptance_docs"); - let cfg = acceptance::test_config( - test_db.dsn().to_string(), - qdrant_url, - 4_096, - collection, - docs_collection, - ); - let providers = Providers::new( - Arc::new(StubEmbedding { vector_dim: 4_096 }), - Arc::new(StubRerank), - Arc::new(SpyExtractor { - calls: Arc::new(Default::default()), - payload: serde_json::json!({ "notes": [] }), - }), - ); - let service = - acceptance::build_service(cfg, providers).await.expect("Failed to build service."); - let result = service - .docs_put(DocsPutRequest { - tenant_id: "t".to_string(), - project_id: "p".to_string(), - agent_id: "owner".to_string(), - scope: "project_shared".to_string(), - doc_type: None, - title: Some("Docs rejection sample".to_string()), - write_policy: None, - source_ref: serde_json::json!({ - "schema": "doc_source_ref/v1", - "doc_type": "knowledge", - "ts": "2026-02-25T12:00:00Z", - "notes": "你好" - }), - content: TEST_CONTENT.to_string(), - }) - .await; - - match result { - Err(Error::NonEnglishInput { field }) => { - assert_eq!(field, "$.source_ref[\"notes\"]"); - }, - other => panic!("Expected NonEnglishInput, got {other:?}"), - } - - test_db.cleanup().await.expect("Failed to cleanup test database."); -} - -#[tokio::test] -#[ignore = "Requires external Postgres and Qdrant. Set ELF_PG_DSN and ELF_QDRANT_URL (or ELF_QDRANT_GRPC_URL) to run."] -async fn docs_put_rejects_missing_and_invalid_source_ref() { - let Some(test_db) = acceptance::test_db().await else { - eprintln!("Skipping docs_extension_v1; set ELF_PG_DSN to run this test."); - - return; - }; - let Some(qdrant_url) = acceptance::test_qdrant_url() else { - eprintln!( - "Skipping docs_extension_v1; set ELF_QDRANT_URL (or ELF_QDRANT_GRPC_URL) to run this test." - ); - - return; - }; - let collection = test_db.collection_name("elf_acceptance"); - let docs_collection = test_db.collection_name("elf_acceptance_docs"); - let cfg = acceptance::test_config( - test_db.dsn().to_string(), - qdrant_url, - 4_096, - collection, - docs_collection, - ); - let providers = Providers::new( - Arc::new(StubEmbedding { vector_dim: 4_096 }), - Arc::new(StubRerank), - Arc::new(SpyExtractor { - calls: Arc::new(Default::default()), - payload: serde_json::json!({ "notes": [] }), - }), - ); - let service = - acceptance::build_service(cfg, providers).await.expect("Failed to build service."); - let result = service - .docs_put(DocsPutRequest { - tenant_id: "t".to_string(), - project_id: "p".to_string(), - agent_id: "owner".to_string(), - scope: "project_shared".to_string(), - doc_type: None, - title: Some("Docs rejection sample".to_string()), - write_policy: None, - source_ref: serde_json::json!("legacy-shape"), - content: TEST_CONTENT.to_string(), - }) - .await; - - match result { - Err(Error::InvalidRequest { message }) => { - assert!(message.contains("source_ref must be a JSON object")); - }, - other => panic!("Expected InvalidRequest for non-object source_ref, got {other:?}"), - } - - let result = service - .docs_put(DocsPutRequest { - tenant_id: "t".to_string(), - project_id: "p".to_string(), - agent_id: "owner".to_string(), - scope: "project_shared".to_string(), - doc_type: None, - title: Some("Docs rejection sample".to_string()), - write_policy: None, - source_ref: serde_json::json!({ - "schema": "source_ref/v1", - "doc_type": "knowledge", - "ts": "2026-02-25T12:00:00Z", - }), - content: TEST_CONTENT.to_string(), - }) - .await; - - match result { - Err(Error::InvalidRequest { message }) => { - assert!(message.contains("doc_source_ref/v1")); - }, - other => panic!("Expected InvalidRequest for wrong source_ref schema, got {other:?}"), - } - - test_db.cleanup().await.expect("Failed to cleanup test database."); -} - -#[tokio::test] -#[ignore = "Requires external Postgres and Qdrant. Set ELF_PG_DSN and ELF_QDRANT_URL (or ELF_QDRANT_GRPC_URL) to run."] -async fn docs_search_l0_requires_qdrant_payload_indexes_for_filters() { - let Some(ctx) = setup_docs_context().await else { return }; - let DocsContext { test_db, service } = ctx; - let doc = put_test_doc(&service).await; - let (handle, shutdown) = spawn_doc_worker(&service).await; - - assert!( - wait_for_doc_outbox_done(&service.db.pool, doc.doc_id, std::time::Duration::from_secs(15)) - .await, - "Expected doc outbox to reach DONE." - ); - - verify_docs_qdrant_filter_indexes(&service).await; - - let _ = shutdown.send(()); - - handle.abort(); - - let _ = handle.await; - - test_db.cleanup().await.expect("Failed to cleanup test database."); -} - -#[tokio::test] -#[ignore = "Requires external Postgres and Qdrant. Set ELF_PG_DSN and ELF_QDRANT_URL (or ELF_QDRANT_GRPC_URL) to run."] -async fn docs_search_l0_projects_source_ref_payload_fields() { - let Some(ctx) = setup_docs_context().await else { return }; - let DocsContext { test_db, service } = ctx; - let source_ts = "2025-01-01T10:00:00Z"; - let cases = [ - ( - "chat", - "Docs chat source ref sample", - serde_json::json!({ - "schema": "doc_source_ref/v1", - "doc_type": "chat", - "ts": source_ts, - "thread_id": "thread-42", - "role": "assistant" - }), - ("thread_id", "thread-42"), - ["domain", "repo"], - ), - ( - "search", - "Docs search source ref sample", - serde_json::json!({ - "schema": "doc_source_ref/v1", - "doc_type": "search", - "ts": source_ts, - "query": "What is payload indexing?", - "url": "https://docs.example.com/search", - "domain": "docs.example.com", - "provider": "web" - }), - ("domain", "docs.example.com"), - ["thread_id", "repo"], - ), - ( - "dev", - "Docs dev source ref sample", - serde_json::json!({ - "schema": "doc_source_ref/v1", - "doc_type": "dev", - "ts": source_ts, - "repo": "elf-org/docs", - "commit_sha": "9f0a3f4c4eb58bfcf4a5f4f9d0c7be0e13c2f8d19" - }), - ("repo", "elf-org/docs"), - ["thread_id", "domain"], - ), - ]; - let mut docs = Vec::new(); - - for (doc_type, title, source_ref, expected_present, expected_absent) in cases { - let doc = put_test_doc_with( - &service, - "owner", - "project_shared", - Some(doc_type), - title, - source_ref, - TEST_CONTENT, - ) - .await; - - docs.push((doc.doc_id, expected_present, expected_absent)); - } - - let (handle, shutdown) = spawn_doc_worker(&service).await; - - for (doc_id, expected_present, expected_absent) in &docs { - assert!( - wait_for_doc_outbox_done(&service.db.pool, *doc_id, std::time::Duration::from_secs(15)) - .await, - "Expected doc outbox to reach DONE." - ); - - let point = fetch_first_doc_chunk_point(&service, *doc_id) - .await - .expect("Expected doc chunk point in Qdrant."); - - assert_eq!(point.payload.get("doc_ts").and_then(payload_string), Some(source_ts)); - assert_eq!( - point.payload.get(expected_present.0).and_then(payload_string), - Some(expected_present.1) - ); - - for key in expected_absent { - assert!(!point.payload.contains_key(*key)); - } - } - - _ = shutdown.send(()); - - handle.abort(); - - let _ = handle.await; - - test_db.cleanup().await.expect("Failed to cleanup test database."); -} - async fn setup_docs_context() -> Option { let Some(test_db) = acceptance::test_db().await else { eprintln!("Skipping docs_extension_v1; set ELF_PG_DSN to run this test."); @@ -685,298 +418,6 @@ async fn put_test_doc(service: &ElfService) -> DocsPutResponse { .await } -#[tokio::test] -#[ignore = "Requires external Postgres and Qdrant. Set ELF_PG_DSN and ELF_QDRANT_URL (or ELF_QDRANT_GRPC_URL) to run."] -async fn docs_search_l0_returns_pointer_and_explain_trajectory() { - let Some(ctx) = setup_docs_context().await else { return }; - let DocsContext { test_db, service } = ctx; - let doc = put_test_doc(&service).await; - let (handle, shutdown) = spawn_doc_worker(&service).await; - - assert!( - wait_for_doc_outbox_done(&service.db.pool, doc.doc_id, std::time::Duration::from_secs(15)) - .await, - "Expected doc outbox to reach DONE." - ); - - let results = service - .docs_search_l0(DocsSearchL0Request { - tenant_id: "t".to_string(), - project_id: "p".to_string(), - caller_agent_id: "reader".to_string(), - scope: None, - status: None, - doc_type: None, - sparse_mode: None, - domain: None, - repo: None, - agent_id: None, - thread_id: None, - updated_after: None, - updated_before: None, - ts_gte: None, - ts_lte: None, - read_profile: "private_plus_project".to_string(), - query: "peregrine".to_string(), - top_k: Some(5), - candidate_k: Some(20), - explain: Some(true), - }) - .await - .expect("Failed to search docs."); - - assert_eq!( - results.trajectory.as_ref().map(|trajectory| trajectory.schema.as_str()), - Some("doc_retrieval_trajectory/v1") - ); - assert!(results.trajectory.is_some()); - assert!(!results.items.is_empty()); - assert!(results.items[0].pointer.schema == "source_ref/v1"); - assert!(!results.items[0].pointer.reference.doc_id.is_nil()); - assert!(!results.items[0].pointer.reference.chunk_id.is_nil()); - assert_eq!( - results.items[0].pointer.reference.source_record_id, - results.items[0].pointer.reference.doc_id - ); - assert_eq!( - results.items[0].pointer.reference.source_span_id, - results.items[0].pointer.locator.span_id - ); - assert_eq!(results.items[0].pointer.resolver, "elf_doc_ext/v1"); - assert!(!results.items[0].pointer.locator.span_id.is_nil()); - assert!(!results.trace_id.is_nil()); - - let _ = shutdown.send(()); - - handle.abort(); - - let _ = handle.await; - - drop(service); - - test_db.cleanup().await.expect("Failed to cleanup test database."); -} - -#[tokio::test] -#[ignore = "Requires external Postgres and Qdrant. Set ELF_PG_DSN and ELF_QDRANT_URL (or ELF_QDRANT_GRPC_URL) to run."] -async fn docs_search_l0_note_pointer_roundtrip_hydrates_doc() { - let Some(ctx) = setup_docs_context().await else { return }; - let DocsContext { test_db, service } = ctx; - let doc = put_test_doc(&service).await; - let (handle, shutdown) = spawn_doc_worker(&service).await; - - assert!( - wait_for_doc_outbox_done(&service.db.pool, doc.doc_id, std::time::Duration::from_secs(15)) - .await, - "Expected doc outbox to reach DONE." - ); - - let (source_ref, source_ref_doc_id, source_ref_chunk_id) = - fetch_docs_pointer_source_ref(&service).await; - let note_id = add_note_with_pointer_source_ref(&service, source_ref.clone()).await; - - assert!( - wait_for_note_outbox_done(&service.db.pool, note_id, std::time::Duration::from_secs(15)) - .await, - "Expected note outbox to reach DONE." - ); - - let search_results = service - .search_raw_quick(SearchRequest { - tenant_id: "t".to_string(), - project_id: "p".to_string(), - agent_id: "agent".to_string(), - token_id: None, - read_profile: "private_only".to_string(), - payload_level: PayloadLevel::L2, - query: "peregrine".to_string(), - top_k: Some(5), - candidate_k: Some(20), - filter: None, - record_hits: Some(false), - ranking: None, - }) - .await - .expect("Failed to search note with doc pointer source_ref."); - let has_pointer_source_ref = - search_results.items.into_iter().any(|item| item.source_ref == source_ref); - - assert!( - has_pointer_source_ref, - "Expected search result to include note with pointer source_ref." - ); - - let excerpt = service - .docs_excerpts_get(DocsExcerptsGetRequest { - tenant_id: "t".to_string(), - project_id: "p".to_string(), - agent_id: "reader".to_string(), - read_profile: "private_plus_project".to_string(), - doc_id: source_ref_doc_id, - level: "L1".to_string(), - chunk_id: Some(source_ref_chunk_id), - quote: None, - position: None, - explain: None, - }) - .await - .expect("Failed to hydrate excerpt from pointer source_ref."); - - assert!(excerpt.verification.verified); - - let _ = shutdown.send(()); - - handle.abort(); - - let _ = handle.await; - - drop(service); - - test_db.cleanup().await.expect("Failed to cleanup test database."); -} - -async fn fetch_docs_pointer_source_ref(service: &ElfService) -> (serde_json::Value, Uuid, Uuid) { - let search = service - .docs_search_l0(DocsSearchL0Request { - tenant_id: "t".to_string(), - project_id: "p".to_string(), - caller_agent_id: "reader".to_string(), - scope: None, - status: None, - doc_type: None, - sparse_mode: None, - domain: None, - repo: None, - agent_id: None, - thread_id: None, - updated_after: None, - updated_before: None, - ts_gte: None, - ts_lte: None, - read_profile: "private_plus_project".to_string(), - query: "peregrine".to_string(), - top_k: Some(5), - candidate_k: Some(20), - explain: None, - }) - .await - .expect("Failed to search docs for source_ref pointer."); - - assert!(!search.items.is_empty(), "Expected docs_search_l0 to return source_ref pointer."); - - let pointer = search.items[0].pointer.clone(); - let source_ref = - serde_json::to_value(&pointer).expect("Failed to serialize docs_search_l0 pointer."); - - (source_ref, pointer.reference.doc_id, pointer.reference.chunk_id) -} - -async fn add_note_with_pointer_source_ref( - service: &ElfService, - source_ref: serde_json::Value, -) -> Uuid { - let note = service - .add_note(AddNoteRequest { - tenant_id: "t".to_string(), - project_id: "p".to_string(), - agent_id: "agent".to_string(), - scope: "agent_private".to_string(), - notes: vec![AddNoteInput { - r#type: "fact".to_string(), - key: Some("doc_pointer_note".to_string()), - text: "Peregrine note for source_ref hydration check.".to_string(), - structured: None, - importance: 0.5, - confidence: 0.9, - ttl_days: None, - source_ref, - write_policy: None, - }], - }) - .await - .expect("Failed to add note from docs pointer."); - - note.results[0].note_id.expect("Expected note_id in add_note result.") -} - -#[tokio::test] -#[ignore = "Requires external Postgres and Qdrant. Set ELF_PG_DSN and ELF_QDRANT_URL (or ELF_QDRANT_GRPC_URL) to run."] -async fn docs_excerpts_get_supports_l0_and_returns_locator_and_optional_trajectory() { - let Some(ctx) = setup_docs_context().await else { return }; - let DocsContext { test_db, service } = ctx; - let doc = put_test_doc(&service).await; - let (handle, shutdown) = spawn_doc_worker(&service).await; - - assert!( - wait_for_doc_outbox_done(&service.db.pool, doc.doc_id, std::time::Duration::from_secs(15)) - .await, - "Expected doc outbox to reach DONE." - ); - - let excerpt = service - .docs_excerpts_get(DocsExcerptsGetRequest { - tenant_id: "t".to_string(), - project_id: "p".to_string(), - agent_id: "reader".to_string(), - read_profile: "private_plus_project".to_string(), - doc_id: doc.doc_id, - level: "L0".to_string(), - chunk_id: None, - quote: Some(TextQuoteSelector { - exact: "Keyword: peregrine.".to_string(), - prefix: Some("evidence. ".to_string()), - suffix: Some("\nSecond".to_string()), - }), - position: None, - explain: Some(true), - }) - .await - .expect("Failed to hydrate excerpt."); - - assert_eq!(excerpt.locator.selector_kind, "quote"); - assert!(excerpt.locator.match_end_offset > excerpt.locator.match_start_offset); - assert!(!excerpt.locator.span_id.is_nil()); - assert!(excerpt.excerpt.len() <= 256); - assert!(excerpt.trajectory.is_some()); - assert_eq!( - excerpt.trajectory.as_ref().map(|trajectory| trajectory.schema.as_str()), - Some("doc_retrieval_trajectory/v1") - ); - assert!(!excerpt.trace_id.is_nil()); - - let no_explain = service - .docs_excerpts_get(DocsExcerptsGetRequest { - tenant_id: "t".to_string(), - project_id: "p".to_string(), - agent_id: "reader".to_string(), - read_profile: "private_plus_project".to_string(), - doc_id: doc.doc_id, - level: "L0".to_string(), - chunk_id: None, - quote: Some(TextQuoteSelector { - exact: "Keyword: peregrine.".to_string(), - prefix: Some("evidence. ".to_string()), - suffix: Some("\nSecond".to_string()), - }), - position: None, - explain: Some(false), - }) - .await - .expect("Failed to hydrate excerpt."); - - assert!(no_explain.trajectory.is_none()); - - let _ = shutdown.send(()); - - handle.abort(); - - let _ = handle.await; - - drop(service); - - test_db.cleanup().await.expect("Failed to cleanup test database."); -} - async fn put_test_doc_with( service: &ElfService, agent_id: &str, diff --git a/packages/elf-service/tests/acceptance/docs_extension_v1/excerpts.rs b/packages/elf-service/tests/acceptance/docs_extension_v1/excerpts.rs new file mode 100644 index 00000000..1c4b4a5c --- /dev/null +++ b/packages/elf-service/tests/acceptance/docs_extension_v1/excerpts.rs @@ -0,0 +1,84 @@ +use crate::acceptance::docs_extension_v1::{self, DocsContext}; +use elf_service::{DocsExcerptsGetRequest, TextQuoteSelector}; + +#[tokio::test] +#[ignore = "Requires external Postgres and Qdrant. Set ELF_PG_DSN and ELF_QDRANT_URL (or ELF_QDRANT_GRPC_URL) to run."] +async fn docs_excerpts_get_supports_l0_and_returns_locator_and_optional_trajectory() { + let Some(ctx) = docs_extension_v1::setup_docs_context().await else { return }; + let DocsContext { test_db, service } = ctx; + let doc = docs_extension_v1::put_test_doc(&service).await; + let (handle, shutdown) = docs_extension_v1::spawn_doc_worker(&service).await; + + assert!( + docs_extension_v1::wait_for_doc_outbox_done( + &service.db.pool, + doc.doc_id, + std::time::Duration::from_secs(15) + ) + .await, + "Expected doc outbox to reach DONE." + ); + + let excerpt = service + .docs_excerpts_get(DocsExcerptsGetRequest { + tenant_id: "t".to_string(), + project_id: "p".to_string(), + agent_id: "reader".to_string(), + read_profile: "private_plus_project".to_string(), + doc_id: doc.doc_id, + level: "L0".to_string(), + chunk_id: None, + quote: Some(TextQuoteSelector { + exact: "Keyword: peregrine.".to_string(), + prefix: Some("evidence. ".to_string()), + suffix: Some("\nSecond".to_string()), + }), + position: None, + explain: Some(true), + }) + .await + .expect("Failed to hydrate excerpt."); + + assert_eq!(excerpt.locator.selector_kind, "quote"); + assert!(excerpt.locator.match_end_offset > excerpt.locator.match_start_offset); + assert!(!excerpt.locator.span_id.is_nil()); + assert!(excerpt.excerpt.len() <= 256); + assert!(excerpt.trajectory.is_some()); + assert_eq!( + excerpt.trajectory.as_ref().map(|trajectory| trajectory.schema.as_str()), + Some("doc_retrieval_trajectory/v1") + ); + assert!(!excerpt.trace_id.is_nil()); + + let no_explain = service + .docs_excerpts_get(DocsExcerptsGetRequest { + tenant_id: "t".to_string(), + project_id: "p".to_string(), + agent_id: "reader".to_string(), + read_profile: "private_plus_project".to_string(), + doc_id: doc.doc_id, + level: "L0".to_string(), + chunk_id: None, + quote: Some(TextQuoteSelector { + exact: "Keyword: peregrine.".to_string(), + prefix: Some("evidence. ".to_string()), + suffix: Some("\nSecond".to_string()), + }), + position: None, + explain: Some(false), + }) + .await + .expect("Failed to hydrate excerpt."); + + assert!(no_explain.trajectory.is_none()); + + let _ = shutdown.send(()); + + handle.abort(); + + let _ = handle.await; + + drop(service); + + test_db.cleanup().await.expect("Failed to cleanup test database."); +} diff --git a/packages/elf-service/tests/acceptance/docs_extension_v1/indexing.rs b/packages/elf-service/tests/acceptance/docs_extension_v1/indexing.rs new file mode 100644 index 00000000..c70d9200 --- /dev/null +++ b/packages/elf-service/tests/acceptance/docs_extension_v1/indexing.rs @@ -0,0 +1,133 @@ +use crate::acceptance::docs_extension_v1::{self, DocsContext, TEST_CONTENT, payload_string}; + +#[tokio::test] +#[ignore = "Requires external Postgres and Qdrant. Set ELF_PG_DSN and ELF_QDRANT_URL (or ELF_QDRANT_GRPC_URL) to run."] +async fn docs_search_l0_requires_qdrant_payload_indexes_for_filters() { + let Some(ctx) = docs_extension_v1::setup_docs_context().await else { return }; + let DocsContext { test_db, service } = ctx; + let doc = docs_extension_v1::put_test_doc(&service).await; + let (handle, shutdown) = docs_extension_v1::spawn_doc_worker(&service).await; + + assert!( + docs_extension_v1::wait_for_doc_outbox_done( + &service.db.pool, + doc.doc_id, + std::time::Duration::from_secs(15) + ) + .await, + "Expected doc outbox to reach DONE." + ); + + docs_extension_v1::verify_docs_qdrant_filter_indexes(&service).await; + + let _ = shutdown.send(()); + + handle.abort(); + + let _ = handle.await; + + test_db.cleanup().await.expect("Failed to cleanup test database."); +} + +#[tokio::test] +#[ignore = "Requires external Postgres and Qdrant. Set ELF_PG_DSN and ELF_QDRANT_URL (or ELF_QDRANT_GRPC_URL) to run."] +async fn docs_search_l0_projects_source_ref_payload_fields() { + let Some(ctx) = docs_extension_v1::setup_docs_context().await else { return }; + let DocsContext { test_db, service } = ctx; + let source_ts = "2025-01-01T10:00:00Z"; + let cases = [ + ( + "chat", + "Docs chat source ref sample", + serde_json::json!({ + "schema": "doc_source_ref/v1", + "doc_type": "chat", + "ts": source_ts, + "thread_id": "thread-42", + "role": "assistant" + }), + ("thread_id", "thread-42"), + ["domain", "repo"], + ), + ( + "search", + "Docs search source ref sample", + serde_json::json!({ + "schema": "doc_source_ref/v1", + "doc_type": "search", + "ts": source_ts, + "query": "What is payload indexing?", + "url": "https://docs.example.com/search", + "domain": "docs.example.com", + "provider": "web" + }), + ("domain", "docs.example.com"), + ["thread_id", "repo"], + ), + ( + "dev", + "Docs dev source ref sample", + serde_json::json!({ + "schema": "doc_source_ref/v1", + "doc_type": "dev", + "ts": source_ts, + "repo": "elf-org/docs", + "commit_sha": "9f0a3f4c4eb58bfcf4a5f4f9d0c7be0e13c2f8d19" + }), + ("repo", "elf-org/docs"), + ["thread_id", "domain"], + ), + ]; + let mut docs = Vec::new(); + + for (doc_type, title, source_ref, expected_present, expected_absent) in cases { + let doc = docs_extension_v1::put_test_doc_with( + &service, + "owner", + "project_shared", + Some(doc_type), + title, + source_ref, + TEST_CONTENT, + ) + .await; + + docs.push((doc.doc_id, expected_present, expected_absent)); + } + + let (handle, shutdown) = docs_extension_v1::spawn_doc_worker(&service).await; + + for (doc_id, expected_present, expected_absent) in &docs { + assert!( + docs_extension_v1::wait_for_doc_outbox_done( + &service.db.pool, + *doc_id, + std::time::Duration::from_secs(15) + ) + .await, + "Expected doc outbox to reach DONE." + ); + + let point = docs_extension_v1::fetch_first_doc_chunk_point(&service, *doc_id) + .await + .expect("Expected doc chunk point in Qdrant."); + + assert_eq!(point.payload.get("doc_ts").and_then(payload_string), Some(source_ts)); + assert_eq!( + point.payload.get(expected_present.0).and_then(payload_string), + Some(expected_present.1) + ); + + for key in expected_absent { + assert!(!point.payload.contains_key(*key)); + } + } + + _ = shutdown.send(()); + + handle.abort(); + + let _ = handle.await; + + test_db.cleanup().await.expect("Failed to cleanup test database."); +} diff --git a/packages/elf-service/tests/acceptance/docs_extension_v1/l0_search.rs b/packages/elf-service/tests/acceptance/docs_extension_v1/l0_search.rs new file mode 100644 index 00000000..70d243c4 --- /dev/null +++ b/packages/elf-service/tests/acceptance/docs_extension_v1/l0_search.rs @@ -0,0 +1,231 @@ +use serde_json::Value; +use uuid::Uuid; + +use crate::acceptance::docs_extension_v1::{self, DocsContext}; +use elf_service::{ + AddNoteInput, AddNoteRequest, DocsExcerptsGetRequest, DocsSearchL0Request, ElfService, + PayloadLevel, SearchRequest, +}; + +#[tokio::test] +#[ignore = "Requires external Postgres and Qdrant. Set ELF_PG_DSN and ELF_QDRANT_URL (or ELF_QDRANT_GRPC_URL) to run."] +async fn docs_search_l0_returns_pointer_and_explain_trajectory() { + let Some(ctx) = docs_extension_v1::setup_docs_context().await else { return }; + let DocsContext { test_db, service } = ctx; + let doc = docs_extension_v1::put_test_doc(&service).await; + let (handle, shutdown) = docs_extension_v1::spawn_doc_worker(&service).await; + + assert!( + docs_extension_v1::wait_for_doc_outbox_done( + &service.db.pool, + doc.doc_id, + std::time::Duration::from_secs(15) + ) + .await, + "Expected doc outbox to reach DONE." + ); + + let results = service + .docs_search_l0(DocsSearchL0Request { + tenant_id: "t".to_string(), + project_id: "p".to_string(), + caller_agent_id: "reader".to_string(), + scope: None, + status: None, + doc_type: None, + sparse_mode: None, + domain: None, + repo: None, + agent_id: None, + thread_id: None, + updated_after: None, + updated_before: None, + ts_gte: None, + ts_lte: None, + read_profile: "private_plus_project".to_string(), + query: "peregrine".to_string(), + top_k: Some(5), + candidate_k: Some(20), + explain: Some(true), + }) + .await + .expect("Failed to search docs."); + + assert_eq!( + results.trajectory.as_ref().map(|trajectory| trajectory.schema.as_str()), + Some("doc_retrieval_trajectory/v1") + ); + assert!(results.trajectory.is_some()); + assert!(!results.items.is_empty()); + assert!(results.items[0].pointer.schema == "source_ref/v1"); + assert!(!results.items[0].pointer.reference.doc_id.is_nil()); + assert!(!results.items[0].pointer.reference.chunk_id.is_nil()); + assert_eq!( + results.items[0].pointer.reference.source_record_id, + results.items[0].pointer.reference.doc_id + ); + assert_eq!( + results.items[0].pointer.reference.source_span_id, + results.items[0].pointer.locator.span_id + ); + assert_eq!(results.items[0].pointer.resolver, "elf_doc_ext/v1"); + assert!(!results.items[0].pointer.locator.span_id.is_nil()); + assert!(!results.trace_id.is_nil()); + + let _ = shutdown.send(()); + + handle.abort(); + + let _ = handle.await; + + drop(service); + + test_db.cleanup().await.expect("Failed to cleanup test database."); +} + +#[tokio::test] +#[ignore = "Requires external Postgres and Qdrant. Set ELF_PG_DSN and ELF_QDRANT_URL (or ELF_QDRANT_GRPC_URL) to run."] +async fn docs_search_l0_note_pointer_roundtrip_hydrates_doc() { + let Some(ctx) = docs_extension_v1::setup_docs_context().await else { return }; + let DocsContext { test_db, service } = ctx; + let doc = docs_extension_v1::put_test_doc(&service).await; + let (handle, shutdown) = docs_extension_v1::spawn_doc_worker(&service).await; + + assert!( + docs_extension_v1::wait_for_doc_outbox_done( + &service.db.pool, + doc.doc_id, + std::time::Duration::from_secs(15) + ) + .await, + "Expected doc outbox to reach DONE." + ); + + let (source_ref, source_ref_doc_id, source_ref_chunk_id) = + fetch_docs_pointer_source_ref(&service).await; + let note_id = add_note_with_pointer_source_ref(&service, source_ref.clone()).await; + + assert!( + docs_extension_v1::wait_for_note_outbox_done( + &service.db.pool, + note_id, + std::time::Duration::from_secs(15) + ) + .await, + "Expected note outbox to reach DONE." + ); + + let search_results = service + .search_raw_quick(SearchRequest { + tenant_id: "t".to_string(), + project_id: "p".to_string(), + agent_id: "agent".to_string(), + token_id: None, + read_profile: "private_only".to_string(), + payload_level: PayloadLevel::L2, + query: "peregrine".to_string(), + top_k: Some(5), + candidate_k: Some(20), + filter: None, + record_hits: Some(false), + ranking: None, + }) + .await + .expect("Failed to search note with doc pointer source_ref."); + let has_pointer_source_ref = + search_results.items.into_iter().any(|item| item.source_ref == source_ref); + + assert!( + has_pointer_source_ref, + "Expected search result to include note with pointer source_ref." + ); + + let excerpt = service + .docs_excerpts_get(DocsExcerptsGetRequest { + tenant_id: "t".to_string(), + project_id: "p".to_string(), + agent_id: "reader".to_string(), + read_profile: "private_plus_project".to_string(), + doc_id: source_ref_doc_id, + level: "L1".to_string(), + chunk_id: Some(source_ref_chunk_id), + quote: None, + position: None, + explain: None, + }) + .await + .expect("Failed to hydrate excerpt from pointer source_ref."); + + assert!(excerpt.verification.verified); + + let _ = shutdown.send(()); + + handle.abort(); + + let _ = handle.await; + + drop(service); + + test_db.cleanup().await.expect("Failed to cleanup test database."); +} + +async fn fetch_docs_pointer_source_ref(service: &ElfService) -> (Value, Uuid, Uuid) { + let search = service + .docs_search_l0(DocsSearchL0Request { + tenant_id: "t".to_string(), + project_id: "p".to_string(), + caller_agent_id: "reader".to_string(), + scope: None, + status: None, + doc_type: None, + sparse_mode: None, + domain: None, + repo: None, + agent_id: None, + thread_id: None, + updated_after: None, + updated_before: None, + ts_gte: None, + ts_lte: None, + read_profile: "private_plus_project".to_string(), + query: "peregrine".to_string(), + top_k: Some(5), + candidate_k: Some(20), + explain: None, + }) + .await + .expect("Failed to search docs for source_ref pointer."); + + assert!(!search.items.is_empty(), "Expected docs_search_l0 to return source_ref pointer."); + + let pointer = search.items[0].pointer.clone(); + let source_ref = + serde_json::to_value(&pointer).expect("Failed to serialize docs_search_l0 pointer."); + + (source_ref, pointer.reference.doc_id, pointer.reference.chunk_id) +} + +async fn add_note_with_pointer_source_ref(service: &ElfService, source_ref: Value) -> Uuid { + let note = service + .add_note(AddNoteRequest { + tenant_id: "t".to_string(), + project_id: "p".to_string(), + agent_id: "agent".to_string(), + scope: "agent_private".to_string(), + notes: vec![AddNoteInput { + r#type: "fact".to_string(), + key: Some("doc_pointer_note".to_string()), + text: "Peregrine note for source_ref hydration check.".to_string(), + structured: None, + importance: 0.5, + confidence: 0.9, + ttl_days: None, + source_ref, + write_policy: None, + }], + }) + .await + .expect("Failed to add note from docs pointer."); + + note.results[0].note_id.expect("Expected note_id in add_note result.") +} diff --git a/packages/elf-service/tests/acceptance/docs_extension_v1/validation_rejections.rs b/packages/elf-service/tests/acceptance/docs_extension_v1/validation_rejections.rs new file mode 100644 index 00000000..cefdbe55 --- /dev/null +++ b/packages/elf-service/tests/acceptance/docs_extension_v1/validation_rejections.rs @@ -0,0 +1,152 @@ +use std::sync::Arc; + +use crate::acceptance::{ + self, SpyExtractor, StubEmbedding, StubRerank, docs_extension_v1::TEST_CONTENT, +}; +use elf_service::{DocsPutRequest, Error, Providers}; + +#[tokio::test] +#[ignore = "Requires external Postgres and Qdrant. Set ELF_PG_DSN and ELF_QDRANT_URL (or ELF_QDRANT_GRPC_URL) to run."] +async fn docs_put_rejects_non_english_source_ref() { + let Some(test_db) = acceptance::test_db().await else { + eprintln!("Skipping docs_extension_v1; set ELF_PG_DSN to run this test."); + + return; + }; + let Some(qdrant_url) = acceptance::test_qdrant_url() else { + eprintln!( + "Skipping docs_extension_v1; set ELF_QDRANT_URL (or ELF_QDRANT_GRPC_URL) to run this test." + ); + + return; + }; + let collection = test_db.collection_name("elf_acceptance"); + let docs_collection = test_db.collection_name("elf_acceptance_docs"); + let cfg = acceptance::test_config( + test_db.dsn().to_string(), + qdrant_url, + 4_096, + collection, + docs_collection, + ); + let providers = Providers::new( + Arc::new(StubEmbedding { vector_dim: 4_096 }), + Arc::new(StubRerank), + Arc::new(SpyExtractor { + calls: Arc::new(Default::default()), + payload: serde_json::json!({ "notes": [] }), + }), + ); + let service = + acceptance::build_service(cfg, providers).await.expect("Failed to build service."); + let result = service + .docs_put(DocsPutRequest { + tenant_id: "t".to_string(), + project_id: "p".to_string(), + agent_id: "owner".to_string(), + scope: "project_shared".to_string(), + doc_type: None, + title: Some("Docs rejection sample".to_string()), + write_policy: None, + source_ref: serde_json::json!({ + "schema": "doc_source_ref/v1", + "doc_type": "knowledge", + "ts": "2026-02-25T12:00:00Z", + "notes": "你好" + }), + content: TEST_CONTENT.to_string(), + }) + .await; + + match result { + Err(Error::NonEnglishInput { field }) => { + assert_eq!(field, "$.source_ref[\"notes\"]"); + }, + other => panic!("Expected NonEnglishInput, got {other:?}"), + } + + test_db.cleanup().await.expect("Failed to cleanup test database."); +} + +#[tokio::test] +#[ignore = "Requires external Postgres and Qdrant. Set ELF_PG_DSN and ELF_QDRANT_URL (or ELF_QDRANT_GRPC_URL) to run."] +async fn docs_put_rejects_missing_and_invalid_source_ref() { + let Some(test_db) = acceptance::test_db().await else { + eprintln!("Skipping docs_extension_v1; set ELF_PG_DSN to run this test."); + + return; + }; + let Some(qdrant_url) = acceptance::test_qdrant_url() else { + eprintln!( + "Skipping docs_extension_v1; set ELF_QDRANT_URL (or ELF_QDRANT_GRPC_URL) to run this test." + ); + + return; + }; + let collection = test_db.collection_name("elf_acceptance"); + let docs_collection = test_db.collection_name("elf_acceptance_docs"); + let cfg = acceptance::test_config( + test_db.dsn().to_string(), + qdrant_url, + 4_096, + collection, + docs_collection, + ); + let providers = Providers::new( + Arc::new(StubEmbedding { vector_dim: 4_096 }), + Arc::new(StubRerank), + Arc::new(SpyExtractor { + calls: Arc::new(Default::default()), + payload: serde_json::json!({ "notes": [] }), + }), + ); + let service = + acceptance::build_service(cfg, providers).await.expect("Failed to build service."); + let result = service + .docs_put(DocsPutRequest { + tenant_id: "t".to_string(), + project_id: "p".to_string(), + agent_id: "owner".to_string(), + scope: "project_shared".to_string(), + doc_type: None, + title: Some("Docs rejection sample".to_string()), + write_policy: None, + source_ref: serde_json::json!("legacy-shape"), + content: TEST_CONTENT.to_string(), + }) + .await; + + match result { + Err(Error::InvalidRequest { message }) => { + assert!(message.contains("source_ref must be a JSON object")); + }, + other => panic!("Expected InvalidRequest for non-object source_ref, got {other:?}"), + } + + let result = service + .docs_put(DocsPutRequest { + tenant_id: "t".to_string(), + project_id: "p".to_string(), + agent_id: "owner".to_string(), + scope: "project_shared".to_string(), + doc_type: None, + title: Some("Docs rejection sample".to_string()), + write_policy: None, + source_ref: serde_json::json!({ + "schema": "source_ref/v1", + "doc_type": "knowledge", + "ts": "2026-02-25T12:00:00Z", + }), + content: TEST_CONTENT.to_string(), + }) + .await; + + match result { + Err(Error::InvalidRequest { message }) => { + assert!(message.contains("doc_source_ref/v1")); + }, + other => panic!("Expected InvalidRequest for wrong source_ref schema, got {other:?}"), + } + + test_db.cleanup().await.expect("Failed to cleanup test database."); +}