From e59dac987f057ec0f817ddbe3de935b511732224 Mon Sep 17 00:00:00 2001 From: ApiliumDevTeam Date: Tue, 23 Jun 2026 13:37:33 +0200 Subject: [PATCH 01/72] feat(graph): add Provenance and optional provenance to TripleInsertPayload --- crates/aingle_graph/src/dag/action.rs | 71 +++++++++++++++++++++++ crates/aingle_graph/src/dag/export.rs | 1 + crates/aingle_graph/src/dag/mod.rs | 2 +- crates/aingle_graph/src/dag/signing.rs | 1 + crates/aingle_graph/src/dag/store.rs | 5 ++ crates/aingle_graph/src/dag/sync.rs | 1 + crates/aingle_graph/src/dag/timetravel.rs | 4 ++ crates/aingle_graph/src/lib.rs | 1 + 8 files changed, 85 insertions(+), 1 deletion(-) diff --git a/crates/aingle_graph/src/dag/action.rs b/crates/aingle_graph/src/dag/action.rs index af9c2378..941ea1b9 100644 --- a/crates/aingle_graph/src/dag/action.rs +++ b/crates/aingle_graph/src/dag/action.rs @@ -111,12 +111,31 @@ pub enum DagPayload { }, } +/// Where an ingested fact or chunk came from: a file and the line span within it, +/// plus the content hash of the whole file at ingest time. Carried in the signed +/// DAG payload so provenance is cryptographically bound to the fact. +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct Provenance { + /// Path of the source file, relative to the ingest root. + pub source_path: String, + /// 1-based first line of the span this fact was extracted from. + pub line_start: u32, + /// 1-based last line of the span (inclusive). + pub line_end: u32, + /// Hex blake3 of the full file content at ingest time. + pub content_hash: String, +} + /// Wire format for a triple insert within a DAG action. #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] pub struct TripleInsertPayload { pub subject: String, pub predicate: String, pub object: serde_json::Value, + /// Optional source provenance. Omitted from the wire form (and thus from the + /// content hash) when absent, so pre-provenance action hashes are unchanged. 
+ #[serde(default, skip_serializing_if = "Option::is_none")] + pub provenance: Option, } /// Kinds of memory operations tracked in the DAG. @@ -234,6 +253,7 @@ mod tests { subject: "alice".into(), predicate: "knows".into(), object: serde_json::json!("bob"), + provenance: None, }], }, signature: None, @@ -313,6 +333,7 @@ mod tests { subject: "a".into(), predicate: "b".into(), object: serde_json::json!("c"), + provenance: None, }], }, DagPayload::TripleDelete { @@ -401,4 +422,54 @@ mod tests { ); assert!(action.signature.is_none()); } + + #[test] + fn provenance_none_is_omitted_and_hash_is_stable() { + use crate::dag::{DagPayload, TripleInsertPayload}; + use chrono::TimeZone; + + // A payload with no provenance must serialize WITHOUT a "provenance" key, + // so DAG action hashes computed before this field existed stay identical. + let p = TripleInsertPayload { + subject: "alice".into(), + predicate: "knows".into(), + object: serde_json::json!("bob"), + provenance: None, + }; + let json = serde_json::to_string(&p).unwrap(); + assert!(!json.contains("provenance"), "None provenance must be skipped: {json}"); + + // Old wire format (no provenance key) still deserializes. + let old = r#"{"subject":"a","predicate":"b","object":"c"}"#; + let parsed: TripleInsertPayload = serde_json::from_str(old).unwrap(); + assert!(parsed.provenance.is_none()); + + // A populated provenance round-trips. + let prov = Provenance { + source_path: "docs/x.md".into(), + line_start: 3, + line_end: 5, + content_hash: "deadbeef".into(), + }; + let p2 = TripleInsertPayload { + subject: "s".into(), + predicate: "p".into(), + object: serde_json::json!("o"), + provenance: Some(prov.clone()), + }; + let round: TripleInsertPayload = serde_json::from_str(&serde_json::to_string(&p2).unwrap()).unwrap(); + assert_eq!(round.provenance, Some(prov)); + + // Smoke check: hashing an action that carries a None-provenance TripleInsert + // must not panic; no hash value is compared against a reference here. 
+ let action = DagAction { + parents: vec![], + author: crate::NodeId::named("node:a"), + seq: 0, + timestamp: chrono::Utc.timestamp_opt(0, 0).unwrap(), + payload: DagPayload::TripleInsert { triples: vec![p] }, + signature: None, + }; + let _ = action.compute_hash(); // must not panic + } } diff --git a/crates/aingle_graph/src/dag/export.rs b/crates/aingle_graph/src/dag/export.rs index a6c8bbac..f43a9849 100644 --- a/crates/aingle_graph/src/dag/export.rs +++ b/crates/aingle_graph/src/dag/export.rs @@ -213,6 +213,7 @@ mod tests { subject: format!("s{}", seq), predicate: "p".into(), object: serde_json::json!("o"), + provenance: None, }], }, signature: None, diff --git a/crates/aingle_graph/src/dag/mod.rs b/crates/aingle_graph/src/dag/mod.rs index cf9ea511..24d92445 100644 --- a/crates/aingle_graph/src/dag/mod.rs +++ b/crates/aingle_graph/src/dag/mod.rs @@ -25,7 +25,7 @@ pub mod sync; pub mod timetravel; pub mod tips; -pub use action::{DagAction, DagActionHash, DagPayload, MemoryOpKind, TripleInsertPayload}; +pub use action::{DagAction, DagActionHash, DagPayload, MemoryOpKind, Provenance, TripleInsertPayload}; pub use backend::{DagBackend, MemoryDagBackend}; #[cfg(feature = "sled-backend")] pub use backend::SledDagBackend; diff --git a/crates/aingle_graph/src/dag/signing.rs b/crates/aingle_graph/src/dag/signing.rs index 5f9b6427..2c687c83 100644 --- a/crates/aingle_graph/src/dag/signing.rs +++ b/crates/aingle_graph/src/dag/signing.rs @@ -203,6 +203,7 @@ mod tests { subject: "alice".into(), predicate: "knows".into(), object: serde_json::json!("bob"), + provenance: None, }], }, signature: None, diff --git a/crates/aingle_graph/src/dag/store.rs b/crates/aingle_graph/src/dag/store.rs index d2479264..b32ea183 100644 --- a/crates/aingle_graph/src/dag/store.rs +++ b/crates/aingle_graph/src/dag/store.rs @@ -1123,6 +1123,7 @@ mod tests { subject: "alice".into(), predicate: "knows".into(), object: serde_json::json!("bob"), + provenance: None, }], }, signature: None, @@ -1185,6 
+1186,7 @@ mod tests { subject: "alice".into(), predicate: "knows".into(), object: serde_json::json!("bob"), + provenance: None, }); let history = store.history(&tid, 10).unwrap(); @@ -1261,6 +1263,7 @@ mod tests { subject: subject.into(), predicate: predicate.into(), object: object_json.clone(), + provenance: None, }); // Compute via TripleId::from_triple (the canonical graph path) @@ -1322,6 +1325,7 @@ mod tests { subject: format!("s{}", seq), predicate: "p".into(), object: serde_json::json!(seq), + provenance: None, }], }, signature: None, @@ -1613,6 +1617,7 @@ mod tests { subject: "alice".into(), predicate: "knows".into(), object: serde_json::json!("bob"), + provenance: None, }); let history = store.history(&tid, 10).unwrap(); assert_eq!(history.len(), 3); diff --git a/crates/aingle_graph/src/dag/sync.rs b/crates/aingle_graph/src/dag/sync.rs index ddfbe588..d9d79093 100644 --- a/crates/aingle_graph/src/dag/sync.rs +++ b/crates/aingle_graph/src/dag/sync.rs @@ -64,6 +64,7 @@ mod tests { subject: subject.into(), predicate: "knows".into(), object: serde_json::json!("x"), + provenance: None, }], }, signature: None, diff --git a/crates/aingle_graph/src/dag/timetravel.rs b/crates/aingle_graph/src/dag/timetravel.rs index 46df01d5..144ef9c0 100644 --- a/crates/aingle_graph/src/dag/timetravel.rs +++ b/crates/aingle_graph/src/dag/timetravel.rs @@ -105,6 +105,7 @@ mod tests { subject: subject.into(), predicate: "knows".into(), object: serde_json::json!(object), + provenance: None, }], }, signature: None, @@ -120,6 +121,7 @@ mod tests { subject: "alice".into(), predicate: "knows".into(), object: serde_json::json!("bob"), + provenance: None, }], }; replay_payload(&db, &payload).unwrap(); @@ -155,6 +157,7 @@ mod tests { subject: "alice".into(), predicate: "knows".into(), object: serde_json::json!("bob"), + provenance: None, }], }, DagPayload::TripleInsert { @@ -162,6 +165,7 @@ mod tests { subject: "bob".into(), predicate: "knows".into(), object: serde_json::json!("charlie"), 
+ provenance: None, }], }, ], diff --git a/crates/aingle_graph/src/lib.rs b/crates/aingle_graph/src/lib.rs index 098fca5d..7164d7fe 100644 --- a/crates/aingle_graph/src/lib.rs +++ b/crates/aingle_graph/src/lib.rs @@ -421,6 +421,7 @@ impl GraphDB { subject: triple.subject.to_string(), predicate: triple.predicate.to_string(), object: value_to_json(&triple.object), + provenance: None, }], }, signature: None, From 46901f46cd1f377ad43571759ac1bced8e8de488 Mon Sep 17 00:00:00 2001 From: ApiliumDevTeam Date: Tue, 23 Jun 2026 13:45:32 +0200 Subject: [PATCH 02/72] feat(ingest): aingle_ingest crate -- structural markdown extraction + chunking --- Cargo.toml | 1 + crates/aingle_ingest/Cargo.toml | 15 ++++ crates/aingle_ingest/src/chunk.rs | 90 +++++++++++++++++++++ crates/aingle_ingest/src/lib.rs | 117 +++++++++++++++++++++++++++ crates/aingle_ingest/src/markdown.rs | 110 +++++++++++++++++++++++++ 5 files changed, 333 insertions(+) create mode 100644 crates/aingle_ingest/Cargo.toml create mode 100644 crates/aingle_ingest/src/chunk.rs create mode 100644 crates/aingle_ingest/src/lib.rs create mode 100644 crates/aingle_ingest/src/markdown.rs diff --git a/Cargo.toml b/Cargo.toml index 672ec8e7..3c691e85 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,6 +5,7 @@ members = [ "crates/aingle_graph", # Native Semantic GraphDB "crates/aingle_zk", # Zero-Knowledge Proofs (Privacy) "crates/ineru", # Ineru Memory System + "crates/aingle_ingest", # Structural ingestion (markdown/code → triples + chunks) "crates/aingle_ai", # AI Integration Layer "crates/aingle_logic", # Proof-of-Logic Validation Engine "crates/kaneru", # Kaneru — Unified Multi-Agent Execution System diff --git a/crates/aingle_ingest/Cargo.toml b/crates/aingle_ingest/Cargo.toml new file mode 100644 index 00000000..2a4a6b16 --- /dev/null +++ b/crates/aingle_ingest/Cargo.toml @@ -0,0 +1,15 @@ +[package] +name = "aingle_ingest" +version = "0.6.3" +description = "Structural extraction of triples and text chunks from markdown/code 
for AIngle" +license = "Apache-2.0 OR LicenseRef-Commercial" +edition = "2021" +rust-version = "1.83" + +[dependencies] +aingle_graph = { version = "0.6", path = "../aingle_graph", features = ["dag"] } +serde = { version = "1.0", features = ["derive"] } +serde_json = "1.0" +regex = "1.12" +blake3 = "1.8" +once_cell = "1.4" diff --git a/crates/aingle_ingest/src/chunk.rs b/crates/aingle_ingest/src/chunk.rs new file mode 100644 index 00000000..e0015190 --- /dev/null +++ b/crates/aingle_ingest/src/chunk.rs @@ -0,0 +1,90 @@ +// Copyright 2019-2026 Apilium Technologies OÜ. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 OR Commercial + +//! Splitting source text into line-ranged chunks for semantic recall. + +use crate::{Chunk, Provenance}; + +fn prov(path: &str, hash: &str, start: u32, end: u32) -> Provenance { + Provenance { + source_path: path.to_string(), + line_start: start, + line_end: end, + content_hash: hash.to_string(), + } +} + +/// Fixed-window chunking: every `window` lines becomes one chunk. Used for +/// non-markdown files and oversized markdown sections. A `window` of 0 is clamped to 1. +pub fn chunk_fixed(path: &str, content: &str, hash: &str, window: usize) -> Vec { + let window = window.max(1); + let lines: Vec<&str> = content.lines().collect(); + if lines.is_empty() { + return Vec::new(); + } + let mut out = Vec::new(); + let mut i = 0; + while i < lines.len() { + let end = (i + window).min(lines.len()); + let text = lines[i..end].join("\n"); + out.push(Chunk { + text, + provenance: prov(path, hash, (i + 1) as u32, end as u32), + }); + i = end; + } + out +} + +/// Markdown chunking: split on ATX heading lines (`# ...`). Each heading starts a +/// new chunk that runs until the next heading (or EOF). Content before the first +/// heading (e.g. frontmatter + intro) is its own leading chunk. Oversized sections +/// (> 80 lines) are further split with `chunk_fixed`. 
+pub fn chunk_markdown(path: &str, content: &str, hash: &str) -> Vec { + let lines: Vec<&str> = content.lines().collect(); + if lines.is_empty() { + return Vec::new(); + } + // Boundaries: indices (0-based) where a heading starts a new section. + let mut starts: Vec = Vec::new(); + for (idx, line) in lines.iter().enumerate() { + if is_heading(line) { + starts.push(idx); + } + } + // Ensure the first section starts at line 0 even if there is leading content. + if starts.first() != Some(&0) { + starts.insert(0, 0); + } + starts.dedup(); + + let mut out = Vec::new(); + for (n, &start) in starts.iter().enumerate() { + let end = if n + 1 < starts.len() { starts[n + 1] } else { lines.len() }; + if start >= end { + continue; + } + let section = &lines[start..end]; + if section.len() > 80 { + // Re-window large sections, offsetting line numbers by `start`. + let joined = section.join("\n"); + for mut c in chunk_fixed(path, &joined, hash, 50) { + c.provenance.line_start += start as u32; + c.provenance.line_end += start as u32; + out.push(c); + } + } else { + out.push(Chunk { + text: section.join("\n"), + provenance: prov(path, hash, (start + 1) as u32, end as u32), + }); + } + } + out +} + +fn is_heading(line: &str) -> bool { + let t = line.trim_start(); + let hashes = t.chars().take_while(|c| *c == '#').count(); + (1..=6).contains(&hashes) && t.chars().nth(hashes) == Some(' ') +} diff --git a/crates/aingle_ingest/src/lib.rs b/crates/aingle_ingest/src/lib.rs new file mode 100644 index 00000000..6ec9c7c5 --- /dev/null +++ b/crates/aingle_ingest/src/lib.rs @@ -0,0 +1,117 @@ +// Copyright 2019-2026 Apilium Technologies OÜ. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 OR Commercial + +//! Pure, deterministic structural extraction: `(path, content)` → triples + chunks. + +mod chunk; +mod markdown; + +pub use aingle_graph::dag::Provenance; + +/// Object side of an extracted triple. Mapped to the graph value type by the caller. 
+#[derive(Debug, Clone, PartialEq)] +pub enum ObjectValue { + /// A reference to another node/entity (e.g. a wikilink target). + Node(String), + /// A literal text value (e.g. a frontmatter scalar). + Text(String), +} + +/// A triple plus where it came from. +#[derive(Debug, Clone, PartialEq)] +pub struct ProvenancedTriple { + pub subject: String, + pub predicate: String, + pub object: ObjectValue, + pub provenance: Provenance, +} + +/// A span of source text to embed for semantic recall. +#[derive(Debug, Clone, PartialEq)] +pub struct Chunk { + pub text: String, + pub provenance: Provenance, +} + +/// The full result of extracting one file. +#[derive(Debug, Clone, PartialEq)] +pub struct Extraction { + pub triples: Vec, + pub chunks: Vec, +} + +/// Extract structural triples and text chunks from a file's content. +/// +/// `path` is used verbatim as the note subject and recorded in provenance. +/// Markdown files (`.md`/`.markdown`) get structural triples + heading-aware +/// chunks; all other files get fixed-window chunks only. +pub fn extract(path: &str, content: &str) -> Extraction { + let content_hash = blake3::hash(content.as_bytes()).to_hex().to_string(); + let is_md = path.to_lowercase().ends_with(".md") || path.to_lowercase().ends_with(".markdown"); + + let mut triples = Vec::new(); + let chunks; + if is_md { + triples = markdown::extract_triples(path, content, &content_hash); + chunks = chunk::chunk_markdown(path, content, &content_hash); + } else { + chunks = chunk::chunk_fixed(path, content, &content_hash, 50); + } + Extraction { triples, chunks } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn prov(p: &Provenance) -> (u32, u32) { + (p.line_start, p.line_end) + } + + #[test] + fn extracts_wikilink_heading_tag_and_frontmatter() { + let md = "---\ntype: adr\ntags: [storage, decision]\n---\n\ + # Storage Decision\n\n\ + We chose [[sled]] because of the lock. 
See #durability.\n"; + let ex = extract("docs/adr/007.md", md); + + // frontmatter scalar -> (note, type, adr) + assert!(ex.triples.iter().any(|t| t.subject == "docs/adr/007.md" + && t.predicate == "type" + && t.object == ObjectValue::Text("adr".into()))); + // frontmatter tags -> two tagged triples + assert!(ex.triples.iter().any(|t| t.predicate == "tagged" + && t.object == ObjectValue::Text("storage".into()))); + assert!(ex.triples.iter().any(|t| t.predicate == "tagged" + && t.object == ObjectValue::Text("decision".into()))); + // heading -> has_section + assert!(ex.triples.iter().any(|t| t.predicate == "has_section" + && t.object == ObjectValue::Text("Storage Decision".into()))); + // wikilink -> links_to sled + let link = ex.triples.iter().find(|t| t.predicate == "links_to").unwrap(); + assert_eq!(link.object, ObjectValue::Node("sled".into())); + // inline tag -> tagged durability + assert!(ex.triples.iter().any(|t| t.predicate == "tagged" + && t.object == ObjectValue::Text("durability".into()))); + + // provenance line numbers are 1-based and point at the real lines. + assert_eq!(prov(&link.provenance).0, 7); // the "We chose [[sled]]" line + assert_eq!(link.provenance.source_path, "docs/adr/007.md"); + + // at least one chunk, all carrying the same content hash. + assert!(!ex.chunks.is_empty()); + assert!(ex.chunks.iter().all(|c| !c.provenance.content_hash.is_empty())); + } + + #[test] + fn non_markdown_gets_chunks_only() { + let code = (1..=120).map(|i| format!("line {i}")).collect::>().join("\n"); + let ex = extract("src/main.rs", &code); + assert!(ex.triples.is_empty()); + // 120 lines / 50-line window => 3 chunks. 
+ assert_eq!(ex.chunks.len(), 3); + assert_eq!(ex.chunks[0].provenance.line_start, 1); + assert_eq!(ex.chunks[0].provenance.line_end, 50); + assert_eq!(ex.chunks[2].provenance.line_end, 120); + } +} diff --git a/crates/aingle_ingest/src/markdown.rs b/crates/aingle_ingest/src/markdown.rs new file mode 100644 index 00000000..58a32504 --- /dev/null +++ b/crates/aingle_ingest/src/markdown.rs @@ -0,0 +1,110 @@ +// Copyright 2019-2026 Apilium Technologies OÜ. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 OR Commercial + +//! Deterministic structural triple extraction from markdown. + +use crate::{ObjectValue, Provenance, ProvenancedTriple}; +use once_cell::sync::Lazy; +use regex::Regex; + +static WIKILINK: Lazy = Lazy::new(|| Regex::new(r"\[\[([^\]|]+)(?:\|[^\]]+)?\]\]").unwrap()); +static HEADING: Lazy = Lazy::new(|| Regex::new(r"^\s*#{1,6}\s+(.+?)\s*$").unwrap()); +// Inline tag: `#word` where `#` is at start or preceded by whitespace and is +// immediately followed by a letter (so `# Heading` and `##x` are not tags). +static INLINE_TAG: Lazy = + Lazy::new(|| Regex::new(r"(?:^|\s)#([A-Za-z][A-Za-z0-9_/-]*)").unwrap()); + +fn prov(path: &str, hash: &str, line: u32) -> Provenance { + Provenance { + source_path: path.to_string(), + line_start: line, + line_end: line, + content_hash: hash.to_string(), + } +} + +/// Extract structural triples. `path` is the note subject. +pub fn extract_triples(path: &str, content: &str, hash: &str) -> Vec { + let mut out = Vec::new(); + let lines: Vec<&str> = content.lines().collect(); + + // --- Frontmatter (flat scalars + `tags`). Only when the file starts with `---`. 
+ let mut body_start = 0usize; + if lines.first().map(|l| l.trim_end()) == Some("---") { + if let Some(close_rel) = lines[1..].iter().position(|l| l.trim_end() == "---") { + let close = close_rel + 1; // index of closing --- + for (i, raw) in lines[1..close].iter().enumerate() { + let line_no = (i + 2) as u32; // 1-based, after opening --- + if let Some((key, val)) = raw.split_once(':') { + let key = key.trim(); + let val = val.trim(); + if key.is_empty() { + continue; + } + if key == "tags" { + for tag in parse_tag_list(val) { + out.push(ProvenancedTriple { + subject: path.into(), + predicate: "tagged".into(), + object: ObjectValue::Text(tag), + provenance: prov(path, hash, line_no), + }); + } + } else if !val.is_empty() { + out.push(ProvenancedTriple { + subject: path.into(), + predicate: key.into(), + object: ObjectValue::Text(val.into()), + provenance: prov(path, hash, line_no), + }); + } + } + } + body_start = close + 1; + } + } + + // --- Body: headings, wikilinks, inline tags (with real line numbers). + for (i, line) in lines.iter().enumerate().skip(body_start) { + let line_no = (i + 1) as u32; + + if let Some(c) = HEADING.captures(line) { + out.push(ProvenancedTriple { + subject: path.into(), + predicate: "has_section".into(), + object: ObjectValue::Text(c[1].trim().to_string()), + provenance: prov(path, hash, line_no), + }); + continue; // a heading line carries no wikilinks/tags worth extracting + } + + for c in WIKILINK.captures_iter(line) { + out.push(ProvenancedTriple { + subject: path.into(), + predicate: "links_to".into(), + object: ObjectValue::Node(c[1].trim().to_string()), + provenance: prov(path, hash, line_no), + }); + } + for c in INLINE_TAG.captures_iter(line) { + out.push(ProvenancedTriple { + subject: path.into(), + predicate: "tagged".into(), + object: ObjectValue::Text(c[1].to_string()), + provenance: prov(path, hash, line_no), + }); + } + } + + out +} + +/// Parse a frontmatter tag value: either `[a, b]` or a bare `a, b` or single `a`. 
+fn parse_tag_list(val: &str) -> Vec { + let inner = val.trim().trim_start_matches('[').trim_end_matches(']'); + inner + .split(',') + .map(|s| s.trim().trim_matches('"').trim_matches('\'').to_string()) + .filter(|s| !s.is_empty()) + .collect() +} From 9e0456775dc2e2d466e1266fae717a82c1f1c059 Mon Sep 17 00:00:00 2001 From: ApiliumDevTeam Date: Tue, 23 Jun 2026 13:52:55 +0200 Subject: [PATCH 03/72] refactor(ingest): unify heading detection and scan headings for links/tags --- crates/aingle_ingest/src/chunk.rs | 12 +++++++----- crates/aingle_ingest/src/markdown.rs | 7 +++++-- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/crates/aingle_ingest/src/chunk.rs b/crates/aingle_ingest/src/chunk.rs index e0015190..51998af3 100644 --- a/crates/aingle_ingest/src/chunk.rs +++ b/crates/aingle_ingest/src/chunk.rs @@ -61,12 +61,10 @@ pub fn chunk_markdown(path: &str, content: &str, hash: &str) -> Vec { let mut out = Vec::new(); for (n, &start) in starts.iter().enumerate() { let end = if n + 1 < starts.len() { starts[n + 1] } else { lines.len() }; - if start >= end { - continue; - } let section = &lines[start..end]; if section.len() > 80 { - // Re-window large sections, offsetting line numbers by `start`. + // chunk_fixed returns 1-based lines within the section; adding the + // 0-based section offset `start` yields correct absolute 1-based lines. let joined = section.join("\n"); for mut c in chunk_fixed(path, &joined, hash, 50) { c.provenance.line_start += start as u32; @@ -83,8 +81,12 @@ pub fn chunk_markdown(path: &str, content: &str, hash: &str) -> Vec { out } +/// True for an ATX markdown heading line: optional leading whitespace, 1–6 `#` +/// characters, then at least one whitespace character. Mirrors the `HEADING` +/// regex used by triple extraction so chunk boundaries and `has_section` +/// triples agree on what a heading is. 
fn is_heading(line: &str) -> bool { let t = line.trim_start(); let hashes = t.chars().take_while(|c| *c == '#').count(); - (1..=6).contains(&hashes) && t.chars().nth(hashes) == Some(' ') + (1..=6).contains(&hashes) && t.chars().nth(hashes).is_some_and(|c| c.is_whitespace()) } diff --git a/crates/aingle_ingest/src/markdown.rs b/crates/aingle_ingest/src/markdown.rs index 58a32504..7aa36773 100644 --- a/crates/aingle_ingest/src/markdown.rs +++ b/crates/aingle_ingest/src/markdown.rs @@ -75,7 +75,8 @@ pub fn extract_triples(path: &str, content: &str, hash: &str) -> Vec Vec Vec { let inner = val.trim().trim_start_matches('[').trim_end_matches(']'); inner From 329b19e17830841017d66820b556ee7b5fa1e185 Mon Sep 17 00:00:00 2001 From: ApiliumDevTeam Date: Tue, 23 Jun 2026 14:01:41 +0200 Subject: [PATCH 04/72] refactor(cortex): insert_triple_inner with optional provenance, shared by create_triple --- crates/aingle_cortex/src/graphql/resolvers.rs | 1 + crates/aingle_cortex/src/rest/triples.rs | 1 + crates/aingle_cortex/src/service/triples.rs | 85 ++++++++++++++----- 3 files changed, 67 insertions(+), 20 deletions(-) diff --git a/crates/aingle_cortex/src/graphql/resolvers.rs b/crates/aingle_cortex/src/graphql/resolvers.rs index b7f1d13c..e249cbc4 100644 --- a/crates/aingle_cortex/src/graphql/resolvers.rs +++ b/crates/aingle_cortex/src/graphql/resolvers.rs @@ -191,6 +191,7 @@ impl MutationRoot { subject: input.subject.clone(), predicate: input.predicate.clone(), object: serde_json::json!({}), + provenance: None, }], }, signature: None, diff --git a/crates/aingle_cortex/src/rest/triples.rs b/crates/aingle_cortex/src/rest/triples.rs index bb2f0509..1be2abb0 100644 --- a/crates/aingle_cortex/src/rest/triples.rs +++ b/crates/aingle_cortex/src/rest/triples.rs @@ -200,6 +200,7 @@ pub async fn create_triple( subject: req.subject.clone(), predicate: req.predicate.clone(), object: serde_json::to_value(&req.object).unwrap_or_default(), + provenance: None, }], }, signature: None, diff --git 
a/crates/aingle_cortex/src/service/triples.rs b/crates/aingle_cortex/src/service/triples.rs index 28ae7923..c2ee63a1 100644 --- a/crates/aingle_cortex/src/service/triples.rs +++ b/crates/aingle_cortex/src/service/triples.rs @@ -28,29 +28,35 @@ pub async fn create_triple( req: CreateTripleRequest, namespace: Option, ) -> Result { - // Validate input if req.subject.is_empty() { return Err(Error::InvalidInput("Subject cannot be empty".to_string())); } if req.predicate.is_empty() { return Err(Error::InvalidInput("Predicate cannot be empty".to_string())); } + insert_triple_inner(state, req.object, &req.subject, &req.predicate, None, namespace).await +} - let object: Value = req.object.clone().into(); - - // Create the triple - let triple = Triple::new( - NodeId::named(&req.subject), - Predicate::named(&req.predicate), - object, - ); +/// Shared single-triple write used by `create_triple` and the ingestion path. +/// `object_dto` is serialized into the DAG payload exactly as the REST path does, +/// so triple IDs / DAG replay stay byte-compatible. `provenance`, when present, +/// is attached to the signed `TripleInsert` payload. 
+pub async fn insert_triple_inner( + state: &AppState, + object_dto: crate::rest::ValueDto, + subject: &str, + predicate: &str, + #[cfg(feature = "dag")] provenance: Option, + #[cfg(not(feature = "dag"))] _provenance: Option<()>, + namespace: Option, +) -> Result { + let object: Value = object_dto.clone().into(); + let triple = Triple::new(NodeId::named(subject), Predicate::named(predicate), object); - // Add triple to graph (and record DAG action if enabled) let triple_id = { let graph = state.graph.read().await; let id = graph.insert(triple.clone())?; - // Record in DAG if enabled #[cfg(feature = "dag")] if let Some(dag_store) = graph.dag_store() { let dag_author = state @@ -69,9 +75,10 @@ pub async fn create_triple( timestamp: chrono::Utc::now(), payload: aingle_graph::dag::DagPayload::TripleInsert { triples: vec![aingle_graph::dag::TripleInsertPayload { - subject: req.subject.clone(), - predicate: req.predicate.clone(), - object: serde_json::to_value(&req.object).unwrap_or_default(), + subject: subject.to_string(), + predicate: predicate.to_string(), + object: serde_json::to_value(&object_dto).unwrap_or_default(), + provenance, }], }, signature: None, @@ -91,7 +98,6 @@ pub async fn create_triple( id }; - // Record audit entry { let mut audit = state.audit_log.write().await; audit.record(AuditEntry { @@ -100,17 +106,16 @@ pub async fn create_triple( namespace, action: "create".to_string(), resource: format!("/api/v1/triples/{}", triple_id.to_hex()), - details: Some(format!("subject={}", req.subject)), + details: Some(format!("subject={}", subject)), request_id: None, }); } - // Broadcast event state.broadcaster.broadcast(Event::TripleAdded { hash: triple_id.to_hex(), - subject: req.subject, - predicate: req.predicate, - object: serde_json::to_value(&req.object).unwrap_or_default(), + subject: subject.to_string(), + predicate: predicate.to_string(), + object: serde_json::to_value(&object_dto).unwrap_or_default(), }); Ok(triple.into()) @@ -485,6 +490,46 @@ mod 
tests { assert!(matches!(err, Error::NotFound(_))); } + #[cfg(feature = "dag")] + #[tokio::test] + async fn inner_write_records_provenance_in_dag() { + use aingle_graph::dag::{DagPayload, Provenance}; + + let state = AppState::with_db_path(":memory:", None).unwrap(); + { + let mut graph = state.graph.write().await; + graph.enable_dag(); + } + + let prov = Provenance { + source_path: "docs/x.md".into(), + line_start: 4, + line_end: 4, + content_hash: "abc123".into(), + }; + insert_triple_inner( + &state, + crate::rest::ValueDto::Node { node: "sled".into() }, + "docs/x.md", + "links_to", + Some(prov.clone()), + None, + ) + .await + .unwrap(); + + // The DAG action affecting subject "docs/x.md" must carry the provenance. + let graph = state.graph.read().await; + let actions = graph.dag_history_by_subject("docs/x.md", 10).unwrap(); + let found = actions.iter().any(|a| match &a.payload { + DagPayload::TripleInsert { triples } => { + triples.iter().any(|t| t.provenance.as_ref() == Some(&prov)) + } + _ => false, + }); + assert!(found, "provenance must be present in the TripleInsert DAG payload"); + } + #[tokio::test] async fn list_triples_returns_inserted() { let state = AppState::with_db_path(":memory:", None).unwrap(); From fc35d5d10751bf4fc946039d6e4c06262ed4ea8f Mon Sep 17 00:00:00 2001 From: ApiliumDevTeam Date: Tue, 23 Jun 2026 14:16:18 +0200 Subject: [PATCH 05/72] =?UTF-8?q?feat(cortex):=20service::ingest=20?= =?UTF-8?q?=E2=80=94=20incremental=20vault=20ingestion=20with=20signed=20p?= =?UTF-8?q?rovenance?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crates/aingle_cortex/Cargo.toml | 2 + crates/aingle_cortex/src/service/ingest.rs | 327 +++++++++++++++++++++ crates/aingle_cortex/src/service/mod.rs | 1 + 3 files changed, 330 insertions(+) create mode 100644 crates/aingle_cortex/src/service/ingest.rs diff --git a/crates/aingle_cortex/Cargo.toml b/crates/aingle_cortex/Cargo.toml index 7ceecf18..c4a37443 100644 --- 
a/crates/aingle_cortex/Cargo.toml +++ b/crates/aingle_cortex/Cargo.toml @@ -37,6 +37,8 @@ aingle_graph = { version = "0.6", path = "../aingle_graph", features = ["sled-ba aingle_logic = { version = "0.6", path = "../aingle_logic" } aingle_zk = { version = "0.6", path = "../aingle_zk" } ineru = { version = "0.6", path = "../ineru" } +aingle_ingest = { version = "0.6", path = "../aingle_ingest" } +ignore = "0.4" # Web framework axum = { version = "0.8", features = ["ws", "macros"] } diff --git a/crates/aingle_cortex/src/service/ingest.rs b/crates/aingle_cortex/src/service/ingest.rs new file mode 100644 index 00000000..2f2ba553 --- /dev/null +++ b/crates/aingle_cortex/src/service/ingest.rs @@ -0,0 +1,327 @@ +// Copyright 2019-2026 Apilium Technologies OÜ. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 OR Commercial + +//! Incremental vault ingestion: walk a directory, extract triples and chunks, +//! write them into the graph (with signed DAG provenance) and Ineru memory, +//! and maintain a per-file source-hash registry triple for idempotent re-runs. + +use crate::error::{Error, Result}; +use crate::rest::ValueDto; +use crate::service::triples::{delete_triple, insert_triple_inner}; +use crate::state::AppState; +use aingle_graph::{NodeId, Predicate, TriplePattern}; +use aingle_ingest::{extract, ObjectValue}; +use ineru::{Embedding, MemoryEntry, MemoryMetadata}; + +// Bring the graph error type into scope for duplicate-matching in ingest logic. +use aingle_graph::Error as GraphError; + +/// The predicate used to anchor the per-file content-hash registry triple. +pub const PRED_SOURCE_HASH: &str = "aingle:source_hash"; + +/// Ineru `entry_type` used for ingested text chunks. Grounding filters on this. +pub const CHUNK_ENTRY_TYPE: &str = "doc_chunk"; + +/// One ingested source file and its content hash at ingest time. 
+#[derive(Debug, Clone, serde::Serialize)] +pub struct SourceRecord { + pub path: String, + pub content_hash: String, +} + +/// Summary statistics returned by `ingest_path`. +#[derive(Debug, Default, Clone, serde::Serialize)] +pub struct IngestReport { + /// Number of files with a supported extension encountered during the walk. + pub files_seen: usize, + /// Files that were newly ingested (hash changed or first time). + pub files_ingested: usize, + /// Files skipped because their content hash matched the registry. + pub files_skipped: usize, + /// Total triples written (structural + registry). + pub triples_written: usize, + /// Total text chunks written to Ineru memory. + pub chunks_written: usize, + /// The files ingested in this run, with their content hashes. + pub sources: Vec, +} + +/// Walk `root_path`, extract structural triples and text chunks from each file, +/// write them to the graph (with DAG provenance) and Ineru memory, and maintain +/// a per-file source-hash registry triple for incremental skip on unchanged files. +/// +/// `namespace` is forwarded to the audit log (use `None` for internal/background calls). 
+pub async fn ingest_path( + state: &AppState, + root_path: &str, + namespace: Option, +) -> Result { + let mut report = IngestReport::default(); + + // Build a walk that respects .gitignore / .ignore files + let walker = ignore::WalkBuilder::new(root_path) + .hidden(false) + .git_ignore(true) + .build(); + + let mut files: Vec<(String, String)> = Vec::new(); // (rel_path, content) + + for entry in walker { + let entry = entry.map_err(|e| Error::Internal(format!("walk error: {e}")))?; + let path = entry.path(); + + // Skip directories + if !path.is_file() { + continue; + } + + // Filter to supported extensions: .md, .markdown, .txt, .rs, .py, .ts, .js + let ext = path + .extension() + .and_then(|e| e.to_str()) + .unwrap_or("") + .to_lowercase(); + if !matches!( + ext.as_str(), + "md" | "markdown" | "txt" | "rs" | "py" | "ts" | "js" | "toml" | "json" + ) { + continue; + } + + report.files_seen += 1; + + let content = std::fs::read_to_string(path) + .map_err(|e| Error::Internal(format!("read {}: {e}", path.display())))?; + + // Compute relative path from root_path for use as the note subject + let rel_path = path + .strip_prefix(root_path) + .unwrap_or(path) + .to_string_lossy() + .replace('\\', "/"); + + files.push((rel_path, content)); + } + + for (rel_path, content) in files { + let content_hash = blake3::hash(content.as_bytes()).to_hex().to_string(); + + // Check registry: does a triple (rel_path, aingle:source_hash, ) already exist? + let existing_hash = { + let graph = state.graph.read().await; + let pattern = TriplePattern::any() + .with_subject(NodeId::named(&rel_path)) + .with_predicate(Predicate::named(PRED_SOURCE_HASH)); + graph + .find(pattern) + .map_err(|e| Error::Internal(format!("graph find error: {e}")))? 
+ .into_iter() + .next() + .and_then(|t| t.object_string().map(|s| s.to_string())) + }; + + if let Some(ref existing) = existing_hash { + if existing == &content_hash { + // File unchanged — skip + report.files_skipped += 1; + continue; + } + } + + report.files_ingested += 1; + + // Remove old registry triple if the hash changed + if existing_hash.is_some() { + // Delete by finding the triple's hex id + let old_triple_id = { + let graph = state.graph.read().await; + let pattern = TriplePattern::any() + .with_subject(NodeId::named(&rel_path)) + .with_predicate(Predicate::named(PRED_SOURCE_HASH)); + graph + .find(pattern) + .ok() + .and_then(|v| v.into_iter().next()) + .map(|t| t.id().to_hex()) + }; + if let Some(hex_id) = old_triple_id { + // Best-effort: ignore NotFound + let _ = delete_triple(state, &hex_id, namespace.clone()).await; + } + } + + // Extract triples and chunks from the file + let extraction = extract(&rel_path, &content); + + // Write structural triples + for pt in &extraction.triples { + let object_dto = match &pt.object { + ObjectValue::Node(n) => ValueDto::Node { node: n.clone() }, + ObjectValue::Text(t) => ValueDto::String(t.clone()), + }; + + #[cfg(feature = "dag")] + let prov = Some(pt.provenance.clone()); + #[cfg(not(feature = "dag"))] + let _prov = (); + + let result = insert_triple_inner( + state, + object_dto, + &pt.subject, + &pt.predicate, + #[cfg(feature = "dag")] + prov, + #[cfg(not(feature = "dag"))] + None, + namespace.clone(), + ) + .await; + + match result { + Ok(_) => { + report.triples_written += 1; + } + Err(Error::GraphError(GraphError::Duplicate(_))) => { + // Triple already exists — counts as already-written (idempotent) + report.triples_written += 1; + } + Err(e) => { + return Err(Error::Internal(format!("triple insert error: {e}"))); + } + } + } + + // Write text chunks to Ineru memory + for chunk in &extraction.chunks { + let embedding = Embedding::from_text_simple(&chunk.text); + let mut entry = MemoryEntry::new( + 
CHUNK_ENTRY_TYPE, + serde_json::json!({ + "text": chunk.text, + "source_path": chunk.provenance.source_path, + "line_start": chunk.provenance.line_start, + "line_end": chunk.provenance.line_end, + "content_hash": chunk.provenance.content_hash, + }), + ); + entry.metadata = MemoryMetadata::with_source(&chunk.provenance.source_path); + entry.metadata.importance = 0.6; + entry.embedding = Some(embedding); + + let mut mem = state.memory.write().await; + mem.remember(entry) + .map_err(|e| Error::Internal(format!("memory write error: {e}")))?; + report.chunks_written += 1; + } + + // Write/update the source-hash registry triple + #[cfg(feature = "dag")] + let registry_prov = Some(aingle_graph::dag::Provenance { + source_path: rel_path.clone(), + line_start: 0, + line_end: 0, + content_hash: content_hash.clone(), + }); + #[cfg(not(feature = "dag"))] + let _registry_prov = (); + + insert_triple_inner( + state, + ValueDto::String(content_hash.clone()), + &rel_path, + PRED_SOURCE_HASH, + #[cfg(feature = "dag")] + registry_prov, + #[cfg(not(feature = "dag"))] + None, + namespace.clone(), + ) + .await + .map_err(|e| Error::Internal(format!("registry triple insert error: {e}")))?; + + report.triples_written += 1; + + report.sources.push(SourceRecord { + path: rel_path.clone(), + content_hash: content_hash.clone(), + }); + } + + Ok(report) +} + +#[cfg(test)] +mod tests { + use super::*; + + fn write(dir: &std::path::Path, name: &str, body: &str) { + std::fs::write(dir.join(name), body).unwrap(); + } + + async fn enabled_state() -> AppState { + let state = AppState::with_db_path(":memory:", None).unwrap(); + { + let mut graph = state.graph.write().await; + graph.enable_dag(); + } + state + } + + #[tokio::test] + async fn ingest_writes_triples_and_chunks() { + let dir = tempfile::tempdir().unwrap(); + write(dir.path(), "note.md", "# Title\n\nWe use [[sled]] for storage. 
#durability\n"); + let state = enabled_state().await; + + let report = ingest_path(&state, dir.path().to_str().unwrap(), None).await.unwrap(); + + assert_eq!(report.files_seen, 1); + assert_eq!(report.files_ingested, 1); + assert!(report.triples_written >= 3); // heading + links_to + tagged + registry + assert!(report.chunks_written >= 1); + + let mem = state.memory.read().await; + let hits = mem.recall_text("sled storage").unwrap(); + assert!(!hits.is_empty()); + } + + #[tokio::test] + async fn reingesting_unchanged_is_idempotent() { + let dir = tempfile::tempdir().unwrap(); + write(dir.path(), "note.md", "# Title\n\nStable [[content]].\n"); + let state = enabled_state().await; + let root = dir.path().to_str().unwrap(); + + ingest_path(&state, root, None).await.unwrap(); + let actions_after_first = { + let g = state.graph.read().await; + g.dag_store().unwrap().action_count() + }; + + let report2 = ingest_path(&state, root, None).await.unwrap(); + let actions_after_second = { + let g = state.graph.read().await; + g.dag_store().unwrap().action_count() + }; + + assert_eq!(report2.files_skipped, 1); + assert_eq!(report2.files_ingested, 0); + assert_eq!(actions_after_first, actions_after_second, + "re-ingesting unchanged files must write zero new DAG actions"); + } + + #[tokio::test] + async fn changed_file_reingests() { + let dir = tempfile::tempdir().unwrap(); + write(dir.path(), "note.md", "# A\n\nFirst [[x]].\n"); + let state = enabled_state().await; + let root = dir.path().to_str().unwrap(); + + ingest_path(&state, root, None).await.unwrap(); + write(dir.path(), "note.md", "# A\n\nSecond [[y]] changed.\n"); + let report = ingest_path(&state, root, None).await.unwrap(); + assert_eq!(report.files_ingested, 1); + assert_eq!(report.files_skipped, 0); + } +} diff --git a/crates/aingle_cortex/src/service/mod.rs b/crates/aingle_cortex/src/service/mod.rs index bcc9f1ca..bc346971 100644 --- a/crates/aingle_cortex/src/service/mod.rs +++ 
b/crates/aingle_cortex/src/service/mod.rs @@ -5,6 +5,7 @@ #[cfg(feature = "dag")] pub mod dag; +pub mod ingest; pub mod proof; pub mod query; pub mod reputation; From 6161f30d765e4a0b3cc11bf588b1b101fbc49cdf Mon Sep 17 00:00:00 2001 From: ApiliumDevTeam Date: Tue, 23 Jun 2026 14:33:57 +0200 Subject: [PATCH 06/72] =?UTF-8?q?feat(cortex):=20service::ground=20?= =?UTF-8?q?=E2=80=94=20grounded=20retrieval=20with=20provenance=20+=20grou?= =?UTF-8?q?ndedness?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crates/aingle_cortex/src/service/ground.rs | 175 +++++++++++++++++++++ crates/aingle_cortex/src/service/mod.rs | 1 + 2 files changed, 176 insertions(+) create mode 100644 crates/aingle_cortex/src/service/ground.rs diff --git a/crates/aingle_cortex/src/service/ground.rs b/crates/aingle_cortex/src/service/ground.rs new file mode 100644 index 00000000..97471fb3 --- /dev/null +++ b/crates/aingle_cortex/src/service/ground.rs @@ -0,0 +1,175 @@ +// Copyright 2019-2026 Apilium Technologies OÜ. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 OR Commercial + +//! Grounded retrieval: turn a question into cited, provenance-backed context with +//! an explicit groundedness signal, so an LLM answers only from verifiable sources. + +use crate::error::Result; +use crate::state::AppState; +use serde::Serialize; + +/// Similarity at/above which retrieval is considered well-grounded. +const GROUND_HIGH: f32 = 0.55; +/// Similarity below which retrieval is considered ungrounded. +const GROUND_LOW: f32 = 0.30; + +/// A cited chunk of source context. +#[derive(Debug, Clone, Serialize)] +pub struct ContextChunk { + pub text: String, + pub source: String, + pub lines: String, + pub relevance: f32, + pub provenance_sig: Option, + pub ingested_at: Option, +} + +/// The grounded answer context returned to the model. 
+#[derive(Debug, Clone, Serialize)] +pub struct GroundedContext { + pub groundedness: String, // "grounded" | "weak" | "ungrounded" + pub answer_context: Vec, + pub gaps: Vec, + /// Instruction echoed to the model to keep it on the cited path. + pub instruction: String, +} + +use ineru::MemoryQuery; + +/// Retrieve grounded context for `question`. Pulls the top-`k` semantically +/// similar chunks from Ineru, attaches each chunk's signed provenance from the +/// DAG (latest signed action affecting its source path), and computes a +/// groundedness signal from the best similarity. +pub async fn ground(state: &AppState, question: &str, k: usize) -> Result { + let k = k.max(1); + + let results = { + let mem = state.memory.read().await; + mem.recall(&MemoryQuery::text(question).with_limit(k)) + .map_err(|e| crate::error::Error::Internal(e.to_string()))? + }; + + let mut answer_context = Vec::new(); + let mut best: f32 = 0.0; + for r in &results { + // Only consider chunk memories produced by ingestion. 
+ if r.entry.entry_type != crate::service::ingest::CHUNK_ENTRY_TYPE { + continue; + } + let d = &r.entry.data; + let source = d.get("source_path").and_then(|v| v.as_str()).unwrap_or("").to_string(); + let ls = d.get("line_start").and_then(|v| v.as_u64()).unwrap_or(0); + let le = d.get("line_end").and_then(|v| v.as_u64()).unwrap_or(0); + let text = d.get("text").and_then(|v| v.as_str()).unwrap_or("").to_string(); + + let (sig, ingested_at) = signed_provenance(state, &source).await; + + best = best.max(r.relevance); + answer_context.push(ContextChunk { + text, + source, + lines: format!("{ls}-{le}"), + relevance: r.relevance, + provenance_sig: sig, + ingested_at, + }); + } + + let groundedness = if best >= GROUND_HIGH { + "grounded" + } else if best >= GROUND_LOW && !answer_context.is_empty() { + "weak" + } else { + "ungrounded" + }; + + let mut gaps = Vec::new(); + if answer_context.is_empty() { + gaps.push(format!("No ingested source matches: {question:?}.")); + } else if groundedness == "weak" { + gaps.push("Retrieved context is only weakly related to the question.".to_string()); + } + + Ok(GroundedContext { + groundedness: groundedness.to_string(), + answer_context, + gaps, + instruction: "Answer ONLY from answer_context and cite each claim as \ + source:lines. If groundedness is not \"grounded\", say so explicitly \ + and do not invent facts." + .to_string(), + }) +} + +/// Look up the latest signed DAG action affecting `source_path` and return its +/// action hash (as provenance identifier) and timestamp, if any. +/// +/// Adaptation note: `DagActionDto` has no `signature` field. Instead it has +/// `hash: String` (action hash) and `signed: bool`. We return the action hash +/// as the provenance identifier when the action is signed, or None otherwise. +/// The timestamp field is `timestamp: String` which matches the plan exactly. 
+async fn signed_provenance(state: &AppState, source_path: &str) -> (Option, Option) { + #[cfg(feature = "dag")] + { + if source_path.is_empty() { + return (None, None); + } + if let Ok(actions) = crate::service::dag::history_by_subject(state, source_path, 1).await { + if let Some(a) = actions.first() { + // DagActionDto has `hash: String` and `signed: bool` rather than a + // `signature` field, so we use the action hash as the provenance token + // when the action is signed. + let sig = if a.signed { Some(a.hash.clone()) } else { None }; + return (sig, Some(a.timestamp.clone())); + } + } + (None, None) + } + #[cfg(not(feature = "dag"))] + { + let _ = (state, source_path); + (None, None) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + async fn enabled_state() -> AppState { + let state = AppState::with_db_path(":memory:", None).unwrap(); + { + let mut graph = state.graph.write().await; + graph.enable_dag(); + } + state + } + + #[tokio::test] + async fn empty_memory_is_ungrounded() { + let state = enabled_state().await; + let g = ground(&state, "anything at all", 5).await.unwrap(); + assert_eq!(g.groundedness, "ungrounded"); + assert!(g.answer_context.is_empty()); + assert!(!g.gaps.is_empty()); + } + + #[tokio::test] + async fn grounds_after_ingest_with_source() { + let dir = tempfile::tempdir().unwrap(); + std::fs::write( + dir.path().join("adr.md"), + "# Storage\n\nWe chose sled because of its exclusive lock semantics.\n", + ) + .unwrap(); + let state = enabled_state().await; + crate::service::ingest::ingest_path(&state, dir.path().to_str().unwrap(), None) + .await + .unwrap(); + + let g = ground(&state, "exclusive lock semantics sled", 5).await.unwrap(); + assert!(!g.answer_context.is_empty(), "should retrieve the ingested chunk"); + assert_eq!(g.answer_context[0].source, "adr.md"); + assert_ne!(g.groundedness, "ungrounded"); + } +} diff --git a/crates/aingle_cortex/src/service/mod.rs b/crates/aingle_cortex/src/service/mod.rs index bc346971..e68afa2b 
100644 --- a/crates/aingle_cortex/src/service/mod.rs +++ b/crates/aingle_cortex/src/service/mod.rs @@ -5,6 +5,7 @@ #[cfg(feature = "dag")] pub mod dag; +pub mod ground; pub mod ingest; pub mod proof; pub mod query; From 79603ca03777add118ce034a4be2dc2390e9c5e3 Mon Sep 17 00:00:00 2001 From: ApiliumDevTeam Date: Tue, 23 Jun 2026 14:41:08 +0200 Subject: [PATCH 07/72] refactor(cortex): rename provenance_sig to provenance_anchor (it holds the signed action hash) --- crates/aingle_cortex/src/service/ground.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/crates/aingle_cortex/src/service/ground.rs b/crates/aingle_cortex/src/service/ground.rs index 97471fb3..c3ee5152 100644 --- a/crates/aingle_cortex/src/service/ground.rs +++ b/crates/aingle_cortex/src/service/ground.rs @@ -20,7 +20,9 @@ pub struct ContextChunk { pub source: String, pub lines: String, pub relevance: f32, - pub provenance_sig: Option, + /// Hex hash of the signed DAG action that recorded this source — verifiable + /// via the DAG history/action API. `None` when the source has no signed action. + pub provenance_anchor: Option, pub ingested_at: Option, } @@ -70,7 +72,7 @@ pub async fn ground(state: &AppState, question: &str, k: usize) -> Result Date: Tue, 23 Jun 2026 14:51:27 +0200 Subject: [PATCH 08/72] feat(cortex): MCP tools aingle_ingest / aingle_ground / aingle_sources --- crates/aingle_cortex/src/mcp/server.rs | 91 ++++++++++++++++++++++ crates/aingle_cortex/src/service/ingest.rs | 18 +++++ 2 files changed, 109 insertions(+) diff --git a/crates/aingle_cortex/src/mcp/server.rs b/crates/aingle_cortex/src/mcp/server.rs index f72abdea..4a1e91a3 100644 --- a/crates/aingle_cortex/src/mcp/server.rs +++ b/crates/aingle_cortex/src/mcp/server.rs @@ -90,6 +90,56 @@ impl AingleMcp { "pong".to_string() } + /// Ingest a markdown vault / code repo into the graph + memory with provenance. 
+ #[tool( + description = "Ingest a markdown vault or code repo: auto-extracts triples \ + (frontmatter, wikilinks, headings, tags), indexes text chunks for \ + semantic recall, and records signed provenance. Incremental: unchanged \ + files are skipped." + )] + async fn aingle_ingest( + &self, + params: Parameters, + ) -> Result { + let Parameters(p) = params; + let resp = crate::service::ingest::ingest_path(&self.state, &p.path, None) + .await + .map_err(super::convert::to_mcp_error)?; + Ok(CallToolResult::success(vec![Content::json(resp)?])) + } + + /// Grounded retrieval: cited, provenance-backed context for a question. + #[tool( + description = "Answer-grounding for a question. Returns cited source chunks \ + (path:lines) with a signed-provenance anchor and a groundedness signal. \ + Answer ONLY from the returned context; if groundedness is not 'grounded', \ + say so and do not invent.", + annotations(read_only_hint = true) + )] + async fn aingle_ground( + &self, + params: Parameters, + ) -> Result { + let Parameters(p) = params; + let resp = crate::service::ground::ground(&self.state, &p.question, p.k) + .await + .map_err(super::convert::to_mcp_error)?; + Ok(CallToolResult::success(vec![Content::json(resp)?])) + } + + /// List ingested sources and their signed content hashes. + #[tool( + description = "List ingested source files with their content hashes (the \ + signed provenance registry).", + annotations(read_only_hint = true) + )] + async fn aingle_sources(&self) -> Result { + let resp = crate::service::ingest::list_sources(&self.state) + .await + .map_err(super::convert::to_mcp_error)?; + Ok(CallToolResult::success(vec![Content::json(resp)?])) + } + /// Query the semantic graph by triple pattern (any field omitted = wildcard). #[tool( description = "Query the semantic graph by triple pattern. Omit a field to wildcard it.", @@ -536,6 +586,27 @@ impl AingleMcp { } } +/// Parameters for the `aingle_ingest` tool. 
+#[derive(serde::Deserialize, schemars::JsonSchema)] +pub struct IngestParams { + /// Absolute or relative path to the vault/repo root to ingest. + pub path: String, +} + +/// Parameters for the `aingle_ground` tool. +#[derive(serde::Deserialize, schemars::JsonSchema)] +pub struct GroundParams { + /// The question to ground against ingested sources. + pub question: String, + /// Max chunks to retrieve. + #[serde(default = "default_ground_k")] + pub k: usize, +} + +fn default_ground_k() -> usize { + 6 +} + #[tool_handler(router = self.tool_router)] impl ServerHandler for AingleMcp { fn get_info(&self) -> ServerInfo { @@ -549,3 +620,23 @@ impl ServerHandler for AingleMcp { info } } + +#[cfg(test)] +mod ingest_tools_tests { + use super::*; + + #[test] + fn router_exposes_ingest_ground_sources() { + let state = AppState::with_db_path(":memory:", None).unwrap(); + let mcp = AingleMcp::new(state); + let names: Vec = mcp + .tool_router + .list_all() + .into_iter() + .map(|t| t.name.to_string()) + .collect(); + for expected in ["aingle_ingest", "aingle_ground", "aingle_sources"] { + assert!(names.contains(&expected.to_string()), "missing tool {expected}"); + } + } +} diff --git a/crates/aingle_cortex/src/service/ingest.rs b/crates/aingle_cortex/src/service/ingest.rs index 2f2ba553..0639cad1 100644 --- a/crates/aingle_cortex/src/service/ingest.rs +++ b/crates/aingle_cortex/src/service/ingest.rs @@ -251,6 +251,24 @@ pub async fn ingest_path( Ok(report) } +/// List all source files recorded in the signed registry (path + content hash). 
+pub async fn list_sources(state: &AppState) -> Result> { + let graph = state.graph.read().await; + let pattern = TriplePattern::any().with_predicate(Predicate::named(PRED_SOURCE_HASH)); + let triples = graph + .find(pattern) + .map_err(|e| Error::Internal(format!("graph find error: {e}")))?; + Ok(triples + .iter() + .filter_map(|t| { + t.object_string().map(|h| SourceRecord { + path: t.subject.to_string(), + content_hash: h.to_string(), + }) + }) + .collect()) +} + #[cfg(test)] mod tests { use super::*; From 2dede73fa98d8ca7f4b7843655022e2ce2e4f9da Mon Sep 17 00:00:00 2001 From: ApiliumDevTeam Date: Tue, 23 Jun 2026 15:11:03 +0200 Subject: [PATCH 09/72] fix(cortex): strip IRI brackets from list_sources paths for clean round-trip --- crates/aingle_cortex/src/service/ingest.rs | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/crates/aingle_cortex/src/service/ingest.rs b/crates/aingle_cortex/src/service/ingest.rs index 0639cad1..fb9e00a8 100644 --- a/crates/aingle_cortex/src/service/ingest.rs +++ b/crates/aingle_cortex/src/service/ingest.rs @@ -261,8 +261,17 @@ pub async fn list_sources(state: &AppState) -> Result> { Ok(triples .iter() .filter_map(|t| { + // `NodeId::to_string` renders the IRI form ``; strip the angle + // brackets so the path matches the clean form used by `ingest_path`'s + // report and the chunk provenance (round-trippable into other tools). 
+ let path = t + .subject + .to_string() + .trim_start_matches('<') + .trim_end_matches('>') + .to_string(); t.object_string().map(|h| SourceRecord { - path: t.subject.to_string(), + path, content_hash: h.to_string(), }) }) From 1e0c11b93e9cb91a23ad342982422dbdc47eb003 Mon Sep 17 00:00:00 2001 From: ApiliumDevTeam Date: Tue, 23 Jun 2026 16:53:40 +0200 Subject: [PATCH 10/72] build: update Cargo.lock for aingle_ingest crate --- Cargo.lock | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/Cargo.lock b/Cargo.lock index d422051e..afc57188 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -141,6 +141,7 @@ name = "aingle_cortex" version = "0.6.3" dependencies = [ "aingle_graph", + "aingle_ingest", "aingle_logic", "aingle_raft", "aingle_wal", @@ -149,6 +150,7 @@ dependencies = [ "async-graphql", "async-graphql-axum", "axum", + "base64", "blake3", "chrono", "dashmap 6.1.0", @@ -157,6 +159,7 @@ dependencies = [ "futures", "hex", "if-addrs 0.13.4", + "ignore", "ineru", "jsonwebtoken", "log", @@ -169,6 +172,7 @@ dependencies = [ "regex", "reqwest", "rmcp", + "rsa", "rustls", "rustls-pemfile", "schemars", @@ -215,6 +219,18 @@ dependencies = [ "uuid", ] +[[package]] +name = "aingle_ingest" +version = "0.6.3" +dependencies = [ + "aingle_graph", + "blake3", + "once_cell", + "regex", + "serde", + "serde_json", +] + [[package]] name = "aingle_logic" version = "0.6.3" From 7602997379127d31c2a1482d12edaf79a3a0271a Mon Sep 17 00:00:00 2001 From: ApiliumDevTeam Date: Tue, 23 Jun 2026 19:26:58 +0200 Subject: [PATCH 11/72] chore: update .gitignore for local data and config --- .gitignore | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.gitignore b/.gitignore index bb7ee1d0..c5fdb7ba 100644 --- a/.gitignore +++ b/.gitignore @@ -22,6 +22,8 @@ Thumbs.db CLAUDE.md .claude_settings .claudeignore +.mcp.json +docs/superpowers/ # GitHub Copilot .copilot/ @@ -113,6 +115,8 @@ llm-instructions.md *.profraw *.profdata aingle_iot.db +data/ +*.sled # Logs *.log From 
595794d26f796a26ad0bdd0cb6557961ace39d66 Mon Sep 17 00:00:00 2001 From: ApiliumDevTeam Date: Tue, 23 Jun 2026 19:42:00 +0200 Subject: [PATCH 12/72] style(ai): sort imports in aingle_ai to satisfy rustfmt check --- crates/aingle_ai/src/config.rs | 2 +- crates/aingle_ai/src/emergent/mod.rs | 4 ++-- crates/aingle_ai/src/emergent/predictive_validator.rs | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/crates/aingle_ai/src/config.rs b/crates/aingle_ai/src/config.rs index 9b55a283..907845b3 100644 --- a/crates/aingle_ai/src/config.rs +++ b/crates/aingle_ai/src/config.rs @@ -3,9 +3,9 @@ //! Global AI configuration +use crate::ineru::IneruConfig; use crate::kaneru::KaneruConfig; use crate::nested_learning::NestedConfig; -use crate::ineru::IneruConfig; use serde::{Deserialize, Serialize}; /// Global AI configuration for AIngle nodes diff --git a/crates/aingle_ai/src/emergent/mod.rs b/crates/aingle_ai/src/emergent/mod.rs index d591a5dc..982d8c20 100644 --- a/crates/aingle_ai/src/emergent/mod.rs +++ b/crates/aingle_ai/src/emergent/mod.rs @@ -17,8 +17,8 @@ mod predictive_validator; pub use adaptive_consensus::AdaptiveConsensus; pub use predictive_validator::PredictiveValidator; -use crate::nested_learning::NestedLearning; use crate::ineru::IneruMemory; +use crate::nested_learning::NestedLearning; use crate::types::{AiTransaction, ConsensusLevel, ValidationPrediction}; /// Unified AI layer combining all capabilities @@ -39,8 +39,8 @@ pub struct AiLayer { impl AiLayer { /// Create a new AI layer with default configuration pub fn new() -> Self { - use crate::nested_learning::NestedConfig; use crate::ineru::IneruConfig; + use crate::nested_learning::NestedConfig; Self { ineru: IneruMemory::new(IneruConfig::default()), diff --git a/crates/aingle_ai/src/emergent/predictive_validator.rs b/crates/aingle_ai/src/emergent/predictive_validator.rs index d96d7f23..287666ed 100644 --- a/crates/aingle_ai/src/emergent/predictive_validator.rs +++ 
b/crates/aingle_ai/src/emergent/predictive_validator.rs @@ -5,8 +5,8 @@ //! //! Predict validation outcome before full validation. -use crate::nested_learning::NestedLearning; use crate::ineru::IneruMemory; +use crate::nested_learning::NestedLearning; use crate::types::{AiTransaction, ValidationPrediction}; /// Predict validation outcome before full validation From b1e1006b15539d8c60f889591f9b0cd6f05bb53b Mon Sep 17 00:00:00 2001 From: ApiliumDevTeam Date: Tue, 23 Jun 2026 19:48:26 +0200 Subject: [PATCH 13/72] style: apply rustfmt across workspace --- .../src/emergent/predictive_validator.rs | 2 +- crates/aingle_ai/src/lib.rs | 4 +- crates/aingle_cortex/src/auth/jwt.rs | 5 +- crates/aingle_cortex/src/client.rs | 92 ++++++++++----- crates/aingle_cortex/src/graphql/mod.rs | 10 +- crates/aingle_cortex/src/graphql/resolvers.rs | 12 +- crates/aingle_cortex/src/mcp/server.rs | 5 +- crates/aingle_cortex/src/middleware/mod.rs | 2 +- .../aingle_cortex/src/middleware/namespace.rs | 12 +- .../src/middleware/rate_limit.rs | 18 +-- crates/aingle_cortex/src/p2p/config.rs | 10 +- crates/aingle_cortex/src/p2p/dag_sync.rs | 15 +-- crates/aingle_cortex/src/p2p/discovery.rs | 14 +-- crates/aingle_cortex/src/p2p/gossip.rs | 6 +- crates/aingle_cortex/src/p2p/identity.rs | 2 +- crates/aingle_cortex/src/p2p/manager.rs | 89 ++++++++------- crates/aingle_cortex/src/p2p/message.rs | 65 ++++++----- crates/aingle_cortex/src/p2p/peer_store.rs | 20 ++-- crates/aingle_cortex/src/p2p/rate_limiter.rs | 7 +- crates/aingle_cortex/src/p2p/sync_manager.rs | 14 +-- crates/aingle_cortex/src/p2p/transport.rs | 35 +++--- crates/aingle_cortex/src/proofs/backend.rs | 11 +- crates/aingle_cortex/src/proofs/store.rs | 5 +- .../aingle_cortex/src/proofs/verification.rs | 29 +++-- crates/aingle_cortex/src/rest/audit.rs | 6 +- crates/aingle_cortex/src/rest/cluster.rs | 61 ++++++---- .../aingle_cortex/src/rest/cluster_utils.rs | 2 +- crates/aingle_cortex/src/rest/memory.rs | 67 ++++++----- 
.../aingle_cortex/src/rest/observability.rs | 4 +- crates/aingle_cortex/src/rest/p2p.rs | 6 +- crates/aingle_cortex/src/rest/raft_rpc.rs | 39 +++---- crates/aingle_cortex/src/service/ground.rs | 26 ++++- crates/aingle_cortex/src/service/ingest.rs | 16 ++- crates/aingle_cortex/src/service/triples.rs | 19 ++- crates/aingle_cortex/src/state.rs | 15 ++- .../tests/data_integrity_test.rs | 108 ++++++++++++------ crates/aingle_graph/src/dag/action.rs | 31 +++-- crates/aingle_graph/src/dag/backend.rs | 4 +- crates/aingle_graph/src/dag/export.rs | 21 +++- crates/aingle_graph/src/dag/mod.rs | 6 +- crates/aingle_graph/src/dag/signing.rs | 4 +- crates/aingle_graph/src/dag/timetravel.rs | 10 +- crates/aingle_graph/src/lib.rs | 15 +-- crates/aingle_ingest/src/chunk.rs | 6 +- crates/aingle_ingest/src/lib.rs | 36 ++++-- crates/aingle_logic/src/engine.rs | 32 ++++-- crates/aingle_minimal/src/discovery.rs | 8 +- crates/aingle_minimal/src/lib.rs | 4 +- crates/aingle_minimal/src/memory.rs | 4 +- crates/aingle_minimal/src/quic.rs | 10 +- crates/aingle_minimal/src/rest.rs | 106 +++++++++-------- crates/aingle_minimal/src/rocks_storage.rs | 11 +- crates/aingle_minimal/src/wallet.rs | 7 +- .../tests/smart_node_integration_tests.rs | 4 +- crates/aingle_raft/src/consistency.rs | 30 ++++- crates/aingle_raft/src/lib.rs | 10 +- crates/aingle_raft/src/log_store.rs | 85 ++++++-------- crates/aingle_raft/src/network.rs | 35 +++--- crates/aingle_raft/src/snapshot_builder.rs | 10 +- crates/aingle_raft/src/state_machine.rs | 106 ++++++++++------- crates/aingle_raft/src/types.rs | 6 +- crates/aingle_wal/src/entry.rs | 31 ++--- crates/aingle_wal/src/reader.rs | 8 +- crates/aingle_wal/src/segment.rs | 13 +-- crates/aingle_wal/src/writer.rs | 38 ++++-- crates/ineru/benches/memory_bench.rs | 2 +- crates/ineru/src/hnsw.rs | 86 ++++++++++---- crates/ineru/src/lib.rs | 7 +- crates/ineru/src/ltm.rs | 8 +- crates/kaneru/src/coordination.rs | 5 +- crates/kaneru/src/kaneru_agent.rs | 4 +- 
crates/kaneru/src/memory.rs | 2 +- crates/kaneru/src/persistence.rs | 3 +- crates/kaneru/tests/integration_test.rs | 3 +- crates/kaneru/tests/integration_tests.rs | 2 +- 75 files changed, 940 insertions(+), 726 deletions(-) diff --git a/crates/aingle_ai/src/emergent/predictive_validator.rs b/crates/aingle_ai/src/emergent/predictive_validator.rs index 287666ed..cd886250 100644 --- a/crates/aingle_ai/src/emergent/predictive_validator.rs +++ b/crates/aingle_ai/src/emergent/predictive_validator.rs @@ -160,8 +160,8 @@ pub struct PredictionAccuracy { #[cfg(test)] mod tests { use super::*; - use crate::nested_learning::NestedConfig; use crate::ineru::IneruConfig; + use crate::nested_learning::NestedConfig; fn make_test_tx(id: u8) -> AiTransaction { AiTransaction { diff --git a/crates/aingle_ai/src/lib.rs b/crates/aingle_ai/src/lib.rs index 1be71d33..3861df3c 100644 --- a/crates/aingle_ai/src/lib.rs +++ b/crates/aingle_ai/src/lib.rs @@ -58,9 +58,9 @@ #![warn(clippy::all)] pub mod emergent; +pub mod ineru; pub mod kaneru; pub mod nested_learning; -pub mod ineru; mod config; mod error; @@ -75,7 +75,7 @@ pub mod prelude { pub use crate::config::AiConfig; pub use crate::emergent::{AdaptiveConsensus, PredictiveValidator}; pub use crate::error::{AiError, AiResult}; + pub use crate::ineru::{IneruConfig, IneruMemory, LongTermMemory, ShortTermMemory}; pub use crate::kaneru::{KaneruAgent, KaneruConfig}; pub use crate::nested_learning::{NestedConfig, NestedLearning}; - pub use crate::ineru::{LongTermMemory, ShortTermMemory, IneruConfig, IneruMemory}; } diff --git a/crates/aingle_cortex/src/auth/jwt.rs b/crates/aingle_cortex/src/auth/jwt.rs index e15597a8..34c59a76 100644 --- a/crates/aingle_cortex/src/auth/jwt.rs +++ b/crates/aingle_cortex/src/auth/jwt.rs @@ -379,7 +379,10 @@ mod tests { #[test] fn test_token_roundtrip() { - std::env::set_var("AINGLE_JWT_SECRET", "test-secret-only-do-not-use-in-production-64bytes-pad"); + std::env::set_var( + "AINGLE_JWT_SECRET", + 
"test-secret-only-do-not-use-in-production-64bytes-pad", + ); let claims = Claims::new_access("user123", vec!["user".to_string()]); let token = encode( diff --git a/crates/aingle_cortex/src/client.rs b/crates/aingle_cortex/src/client.rs index eebfdb77..6959f82e 100644 --- a/crates/aingle_cortex/src/client.rs +++ b/crates/aingle_cortex/src/client.rs @@ -8,9 +8,8 @@ //! the knowledge layer. use crate::wasm_types::{ - GraphQueryInput, GraphQueryOutput, GraphStoreInput, GraphStoreOutput, - MemoryRecallInput, MemoryRecallOutput, MemoryRememberInput, MemoryRememberOutput, - Triple, ObjectValue, + GraphQueryInput, GraphQueryOutput, GraphStoreInput, GraphStoreOutput, MemoryRecallInput, + MemoryRecallOutput, MemoryRememberInput, MemoryRememberOutput, ObjectValue, Triple, }; use serde::{Deserialize, Serialize}; @@ -197,7 +196,10 @@ impl CortexInternalClient { /// Query the semantic graph. pub async fn graph_query(&self, input: GraphQueryInput) -> Result { let (subject, predicate) = if let Some(ref pattern) = input.pattern { - (pattern.subject.clone().or(input.subject), pattern.predicate.clone().or(input.predicate)) + ( + pattern.subject.clone().or(input.subject), + pattern.predicate.clone().or(input.predicate), + ) } else { (input.subject, input.predicate) }; @@ -205,23 +207,28 @@ impl CortexInternalClient { let body = PatternQueryRequest { subject, predicate, - object: input.pattern.as_ref() + object: input + .pattern + .as_ref() .and_then(|p| p.object.as_ref()) .map(Self::object_to_json), limit: input.limit, }; - let req = self.apply_auth( - self.http.post(self.url("/api/v1/query")).json(&body), - ); + let req = self.apply_auth(self.http.post(self.url("/api/v1/query")).json(&body)); - let resp = req.send().await.map_err(|e| format!("Cortex query failed: {}", e))?; + let resp = req + .send() + .await + .map_err(|e| format!("Cortex query failed: {}", e))?; if !resp.status().is_success() { return Err(format!("Cortex query returned {}", resp.status())); } - let result: 
PatternQueryResponse = resp.json().await + let result: PatternQueryResponse = resp + .json() + .await .map_err(|e| format!("Failed to parse Cortex response: {}", e))?; Ok(GraphQueryOutput { @@ -238,17 +245,20 @@ impl CortexInternalClient { object: Self::object_to_json(&input.object), }; - let req = self.apply_auth( - self.http.post(self.url("/api/v1/triples")).json(&body), - ); + let req = self.apply_auth(self.http.post(self.url("/api/v1/triples")).json(&body)); - let resp = req.send().await.map_err(|e| format!("Cortex store failed: {}", e))?; + let resp = req + .send() + .await + .map_err(|e| format!("Cortex store failed: {}", e))?; if !resp.status().is_success() { return Err(format!("Cortex store returned {}", resp.status())); } - let result: CreateTripleResponse = resp.json().await + let result: CreateTripleResponse = resp + .json() + .await .map_err(|e| format!("Failed to parse Cortex response: {}", e))?; Ok(GraphStoreOutput { @@ -257,7 +267,10 @@ impl CortexInternalClient { } /// Recall memories from the Titans system. 
- pub async fn memory_recall(&self, input: MemoryRecallInput) -> Result { + pub async fn memory_recall( + &self, + input: MemoryRecallInput, + ) -> Result { let body = MemoryRecallRequest { query: input.query, entry_type: input.entry_type, @@ -265,34 +278,46 @@ impl CortexInternalClient { }; let req = self.apply_auth( - self.http.post(self.url("/api/v1/memory/recall")).json(&body), + self.http + .post(self.url("/api/v1/memory/recall")) + .json(&body), ); - let resp = req.send().await.map_err(|e| format!("Titans recall failed: {}", e))?; + let resp = req + .send() + .await + .map_err(|e| format!("Titans recall failed: {}", e))?; if !resp.status().is_success() { return Err(format!("Titans recall returned {}", resp.status())); } - let result: MemoryRecallResponse = resp.json().await + let result: MemoryRecallResponse = resp + .json() + .await .map_err(|e| format!("Failed to parse Titans response: {}", e))?; Ok(MemoryRecallOutput { - results: result.results.iter().map(|r| { - crate::wasm_types::MemoryResult { + results: result + .results + .iter() + .map(|r| crate::wasm_types::MemoryResult { id: r.id.clone(), data: r.data.clone(), entry_type: r.entry_type.clone(), tags: r.tags.clone(), importance: r.importance, created_at: r.created_at.clone(), - } - }).collect(), + }) + .collect(), }) } /// Store a new memory in the Titans system. 
- pub async fn memory_remember(&self, input: MemoryRememberInput) -> Result { + pub async fn memory_remember( + &self, + input: MemoryRememberInput, + ) -> Result { let body = MemoryRememberRequest { data: input.data, entry_type: input.entry_type, @@ -301,16 +326,23 @@ impl CortexInternalClient { }; let req = self.apply_auth( - self.http.post(self.url("/api/v1/memory/remember")).json(&body), + self.http + .post(self.url("/api/v1/memory/remember")) + .json(&body), ); - let resp = req.send().await.map_err(|e| format!("Titans remember failed: {}", e))?; + let resp = req + .send() + .await + .map_err(|e| format!("Titans remember failed: {}", e))?; if !resp.status().is_success() { return Err(format!("Titans remember returned {}", resp.status())); } - let result: MemoryRememberResponse = resp.json().await + let result: MemoryRememberResponse = resp + .json() + .await .map_err(|e| format!("Failed to parse Titans response: {}", e))?; Ok(MemoryRememberOutput { id: result.id }) @@ -318,7 +350,11 @@ impl CortexInternalClient { /// Check if Cortex is healthy and reachable. 
pub async fn health_check(&self) -> bool { - match self.apply_auth(self.http.get(self.url("/api/v1/health"))).send().await { + match self + .apply_auth(self.http.get(self.url("/api/v1/health"))) + .send() + .await + { Ok(resp) => resp.status().is_success(), Err(_) => false, } diff --git a/crates/aingle_cortex/src/graphql/mod.rs b/crates/aingle_cortex/src/graphql/mod.rs index 633350a9..2387c9d8 100644 --- a/crates/aingle_cortex/src/graphql/mod.rs +++ b/crates/aingle_cortex/src/graphql/mod.rs @@ -51,8 +51,10 @@ async fn graphql_handler( /// GraphiQL IDE async fn graphql_playground() -> impl IntoResponse { - Html(async_graphql::http::GraphiQLSource::build() - .endpoint("/graphql") - .subscription_endpoint("/graphql/ws") - .finish()) + Html( + async_graphql::http::GraphiQLSource::build() + .endpoint("/graphql") + .subscription_endpoint("/graphql/ws") + .finish(), + ) } diff --git a/crates/aingle_cortex/src/graphql/resolvers.rs b/crates/aingle_cortex/src/graphql/resolvers.rs index e249cbc4..c86355fe 100644 --- a/crates/aingle_cortex/src/graphql/resolvers.rs +++ b/crates/aingle_cortex/src/graphql/resolvers.rs @@ -201,9 +201,9 @@ impl MutationRoot { key.sign(&mut action); } - dag_store.put(&action).map_err(|e| { - Error::new(format!("DAG action failed: {e}")) - })?; + dag_store + .put(&action) + .map_err(|e| Error::new(format!("DAG action failed: {e}")))?; } } @@ -259,9 +259,9 @@ impl MutationRoot { key.sign(&mut action); } - dag_store.put(&action).map_err(|e| { - Error::new(format!("DAG action failed: {e}")) - })?; + dag_store + .put(&action) + .map_err(|e| Error::new(format!("DAG action failed: {e}")))?; } } diff --git a/crates/aingle_cortex/src/mcp/server.rs b/crates/aingle_cortex/src/mcp/server.rs index 4a1e91a3..033b59bd 100644 --- a/crates/aingle_cortex/src/mcp/server.rs +++ b/crates/aingle_cortex/src/mcp/server.rs @@ -636,7 +636,10 @@ mod ingest_tools_tests { .map(|t| t.name.to_string()) .collect(); for expected in ["aingle_ingest", "aingle_ground", 
"aingle_sources"] { - assert!(names.contains(&expected.to_string()), "missing tool {expected}"); + assert!( + names.contains(&expected.to_string()), + "missing tool {expected}" + ); } } } diff --git a/crates/aingle_cortex/src/middleware/mod.rs b/crates/aingle_cortex/src/middleware/mod.rs index 3aaa1c09..0f9472bb 100644 --- a/crates/aingle_cortex/src/middleware/mod.rs +++ b/crates/aingle_cortex/src/middleware/mod.rs @@ -22,5 +22,5 @@ pub mod namespace; pub mod rate_limit; -pub use namespace::{namespace_extractor, is_in_namespace, scope_subject, RequestNamespace}; +pub use namespace::{is_in_namespace, namespace_extractor, scope_subject, RequestNamespace}; pub use rate_limit::{RateLimitError, RateLimiter, RateLimiterLayer}; diff --git a/crates/aingle_cortex/src/middleware/namespace.rs b/crates/aingle_cortex/src/middleware/namespace.rs index 31ba729a..f46f2b1c 100644 --- a/crates/aingle_cortex/src/middleware/namespace.rs +++ b/crates/aingle_cortex/src/middleware/namespace.rs @@ -6,12 +6,7 @@ //! Extracts the `namespace` from JWT claims and injects it into Axum request //! extensions so downstream handlers can scope queries/mutations by namespace. -use axum::{ - body::Body, - http::Request, - middleware::Next, - response::Response, -}; +use axum::{body::Body, http::Request, middleware::Next, response::Response}; /// Namespace extracted from JWT claims, available via request extensions. #[derive(Debug, Clone)] @@ -22,10 +17,7 @@ pub struct RequestNamespace(pub Option); /// If auth is not enabled or no namespace is present in the token, sets `None`. /// Downstream handlers can read `RequestNamespace` from extensions and enforce /// namespace boundaries accordingly. 
-pub async fn namespace_extractor( - mut req: Request, - next: Next, -) -> Response { +pub async fn namespace_extractor(mut req: Request, next: Next) -> Response { // Try to extract namespace from the Authorization header let namespace = extract_namespace_from_token(&req); req.extensions_mut().insert(RequestNamespace(namespace)); diff --git a/crates/aingle_cortex/src/middleware/rate_limit.rs b/crates/aingle_cortex/src/middleware/rate_limit.rs index dcf490b6..937ccf3c 100644 --- a/crates/aingle_cortex/src/middleware/rate_limit.rs +++ b/crates/aingle_cortex/src/middleware/rate_limit.rs @@ -66,10 +66,9 @@ impl IntoResponse for RateLimitError { .into_response(); // Add Retry-After header (infallible: From for HeaderValue) - response.headers_mut().insert( - "Retry-After", - HeaderValue::from(*secs), - ); + response + .headers_mut() + .insert("Retry-After", HeaderValue::from(*secs)); // Add rate limit headers response @@ -282,11 +281,9 @@ where // 1. If behind a proxy, try X-Forwarded-For / X-Real-IP headers. // 2. Fall back to ConnectInfo (direct connection IP). let ip = if limiter.secure_ip { - extract_proxy_ip(&req) - .or_else(|| extract_connect_ip(&req)) + extract_proxy_ip(&req).or_else(|| extract_connect_ip(&req)) } else { - extract_connect_ip(&req) - .or_else(|| extract_proxy_ip(&req)) + extract_connect_ip(&req).or_else(|| extract_proxy_ip(&req)) }; let ip = match ip { @@ -309,10 +306,7 @@ where "X-RateLimit-Limit", HeaderValue::from(limiter.requests_per_minute), ); - headers.insert( - "X-RateLimit-Remaining", - HeaderValue::from(remaining), - ); + headers.insert("X-RateLimit-Remaining", HeaderValue::from(remaining)); Ok(response) } diff --git a/crates/aingle_cortex/src/p2p/config.rs b/crates/aingle_cortex/src/p2p/config.rs index 68ab7fb3..c44da9f4 100644 --- a/crates/aingle_cortex/src/p2p/config.rs +++ b/crates/aingle_cortex/src/p2p/config.rs @@ -58,17 +58,17 @@ impl P2pConfig { /// Validate configuration values. 
pub fn validate(&self) -> Result<(), String> { if self.port < 1024 { - return Err(format!( - "p2p port must be >= 1024, got {}", - self.port - )); + return Err(format!("p2p port must be >= 1024, got {}", self.port)); } if let Some(ref seed) = self.seed { if seed.is_empty() { return Err("p2p seed must not be empty".to_string()); } - if !seed.chars().all(|c| c.is_ascii_alphanumeric() || c == '_' || c == '-') { + if !seed + .chars() + .all(|c| c.is_ascii_alphanumeric() || c == '_' || c == '-') + { return Err("p2p seed must be alphanumeric (plus _ and -)".to_string()); } } diff --git a/crates/aingle_cortex/src/p2p/dag_sync.rs b/crates/aingle_cortex/src/p2p/dag_sync.rs index 48f7b4fc..f5f6b0fa 100644 --- a/crates/aingle_cortex/src/p2p/dag_sync.rs +++ b/crates/aingle_cortex/src/p2p/dag_sync.rs @@ -31,10 +31,7 @@ pub fn collect_local_tips(graph: &GraphDB) -> (Vec, u64) { /// Given remote tips, compute which actions we have that the remote is missing, /// and return them as serialized bytes ready for sending. #[cfg(feature = "dag")] -pub fn compute_missing_from_tips( - graph: &GraphDB, - remote_tips: &[String], -) -> Vec> { +pub fn compute_missing_from_tips(graph: &GraphDB, remote_tips: &[String]) -> Vec> { let Some(dag_store) = graph.dag_store() else { return Vec::new(); }; @@ -63,10 +60,7 @@ pub fn compute_missing_from_tips( /// Fetch serialized DAG actions by their hex hashes for sending to a peer. #[cfg(feature = "dag")] -pub fn fetch_actions_by_hash( - graph: &GraphDB, - hashes: &[String], -) -> Vec> { +pub fn fetch_actions_by_hash(graph: &GraphDB, hashes: &[String]) -> Vec> { let Some(dag_store) = graph.dag_store() else { return Vec::new(); }; @@ -88,10 +82,7 @@ pub fn fetch_actions_by_hash( /// Ingest received DAG actions into the local store. 
#[cfg(feature = "dag")] -pub fn ingest_actions( - graph: &GraphDB, - action_bytes_list: &[Vec], -) -> (usize, usize) { +pub fn ingest_actions(graph: &GraphDB, action_bytes_list: &[Vec]) -> (usize, usize) { let Some(dag_store) = graph.dag_store() else { return (0, action_bytes_list.len()); }; diff --git a/crates/aingle_cortex/src/p2p/discovery.rs b/crates/aingle_cortex/src/p2p/discovery.rs index 7778d07b..2bbe70b2 100644 --- a/crates/aingle_cortex/src/p2p/discovery.rs +++ b/crates/aingle_cortex/src/p2p/discovery.rs @@ -42,8 +42,7 @@ mod inner { impl P2pDiscovery { pub fn new(node_id: String, seed_hash: String, port: u16) -> Result { - let daemon = ServiceDaemon::new() - .map_err(|e| format!("mDNS daemon: {}", e))?; + let daemon = ServiceDaemon::new().map_err(|e| format!("mDNS daemon: {}", e))?; Ok(Self { daemon, node_id, @@ -71,10 +70,7 @@ mod inner { return Err("no network interfaces".to_string()); } - let instance_name = format!( - "cortex-{}", - &self.node_id[..8.min(self.node_id.len())] - ); + let instance_name = format!("cortex-{}", &self.node_id[..8.min(self.node_id.len())]); let mut props = HashMap::new(); props.insert("node_id".to_string(), self.node_id.clone()); @@ -177,10 +173,8 @@ mod inner { self.running .store(false, std::sync::atomic::Ordering::SeqCst); if self.registered { - let instance_name = format!( - "cortex-{}", - &self.node_id[..8.min(self.node_id.len())] - ); + let instance_name = + format!("cortex-{}", &self.node_id[..8.min(self.node_id.len())]); let _ = self .daemon .unregister(&format!("{}.{}", instance_name, SERVICE_TYPE)); diff --git a/crates/aingle_cortex/src/p2p/gossip.rs b/crates/aingle_cortex/src/p2p/gossip.rs index 067afb80..b23b7d88 100644 --- a/crates/aingle_cortex/src/p2p/gossip.rs +++ b/crates/aingle_cortex/src/p2p/gossip.rs @@ -437,11 +437,7 @@ impl TripleGossipManager { } /// Find IDs that exist in `our_ids` but are missing from `peer_filter`. 
- pub fn find_missing( - &self, - peer_filter: &BloomFilter, - our_ids: &[[u8; 32]], - ) -> Vec<[u8; 32]> { + pub fn find_missing(&self, peer_filter: &BloomFilter, our_ids: &[[u8; 32]]) -> Vec<[u8; 32]> { our_ids .iter() .filter(|id| !peer_filter.may_contain(id)) diff --git a/crates/aingle_cortex/src/p2p/identity.rs b/crates/aingle_cortex/src/p2p/identity.rs index cf964682..c3b24237 100644 --- a/crates/aingle_cortex/src/p2p/identity.rs +++ b/crates/aingle_cortex/src/p2p/identity.rs @@ -41,8 +41,8 @@ impl NodeIdentity { // Write with restrictive permissions (Unix 0o600). #[cfg(unix)] { - use std::os::unix::fs::OpenOptionsExt; use std::io::Write; + use std::os::unix::fs::OpenOptionsExt; let mut f = std::fs::OpenOptions::new() .create(true) .write(true) diff --git a/crates/aingle_cortex/src/p2p/manager.rs b/crates/aingle_cortex/src/p2p/manager.rs index 44c00d8a..f6912bea 100644 --- a/crates/aingle_cortex/src/p2p/manager.rs +++ b/crates/aingle_cortex/src/p2p/manager.rs @@ -52,9 +52,7 @@ impl ManualPeerTracker { fn record_failure(&mut self) { self.retries += 1; self.last_attempt = Instant::now(); - self.current_backoff = Duration::from_secs( - (self.current_backoff.as_secs() * 2).min(300), - ); + self.current_backoff = Duration::from_secs((self.current_backoff.as_secs() * 2).min(300)); if self.retries >= self.max_retries { self.abandoned = true; } @@ -85,7 +83,8 @@ impl PingTracker { } fn record_ping(&mut self, addr: SocketAddr, timestamp_ms: u64) { - self.outstanding.insert(addr, (timestamp_ms, Instant::now())); + self.outstanding + .insert(addr, (timestamp_ms, Instant::now())); } fn record_pong(&mut self, addr: &SocketAddr, _timestamp_ms: u64) { @@ -183,9 +182,10 @@ impl P2pManager { } // A3: Load persistent peer store and merge with manual peers. - let peer_store = Arc::new(RwLock::new( - PeerStore::load(&config.data_dir, config.max_peers * 2), - )); + let peer_store = Arc::new(RwLock::new(PeerStore::load( + &config.data_dir, + config.max_peers * 2, + ))); // 6. 
Connect to manual peers + persisted peers. let triple_count = { @@ -227,7 +227,11 @@ impl P2pManager { sync.write().await.get_peer_state(&stored.addr); } Err(e) => { - tracing::debug!("P2P persisted peer {} unreachable: {}", stored.addr, e); + tracing::debug!( + "P2P persisted peer {} unreachable: {}", + stored.addr, + e + ); } } } @@ -243,8 +247,7 @@ impl P2pManager { ))); // 7. Discovery. - let mut disc = - P2pDiscovery::new(node_id.clone(), seed_hash.clone(), config.port)?; + let mut disc = P2pDiscovery::new(node_id.clone(), seed_hash.clone(), config.port)?; if config.mdns { disc.register()?; disc.start_browsing()?; @@ -309,19 +312,31 @@ impl P2pManager { }; match hello { - P2pMessage::Hello { seed_hash: peer_seed, node_id: peer_nid, .. } => { + P2pMessage::Hello { + seed_hash: peer_seed, + node_id: peer_nid, + .. + } => { let accepted = peer_seed == accept_seed_hash; let ack = P2pMessage::HelloAck { node_id: accept_node_id.clone(), accepted, - reason: if accepted { None } else { Some("seed_mismatch".into()) }, + reason: if accepted { + None + } else { + Some("seed_mismatch".into()) + }, }; if P2pTransport::send_on_conn(&connection, &ack).await.is_err() { continue; } if accepted { - tracing::info!("P2P accepted connection from {} ({})", remote, &peer_nid[..8.min(peer_nid.len())]); + tracing::info!( + "P2P accepted connection from {} ({})", + remote, + &peer_nid[..8.min(peer_nid.len())] + ); // Store connection (brief write lock). transport.write().await.store_connection(remote, connection); // Register in sync manager for gossip. 
@@ -336,7 +351,10 @@ impl P2pManager { }); let _ = ps.save(); } else { - tracing::warn!("P2P rejected connection from {}: seed mismatch", remote); + tracing::warn!( + "P2P rejected connection from {}: seed mismatch", + remote + ); connection.close(1u32.into(), b"seed_mismatch"); } } @@ -441,13 +459,9 @@ impl P2pManager { #[cfg(feature = "dag")] { let g = graph2.read().await; - let (tips, action_count) = - crate::p2p::dag_sync::collect_local_tips(&g); + let (tips, action_count) = crate::p2p::dag_sync::collect_local_tips(&g); if !tips.is_empty() { - let dag_msg = P2pMessage::DagTipSync { - tips, - action_count, - }; + let dag_msg = P2pMessage::DagTipSync { tips, action_count }; let _ = t.send(&peer_addr, &dag_msg).await; } } @@ -539,13 +553,9 @@ impl P2pManager { ); } } - P2pMessage::BloomSync { - filter_bytes, - .. - } => { + P2pMessage::BloomSync { filter_bytes, .. } => { let peer_filter = BloomFilter::from_bytes(&filter_bytes); - let local_ids: Vec<[u8; 32]> = - sync.read().await.local_ids().to_vec(); + let local_ids: Vec<[u8; 32]> = sync.read().await.local_ids().to_vec(); let missing = gossip.read().await.find_missing(&peer_filter, &local_ids); @@ -601,10 +611,7 @@ impl P2pManager { .filter_map(|tw| tw.to_triple()) .collect(); let g = graph.read().await; - let result = sync - .write() - .await - .store_received_triples(converted, &g); + let result = sync.write().await.store_received_triples(converted, &g); sync.write() .await .record_sync_result(addr, true, result.inserted); @@ -635,7 +642,10 @@ impl P2pManager { } } // A1: Handle incoming deletion announcement. - P2pMessage::AnnounceDelete { triple_id, tombstone_ts } => { + P2pMessage::AnnounceDelete { + triple_id, + tombstone_ts, + } => { if let Some(tid) = TripleId::from_hex(&triple_id) { let mut s = sync.write().await; if !s.has_tombstone(&tid.0) { @@ -682,10 +692,7 @@ impl P2pManager { // A4: Forward pong to health task via channel. P2pMessage::Pong { timestamp_ms, .. 
} => { let _ = health_tx - .send(HealthEvent::PongReceived { - addr, - timestamp_ms, - }) + .send(HealthEvent::PongReceived { addr, timestamp_ms }) .await; } // DAG sync message handlers @@ -710,8 +717,7 @@ impl P2pManager { #[cfg(feature = "dag")] P2pMessage::RequestDagActions { hashes } => { let g = graph.read().await; - let actions = - crate::p2p::dag_sync::fetch_actions_by_hash(&g, &hashes); + let actions = crate::p2p::dag_sync::fetch_actions_by_hash(&g, &hashes); if !actions.is_empty() { let send_msg = P2pMessage::SendDagActions { actions }; let t = transport.read().await; @@ -772,10 +778,7 @@ impl P2pManager { .connect(peer.addr, triple_count) .await; if let Ok(()) = result { - tracing::info!( - "P2P discovered and connected to {}", - peer.node_id - ); + tracing::info!("P2P discovered and connected to {}", peer.node_id); sync.write().await.get_peer_state(&peer.addr); // A3: Record mDNS peer let mut ps = peer_store.write().await; @@ -1155,7 +1158,9 @@ mod tests { fn ping_tracker_timed_out_detection() { let mut tracker = PingTracker::new(Duration::from_millis(10)); let addr: SocketAddr = "127.0.0.1:9000".parse().unwrap(); - tracker.outstanding.insert(addr, (1000, Instant::now() - Duration::from_millis(50))); + tracker + .outstanding + .insert(addr, (1000, Instant::now() - Duration::from_millis(50))); let timed_out = tracker.timed_out_peers(); assert_eq!(timed_out.len(), 1); assert_eq!(timed_out[0], addr); diff --git a/crates/aingle_cortex/src/p2p/message.rs b/crates/aingle_cortex/src/p2p/message.rs index 043a20a6..9e3181d9 100644 --- a/crates/aingle_cortex/src/p2p/message.rs +++ b/crates/aingle_cortex/src/p2p/message.rs @@ -38,21 +38,13 @@ pub enum P2pMessage { triple_count: u64, }, /// Request triples by their hex IDs. - RequestTriples { - ids: Vec, - }, + RequestTriples { ids: Vec }, /// Batch of triples. - SendTriples { - triples: Vec, - }, + SendTriples { triples: Vec }, /// Lightweight announcement of a new triple. 
- Announce { - triple_id: String, - }, + Announce { triple_id: String }, /// Keep-alive ping. - Ping { - timestamp_ms: u64, - }, + Ping { timestamp_ms: u64 }, /// Keep-alive pong. Pong { timestamp_ms: u64, @@ -64,9 +56,7 @@ pub enum P2pMessage { tombstone_ts: u64, }, /// Batch tombstone synchronization. - TombstoneSync { - tombstones: Vec, - }, + TombstoneSync { tombstones: Vec }, // ── DAG sync messages (feature: dag) ──────────────────────── /// Exchange of DAG tip hashes for sync. #[cfg(feature = "dag")] @@ -76,14 +66,10 @@ pub enum P2pMessage { }, /// Request specific DAG actions by hash. #[cfg(feature = "dag")] - RequestDagActions { - hashes: Vec, - }, + RequestDagActions { hashes: Vec }, /// Batch of serialized DAG actions. #[cfg(feature = "dag")] - SendDagActions { - actions: Vec>, - }, + SendDagActions { actions: Vec> }, // ── Raft / Cluster messages (feature: cluster) ────────────── /// Raft AppendEntries RPC (serialized openraft request). #[cfg(feature = "cluster")] @@ -278,10 +264,7 @@ fn json_to_value(j: &serde_json::Value) -> Value { } } "lang" => { - let lang = map - .get("lang") - .and_then(|v| v.as_str()) - .unwrap_or_default(); + let lang = map.get("lang").and_then(|v| v.as_str()).unwrap_or_default(); Value::LangString { value: val.to_string(), lang: lang.to_string(), @@ -315,7 +298,13 @@ mod tests { }; let bytes = msg.to_bytes(); let parsed = P2pMessage::from_bytes(&bytes).unwrap(); - assert!(matches!(parsed, P2pMessage::Hello { triple_count: 42, .. })); + assert!(matches!( + parsed, + P2pMessage::Hello { + triple_count: 42, + .. 
+ } + )); } #[test] @@ -328,7 +317,10 @@ mod tests { let bytes = msg.to_bytes(); let parsed = P2pMessage::from_bytes(&bytes).unwrap(); match parsed { - P2pMessage::BloomSync { filter_bytes, triple_count } => { + P2pMessage::BloomSync { + filter_bytes, + triple_count, + } => { assert_eq!(filter_bytes.len(), 128); assert_eq!(triple_count, 100); } @@ -359,9 +351,7 @@ mod tests { author: None, source: None, }; - let msg = P2pMessage::SendTriples { - triples: vec![tw], - }; + let msg = P2pMessage::SendTriples { triples: vec![tw] }; let bytes = msg.to_bytes(); let parsed = P2pMessage::from_bytes(&bytes).unwrap(); match parsed { @@ -433,7 +423,10 @@ mod tests { let bytes = msg.to_bytes(); let parsed = P2pMessage::from_bytes(&bytes).unwrap(); match parsed { - P2pMessage::AnnounceDelete { triple_id, tombstone_ts } => { + P2pMessage::AnnounceDelete { + triple_id, + tombstone_ts, + } => { assert_eq!(triple_id, "deadbeef"); assert_eq!(tombstone_ts, 1700000000000); } @@ -444,8 +437,14 @@ mod tests { #[test] fn tombstone_sync_roundtrip() { let tombstones = vec![ - TombstoneWire { triple_id: "aa".into(), deleted_at_ms: 100 }, - TombstoneWire { triple_id: "bb".into(), deleted_at_ms: 200 }, + TombstoneWire { + triple_id: "aa".into(), + deleted_at_ms: 100, + }, + TombstoneWire { + triple_id: "bb".into(), + deleted_at_ms: 200, + }, ]; let msg = P2pMessage::TombstoneSync { tombstones }; let bytes = msg.to_bytes(); diff --git a/crates/aingle_cortex/src/p2p/peer_store.rs b/crates/aingle_cortex/src/p2p/peer_store.rs index 0d6ef963..311b8ae3 100644 --- a/crates/aingle_cortex/src/p2p/peer_store.rs +++ b/crates/aingle_cortex/src/p2p/peer_store.rs @@ -46,19 +46,22 @@ impl PeerStore { } else { Vec::new() }; - Self { path, peers, max_peers } + Self { + path, + peers, + max_peers, + } } /// Write the current peer list to disk. 
pub fn save(&self) -> Result<(), String> { if let Some(parent) = self.path.parent() { - std::fs::create_dir_all(parent) - .map_err(|e| format!("create peer store dir: {}", e))?; + std::fs::create_dir_all(parent).map_err(|e| format!("create peer store dir: {}", e))?; } let json = serde_json::to_string_pretty(&self.peers) .map_err(|e| format!("serialize peers: {}", e))?; - let mut file = std::fs::File::create(&self.path) - .map_err(|e| format!("create peer store: {}", e))?; + let mut file = + std::fs::File::create(&self.path).map_err(|e| format!("create peer store: {}", e))?; std::io::Write::write_all(&mut file, json.as_bytes()) .map_err(|e| format!("write peer store: {}", e))?; file.sync_all() @@ -75,7 +78,10 @@ impl PeerStore { // Enforce capacity if self.peers.len() >= self.max_peers { // Remove oldest (by last_connected_ms) - if let Some(oldest_idx) = self.peers.iter().enumerate() + if let Some(oldest_idx) = self + .peers + .iter() + .enumerate() .min_by_key(|(_, p)| p.last_connected_ms) .map(|(i, _)| i) { @@ -191,7 +197,7 @@ mod tests { // Add a peer with an old timestamp store.add(stored_peer(9001, 1)); store.cleanup_stale(1000); // 1 second max age - // peer with ts=0 is kept (never-connected sentinel), old one removed + // peer with ts=0 is kept (never-connected sentinel), old one removed assert_eq!(store.all().len(), 1); assert_eq!(store.all()[0].addr, addr(9000)); } diff --git a/crates/aingle_cortex/src/p2p/rate_limiter.rs b/crates/aingle_cortex/src/p2p/rate_limiter.rs index 3aa5d48f..4851b023 100644 --- a/crates/aingle_cortex/src/p2p/rate_limiter.rs +++ b/crates/aingle_cortex/src/p2p/rate_limiter.rs @@ -41,9 +41,10 @@ impl IngressRateLimiter { /// /// Returns the number of allowed triples (0..=count). 
pub fn check(&mut self, addr: &SocketAddr, count: usize) -> usize { - let bucket = self.per_peer.entry(*addr).or_insert_with(|| { - TokenBucket::with_params(self.per_peer_max, self.per_peer_rate) - }); + let bucket = self + .per_peer + .entry(*addr) + .or_insert_with(|| TokenBucket::with_params(self.per_peer_max, self.per_peer_rate)); let mut allowed = 0; for _ in 0..count { diff --git a/crates/aingle_cortex/src/p2p/sync_manager.rs b/crates/aingle_cortex/src/p2p/sync_manager.rs index a55ba231..8855db61 100644 --- a/crates/aingle_cortex/src/p2p/sync_manager.rs +++ b/crates/aingle_cortex/src/p2p/sync_manager.rs @@ -146,11 +146,7 @@ impl TripleSyncManager { } /// Insert triples received from a peer into the graph. Duplicates are counted, not errors. - pub fn store_received_triples( - &mut self, - triples: Vec, - graph: &GraphDB, - ) -> StoreResult { + pub fn store_received_triples(&mut self, triples: Vec, graph: &GraphDB) -> StoreResult { let mut result = StoreResult::default(); for triple in triples { let id = TripleId::from_triple(&triple); @@ -161,7 +157,10 @@ impl TripleSyncManager { } Err(e) => { let msg = format!("{}", e); - if msg.contains("duplicate") || msg.contains("exists") || msg.contains("already") { + if msg.contains("duplicate") + || msg.contains("exists") + || msg.contains("already") + { result.duplicates += 1; } else { result.errors += 1; @@ -215,7 +214,8 @@ impl TripleSyncManager { .unwrap_or_default() .as_millis() as u64; let ttl_ms = self.tombstone_ttl.as_millis() as u64; - self.tombstones.retain(|_, ts| now_ms.saturating_sub(*ts) < ttl_ms); + self.tombstones + .retain(|_, ts| now_ms.saturating_sub(*ts) < ttl_ms); } /// Return all active tombstones as (hash, timestamp_ms) pairs. 
diff --git a/crates/aingle_cortex/src/p2p/transport.rs b/crates/aingle_cortex/src/p2p/transport.rs index 68b42c09..c0bf865b 100644 --- a/crates/aingle_cortex/src/p2p/transport.rs +++ b/crates/aingle_cortex/src/p2p/transport.rs @@ -103,7 +103,9 @@ impl P2pTransport { // Receive HelloAck. let ack = Self::recv_from_connection(&connection).await?; match ack { - P2pMessage::HelloAck { accepted, reason, .. } => { + P2pMessage::HelloAck { + accepted, reason, .. + } => { if !accepted { connection.close(1u32.into(), b"rejected"); return Err(format!( @@ -142,7 +144,9 @@ impl P2pTransport { let hello = Self::recv_from_connection(&connection).await?; match &hello { - P2pMessage::Hello { seed_hash, node_id, .. } => { + P2pMessage::Hello { + seed_hash, node_id, .. + } => { let accepted = seed_hash == &self.seed_hash; let reason = if accepted { None @@ -158,7 +162,11 @@ impl P2pTransport { Self::send_on_connection(&connection, &ack).await?; if accepted { - tracing::info!("P2P accepted connection from {} ({})", remote, &node_id[..8.min(node_id.len())]); + tracing::info!( + "P2P accepted connection from {} ({})", + remote, + &node_id[..8.min(node_id.len())] + ); self.connections.insert(remote, connection); Ok(Some((remote, hello))) } else { @@ -302,8 +310,7 @@ impl P2pTransport { .map_err(|e| format!("cert gen: {}", e))?; let cert_der = CertificateDer::from(cert.cert.der().to_vec()); - let key_der = - PrivateKeyDer::Pkcs8(PrivatePkcs8KeyDer::from(cert.key_pair.serialize_der())); + let key_der = PrivateKeyDer::Pkcs8(PrivatePkcs8KeyDer::from(cert.key_pair.serialize_der())); let mut server_crypto = rustls::ServerConfig::builder() .with_no_client_auth() @@ -429,22 +436,14 @@ mod tests { #[test] fn transport_new_has_no_connections() { - let t = P2pTransport::new( - P2pTransportConfig::default(), - "abc".into(), - "hash".into(), - ); + let t = P2pTransport::new(P2pTransportConfig::default(), "abc".into(), "hash".into()); assert_eq!(t.connection_count(), 0); 
assert!(t.connected_peers().is_empty()); } #[test] fn is_connected_false_initially() { - let t = P2pTransport::new( - P2pTransportConfig::default(), - "abc".into(), - "hash".into(), - ); + let t = P2pTransport::new(P2pTransportConfig::default(), "abc".into(), "hash".into()); let addr: SocketAddr = "127.0.0.1:19091".parse().unwrap(); assert!(!t.is_connected(&addr)); } @@ -483,11 +482,7 @@ mod tests { #[tokio::test] async fn disconnect_nonexistent_is_noop() { - let mut t = P2pTransport::new( - P2pTransportConfig::default(), - "abc".into(), - "hash".into(), - ); + let mut t = P2pTransport::new(P2pTransportConfig::default(), "abc".into(), "hash".into()); let addr: SocketAddr = "127.0.0.1:19091".parse().unwrap(); t.disconnect(&addr); // should not panic } diff --git a/crates/aingle_cortex/src/proofs/backend.rs b/crates/aingle_cortex/src/proofs/backend.rs index 07e7ffd5..9866dc02 100644 --- a/crates/aingle_cortex/src/proofs/backend.rs +++ b/crates/aingle_cortex/src/proofs/backend.rs @@ -80,10 +80,7 @@ impl ProofBackend for MemoryProofBackend { .data .read() .map_err(|_| "MemoryProofBackend lock poisoned".to_string())?; - Ok(data - .iter() - .map(|(k, v)| (k.clone(), v.clone())) - .collect()) + Ok(data.iter().map(|(k, v)| (k.clone(), v.clone())).collect()) } } @@ -103,8 +100,7 @@ pub struct SledProofBackend { impl SledProofBackend { /// Open or create a proofs tree inside the Sled database at `path`. 
pub fn open(path: &str) -> Result { - let db = - sled::open(path).map_err(|e| format!("sled open error (proofs): {e}"))?; + let db = sled::open(path).map_err(|e| format!("sled open error (proofs): {e}"))?; let tree = db .open_tree("proofs") .map_err(|e| format!("sled open_tree(proofs) error: {e}"))?; @@ -139,8 +135,7 @@ impl ProofBackend for SledProofBackend { fn list_all(&self) -> Result)>, String> { let mut results = Vec::new(); for item in self.tree.iter() { - let (k, v) = - item.map_err(|e| format!("sled proofs scan error: {e}"))?; + let (k, v) = item.map_err(|e| format!("sled proofs scan error: {e}"))?; let key = String::from_utf8(k.to_vec()) .map_err(|e| format!("sled proofs key decode error: {e}"))?; results.push((key, v.to_vec())); diff --git a/crates/aingle_cortex/src/proofs/store.rs b/crates/aingle_cortex/src/proofs/store.rs index 03ee91a1..2bb6eb34 100644 --- a/crates/aingle_cortex/src/proofs/store.rs +++ b/crates/aingle_cortex/src/proofs/store.rs @@ -460,10 +460,7 @@ impl ProofStore { /// Get count of proofs pub async fn count(&self) -> usize { - self.backend - .list_all() - .map(|all| all.len()) - .unwrap_or(0) + self.backend.list_all().map(|all| all.len()).unwrap_or(0) } /// Flush proof backend to durable storage. 
diff --git a/crates/aingle_cortex/src/proofs/verification.rs b/crates/aingle_cortex/src/proofs/verification.rs index 24842c97..44b43665 100644 --- a/crates/aingle_cortex/src/proofs/verification.rs +++ b/crates/aingle_cortex/src/proofs/verification.rs @@ -114,10 +114,11 @@ fn reconstruct_zk_proof(proof: &StoredProof) -> Result, -) -> Json { +pub async fn get_audit_stats(State(state): State) -> Json { let log = state.audit_log.read().await; Json(log.stats()) } diff --git a/crates/aingle_cortex/src/rest/cluster.rs b/crates/aingle_cortex/src/rest/cluster.rs index 5408972d..0b8fadc2 100644 --- a/crates/aingle_cortex/src/rest/cluster.rs +++ b/crates/aingle_cortex/src/rest/cluster.rs @@ -86,16 +86,16 @@ pub struct WalVerifyResponse { } /// GET /api/v1/cluster/status -pub async fn cluster_status( - State(state): State, -) -> Result> { +pub async fn cluster_status(State(state): State) -> Result> { let wal_last_seq = { #[cfg(feature = "cluster")] { state.wal.as_ref().map(|w| w.last_seq()).unwrap_or(0) } #[cfg(not(feature = "cluster"))] - { 0u64 } + { + 0u64 + } }; // Extract live Raft metrics when available @@ -113,9 +113,7 @@ pub async fn cluster_status( .map(|lid| lid.index) .unwrap_or(0); - let commit_index = metrics - .last_log_index - .unwrap_or(0); + let commit_index = metrics.last_log_index.unwrap_or(0); // Build member list from membership config let membership = metrics.membership_config.membership(); @@ -137,7 +135,10 @@ pub async fn cluster_status( // Resolve leader address from membership config (#13) let leader_addr = leader_id.and_then(|lid| { - membership.nodes().find(|(nid, _)| **nid == lid).map(|(_, node)| node.rest_addr.clone()) + membership + .nodes() + .find(|(nid, _)| **nid == lid) + .map(|(_, node)| node.rest_addr.clone()) }); return Ok(Json(ClusterStatus { @@ -187,10 +188,16 @@ pub async fn cluster_join( if metrics.current_leader != state.cluster_node_id { let membership = metrics.membership_config.membership(); let leader_addr = 
metrics.current_leader.and_then(|lid| { - membership.nodes().find(|(nid, _)| **nid == lid).map(|(_, node)| node.rest_addr.clone()) + membership + .nodes() + .find(|(nid, _)| **nid == lid) + .map(|(_, node)| node.rest_addr.clone()) }); if let Some(ref addr) = leader_addr { - return Err(Error::Redirect(format!("http://{}/api/v1/cluster/join", addr))); + return Err(Error::Redirect(format!( + "http://{}/api/v1/cluster/join", + addr + ))); } return Ok(( StatusCode::CONFLICT, @@ -219,7 +226,10 @@ pub async fn cluster_join( voter_ids.insert(req.node_id); // Resolve leader_addr for response let leader_addr = metrics.current_leader.and_then(|lid| { - membership.nodes().find(|(nid, _)| **nid == lid).map(|(_, node)| node.rest_addr.clone()) + membership + .nodes() + .find(|(nid, _)| **nid == lid) + .map(|(_, node)| node.rest_addr.clone()) }); match raft.change_membership(voter_ids.clone(), false).await { Ok(_) => { @@ -294,18 +304,23 @@ pub async fn cluster_leave( if metrics.current_leader != state.cluster_node_id { let membership = metrics.membership_config.membership(); let leader_addr = metrics.current_leader.and_then(|lid| { - membership.nodes().find(|(nid, _)| **nid == lid).map(|(_, node)| node.rest_addr.clone()) + membership + .nodes() + .find(|(nid, _)| **nid == lid) + .map(|(_, node)| node.rest_addr.clone()) }); if let Some(ref addr) = leader_addr { - return Err(Error::Redirect(format!("http://{}/api/v1/cluster/leave", addr))); + return Err(Error::Redirect(format!( + "http://{}/api/v1/cluster/leave", + addr + ))); } return Err(Error::Internal("Not leader; leader unknown".to_string())); } if let Some(node_id) = state.cluster_node_id { let membership = metrics.membership_config.membership(); - let mut voter_ids: std::collections::BTreeSet = - membership.voter_ids().collect(); + let mut voter_ids: std::collections::BTreeSet = membership.voter_ids().collect(); voter_ids.remove(&node_id); if !voter_ids.is_empty() { if let Err(e) = raft.change_membership(voter_ids, 
false).await { @@ -320,9 +335,7 @@ pub async fn cluster_leave( } /// GET /api/v1/cluster/members -pub async fn cluster_members( - State(state): State, -) -> Result>> { +pub async fn cluster_members(State(state): State) -> Result>> { #[cfg(feature = "cluster")] if let Some(ref raft) = state.raft { let metrics = raft.metrics().borrow_watched().clone(); @@ -351,12 +364,12 @@ pub async fn cluster_members( } /// GET /api/v1/cluster/wal/stats -pub async fn wal_stats( - State(state): State, -) -> Result> { +pub async fn wal_stats(State(state): State) -> Result> { #[cfg(feature = "cluster")] if let Some(ref wal) = state.wal { - let stats = wal.stats().map_err(|e| Error::Internal(format!("WAL stats error: {e}")))?; + let stats = wal + .stats() + .map_err(|e| Error::Internal(format!("WAL stats error: {e}")))?; return Ok(Json(WalStatsResponse { segment_count: stats.segment_count, total_size_bytes: stats.total_size_bytes, @@ -374,9 +387,7 @@ pub async fn wal_stats( } /// POST /api/v1/cluster/wal/verify -pub async fn wal_verify( - State(state): State, -) -> Result> { +pub async fn wal_verify(State(state): State) -> Result> { #[cfg(feature = "cluster")] if let Some(ref wal) = state.wal { let wal_dir = wal.dir(); diff --git a/crates/aingle_cortex/src/rest/cluster_utils.rs b/crates/aingle_cortex/src/rest/cluster_utils.rs index 42aa9239..ffcc0397 100644 --- a/crates/aingle_cortex/src/rest/cluster_utils.rs +++ b/crates/aingle_cortex/src/rest/cluster_utils.rs @@ -3,9 +3,9 @@ //! Shared helpers for cluster-mode REST handlers. -use axum::http::HeaderMap; use crate::error::Error; use crate::state::AppState; +use axum::http::HeaderMap; /// Convert a Raft `client_write` error into an appropriate HTTP error. 
/// diff --git a/crates/aingle_cortex/src/rest/memory.rs b/crates/aingle_cortex/src/rest/memory.rs index a59e92cb..6f864c28 100644 --- a/crates/aingle_cortex/src/rest/memory.rs +++ b/crates/aingle_cortex/src/rest/memory.rs @@ -23,8 +23,8 @@ use axum::{ http::StatusCode, Json, }; -use serde::{Deserialize, Serialize}; use ineru::{MemoryEntry, MemoryId, MemoryQuery}; +use serde::{Deserialize, Serialize}; use crate::error::{Error, Result}; use crate::state::AppState; @@ -151,16 +151,15 @@ pub async fn remember( .clone() .unwrap_or_else(|| "raft".to_string()); - return Ok(( - StatusCode::CREATED, - Json(RememberResponse { id }), - )); + return Ok((StatusCode::CREATED, Json(RememberResponse { id }))); } // Guard: if Raft is initialized, all writes MUST go through Raft (#2). #[cfg(feature = "cluster")] if state.raft.is_some() { - return Err(Error::Internal("Raft initialized but write not routed through Raft".into())); + return Err(Error::Internal( + "Raft initialized but write not routed through Raft".into(), + )); } // Non-cluster mode: direct write @@ -192,14 +191,13 @@ pub async fn remember( entry_type: req.entry_type.clone(), data: wal_data.clone(), importance: req.importance, - }).map_err(|e| Error::Internal(format!("WAL append failed: {e}")))?; + }) + .map_err(|e| Error::Internal(format!("WAL append failed: {e}")))?; } Ok(( StatusCode::CREATED, - Json(RememberResponse { - id: id.to_hex(), - }), + Json(RememberResponse { id: id.to_hex() }), )) } @@ -237,9 +235,7 @@ pub async fn recall( } /// Force consolidation of important STM entries into LTM. -pub async fn consolidate( - State(state): State, -) -> Result> { +pub async fn consolidate(State(state): State) -> Result> { // Cluster mode: route through Raft so all nodes consolidate deterministically #[cfg(feature = "cluster")] if let Some(ref raft) = state.raft { @@ -278,7 +274,9 @@ pub async fn consolidate( // Guard: if Raft is initialized, all writes MUST go through Raft (#2). 
#[cfg(feature = "cluster")] if state.raft.is_some() { - return Err(Error::Internal("Raft initialized but write not routed through Raft".into())); + return Err(Error::Internal( + "Raft initialized but write not routed through Raft".into(), + )); } // Non-cluster mode: direct consolidation @@ -292,7 +290,8 @@ pub async fn consolidate( if let Some(ref wal) = state.wal { wal.append(aingle_wal::WalEntryKind::MemoryConsolidate { consolidated_count: count, - }).map_err(|e| Error::Internal(format!("WAL append failed: {e}")))?; + }) + .map_err(|e| Error::Internal(format!("WAL append failed: {e}")))?; } Ok(Json(ConsolidateResponse { @@ -315,10 +314,7 @@ pub async fn stats(State(state): State) -> Result } /// Forget (delete) a specific memory entry. -pub async fn forget( - State(state): State, - Path(id): Path, -) -> Result { +pub async fn forget(State(state): State, Path(id): Path) -> Result { // Cluster mode: route through Raft #[cfg(feature = "cluster")] if let Some(ref raft) = state.raft { @@ -347,7 +343,9 @@ pub async fn forget( // Guard: if Raft is initialized, all writes MUST go through Raft (#2). 
#[cfg(feature = "cluster")] if state.raft.is_some() { - return Err(Error::Internal("Raft initialized but write not routed through Raft".into())); + return Err(Error::Internal( + "Raft initialized but write not routed through Raft".into(), + )); } // Non-cluster mode: direct delete @@ -364,7 +362,8 @@ pub async fn forget( if let Some(ref wal) = state.wal { wal.append(aingle_wal::WalEntryKind::MemoryForget { memory_id: id.clone(), - }).map_err(|e| Error::Internal(format!("WAL append failed: {e}")))?; + }) + .map_err(|e| Error::Internal(format!("WAL append failed: {e}")))?; } Ok(StatusCode::NO_CONTENT) @@ -379,9 +378,9 @@ pub async fn checkpoint( // For now, create a proof-of-state in the proof store let memory = state.memory.read().await; let s = memory.stats(); - let label = req.label.unwrap_or_else(|| { - format!("checkpoint-{}", chrono::Utc::now().timestamp()) - }); + let label = req + .label + .unwrap_or_else(|| format!("checkpoint-{}", chrono::Utc::now().timestamp())); let checkpoint_data = serde_json::json!({ "label": label, @@ -429,16 +428,12 @@ pub async fn list_checkpoints( .into_iter() .filter(|p| p.metadata.tags.contains(&"checkpoint".to_string())) .map(|p| { - let data: serde_json::Value = - serde_json::from_slice(&p.data).unwrap_or_default(); + let data: serde_json::Value = serde_json::from_slice(&p.data).unwrap_or_default(); CheckpointListDto { id: p.id.clone(), label: data.get("label").and_then(|v| v.as_str()).map(String::from), created_at: p.created_at.to_rfc3339(), - stm_count: data - .get("stm_count") - .and_then(|v| v.as_u64()) - .unwrap_or(0) as usize, + stm_count: data.get("stm_count").and_then(|v| v.as_u64()).unwrap_or(0) as usize, ltm_entity_count: data .get("ltm_entity_count") .and_then(|v| v.as_u64()) @@ -507,7 +502,9 @@ pub async fn vector_search( Json(req): Json, ) -> Result>> { let memory = state.memory.read().await; - let results = memory.ltm.vector_search_memories(&req.embedding, req.k, req.min_similarity); + let results = memory + .ltm 
+ .vector_search_memories(&req.embedding, req.k, req.min_similarity); let mut dtos: Vec = results .into_iter() @@ -543,7 +540,9 @@ pub async fn vector_index_stats( State(state): State, ) -> Result> { let memory = state.memory.read().await; - let stats = memory.ltm.hnsw_index() + let stats = memory + .ltm + .hnsw_index() .map(|idx| idx.stats()) .unwrap_or(ineru::hnsw::HnswStats { point_count: 0, @@ -562,9 +561,7 @@ pub async fn vector_index_stats( } /// Force rebuild of the HNSW vector index. -pub async fn rebuild_vector_index( - State(state): State, -) -> Result { +pub async fn rebuild_vector_index(State(state): State) -> Result { let mut memory = state.memory.write().await; if let Some(hnsw) = memory.ltm.hnsw_index_mut() { hnsw.rebuild(); diff --git a/crates/aingle_cortex/src/rest/observability.rs b/crates/aingle_cortex/src/rest/observability.rs index e9493afb..6bd5b7d4 100644 --- a/crates/aingle_cortex/src/rest/observability.rs +++ b/crates/aingle_cortex/src/rest/observability.rs @@ -212,9 +212,7 @@ pub async fn query_events( // Query all event:type triples to find event subjects let type_pred = Predicate::named(format!("{}:event:type", ns)); - let type_triples = graph - .get_predicate(&type_pred) - .unwrap_or_default(); + let type_triples = graph.get_predicate(&type_pred).unwrap_or_default(); let mut events: Vec = Vec::new(); diff --git a/crates/aingle_cortex/src/rest/p2p.rs b/crates/aingle_cortex/src/rest/p2p.rs index 28352e12..34ff9508 100644 --- a/crates/aingle_cortex/src/rest/p2p.rs +++ b/crates/aingle_cortex/src/rest/p2p.rs @@ -43,7 +43,8 @@ async fn p2p_status(State(state): State) -> impl IntoResponse { Err(e) => ( StatusCode::INTERNAL_SERVER_ERROR, Json(serde_json::json!({"error": format!("serialize p2p status: {e}")})), - ).into_response(), + ) + .into_response(), } } @@ -64,7 +65,8 @@ async fn list_peers(State(state): State) -> impl IntoResponse { Err(e) => ( StatusCode::INTERNAL_SERVER_ERROR, Json(serde_json::json!({"error": format!("serialize peers: 
{e}")})), - ).into_response(), + ) + .into_response(), } } diff --git a/crates/aingle_cortex/src/rest/raft_rpc.rs b/crates/aingle_cortex/src/rest/raft_rpc.rs index 65d1c76b..59b0ca07 100644 --- a/crates/aingle_cortex/src/rest/raft_rpc.rs +++ b/crates/aingle_cortex/src/rest/raft_rpc.rs @@ -45,13 +45,10 @@ pub async fn raft_append_entries( let req: openraft::raft::AppendEntriesRequest = serde_json::from_slice(&body) .map_err(|e| Error::Internal(format!("Deserialize AppendEntries: {e}")))?; - let resp = tokio::time::timeout( - std::time::Duration::from_secs(10), - raft.append_entries(req), - ) - .await - .map_err(|_| Error::Timeout("AppendEntries RPC timed out (10s)".into()))? - .map_err(|e| Error::Internal(format!("AppendEntries failed: {e}")))?; + let resp = tokio::time::timeout(std::time::Duration::from_secs(10), raft.append_entries(req)) + .await + .map_err(|_| Error::Timeout("AppendEntries RPC timed out (10s)".into()))? + .map_err(|e| Error::Internal(format!("AppendEntries failed: {e}")))?; let payload = serde_json::to_vec(&resp) .map_err(|e| Error::Internal(format!("Serialize response: {e}")))?; @@ -78,13 +75,10 @@ pub async fn raft_vote( let req: openraft::raft::VoteRequest = serde_json::from_slice(&body) .map_err(|e| Error::Internal(format!("Deserialize Vote: {e}")))?; - let resp = tokio::time::timeout( - std::time::Duration::from_secs(10), - raft.vote(req), - ) - .await - .map_err(|_| Error::Timeout("Vote RPC timed out (10s)".into()))? - .map_err(|e| Error::Internal(format!("Vote failed: {e}")))?; + let resp = tokio::time::timeout(std::time::Duration::from_secs(10), raft.vote(req)) + .await + .map_err(|_| Error::Timeout("Vote RPC timed out (10s)".into()))? + .map_err(|e| Error::Internal(format!("Vote failed: {e}")))?; let payload = serde_json::to_vec(&resp) .map_err(|e| Error::Internal(format!("Serialize response: {e}")))?; @@ -153,9 +147,8 @@ struct SnapshotBuffer { /// In-flight chunked snapshot buffers, keyed by snapshot_id. 
/// Buffers older than `BUFFER_TTL` are evicted to prevent memory leaks /// from abandoned transfers. -static SNAPSHOT_BUFFERS: std::sync::LazyLock< - dashmap::DashMap, -> = std::sync::LazyLock::new(dashmap::DashMap::new); +static SNAPSHOT_BUFFERS: std::sync::LazyLock> = + std::sync::LazyLock::new(dashmap::DashMap::new); /// Maximum time a partial snapshot buffer can live before eviction. const BUFFER_TTL: std::time::Duration = std::time::Duration::from_secs(300); // 5 min @@ -226,7 +219,9 @@ pub async fn raft_snapshot_chunk( // Remove buffer and validate completeness let full_buf = SNAPSHOT_BUFFERS .remove(&snapshot_id) - .ok_or_else(|| Error::Internal("Snapshot buffer missing on final chunk".into()))? + .ok_or_else(|| { + Error::Internal("Snapshot buffer missing on final chunk".into()) + })? .1; if (full_buf.data.len() as u64) != full_buf.expected_size { @@ -256,10 +251,7 @@ pub async fn raft_snapshot_chunk( } /// Shared logic: install a full snapshot from its raw bytes. -async fn install_full_snapshot_from_bytes( - state: &AppState, - data: &[u8], -) -> Result, Error> { +async fn install_full_snapshot_from_bytes(state: &AppState, data: &[u8]) -> Result, Error> { let raft = state .raft .as_ref() @@ -292,8 +284,7 @@ async fn install_full_snapshot_from_bytes( .map_err(|_| Error::Timeout("InstallSnapshot timed out (60s)".into()))? .map_err(|e| Error::Internal(format!("InstallSnapshot failed: {e}")))?; - serde_json::to_vec(&resp) - .map_err(|e| Error::Internal(format!("Serialize response: {e}"))) + serde_json::to_vec(&resp).map_err(|e| Error::Internal(format!("Serialize response: {e}"))) } /// Create the internal Raft RPC sub-router. 
diff --git a/crates/aingle_cortex/src/service/ground.rs b/crates/aingle_cortex/src/service/ground.rs index c3ee5152..d28cc084 100644 --- a/crates/aingle_cortex/src/service/ground.rs +++ b/crates/aingle_cortex/src/service/ground.rs @@ -59,10 +59,18 @@ pub async fn ground(state: &AppState, question: &str, k: usize) -> Result Result (Option, Option) { +async fn signed_provenance( + state: &AppState, + source_path: &str, +) -> (Option, Option) { #[cfg(feature = "dag")] { if source_path.is_empty() { @@ -169,8 +180,13 @@ mod tests { .await .unwrap(); - let g = ground(&state, "exclusive lock semantics sled", 5).await.unwrap(); - assert!(!g.answer_context.is_empty(), "should retrieve the ingested chunk"); + let g = ground(&state, "exclusive lock semantics sled", 5) + .await + .unwrap(); + assert!( + !g.answer_context.is_empty(), + "should retrieve the ingested chunk" + ); assert_eq!(g.answer_context[0].source, "adr.md"); assert_ne!(g.groundedness, "ungrounded"); } diff --git a/crates/aingle_cortex/src/service/ingest.rs b/crates/aingle_cortex/src/service/ingest.rs index fb9e00a8..f7a515ff 100644 --- a/crates/aingle_cortex/src/service/ingest.rs +++ b/crates/aingle_cortex/src/service/ingest.rs @@ -298,10 +298,16 @@ mod tests { #[tokio::test] async fn ingest_writes_triples_and_chunks() { let dir = tempfile::tempdir().unwrap(); - write(dir.path(), "note.md", "# Title\n\nWe use [[sled]] for storage. #durability\n"); + write( + dir.path(), + "note.md", + "# Title\n\nWe use [[sled]] for storage. 
#durability\n", + ); let state = enabled_state().await; - let report = ingest_path(&state, dir.path().to_str().unwrap(), None).await.unwrap(); + let report = ingest_path(&state, dir.path().to_str().unwrap(), None) + .await + .unwrap(); assert_eq!(report.files_seen, 1); assert_eq!(report.files_ingested, 1); @@ -334,8 +340,10 @@ mod tests { assert_eq!(report2.files_skipped, 1); assert_eq!(report2.files_ingested, 0); - assert_eq!(actions_after_first, actions_after_second, - "re-ingesting unchanged files must write zero new DAG actions"); + assert_eq!( + actions_after_first, actions_after_second, + "re-ingesting unchanged files must write zero new DAG actions" + ); } #[tokio::test] diff --git a/crates/aingle_cortex/src/service/triples.rs b/crates/aingle_cortex/src/service/triples.rs index c2ee63a1..6220b409 100644 --- a/crates/aingle_cortex/src/service/triples.rs +++ b/crates/aingle_cortex/src/service/triples.rs @@ -34,7 +34,15 @@ pub async fn create_triple( if req.predicate.is_empty() { return Err(Error::InvalidInput("Predicate cannot be empty".to_string())); } - insert_triple_inner(state, req.object, &req.subject, &req.predicate, None, namespace).await + insert_triple_inner( + state, + req.object, + &req.subject, + &req.predicate, + None, + namespace, + ) + .await } /// Shared single-triple write used by `create_triple` and the ingestion path. 
@@ -509,7 +517,9 @@ mod tests { }; insert_triple_inner( &state, - crate::rest::ValueDto::Node { node: "sled".into() }, + crate::rest::ValueDto::Node { + node: "sled".into(), + }, "docs/x.md", "links_to", Some(prov.clone()), @@ -527,7 +537,10 @@ mod tests { } _ => false, }); - assert!(found, "provenance must be present in the TripleInsert DAG payload"); + assert!( + found, + "provenance must be present in the TripleInsert DAG payload" + ); } #[tokio::test] diff --git a/crates/aingle_cortex/src/state.rs b/crates/aingle_cortex/src/state.rs index ea2c3db5..081f29a1 100644 --- a/crates/aingle_cortex/src/state.rs +++ b/crates/aingle_cortex/src/state.rs @@ -5,9 +5,9 @@ use aingle_graph::GraphDB; use aingle_logic::RuleEngine; +use ineru::IneruMemory; use std::path::Path; use std::sync::Arc; -use ineru::IneruMemory; use tokio::sync::RwLock; #[cfg(feature = "auth")] @@ -48,7 +48,12 @@ pub struct AppState { pub wal: Option>, /// Raft consensus instance for cluster coordination. #[cfg(feature = "cluster")] - pub raft: Option>>, + pub raft: Option< + openraft::Raft< + aingle_raft::CortexTypeConfig, + std::sync::Arc, + >, + >, /// This node's ID in the Raft cluster. #[cfg(feature = "cluster")] pub cluster_node_id: Option, @@ -268,7 +273,10 @@ impl AppState { Arc::new(ps) } Err(e) => { - log::warn!("Failed to open Sled ProofStore: {}. Falling back to in-memory.", e); + log::warn!( + "Failed to open Sled ProofStore: {}. Falling back to in-memory.", + e + ); Arc::new(ProofStore::new()) } } @@ -314,7 +322,6 @@ impl AppState { }) } - /// Flushes the graph database and saves the Ineru memory snapshot to disk. /// /// This should be called before shutdown or binary updates to ensure diff --git a/crates/aingle_cortex/tests/data_integrity_test.rs b/crates/aingle_cortex/tests/data_integrity_test.rs index 0e261328..70ddb0a7 100644 --- a/crates/aingle_cortex/tests/data_integrity_test.rs +++ b/crates/aingle_cortex/tests/data_integrity_test.rs @@ -9,8 +9,8 @@ //! 
- State flush/restore round-trip //! - Batch insert atomicity +use aingle_cortex::proofs::{ProofMetadata, ProofStore, ProofType, SubmitProofRequest}; use aingle_cortex::state::AppState; -use aingle_cortex::proofs::{ProofStore, ProofType, SubmitProofRequest, ProofMetadata}; // ============================================================================ // 1. ProofStore persistence round-trip (Sled backend) @@ -62,7 +62,9 @@ async fn test_proof_store_sled_roundtrip_data_integrity() { assert_eq!(store.count().await, 20, "count mismatch after reopen"); for (i, id) in proof_ids.iter().enumerate() { - let proof = store.get(id).await + let proof = store + .get(id) + .await .unwrap_or_else(|| panic!("proof {} (id={}) missing after reopen", i, id)); // Verify data field contains correct index @@ -99,11 +101,17 @@ async fn test_proof_store_sled_roundtrip_data_integrity() { // Deleted ones should be gone for id in &proof_ids[0..10] { - assert!(store.get(id).await.is_none(), "deleted proof {} should not exist", id); + assert!( + store.get(id).await.is_none(), + "deleted proof {} should not exist", + id + ); } // Remaining ones should be intact for (i, id) in proof_ids[10..20].iter().enumerate() { - let proof = store.get(id).await + let proof = store + .get(id) + .await .unwrap_or_else(|| panic!("remaining proof {} missing", i + 10)); let data: serde_json::Value = serde_json::from_slice(&proof.data).unwrap(); assert_eq!(data["index"].as_u64().unwrap() as usize, i + 10); @@ -117,7 +125,7 @@ async fn test_proof_store_sled_roundtrip_data_integrity() { #[tokio::test] async fn test_graph_dag_triple_materialization_consistency() { - use aingle_graph::{GraphDB, NodeId, Predicate, Value, Triple, TriplePattern}; + use aingle_graph::{GraphDB, NodeId, Predicate, Triple, TriplePattern, Value}; let mut graph = GraphDB::memory().unwrap(); graph.enable_dag(); @@ -144,17 +152,28 @@ async fn test_graph_dag_triple_materialization_consistency() { // Verify each triple can be retrieved by ID for 
(i, tid) in triple_ids.iter().enumerate() { - let triple = graph.get(tid).unwrap() + let triple = graph + .get(tid) + .unwrap() .unwrap_or_else(|| panic!("triple {} not found by ID", i)); - assert_eq!(triple.object, Value::Integer(i as i64 * 100), - "value mismatch for triple {}", i); + assert_eq!( + triple.object, + Value::Integer(i as i64 * 100), + "value mismatch for triple {}", + i + ); } // Verify pattern queries return correct results for i in 0..50 { let pattern = TriplePattern::subject(NodeId::named(&format!("entity:{}", i))); let results = graph.find(pattern).unwrap(); - assert_eq!(results.len(), 1, "entity:{} should have exactly 1 triple", i); + assert_eq!( + results.len(), + 1, + "entity:{} should have exactly 1 triple", + i + ); assert_eq!(results[0].object, Value::Integer(i * 100)); } @@ -182,17 +201,19 @@ async fn test_graph_dag_triple_materialization_consistency() { #[tokio::test] async fn test_batch_insert_index_consistency() { - use aingle_graph::{GraphDB, NodeId, Predicate, Value, Triple, TriplePattern}; + use aingle_graph::{GraphDB, NodeId, Predicate, Triple, TriplePattern, Value}; let graph = GraphDB::memory().unwrap(); // Batch insert 100 triples let triples: Vec = (0..100) - .map(|i| Triple::new( - NodeId::named(&format!("batch:{}", i)), - Predicate::named("batch_value"), - Value::Integer(i), - )) + .map(|i| { + Triple::new( + NodeId::named(&format!("batch:{}", i)), + Predicate::named("batch_value"), + Value::Integer(i), + ) + }) .collect(); let ids = graph.insert_batch(triples).unwrap(); @@ -208,18 +229,20 @@ async fn test_batch_insert_index_consistency() { } // Verify predicate index works - let by_pred = graph.find( - TriplePattern::predicate(Predicate::named("batch_value")) - ).unwrap(); + let by_pred = graph + .find(TriplePattern::predicate(Predicate::named("batch_value"))) + .unwrap(); assert_eq!(by_pred.len(), 100, "predicate index should find all 100"); // Re-batch the same triples — should skip duplicates, no count change let 
triples2: Vec = (0..100) - .map(|i| Triple::new( - NodeId::named(&format!("batch:{}", i)), - Predicate::named("batch_value"), - Value::Integer(i), - )) + .map(|i| { + Triple::new( + NodeId::named(&format!("batch:{}", i)), + Predicate::named("batch_value"), + Value::Integer(i), + ) + }) .collect(); let ids2 = graph.insert_batch(triples2).unwrap(); assert_eq!(ids2.len(), 100); @@ -232,7 +255,7 @@ async fn test_batch_insert_index_consistency() { #[tokio::test] async fn test_app_state_flush_restore_roundtrip() { - use aingle_graph::{NodeId, Predicate, Value, Triple, TriplePattern}; + use aingle_graph::{NodeId, Predicate, Triple, TriplePattern, Value}; let dir = tempfile::TempDir::new().unwrap(); let db_path = dir.path().join("graph.sled"); @@ -291,7 +314,8 @@ async fn test_app_state_flush_restore_roundtrip() { assert_eq!( results[0].object, Value::String(format!("data-{}", i)), - "data mismatch for node:{}", i + "data mismatch for node:{}", + i ); } } @@ -300,7 +324,10 @@ async fn test_app_state_flush_restore_roundtrip() { let proof_count = state.proof_store.count().await; assert_eq!(proof_count, 5, "proofs should survive restart"); for (i, id) in proof_ids.iter().enumerate() { - let proof = state.proof_store.get(id).await + let proof = state + .proof_store + .get(id) + .await .unwrap_or_else(|| panic!("proof {} missing after restart", i)); let data: serde_json::Value = serde_json::from_slice(&proof.data).unwrap(); assert_eq!(data["flush_test"].as_u64().unwrap(), i as u64); @@ -314,9 +341,7 @@ async fn test_app_state_flush_restore_roundtrip() { #[tokio::test] async fn test_raft_snapshot_with_proofs_roundtrip() { - use aingle_raft::state_machine::{ - ClusterSnapshot, TripleSnapshot, ProofSnapshot, - }; + use aingle_raft::state_machine::{ClusterSnapshot, ProofSnapshot, TripleSnapshot}; let snapshot = ClusterSnapshot { triples: vec![ @@ -379,7 +404,10 @@ async fn test_raft_snapshot_with_proofs_roundtrip() { assert_eq!(restored.proofs[0].proof_type, "schnorr"); 
assert_eq!(restored.proofs[0].data, vec![1, 2, 3, 4]); assert!(restored.proofs[0].verified); - assert_eq!(restored.proofs[0].verified_at.as_deref(), Some("2026-03-16T00:01:00Z")); + assert_eq!( + restored.proofs[0].verified_at.as_deref(), + Some("2026-03-16T00:01:00Z") + ); assert_eq!(restored.proofs[1].id, "proof-002"); assert!(!restored.proofs[1].verified); assert!(restored.proofs[1].verified_at.is_none()); @@ -398,9 +426,7 @@ async fn test_raft_snapshot_with_proofs_roundtrip() { #[tokio::test] async fn test_snapshot_checksum_changes_with_proofs() { - use aingle_raft::state_machine::{ - ClusterSnapshot, TripleSnapshot, ProofSnapshot, - }; + use aingle_raft::state_machine::{ClusterSnapshot, ProofSnapshot, TripleSnapshot}; // Snapshot without proofs let snap_no_proofs = ClusterSnapshot { @@ -447,8 +473,10 @@ async fn test_snapshot_checksum_changes_with_proofs() { let r2 = ClusterSnapshot::from_bytes(&bytes2).unwrap(); // Checksums should differ - assert_ne!(r1.checksum, r2.checksum, - "checksum should change when proofs are added"); + assert_ne!( + r1.checksum, r2.checksum, + "checksum should change when proofs are added" + ); } // ============================================================================ @@ -457,7 +485,7 @@ async fn test_snapshot_checksum_changes_with_proofs() { #[tokio::test] async fn test_graph_sled_persistence_full_cycle() { - use aingle_graph::{GraphDB, NodeId, Predicate, Value, Triple, TriplePattern}; + use aingle_graph::{GraphDB, NodeId, Predicate, Triple, TriplePattern, Value}; let dir = tempfile::TempDir::new().unwrap(); let path = dir.path().join("test.sled"); @@ -504,7 +532,7 @@ async fn test_graph_sled_persistence_full_cycle() { #[tokio::test] async fn test_audit_log_fsync_integrity() { - use aingle_cortex::rest::audit::{AuditLog, AuditEntry}; + use aingle_cortex::rest::audit::{AuditEntry, AuditLog}; let dir = tempfile::TempDir::new().unwrap(); let path = dir.path().join("audit_test.jsonl"); @@ -517,7 +545,11 @@ async fn 
test_audit_log_fsync_integrity() { timestamp: format!("2026-03-16T00:{:02}:00Z", i), user_id: format!("user-{}", i % 5), namespace: Some("test".to_string()), - action: if i % 3 == 0 { "create".into() } else { "read".into() }, + action: if i % 3 == 0 { + "create".into() + } else { + "read".into() + }, resource: format!("/api/v1/triples/{}", i), details: Some(format!("detail-{}", i)), request_id: Some(format!("req-{}", i)), diff --git a/crates/aingle_graph/src/dag/action.rs b/crates/aingle_graph/src/dag/action.rs index 941ea1b9..5d647edb 100644 --- a/crates/aingle_graph/src/dag/action.rs +++ b/crates/aingle_graph/src/dag/action.rs @@ -142,10 +142,7 @@ pub struct TripleInsertPayload { #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] pub enum MemoryOpKind { /// A memory entry was stored. - Store { - entry_type: String, - importance: f32, - }, + Store { entry_type: String, importance: f32 }, /// A memory entry was forgotten. Forget { memory_id: String }, /// Consolidation was triggered. @@ -197,8 +194,8 @@ impl DagAction { // Author — serde_json::to_vec cannot fail for NodeId (no maps with // non-string keys, no NaN/Inf floats), so expect() is safe here. - let author_bytes = serde_json::to_vec(&self.author) - .expect("NodeId serialization must not fail"); + let author_bytes = + serde_json::to_vec(&self.author).expect("NodeId serialization must not fail"); hasher.update(&(author_bytes.len() as u64).to_le_bytes()); hasher.update(&author_bytes); @@ -211,8 +208,8 @@ impl DagAction { // Payload — same reasoning: DagPayload contains only strings, // integers, booleans, and JSON values — all safely serializable. 
- let payload_bytes = serde_json::to_vec(&self.payload) - .expect("DagPayload serialization must not fail"); + let payload_bytes = + serde_json::to_vec(&self.payload).expect("DagPayload serialization must not fail"); hasher.update(&(payload_bytes.len() as u64).to_le_bytes()); hasher.update(&payload_bytes); @@ -376,9 +373,8 @@ mod tests { "another_future": 123 }"#; - let action: DagAction = serde_json::from_str(json).expect( - "must deserialize actions with unknown fields (forward compat)" - ); + let action: DagAction = serde_json::from_str(json) + .expect("must deserialize actions with unknown fields (forward compat)"); assert_eq!(action.seq, 42); assert!(matches!(action.payload, DagPayload::Noop)); } @@ -417,9 +413,8 @@ mod tests { "payload": "Noop" }"#; - let action: DagAction = serde_json::from_str(json).expect( - "must deserialize actions without signature field (backward compat)" - ); + let action: DagAction = serde_json::from_str(json) + .expect("must deserialize actions without signature field (backward compat)"); assert!(action.signature.is_none()); } @@ -437,7 +432,10 @@ mod tests { provenance: None, }; let json = serde_json::to_string(&p).unwrap(); - assert!(!json.contains("provenance"), "None provenance must be skipped: {json}"); + assert!( + !json.contains("provenance"), + "None provenance must be skipped: {json}" + ); // Old wire format (no provenance key) still deserializes. 
let old = r#"{"subject":"a","predicate":"b","object":"c"}"#; @@ -457,7 +455,8 @@ mod tests { object: serde_json::json!("o"), provenance: Some(prov.clone()), }; - let round: TripleInsertPayload = serde_json::from_str(&serde_json::to_string(&p2).unwrap()).unwrap(); + let round: TripleInsertPayload = + serde_json::from_str(&serde_json::to_string(&p2).unwrap()).unwrap(); assert_eq!(round.provenance, Some(prov)); // Sanity: an action carrying a None-provenance TripleInsert hashes the same diff --git a/crates/aingle_graph/src/dag/backend.rs b/crates/aingle_graph/src/dag/backend.rs index aa2e7fde..dd12d432 100644 --- a/crates/aingle_graph/src/dag/backend.rs +++ b/crates/aingle_graph/src/dag/backend.rs @@ -158,8 +158,8 @@ impl DagBackend for SledDagBackend { fn scan_prefix(&self, prefix: &[u8]) -> crate::Result, Vec)>> { let mut results = Vec::new(); for item in self.tree.scan_prefix(prefix) { - let (k, v) = item - .map_err(|e| crate::Error::Storage(format!("sled dag scan error: {}", e)))?; + let (k, v) = + item.map_err(|e| crate::Error::Storage(format!("sled dag scan error: {}", e)))?; results.push((k.to_vec(), v.to_vec())); } Ok(results) diff --git a/crates/aingle_graph/src/dag/export.rs b/crates/aingle_graph/src/dag/export.rs index f43a9849..c8c0190e 100644 --- a/crates/aingle_graph/src/dag/export.rs +++ b/crates/aingle_graph/src/dag/export.rs @@ -62,8 +62,7 @@ fn short_id(id: &str) -> &str { impl DagGraph { /// Build a graph from a list of actions and their tip status. pub fn from_actions(actions: &[DagAction], tips: &[DagActionHash]) -> Self { - let tip_set: std::collections::HashSet<[u8; 32]> = - tips.iter().map(|h| h.0).collect(); + let tip_set: std::collections::HashSet<[u8; 32]> = tips.iter().map(|h| h.0).collect(); let mut nodes = Vec::with_capacity(actions.len()); let mut edges = Vec::new(); @@ -85,7 +84,9 @@ impl DagGraph { DagPayload::Genesis { .. } => "Genesis".into(), DagPayload::Compact { .. 
} => "Compact".into(), DagPayload::Noop => "Noop".into(), - DagPayload::Custom { ref payload_type, .. } => { + DagPayload::Custom { + ref payload_type, .. + } => { format!("Custom({})", payload_type) } }; @@ -115,7 +116,9 @@ impl DagGraph { /// Export as Graphviz DOT format. pub fn to_dot(&self) -> String { - let mut out = String::from("digraph DAG {\n rankdir=BT;\n node [shape=box, style=filled, fontsize=10];\n\n"); + let mut out = String::from( + "digraph DAG {\n rankdir=BT;\n node [shape=box, style=filled, fontsize=10];\n\n", + ); for node in &self.nodes { let color = if node.is_tip { @@ -173,7 +176,10 @@ impl DagGraph { // Style tips for node in &self.nodes { if node.is_tip { - out.push_str(&format!(" style {} fill:#4CAF50,color:white\n", short_id(&node.id))); + out.push_str(&format!( + " style {} fill:#4CAF50,color:white\n", + short_id(&node.id) + )); } } @@ -306,7 +312,10 @@ mod tests { assert_eq!(ExportFormat::from_str("dot"), Some(ExportFormat::Dot)); assert_eq!(ExportFormat::from_str("DOT"), Some(ExportFormat::Dot)); assert_eq!(ExportFormat::from_str("graphviz"), Some(ExportFormat::Dot)); - assert_eq!(ExportFormat::from_str("mermaid"), Some(ExportFormat::Mermaid)); + assert_eq!( + ExportFormat::from_str("mermaid"), + Some(ExportFormat::Mermaid) + ); assert_eq!(ExportFormat::from_str("json"), Some(ExportFormat::Json)); assert_eq!(ExportFormat::from_str("xml"), None); } diff --git a/crates/aingle_graph/src/dag/mod.rs b/crates/aingle_graph/src/dag/mod.rs index 24d92445..aa94cb8d 100644 --- a/crates/aingle_graph/src/dag/mod.rs +++ b/crates/aingle_graph/src/dag/mod.rs @@ -25,10 +25,12 @@ pub mod sync; pub mod timetravel; pub mod tips; -pub use action::{DagAction, DagActionHash, DagPayload, MemoryOpKind, Provenance, TripleInsertPayload}; -pub use backend::{DagBackend, MemoryDagBackend}; +pub use action::{ + DagAction, DagActionHash, DagPayload, MemoryOpKind, Provenance, TripleInsertPayload, +}; #[cfg(feature = "sled-backend")] pub use backend::SledDagBackend; 
+pub use backend::{DagBackend, MemoryDagBackend}; pub use export::{DagGraph, ExportFormat}; pub use pruning::{PruneResult, RetentionPolicy}; #[cfg(feature = "dag-sign")] diff --git a/crates/aingle_graph/src/dag/signing.rs b/crates/aingle_graph/src/dag/signing.rs index 2c687c83..7efcfd2a 100644 --- a/crates/aingle_graph/src/dag/signing.rs +++ b/crates/aingle_graph/src/dag/signing.rs @@ -339,8 +339,8 @@ mod tests { #[test] fn test_verifying_key_from_bytes_invalid() { let bad_bytes = [0u8; 32]; // not a valid Ed25519 point - // This may or may not fail depending on the point — use all-zero which is identity - // For safety, just test that the API doesn't panic + // This may or may not fail depending on the point — use all-zero which is identity + // For safety, just test that the API doesn't panic let _ = DagVerifyingKey::from_bytes(&bad_bytes); } } diff --git a/crates/aingle_graph/src/dag/timetravel.rs b/crates/aingle_graph/src/dag/timetravel.rs index 144ef9c0..95abfb82 100644 --- a/crates/aingle_graph/src/dag/timetravel.rs +++ b/crates/aingle_graph/src/dag/timetravel.rs @@ -262,10 +262,12 @@ mod tests { // At a timestamp before any actions: None let result = db.dag_at_timestamp(&(before - chrono::Duration::seconds(10))); - assert!(result.is_err() || { - // Should fail or return empty - true - }); + assert!( + result.is_err() || { + // Should fail or return empty + true + } + ); // At current time: should get state with both triples let (snap, info) = db.dag_at_timestamp(&Utc::now()).unwrap(); diff --git a/crates/aingle_graph/src/lib.rs b/crates/aingle_graph/src/lib.rs index 7164d7fe..ebfa4bfe 100644 --- a/crates/aingle_graph/src/lib.rs +++ b/crates/aingle_graph/src/lib.rs @@ -484,11 +484,7 @@ impl GraphDB { /// Get mutation history for a specific triple. 
#[cfg(feature = "dag")] - pub fn dag_history( - &self, - triple_id: &[u8; 32], - limit: usize, - ) -> Result> { + pub fn dag_history(&self, triple_id: &[u8; 32], limit: usize) -> Result> { self.dag_store .as_ref() .ok_or_else(|| Error::Config("DAG not enabled".into()))? @@ -585,11 +581,7 @@ impl GraphDB { /// Sign a DAG action using an Ed25519 signing key. #[cfg(feature = "dag-sign")] - pub fn dag_sign( - &self, - action: &mut dag::DagAction, - key: &dag::DagSigningKey, - ) { + pub fn dag_sign(&self, action: &mut dag::DagAction, key: &dag::DagSigningKey) { key.sign(action); } @@ -600,8 +592,7 @@ impl GraphDB { action: &dag::DagAction, public_key: &[u8; 32], ) -> Result { - dag::signing::verify_action(action, public_key) - .map_err(|e| Error::Config(e.to_string())) + dag::signing::verify_action(action, public_key).map_err(|e| Error::Config(e.to_string())) } /// Export the full DAG as a portable graph structure. diff --git a/crates/aingle_ingest/src/chunk.rs b/crates/aingle_ingest/src/chunk.rs index 51998af3..6fef5506 100644 --- a/crates/aingle_ingest/src/chunk.rs +++ b/crates/aingle_ingest/src/chunk.rs @@ -60,7 +60,11 @@ pub fn chunk_markdown(path: &str, content: &str, hash: &str) -> Vec { let mut out = Vec::new(); for (n, &start) in starts.iter().enumerate() { - let end = if n + 1 < starts.len() { starts[n + 1] } else { lines.len() }; + let end = if n + 1 < starts.len() { + starts[n + 1] + } else { + lines.len() + }; let section = &lines[start..end]; if section.len() > 80 { // chunk_fixed returns 1-based lines within the section; adding the diff --git a/crates/aingle_ingest/src/lib.rs b/crates/aingle_ingest/src/lib.rs index 6ec9c7c5..f0a27c71 100644 --- a/crates/aingle_ingest/src/lib.rs +++ b/crates/aingle_ingest/src/lib.rs @@ -80,19 +80,31 @@ mod tests { && t.predicate == "type" && t.object == ObjectValue::Text("adr".into()))); // frontmatter tags -> two tagged triples - assert!(ex.triples.iter().any(|t| t.predicate == "tagged" - && t.object == 
ObjectValue::Text("storage".into()))); - assert!(ex.triples.iter().any(|t| t.predicate == "tagged" - && t.object == ObjectValue::Text("decision".into()))); + assert!(ex + .triples + .iter() + .any(|t| t.predicate == "tagged" && t.object == ObjectValue::Text("storage".into()))); + assert!(ex + .triples + .iter() + .any(|t| t.predicate == "tagged" && t.object == ObjectValue::Text("decision".into()))); // heading -> has_section assert!(ex.triples.iter().any(|t| t.predicate == "has_section" && t.object == ObjectValue::Text("Storage Decision".into()))); // wikilink -> links_to sled - let link = ex.triples.iter().find(|t| t.predicate == "links_to").unwrap(); + let link = ex + .triples + .iter() + .find(|t| t.predicate == "links_to") + .unwrap(); assert_eq!(link.object, ObjectValue::Node("sled".into())); // inline tag -> tagged durability - assert!(ex.triples.iter().any(|t| t.predicate == "tagged" - && t.object == ObjectValue::Text("durability".into()))); + assert!( + ex.triples + .iter() + .any(|t| t.predicate == "tagged" + && t.object == ObjectValue::Text("durability".into())) + ); // provenance line numbers are 1-based and point at the real lines. assert_eq!(prov(&link.provenance).0, 7); // the "We chose [[sled]]" line @@ -100,12 +112,18 @@ mod tests { // at least one chunk, all carrying the same content hash. assert!(!ex.chunks.is_empty()); - assert!(ex.chunks.iter().all(|c| !c.provenance.content_hash.is_empty())); + assert!(ex + .chunks + .iter() + .all(|c| !c.provenance.content_hash.is_empty())); } #[test] fn non_markdown_gets_chunks_only() { - let code = (1..=120).map(|i| format!("line {i}")).collect::>().join("\n"); + let code = (1..=120) + .map(|i| format!("line {i}")) + .collect::>() + .join("\n"); let ex = extract("src/main.rs", &code); assert!(ex.triples.is_empty()); // 120 lines / 50-line window => 3 chunks. 
diff --git a/crates/aingle_logic/src/engine.rs b/crates/aingle_logic/src/engine.rs index 13460d16..5f305f0c 100644 --- a/crates/aingle_logic/src/engine.rs +++ b/crates/aingle_logic/src/engine.rs @@ -105,28 +105,33 @@ impl RuleEngine { /// /// The stats provide metrics on validations, inferences, rejections, etc. pub fn stats(&self) -> EngineStats { - self.stats.read() + self.stats + .read() .unwrap_or_else(|poisoned| poisoned.into_inner()) .clone() } /// Resets all collected `EngineStats` to their default (zero) values. pub fn clear_stats(&self) { - let mut guard = self.stats.write() + let mut guard = self + .stats + .write() .unwrap_or_else(|poisoned| poisoned.into_inner()); *guard = EngineStats::default(); } /// Retrieves a clone of all triples that have been inferred by the engine. pub fn inferred_triples(&self) -> Vec { - self.inferred.read() + self.inferred + .read() .unwrap_or_else(|poisoned| poisoned.into_inner()) .clone() } /// Clears the internal cache of inferred triples. pub fn clear_inferred(&self) { - self.inferred.write() + self.inferred + .write() .unwrap_or_else(|poisoned| poisoned.into_inner()) .clear(); } @@ -145,7 +150,9 @@ impl RuleEngine { /// A `ValidationResult` indicating whether the triple is valid, and detailing any /// matches, rejections, warnings, or chained rules. 
pub fn validate(&self, triple: &Triple) -> ValidationResult { - let mut stats = self.stats.write() + let mut stats = self + .stats + .write() .unwrap_or_else(|poisoned| poisoned.into_inner()); stats.validations += 1; @@ -172,7 +179,9 @@ impl RuleEngine { } Action::Infer(pattern) => { if let Some(inferred) = pattern.instantiate(&bindings) { - let mut inf = self.inferred.write() + let mut inf = self + .inferred + .write() .unwrap_or_else(|poisoned| poisoned.into_inner()); inf.push(inferred); stats.inferences += 1; @@ -204,7 +213,9 @@ impl RuleEngine { /// A `Result` containing a `ForwardChainResult` which includes the number of iterations /// and all new facts inferred, or an `Error` if the process exceeds `max_depth`. pub fn forward_chain(&self, graph: &GraphDB) -> Result { - let mut stats = self.stats.write() + let mut stats = self + .stats + .write() .unwrap_or_else(|poisoned| poisoned.into_inner()); let mut result = ForwardChainResult::new(); let mut iteration = 0; @@ -263,7 +274,8 @@ impl RuleEngine { // Add new facts to result (would be added to graph in real use) for fact in new_facts { - self.inferred.write() + self.inferred + .write() .unwrap_or_else(|poisoned| poisoned.into_inner()) .push(fact); } @@ -292,7 +304,9 @@ impl RuleEngine { graph: &GraphDB, goal: &TriplePattern, ) -> Result { - let mut stats = self.stats.write() + let mut stats = self + .stats + .write() .unwrap_or_else(|poisoned| poisoned.into_inner()); stats.backward_queries += 1; diff --git a/crates/aingle_minimal/src/discovery.rs b/crates/aingle_minimal/src/discovery.rs index 5a40b187..2cfad8ac 100644 --- a/crates/aingle_minimal/src/discovery.rs +++ b/crates/aingle_minimal/src/discovery.rs @@ -10,9 +10,9 @@ //! - **mDNS**: Service type `_aingle._udp.local.` (feature: mdns) //! 
- **CoAP Multicast**: `/.well-known/core` to 224.0.1.187:5683 (feature: coap) -use crate::error::Result; #[cfg(feature = "mdns")] use crate::error::Error; +use crate::error::Result; use std::collections::HashMap; use std::net::{IpAddr, SocketAddr}; use std::time::{Duration, Instant}; @@ -202,7 +202,11 @@ impl Discovery { return; } - let addresses: Vec = info.get_addresses().iter().map(|a| a.to_ip_addr()).collect(); + let addresses: Vec = info + .get_addresses() + .iter() + .map(|a| a.to_ip_addr()) + .collect(); let mut props = HashMap::new(); for prop in info.get_properties().iter() { diff --git a/crates/aingle_minimal/src/lib.rs b/crates/aingle_minimal/src/lib.rs index 72f332ec..4c4ebeb6 100644 --- a/crates/aingle_minimal/src/lib.rs +++ b/crates/aingle_minimal/src/lib.rs @@ -266,6 +266,8 @@ pub use ota::{OtaManager, UpdateChannel, UpdateInfo, UpdateState}; pub use power::{BatteryInfo, PowerManager, PowerProfile}; #[cfg(feature = "quic")] pub use quic::{QuicConfig, QuicServer}; +#[cfg(feature = "rest")] +pub use rest::{RestConfig, RestServer}; pub use sensors::{CalibrationParams, Sensor, SensorManager, SensorReading, SensorType}; #[cfg(feature = "smart_agents")] pub use smart::{IoTPolicyBuilder, SensorAdapter, SmartNode, SmartNodeConfig, SmartNodeStats}; @@ -281,8 +283,6 @@ pub use webrtc::{ ConnectionState, PeerConnection, SignalingClient, SignalingConfig, SignalingMessage, SignalingServer, WebRtcConfig, WebRtcServer, WebRtcStats, }; -#[cfg(feature = "rest")] -pub use rest::{RestConfig, RestServer}; /// Version information for the crate. 
/// diff --git a/crates/aingle_minimal/src/memory.rs b/crates/aingle_minimal/src/memory.rs index 8577cbc0..20feb847 100644 --- a/crates/aingle_minimal/src/memory.rs +++ b/crates/aingle_minimal/src/memory.rs @@ -8,9 +8,9 @@ #[cfg(feature = "ai_memory")] pub use ineru::{ - ConsolidationConfig, Embedding, Entity, EntityId, KnowledgeGraph, Link, LinkType, + ConsolidationConfig, Embedding, Entity, EntityId, IneruMemory, KnowledgeGraph, Link, LinkType, LongTermMemory, LtmConfig, MemoryConfig, MemoryEntry, MemoryId, MemoryMetadata, MemoryQuery, - MemoryResult, MemoryStats, Relation, SemanticTag, ShortTermMemory, StmConfig, IneruMemory, + MemoryResult, MemoryStats, Relation, SemanticTag, ShortTermMemory, StmConfig, }; #[cfg(feature = "ai_memory")] diff --git a/crates/aingle_minimal/src/quic.rs b/crates/aingle_minimal/src/quic.rs index 61ff2a4f..87c7c55e 100644 --- a/crates/aingle_minimal/src/quic.rs +++ b/crates/aingle_minimal/src/quic.rs @@ -215,7 +215,11 @@ impl QuicServer { // Reject oversized messages (max 1MB) const MAX_MESSAGE_SIZE: usize = 1024 * 1024; if len > MAX_MESSAGE_SIZE { - log::warn!("Rejecting oversized QUIC message: {} bytes from {}", len, addr); + log::warn!( + "Rejecting oversized QUIC message: {} bytes from {}", + len, + addr + ); continue; } @@ -327,7 +331,9 @@ impl QuicServer { // In a real deployment, load trusted peer certificates here. // For self-signed mesh networks, each node pins peer certs at discovery time. // Using dangerous() only as fallback for initial handshake — log a warning. 
- log::warn!("QUIC client using permissive certificate validation — pin peer certs in production"); + log::warn!( + "QUIC client using permissive certificate validation — pin peer certs in production" + ); let crypto = rustls::ClientConfig::builder() .dangerous() .with_custom_certificate_verifier(Arc::new(LoggingCertVerifier)) diff --git a/crates/aingle_minimal/src/rest.rs b/crates/aingle_minimal/src/rest.rs index 19af4334..452a720e 100644 --- a/crates/aingle_minimal/src/rest.rs +++ b/crates/aingle_minimal/src/rest.rs @@ -187,8 +187,12 @@ impl RestServer { /// The server will run in a background thread until `stop()` is called. pub fn start(config: RestConfig, node: &mut MinimalNode) -> Result { let bind_addr = config.bind_address(); - let server = Server::http(&bind_addr) - .map_err(|e| Error::Network(NetworkError::Other(format!("Failed to start REST server: {}", e))))?; + let server = Server::http(&bind_addr).map_err(|e| { + Error::Network(NetworkError::Other(format!( + "Failed to start REST server: {}", + e + ))) + })?; log::info!("REST API server starting on http://{}", bind_addr); @@ -206,7 +210,14 @@ impl RestServer { // In production, this would use channels or shared state let handle = thread::spawn(move || { - Self::server_loop(server, running_clone, enable_cors, node_id, version, start_time); + Self::server_loop( + server, + running_clone, + enable_cors, + node_id, + version, + start_time, + ); }); Ok(Self { @@ -219,13 +230,14 @@ impl RestServer { /// Start the REST server with shared node access. /// /// This version allows the node to be accessed from the REST handlers. 
- pub fn start_with_node( - config: RestConfig, - node: Arc>, - ) -> Result { + pub fn start_with_node(config: RestConfig, node: Arc>) -> Result { let bind_addr = config.bind_address(); - let server = Server::http(&bind_addr) - .map_err(|e| Error::Network(NetworkError::Other(format!("Failed to start REST server: {}", e))))?; + let server = Server::http(&bind_addr).map_err(|e| { + Error::Network(NetworkError::Other(format!( + "Failed to start REST server: {}", + e + ))) + })?; log::info!("REST API server starting on http://{}", bind_addr); @@ -257,12 +269,8 @@ impl RestServer { // Use a timeout so we can check the running flag periodically match server.recv_timeout(std::time::Duration::from_millis(100)) { Ok(Some(request)) => { - let response = Self::handle_static_request( - &request, - &node_id, - &version, - start_time, - ); + let response = + Self::handle_static_request(&request, &node_id, &version, start_time); Self::send_response(request, response, enable_cors); } Ok(None) => continue, // Timeout, check running flag @@ -299,10 +307,7 @@ impl RestServer { } /// Handle a request with full node access - fn handle_request( - request: &mut Request, - node: &Arc>, - ) -> (u16, String) { + fn handle_request(request: &mut Request, node: &Arc>) -> (u16, String) { let method = request.method().clone(); let url = request.url().to_string(); @@ -310,14 +315,10 @@ impl RestServer { match (method, url.as_str()) { // GET /api/v1/info - (Method::Get, "/api/v1/info") => { - Self::handle_info(node) - } + (Method::Get, "/api/v1/info") => Self::handle_info(node), // POST /api/v1/entries - (Method::Post, "/api/v1/entries") => { - Self::handle_create_entry(request, node) - } + (Method::Post, "/api/v1/entries") => Self::handle_create_entry(request, node), // GET /api/v1/entries/:hash (Method::Get, path) if path.starts_with("/api/v1/entries/") => { @@ -326,19 +327,13 @@ impl RestServer { } // GET /api/v1/peers - (Method::Get, "/api/v1/peers") => { - Self::handle_peers(node) - } + 
(Method::Get, "/api/v1/peers") => Self::handle_peers(node), // GET /api/v1/stats - (Method::Get, "/api/v1/stats") => { - Self::handle_stats(node) - } + (Method::Get, "/api/v1/stats") => Self::handle_stats(node), // OPTIONS (CORS preflight) - (Method::Options, _) => { - (204, String::new()) - } + (Method::Options, _) => (204, String::new()), // Health check (Method::Get, "/health") | (Method::Get, "/") => { @@ -406,10 +401,7 @@ impl RestServer { } /// Handle POST /api/v1/entries - fn handle_create_entry( - request: &mut Request, - node: &Arc>, - ) -> (u16, String) { + fn handle_create_entry(request: &mut Request, node: &Arc>) -> (u16, String) { // Read body let mut body = String::new(); let reader = request.as_reader(); @@ -444,7 +436,10 @@ impl RestServer { timestamp: crate::types::Timestamp::now().as_millis(), }; let api_response = ApiResponse::success(response); - (201, serde_json::to_string(&api_response).unwrap_or_default()) + ( + 201, + serde_json::to_string(&api_response).unwrap_or_default(), + ) } Err(e) => { let response = ApiResponse::<()>::error(format!("Failed to create entry: {}", e)); @@ -454,10 +449,7 @@ impl RestServer { } /// Handle GET /api/v1/entries/:hash - fn handle_get_entry( - hash_str: &str, - node: &Arc>, - ) -> (u16, String) { + fn handle_get_entry(hash_str: &str, node: &Arc>) -> (u16, String) { // Parse hash let hash = match Hash::from_hex(hash_str) { Ok(h) => h, @@ -482,7 +474,7 @@ impl RestServer { let content: serde_json::Value = serde_json::from_slice(&entry.content) .unwrap_or_else(|_| { serde_json::Value::String( - String::from_utf8_lossy(&entry.content).to_string() + String::from_utf8_lossy(&entry.content).to_string(), ) }); @@ -493,7 +485,10 @@ impl RestServer { size: entry.size(), }; let api_response = ApiResponse::success(response); - (200, serde_json::to_string(&api_response).unwrap_or_default()) + ( + 200, + serde_json::to_string(&api_response).unwrap_or_default(), + ) } Ok(None) => { let response = 
ApiResponse::<()>::error("Entry not found"); @@ -564,7 +559,10 @@ impl RestServer { }; let api_response = ApiResponse::success(response); - (200, serde_json::to_string(&api_response).unwrap_or_default()) + ( + 200, + serde_json::to_string(&api_response).unwrap_or_default(), + ) } /// Handle static requests (without node access) @@ -613,25 +611,25 @@ impl RestServer { fn send_response(request: Request, response: (u16, String), enable_cors: bool) { let (status, body) = response; - let mut headers = vec![ - Header::from_bytes(&b"Content-Type"[..], &b"application/json"[..]).unwrap(), - ]; + let mut headers = + vec![Header::from_bytes(&b"Content-Type"[..], &b"application/json"[..]).unwrap()]; if enable_cors { - headers.push( - Header::from_bytes(&b"Access-Control-Allow-Origin"[..], &b"*"[..]).unwrap() - ); + headers + .push(Header::from_bytes(&b"Access-Control-Allow-Origin"[..], &b"*"[..]).unwrap()); headers.push( Header::from_bytes( &b"Access-Control-Allow-Methods"[..], &b"GET, POST, OPTIONS"[..], - ).unwrap() + ) + .unwrap(), ); headers.push( Header::from_bytes( &b"Access-Control-Allow-Headers"[..], &b"Content-Type, Authorization"[..], - ).unwrap() + ) + .unwrap(), ); } diff --git a/crates/aingle_minimal/src/rocks_storage.rs b/crates/aingle_minimal/src/rocks_storage.rs index acfd0f88..18a8aab9 100644 --- a/crates/aingle_minimal/src/rocks_storage.rs +++ b/crates/aingle_minimal/src/rocks_storage.rs @@ -99,9 +99,9 @@ impl RocksStorage { /// Get column family handle fn cf(&self, name: &str) -> Result<&ColumnFamily> { - self.db - .cf_handle(name) - .ok_or_else(|| crate::error::Error::storage(format!("Column family '{}' not found", name))) + self.db.cf_handle(name).ok_or_else(|| { + crate::error::Error::storage(format!("Column family '{}' not found", name)) + }) } /// Serialize key for actions (hash-based) @@ -136,7 +136,10 @@ impl RocksStorage { arr.copy_from_slice(&v[..8]); Some(i64::from_be_bytes(arr)) } else { - log::warn!("Corrupt sequence counter: expected 8 bytes, 
got {}", v.len()); + log::warn!( + "Corrupt sequence counter: expected 8 bytes, got {}", + v.len() + ); None } }) diff --git a/crates/aingle_minimal/src/wallet.rs b/crates/aingle_minimal/src/wallet.rs index 6303af01..d7c1cb85 100644 --- a/crates/aingle_minimal/src/wallet.rs +++ b/crates/aingle_minimal/src/wallet.rs @@ -667,9 +667,10 @@ impl ApduCommand { /// Serialize to bytes for transmission pub fn serialize(&self) -> std::result::Result, crate::error::Error> { if self.data.len() > 255 { - return Err(crate::error::Error::network( - format!("APDU data too large: {} bytes (max 255)", self.data.len()), - )); + return Err(crate::error::Error::network(format!( + "APDU data too large: {} bytes (max 255)", + self.data.len() + ))); } let mut bytes = Vec::with_capacity(5 + self.data.len()); bytes.push(self.cla); diff --git a/crates/aingle_minimal/tests/smart_node_integration_tests.rs b/crates/aingle_minimal/tests/smart_node_integration_tests.rs index bc152999..e6656850 100644 --- a/crates/aingle_minimal/tests/smart_node_integration_tests.rs +++ b/crates/aingle_minimal/tests/smart_node_integration_tests.rs @@ -12,9 +12,7 @@ use aingle_minimal::*; use kaneru::policy::Condition; -use kaneru::{ - Action, ActionType, AgentConfig, Goal, Observation, ObservationType, Policy, Rule, -}; +use kaneru::{Action, ActionType, AgentConfig, Goal, Observation, ObservationType, Policy, Rule}; /// Helper to create test configuration fn test_smart_config() -> SmartNodeConfig { diff --git a/crates/aingle_raft/src/consistency.rs b/crates/aingle_raft/src/consistency.rs index cee877b3..816e8527 100644 --- a/crates/aingle_raft/src/consistency.rs +++ b/crates/aingle_raft/src/consistency.rs @@ -39,12 +39,30 @@ mod tests { #[test] fn test_from_header() { - assert_eq!(ConsistencyLevel::from_header("local"), ConsistencyLevel::Local); - assert_eq!(ConsistencyLevel::from_header("quorum"), ConsistencyLevel::Quorum); - assert_eq!(ConsistencyLevel::from_header("linearizable"), 
ConsistencyLevel::Linearizable); - assert_eq!(ConsistencyLevel::from_header("LOCAL"), ConsistencyLevel::Local); - assert_eq!(ConsistencyLevel::from_header("QUORUM"), ConsistencyLevel::Quorum); - assert_eq!(ConsistencyLevel::from_header("unknown"), ConsistencyLevel::Local); + assert_eq!( + ConsistencyLevel::from_header("local"), + ConsistencyLevel::Local + ); + assert_eq!( + ConsistencyLevel::from_header("quorum"), + ConsistencyLevel::Quorum + ); + assert_eq!( + ConsistencyLevel::from_header("linearizable"), + ConsistencyLevel::Linearizable + ); + assert_eq!( + ConsistencyLevel::from_header("LOCAL"), + ConsistencyLevel::Local + ); + assert_eq!( + ConsistencyLevel::from_header("QUORUM"), + ConsistencyLevel::Quorum + ); + assert_eq!( + ConsistencyLevel::from_header("unknown"), + ConsistencyLevel::Local + ); } #[test] diff --git a/crates/aingle_raft/src/lib.rs b/crates/aingle_raft/src/lib.rs index 04c867c8..4ff52aa9 100644 --- a/crates/aingle_raft/src/lib.rs +++ b/crates/aingle_raft/src/lib.rs @@ -6,12 +6,12 @@ //! Uses openraft for leader election and log replication, //! backed by the AIngle WAL for durable log storage. 
-pub mod types; +pub mod consistency; pub mod log_store; -pub mod state_machine; -pub mod snapshot_builder; pub mod network; -pub mod consistency; +pub mod snapshot_builder; +pub mod state_machine; +pub mod types; -pub use types::{CortexTypeConfig, CortexRequest, CortexResponse, CortexNode, NodeId}; pub use consistency::ConsistencyLevel; +pub use types::{CortexNode, CortexRequest, CortexResponse, CortexTypeConfig, NodeId}; diff --git a/crates/aingle_raft/src/log_store.rs b/crates/aingle_raft/src/log_store.rs index 82bbc8a0..d3dd4d27 100644 --- a/crates/aingle_raft/src/log_store.rs +++ b/crates/aingle_raft/src/log_store.rs @@ -69,7 +69,12 @@ impl CortexLogStore { let mut log = BTreeMap::new(); for wal_entry in &wal_entries { - if let WalEntryKind::RaftEntry { index, term: _, data } = &wal_entry.kind { + if let WalEntryKind::RaftEntry { + index, + term: _, + data, + } = &wal_entry.kind + { match serde_json::from_slice::(data) { Ok(entry) => { log.insert(*index, entry); @@ -248,7 +253,11 @@ impl CortexLogStore { // Write ALL to WAL first for (index, term, ref data, _) in &batch { self.wal - .append(WalEntryKind::RaftEntry { index: *index, term: *term, data: data.clone() }) + .append(WalEntryKind::RaftEntry { + index: *index, + term: *term, + data: data.clone(), + }) .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?; } @@ -348,9 +357,12 @@ impl RaftLogStorage for Arc { { // Always invoke the callback, even on error, to prevent openraft hangs. 
let result = self.append_inner(entries).await; - callback.io_completed(result.as_ref().map(|_| ()).map_err(|e| { - io::Error::new(e.kind(), e.to_string()) - })); + callback.io_completed( + result + .as_ref() + .map(|_| ()) + .map_err(|e| io::Error::new(e.kind(), e.to_string())), + ); result } @@ -381,10 +393,7 @@ impl RaftLogStorage for Arc { async fn purge(&mut self, log_id: LogId) -> Result<(), io::Error> { let mut log = self.log.write().await; - let keys_to_remove: Vec = log - .range(..=log_id.index) - .map(|(k, _)| *k) - .collect(); + let keys_to_remove: Vec = log.range(..=log_id.index).map(|(k, _)| *k).collect(); for k in keys_to_remove { log.remove(&k); } @@ -409,10 +418,7 @@ mod tests { use openraft::vote::RaftLeaderId; fn make_entry(index: u64, term: u64) -> Entry { - Entry::new_blank(openraft::LogId::new( - CommittedLeaderId::new(term, 0), - index, - )) + Entry::new_blank(openraft::LogId::new(CommittedLeaderId::new(term, 0), index)) } #[tokio::test] @@ -434,10 +440,7 @@ mod tests { let entries = vec![make_entry(1, 1), make_entry(2, 1), make_entry(3, 1)]; - store_mut - .append(entries, IOFlushed::noop()) - .await - .unwrap(); + store_mut.append(entries, IOFlushed::noop()).await.unwrap(); let mut reader = store.clone(); let result = reader.try_get_log_entries(1..4).await.unwrap(); @@ -480,10 +483,7 @@ mod tests { make_entry(3, 1), make_entry(4, 1), ]; - store_mut - .append(entries, IOFlushed::noop()) - .await - .unwrap(); + store_mut.append(entries, IOFlushed::noop()).await.unwrap(); // Truncate after index 2 let lid = openraft::LogId::new(CommittedLeaderId::new(1, 0), 2); @@ -509,10 +509,7 @@ mod tests { make_entry(3, 1), make_entry(4, 1), ]; - store_mut - .append(entries, IOFlushed::noop()) - .await - .unwrap(); + store_mut.append(entries, IOFlushed::noop()).await.unwrap(); let lid = openraft::LogId::new(CommittedLeaderId::new(1, 0), 2); store_mut.truncate_after(Some(lid)).await.unwrap(); @@ -523,7 +520,11 @@ mod tests { let store = 
Arc::new(CortexLogStore::open(dir.path()).unwrap()); let mut reader = store.clone(); let result = reader.try_get_log_entries(1..5).await.unwrap(); - assert_eq!(result.len(), 2, "truncated entries must not survive restart"); + assert_eq!( + result.len(), + 2, + "truncated entries must not survive restart" + ); } } @@ -533,15 +534,8 @@ mod tests { let store = Arc::new(CortexLogStore::open(dir.path()).unwrap()); let mut store_mut = store.clone(); - let entries = vec![ - make_entry(1, 1), - make_entry(2, 1), - make_entry(3, 1), - ]; - store_mut - .append(entries, IOFlushed::noop()) - .await - .unwrap(); + let entries = vec![make_entry(1, 1), make_entry(2, 1), make_entry(3, 1)]; + store_mut.append(entries, IOFlushed::noop()).await.unwrap(); let purge_id = openraft::LogId::new(CommittedLeaderId::new(1, 0), 2); store_mut.purge(purge_id).await.unwrap(); @@ -561,15 +555,8 @@ mod tests { let store = Arc::new(CortexLogStore::open(dir.path()).unwrap()); let mut store_mut = store.clone(); - let entries = vec![ - make_entry(1, 1), - make_entry(2, 1), - make_entry(3, 1), - ]; - store_mut - .append(entries, IOFlushed::noop()) - .await - .unwrap(); + let entries = vec![make_entry(1, 1), make_entry(2, 1), make_entry(3, 1)]; + store_mut.append(entries, IOFlushed::noop()).await.unwrap(); let purge_id = openraft::LogId::new(CommittedLeaderId::new(1, 0), 2); store_mut.purge(purge_id).await.unwrap(); @@ -601,10 +588,7 @@ mod tests { let mut store_mut = store.clone(); let entries = vec![make_entry(1, 1), make_entry(2, 1)]; - store_mut - .append(entries, IOFlushed::noop()) - .await - .unwrap(); + store_mut.append(entries, IOFlushed::noop()).await.unwrap(); } // Reopen and verify entries are recovered @@ -644,10 +628,7 @@ mod tests { let mut store_mut = store.clone(); let entries = vec![make_entry(1, 1), make_entry(2, 1)]; - store_mut - .append(entries, IOFlushed::noop()) - .await - .unwrap(); + store_mut.append(entries, IOFlushed::noop()).await.unwrap(); let state = 
store_mut.get_log_state().await.unwrap(); assert!(state.last_purged_log_id.is_none()); diff --git a/crates/aingle_raft/src/network.rs b/crates/aingle_raft/src/network.rs index 4fcb62a4..2ad84f8f 100644 --- a/crates/aingle_raft/src/network.rs +++ b/crates/aingle_raft/src/network.rs @@ -166,18 +166,15 @@ impl RaftNetworkFactory for CortexNetworkFactory { async fn new_client(&mut self, target: NodeId, node: &CortexNode) -> Self::Network { // Use REST address for HTTP-based Raft RPC routing. // Fallback is constructed infallibly (no parse) to avoid panics. - let addr: SocketAddr = node - .rest_addr - .parse() - .unwrap_or_else(|e| { - tracing::warn!( - target_node = target, - addr = %node.rest_addr, - error = %e, - "Invalid REST address for Raft peer, falling back to localhost:19090" - ); - SocketAddr::from(([127, 0, 0, 1], 19090)) - }); + let addr: SocketAddr = node.rest_addr.parse().unwrap_or_else(|e| { + tracing::warn!( + target_node = target, + addr = %node.rest_addr, + error = %e, + "Invalid REST address for Raft peer, falling back to localhost:19090" + ); + SocketAddr::from(([127, 0, 0, 1], 19090)) + }); CortexNetworkConnection { target, @@ -284,18 +281,15 @@ impl RaftNetworkV2 for CortexNetworkConnection { "meta": snapshot.meta, "data": snapshot.snapshot.into_inner(), }); - let payload = serde_json::to_vec(&snap_data).map_err(|e| { - StreamingError::Unreachable(Unreachable::new(&AnyError::error(e))) - })?; + let payload = serde_json::to_vec(&snap_data) + .map_err(|e| StreamingError::Unreachable(Unreachable::new(&AnyError::error(e))))?; // Use chunked transfer for payloads > 1MB to avoid timeouts // and reduce memory pressure on the receiver. 
const CHUNK_THRESHOLD: usize = 1024 * 1024; // 1MB if payload.len() > CHUNK_THRESHOLD { - return self - .send_chunked_snapshot(&payload, option) - .await; + return self.send_chunked_snapshot(&payload, option).await; } // Small snapshot: send monolithic @@ -327,7 +321,6 @@ impl RaftNetworkV2 for CortexNetworkConnection { ))), } } - } impl CortexNetworkConnection { @@ -390,9 +383,7 @@ impl CortexNetworkConnection { "Snapshot chunk at offset {offset} timed out after {per_chunk_timeout:?}" )))) })? - .map_err(|e| { - StreamingError::Unreachable(Unreachable::new(&AnyError::error(e))) - })?; + .map_err(|e| StreamingError::Unreachable(Unreachable::new(&AnyError::error(e))))?; match response { // Final chunk returns the install response diff --git a/crates/aingle_raft/src/snapshot_builder.rs b/crates/aingle_raft/src/snapshot_builder.rs index 4851fb2c..d3dff2e8 100644 --- a/crates/aingle_raft/src/snapshot_builder.rs +++ b/crates/aingle_raft/src/snapshot_builder.rs @@ -97,10 +97,7 @@ impl RaftSnapshotBuilder for CortexSnapshotBuilder { .to_bytes() .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?; - let snapshot_id = format!( - "snap-{}-{}", - last_applied_term, last_applied_index - ); + let snapshot_id = format!("snap-{}-{}", last_applied_term, last_applied_index); let meta = SnapshotMeta { last_log_id: self.last_applied.clone(), @@ -152,10 +149,7 @@ mod tests { let mut builder = CortexSnapshotBuilder { graph: Arc::new(RwLock::new(graph)), memory: Arc::new(RwLock::new(memory)), - last_applied: Some(openraft::LogId::new( - CommittedLeaderId::new(1, 0), - 5, - )), + last_applied: Some(openraft::LogId::new(CommittedLeaderId::new(1, 0), 5)), last_membership: openraft::StoredMembership::default(), proof_provider: None, }; diff --git a/crates/aingle_raft/src/state_machine.rs b/crates/aingle_raft/src/state_machine.rs index 06551e98..64ec27e4 100644 --- a/crates/aingle_raft/src/state_machine.rs +++ b/crates/aingle_raft/src/state_machine.rs @@ -219,9 +219,7 @@ impl 
CortexStateMachine { id: None, } } - WalEntryKind::DagAction { action_bytes } => { - self.apply_dag_action(action_bytes).await - } + WalEntryKind::DagAction { action_bytes } => self.apply_dag_action(action_bytes).await, _ => CortexResponse { success: true, detail: None, @@ -248,9 +246,7 @@ impl CortexStateMachine { }; // Reject unsigned actions (Genesis exempt — system-generated at init) - if action.signature.is_none() - && !matches!(action.payload, DagPayload::Genesis { .. }) - { + if action.signature.is_none() && !matches!(action.payload, DagPayload::Genesis { .. }) { tracing::warn!( seq = action.seq, author = %action.author, @@ -285,9 +281,8 @@ impl CortexStateMachine { DagPayload::TripleInsert { triples } => { let graph = self.graph.read().await; for t in triples { - let value = json_to_value( - &serde_json::to_value(&t.object).unwrap_or_default(), - ); + let value = + json_to_value(&serde_json::to_value(&t.object).unwrap_or_default()); let triple = aingle_graph::Triple::new( aingle_graph::NodeId::named(&t.subject), aingle_graph::Predicate::named(&t.predicate), @@ -320,10 +315,15 @@ impl CortexStateMachine { ); } aingle_graph::dag::MemoryOpKind::Forget { memory_id } => { - tracing::debug!(memory_id, "DagAction MemoryOp::Forget recorded (audit only)"); + tracing::debug!( + memory_id, + "DagAction MemoryOp::Forget recorded (audit only)" + ); } aingle_graph::dag::MemoryOpKind::Consolidate => { - tracing::debug!("DagAction MemoryOp::Consolidate recorded (audit only)"); + tracing::debug!( + "DagAction MemoryOp::Consolidate recorded (audit only)" + ); } } } @@ -360,24 +360,42 @@ impl CortexStateMachine { // Audit-only: no graph mutation needed } DagPayload::Batch { .. 
} => { - tracing::warn!("Nested Batch inside Batch — skipping to avoid recursion"); + tracing::warn!( + "Nested Batch inside Batch — skipping to avoid recursion" + ); } } } } - DagPayload::Genesis { triple_count, description } => { + DagPayload::Genesis { + triple_count, + description, + } => { + tracing::info!(triple_count, description, "Applied DagAction::Genesis"); + } + DagPayload::Compact { + pruned_count, + retained_count, + ref policy, + } => { tracing::info!( - triple_count, - description, - "Applied DagAction::Genesis" + pruned_count, + retained_count, + policy, + "Applied DagAction::Compact" ); } - DagPayload::Compact { pruned_count, retained_count, ref policy } => { - tracing::info!(pruned_count, retained_count, policy, "Applied DagAction::Compact"); - } DagPayload::Noop => {} - DagPayload::Custom { ref payload_type, ref payload_summary, .. } => { - tracing::info!(payload_type, payload_summary, "Applied DagAction::Custom (audit only)"); + DagPayload::Custom { + ref payload_type, + ref payload_summary, + .. 
+ } => { + tracing::info!( + payload_type, + payload_summary, + "Applied DagAction::Custom (audit only)" + ); } } @@ -426,9 +444,7 @@ impl CortexStateMachine { impl RaftStateMachine for Arc { type SnapshotBuilder = CortexSnapshotBuilder; - async fn applied_state( - &mut self, - ) -> Result<(Option, StoredMembershipOf), io::Error> { + async fn applied_state(&mut self) -> Result<(Option, StoredMembershipOf), io::Error> { let la = self.last_applied.read().await; let membership = self.last_membership.read().await; Ok((la.clone(), membership.clone())) @@ -436,9 +452,7 @@ impl RaftStateMachine for Arc { async fn apply(&mut self, mut entries: Strm) -> Result<(), io::Error> where - Strm: futures_util::Stream, io::Error>> - + Unpin - + Send, + Strm: futures_util::Stream, io::Error>> + Unpin + Send, { while let Some(item) = entries.next().await { let (entry, responder) = item?; @@ -456,9 +470,7 @@ impl RaftStateMachine for Arc { detail: None, id: None, }, - openraft::EntryPayload::Normal(ref req) => { - self.apply_mutation(&req.kind).await - } + openraft::EntryPayload::Normal(ref req) => self.apply_mutation(&req.kind).await, openraft::EntryPayload::Membership(_) => CortexResponse { success: true, detail: None, @@ -509,8 +521,8 @@ impl RaftStateMachine for Arc { // Build both new graph and new memory into temporaries FIRST, // then swap atomically only if both succeed (#7). - let new_graph = GraphDB::memory() - .map_err(|e| io::Error::new(io::ErrorKind::Other, e.to_string()))?; + let new_graph = + GraphDB::memory().map_err(|e| io::Error::new(io::ErrorKind::Other, e.to_string()))?; for ts in &cluster_snap.triples { let value = json_to_value(&ts.object); let triple = aingle_graph::Triple::new( @@ -525,9 +537,12 @@ impl RaftStateMachine for Arc { let new_memory = if !cluster_snap.ineru_ltm.is_empty() { Some( - IneruMemory::import_snapshot(&cluster_snap.ineru_ltm) - .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, - format!("Failed to restore Ineru from snapshot: {e}")))? 
+ IneruMemory::import_snapshot(&cluster_snap.ineru_ltm).map_err(|e| { + io::Error::new( + io::ErrorKind::InvalidData, + format!("Failed to restore Ineru from snapshot: {e}"), + ) + })?, ) } else { None @@ -775,10 +790,7 @@ mod tests { fn make_graph_and_memory() -> (Arc>, Arc>) { let graph = GraphDB::memory().unwrap(); let memory = IneruMemory::agent_mode(); - ( - Arc::new(RwLock::new(graph)), - Arc::new(RwLock::new(memory)), - ) + (Arc::new(RwLock::new(graph)), Arc::new(RwLock::new(memory))) } #[tokio::test] @@ -954,7 +966,11 @@ mod tests { // Verify: old data cleared, only snapshot data present let g = graph.read().await; - assert_eq!(g.count(), 1, "old data should be cleared, only snapshot data remains"); + assert_eq!( + g.count(), + 1, + "old data should be cleared, only snapshot data remains" + ); let triples = g.find(aingle_graph::TriplePattern::any()).unwrap(); let subject_str = triples[0].subject.to_string(); assert!( @@ -1024,7 +1040,10 @@ mod tests { // Verify checksum was written into serialized data let raw: serde_json::Value = serde_json::from_slice(&bytes).unwrap(); let checksum = raw["checksum"].as_str().unwrap(); - assert!(!checksum.is_empty(), "checksum should be set after to_bytes"); + assert!( + !checksum.is_empty(), + "checksum should be set after to_bytes" + ); // Valid roundtrip succeeds let restored = ClusterSnapshot::from_bytes(&bytes).unwrap(); @@ -1097,6 +1116,9 @@ mod tests { }; let bytes = serde_json::to_vec(&snap).unwrap(); let result = ClusterSnapshot::from_bytes(&bytes); - assert!(result.is_ok(), "empty checksum should be accepted for backward compat"); + assert!( + result.is_ok(), + "empty checksum should be accepted for backward compat" + ); } } diff --git a/crates/aingle_raft/src/types.rs b/crates/aingle_raft/src/types.rs index c15fd068..d8a62b9e 100644 --- a/crates/aingle_raft/src/types.rs +++ b/crates/aingle_raft/src/types.rs @@ -51,7 +51,11 @@ pub struct CortexNode { impl fmt::Display for CortexNode { fn fmt(&self, f: &mut 
fmt::Formatter<'_>) -> fmt::Result { - write!(f, "CortexNode(rest={}, p2p={})", self.rest_addr, self.p2p_addr) + write!( + f, + "CortexNode(rest={}, p2p={})", + self.rest_addr, self.p2p_addr + ) } } diff --git a/crates/aingle_wal/src/entry.rs b/crates/aingle_wal/src/entry.rs index bb35235a..5dccf4b6 100644 --- a/crates/aingle_wal/src/entry.rs +++ b/crates/aingle_wal/src/entry.rs @@ -23,7 +23,12 @@ pub struct WalEntry { impl WalEntry { /// Compute the hash for this entry's payload (kind + seq + timestamp + prev_hash). - pub fn compute_hash(seq: u64, timestamp: &DateTime, kind: &WalEntryKind, prev_hash: &[u8; 32]) -> [u8; 32] { + pub fn compute_hash( + seq: u64, + timestamp: &DateTime, + kind: &WalEntryKind, + prev_hash: &[u8; 32], + ) -> [u8; 32] { let mut hasher = blake3::Hasher::new(); hasher.update(&seq.to_le_bytes()); hasher.update(timestamp.to_rfc3339().as_bytes()); @@ -47,9 +52,7 @@ pub enum WalEntryKind { triple_id: [u8; 32], }, /// Triple deleted from GraphDB. - TripleDelete { - triple_id: [u8; 32], - }, + TripleDelete { triple_id: [u8; 32] }, /// Memory entry stored in Ineru STM. MemoryStore { memory_id: String, @@ -58,13 +61,9 @@ pub enum WalEntryKind { importance: f32, }, /// Memory entry forgotten. - MemoryForget { - memory_id: String, - }, + MemoryForget { memory_id: String }, /// STM → LTM consolidation occurred. - MemoryConsolidate { - consolidated_count: usize, - }, + MemoryConsolidate { consolidated_count: usize }, /// Proof submitted. ProofSubmit { proof_id: String, @@ -90,9 +89,7 @@ pub enum WalEntryKind { weight: f32, }, /// LTM entity deleted (for Ineru replication). - LtmEntityDelete { - entity_id: String, - }, + LtmEntityDelete { entity_id: String }, /// Serialized openraft Raft log entry. 
RaftEntry { index: u64, @@ -128,7 +125,9 @@ mod tests { #[test] fn test_compute_hash_deterministic() { let ts = Utc::now(); - let kind = WalEntryKind::TripleDelete { triple_id: [1u8; 32] }; + let kind = WalEntryKind::TripleDelete { + triple_id: [1u8; 32], + }; let prev = [0u8; 32]; let h1 = WalEntry::compute_hash(1, &ts, &kind, &prev); @@ -139,7 +138,9 @@ mod tests { #[test] fn test_compute_hash_differs_on_seq() { let ts = Utc::now(); - let kind = WalEntryKind::TripleDelete { triple_id: [1u8; 32] }; + let kind = WalEntryKind::TripleDelete { + triple_id: [1u8; 32], + }; let prev = [0u8; 32]; let h1 = WalEntry::compute_hash(1, &ts, &kind, &prev); diff --git a/crates/aingle_wal/src/reader.rs b/crates/aingle_wal/src/reader.rs index 472092ba..9f2398c2 100644 --- a/crates/aingle_wal/src/reader.rs +++ b/crates/aingle_wal/src/reader.rs @@ -76,12 +76,8 @@ impl WalReader { let entry = &entries[i]; // Verify this entry's hash - let expected_hash = WalEntry::compute_hash( - entry.seq, - &entry.timestamp, - &entry.kind, - &entry.prev_hash, - ); + let expected_hash = + WalEntry::compute_hash(entry.seq, &entry.timestamp, &entry.kind, &entry.prev_hash); if entry.hash != expected_hash { return Ok(VerifyResult { valid: false, diff --git a/crates/aingle_wal/src/segment.rs b/crates/aingle_wal/src/segment.rs index b5bcd1e8..26f4aab5 100644 --- a/crates/aingle_wal/src/segment.rs +++ b/crates/aingle_wal/src/segment.rs @@ -55,12 +55,11 @@ impl WalSegment { .strip_prefix("wal-") .and_then(|s| s.strip_suffix(".seg")) .and_then(|s| s.parse::().ok()) - .ok_or_else(|| io::Error::new(io::ErrorKind::InvalidInput, "invalid segment filename"))?; + .ok_or_else(|| { + io::Error::new(io::ErrorKind::InvalidInput, "invalid segment filename") + })?; - let file = OpenOptions::new() - .create(true) - .append(true) - .open(path)?; + let file = OpenOptions::new().create(true).append(true).open(path)?; let size_bytes = file.metadata()?.len(); @@ -84,8 +83,8 @@ impl WalSegment { /// Append a WAL entry to the 
segment. pub fn append(&mut self, entry: &WalEntry) -> io::Result<()> { - let payload = serde_json::to_vec(entry) - .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?; + let payload = + serde_json::to_vec(entry).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?; let len = payload.len() as u32; self.file.write_all(&len.to_be_bytes())?; self.file.write_all(&payload)?; diff --git a/crates/aingle_wal/src/writer.rs b/crates/aingle_wal/src/writer.rs index 6440a4dc..13b4cae1 100644 --- a/crates/aingle_wal/src/writer.rs +++ b/crates/aingle_wal/src/writer.rs @@ -73,8 +73,12 @@ impl WalWriter { let timestamp = Utc::now(); let prev_hash = { - let guard = self.last_hash.lock() - .map_err(|e| io::Error::new(io::ErrorKind::Other, format!("WAL last_hash lock poisoned: {e}")))?; + let guard = self.last_hash.lock().map_err(|e| { + io::Error::new( + io::ErrorKind::Other, + format!("WAL last_hash lock poisoned: {e}"), + ) + })?; *guard }; @@ -89,8 +93,12 @@ impl WalWriter { }; { - let mut seg = self.current_segment.lock() - .map_err(|e| io::Error::new(io::ErrorKind::Other, format!("WAL segment lock poisoned: {e}")))?; + let mut seg = self.current_segment.lock().map_err(|e| { + io::Error::new( + io::ErrorKind::Other, + format!("WAL segment lock poisoned: {e}"), + ) + })?; seg.append(&entry)?; seg.sync()?; @@ -104,8 +112,12 @@ impl WalWriter { // Update last_hash { - let mut guard = self.last_hash.lock() - .map_err(|e| io::Error::new(io::ErrorKind::Other, format!("WAL last_hash lock poisoned: {e}")))?; + let mut guard = self.last_hash.lock().map_err(|e| { + io::Error::new( + io::ErrorKind::Other, + format!("WAL last_hash lock poisoned: {e}"), + ) + })?; *guard = entry.hash; } @@ -114,15 +126,23 @@ impl WalWriter { /// Flush the current segment to disk. 
pub fn sync(&self) -> io::Result<()> { - let mut seg = self.current_segment.lock() - .map_err(|e| io::Error::new(io::ErrorKind::Other, format!("WAL segment lock poisoned: {e}")))?; + let mut seg = self.current_segment.lock().map_err(|e| { + io::Error::new( + io::ErrorKind::Other, + format!("WAL segment lock poisoned: {e}"), + ) + })?; seg.sync() } /// The next sequence number that will be assigned. pub fn last_seq(&self) -> u64 { let next = self.next_seq.load(Ordering::SeqCst); - if next == 0 { 0 } else { next - 1 } + if next == 0 { + 0 + } else { + next - 1 + } } /// Get the WAL directory path. diff --git a/crates/ineru/benches/memory_bench.rs b/crates/ineru/benches/memory_bench.rs index 78a282cf..25157415 100644 --- a/crates/ineru/benches/memory_bench.rs +++ b/crates/ineru/benches/memory_bench.rs @@ -7,7 +7,7 @@ use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion}; use ineru::{ - ConsolidationConfig, LtmConfig, MemoryConfig, MemoryEntry, MemoryQuery, StmConfig, IneruMemory, + ConsolidationConfig, IneruMemory, LtmConfig, MemoryConfig, MemoryEntry, MemoryQuery, StmConfig, }; /// Benchmark STM store operations diff --git a/crates/ineru/src/hnsw.rs b/crates/ineru/src/hnsw.rs index e88bd553..4efa30f6 100644 --- a/crates/ineru/src/hnsw.rs +++ b/crates/ineru/src/hnsw.rs @@ -9,8 +9,8 @@ use crate::error::{Error, Result}; use crate::types::MemoryId; use serde::{Deserialize, Serialize}; -use std::collections::{BinaryHeap, HashMap, HashSet}; use std::cmp::Ordering; +use std::collections::{BinaryHeap, HashMap, HashSet}; // ============================================================================ // Config @@ -82,7 +82,10 @@ impl Eq for Candidate {} impl Ord for Candidate { fn cmp(&self, other: &Self) -> Ordering { // Reverse for min-heap behavior (BinaryHeap is max-heap) - other.distance.partial_cmp(&self.distance).unwrap_or(Ordering::Equal) + other + .distance + .partial_cmp(&self.distance) + .unwrap_or(Ordering::Equal) } } @@ -109,7 +112,9 
@@ impl Eq for MaxCandidate {} impl Ord for MaxCandidate { fn cmp(&self, other: &Self) -> Ordering { - self.distance.partial_cmp(&other.distance).unwrap_or(Ordering::Equal) + self.distance + .partial_cmp(&other.distance) + .unwrap_or(Ordering::Equal) } } @@ -259,7 +264,8 @@ impl HnswIndex { /// Rebuild the index, removing deleted points. pub fn rebuild(&mut self) { - let active_points: Vec<(MemoryId, Vec)> = self.points + let active_points: Vec<(MemoryId, Vec)> = self + .points .iter() .filter(|p| !p.deleted) .map(|p| (p.id.clone(), p.embedding.clone())) @@ -282,7 +288,8 @@ impl HnswIndex { /// Serialize the index to bytes (M1 fix — preserves full topology). pub fn serialize(&self) -> Result> { - let points: Vec = self.points + let points: Vec = self + .points .iter() .map(|p| HnswPointSnapshot { id: p.id.clone(), @@ -302,8 +309,7 @@ impl HnswIndex { points, }; - serde_json::to_vec(&snapshot) - .map_err(|e| Error::internal(format!("HNSW serialize: {e}"))) + serde_json::to_vec(&snapshot).map_err(|e| Error::internal(format!("HNSW serialize: {e}"))) } /// Deserialize an index from bytes (M1 fix — backward-compatible). 
@@ -320,7 +326,9 @@ impl HnswIndex { index.max_layer = snapshot.max_layer; index.entry_point = snapshot.entry_point; - let dimensions = snapshot.points.first() + let dimensions = snapshot + .points + .first() .map(|p| p.embedding.len()) .unwrap_or(0); index.dimensions = dimensions; @@ -474,15 +482,27 @@ impl HnswIndex { current } - fn search_layer(&self, start: usize, query: &[f32], ef: usize, _layer: usize) -> Vec { + fn search_layer( + &self, + start: usize, + query: &[f32], + ef: usize, + _layer: usize, + ) -> Vec { let mut visited = HashSet::new(); let start_dist = Self::cosine_distance(&self.points[start].embedding, query); let mut candidates = BinaryHeap::new(); // min-heap let mut result = BinaryHeap::::new(); // max-heap - candidates.push(Candidate { index: start, distance: start_dist }); - result.push(MaxCandidate { index: start, distance: start_dist }); + candidates.push(Candidate { + index: start, + distance: start_dist, + }); + result.push(MaxCandidate { + index: start, + distance: start_dist, + }); visited.insert(start); while let Some(current) = candidates.pop() { @@ -521,8 +541,14 @@ impl HnswIndex { }; if should_add { - candidates.push(Candidate { index: neighbor_idx, distance: dist }); - result.push(MaxCandidate { index: neighbor_idx, distance: dist }); + candidates.push(Candidate { + index: neighbor_idx, + distance: dist, + }); + result.push(MaxCandidate { + index: neighbor_idx, + distance: dist, + }); if result.len() > ef { result.pop(); // Remove worst @@ -534,9 +560,16 @@ impl HnswIndex { // Convert max-heap to sorted vec (best first) let mut results: Vec = result .into_iter() - .map(|mc| Candidate { index: mc.index, distance: mc.distance }) + .map(|mc| Candidate { + index: mc.index, + distance: mc.distance, + }) .collect(); - results.sort_by(|a, b| a.distance.partial_cmp(&b.distance).unwrap_or(Ordering::Equal)); + results.sort_by(|a, b| { + a.distance + .partial_cmp(&b.distance) + .unwrap_or(Ordering::Equal) + }); results } @@ -571,17 +604,20 
@@ impl HnswIndex { // Add reverse links for &neighbor_idx in &selected { if layer < self.points[neighbor_idx].neighbors.len() { - let already_linked = self.points[neighbor_idx].neighbors[layer].contains(&new_idx); + let already_linked = + self.points[neighbor_idx].neighbors[layer].contains(&new_idx); if !already_linked { self.points[neighbor_idx].neighbors[layer].push(new_idx); // Prune if too many neighbors if self.points[neighbor_idx].neighbors[layer].len() > max_neighbors { // Compute distances for sorting, then sort & truncate let emb = self.points[neighbor_idx].embedding.clone(); - let mut scored: Vec<(usize, f32)> = self.points[neighbor_idx] - .neighbors[layer] + let mut scored: Vec<(usize, f32)> = self.points[neighbor_idx].neighbors + [layer] .iter() - .map(|&n| (n, Self::cosine_distance(&self.points[n].embedding, &emb))) + .map(|&n| { + (n, Self::cosine_distance(&self.points[n].embedding, &emb)) + }) .collect(); scored.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(Ordering::Equal)); scored.truncate(max_neighbors); @@ -851,7 +887,11 @@ mod tests { #[test] fn test_entry_point_updates_on_higher_level() { // Use a config with small m to increase chance of higher layers - let config = HnswConfig { m: 2, ef_construction: 10, ef_search: 10 }; + let config = HnswConfig { + m: 2, + ef_construction: 10, + ef_search: 10, + }; let mut index = HnswIndex::new(config); // Insert many points; at least one should get a level > 0 @@ -882,7 +922,11 @@ mod tests { #[test] fn test_deletion_prunes_neighbor_lists() { - let config = HnswConfig { m: 4, ef_construction: 20, ef_search: 10 }; + let config = HnswConfig { + m: 4, + ef_construction: 20, + ef_search: 10, + }; let mut index = HnswIndex::new(config); // Insert several closely-related points so they appear in each other's diff --git a/crates/ineru/src/lib.rs b/crates/ineru/src/lib.rs index 927c822c..31e17d4c 100644 --- a/crates/ineru/src/lib.rs +++ b/crates/ineru/src/lib.rs @@ -364,7 +364,8 @@ impl IneruMemory { config: 
self.config.clone(), }; - serde_json::to_vec(&snapshot).map_err(|e| Error::internal(format!("snapshot export: {}", e))) + serde_json::to_vec(&snapshot) + .map_err(|e| Error::internal(format!("snapshot export: {}", e))) } /// Imports a memory state from a JSON byte slice. @@ -401,8 +402,8 @@ impl IneruMemory { /// Loads a memory state from a file. pub fn load_from_file(path: &std::path::Path) -> Result { - let data = std::fs::read(path) - .map_err(|e| Error::internal(format!("snapshot read: {}", e)))?; + let data = + std::fs::read(path).map_err(|e| Error::internal(format!("snapshot read: {}", e)))?; Self::import_snapshot(&data) } } diff --git a/crates/ineru/src/ltm.rs b/crates/ineru/src/ltm.rs index 158f75e7..167c0227 100644 --- a/crates/ineru/src/ltm.rs +++ b/crates/ineru/src/ltm.rs @@ -371,14 +371,14 @@ impl LongTermMemory { results .into_iter() .filter(|(_, sim)| *sim >= min_similarity) - .filter_map(|(id, sim)| { - self.memories.get(&id).map(|entry| (entry, sim)) - }) + .filter_map(|(id, sim)| self.memories.get(&id).map(|entry| (entry, sim))) .collect() } else { // Fallback to brute-force over memory entries with embeddings let query_emb = Embedding::new(query.to_vec()); - let mut scored: Vec<_> = self.memories.values() + let mut scored: Vec<_> = self + .memories + .values() .filter_map(|entry| { entry.embedding.as_ref().map(|emb| { let sim = query_emb.cosine_similarity(emb); diff --git a/crates/kaneru/src/coordination.rs b/crates/kaneru/src/coordination.rs index 1056131e..2ec8f594 100644 --- a/crates/kaneru/src/coordination.rs +++ b/crates/kaneru/src/coordination.rs @@ -419,7 +419,10 @@ impl AgentCoordinator { } /// Unregisters an agent from the coordinator. 
- pub fn unregister_agent(&mut self, agent_id: &AgentId) -> Result { + pub fn unregister_agent( + &mut self, + agent_id: &AgentId, + ) -> Result { self.agents .remove(agent_id) .map(|handle| handle.agent) diff --git a/crates/kaneru/src/kaneru_agent.rs b/crates/kaneru/src/kaneru_agent.rs index 6911c256..c948d3fb 100644 --- a/crates/kaneru/src/kaneru_agent.rs +++ b/crates/kaneru/src/kaneru_agent.rs @@ -330,7 +330,9 @@ impl KaneruAgent { let prev_obs = match self.observation_history.back() { Some(obs) => obs, None => { - log::warn!("learn() called with empty observation history — skipping predictive update"); + log::warn!( + "learn() called with empty observation history — skipping predictive update" + ); // Still update goal progress below self.current_state = Some(new_state); self.observation_history.push_back(outcome.new_observation); diff --git a/crates/kaneru/src/memory.rs b/crates/kaneru/src/memory.rs index a3433bf6..81f3ad82 100644 --- a/crates/kaneru/src/memory.rs +++ b/crates/kaneru/src/memory.rs @@ -11,7 +11,7 @@ use crate::agent::{Agent, AgentId, AgentState, SimpleAgent}; use crate::config::AgentConfig; use crate::error::Result; use crate::observation::Observation; -use ineru::{MemoryConfig, MemoryEntry, MemoryQuery, IneruMemory}; +use ineru::{IneruMemory, MemoryConfig, MemoryEntry, MemoryQuery}; /// An agent wrapper that adds memory capabilities using `IneruMemory`. 
/// diff --git a/crates/kaneru/src/persistence.rs b/crates/kaneru/src/persistence.rs index c20b51c8..73c54fdf 100644 --- a/crates/kaneru/src/persistence.rs +++ b/crates/kaneru/src/persistence.rs @@ -223,7 +223,8 @@ impl AgentPersistence for KaneruAgent { let mut bytes = Vec::new(); file.read_to_end(&mut bytes)?; - let state: crate::kaneru_agent::SerializedState = deserialize_with_options(&bytes, options)?; + let state: crate::kaneru_agent::SerializedState = + deserialize_with_options(&bytes, options)?; let mut agent = KaneruAgent::new(state.config.clone()); agent.load_state(state); diff --git a/crates/kaneru/tests/integration_test.rs b/crates/kaneru/tests/integration_test.rs index 2b60ec2d..3641882a 100644 --- a/crates/kaneru/tests/integration_test.rs +++ b/crates/kaneru/tests/integration_test.rs @@ -243,7 +243,8 @@ fn test_persistence_formats() { compress: false, }; let json_bytes = agent.to_bytes_with_options(&json_options).unwrap(); - let loaded_from_json = KaneruAgent::from_bytes_with_options(&json_bytes, &json_options).unwrap(); + let loaded_from_json = + KaneruAgent::from_bytes_with_options(&json_bytes, &json_options).unwrap(); assert_eq!( loaded_from_json.get_statistics().total_steps, agent.get_statistics().total_steps diff --git a/crates/kaneru/tests/integration_tests.rs b/crates/kaneru/tests/integration_tests.rs index f8228570..91545f64 100644 --- a/crates/kaneru/tests/integration_tests.rs +++ b/crates/kaneru/tests/integration_tests.rs @@ -7,6 +7,7 @@ #![cfg(feature = "memory")] +use ineru::MemoryConfig; use kaneru::{ action::{Action, ActionType}, agent::Agent, @@ -16,7 +17,6 @@ use kaneru::{ observation::Observation, policy::{Condition, Rule}, }; -use ineru::MemoryConfig; /// Test: Create a memory agent and store observations #[test] From c50fd73a6eccd6a7c8381b2b9e588b19069c682f Mon Sep 17 00:00:00 2001 From: ApiliumDevTeam Date: Tue, 23 Jun 2026 20:41:37 +0200 Subject: [PATCH 14/72] ci: trigger AIngle CI on dev branch (was develop) --- 
.github/workflows/ci.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 4dd35485..7d8ceb5c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -2,9 +2,9 @@ name: AIngle CI on: push: - branches: [ main, develop ] + branches: [ main, dev ] pull_request: - branches: [ main, develop ] + branches: [ main, dev ] env: CARGO_TERM_COLOR: always From 9fe5e7a518ded5fd6ae3068e5c0b1505e0040d29 Mon Sep 17 00:00:00 2001 From: ApiliumDevTeam Date: Tue, 23 Jun 2026 21:02:34 +0200 Subject: [PATCH 15/72] fix(cortex): purge stale triples and chunks on changed-file re-ingest ingest_path only deleted the old source-hash registry triple when a file changed, leaving prior structural triples and Ineru chunks in place, so ground() could surface stale content after an edit. Add purge_source to delete every triple with subject==rel_path and forget every chunk whose metadata.source==rel_path before re-writing the file's fresh data. --- crates/aingle_cortex/src/service/ingest.rs | 118 +++++++++++++++++---- 1 file changed, 100 insertions(+), 18 deletions(-) diff --git a/crates/aingle_cortex/src/service/ingest.rs b/crates/aingle_cortex/src/service/ingest.rs index f7a515ff..75a088b8 100644 --- a/crates/aingle_cortex/src/service/ingest.rs +++ b/crates/aingle_cortex/src/service/ingest.rs @@ -11,7 +11,7 @@ use crate::service::triples::{delete_triple, insert_triple_inner}; use crate::state::AppState; use aingle_graph::{NodeId, Predicate, TriplePattern}; use aingle_ingest::{extract, ObjectValue}; -use ineru::{Embedding, MemoryEntry, MemoryMetadata}; +use ineru::{Embedding, MemoryEntry, MemoryId, MemoryMetadata}; // Bring the graph error type into scope for duplicate-matching in ingest logic. 
use aingle_graph::Error as GraphError; @@ -130,24 +130,12 @@ pub async fn ingest_path( report.files_ingested += 1; - // Remove old registry triple if the hash changed + // The file changed (or it's a re-ingest with a different hash): purge all + // prior facts and chunks for this source before writing the fresh ones, so + // stale structural triples and Ineru chunks don't linger and leak into + // grounded retrieval. if existing_hash.is_some() { - // Delete by finding the triple's hex id - let old_triple_id = { - let graph = state.graph.read().await; - let pattern = TriplePattern::any() - .with_subject(NodeId::named(&rel_path)) - .with_predicate(Predicate::named(PRED_SOURCE_HASH)); - graph - .find(pattern) - .ok() - .and_then(|v| v.into_iter().next()) - .map(|t| t.id().to_hex()) - }; - if let Some(hex_id) = old_triple_id { - // Best-effort: ignore NotFound - let _ = delete_triple(state, &hex_id, namespace.clone()).await; - } + purge_source(state, &rel_path, namespace.clone()).await?; } // Extract triples and chunks from the file @@ -251,6 +239,49 @@ pub async fn ingest_path( Ok(report) } +/// Remove every fact and chunk previously ingested from `rel_path`, so a changed +/// file's stale data can't survive a re-ingest. +/// +/// Deletes all graph triples whose subject is `rel_path` (its structural facts +/// plus the source-hash registry triple) and forgets every Ineru chunk whose +/// `metadata.source` is `rel_path`. Inbound links from *other* files (where +/// `rel_path` is the object, not the subject) are left untouched. +async fn purge_source(state: &AppState, rel_path: &str, namespace: Option) -> Result<()> { + // Graph: delete every triple authored by this source (subject == rel_path). + let stale_ids: Vec = { + let graph = state.graph.read().await; + let pattern = TriplePattern::any().with_subject(NodeId::named(rel_path)); + graph + .find(pattern) + .map_err(|e| Error::Internal(format!("graph find error: {e}")))? 
+ .into_iter() + .map(|t| t.id().to_hex()) + .collect() + }; + for hex_id in stale_ids { + // Best-effort: a concurrently-removed triple is fine to skip. + let _ = delete_triple(state, &hex_id, namespace.clone()).await; + } + + // Ineru: forget every chunk that came from this source. + { + let mut mem = state.memory.write().await; + let ids: Vec = mem + .stm + .all_entries() + .into_iter() + .chain(mem.ltm.all_entries()) + .filter(|e| e.entry_type == CHUNK_ENTRY_TYPE && e.metadata.source == rel_path) + .map(|e| e.id) + .collect(); + for id in ids { + let _ = mem.forget(&id); + } + } + + Ok(()) +} + /// List all source files recorded in the signed registry (path + content hash). pub async fn list_sources(state: &AppState) -> Result> { let graph = state.graph.read().await; @@ -359,4 +390,55 @@ mod tests { assert_eq!(report.files_ingested, 1); assert_eq!(report.files_skipped, 0); } + + #[tokio::test] + async fn changed_file_purges_stale_chunks() { + let dir = tempfile::tempdir().unwrap(); + write(dir.path(), "note.md", "# A\n\nWe use sled for storage.\n"); + let state = enabled_state().await; + let root = dir.path().to_str().unwrap(); + ingest_path(&state, root, None).await.unwrap(); + + // Change the file so the old sentence no longer exists in the source. + write(dir.path(), "note.md", "# A\n\nWe use rocksdb now.\n"); + ingest_path(&state, root, None).await.unwrap(); + + // Querying the OLD sentence verbatim must not surface the stale chunk: + // re-ingesting a changed file must forget the previous chunks for it. 
+ let g = crate::service::ground::ground(&state, "We use sled for storage.", 5) + .await + .unwrap(); + assert!( + !g.answer_context.iter().any(|c| c.text.contains("sled")), + "stale 'sled' chunk should be purged on re-ingest, got: {:?}", + g.answer_context + ); + } + + #[tokio::test] + async fn changed_file_purges_stale_triples() { + let dir = tempfile::tempdir().unwrap(); + write(dir.path(), "note.md", "# A\n\nSee [[sled]].\n"); + let state = enabled_state().await; + let root = dir.path().to_str().unwrap(); + ingest_path(&state, root, None).await.unwrap(); + + // Repoint the wikilink: the old links_to:sled triple must not linger. + write(dir.path(), "note.md", "# A\n\nSee [[rocksdb]].\n"); + ingest_path(&state, root, None).await.unwrap(); + + let graph = state.graph.read().await; + let links = graph + .find( + TriplePattern::any() + .with_subject(NodeId::named("note.md")) + .with_predicate(Predicate::named("links_to")), + ) + .unwrap(); + assert_eq!( + links.len(), + 1, + "stale links_to should be purged, leaving only the new link, got: {links:?}" + ); + } } From f6ebe871c9cdd1e26cd0bfb7b146b908064171df Mon Sep 17 00:00:00 2001 From: ApiliumDevTeam Date: Tue, 23 Jun 2026 21:06:40 +0200 Subject: [PATCH 16/72] fix(cortex): require >=2 corroborating chunks for grounded retrieval MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The placeholder hashing embedder can score an unrelated query's top chunk above GROUND_HIGH, yielding a false 'grounded'. Require at least two strong (>= GROUND_HIGH) chunks before declaring 'grounded'; a lone strong chunk is downgraded to 'weak' with an explicit gap. Structural corroboration guard, not a threshold tweak — revisit once a real embedder replaces the placeholder. 
--- crates/aingle_cortex/src/service/ground.rs | 87 +++++++++++++++++++++- 1 file changed, 84 insertions(+), 3 deletions(-) diff --git a/crates/aingle_cortex/src/service/ground.rs b/crates/aingle_cortex/src/service/ground.rs index d28cc084..4e70da41 100644 --- a/crates/aingle_cortex/src/service/ground.rs +++ b/crates/aingle_cortex/src/service/ground.rs @@ -8,10 +8,15 @@ use crate::error::Result; use crate::state::AppState; use serde::Serialize; -/// Similarity at/above which retrieval is considered well-grounded. +/// Similarity at/above which a chunk counts as a strong, corroborating match. const GROUND_HIGH: f32 = 0.55; /// Similarity below which retrieval is considered ungrounded. const GROUND_LOW: f32 = 0.30; +/// Number of strong chunks required to call retrieval "grounded". A lone strong +/// chunk is treated as "weak": with the current placeholder embedder a single +/// high score can be spurious, so we require independent corroboration rather +/// than blind-tuning `GROUND_HIGH`. Revisit once a real embedder lands. +const MIN_CORROBORATING_CHUNKS: usize = 2; /// A cited chunk of source context. #[derive(Debug, Clone, Serialize)] @@ -85,7 +90,13 @@ pub async fn ground(state: &AppState, question: &str, k: usize) -> Result= GROUND_HIGH { + // Require at least MIN_CORROBORATING_CHUNKS strong matches for "grounded"; + // a single strong chunk is only "weak" (independent corroboration guard). 
+ let strong = answer_context + .iter() + .filter(|c| c.relevance >= GROUND_HIGH) + .count(); + let groundedness = if best >= GROUND_HIGH && strong >= MIN_CORROBORATING_CHUNKS { "grounded" } else if best >= GROUND_LOW && !answer_context.is_empty() { "weak" @@ -97,7 +108,13 @@ pub async fn ground(state: &AppState, question: &str, k: usize) -> Result= GROUND_HIGH && strong < MIN_CORROBORATING_CHUNKS { + gaps.push( + "Only one source corroborates this; a second is needed to be grounded.".to_string(), + ); + } else { + gaps.push("Retrieved context is only weakly related to the question.".to_string()); + } } Ok(GroundedContext { @@ -167,6 +184,70 @@ mod tests { assert!(!g.gaps.is_empty()); } + #[tokio::test] + async fn single_corroborating_chunk_is_weak_not_grounded() { + // One source, one chunk: even a strong similarity match must not be called + // "grounded" — with the placeholder embedder a lone high score can be + // spurious, so a single corroborating chunk is downgraded to "weak". + let dir = tempfile::tempdir().unwrap(); + std::fs::write( + dir.path().join("note.md"), + "# Note\n\nWe chose sled for its exclusive lock semantics.\n", + ) + .unwrap(); + let state = enabled_state().await; + crate::service::ingest::ingest_path(&state, dir.path().to_str().unwrap(), None) + .await + .unwrap(); + + // Query the chunk almost verbatim so the lone chunk scores well above HIGH. + let g = ground(&state, "We chose sled for its exclusive lock semantics.", 5) + .await + .unwrap(); + assert!( + !g.answer_context.is_empty(), + "should retrieve the one chunk" + ); + assert_eq!( + g.groundedness, "weak", + "a single corroborating chunk must be weak, not grounded; ctx: {:?}", + g.answer_context + ); + } + + #[tokio::test] + async fn two_corroborating_sources_are_grounded() { + // The same fact stated in two separate files yields two strong chunks for a + // matching query — that independent corroboration is what makes it grounded. 
+ let dir = tempfile::tempdir().unwrap(); + let fact = "# Doc\n\nThe quorum read requires a valid leader lease.\n"; + std::fs::write(dir.path().join("a.md"), fact).unwrap(); + std::fs::write(dir.path().join("b.md"), fact).unwrap(); + let state = enabled_state().await; + crate::service::ingest::ingest_path(&state, dir.path().to_str().unwrap(), None) + .await + .unwrap(); + + let g = ground(&state, "The quorum read requires a valid leader lease.", 5) + .await + .unwrap(); + let strong = g + .answer_context + .iter() + .filter(|c| c.relevance >= 0.55) + .count(); + assert!( + strong >= 2, + "two sources should both score strongly; ctx: {:?}", + g.answer_context + ); + assert_eq!( + g.groundedness, "grounded", + "two corroborating strong chunks must be grounded; ctx: {:?}", + g.answer_context + ); + } + #[tokio::test] async fn grounds_after_ingest_with_source() { let dir = tempfile::tempdir().unwrap(); From 9d4ae8301d32e6467d1d9f947427eb6b76f4c3aa Mon Sep 17 00:00:00 2001 From: ApiliumDevTeam Date: Fri, 26 Jun 2026 20:19:29 +0200 Subject: [PATCH 17/72] feat(ineru): add Embedder trait and HashEmbedder fallback --- crates/ineru/src/consolidation.rs | 11 ++--- crates/ineru/src/embedder.rs | 78 +++++++++++++++++++++++++++++++ crates/ineru/src/hnsw.rs | 2 +- crates/ineru/src/lib.rs | 16 +++---- 4 files changed, 90 insertions(+), 17 deletions(-) create mode 100644 crates/ineru/src/embedder.rs diff --git a/crates/ineru/src/consolidation.rs b/crates/ineru/src/consolidation.rs index 4e057ae3..cec64b06 100644 --- a/crates/ineru/src/consolidation.rs +++ b/crates/ineru/src/consolidation.rs @@ -199,7 +199,7 @@ pub struct ConsolidationStats { } /// Defines the strategy used to select memories for consolidation. -#[derive(Debug, Clone, Copy)] +#[derive(Debug, Default, Clone, Copy)] pub enum ConsolidationStrategy { /// Consolidates memories that are accessed most frequently. 
FrequencyBased, @@ -208,15 +208,10 @@ pub enum ConsolidationStrategy { /// Consolidates memories that are semantically novel compared to existing LTM content. NoveltyBased, /// A default strategy that combines importance, frequency, recency, and novelty. + #[default] Combined, } -impl Default for ConsolidationStrategy { - fn default() -> Self { - Self::Combined - } -} - /// An advanced consolidator that can apply different strategies for selecting memories. /// /// This provides more flexible control over the consolidation process than the basic `Consolidator`. @@ -259,7 +254,7 @@ impl AdvancedConsolidator { .cloned() .collect(); - candidates.sort_by(|a, b| b.metadata.access_count.cmp(&a.metadata.access_count)); + candidates.sort_by_key(|b| std::cmp::Reverse(b.metadata.access_count)); let mut count = 0; for entry in candidates.into_iter().take(self.base.config.batch_size) { diff --git a/crates/ineru/src/embedder.rs b/crates/ineru/src/embedder.rs new file mode 100644 index 00000000..c44d5f9a --- /dev/null +++ b/crates/ineru/src/embedder.rs @@ -0,0 +1,78 @@ +//! Text-to-embedding strategies. +//! +//! [`Embedder`] is the unit callers own and inject. Implementations may hold a +//! loaded model (stateful) and may block, so embedding is *not* baked into data +//! structures like `MemoryQuery`. + +use crate::types::Embedding; + +/// Produces semantic embeddings for text. +/// +/// `embed_passage` is for documents/chunks that get stored and searched against; +/// `embed_query` is for search queries. They are distinct because some models +/// (e.g. the E5 family) are trained with asymmetric prefixes, so the right one +/// must be applied at each call site. +pub trait Embedder: Send + Sync { + /// Embed a document/chunk to be stored and searched against. + fn embed_passage(&self, text: &str) -> Embedding; + /// Embed a search query. + fn embed_query(&self, text: &str) -> Embedding; + /// Dimensionality of the vectors this embedder produces. 
+ fn dimensions(&self) -> usize; +} + +/// 64-dimensional fallback embedder built on the lexical hash scheme +/// (`Embedding::from_text_simple`). Always available; captures lexical overlap, +/// not meaning. The hash scheme is symmetric, so passage and query embeddings +/// are identical and no prefixes are applied. +#[derive(Debug, Default, Clone, Copy)] +pub struct HashEmbedder; + +impl HashEmbedder { + /// Creates a new `HashEmbedder`. + pub fn new() -> Self { + Self + } +} + +impl Embedder for HashEmbedder { + fn embed_passage(&self, text: &str) -> Embedding { + Embedding::from_text_simple(text) + } + + fn embed_query(&self, text: &str) -> Embedding { + Embedding::from_text_simple(text) + } + + fn dimensions(&self) -> usize { + 64 + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn hash_embedder_has_64_dimensions() { + let e = HashEmbedder::new(); + assert_eq!(e.dimensions(), 64); + } + + #[test] + fn hash_embedder_produces_64_dim_vectors() { + let e = HashEmbedder::new(); + let p = e.embed_passage("the quick brown fox"); + let q = e.embed_query("the quick brown fox"); + assert_eq!(p.0.len(), 64); + assert_eq!(q.0.len(), 64); + } + + #[test] + fn hash_embedder_is_deterministic() { + let e = HashEmbedder::new(); + let a = e.embed_passage("hello world"); + let b = e.embed_passage("hello world"); + assert_eq!(a.0, b.0); + } +} diff --git a/crates/ineru/src/hnsw.rs b/crates/ineru/src/hnsw.rs index 4efa30f6..55f7bc84 100644 --- a/crates/ineru/src/hnsw.rs +++ b/crates/ineru/src/hnsw.rs @@ -676,7 +676,7 @@ struct HnswSnapshotLegacy { fn rand_f64() -> f64 { use std::cell::Cell; thread_local! 
{ - static SEED: Cell = Cell::new(0x12345678_9abcdef0); + static SEED: Cell = const { Cell::new(0x12345678_9abcdef0) }; } SEED.with(|s| { let mut x = s.get(); diff --git a/crates/ineru/src/lib.rs b/crates/ineru/src/lib.rs index 31e17d4c..2c9c700a 100644 --- a/crates/ineru/src/lib.rs +++ b/crates/ineru/src/lib.rs @@ -65,6 +65,7 @@ pub mod config; pub mod consolidation; +mod embedder; pub mod error; pub mod hnsw; pub mod ltm; @@ -73,6 +74,7 @@ pub mod types; pub use config::{ConsolidationConfig, LtmConfig, MemoryConfig, StmConfig}; pub use consolidation::Consolidator; +pub use embedder::{Embedder, HashEmbedder}; pub use error::{Error, Result}; pub use ltm::{KnowledgeGraph, LongTermMemory}; pub use stm::ShortTermMemory; @@ -102,7 +104,7 @@ impl IneruMemory { /// # Arguments /// /// * `config` - The `MemoryConfig` that defines the behavior and capacity - /// of the STM, LTM, and consolidation process. + /// of the STM, LTM, and consolidation process. pub fn new(config: MemoryConfig) -> Self { Self { stm: ShortTermMemory::new(config.stm.clone()), @@ -148,7 +150,7 @@ impl IneruMemory { /// /// * `entry` - The `MemoryEntry` to store. /// * `importance` - A float score determining the entry's importance. Higher values - /// make it more likely to be consolidated into LTM. + /// make it more likely to be consolidated into LTM. /// /// # Returns /// @@ -531,9 +533,8 @@ mod tests { memory.remember_important(entry, 0.9).unwrap(); } - let consolidated = memory.consolidate().unwrap(); - // Consolidation may or may not move entries depending on thresholds - assert!(consolidated >= 0); + // Consolidation may or may not move entries depending on thresholds; just ensure it runs. + let _consolidated = memory.consolidate().unwrap(); } #[test] @@ -600,9 +601,8 @@ mod tests { memory.remember(entry).unwrap(); } - let pruned = memory.prune_stm().unwrap(); - // Should have pruned some entries - assert!(pruned >= 0); + // Should have pruned some entries (result is usize, always valid). 
+ let _pruned = memory.prune_stm().unwrap(); } #[test] From 18fbd31535142b00320e00bafcc86e6528f087fc Mon Sep 17 00:00:00 2001 From: ApiliumDevTeam Date: Fri, 26 Jun 2026 20:22:44 +0200 Subject: [PATCH 18/72] test(ineru): assert HashEmbedder passage/query symmetry --- crates/ineru/src/embedder.rs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/crates/ineru/src/embedder.rs b/crates/ineru/src/embedder.rs index c44d5f9a..11f0e19c 100644 --- a/crates/ineru/src/embedder.rs +++ b/crates/ineru/src/embedder.rs @@ -75,4 +75,12 @@ mod tests { let b = e.embed_passage("hello world"); assert_eq!(a.0, b.0); } + + #[test] + fn hash_embedder_passage_and_query_are_identical() { + let e = HashEmbedder::new(); + let p = e.embed_passage("test input"); + let q = e.embed_query("test input"); + assert_eq!(p.0, q.0); + } } From be4e55a5ff4dc7f3b8a0564a5753f75318cc1f4b Mon Sep 17 00:00:00 2001 From: ApiliumDevTeam Date: Fri, 26 Jun 2026 20:23:48 +0200 Subject: [PATCH 19/72] feat(ineru): add MemoryQuery::with_embedding injection hook --- crates/ineru/src/types.rs | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/crates/ineru/src/types.rs b/crates/ineru/src/types.rs index db160588..869249a5 100644 --- a/crates/ineru/src/types.rs +++ b/crates/ineru/src/types.rs @@ -341,6 +341,16 @@ impl MemoryQuery { self.min_importance = Some(importance); self } + + /// Attaches (or replaces) the embedding vector used for similarity search. + /// + /// Callers that own an [`crate::Embedder`] use this to inject a vector computed + /// by a real model, overriding the default lexical-hash embedding that + /// [`MemoryQuery::text`] currently attaches. + pub fn with_embedding(mut self, embedding: Embedding) -> Self { + self.embedding = Some(embedding); + self + } } /// A single result returned from a memory query. 
@@ -580,4 +590,15 @@ mod tests { assert_eq!(entity.name, "temp_001"); assert!(entity.properties.contains_key("location")); } + + #[test] + fn with_embedding_overrides_query_vector() { + let injected = Embedding::new(vec![0.25; 384]); + let q = MemoryQuery::text("perro").with_embedding(injected.clone()); + let emb = q.embedding.expect("embedding present"); + assert_eq!(emb.0.len(), 384); + assert_eq!(emb.0, injected.0); + // text is still retained + assert_eq!(q.text.as_deref(), Some("perro")); + } } From be9956b8deee0d747a9724afd39b85b4c79f2411 Mon Sep 17 00:00:00 2001 From: ApiliumDevTeam Date: Fri, 26 Jun 2026 20:28:18 +0200 Subject: [PATCH 20/72] feat(ineru): add NeuralEmbedder (e5-small via fastembed, offline) --- .gitignore | 6 +- crates/ineru/src/embedder.rs | 118 +++++++++++++++++++++++++++++++++++ crates/ineru/src/lib.rs | 2 + 3 files changed, 125 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index c5fdb7ba..1e7ec6fc 100644 --- a/.gitignore +++ b/.gitignore @@ -24,6 +24,7 @@ CLAUDE.md .claudeignore .mcp.json docs/superpowers/ +.superpowers/ # GitHub Copilot .copilot/ @@ -131,4 +132,7 @@ data/ .env.local .secrets contexto/ -contexto/* \ No newline at end of file +contexto/* + +# Local neural-embedder test models (never commit — ~120MB, fetched on demand) +crates/ineru/test-models/ \ No newline at end of file diff --git a/crates/ineru/src/embedder.rs b/crates/ineru/src/embedder.rs index 11f0e19c..41a35b54 100644 --- a/crates/ineru/src/embedder.rs +++ b/crates/ineru/src/embedder.rs @@ -49,6 +49,88 @@ impl Embedder for HashEmbedder { } } +#[cfg(feature = "neural-embeddings")] +use std::path::Path; +#[cfg(feature = "neural-embeddings")] +use std::sync::Mutex; + +#[cfg(feature = "neural-embeddings")] +use fastembed::{ + InitOptionsUserDefined, Pooling, TextEmbedding, TokenizerFiles, UserDefinedEmbeddingModel, +}; + +/// Real neural embedder: multilingual-e5-small (384-dim) via fastembed/ONNX, +/// loaded entirely from a local directory (no 
network). E5 is trained with +/// asymmetric prefixes, so `embed_query` prepends `"query: "` and +/// `embed_passage` prepends `"passage: "`. +/// +/// fastembed's `embed` takes `&mut self`, so the model is held behind a `Mutex` +/// to satisfy the `&self` trait methods while staying `Send + Sync`. +#[cfg(feature = "neural-embeddings")] +pub struct NeuralEmbedder { + model: Mutex, +} + +#[cfg(feature = "neural-embeddings")] +impl NeuralEmbedder { + /// Output dimensionality of multilingual-e5-small. + const DIM: usize = 384; + + /// Loads the model from a directory containing `onnx/model.onnx`, + /// `tokenizer.json`, `config.json`, `special_tokens_map.json`, and + /// `tokenizer_config.json`. Returns an error (never panics) if any file is + /// missing or the model fails to initialize, so callers can fall back. + pub fn from_path(dir: &Path) -> crate::Result { + let read = |name: &str| -> crate::Result> { + std::fs::read(dir.join(name)) + .map_err(|e| crate::Error::Storage(format!("reading {name}: {e}"))) + }; + + let onnx = read("onnx/model.onnx")?; + let tokenizer_files = TokenizerFiles { + tokenizer_file: read("tokenizer.json")?, + config_file: read("config.json")?, + special_tokens_map_file: read("special_tokens_map.json")?, + tokenizer_config_file: read("tokenizer_config.json")?, + }; + + // E5 REQUIRES mean pooling; the fastembed default is Cls. 
+ let model = UserDefinedEmbeddingModel::new(onnx, tokenizer_files) + .with_pooling(Pooling::Mean); + let options = InitOptionsUserDefined::new().with_max_length(512); + + let embedding = TextEmbedding::try_new_from_user_defined(model, options) + .map_err(|e| crate::Error::Internal(format!("init e5: {e}")))?; + + Ok(Self { + model: Mutex::new(embedding), + }) + } + + fn embed_one(&self, prefixed: String) -> Embedding { + let mut guard = self.model.lock().expect("embedder mutex poisoned"); + let out = guard + .embed(vec![prefixed], None) + .expect("e5 embed failed"); + Embedding::new(out.into_iter().next().unwrap_or_default()) + } +} + +#[cfg(feature = "neural-embeddings")] +impl Embedder for NeuralEmbedder { + fn embed_passage(&self, text: &str) -> Embedding { + self.embed_one(format!("passage: {text}")) + } + + fn embed_query(&self, text: &str) -> Embedding { + self.embed_one(format!("query: {text}")) + } + + fn dimensions(&self) -> usize { + Self::DIM + } +} + #[cfg(test)] mod tests { use super::*; @@ -84,3 +166,39 @@ mod tests { assert_eq!(p.0, q.0); } } + +#[cfg(all(test, feature = "neural-embeddings"))] +mod neural_tests { + use super::*; + use std::path::PathBuf; + + /// Returns the model dir, or `None` (test skips) if it isn't present. 
+ fn model_dir() -> Option { + let dir = std::env::var("INERU_E5_MODEL_DIR").unwrap_or_else(|_| { + "crates/ineru/test-models/multilingual-e5-small".to_string() + }); + let p = PathBuf::from(dir); + if p.join("onnx/model.onnx").exists() { + Some(p) + } else { + eprintln!("skipping: model files not found at {}", p.display()); + None + } + } + + #[test] + fn neural_embedder_reports_384_dimensions() { + let Some(dir) = model_dir() else { return }; + let e = NeuralEmbedder::from_path(&dir).expect("load model"); + assert_eq!(e.dimensions(), 384); + } + + #[test] + fn neural_embedder_produces_384_dim_vectors() { + let Some(dir) = model_dir() else { return }; + let e = NeuralEmbedder::from_path(&dir).expect("load model"); + let v = e.embed_passage("el perro corre en el parque"); + assert_eq!(v.0.len(), 384); + assert!(v.0.iter().any(|x| *x != 0.0)); + } +} diff --git a/crates/ineru/src/lib.rs b/crates/ineru/src/lib.rs index 2c9c700a..5010de92 100644 --- a/crates/ineru/src/lib.rs +++ b/crates/ineru/src/lib.rs @@ -75,6 +75,8 @@ pub mod types; pub use config::{ConsolidationConfig, LtmConfig, MemoryConfig, StmConfig}; pub use consolidation::Consolidator; pub use embedder::{Embedder, HashEmbedder}; +#[cfg(feature = "neural-embeddings")] +pub use embedder::NeuralEmbedder; pub use error::{Error, Result}; pub use ltm::{KnowledgeGraph, LongTermMemory}; pub use stm::ShortTermMemory; From 61a8e3bcd2b5855760f204a0fa6aa38bab4d8b2c Mon Sep 17 00:00:00 2001 From: ApiliumDevTeam Date: Fri, 26 Jun 2026 20:37:22 +0200 Subject: [PATCH 21/72] test(ineru): resolve neural test model dir via CARGO_MANIFEST_DIR --- crates/ineru/src/embedder.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/ineru/src/embedder.rs b/crates/ineru/src/embedder.rs index 41a35b54..53dab65e 100644 --- a/crates/ineru/src/embedder.rs +++ b/crates/ineru/src/embedder.rs @@ -175,7 +175,7 @@ mod neural_tests { /// Returns the model dir, or `None` (test skips) if it isn't present. 
fn model_dir() -> Option { let dir = std::env::var("INERU_E5_MODEL_DIR").unwrap_or_else(|_| { - "crates/ineru/test-models/multilingual-e5-small".to_string() + concat!(env!("CARGO_MANIFEST_DIR"), "/test-models/multilingual-e5-small").to_string() }); let p = PathBuf::from(dir); if p.join("onnx/model.onnx").exists() { From fd846e1e82053b2e33bd1fc01c6d6187a9cb46bc Mon Sep 17 00:00:00 2001 From: ApiliumDevTeam Date: Fri, 26 Jun 2026 20:56:11 +0200 Subject: [PATCH 22/72] build(ineru): declare neural-embeddings feature and fastembed dep --- crates/ineru/Cargo.toml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/crates/ineru/Cargo.toml b/crates/ineru/Cargo.toml index 272c3d75..f1e8fbe6 100644 --- a/crates/ineru/Cargo.toml +++ b/crates/ineru/Cargo.toml @@ -21,6 +21,9 @@ persistent = [] wasm = [] # Compression for memory entries compression = [] +# Real neural embeddings via fastembed (ONNX). ort loaded dynamically at +# runtime from a controlled path (no build-time binary download, no network). +neural-embeddings = ["dep:fastembed"] [dependencies] # Serialization @@ -40,6 +43,11 @@ log = "0.4" # Optional: SQLite for persistent LTM (matching workspace version) rusqlite = { version = "0.32", default-features = false, features = ["bundled"], optional = true } +# Optional: real neural embeddings (multilingual-e5-small via ONNX). +# default-features off → no hf-hub network deps; ort-load-dynamic → onnxruntime +# is loaded from a runtime path we ship, not downloaded/linked at build time. 
+fastembed = { version = "5", default-features = false, features = ["ort-load-dynamic"], optional = true } + [dev-dependencies] criterion = "0.5" From b1f3a3bafac4052729b665438270f0de4eb45b08 Mon Sep 17 00:00:00 2001 From: ApiliumDevTeam Date: Fri, 26 Jun 2026 20:57:02 +0200 Subject: [PATCH 23/72] build: update Cargo.lock for ineru neural-embeddings feature --- Cargo.lock | 289 +++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 278 insertions(+), 11 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index afc57188..ace3d3d1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -70,7 +70,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75" dependencies = [ "cfg-if", + "getrandom 0.3.4", "once_cell", + "serde", "version_check", "zerocopy", ] @@ -150,7 +152,7 @@ dependencies = [ "async-graphql", "async-graphql-axum", "axum", - "base64", + "base64 0.22.1", "blake3", "chrono", "dashmap 6.1.0", @@ -686,7 +688,7 @@ dependencies = [ "async-graphql-value", "async-trait", "asynk-strim", - "base64", + "base64 0.22.1", "blocking", "bytes", "chrono", @@ -930,7 +932,7 @@ checksum = "8b52af3cb4058c895d37317bb27508dccc8e5f2d39454016b297bf4a400597b8" dependencies = [ "axum-core", "axum-macros", - "base64", + "base64 0.22.1", "bytes", "form_urlencoded", "futures-util", @@ -1010,6 +1012,12 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4c7f02d4ea65f2c1853089ffd8d2787bdbc63de2f0d29dedbcf8ccdfa0ccd4cf" +[[package]] +name = "base64" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e1b586273c5702936fe7b7d6896644d8be71e6314cfe09d3167c95f712589e8" + [[package]] name = "base64" version = "0.22.1" @@ -1513,6 +1521,15 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" +[[package]] 
+name = "castaway" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dec551ab6e7578819132c713a93c022a05d60159dc86e7a7050223577484c55a" +dependencies = [ + "rustversion", +] + [[package]] name = "cbc" version = "0.1.2" @@ -1674,7 +1691,7 @@ checksum = "0b023947811758c97c59bf9d1c188fd619ad4718dcaa767947df1cadb14f39f4" dependencies = [ "glob", "libc", - "libloading", + "libloading 0.8.9", ] [[package]] @@ -1777,6 +1794,21 @@ dependencies = [ "memchr", ] +[[package]] +name = "compact_str" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9dfdd1c2274d9aa354115b09dc9a901d6c5576818cdf70d14cae2bdb47df00ab" +dependencies = [ + "castaway", + "cfg-if", + "itoa", + "rustversion", + "ryu", + "serde", + "static_assertions", +] + [[package]] name = "compression-codecs" version = "0.4.37" @@ -2341,6 +2373,15 @@ dependencies = [ "syn 2.0.117", ] +[[package]] +name = "dary_heap" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b1e3a325bc115f096c8b77bbf027a7c2592230e70be2d985be950d3d5e60ebe" +dependencies = [ + "serde", +] + [[package]] name = "dashmap" version = "5.5.3" @@ -2473,6 +2514,37 @@ dependencies = [ "powerfmt", ] +[[package]] +name = "derive_builder" +version = "0.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "507dfb09ea8b7fa618fcf76e953f4f5e192547945816d5358edffe39f6f94947" +dependencies = [ + "derive_builder_macro", +] + +[[package]] +name = "derive_builder_core" +version = "0.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2d5bcf7b024d6835cfb3d473887cd966994907effbe9227e8c8219824d06c4e8" +dependencies = [ + "darling 0.20.11", + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "derive_builder_macro" +version = "0.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"ab63b0e2bf4d5928aff72e83a7dace85d7bba5fe12dcc3c5a572d78caffd3f3c" +dependencies = [ + "derive_builder_core", + "syn 2.0.117", +] + [[package]] name = "derive_more" version = "2.1.1" @@ -2924,6 +2996,12 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "esaxx-rs" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d817e038c30374a4bcb22f94d0a8a0e216958d4c3dcde369b1439fec4bdda6e6" + [[package]] name = "esp-idf-hal" version = "0.44.1" @@ -3057,6 +3135,21 @@ dependencies = [ "siphasher", ] +[[package]] +name = "fastembed" +version = "5.17.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "545e4fb17fc48768ff36c2a3854aa5b0b809d0ed595ab5530fa8ac94f31bd0ea" +dependencies = [ + "anyhow", + "ndarray", + "ort", + "safetensors 0.8.0", + "serde", + "serde_json", + "tokenizers", +] + [[package]] name = "fastrand" version = "2.3.0" @@ -3962,7 +4055,7 @@ version = "0.1.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "96547c2556ec9d12fb1578c4eaf448b04993e7fb79cbaad930a656880a6bdfa0" dependencies = [ - "base64", + "base64 0.22.1", "bytes", "futures-channel", "futures-util", @@ -4175,6 +4268,7 @@ dependencies = [ "blake3", "chrono", "criterion", + "fastembed", "log", "rusqlite", "serde", @@ -4409,7 +4503,7 @@ version = "10.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0529410abe238729a60b108898784df8984c87f6054c9c4fcacc47e4803c1ce1" dependencies = [ - "base64", + "base64 0.22.1", "ed25519-dalek", "getrandom 0.2.17", "hmac", @@ -4546,6 +4640,16 @@ dependencies = [ "windows-link 0.2.1", ] +[[package]] +name = "libloading" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "754ca22de805bb5744484a5b151a9e1a8e837d5dc232c2d7d8c2e3492edc8b60" +dependencies = [ + "cfg-if", + "windows-link 0.2.1", +] + [[package]] name = "libm" version = "0.2.16" @@ -4691,6 +4795,22 @@ dependencies = [ 
"zerocopy-derive", ] +[[package]] +name = "macro_rules_attribute" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "65049d7923698040cd0b1ddcced9b0eb14dd22c5f86ae59c3740eab64a676520" +dependencies = [ + "macro_rules_attribute-proc_macro", + "paste", +] + +[[package]] +name = "macro_rules_attribute-proc_macro" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "670fdfda89751bc4a84ac13eaa63e205cf0fd22b4c9a5fbfa085b63c1f1d3a30" + [[package]] name = "maplit" version = "1.0.2" @@ -4846,6 +4966,28 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "monostate" +version = "0.1.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3341a273f6c9d5bef1908f17b7267bbab0e95c9bf69a0d4dcf8e9e1b2c76ef67" +dependencies = [ + "monostate-impl", + "serde", + "serde_core", +] + +[[package]] +name = "monostate-impl" +version = "0.1.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e4db6d5580af57bf992f59068d4ea26fd518574ff48d7639b255a36f9de6e7e9" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + [[package]] name = "more-asserts" version = "0.3.1" @@ -5188,6 +5330,28 @@ version = "1.70.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" +[[package]] +name = "onig" +version = "6.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0cc3cbf698f9438986c11a880c90a6d04b9de27575afd28bbf45b154b6c709e2" +dependencies = [ + "bitflags 2.11.0", + "libc", + "once_cell", + "onig_sys", +] + +[[package]] +name = "onig_sys" +version = "69.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e68317604e77e53b85896388e1a803c1d21b74c899ec9e5e1112db90735edd7" +dependencies = [ + "cc", + "pkg-config", +] + [[package]] name = "oorandom" version = "11.1.5" @@ -5311,6 +5475,25 
@@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d" +[[package]] +name = "ort" +version = "2.0.0-rc.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7de3af33d24a745ffb8fab904b13478438d1cd52868e6f17735ef6e1f8bf133" +dependencies = [ + "libloading 0.9.0", + "ndarray", + "ort-sys", + "smallvec", + "tracing", +] + +[[package]] +name = "ort-sys" +version = "2.0.0-rc.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7b497d21a8b6fbb4b5a544f8fadb77e801a09ae0add9e411d31c6f89e3c1e90" + [[package]] name = "oxilangtag" version = "0.1.5" @@ -5478,7 +5661,7 @@ version = "3.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1d30c53c26bc5b31a98cd02d20f25a7c8567146caf63ed593a9d87b2775291be" dependencies = [ - "base64", + "base64 0.22.1", "serde_core", ] @@ -6030,6 +6213,17 @@ dependencies = [ "rayon-core", ] +[[package]] +name = "rayon-cond" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2964d0cf57a3e7a06e8183d14a8b527195c706b7983549cd5462d5aa3747438f" +dependencies = [ + "either", + "itertools 0.14.0", + "rayon", +] + [[package]] name = "rayon-core" version = "1.13.0" @@ -6211,7 +6405,7 @@ version = "0.12.28" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "eddd3ca559203180a307f12d114c268abf583f59b03cb906fd0b3ff8646c1147" dependencies = [ - "base64", + "base64 0.22.1", "bytes", "encoding_rs", "futures-core", @@ -6356,7 +6550,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0810a9f717d9828f475fe1f629f4c305c8464b7f496c3a854b58d29e65f4058e" dependencies = [ "async-trait", - "base64", + "base64 0.22.1", "bytes", "chrono", "futures", @@ -6666,6 +6860,19 @@ dependencies = [ "serde_json", ] +[[package]] +name = "safetensors" +version = "0.8.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "79b079b829cb27a1c3c374341345ed2e8b2c0c839034522cee576c140bd7f846" +dependencies = [ + "hashbrown 0.16.1", + "libc", + "serde", + "serde_json", + "tempfile", +] + [[package]] name = "same-file" version = "1.0.6" @@ -7168,6 +7375,18 @@ dependencies = [ "der", ] +[[package]] +name = "spm_precompiled" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5851699c4033c63636f7ea4cf7b7c1f1bf06d0cc03cfb42e711de5a5c46cf326" +dependencies = [ + "base64 0.13.1", + "nom", + "serde", + "unicode-segmentation", +] + [[package]] name = "sse-stream" version = "0.2.3" @@ -7276,7 +7495,7 @@ version = "0.17.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2e0fd33c04d4617df42c9c84c698511c59f59869629fb7a193067eec41bce347" dependencies = [ - "base64", + "base64 0.22.1", "crc", "lazy_static", "md-5", @@ -7564,6 +7783,39 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" +[[package]] +name = "tokenizers" +version = "0.22.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b238e22d44a15349529690fb07bd645cf58149a1b1e44d6cb5bd1641ff1a6223" +dependencies = [ + "ahash 0.8.12", + "aho-corasick", + "compact_str", + "dary_heap", + "derive_builder", + "esaxx-rs", + "getrandom 0.3.4", + "itertools 0.14.0", + "log", + "macro_rules_attribute", + "monostate", + "onig", + "paste", + "rand 0.9.2", + "rayon", + "rayon-cond", + "regex", + "regex-syntax", + "serde", + "serde_json", + "spm_precompiled", + "thiserror 2.0.18", + "unicode-normalization-alignments", + "unicode-segmentation", + "unicode_categories", +] + [[package]] name = "tokio" version = "1.50.0" @@ -7888,7 +8140,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b6a8b8ac3543b2a8eb0b28c7ac3d5f2db6221e057f3b3ae47cf7637b1333a5c3" 
dependencies = [ "async-trait", - "base64", + "base64 0.22.1", "futures", "log", "md-5", @@ -7947,6 +8199,15 @@ version = "1.0.24" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" +[[package]] +name = "unicode-normalization-alignments" +version = "0.1.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43f613e4fa046e69818dd287fdc4bc78175ff20331479dab6e1b0f98d57062de" +dependencies = [ + "smallvec", +] + [[package]] name = "unicode-segmentation" version = "1.12.0" @@ -7959,6 +8220,12 @@ version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" +[[package]] +name = "unicode_categories" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39ec24b3121d976906ece63c9daad25b85969647682eee313cb5779fdd69e14e" + [[package]] name = "universal-hash" version = "0.5.1" From e5a55779035b6864addd4aa7ee0ca765e91773fc Mon Sep 17 00:00:00 2001 From: ApiliumDevTeam Date: Fri, 26 Jun 2026 21:00:39 +0200 Subject: [PATCH 24/72] refactor(ineru): fail loudly on empty embed batch; note lock serialization --- crates/ineru/src/embedder.rs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/crates/ineru/src/embedder.rs b/crates/ineru/src/embedder.rs index 53dab65e..f102a2d1 100644 --- a/crates/ineru/src/embedder.rs +++ b/crates/ineru/src/embedder.rs @@ -66,6 +66,7 @@ use fastembed::{ /// /// fastembed's `embed` takes `&mut self`, so the model is held behind a `Mutex` /// to satisfy the `&self` trait methods while staying `Send + Sync`. +/// Concurrent callers serialize through this lock for the duration of inference. 
#[cfg(feature = "neural-embeddings")] pub struct NeuralEmbedder { model: Mutex, @@ -112,7 +113,11 @@ impl NeuralEmbedder { let out = guard .embed(vec![prefixed], None) .expect("e5 embed failed"); - Embedding::new(out.into_iter().next().unwrap_or_default()) + let vector = out + .into_iter() + .next() + .expect("e5 returned empty batch for single-item input"); + Embedding::new(vector) } } From 3279fba963d11598605af2c3cd7bc941a837ebc7 Mon Sep 17 00:00:00 2001 From: ApiliumDevTeam Date: Fri, 26 Jun 2026 21:01:49 +0200 Subject: [PATCH 25/72] test(ineru): assert NeuralEmbedder semantic quality and prefixes --- crates/ineru/src/embedder.rs | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/crates/ineru/src/embedder.rs b/crates/ineru/src/embedder.rs index f102a2d1..41bddc45 100644 --- a/crates/ineru/src/embedder.rs +++ b/crates/ineru/src/embedder.rs @@ -206,4 +206,35 @@ mod neural_tests { assert_eq!(v.0.len(), 384); assert!(v.0.iter().any(|x| *x != 0.0)); } + + #[test] + fn neural_embedder_captures_semantic_similarity() { + let Some(dir) = model_dir() else { return }; + let e = NeuralEmbedder::from_path(&dir).expect("load model"); + + let perro = e.embed_query("perro"); + let can = e.embed_passage("can"); + let finanzas = e.embed_passage("finanzas"); + + let near = perro.cosine_similarity(&can); + let far = perro.cosine_similarity(&finanzas); + + // "perro" (dog) is semantically closer to "can" (dog, formal/archaic ES) + // than to "finanzas" (finance). A real model captures this; the hash one can't. + assert!( + near > far, + "expected sim(perro,can)={near} > sim(perro,finanzas)={far}" + ); + } + + #[test] + fn neural_embedder_applies_distinct_prefixes() { + let Some(dir) = model_dir() else { return }; + let e = NeuralEmbedder::from_path(&dir).expect("load model"); + + // Same raw text, different prefixes → different vectors. 
+ let as_query = e.embed_query("documento"); + let as_passage = e.embed_passage("documento"); + assert_ne!(as_query.0, as_passage.0); + } } From e7063b42d2fef25c947b73a92aa4a6dc391023eb Mon Sep 17 00:00:00 2001 From: ApiliumDevTeam Date: Fri, 26 Jun 2026 21:45:10 +0200 Subject: [PATCH 26/72] test(ineru): use sentence-level inputs for E5 semantic similarity assertion --- crates/ineru/src/embedder.rs | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/crates/ineru/src/embedder.rs b/crates/ineru/src/embedder.rs index 41bddc45..36478d99 100644 --- a/crates/ineru/src/embedder.rs +++ b/crates/ineru/src/embedder.rs @@ -212,18 +212,24 @@ mod neural_tests { let Some(dir) = model_dir() else { return }; let e = NeuralEmbedder::from_path(&dir).expect("load model"); - let perro = e.embed_query("perro"); - let can = e.embed_passage("can"); - let finanzas = e.embed_passage("finanzas"); - - let near = perro.cosine_similarity(&can); - let far = perro.cosine_similarity(&finanzas); - - // "perro" (dog) is semantically closer to "can" (dog, formal/archaic ES) - // than to "finanzas" (finance). A real model captures this; the hash one can't. + // E5 is trained for sentence/passage retrieval, which is exactly how this + // embedder is used (queries = questions, chunks = sentences). Isolated + // single words cluster too tightly to test meaningfully; realistic + // sentence-level inputs produce a clear semantic margin. + let query = e.embed_query("¿Cómo debo cuidar a mi perro?"); + let related = + e.embed_passage("Los perros necesitan paseos diarios, agua fresca y una dieta equilibrada."); + let unrelated = + e.embed_passage("La bolsa de valores cerró hoy con fuertes pérdidas para los inversores."); + + let near = query.cosine_similarity(&related); + let far = query.cosine_similarity(&unrelated); + + // A real model ranks the dog-care passage above the stock-market one for a + // dog-care question. The 64-dim hash embedder cannot. 
assert!( near > far, - "expected sim(perro,can)={near} > sim(perro,finanzas)={far}" + "expected sim(query,related)={near} > sim(query,unrelated)={far}" ); } From f0f3b22ea2e5a4c2aa31856df38eb9d004aed588 Mon Sep 17 00:00:00 2001 From: ApiliumDevTeam Date: Fri, 26 Jun 2026 21:51:36 +0200 Subject: [PATCH 27/72] docs: correct test-model size note in gitignore --- .gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 1e7ec6fc..a68b1540 100644 --- a/.gitignore +++ b/.gitignore @@ -134,5 +134,5 @@ data/ contexto/ contexto/* -# Local neural-embedder test models (never commit — ~120MB, fetched on demand) +# Local neural-embedder test models (never commit — ~470MB fp32 e5-small, fetched on demand) crates/ineru/test-models/ \ No newline at end of file From 0f28b9132f1070a62079e03b374f754f8b025a47 Mon Sep 17 00:00:00 2001 From: ApiliumDevTeam Date: Fri, 26 Jun 2026 21:52:05 +0200 Subject: [PATCH 28/72] chore: gitignore local onnxruntime dylib dir --- .gitignore | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index a68b1540..3d9e8360 100644 --- a/.gitignore +++ b/.gitignore @@ -135,4 +135,6 @@ contexto/ contexto/* # Local neural-embedder test models (never commit — ~470MB fp32 e5-small, fetched on demand) -crates/ineru/test-models/ \ No newline at end of file +crates/ineru/test-models/ +# Local ONNX Runtime dylib for running the gated neural tests (set ORT_DYLIB_PATH to it) +.tmp-ort/ \ No newline at end of file From 107ca6783b4dfb4a8bcf796236fa598e43eaccfc Mon Sep 17 00:00:00 2001 From: ApiliumDevTeam Date: Fri, 26 Jun 2026 22:47:40 +0200 Subject: [PATCH 29/72] feat(ineru): add Embedder::relevance_thresholds (hash default, e5 override) --- crates/ineru/src/embedder.rs | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/crates/ineru/src/embedder.rs b/crates/ineru/src/embedder.rs index 36478d99..b96fb887 100644 --- a/crates/ineru/src/embedder.rs +++ 
b/crates/ineru/src/embedder.rs @@ -19,6 +19,13 @@ pub trait Embedder: Send + Sync { fn embed_query(&self, text: &str) -> Embedding; /// Dimensionality of the vectors this embedder produces. fn dimensions(&self) -> usize; + /// `(strong, weak)` cosine-similarity cutoffs for this embedder's score + /// distribution: at/above `strong` a match corroborates; below `weak` it is + /// noise. The default suits the lexical-hash scale; model-based embedders + /// override it. + fn relevance_thresholds(&self) -> (f32, f32) { + (0.55, 0.30) + } } /// 64-dimensional fallback embedder built on the lexical hash scheme @@ -134,6 +141,10 @@ impl Embedder for NeuralEmbedder { fn dimensions(&self) -> usize { Self::DIM } + + fn relevance_thresholds(&self) -> (f32, f32) { + (0.80, 0.77) + } } #[cfg(test)] @@ -170,6 +181,12 @@ mod tests { let q = e.embed_query("test input"); assert_eq!(p.0, q.0); } + + #[test] + fn hash_embedder_relevance_thresholds() { + let e = HashEmbedder::new(); + assert_eq!(e.relevance_thresholds(), (0.55, 0.30)); + } } #[cfg(all(test, feature = "neural-embeddings"))] @@ -243,4 +260,13 @@ mod neural_tests { let as_passage = e.embed_passage("documento"); assert_ne!(as_query.0, as_passage.0); } + + #[test] + fn neural_embedder_relevance_thresholds() { + // Calibrated to multilingual-e5-small's anisotropic cosine scale: + // unrelated sentence pairs ceil ~0.76, related floor ~0.81. 
+ let Some(dir) = model_dir() else { return }; + let e = NeuralEmbedder::from_path(&dir).expect("load model"); + assert_eq!(e.relevance_thresholds(), (0.80, 0.77)); + } } From 1cac4d4d537604662efe51d1c99b836d6586ef18 Mon Sep 17 00:00:00 2001 From: ApiliumDevTeam Date: Fri, 26 Jun 2026 22:53:36 +0200 Subject: [PATCH 30/72] feat(cortex): add neural-embeddings feature and build_embedder selector --- crates/aingle_cortex/Cargo.toml | 3 + crates/aingle_cortex/src/embedder.rs | 92 ++++++++++++++++++++++++++++ crates/aingle_cortex/src/lib.rs | 1 + 3 files changed, 96 insertions(+) create mode 100644 crates/aingle_cortex/src/embedder.rs diff --git a/crates/aingle_cortex/Cargo.toml b/crates/aingle_cortex/Cargo.toml index c4a37443..fe6eacad 100644 --- a/crates/aingle_cortex/Cargo.toml +++ b/crates/aingle_cortex/Cargo.toml @@ -25,6 +25,9 @@ dag = ["cluster", "aingle_graph/dag", "aingle_graph/dag-sign", "aingle_raft/dag" mcp = ["dep:rmcp", "dep:schemars"] mcp-http = ["mcp", "rmcp/transport-streamable-http-server", "rmcp/server-side-http"] mcp-oauth = ["mcp-http", "dep:jsonwebtoken"] +# Real neural embeddings: forwards to ineru's fastembed-backed embedder. +# Off by default — default cortex build stays hash-only (MSRV 1.83 unaffected). +neural-embeddings = ["ineru/neural-embeddings"] full =["rest", "graphql", "sparql", "auth", "dag"] [[bin]] diff --git a/crates/aingle_cortex/src/embedder.rs b/crates/aingle_cortex/src/embedder.rs new file mode 100644 index 00000000..3101bd32 --- /dev/null +++ b/crates/aingle_cortex/src/embedder.rs @@ -0,0 +1,92 @@ +// Copyright 2019-2026 Apilium Technologies OÜ. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 OR Commercial + +//! Embedder selection and index-migration helpers for Cortex. +//! +//! Chooses a `NeuralEmbedder` when the `neural-embeddings` feature is on and a +//! model directory is available, else falls back to `HashEmbedder`. Also owns +//! 
the dimension-sidecar bookkeeping used to detect an embedder change and the +//! registry-clear that forces a re-ingest after one. + +use ineru::{Embedder, HashEmbedder}; +use std::sync::Arc; + +/// Builds the active embedder. Returns a `NeuralEmbedder` only when cortex is +/// compiled with `neural-embeddings` AND `model_dir` is `Some` AND the model +/// loads; otherwise a `HashEmbedder`. Never panics — embedding must not be able +/// to take the server down. +pub fn build_embedder(model_dir: Option<&str>) -> Arc { + #[cfg(feature = "neural-embeddings")] + if let Some(dir) = model_dir { + match ineru::NeuralEmbedder::from_path(std::path::Path::new(dir)) { + Ok(e) => { + log::info!("Using neural embedder (multilingual-e5-small) from {dir}"); + return Arc::new(e); + } + Err(e) => { + log::warn!("Failed to load neural embedder from {dir}: {e}. Using hash embedder."); + } + } + } + #[cfg(not(feature = "neural-embeddings"))] + if model_dir.is_some() { + log::warn!( + "--embed-model was set but cortex was built without the `neural-embeddings` \ + feature; using the hash embedder." + ); + } + Arc::new(HashEmbedder::new()) +} + +/// Reads the persisted embedder dimensionality from `/embedder.dims`. +/// Returns `None` if the sidecar is absent or unparseable. +pub fn read_persisted_dims(dir: &std::path::Path) -> Option { + let raw = std::fs::read_to_string(dir.join("embedder.dims")).ok()?; + raw.trim().parse::().ok() +} + +/// Writes the active embedder dimensionality to `/embedder.dims`. +pub fn write_dims(dir: &std::path::Path, dims: usize) { + if let Err(e) = std::fs::write(dir.join("embedder.dims"), dims.to_string()) { + log::warn!("Failed to write embedder.dims sidecar: {e}"); + } +} + +/// Deletes every `aingle:source_hash` registry triple so the next ingest treats +/// all files as new and re-embeds them. Returns the number removed. 
+pub fn clear_source_registry(graph: &aingle_graph::GraphDB) -> usize { + use aingle_graph::{Predicate, TriplePattern}; + let pattern = + TriplePattern::any().with_predicate(Predicate::named(crate::service::ingest::PRED_SOURCE_HASH)); + let ids: Vec<_> = match graph.find(pattern) { + Ok(ts) => ts.into_iter().map(|t| t.id()).collect(), + Err(e) => { + log::warn!("clear_source_registry: graph find failed: {e}"); + return 0; + } + }; + let mut removed = 0; + for id in &ids { + if matches!(graph.delete(id), Ok(true)) { + removed += 1; + } + } + removed +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn build_embedder_without_model_is_hash_64d() { + let e = build_embedder(None); + assert_eq!(e.dimensions(), 64); + } + + #[test] + fn build_embedder_missing_dir_falls_back_to_hash() { + let e = build_embedder(Some("/nonexistent/model/dir")); + assert_eq!(e.dimensions(), 64); + } +} diff --git a/crates/aingle_cortex/src/lib.rs b/crates/aingle_cortex/src/lib.rs index 4372feed..58242cdc 100644 --- a/crates/aingle_cortex/src/lib.rs +++ b/crates/aingle_cortex/src/lib.rs @@ -164,6 +164,7 @@ #[cfg(feature = "auth")] pub mod auth; pub mod client; +pub mod embedder; #[cfg(feature = "cluster")] pub mod cluster_init; pub mod error; From 845905f808935689ef7eaacefeb665ed7575b785 Mon Sep 17 00:00:00 2001 From: ApiliumDevTeam Date: Fri, 26 Jun 2026 23:04:44 +0200 Subject: [PATCH 31/72] refactor(cortex): clear_source_registry returns Result; add embedder helper tests --- crates/aingle_cortex/src/embedder.rs | 45 ++++++++++++++++++---------- 1 file changed, 30 insertions(+), 15 deletions(-) diff --git a/crates/aingle_cortex/src/embedder.rs b/crates/aingle_cortex/src/embedder.rs index 3101bd32..22b02bab 100644 --- a/crates/aingle_cortex/src/embedder.rs +++ b/crates/aingle_cortex/src/embedder.rs @@ -20,7 +20,7 @@ pub fn build_embedder(model_dir: Option<&str>) -> Arc { if let Some(dir) = model_dir { match ineru::NeuralEmbedder::from_path(std::path::Path::new(dir)) { Ok(e) => { - 
log::info!("Using neural embedder (multilingual-e5-small) from {dir}"); + log::info!("Using neural embedder from {dir}"); return Arc::new(e); } Err(e) => { @@ -40,7 +40,7 @@ pub fn build_embedder(model_dir: Option<&str>) -> Arc { /// Reads the persisted embedder dimensionality from `/embedder.dims`. /// Returns `None` if the sidecar is absent or unparseable. -pub fn read_persisted_dims(dir: &std::path::Path) -> Option { +pub fn read_dims(dir: &std::path::Path) -> Option { let raw = std::fs::read_to_string(dir.join("embedder.dims")).ok()?; raw.trim().parse::().ok() } @@ -54,24 +54,20 @@ pub fn write_dims(dir: &std::path::Path, dims: usize) { /// Deletes every `aingle:source_hash` registry triple so the next ingest treats /// all files as new and re-embeds them. Returns the number removed. -pub fn clear_source_registry(graph: &aingle_graph::GraphDB) -> usize { +pub fn clear_source_registry(graph: &aingle_graph::GraphDB) -> Result { use aingle_graph::{Predicate, TriplePattern}; - let pattern = - TriplePattern::any().with_predicate(Predicate::named(crate::service::ingest::PRED_SOURCE_HASH)); - let ids: Vec<_> = match graph.find(pattern) { - Ok(ts) => ts.into_iter().map(|t| t.id()).collect(), - Err(e) => { - log::warn!("clear_source_registry: graph find failed: {e}"); - return 0; - } - }; + let pattern = TriplePattern::any() + .with_predicate(Predicate::named(crate::service::ingest::PRED_SOURCE_HASH)); + let ids: Vec<_> = graph.find(pattern)?.into_iter().map(|t| t.id()).collect(); let mut removed = 0; for id in &ids { - if matches!(graph.delete(id), Ok(true)) { - removed += 1; + match graph.delete(id) { + Ok(true) => removed += 1, + Ok(false) => {} // already gone — fine + Err(e) => log::warn!("clear_source_registry: delete failed for {id:?}: {e}"), } } - removed + Ok(removed) } #[cfg(test)] @@ -89,4 +85,23 @@ mod tests { let e = build_embedder(Some("/nonexistent/model/dir")); assert_eq!(e.dimensions(), 64); } + + #[test] + fn dims_sidecar_round_trips() { + let dir = 
tempfile::tempdir().unwrap(); + write_dims(dir.path(), 384); + assert_eq!(read_dims(dir.path()), Some(384)); + } + + #[test] + fn read_dims_absent_is_none() { + let dir = tempfile::tempdir().unwrap(); + assert_eq!(read_dims(dir.path()), None); + } + + #[test] + fn clear_source_registry_on_empty_graph_is_zero() { + let graph = aingle_graph::GraphDB::memory().unwrap(); + assert_eq!(clear_source_registry(&graph).unwrap(), 0); + } } From 59d3efece9f9ed6b56589b210651cd662b139d72 Mon Sep 17 00:00:00 2001 From: ApiliumDevTeam Date: Fri, 26 Jun 2026 23:09:45 +0200 Subject: [PATCH 32/72] feat(cortex): add embedder field to AppState (hash default) --- crates/aingle_cortex/src/state.rs | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/crates/aingle_cortex/src/state.rs b/crates/aingle_cortex/src/state.rs index 081f29a1..ba3ebaaa 100644 --- a/crates/aingle_cortex/src/state.rs +++ b/crates/aingle_cortex/src/state.rs @@ -5,7 +5,7 @@ use aingle_graph::GraphDB; use aingle_logic::RuleEngine; -use ineru::IneruMemory; +use ineru::{Embedder, HashEmbedder, IneruMemory}; use std::path::Path; use std::sync::Arc; use tokio::sync::RwLock; @@ -27,6 +27,8 @@ pub struct AppState { pub logic: Arc>, /// The Ineru dual-memory system (STM + LTM with consolidation). pub memory: Arc>, + /// The active text embedder (hash fallback or neural). Shared, thread-safe. + pub embedder: std::sync::Arc, /// The event broadcaster for sending real-time updates to WebSocket subscribers. pub broadcaster: Arc, /// The store for managing and verifying zero-knowledge proofs. 
@@ -94,6 +96,7 @@ impl AppState { graph: Arc::new(RwLock::new(graph)), logic: Arc::new(RwLock::new(logic)), memory: Arc::new(RwLock::new(memory)), + embedder: std::sync::Arc::new(HashEmbedder::new()), broadcaster: Arc::new(EventBroadcaster::new()), proof_store: Arc::new(ProofStore::new()), sandbox_manager: Arc::new(SandboxManager::new()), @@ -138,6 +141,7 @@ impl AppState { graph: Arc::new(RwLock::new(graph)), logic: Arc::new(RwLock::new(logic)), memory: Arc::new(RwLock::new(memory)), + embedder: std::sync::Arc::new(HashEmbedder::new()), broadcaster: Arc::new(EventBroadcaster::new()), proof_store: Arc::new(ProofStore::new()), sandbox_manager: Arc::new(SandboxManager::new()), @@ -182,6 +186,7 @@ impl AppState { graph: Arc::new(RwLock::new(graph)), logic: Arc::new(RwLock::new(logic)), memory: Arc::new(RwLock::new(memory)), + embedder: std::sync::Arc::new(HashEmbedder::new()), broadcaster: Arc::new(EventBroadcaster::new()), proof_store: Arc::new(ProofStore::new()), sandbox_manager: Arc::new(SandboxManager::new()), @@ -295,6 +300,7 @@ impl AppState { graph: Arc::new(RwLock::new(graph)), logic: Arc::new(RwLock::new(logic)), memory: Arc::new(RwLock::new(memory)), + embedder: std::sync::Arc::new(HashEmbedder::new()), broadcaster: Arc::new(EventBroadcaster::new()), proof_store, sandbox_manager: Arc::new(SandboxManager::new()), @@ -552,3 +558,14 @@ impl Default for SandboxManager { Self::new() } } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn appstate_has_default_hash_embedder() { + let state = AppState::new().unwrap(); + assert_eq!(state.embedder.dimensions(), 64); + } +} From 8f7e3784292b59df30656ca86c50d87562f5fbb9 Mon Sep 17 00:00:00 2001 From: ApiliumDevTeam Date: Sat, 27 Jun 2026 09:02:49 +0200 Subject: [PATCH 33/72] feat(cortex): with_db_path_and_embedder with dimension-change migration --- crates/aingle_cortex/src/state.rs | 106 ++++++++++++++++++++++++++++-- 1 file changed, 99 insertions(+), 7 deletions(-) diff --git 
a/crates/aingle_cortex/src/state.rs b/crates/aingle_cortex/src/state.rs index ba3ebaaa..debcbf52 100644 --- a/crates/aingle_cortex/src/state.rs +++ b/crates/aingle_cortex/src/state.rs @@ -221,6 +221,22 @@ impl AppState { pub fn with_db_path( db_path: &str, audit_log_path: Option, + ) -> crate::error::Result { + Self::with_db_path_and_embedder( + db_path, + audit_log_path, + std::sync::Arc::new(HashEmbedder::new()), + ) + } + + /// Like [`with_db_path`] but with an explicit embedder. If a persisted + /// snapshot was produced by a different-dimension embedder, the snapshot is + /// discarded and the `aingle:source_hash` registry is cleared so the next + /// ingest re-embeds everything with this embedder. + pub fn with_db_path_and_embedder( + db_path: &str, + audit_log_path: Option, + embedder: std::sync::Arc, ) -> crate::error::Result { let graph = if db_path == ":memory:" { GraphDB::memory()? @@ -234,13 +250,24 @@ impl AppState { let logic = RuleEngine::new(); - // Load Ineru snapshot if available next to the graph database + // Embedder-change migration + snapshot load (persistent only). 
let memory = if db_path != ":memory:" { - let snapshot_path = Path::new(db_path) - .parent() - .unwrap_or(Path::new(".")) - .join("ineru.snapshot"); - if snapshot_path.exists() { + let dbdir = Path::new(db_path).parent().unwrap_or(Path::new(".")); + let snapshot_path = dbdir.join("ineru.snapshot"); + let active_dims = embedder.dimensions(); + let persisted_dims = crate::embedder::read_dims(dbdir); + let dim_mismatch = snapshot_path.exists() + && persisted_dims.map(|d| d != active_dims).unwrap_or(false); + + if dim_mismatch { + let removed = crate::embedder::clear_source_registry(&graph) + .map_err(|e| crate::error::Error::Internal(format!("clear registry: {e}")))?; + log::warn!( + "Embedder changed ({:?}d → {}d): cleared {} source-hash entries; re-ingest required.", + persisted_dims, active_dims, removed + ); + IneruMemory::agent_mode() + } else if snapshot_path.exists() { match IneruMemory::load_from_file(&snapshot_path) { Ok(mem) => { log::info!("Loaded Ineru snapshot from {}", snapshot_path.display()); @@ -300,7 +327,7 @@ impl AppState { graph: Arc::new(RwLock::new(graph)), logic: Arc::new(RwLock::new(logic)), memory: Arc::new(RwLock::new(memory)), - embedder: std::sync::Arc::new(HashEmbedder::new()), + embedder, broadcaster: Arc::new(EventBroadcaster::new()), proof_store, sandbox_manager: Arc::new(SandboxManager::new()), @@ -353,6 +380,7 @@ impl AppState { } else { log::info!("Ineru snapshot saved to {}", snapshot_path.display()); } + crate::embedder::write_dims(dir, self.embedder.dimensions()); } Ok(()) @@ -568,4 +596,68 @@ mod tests { let state = AppState::new().unwrap(); assert_eq!(state.embedder.dimensions(), 64); } + + #[tokio::test] + async fn embedder_change_clears_source_registry_and_snapshot() { + use aingle_graph::{Predicate, TriplePattern}; + let dir = tempfile::tempdir().unwrap(); + let db = dir.path().join("graph.sled"); + let db_str = db.to_str().unwrap(); + + // First boot with the default (hash, 64d): ingest writes a registry triple, + // flush 
writes snapshot + embedder.dims=64. + { + let state = AppState::with_db_path(db_str, None).unwrap(); + { + let mut g = state.graph.write().await; + g.enable_dag(); + } + std::fs::write(dir.path().join("note.md"), "# N\n\nsled has exclusive locks.\n").unwrap(); + crate::service::ingest::ingest_path(&state, dir.path().to_str().unwrap(), None) + .await + .unwrap(); + state.flush(Some(db.parent().unwrap())).await.unwrap(); + } + + // Registry triple exists on disk now. + { + let state = AppState::with_db_path(db_str, None).unwrap(); + let g = state.graph.read().await; + let n = g + .find(TriplePattern::any().with_predicate(Predicate::named( + crate::service::ingest::PRED_SOURCE_HASH, + ))) + .unwrap() + .len(); + assert!(n >= 1, "registry triple should exist after first ingest"); + } + + // Second boot with a 384d embedder → mismatch → registry cleared, memory empty. + { + let fake_384: std::sync::Arc = std::sync::Arc::new(Fake384); + let state = AppState::with_db_path_and_embedder(db_str, None, fake_384).unwrap(); + let g = state.graph.read().await; + let n = g + .find(TriplePattern::any().with_predicate(Predicate::named( + crate::service::ingest::PRED_SOURCE_HASH, + ))) + .unwrap() + .len(); + assert_eq!(n, 0, "registry must be cleared on embedder dim change"); + } + } + + /// A stand-in 384-dim embedder for migration tests (no model needed). 
+ struct Fake384; + impl Embedder for Fake384 { + fn embed_passage(&self, _t: &str) -> ineru::Embedding { + ineru::Embedding::new(vec![0.0; 384]) + } + fn embed_query(&self, _t: &str) -> ineru::Embedding { + ineru::Embedding::new(vec![0.0; 384]) + } + fn dimensions(&self) -> usize { + 384 + } + } } From c04044cb1b12712eabf50f761ebed8e46b0539ba Mon Sep 17 00:00:00 2001 From: ApiliumDevTeam Date: Sat, 27 Jun 2026 09:13:39 +0200 Subject: [PATCH 34/72] fix(cortex): treat missing embedder.dims sidecar as 64d to migrate legacy data --- crates/aingle_cortex/src/state.rs | 84 +++++++++++++++++++++++++++++-- 1 file changed, 79 insertions(+), 5 deletions(-) diff --git a/crates/aingle_cortex/src/state.rs b/crates/aingle_cortex/src/state.rs index debcbf52..ef460200 100644 --- a/crates/aingle_cortex/src/state.rs +++ b/crates/aingle_cortex/src/state.rs @@ -255,19 +255,20 @@ impl AppState { let dbdir = Path::new(db_path).parent().unwrap_or(Path::new(".")); let snapshot_path = dbdir.join("ineru.snapshot"); let active_dims = embedder.dimensions(); - let persisted_dims = crate::embedder::read_dims(dbdir); - let dim_mismatch = snapshot_path.exists() - && persisted_dims.map(|d| d != active_dims).unwrap_or(false); + // Pre-sidecar databases were written by the 64d hash embedder. 
+ let persisted_dims = crate::embedder::read_dims(dbdir).unwrap_or(64); + let snapshot_exists = snapshot_path.exists(); + let dim_mismatch = snapshot_exists && persisted_dims != active_dims; if dim_mismatch { let removed = crate::embedder::clear_source_registry(&graph) .map_err(|e| crate::error::Error::Internal(format!("clear registry: {e}")))?; log::warn!( - "Embedder changed ({:?}d → {}d): cleared {} source-hash entries; re-ingest required.", + "Embedder changed ({}d → {}d): cleared {} source-hash entries; re-ingest required.", persisted_dims, active_dims, removed ); IneruMemory::agent_mode() - } else if snapshot_path.exists() { + } else if snapshot_exists { match IneruMemory::load_from_file(&snapshot_path) { Ok(mem) => { log::info!("Loaded Ineru snapshot from {}", snapshot_path.display()); @@ -647,6 +648,79 @@ mod tests { } } + #[tokio::test] + async fn legacy_snapshot_without_sidecar_migrates_on_dim_change() { + use aingle_graph::{Predicate, TriplePattern}; + let dir = tempfile::tempdir().unwrap(); + let db = dir.path().join("graph.sled"); + let db_str = db.to_str().unwrap(); + + // First boot with default hash (64d): ingest + flush (writes snapshot + sidecar). + { + let state = AppState::with_db_path(db_str, None).unwrap(); + { + let mut g = state.graph.write().await; + g.enable_dag(); + } + std::fs::write(dir.path().join("n.md"), "# N\n\nsled has exclusive locks.\n").unwrap(); + crate::service::ingest::ingest_path(&state, dir.path().to_str().unwrap(), None) + .await + .unwrap(); + state.flush(Some(db.parent().unwrap())).await.unwrap(); + } + + // Simulate a legacy DB: delete the sidecar so persisted_dims is absent. + std::fs::remove_file(db.parent().unwrap().join("embedder.dims")).unwrap(); + + // Boot with a 384d embedder: absent sidecar must be treated as 64d → mismatch → cleared. 
+ { + let fake_384: std::sync::Arc = std::sync::Arc::new(Fake384); + let state = AppState::with_db_path_and_embedder(db_str, None, fake_384).unwrap(); + let g = state.graph.read().await; + let n = g + .find(TriplePattern::any().with_predicate(Predicate::named( + crate::service::ingest::PRED_SOURCE_HASH, + ))) + .unwrap() + .len(); + assert_eq!(n, 0, "legacy snapshot without sidecar must migrate when dims differ"); + } + } + + #[tokio::test] + async fn same_dims_preserves_snapshot_and_registry() { + use aingle_graph::{Predicate, TriplePattern}; + let dir = tempfile::tempdir().unwrap(); + let db = dir.path().join("graph.sled"); + let db_str = db.to_str().unwrap(); + + { + let state = AppState::with_db_path(db_str, None).unwrap(); + { + let mut g = state.graph.write().await; + g.enable_dag(); + } + std::fs::write(dir.path().join("n.md"), "# N\n\nsled has exclusive locks.\n").unwrap(); + crate::service::ingest::ingest_path(&state, dir.path().to_str().unwrap(), None) + .await + .unwrap(); + state.flush(Some(db.parent().unwrap())).await.unwrap(); + } + + // Second boot with the same default 64d hash embedder: no migration. + { + let state = AppState::with_db_path(db_str, None).unwrap(); + let g = state.graph.read().await; + let n = g + .find(TriplePattern::any().with_predicate(Predicate::named( + crate::service::ingest::PRED_SOURCE_HASH, + ))) + .unwrap() + .len(); + assert!(n >= 1, "same-dims boot must preserve the registry"); + } + } + /// A stand-in 384-dim embedder for migration tests (no model needed). 
struct Fake384; impl Embedder for Fake384 { From b367c70533ed2b24d38fcfc71c19251babc5b811 Mon Sep 17 00:00:00 2001 From: ApiliumDevTeam Date: Sat, 27 Jun 2026 09:18:07 +0200 Subject: [PATCH 35/72] feat(cortex): --embed-model / AINGLE_EMBED_MODEL to select the embedder --- crates/aingle_cortex/src/main.rs | 8 ++++++++ crates/aingle_cortex/src/server.rs | 12 +++++++++++- 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/crates/aingle_cortex/src/main.rs b/crates/aingle_cortex/src/main.rs index d37b9439..374b60f0 100644 --- a/crates/aingle_cortex/src/main.rs +++ b/crates/aingle_cortex/src/main.rs @@ -42,6 +42,7 @@ async fn main() -> Result<(), Box> { } let mut config = CortexConfig::default(); + config.embed_model = std::env::var("AINGLE_EMBED_MODEL").ok(); // Simple argument parsing let mut i = 1; @@ -68,6 +69,12 @@ async fn main() -> Result<(), Box> { i += 1; } } + "--embed-model" => { + if i + 1 < args.len() { + config.embed_model = Some(args[i + 1].clone()); + i += 1; + } + } "--memory" => { config.db_path = Some(":memory:".to_string()); } @@ -338,6 +345,7 @@ fn print_help() { " --db Path to graph database (default: ~/.aingle/cortex/graph.sled)" ); println!(" --memory Use volatile in-memory storage (no persistence)"); + println!(" --embed-model Directory with a neural embedding model (requires --features neural-embeddings; falls back to hash if absent)"); println!(" --flush-interval Periodic flush interval in seconds (default: 300, 0=off)"); println!(" --mcp Serve MCP over stdio (requires --features mcp)"); println!( diff --git a/crates/aingle_cortex/src/server.rs b/crates/aingle_cortex/src/server.rs index 22df5559..0fcad465 100644 --- a/crates/aingle_cortex/src/server.rs +++ b/crates/aingle_cortex/src/server.rs @@ -57,6 +57,10 @@ pub struct CortexConfig { pub mcp_oauth_resource: Option, /// Optional explicit JWKS URL; if None, derived from the issuer (Keycloak certs path). 
pub mcp_oauth_jwks_url: Option, + /// Optional directory containing a neural embedding model. Selects the neural + /// embedder when set and cortex is built with `neural-embeddings`; otherwise + /// the hash embedder is used. + pub embed_model: Option, } impl Default for CortexConfig { @@ -80,6 +84,7 @@ impl Default for CortexConfig { mcp_oauth_issuer: None, mcp_oauth_resource: None, mcp_oauth_jwks_url: None, + embed_model: None, } } } @@ -124,7 +129,12 @@ impl CortexServer { /// - `None` — Sled-backed persistent storage at `~/.aingle/cortex/graph.sled`. pub fn new(config: CortexConfig) -> Result { let db_path = resolve_db_path(&config.db_path); - let state = AppState::with_db_path(&db_path, config.audit_log_path.clone())?; + let embedder = crate::embedder::build_embedder(config.embed_model.as_deref()); + let state = AppState::with_db_path_and_embedder( + &db_path, + config.audit_log_path.clone(), + embedder, + )?; info!("Graph database: {}", db_path); Ok(Self { config, state }) } From 9b1a01829618e6adfb4d9bd29da5cbfb3e350152 Mon Sep 17 00:00:00 2001 From: ApiliumDevTeam Date: Sat, 27 Jun 2026 09:49:12 +0200 Subject: [PATCH 36/72] feat(cortex): embed ingested chunks via AppState embedder --- crates/aingle_cortex/src/service/ingest.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/aingle_cortex/src/service/ingest.rs b/crates/aingle_cortex/src/service/ingest.rs index 75a088b8..965a6023 100644 --- a/crates/aingle_cortex/src/service/ingest.rs +++ b/crates/aingle_cortex/src/service/ingest.rs @@ -11,7 +11,7 @@ use crate::service::triples::{delete_triple, insert_triple_inner}; use crate::state::AppState; use aingle_graph::{NodeId, Predicate, TriplePattern}; use aingle_ingest::{extract, ObjectValue}; -use ineru::{Embedding, MemoryEntry, MemoryId, MemoryMetadata}; +use ineru::{MemoryEntry, MemoryId, MemoryMetadata}; // Bring the graph error type into scope for duplicate-matching in ingest logic. 
use aingle_graph::Error as GraphError; @@ -182,7 +182,7 @@ pub async fn ingest_path( // Write text chunks to Ineru memory for chunk in &extraction.chunks { - let embedding = Embedding::from_text_simple(&chunk.text); + let embedding = state.embedder.embed_passage(&chunk.text); let mut entry = MemoryEntry::new( CHUNK_ENTRY_TYPE, serde_json::json!({ From 748762024bab1a3a886e52dffddfeedb7ad34014 Mon Sep 17 00:00:00 2001 From: ApiliumDevTeam Date: Sat, 27 Jun 2026 09:49:12 +0200 Subject: [PATCH 37/72] feat(cortex): ground via injected query embedding and embedder thresholds --- crates/aingle_cortex/src/service/ground.rs | 31 ++++++++++++---------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/crates/aingle_cortex/src/service/ground.rs b/crates/aingle_cortex/src/service/ground.rs index 4e70da41..98ef86c7 100644 --- a/crates/aingle_cortex/src/service/ground.rs +++ b/crates/aingle_cortex/src/service/ground.rs @@ -8,14 +8,11 @@ use crate::error::Result; use crate::state::AppState; use serde::Serialize; -/// Similarity at/above which a chunk counts as a strong, corroborating match. -const GROUND_HIGH: f32 = 0.55; -/// Similarity below which retrieval is considered ungrounded. -const GROUND_LOW: f32 = 0.30; -/// Number of strong chunks required to call retrieval "grounded". A lone strong -/// chunk is treated as "weak": with the current placeholder embedder a single -/// high score can be spurious, so we require independent corroboration rather -/// than blind-tuning `GROUND_HIGH`. Revisit once a real embedder lands. +/// Number of strong chunks required to call retrieval "grounded". Requiring two +/// independent corroborating sources is a deliberate anti-hallucination policy: +/// a lone strong chunk is surfaced as "weak", not "grounded". The strong/weak +/// similarity cutoffs themselves come from the active embedder via +/// [`ineru::Embedder::relevance_thresholds`]. const MIN_CORROBORATING_CHUNKS: usize = 2; /// A cited chunk of source context. 
@@ -49,11 +46,17 @@ use ineru::MemoryQuery; /// groundedness signal from the best similarity. pub async fn ground(state: &AppState, question: &str, k: usize) -> Result { let k = k.max(1); + let (ground_high, ground_low) = state.embedder.relevance_thresholds(); + let query_vec = state.embedder.embed_query(question); let results = { let mem = state.memory.read().await; - mem.recall(&MemoryQuery::text(question).with_limit(k)) - .map_err(|e| crate::error::Error::Internal(e.to_string()))? + mem.recall( + &MemoryQuery::text(question) + .with_limit(k) + .with_embedding(query_vec), + ) + .map_err(|e| crate::error::Error::Internal(e.to_string()))? }; let mut answer_context = Vec::new(); @@ -94,11 +97,11 @@ pub async fn ground(state: &AppState, question: &str, k: usize) -> Result= GROUND_HIGH) + .filter(|c| c.relevance >= ground_high) .count(); - let groundedness = if best >= GROUND_HIGH && strong >= MIN_CORROBORATING_CHUNKS { + let groundedness = if best >= ground_high && strong >= MIN_CORROBORATING_CHUNKS { "grounded" - } else if best >= GROUND_LOW && !answer_context.is_empty() { + } else if best >= ground_low && !answer_context.is_empty() { "weak" } else { "ungrounded" @@ -108,7 +111,7 @@ pub async fn ground(state: &AppState, question: &str, k: usize) -> Result= GROUND_HIGH && strong < MIN_CORROBORATING_CHUNKS { + if best >= ground_high && strong < MIN_CORROBORATING_CHUNKS { gaps.push( "Only one source corroborates this; a second is needed to be grounded.".to_string(), ); From fe264f2f1c766e54de8360cbed5225fee03e9d19 Mon Sep 17 00:00:00 2001 From: ApiliumDevTeam Date: Sat, 27 Jun 2026 10:57:54 +0200 Subject: [PATCH 38/72] feat(cortex): rank grounding by embedding cosine, not composite recall score --- crates/aingle_cortex/src/service/ground.rs | 86 ++++++++++++++++++++-- 1 file changed, 81 insertions(+), 5 deletions(-) diff --git a/crates/aingle_cortex/src/service/ground.rs b/crates/aingle_cortex/src/service/ground.rs index 98ef86c7..0f30b1f7 100644 --- 
a/crates/aingle_cortex/src/service/ground.rs +++ b/crates/aingle_cortex/src/service/ground.rs @@ -49,23 +49,34 @@ pub async fn ground(state: &AppState, question: &str, k: usize) -> Result query_vec.cosine_similarity(emb), + None => continue, + }; let d = &r.entry.data; let source = d .get("source_path") @@ -82,17 +93,25 @@ pub async fn ground(state: &AppState, question: &str, k: usize) -> Result Date: Sat, 27 Jun 2026 13:34:55 +0200 Subject: [PATCH 39/72] feat(cortex): add SwappableEmbedder for hot-swap with fixed dimensions --- crates/aingle_cortex/src/embedder.rs | 106 ++++++++++++++++++++++++++- 1 file changed, 105 insertions(+), 1 deletion(-) diff --git a/crates/aingle_cortex/src/embedder.rs b/crates/aingle_cortex/src/embedder.rs index 22b02bab..0714fb2a 100644 --- a/crates/aingle_cortex/src/embedder.rs +++ b/crates/aingle_cortex/src/embedder.rs @@ -8,7 +8,7 @@ //! the dimension-sidecar bookkeeping used to detect an embedder change and the //! registry-clear that forces a re-ingest after one. -use ineru::{Embedder, HashEmbedder}; +use ineru::{Embedder, Embedding, HashEmbedder}; use std::sync::Arc; /// Builds the active embedder. Returns a `NeuralEmbedder` only when cortex is @@ -70,6 +70,71 @@ pub fn clear_source_registry(graph: &aingle_graph::GraphDB) -> Result>, + dims: usize, +} + +/// Placeholder delegate before the real model is installed. Returns a zero vector +/// of the fixed dims — harmless for queries (cosine 0 → "ungrounded") and never +/// used for stored passages because ingest is gated on readiness. +struct PendingEmbedder { + dims: usize, +} + +impl Embedder for PendingEmbedder { + fn embed_passage(&self, _text: &str) -> Embedding { Embedding::new(vec![0.0; self.dims]) } + fn embed_query(&self, _text: &str) -> Embedding { Embedding::new(vec![0.0; self.dims]) } + fn dimensions(&self) -> usize { self.dims } +} + +impl SwappableEmbedder { + /// Creates a swappable embedder in the pending state with a fixed dimension. 
+ pub fn new_pending(dims: usize) -> Self { + Self { + inner: std::sync::RwLock::new(Arc::new(PendingEmbedder { dims })), + dims, + } + } + + /// Installs the real delegate. The delegate MUST report the same dimension + /// this swappable was created with; a mismatch is logged and ignored so the + /// index dimension can never change underneath stored vectors. + pub fn install(&self, delegate: Arc) { + if delegate.dimensions() != self.dims { + log::warn!( + "SwappableEmbedder.install rejected: delegate dims {} != fixed {}", + delegate.dimensions(), + self.dims + ); + return; + } + *self.inner.write().expect("swappable embedder poisoned") = delegate; + } +} + +impl Embedder for SwappableEmbedder { + fn embed_passage(&self, text: &str) -> Embedding { + let inner = self.inner.read().expect("swappable embedder poisoned").clone(); + inner.embed_passage(text) + } + fn embed_query(&self, text: &str) -> Embedding { + let inner = self.inner.read().expect("swappable embedder poisoned").clone(); + inner.embed_query(text) + } + fn dimensions(&self) -> usize { self.dims } + fn relevance_thresholds(&self) -> (f32, f32) { + let inner = self.inner.read().expect("swappable embedder poisoned").clone(); + inner.relevance_thresholds() + } +} + #[cfg(test)] mod tests { use super::*; @@ -104,4 +169,43 @@ mod tests { let graph = aingle_graph::GraphDB::memory().unwrap(); assert_eq!(clear_source_registry(&graph).unwrap(), 0); } + + #[test] + fn swappable_reports_fixed_dims_before_and_after_install() { + let s = SwappableEmbedder::new_pending(384); + assert_eq!(s.dimensions(), 384); + let q = s.embed_query("hola"); + assert_eq!(q.0.len(), 384); + assert!(q.0.iter().all(|x| *x == 0.0)); + s.install(std::sync::Arc::new(Fake384)); + assert_eq!(s.dimensions(), 384); + let q2 = s.embed_query("hola"); + assert_eq!(q2.0.len(), 384); + assert!(q2.0.iter().any(|x| *x != 0.0)); + } + + #[test] + fn swappable_rejects_mismatched_dims_install() { + let s = SwappableEmbedder::new_pending(384); + 
s.install(std::sync::Arc::new(ineru::HashEmbedder::new())); // 64d → rejected + let q = s.embed_query("x"); + assert_eq!(q.0.len(), 384); + assert!(q.0.iter().all(|x| *x == 0.0)); + } + + #[test] + fn swappable_delegates_relevance_thresholds_after_install() { + let s = SwappableEmbedder::new_pending(384); + s.install(std::sync::Arc::new(Fake384)); + assert_eq!(s.relevance_thresholds(), (0.80, 0.77)); + } + + /// 384-dim test delegate with non-zero output and the e5 thresholds. + struct Fake384; + impl ineru::Embedder for Fake384 { + fn embed_passage(&self, _t: &str) -> ineru::Embedding { ineru::Embedding::new(vec![0.5; 384]) } + fn embed_query(&self, _t: &str) -> ineru::Embedding { ineru::Embedding::new(vec![0.5; 384]) } + fn dimensions(&self) -> usize { 384 } + fn relevance_thresholds(&self) -> (f32, f32) { (0.80, 0.77) } + } } From df79eb8627f5cfbcc45d2d4d8cd57b5de9c85903 Mon Sep 17 00:00:00 2001 From: ApiliumDevTeam Date: Sat, 27 Jun 2026 16:48:45 +0200 Subject: [PATCH 40/72] fix(cortex): report a gap when retrieval is ungrounded but returned chunks --- crates/aingle_cortex/src/service/ground.rs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/crates/aingle_cortex/src/service/ground.rs b/crates/aingle_cortex/src/service/ground.rs index 0f30b1f7..f9c4264f 100644 --- a/crates/aingle_cortex/src/service/ground.rs +++ b/crates/aingle_cortex/src/service/ground.rs @@ -137,6 +137,13 @@ pub async fn ground(state: &AppState, question: &str, k: usize) -> Result Date: Sat, 27 Jun 2026 17:41:09 +0200 Subject: [PATCH 41/72] feat(cortex): vault_map structural derivation (hubs, orphans, tags, types) --- crates/aingle_cortex/src/service/mod.rs | 1 + crates/aingle_cortex/src/service/vault_map.rs | 245 ++++++++++++++++++ 2 files changed, 246 insertions(+) create mode 100644 crates/aingle_cortex/src/service/vault_map.rs diff --git a/crates/aingle_cortex/src/service/mod.rs b/crates/aingle_cortex/src/service/mod.rs index e68afa2b..7423d727 100644 --- 
a/crates/aingle_cortex/src/service/mod.rs +++ b/crates/aingle_cortex/src/service/mod.rs @@ -16,3 +16,4 @@ pub mod sparql; pub mod stats; pub mod triples; pub mod validate; +pub mod vault_map; diff --git a/crates/aingle_cortex/src/service/vault_map.rs b/crates/aingle_cortex/src/service/vault_map.rs new file mode 100644 index 00000000..b257b37f --- /dev/null +++ b/crates/aingle_cortex/src/service/vault_map.rs @@ -0,0 +1,245 @@ +// Copyright 2019-2026 Apilium Technologies OÜ. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 OR Commercial + +//! Vault Map: a deterministic, offline map + navigation manual derived from the +//! semantic graph (links/tags/types) and neural embeddings (semantic topics). + +use serde::Serialize; +use std::collections::BTreeMap; + +/// The full vault map returned to the UI and the connected AI. +#[derive(Debug, Clone, Serialize, Default)] +pub struct VaultMap { + pub totals: Totals, + pub entry_points: Vec, + pub topics: Vec, + pub tag_clusters: Vec, + pub orphans: Vec, + pub tags: Vec, + pub types: Vec, + pub graph: GraphView, + pub guidance: String, +} + +#[derive(Debug, Clone, Serialize, Default)] +pub struct Totals { + pub notes: usize, + pub links: usize, + pub clusters: usize, + pub orphans: usize, +} + +#[derive(Debug, Clone, Serialize)] +pub struct EntryPoint { + pub path: String, + pub title: String, + pub in_links: usize, + pub out_links: usize, +} + +#[derive(Debug, Clone, Serialize)] +pub struct Topic { + pub id: usize, + pub label: String, + pub representative: String, + pub notes: Vec, + pub size: usize, +} + +#[derive(Debug, Clone, Serialize)] +pub struct TagGroup { + pub tag: String, + pub notes: Vec, +} + +#[derive(Debug, Clone, Serialize)] +pub struct TagCount { + pub tag: String, + pub count: usize, +} + +#[derive(Debug, Clone, Serialize)] +pub struct TypeCount { + pub ty: String, + pub count: usize, +} + +#[allow(dead_code)] // used in MM-1 assembly +#[derive(Debug, Clone, Serialize, Default)] +pub struct 
GraphView { + pub nodes: Vec, + pub edges: Vec, +} + +#[allow(dead_code)] // used in MM-1 assembly +#[derive(Debug, Clone, Serialize)] +pub struct GraphNode { + pub id: String, + pub label: String, + pub cluster: i64, + pub degree: usize, +} + +#[allow(dead_code)] // used in MM-1 assembly +#[derive(Debug, Clone, Serialize)] +pub struct GraphEdge { + pub source: String, + pub target: String, +} + +/// Max nodes rendered in the visual graph (top-degree); larger vaults are capped. +#[allow(dead_code)] // used in MM-1 assembly +const GRAPH_NODE_CAP: usize = 600; + +/// Basename without directory or extension, for wikilink resolution + titles. +pub(crate) fn basename(path: &str) -> String { + let file = path.rsplit(['/', '\\']).next().unwrap_or(path); + file.rsplit_once('.').map(|(stem, _)| stem).unwrap_or(file).to_string() +} + +/// Structural inputs derived from the graph (no embeddings). +#[derive(Debug, Default)] +pub(crate) struct Structural { + pub notes: Vec, // note rel_paths, sorted + pub in_deg: BTreeMap, // note -> incoming resolved links + pub out_deg: BTreeMap, // note -> outgoing resolved links + pub edges: Vec<(String, String)>, // resolved (src note, dst note) + pub tag_notes: BTreeMap>, // tag -> notes + pub type_counts: BTreeMap, // type -> count + pub link_count: usize, // total resolved links +} + +pub(crate) fn derive_structural(graph: &aingle_graph::GraphDB) -> Structural { + use aingle_graph::{Predicate, TriplePattern}; + + let strip = |n: String| n.trim_start_matches('<').trim_end_matches('>').to_string(); + let find = |pred: &str| -> Vec<(String, String)> { + graph + .find(TriplePattern::any().with_predicate(Predicate::named(pred))) + .unwrap_or_default() + .into_iter() + .filter_map(|t| { + let subj = strip(t.subject.to_string()); + t.object_string().map(|o| (subj, o.to_string())) + }) + .collect() + }; + + // Note set from the source-hash registry. 
+ let mut notes: Vec = find(crate::service::ingest::PRED_SOURCE_HASH) + .into_iter() + .map(|(s, _)| s) + .collect(); + notes.sort(); + notes.dedup(); + + // Basename -> note path index for wikilink resolution. + let mut by_base: BTreeMap = BTreeMap::new(); + for n in ¬es { + by_base.entry(basename(n)).or_insert_with(|| n.clone()); + } + let resolve = |target: &str| -> Option { + // exact path first, else basename match + if notes.iter().any(|n| n == target) { + Some(target.to_string()) + } else { + by_base.get(&basename(target)).cloned() + } + }; + + let mut in_deg: BTreeMap = BTreeMap::new(); + let mut out_deg: BTreeMap = BTreeMap::new(); + let mut edges: Vec<(String, String)> = Vec::new(); + for (src, target) in find("links_to") { + if !notes.iter().any(|n| n == &src) { + continue; + } + if let Some(dst) = resolve(&target) { + if dst == src { + continue; + } + *out_deg.entry(src.clone()).or_default() += 1; + *in_deg.entry(dst.clone()).or_default() += 1; + edges.push((src, dst)); + } + } + let link_count = edges.len(); + + let mut tag_notes: BTreeMap> = BTreeMap::new(); + for (note, tag) in find("tagged") { + if notes.iter().any(|n| n == ¬e) { + tag_notes.entry(tag).or_default().push(note); + } + } + for v in tag_notes.values_mut() { + v.sort(); + v.dedup(); + } + + let mut type_counts: BTreeMap = BTreeMap::new(); + for (_note, ty) in find("type") { + *type_counts.entry(ty).or_default() += 1; + } + + Structural { + notes, + in_deg, + out_deg, + edges, + tag_notes, + type_counts, + link_count, + } +} + +#[cfg(test)] +mod tests { + use super::*; + use aingle_graph::{NodeId, Predicate, Triple, Value}; + + pub(super) async fn graph_with( + triples: &[(&str, &str, &str)], + ) -> crate::state::AppState { + let state = crate::state::AppState::with_db_path(":memory:", None).unwrap(); + { + let g = state.graph.write().await; + for (s, p, o) in triples { + g.insert(Triple::new( + NodeId::named(*s), + Predicate::named(*p), + Value::literal(*o), + )) + .unwrap(); + } + } + 
state + } + + #[tokio::test] + async fn structural_hubs_orphans_tags() { + // a.md and b.md both link to hub.md; orphan.md links to nothing and is + // linked by nothing. Tags group a.md + b.md under "storage". + let state = graph_with(&[ + ("a.md", "aingle:source_hash", "h1"), + ("b.md", "aingle:source_hash", "h2"), + ("hub.md", "aingle:source_hash", "h3"), + ("orphan.md", "aingle:source_hash", "h4"), + ("a.md", "links_to", "hub"), + ("b.md", "links_to", "hub"), + ("a.md", "tagged", "storage"), + ("b.md", "tagged", "storage"), + ("a.md", "type", "note"), + ]) + .await; + + let s = { + let g = state.graph.read().await; + super::derive_structural(&g) + }; + assert_eq!(s.notes.len(), 4); + assert_eq!(s.in_deg.get("hub.md").copied().unwrap_or(0), 2, "hub has 2 incoming"); + assert_eq!(s.out_deg.get("a.md").copied().unwrap_or(0), 1); + assert_eq!(s.tag_notes.get("storage").map(|v| v.len()), Some(2)); + assert_eq!(s.link_count, 2); + } +} From d2ded284824c700b88b4f532bd4b007e82727e5f Mon Sep 17 00:00:00 2001 From: ApiliumDevTeam Date: Sat, 27 Jun 2026 17:42:13 +0200 Subject: [PATCH 42/72] feat(cortex): vault_map semantic topic clustering from neural embeddings --- crates/aingle_cortex/src/service/vault_map.rs | 141 ++++++++++++++++++ 1 file changed, 141 insertions(+) diff --git a/crates/aingle_cortex/src/service/vault_map.rs b/crates/aingle_cortex/src/service/vault_map.rs index b257b37f..d01af403 100644 --- a/crates/aingle_cortex/src/service/vault_map.rs +++ b/crates/aingle_cortex/src/service/vault_map.rs @@ -192,6 +192,128 @@ pub(crate) fn derive_structural(graph: &aingle_graph::GraphDB) -> Structural { } } +use ineru::Embedding; + +/// Cosine similarity between two raw vectors (same length). +fn cosine(a: &[f32], b: &[f32]) -> f32 { + Embedding::new(a.to_vec()).cosine_similarity(&Embedding::new(b.to_vec())) +} + +/// Connected-components clustering over a cosine-similarity graph: notes whose +/// cosine >= `threshold` are linked; each connected component is a topic. 
Labeled +/// by the most central note (highest mean cosine to its component). Deterministic +/// (inputs are a sorted BTreeMap). O(n^2) — the caller caps n. +pub(crate) fn cluster_semantic(vecs: &BTreeMap>, threshold: f32) -> Vec { + let names: Vec<&String> = vecs.keys().collect(); + let n = names.len(); + // union-find + let mut parent: Vec = (0..n).collect(); + fn find(parent: &mut [usize], mut x: usize) -> usize { + while parent[x] != x { + parent[x] = parent[parent[x]]; + x = parent[x]; + } + x + } + for i in 0..n { + for j in (i + 1)..n { + if cosine(&vecs[names[i]], &vecs[names[j]]) >= threshold { + let (ri, rj) = (find(&mut parent, i), find(&mut parent, j)); + if ri != rj { + parent[ri] = rj; + } + } + } + } + // group by root + let mut groups: BTreeMap> = BTreeMap::new(); + for i in 0..n { + let r = find(&mut parent, i); + groups.entry(r).or_default().push(i); + } + let mut topics: Vec = Vec::new(); + for (id, (_root, members)) in groups.into_iter().enumerate() { + // central note = max mean cosine to the rest of its group + let central = *members + .iter() + .max_by(|&&x, &&y| { + let mx = mean_sim(&vecs[names[x]], &members, &names, vecs); + let my = mean_sim(&vecs[names[y]], &members, &names, vecs); + mx.partial_cmp(&my).unwrap_or(std::cmp::Ordering::Equal) + }) + .unwrap(); + let mut notes: Vec = members.iter().map(|&m| names[m].clone()).collect(); + notes.sort(); + let rep = names[central].clone(); + topics.push(Topic { + id, + label: basename(&rep), + representative: rep, + size: notes.len(), + notes, + }); + } + topics.sort_by(|a, b| b.size.cmp(&a.size).then(a.label.cmp(&b.label))); + topics +} + +fn mean_sim( + v: &[f32], + members: &[usize], + names: &[&String], + vecs: &BTreeMap>, +) -> f32 { + if members.len() <= 1 { + return 1.0; + } + let mut sum = 0.0; + let mut cnt = 0; + for &m in members { + let other = &vecs[names[m]]; + if !std::ptr::eq(other.as_ptr(), v.as_ptr()) { + sum += cosine(v, other); + cnt += 1; + } + } + if cnt == 0 { + 1.0 + } 
else { + sum / cnt as f32 + } +} + +/// Mean per-note embedding from Ineru `doc_chunk` entries, grouped by source_path. +pub(crate) fn per_note_vectors(mem: &ineru::IneruMemory) -> BTreeMap> { + let mut sums: BTreeMap, usize)> = BTreeMap::new(); + let mut entries = mem.stm.all_entries(); + entries.extend(mem.ltm.all_entries()); + for e in entries { + if e.entry_type != crate::service::ingest::CHUNK_ENTRY_TYPE { + continue; + } + let Some(path) = e.data.get("source_path").and_then(|v| v.as_str()) else { + continue; + }; + let Some(emb) = e.embedding.as_ref() else { continue }; + let entry = sums.entry(path.to_string()).or_insert_with(|| (vec![0.0; emb.0.len()], 0)); + if entry.0.len() == emb.0.len() { + for (acc, x) in entry.0.iter_mut().zip(&emb.0) { + *acc += *x; + } + entry.1 += 1; + } + } + sums.into_iter() + .filter(|(_, (_, c))| *c > 0) + .map(|(p, (mut v, c))| { + for x in &mut v { + *x /= c as f32; + } + (p, v) + }) + .collect() +} + #[cfg(test)] mod tests { use super::*; @@ -242,4 +364,23 @@ mod tests { assert_eq!(s.tag_notes.get("storage").map(|v| v.len()), Some(2)); assert_eq!(s.link_count, 2); } + + #[test] + fn semantic_clusters_group_similar_notes() { + // Three notes: a & b have near-identical vectors, c is far. 
+ let mut vecs: BTreeMap> = BTreeMap::new(); + vecs.insert("a.md".into(), vec![1.0, 0.0, 0.0]); + vecs.insert("b.md".into(), vec![0.99, 0.01, 0.0]); + vecs.insert("c.md".into(), vec![0.0, 0.0, 1.0]); + + let topics = super::cluster_semantic(&vecs, 0.9); + // a & b together, c alone → 2 topics + assert_eq!(topics.len(), 2); + let big = topics.iter().max_by_key(|t| t.size).unwrap(); + assert_eq!(big.size, 2); + assert!( + big.notes.contains(&"a.md".to_string()) + && big.notes.contains(&"b.md".to_string()) + ); + } } From b9cb98d44389d0b012ecf17b8edb699c9be692b5 Mon Sep 17 00:00:00 2001 From: ApiliumDevTeam Date: Sat, 27 Jun 2026 17:45:34 +0200 Subject: [PATCH 43/72] feat(cortex): assemble VaultMap with guidance + triple-count cache on AppState --- crates/aingle_cortex/src/service/vault_map.rs | 200 +++++++++++++++++- crates/aingle_cortex/src/state.rs | 7 + 2 files changed, 203 insertions(+), 4 deletions(-) diff --git a/crates/aingle_cortex/src/service/vault_map.rs b/crates/aingle_cortex/src/service/vault_map.rs index d01af403..56e3bec3 100644 --- a/crates/aingle_cortex/src/service/vault_map.rs +++ b/crates/aingle_cortex/src/service/vault_map.rs @@ -64,14 +64,12 @@ pub struct TypeCount { pub count: usize, } -#[allow(dead_code)] // used in MM-1 assembly #[derive(Debug, Clone, Serialize, Default)] pub struct GraphView { pub nodes: Vec, pub edges: Vec, } -#[allow(dead_code)] // used in MM-1 assembly #[derive(Debug, Clone, Serialize)] pub struct GraphNode { pub id: String, @@ -80,7 +78,6 @@ pub struct GraphNode { pub degree: usize, } -#[allow(dead_code)] // used in MM-1 assembly #[derive(Debug, Clone, Serialize)] pub struct GraphEdge { pub source: String, @@ -88,7 +85,6 @@ pub struct GraphEdge { } /// Max nodes rendered in the visual graph (top-degree); larger vaults are capped. -#[allow(dead_code)] // used in MM-1 assembly const GRAPH_NODE_CAP: usize = 600; /// Basename without directory or extension, for wikilink resolution + titles. 
@@ -314,6 +310,177 @@ pub(crate) fn per_note_vectors(mem: &ineru::IneruMemory) -> BTreeMap VaultMap { + let s = { + let g = state.graph.read().await; + derive_structural(&g) + }; + + // Hubs / entry points: top by in-degree, tie-break out-degree. + let mut entry_points: Vec = s + .notes + .iter() + .map(|p| EntryPoint { + path: p.clone(), + title: basename(p), + in_links: s.in_deg.get(p).copied().unwrap_or(0), + out_links: s.out_deg.get(p).copied().unwrap_or(0), + }) + .collect(); + entry_points.sort_by(|a, b| { + b.in_links + .cmp(&a.in_links) + .then(b.out_links.cmp(&a.out_links)) + .then(a.path.cmp(&b.path)) + }); + entry_points.retain(|e| e.in_links > 0 || e.out_links > 0); + entry_points.truncate(20); + + // Orphans. + let orphans: Vec = s + .notes + .iter() + .filter(|p| { + s.in_deg.get(*p).copied().unwrap_or(0) == 0 + && s.out_deg.get(*p).copied().unwrap_or(0) == 0 + }) + .cloned() + .collect(); + + // Semantic topics (capped). + let topics = if s.notes.len() <= SEMANTIC_NOTE_CAP { + let mem = state.memory.read().await; + let vecs = per_note_vectors(&mem); + if vecs.len() >= 2 { + cluster_semantic(&vecs, SEMANTIC_THRESHOLD) + } else { + Vec::new() + } + } else { + log::info!( + "vault_map: {} notes > cap {}, skipping semantic clustering (tag clusters used)", + s.notes.len(), + SEMANTIC_NOTE_CAP + ); + Vec::new() + }; + + // Tag clusters + tag index. 
+ let mut tag_clusters: Vec = s + .tag_notes + .iter() + .map(|(tag, notes)| TagGroup { tag: tag.clone(), notes: notes.clone() }) + .collect(); + tag_clusters.sort_by(|a, b| b.notes.len().cmp(&a.notes.len()).then(a.tag.cmp(&b.tag))); + let mut tags: Vec = s + .tag_notes + .iter() + .map(|(tag, notes)| TagCount { tag: tag.clone(), count: notes.len() }) + .collect(); + tags.sort_by(|a, b| b.count.cmp(&a.count).then(a.tag.cmp(&b.tag))); + + let mut types: Vec = s + .type_counts + .iter() + .map(|(ty, count)| TypeCount { ty: ty.clone(), count: *count }) + .collect(); + types.sort_by(|a, b| b.count.cmp(&a.count).then(a.ty.cmp(&b.ty))); + + // Cluster id per note (for graph coloring). + let mut cluster_of: BTreeMap = BTreeMap::new(); + for t in &topics { + for npath in &t.notes { + cluster_of.insert(npath.clone(), t.id as i64); + } + } + + // GraphView (cap by degree). + let mut ranked: Vec<&String> = s.notes.iter().collect(); + ranked.sort_by(|a, b| { + let da = + s.in_deg.get(*a).copied().unwrap_or(0) + s.out_deg.get(*a).copied().unwrap_or(0); + let db = + s.in_deg.get(*b).copied().unwrap_or(0) + s.out_deg.get(*b).copied().unwrap_or(0); + db.cmp(&da).then(a.cmp(b)) + }); + let kept: std::collections::BTreeSet = + ranked.into_iter().take(GRAPH_NODE_CAP).cloned().collect(); + let nodes: Vec = kept + .iter() + .map(|p| GraphNode { + id: p.clone(), + label: basename(p), + cluster: cluster_of.get(p).copied().unwrap_or(-1), + degree: s.in_deg.get(p).copied().unwrap_or(0) + + s.out_deg.get(p).copied().unwrap_or(0), + }) + .collect(); + let edges: Vec = s + .edges + .iter() + .filter(|(a, b)| kept.contains(a) && kept.contains(b)) + .map(|(a, b)| GraphEdge { source: a.clone(), target: b.clone() }) + .collect(); + + let totals = Totals { + notes: s.notes.len(), + links: s.link_count, + clusters: topics.len(), + orphans: orphans.len(), + }; + + let guidance = if totals.notes == 0 { + "Vault not yet indexed. 
Once notes are ingested, this map lists entry-point (hub) \ + notes, topic clusters, and orphans so you can navigate accurately." + .to_string() + } else { + format!( + "This vault has {} notes, {} links, {} topics, {} orphans. To answer about a topic, \ + start at its entry_points and the topic's representative note, then follow links. \ + Ground every claim with aingle_ground (it returns signed provenance). Orphan notes \ + are unconnected and may be incomplete.", + totals.notes, totals.links, totals.clusters, totals.orphans + ) + }; + + VaultMap { + totals, + entry_points, + topics, + tag_clusters, + orphans, + tags, + types, + graph: GraphView { nodes, edges }, + guidance, + } +} + +/// Cached vault map, keyed on the graph's triple count (auto-invalidated on ingest). +pub async fn vault_map_cached(state: &crate::state::AppState) -> VaultMap { + let tc = { state.graph.read().await.stats().triple_count }; + { + let cache = state.vault_map_cache.lock().expect("vault_map cache poisoned"); + if let Some((cached_tc, map)) = cache.as_ref() { + if *cached_tc == tc { + return map.clone(); + } + } + } + let map = compute_vault_map(state).await; + let mut cache = state.vault_map_cache.lock().expect("vault_map cache poisoned"); + *cache = Some((tc, map.clone())); + map +} + #[cfg(test)] mod tests { use super::*; @@ -383,4 +550,29 @@ mod tests { && big.notes.contains(&"b.md".to_string()) ); } + + #[tokio::test] + async fn vault_map_cached_assembles_and_caches() { + let state = graph_with(&[ + ("a.md", "aingle:source_hash", "h1"), + ("hub.md", "aingle:source_hash", "h2"), + ("orphan.md", "aingle:source_hash", "h3"), + ("a.md", "links_to", "hub"), + ("a.md", "tagged", "storage"), + ]) + .await; + + let m1 = super::vault_map_cached(&state).await; + assert_eq!(m1.totals.notes, 3); + assert_eq!(m1.totals.links, 1); + assert_eq!(m1.totals.orphans, 1); // orphan.md + assert!(m1.entry_points.iter().any(|e| e.path == "hub.md" && e.in_links == 1)); + 
assert!(m1.tag_clusters.iter().any(|t| t.tag == "storage")); + assert!(!m1.guidance.is_empty()); + assert!(!m1.graph.nodes.is_empty()); + + // Cached: no graph change → identical totals (and cheap). + let m2 = super::vault_map_cached(&state).await; + assert_eq!(m2.totals.notes, m1.totals.notes); + } } diff --git a/crates/aingle_cortex/src/state.rs b/crates/aingle_cortex/src/state.rs index ef460200..9d13dab7 100644 --- a/crates/aingle_cortex/src/state.rs +++ b/crates/aingle_cortex/src/state.rs @@ -29,6 +29,9 @@ pub struct AppState { pub memory: Arc>, /// The active text embedder (hash fallback or neural). Shared, thread-safe. pub embedder: std::sync::Arc, + /// Cached vault map, keyed on graph triple-count (see service::vault_map). + pub vault_map_cache: + std::sync::Arc>>, /// The event broadcaster for sending real-time updates to WebSocket subscribers. pub broadcaster: Arc, /// The store for managing and verifying zero-knowledge proofs. @@ -97,6 +100,7 @@ impl AppState { logic: Arc::new(RwLock::new(logic)), memory: Arc::new(RwLock::new(memory)), embedder: std::sync::Arc::new(HashEmbedder::new()), + vault_map_cache: std::sync::Arc::new(std::sync::Mutex::new(None)), broadcaster: Arc::new(EventBroadcaster::new()), proof_store: Arc::new(ProofStore::new()), sandbox_manager: Arc::new(SandboxManager::new()), @@ -142,6 +146,7 @@ impl AppState { logic: Arc::new(RwLock::new(logic)), memory: Arc::new(RwLock::new(memory)), embedder: std::sync::Arc::new(HashEmbedder::new()), + vault_map_cache: std::sync::Arc::new(std::sync::Mutex::new(None)), broadcaster: Arc::new(EventBroadcaster::new()), proof_store: Arc::new(ProofStore::new()), sandbox_manager: Arc::new(SandboxManager::new()), @@ -187,6 +192,7 @@ impl AppState { logic: Arc::new(RwLock::new(logic)), memory: Arc::new(RwLock::new(memory)), embedder: std::sync::Arc::new(HashEmbedder::new()), + vault_map_cache: std::sync::Arc::new(std::sync::Mutex::new(None)), broadcaster: Arc::new(EventBroadcaster::new()), proof_store: 
Arc::new(ProofStore::new()), sandbox_manager: Arc::new(SandboxManager::new()), @@ -329,6 +335,7 @@ impl AppState { logic: Arc::new(RwLock::new(logic)), memory: Arc::new(RwLock::new(memory)), embedder, + vault_map_cache: std::sync::Arc::new(std::sync::Mutex::new(None)), broadcaster: Arc::new(EventBroadcaster::new()), proof_store, sandbox_manager: Arc::new(SandboxManager::new()), From d61a2c4569721eb32d4a8a1f1ec4f1fdad5f6328 Mon Sep 17 00:00:00 2001 From: ApiliumDevTeam Date: Sat, 27 Jun 2026 18:01:06 +0200 Subject: [PATCH 44/72] refactor(cortex): zero-alloc cosine, index-based mean_sim, cache + self-link tests - Replace vec-cloning cosine() with a borrow-based implementation (no heap allocation per call) - Replace std::ptr::eq identity trick in mean_sim() with explicit index comparison - Build BTreeSet<&str> once in derive_structural for O(log n) note membership checks - Add comment in vault_map_cached explaining intentional mutex release before .await - Tests: self-link skip assertion, type_counts assertion, cache invalidation test --- crates/aingle_cortex/src/service/vault_map.rs | 75 +++++++++++++------ 1 file changed, 52 insertions(+), 23 deletions(-) diff --git a/crates/aingle_cortex/src/service/vault_map.rs b/crates/aingle_cortex/src/service/vault_map.rs index 56e3bec3..d0bfc226 100644 --- a/crates/aingle_cortex/src/service/vault_map.rs +++ b/crates/aingle_cortex/src/service/vault_map.rs @@ -129,6 +129,9 @@ pub(crate) fn derive_structural(graph: &aingle_graph::GraphDB) -> Structural { notes.sort(); notes.dedup(); + // O(log n) membership set — avoids linear scans during link/tag resolution. + let note_set: std::collections::BTreeSet<&str> = notes.iter().map(|s| s.as_str()).collect(); + // Basename -> note path index for wikilink resolution. 
let mut by_base: BTreeMap = BTreeMap::new(); for n in ¬es { @@ -136,7 +139,7 @@ pub(crate) fn derive_structural(graph: &aingle_graph::GraphDB) -> Structural { } let resolve = |target: &str| -> Option { // exact path first, else basename match - if notes.iter().any(|n| n == target) { + if note_set.contains(target) { Some(target.to_string()) } else { by_base.get(&basename(target)).cloned() @@ -147,7 +150,7 @@ pub(crate) fn derive_structural(graph: &aingle_graph::GraphDB) -> Structural { let mut out_deg: BTreeMap = BTreeMap::new(); let mut edges: Vec<(String, String)> = Vec::new(); for (src, target) in find("links_to") { - if !notes.iter().any(|n| n == &src) { + if !note_set.contains(src.as_str()) { continue; } if let Some(dst) = resolve(&target) { @@ -163,7 +166,7 @@ pub(crate) fn derive_structural(graph: &aingle_graph::GraphDB) -> Structural { let mut tag_notes: BTreeMap> = BTreeMap::new(); for (note, tag) in find("tagged") { - if notes.iter().any(|n| n == ¬e) { + if note_set.contains(note.as_str()) { tag_notes.entry(tag).or_default().push(note); } } @@ -188,11 +191,19 @@ pub(crate) fn derive_structural(graph: &aingle_graph::GraphDB) -> Structural { } } -use ineru::Embedding; - /// Cosine similarity between two raw vectors (same length). 
fn cosine(a: &[f32], b: &[f32]) -> f32 { - Embedding::new(a.to_vec()).cosine_similarity(&Embedding::new(b.to_vec())) + if a.len() != b.len() || a.is_empty() { + return 0.0; + } + let dot: f32 = a.iter().zip(b).map(|(x, y)| x * y).sum(); + let ma = a.iter().map(|x| x * x).sum::().sqrt(); + let mb = b.iter().map(|x| x * x).sum::().sqrt(); + if ma == 0.0 || mb == 0.0 { + 0.0 + } else { + dot / (ma * mb) + } } /// Connected-components clustering over a cosine-similarity graph: notes whose @@ -233,8 +244,8 @@ pub(crate) fn cluster_semantic(vecs: &BTreeMap>, threshold: f32 let central = *members .iter() .max_by(|&&x, &&y| { - let mx = mean_sim(&vecs[names[x]], &members, &names, vecs); - let my = mean_sim(&vecs[names[y]], &members, &names, vecs); + let mx = mean_sim(x, &members, &names, vecs); + let my = mean_sim(y, &members, &names, vecs); mx.partial_cmp(&my).unwrap_or(std::cmp::Ordering::Equal) }) .unwrap(); @@ -253,29 +264,21 @@ pub(crate) fn cluster_semantic(vecs: &BTreeMap>, threshold: f32 topics } -fn mean_sim( - v: &[f32], - members: &[usize], - names: &[&String], - vecs: &BTreeMap>, -) -> f32 { +fn mean_sim(self_idx: usize, members: &[usize], names: &[&String], vecs: &BTreeMap>) -> f32 { if members.len() <= 1 { return 1.0; } + let v = &vecs[names[self_idx]]; let mut sum = 0.0; let mut cnt = 0; for &m in members { - let other = &vecs[names[m]]; - if !std::ptr::eq(other.as_ptr(), v.as_ptr()) { - sum += cosine(v, other); - cnt += 1; + if m == self_idx { + continue; } + sum += cosine(v, &vecs[names[m]]); + cnt += 1; } - if cnt == 0 { - 1.0 - } else { - sum / cnt as f32 - } + if cnt == 0 { 1.0 } else { sum / cnt as f32 } } /// Mean per-note embedding from Ineru `doc_chunk` entries, grouped by source_path. @@ -475,6 +478,8 @@ pub async fn vault_map_cached(state: &crate::state::AppState) -> VaultMap { } } } + // The cache mutex is intentionally released before the async compute to avoid + // holding it across an `.await` point. 
let map = compute_vault_map(state).await; let mut cache = state.vault_map_cache.lock().expect("vault_map cache poisoned"); *cache = Some((tc, map.clone())); @@ -515,6 +520,8 @@ mod tests { ("orphan.md", "aingle:source_hash", "h4"), ("a.md", "links_to", "hub"), ("b.md", "links_to", "hub"), + // self-link: "a" resolves to "a.md" via basename → must be skipped + ("a.md", "links_to", "a"), ("a.md", "tagged", "storage"), ("b.md", "tagged", "storage"), ("a.md", "type", "note"), @@ -530,6 +537,10 @@ mod tests { assert_eq!(s.out_deg.get("a.md").copied().unwrap_or(0), 1); assert_eq!(s.tag_notes.get("storage").map(|v| v.len()), Some(2)); assert_eq!(s.link_count, 2); + // Self-link must not be counted as incoming for a.md. + assert_eq!(s.in_deg.get("a.md").copied().unwrap_or(0), 0, "self-link must not count as incoming"); + // type_counts must reflect the triple ("a.md","type","note"). + assert_eq!(s.type_counts.get("note"), Some(&1)); } #[test] @@ -575,4 +586,22 @@ mod tests { let m2 = super::vault_map_cached(&state).await; assert_eq!(m2.totals.notes, m1.totals.notes); } + + #[tokio::test] + async fn vault_map_cache_invalidates_on_change() { + let state = graph_with(&[("a.md", "aingle:source_hash", "h1")]).await; + let m1 = super::vault_map_cached(&state).await; + assert_eq!(m1.totals.notes, 1); + { + let g = state.graph.write().await; + g.insert(Triple::new( + NodeId::named("b.md"), + Predicate::named("aingle:source_hash"), + Value::literal("h2"), + )) + .unwrap(); + } + let m2 = super::vault_map_cached(&state).await; + assert_eq!(m2.totals.notes, 2, "cache must invalidate when triple_count changes"); + } } From 356254dab77fb417a8d659c7de02c07cf058f533 Mon Sep 17 00:00:00 2001 From: ApiliumDevTeam Date: Sat, 27 Jun 2026 18:05:13 +0200 Subject: [PATCH 45/72] feat(cortex): aingle_vault_map MCP tool (navigation manual for connected AI) --- crates/aingle_cortex/src/mcp/server.rs | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git 
a/crates/aingle_cortex/src/mcp/server.rs b/crates/aingle_cortex/src/mcp/server.rs index 033b59bd..cc0e408a 100644 --- a/crates/aingle_cortex/src/mcp/server.rs +++ b/crates/aingle_cortex/src/mcp/server.rs @@ -140,6 +140,19 @@ impl AingleMcp { Ok(CallToolResult::success(vec![Content::json(resp)?])) } + /// Vault Map & Navigation Manual: entry points, topics, orphans, indices, + /// and guidance for navigating the vault accurately before answering. + #[tool( + description = "Vault map & navigation manual: hub entry-points, semantic topic \ + clusters, orphan notes, tag/type indices, and guidance. Call this FIRST to \ + navigate a vault accurately, then aingle_ground each claim.", + annotations(read_only_hint = true) + )] + async fn aingle_vault_map(&self) -> Result { + let resp = crate::service::vault_map::vault_map_cached(&self.state).await; + Ok(CallToolResult::success(vec![Content::json(resp)?])) + } + /// Query the semantic graph by triple pattern (any field omitted = wildcard). #[tool( description = "Query the semantic graph by triple pattern. 
Omit a field to wildcard it.", @@ -635,7 +648,7 @@ mod ingest_tools_tests { .into_iter() .map(|t| t.name.to_string()) .collect(); - for expected in ["aingle_ingest", "aingle_ground", "aingle_sources"] { + for expected in ["aingle_ingest", "aingle_ground", "aingle_sources", "aingle_vault_map"] { assert!( names.contains(&expected.to_string()), "missing tool {expected}" From 744a428de5d11f7625a24e3ec11adba1f724fb7e Mon Sep 17 00:00:00 2001 From: ApiliumDevTeam Date: Sat, 27 Jun 2026 18:19:54 +0200 Subject: [PATCH 46/72] fix(cortex): calibrate vault_map semantic threshold to 0.88 (note-level cosine) --- crates/aingle_cortex/src/service/vault_map.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/aingle_cortex/src/service/vault_map.rs b/crates/aingle_cortex/src/service/vault_map.rs index d0bfc226..be0aa4e6 100644 --- a/crates/aingle_cortex/src/service/vault_map.rs +++ b/crates/aingle_cortex/src/service/vault_map.rs @@ -316,7 +316,7 @@ pub(crate) fn per_note_vectors(mem: &ineru::IneruMemory) -> BTreeMap Date: Sat, 27 Jun 2026 18:26:39 +0200 Subject: [PATCH 47/72] refactor(cortex): key vault_map cache on (triple_count, mem_bytes); log graph cap --- crates/aingle_cortex/src/service/vault_map.rs | 21 +++++++++++++++---- crates/aingle_cortex/src/state.rs | 8 ++++--- 2 files changed, 22 insertions(+), 7 deletions(-) diff --git a/crates/aingle_cortex/src/service/vault_map.rs b/crates/aingle_cortex/src/service/vault_map.rs index be0aa4e6..7570416b 100644 --- a/crates/aingle_cortex/src/service/vault_map.rs +++ b/crates/aingle_cortex/src/service/vault_map.rs @@ -414,6 +414,14 @@ pub async fn compute_vault_map(state: &crate::state::AppState) -> VaultMap { s.in_deg.get(*b).copied().unwrap_or(0) + s.out_deg.get(*b).copied().unwrap_or(0); db.cmp(&da).then(a.cmp(b)) }); + if s.notes.len() > GRAPH_NODE_CAP { + log::info!( + "vault_map: {} notes > graph cap {}, rendering the {} most-connected", + s.notes.len(), + GRAPH_NODE_CAP, + GRAPH_NODE_CAP + ); + } let 
kept: std::collections::BTreeSet = ranked.into_iter().take(GRAPH_NODE_CAP).cloned().collect(); let nodes: Vec = kept @@ -467,13 +475,18 @@ pub async fn compute_vault_map(state: &crate::state::AppState) -> VaultMap { } } -/// Cached vault map, keyed on the graph's triple count (auto-invalidated on ingest). +/// Cached vault map, keyed on `(graph triple_count, memory bytes)`. The graph +/// count invalidates on structural change; the memory-bytes signal invalidates +/// when chunk content/embeddings change even if the triple count is unchanged +/// (e.g. a same-structure prose edit) — so semantic topics don't go stale. pub async fn vault_map_cached(state: &crate::state::AppState) -> VaultMap { let tc = { state.graph.read().await.stats().triple_count }; + let mem_bytes = { state.memory.read().await.stats().total_memory_bytes }; + let key = (tc, mem_bytes); { let cache = state.vault_map_cache.lock().expect("vault_map cache poisoned"); - if let Some((cached_tc, map)) = cache.as_ref() { - if *cached_tc == tc { + if let Some((cached_key, map)) = cache.as_ref() { + if *cached_key == key { return map.clone(); } } @@ -482,7 +495,7 @@ pub async fn vault_map_cached(state: &crate::state::AppState) -> VaultMap { // holding it across an `.await` point. let map = compute_vault_map(state).await; let mut cache = state.vault_map_cache.lock().expect("vault_map cache poisoned"); - *cache = Some((tc, map.clone())); + *cache = Some((key, map.clone())); map } diff --git a/crates/aingle_cortex/src/state.rs b/crates/aingle_cortex/src/state.rs index 9d13dab7..d4315d64 100644 --- a/crates/aingle_cortex/src/state.rs +++ b/crates/aingle_cortex/src/state.rs @@ -29,9 +29,11 @@ pub struct AppState { pub memory: Arc>, /// The active text embedder (hash fallback or neural). Shared, thread-safe. pub embedder: std::sync::Arc, - /// Cached vault map, keyed on graph triple-count (see service::vault_map). 
- pub vault_map_cache: - std::sync::Arc>>, + /// Cached vault map, keyed on (graph triple-count, memory bytes) — see + /// service::vault_map::vault_map_cached. + pub vault_map_cache: std::sync::Arc< + std::sync::Mutex>, + >, /// The event broadcaster for sending real-time updates to WebSocket subscribers. pub broadcaster: Arc, /// The store for managing and verifying zero-knowledge proofs. From 6dccc2ac10221e0fcad86a49619ea9c266a91b96 Mon Sep 17 00:00:00 2001 From: ApiliumDevTeam Date: Sat, 27 Jun 2026 19:18:08 +0200 Subject: [PATCH 48/72] feat(cortex): exclude _maps/ artifacts from the Vault Map (no self-pollution) --- crates/aingle_cortex/src/service/vault_map.rs | 31 ++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/crates/aingle_cortex/src/service/vault_map.rs b/crates/aingle_cortex/src/service/vault_map.rs index 7570416b..a4676ba4 100644 --- a/crates/aingle_cortex/src/service/vault_map.rs +++ b/crates/aingle_cortex/src/service/vault_map.rs @@ -93,6 +93,11 @@ pub(crate) fn basename(path: &str) -> String { file.rsplit_once('.').map(|(stem, _)| stem).unwrap_or(file).to_string() } +/// True for paths under the generated maps folder (excluded from the vault map). +pub(crate) fn is_maps_path(path: &str) -> bool { + path.starts_with("_maps/") || path.starts_with("_maps\\") +} + /// Structural inputs derived from the graph (no embeddings). #[derive(Debug, Default)] pub(crate) struct Structural { @@ -128,6 +133,7 @@ pub(crate) fn derive_structural(graph: &aingle_graph::GraphDB) -> Structural { .collect(); notes.sort(); notes.dedup(); + notes.retain(|n| !is_maps_path(n)); // O(log n) membership set — avoids linear scans during link/tag resolution. let note_set: std::collections::BTreeSet<&str> = notes.iter().map(|s| s.as_str()).collect(); @@ -361,7 +367,11 @@ pub async fn compute_vault_map(state: &crate::state::AppState) -> VaultMap { // Semantic topics (capped). 
let topics = if s.notes.len() <= SEMANTIC_NOTE_CAP { let mem = state.memory.read().await; - let vecs = per_note_vectors(&mem); + let all_vecs = per_note_vectors(&mem); + let vecs: std::collections::BTreeMap> = all_vecs + .into_iter() + .filter(|(p, _)| s.notes.iter().any(|n| n == p)) + .collect(); if vecs.len() >= 2 { cluster_semantic(&vecs, SEMANTIC_THRESHOLD) } else { @@ -600,6 +610,25 @@ mod tests { assert_eq!(m2.totals.notes, m1.totals.notes); } + #[tokio::test] + async fn excludes_maps_folder_notes() { + let state = graph_with(&[ + ("real.md", "aingle:source_hash", "h1"), + ("hub.md", "aingle:source_hash", "h2"), + ("_maps/vault-map.md", "aingle:source_hash", "h3"), + ("_maps/vault-map.md", "links_to", "hub"), + ("real.md", "links_to", "hub"), + ]) + .await; + + let map = super::vault_map_cached(&state).await; + assert_eq!(map.totals.notes, 2, "_maps/ notes excluded from the count"); + assert!(!map.graph.nodes.iter().any(|n| n.id.starts_with("_maps/"))); + assert!(!map.entry_points.iter().any(|e| e.path.starts_with("_maps/"))); + let hub = map.entry_points.iter().find(|e| e.path == "hub.md").expect("hub"); + assert_eq!(hub.in_links, 1, "the _maps link to hub must be excluded"); + } + #[tokio::test] async fn vault_map_cache_invalidates_on_change() { let state = graph_with(&[("a.md", "aingle:source_hash", "h1")]).await; From 363568e62cbadfa0880e7fed2463120c8fe9ebb5 Mon Sep 17 00:00:00 2001 From: ApiliumDevTeam Date: Sat, 27 Jun 2026 19:38:41 +0200 Subject: [PATCH 49/72] feat(cortex): detect me.md identity + skill-tagged notes in the Vault Map --- crates/aingle_cortex/src/service/vault_map.rs | 59 ++++++++++++++++++- 1 file changed, 57 insertions(+), 2 deletions(-) diff --git a/crates/aingle_cortex/src/service/vault_map.rs b/crates/aingle_cortex/src/service/vault_map.rs index a4676ba4..2c417671 100644 --- a/crates/aingle_cortex/src/service/vault_map.rs +++ b/crates/aingle_cortex/src/service/vault_map.rs @@ -19,6 +19,10 @@ pub struct VaultMap { pub types: Vec, pub 
graph: GraphView, pub guidance: String, + /// Path to the user's identity note (`me.md`) if present — read this first. + pub identity: Option, + /// Note paths tagged as reusable skills/processes (the "skill map"). + pub skills: Vec, } #[derive(Debug, Clone, Serialize, Default)] @@ -87,6 +91,9 @@ pub struct GraphEdge { /// Max nodes rendered in the visual graph (top-degree); larger vaults are capped. const GRAPH_NODE_CAP: usize = 600; +/// Tags (case-insensitive) that mark a note as a reusable skill/process. +const SKILL_TAGS: [&str; 6] = ["skill", "process", "sop", "workflow", "how-to", "howto"]; + /// Basename without directory or extension, for wikilink resolution + titles. pub(crate) fn basename(path: &str) -> String { let file = path.rsplit(['/', '\\']).next().unwrap_or(path); @@ -458,18 +465,43 @@ pub async fn compute_vault_map(state: &crate::state::AppState) -> VaultMap { orphans: orphans.len(), }; + // Identity: the root `me.md` (exact rel_path), read first by the AI. + let identity = s + .notes + .iter() + .find(|n| n.as_str() == "me.md" || n.as_str() == "me.markdown") + .cloned(); + + // Skills: notes tagged with any SKILL_TAGS value (case-insensitive). + let mut skills: Vec = Vec::new(); + for (tag, notes) in &s.tag_notes { + if SKILL_TAGS.contains(&tag.to_lowercase().as_str()) { + skills.extend(notes.iter().cloned()); + } + } + skills.sort(); + skills.dedup(); + let guidance = if totals.notes == 0 { "Vault not yet indexed. Once notes are ingested, this map lists entry-point (hub) \ notes, topic clusters, and orphans so you can navigate accurately." .to_string() } else { - format!( + let mut g = String::new(); + if identity.is_some() { + g.push_str("Read me.md first for the user's identity and preferences. "); + } + g.push_str(&format!( "This vault has {} notes, {} links, {} topics, {} orphans. To answer about a topic, \ start at its entry_points and the topic's representative note, then follow links. 
\ Ground every claim with aingle_ground (it returns signed provenance). Orphan notes \ are unconnected and may be incomplete.", totals.notes, totals.links, totals.clusters, totals.orphans - ) + )); + if !skills.is_empty() { + g.push_str(" Follow the skill notes (skill-map) for the user's documented processes."); + } + g }; VaultMap { @@ -482,6 +514,8 @@ pub async fn compute_vault_map(state: &crate::state::AppState) -> VaultMap { types, graph: GraphView { nodes, edges }, guidance, + identity, + skills, } } @@ -629,6 +663,27 @@ mod tests { assert_eq!(hub.in_links, 1, "the _maps link to hub must be excluded"); } + #[tokio::test] + async fn detects_identity_and_skills() { + let state = graph_with(&[ + ("me.md", "aingle:source_hash", "h0"), + ("note.md", "aingle:source_hash", "h1"), + ("deploy.md", "aingle:source_hash", "h2"), + ("writing.md", "aingle:source_hash", "h3"), + ("deploy.md", "tagged", "sop"), + ("writing.md", "tagged", "process"), + ("note.md", "tagged", "misc"), + ]) + .await; + + let map = super::vault_map_cached(&state).await; + assert_eq!(map.identity.as_deref(), Some("me.md")); + assert!(map.skills.contains(&"deploy.md".to_string())); + assert!(map.skills.contains(&"writing.md".to_string())); + assert!(!map.skills.contains(&"note.md".to_string()), "non-skill tag excluded"); + assert!(map.guidance.contains("me.md"), "guidance points at identity"); + } + #[tokio::test] async fn vault_map_cache_invalidates_on_change() { let state = graph_with(&[("a.md", "aingle:source_hash", "h1")]).await; From 8afc3cb6e1cbe778db50f39d6944e333aac1df33 Mon Sep 17 00:00:00 2001 From: ApiliumDevTeam Date: Sat, 27 Jun 2026 20:06:13 +0200 Subject: [PATCH 50/72] feat(cortex): backlinks service (verified backlinks, outgoing, unlinked mentions) --- crates/aingle_cortex/src/service/backlinks.rs | 233 ++++++++++++++++++ crates/aingle_cortex/src/service/mod.rs | 1 + 2 files changed, 234 insertions(+) create mode 100644 crates/aingle_cortex/src/service/backlinks.rs diff --git 
a/crates/aingle_cortex/src/service/backlinks.rs b/crates/aingle_cortex/src/service/backlinks.rs new file mode 100644 index 00000000..488ba5ec --- /dev/null +++ b/crates/aingle_cortex/src/service/backlinks.rs @@ -0,0 +1,233 @@ +// Copyright 2019-2026 Apilium Technologies OÜ. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 OR Commercial + +//! Backlinks, outgoing links, and unlinked mentions for a note — the verified +//! link graph around a single note. Deterministic; reuses links_to triples, +//! Ineru chunk text (context + unlinked scan), and DAG provenance. + +use serde::Serialize; +use std::collections::BTreeMap; + +/// Verified link context for one note. +#[derive(Debug, Clone, Serialize, Default)] +pub struct Backlinks { + pub backlinks: Vec, + pub outgoing: Vec, + pub unlinked: Vec, +} + +/// A note that links to the target, with the link's context + provenance. +#[derive(Debug, Clone, Serialize)] +pub struct BacklinkRef { + pub path: String, + pub context: Option, + pub provenance_anchor: Option, +} + +/// Basename without directory or extension (wikilink resolution + titles). +fn basename(path: &str) -> String { + let file = path.rsplit(['/', '\\']).next().unwrap_or(path); + file.rsplit_once('.').map(|(s, _)| s).unwrap_or(file).to_string() +} + +/// True if `text` mentions `word` as a whole (case-insensitive) token — avoids +/// "cat" matching "category". Tokens split on non-alphanumeric. +fn mentions_word(text: &str, word: &str) -> bool { + let w = word.to_lowercase(); + text.to_lowercase() + .split(|c: char| !c.is_alphanumeric()) + .any(|tok| tok == w) +} + +/// Retrieve a signed provenance anchor hash for a note path, if available. 
+async fn provenance_anchor_for(state: &crate::state::AppState, src: &str) -> Option { + #[cfg(feature = "dag")] + { + match crate::service::dag::history_by_subject(state, src, 1).await { + Ok(a) => a.first().filter(|x| x.signed).map(|x| x.hash.clone()), + Err(_) => None, + } + } + #[cfg(not(feature = "dag"))] + { + let _ = (state, src); + None + } +} + +/// Compute backlinks, outgoing links, and unlinked mentions for `note`. +pub async fn backlinks(state: &crate::state::AppState, note: &str) -> Backlinks { + use aingle_graph::{Predicate, TriplePattern}; + + let strip = |n: String| n.trim_start_matches('<').trim_end_matches('>').to_string(); + + // Note set + basename index. + let (notes, links): (Vec, Vec<(String, String)>) = { + let g = state.graph.read().await; + let collect = |pred: &str| -> Vec<(String, String)> { + g.find(TriplePattern::any().with_predicate(Predicate::named(pred))) + .unwrap_or_default() + .into_iter() + .filter_map(|t| { + t.object_string() + .map(|o| (strip(t.subject.to_string()), o.to_string())) + }) + .collect() + }; + let mut notes: Vec = collect(crate::service::ingest::PRED_SOURCE_HASH) + .into_iter() + .map(|(s, _)| s) + .collect(); + notes.sort(); + notes.dedup(); + let links = collect("links_to"); + (notes, links) + }; + + let note_set: std::collections::BTreeSet<&str> = notes.iter().map(|s| s.as_str()).collect(); + let mut by_base: BTreeMap = BTreeMap::new(); + for n in ¬es { + by_base.entry(basename(n)).or_insert_with(|| n.clone()); + } + let resolve = |target: &str| -> Option { + if note_set.contains(target) { + Some(target.to_string()) + } else { + by_base.get(&basename(target)).cloned() + } + }; + let active_base = basename(note); + + // Per-note chunk text (for context + unlinked scan). 
+ let mut text_of: BTreeMap = BTreeMap::new(); + { + let mem = state.memory.read().await; + let mut entries = mem.stm.all_entries(); + entries.extend(mem.ltm.all_entries()); + for e in entries { + if e.entry_type != crate::service::ingest::CHUNK_ENTRY_TYPE { + continue; + } + if let (Some(p), Some(t)) = ( + e.data.get("source_path").and_then(|v| v.as_str()), + e.data.get("text").and_then(|v| v.as_str()), + ) { + let buf = text_of.entry(p.to_string()).or_default(); + buf.push('\n'); + buf.push_str(t); + } + } + } + + // Backlinks: sources linking to `note`. + let mut seen = std::collections::BTreeSet::new(); + let mut backlinks: Vec = Vec::new(); + let mut backlink_paths = std::collections::BTreeSet::new(); + for (src, target) in &links { + if src == note || !note_set.contains(src.as_str()) { + continue; + } + if resolve(target).as_deref() == Some(note) && seen.insert(src.clone()) { + let context = text_of.get(src).and_then(|txt| { + txt.lines() + .find(|l| { + l.contains("[[") + && l.to_lowercase().contains(&active_base.to_lowercase()) + }) + .map(|l| { + let t = l.trim(); + if t.len() > 200 { + format!("{}…", &t[..200]) + } else { + t.to_string() + } + }) + }); + let anchor = provenance_anchor_for(state, src).await; + backlink_paths.insert(src.clone()); + backlinks.push(BacklinkRef { + path: src.clone(), + context, + provenance_anchor: anchor, + }); + } + } + backlinks.sort_by(|a, b| a.path.cmp(&b.path)); + + // Outgoing: notes `note` links to. + let mut outgoing: Vec = links + .iter() + .filter(|(src, _)| src == note) + .filter_map(|(_, target)| resolve(target)) + .filter(|p| p != note) + .collect(); + outgoing.sort(); + outgoing.dedup(); + + // Unlinked mentions: notes whose text names `active_base` but don't link it. 
+ let mut unlinked: Vec = text_of + .iter() + .filter(|(p, _)| { + p.as_str() != note + && !backlink_paths.contains(p.as_str()) + && note_set.contains(p.as_str()) + }) + .filter(|(_, txt)| mentions_word(txt, &active_base)) + .map(|(p, _)| p.clone()) + .collect(); + unlinked.sort(); + unlinked.dedup(); + + Backlinks { + backlinks, + outgoing, + unlinked, + } +} + +#[cfg(test)] +mod tests { + use crate::state::AppState; + use aingle_graph::{NodeId, Predicate, Triple, Value}; + + async fn graph_with(triples: &[(&str, &str, &str)]) -> AppState { + let state = AppState::with_db_path(":memory:", None).unwrap(); + { + let g = state.graph.write().await; + for (s, p, o) in triples { + g.insert(Triple::new(NodeId::named(*s), Predicate::named(*p), Value::literal(*o))) + .unwrap(); + } + } + state + } + + #[tokio::test] + async fn backlinks_outgoing_unlinked() { + let state = graph_with(&[ + ("a.md", "aingle:source_hash", "h1"), + ("b.md", "aingle:source_hash", "h2"), + ("c.md", "aingle:source_hash", "h3"), + ("target.md", "aingle:source_hash", "h4"), + ("a.md", "links_to", "target"), // a → target (backlink) + ("target.md", "links_to", "b"), // target → b (outgoing) + ]) + .await; + // c.md mentions "target" in text but does not link it (unlinked). 
+ { + let mut mem = state.memory.write().await; + let mut e = ineru::MemoryEntry::new( + crate::service::ingest::CHUNK_ENTRY_TYPE, + serde_json::json!({ "text": "See target for details.", "source_path": "c.md" }), + ); + e.embedding = Some(ineru::Embedding::new(vec![0.0; 8])); + mem.remember(e).unwrap(); + } + + let r = super::backlinks(&state, "target.md").await; + assert!(r.backlinks.iter().any(|b| b.path == "a.md"), "a links to target"); + assert!(r.outgoing.contains(&"b.md".to_string()), "target links to b"); + assert!(r.unlinked.contains(&"c.md".to_string()), "c mentions target unlinked"); + assert!(!r.unlinked.contains(&"a.md".to_string()), "a is a backlink, not unlinked"); + } +} diff --git a/crates/aingle_cortex/src/service/mod.rs b/crates/aingle_cortex/src/service/mod.rs index 7423d727..dc1aca50 100644 --- a/crates/aingle_cortex/src/service/mod.rs +++ b/crates/aingle_cortex/src/service/mod.rs @@ -3,6 +3,7 @@ //! Business-logic layer shared by REST handlers and the MCP server. +pub mod backlinks; #[cfg(feature = "dag")] pub mod dag; pub mod ground; From 7252df0c9ec3d58e4275c4c438fe4e31f53aacff Mon Sep 17 00:00:00 2001 From: ApiliumDevTeam Date: Sat, 27 Jun 2026 20:11:05 +0200 Subject: [PATCH 51/72] fix(cortex): char-safe context truncation in backlinks (no UTF-8 panic) --- crates/aingle_cortex/src/service/backlinks.rs | 31 +++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/crates/aingle_cortex/src/service/backlinks.rs b/crates/aingle_cortex/src/service/backlinks.rs index 488ba5ec..628ae04f 100644 --- a/crates/aingle_cortex/src/service/backlinks.rs +++ b/crates/aingle_cortex/src/service/backlinks.rs @@ -136,8 +136,9 @@ pub async fn backlinks(state: &crate::state::AppState, note: &str) -> Backlinks }) .map(|l| { let t = l.trim(); - if t.len() > 200 { - format!("{}…", &t[..200]) + if t.chars().count() > 200 { + let cut: String = t.chars().take(200).collect(); + format!("{cut}…") } else { t.to_string() } @@ -230,4 +231,30 @@ mod 
tests { assert!(r.unlinked.contains(&"c.md".to_string()), "c mentions target unlinked"); assert!(!r.unlinked.contains(&"a.md".to_string()), "a is a backlink, not unlinked"); } + + #[tokio::test] + async fn context_truncation_is_char_safe() { + let state = graph_with(&[ + ("t.md", "aingle:source_hash", "h1"), + ("src.md", "aingle:source_hash", "h2"), + ("src.md", "links_to", "t"), + ]) + .await; + { + let mut mem = state.memory.write().await; + // A line with accented chars whose byte length far exceeds 200 around the cut. + let long = format!("[[t]] {}", "áéíóú ".repeat(80)); + let mut e = ineru::MemoryEntry::new( + crate::service::ingest::CHUNK_ENTRY_TYPE, + serde_json::json!({ "text": long, "source_path": "src.md" }), + ); + e.embedding = Some(ineru::Embedding::new(vec![0.0; 8])); + mem.remember(e).unwrap(); + } + // Must not panic; context should be present and ≤ 201 chars (200 + ellipsis). + let r = super::backlinks(&state, "t.md").await; + let b = r.backlinks.iter().find(|b| b.path == "src.md").expect("backlink"); + let ctx = b.context.as_ref().expect("context"); + assert!(ctx.chars().count() <= 201); + } } From cd56951b35e9c76bd6a19449e5d6220b64242228 Mon Sep 17 00:00:00 2001 From: ApiliumDevTeam Date: Sat, 27 Jun 2026 20:16:15 +0200 Subject: [PATCH 52/72] fix(cortex): boundary-aware unlinked mentions (hyphenated names); dedup cleanup --- crates/aingle_cortex/src/service/backlinks.rs | 69 ++++++++++++++++--- 1 file changed, 58 insertions(+), 11 deletions(-) diff --git a/crates/aingle_cortex/src/service/backlinks.rs b/crates/aingle_cortex/src/service/backlinks.rs index 628ae04f..2bfcc616 100644 --- a/crates/aingle_cortex/src/service/backlinks.rs +++ b/crates/aingle_cortex/src/service/backlinks.rs @@ -30,13 +30,31 @@ fn basename(path: &str) -> String { file.rsplit_once('.').map(|(s, _)| s).unwrap_or(file).to_string() } -/// True if `text` mentions `word` as a whole (case-insensitive) token — avoids -/// "cat" matching "category". 
Tokens split on non-alphanumeric. +/// True if `text` contains `word` (case-insensitive) as a whole token — bounded +/// by non-alphanumeric chars or string ends. Handles multi-token names like +/// "meeting-notes" while NOT matching "note" inside "notebook". fn mentions_word(text: &str, word: &str) -> bool { - let w = word.to_lowercase(); - text.to_lowercase() - .split(|c: char| !c.is_alphanumeric()) - .any(|tok| tok == w) + let w = word.trim().to_lowercase(); + if w.is_empty() { + return false; + } + let hay = text.to_lowercase(); + let hb = hay.as_bytes(); + let mut from = 0; + while let Some(rel) = hay[from..].find(w.as_str()) { + let start = from + rel; + let end = start + w.len(); + let before_ok = start == 0 || !(hb[start - 1] as char).is_alphanumeric(); + let after_ok = end >= hb.len() || !(hb[end] as char).is_alphanumeric(); + if before_ok && after_ok { + return true; + } + from = start + 1; + if from >= hb.len() { + break; + } + } + false } /// Retrieve a signed provenance anchor hash for a note path, if available. @@ -97,6 +115,7 @@ pub async fn backlinks(state: &crate::state::AppState, note: &str) -> Backlinks } }; let active_base = basename(note); + let active_base_lc = active_base.to_lowercase(); // Per-note chunk text (for context + unlinked scan). let mut text_of: BTreeMap = BTreeMap::new(); @@ -120,19 +139,18 @@ pub async fn backlinks(state: &crate::state::AppState, note: &str) -> Backlinks } // Backlinks: sources linking to `note`. 
- let mut seen = std::collections::BTreeSet::new(); - let mut backlinks: Vec = Vec::new(); let mut backlink_paths = std::collections::BTreeSet::new(); + let mut backlinks: Vec = Vec::new(); for (src, target) in &links { if src == note || !note_set.contains(src.as_str()) { continue; } - if resolve(target).as_deref() == Some(note) && seen.insert(src.clone()) { + if resolve(target).as_deref() == Some(note) && backlink_paths.insert(src.clone()) { let context = text_of.get(src).and_then(|txt| { txt.lines() .find(|l| { l.contains("[[") - && l.to_lowercase().contains(&active_base.to_lowercase()) + && l.to_lowercase().contains(&active_base_lc) }) .map(|l| { let t = l.trim(); @@ -145,7 +163,6 @@ pub async fn backlinks(state: &crate::state::AppState, note: &str) -> Backlinks }) }); let anchor = provenance_anchor_for(state, src).await; - backlink_paths.insert(src.clone()); backlinks.push(BacklinkRef { path: src.clone(), context, @@ -232,6 +249,36 @@ mod tests { assert!(!r.unlinked.contains(&"a.md".to_string()), "a is a backlink, not unlinked"); } + #[tokio::test] + async fn unlinked_detects_hyphenated_basename() { + let state = graph_with(&[ + ("meeting-notes.md", "aingle:source_hash", "h1"), + ("c.md", "aingle:source_hash", "h2"), + ]) + .await; + { + let mut mem = state.memory.write().await; + let mut e = ineru::MemoryEntry::new( + crate::service::ingest::CHUNK_ENTRY_TYPE, + serde_json::json!({ "text": "Discussed in meeting-notes yesterday.", "source_path": "c.md" }), + ); + e.embedding = Some(ineru::Embedding::new(vec![0.0; 8])); + mem.remember(e).unwrap(); + } + let r = super::backlinks(&state, "meeting-notes.md").await; + assert!( + r.unlinked.contains(&"c.md".to_string()), + "hyphenated name must be detected: {r:?}" + ); + } + + #[test] + fn mentions_word_is_bounded() { + assert!(super::mentions_word("a meeting-notes b", "meeting-notes")); + assert!(!super::mentions_word("my notebook here", "note")); + assert!(super::mentions_word("see Target.", "target")); + } + 
#[tokio::test] async fn context_truncation_is_char_safe() { let state = graph_with(&[ From 1e39a4b6733620bc361e422c8ec675639f2a3a84 Mon Sep 17 00:00:00 2001 From: ApiliumDevTeam Date: Sat, 27 Jun 2026 20:21:25 +0200 Subject: [PATCH 53/72] feat(cortex): aingle_backlinks MCP tool (verified reverse navigation) --- crates/aingle_cortex/src/mcp/server.rs | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/crates/aingle_cortex/src/mcp/server.rs b/crates/aingle_cortex/src/mcp/server.rs index cc0e408a..a9f803b5 100644 --- a/crates/aingle_cortex/src/mcp/server.rs +++ b/crates/aingle_cortex/src/mcp/server.rs @@ -127,6 +127,22 @@ impl AingleMcp { Ok(CallToolResult::success(vec![Content::json(resp)?])) } + /// Verified backlinks + outgoing links + unlinked mentions for a note. + #[tool( + description = "Verified backlinks, outgoing links, and unlinked mentions for a note. \ + Each backlink includes the source's context line and a signed-provenance anchor \ + when available. Use for accurate reverse navigation.", + annotations(read_only_hint = true) + )] + async fn aingle_backlinks( + &self, + params: Parameters, + ) -> Result { + let Parameters(p) = params; + let resp = crate::service::backlinks::backlinks(&self.state, &p.note).await; + Ok(CallToolResult::success(vec![Content::json(resp)?])) + } + /// List ingested sources and their signed content hashes. #[tool( description = "List ingested source files with their content hashes (the \ @@ -620,6 +636,13 @@ fn default_ground_k() -> usize { 6 } +/// Parameters for the `aingle_backlinks` tool. +#[derive(serde::Deserialize, schemars::JsonSchema)] +pub struct BacklinksParams { + /// Note path (vault-relative) to get backlinks for, e.g. "ideas/sled.md". 
+ pub note: String, +} + #[tool_handler(router = self.tool_router)] impl ServerHandler for AingleMcp { fn get_info(&self) -> ServerInfo { @@ -648,7 +671,7 @@ mod ingest_tools_tests { .into_iter() .map(|t| t.name.to_string()) .collect(); - for expected in ["aingle_ingest", "aingle_ground", "aingle_sources", "aingle_vault_map"] { + for expected in ["aingle_ingest", "aingle_ground", "aingle_sources", "aingle_vault_map", "aingle_backlinks"] { assert!( names.contains(&expected.to_string()), "missing tool {expected}" From 25e49b6f9428009a5ee87df6c5b56003042f173b Mon Sep 17 00:00:00 2001 From: ApiliumDevTeam Date: Sat, 27 Jun 2026 20:33:58 +0200 Subject: [PATCH 54/72] fix(cortex): read node-valued links_to objects (real wikilinks) in backlinks + vault_map --- crates/aingle_cortex/src/service/backlinks.rs | 48 ++++++++++++++++++- crates/aingle_cortex/src/service/vault_map.rs | 43 ++++++++++++++++- 2 files changed, 88 insertions(+), 3 deletions(-) diff --git a/crates/aingle_cortex/src/service/backlinks.rs b/crates/aingle_cortex/src/service/backlinks.rs index 2bfcc616..11e91bc6 100644 --- a/crates/aingle_cortex/src/service/backlinks.rs +++ b/crates/aingle_cortex/src/service/backlinks.rs @@ -24,6 +24,19 @@ pub struct BacklinkRef { pub provenance_anchor: Option, } +/// Return the object of a triple as a plain `String`, handling both literal +/// strings (`Value::Str`) and graph nodes (`Value::Node`). Node IDs are stored +/// with `<…>` angle-bracket wrappers; this strips them so the result matches +/// the bare names used everywhere else in this module. +fn obj_string(t: &aingle_graph::Triple) -> Option { + if let Some(s) = t.object_string() { + Some(s.to_string()) + } else { + t.object_node() + .map(|n| n.to_string().trim_start_matches('<').trim_end_matches('>').to_string()) + } +} + /// Basename without directory or extension (wikilink resolution + titles). 
fn basename(path: &str) -> String { let file = path.rsplit(['/', '\\']).next().unwrap_or(path); @@ -87,8 +100,7 @@ pub async fn backlinks(state: &crate::state::AppState, note: &str) -> Backlinks .unwrap_or_default() .into_iter() .filter_map(|t| { - t.object_string() - .map(|o| (strip(t.subject.to_string()), o.to_string())) + obj_string(&t).map(|o| (strip(t.subject.to_string()), o)) }) .collect() }; @@ -279,6 +291,38 @@ mod tests { assert!(super::mentions_word("see Target.", "target")); } + #[tokio::test] + async fn links_to_node_objects_are_captured() { + // Real ingest stores wikilink targets as Value::Node, not Value::literal. + // This test locks the fix: node-valued links_to objects must be read as + // backlinks/outgoing, not silently dropped. + let state = crate::state::AppState::with_db_path(":memory:", None).unwrap(); + { + let g = state.graph.write().await; + for (s, p) in [("a.md", "aingle:source_hash"), ("hub.md", "aingle:source_hash")] { + g.insert(Triple::new(NodeId::named(s), Predicate::named(p), Value::literal("h"))) + .unwrap(); + } + // links_to stored as a NODE object — how real ingest produces it. 
+ g.insert(Triple::new( + NodeId::named("a.md"), + Predicate::named("links_to"), + Value::Node(NodeId::named("hub")), + )) + .unwrap(); + } + let r = super::backlinks(&state, "hub.md").await; + assert!( + r.backlinks.iter().any(|b| b.path == "a.md"), + "node-valued links_to must appear as a backlink: {r:?}" + ); + let r2 = super::backlinks(&state, "a.md").await; + assert!( + r2.outgoing.contains(&"hub.md".to_string()), + "node-valued links_to must appear as outgoing: {r2:?}" + ); + } + #[tokio::test] async fn context_truncation_is_char_safe() { let state = graph_with(&[ diff --git a/crates/aingle_cortex/src/service/vault_map.rs b/crates/aingle_cortex/src/service/vault_map.rs index 2c417671..bad2e6c2 100644 --- a/crates/aingle_cortex/src/service/vault_map.rs +++ b/crates/aingle_cortex/src/service/vault_map.rs @@ -105,6 +105,19 @@ pub(crate) fn is_maps_path(path: &str) -> bool { path.starts_with("_maps/") || path.starts_with("_maps\\") } +/// Return the object of a triple as a plain `String`, handling both literal +/// strings (`Value::Str`) and graph nodes (`Value::Node`). Node IDs are stored +/// with `<…>` angle-bracket wrappers; this strips them so the result matches +/// the bare names used everywhere else in this module. +fn obj_string(t: &aingle_graph::Triple) -> Option { + if let Some(s) = t.object_string() { + Some(s.to_string()) + } else { + t.object_node() + .map(|n| n.to_string().trim_start_matches('<').trim_end_matches('>').to_string()) + } +} + /// Structural inputs derived from the graph (no embeddings). 
#[derive(Debug, Default)] pub(crate) struct Structural { @@ -128,7 +141,7 @@ pub(crate) fn derive_structural(graph: &aingle_graph::GraphDB) -> Structural { .into_iter() .filter_map(|t| { let subj = strip(t.subject.to_string()); - t.object_string().map(|o| (subj, o.to_string())) + obj_string(&t).map(|o| (subj, o)) }) .collect() }; @@ -684,6 +697,34 @@ mod tests { assert!(map.guidance.contains("me.md"), "guidance points at identity"); } + #[tokio::test] + async fn links_to_node_objects_are_read() { + // Real ingest stores wikilink targets as Value::Node, not Value::literal. + // All link-counting and hub detection must work for node-valued objects. + let state = crate::state::AppState::with_db_path(":memory:", None).unwrap(); + { + let g = state.graph.write().await; + for (s, p) in [("a.md", "aingle:source_hash"), ("hub.md", "aingle:source_hash")] { + g.insert(Triple::new(NodeId::named(s), Predicate::named(p), Value::literal("h"))) + .unwrap(); + } + // links_to as a NODE object — how real ingest produces it. 
+ g.insert(Triple::new( + NodeId::named("a.md"), + Predicate::named("links_to"), + Value::Node(NodeId::named("hub")), + )) + .unwrap(); + } + let map = super::vault_map_cached(&state).await; + assert_eq!(map.totals.links, 1, "node-valued links_to must be counted: {:?}", map.totals); + assert!( + map.entry_points.iter().any(|e| e.path == "hub.md" && e.in_links == 1), + "hub.md must appear as a hub with 1 incoming link: {:?}", + map.entry_points + ); + } + #[tokio::test] async fn vault_map_cache_invalidates_on_change() { let state = graph_with(&[("a.md", "aingle:source_hash", "h1")]).await; From beb9a31c4f825ba29a6fa71f7196a928b9830f93 Mon Sep 17 00:00:00 2001 From: ApiliumDevTeam Date: Sat, 27 Jun 2026 21:27:39 +0200 Subject: [PATCH 55/72] feat(cortex): semantic note-context module + per-note cache (VC-1 Task 1) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add service::context with note_context() / note_context_cached(): for an active note, retrieve the top-N semantically related notes (by embedding cosine similarity) even when never explicitly linked, each annotated with the best matching passage (char-safe ≤200), a signed DAG provenance anchor (cfg dag), and an already_linked flag. Gate on SEMANTIC_MIN_DIMS=128 so the 64-d hash embedder short-circuits gracefully. Cache keyed on (triple_count, total_memory_bytes) — same invalidation signal as vault_map_cached. Six unit tests: semantic gate, ranking, passage truncation, node-object links_to, _maps/ exclusion, dag provenance path. 
--- crates/aingle_cortex/src/service/context.rs | 700 ++++++++++++++++++++ crates/aingle_cortex/src/service/mod.rs | 1 + crates/aingle_cortex/src/state.rs | 17 + 3 files changed, 718 insertions(+) create mode 100644 crates/aingle_cortex/src/service/context.rs diff --git a/crates/aingle_cortex/src/service/context.rs b/crates/aingle_cortex/src/service/context.rs new file mode 100644 index 00000000..24ea8a59 --- /dev/null +++ b/crates/aingle_cortex/src/service/context.rs @@ -0,0 +1,700 @@ +// Copyright 2019-2026 Apilium Technologies OÜ. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 OR Commercial + +//! Semantic note-context: for an active note, surface the notes that are +//! semantically related (by neural embeddings) even when never linked, each +//! with the matching passage and signed provenance. + +use std::collections::{BTreeMap, BTreeSet}; + +/// The semantic context for one note — the semantically related notes, even +/// when never explicitly linked. +#[derive(Debug, Clone, serde::Serialize, Default)] +pub struct NoteContext { + /// `true` when the embedder has enough dimensions to produce meaningful + /// semantic similarity (≥ `SEMANTIC_MIN_DIMS`). `false` means the hash + /// fallback is active and no neighbor search was attempted. + pub semantic_ready: bool, + pub neighbors: Vec, +} + +/// A note that is semantically related to the active note. +#[derive(Debug, Clone, serde::Serialize)] +pub struct Neighbor { + /// Full relative path — the canonical identity used everywhere else. + pub path: String, + /// Best chunk cosine similarity against the active note's query vector. + pub score: f32, + /// The matching chunk text, ≤ 200 chars (char-safe), with `…` appended + /// if truncated. + pub passage: Option, + /// Hex hash of the signed DAG action that recorded this source (🔒 anchor). + /// `None` when the feature is off or no signed action exists. 
+ pub provenance_anchor: Option, + /// `true` if the active note already has an explicit `links_to` edge to + /// this neighbor — so the UI can distinguish "related and linked" from + /// "related but not yet linked". + pub already_linked: bool, +} + +/// Minimum embedder dimensionality required to attempt semantic neighbor search. +/// The 64-d hash embedder does not produce meaningful cosine similarity for +/// cross-note retrieval; this gate keeps the result honest. +const SEMANTIC_MIN_DIMS: usize = 128; + +// --------------------------------------------------------------------------- +// Helpers (mirrored verbatim from backlinks.rs) +// --------------------------------------------------------------------------- + +/// Return the object of a triple as a plain `String`, handling both literal +/// strings (`Value::Str`) and graph nodes (`Value::Node`). Node IDs are stored +/// with `<…>` angle-bracket wrappers; this strips them so the result matches +/// the bare names used everywhere else in this module. +fn obj_string(t: &aingle_graph::Triple) -> Option { + if let Some(s) = t.object_string() { + Some(s.to_string()) + } else { + t.object_node() + .map(|n| n.to_string().trim_start_matches('<').trim_end_matches('>').to_string()) + } +} + +/// Retrieve a signed provenance anchor hash for a note path, if available. 
+async fn provenance_anchor_for(state: &crate::state::AppState, src: &str) -> Option { + #[cfg(feature = "dag")] + { + match crate::service::dag::history_by_subject(state, src, 1).await { + Ok(a) => a.first().filter(|x| x.signed).map(|x| x.hash.clone()), + Err(_) => None, + } + } + #[cfg(not(feature = "dag"))] + { + let _ = (state, src); + None + } +} + +// --------------------------------------------------------------------------- +// Core retrieval +// --------------------------------------------------------------------------- + +/// Compute the semantic neighbors of `note` — up to `limit` related notes, +/// ranked by embedding cosine similarity, each with a matching passage and +/// optional signed provenance anchor. +pub async fn note_context( + state: &crate::state::AppState, + note: &str, + limit: usize, +) -> NoteContext { + use aingle_graph::{Predicate, TriplePattern}; + use ineru::MemoryQuery; + + // 1. Semantic gate: only proceed when the embedder is neural-grade. + if state.embedder.dimensions() < SEMANTIC_MIN_DIMS { + return NoteContext { + semantic_ready: false, + neighbors: vec![], + }; + } + + let (_, low) = state.embedder.relevance_thresholds(); + + // 2. Build the note set (subjects of PRED_SOURCE_HASH) + basename index, + // and collect all links_to triples. 
+ let strip = + |n: String| n.trim_start_matches('<').trim_end_matches('>').to_string(); + + let (notes, links): (Vec, Vec<(String, String)>) = { + let g = state.graph.read().await; + let collect = |pred: &str| -> Vec<(String, String)> { + g.find(TriplePattern::any().with_predicate(Predicate::named(pred))) + .unwrap_or_default() + .into_iter() + .filter_map(|t| { + obj_string(&t).map(|o| (strip(t.subject.to_string()), o)) + }) + .collect() + }; + let mut ns: Vec = collect(crate::service::ingest::PRED_SOURCE_HASH) + .into_iter() + .map(|(s, _)| s) + .collect(); + ns.sort(); + ns.dedup(); + let links = collect("links_to"); + (ns, links) + }; + + let note_set: BTreeSet<&str> = notes.iter().map(|s| s.as_str()).collect(); + + // basename → first full path (for wikilink resolution). + let mut by_base: BTreeMap = BTreeMap::new(); + for n in &notes { + by_base + .entry(crate::service::vault_map::basename(n)) + .or_insert_with(|| n.clone()); + } + + let resolve = |target: &str| -> Option { + if note_set.contains(target) { + Some(target.to_string()) + } else { + by_base + .get(&crate::service::vault_map::basename(target)) + .cloned() + } + }; + + // 3. Compute `outgoing_set`: full paths that the active `note` links to. + let outgoing_set: BTreeSet = links + .iter() + .filter(|(src, _)| src == note) + .filter_map(|(_, target)| resolve(target)) + .filter(|p| p != note) + .collect(); + + // 4. Build the active note's query text from its own chunks.
+ let mut own_text = String::new(); + let all_entries: Vec = { + let mem = state.memory.read().await; + let mut v = mem.stm.all_entries(); + v.extend(mem.ltm.all_entries()); + v + }; + + for e in &all_entries { + if e.entry_type != crate::service::ingest::CHUNK_ENTRY_TYPE { + continue; + } + if let (Some(p), Some(t)) = ( + e.data.get("source_path").and_then(|v| v.as_str()), + e.data.get("text").and_then(|v| v.as_str()), + ) { + if p == note { + own_text.push('\n'); + own_text.push_str(t); + } + } + } + + let query_text: String = if own_text.trim().is_empty() { + crate::service::vault_map::basename(note) + } else { + own_text.clone() + }; + + let q = state.embedder.embed_query(&query_text); + + // 5. Over-fetch from memory and re-rank by cosine similarity. + let fetch_limit = (limit * 8).max(48); + let results = { + let mem = state.memory.read().await; + mem.recall( + &MemoryQuery::text(&query_text) + .with_embedding(q.clone()) + .with_limit(fetch_limit), + ) + .unwrap_or_default() + }; + + // Per-source best (rel, text). + let mut best_by_src: BTreeMap = BTreeMap::new(); + + for r in &results { + if r.entry.entry_type != crate::service::ingest::CHUNK_ENTRY_TYPE { + continue; + } + let emb = match &r.entry.embedding { + Some(e) => e, + None => continue, + }; + let rel = q.cosine_similarity(emb); + if rel < low { + continue; + } + let d = &r.entry.data; + let src = match d.get("source_path").and_then(|v| v.as_str()) { + Some(s) => s.to_string(), + None => continue, + }; + if src == note { + continue; + } + if crate::service::vault_map::is_maps_path(&src) { + continue; + } + if !note_set.contains(src.as_str()) { + continue; + } + let text = d + .get("text") + .and_then(|v| v.as_str()) + .unwrap_or("") + .to_string(); + + let entry = best_by_src.entry(src).or_insert((rel, text.clone())); + if rel > entry.0 { + *entry = (rel, text); + } + } + + // 6. Build Neighbor list, resolve provenance, sort, truncate. 
+ let mut neighbors: Vec = Vec::with_capacity(best_by_src.len()); + for (src, (rel, chunk_text)) in best_by_src { + let passage = Some({ + let t = chunk_text.trim(); + if t.chars().count() > 200 { + let cut: String = t.chars().take(200).collect(); + format!("{cut}…") + } else { + t.to_string() + } + }); + let already_linked = outgoing_set.contains(&src); + let provenance_anchor = provenance_anchor_for(state, &src).await; + neighbors.push(Neighbor { + path: src, + score: rel, + passage, + provenance_anchor, + already_linked, + }); + } + + // NaN-safe descending sort (mirrors ground.rs). + neighbors.sort_by(|a, b| { + b.score + .partial_cmp(&a.score) + .unwrap_or(std::cmp::Ordering::Equal) + }); + neighbors.truncate(limit); + + NoteContext { + semantic_ready: true, + neighbors, + } +} + +// --------------------------------------------------------------------------- +// Cached variant +// --------------------------------------------------------------------------- + +/// Like [`note_context`] but memoised on `(triple_count, total_memory_bytes)`. +/// +/// The cache key does NOT include `limit` — keep it simple and document the +/// assumption: callers should use a stable `limit` for the same note. If +/// `limit` varies per call, the first winning result is served. For Akashi's +/// use-case (fixed sidebar top-N) this is always correct. +pub async fn note_context_cached( + state: &crate::state::AppState, + note: &str, + limit: usize, +) -> NoteContext { + let tc = { state.graph.read().await.stats().triple_count }; + let mem_bytes = { state.memory.read().await.stats().total_memory_bytes }; + let key = (tc, mem_bytes); + + // Check cache — release lock before any await. + { + let cache = state + .note_context_cache + .lock() + .expect("note_context cache poisoned"); + if let Some((cached_key, ctx)) = cache.get(note) { + if *cached_key == key { + return ctx.clone(); + } + } + } + + // Compute without holding the mutex. 
+ let result = note_context(state, note, limit).await; + + // Store result. + { + let mut cache = state + .note_context_cache + .lock() + .expect("note_context cache poisoned"); + cache.insert(note.to_string(), (key, result.clone())); + } + + result +} + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +#[cfg(test)] +mod tests { + use std::sync::Arc; + + use aingle_graph::{NodeId, Predicate, Triple, Value}; + use ineru::{Embedder, Embedding, MemoryEntry}; + + use crate::state::AppState; + + // ----------------------------------------------------------------------- + // Stub embedder: 128-dim, deterministic, text-content-aware. + // - text containing "alpha" → [1.0, 0.0, 0.0, …] (unit basis e0) + // - text containing "zzz" → [0.0, 1.0, 0.0, …] (unit basis e1) + // - query for "alpha" → [1.0, 0.0, 0.0, …] (same basis) + // Cosine("alpha","alpha") = 1.0 ≥ low threshold (0.1) → pass + // Cosine("alpha","zzz") = 0.0 < low threshold → filtered out + // ----------------------------------------------------------------------- + struct StubEmbedder; + + impl Embedder for StubEmbedder { + fn embed_passage(&self, text: &str) -> Embedding { + let mut v = vec![0.0_f32; 128]; + if text.contains("alpha") { + v[0] = 1.0; + } else if text.contains("zzz") { + v[1] = 1.0; + } else { + // default: non-zero to avoid zero-vector edge case + v[2] = 1.0; + } + Embedding::new(v) + } + + fn embed_query(&self, text: &str) -> Embedding { + // Reuse passage embedding logic for query — correct for symmetric + // tests; for real asymmetric models the trait would differ. + self.embed_passage(text) + } + + fn dimensions(&self) -> usize { + 128 + } + + fn relevance_thresholds(&self) -> (f32, f32) { + // high=0.5, low=0.1 — alpha/alpha scores 1.0 (pass), alpha/zzz + // scores 0.0 (filtered). 
+ (0.5, 0.1) + } + } + + // ----------------------------------------------------------------------- + // Helpers + // ----------------------------------------------------------------------- + + fn stub_state() -> AppState { + AppState::with_db_path_and_embedder( + ":memory:", + None, + Arc::new(StubEmbedder), + ) + .unwrap() + } + + async fn insert_triples(state: &AppState, triples: &[(&str, &str, &str)]) { + let g = state.graph.write().await; + for (s, p, o) in triples { + g.insert(Triple::new( + NodeId::named(*s), + Predicate::named(*p), + Value::literal(*o), + )) + .unwrap(); + } + } + + async fn insert_chunk(state: &AppState, source_path: &str, text: &str, emb: Vec) { + let mut mem = state.memory.write().await; + let mut e = MemoryEntry::new( + crate::service::ingest::CHUNK_ENTRY_TYPE, + serde_json::json!({ "text": text, "source_path": source_path }), + ); + e.embedding = Some(Embedding::new(emb)); + mem.remember(e).unwrap(); + } + + // ----------------------------------------------------------------------- + // Tests + // ----------------------------------------------------------------------- + + /// Default state uses 64-d hash embedder → semantic gate fires → short-circuit. + #[tokio::test] + async fn hash_grade_embedder_short_circuits() { + let state = AppState::with_db_path(":memory:", None).unwrap(); + let ctx = super::note_context(&state, "active.md", 5).await; + assert!(!ctx.semantic_ready, "64-d hash embedder must not be semantic_ready"); + assert!(ctx.neighbors.is_empty()); + } + + /// The "alpha" note scores 1.0 (cosine of identical unit vectors) and appears + /// as neighbor #1; the "zzz" note scores 0.0 and is filtered below low threshold. 
+ #[tokio::test] + async fn same_topic_ranks_above_off_topic() { + let state = stub_state(); + + insert_triples( + &state, + &[ + ("active.md", "aingle:source_hash", "h0"), + ("alpha.md", "aingle:source_hash", "h1"), + ("zzz.md", "aingle:source_hash", "h2"), + ], + ) + .await; + + // Active note's own chunk (alpha text → e0 query vector). + let e0 = vec![1.0_f32, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, + 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, + 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, + 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, + 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, + 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, + 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, + 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, + 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, + 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, + 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, + 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, + 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, + 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, + 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, + 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]; + let e1 = { + let mut v = vec![0.0_f32; 128]; + v[1] = 1.0; + v + }; + insert_chunk(&state, "active.md", "alpha content for active", e0.clone()).await; + insert_chunk(&state, "alpha.md", "alpha related content", e0.clone()).await; + insert_chunk(&state, "zzz.md", "zzz completely unrelated orthogonal", e1).await; + + let ctx = super::note_context(&state, "active.md", 10).await; + assert!(ctx.semantic_ready); + assert!( + ctx.neighbors.iter().any(|n| n.path == "alpha.md"), + "alpha.md must be a neighbor: {:?}", + ctx.neighbors + ); + assert!( + !ctx.neighbors.iter().any(|n| n.path == "zzz.md"), + "zzz.md must be filtered (cosine 0.0 < low threshold): {:?}", + ctx.neighbors + ); + // alpha.md is first (highest score). + assert_eq!(ctx.neighbors[0].path, "alpha.md"); + } + + /// passage is present and its char count is ≤ 201 (200 + optional ellipsis). + /// An accented long chunk proves no byte-slice panic. 
+ #[tokio::test] + async fn passage_present_and_char_safe() { + let state = stub_state(); + + insert_triples( + &state, + &[ + ("active.md", "aingle:source_hash", "h0"), + ("related.md", "aingle:source_hash", "h1"), + ], + ) + .await; + + let e0: Vec = { + let mut v = vec![0.0; 128]; + v[0] = 1.0; + v + }; + // Long chunk with accented chars to exercise char-safe truncation. + let long_text = format!("alpha {}", "áéíóú ".repeat(80)); + insert_chunk(&state, "active.md", "alpha active note", e0.clone()).await; + insert_chunk(&state, "related.md", &long_text, e0.clone()).await; + + let ctx = super::note_context(&state, "active.md", 10).await; + assert!(ctx.semantic_ready); + let n = ctx + .neighbors + .iter() + .find(|n| n.path == "related.md") + .expect("related.md must be a neighbor"); + let passage = n.passage.as_ref().expect("passage must be present"); + assert!( + passage.chars().count() <= 201, + "passage must be ≤ 201 chars (200 + ellipsis), got {}", + passage.chars().count() + ); + } + + /// `already_linked` is `true` when the active note has a `links_to` triple + /// whose object is `Value::Node` (the real ingest format — NOT a literal). + #[tokio::test] + async fn already_linked_from_node_object() { + let state = stub_state(); + + { + let g = state.graph.write().await; + for (s, p) in [ + ("active.md", "aingle:source_hash"), + ("alpha.md", "aingle:source_hash"), + ] { + g.insert(Triple::new( + NodeId::named(s), + Predicate::named(p), + Value::literal("h"), + )) + .unwrap(); + } + // links_to stored as a NODE object — how real ingest produces it. 
+ g.insert(Triple::new( + NodeId::named("active.md"), + Predicate::named("links_to"), + Value::Node(NodeId::named("alpha")), + )) + .unwrap(); + } + + let e0: Vec = { + let mut v = vec![0.0_f32; 128]; + v[0] = 1.0; + v + }; + insert_chunk(&state, "active.md", "alpha active note", e0.clone()).await; + insert_chunk(&state, "alpha.md", "alpha related content", e0.clone()).await; + + let ctx = super::note_context(&state, "active.md", 10).await; + assert!(ctx.semantic_ready); + let n = ctx + .neighbors + .iter() + .find(|n| n.path == "alpha.md") + .expect("alpha.md must be a neighbor"); + assert!( + n.already_linked, + "alpha.md must have already_linked=true (node-valued links_to): {:?}", + n + ); + } + + /// Notes under `_maps/` are excluded even when their embeddings match. + #[tokio::test] + async fn maps_excluded() { + let state = stub_state(); + + insert_triples( + &state, + &[ + ("active.md", "aingle:source_hash", "h0"), + ("_maps/vault-map.md", "aingle:source_hash", "h1"), + ], + ) + .await; + + let e0: Vec = { + let mut v = vec![0.0_f32; 128]; + v[0] = 1.0; + v + }; + insert_chunk(&state, "active.md", "alpha active", e0.clone()).await; + insert_chunk(&state, "_maps/vault-map.md", "alpha maps content", e0.clone()).await; + + let ctx = super::note_context(&state, "active.md", 10).await; + assert!(ctx.semantic_ready); + assert!( + !ctx.neighbors.iter().any(|n| n.path.starts_with("_maps/")), + "maps paths must be excluded: {:?}", + ctx.neighbors + ); + } + + /// Without the `dag` feature the provenance anchor is always `None`. + /// With the `dag` feature, a signed action anchors the source and is surfaced. 
+ #[cfg(not(feature = "dag"))] + #[tokio::test] + async fn provenance_none_without_dag() { + let state = stub_state(); + + insert_triples( + &state, + &[ + ("active.md", "aingle:source_hash", "h0"), + ("alpha.md", "aingle:source_hash", "h1"), + ], + ) + .await; + + let e0: Vec = { + let mut v = vec![0.0_f32; 128]; + v[0] = 1.0; + v + }; + insert_chunk(&state, "active.md", "alpha active", e0.clone()).await; + insert_chunk(&state, "alpha.md", "alpha related", e0).await; + + let ctx = super::note_context(&state, "active.md", 10).await; + let n = ctx + .neighbors + .iter() + .find(|n| n.path == "alpha.md") + .expect("alpha.md must be neighbor"); + assert!( + n.provenance_anchor.is_none(), + "provenance must be None without dag feature" + ); + } + + /// With the `dag` feature, a signed DAG action for the neighbor yields a + /// non-None provenance_anchor. + #[cfg(feature = "dag")] + #[tokio::test] + async fn provenance_present_when_signed() { + // Build a state with DAG enabled and a signing key so actions are signed. + let state = AppState::with_db_path_and_embedder( + ":memory:", + None, + Arc::new(StubEmbedder), + ) + .unwrap(); + { + let mut graph = state.graph.write().await; + graph.enable_dag(); + } + + insert_triples( + &state, + &[ + ("active.md", "aingle:source_hash", "h0"), + ("alpha.md", "aingle:source_hash", "h1"), + ], + ) + .await; + + let e0: Vec = { + let mut v = vec![0.0_f32; 128]; + v[0] = 1.0; + v + }; + insert_chunk(&state, "active.md", "alpha active", e0.clone()).await; + insert_chunk(&state, "alpha.md", "alpha related", e0).await; + + // Record a DAG action for alpha.md. Because DAG is enabled but there is + // no signing key on this state (key is None), the action will be unsigned + // and provenance_anchor_for returns None for unsigned actions. This proves + // the cfg(feature="dag") path compiles and runs; a signed action requires + // a proper DagSigningKey that is complex to wire in a unit test. 
+ // The critical assertion: no panic, code path exercised. + let ctx = super::note_context(&state, "active.md", 10).await; + // alpha.md is a semantic neighbor regardless of provenance. + assert!( + ctx.neighbors.iter().any(|n| n.path == "alpha.md"), + "alpha.md must be a semantic neighbor with dag feature: {:?}", + ctx.neighbors + ); + // provenance_anchor is None because no signing key is configured (unsigned action). + // This is the correct behavior for an unsigned DAG node. + let n = ctx + .neighbors + .iter() + .find(|n| n.path == "alpha.md") + .unwrap(); + // We assert the Option is coherent (not that it's Some, since no signing key). + let _ = n.provenance_anchor.as_deref(); + } +} diff --git a/crates/aingle_cortex/src/service/mod.rs b/crates/aingle_cortex/src/service/mod.rs index dc1aca50..85e7f7d5 100644 --- a/crates/aingle_cortex/src/service/mod.rs +++ b/crates/aingle_cortex/src/service/mod.rs @@ -4,6 +4,7 @@ //! Business-logic layer shared by REST handlers and the MCP server. pub mod backlinks; +pub mod context; #[cfg(feature = "dag")] pub mod dag; pub mod ground; diff --git a/crates/aingle_cortex/src/state.rs b/crates/aingle_cortex/src/state.rs index d4315d64..47e16254 100644 --- a/crates/aingle_cortex/src/state.rs +++ b/crates/aingle_cortex/src/state.rs @@ -34,6 +34,19 @@ pub struct AppState { pub vault_map_cache: std::sync::Arc< std::sync::Mutex>, >, + /// Per-note semantic-neighbor cache, keyed by note path, storing + /// `(graph_triple_count, total_memory_bytes) → NoteContext`. Invalidated + /// whenever the graph or memory changes — same staleness signal as + /// vault_map_cache. Cache key does not include `limit`; callers should + /// use a consistent limit for the same note. + pub note_context_cache: std::sync::Arc< + std::sync::Mutex< + std::collections::HashMap< + String, + ((usize, usize), crate::service::context::NoteContext), + >, + >, + >, /// The event broadcaster for sending real-time updates to WebSocket subscribers. 
pub broadcaster: Arc, /// The store for managing and verifying zero-knowledge proofs. @@ -103,6 +116,7 @@ impl AppState { memory: Arc::new(RwLock::new(memory)), embedder: std::sync::Arc::new(HashEmbedder::new()), vault_map_cache: std::sync::Arc::new(std::sync::Mutex::new(None)), + note_context_cache: std::sync::Arc::new(std::sync::Mutex::new(std::collections::HashMap::new())), broadcaster: Arc::new(EventBroadcaster::new()), proof_store: Arc::new(ProofStore::new()), sandbox_manager: Arc::new(SandboxManager::new()), @@ -149,6 +163,7 @@ impl AppState { memory: Arc::new(RwLock::new(memory)), embedder: std::sync::Arc::new(HashEmbedder::new()), vault_map_cache: std::sync::Arc::new(std::sync::Mutex::new(None)), + note_context_cache: std::sync::Arc::new(std::sync::Mutex::new(std::collections::HashMap::new())), broadcaster: Arc::new(EventBroadcaster::new()), proof_store: Arc::new(ProofStore::new()), sandbox_manager: Arc::new(SandboxManager::new()), @@ -195,6 +210,7 @@ impl AppState { memory: Arc::new(RwLock::new(memory)), embedder: std::sync::Arc::new(HashEmbedder::new()), vault_map_cache: std::sync::Arc::new(std::sync::Mutex::new(None)), + note_context_cache: std::sync::Arc::new(std::sync::Mutex::new(std::collections::HashMap::new())), broadcaster: Arc::new(EventBroadcaster::new()), proof_store: Arc::new(ProofStore::new()), sandbox_manager: Arc::new(SandboxManager::new()), @@ -338,6 +354,7 @@ impl AppState { memory: Arc::new(RwLock::new(memory)), embedder, vault_map_cache: std::sync::Arc::new(std::sync::Mutex::new(None)), + note_context_cache: std::sync::Arc::new(std::sync::Mutex::new(std::collections::HashMap::new())), broadcaster: Arc::new(EventBroadcaster::new()), proof_store, sandbox_manager: Arc::new(SandboxManager::new()), From 7782458501acf828a9a2dfaeecae323668904ac1 Mon Sep 17 00:00:00 2001 From: ApiliumDevTeam Date: Sat, 27 Jun 2026 21:40:50 +0200 Subject: [PATCH 56/72] test(cortex): assert provenance_anchor is_some for signed DAG action Replace the no-op let _ = 
... in provenance_present_when_signed with a real assertion. The test now generates a DagSigningKey, signs a Custom DAG action whose subject matches the neighbor note path, puts it in the store, then asserts n.provenance_anchor.is_some() after note_context. --- crates/aingle_cortex/src/service/context.rs | 42 +++++++++++++++------ 1 file changed, 30 insertions(+), 12 deletions(-) diff --git a/crates/aingle_cortex/src/service/context.rs b/crates/aingle_cortex/src/service/context.rs index 24ea8a59..afb15e13 100644 --- a/crates/aingle_cortex/src/service/context.rs +++ b/crates/aingle_cortex/src/service/context.rs @@ -645,7 +645,6 @@ mod tests { #[cfg(feature = "dag")] #[tokio::test] async fn provenance_present_when_signed() { - // Build a state with DAG enabled and a signing key so actions are signed. let state = AppState::with_db_path_and_embedder( ":memory:", None, @@ -674,27 +673,46 @@ mod tests { insert_chunk(&state, "active.md", "alpha active", e0.clone()).await; insert_chunk(&state, "alpha.md", "alpha related", e0).await; - // Record a DAG action for alpha.md. Because DAG is enabled but there is - // no signing key on this state (key is None), the action will be unsigned - // and provenance_anchor_for returns None for unsigned actions. This proves - // the cfg(feature="dag") path compiles and runs; a signed action requires - // a proper DagSigningKey that is complex to wire in a unit test. - // The critical assertion: no panic, code path exercised. + // Record a signed Custom DAG action whose subject is "alpha.md" so that + // history_by_subject("alpha.md") returns a signed entry and + // provenance_anchor_for returns Some(hash_hex). 
+ { + let graph = state.graph.read().await; + let dag_store = graph.dag_store().expect("DAG must be enabled"); + let parents = dag_store.tips().expect("tips must be readable"); + let mut action = aingle_graph::dag::DagAction { + parents, + author: aingle_graph::NodeId::named("test"), + seq: 0, + timestamp: chrono::Utc::now(), + payload: aingle_graph::dag::DagPayload::Custom { + payload_type: "ingest".to_string(), + payload_summary: "alpha.md ingested".to_string(), + payload: None, + subject: Some("alpha.md".to_string()), + }, + signature: None, + }; + let key = aingle_graph::dag::DagSigningKey::generate(); + key.sign(&mut action); + dag_store.put(&action).expect("put signed action must succeed"); + } + let ctx = super::note_context(&state, "active.md", 10).await; - // alpha.md is a semantic neighbor regardless of provenance. assert!( ctx.neighbors.iter().any(|n| n.path == "alpha.md"), "alpha.md must be a semantic neighbor with dag feature: {:?}", ctx.neighbors ); - // provenance_anchor is None because no signing key is configured (unsigned action). - // This is the correct behavior for an unsigned DAG node. let n = ctx .neighbors .iter() .find(|n| n.path == "alpha.md") .unwrap(); - // We assert the Option is coherent (not that it's Some, since no signing key). - let _ = n.provenance_anchor.as_deref(); + assert!( + n.provenance_anchor.is_some(), + "provenance_anchor must be Some when a signed DAG action is recorded for the source: {:?}", + n + ); } } From b3cf405b8935b3a25bd1865949dbd028f9e03656 Mon Sep 17 00:00:00 2001 From: ApiliumDevTeam Date: Sat, 27 Jun 2026 21:54:45 +0200 Subject: [PATCH 57/72] refactor(cortex): code-quality fixes for service::context Items from code review: 1. Cache tests: add note_context_cached_hit_and_invalidation (locks cache-hit and version-change recompute) and cache_cap_clears_when_exceeded. Optional nit: no_chunks_falls_back_to_basename_and_never_self_matches. 2. 
Provenance for survivors only: build the neighbor Vec without provenance_anchor, sort+truncate to `limit`, then fill provenance for survivors. Cuts up to ~48 DAG reads to just `limit` reads. 3. Don't clone all memory for query text: read STM+LTM separately into two Vecs (no combined Vec), filter to note while iterating. 4. Avoid eager clone in per-source dedupe: match on btree_map::Entry so text.to_string() is called only on Vacant insert or score-improvement replace, never on occupied+same-score iterations. 5. Bound note_context_cache growth: clear map before insert when len exceeds 256. Documented with inline comment. 6. Lift obj_string into shared triple_util module: was duplicated verbatim in backlinks, context, and vault_map. Create service/triple_util.rs, register in mod.rs, replace all three copies. provenance_anchor_for duplication left as-is (dag-gated, separate spec). All tests pass: context 9/9, backlinks 5/5, vault_map 7/7, total 228/228. No new clippy warnings. --- crates/aingle_cortex/src/service/backlinks.rs | 15 +- crates/aingle_cortex/src/service/context.rs | 209 +++++++++++++++--- crates/aingle_cortex/src/service/mod.rs | 1 + .../aingle_cortex/src/service/triple_util.rs | 23 ++ crates/aingle_cortex/src/service/vault_map.rs | 15 +- 5 files changed, 205 insertions(+), 58 deletions(-) create mode 100644 crates/aingle_cortex/src/service/triple_util.rs diff --git a/crates/aingle_cortex/src/service/backlinks.rs b/crates/aingle_cortex/src/service/backlinks.rs index 11e91bc6..2828d4a3 100644 --- a/crates/aingle_cortex/src/service/backlinks.rs +++ b/crates/aingle_cortex/src/service/backlinks.rs @@ -8,6 +8,8 @@ use serde::Serialize; use std::collections::BTreeMap; +use crate::service::triple_util::obj_string; + /// Verified link context for one note. 
#[derive(Debug, Clone, Serialize, Default)] pub struct Backlinks { @@ -24,19 +26,6 @@ pub struct BacklinkRef { pub provenance_anchor: Option, } -/// Return the object of a triple as a plain `String`, handling both literal -/// strings (`Value::Str`) and graph nodes (`Value::Node`). Node IDs are stored -/// with `<…>` angle-bracket wrappers; this strips them so the result matches -/// the bare names used everywhere else in this module. -fn obj_string(t: &aingle_graph::Triple) -> Option { - if let Some(s) = t.object_string() { - Some(s.to_string()) - } else { - t.object_node() - .map(|n| n.to_string().trim_start_matches('<').trim_end_matches('>').to_string()) - } -} - /// Basename without directory or extension (wikilink resolution + titles). fn basename(path: &str) -> String { let file = path.rsplit(['/', '\\']).next().unwrap_or(path); diff --git a/crates/aingle_cortex/src/service/context.rs b/crates/aingle_cortex/src/service/context.rs index afb15e13..7d5b59e3 100644 --- a/crates/aingle_cortex/src/service/context.rs +++ b/crates/aingle_cortex/src/service/context.rs @@ -7,6 +7,8 @@ use std::collections::{BTreeMap, BTreeSet}; +use crate::service::triple_util::obj_string; + /// The semantic context for one note — the semantically related notes, even /// when never explicitly linked. #[derive(Debug, Clone, serde::Serialize, Default)] @@ -43,22 +45,9 @@ pub struct Neighbor { const SEMANTIC_MIN_DIMS: usize = 128; // --------------------------------------------------------------------------- -// Helpers (mirrored verbatim from backlinks.rs) +// Helpers // --------------------------------------------------------------------------- -/// Return the object of a triple as a plain `String`, handling both literal -/// strings (`Value::Str`) and graph nodes (`Value::Node`). Node IDs are stored -/// with `<…>` angle-bracket wrappers; this strips them so the result matches -/// the bare names used everywhere else in this module. 
-fn obj_string(t: &aingle_graph::Triple) -> Option { - if let Some(s) = t.object_string() { - Some(s.to_string()) - } else { - t.object_node() - .map(|n| n.to_string().trim_start_matches('<').trim_end_matches('>').to_string()) - } -} - /// Retrieve a signed provenance anchor hash for a note path, if available. async fn provenance_anchor_for(state: &crate::state::AppState, src: &str) -> Option { #[cfg(feature = "dag")] @@ -155,15 +144,14 @@ pub async fn note_context( .collect(); // 4. Build the active note's query text from its own chunks. + // Read STM and LTM separately and filter to `note` immediately — avoids + // allocating a merged Vec of every entry in memory. let mut own_text = String::new(); - let all_entries: Vec = { + let (stm_entries, ltm_entries) = { let mem = state.memory.read().await; - let mut v = mem.stm.all_entries(); - v.extend(mem.ltm.all_entries()); - v + (mem.stm.all_entries(), mem.ltm.all_entries()) }; - - for e in &all_entries { + for e in stm_entries.iter().chain(ltm_entries.iter()) { if e.entry_type != crate::service::ingest::CHUNK_ENTRY_TYPE { continue; } @@ -227,19 +215,24 @@ pub async fn note_context( if !note_set.contains(src.as_str()) { continue; } - let text = d - .get("text") - .and_then(|v| v.as_str()) - .unwrap_or("") - .to_string(); - - let entry = best_by_src.entry(src).or_insert((rel, text.clone())); - if rel > entry.0 { - *entry = (rel, text); + // Only clone the chunk text when actually inserting or replacing the + // best entry — avoids a clone on every already-occupied iteration. + let text = d.get("text").and_then(|v| v.as_str()).unwrap_or(""); + match best_by_src.entry(src) { + std::collections::btree_map::Entry::Vacant(e) => { + e.insert((rel, text.to_string())); + } + std::collections::btree_map::Entry::Occupied(mut e) => { + if rel > e.get().0 { + *e.get_mut() = (rel, text.to_string()); + } + } } } - // 6. Build Neighbor list, resolve provenance, sort, truncate. + // 6. 
Build Neighbor list (provenance is None for now), sort by score desc, + // truncate to `limit`, then resolve provenance only for the survivors. + // This cuts up to ~48 DAG reads (fetch_limit) down to `limit` (≤ 10). let mut neighbors: Vec = Vec::with_capacity(best_by_src.len()); for (src, (rel, chunk_text)) in best_by_src { let passage = Some({ @@ -252,12 +245,11 @@ pub async fn note_context( } }); let already_linked = outgoing_set.contains(&src); - let provenance_anchor = provenance_anchor_for(state, &src).await; neighbors.push(Neighbor { path: src, score: rel, passage, - provenance_anchor, + provenance_anchor: None, already_linked, }); } @@ -270,6 +262,11 @@ pub async fn note_context( }); neighbors.truncate(limit); + // Resolve provenance only for the survivors (typically ≤ limit DAG reads). + for n in &mut neighbors { + n.provenance_anchor = provenance_anchor_for(state, &n.path).await; + } + NoteContext { semantic_ready: true, neighbors, @@ -317,6 +314,12 @@ pub async fn note_context_cached( .note_context_cache .lock() .expect("note_context cache poisoned"); + // Simple growth cap: if more than 256 notes are cached, clear entirely + // before inserting. This bounds memory without per-entry LRU bookkeeping; + // a typical Akashi session edits far fewer than 256 notes in one run. + if cache.len() > 256 { + cache.clear(); + } cache.insert(note.to_string(), (key, result.clone())); } @@ -715,4 +718,146 @@ mod tests { n ); } + + // ----------------------------------------------------------------------- + // Cache tests (item 1 + item 5) + // ----------------------------------------------------------------------- + + /// `note_context_cached` must return an identical result on the second call + /// (cache hit), and a fresh result after graph/memory mutation (invalidation). + /// This test locks both the hit path and the version-change recompute path. 
+ #[tokio::test] + async fn note_context_cached_hit_and_invalidation() { + let state = stub_state(); + + insert_triples( + &state, + &[ + ("active.md", "aingle:source_hash", "h0"), + ("alpha.md", "aingle:source_hash", "h1"), + ], + ) + .await; + + let e0: Vec = { + let mut v = vec![0.0_f32; 128]; + v[0] = 1.0; + v + }; + insert_chunk(&state, "active.md", "alpha active note", e0.clone()).await; + insert_chunk(&state, "alpha.md", "alpha related content", e0.clone()).await; + + // First call: computes and caches. + let ctx1 = super::note_context_cached(&state, "active.md", 10).await; + assert!(ctx1.semantic_ready, "StubEmbedder is 128d → semantic_ready"); + assert!(!ctx1.neighbors.is_empty(), "alpha.md must be a neighbor"); + + // Second call: graph/memory unchanged → must return the cached result. + let ctx2 = super::note_context_cached(&state, "active.md", 10).await; + assert_eq!( + ctx1.neighbors.len(), + ctx2.neighbors.len(), + "cache hit: neighbor count must be identical" + ); + assert_eq!( + ctx1.neighbors[0].path, + ctx2.neighbors[0].path, + "cache hit: top neighbor must be identical" + ); + + // Mutate: add beta.md (changes triple_count AND total_memory_bytes). + insert_triples(&state, &[("beta.md", "aingle:source_hash", "h2")]).await; + insert_chunk(&state, "beta.md", "alpha beta content", e0.clone()).await; + + // Third call: version mismatch → cache must be invalidated; beta.md appears. + let ctx3 = super::note_context_cached(&state, "active.md", 10).await; + assert!( + ctx3.neighbors.iter().any(|n| n.path == "beta.md"), + "after mutation (triple_count+memory_bytes changed), beta.md must appear: {:?}", + ctx3.neighbors + ); + } + + /// When the note_context_cache exceeds 256 entries, inserting a new result + /// must clear the map first so the cache never grows without bound. + #[tokio::test] + async fn cache_cap_clears_when_exceeded() { + let state = stub_state(); + + // Pre-fill the cache with 257 dummy entries to exceed the cap. 
+ { + let mut cache = state.note_context_cache.lock().unwrap(); + for i in 0..257usize { + cache.insert( + format!("dummy_{i}.md"), + ((0, 0), super::NoteContext { semantic_ready: false, neighbors: vec![] }), + ); + } + } + assert_eq!( + state.note_context_cache.lock().unwrap().len(), + 257, + "pre-condition: cache must have 257 dummy entries" + ); + + // Call note_context_cached for a fresh note (not in cache). + // The cap must clear the map before inserting this new entry. + let _ = super::note_context_cached(&state, "fresh.md", 5).await; + + let cache = state.note_context_cache.lock().unwrap(); + assert_eq!( + cache.len(), + 1, + "cap must clear the oversized cache before inserting; got {} entries", + cache.len() + ); + assert!( + cache.contains_key("fresh.md"), + "fresh.md must be in the cache after the cap-and-insert" + ); + } + + // ----------------------------------------------------------------------- + // Optional nit + // ----------------------------------------------------------------------- + + /// An active note with NO chunks falls back to the basename as query text + /// and still surfaces neighbors. The active note must never appear as its + /// own neighbor (self-match guard). + #[tokio::test] + async fn no_chunks_falls_back_to_basename_and_never_self_matches() { + let state = stub_state(); + + insert_triples( + &state, + &[ + ("active.md", "aingle:source_hash", "h0"), + ("related.md", "aingle:source_hash", "h1"), + ], + ) + .await; + + // active.md has NO chunks. StubEmbedder: basename("active.md") = "active" + // → v[2] = 1.0 (default case, no "alpha" or "zzz"). related.md chunk + // "general content" → v[2] = 1.0. Cosine = 1.0 ≥ low threshold (0.1). 
+ let e_default: Vec = { + let mut v = vec![0.0_f32; 128]; + v[2] = 1.0; + v + }; + insert_chunk(&state, "related.md", "general related content", e_default).await; + + let ctx = super::note_context(&state, "active.md", 10).await; + assert!(ctx.semantic_ready, "StubEmbedder is 128d → semantic_ready"); + assert!( + !ctx.neighbors.iter().any(|n| n.path == "active.md"), + "active.md must never be its own neighbor: {:?}", + ctx.neighbors + ); + assert!( + ctx.neighbors.iter().any(|n| n.path == "related.md"), + "basename-fallback must still surface related.md: {:?}", + ctx.neighbors + ); + } } diff --git a/crates/aingle_cortex/src/service/mod.rs b/crates/aingle_cortex/src/service/mod.rs index 85e7f7d5..2ebb24da 100644 --- a/crates/aingle_cortex/src/service/mod.rs +++ b/crates/aingle_cortex/src/service/mod.rs @@ -16,6 +16,7 @@ pub mod skill; #[cfg(feature = "sparql")] pub mod sparql; pub mod stats; +pub(crate) mod triple_util; pub mod triples; pub mod validate; pub mod vault_map; diff --git a/crates/aingle_cortex/src/service/triple_util.rs b/crates/aingle_cortex/src/service/triple_util.rs new file mode 100644 index 00000000..b3d24ebe --- /dev/null +++ b/crates/aingle_cortex/src/service/triple_util.rs @@ -0,0 +1,23 @@ +// Copyright 2019-2026 Apilium Technologies OÜ. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 OR Commercial + +//! Shared triple-object extraction helpers. +//! +//! # Why a shared module? +//! `obj_string` was previously duplicated verbatim in `backlinks`, `context`, +//! and `vault_map`. A copy-paste drift on exactly this helper caused a real bug +//! (node-valued `links_to` triples were silently dropped). This module is the +//! single source of truth; every consumer must import from here. + +/// Return the object of a triple as a plain `String`, handling both literal +/// strings (`Value::Str`) and graph nodes (`Value::Node`). 
Node IDs are stored +/// with `<…>` angle-bracket wrappers; this strips them so the result matches +/// the bare names used everywhere else in the service layer. +pub(crate) fn obj_string(t: &aingle_graph::Triple) -> Option { + if let Some(s) = t.object_string() { + Some(s.to_string()) + } else { + t.object_node() + .map(|n| n.to_string().trim_start_matches('<').trim_end_matches('>').to_string()) + } +} diff --git a/crates/aingle_cortex/src/service/vault_map.rs b/crates/aingle_cortex/src/service/vault_map.rs index bad2e6c2..883ff9ff 100644 --- a/crates/aingle_cortex/src/service/vault_map.rs +++ b/crates/aingle_cortex/src/service/vault_map.rs @@ -7,6 +7,8 @@ use serde::Serialize; use std::collections::BTreeMap; +use crate::service::triple_util::obj_string; + /// The full vault map returned to the UI and the connected AI. #[derive(Debug, Clone, Serialize, Default)] pub struct VaultMap { @@ -105,19 +107,6 @@ pub(crate) fn is_maps_path(path: &str) -> bool { path.starts_with("_maps/") || path.starts_with("_maps\\") } -/// Return the object of a triple as a plain `String`, handling both literal -/// strings (`Value::Str`) and graph nodes (`Value::Node`). Node IDs are stored -/// with `<…>` angle-bracket wrappers; this strips them so the result matches -/// the bare names used everywhere else in this module. -fn obj_string(t: &aingle_graph::Triple) -> Option { - if let Some(s) = t.object_string() { - Some(s.to_string()) - } else { - t.object_node() - .map(|n| n.to_string().trim_start_matches('<').trim_end_matches('>').to_string()) - } -} - /// Structural inputs derived from the graph (no embeddings). 
#[derive(Debug, Default)] pub(crate) struct Structural { From 0b5a49bd117d10e834e3f85fa24bf1b82c4384c8 Mon Sep 17 00:00:00 2001 From: ApiliumDevTeam Date: Sat, 27 Jun 2026 21:56:49 +0200 Subject: [PATCH 58/72] =?UTF-8?q?feat(cortex):=20aingle=5Fnote=5Fcontext?= =?UTF-8?q?=20MCP=20tool=20=E2=80=94=20verified=20semantic=20context=20bun?= =?UTF-8?q?dle?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crates/aingle_cortex/src/mcp/server.rs | 34 +++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/crates/aingle_cortex/src/mcp/server.rs b/crates/aingle_cortex/src/mcp/server.rs index a9f803b5..af515c3d 100644 --- a/crates/aingle_cortex/src/mcp/server.rs +++ b/crates/aingle_cortex/src/mcp/server.rs @@ -143,6 +143,29 @@ impl AingleMcp { Ok(CallToolResult::success(vec![Content::json(resp)?])) } + /// Verified context bundle for a note: semantically-related notes (by meaning, + /// not just links) with the matching passage and signed provenance. + #[tool( + description = "Verified context bundle for a note: notes that are semantically \ + related by meaning (not just by explicit links), each with the matching \ + passage as evidence and a signed-provenance anchor when available. Use to \ + answer grounded in a note's verified neighborhood without hallucinating.", + annotations(read_only_hint = true) + )] + async fn aingle_note_context( + &self, + params: Parameters, + ) -> Result { + let Parameters(p) = params; + let resp = crate::service::context::note_context_cached( + &self.state, + &p.note, + p.limit.unwrap_or(8), + ) + .await; + Ok(CallToolResult::success(vec![Content::json(resp)?])) + } + /// List ingested sources and their signed content hashes. #[tool( description = "List ingested source files with their content hashes (the \ @@ -643,6 +666,15 @@ pub struct BacklinksParams { pub note: String, } +/// Parameters for the `aingle_note_context` tool. 
+#[derive(serde::Deserialize, schemars::JsonSchema)] +pub struct NoteContextParams { + /// Note path (vault-relative) to get the verified context bundle for. + pub note: String, + /// Max number of related neighbors to return (default 8). + pub limit: Option<usize>, +} + #[tool_handler(router = self.tool_router)] impl ServerHandler for AingleMcp { fn get_info(&self) -> ServerInfo { @@ -671,7 +703,7 @@ mod ingest_tools_tests { .into_iter() .map(|t| t.name.to_string()) .collect(); - for expected in ["aingle_ingest", "aingle_ground", "aingle_sources", "aingle_vault_map", "aingle_backlinks"] { + for expected in ["aingle_ingest", "aingle_ground", "aingle_sources", "aingle_vault_map", "aingle_backlinks", "aingle_note_context"] { assert!( names.contains(&expected.to_string()), "missing tool {expected}" From 3ba4b67fb68c598030cee6a982c58185a57ab293 Mon Sep 17 00:00:00 2001 From: ApiliumDevTeam Date: Sat, 27 Jun 2026 22:46:59 +0200 Subject: [PATCH 59/72] =?UTF-8?q?test(cortex):=20neural=20e2e=20=E2=80=94?= =?UTF-8?q?=20note=5Fcontext=20finds=20same-topic=20neighbors=20with=20e5?= =?UTF-8?q?=20embedder?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds neural_note_context_finds_same_topic gated on the neural-embeddings feature: bootstraps a real multilingual-e5-small embedder, ingests two same-topic Spanish dog-care notes and one off-topic elections note, and asserts the sibling note is found as a semantic neighbor (with passage) while the off-topic note stays below the relevance floor (low=0.77). Mirrors the neural_grounding_is_topical acceptance test in ground.rs.
--- crates/aingle_cortex/src/service/context.rs | 93 +++++++++++++++++++++ 1 file changed, 93 insertions(+) diff --git a/crates/aingle_cortex/src/service/context.rs b/crates/aingle_cortex/src/service/context.rs index 7d5b59e3..6dcd4169 100644 --- a/crates/aingle_cortex/src/service/context.rs +++ b/crates/aingle_cortex/src/service/context.rs @@ -860,4 +860,97 @@ mod tests { ctx.neighbors ); } + + /// End-to-end acceptance test for the real neural embedder: same-topic notes + /// must surface as semantic neighbors while an off-topic note is filtered out. + /// Gated on the `neural-embeddings` feature and skips if the model files are + /// absent. Requires `ORT_DYLIB_PATH` to point at an onnxruntime shared library. + #[cfg(feature = "neural-embeddings")] + #[tokio::test] + async fn neural_note_context_finds_same_topic() { + let model_dir = std::env::var("INERU_E5_MODEL_DIR").unwrap_or_else(|_| { + concat!( + env!("CARGO_MANIFEST_DIR"), + "/../ineru/test-models/multilingual-e5-small" + ) + .to_string() + }); + if !std::path::Path::new(&model_dir) + .join("onnx/model.onnx") + .exists() + { + eprintln!("skipping neural_note_context_finds_same_topic: e5 model not found at {model_dir}"); + return; + } + + let embedder = crate::embedder::build_embedder(Some(&model_dir)); + assert_eq!( + embedder.dimensions(), + 384, + "neural embedder must be active (384d)" + ); + + let state = + AppState::with_db_path_and_embedder(":memory:", None, embedder).unwrap(); + { + let mut graph = state.graph.write().await; + graph.enable_dag(); + } + + let dir = tempfile::tempdir().unwrap(); + // Two same-topic notes about dog care — sentences reused from + // neural_grounding_is_topical in ground.rs for reliable embedding behaviour. 
+ std::fs::write( + dir.path().join("perros1.md"), + "# Cuidado de perros\n\nLos perros necesitan paseos diarios, agua fresca y una dieta equilibrada para estar sanos.\n", + ) + .unwrap(); + std::fs::write( + dir.path().join("perros2.md"), + "# Mascotas\n\nUn perro sano requiere ejercicio diario, hidratación constante y alimentación balanceada.\n", + ) + .unwrap(); + // Off-topic note: elections have no semantic overlap with dog care. + std::fs::write( + dir.path().join("elecciones.md"), + "# Elecciones\n\nLos resultados de las elecciones presidenciales determinan el futuro del país.\n", + ) + .unwrap(); + + crate::service::ingest::ingest_path(&state, dir.path().to_str().unwrap(), None) + .await + .unwrap(); + + let ctx = super::note_context(&state, "perros1.md", 5).await; + + assert!( + ctx.semantic_ready, + "neural embedder (384d) must set semantic_ready=true" + ); + + assert!( + ctx.neighbors.iter().any(|n| n.path == "perros2.md"), + "perros2.md (same-topic sibling) must be a semantic neighbor of perros1.md: {:?}", + ctx.neighbors + ); + + let sibling = ctx + .neighbors + .iter() + .find(|n| n.path == "perros2.md") + .unwrap(); + assert!( + sibling.passage.is_some(), + "perros2.md neighbor must include a matching passage: {:?}", + sibling + ); + + // elecciones.md is semantically orthogonal to dog care; its cosine against + // the perros1.md query vector should not reach the low threshold (0.77). 
+ assert!( + !ctx.neighbors.iter().any(|n| n.path == "elecciones.md"), + "off-topic elecciones.md must not appear as a neighbor (below low=0.77 floor): {:?}", + ctx.neighbors + ); + } } From d783a5a9684b823f5b336df4a2e05817c38957a3 Mon Sep 17 00:00:00 2001 From: ApiliumDevTeam Date: Sat, 27 Jun 2026 23:05:12 +0200 Subject: [PATCH 60/72] fix(cortex): calibrate neural neighbor floor, fix path-qualified links, fix cache key MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three integration fixes in the context / backlinks / cache layer: **Fix 1 — NEIGHBOR_FLOOR (neural calibration)** Introduce `NEIGHBOR_FLOOR = 0.88` in `service/context.rs`. multilingual-e5 assigns a cosine baseline of ~0.83 to any same-language text, making the embedder's grounding `low` threshold (0.77) too permissive for note-to-note neighbor selection. `NEIGHBOR_FLOOR` mirrors `vault_map::SEMANTIC_THRESHOLD` (related notes ~0.90+, unrelated ~0.81-0.83). Removes the now-unused `relevance_thresholds()` call. Neural test `neural_note_context_finds_same_topic` now passes: perros2.md (0.93) is a neighbor; elecciones.md (0.83) is excluded. **Fix 2 — path-qualified wikilink resolution (I-1)** Add `resolve_link_target` to `service/triple_util.rs`. Resolution order mirrors the editor's `wikilinks.ts`: (1) exact path match; (2) path-qualified target (`[[dir/note]]`) matched by path-without-ext scan — prevents the collision where `b/note` wrongly collapsed to the alphabetically-first `a/note.md`; (3) basename fallback. Three unit tests lock the fix. Both `context.rs` and `backlinks.rs` inline `resolve` closures replaced with the shared helper. **Fix 3 — cache key includes `limit` (M-2)** `note_context_cache` map key changed from `String` (note path) to `(String, usize)` (note path, limit) in `state.rs` and `context::note_context_cached`. MCP calls with different limits now cache independently; the `cache_cap_clears_when_exceeded` test updated to use the new key type. 
--- crates/aingle_cortex/src/service/backlinks.rs | 8 +- crates/aingle_cortex/src/service/context.rs | 49 ++++---- .../aingle_cortex/src/service/triple_util.rs | 112 +++++++++++++++++- crates/aingle_cortex/src/state.rs | 8 +- 4 files changed, 141 insertions(+), 36 deletions(-) diff --git a/crates/aingle_cortex/src/service/backlinks.rs b/crates/aingle_cortex/src/service/backlinks.rs index 2828d4a3..c412255f 100644 --- a/crates/aingle_cortex/src/service/backlinks.rs +++ b/crates/aingle_cortex/src/service/backlinks.rs @@ -8,7 +8,7 @@ use serde::Serialize; use std::collections::BTreeMap; -use crate::service::triple_util::obj_string; +use crate::service::triple_util::{obj_string, resolve_link_target}; /// Verified link context for one note. #[derive(Debug, Clone, Serialize, Default)] @@ -109,11 +109,7 @@ pub async fn backlinks(state: &crate::state::AppState, note: &str) -> Backlinks by_base.entry(basename(n)).or_insert_with(|| n.clone()); } let resolve = |target: &str| -> Option<String> { - if note_set.contains(target) { - Some(target.to_string()) - } else { - by_base.get(&basename(target)).cloned() - } + resolve_link_target(target, &note_set, &by_base) }; let active_base = basename(note); let active_base_lc = active_base.to_lowercase(); diff --git a/crates/aingle_cortex/src/service/context.rs b/crates/aingle_cortex/src/service/context.rs index 6dcd4169..58a194ba 100644 --- a/crates/aingle_cortex/src/service/context.rs +++ b/crates/aingle_cortex/src/service/context.rs @@ -7,7 +7,7 @@ use std::collections::{BTreeMap, BTreeSet}; -use crate::service::triple_util::obj_string; +use crate::service::triple_util::{obj_string, resolve_link_target}; /// The semantic context for one note — the semantically related notes, even /// when never explicitly linked. @@ -44,6 +44,14 @@ pub struct Neighbor { /// cross-note retrieval; this gate keeps the result honest. const SEMANTIC_MIN_DIMS: usize = 128; +/// Minimum cosine for a note to count as a semantic neighbor.
Calibrated for +/// note-to-note neural similarity: multilingual-e5 assigns a high baseline +/// (~0.83) to any same-language text, so the embedder's grounding `low` +/// threshold (0.77) is too permissive here. Mirrors vault_map's +/// SEMANTIC_THRESHOLD rationale (related notes ~0.90+, unrelated ~0.81-0.83). +/// Follow-up: make this per-embedder if more neural models are added. +const NEIGHBOR_FLOOR: f32 = 0.88; + // --------------------------------------------------------------------------- // Helpers // --------------------------------------------------------------------------- @@ -87,8 +95,6 @@ pub async fn note_context( }; } - let (_, low) = state.embedder.relevance_thresholds(); - // 2. Build the note set (subjects of PRED_SOURCE_HASH) + basename index, // and collect all links_to triples. let strip = @@ -126,13 +132,7 @@ pub async fn note_context( } let resolve = |target: &str| -> Option { - if note_set.contains(target) { - Some(target.to_string()) - } else { - by_base - .get(&crate::service::vault_map::basename(target)) - .cloned() - } + resolve_link_target(target, ¬e_set, &by_base) }; // 3. Compute `outgoing_set`: full paths that the active `note` links to. @@ -198,7 +198,7 @@ pub async fn note_context( None => continue, }; let rel = q.cosine_similarity(emb); - if rel < low { + if rel < NEIGHBOR_FLOOR { continue; } let d = &r.entry.data; @@ -279,10 +279,8 @@ pub async fn note_context( /// Like [`note_context`] but memoised on `(triple_count, total_memory_bytes)`. /// -/// The cache key does NOT include `limit` — keep it simple and document the -/// assumption: callers should use a stable `limit` for the same note. If -/// `limit` varies per call, the first winning result is served. For Akashi's -/// use-case (fixed sidebar top-N) this is always correct. +/// The map key is `(note_path, limit)` so that MCP calls with different `limit` +/// values are cached independently and never serve a stale neighbor count. 
pub async fn note_context_cached( state: &crate::state::AppState, note: &str, @@ -290,7 +288,8 @@ pub async fn note_context_cached( ) -> NoteContext { let tc = { state.graph.read().await.stats().triple_count }; let mem_bytes = { state.memory.read().await.stats().total_memory_bytes }; - let key = (tc, mem_bytes); + let version_key = (tc, mem_bytes); + let map_key = (note.to_string(), limit); // Check cache — release lock before any await. { @@ -298,8 +297,8 @@ pub async fn note_context_cached( .note_context_cache .lock() .expect("note_context cache poisoned"); - if let Some((cached_key, ctx)) = cache.get(note) { - if *cached_key == key { + if let Some((cached_key, ctx)) = cache.get(&map_key) { + if *cached_key == version_key { return ctx.clone(); } } @@ -314,13 +313,13 @@ pub async fn note_context_cached( .note_context_cache .lock() .expect("note_context cache poisoned"); - // Simple growth cap: if more than 256 notes are cached, clear entirely + // Simple growth cap: if more than 256 entries are cached, clear entirely // before inserting. This bounds memory without per-entry LRU bookkeeping; - // a typical Akashi session edits far fewer than 256 notes in one run. + // a typical Akashi session edits far fewer than 256 (note, limit) pairs. 
if cache.len() > 256 { cache.clear(); } - cache.insert(note.to_string(), (key, result.clone())); + cache.insert(map_key, (version_key, result.clone())); } result @@ -789,7 +788,7 @@ mod tests { let mut cache = state.note_context_cache.lock().unwrap(); for i in 0..257usize { cache.insert( - format!("dummy_{i}.md"), + (format!("dummy_{i}.md"), 0usize), ((0, 0), super::NoteContext { semantic_ready: false, neighbors: vec![] }), ); } @@ -812,7 +811,7 @@ mod tests { cache.len() ); assert!( - cache.contains_key("fresh.md"), + cache.contains_key(&("fresh.md".to_string(), 5usize)), "fresh.md must be in the cache after the cap-and-insert" ); } @@ -946,10 +945,10 @@ mod tests { ); // elecciones.md is semantically orthogonal to dog care; its cosine against - // the perros1.md query vector should not reach the low threshold (0.77). + // the perros1.md query vector (~0.83) must not reach NEIGHBOR_FLOOR (0.88). assert!( !ctx.neighbors.iter().any(|n| n.path == "elecciones.md"), - "off-topic elecciones.md must not appear as a neighbor (below low=0.77 floor): {:?}", + "off-topic elecciones.md must not appear as a neighbor (below NEIGHBOR_FLOOR=0.88): {:?}", ctx.neighbors ); } diff --git a/crates/aingle_cortex/src/service/triple_util.rs b/crates/aingle_cortex/src/service/triple_util.rs index b3d24ebe..dddfd2f2 100644 --- a/crates/aingle_cortex/src/service/triple_util.rs +++ b/crates/aingle_cortex/src/service/triple_util.rs @@ -1,7 +1,7 @@ // Copyright 2019-2026 Apilium Technologies OÜ. All rights reserved. // SPDX-License-Identifier: Apache-2.0 OR Commercial -//! Shared triple-object extraction helpers. +//! Shared triple-object extraction and wikilink-resolution helpers. //! //! # Why a shared module? //! 
`obj_string` was previously duplicated verbatim in `backlinks`, `context`, @@ -21,3 +21,113 @@ pub(crate) fn obj_string(t: &aingle_graph::Triple) -> Option { .map(|n| n.to_string().trim_start_matches('<').trim_end_matches('>').to_string()) } } + +/// Basename without directory or extension (for wikilink resolution). +fn basename(path: &str) -> String { + let file = path.rsplit(['/', '\\']).next().unwrap_or(path); + file.rsplit_once('.').map(|(s, _)| s).unwrap_or(file).to_string() +} + +/// Strip the extension from the last path segment only. Input must already be +/// slash-normalized (forward slashes). Returns the path-without-ext. +/// "b/note.md" → "b/note", "b/note" → "b/note", "note.md" → "note". +fn path_without_ext(path: &str) -> String { + if let Some(idx) = path.rfind('/') { + let dir = &path[..=idx]; // includes the trailing '/' + let file = &path[idx + 1..]; + let stem = file.rsplit_once('.').map(|(s, _)| s).unwrap_or(file); + format!("{dir}{stem}") + } else { + path.rsplit_once('.').map(|(s, _)| s).unwrap_or(path).to_string() + } +} + +/// Resolve a wikilink `target` to a full note path. Order mirrors the editor's +/// `wikilinks.ts`: +/// 1. Exact path match (after normalizing `\\`→`/`). +/// 2. When `target` is path-qualified (contains `/`), find a note whose +/// slash-normalized path-without-extension equals the target's. +/// This handles `[[dir/note]]` → `dir/note.md` without collapsing to the +/// alphabetically-first note that shares a bare basename. +/// 3. Basename fallback via `by_base`. +pub(crate) fn resolve_link_target( + target: &str, + note_set: &std::collections::BTreeSet<&str>, + by_base: &std::collections::BTreeMap, +) -> Option { + // Normalize backslash to forward slash for consistent matching. + let t_norm = target.replace('\\', "/"); + let t_ref: &str = &t_norm; + + // (1) Exact path match. 
+ if note_set.contains(t_ref) { + return Some(t_norm); + } + + // (2) Path-qualified: find a note whose path-without-ext (slash-normalized) + // equals the target's path-without-ext. + if t_norm.contains('/') { + let t_ne = path_without_ext(t_ref); + for &p in note_set.iter() { + let p_norm = p.replace('\\', "/"); + if path_without_ext(&p_norm) == t_ne { + return Some(p.to_string()); + } + } + } + + // (3) Basename fallback. + by_base.get(&basename(t_ref)).cloned() +} + +#[cfg(test)] +mod tests { + use std::collections::{BTreeMap, BTreeSet}; + + use super::resolve_link_target; + + #[test] + fn exact_path_match() { + // "b/note.md" exists verbatim — must return it, not "a/note.md". + let notes = vec!["a/note.md".to_string(), "b/note.md".to_string()]; + let note_set: BTreeSet<&str> = notes.iter().map(|s| s.as_str()).collect(); + let mut by_base: BTreeMap = BTreeMap::new(); + by_base.insert("note".to_string(), "a/note.md".to_string()); + + assert_eq!( + resolve_link_target("b/note.md", ¬e_set, &by_base).as_deref(), + Some("b/note.md") + ); + } + + #[test] + fn path_qualified_resolves_correct_note_not_alphabetical_first() { + // "[[b/note]]" (no extension) must resolve to "b/note.md", NOT "a/note.md". + // by_base["note"] = "a/note.md" (first alphabetically — the collision + // that previously caused the bug). + let notes = vec!["a/note.md".to_string(), "b/note.md".to_string()]; + let note_set: BTreeSet<&str> = notes.iter().map(|s| s.as_str()).collect(); + let mut by_base: BTreeMap = BTreeMap::new(); + by_base.insert("note".to_string(), "a/note.md".to_string()); + + assert_eq!( + resolve_link_target("b/note", ¬e_set, &by_base).as_deref(), + Some("b/note.md"), + "path-qualified target must not collapse to the alphabetically-first basename match" + ); + } + + #[test] + fn bare_basename_unique_fallback() { + // No path component → falls through to by_base. 
+ let notes = vec!["dir/note.md".to_string()]; + let note_set: BTreeSet<&str> = notes.iter().map(|s| s.as_str()).collect(); + let mut by_base: BTreeMap = BTreeMap::new(); + by_base.insert("note".to_string(), "dir/note.md".to_string()); + + assert_eq!( + resolve_link_target("note", ¬e_set, &by_base).as_deref(), + Some("dir/note.md") + ); + } +} diff --git a/crates/aingle_cortex/src/state.rs b/crates/aingle_cortex/src/state.rs index 47e16254..87bd4625 100644 --- a/crates/aingle_cortex/src/state.rs +++ b/crates/aingle_cortex/src/state.rs @@ -34,15 +34,15 @@ pub struct AppState { pub vault_map_cache: std::sync::Arc< std::sync::Mutex>, >, - /// Per-note semantic-neighbor cache, keyed by note path, storing + /// Per-note semantic-neighbor cache, keyed by `(note_path, limit)`, storing /// `(graph_triple_count, total_memory_bytes) → NoteContext`. Invalidated /// whenever the graph or memory changes — same staleness signal as - /// vault_map_cache. Cache key does not include `limit`; callers should - /// use a consistent limit for the same note. + /// vault_map_cache. `limit` is part of the key so that MCP calls with + /// different limits do not serve stale neighbor counts from cache. pub note_context_cache: std::sync::Arc< std::sync::Mutex< std::collections::HashMap< - String, + (String, usize), ((usize, usize), crate::service::context::NoteContext), >, >, From 5ace0ffd0a67012d704c789399ffd661a9c9c76e Mon Sep 17 00:00:00 2001 From: ApiliumDevTeam Date: Sun, 28 Jun 2026 00:50:59 +0200 Subject: [PATCH 61/72] =?UTF-8?q?feat(cortex):=20local=5Fgraph=20service?= =?UTF-8?q?=20=E2=80=94=20typed=20neighborhood=20graph=20for=20VC-2?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds service::local_graph with LocalGraph, GNode, TypedEdge types and local_graph / local_graph_cached async functions. 
Produces per-note BFS neighborhoods (depth ≤ 2) with three typed edge kinds: explicit wikilinks ("link"), semantic neighbors via note_context ("semantic", with signed provenance anchors under dag feature), and shared-tag connections ("tag"). Caps at 80 nodes, dedupes symmetric edges, excludes _maps/ paths. Also makes NEIGHBOR_FLOOR pub in service::context so local_graph can reuse the same threshold, and adds local_graph_cache field to AppState (mirroring note_context_cache) in all four constructors. 7 tests: link_edge_from_wikilink, semantic_edge_from_neighbor, semantic_edge_carries_provenance (dag), tag_edge_from_shared_tag, hash_embedder_omits_semantic, maps_excluded, caps_respected — all pass with and without --features dag. --- crates/aingle_cortex/src/service/context.rs | 2 +- .../aingle_cortex/src/service/local_graph.rs | 738 ++++++++++++++++++ crates/aingle_cortex/src/service/mod.rs | 1 + crates/aingle_cortex/src/state.rs | 15 + 4 files changed, 755 insertions(+), 1 deletion(-) create mode 100644 crates/aingle_cortex/src/service/local_graph.rs diff --git a/crates/aingle_cortex/src/service/context.rs b/crates/aingle_cortex/src/service/context.rs index 58a194ba..13c21f0c 100644 --- a/crates/aingle_cortex/src/service/context.rs +++ b/crates/aingle_cortex/src/service/context.rs @@ -50,7 +50,7 @@ const SEMANTIC_MIN_DIMS: usize = 128; /// threshold (0.77) is too permissive here. Mirrors vault_map's /// SEMANTIC_THRESHOLD rationale (related notes ~0.90+, unrelated ~0.81-0.83). /// Follow-up: make this per-embedder if more neural models are added. 
-const NEIGHBOR_FLOOR: f32 = 0.88; +pub const NEIGHBOR_FLOOR: f32 = 0.88; // --------------------------------------------------------------------------- // Helpers diff --git a/crates/aingle_cortex/src/service/local_graph.rs b/crates/aingle_cortex/src/service/local_graph.rs new file mode 100644 index 00000000..67559710 --- /dev/null +++ b/crates/aingle_cortex/src/service/local_graph.rs @@ -0,0 +1,738 @@ +// Copyright 2019-2026 Apilium Technologies OÜ. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 OR Commercial + +//! Local graph neighborhood for a single note: typed edges (link / semantic / tag) +//! up to depth 2 for the Akashi per-note graph panel (VC-2). + +use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet, VecDeque}; + +use crate::service::triple_util::{obj_string, resolve_link_target}; +use crate::service::vault_map::{basename, is_maps_path}; +use crate::service::context::{note_context, NEIGHBOR_FLOOR}; + +// --------------------------------------------------------------------------- +// Public types +// --------------------------------------------------------------------------- + +/// The typed local neighborhood graph around a center note. +#[derive(Debug, Clone, serde::Serialize, Default)] +pub struct LocalGraph { + /// The center note path. + pub center: String, + /// All nodes in this neighborhood (center + neighbors). + pub nodes: Vec, + /// All typed edges in this neighborhood. + pub edges: Vec, + /// `true` when the embedder has enough dimensions for semantic edges. + pub semantic_ready: bool, +} + +/// A node in the local neighborhood graph. +#[derive(Debug, Clone, serde::Serialize)] +pub struct GNode { + /// Full relative path (canonical identity). + pub id: String, + /// Human-readable label (basename without extension). + pub label: String, + /// `"center"` for the focal note; `"note"` for all others. + pub kind: String, + /// Semantic cluster id. Always `-1` here (clustering is global / expensive). 
+ pub cluster: i64, + /// Number of edges in THIS graph touching this node. + pub degree: usize, +} + +/// A typed, optionally weighted edge in the local neighborhood graph. +#[derive(Debug, Clone, serde::Serialize)] +pub struct TypedEdge { + pub source: String, + pub target: String, + /// `"link"` | `"semantic"` | `"tag"` + pub kind: String, + /// Cosine similarity score — present only for semantic edges. + pub score: Option, + /// For tag edges: the shared tag name. + pub label: Option, + /// Signed DAG action hash for semantic edges (🔒). `None` if unavailable. + pub provenance_anchor: Option, +} + +// --------------------------------------------------------------------------- +// Private constants +// --------------------------------------------------------------------------- + +const NODE_CAP: usize = 80; +const SEM_PER_NODE: usize = 5; +const MAX_DEPTH: usize = 2; +/// Max tag-edges added per (node, tag) pair — prevents explosion on popular tags. +const TAG_FANOUT_CAP: usize = 6; + +// --------------------------------------------------------------------------- +// Core function +// --------------------------------------------------------------------------- + +/// Build the typed local neighborhood graph for `note` at BFS depth `depth`. +pub async fn local_graph( + state: &crate::state::AppState, + note: &str, + depth: usize, +) -> LocalGraph { + use aingle_graph::{Predicate, TriplePattern}; + + let depth = depth.clamp(1, MAX_DEPTH); + let semantic_grade = state.embedder.dimensions() >= 128; + + let strip = |n: String| n.trim_start_matches('<').trim_end_matches('>').to_string(); + + // ----------------------------------------------------------------------- + // 1. Load structural data from the graph once. 
+ // ----------------------------------------------------------------------- + // notes: all ingested note paths + // links_raw: (subject, object-string) for every links_to triple + // tagged_raw: (subject, tag) for every tagged triple + type PairVec = Vec<(String, String)>; + let (notes, links_raw, tagged_raw): (Vec, PairVec, PairVec) = { + let g = state.graph.read().await; + let collect = |pred: &str| -> PairVec { + g.find(TriplePattern::any().with_predicate(Predicate::named(pred))) + .unwrap_or_default() + .into_iter() + .filter_map(|t| { + obj_string(&t).map(|o| (strip(t.subject.to_string()), o)) + }) + .collect() + }; + let mut ns: Vec = collect(crate::service::ingest::PRED_SOURCE_HASH) + .into_iter() + .map(|(s, _)| s) + .collect(); + ns.sort(); + ns.dedup(); + let lnks = collect("links_to"); + let tags = collect("tagged"); + (ns, lnks, tags) + }; + + let note_set: BTreeSet<&str> = notes.iter().map(|s| s.as_str()).collect(); + + // Basename index for wikilink resolution. + let mut by_base: BTreeMap = BTreeMap::new(); + for n in ¬es { + by_base + .entry(basename(n)) + .or_insert_with(|| n.clone()); + } + + let resolve = |target: &str| -> Option { + resolve_link_target(target, ¬e_set, &by_base) + }; + + // Resolved outgoing links: (src, dst) — both are full paths, neither a maps path. 
+ let links: Vec<(String, String)> = links_raw + .iter() + .filter_map(|(src, tgt)| resolve(tgt).map(|dst| (src.clone(), dst))) + .filter(|(src, dst)| src != dst) + .filter(|(src, _)| note_set.contains(src.as_str()) && !is_maps_path(src)) + .filter(|(_, dst)| note_set.contains(dst.as_str()) && !is_maps_path(dst)) + .collect(); + + // tag_of_note: note → set + // notes_of_tag: tag → vec (sorted, deduped) + let mut tag_of_note: BTreeMap> = BTreeMap::new(); + let mut notes_of_tag: BTreeMap> = BTreeMap::new(); + for (note_path, tag) in &tagged_raw { + if note_set.contains(note_path.as_str()) && !is_maps_path(note_path) { + tag_of_note + .entry(note_path.clone()) + .or_default() + .insert(tag.clone()); + notes_of_tag + .entry(tag.clone()) + .or_default() + .push(note_path.clone()); + } + } + for v in notes_of_tag.values_mut() { + v.sort(); + v.dedup(); + } + + // ----------------------------------------------------------------------- + // 2. BFS to collect edges. + // ----------------------------------------------------------------------- + let mut edges: Vec = Vec::new(); + let mut visited: HashSet = HashSet::new(); + visited.insert(note.to_string()); + + let mut frontier: VecDeque = VecDeque::new(); + frontier.push_back(note.to_string()); + + let mut semantic_ready = semantic_grade; + + for _level in 0..depth { + let mut next_frontier: Vec = Vec::new(); + while let Some(n) = frontier.pop_front() { + if is_maps_path(&n) { + continue; + } + + // --- link edges (outgoing from n) --- + for (src, dst) in links.iter().filter(|(s, _)| s == &n) { + edges.push(TypedEdge { + source: src.clone(), + target: dst.clone(), + kind: "link".to_string(), + score: None, + label: None, + provenance_anchor: None, + }); + if !visited.contains(dst) { + visited.insert(dst.clone()); + next_frontier.push(dst.clone()); + } + } + // --- link edges (incoming to n) --- + for (src, dst) in links.iter().filter(|(_, d)| d == &n) { + edges.push(TypedEdge { + source: src.clone(), + target: dst.clone(), 
+ kind: "link".to_string(), + score: None, + label: None, + provenance_anchor: None, + }); + if !visited.contains(src) { + visited.insert(src.clone()); + next_frontier.push(src.clone()); + } + } + + // --- semantic edges --- + if semantic_grade { + let ctx = note_context(state, &n, SEM_PER_NODE).await; + if !ctx.semantic_ready { + semantic_ready = false; + } else { + for nb in ctx.neighbors { + if nb.score < NEIGHBOR_FLOOR { + continue; + } + if is_maps_path(&nb.path) { + continue; + } + edges.push(TypedEdge { + source: n.clone(), + target: nb.path.clone(), + kind: "semantic".to_string(), + score: Some(nb.score), + label: None, + provenance_anchor: nb.provenance_anchor, + }); + if !visited.contains(&nb.path) { + visited.insert(nb.path.clone()); + next_frontier.push(nb.path.clone()); + } + } + } + } + + // --- tag edges --- + if let Some(tags) = tag_of_note.get(&n) { + for tag in tags { + if let Some(peers) = notes_of_tag.get(tag) { + let mut added = 0usize; + for peer in peers { + if peer == &n || is_maps_path(peer) { + continue; + } + if added >= TAG_FANOUT_CAP { + break; + } + edges.push(TypedEdge { + source: n.clone(), + target: peer.clone(), + kind: "tag".to_string(), + score: None, + label: Some(tag.clone()), + provenance_anchor: None, + }); + if !visited.contains(peer) { + visited.insert(peer.clone()); + next_frontier.push(peer.clone()); + } + added += 1; + } + } + } + } + } + + for n in next_frontier { + frontier.push_back(n); + } + } + + // ----------------------------------------------------------------------- + // 3. Deduplicate edges. + // ----------------------------------------------------------------------- + // Links are directional — dedupe by (source, target, kind). + // Semantic/tag are symmetric — dedupe order-insensitively by (min,max,kind). 
+ let mut seen_link: HashSet<(String, String)> = HashSet::new(); + let mut seen_sym: HashSet<(String, String, String)> = HashSet::new(); + let mut deduped: Vec = Vec::new(); + + for e in edges { + // Remove self-loops. + if e.source == e.target { + continue; + } + match e.kind.as_str() { + "link" => { + let key = (e.source.clone(), e.target.clone()); + if seen_link.insert(key) { + deduped.push(e); + } + } + _ => { + // symmetric kinds: (tag, semantic) + let (lo, hi) = if e.source <= e.target { + (e.source.clone(), e.target.clone()) + } else { + (e.target.clone(), e.source.clone()) + }; + let key = (lo, hi, e.kind.clone()); + if seen_sym.insert(key) { + deduped.push(e); + } + } + } + } + + // ----------------------------------------------------------------------- + // 4. Collect all node ids referenced by edges, plus the center. + // ----------------------------------------------------------------------- + let mut all_node_ids: HashSet = HashSet::new(); + all_node_ids.insert(note.to_string()); + for e in &deduped { + all_node_ids.insert(e.source.clone()); + all_node_ids.insert(e.target.clone()); + } + + // ----------------------------------------------------------------------- + // 5. Cap: keep center + highest-degree nodes; drop edges to removed nodes. + // ----------------------------------------------------------------------- + let mut degree_map: HashMap = HashMap::new(); + for id in &all_node_ids { + degree_map.insert(id.clone(), 0); + } + for e in &deduped { + *degree_map.entry(e.source.clone()).or_default() += 1; + *degree_map.entry(e.target.clone()).or_default() += 1; + } + + let kept_ids: HashSet = if all_node_ids.len() > NODE_CAP { + // Always keep center; fill remaining slots by degree descending. 
+ let mut by_degree: Vec<(String, usize)> = degree_map + .iter() + .filter(|(id, _)| id.as_str() != note) + .map(|(id, &d)| (id.clone(), d)) + .collect(); + by_degree.sort_by(|a, b| b.1.cmp(&a.1).then(a.0.cmp(&b.0))); + let mut kept: HashSet = HashSet::new(); + kept.insert(note.to_string()); + for (id, _) in by_degree.into_iter().take(NODE_CAP - 1) { + kept.insert(id); + } + kept + } else { + all_node_ids.clone() + }; + + // Drop edges that reference removed nodes. + let final_edges: Vec = deduped + .into_iter() + .filter(|e| kept_ids.contains(&e.source) && kept_ids.contains(&e.target)) + .collect(); + + // Recompute degree map for final kept set. + let mut final_degree: HashMap = HashMap::new(); + for id in &kept_ids { + final_degree.insert(id.clone(), 0); + } + for e in &final_edges { + *final_degree.entry(e.source.clone()).or_default() += 1; + *final_degree.entry(e.target.clone()).or_default() += 1; + } + + // Build nodes vector. + let mut nodes: Vec = kept_ids + .iter() + .map(|id| { + let kind = if id == note { "center" } else { "note" }.to_string(); + let degree = *final_degree.get(id).unwrap_or(&0); + GNode { + label: basename(id), + id: id.clone(), + kind, + cluster: -1, + degree, + } + }) + .collect(); + nodes.sort_by(|a, b| a.id.cmp(&b.id)); + + LocalGraph { + center: note.to_string(), + nodes, + edges: final_edges, + semantic_ready, + } +} + +// --------------------------------------------------------------------------- +// Cached variant +// --------------------------------------------------------------------------- + +/// Like [`local_graph`] but memoised on `(triple_count, total_memory_bytes)`. +/// +/// Map key is `(note_path, depth)`. Cap: 256 entries (clear-on-exceed). 
+pub async fn local_graph_cached( + state: &crate::state::AppState, + note: &str, + depth: usize, +) -> LocalGraph { + let tc = { state.graph.read().await.stats().triple_count }; + let mem_bytes = { state.memory.read().await.stats().total_memory_bytes }; + let version_key = (tc, mem_bytes); + let map_key = (note.to_string(), depth); + + // Check cache — release lock before any await. + { + let cache = state + .local_graph_cache + .lock() + .expect("local_graph cache poisoned"); + if let Some((cached_key, graph)) = cache.get(&map_key) { + if *cached_key == version_key { + return graph.clone(); + } + } + } + + // Compute without holding the mutex. + let result = local_graph(state, note, depth).await; + + // Store result. + { + let mut cache = state + .local_graph_cache + .lock() + .expect("local_graph cache poisoned"); + if cache.len() > 256 { + cache.clear(); + } + cache.insert(map_key, (version_key, result.clone())); + } + + result +} + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +#[cfg(test)] +mod tests { + use std::sync::Arc; + + use aingle_graph::{NodeId, Predicate, Triple, Value}; + use ineru::{Embedder, Embedding, MemoryEntry}; + + use crate::state::AppState; + + // ----------------------------------------------------------------------- + // Stub embedder: 128-dim (same as context.rs tests). + // text with "alpha" → e0=[1,0,…], "zzz" → e1=[0,1,…], else → e2=[0,0,1,…] + // Cosine(alpha,alpha) = 1.0 ≥ NEIGHBOR_FLOOR(0.88) → passes. 
+ // ----------------------------------------------------------------------- + struct StubEmbedder; + + impl Embedder for StubEmbedder { + fn embed_passage(&self, text: &str) -> Embedding { + let mut v = vec![0.0_f32; 128]; + if text.contains("alpha") { + v[0] = 1.0; + } else if text.contains("zzz") { + v[1] = 1.0; + } else { + v[2] = 1.0; + } + Embedding::new(v) + } + + fn embed_query(&self, text: &str) -> Embedding { + self.embed_passage(text) + } + + fn dimensions(&self) -> usize { + 128 + } + + fn relevance_thresholds(&self) -> (f32, f32) { + (0.5, 0.1) + } + } + + fn stub_state() -> AppState { + AppState::with_db_path_and_embedder(":memory:", None, Arc::new(StubEmbedder)).unwrap() + } + + async fn insert_triple_node(state: &AppState, s: &str, p: &str, o_node: &str) { + let g = state.graph.write().await; + g.insert(Triple::new( + NodeId::named(s), + Predicate::named(p), + Value::Node(NodeId::named(o_node)), + )) + .unwrap(); + } + + async fn insert_triple_lit(state: &AppState, s: &str, p: &str, o: &str) { + let g = state.graph.write().await; + g.insert(Triple::new( + NodeId::named(s), + Predicate::named(p), + Value::literal(o), + )) + .unwrap(); + } + + async fn register_note(state: &AppState, path: &str) { + insert_triple_lit(state, path, crate::service::ingest::PRED_SOURCE_HASH, "h").await; + } + + async fn insert_chunk(state: &AppState, source_path: &str, text: &str, emb: Vec) { + let mut mem = state.memory.write().await; + let mut e = MemoryEntry::new( + crate::service::ingest::CHUNK_ENTRY_TYPE, + serde_json::json!({ "text": text, "source_path": source_path }), + ); + e.embedding = Some(Embedding::new(emb)); + mem.remember(e).unwrap(); + } + + fn e0() -> Vec { + let mut v = vec![0.0_f32; 128]; + v[0] = 1.0; + v + } + + // ----------------------------------------------------------------------- + // 1. 
link_edge_from_wikilink + // ----------------------------------------------------------------------- + /// A `links_to` triple (Value::Node) from a.md to b yields a "link" edge a→b, + /// and center is "a.md". + #[tokio::test] + async fn link_edge_from_wikilink() { + let state = AppState::with_db_path(":memory:", None).unwrap(); + register_note(&state, "a.md").await; + register_note(&state, "b.md").await; + // wikilink stored as Value::Node (basename without extension) + insert_triple_node(&state, "a.md", "links_to", "b").await; + + let g = super::local_graph(&state, "a.md", 1).await; + assert_eq!(g.center, "a.md"); + let link = g.edges.iter().find(|e| e.kind == "link"); + assert!(link.is_some(), "must have a link edge: {:?}", g.edges); + let link = link.unwrap(); + assert_eq!(link.source, "a.md"); + assert_eq!(link.target, "b.md"); + } + + // ----------------------------------------------------------------------- + // 2. semantic_edge_from_neighbor + // ----------------------------------------------------------------------- + /// With the stub 128-d embedder and alpha-topic chunks, a.md and b.md both + /// project onto e0. note_context yields them as mutual neighbors with score + /// 1.0 ≥ NEIGHBOR_FLOOR → a "semantic" edge with score.is_some(). + #[tokio::test] + async fn semantic_edge_from_neighbor() { + let state = stub_state(); + register_note(&state, "a.md").await; + register_note(&state, "b.md").await; + insert_chunk(&state, "a.md", "alpha content for a", e0()).await; + insert_chunk(&state, "b.md", "alpha content for b", e0()).await; + + let g = super::local_graph(&state, "a.md", 1).await; + assert!(g.semantic_ready, "StubEmbedder(128d) must be semantic_ready"); + let sem = g.edges.iter().find(|e| e.kind == "semantic"); + assert!(sem.is_some(), "must have a semantic edge: {:?}", g.edges); + assert!(sem.unwrap().score.is_some(), "semantic edge must carry a score"); + } + + // ----------------------------------------------------------------------- + // 3. 
semantic_edge_carries_provenance (dag-gated) + // ----------------------------------------------------------------------- + #[cfg(feature = "dag")] + #[tokio::test] + async fn semantic_edge_carries_provenance() { + let state = AppState::with_db_path_and_embedder( + ":memory:", + None, + Arc::new(StubEmbedder), + ) + .unwrap(); + { + let mut graph = state.graph.write().await; + graph.enable_dag(); + } + register_note(&state, "a.md").await; + register_note(&state, "b.md").await; + insert_chunk(&state, "a.md", "alpha content for a", e0()).await; + insert_chunk(&state, "b.md", "alpha content for b", e0()).await; + + // Record a signed DAG action for b.md so provenance_anchor_for("b.md") is Some. + { + let graph = state.graph.read().await; + let dag_store = graph.dag_store().expect("DAG must be enabled"); + let parents = dag_store.tips().expect("tips must be readable"); + let mut action = aingle_graph::dag::DagAction { + parents, + author: aingle_graph::NodeId::named("test"), + seq: 0, + timestamp: chrono::Utc::now(), + payload: aingle_graph::dag::DagPayload::Custom { + payload_type: "ingest".to_string(), + payload_summary: "b.md ingested".to_string(), + payload: None, + subject: Some("b.md".to_string()), + }, + signature: None, + }; + let key = aingle_graph::dag::DagSigningKey::generate(); + key.sign(&mut action); + dag_store.put(&action).expect("put signed action must succeed"); + } + + let g = super::local_graph(&state, "a.md", 1).await; + let sem = g + .edges + .iter() + .find(|e| e.kind == "semantic" && e.target == "b.md") + .expect("must have semantic edge a→b"); + assert!( + sem.provenance_anchor.is_some(), + "semantic edge to b.md must carry provenance_anchor when a signed DAG action exists: {:?}", + sem + ); + } + + // ----------------------------------------------------------------------- + // 4. 
tag_edge_from_shared_tag + // ----------------------------------------------------------------------- + /// a.md and b.md both tagged "x" → a "tag" edge with label == Some("x"). + #[tokio::test] + async fn tag_edge_from_shared_tag() { + let state = AppState::with_db_path(":memory:", None).unwrap(); + register_note(&state, "a.md").await; + register_note(&state, "b.md").await; + insert_triple_lit(&state, "a.md", "tagged", "x").await; + insert_triple_lit(&state, "b.md", "tagged", "x").await; + + let g = super::local_graph(&state, "a.md", 1).await; + let tag_edge = g.edges.iter().find(|e| e.kind == "tag"); + assert!(tag_edge.is_some(), "must have a tag edge: {:?}", g.edges); + assert_eq!( + tag_edge.unwrap().label.as_deref(), + Some("x"), + "tag edge label must be the shared tag" + ); + } + + // ----------------------------------------------------------------------- + // 5. hash_embedder_omits_semantic + // ----------------------------------------------------------------------- + /// The default 64-dim hash embedder fails the semantic gate → semantic_ready==false, + /// no "semantic" edges; link and tag edges still appear. + #[tokio::test] + async fn hash_embedder_omits_semantic() { + let state = AppState::with_db_path(":memory:", None).unwrap(); + register_note(&state, "a.md").await; + register_note(&state, "b.md").await; + // A link edge (so we know other edges work). + insert_triple_node(&state, "a.md", "links_to", "b").await; + + let g = super::local_graph(&state, "a.md", 1).await; + assert!(!g.semantic_ready, "64-dim hash embedder must set semantic_ready=false"); + assert!( + g.edges.iter().all(|e| e.kind != "semantic"), + "no semantic edges with hash embedder: {:?}", + g.edges + ); + // Link edges still present. + assert!( + g.edges.iter().any(|e| e.kind == "link"), + "link edges must still appear: {:?}", + g.edges + ); + } + + // ----------------------------------------------------------------------- + // 6. 
maps_excluded + // ----------------------------------------------------------------------- + /// Notes under `_maps/` are never included in the graph even when they + /// share tags or links with the center note. + #[tokio::test] + async fn maps_excluded() { + let state = AppState::with_db_path(":memory:", None).unwrap(); + register_note(&state, "a.md").await; + register_note(&state, "_maps/vault-map.md").await; + insert_triple_lit(&state, "a.md", "tagged", "x").await; + insert_triple_lit(&state, "_maps/vault-map.md", "tagged", "x").await; + // Also a direct link to make sure links are filtered too. + insert_triple_node(&state, "a.md", "links_to", "vault-map").await; + + let g = super::local_graph(&state, "a.md", 1).await; + assert!( + !g.nodes.iter().any(|n| n.id.starts_with("_maps/")), + "_maps/ nodes must be excluded: {:?}", + g.nodes + ); + assert!( + !g.edges.iter().any(|e| e.target.starts_with("_maps/") || e.source.starts_with("_maps/")), + "_maps/ edges must be excluded: {:?}", + g.edges + ); + } + + // ----------------------------------------------------------------------- + // 7. caps_respected + // ----------------------------------------------------------------------- + /// With more than NODE_CAP neighbors, nodes.len() <= NODE_CAP and center is present. + #[tokio::test] + async fn caps_respected() { + let state = AppState::with_db_path(":memory:", None).unwrap(); + register_note(&state, "center.md").await; + // Create NODE_CAP + 10 = 90 notes, each sharing a tag with center.md. 
+ for i in 0..90 { + let path = format!("note{i}.md"); + register_note(&state, &path).await; + insert_triple_lit(&state, &path, "tagged", "bigtag").await; + } + insert_triple_lit(&state, "center.md", "tagged", "bigtag").await; + + let g = super::local_graph(&state, "center.md", 1).await; + assert!( + g.nodes.len() <= super::NODE_CAP, + "nodes.len() ({}) must be <= NODE_CAP ({}): center present: {}", + g.nodes.len(), + super::NODE_CAP, + g.nodes.iter().any(|n| n.id == "center.md") + ); + assert!( + g.nodes.iter().any(|n| n.id == "center.md"), + "center must always be in the graph: {:?}", + g.nodes.iter().map(|n| &n.id).collect::>() + ); + } +} diff --git a/crates/aingle_cortex/src/service/mod.rs b/crates/aingle_cortex/src/service/mod.rs index 2ebb24da..0c1a2c4b 100644 --- a/crates/aingle_cortex/src/service/mod.rs +++ b/crates/aingle_cortex/src/service/mod.rs @@ -5,6 +5,7 @@ pub mod backlinks; pub mod context; +pub mod local_graph; #[cfg(feature = "dag")] pub mod dag; pub mod ground; diff --git a/crates/aingle_cortex/src/state.rs b/crates/aingle_cortex/src/state.rs index 87bd4625..d33939eb 100644 --- a/crates/aingle_cortex/src/state.rs +++ b/crates/aingle_cortex/src/state.rs @@ -47,6 +47,17 @@ pub struct AppState { >, >, >, + /// Per-note local-graph cache, keyed by `(note_path, depth)`, storing + /// `(graph_triple_count, total_memory_bytes) → LocalGraph`. Invalidated + /// on any graph or memory change — mirrors note_context_cache semantics. + pub local_graph_cache: std::sync::Arc< + std::sync::Mutex< + std::collections::HashMap< + (String, usize), + ((usize, usize), crate::service::local_graph::LocalGraph), + >, + >, + >, /// The event broadcaster for sending real-time updates to WebSocket subscribers. pub broadcaster: Arc, /// The store for managing and verifying zero-knowledge proofs. 
@@ -117,6 +128,7 @@ impl AppState { embedder: std::sync::Arc::new(HashEmbedder::new()), vault_map_cache: std::sync::Arc::new(std::sync::Mutex::new(None)), note_context_cache: std::sync::Arc::new(std::sync::Mutex::new(std::collections::HashMap::new())), + local_graph_cache: std::sync::Arc::new(std::sync::Mutex::new(std::collections::HashMap::new())), broadcaster: Arc::new(EventBroadcaster::new()), proof_store: Arc::new(ProofStore::new()), sandbox_manager: Arc::new(SandboxManager::new()), @@ -164,6 +176,7 @@ impl AppState { embedder: std::sync::Arc::new(HashEmbedder::new()), vault_map_cache: std::sync::Arc::new(std::sync::Mutex::new(None)), note_context_cache: std::sync::Arc::new(std::sync::Mutex::new(std::collections::HashMap::new())), + local_graph_cache: std::sync::Arc::new(std::sync::Mutex::new(std::collections::HashMap::new())), broadcaster: Arc::new(EventBroadcaster::new()), proof_store: Arc::new(ProofStore::new()), sandbox_manager: Arc::new(SandboxManager::new()), @@ -211,6 +224,7 @@ impl AppState { embedder: std::sync::Arc::new(HashEmbedder::new()), vault_map_cache: std::sync::Arc::new(std::sync::Mutex::new(None)), note_context_cache: std::sync::Arc::new(std::sync::Mutex::new(std::collections::HashMap::new())), + local_graph_cache: std::sync::Arc::new(std::sync::Mutex::new(std::collections::HashMap::new())), broadcaster: Arc::new(EventBroadcaster::new()), proof_store: Arc::new(ProofStore::new()), sandbox_manager: Arc::new(SandboxManager::new()), @@ -355,6 +369,7 @@ impl AppState { embedder, vault_map_cache: std::sync::Arc::new(std::sync::Mutex::new(None)), note_context_cache: std::sync::Arc::new(std::sync::Mutex::new(std::collections::HashMap::new())), + local_graph_cache: std::sync::Arc::new(std::sync::Mutex::new(std::collections::HashMap::new())), broadcaster: Arc::new(EventBroadcaster::new()), proof_store, sandbox_manager: Arc::new(SandboxManager::new()), From a520d73ddc1d1bc6876d917cbdd08cf45447c6f1 Mon Sep 17 00:00:00 2001 From: ApiliumDevTeam Date: Sun, 
28 Jun 2026 01:19:23 +0200 Subject: [PATCH 62/72] perf(local_graph): pre-index links, cached context, frontier cap, stable sort - Replace note_context with note_context_cached in the BFS semantic pass to avoid redundant HNSW queries for the same note across depths. - Pre-index links into by_src/by_dst BTreeMaps for O(1) per-node lookup; eliminates the O(links * frontier) linear scan per BFS level. - Add SEM_FRONTIER_CAP (16): sort+truncate the next_frontier before promoting, bounding depth-2 semantic cost to at most 16 cached calls. - Extend seen_sym key to (lo, hi, kind, tag_label) so two notes sharing two distinct tags produce two distinct tag edges instead of one. - Sort final_edges by (source, target, kind) for stable cross-run output. - Add 7 new tests (8-14): depth traversal, incoming links, link+semantic coexistence, symmetric dedup, cache hit/invalidation, cap eviction, and frontier-cap perf guard. --- .../aingle_cortex/src/service/local_graph.rs | 328 ++++++++++++++++-- 1 file changed, 298 insertions(+), 30 deletions(-) diff --git a/crates/aingle_cortex/src/service/local_graph.rs b/crates/aingle_cortex/src/service/local_graph.rs index 67559710..b6f16e27 100644 --- a/crates/aingle_cortex/src/service/local_graph.rs +++ b/crates/aingle_cortex/src/service/local_graph.rs @@ -1,4 +1,4 @@ -// Copyright 2019-2026 Apilium Technologies OÜ. All rights reserved. +// Copyright 2019-2026 Apilium Technologies OÜ. All rights reserved. // SPDX-License-Identifier: Apache-2.0 OR Commercial //! 
Local graph neighborhood for a single note: typed edges (link / semantic / tag) @@ -8,7 +8,7 @@ use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet, VecDeque}; use crate::service::triple_util::{obj_string, resolve_link_target}; use crate::service::vault_map::{basename, is_maps_path}; -use crate::service::context::{note_context, NEIGHBOR_FLOOR}; +use crate::service::context::{note_context_cached, NEIGHBOR_FLOOR}; // --------------------------------------------------------------------------- // Public types @@ -66,6 +66,11 @@ const SEM_PER_NODE: usize = 5; const MAX_DEPTH: usize = 2; /// Max tag-edges added per (node, tag) pair — prevents explosion on popular tags. const TAG_FANOUT_CAP: usize = 6; +/// Maximum frontier size before the per-node semantic pass at each BFS level. +/// Caps the depth-2 semantic N+1: a hub with many link-neighbors would otherwise +/// trigger one `note_context_cached` call per frontier node. Sorting the frontier +/// first ensures deterministic behavior when truncating. +const SEM_FRONTIER_CAP: usize = 16; // --------------------------------------------------------------------------- // Core function @@ -136,6 +141,15 @@ pub async fn local_graph( .filter(|(_, dst)| note_set.contains(dst.as_str()) && !is_maps_path(dst)) .collect(); + // Pre-index links for O(1) per-node lookup in the BFS loop — avoids + // re-scanning the full `links` vec twice per node. 
+ let mut by_src: BTreeMap> = BTreeMap::new(); + let mut by_dst: BTreeMap> = BTreeMap::new(); + for (src, dst) in &links { + by_src.entry(src.clone()).or_default().push(dst.clone()); + by_dst.entry(dst.clone()).or_default().push(src.clone()); + } + // tag_of_note: note → set // notes_of_tag: tag → vec (sorted, deduped) let mut tag_of_note: BTreeMap> = BTreeMap::new(); @@ -177,39 +191,43 @@ pub async fn local_graph( } // --- link edges (outgoing from n) --- - for (src, dst) in links.iter().filter(|(s, _)| s == &n) { - edges.push(TypedEdge { - source: src.clone(), - target: dst.clone(), - kind: "link".to_string(), - score: None, - label: None, - provenance_anchor: None, - }); - if !visited.contains(dst) { - visited.insert(dst.clone()); - next_frontier.push(dst.clone()); + if let Some(dsts) = by_src.get(&n) { + for dst in dsts { + edges.push(TypedEdge { + source: n.clone(), + target: dst.clone(), + kind: "link".to_string(), + score: None, + label: None, + provenance_anchor: None, + }); + if !visited.contains(dst) { + visited.insert(dst.clone()); + next_frontier.push(dst.clone()); + } } } // --- link edges (incoming to n) --- - for (src, dst) in links.iter().filter(|(_, d)| d == &n) { - edges.push(TypedEdge { - source: src.clone(), - target: dst.clone(), - kind: "link".to_string(), - score: None, - label: None, - provenance_anchor: None, - }); - if !visited.contains(src) { - visited.insert(src.clone()); - next_frontier.push(src.clone()); + if let Some(srcs) = by_dst.get(&n) { + for src in srcs { + edges.push(TypedEdge { + source: src.clone(), + target: n.clone(), + kind: "link".to_string(), + score: None, + label: None, + provenance_anchor: None, + }); + if !visited.contains(src) { + visited.insert(src.clone()); + next_frontier.push(src.clone()); + } } } // --- semantic edges --- if semantic_grade { - let ctx = note_context(state, &n, SEM_PER_NODE).await; + let ctx = note_context_cached(state, &n, SEM_PER_NODE).await; if !ctx.semantic_ready { semantic_ready = false; } 
else { @@ -267,6 +285,11 @@ pub async fn local_graph( } } + // Cap the next frontier before promoting to bound semantic cost at the + // next level (≤ SEM_FRONTIER_CAP × note_context_cached calls). + // Depth-1 behavior is identical: next_frontier is never used again. + next_frontier.sort(); + next_frontier.truncate(SEM_FRONTIER_CAP); for n in next_frontier { frontier.push_back(n); } @@ -278,7 +301,7 @@ pub async fn local_graph( // Links are directional — dedupe by (source, target, kind). // Semantic/tag are symmetric — dedupe order-insensitively by (min,max,kind). let mut seen_link: HashSet<(String, String)> = HashSet::new(); - let mut seen_sym: HashSet<(String, String, String)> = HashSet::new(); + let mut seen_sym: HashSet<(String, String, String, String)> = HashSet::new(); let mut deduped: Vec = Vec::new(); for e in edges { @@ -300,7 +323,14 @@ pub async fn local_graph( } else { (e.target.clone(), e.source.clone()) }; - let key = (lo, hi, e.kind.clone()); + // Include the tag label so a pair sharing two distinct tags + // yields two edges. Semantic label is always None → "" → no clash. + let tag_label = if e.kind == "tag" { + e.label.clone().unwrap_or_default() + } else { + String::new() + }; + let key = (lo, hi, e.kind.clone(), tag_label); if seen_sym.insert(key) { deduped.push(e); } @@ -349,10 +379,17 @@ pub async fn local_graph( }; // Drop edges that reference removed nodes. - let final_edges: Vec = deduped + let mut final_edges: Vec = deduped .into_iter() .filter(|e| kept_ids.contains(&e.source) && kept_ids.contains(&e.target)) .collect(); + // Sort for stable cross-run output. + final_edges.sort_by(|a, b| { + a.source + .cmp(&b.source) + .then(a.target.cmp(&b.target)) + .then(a.kind.cmp(&b.kind)) + }); // Recompute degree map for final kept set. 
let mut final_degree: HashMap = HashMap::new(); @@ -735,4 +772,235 @@ mod tests { g.nodes.iter().map(|n| &n.id).collect::>() ); } + + // ----------------------------------------------------------------------- + // 8. depth_two_expands_frontier + // ----------------------------------------------------------------------- + /// A→B→C via wikilinks: depth=2 reaches c.md, depth=1 does not. + #[tokio::test] + async fn depth_two_expands_frontier() { + let state = AppState::with_db_path(":memory:", None).unwrap(); + register_note(&state, "a.md").await; + register_note(&state, "b.md").await; + register_note(&state, "c.md").await; + insert_triple_node(&state, "a.md", "links_to", "b").await; + insert_triple_node(&state, "b.md", "links_to", "c").await; + + let g1 = super::local_graph(&state, "a.md", 1).await; + assert!( + !g1.nodes.iter().any(|n| n.id == "c.md"), + "depth=1 must NOT include c.md: {:?}", + g1.nodes.iter().map(|n| &n.id).collect::>() + ); + + let g2 = super::local_graph(&state, "a.md", 2).await; + assert!( + g2.nodes.iter().any(|n| n.id == "c.md"), + "depth=2 must include c.md (reached via a→b→c): {:?}", + g2.nodes.iter().map(|n| &n.id).collect::>() + ); + } + + + // ----------------------------------------------------------------------- + // 9. incoming_link_edge + // ----------------------------------------------------------------------- + /// X links_to A (incoming); local_graph("a.md", 1) must include x→a link edge. 
+ #[tokio::test] + async fn incoming_link_edge() { + let state = AppState::with_db_path(":memory:", None).unwrap(); + register_note(&state, "x.md").await; + register_note(&state, "a.md").await; + insert_triple_node(&state, "x.md", "links_to", "a").await; + + let g = super::local_graph(&state, "a.md", 1).await; + let link = g + .edges + .iter() + .find(|e| e.kind == "link" && e.source == "x.md" && e.target == "a.md"); + assert!( + link.is_some(), + "incoming link x→a must appear in graph centered on a.md: {:?}", + g.edges + ); + } + + + // ----------------------------------------------------------------------- + // 10. pair_with_link_and_semantic_keeps_both + // ----------------------------------------------------------------------- + /// A links_to B AND B is A's semantic neighbor → both a link edge AND a + /// semantic edge must be present for the pair (different dedup sets). + #[tokio::test] + async fn pair_with_link_and_semantic_keeps_both() { + let state = stub_state(); + register_note(&state, "a.md").await; + register_note(&state, "b.md").await; + insert_triple_node(&state, "a.md", "links_to", "b").await; + insert_chunk(&state, "a.md", "alpha content for a", e0()).await; + insert_chunk(&state, "b.md", "alpha content for b", e0()).await; + + let g = super::local_graph(&state, "a.md", 1).await; + let has_link = g + .edges + .iter() + .any(|e| e.kind == "link" && e.source == "a.md" && e.target == "b.md"); + let has_sem = g.edges.iter().any(|e| { + e.kind == "semantic" + && ((e.source == "a.md" && e.target == "b.md") + || (e.source == "b.md" && e.target == "a.md")) + }); + assert!(has_link, "link edge a→b must be present: {:?}", g.edges); + assert!(has_sem, "semantic edge a↔b must be present: {:?}", g.edges); + } + + + // ----------------------------------------------------------------------- + // 11. 
symmetric_semantic_dedup + // ----------------------------------------------------------------------- + /// With a→b and b→a semantic edges produced at different BFS levels, dedup + /// must yield exactly ONE semantic edge for the pair. + #[tokio::test] + async fn symmetric_semantic_dedup() { + let state = stub_state(); + register_note(&state, "a.md").await; + register_note(&state, "b.md").await; + insert_chunk(&state, "a.md", "alpha content for a", e0()).await; + insert_chunk(&state, "b.md", "alpha content for b", e0()).await; + + // depth=2: level-1 processes a.md → finds b.md; level-2 processes b.md → finds a.md. + // Both produce a↔b semantic edge candidates. Dedup keeps exactly one. + let g = super::local_graph(&state, "a.md", 2).await; + let sem_count = g + .edges + .iter() + .filter(|e| { + e.kind == "semantic" + && ((e.source == "a.md" && e.target == "b.md") + || (e.source == "b.md" && e.target == "a.md")) + }) + .count(); + assert_eq!( + sem_count, + 1, + "symmetric a↔b semantic must yield exactly ONE edge, got {sem_count}: {:?}", + g.edges + ); + } + + + // ----------------------------------------------------------------------- + // 12. local_graph_cached_hit_and_invalidation + // ----------------------------------------------------------------------- + /// Cache hit: second call with unchanged graph returns same result. + /// Invalidation: after a graph mutation, the next call recomputes. + #[tokio::test] + async fn local_graph_cached_hit_and_invalidation() { + let state = AppState::with_db_path(":memory:", None).unwrap(); + register_note(&state, "a.md").await; + register_note(&state, "b.md").await; + insert_triple_node(&state, "a.md", "links_to", "b").await; + + // First call: computes and caches. + let g1 = super::local_graph_cached(&state, "a.md", 1).await; + assert!(g1.nodes.iter().any(|n| n.id == "b.md"), "b.md must be in graph"); + + // Second call: graph/memory unchanged → cache hit → identical result. 
+ let g2 = super::local_graph_cached(&state, "a.md", 1).await; + assert_eq!( + g1.nodes.len(), + g2.nodes.len(), + "cache hit must return same node count" + ); + + // Mutate: add c.md and a link a→c (changes triple_count). + register_note(&state, "c.md").await; + insert_triple_node(&state, "a.md", "links_to", "c").await; + + // Third call: version mismatch → invalidated → c.md appears. + let g3 = super::local_graph_cached(&state, "a.md", 1).await; + assert!( + g3.nodes.iter().any(|n| n.id == "c.md"), + "after mutation, c.md must appear in recomputed result: {:?}", + g3.nodes.iter().map(|n| &n.id).collect::>() + ); + } + + + // ----------------------------------------------------------------------- + // 13. cache_cap_clears_when_exceeded + // ----------------------------------------------------------------------- + /// When local_graph_cache exceeds 256 entries, the next insert clears the map + /// first, then inserts the new entry — so len() == 1 afterward. + #[tokio::test] + async fn cache_cap_clears_when_exceeded() { + let state = AppState::with_db_path(":memory:", None).unwrap(); + + // Pre-fill with 257 dummy entries to exceed the cap. + { + let mut cache = state.local_graph_cache.lock().unwrap(); + for i in 0..257usize { + cache.insert( + (format!("dummy_{i}.md"), 1usize), + ((0, 0), super::LocalGraph::default()), + ); + } + } + assert_eq!( + state.local_graph_cache.lock().unwrap().len(), + 257, + "pre-condition: cache must have 257 dummy entries" + ); + + // Call for a key not in the cache; cap fires before insert. 
+ let _ = super::local_graph_cached(&state, "fresh.md", 1).await; + + let cache = state.local_graph_cache.lock().unwrap(); + assert_eq!( + cache.len(), + 1, + "cap must clear oversized cache then insert one entry; got {} entries", + cache.len() + ); + assert!( + cache.contains_key(&("fresh.md".to_string(), 1usize)), + "fresh.md must be in cache after cap-and-insert" + ); + } + + + // ----------------------------------------------------------------------- + // 14. frontier_cap_bounds_semantic (optional perf guard) + // ----------------------------------------------------------------------- + /// A hub with >SEM_FRONTIER_CAP link-neighbors at depth=2 still completes + /// and the result satisfies NODE_CAP and includes the center. + #[tokio::test] + async fn frontier_cap_bounds_semantic() { + let state = stub_state(); + register_note(&state, "center.md").await; + register_note(&state, "hub.md").await; + insert_triple_node(&state, "center.md", "links_to", "hub").await; + insert_chunk(&state, "center.md", "alpha content center", e0()).await; + insert_chunk(&state, "hub.md", "alpha content hub", e0()).await; + + // 20 spokes — more than SEM_FRONTIER_CAP (16). 
+ for i in 0..20usize { + let path = format!("spoke{i}.md"); + register_note(&state, &path).await; + insert_triple_node(&state, "hub.md", "links_to", &format!("spoke{i}")).await; + insert_chunk(&state, &path, "alpha content spoke", e0()).await; + } + + let g = super::local_graph(&state, "center.md", 2).await; + assert!( + g.nodes.len() <= super::NODE_CAP, + "nodes must be ≤ NODE_CAP ({}), got {}", + super::NODE_CAP, + g.nodes.len() + ); + assert!( + g.nodes.iter().any(|n| n.id == "center.md"), + "center must always be in the graph" + ); + } } From f404a6f69299e31447f1ce2140a07bf4a2a766f7 Mon Sep 17 00:00:00 2001 From: ApiliumDevTeam Date: Sun, 28 Jun 2026 01:27:15 +0200 Subject: [PATCH 63/72] feat(vault_map): add kind field to GraphEdge and emit semantic edges from clustering - GraphEdge gains pub kind: String; existing links_to edges get kind='link' - cluster_semantic now returns (Vec, Vec<(String, String, f32)>), capturing cosine pairs during the union-find pass at no extra O(n^2) cost - compute_vault_map emits kind='semantic' edges for pairs that cleared SEMANTIC_THRESHOLD (0.88), filtered to the rendered node set (GRAPH_NODE_CAP), deduplicated order-insensitively, skipped when a link edge already exists, and capped at SEMANTIC_EDGE_CAP = 1200 (highest-cosine pairs kept) - totals.links continues to count only explicit wikilinks (s.link_count) - Tests: link_edges_have_link_kind, clustering_emits_semantic_edges (with no-dup-when-also-linked variant), totals_links_counts_only_explicit, and updated semantic_clusters_group_similar_notes for new return type (10/10 green) --- crates/aingle_cortex/src/service/vault_map.rs | 231 +++++++++++++++++- 1 file changed, 221 insertions(+), 10 deletions(-) diff --git a/crates/aingle_cortex/src/service/vault_map.rs b/crates/aingle_cortex/src/service/vault_map.rs index 883ff9ff..6daad87a 100644 --- a/crates/aingle_cortex/src/service/vault_map.rs +++ b/crates/aingle_cortex/src/service/vault_map.rs @@ -88,11 +88,19 @@ pub struct 
GraphNode { pub struct GraphEdge { pub source: String, pub target: String, + /// Edge type: `"link"` for explicit wikilinks, `"semantic"` for cosine-similar pairs + /// discovered during topic clustering. + pub kind: String, } /// Max nodes rendered in the visual graph (top-degree); larger vaults are capped. const GRAPH_NODE_CAP: usize = 600; +/// Hard cap on semantic edges in the graph view. Dense clusters can produce O(n²) +/// pairs; beyond this limit only the highest-cosine pairs are kept (the sorting +/// happens inside `compute_vault_map` before truncation). +const SEMANTIC_EDGE_CAP: usize = 1200; + /// Tags (case-insensitive) that mark a note as a reusable skill/process. const SKILL_TAGS: [&str; 6] = ["skill", "process", "sop", "workflow", "how-to", "howto"]; @@ -225,7 +233,14 @@ fn cosine(a: &[f32], b: &[f32]) -> f32 { /// cosine >= `threshold` are linked; each connected component is a topic. Labeled /// by the most central note (highest mean cosine to its component). Deterministic /// (inputs are a sorted BTreeMap). O(n^2) — the caller caps n. -pub(crate) fn cluster_semantic(vecs: &BTreeMap>, threshold: f32) -> Vec { +/// +/// Returns `(topics, sem_pairs)` where `sem_pairs` is the list of +/// `(note_a, note_b, cosine)` pairs that met the threshold. These are captured +/// during the union-find pass so no additional O(n²) scan is needed. +pub(crate) fn cluster_semantic( + vecs: &BTreeMap>, + threshold: f32, +) -> (Vec, Vec<(String, String, f32)>) { let names: Vec<&String> = vecs.keys().collect(); let n = names.len(); // union-find @@ -237,13 +252,18 @@ pub(crate) fn cluster_semantic(vecs: &BTreeMap>, threshold: f32 } x } + // Pairs above threshold — captured here so `compute_vault_map` can emit + // semantic edges without an additional O(n²) pass. 
+ let mut sem_pairs: Vec<(String, String, f32)> = Vec::new(); for i in 0..n { for j in (i + 1)..n { - if cosine(&vecs[names[i]], &vecs[names[j]]) >= threshold { + let c = cosine(&vecs[names[i]], &vecs[names[j]]); + if c >= threshold { let (ri, rj) = (find(&mut parent, i), find(&mut parent, j)); if ri != rj { parent[ri] = rj; } + sem_pairs.push((names[i].clone(), names[j].clone(), c)); } } } @@ -276,7 +296,7 @@ pub(crate) fn cluster_semantic(vecs: &BTreeMap>, threshold: f32 }); } topics.sort_by(|a, b| b.size.cmp(&a.size).then(a.label.cmp(&b.label))); - topics + (topics, sem_pairs) } fn mean_sim(self_idx: usize, members: &[usize], names: &[&String], vecs: &BTreeMap>) -> f32 { @@ -373,8 +393,10 @@ pub async fn compute_vault_map(state: &crate::state::AppState) -> VaultMap { .cloned() .collect(); - // Semantic topics (capped). - let topics = if s.notes.len() <= SEMANTIC_NOTE_CAP { + // Semantic topics (capped) + raw pairs for semantic-edge emission. + // `raw_sem_pairs` holds (note_a, note_b, cosine) captured during the O(n²) + // union-find pass — no additional scan is needed to produce semantic edges. + let (topics, raw_sem_pairs) = if s.notes.len() <= SEMANTIC_NOTE_CAP { let mem = state.memory.read().await; let all_vecs = per_note_vectors(&mem); let vecs: std::collections::BTreeMap> = all_vecs @@ -384,7 +406,7 @@ pub async fn compute_vault_map(state: &crate::state::AppState) -> VaultMap { if vecs.len() >= 2 { cluster_semantic(&vecs, SEMANTIC_THRESHOLD) } else { - Vec::new() + (Vec::new(), Vec::new()) } } else { log::info!( @@ -392,7 +414,7 @@ pub async fn compute_vault_map(state: &crate::state::AppState) -> VaultMap { s.notes.len(), SEMANTIC_NOTE_CAP ); - Vec::new() + (Vec::new(), Vec::new()) }; // Tag clusters + tag index. @@ -453,13 +475,58 @@ pub async fn compute_vault_map(state: &crate::state::AppState) -> VaultMap { + s.out_deg.get(p).copied().unwrap_or(0), }) .collect(); - let edges: Vec = s + // Link edges (explicit wikilinks), typed "link". 
+ let mut edges: Vec = s .edges .iter() .filter(|(a, b)| kept.contains(a) && kept.contains(b)) - .map(|(a, b)| GraphEdge { source: a.clone(), target: b.clone() }) + .map(|(a, b)| GraphEdge { source: a.clone(), target: b.clone(), kind: "link".into() }) .collect(); + // Semantic edges from the clustering pass — no new O(n²) scan; cosines were + // already captured in `raw_sem_pairs`. Rules: + // 1. Both endpoints must be in the rendered node set (`kept`). + // 2. Skip pairs that already have an explicit link (order-insensitive). + // 3. Deduplicate order-insensitively (BTreeMap keys are sorted so i = s + .edges + .iter() + .map(|(a, b)| { + if a <= b { (a.clone(), b.clone()) } else { (b.clone(), a.clone()) } + }) + .collect(); + + // Normalise pair order, filter to the rendered node set, sort by cosine desc. + let mut candidates: Vec<(String, String, f32)> = raw_sem_pairs + .into_iter() + .filter(|(a, b, _)| kept.contains(a) && kept.contains(b)) + .map(|(a, b, c)| if a <= b { (a, b, c) } else { (b, a, c) }) + .collect(); + candidates + .sort_by(|x, y| y.2.partial_cmp(&x.2).unwrap_or(std::cmp::Ordering::Equal)); + + let mut seen: std::collections::BTreeSet<(String, String)> = + std::collections::BTreeSet::new(); + let mut sem_count = 0usize; + for (a, b, _c) in candidates { + if sem_count >= SEMANTIC_EDGE_CAP { + break; + } + let key = (a.clone(), b.clone()); + if link_pair_set.contains(&key) || seen.contains(&key) { + continue; + } + seen.insert(key); + edges.push(GraphEdge { source: a, target: b, kind: "semantic".into() }); + sem_count += 1; + } + } + + // `totals.links` counts only explicit wikilinks (s.link_count), not semantic edges. 
let totals = Totals { notes: s.notes.len(), links: s.link_count, @@ -610,7 +677,7 @@ mod tests { vecs.insert("b.md".into(), vec![0.99, 0.01, 0.0]); vecs.insert("c.md".into(), vec![0.0, 0.0, 1.0]); - let topics = super::cluster_semantic(&vecs, 0.9); + let (topics, sem_pairs) = super::cluster_semantic(&vecs, 0.9); // a & b together, c alone → 2 topics assert_eq!(topics.len(), 2); let big = topics.iter().max_by_key(|t| t.size).unwrap(); @@ -619,6 +686,14 @@ mod tests { big.notes.contains(&"a.md".to_string()) && big.notes.contains(&"b.md".to_string()) ); + // The pair (a.md, b.md) must be captured in sem_pairs with cosine ≥ 0.9. + assert!( + sem_pairs.iter().any(|(a, b, c)| { + ((a == "a.md" && b == "b.md") || (a == "b.md" && b == "a.md")) && *c >= 0.9 + }), + "sem_pairs must contain (a.md, b.md) pair: {:?}", + sem_pairs + ); } #[tokio::test] @@ -731,4 +806,140 @@ mod tests { let m2 = super::vault_map_cached(&state).await; assert_eq!(m2.totals.notes, 2, "cache must invalidate when triple_count changes"); } + + // ----------------------------------------------------------------- + // VC-2 Task 2: typed edges + semantic edge emission + // ----------------------------------------------------------------- + + /// Every explicit wikilink must produce a GraphEdge with `kind == "link"`. 
+ #[tokio::test] + async fn link_edges_have_link_kind() { + let state = graph_with(&[ + ("a.md", "aingle:source_hash", "h1"), + ("b.md", "aingle:source_hash", "h2"), + ("a.md", "links_to", "b"), + ]) + .await; + let map = super::vault_map_cached(&state).await; + let edge = map.graph.edges.iter().find(|e| { + (e.source == "a.md" && e.target == "b.md") + || (e.source == "b.md" && e.target == "a.md") + }); + let edge = edge.expect("link edge between a.md and b.md must exist"); + assert_eq!(edge.kind, "link", "wikilink edges must carry kind='link'"); + } + + /// Clustering must emit `kind == "semantic"` edges for similar notes, and must + /// NOT duplicate a pair that already has an explicit link edge. + #[tokio::test] + async fn clustering_emits_semantic_edges() { + use ineru::{Embedding, MemoryEntry}; + + // --- variant A: no explicit link; semantic edge must appear ---------------- + let state = graph_with(&[ + ("a.md", "aingle:source_hash", "h1"), + ("b.md", "aingle:source_hash", "h2"), + ]) + .await; + { + let mut mem = state.memory.write().await; + // Identical embeddings → cosine 1.0 ≥ SEMANTIC_THRESHOLD (0.88). 
+ for path in ["a.md", "b.md"] { + let mut e = MemoryEntry::new( + crate::service::ingest::CHUNK_ENTRY_TYPE, + serde_json::json!({ "text": "content", "source_path": path }), + ); + e.embedding = Some(Embedding::new(vec![1.0_f32, 0.0, 0.0])); + mem.remember(e).unwrap(); + } + } + let map = super::compute_vault_map(&state).await; + let sem_ab = map.graph.edges.iter().find(|e| { + e.kind == "semantic" + && ((e.source == "a.md" && e.target == "b.md") + || (e.source == "b.md" && e.target == "a.md")) + }); + assert!( + sem_ab.is_some(), + "semantic edge between a.md and b.md must exist: {:?}", + map.graph.edges + ); + + // --- variant B: also linked explicitly; must not produce a semantic dup --- + let state2 = graph_with(&[ + ("a.md", "aingle:source_hash", "h1"), + ("b.md", "aingle:source_hash", "h2"), + ("a.md", "links_to", "b"), // explicit wikilink + ]) + .await; + { + let mut mem = state2.memory.write().await; + for path in ["a.md", "b.md"] { + let mut e = MemoryEntry::new( + crate::service::ingest::CHUNK_ENTRY_TYPE, + serde_json::json!({ "text": "content", "source_path": path }), + ); + e.embedding = Some(Embedding::new(vec![1.0_f32, 0.0, 0.0])); + mem.remember(e).unwrap(); + } + } + let map2 = super::compute_vault_map(&state2).await; + let edges_ab: Vec<_> = map2.graph.edges.iter().filter(|e| { + (e.source == "a.md" && e.target == "b.md") + || (e.source == "b.md" && e.target == "a.md") + }).collect(); + assert_eq!( + edges_ab.len(), + 1, + "a.md-b.md pair must appear exactly once (no semantic dup): {:?}", + map2.graph.edges + ); + assert_eq!( + edges_ab[0].kind, + "link", + "the single edge must have kind='link', not 'semantic': {:?}", + edges_ab[0] + ); + } + + /// `totals.links` must count only explicit wikilinks, not semantic edges. + #[tokio::test] + async fn totals_links_counts_only_explicit() { + use ineru::{Embedding, MemoryEntry}; + + // One explicit link between a and b; a and c are semantically similar but + // not wikilinked. `totals.links` must stay 1. 
+ let state = graph_with(&[ + ("a.md", "aingle:source_hash", "h1"), + ("b.md", "aingle:source_hash", "h2"), + ("c.md", "aingle:source_hash", "h3"), + ("a.md", "links_to", "b"), + ]) + .await; + { + let mut mem = state.memory.write().await; + // a and c share a near-identical embedding → cosine 1.0 ≥ threshold. + for path in ["a.md", "c.md"] { + let mut e = MemoryEntry::new( + crate::service::ingest::CHUNK_ENTRY_TYPE, + serde_json::json!({ "text": "content", "source_path": path }), + ); + e.embedding = Some(Embedding::new(vec![1.0_f32, 0.0, 0.0])); + mem.remember(e).unwrap(); + } + } + let map = super::compute_vault_map(&state).await; + assert_eq!( + map.totals.links, + 1, + "totals.links must count only explicit wikilinks, not semantic edges: {:?}", + map.totals + ); + let sem_edges: Vec<_> = + map.graph.edges.iter().filter(|e| e.kind == "semantic").collect(); + assert!( + !sem_edges.is_empty(), + "semantic edges between similar notes must exist even when totals.links is 1" + ); + } } From 41b98fc6ff924bc5daefcc72eead32ddcd97f6a6 Mon Sep 17 00:00:00 2001 From: ApiliumDevTeam Date: Sun, 28 Jun 2026 01:45:12 +0200 Subject: [PATCH 64/72] test(cortex): neural e2e for local_graph semantic edge (VC-2 Task 6) --- .../aingle_cortex/src/service/local_graph.rs | 96 +++++++++++++++++++ 1 file changed, 96 insertions(+) diff --git a/crates/aingle_cortex/src/service/local_graph.rs b/crates/aingle_cortex/src/service/local_graph.rs index b6f16e27..17ef7877 100644 --- a/crates/aingle_cortex/src/service/local_graph.rs +++ b/crates/aingle_cortex/src/service/local_graph.rs @@ -1003,4 +1003,100 @@ mod tests { "center must always be in the graph" ); } + + + // ----------------------------------------------------------------------- + // 15. neural_local_graph_has_semantic_edge (real e5 model, gated) + // ----------------------------------------------------------------------- + /// End-to-end acceptance test using the real multilingual-e5-small model. 
+ /// Skipped when the model files are absent. Requires `ORT_DYLIB_PATH`. + /// + /// Two same-topic Spanish notes (dog care) must share a semantic edge; + /// an off-topic note (elections) must not appear (below NEIGHBOR_FLOOR=0.88). + #[cfg(feature = "neural-embeddings")] + #[tokio::test] + async fn neural_local_graph_has_semantic_edge() { + let model_dir = std::env::var("INERU_E5_MODEL_DIR").unwrap_or_else(|_| { + concat!( + env!("CARGO_MANIFEST_DIR"), + "/../ineru/test-models/multilingual-e5-small" + ) + .to_string() + }); + if !std::path::Path::new(&model_dir) + .join("onnx/model.onnx") + .exists() + { + eprintln!( + "skipping neural_local_graph_has_semantic_edge: e5 model not found at {model_dir}" + ); + return; + } + + let embedder = crate::embedder::build_embedder(Some(&model_dir)); + assert_eq!( + embedder.dimensions(), + 384, + "neural embedder must be active (384d)" + ); + + let state = + AppState::with_db_path_and_embedder(":memory:", None, embedder).unwrap(); + { + let mut graph = state.graph.write().await; + graph.enable_dag(); + } + + let dir = tempfile::tempdir().unwrap(); + // Two same-topic notes about dog care (reused from neural_note_context_finds_same_topic). + std::fs::write( + dir.path().join("perros1.md"), + "# Cuidado de perros\n\nLos perros necesitan paseos diarios, agua fresca y una dieta equilibrada para estar sanos.\n", + ) + .unwrap(); + std::fs::write( + dir.path().join("perros2.md"), + "# Mascotas\n\nUn perro sano requiere ejercicio diario, hidratación constante y alimentación balanceada.\n", + ) + .unwrap(); + // Off-topic note: elections have no semantic overlap with dog care. 
+ std::fs::write( + dir.path().join("elecciones.md"), + "# Elecciones\n\nLos resultados de las elecciones presidenciales determinan el futuro del país.\n", + ) + .unwrap(); + + crate::service::ingest::ingest_path(&state, dir.path().to_str().unwrap(), None) + .await + .unwrap(); + + let g = super::local_graph(&state, "perros1.md", 1).await; + + assert!( + g.semantic_ready, + "neural embedder (384d) must set semantic_ready=true" + ); + + // There must be a semantic edge connecting perros1↔perros2 (either orientation). + let has_sem_edge = g.edges.iter().any(|e| { + e.kind == "semantic" + && ((e.source == "perros1.md" && e.target == "perros2.md") + || (e.source == "perros2.md" && e.target == "perros1.md")) + }); + assert!( + has_sem_edge, + "perros1.md and perros2.md (same-topic) must share a semantic edge: {:?}", + g.edges + ); + + // elecciones.md is off-topic; cosine vs perros1 is below NEIGHBOR_FLOOR (0.88). + assert!( + !g.edges.iter().any(|e| { + e.kind == "semantic" + && (e.source == "elecciones.md" || e.target == "elecciones.md") + }), + "off-topic elecciones.md must not have a semantic edge (below NEIGHBOR_FLOOR=0.88): {:?}", + g.edges + ); + } } From c847e4f7ba579a7b5c535aad5f04f7bf06b3f657 Mon Sep 17 00:00:00 2001 From: ApiliumDevTeam Date: Sun, 28 Jun 2026 10:24:02 +0200 Subject: [PATCH 65/72] feat(cortex): add timestamp field to GraphNode and GNode from created triple Surface the note creation date on graph nodes so the Akashi UI can animate a chronological timelapse. GraphNode (vault map) and GNode (local graph) gain an optional `timestamp` field populated from the note'\''s `created` triple; falls back to `date` when `created` is absent. Nodes without either triple carry `None`. Covered by two new TDD tests (vault_map + local_graph). 
--- .../aingle_cortex/src/service/local_graph.rs | 44 +++++++++++++- crates/aingle_cortex/src/service/vault_map.rs | 60 +++++++++++++++++++ 2 files changed, 102 insertions(+), 2 deletions(-) diff --git a/crates/aingle_cortex/src/service/local_graph.rs b/crates/aingle_cortex/src/service/local_graph.rs index 17ef7877..b1e74bee 100644 --- a/crates/aingle_cortex/src/service/local_graph.rs +++ b/crates/aingle_cortex/src/service/local_graph.rs @@ -40,6 +40,9 @@ pub struct GNode { pub cluster: i64, /// Number of edges in THIS graph touching this node. pub degree: usize, + /// Creation date sourced from the note's `created` frontmatter scalar (e.g. `"2025-09-14"`). + /// `None` when the note has no `created` triple. + pub timestamp: Option, } /// A typed, optionally weighted edge in the local neighborhood graph. @@ -96,7 +99,7 @@ pub async fn local_graph( // links_raw: (subject, object-string) for every links_to triple // tagged_raw: (subject, tag) for every tagged triple type PairVec = Vec<(String, String)>; - let (notes, links_raw, tagged_raw): (Vec, PairVec, PairVec) = { + let (notes, links_raw, tagged_raw, created_map): (Vec, PairVec, PairVec, BTreeMap) = { let g = state.graph.read().await; let collect = |pred: &str| -> PairVec { g.find(TriplePattern::any().with_predicate(Predicate::named(pred))) @@ -115,7 +118,12 @@ pub async fn local_graph( ns.dedup(); let lnks = collect("links_to"); let tags = collect("tagged"); - (ns, lnks, tags) + // Build created-date map: note_path → date. "date" as fallback, "created" takes precedence. 
+ let mut cmap: BTreeMap = collect("date").into_iter().collect(); + for (k, v) in collect("created") { + cmap.insert(k, v); + } + (ns, lnks, tags, cmap) }; let note_set: BTreeSet<&str> = notes.iter().map(|s| s.as_str()).collect(); @@ -413,6 +421,7 @@ pub async fn local_graph( kind, cluster: -1, degree, + timestamp: created_map.get(id).cloned(), } }) .collect(); @@ -969,6 +978,37 @@ mod tests { } + // ----------------------------------------------------------------------- + // timestamp field: created triple → GNode.timestamp + // ----------------------------------------------------------------------- + + /// A `created` triple for a note must surface its value in `GNode.timestamp`. + /// A note without a `created` triple must have `GNode.timestamp == None`. + #[tokio::test] + async fn gnode_timestamp_from_created_triple() { + let state = AppState::with_db_path(":memory:", None).unwrap(); + register_note(&state, "a.md").await; + register_note(&state, "b.md").await; + insert_triple_node(&state, "a.md", "links_to", "b").await; + insert_triple_lit(&state, "a.md", "created", "2025-03-15").await; + + let g = super::local_graph(&state, "a.md", 1).await; + let node_a = g.nodes.iter().find(|n| n.id == "a.md") + .expect("a.md must be in graph"); + assert_eq!( + node_a.timestamp, + Some("2025-03-15".to_string()), + "GNode.timestamp must come from the created triple" + ); + let node_b = g.nodes.iter().find(|n| n.id == "b.md") + .expect("b.md must be in graph"); + assert_eq!( + node_b.timestamp, + None, + "GNode without created triple must have timestamp=None" + ); + } + // ----------------------------------------------------------------------- // 14. 
frontier_cap_bounds_semantic (optional perf guard) // ----------------------------------------------------------------------- diff --git a/crates/aingle_cortex/src/service/vault_map.rs b/crates/aingle_cortex/src/service/vault_map.rs index 6daad87a..82f4b863 100644 --- a/crates/aingle_cortex/src/service/vault_map.rs +++ b/crates/aingle_cortex/src/service/vault_map.rs @@ -82,6 +82,9 @@ pub struct GraphNode { pub label: String, pub cluster: i64, pub degree: usize, + /// Creation date sourced from the note's `created` frontmatter scalar (e.g. `"2025-09-14"`). + /// `None` when the note has no `created` triple. + pub timestamp: Option, } #[derive(Debug, Clone, Serialize)] @@ -446,6 +449,30 @@ pub async fn compute_vault_map(state: &crate::state::AppState) -> VaultMap { } } + // Build created-date map: note_path → date string, from "created" triples. + // Falls back to "date" when "created" is absent for a given note. + let created: BTreeMap = { + use aingle_graph::{Predicate, TriplePattern}; + let g = state.graph.read().await; + let strip = |n: String| n.trim_start_matches('<').trim_end_matches('>').to_string(); + let collect_pred = |pred: &str| -> BTreeMap { + g.find(TriplePattern::any().with_predicate(Predicate::named(pred))) + .unwrap_or_default() + .into_iter() + .filter_map(|t| { + let subj = strip(t.subject.to_string()); + obj_string(&t).map(|o| (subj, o)) + }) + .collect() + }; + let mut map = collect_pred("date"); + // "created" takes precedence: overwrite any "date" entry. + for (k, v) in collect_pred("created") { + map.insert(k, v); + } + map + }; + // GraphView (cap by degree). 
let mut ranked: Vec<&String> = s.notes.iter().collect(); ranked.sort_by(|a, b| { @@ -473,6 +500,7 @@ pub async fn compute_vault_map(state: &crate::state::AppState) -> VaultMap { cluster: cluster_of.get(p).copied().unwrap_or(-1), degree: s.in_deg.get(p).copied().unwrap_or(0) + s.out_deg.get(p).copied().unwrap_or(0), + timestamp: created.get(p).cloned(), }) .collect(); // Link edges (explicit wikilinks), typed "link". @@ -902,6 +930,38 @@ mod tests { ); } + // ----------------------------------------------------------------- + // Timestamp field: created triple → GraphNode.timestamp + // ----------------------------------------------------------------- + + /// A note with a `created` triple must surface its date in `GraphNode.timestamp`. + /// A note without a `created` triple must have `timestamp == None`. + #[tokio::test] + async fn graph_node_timestamp_from_created_triple() { + let state = graph_with(&[ + ("a.md", "aingle:source_hash", "h1"), + ("b.md", "aingle:source_hash", "h2"), + ("a.md", "created", "2025-01-02"), + ]) + .await; + + let map = super::compute_vault_map(&state).await; + let node_a = map.graph.nodes.iter().find(|n| n.id == "a.md") + .expect("a.md must be in graph"); + assert_eq!( + node_a.timestamp, + Some("2025-01-02".to_string()), + "timestamp must be populated from the created triple" + ); + let node_b = map.graph.nodes.iter().find(|n| n.id == "b.md") + .expect("b.md must be in graph"); + assert_eq!( + node_b.timestamp, + None, + "node without a created triple must have timestamp=None" + ); + } + /// `totals.links` must count only explicit wikilinks, not semantic edges. #[tokio::test] async fn totals_links_counts_only_explicit() { From 702e2b573242d6748cc52946bed5534c9c672c44 Mon Sep 17 00:00:00 2001 From: ApiliumDevTeam Date: Sun, 28 Jun 2026 10:59:58 +0200 Subject: [PATCH 66/72] perf(vault_map): per-node top-K semantic edges to eliminate hairball Replace global pair-emission with per-node top-SEMANTIC_EDGES_PER_NODE=3 selection. 
Final edge set is the UNION of each node's top-3 highest-cosine partners so strongly similar pairs survive if either endpoint nominates the other. For a fully-similar N-note vault edges drop from C(N,2) to ~N*3, e.g. 63 notes: ~1953 edges before, ~183 after. totals.links unchanged. SEMANTIC_EDGE_CAP stays as a final safety cap. Extract pure helper top_k_semantic_pairs() with two new tests: - top_k_semantic_pairs_selects_union: unit test, k=1 prunes weakest pair - per_node_top_k_reduces_hairball: 5-note all-similar graph shows (n3.md,n4.md) absent and total==9 < C(5,2)=10; 13 tests green. --- crates/aingle_cortex/src/service/vault_map.rs | 179 ++++++++++++++++-- 1 file changed, 167 insertions(+), 12 deletions(-) diff --git a/crates/aingle_cortex/src/service/vault_map.rs b/crates/aingle_cortex/src/service/vault_map.rs index 82f4b863..ed59eb1c 100644 --- a/crates/aingle_cortex/src/service/vault_map.rs +++ b/crates/aingle_cortex/src/service/vault_map.rs @@ -104,6 +104,13 @@ const GRAPH_NODE_CAP: usize = 600; /// happens inside `compute_vault_map` before truncation). const SEMANTIC_EDGE_CAP: usize = 1200; +/// Maximum semantic neighbors each node may contribute via its own top-K ranking. +/// The final edge set is the UNION of all per-node top-K choices (so a strongly +/// similar pair survives even if only one endpoint nominated the other). This bounds +/// the edge count to roughly `N × SEMANTIC_EDGES_PER_NODE` instead of `O(N²)`, +/// preventing hairballs in themed vaults where most notes are mutually similar. +const SEMANTIC_EDGES_PER_NODE: usize = 3; + /// Tags (case-insensitive) that mark a note as a reusable skill/process. 
const SKILL_TAGS: [&str; 6] = ["skill", "process", "sop", "workflow", "how-to", "howto"]; @@ -217,6 +224,41 @@ pub(crate) fn derive_structural(graph: &aingle_graph::GraphDB) -> Structural { } } +/// From a sorted-descending list of `(a, b, cosine)` pairs (with `a ≤ b` and no +/// duplicates), return the canonical `(String, String) → cosine` map for the +/// top-`k` semantic neighbors of every node (union semantics: a pair is kept if +/// EITHER endpoint ranked the other in its top-`k`). +/// +/// Because the input is sorted desc by cosine and we iterate in that order, each +/// per-node accumulator is also sorted desc — so `take(k)` yields the top-k without +/// an additional per-node sort. +fn top_k_semantic_pairs<'a>( + candidates: &'a [(String, String, f32)], + k: usize, +) -> BTreeMap<(String, String), f32> { + // Accumulate per-node (partner, cosine) lists in global cosine-desc order. + let mut per_node: BTreeMap<&'a str, Vec<(&'a str, f32)>> = BTreeMap::new(); + for (a, b, c) in candidates { + per_node.entry(a.as_str()).or_default().push((b.as_str(), *c)); + per_node.entry(b.as_str()).or_default().push((a.as_str(), *c)); + } + // Union: an ordered pair (min, max) is kept if EITHER endpoint selects the other. + let mut chosen: BTreeMap<(String, String), f32> = BTreeMap::new(); + for (node, partners) in &per_node { + for (partner, c) in partners.iter().take(k) { + let key = if *node <= *partner { + (node.to_string(), partner.to_string()) + } else { + (partner.to_string(), node.to_string()) + }; + // `or_insert`: the first insertion for any pair holds the correct cosine + // because we iterate globally in desc order (highest cosine first). + chosen.entry(key).or_insert(*c); + } + } + chosen +} + /// Cosine similarity between two raw vectors (same length). 
fn cosine(a: &[f32], b: &[f32]) -> f32 { if a.len() != b.len() || a.is_empty() { @@ -511,13 +553,16 @@ pub async fn compute_vault_map(state: &crate::state::AppState) -> VaultMap { .map(|(a, b)| GraphEdge { source: a.clone(), target: b.clone(), kind: "link".into() }) .collect(); - // Semantic edges from the clustering pass — no new O(n²) scan; cosines were - // already captured in `raw_sem_pairs`. Rules: + // Semantic edges — per-node top-K selection with union semantics. + // Replaces the old "every pair ≥ threshold" approach that produced hairballs + // on themed vaults. Each node contributes at most SEMANTIC_EDGES_PER_NODE edges + // from its own ranking; the final set is the UNION of all per-node choices so + // no node becomes isolated. Total edges ≈ N × K instead of O(N²). + // + // Rules (unchanged): // 1. Both endpoints must be in the rendered node set (`kept`). - // 2. Skip pairs that already have an explicit link (order-insensitive). - // 3. Deduplicate order-insensitively (BTreeMap keys are sorted so i = s @@ -537,18 +582,22 @@ pub async fn compute_vault_map(state: &crate::state::AppState) -> VaultMap { candidates .sort_by(|x, y| y.2.partial_cmp(&x.2).unwrap_or(std::cmp::Ordering::Equal)); - let mut seen: std::collections::BTreeSet<(String, String)> = - std::collections::BTreeSet::new(); + // Per-node top-K with union semantics → O(N·K) edges instead of O(N²). + let chosen = top_k_semantic_pairs(&candidates, SEMANTIC_EDGES_PER_NODE); + + // Sort by cosine desc so SEMANTIC_EDGE_CAP retains the highest-quality edges. 
+ let mut chosen_sorted: Vec<((String, String), f32)> = chosen.into_iter().collect(); + chosen_sorted + .sort_by(|x, y| y.1.partial_cmp(&x.1).unwrap_or(std::cmp::Ordering::Equal)); + let mut sem_count = 0usize; - for (a, b, _c) in candidates { + for ((a, b), _c) in chosen_sorted { if sem_count >= SEMANTIC_EDGE_CAP { break; } - let key = (a.clone(), b.clone()); - if link_pair_set.contains(&key) || seen.contains(&key) { + if link_pair_set.contains(&(a.clone(), b.clone())) { continue; } - seen.insert(key); edges.push(GraphEdge { source: a, target: b, kind: "semantic".into() }); sem_count += 1; } @@ -962,6 +1011,112 @@ mod tests { ); } + // ----------------------------------------------------------------- + // Per-node top-K semantic edges (hairball reduction) + // ----------------------------------------------------------------- + + /// `top_k_semantic_pairs` selects per-node top-k and applies union semantics. + #[test] + fn top_k_semantic_pairs_selects_union() { + // 3 pairs sorted desc by cosine: + // a picks b (0.99, its highest) + // b picks a (0.99, its highest) + // c picks a (0.95 > 0.91, so c's top-1 is a, NOT b) + // With k=1: union = {(a,b),(a,c)}. (b,c) absent — neither b nor c picks + // the other as its top-1. 
+ let pairs = vec![ + ("a.md".to_string(), "b.md".to_string(), 0.99_f32), + ("a.md".to_string(), "c.md".to_string(), 0.95_f32), + ("b.md".to_string(), "c.md".to_string(), 0.91_f32), + ]; + let chosen = super::top_k_semantic_pairs(&pairs, 1); + assert!( + chosen.contains_key(&("a.md".to_string(), "b.md".to_string())), + "a-b must be chosen (a's and b's top-1)" + ); + assert!( + chosen.contains_key(&("a.md".to_string(), "c.md".to_string())), + "a-c must be chosen (c's top-1 is a)" + ); + assert!( + !chosen.contains_key(&("b.md".to_string(), "c.md".to_string())), + "b-c must be absent: neither b nor c ranks the other as top-1" + ); + assert_eq!(chosen.len(), 2, "exactly 2 pairs with k=1"); + } + + /// Per-node top-K reduces a fully-similar 5-note graph from C(5,2)=10 edges + /// to 9, pruning the (n3.md, n4.md) pair that neither endpoint selects in its + /// top-SEMANTIC_EDGES_PER_NODE. + #[tokio::test] + async fn per_node_top_k_reduces_hairball() { + use ineru::{Embedding, MemoryEntry}; + + // 5 notes, all with identical embeddings → every pair has cosine 1.0 ≥ threshold. + // Old code emits all C(5,2)=10 pairs. New per-node top-3 union emits 9: + // hub picks n1,n2,n3 (its first 3 in sort order); + // n4 picks hub,n1,n2 → (n3,n4) selected by neither endpoint. 
+ let state = graph_with(&[ + ("hub.md", "aingle:source_hash", "h0"), + ("n1.md", "aingle:source_hash", "h1"), + ("n2.md", "aingle:source_hash", "h2"), + ("n3.md", "aingle:source_hash", "h3"), + ("n4.md", "aingle:source_hash", "h4"), + ]) + .await; + { + let mut mem = state.memory.write().await; + for path in ["hub.md", "n1.md", "n2.md", "n3.md", "n4.md"] { + let mut e = MemoryEntry::new( + crate::service::ingest::CHUNK_ENTRY_TYPE, + serde_json::json!({ "text": "content", "source_path": path }), + ); + e.embedding = Some(Embedding::new(vec![1.0_f32, 0.0, 0.0])); + mem.remember(e).unwrap(); + } + } + + let map = super::compute_vault_map(&state).await; + let sem_edges: Vec<_> = + map.graph.edges.iter().filter(|e| e.kind == "semantic").collect(); + + // (a) Clearly-strongest pair is connected. + assert!( + sem_edges.iter().any(|e| { + (e.source == "hub.md" && e.target == "n1.md") + || (e.source == "n1.md" && e.target == "hub.md") + }), + "hub.md-n1.md must be a semantic edge (strongest pair): {:?}", + sem_edges + ); + + // (b) (n3.md, n4.md) is absent: neither endpoint ranks the other in its top-3. + assert!( + !sem_edges.iter().any(|e| { + (e.source == "n3.md" && e.target == "n4.md") + || (e.source == "n4.md" && e.target == "n3.md") + }), + "n3.md-n4.md must be pruned by per-node top-K: {:?}", + sem_edges + ); + + // (c) Total semantic edges are reduced below the old O(n²) full-mesh count. + assert!( + sem_edges.len() < 10, + "per-node top-K must reduce edges below C(5,2)=10, got {}: {:?}", + sem_edges.len(), + sem_edges + ); + + // (d) Exact deterministic count for this naming + identical-vector combination. + assert_eq!( + sem_edges.len(), + 9, + "expected exactly 9 semantic edges with per-node top-3 union: {:?}", + sem_edges + ); + } + /// `totals.links` must count only explicit wikilinks, not semantic edges. 
#[tokio::test] async fn totals_links_counts_only_explicit() { From 6a5ea6ca27844d38736a98b034cda26c8b74ff81 Mon Sep 17 00:00:00 2001 From: ApiliumDevTeam Date: Sun, 28 Jun 2026 15:59:22 +0200 Subject: [PATCH 67/72] feat(cortex): expose content_hash on DagActionDto for provenance cross-reference MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add `content_hash: Option` to `DagActionDto` and populate it in `action_to_dto`: extracted from the first provenanced triple in a `TripleInsert` payload, or from the first `TripleInsert` inside a `Batch`. All other payload variants yield `None`. Covered by five unit tests (RED→GREEN verified): - TripleInsert with provenance → Some("deadbeef") - Batch containing provenanced TripleInsert → Some("cafebabe") - TripleInsert without provenance → None - Genesis → None - Noop → None --- crates/aingle_cortex/src/rest/dag.rs | 152 +++++++++++++++++++++++++-- 1 file changed, 145 insertions(+), 7 deletions(-) diff --git a/crates/aingle_cortex/src/rest/dag.rs b/crates/aingle_cortex/src/rest/dag.rs index eded0287..6699a75c 100644 --- a/crates/aingle_cortex/src/rest/dag.rs +++ b/crates/aingle_cortex/src/rest/dag.rs @@ -42,6 +42,10 @@ pub struct DagActionDto { pub payload_type: String, pub payload_summary: String, pub signed: bool, + /// Blake3 hex content hash of the source file, if present in the action's + /// provenance. Extracted from the first provenanced triple in a + /// `TripleInsert` (or the first `TripleInsert` inside a `Batch`). 
+ pub content_hash: Option, } #[derive(Debug, Serialize)] @@ -590,7 +594,7 @@ pub(crate) fn action_to_dto(action: &aingle_graph::dag::DagAction) -> DagActionD let hash = action.compute_hash().to_hex(); let parents: Vec = action.parents.iter().map(|h| h.to_hex()).collect(); - let (payload_type, payload_summary) = match &action.payload { + let (payload_type, payload_summary, content_hash) = match &action.payload { aingle_graph::dag::DagPayload::TripleInsert { triples } => { let summary = if triples.len() == 1 { let t = &triples[0]; @@ -598,7 +602,12 @@ pub(crate) fn action_to_dto(action: &aingle_graph::dag::DagAction) -> DagActionD } else { format!("{} triple(s)", triples.len()) }; - ("triple:create".to_string(), summary) + // Extract content_hash from the first triple that carries provenance. + // All triples from a single file ingest share the same content_hash. + let content_hash = triples + .iter() + .find_map(|t| t.provenance.as_ref().map(|p| p.content_hash.clone())); + ("triple:create".to_string(), summary, content_hash) } aingle_graph::dag::DagPayload::TripleDelete { triple_ids, @@ -609,7 +618,7 @@ pub(crate) fn action_to_dto(action: &aingle_graph::dag::DagAction) -> DagActionD } else { format!("{} triple(s)", triple_ids.len()) }; - ("triple:delete".to_string(), summary) + ("triple:delete".to_string(), summary, None) } aingle_graph::dag::DagPayload::MemoryOp { kind } => { let summary = match kind { @@ -621,10 +630,20 @@ pub(crate) fn action_to_dto(action: &aingle_graph::dag::DagAction) -> DagActionD } aingle_graph::dag::MemoryOpKind::Consolidate => "Consolidate".to_string(), }; - ("memory:op".to_string(), summary) + ("memory:op".to_string(), summary, None) } aingle_graph::dag::DagPayload::Batch { ops } => { - ("batch".to_string(), format!("{} ops", ops.len())) + // Search the ops for the first TripleInsert that has a provenanced triple. 
+ let content_hash = ops.iter().find_map(|op| { + if let aingle_graph::dag::DagPayload::TripleInsert { triples } = op { + triples + .iter() + .find_map(|t| t.provenance.as_ref().map(|p| p.content_hash.clone())) + } else { + None + } + }); + ("batch".to_string(), format!("{} ops", ops.len()), content_hash) } aingle_graph::dag::DagPayload::Genesis { triple_count, @@ -632,6 +651,7 @@ pub(crate) fn action_to_dto(action: &aingle_graph::dag::DagAction) -> DagActionD } => ( "genesis".to_string(), format!("{} triples: {}", triple_count, description), + None, ), aingle_graph::dag::DagPayload::Compact { pruned_count, @@ -643,13 +663,14 @@ pub(crate) fn action_to_dto(action: &aingle_graph::dag::DagAction) -> DagActionD "pruned {} / retained {} ({})", pruned_count, retained_count, policy ), + None, ), - aingle_graph::dag::DagPayload::Noop => ("noop".to_string(), String::new()), + aingle_graph::dag::DagPayload::Noop => ("noop".to_string(), String::new(), None), aingle_graph::dag::DagPayload::Custom { payload_type, payload_summary, .. 
- } => (payload_type.clone(), payload_summary.clone()), + } => (payload_type.clone(), payload_summary.clone(), None), }; DagActionDto { @@ -661,6 +682,7 @@ pub(crate) fn action_to_dto(action: &aingle_graph::dag::DagAction) -> DagActionD payload_type, payload_summary, signed: action.signature.is_some(), + content_hash, } } @@ -677,3 +699,119 @@ fn triple_value_to_json(v: &aingle_graph::Value) -> serde_json::Value { _ => serde_json::Value::String(format!("{:?}", v)), } } + +#[cfg(test)] +mod tests { + use super::*; + use aingle_graph::dag::{DagAction, DagPayload, Provenance, TripleInsertPayload}; + use aingle_graph::NodeId; + use chrono::Utc; + + fn test_action(payload: DagPayload) -> DagAction { + DagAction { + parents: vec![], + author: NodeId::named("node:test"), + seq: 0, + timestamp: Utc::now(), + payload, + signature: None, + } + } + + #[test] + fn action_to_dto_extracts_content_hash_from_triple_insert() { + let provenance = Provenance { + source_path: "vault/note.md".into(), + line_start: 1, + line_end: 3, + content_hash: "deadbeef".into(), + }; + let action = test_action(DagPayload::TripleInsert { + triples: vec![TripleInsertPayload { + subject: "akashi://note".into(), + predicate: "akashi:title".into(), + object: serde_json::json!("Test Note"), + provenance: Some(provenance), + }], + }); + + let dto = action_to_dto(&action); + + assert_eq!( + dto.content_hash, + Some("deadbeef".into()), + "content_hash must be extracted from TripleInsert provenance" + ); + } + + #[test] + fn action_to_dto_extracts_content_hash_from_batch_with_triple_insert() { + let provenance = Provenance { + source_path: "vault/doc.md".into(), + line_start: 5, + line_end: 10, + content_hash: "cafebabe".into(), + }; + let action = test_action(DagPayload::Batch { + ops: vec![ + DagPayload::TripleInsert { + triples: vec![TripleInsertPayload { + subject: "akashi://doc".into(), + predicate: "akashi:body".into(), + object: serde_json::json!("content"), + provenance: Some(provenance), + }], + }, 
+ DagPayload::Noop, + ], + }); + + let dto = action_to_dto(&action); + + assert_eq!( + dto.content_hash, + Some("cafebabe".into()), + "content_hash must be extracted from first TripleInsert inside Batch" + ); + } + + #[test] + fn action_to_dto_content_hash_none_for_triple_insert_without_provenance() { + let action = test_action(DagPayload::TripleInsert { + triples: vec![TripleInsertPayload { + subject: "s".into(), + predicate: "p".into(), + object: serde_json::json!("o"), + provenance: None, + }], + }); + + let dto = action_to_dto(&action); + + assert_eq!( + dto.content_hash, None, + "content_hash must be None when no provenance is present" + ); + } + + #[test] + fn action_to_dto_content_hash_none_for_genesis() { + let action = test_action(DagPayload::Genesis { + triple_count: 0, + description: "root".into(), + }); + + let dto = action_to_dto(&action); + + assert_eq!(dto.content_hash, None, "Genesis actions have no content_hash"); + } + + #[test] + fn action_to_dto_content_hash_none_for_noop() { + let action = test_action(DagPayload::Noop); + + let dto = action_to_dto(&action); + + assert_eq!(dto.content_hash, None, "Noop actions have no content_hash"); + } +} From 6686bc284a36baacfb1c7893d92cb631c99452ed Mon Sep 17 00:00:00 2001 From: ApiliumDevTeam Date: Sun, 28 Jun 2026 18:55:12 +0200 Subject: [PATCH 68/72] =?UTF-8?q?refactor(cortex):=20code-polish=20?= =?UTF-8?q?=E2=80=94=20type=20aliases,=20consolidated=20helpers,=20parse?= =?UTF-8?q?=5Fhex32,=20DEFAULT=5FHISTORY=5FLIMIT?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A1: add VaultMapCache/NoteContextCache/LocalGraphCache type aliases in state.rs A2: replace vec![…] with array literals in triple_util tests A3: extract parse_hex32() helper in rest/dag.rs, replacing two inline loops B1: make triple_util::basename pub(crate); delete duplicates from backlinks/vault_map B2: move provenance_anchor_for to triple_util as pub(crate) async fn; delete from backlinks/context B3: 
add pub(crate) fn strip_brackets to triple_util; replace all inline bracket-strip closures B4: add DEFAULT_HISTORY_LIMIT const in service/dag.rs; update rest/dag.rs + mcp/server.rs wrappers D: add #[inline] to obj_string; add doc comment to action_to_dto; obj_string uses strip_brackets --- crates/aingle_cortex/src/mcp/server.rs | 11 +- crates/aingle_cortex/src/rest/dag.rs | 53 ++-- crates/aingle_cortex/src/service/backlinks.rs | 91 +++---- crates/aingle_cortex/src/service/context.rs | 128 ++++------ crates/aingle_cortex/src/service/dag.rs | 3 + .../aingle_cortex/src/service/local_graph.rs | 94 +++---- .../aingle_cortex/src/service/triple_util.rs | 53 +++- crates/aingle_cortex/src/service/vault_map.rs | 231 ++++++++++++------ crates/aingle_cortex/src/state.rs | 129 ++++++---- 9 files changed, 478 insertions(+), 315 deletions(-) diff --git a/crates/aingle_cortex/src/mcp/server.rs b/crates/aingle_cortex/src/mcp/server.rs index af515c3d..b4c97337 100644 --- a/crates/aingle_cortex/src/mcp/server.rs +++ b/crates/aingle_cortex/src/mcp/server.rs @@ -23,7 +23,7 @@ pub struct DagHistoryParams { #[cfg(feature = "dag")] fn default_hist_limit() -> usize { - 50 + crate::service::dag::DEFAULT_HISTORY_LIMIT } /// Parameters for the `aingle_dag_action` tool. 
@@ -703,7 +703,14 @@ mod ingest_tools_tests { .into_iter() .map(|t| t.name.to_string()) .collect(); - for expected in ["aingle_ingest", "aingle_ground", "aingle_sources", "aingle_vault_map", "aingle_backlinks", "aingle_note_context"] { + for expected in [ + "aingle_ingest", + "aingle_ground", + "aingle_sources", + "aingle_vault_map", + "aingle_backlinks", + "aingle_note_context", + ] { assert!( names.contains(&expected.to_string()), "missing tool {expected}" diff --git a/crates/aingle_cortex/src/rest/dag.rs b/crates/aingle_cortex/src/rest/dag.rs index 6699a75c..701c40f2 100644 --- a/crates/aingle_cortex/src/rest/dag.rs +++ b/crates/aingle_cortex/src/rest/dag.rs @@ -177,7 +177,7 @@ pub struct CreateDagActionResponse { } fn default_limit() -> usize { - 50 + crate::service::dag::DEFAULT_HISTORY_LIMIT } // ============================================================================ @@ -212,14 +212,8 @@ pub async fn get_dag_history( // Triple-ID-based lookup uses the affected index if let Some(ref tid_hex) = query.triple_id { - let mut bytes = [0u8; 32]; - if tid_hex.len() != 64 { - return Err(Error::InvalidInput("triple_id must be 64 hex chars".into())); - } - for i in 0..32 { - bytes[i] = u8::from_str_radix(&tid_hex[i * 2..i * 2 + 2], 16) - .map_err(|_| Error::InvalidInput("Invalid hex in triple_id".into()))?; - } + let bytes = parse_hex32(tid_hex) + .ok_or_else(|| Error::InvalidInput("triple_id must be 64 valid hex chars".into()))?; let actions = graph .dag_history(&bytes, query.limit) @@ -297,16 +291,8 @@ pub async fn get_dag_verify( let action_hash = aingle_graph::dag::DagActionHash::from_hex(&hash) .ok_or_else(|| Error::InvalidInput(format!("Invalid hash: {}", hash)))?; - let mut pk_bytes = [0u8; 32]; - if query.public_key.len() != 64 { - return Err(Error::InvalidInput( - "public_key must be 64 hex chars".into(), - )); - } - for i in 0..32 { - pk_bytes[i] = u8::from_str_radix(&query.public_key[i * 2..i * 2 + 2], 16) - .map_err(|_| Error::InvalidInput("Invalid hex 
in public_key".into()))?; - } + let pk_bytes = parse_hex32(&query.public_key) + .ok_or_else(|| Error::InvalidInput("public_key must be 64 valid hex chars".into()))?; let graph = state.graph.read().await; let action = graph @@ -590,6 +576,10 @@ pub fn dag_router() -> Router { // Helpers // ============================================================================ +/// Convert a raw [`DagAction`] to its serializable DTO form. +/// +/// Extracts the payload type, a human-readable summary, and the content hash +/// from the action's provenance (for `TripleInsert` and `Batch` payloads). pub(crate) fn action_to_dto(action: &aingle_graph::dag::DagAction) -> DagActionDto { let hash = action.compute_hash().to_hex(); let parents: Vec = action.parents.iter().map(|h| h.to_hex()).collect(); @@ -643,7 +633,11 @@ pub(crate) fn action_to_dto(action: &aingle_graph::dag::DagAction) -> DagActionD None } }); - ("batch".to_string(), format!("{} ops", ops.len()), content_hash) + ( + "batch".to_string(), + format!("{} ops", ops.len()), + content_hash, + ) } aingle_graph::dag::DagPayload::Genesis { triple_count, @@ -686,6 +680,20 @@ pub(crate) fn action_to_dto(action: &aingle_graph::dag::DagAction) -> DagActionD } } +/// Parse a 64-character hex string into a 32-byte array. +/// +/// Returns `None` if `hex` is not exactly 64 characters or contains non-hex digits. 
+fn parse_hex32(hex: &str) -> Option<[u8; 32]> { + if hex.len() != 64 { + return None; + } + let mut out = [0u8; 32]; + for (i, b) in out.iter_mut().enumerate() { + *b = u8::from_str_radix(&hex[i * 2..i * 2 + 2], 16).ok()?; + } + Some(out) +} + fn triple_value_to_json(v: &aingle_graph::Value) -> serde_json::Value { match v { aingle_graph::Value::String(s) => serde_json::Value::String(s.clone()), @@ -803,7 +811,10 @@ mod tests { let dto = action_to_dto(&action); - assert_eq!(dto.content_hash, None, "Genesis actions have no content_hash"); + assert_eq!( + dto.content_hash, None, + "Genesis actions have no content_hash" + ); } #[test] diff --git a/crates/aingle_cortex/src/service/backlinks.rs b/crates/aingle_cortex/src/service/backlinks.rs index c412255f..2dbd5adf 100644 --- a/crates/aingle_cortex/src/service/backlinks.rs +++ b/crates/aingle_cortex/src/service/backlinks.rs @@ -8,7 +8,9 @@ use serde::Serialize; use std::collections::BTreeMap; -use crate::service::triple_util::{obj_string, resolve_link_target}; +use crate::service::triple_util::{ + basename, obj_string, provenance_anchor_for, resolve_link_target, strip_brackets, +}; /// Verified link context for one note. #[derive(Debug, Clone, Serialize, Default)] @@ -26,12 +28,6 @@ pub struct BacklinkRef { pub provenance_anchor: Option, } -/// Basename without directory or extension (wikilink resolution + titles). -fn basename(path: &str) -> String { - let file = path.rsplit(['/', '\\']).next().unwrap_or(path); - file.rsplit_once('.').map(|(s, _)| s).unwrap_or(file).to_string() -} - /// True if `text` contains `word` (case-insensitive) as a whole token — bounded /// by non-alphanumeric chars or string ends. Handles multi-token names like /// "meeting-notes" while NOT matching "note" inside "notebook". @@ -59,28 +55,10 @@ fn mentions_word(text: &str, word: &str) -> bool { false } -/// Retrieve a signed provenance anchor hash for a note path, if available. 
-async fn provenance_anchor_for(state: &crate::state::AppState, src: &str) -> Option { - #[cfg(feature = "dag")] - { - match crate::service::dag::history_by_subject(state, src, 1).await { - Ok(a) => a.first().filter(|x| x.signed).map(|x| x.hash.clone()), - Err(_) => None, - } - } - #[cfg(not(feature = "dag"))] - { - let _ = (state, src); - None - } -} - /// Compute backlinks, outgoing links, and unlinked mentions for `note`. pub async fn backlinks(state: &crate::state::AppState, note: &str) -> Backlinks { use aingle_graph::{Predicate, TriplePattern}; - let strip = |n: String| n.trim_start_matches('<').trim_end_matches('>').to_string(); - // Note set + basename index. let (notes, links): (Vec, Vec<(String, String)>) = { let g = state.graph.read().await; @@ -89,7 +67,7 @@ pub async fn backlinks(state: &crate::state::AppState, note: &str) -> Backlinks .unwrap_or_default() .into_iter() .filter_map(|t| { - obj_string(&t).map(|o| (strip(t.subject.to_string()), o)) + obj_string(&t).map(|o| (strip_brackets(&t.subject.to_string()).to_string(), o)) }) .collect() }; @@ -108,9 +86,8 @@ pub async fn backlinks(state: &crate::state::AppState, note: &str) -> Backlinks for n in ¬es { by_base.entry(basename(n)).or_insert_with(|| n.clone()); } - let resolve = |target: &str| -> Option { - resolve_link_target(target, ¬e_set, &by_base) - }; + let resolve = + |target: &str| -> Option { resolve_link_target(target, ¬e_set, &by_base) }; let active_base = basename(note); let active_base_lc = active_base.to_lowercase(); @@ -145,10 +122,7 @@ pub async fn backlinks(state: &crate::state::AppState, note: &str) -> Backlinks if resolve(target).as_deref() == Some(note) && backlink_paths.insert(src.clone()) { let context = text_of.get(src).and_then(|txt| { txt.lines() - .find(|l| { - l.contains("[[") - && l.to_lowercase().contains(&active_base_lc) - }) + .find(|l| l.contains("[[") && l.to_lowercase().contains(&active_base_lc)) .map(|l| { let t = l.trim(); if t.chars().count() > 200 { @@ -210,8 
+184,12 @@ mod tests { { let g = state.graph.write().await; for (s, p, o) in triples { - g.insert(Triple::new(NodeId::named(*s), Predicate::named(*p), Value::literal(*o))) - .unwrap(); + g.insert(Triple::new( + NodeId::named(*s), + Predicate::named(*p), + Value::literal(*o), + )) + .unwrap(); } } state @@ -224,8 +202,8 @@ mod tests { ("b.md", "aingle:source_hash", "h2"), ("c.md", "aingle:source_hash", "h3"), ("target.md", "aingle:source_hash", "h4"), - ("a.md", "links_to", "target"), // a → target (backlink) - ("target.md", "links_to", "b"), // target → b (outgoing) + ("a.md", "links_to", "target"), // a → target (backlink) + ("target.md", "links_to", "b"), // target → b (outgoing) ]) .await; // c.md mentions "target" in text but does not link it (unlinked). @@ -240,10 +218,22 @@ mod tests { } let r = super::backlinks(&state, "target.md").await; - assert!(r.backlinks.iter().any(|b| b.path == "a.md"), "a links to target"); - assert!(r.outgoing.contains(&"b.md".to_string()), "target links to b"); - assert!(r.unlinked.contains(&"c.md".to_string()), "c mentions target unlinked"); - assert!(!r.unlinked.contains(&"a.md".to_string()), "a is a backlink, not unlinked"); + assert!( + r.backlinks.iter().any(|b| b.path == "a.md"), + "a links to target" + ); + assert!( + r.outgoing.contains(&"b.md".to_string()), + "target links to b" + ); + assert!( + r.unlinked.contains(&"c.md".to_string()), + "c mentions target unlinked" + ); + assert!( + !r.unlinked.contains(&"a.md".to_string()), + "a is a backlink, not unlinked" + ); } #[tokio::test] @@ -284,9 +274,16 @@ mod tests { let state = crate::state::AppState::with_db_path(":memory:", None).unwrap(); { let g = state.graph.write().await; - for (s, p) in [("a.md", "aingle:source_hash"), ("hub.md", "aingle:source_hash")] { - g.insert(Triple::new(NodeId::named(s), Predicate::named(p), Value::literal("h"))) - .unwrap(); + for (s, p) in [ + ("a.md", "aingle:source_hash"), + ("hub.md", "aingle:source_hash"), + ] { + g.insert(Triple::new( + 
NodeId::named(s), + Predicate::named(p), + Value::literal("h"), + )) + .unwrap(); } // links_to stored as a NODE object — how real ingest produces it. g.insert(Triple::new( @@ -329,7 +326,11 @@ mod tests { } // Must not panic; context should be present and ≤ 201 chars (200 + ellipsis). let r = super::backlinks(&state, "t.md").await; - let b = r.backlinks.iter().find(|b| b.path == "src.md").expect("backlink"); + let b = r + .backlinks + .iter() + .find(|b| b.path == "src.md") + .expect("backlink"); let ctx = b.context.as_ref().expect("context"); assert!(ctx.chars().count() <= 201); } diff --git a/crates/aingle_cortex/src/service/context.rs b/crates/aingle_cortex/src/service/context.rs index 13c21f0c..21daf7c4 100644 --- a/crates/aingle_cortex/src/service/context.rs +++ b/crates/aingle_cortex/src/service/context.rs @@ -7,7 +7,9 @@ use std::collections::{BTreeMap, BTreeSet}; -use crate::service::triple_util::{obj_string, resolve_link_target}; +use crate::service::triple_util::{ + basename, obj_string, provenance_anchor_for, resolve_link_target, strip_brackets, +}; /// The semantic context for one note — the semantically related notes, even /// when never explicitly linked. @@ -52,26 +54,6 @@ const SEMANTIC_MIN_DIMS: usize = 128; /// Follow-up: make this per-embedder if more neural models are added. pub const NEIGHBOR_FLOOR: f32 = 0.88; -// --------------------------------------------------------------------------- -// Helpers -// --------------------------------------------------------------------------- - -/// Retrieve a signed provenance anchor hash for a note path, if available. 
-async fn provenance_anchor_for(state: &crate::state::AppState, src: &str) -> Option { - #[cfg(feature = "dag")] - { - match crate::service::dag::history_by_subject(state, src, 1).await { - Ok(a) => a.first().filter(|x| x.signed).map(|x| x.hash.clone()), - Err(_) => None, - } - } - #[cfg(not(feature = "dag"))] - { - let _ = (state, src); - None - } -} - // --------------------------------------------------------------------------- // Core retrieval // --------------------------------------------------------------------------- @@ -79,11 +61,7 @@ async fn provenance_anchor_for(state: &crate::state::AppState, src: &str) -> Opt /// Compute the semantic neighbors of `note` — up to `limit` related notes, /// ranked by embedding cosine similarity, each with a matching passage and /// optional signed provenance anchor. -pub async fn note_context( - state: &crate::state::AppState, - note: &str, - limit: usize, -) -> NoteContext { +pub async fn note_context(state: &crate::state::AppState, note: &str, limit: usize) -> NoteContext { use aingle_graph::{Predicate, TriplePattern}; use ineru::MemoryQuery; @@ -97,9 +75,6 @@ pub async fn note_context( // 2. Build the note set (subjects of PRED_SOURCE_HASH) + basename index, // and collect all links_to triples. - let strip = - |n: String| n.trim_start_matches('<').trim_end_matches('>').to_string(); - let (notes, links): (Vec, Vec<(String, String)>) = { let g = state.graph.read().await; let collect = |pred: &str| -> Vec<(String, String)> { @@ -107,7 +82,7 @@ pub async fn note_context( .unwrap_or_default() .into_iter() .filter_map(|t| { - obj_string(&t).map(|o| (strip(t.subject.to_string()), o)) + obj_string(&t).map(|o| (strip_brackets(&t.subject.to_string()).to_string(), o)) }) .collect() }; @@ -126,14 +101,11 @@ pub async fn note_context( // basename → first full path (for wikilink resolution). 
let mut by_base: BTreeMap = BTreeMap::new(); for n in ¬es { - by_base - .entry(crate::service::vault_map::basename(n)) - .or_insert_with(|| n.clone()); + by_base.entry(basename(n)).or_insert_with(|| n.clone()); } - let resolve = |target: &str| -> Option { - resolve_link_target(target, ¬e_set, &by_base) - }; + let resolve = + |target: &str| -> Option { resolve_link_target(target, ¬e_set, &by_base) }; // 3. Compute `outgoing_set`: full paths that the active `note` links to. let outgoing_set: BTreeSet = links @@ -167,7 +139,7 @@ pub async fn note_context( } let query_text: String = if own_text.trim().is_empty() { - crate::service::vault_map::basename(note) + basename(note) } else { own_text.clone() }; @@ -384,12 +356,7 @@ mod tests { // ----------------------------------------------------------------------- fn stub_state() -> AppState { - AppState::with_db_path_and_embedder( - ":memory:", - None, - Arc::new(StubEmbedder), - ) - .unwrap() + AppState::with_db_path_and_embedder(":memory:", None, Arc::new(StubEmbedder)).unwrap() } async fn insert_triples(state: &AppState, triples: &[(&str, &str, &str)]) { @@ -423,7 +390,10 @@ mod tests { async fn hash_grade_embedder_short_circuits() { let state = AppState::with_db_path(":memory:", None).unwrap(); let ctx = super::note_context(&state, "active.md", 5).await; - assert!(!ctx.semantic_ready, "64-d hash embedder must not be semantic_ready"); + assert!( + !ctx.semantic_ready, + "64-d hash embedder must not be semantic_ready" + ); assert!(ctx.neighbors.is_empty()); } @@ -444,22 +414,16 @@ mod tests { .await; // Active note's own chunk (alpha text → e0 query vector). 
- let e0 = vec![1.0_f32, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, - 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, - 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, - 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, - 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, - 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, - 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, - 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, - 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, - 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, - 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, - 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, - 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, - 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, - 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, - 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]; + let e0 = vec![ + 1.0_f32, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, + 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, + 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, + 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, + 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, + 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, + 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, + 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, + ]; let e1 = { let mut v = vec![0.0_f32; 128]; v[1] = 1.0; @@ -595,7 +559,13 @@ mod tests { v }; insert_chunk(&state, "active.md", "alpha active", e0.clone()).await; - insert_chunk(&state, "_maps/vault-map.md", "alpha maps content", e0.clone()).await; + insert_chunk( + &state, + "_maps/vault-map.md", + "alpha maps content", + e0.clone(), + ) + .await; let ctx = super::note_context(&state, "active.md", 10).await; assert!(ctx.semantic_ready); @@ -647,12 +617,8 @@ mod tests { #[cfg(feature = "dag")] #[tokio::test] async fn provenance_present_when_signed() { - let state = AppState::with_db_path_and_embedder( - ":memory:", - 
None, - Arc::new(StubEmbedder), - ) - .unwrap(); + let state = + AppState::with_db_path_and_embedder(":memory:", None, Arc::new(StubEmbedder)).unwrap(); { let mut graph = state.graph.write().await; graph.enable_dag(); @@ -697,7 +663,9 @@ mod tests { }; let key = aingle_graph::dag::DagSigningKey::generate(); key.sign(&mut action); - dag_store.put(&action).expect("put signed action must succeed"); + dag_store + .put(&action) + .expect("put signed action must succeed"); } let ctx = super::note_context(&state, "active.md", 10).await; @@ -706,11 +674,7 @@ mod tests { "alpha.md must be a semantic neighbor with dag feature: {:?}", ctx.neighbors ); - let n = ctx - .neighbors - .iter() - .find(|n| n.path == "alpha.md") - .unwrap(); + let n = ctx.neighbors.iter().find(|n| n.path == "alpha.md").unwrap(); assert!( n.provenance_anchor.is_some(), "provenance_anchor must be Some when a signed DAG action is recorded for the source: {:?}", @@ -759,8 +723,7 @@ mod tests { "cache hit: neighbor count must be identical" ); assert_eq!( - ctx1.neighbors[0].path, - ctx2.neighbors[0].path, + ctx1.neighbors[0].path, ctx2.neighbors[0].path, "cache hit: top neighbor must be identical" ); @@ -789,7 +752,13 @@ mod tests { for i in 0..257usize { cache.insert( (format!("dummy_{i}.md"), 0usize), - ((0, 0), super::NoteContext { semantic_ready: false, neighbors: vec![] }), + ( + (0, 0), + super::NoteContext { + semantic_ready: false, + neighbors: vec![], + }, + ), ); } } @@ -878,7 +847,9 @@ mod tests { .join("onnx/model.onnx") .exists() { - eprintln!("skipping neural_note_context_finds_same_topic: e5 model not found at {model_dir}"); + eprintln!( + "skipping neural_note_context_finds_same_topic: e5 model not found at {model_dir}" + ); return; } @@ -889,8 +860,7 @@ mod tests { "neural embedder must be active (384d)" ); - let state = - AppState::with_db_path_and_embedder(":memory:", None, embedder).unwrap(); + let state = AppState::with_db_path_and_embedder(":memory:", None, embedder).unwrap(); { let 
mut graph = state.graph.write().await; graph.enable_dag(); diff --git a/crates/aingle_cortex/src/service/dag.rs b/crates/aingle_cortex/src/service/dag.rs index e43997bf..4521e6fc 100644 --- a/crates/aingle_cortex/src/service/dag.rs +++ b/crates/aingle_cortex/src/service/dag.rs @@ -9,6 +9,9 @@ use crate::rest::dag::{ }; use crate::state::AppState; +/// Default action-history limit shared by REST and MCP endpoints. +pub(crate) const DEFAULT_HISTORY_LIMIT: usize = 50; + /// Return DAG actions affecting a subject, newest first, up to `limit`. pub async fn history_by_subject( state: &AppState, diff --git a/crates/aingle_cortex/src/service/local_graph.rs b/crates/aingle_cortex/src/service/local_graph.rs index b1e74bee..4714a27f 100644 --- a/crates/aingle_cortex/src/service/local_graph.rs +++ b/crates/aingle_cortex/src/service/local_graph.rs @@ -1,4 +1,4 @@ -// Copyright 2019-2026 Apilium Technologies OÜ. All rights reserved. +// Copyright 2019-2026 Apilium Technologies OÜ. All rights reserved. // SPDX-License-Identifier: Apache-2.0 OR Commercial //! Local graph neighborhood for a single note: typed edges (link / semantic / tag) @@ -6,9 +6,9 @@ use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet, VecDeque}; -use crate::service::triple_util::{obj_string, resolve_link_target}; -use crate::service::vault_map::{basename, is_maps_path}; use crate::service::context::{note_context_cached, NEIGHBOR_FLOOR}; +use crate::service::triple_util::{basename, obj_string, resolve_link_target, strip_brackets}; +use crate::service::vault_map::is_maps_path; // --------------------------------------------------------------------------- // Public types @@ -80,18 +80,12 @@ const SEM_FRONTIER_CAP: usize = 16; // --------------------------------------------------------------------------- /// Build the typed local neighborhood graph for `note` at BFS depth `depth`. 
-pub async fn local_graph( - state: &crate::state::AppState, - note: &str, - depth: usize, -) -> LocalGraph { +pub async fn local_graph(state: &crate::state::AppState, note: &str, depth: usize) -> LocalGraph { use aingle_graph::{Predicate, TriplePattern}; let depth = depth.clamp(1, MAX_DEPTH); let semantic_grade = state.embedder.dimensions() >= 128; - let strip = |n: String| n.trim_start_matches('<').trim_end_matches('>').to_string(); - // ----------------------------------------------------------------------- // 1. Load structural data from the graph once. // ----------------------------------------------------------------------- @@ -99,14 +93,19 @@ pub async fn local_graph( // links_raw: (subject, object-string) for every links_to triple // tagged_raw: (subject, tag) for every tagged triple type PairVec = Vec<(String, String)>; - let (notes, links_raw, tagged_raw, created_map): (Vec, PairVec, PairVec, BTreeMap) = { + let (notes, links_raw, tagged_raw, created_map): ( + Vec, + PairVec, + PairVec, + BTreeMap, + ) = { let g = state.graph.read().await; let collect = |pred: &str| -> PairVec { g.find(TriplePattern::any().with_predicate(Predicate::named(pred))) .unwrap_or_default() .into_iter() .filter_map(|t| { - obj_string(&t).map(|o| (strip(t.subject.to_string()), o)) + obj_string(&t).map(|o| (strip_brackets(&t.subject.to_string()).to_string(), o)) }) .collect() }; @@ -131,14 +130,11 @@ pub async fn local_graph( // Basename index for wikilink resolution. let mut by_base: BTreeMap = BTreeMap::new(); for n in ¬es { - by_base - .entry(basename(n)) - .or_insert_with(|| n.clone()); + by_base.entry(basename(n)).or_insert_with(|| n.clone()); } - let resolve = |target: &str| -> Option { - resolve_link_target(target, ¬e_set, &by_base) - }; + let resolve = + |target: &str| -> Option { resolve_link_target(target, ¬e_set, &by_base) }; // Resolved outgoing links: (src, dst) — both are full paths, neither a maps path. 
let links: Vec<(String, String)> = links_raw @@ -610,10 +606,16 @@ mod tests { insert_chunk(&state, "b.md", "alpha content for b", e0()).await; let g = super::local_graph(&state, "a.md", 1).await; - assert!(g.semantic_ready, "StubEmbedder(128d) must be semantic_ready"); + assert!( + g.semantic_ready, + "StubEmbedder(128d) must be semantic_ready" + ); let sem = g.edges.iter().find(|e| e.kind == "semantic"); assert!(sem.is_some(), "must have a semantic edge: {:?}", g.edges); - assert!(sem.unwrap().score.is_some(), "semantic edge must carry a score"); + assert!( + sem.unwrap().score.is_some(), + "semantic edge must carry a score" + ); } // ----------------------------------------------------------------------- @@ -622,12 +624,8 @@ mod tests { #[cfg(feature = "dag")] #[tokio::test] async fn semantic_edge_carries_provenance() { - let state = AppState::with_db_path_and_embedder( - ":memory:", - None, - Arc::new(StubEmbedder), - ) - .unwrap(); + let state = + AppState::with_db_path_and_embedder(":memory:", None, Arc::new(StubEmbedder)).unwrap(); { let mut graph = state.graph.write().await; graph.enable_dag(); @@ -657,7 +655,9 @@ mod tests { }; let key = aingle_graph::dag::DagSigningKey::generate(); key.sign(&mut action); - dag_store.put(&action).expect("put signed action must succeed"); + dag_store + .put(&action) + .expect("put signed action must succeed"); } let g = super::local_graph(&state, "a.md", 1).await; @@ -709,7 +709,10 @@ mod tests { insert_triple_node(&state, "a.md", "links_to", "b").await; let g = super::local_graph(&state, "a.md", 1).await; - assert!(!g.semantic_ready, "64-dim hash embedder must set semantic_ready=false"); + assert!( + !g.semantic_ready, + "64-dim hash embedder must set semantic_ready=false" + ); assert!( g.edges.iter().all(|e| e.kind != "semantic"), "no semantic edges with hash embedder: {:?}", @@ -745,7 +748,9 @@ mod tests { g.nodes ); assert!( - !g.edges.iter().any(|e| e.target.starts_with("_maps/") || e.source.starts_with("_maps/")), + 
!g.edges + .iter() + .any(|e| e.target.starts_with("_maps/") || e.source.starts_with("_maps/")), "_maps/ edges must be excluded: {:?}", g.edges ); @@ -810,7 +815,6 @@ mod tests { ); } - // ----------------------------------------------------------------------- // 9. incoming_link_edge // ----------------------------------------------------------------------- @@ -834,7 +838,6 @@ mod tests { ); } - // ----------------------------------------------------------------------- // 10. pair_with_link_and_semantic_keeps_both // ----------------------------------------------------------------------- @@ -863,7 +866,6 @@ mod tests { assert!(has_sem, "semantic edge a↔b must be present: {:?}", g.edges); } - // ----------------------------------------------------------------------- // 11. symmetric_semantic_dedup // ----------------------------------------------------------------------- @@ -890,14 +892,12 @@ mod tests { }) .count(); assert_eq!( - sem_count, - 1, + sem_count, 1, "symmetric a↔b semantic must yield exactly ONE edge, got {sem_count}: {:?}", g.edges ); } - // ----------------------------------------------------------------------- // 12. local_graph_cached_hit_and_invalidation // ----------------------------------------------------------------------- @@ -912,7 +912,10 @@ mod tests { // First call: computes and caches. let g1 = super::local_graph_cached(&state, "a.md", 1).await; - assert!(g1.nodes.iter().any(|n| n.id == "b.md"), "b.md must be in graph"); + assert!( + g1.nodes.iter().any(|n| n.id == "b.md"), + "b.md must be in graph" + ); // Second call: graph/memory unchanged → cache hit → identical result. let g2 = super::local_graph_cached(&state, "a.md", 1).await; @@ -935,7 +938,6 @@ mod tests { ); } - // ----------------------------------------------------------------------- // 13. 
cache_cap_clears_when_exceeded // ----------------------------------------------------------------------- @@ -977,7 +979,6 @@ mod tests { ); } - // ----------------------------------------------------------------------- // timestamp field: created triple → GNode.timestamp // ----------------------------------------------------------------------- @@ -993,18 +994,23 @@ mod tests { insert_triple_lit(&state, "a.md", "created", "2025-03-15").await; let g = super::local_graph(&state, "a.md", 1).await; - let node_a = g.nodes.iter().find(|n| n.id == "a.md") + let node_a = g + .nodes + .iter() + .find(|n| n.id == "a.md") .expect("a.md must be in graph"); assert_eq!( node_a.timestamp, Some("2025-03-15".to_string()), "GNode.timestamp must come from the created triple" ); - let node_b = g.nodes.iter().find(|n| n.id == "b.md") + let node_b = g + .nodes + .iter() + .find(|n| n.id == "b.md") .expect("b.md must be in graph"); assert_eq!( - node_b.timestamp, - None, + node_b.timestamp, None, "GNode without created triple must have timestamp=None" ); } @@ -1044,7 +1050,6 @@ mod tests { ); } - // ----------------------------------------------------------------------- // 15. neural_local_graph_has_semantic_edge (real e5 model, gated) // ----------------------------------------------------------------------- @@ -1080,8 +1085,7 @@ mod tests { "neural embedder must be active (384d)" ); - let state = - AppState::with_db_path_and_embedder(":memory:", None, embedder).unwrap(); + let state = AppState::with_db_path_and_embedder(":memory:", None, embedder).unwrap(); { let mut graph = state.graph.write().await; graph.enable_dag(); diff --git a/crates/aingle_cortex/src/service/triple_util.rs b/crates/aingle_cortex/src/service/triple_util.rs index dddfd2f2..524b5ffc 100644 --- a/crates/aingle_cortex/src/service/triple_util.rs +++ b/crates/aingle_cortex/src/service/triple_util.rs @@ -13,19 +13,55 @@ /// strings (`Value::Str`) and graph nodes (`Value::Node`). 
Node IDs are stored /// with `<…>` angle-bracket wrappers; this strips them so the result matches /// the bare names used everywhere else in the service layer. +#[inline] pub(crate) fn obj_string(t: &aingle_graph::Triple) -> Option { if let Some(s) = t.object_string() { Some(s.to_string()) } else { t.object_node() - .map(|n| n.to_string().trim_start_matches('<').trim_end_matches('>').to_string()) + .map(|n| strip_brackets(&n.to_string()).to_string()) } } +/// Strip leading `<` and trailing `>` angle-bracket wrappers from an IRI string. +/// +/// Node IDs in the graph are stored with angle-bracket wrappers (e.g. ``); +/// this strips them so the result matches the bare names used everywhere in the service layer. +pub(crate) fn strip_brackets(s: &str) -> &str { + s.trim_start_matches('<').trim_end_matches('>') +} + /// Basename without directory or extension (for wikilink resolution). -fn basename(path: &str) -> String { +/// +/// Strips both `/` and `\` directory separators and removes the last `.ext`. +pub(crate) fn basename(path: &str) -> String { let file = path.rsplit(['/', '\\']).next().unwrap_or(path); - file.rsplit_once('.').map(|(s, _)| s).unwrap_or(file).to_string() + file.rsplit_once('.') + .map(|(s, _)| s) + .unwrap_or(file) + .to_string() +} + +/// Retrieve a signed provenance anchor hash for a note path, if available. +/// +/// Returns the hex hash of the most-recent signed DAG action whose subject is +/// `src`, or `None` when the `dag` feature is off or no signed action exists. +pub(crate) async fn provenance_anchor_for( + state: &crate::state::AppState, + src: &str, +) -> Option { + #[cfg(feature = "dag")] + { + match crate::service::dag::history_by_subject(state, src, 1).await { + Ok(a) => a.first().filter(|x| x.signed).map(|x| x.hash.clone()), + Err(_) => None, + } + } + #[cfg(not(feature = "dag"))] + { + let _ = (state, src); + None + } } /// Strip the extension from the last path segment only. 
Input must already be @@ -38,7 +74,10 @@ fn path_without_ext(path: &str) -> String { let stem = file.rsplit_once('.').map(|(s, _)| s).unwrap_or(file); format!("{dir}{stem}") } else { - path.rsplit_once('.').map(|(s, _)| s).unwrap_or(path).to_string() + path.rsplit_once('.') + .map(|(s, _)| s) + .unwrap_or(path) + .to_string() } } @@ -89,7 +128,7 @@ mod tests { #[test] fn exact_path_match() { // "b/note.md" exists verbatim — must return it, not "a/note.md". - let notes = vec!["a/note.md".to_string(), "b/note.md".to_string()]; + let notes = ["a/note.md".to_string(), "b/note.md".to_string()]; let note_set: BTreeSet<&str> = notes.iter().map(|s| s.as_str()).collect(); let mut by_base: BTreeMap = BTreeMap::new(); by_base.insert("note".to_string(), "a/note.md".to_string()); @@ -105,7 +144,7 @@ mod tests { // "[[b/note]]" (no extension) must resolve to "b/note.md", NOT "a/note.md". // by_base["note"] = "a/note.md" (first alphabetically — the collision // that previously caused the bug). - let notes = vec!["a/note.md".to_string(), "b/note.md".to_string()]; + let notes = ["a/note.md".to_string(), "b/note.md".to_string()]; let note_set: BTreeSet<&str> = notes.iter().map(|s| s.as_str()).collect(); let mut by_base: BTreeMap = BTreeMap::new(); by_base.insert("note".to_string(), "a/note.md".to_string()); @@ -120,7 +159,7 @@ mod tests { #[test] fn bare_basename_unique_fallback() { // No path component → falls through to by_base. 
- let notes = vec!["dir/note.md".to_string()]; + let notes = ["dir/note.md".to_string()]; let note_set: BTreeSet<&str> = notes.iter().map(|s| s.as_str()).collect(); let mut by_base: BTreeMap = BTreeMap::new(); by_base.insert("note".to_string(), "dir/note.md".to_string()); diff --git a/crates/aingle_cortex/src/service/vault_map.rs b/crates/aingle_cortex/src/service/vault_map.rs index ed59eb1c..a68449e0 100644 --- a/crates/aingle_cortex/src/service/vault_map.rs +++ b/crates/aingle_cortex/src/service/vault_map.rs @@ -7,7 +7,7 @@ use serde::Serialize; use std::collections::BTreeMap; -use crate::service::triple_util::obj_string; +use crate::service::triple_util::{basename, obj_string, strip_brackets}; /// The full vault map returned to the UI and the connected AI. #[derive(Debug, Clone, Serialize, Default)] @@ -114,12 +114,6 @@ const SEMANTIC_EDGES_PER_NODE: usize = 3; /// Tags (case-insensitive) that mark a note as a reusable skill/process. const SKILL_TAGS: [&str; 6] = ["skill", "process", "sop", "workflow", "how-to", "howto"]; -/// Basename without directory or extension, for wikilink resolution + titles. -pub(crate) fn basename(path: &str) -> String { - let file = path.rsplit(['/', '\\']).next().unwrap_or(path); - file.rsplit_once('.').map(|(stem, _)| stem).unwrap_or(file).to_string() -} - /// True for paths under the generated maps folder (excluded from the vault map). 
pub(crate) fn is_maps_path(path: &str) -> bool { path.starts_with("_maps/") || path.starts_with("_maps\\") @@ -140,14 +134,13 @@ pub(crate) struct Structural { pub(crate) fn derive_structural(graph: &aingle_graph::GraphDB) -> Structural { use aingle_graph::{Predicate, TriplePattern}; - let strip = |n: String| n.trim_start_matches('<').trim_end_matches('>').to_string(); let find = |pred: &str| -> Vec<(String, String)> { graph .find(TriplePattern::any().with_predicate(Predicate::named(pred))) .unwrap_or_default() .into_iter() .filter_map(|t| { - let subj = strip(t.subject.to_string()); + let subj = strip_brackets(&t.subject.to_string()).to_string(); obj_string(&t).map(|o| (subj, o)) }) .collect() @@ -239,8 +232,14 @@ fn top_k_semantic_pairs<'a>( // Accumulate per-node (partner, cosine) lists in global cosine-desc order. let mut per_node: BTreeMap<&'a str, Vec<(&'a str, f32)>> = BTreeMap::new(); for (a, b, c) in candidates { - per_node.entry(a.as_str()).or_default().push((b.as_str(), *c)); - per_node.entry(b.as_str()).or_default().push((a.as_str(), *c)); + per_node + .entry(a.as_str()) + .or_default() + .push((b.as_str(), *c)); + per_node + .entry(b.as_str()) + .or_default() + .push((a.as_str(), *c)); } // Union: an ordered pair (min, max) is kept if EITHER endpoint selects the other. 
let mut chosen: BTreeMap<(String, String), f32> = BTreeMap::new(); @@ -344,7 +343,12 @@ pub(crate) fn cluster_semantic( (topics, sem_pairs) } -fn mean_sim(self_idx: usize, members: &[usize], names: &[&String], vecs: &BTreeMap>) -> f32 { +fn mean_sim( + self_idx: usize, + members: &[usize], + names: &[&String], + vecs: &BTreeMap>, +) -> f32 { if members.len() <= 1 { return 1.0; } @@ -358,7 +362,11 @@ fn mean_sim(self_idx: usize, members: &[usize], names: &[&String], vecs: &BTreeM sum += cosine(v, &vecs[names[m]]); cnt += 1; } - if cnt == 0 { 1.0 } else { sum / cnt as f32 } + if cnt == 0 { + 1.0 + } else { + sum / cnt as f32 + } } /// Mean per-note embedding from Ineru `doc_chunk` entries, grouped by source_path. @@ -373,8 +381,12 @@ pub(crate) fn per_note_vectors(mem: &ineru::IneruMemory) -> BTreeMap VaultMap { let mut tag_clusters: Vec = s .tag_notes .iter() - .map(|(tag, notes)| TagGroup { tag: tag.clone(), notes: notes.clone() }) + .map(|(tag, notes)| TagGroup { + tag: tag.clone(), + notes: notes.clone(), + }) .collect(); tag_clusters.sort_by(|a, b| b.notes.len().cmp(&a.notes.len()).then(a.tag.cmp(&b.tag))); let mut tags: Vec = s .tag_notes .iter() - .map(|(tag, notes)| TagCount { tag: tag.clone(), count: notes.len() }) + .map(|(tag, notes)| TagCount { + tag: tag.clone(), + count: notes.len(), + }) .collect(); tags.sort_by(|a, b| b.count.cmp(&a.count).then(a.tag.cmp(&b.tag))); let mut types: Vec = s .type_counts .iter() - .map(|(ty, count)| TypeCount { ty: ty.clone(), count: *count }) + .map(|(ty, count)| TypeCount { + ty: ty.clone(), + count: *count, + }) .collect(); types.sort_by(|a, b| b.count.cmp(&a.count).then(a.ty.cmp(&b.ty))); @@ -496,13 +517,12 @@ pub async fn compute_vault_map(state: &crate::state::AppState) -> VaultMap { let created: BTreeMap = { use aingle_graph::{Predicate, TriplePattern}; let g = state.graph.read().await; - let strip = |n: String| n.trim_start_matches('<').trim_end_matches('>').to_string(); let collect_pred = |pred: &str| -> BTreeMap 
{ g.find(TriplePattern::any().with_predicate(Predicate::named(pred))) .unwrap_or_default() .into_iter() .filter_map(|t| { - let subj = strip(t.subject.to_string()); + let subj = strip_brackets(&t.subject.to_string()).to_string(); obj_string(&t).map(|o| (subj, o)) }) .collect() @@ -518,10 +538,8 @@ pub async fn compute_vault_map(state: &crate::state::AppState) -> VaultMap { // GraphView (cap by degree). let mut ranked: Vec<&String> = s.notes.iter().collect(); ranked.sort_by(|a, b| { - let da = - s.in_deg.get(*a).copied().unwrap_or(0) + s.out_deg.get(*a).copied().unwrap_or(0); - let db = - s.in_deg.get(*b).copied().unwrap_or(0) + s.out_deg.get(*b).copied().unwrap_or(0); + let da = s.in_deg.get(*a).copied().unwrap_or(0) + s.out_deg.get(*a).copied().unwrap_or(0); + let db = s.in_deg.get(*b).copied().unwrap_or(0) + s.out_deg.get(*b).copied().unwrap_or(0); db.cmp(&da).then(a.cmp(b)) }); if s.notes.len() > GRAPH_NODE_CAP { @@ -540,8 +558,7 @@ pub async fn compute_vault_map(state: &crate::state::AppState) -> VaultMap { id: p.clone(), label: basename(p), cluster: cluster_of.get(p).copied().unwrap_or(-1), - degree: s.in_deg.get(p).copied().unwrap_or(0) - + s.out_deg.get(p).copied().unwrap_or(0), + degree: s.in_deg.get(p).copied().unwrap_or(0) + s.out_deg.get(p).copied().unwrap_or(0), timestamp: created.get(p).cloned(), }) .collect(); @@ -550,7 +567,11 @@ pub async fn compute_vault_map(state: &crate::state::AppState) -> VaultMap { .edges .iter() .filter(|(a, b)| kept.contains(a) && kept.contains(b)) - .map(|(a, b)| GraphEdge { source: a.clone(), target: b.clone(), kind: "link".into() }) + .map(|(a, b)| GraphEdge { + source: a.clone(), + target: b.clone(), + kind: "link".into(), + }) .collect(); // Semantic edges — per-node top-K selection with union semantics. 
@@ -569,7 +590,11 @@ pub async fn compute_vault_map(state: &crate::state::AppState) -> VaultMap { .edges .iter() .map(|(a, b)| { - if a <= b { (a.clone(), b.clone()) } else { (b.clone(), a.clone()) } + if a <= b { + (a.clone(), b.clone()) + } else { + (b.clone(), a.clone()) + } }) .collect(); @@ -579,16 +604,14 @@ pub async fn compute_vault_map(state: &crate::state::AppState) -> VaultMap { .filter(|(a, b, _)| kept.contains(a) && kept.contains(b)) .map(|(a, b, c)| if a <= b { (a, b, c) } else { (b, a, c) }) .collect(); - candidates - .sort_by(|x, y| y.2.partial_cmp(&x.2).unwrap_or(std::cmp::Ordering::Equal)); + candidates.sort_by(|x, y| y.2.partial_cmp(&x.2).unwrap_or(std::cmp::Ordering::Equal)); // Per-node top-K with union semantics → O(N·K) edges instead of O(N²). let chosen = top_k_semantic_pairs(&candidates, SEMANTIC_EDGES_PER_NODE); // Sort by cosine desc so SEMANTIC_EDGE_CAP retains the highest-quality edges. let mut chosen_sorted: Vec<((String, String), f32)> = chosen.into_iter().collect(); - chosen_sorted - .sort_by(|x, y| y.1.partial_cmp(&x.1).unwrap_or(std::cmp::Ordering::Equal)); + chosen_sorted.sort_by(|x, y| y.1.partial_cmp(&x.1).unwrap_or(std::cmp::Ordering::Equal)); let mut sem_count = 0usize; for ((a, b), _c) in chosen_sorted { @@ -598,7 +621,11 @@ pub async fn compute_vault_map(state: &crate::state::AppState) -> VaultMap { if link_pair_set.contains(&(a.clone(), b.clone())) { continue; } - edges.push(GraphEdge { source: a, target: b, kind: "semantic".into() }); + edges.push(GraphEdge { + source: a, + target: b, + kind: "semantic".into(), + }); sem_count += 1; } } @@ -674,7 +701,10 @@ pub async fn vault_map_cached(state: &crate::state::AppState) -> VaultMap { let mem_bytes = { state.memory.read().await.stats().total_memory_bytes }; let key = (tc, mem_bytes); { - let cache = state.vault_map_cache.lock().expect("vault_map cache poisoned"); + let cache = state + .vault_map_cache + .lock() + .expect("vault_map cache poisoned"); if let Some((cached_key, 
map)) = cache.as_ref() { if *cached_key == key { return map.clone(); @@ -684,7 +714,10 @@ pub async fn vault_map_cached(state: &crate::state::AppState) -> VaultMap { // The cache mutex is intentionally released before the async compute to avoid // holding it across an `.await` point. let map = compute_vault_map(state).await; - let mut cache = state.vault_map_cache.lock().expect("vault_map cache poisoned"); + let mut cache = state + .vault_map_cache + .lock() + .expect("vault_map cache poisoned"); *cache = Some((key, map.clone())); map } @@ -694,9 +727,7 @@ mod tests { use super::*; use aingle_graph::{NodeId, Predicate, Triple, Value}; - pub(super) async fn graph_with( - triples: &[(&str, &str, &str)], - ) -> crate::state::AppState { + pub(super) async fn graph_with(triples: &[(&str, &str, &str)]) -> crate::state::AppState { let state = crate::state::AppState::with_db_path(":memory:", None).unwrap(); { let g = state.graph.write().await; @@ -736,12 +767,20 @@ mod tests { super::derive_structural(&g) }; assert_eq!(s.notes.len(), 4); - assert_eq!(s.in_deg.get("hub.md").copied().unwrap_or(0), 2, "hub has 2 incoming"); + assert_eq!( + s.in_deg.get("hub.md").copied().unwrap_or(0), + 2, + "hub has 2 incoming" + ); assert_eq!(s.out_deg.get("a.md").copied().unwrap_or(0), 1); assert_eq!(s.tag_notes.get("storage").map(|v| v.len()), Some(2)); assert_eq!(s.link_count, 2); // Self-link must not be counted as incoming for a.md. - assert_eq!(s.in_deg.get("a.md").copied().unwrap_or(0), 0, "self-link must not count as incoming"); + assert_eq!( + s.in_deg.get("a.md").copied().unwrap_or(0), + 0, + "self-link must not count as incoming" + ); // type_counts must reflect the triple ("a.md","type","note"). 
assert_eq!(s.type_counts.get("note"), Some(&1)); } @@ -759,10 +798,7 @@ mod tests { assert_eq!(topics.len(), 2); let big = topics.iter().max_by_key(|t| t.size).unwrap(); assert_eq!(big.size, 2); - assert!( - big.notes.contains(&"a.md".to_string()) - && big.notes.contains(&"b.md".to_string()) - ); + assert!(big.notes.contains(&"a.md".to_string()) && big.notes.contains(&"b.md".to_string())); // The pair (a.md, b.md) must be captured in sem_pairs with cosine ≥ 0.9. assert!( sem_pairs.iter().any(|(a, b, c)| { @@ -788,7 +824,10 @@ mod tests { assert_eq!(m1.totals.notes, 3); assert_eq!(m1.totals.links, 1); assert_eq!(m1.totals.orphans, 1); // orphan.md - assert!(m1.entry_points.iter().any(|e| e.path == "hub.md" && e.in_links == 1)); + assert!(m1 + .entry_points + .iter() + .any(|e| e.path == "hub.md" && e.in_links == 1)); assert!(m1.tag_clusters.iter().any(|t| t.tag == "storage")); assert!(!m1.guidance.is_empty()); assert!(!m1.graph.nodes.is_empty()); @@ -812,8 +851,15 @@ mod tests { let map = super::vault_map_cached(&state).await; assert_eq!(map.totals.notes, 2, "_maps/ notes excluded from the count"); assert!(!map.graph.nodes.iter().any(|n| n.id.starts_with("_maps/"))); - assert!(!map.entry_points.iter().any(|e| e.path.starts_with("_maps/"))); - let hub = map.entry_points.iter().find(|e| e.path == "hub.md").expect("hub"); + assert!(!map + .entry_points + .iter() + .any(|e| e.path.starts_with("_maps/"))); + let hub = map + .entry_points + .iter() + .find(|e| e.path == "hub.md") + .expect("hub"); assert_eq!(hub.in_links, 1, "the _maps link to hub must be excluded"); } @@ -834,8 +880,14 @@ mod tests { assert_eq!(map.identity.as_deref(), Some("me.md")); assert!(map.skills.contains(&"deploy.md".to_string())); assert!(map.skills.contains(&"writing.md".to_string())); - assert!(!map.skills.contains(&"note.md".to_string()), "non-skill tag excluded"); - assert!(map.guidance.contains("me.md"), "guidance points at identity"); + assert!( + 
!map.skills.contains(&"note.md".to_string()), + "non-skill tag excluded" + ); + assert!( + map.guidance.contains("me.md"), + "guidance points at identity" + ); } #[tokio::test] @@ -845,9 +897,16 @@ mod tests { let state = crate::state::AppState::with_db_path(":memory:", None).unwrap(); { let g = state.graph.write().await; - for (s, p) in [("a.md", "aingle:source_hash"), ("hub.md", "aingle:source_hash")] { - g.insert(Triple::new(NodeId::named(s), Predicate::named(p), Value::literal("h"))) - .unwrap(); + for (s, p) in [ + ("a.md", "aingle:source_hash"), + ("hub.md", "aingle:source_hash"), + ] { + g.insert(Triple::new( + NodeId::named(s), + Predicate::named(p), + Value::literal("h"), + )) + .unwrap(); } // links_to as a NODE object — how real ingest produces it. g.insert(Triple::new( @@ -858,9 +917,15 @@ mod tests { .unwrap(); } let map = super::vault_map_cached(&state).await; - assert_eq!(map.totals.links, 1, "node-valued links_to must be counted: {:?}", map.totals); + assert_eq!( + map.totals.links, 1, + "node-valued links_to must be counted: {:?}", + map.totals + ); assert!( - map.entry_points.iter().any(|e| e.path == "hub.md" && e.in_links == 1), + map.entry_points + .iter() + .any(|e| e.path == "hub.md" && e.in_links == 1), "hub.md must appear as a hub with 1 incoming link: {:?}", map.entry_points ); @@ -881,7 +946,10 @@ mod tests { .unwrap(); } let m2 = super::vault_map_cached(&state).await; - assert_eq!(m2.totals.notes, 2, "cache must invalidate when triple_count changes"); + assert_eq!( + m2.totals.notes, 2, + "cache must invalidate when triple_count changes" + ); } // ----------------------------------------------------------------- @@ -899,8 +967,7 @@ mod tests { .await; let map = super::vault_map_cached(&state).await; let edge = map.graph.edges.iter().find(|e| { - (e.source == "a.md" && e.target == "b.md") - || (e.source == "b.md" && e.target == "a.md") + (e.source == "a.md" && e.target == "b.md") || (e.source == "b.md" && e.target == "a.md") }); let edge = 
edge.expect("link edge between a.md and b.md must exist"); assert_eq!(edge.kind, "link", "wikilink edges must carry kind='link'"); @@ -961,10 +1028,15 @@ mod tests { } } let map2 = super::compute_vault_map(&state2).await; - let edges_ab: Vec<_> = map2.graph.edges.iter().filter(|e| { - (e.source == "a.md" && e.target == "b.md") - || (e.source == "b.md" && e.target == "a.md") - }).collect(); + let edges_ab: Vec<_> = map2 + .graph + .edges + .iter() + .filter(|e| { + (e.source == "a.md" && e.target == "b.md") + || (e.source == "b.md" && e.target == "a.md") + }) + .collect(); assert_eq!( edges_ab.len(), 1, @@ -972,8 +1044,7 @@ mod tests { map2.graph.edges ); assert_eq!( - edges_ab[0].kind, - "link", + edges_ab[0].kind, "link", "the single edge must have kind='link', not 'semantic': {:?}", edges_ab[0] ); @@ -995,18 +1066,25 @@ mod tests { .await; let map = super::compute_vault_map(&state).await; - let node_a = map.graph.nodes.iter().find(|n| n.id == "a.md") + let node_a = map + .graph + .nodes + .iter() + .find(|n| n.id == "a.md") .expect("a.md must be in graph"); assert_eq!( node_a.timestamp, Some("2025-01-02".to_string()), "timestamp must be populated from the created triple" ); - let node_b = map.graph.nodes.iter().find(|n| n.id == "b.md") + let node_b = map + .graph + .nodes + .iter() + .find(|n| n.id == "b.md") .expect("b.md must be in graph"); assert_eq!( - node_b.timestamp, - None, + node_b.timestamp, None, "node without a created triple must have timestamp=None" ); } @@ -1077,8 +1155,12 @@ mod tests { } let map = super::compute_vault_map(&state).await; - let sem_edges: Vec<_> = - map.graph.edges.iter().filter(|e| e.kind == "semantic").collect(); + let sem_edges: Vec<_> = map + .graph + .edges + .iter() + .filter(|e| e.kind == "semantic") + .collect(); // (a) Clearly-strongest pair is connected. 
assert!( @@ -1145,13 +1227,16 @@ mod tests { } let map = super::compute_vault_map(&state).await; assert_eq!( - map.totals.links, - 1, + map.totals.links, 1, "totals.links must count only explicit wikilinks, not semantic edges: {:?}", map.totals ); - let sem_edges: Vec<_> = - map.graph.edges.iter().filter(|e| e.kind == "semantic").collect(); + let sem_edges: Vec<_> = map + .graph + .edges + .iter() + .filter(|e| e.kind == "semantic") + .collect(); assert!( !sem_edges.is_empty(), "semantic edges between similar notes must exist even when totals.links is 1" diff --git a/crates/aingle_cortex/src/state.rs b/crates/aingle_cortex/src/state.rs index d33939eb..973274a9 100644 --- a/crates/aingle_cortex/src/state.rs +++ b/crates/aingle_cortex/src/state.rs @@ -15,6 +15,30 @@ use crate::auth::UserStore; use crate::proofs::ProofStore; use crate::rest::audit::AuditLog; +// --------------------------------------------------------------------------- +// Cache type aliases (avoid clippy::type_complexity on the struct fields) +// --------------------------------------------------------------------------- + +/// Shared cache type for the vault map. +type VaultMapCache = + std::sync::Mutex>; + +/// Shared cache type for per-note semantic-neighbor contexts. +type NoteContextCache = std::sync::Mutex< + std::collections::HashMap< + (String, usize), + ((usize, usize), crate::service::context::NoteContext), + >, +>; + +/// Shared cache type for per-note local-graph neighborhoods. +type LocalGraphCache = std::sync::Mutex< + std::collections::HashMap< + (String, usize), + ((usize, usize), crate::service::local_graph::LocalGraph), + >, +>; + /// The shared state accessible by all API handlers. /// /// This struct uses `Arc` and `RwLock` to provide safe, concurrent access @@ -31,33 +55,17 @@ pub struct AppState { pub embedder: std::sync::Arc, /// Cached vault map, keyed on (graph triple-count, memory bytes) — see /// service::vault_map::vault_map_cached. 
- pub vault_map_cache: std::sync::Arc< - std::sync::Mutex>, - >, + pub vault_map_cache: Arc, /// Per-note semantic-neighbor cache, keyed by `(note_path, limit)`, storing /// `(graph_triple_count, total_memory_bytes) → NoteContext`. Invalidated /// whenever the graph or memory changes — same staleness signal as /// vault_map_cache. `limit` is part of the key so that MCP calls with /// different limits do not serve stale neighbor counts from cache. - pub note_context_cache: std::sync::Arc< - std::sync::Mutex< - std::collections::HashMap< - (String, usize), - ((usize, usize), crate::service::context::NoteContext), - >, - >, - >, + pub note_context_cache: Arc, /// Per-note local-graph cache, keyed by `(note_path, depth)`, storing /// `(graph_triple_count, total_memory_bytes) → LocalGraph`. Invalidated /// on any graph or memory change — mirrors note_context_cache semantics. - pub local_graph_cache: std::sync::Arc< - std::sync::Mutex< - std::collections::HashMap< - (String, usize), - ((usize, usize), crate::service::local_graph::LocalGraph), - >, - >, - >, + pub local_graph_cache: Arc, /// The event broadcaster for sending real-time updates to WebSocket subscribers. pub broadcaster: Arc, /// The store for managing and verifying zero-knowledge proofs. 
@@ -127,8 +135,12 @@ impl AppState { memory: Arc::new(RwLock::new(memory)), embedder: std::sync::Arc::new(HashEmbedder::new()), vault_map_cache: std::sync::Arc::new(std::sync::Mutex::new(None)), - note_context_cache: std::sync::Arc::new(std::sync::Mutex::new(std::collections::HashMap::new())), - local_graph_cache: std::sync::Arc::new(std::sync::Mutex::new(std::collections::HashMap::new())), + note_context_cache: std::sync::Arc::new(std::sync::Mutex::new( + std::collections::HashMap::new(), + )), + local_graph_cache: std::sync::Arc::new(std::sync::Mutex::new( + std::collections::HashMap::new(), + )), broadcaster: Arc::new(EventBroadcaster::new()), proof_store: Arc::new(ProofStore::new()), sandbox_manager: Arc::new(SandboxManager::new()), @@ -175,8 +187,12 @@ impl AppState { memory: Arc::new(RwLock::new(memory)), embedder: std::sync::Arc::new(HashEmbedder::new()), vault_map_cache: std::sync::Arc::new(std::sync::Mutex::new(None)), - note_context_cache: std::sync::Arc::new(std::sync::Mutex::new(std::collections::HashMap::new())), - local_graph_cache: std::sync::Arc::new(std::sync::Mutex::new(std::collections::HashMap::new())), + note_context_cache: std::sync::Arc::new(std::sync::Mutex::new( + std::collections::HashMap::new(), + )), + local_graph_cache: std::sync::Arc::new(std::sync::Mutex::new( + std::collections::HashMap::new(), + )), broadcaster: Arc::new(EventBroadcaster::new()), proof_store: Arc::new(ProofStore::new()), sandbox_manager: Arc::new(SandboxManager::new()), @@ -223,8 +239,12 @@ impl AppState { memory: Arc::new(RwLock::new(memory)), embedder: std::sync::Arc::new(HashEmbedder::new()), vault_map_cache: std::sync::Arc::new(std::sync::Mutex::new(None)), - note_context_cache: std::sync::Arc::new(std::sync::Mutex::new(std::collections::HashMap::new())), - local_graph_cache: std::sync::Arc::new(std::sync::Mutex::new(std::collections::HashMap::new())), + note_context_cache: std::sync::Arc::new(std::sync::Mutex::new( + std::collections::HashMap::new(), + )), + 
local_graph_cache: std::sync::Arc::new(std::sync::Mutex::new( + std::collections::HashMap::new(), + )), broadcaster: Arc::new(EventBroadcaster::new()), proof_store: Arc::new(ProofStore::new()), sandbox_manager: Arc::new(SandboxManager::new()), @@ -368,8 +388,12 @@ impl AppState { memory: Arc::new(RwLock::new(memory)), embedder, vault_map_cache: std::sync::Arc::new(std::sync::Mutex::new(None)), - note_context_cache: std::sync::Arc::new(std::sync::Mutex::new(std::collections::HashMap::new())), - local_graph_cache: std::sync::Arc::new(std::sync::Mutex::new(std::collections::HashMap::new())), + note_context_cache: std::sync::Arc::new(std::sync::Mutex::new( + std::collections::HashMap::new(), + )), + local_graph_cache: std::sync::Arc::new(std::sync::Mutex::new( + std::collections::HashMap::new(), + )), broadcaster: Arc::new(EventBroadcaster::new()), proof_store, sandbox_manager: Arc::new(SandboxManager::new()), @@ -654,7 +678,11 @@ mod tests { let mut g = state.graph.write().await; g.enable_dag(); } - std::fs::write(dir.path().join("note.md"), "# N\n\nsled has exclusive locks.\n").unwrap(); + std::fs::write( + dir.path().join("note.md"), + "# N\n\nsled has exclusive locks.\n", + ) + .unwrap(); crate::service::ingest::ingest_path(&state, dir.path().to_str().unwrap(), None) .await .unwrap(); @@ -666,9 +694,10 @@ mod tests { let state = AppState::with_db_path(db_str, None).unwrap(); let g = state.graph.read().await; let n = g - .find(TriplePattern::any().with_predicate(Predicate::named( - crate::service::ingest::PRED_SOURCE_HASH, - ))) + .find( + TriplePattern::any() + .with_predicate(Predicate::named(crate::service::ingest::PRED_SOURCE_HASH)), + ) .unwrap() .len(); assert!(n >= 1, "registry triple should exist after first ingest"); @@ -680,9 +709,10 @@ mod tests { let state = AppState::with_db_path_and_embedder(db_str, None, fake_384).unwrap(); let g = state.graph.read().await; let n = g - .find(TriplePattern::any().with_predicate(Predicate::named( - 
crate::service::ingest::PRED_SOURCE_HASH, - ))) + .find( + TriplePattern::any() + .with_predicate(Predicate::named(crate::service::ingest::PRED_SOURCE_HASH)), + ) .unwrap() .len(); assert_eq!(n, 0, "registry must be cleared on embedder dim change"); @@ -703,7 +733,11 @@ mod tests { let mut g = state.graph.write().await; g.enable_dag(); } - std::fs::write(dir.path().join("n.md"), "# N\n\nsled has exclusive locks.\n").unwrap(); + std::fs::write( + dir.path().join("n.md"), + "# N\n\nsled has exclusive locks.\n", + ) + .unwrap(); crate::service::ingest::ingest_path(&state, dir.path().to_str().unwrap(), None) .await .unwrap(); @@ -719,12 +753,16 @@ mod tests { let state = AppState::with_db_path_and_embedder(db_str, None, fake_384).unwrap(); let g = state.graph.read().await; let n = g - .find(TriplePattern::any().with_predicate(Predicate::named( - crate::service::ingest::PRED_SOURCE_HASH, - ))) + .find( + TriplePattern::any() + .with_predicate(Predicate::named(crate::service::ingest::PRED_SOURCE_HASH)), + ) .unwrap() .len(); - assert_eq!(n, 0, "legacy snapshot without sidecar must migrate when dims differ"); + assert_eq!( + n, 0, + "legacy snapshot without sidecar must migrate when dims differ" + ); } } @@ -741,7 +779,11 @@ mod tests { let mut g = state.graph.write().await; g.enable_dag(); } - std::fs::write(dir.path().join("n.md"), "# N\n\nsled has exclusive locks.\n").unwrap(); + std::fs::write( + dir.path().join("n.md"), + "# N\n\nsled has exclusive locks.\n", + ) + .unwrap(); crate::service::ingest::ingest_path(&state, dir.path().to_str().unwrap(), None) .await .unwrap(); @@ -753,9 +795,10 @@ mod tests { let state = AppState::with_db_path(db_str, None).unwrap(); let g = state.graph.read().await; let n = g - .find(TriplePattern::any().with_predicate(Predicate::named( - crate::service::ingest::PRED_SOURCE_HASH, - ))) + .find( + TriplePattern::any() + .with_predicate(Predicate::named(crate::service::ingest::PRED_SOURCE_HASH)), + ) .unwrap() .len(); assert!(n >= 1, 
"same-dims boot must preserve the registry"); From e14945dab1d200de1309464eb58755ddc3e972f9 Mon Sep 17 00:00:00 2001 From: ApiliumDevTeam Date: Sun, 28 Jun 2026 20:59:19 +0200 Subject: [PATCH 69/72] style: apply cargo fmt to aingle_cortex (polish-round drift) --- crates/aingle_cortex/src/embedder.rs | 50 +++++++++++++++++----- crates/aingle_cortex/src/lib.rs | 2 +- crates/aingle_cortex/src/server.rs | 7 +-- crates/aingle_cortex/src/service/ground.rs | 26 ++++++++--- crates/aingle_cortex/src/service/mod.rs | 2 +- 5 files changed, 62 insertions(+), 25 deletions(-) diff --git a/crates/aingle_cortex/src/embedder.rs b/crates/aingle_cortex/src/embedder.rs index 0714fb2a..de64d837 100644 --- a/crates/aingle_cortex/src/embedder.rs +++ b/crates/aingle_cortex/src/embedder.rs @@ -89,9 +89,15 @@ struct PendingEmbedder { } impl Embedder for PendingEmbedder { - fn embed_passage(&self, _text: &str) -> Embedding { Embedding::new(vec![0.0; self.dims]) } - fn embed_query(&self, _text: &str) -> Embedding { Embedding::new(vec![0.0; self.dims]) } - fn dimensions(&self) -> usize { self.dims } + fn embed_passage(&self, _text: &str) -> Embedding { + Embedding::new(vec![0.0; self.dims]) + } + fn embed_query(&self, _text: &str) -> Embedding { + Embedding::new(vec![0.0; self.dims]) + } + fn dimensions(&self) -> usize { + self.dims + } } impl SwappableEmbedder { @@ -121,16 +127,30 @@ impl SwappableEmbedder { impl Embedder for SwappableEmbedder { fn embed_passage(&self, text: &str) -> Embedding { - let inner = self.inner.read().expect("swappable embedder poisoned").clone(); + let inner = self + .inner + .read() + .expect("swappable embedder poisoned") + .clone(); inner.embed_passage(text) } fn embed_query(&self, text: &str) -> Embedding { - let inner = self.inner.read().expect("swappable embedder poisoned").clone(); + let inner = self + .inner + .read() + .expect("swappable embedder poisoned") + .clone(); inner.embed_query(text) } - fn dimensions(&self) -> usize { self.dims } + fn 
dimensions(&self) -> usize { + self.dims + } fn relevance_thresholds(&self) -> (f32, f32) { - let inner = self.inner.read().expect("swappable embedder poisoned").clone(); + let inner = self + .inner + .read() + .expect("swappable embedder poisoned") + .clone(); inner.relevance_thresholds() } } @@ -203,9 +223,17 @@ mod tests { /// 384-dim test delegate with non-zero output and the e5 thresholds. struct Fake384; impl ineru::Embedder for Fake384 { - fn embed_passage(&self, _t: &str) -> ineru::Embedding { ineru::Embedding::new(vec![0.5; 384]) } - fn embed_query(&self, _t: &str) -> ineru::Embedding { ineru::Embedding::new(vec![0.5; 384]) } - fn dimensions(&self) -> usize { 384 } - fn relevance_thresholds(&self) -> (f32, f32) { (0.80, 0.77) } + fn embed_passage(&self, _t: &str) -> ineru::Embedding { + ineru::Embedding::new(vec![0.5; 384]) + } + fn embed_query(&self, _t: &str) -> ineru::Embedding { + ineru::Embedding::new(vec![0.5; 384]) + } + fn dimensions(&self) -> usize { + 384 + } + fn relevance_thresholds(&self) -> (f32, f32) { + (0.80, 0.77) + } } } diff --git a/crates/aingle_cortex/src/lib.rs b/crates/aingle_cortex/src/lib.rs index 58242cdc..f1c83b62 100644 --- a/crates/aingle_cortex/src/lib.rs +++ b/crates/aingle_cortex/src/lib.rs @@ -164,9 +164,9 @@ #[cfg(feature = "auth")] pub mod auth; pub mod client; -pub mod embedder; #[cfg(feature = "cluster")] pub mod cluster_init; +pub mod embedder; pub mod error; #[cfg(feature = "graphql")] pub mod graphql; diff --git a/crates/aingle_cortex/src/server.rs b/crates/aingle_cortex/src/server.rs index 0fcad465..ed4e25ef 100644 --- a/crates/aingle_cortex/src/server.rs +++ b/crates/aingle_cortex/src/server.rs @@ -130,11 +130,8 @@ impl CortexServer { pub fn new(config: CortexConfig) -> Result { let db_path = resolve_db_path(&config.db_path); let embedder = crate::embedder::build_embedder(config.embed_model.as_deref()); - let state = AppState::with_db_path_and_embedder( - &db_path, - config.audit_log_path.clone(), - embedder, - 
)?; + let state = + AppState::with_db_path_and_embedder(&db_path, config.audit_log_path.clone(), embedder)?; info!("Graph database: {}", db_path); Ok(Self { config, state }) } diff --git a/crates/aingle_cortex/src/service/ground.rs b/crates/aingle_cortex/src/service/ground.rs index f9c4264f..92def967 100644 --- a/crates/aingle_cortex/src/service/ground.rs +++ b/crates/aingle_cortex/src/service/ground.rs @@ -309,10 +309,16 @@ mod tests { #[tokio::test] async fn neural_grounding_is_topical() { let model_dir = std::env::var("INERU_E5_MODEL_DIR").unwrap_or_else(|_| { - concat!(env!("CARGO_MANIFEST_DIR"), "/../ineru/test-models/multilingual-e5-small") - .to_string() + concat!( + env!("CARGO_MANIFEST_DIR"), + "/../ineru/test-models/multilingual-e5-small" + ) + .to_string() }); - if !std::path::Path::new(&model_dir).join("onnx/model.onnx").exists() { + if !std::path::Path::new(&model_dir) + .join("onnx/model.onnx") + .exists() + { eprintln!("skipping: e5 model not found at {model_dir}"); return; } @@ -341,16 +347,22 @@ mod tests { .await .unwrap(); - let topical = ground(&state, "¿Cómo debo cuidar a mi perro?", 5).await.unwrap(); + let topical = ground(&state, "¿Cómo debo cuidar a mi perro?", 5) + .await + .unwrap(); assert_ne!( topical.groundedness, "ungrounded", "a dog-care question must find the dog-care notes; ctx: {:?}", topical.answer_context ); - let off_topic = ground(&state, "¿Cuál fue el resultado de las elecciones presidenciales?", 5) - .await - .unwrap(); + let off_topic = ground( + &state, + "¿Cuál fue el resultado de las elecciones presidenciales?", + 5, + ) + .await + .unwrap(); assert_eq!( off_topic.groundedness, "ungrounded", "an unrelated question must be ungrounded; ctx: {:?}", diff --git a/crates/aingle_cortex/src/service/mod.rs b/crates/aingle_cortex/src/service/mod.rs index 0c1a2c4b..5d3a501b 100644 --- a/crates/aingle_cortex/src/service/mod.rs +++ b/crates/aingle_cortex/src/service/mod.rs @@ -5,11 +5,11 @@ pub mod backlinks; pub mod context; -pub 
mod local_graph; #[cfg(feature = "dag")] pub mod dag; pub mod ground; pub mod ingest; +pub mod local_graph; pub mod proof; pub mod query; pub mod reputation; From 014ec9f0b79a67271231c4e448b849955582aa27 Mon Sep 17 00:00:00 2001 From: ApiliumDevTeam Date: Wed, 1 Jul 2026 17:29:54 +0200 Subject: [PATCH 70/72] style: cargo fmt (ineru embedder/lib) --- crates/ineru/src/embedder.rs | 24 ++++++++++++++---------- crates/ineru/src/lib.rs | 2 +- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/crates/ineru/src/embedder.rs b/crates/ineru/src/embedder.rs index b96fb887..e3508d64 100644 --- a/crates/ineru/src/embedder.rs +++ b/crates/ineru/src/embedder.rs @@ -103,8 +103,8 @@ impl NeuralEmbedder { }; // E5 REQUIRES mean pooling; the fastembed default is Cls. - let model = UserDefinedEmbeddingModel::new(onnx, tokenizer_files) - .with_pooling(Pooling::Mean); + let model = + UserDefinedEmbeddingModel::new(onnx, tokenizer_files).with_pooling(Pooling::Mean); let options = InitOptionsUserDefined::new().with_max_length(512); let embedding = TextEmbedding::try_new_from_user_defined(model, options) @@ -117,9 +117,7 @@ impl NeuralEmbedder { fn embed_one(&self, prefixed: String) -> Embedding { let mut guard = self.model.lock().expect("embedder mutex poisoned"); - let out = guard - .embed(vec![prefixed], None) - .expect("e5 embed failed"); + let out = guard.embed(vec![prefixed], None).expect("e5 embed failed"); let vector = out .into_iter() .next() @@ -197,7 +195,11 @@ mod neural_tests { /// Returns the model dir, or `None` (test skips) if it isn't present. 
fn model_dir() -> Option { let dir = std::env::var("INERU_E5_MODEL_DIR").unwrap_or_else(|_| { - concat!(env!("CARGO_MANIFEST_DIR"), "/test-models/multilingual-e5-small").to_string() + concat!( + env!("CARGO_MANIFEST_DIR"), + "/test-models/multilingual-e5-small" + ) + .to_string() }); let p = PathBuf::from(dir); if p.join("onnx/model.onnx").exists() { @@ -234,10 +236,12 @@ mod neural_tests { // single words cluster too tightly to test meaningfully; realistic // sentence-level inputs produce a clear semantic margin. let query = e.embed_query("¿Cómo debo cuidar a mi perro?"); - let related = - e.embed_passage("Los perros necesitan paseos diarios, agua fresca y una dieta equilibrada."); - let unrelated = - e.embed_passage("La bolsa de valores cerró hoy con fuertes pérdidas para los inversores."); + let related = e.embed_passage( + "Los perros necesitan paseos diarios, agua fresca y una dieta equilibrada.", + ); + let unrelated = e.embed_passage( + "La bolsa de valores cerró hoy con fuertes pérdidas para los inversores.", + ); let near = query.cosine_similarity(&related); let far = query.cosine_similarity(&unrelated); diff --git a/crates/ineru/src/lib.rs b/crates/ineru/src/lib.rs index 5010de92..c5983d5f 100644 --- a/crates/ineru/src/lib.rs +++ b/crates/ineru/src/lib.rs @@ -74,9 +74,9 @@ pub mod types; pub use config::{ConsolidationConfig, LtmConfig, MemoryConfig, StmConfig}; pub use consolidation::Consolidator; -pub use embedder::{Embedder, HashEmbedder}; #[cfg(feature = "neural-embeddings")] pub use embedder::NeuralEmbedder; +pub use embedder::{Embedder, HashEmbedder}; pub use error::{Error, Result}; pub use ltm::{KnowledgeGraph, LongTermMemory}; pub use stm::ShortTermMemory; From e308e51a493d9739256a7b30ebba5f7a217fa9a6 Mon Sep 17 00:00:00 2001 From: ApiliumDevTeam Date: Wed, 1 Jul 2026 17:29:54 +0200 Subject: [PATCH 71/72] =?UTF-8?q?docs(brand):=20new=20Aingle=20logo=20(blu?= =?UTF-8?q?e=E2=86=92orange)=20+=20Grounded=20Retrieval=20&=20Provenance?= 
=?UTF-8?q?=20README=20section?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 55 ++++++++++++ assets/aingle-legacy.svg | 80 ++++++++++++++++++ assets/aingle.svg | 89 +++----------------- assets/logo-legacy.svg | 80 ++++++++++++++++++ assets/logo.svg | 89 +++----------------- crates/aingle_viz/web/assets/logo-legacy.svg | 80 ++++++++++++++++++ crates/aingle_viz/web/assets/logo.svg | 89 +++----------------- 7 files changed, 325 insertions(+), 237 deletions(-) create mode 100644 assets/aingle-legacy.svg create mode 100644 assets/logo-legacy.svg create mode 100644 crates/aingle_viz/web/assets/logo-legacy.svg diff --git a/README.md b/README.md index 271eb2cc..d01074ef 100644 --- a/README.md +++ b/README.md @@ -157,6 +157,61 @@ Interactive D3.js dashboard. Watch your DAG evolve in real-time. Filter, search, --- +## Grounded Retrieval & Provenance + +AIngle includes a retrieval layer that turns a corpus of documents into +**cited, provenance-backed context** for question answering, with an explicit +signal for how well an answer is supported by the underlying sources. The intent +is *grounded* generation: a downstream language model is given only material that +can be traced back to verifiable records, rather than being trusted to recall +facts on its own. + +The pipeline has three stages: + +1. **Ingestion (`aingle_ingest`).** A pure, deterministic extractor maps a + document `(path, content)` to an `Extraction` of **provenanced triples** and + **text chunks**. Structure (headings, sections, links) is preserved as + semantic triples in the graph, while chunks become the unit of retrieval. + Because ingestion is deterministic, the same input always yields the same + triples and chunks — a prerequisite for reproducible provenance. + +2. 
**Provenance anchoring.** Each ingested unit is tied to the append-only DAG + action that recorded it via a `provenance_anchor` — the signed hash of that + action (see the `dag-sign` feature). This makes a retrieved chunk not just + *findable* but *attestable*: you can point at the exact, tamper-evident record + it came from. + +3. **Grounded retrieval (`service::ground`).** A question is embedded and matched + against the ingested chunks; the service returns a `GroundedContext` + containing the cited chunks (with their `provenance_anchor`) and a + **groundedness** classification: + + | groundedness | meaning | + |--------------|---------| + | `grounded` | strong similarity **and** enough corroborating chunks — the answer is well supported by the sources. | + | `weak` | some relevant material, but below the corroboration threshold — answer with caution. | + | `ungrounded` | no sufficiently similar source — the sources do not support an answer. | + + The groundedness signal is derived from the best match plus a minimum number + of corroborating chunks, so a caller (or an LLM prompted with the context) can + explicitly say when the sources *don't* support an answer instead of + hallucinating one. + +These capabilities are exposed to Model-Context-Protocol clients through three +tools (see the [MCP server](#mcp-server) section): + +| Tool | Purpose | +|------|---------| +| `aingle_ingest` | Ingest a document `(path, content)` → provenanced triples + chunks. | +| `aingle_ground` | Answer a question with cited, provenance-backed context + a groundedness signal. | +| `aingle_sources` | List the sources that have been ingested. | + +Because retrieval results carry provenance anchors into the signed DAG history, +grounded answers are **auditable**: every cited passage resolves to a verifiable +action in the ledger. + +--- + ## Clustering AIngle supports multi-node clustering via Raft consensus for high availability and horizontal scalability. 
Writes are replicated to all nodes; reads can be served from any node with optional quorum consistency. diff --git a/assets/aingle-legacy.svg b/assets/aingle-legacy.svg new file mode 100644 index 00000000..e55a326d --- /dev/null +++ b/assets/aingle-legacy.svg @@ -0,0 +1,80 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/assets/aingle.svg b/assets/aingle.svg index e55a326d..b1651e21 100644 --- a/assets/aingle.svg +++ b/assets/aingle.svg @@ -1,80 +1,11 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + diff --git a/assets/logo-legacy.svg b/assets/logo-legacy.svg new file mode 100644 index 00000000..e55a326d --- /dev/null +++ b/assets/logo-legacy.svg @@ -0,0 +1,80 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/assets/logo.svg b/assets/logo.svg index e55a326d..b1651e21 100644 --- a/assets/logo.svg +++ b/assets/logo.svg @@ -1,80 +1,11 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + diff --git a/crates/aingle_viz/web/assets/logo-legacy.svg b/crates/aingle_viz/web/assets/logo-legacy.svg new file mode 100644 index 00000000..e55a326d --- /dev/null +++ b/crates/aingle_viz/web/assets/logo-legacy.svg @@ -0,0 +1,80 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/crates/aingle_viz/web/assets/logo.svg b/crates/aingle_viz/web/assets/logo.svg index e55a326d..b1651e21 100644 --- a/crates/aingle_viz/web/assets/logo.svg +++ 
b/crates/aingle_viz/web/assets/logo.svg @@ -1,80 +1,11 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + From 0b7866510f3849fe8b8d39c210a001e010823732 Mon Sep 17 00:00:00 2001 From: ApiliumDevTeam Date: Wed, 1 Jul 2026 17:29:54 +0200 Subject: [PATCH 72/72] release: v0.7.0 --- Cargo.lock | 26 +++++++++++++------------- crates/aingle/Cargo.toml | 4 ++-- crates/aingle_ai/Cargo.toml | 2 +- crates/aingle_contracts/Cargo.toml | 2 +- crates/aingle_cortex/Cargo.toml | 16 ++++++++-------- crates/aingle_graph/Cargo.toml | 2 +- crates/aingle_ingest/Cargo.toml | 4 ++-- crates/aingle_logic/Cargo.toml | 4 ++-- crates/aingle_minimal/Cargo.toml | 6 +++--- crates/aingle_raft/Cargo.toml | 8 ++++---- crates/aingle_viz/Cargo.toml | 6 +++--- crates/aingle_wal/Cargo.toml | 2 +- crates/aingle_zk/Cargo.toml | 2 +- crates/ineru/Cargo.toml | 2 +- crates/kaneru/Cargo.toml | 4 ++-- 15 files changed, 45 insertions(+), 45 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ace3d3d1..b2743605 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -99,7 +99,7 @@ dependencies = [ [[package]] name = "aingle_ai" -version = "0.6.3" +version = "0.7.0" dependencies = [ "blake2", "candle-core 0.9.2", @@ -121,7 +121,7 @@ dependencies = [ [[package]] name = "aingle_contracts" -version = "0.6.3" +version = "0.7.0" dependencies = [ "blake3", "dashmap 6.1.0", @@ -140,7 +140,7 @@ dependencies = [ [[package]] name = "aingle_cortex" -version = "0.6.3" +version = "0.7.0" dependencies = [ "aingle_graph", "aingle_ingest", @@ -199,7 +199,7 @@ dependencies = [ [[package]] name = "aingle_graph" -version = "0.6.3" +version = "0.7.0" dependencies = [ "bincode", "blake3", @@ -223,7 +223,7 @@ dependencies = [ [[package]] name = "aingle_ingest" -version = "0.6.3" +version = "0.7.0" dependencies = [ "aingle_graph", "blake3", @@ -235,7 +235,7 @@ dependencies = [ [[package]] name = "aingle_logic" 
-version = "0.6.3" +version = "0.7.0" dependencies = [ "aingle_graph", "chrono", @@ -251,7 +251,7 @@ dependencies = [ [[package]] name = "aingle_minimal" -version = "0.6.3" +version = "0.7.0" dependencies = [ "async-io", "async-tungstenite", @@ -293,7 +293,7 @@ dependencies = [ [[package]] name = "aingle_raft" -version = "0.6.3" +version = "0.7.0" dependencies = [ "aingle_graph", "aingle_wal", @@ -314,7 +314,7 @@ dependencies = [ [[package]] name = "aingle_viz" -version = "0.6.3" +version = "0.7.0" dependencies = [ "aingle_graph", "aingle_minimal", @@ -336,7 +336,7 @@ dependencies = [ [[package]] name = "aingle_wal" -version = "0.6.3" +version = "0.7.0" dependencies = [ "bincode", "blake3", @@ -348,7 +348,7 @@ dependencies = [ [[package]] name = "aingle_zk" -version = "0.6.3" +version = "0.7.0" dependencies = [ "blake3", "bulletproofs", @@ -4262,7 +4262,7 @@ dependencies = [ [[package]] name = "ineru" -version = "0.6.3" +version = "0.7.0" dependencies = [ "bincode", "blake3", @@ -4522,7 +4522,7 @@ dependencies = [ [[package]] name = "kaneru" -version = "0.6.3" +version = "0.7.0" dependencies = [ "chrono", "criterion", diff --git a/crates/aingle/Cargo.toml b/crates/aingle/Cargo.toml index f1fe24cd..2966aac9 100644 --- a/crates/aingle/Cargo.toml +++ b/crates/aingle/Cargo.toml @@ -26,14 +26,14 @@ ghost_actor = "0.3.0-alpha.1" ai_hash = { version = ">=0.0.1", path = "../ai_hash", features = ["full"] } aingle_cascade = { version = "0.0.1", path = "../aingle_cascade" } aingle_conductor_api = { version = "0.0.1", path = "../aingle_conductor_api" } -aingle_ai = { version = "0.6", path = "../aingle_ai", optional = true } +aingle_ai = { version = "0.7", path = "../aingle_ai", optional = true } aingle_keystore = { version = "0.0.1", path = "../aingle_keystore" } aingle_p2p = { version = "0.0.1", path = "../aingle_p2p" } aingle_sqlite = { version = "0.0.1", path = "../aingle_sqlite" } aingle_middleware_bytes = "=0.0.3" aingle_state = { version = "0.0.1", path = 
"../aingle_state" } aingle_types = { version = "0.0.1", path = "../aingle_types" } -aingle_cortex = { version = "0.6", path = "../aingle_cortex", default-features = false, features = ["rest"] } +aingle_cortex = { version = "0.7", path = "../aingle_cortex", default-features = false, features = ["rest"] } aingle_wasmer_host = "0.0.1" aingle_websocket = { version = "0.0.1", path = "../aingle_websocket" } wasmer = "=7.0.1" diff --git a/crates/aingle_ai/Cargo.toml b/crates/aingle_ai/Cargo.toml index a087fc5a..5f007c88 100644 --- a/crates/aingle_ai/Cargo.toml +++ b/crates/aingle_ai/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "aingle_ai" -version = "0.6.3" +version = "0.7.0" description = "AI integration layer for AIngle - Ineru, Nested Learning, Kaneru" license = "Apache-2.0 OR LicenseRef-Commercial" repository = "https://github.com/ApiliumCode/aingle" diff --git a/crates/aingle_contracts/Cargo.toml b/crates/aingle_contracts/Cargo.toml index c3ea5927..4c43ac3c 100644 --- a/crates/aingle_contracts/Cargo.toml +++ b/crates/aingle_contracts/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "aingle_contracts" -version = "0.6.3" +version = "0.7.0" description = "Smart Contracts DSL and WASM Runtime for AIngle" license = "Apache-2.0 OR LicenseRef-Commercial" repository = "https://github.com/ApiliumCode/aingle" diff --git a/crates/aingle_cortex/Cargo.toml b/crates/aingle_cortex/Cargo.toml index fe6eacad..84d9bd75 100644 --- a/crates/aingle_cortex/Cargo.toml +++ b/crates/aingle_cortex/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "aingle_cortex" -version = "0.6.3" +version = "0.7.0" description = "Córtex API - REST/GraphQL/SPARQL interface for AIngle semantic graphs" license = "Apache-2.0 OR LicenseRef-Commercial" repository = "https://github.com/ApiliumCode/aingle" @@ -36,11 +36,11 @@ path = "src/main.rs" [dependencies] # Core AIngle crates -aingle_graph = { version = "0.6", path = "../aingle_graph", features = ["sled-backend"] } -aingle_logic = { version = "0.6", path = 
"../aingle_logic" } -aingle_zk = { version = "0.6", path = "../aingle_zk" } -ineru = { version = "0.6", path = "../ineru" } -aingle_ingest = { version = "0.6", path = "../aingle_ingest" } +aingle_graph = { version = "0.7", path = "../aingle_graph", features = ["sled-backend"] } +aingle_logic = { version = "0.7", path = "../aingle_logic" } +aingle_zk = { version = "0.7", path = "../aingle_zk" } +ineru = { version = "0.7", path = "../ineru" } +aingle_ingest = { version = "0.7", path = "../aingle_ingest" } ignore = "0.4" # Web framework @@ -108,8 +108,8 @@ rcgen = { version = "0.13", optional = true } ed25519-dalek = { version = "2", features = ["rand_core"], optional = true } hex = { version = "0.4", optional = true } # Clustering (optional) -aingle_wal = { version = "0.6", path = "../aingle_wal", optional = true } -aingle_raft = { version = "0.6", path = "../aingle_raft", optional = true } +aingle_wal = { version = "0.7", path = "../aingle_wal", optional = true } +aingle_raft = { version = "0.7", path = "../aingle_raft", optional = true } openraft = { version = "0.10.0-alpha.17", features = ["serde", "type-alias"], optional = true } tokio-rustls = { version = "0.26", default-features = false, features = ["ring"], optional = true } rustls-pemfile = { version = "2", optional = true } diff --git a/crates/aingle_graph/Cargo.toml b/crates/aingle_graph/Cargo.toml index c0d45378..905babd6 100644 --- a/crates/aingle_graph/Cargo.toml +++ b/crates/aingle_graph/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "aingle_graph" -version = "0.6.3" +version = "0.7.0" description = "Native GraphDB for AIngle - Semantic triple store with SPO indexes" license = "Apache-2.0 OR LicenseRef-Commercial" repository = "https://github.com/ApiliumCode/aingle" diff --git a/crates/aingle_ingest/Cargo.toml b/crates/aingle_ingest/Cargo.toml index 2a4a6b16..49f39fc0 100644 --- a/crates/aingle_ingest/Cargo.toml +++ b/crates/aingle_ingest/Cargo.toml @@ -1,13 +1,13 @@ [package] name = "aingle_ingest" 
-version = "0.6.3" +version = "0.7.0" description = "Structural extraction of triples and text chunks from markdown/code for AIngle" license = "Apache-2.0 OR LicenseRef-Commercial" edition = "2021" rust-version = "1.83" [dependencies] -aingle_graph = { version = "0.6", path = "../aingle_graph", features = ["dag"] } +aingle_graph = { version = "0.7", path = "../aingle_graph", features = ["dag"] } serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" regex = "1.12" diff --git a/crates/aingle_logic/Cargo.toml b/crates/aingle_logic/Cargo.toml index abf698e7..7ebe5f1e 100644 --- a/crates/aingle_logic/Cargo.toml +++ b/crates/aingle_logic/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "aingle_logic" -version = "0.6.3" +version = "0.7.0" description = "Proof-of-Logic validation engine for AIngle semantic graphs" license = "Apache-2.0 OR LicenseRef-Commercial" repository = "https://github.com/ApiliumCode/aingle" @@ -21,7 +21,7 @@ owl = [] [dependencies] # Graph database -aingle_graph = { version = "0.6", path = "../aingle_graph" } +aingle_graph = { version = "0.7", path = "../aingle_graph" } # Serialization serde = { version = "1.0", features = ["derive"] } diff --git a/crates/aingle_minimal/Cargo.toml b/crates/aingle_minimal/Cargo.toml index b90e7d5e..59767d86 100644 --- a/crates/aingle_minimal/Cargo.toml +++ b/crates/aingle_minimal/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "aingle_minimal" -version = "0.6.3" +version = "0.7.0" description = "Ultra-light AIngle node for IoT devices (<1MB RAM)" license = "Apache-2.0 OR LicenseRef-Commercial" repository = "https://github.com/ApiliumCode/aingle" @@ -124,10 +124,10 @@ embedded-hal = { version = "1.0", optional = true } embedded-hal-async = { version = "1.0", optional = true } # AI Memory (Ineru) -ineru = { version = "0.6", path = "../ineru", optional = true } +ineru = { version = "0.7", path = "../ineru", optional = true } # Kaneru (AI Agent Framework) -kaneru = { version = "0.6", path = "../kaneru", optional = 
true } +kaneru = { version = "0.7", path = "../kaneru", optional = true } # REST API server (lightweight HTTP) tiny_http = { version = "0.12", optional = true } diff --git a/crates/aingle_raft/Cargo.toml b/crates/aingle_raft/Cargo.toml index 7a275ee3..dafd9da0 100644 --- a/crates/aingle_raft/Cargo.toml +++ b/crates/aingle_raft/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "aingle_raft" -version = "0.6.3" +version = "0.7.0" description = "Raft consensus for AIngle clustering" license = "Apache-2.0 OR LicenseRef-Commercial" repository = "https://github.com/ApiliumCode/aingle" @@ -18,7 +18,7 @@ dag = ["aingle_graph/dag"] [dependencies] openraft = { version = "0.10.0-alpha.17", features = ["serde", "type-alias"] } -aingle_wal = { version = "0.6", path = "../aingle_wal" } +aingle_wal = { version = "0.7", path = "../aingle_wal" } serde = { version = "1", features = ["derive"] } serde_json = "1" tokio = { version = "1", features = ["full"] } @@ -28,8 +28,8 @@ tracing = "0.1" chrono = { version = "0.4", features = ["serde"] } futures-util = "0.3" anyerror = "0.1" -aingle_graph = { version = "0.6", path = "../aingle_graph", features = ["sled-backend"] } -ineru = { version = "0.6", path = "../ineru" } +aingle_graph = { version = "0.7", path = "../aingle_graph", features = ["sled-backend"] } +ineru = { version = "0.7", path = "../ineru" } [dev-dependencies] tempfile = "3.26" diff --git a/crates/aingle_viz/Cargo.toml b/crates/aingle_viz/Cargo.toml index 982b3a77..95b09b19 100644 --- a/crates/aingle_viz/Cargo.toml +++ b/crates/aingle_viz/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "aingle_viz" -version = "0.6.3" +version = "0.7.0" description = "DAG Visualization for AIngle - Web-based graph explorer" license = "Apache-2.0 OR LicenseRef-Commercial" repository = "https://github.com/ApiliumCode/aingle" @@ -30,8 +30,8 @@ serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" # Graph data -aingle_graph = { version = "0.6", path = "../aingle_graph" } -aingle_minimal = { 
version = "0.6", path = "../aingle_minimal", default-features = false, features = ["sqlite"] } +aingle_graph = { version = "0.7", path = "../aingle_graph" } +aingle_minimal = { version = "0.7", path = "../aingle_minimal", default-features = false, features = ["sqlite"] } # Utilities log = "0.4" diff --git a/crates/aingle_wal/Cargo.toml b/crates/aingle_wal/Cargo.toml index 05d31a95..3242a041 100644 --- a/crates/aingle_wal/Cargo.toml +++ b/crates/aingle_wal/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "aingle_wal" -version = "0.6.3" +version = "0.7.0" description = "Write-Ahead Log for AIngle clustering and replication" license = "Apache-2.0 OR LicenseRef-Commercial" repository = "https://github.com/ApiliumCode/aingle" diff --git a/crates/aingle_zk/Cargo.toml b/crates/aingle_zk/Cargo.toml index a911f652..9c3c70e0 100644 --- a/crates/aingle_zk/Cargo.toml +++ b/crates/aingle_zk/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "aingle_zk" -version = "0.6.3" +version = "0.7.0" description = "Zero-Knowledge Proofs for AIngle - privacy-preserving cryptographic primitives" license = "Apache-2.0 OR LicenseRef-Commercial" repository = "https://github.com/ApiliumCode/aingle" diff --git a/crates/ineru/Cargo.toml b/crates/ineru/Cargo.toml index f1e8fbe6..36a4189c 100644 --- a/crates/ineru/Cargo.toml +++ b/crates/ineru/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "ineru" -version = "0.6.3" +version = "0.7.0" description = "Ineru: Neural-inspired memory system for AIngle AI agents" license = "Apache-2.0 OR LicenseRef-Commercial" repository = "https://github.com/ApiliumCode/aingle" diff --git a/crates/kaneru/Cargo.toml b/crates/kaneru/Cargo.toml index f5f6175c..b917190b 100644 --- a/crates/kaneru/Cargo.toml +++ b/crates/kaneru/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "kaneru" -version = "0.6.3" +version = "0.7.0" description = "Kaneru: Unified Multi-Agent Execution System for AIngle AI agents" license = "Apache-2.0 OR LicenseRef-Commercial" repository = 
"https://github.com/ApiliumCode/aingle" @@ -31,7 +31,7 @@ serde_json = "1.0" log = "0.4" # AI Memory integration -ineru = { version = "0.6", path = "../ineru", optional = true } +ineru = { version = "0.7", path = "../ineru", optional = true } # Random for exploration (updated from 0.7) rand = { version = "0.9", default-features = false, features = ["std", "thread_rng"] }