From 1aad219c443d665d0b8e7df777436234cc4fb210 Mon Sep 17 00:00:00 2001 From: Jeff Larson Date: Sat, 20 Jun 2026 23:49:43 -0700 Subject: [PATCH 1/2] fix(engine): prune non-CVE library loads from runtime evidence (JEF-75) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After JEF-68 lit up library-load on-node, LibraryLoaded floods the runtime evidence: every workload loads dozens of libs (libc, libpthread, …) and LibraryLoaded is fingerprinted verbatim (lib:{name}, uncoarsened). On a 300s TTL the set churns every pass, busting the per-entry verdict cache → the engine re-judged ~all internet-facing entries every pass (not just the changed one), and bloated the prompt with irrelevant loads. Keep the signal, drop the noise: in CveReachabilityAdapter, after correlation, prune each workload's runtime LibraryLoaded to only those matching a CVE package on an image it runs (library_matches). A *vulnerable* library load is the signal; the rest is dropped before it reaches the prompt or the fingerprint. Reachability is unaffected (set from the same match; pruning only removes loads that never matched). Non-library behaviors pass through. Test: a non-CVE load (libpthread) is pruned while the CVE-matching one (libssl↔openssl) survives. clippy + full lib suite (106) green. 
Co-Authored-By: Claude Opus 4.8 (1M context) Claude-Session: https://claude.ai/code/session_01VtjoJttCvBY4dzCoE4f9vP --- engine/src/engine/observe/adapter/enrich.rs | 122 ++++++++++++++++++++ 1 file changed, 122 insertions(+) diff --git a/engine/src/engine/observe/adapter/enrich.rs b/engine/src/engine/observe/adapter/enrich.rs index 6971112..ac67cc9 100644 --- a/engine/src/engine/observe/adapter/enrich.rs +++ b/engine/src/engine/observe/adapter/enrich.rs @@ -223,6 +223,73 @@ impl Adapter for CveReachabilityAdapter { } }); } + + // Prune library-load noise (JEF-75): a LibraryLoaded only matters if it's a + // *vulnerable* library — its name matches a CVE package on an image the workload + // runs. Drop the rest (libc, libpthread, …) so they don't bloat the model prompt + // or churn the verdict fingerprint (every process loads dozens of libraries, on a + // 300s TTL). Reachability is already set above from the same match, so this only + // removes loads that never contributed one. + let mut cve_pkgs_by_image: std::collections::HashMap> = + std::collections::HashMap::new(); + let mut pkgs_by_workload: std::collections::HashMap> = + std::collections::HashMap::new(); + { + let g = graph.inner(); + for idx in g.node_indices() { + if let Some(node @ Node::Image(img)) = g.node_weight(idx) { + let pkgs: Vec = img + .vulnerabilities + .iter() + .filter_map(|v| v.pkg_name.clone()) + .collect(); + if !pkgs.is_empty() { + cve_pkgs_by_image.insert(node.key().0, pkgs); + } + } + } + // Each workload inherits the CVE packages of every image it runs. 
+ for idx in g.node_indices() { + let Some(node @ Node::Workload(_)) = g.node_weight(idx) else { + continue; + }; + let mut pkgs: Vec = Vec::new(); + for edge in g.edges(idx) { + if matches!(edge.weight().relation, Relation::RunsImage) + && let Some(img_key) = g.node_weight(edge.target()).map(Node::key) + && let Some(p) = cve_pkgs_by_image.get(&img_key.0) + { + pkgs.extend(p.iter().cloned()); + } + } + if !pkgs.is_empty() { + pkgs_by_workload.insert(node.key().0, pkgs); + } + } + } + let workload_keys: Vec = graph + .inner() + .node_indices() + .filter_map(|idx| match graph.inner().node_weight(idx) { + Some(node @ Node::Workload(_)) => Some(node.key()), + _ => None, + }) + .collect(); + for key in workload_keys { + graph.update_node(&key, |node| { + if let Node::Workload(w) = node { + let pkgs = pkgs_by_workload.get(&key.0); + // Keep every non-library behavior; keep a LibraryLoaded only if it + // matches a CVE package the workload's images carry. + w.runtime.retain(|obs| match &obs.behavior { + Behavior::LibraryLoaded { name } => { + pkgs.is_some_and(|ps| ps.iter().any(|pkg| library_matches(name, pkg))) + } + _ => true, + }); + } + }); + } } } @@ -611,6 +678,61 @@ mod tests { ); } + #[test] + fn non_cve_library_loads_are_pruned_from_runtime() { + // The workload loads two libraries: libssl (matches the openssl CVE) and + // libpthread (matches nothing). After the pipeline only the vulnerable-library + // load survives on the workload — the rest is dropped so it never reaches the + // prompt or the verdict fingerprint (JEF-75). 
+ let web = pod(json!({ "apiVersion": "v1", "kind": "Pod", "metadata": {"name": "web", "namespace": "app", "labels": {"app": "web"}}, "spec": {"containers": [{"name": "web", "image": "web:1"}]} })); + let lib = |name: &str| RuntimeObservation { + attribution: Attribution::by_namespaced_name("app", "web"), + source: None, + observed_at_ms: None, + behavior: Behavior::LibraryLoaded { name: name.into() }, + }; + let snap = Snapshot { + pods: vec![web], + image_vulns: vec![ImageVulnerabilities { + image: "web:1".into(), + vulnerabilities: vec![Vulnerability { + id: "CVE-2022-0001".into(), + severity: crate::engine::graph::Severity::Critical, + pkg_name: Some("openssl".into()), + ..Default::default() + }], + }], + runtime_events: vec![lib("libssl.so.3"), lib("libpthread.so.0")], + ..Default::default() + }; + let graph = super::super::build_graph(&snap, &super::super::default_adapters()); + let surviving: Vec<String> = graph + .inner() + .node_weights() + .find_map(|n| match n { + Node::Workload(w) => Some( + w.runtime + .iter() + .filter_map(|o| match &o.behavior { + Behavior::LibraryLoaded { name } => Some(name.clone()), + _ => None, + }) + .collect::<Vec<_>>(), + ), + _ => None, + }) + .expect("workload node exists"); + assert_eq!( + surviving, + vec!["libssl.so.3".to_string()], + "only the CVE-matching library load survives the prune" + ); + } + #[test] fn no_load_is_not_observed() { // The image is scanned but nothing loaded → NotObserved (distinct from Unknown). From 58dc55f03d9cbfc2826ac73270064974ae2dc687 Mon Sep 17 00:00:00 2001 From: Jeff Larson Date: Sat, 20 Jun 2026 23:54:56 -0700 Subject: [PATCH 2/2] test(engine): cover multi-image union + zero-CVE drop for JEF-75 prune Architect nit: the highest-risk false-drop path (a load matching a SECOND image's CVE on a multi-image workload) and the zero-CVE drop-all branch lacked coverage. Factor a surviving_libs/lib helper and add both cases. 
Co-Authored-By: Claude Opus 4.8 (1M context) Claude-Session: https://claude.ai/code/session_01VtjoJttCvBY4dzCoE4f9vP --- engine/src/engine/observe/adapter/enrich.rs | 141 +++++++++++++++----- 1 file changed, 111 insertions(+), 30 deletions(-) diff --git a/engine/src/engine/observe/adapter/enrich.rs b/engine/src/engine/observe/adapter/enrich.rs index ac67cc9..907df00 100644 --- a/engine/src/engine/observe/adapter/enrich.rs +++ b/engine/src/engine/observe/adapter/enrich.rs @@ -678,23 +678,48 @@ mod tests { ); } + /// A `LibraryLoaded` observation on pod app/web (the fixture these tests use). + fn lib(name: &str) -> RuntimeObservation { + RuntimeObservation { + attribution: Attribution::by_namespaced_name("app", "web"), + source: None, + observed_at_ms: None, + behavior: Behavior::LibraryLoaded { name: name.into() }, + } + } + + /// The `LibraryLoaded` names surviving on the (single) workload after the full + /// adapter pipeline — i.e. what's left after the JEF-75 prune. + fn surviving_libs(snap: Snapshot) -> Vec<String> { + let graph = super::super::build_graph(&snap, &super::super::default_adapters()); + graph + .inner() + .node_weights() + .find_map(|n| match n { + Node::Workload(w) => Some( + w.runtime + .iter() + .filter_map(|o| match &o.behavior { + Behavior::LibraryLoaded { name } => Some(name.clone()), + _ => None, + }) + .collect::<Vec<_>>(), + ), + _ => None, + }) + .expect("workload node exists") + } + #[test] fn non_cve_library_loads_are_pruned_from_runtime() { - // The workload loads two libraries: libssl (matches the openssl CVE) and - // libpthread (matches nothing). After the pipeline only the vulnerable-library - // load survives on the workload — the rest is dropped so it never reaches the - // prompt or the verdict fingerprint (JEF-75). + // libssl matches the openssl CVE; libpthread matches nothing → only the + // vulnerable-library load survives, so the noise never reaches the prompt or the + // verdict fingerprint (JEF-75). 
let web = pod(json!({ "apiVersion": "v1", "kind": "Pod", "metadata": {"name": "web", "namespace": "app", "labels": {"app": "web"}}, "spec": {"containers": [{"name": "web", "image": "web:1"}]} })); - let lib = |name: &str| RuntimeObservation { - attribution: Attribution::by_namespaced_name("app", "web"), - source: None, - observed_at_ms: None, - behavior: Behavior::LibraryLoaded { name: name.into() }, - }; let snap = Snapshot { pods: vec![web], image_vulns: vec![ImageVulnerabilities { @@ -709,27 +734,83 @@ mod tests { runtime_events: vec![lib("libssl.so.3"), lib("libpthread.so.0")], ..Default::default() }; - let graph = super::super::build_graph(&snap, &super::super::default_adapters()); - let surviving: Vec<String> = graph - .inner() - .node_weights() - .find_map(|n| match n { - Node::Workload(w) => Some( - w.runtime - .iter() - .filter_map(|o| match &o.behavior { - Behavior::LibraryLoaded { name } => Some(name.clone()), - _ => None, - }) - .collect::<Vec<_>>(), - ), - _ => None, - }) - .expect("workload node exists"); + assert_eq!(surviving_libs(snap), vec!["libssl.so.3".to_string()]); + } + + #[test] + fn library_load_matching_any_of_a_workloads_images_survives() { + // Multi-image workload (app + sidecar): a load matching the SECOND image's CVE + // must survive even though the first image carries a different CVE — proving the + // prune unions CVE packages across ALL RunsImage edges before deciding (the + // false-drop path that would silently weaken reachability). 
+ let web = pod(json!({ + "apiVersion": "v1", "kind": "Pod", + "metadata": {"name": "web", "namespace": "app", "labels": {"app": "web"}}, + "spec": {"containers": [ + {"name": "web", "image": "web:1"}, + {"name": "sidecar", "image": "sidecar:1"} + ]} + })); + let cve = |id: &str, pkg: &str| Vulnerability { + id: id.into(), + severity: crate::engine::graph::Severity::Critical, + pkg_name: Some(pkg.into()), + ..Default::default() + }; + let snap = Snapshot { + pods: vec![web], + image_vulns: vec![ + ImageVulnerabilities { + image: "web:1".into(), + vulnerabilities: vec![cve("CVE-A", "openssl")], + }, + ImageVulnerabilities { + image: "sidecar:1".into(), + vulnerabilities: vec![cve("CVE-B", "log4j-core")], + }, + ], + runtime_events: vec![ + lib("libssl.so.3"), + lib("log4j-core-2.14.jar"), + lib("libpthread.so.0"), + ], + ..Default::default() + }; + let mut got = surviving_libs(snap); + got.sort(); assert_eq!( - surviving, - vec!["libssl.so.3".to_string()], - "only the CVE-matching library load survives the prune" + got, + vec!["libssl.so.3".to_string(), "log4j-core-2.14.jar".to_string()], + "loads matching EITHER image's CVE survive; the unrelated load is pruned" + ); + } + + #[test] + fn workload_with_no_cve_packages_drops_all_loads() { + // A CVE with no pkg_name can't be correlated → no load can match → all pruned + // (the `pkgs.is_none()` branch of the prune). 
+ let web = pod(json!({ + "apiVersion": "v1", "kind": "Pod", + "metadata": {"name": "web", "namespace": "app", "labels": {"app": "web"}}, + "spec": {"containers": [{"name": "web", "image": "web:1"}]} + })); + let snap = Snapshot { + pods: vec![web], + image_vulns: vec![ImageVulnerabilities { + image: "web:1".into(), + vulnerabilities: vec![Vulnerability { + id: "CVE-2022-0002".into(), + severity: crate::engine::graph::Severity::Critical, + pkg_name: None, + ..Default::default() + }], + }], + runtime_events: vec![lib("libssl.so.3")], + ..Default::default() + }; + assert!( + surviving_libs(snap).is_empty(), + "no correlatable CVE package → every library load pruned" ); }