From 1aad219c443d665d0b8e7df777436234cc4fb210 Mon Sep 17 00:00:00 2001
From: Jeff Larson <thejefflarson@gmail.com>
Date: Sat, 20 Jun 2026 23:49:43 -0700
Subject: [PATCH 1/2] fix(engine): prune non-CVE library loads from runtime
 evidence (JEF-75)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

After JEF-68 lit up library-load on-node, LibraryLoaded floods the runtime evidence: every
workload loads dozens of libs (libc, libpthread, …) and LibraryLoaded is fingerprinted
verbatim (lib:{name}, uncoarsened). On a 300s TTL the set churns every pass, busting the
per-entry verdict cache → the engine re-judges ~all internet-facing entries every pass
(not just the changed one) and bloats the prompt with irrelevant loads.

Keep the signal, drop the noise: in CveReachabilityAdapter, after correlation, prune each
workload's runtime LibraryLoaded observations to only those whose name matches (via
library_matches) a CVE package on an image it runs. A *vulnerable* library load is the
signal; the rest is dropped before it reaches the prompt or the fingerprint. Reachability is
unaffected (it is set from the same match; pruning only removes loads that never matched).
Non-library behaviors pass through.

Test: a non-CVE load (libpthread) is pruned while the CVE-matching one (libssl↔openssl)
survives. clippy + full lib suite (106) green.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
Claude-Session: https://claude.ai/code/session_01VtjoJttCvBY4dzCoE4f9vP
---
 engine/src/engine/observe/adapter/enrich.rs | 122 ++++++++++++++++++++
 1 file changed, 122 insertions(+)
diff --git a/engine/src/engine/observe/adapter/enrich.rs b/engine/src/engine/observe/adapter/enrich.rs
index 6971112..ac67cc9 100644
--- a/engine/src/engine/observe/adapter/enrich.rs
+++ b/engine/src/engine/observe/adapter/enrich.rs
@@ -223,6 +223,73 @@ impl Adapter for CveReachabilityAdapter {
                 }
             });
         }
+
+        // Prune library-load noise (JEF-75): a LibraryLoaded only matters if it's a
+        // *vulnerable* library — its name matches a CVE package on an image the workload
+        // runs. Drop the rest (libc, libpthread, …) so they don't bloat the model prompt
+        // or churn the verdict fingerprint (every process loads dozens of libraries, on a
+        // 300s TTL). Reachability is already set above from the same match, so this only
+        // removes loads that never contributed one.
+        let mut cve_pkgs_by_image: std::collections::HashMap<String, Vec<String>> =
+            std::collections::HashMap::new();
+        let mut pkgs_by_workload: std::collections::HashMap<String, Vec<String>> =
+            std::collections::HashMap::new();
+        {
+            let g = graph.inner();
+            for idx in g.node_indices() {
+                if let Some(node @ Node::Image(img)) = g.node_weight(idx) {
+                    let pkgs: Vec<String> = img
+                        .vulnerabilities
+                        .iter()
+                        .filter_map(|v| v.pkg_name.clone())
+                        .collect();
+                    if !pkgs.is_empty() {
+                        cve_pkgs_by_image.insert(node.key().0, pkgs);
+                    }
+                }
+            }
+            // Each workload inherits the CVE packages of every image it runs.
+            for idx in g.node_indices() {
+                let Some(node @ Node::Workload(_)) = g.node_weight(idx) else {
+                    continue;
+                };
+                let mut pkgs: Vec<String> = Vec::new();
+                for edge in g.edges(idx) {
+                    if matches!(edge.weight().relation, Relation::RunsImage)
+                        && let Some(img_key) = g.node_weight(edge.target()).map(Node::key)
+                        && let Some(p) = cve_pkgs_by_image.get(&img_key.0)
+                    {
+                        pkgs.extend(p.iter().cloned());
+                    }
+                }
+                if !pkgs.is_empty() {
+                    pkgs_by_workload.insert(node.key().0, pkgs);
+                }
+            }
+        }
+        let workload_keys: Vec<NodeKey> = graph
+            .inner()
+            .node_indices()
+            .filter_map(|idx| match graph.inner().node_weight(idx) {
+                Some(node @ Node::Workload(_)) => Some(node.key()),
+                _ => None,
+            })
+            .collect();
+        for key in workload_keys {
+            graph.update_node(&key, |node| {
+                if let Node::Workload(w) = node {
+                    let pkgs = pkgs_by_workload.get(&key.0);
+                    // Keep every non-library behavior; keep a LibraryLoaded only if it
+                    // matches a CVE package the workload's images carry.
+                    w.runtime.retain(|obs| match &obs.behavior {
+                        Behavior::LibraryLoaded { name } => {
+                            pkgs.is_some_and(|ps| ps.iter().any(|pkg| library_matches(name, pkg)))
+                        }
+                        _ => true,
+                    });
+                }
+            });
+        }
     }
 }
 
@@ -611,6 +678,61 @@ mod tests {
         );
     }
 
+    #[test]
+    fn non_cve_library_loads_are_pruned_from_runtime() {
+        // The workload loads two libraries: libssl (matches the openssl CVE) and
+        // libpthread (matches nothing). After the pipeline only the vulnerable-library
+        // load survives on the workload — the rest is dropped so it never reaches the
+        // prompt or the verdict fingerprint (JEF-75).
+        let web = pod(json!({
+            "apiVersion": "v1", "kind": "Pod",
+            "metadata": {"name": "web", "namespace": "app", "labels": {"app": "web"}},
+            "spec": {"containers": [{"name": "web", "image": "web:1"}]}
+        }));
+        let lib = |name: &str| RuntimeObservation {
+            attribution: Attribution::by_namespaced_name("app", "web"),
+            source: None,
+            observed_at_ms: None,
+            behavior: Behavior::LibraryLoaded { name: name.into() },
+        };
+        let snap = Snapshot {
+            pods: vec![web],
+            image_vulns: vec![ImageVulnerabilities {
+                image: "web:1".into(),
+                vulnerabilities: vec![Vulnerability {
+                    id: "CVE-2022-0001".into(),
+                    severity: crate::engine::graph::Severity::Critical,
+                    pkg_name: Some("openssl".into()),
+                    ..Default::default()
+                }],
+            }],
+            runtime_events: vec![lib("libssl.so.3"), lib("libpthread.so.0")],
+            ..Default::default()
+        };
+        let graph = super::super::build_graph(&snap, &super::super::default_adapters());
+        let surviving: Vec<String> = graph
+            .inner()
+            .node_weights()
+            .find_map(|n| match n {
+                Node::Workload(w) => Some(
+                    w.runtime
+                        .iter()
+                        .filter_map(|o| match &o.behavior {
+                            Behavior::LibraryLoaded { name } => Some(name.clone()),
+                            _ => None,
+                        })
+                        .collect::<Vec<_>>(),
+                ),
+                _ => None,
+            })
+            .expect("workload node exists");
+        assert_eq!(
+            surviving,
+            vec!["libssl.so.3".to_string()],
+            "only the CVE-matching library load survives the prune"
+        );
+    }
+
     #[test]
     fn no_load_is_not_observed() {
         // The image is scanned but nothing loaded → NotObserved (distinct from Unknown).

From 58dc55f03d9cbfc2826ac73270064974ae2dc687 Mon Sep 17 00:00:00 2001
From: Jeff Larson <thejefflarson@gmail.com>
Date: Sat, 20 Jun 2026 23:54:56 -0700
Subject: [PATCH 2/2] test(engine): cover multi-image union + zero-CVE drop for
 JEF-75 prune

Architect nit: the highest-risk false-drop path (a load matching a SECOND image's CVE on a
multi-image workload) and the zero-CVE-package drop-all branch lacked coverage. Factor out
the `lib` and `surviving_libs` test helpers and add both cases.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
Claude-Session: https://claude.ai/code/session_01VtjoJttCvBY4dzCoE4f9vP
---
 engine/src/engine/observe/adapter/enrich.rs | 141 +++++++++++++++-----
 1 file changed, 111 insertions(+), 30 deletions(-)

diff --git a/engine/src/engine/observe/adapter/enrich.rs b/engine/src/engine/observe/adapter/enrich.rs
index ac67cc9..907df00 100644
--- a/engine/src/engine/observe/adapter/enrich.rs
+++ b/engine/src/engine/observe/adapter/enrich.rs
@@ -678,23 +678,48 @@ mod tests {
         );
     }
 
+    /// A `LibraryLoaded` observation on pod app/web (the fixture these tests use).
+    fn lib(name: &str) -> RuntimeObservation {
+        RuntimeObservation {
+            attribution: Attribution::by_namespaced_name("app", "web"),
+            source: None,
+            observed_at_ms: None,
+            behavior: Behavior::LibraryLoaded { name: name.into() },
+        }
+    }
+
+    /// The `LibraryLoaded` names surviving on the (single) workload after the full
+    /// adapter pipeline — i.e. what's left after the JEF-75 prune.
+    fn surviving_libs(snap: Snapshot) -> Vec<String> {
+        let graph = super::super::build_graph(&snap, &super::super::default_adapters());
+        graph
+            .inner()
+            .node_weights()
+            .find_map(|n| match n {
+                Node::Workload(w) => Some(
+                    w.runtime
+                        .iter()
+                        .filter_map(|o| match &o.behavior {
+                            Behavior::LibraryLoaded { name } => Some(name.clone()),
+                            _ => None,
+                        })
+                        .collect::<Vec<_>>(),
+                ),
+                _ => None,
+            })
+            .expect("workload node exists")
+    }
+
     #[test]
     fn non_cve_library_loads_are_pruned_from_runtime() {
-        // The workload loads two libraries: libssl (matches the openssl CVE) and
-        // libpthread (matches nothing). After the pipeline only the vulnerable-library
-        // load survives on the workload — the rest is dropped so it never reaches the
-        // prompt or the verdict fingerprint (JEF-75).
+        // libssl matches the openssl CVE; libpthread matches nothing → only the
+        // vulnerable-library load survives, so the noise never reaches the prompt or the
+        // verdict fingerprint (JEF-75).
         let web = pod(json!({
             "apiVersion": "v1", "kind": "Pod",
             "metadata": {"name": "web", "namespace": "app", "labels": {"app": "web"}},
             "spec": {"containers": [{"name": "web", "image": "web:1"}]}
         }));
-        let lib = |name: &str| RuntimeObservation {
-            attribution: Attribution::by_namespaced_name("app", "web"),
-            source: None,
-            observed_at_ms: None,
-            behavior: Behavior::LibraryLoaded { name: name.into() },
-        };
         let snap = Snapshot {
             pods: vec![web],
             image_vulns: vec![ImageVulnerabilities {
@@ -709,27 +734,83 @@ mod tests {
             runtime_events: vec![lib("libssl.so.3"), lib("libpthread.so.0")],
             ..Default::default()
         };
-        let graph = super::super::build_graph(&snap, &super::super::default_adapters());
-        let surviving: Vec<String> = graph
-            .inner()
-            .node_weights()
-            .find_map(|n| match n {
-                Node::Workload(w) => Some(
-                    w.runtime
-                        .iter()
-                        .filter_map(|o| match &o.behavior {
-                            Behavior::LibraryLoaded { name } => Some(name.clone()),
-                            _ => None,
-                        })
-                        .collect::<Vec<_>>(),
-                ),
-                _ => None,
-            })
-            .expect("workload node exists");
+        assert_eq!(surviving_libs(snap), vec!["libssl.so.3".to_string()]);
+    }
+
+    #[test]
+    fn library_load_matching_any_of_a_workloads_images_survives() {
+        // Multi-image workload (app + sidecar): a load matching the SECOND image's CVE
+        // must survive even though the first image carries a different CVE — proving the
+        // prune unions CVE packages across ALL RunsImage edges before deciding (the
+        // false-drop path that would silently weaken reachability).
+        let web = pod(json!({
+            "apiVersion": "v1", "kind": "Pod",
+            "metadata": {"name": "web", "namespace": "app", "labels": {"app": "web"}},
+            "spec": {"containers": [
+                {"name": "web", "image": "web:1"},
+                {"name": "sidecar", "image": "sidecar:1"}
+            ]}
+        }));
+        let cve = |id: &str, pkg: &str| Vulnerability {
+            id: id.into(),
+            severity: crate::engine::graph::Severity::Critical,
+            pkg_name: Some(pkg.into()),
+            ..Default::default()
+        };
+        let snap = Snapshot {
+            pods: vec![web],
+            image_vulns: vec![
+                ImageVulnerabilities {
+                    image: "web:1".into(),
+                    vulnerabilities: vec![cve("CVE-A", "openssl")],
+                },
+                ImageVulnerabilities {
+                    image: "sidecar:1".into(),
+                    vulnerabilities: vec![cve("CVE-B", "log4j-core")],
+                },
+            ],
+            runtime_events: vec![
+                lib("libssl.so.3"),
+                lib("log4j-core-2.14.jar"),
+                lib("libpthread.so.0"),
+            ],
+            ..Default::default()
+        };
+        let mut got = surviving_libs(snap);
+        got.sort();
         assert_eq!(
-            surviving,
-            vec!["libssl.so.3".to_string()],
-            "only the CVE-matching library load survives the prune"
+            got,
+            vec!["libssl.so.3".to_string(), "log4j-core-2.14.jar".to_string()],
+            "loads matching EITHER image's CVE survive; the unrelated load is pruned"
+        );
+    }
+
+    #[test]
+    fn workload_with_no_cve_packages_drops_all_loads() {
+        // A CVE with no pkg_name can't be correlated → no load can match → all pruned
+        // (the workload has no package list, so `pkgs.is_some_and(..)` is false).
+        let web = pod(json!({
+            "apiVersion": "v1", "kind": "Pod",
+            "metadata": {"name": "web", "namespace": "app", "labels": {"app": "web"}},
+            "spec": {"containers": [{"name": "web", "image": "web:1"}]}
+        }));
+        let snap = Snapshot {
+            pods: vec![web],
+            image_vulns: vec![ImageVulnerabilities {
+                image: "web:1".into(),
+                vulnerabilities: vec![Vulnerability {
+                    id: "CVE-2022-0002".into(),
+                    severity: crate::engine::graph::Severity::Critical,
+                    pkg_name: None,
+                    ..Default::default()
+                }],
+            }],
+            runtime_events: vec![lib("libssl.so.3")],
+            ..Default::default()
+        };
+        assert!(
+            surviving_libs(snap).is_empty(),
+            "no correlatable CVE package → every library load pruned"
         );
     }