-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathtest_coverage_matrix.py
More file actions
85 lines (70 loc) · 3.66 KB
/
Copy pathtest_coverage_matrix.py
File metadata and controls
85 lines (70 loc) · 3.66 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
"""Gated pytest wrapper around the live coverage-matrix harness.
This is the *thin* pytest entry point for the full live verification in
``coverage_matrix.py`` (which is a standalone script, not collected by pytest).
It deploys the six-dimension fixture to a real managed runtime, queries the live
engine, classifies what the runtime actually did, and asserts the receipt — then
tears the deployment down.
It is gated **twice** so it never runs by accident:
* the ``live`` marker — excluded by CI's ``pytest -m "not live"``; and
* an explicit ``AGENTLIFT_LIVE_COVERAGE=1`` opt-in — so even ``pytest -m live``
skips it unless you mean it (it is slow + billable, and deploys a full
multi-agent team, not a single agent).
What it asserts vs. what the receipt records (per the 4-state model): the test
gates the *deterministic* signal — nothing FAILs, and the dimensions that fire on
every run land EXERCISED. The strict, headline "6/6 EXERCISED" claim lives in the
committed receipts under ``receipts/`` (model tool-choice is nondeterministic, so a
hard 6/6 assertion would be flaky; the shared-MCP cell is allowed to be WIRED).
Run it deliberately:
AGENTLIFT_LIVE_COVERAGE=1 pytest -m live tests/live/test_coverage_matrix.py
"""
import importlib.util
import os
import pytest
pytestmark = pytest.mark.live
HERE = os.path.dirname(os.path.abspath(__file__))
_OPT_IN = os.environ.get("AGENTLIFT_LIVE_COVERAGE") == "1"
def _load_harness():
"""Import coverage_matrix.py by path (it has no ``test_`` prefix, so pytest
does not collect it, and ``tests/live`` is not a package)."""
path = os.path.join(HERE, "coverage_matrix.py")
spec = importlib.util.spec_from_file_location("coverage_matrix", path)
mod = importlib.util.module_from_spec(spec)
spec.loader.exec_module(mod)
return mod
def _assert_matrix(matrix, *, allow_wired_shared_mcp=True):
"""Common gate: nothing FAILed, the deterministic dimensions are EXERCISED."""
dims = matrix["dimensions"]
assert matrix["any_error"] is False, matrix
for name, cell in dims.items():
assert cell["state"] != "FAIL", (name, cell)
# deterministic on every run (delegation event, the agent's own private MCP
# server, and the skill markers always fire for this fixture)
for name in ("agents", "subagents", "individual_mcp", "shared_skill", "individual_skill"):
assert dims[name]["state"] == "PASS-EXERCISED", (name, dims[name])
# shared MCP is model-tool-choice dependent; the explicit prompt drives it to
# EXERCISED, but accept WIRED so the gate is not flaky.
allowed = {"PASS-EXERCISED", "PASS-WIRED"} if allow_wired_shared_mcp else {"PASS-EXERCISED"}
assert dims["shared_mcp"]["state"] in allowed, dims["shared_mcp"]
@pytest.mark.skipif(not _OPT_IN, reason="set AGENTLIFT_LIVE_COVERAGE=1 to run the billable coverage matrix")
@pytest.mark.skipif(not os.environ.get("ANTHROPIC_API_KEY"), reason="ANTHROPIC_API_KEY not set")
def test_anthropic_coverage_matrix_exercised():
h = _load_harness()
h.deploy_anthropic()
try:
receipt = h.query_anthropic()
_assert_matrix(receipt["matrix"])
finally:
h.teardown_anthropic()
@pytest.mark.skipif(not _OPT_IN, reason="set AGENTLIFT_LIVE_COVERAGE=1 to run the billable coverage matrix")
@pytest.mark.skipif(
not (os.environ.get("GOOGLE_CLOUD_PROJECT") and os.environ.get("AGENTLIFT_GCP_STAGING_BUCKET")),
reason="Google ADC / GOOGLE_CLOUD_PROJECT / AGENTLIFT_GCP_STAGING_BUCKET not set",
)
def test_google_coverage_matrix_exercised():
h = _load_harness()
h.deploy_google()
try:
receipt = h.query_google()
_assert_matrix(receipt["matrix"])
finally:
h.teardown_google()