Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,10 @@
metadata digest mismatches, committed observation mismatches, and retained
material unavailability while preserving the coarse replay posture for
existing callers.
- `echo-dind-tests` now includes a process-kill WAL crashpoint witness that
kills child processes at two points, after WAL material is committed and
before the transaction commits, proving recovery preserves committed history
and excludes uncommitted tails.
- `warp-core` can now materialize WAL projection records into deterministic
WARP graph facts with root, writer epoch, segment, commit-anchor, and recovery
certificate nodes plus typed graph edges suitable for WSC serialization. The
Expand Down Expand Up @@ -100,6 +104,9 @@
- `cargo xtask test-slice durability-release` now includes the exact
`materialization_outbox_recovery_returns_typed_posture` witness, locking
typed materialization outbox recovery posture into the release gate.
- `cargo xtask test-slice durability-release` now includes the exact
`wal_process_crashpoints` witness, promoting the process-kill WAL crashpoint
runner from future descriptor to release-gate evidence.
- `warp-core` trusted runtime hosts now configure runtime WAL through
`TrustedRuntimeWalConfig`, including in-memory and filesystem-backed
adapters. `TrustedRuntimeWalStoreKind` exposes the configured adapter kind as
Expand Down
232 changes: 232 additions & 0 deletions crates/echo-dind-tests/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -141,3 +141,235 @@ impl EchoKernel {
.canonical_state_hash()
}
}

#[cfg(test)]
mod tests {
use std::{
env, fs,
path::{Path, PathBuf},
process::{self, Child, Command},
thread,
time::Duration,
};

use warp_core::{
causal_wal::{
build_submission_acceptance_transaction, recover_filesystem_store,
recover_submission_index, AffectedFrontier, AffectedFrontierKind, FilesystemWalStore,
Lsn, PayloadCodecId, PayloadSchemaId, RecoveredSubmissionPosture, RecoveryAccessMode,
RecoveryTailPosture, SubmissionAcceptanceRecord, WalAppendAuthority,
WalCommittedTransaction, WalDurabilityMode, WalSegmentId, WalStorePort,
WalTransactionBuilder, WalTransactionId, WalTransactionKind, WriterEpochId,
WriterEpochRequest,
},
Hash,
};

const CHILD_MODE_ENV: &str = "ECHO_DIND_WAL_CRASHPOINT_CHILD";
const WAL_ROOT_ENV: &str = "ECHO_DIND_WAL_CRASHPOINT_ROOT";
const READY_MARKER_ENV: &str = "ECHO_DIND_WAL_CRASHPOINT_READY";
const AFTER_COMMIT_MODE: &str = "after_wal_commit";
const BEFORE_COMMIT_MODE: &str = "before_wal_commit";

/// Process-kill crashpoint witness for the filesystem WAL.
///
/// The same test binary plays two roles. When `CHILD_MODE_ENV` is set the
/// process is a crashpoint child and hands control to
/// `run_wal_crashpoint_child`, which never returns. Otherwise it is the parent:
/// it spawns and kills one child per crashpoint, then recovers each WAL root
/// read-only and checks what recovery reports.
#[test]
fn wal_process_crashpoints() {
    if let Ok(child_mode) = env::var(CHILD_MODE_ENV) {
        run_wal_crashpoint_child(&child_mode);
    }

    // Start from an empty scratch directory; a missing directory is fine here.
    let scratch = crashpoint_root();
    let _ = fs::remove_dir_all(&scratch);
    fs::create_dir_all(&scratch).expect("create crashpoint root");

    // Crashpoint 1: the child was killed after appending a whole transaction.
    {
        let wal_root = scratch.join("after-wal-commit");
        let expected = acceptance("after-wal-commit");
        run_and_kill_child(AFTER_COMMIT_MODE, &wal_root);

        let report = recover_filesystem_store(&wal_root, RecoveryAccessMode::ReadOnly)
            .expect("recover after-commit WAL root");
        let index = recover_submission_index(&report).expect("recover after-commit index");
        let entry = index
            .get(&expected.submission_id)
            .expect("after-commit submission recovered");

        assert_eq!(report.tail_posture, RecoveryTailPosture::Clean);
        assert_eq!(entry.acceptance, expected);
        assert_eq!(entry.posture, RecoveredSubmissionPosture::AcceptedPending);
    }

    // Crashpoint 2: the child was killed after appending only an uncommitted frame.
    {
        let wal_root = scratch.join("before-wal-commit");
        let expected = acceptance("before-wal-commit");
        run_and_kill_child(BEFORE_COMMIT_MODE, &wal_root);

        let report = recover_filesystem_store(&wal_root, RecoveryAccessMode::ReadOnly)
            .expect("recover before-commit WAL root");
        let index = recover_submission_index(&report).expect("recover before-commit index");

        assert_eq!(report.tail_posture, RecoveryTailPosture::WouldTruncateAll);
        assert!(index.get(&expected.submission_id).is_none());
        assert!(index.is_empty());
    }

    fs::remove_dir_all(&scratch).expect("remove crashpoint root");
}

fn run_wal_crashpoint_child(mode: &str) -> ! {
let root = PathBuf::from(env::var_os(WAL_ROOT_ENV).expect("WAL root env"));
let marker = PathBuf::from(env::var_os(READY_MARKER_ENV).expect("ready marker env"));
fs::create_dir_all(&root).expect("create child WAL root");
let mut store = FilesystemWalStore::open(&root, WalSegmentId::from_raw(1))
.expect("open child WAL store");
store
.acquire_writer_epoch(writer_epoch_request())
.expect("acquire writer epoch");
match mode {
AFTER_COMMIT_MODE => {
store
.append_transaction(submission_transaction(
"after-wal-commit",
Lsn::from_raw(0),
))
.expect("append committed child transaction");
}
BEFORE_COMMIT_MODE => {
let transaction = submission_transaction("before-wal-commit", Lsn::from_raw(0));
store
.append_uncommitted_frame(epoch_id(), transaction.frames[0].clone())
.expect("append uncommitted child frame");
}
other => panic!("unknown child crashpoint mode: {other}"),
}
fs::write(marker, b"ready").expect("write ready marker");
loop {
thread::sleep(Duration::from_secs(60));
}
}

/// Spawns this test binary as a crashpoint child, waits until it reports
/// ready, then kills it.
///
/// The child is started with the filter `tests::wal_process_crashpoints
/// --exact --nocapture` and receives `mode`, `wal_root`, and the ready-marker
/// path (`<wal_root>/ready`) through environment variables.
///
/// # Panics
///
/// Panics if the WAL root cannot be created, the child cannot be spawned,
/// killed, or waited on, or if the child's exit status reports success.
fn run_and_kill_child(mode: &str, wal_root: &Path) {
    fs::create_dir_all(wal_root).expect("create WAL root");
    let ready_marker = wal_root.join("ready");

    let test_binary = env::current_exe().expect("current test binary");
    let mut command = Command::new(test_binary);
    command
        .args(["tests::wal_process_crashpoints", "--exact", "--nocapture"])
        .env(CHILD_MODE_ENV, mode)
        .env(WAL_ROOT_ENV, wal_root)
        .env(READY_MARKER_ENV, &ready_marker);

    let mut child = command.spawn().expect("spawn WAL crashpoint child");
    wait_for_ready_marker(&mut child, &ready_marker);

    child.kill().expect("kill WAL crashpoint child");
    let exit_status = child.wait().expect("wait for killed child");
    assert!(!exit_status.success(), "child should have been killed");
}

/// Blocks until `marker` exists on disk, polling `child` in between.
///
/// Returns as soon as the marker file is observed. The marker is checked
/// before the child's status on every iteration.
///
/// # Panics
///
/// * If the child exits before the marker appears.
/// * If polling the child fails.
/// * If the marker has not appeared after `POLL_ATTEMPTS` polls spaced
///   `POLL_INTERVAL` apart; the child is killed and reaped first.
fn wait_for_ready_marker(child: &mut Child, marker: &Path) {
    const POLL_ATTEMPTS: u32 = 200;
    const POLL_INTERVAL: Duration = Duration::from_millis(50);

    for _ in 0..POLL_ATTEMPTS {
        if marker.exists() {
            return;
        }
        if let Some(status) = child.try_wait().expect("poll child") {
            panic!("child exited before ready marker: {status}");
        }
        thread::sleep(POLL_INTERVAL);
    }

    // Best-effort teardown before reporting the timeout. `kill` alone leaves
    // the process unreaped, so also `wait` on it; errors are ignored because
    // the panic below is the failure that matters.
    let _ = child.kill();
    let _ = child.wait();
    panic!("timed out waiting for ready marker at {}", marker.display());
}

/// Returns the scratch directory for this run:
/// `<current dir>/target/echo-dind-wal-crashpoints/<pid>`.
///
/// The process id is part of the path, so different processes get different
/// directories.
///
/// # Panics
///
/// Panics if the current working directory cannot be determined.
fn crashpoint_root() -> PathBuf {
    let mut root = env::current_dir().expect("current dir");
    root.push("target");
    root.push("echo-dind-wal-crashpoints");
    root.push(process::id().to_string());
    root
}

/// Builds the submission-acceptance transaction for the fixture named `label`,
/// starting at `first_lsn`.
///
/// The transaction id, acceptance record, and single affected frontier are all
/// derived from `label`; authority and kind are both `SubmissionIntake`.
///
/// # Panics
///
/// Panics if `build_submission_acceptance_transaction` returns an error.
fn submission_transaction(label: &str, first_lsn: Lsn) -> WalCommittedTransaction {
    let transaction_builder = builder(
        transaction_id(label),
        first_lsn,
        WalAppendAuthority::SubmissionIntake,
        WalTransactionKind::SubmissionIntake,
    );
    let accepted = acceptance(label);
    let frontiers = vec![frontier(label)];

    build_submission_acceptance_transaction(transaction_builder, accepted, frontiers)
        .expect("build submission transaction")
}

/// Builds the acceptance record for the fixture named `label`.
///
/// Every digest is computed by `digest` over a `"<field>:<label>"` string, so
/// the same label always yields an equal record in parent and child.
fn acceptance(label: &str) -> SubmissionAcceptanceRecord {
    let submission_id = digest(&format!("submission:{label}"));
    let canonical_envelope_digest = digest(&format!("envelope:{label}"));
    let idempotency_key = digest(&format!("idempotency:{label}"));
    let acceptance_evidence_digest = digest(&format!("accepted:{label}"));

    SubmissionAcceptanceRecord {
        submission_id,
        canonical_envelope_digest,
        idempotency_key_digest: Some(idempotency_key),
        acceptance_evidence_digest,
    }
}

/// Creates a `WalTransactionBuilder` for the crashpoint fixtures.
///
/// The caller chooses the transaction id, first LSN, append authority, and
/// transaction kind. Everything else is fixed: the fixture writer epoch,
/// segment 1, label-derived digests, and `StrictFilesystem` durability.
fn builder(
    transaction_id: WalTransactionId,
    first_lsn: Lsn,
    authority: WalAppendAuthority,
    transaction_kind: WalTransactionKind,
) -> WalTransactionBuilder {
    let writer_epoch = epoch_id();
    let segment = WalSegmentId::from_raw(1);
    let genesis_frame = digest("genesis-frame");
    let genesis_commit = digest("genesis-commit");
    let codec = PayloadCodecId::from_hash(digest("codec:echo-dind-wal-crashpoint"));
    let schema = PayloadSchemaId::from_hash(digest("schema:echo-dind-wal-crashpoint"));
    let domain = digest("domain:echo-dind-wal-crashpoint");

    // Argument order follows `WalTransactionBuilder::new`; note that the kind
    // is passed before the authority. The meaning of the two literal `1`
    // arguments is defined by that constructor, which is not visible here.
    WalTransactionBuilder::new(
        writer_epoch,
        segment,
        transaction_id,
        transaction_kind,
        authority,
        first_lsn,
        genesis_frame,
        genesis_commit,
        WalDurabilityMode::StrictFilesystem,
        codec,
        schema,
        1,
        1,
        domain,
    )
}

/// Builds the writer-epoch request the crashpoint child uses to acquire its
/// epoch.
///
/// The request starts at LSN 0, names no previous epoch, and fills every
/// identity and evidence field with a fixed label-derived digest.
fn writer_epoch_request() -> WriterEpochRequest {
    let storage_fencing_token = digest("fence:echo-dind-wal-crashpoint");
    let process_identity = digest("process:echo-dind-wal-crashpoint");
    let host_identity = digest("host:echo-dind-wal-crashpoint");
    let lease_or_lock_evidence = digest("lease:echo-dind-wal-crashpoint");

    WriterEpochRequest {
        epoch_id: epoch_id(),
        storage_fencing_token,
        process_identity,
        host_identity,
        started_at_lsn: Lsn::from_raw(0),
        previous_epoch_id: None,
        previous_epoch_final_commit_digest: None,
        lease_or_lock_evidence,
    }
}

/// Builds the `SubmissionQueue` frontier for the fixture named `label`, with
/// before/after digests derived from the label.
fn frontier(label: &str) -> AffectedFrontier {
    let before_digest = digest(&format!("{label}:submission:before"));
    let after_digest = digest(&format!("{label}:submission:after"));

    AffectedFrontier {
        kind: AffectedFrontierKind::SubmissionQueue,
        before_digest,
        after_digest,
    }
}

/// Derives the transaction id for the fixture named `label` from the digest of
/// `"tx:<label>"`.
fn transaction_id(label: &str) -> WalTransactionId {
    let seed = format!("tx:{label}");
    WalTransactionId::from_hash(digest(&seed))
}

/// Returns the fixed writer epoch id shared by every crashpoint fixture.
fn epoch_id() -> WriterEpochId {
    let epoch_digest = digest("epoch:echo-dind-wal-crashpoint");
    WriterEpochId::from_hash(epoch_digest)
}

/// Folds `label` into a deterministic 32-byte fixture value.
///
/// Byte `i` of the label is added, together with the low byte of `i`, into
/// slot `i % 32` using wrapping arithmetic. An empty label yields all zeroes.
/// This is a plain mixing function for test labels, not a cryptographic hash.
fn digest(label: &str) -> Hash {
    label
        .bytes()
        .enumerate()
        .fold([0_u8; 32], |mut slots, (position, byte)| {
            let slot = &mut slots[position % 32];
            // `position as u8` deliberately keeps only the low eight bits.
            *slot = slot.wrapping_add(byte).wrapping_add(position as u8);
            slots
        })
}
}
4 changes: 3 additions & 1 deletion crates/warp-core/src/causal_wal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1305,6 +1305,8 @@ pub struct WalManifest {
pub enum WalCrashpointExecution {
/// Crashpoint is simulated in-process by Rust fixtures.
SimulatedInProcess,
/// Crashpoint is exercised by killing a child process.
ProcessKill,
/// Crashpoint is reserved for a future process-kill runner.
ProcessKillFuture,
}
Expand Down Expand Up @@ -1386,7 +1388,7 @@ const WAL_CRASHPOINT_MANIFEST: &[WalCrashpointDescriptor] = &[
WalCrashpointDescriptor {
name: "process.kill.after_wal_commit",
boundary: WalCrashpointBoundary::Process,
execution: WalCrashpointExecution::ProcessKillFuture,
execution: WalCrashpointExecution::ProcessKill,
},
];

Expand Down
4 changes: 2 additions & 2 deletions crates/warp-core/tests/causal_wal_hardening_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1868,7 +1868,7 @@ fn crashpoint_manifest_lists_checkpoint_boundaries() {
}

#[test]
fn crashpoint_manifest_marks_process_kill_as_future_until_runner_exists() {
fn crashpoint_manifest_marks_process_kill_as_available_runner() {
let process_entries = wal_crashpoint_manifest()
.iter()
.filter(|entry| entry.boundary == WalCrashpointBoundary::Process)
Expand All @@ -1877,7 +1877,7 @@ fn crashpoint_manifest_marks_process_kill_as_future_until_runner_exists() {
assert!(!process_entries.is_empty());
assert!(process_entries
.iter()
.all(|entry| entry.execution == WalCrashpointExecution::ProcessKillFuture));
.all(|entry| entry.execution == WalCrashpointExecution::ProcessKill));
}

#[test]
Expand Down
12 changes: 6 additions & 6 deletions docs/design/causal-wal-hardening-matrix.md
Original file line number Diff line number Diff line change
Expand Up @@ -429,23 +429,23 @@ Test plan:

User story:

As Echo, I need a future CLI/BATS crash runner contract that mirrors the Rust
fixture semantics before it shells out to real processes.
As Echo, I need a process-kill crash runner contract that mirrors the Rust
fixture semantics while exercising real parent/child process boundaries.

Acceptance criteria:

- Rust crash fixtures define canonical crashpoint names.
- A test-visible crashpoint manifest lists supported boundaries.
- The manifest distinguishes simulated in-process cuts from future process-kill
cuts.
- No CLI runner claims more than the Rust fixture proves.
- The manifest distinguishes simulated in-process cuts from process-kill cuts.
- No process runner claims more than the Rust fixture proves.

Test plan:

- `crashpoint_manifest_lists_submission_boundaries`
- `crashpoint_manifest_lists_tick_boundaries`
- `crashpoint_manifest_lists_checkpoint_boundaries`
- `crashpoint_manifest_marks_process_kill_as_future_until_runner_exists`
- `crashpoint_manifest_marks_process_kill_as_available_runner`
- `wal_process_crashpoints`

## Slice 62: Filesystem Strict Sync Evidence

Expand Down
5 changes: 5 additions & 0 deletions docs/topics/WAL.md
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,11 @@ artifact or metadata mismatches, committed observation mismatches, and retained
material unavailability so restart logic can retry, repair, or obstruct without
blindly replaying effects.

The process-kill crashpoint runner exercises the filesystem WAL across real
parent/child process boundaries. A killed child that already committed WAL
material recovers as committed history; a killed child that wrote only
uncommitted frames is reported through the recovery tail posture, and those
frames do not enter accepted or decided history.

## Evidence

The runtime ACK and recovery witnesses live in
Expand Down
2 changes: 1 addition & 1 deletion docs/workflows.md
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,7 @@ The repo also exposes maintenance commands via `cargo xtask …`:
- `cargo xtask test-slice contract-path-release` runs the v0.1 local contract-host release witness: installed contract pipeline replay, reference trusted host loop, and the serious external consumer fixture.
- `cargo xtask test-slice runtime-wal-ack` runs the fast runtime WAL-backed ACK witness: app-facing acceptance rollback, scheduler tick receipt invariant checks, scheduler tick commit-before-publish, recovered indexes, CLI submission posture JSON, stale-claim guard, and generated man-page check.
- `cargo xtask test-slice durable-runtime-wal` runs the release-grade filesystem runtime WAL durability witness: filesystem ACK recovery, filesystem failure atomicity, CLI submission posture JSON, stale-claim guard, and generated man-page check.
- `cargo xtask test-slice durability-release` runs the joined WAL/WSC release witness: filesystem runtime WAL durability, WSC retained evidence recovery, app-safe missing-retention posture, recovery plan bootstrap posture, committed-only durability index rebuilds, typed materialization outbox recovery, WSC topology recovery, topology WAL recovery, typed missing-material obstruction, stale-claim guards, doctrine checks, and generated man-page freshness. This is a release-gate slice, not the fastest local edit loop.
- `cargo xtask test-slice durability-release` runs the joined WAL/WSC release witness: filesystem runtime WAL durability, WSC retained evidence recovery, app-safe missing-retention posture, recovery plan bootstrap posture, committed-only durability index rebuilds, typed materialization outbox recovery, process-kill WAL crashpoints, WSC topology recovery, topology WAL recovery, typed missing-material obstruction, stale-claim guards, doctrine checks, and generated man-page freshness. This is a release-gate slice, not the fastest local edit loop.
- `cargo xtask pr-preflight` runs the default changed-scope pre-PR gate against `origin/main`.
- `cargo xtask pr-preflight --full` runs the broader explicit full pre-PR gate.
- `cargo xtask dind` runs the DIND (Deterministic Ironclad Nightmare Drills) harness locally.
Expand Down
7 changes: 6 additions & 1 deletion xtask/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -825,6 +825,7 @@ fn build_test_slice_commands(slice: TestSlice) -> Vec<Command> {
"causal_wal_tests",
"materialization_outbox_recovery_returns_typed_posture",
]),
cargo_command(["test", "-p", "echo-dind-tests", "wal_process_crashpoints"]),
cargo_command([
"test",
"-p",
Expand Down Expand Up @@ -6743,7 +6744,7 @@ mod tests {
#[test]
fn test_slice_durability_release_stays_explicit() {
let commands = build_test_slice_commands(TestSlice::DurabilityRelease);
assert_eq!(commands.len(), 16);
assert_eq!(commands.len(), 17);

let expected = [
(
Expand Down Expand Up @@ -6882,6 +6883,10 @@ mod tests {
"materialization_outbox_recovery_returns_typed_posture",
],
),
(
"cargo",
vec!["test", "-p", "echo-dind-tests", "wal_process_crashpoints"],
),
(
"cargo",
vec![
Expand Down
Loading