From 3a29e1b0983b87c37693b3e67d1f79906f0bc629 Mon Sep 17 00:00:00 2001 From: stacknil Date: Sun, 5 Jul 2026 09:15:57 +0800 Subject: [PATCH] test(parser): add property and fuzz coverage --- .github/workflows/ci.yml | 29 ++ CHANGELOG.md | 5 + CMakeLists.txt | 35 ++- docs/parser-contract.md | 2 + docs/quality-gates.md | 3 + fuzz/parser_fuzz.cpp | 47 +++ src/parser/program_dispatch.cpp | 24 +- src/parser/program_dispatch.hpp | 14 + tests/fuzz/README.md | 23 ++ tests/fuzz/corpus/parser/journalctl_sshd | 1 + tests/fuzz/corpus/parser/malformed_source_ip | 1 + tests/fuzz/corpus/parser/pam_faillock | 1 + tests/fuzz/corpus/parser/sshd_failed_password | 1 + tests/fuzz/corpus/parser/sudo_command | 1 + tests/fuzz/corpus/parser/unsupported_program | 1 + tests/test_parser_properties.cpp | 269 ++++++++++++++++++ 16 files changed, 444 insertions(+), 13 deletions(-) create mode 100644 fuzz/parser_fuzz.cpp create mode 100644 tests/fuzz/README.md create mode 100644 tests/fuzz/corpus/parser/journalctl_sshd create mode 100644 tests/fuzz/corpus/parser/malformed_source_ip create mode 100644 tests/fuzz/corpus/parser/pam_faillock create mode 100644 tests/fuzz/corpus/parser/sshd_failed_password create mode 100644 tests/fuzz/corpus/parser/sudo_command create mode 100644 tests/fuzz/corpus/parser/unsupported_program create mode 100644 tests/test_parser_properties.cpp diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 18f892f..07674f5 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -30,3 +30,32 @@ jobs: - name: Test run: ctest --test-dir build --build-config Release --output-on-failure + + fuzz-smoke: + name: Parser fuzz smoke + runs-on: ubuntu-latest + + steps: + - name: Checkout + uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0 + + - name: Configure fuzz target + env: + CC: clang + CXX: clang++ + run: >- + cmake -S . 
-B build-fuzz + -D CMAKE_BUILD_TYPE=RelWithDebInfo + -D BUILD_TESTING=OFF + -D LOGLENS_BUILD_FUZZERS=ON + + - name: Build fuzz target + run: cmake --build build-fuzz --target fuzz_parser --config RelWithDebInfo + + - name: Run bounded parser fuzz smoke + run: >- + ./build-fuzz/fuzz_parser + -runs=2000 + -max_len=1024 + -timeout=2 + tests/fuzz/corpus/parser diff --git a/CHANGELOG.md b/CHANGELOG.md index 5f3f4ae..34c8d2a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,11 @@ All notable user-visible changes should be recorded here. input order and inclusive window-boundary behavior. - Added parser regression coverage for malformed source-IP token classification. +- Added deterministic parser property tests for registry-order independence, + generated malformed tokens, failure taxonomy stability, and arbitrary-byte + result invariants. +- Added an optional Clang libFuzzer parser target with a sanitized seed corpus + and bounded Ubuntu CI smoke campaign. ### Changed diff --git a/CMakeLists.txt b/CMakeLists.txt index 1fd1406..580c0e2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -6,7 +6,9 @@ set(CMAKE_CXX_STANDARD 20) set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CXX_EXTENSIONS OFF) -add_library(loglens_lib +option(LOGLENS_BUILD_FUZZERS "Build Clang libFuzzer targets" OFF) + +set(LOGLENS_LIBRARY_SOURCES src/config.cpp src/parser.cpp src/parser/failure_classifier.cpp @@ -23,6 +25,8 @@ add_library(loglens_lib src/report.cpp ) +add_library(loglens_lib ${LOGLENS_LIBRARY_SOURCES}) + target_include_directories(loglens_lib PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/src" @@ -38,6 +42,10 @@ if(BUILD_TESTING) target_link_libraries(test_parser PRIVATE loglens_lib) add_test(NAME parser COMMAND test_parser) + add_executable(test_parser_properties tests/test_parser_properties.cpp) + target_link_libraries(test_parser_properties PRIVATE loglens_lib) + add_test(NAME parser_properties COMMAND test_parser_properties) + add_executable(test_detector tests/test_detector.cpp) 
target_link_libraries(test_detector PRIVATE loglens_lib) add_test(NAME detector COMMAND test_detector) @@ -65,3 +73,28 @@ if(BUILD_TESTING) ${CMAKE_CURRENT_BINARY_DIR}/report_contract_output ) endif() + +if(LOGLENS_BUILD_FUZZERS) + if(NOT CMAKE_CXX_COMPILER_ID STREQUAL "Clang") + message(FATAL_ERROR "LOGLENS_BUILD_FUZZERS requires Clang with libFuzzer support") + endif() + + add_library(loglens_fuzz_lib STATIC ${LOGLENS_LIBRARY_SOURCES}) + target_include_directories(loglens_fuzz_lib PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/src") + target_compile_options( + loglens_fuzz_lib + PRIVATE + -fsanitize=fuzzer-no-link,address,undefined -fno-sanitize-recover=undefined # UBSan checks recover by default; make findings abort so the fuzz run fails + -fno-omit-frame-pointer + ) + + add_executable(fuzz_parser fuzz/parser_fuzz.cpp) + target_link_libraries(fuzz_parser PRIVATE loglens_fuzz_lib) + target_compile_options( + fuzz_parser + PRIVATE + -fsanitize=fuzzer,address,undefined -fno-sanitize-recover=undefined # keep the harness consistent with loglens_fuzz_lib: UBSan findings abort + -fno-omit-frame-pointer + ) + target_link_options(fuzz_parser PRIVATE -fsanitize=fuzzer,address,undefined) +endif() diff --git a/docs/parser-contract.md b/docs/parser-contract.md index a72c575..8900356 100644 --- a/docs/parser-contract.md +++ b/docs/parser-contract.md @@ -107,6 +107,8 @@ Parsed successes and audit-only events remain reportable but do not count as bru | Artifact | What it proves | | --- | --- | | [`tests/test_parser.cpp`](../tests/test_parser.cpp) | Unit-level parser expectations, malformed-line behavior, mode aliases, fixture-matrix counts, and unknown-pattern buckets | +| [`tests/test_parser_properties.cpp`](../tests/test_parser_properties.cpp) | Deterministic generated checks for handler-registry order independence, malformed source tokens, failure taxonomy stability, and arbitrary-byte result invariants | +| [`tests/fuzz/README.md`](../tests/fuzz/README.md) and [`tests/fuzz/corpus/parser`](../tests/fuzz/corpus/parser) | Optional Clang libFuzzer harness instructions plus a sanitized parser seed corpus used by the bounded CI fuzz smoke campaign | | 
[`tests/test_detector.cpp`](../tests/test_detector.cpp) | Detection signal mapping and default counting behavior after parsing | | [`assets/parser_fixture_matrix_syslog.log`](../assets/parser_fixture_matrix_syslog.log) | Syslog known/unknown parser matrix | | [`assets/parser_fixture_matrix_journalctl_short_full.log`](../assets/parser_fixture_matrix_journalctl_short_full.log) | Journalctl short-full known/unknown parser matrix | diff --git a/docs/quality-gates.md b/docs/quality-gates.md index 1ae0e73..9e70398 100644 --- a/docs/quality-gates.md +++ b/docs/quality-gates.md @@ -13,6 +13,7 @@ The main review principle is: | --- | --- | --- | --- | | Supported input formats are explicit | [`parser-contract.md`](./parser-contract.md), [`parser-conformance-matrix.md`](./parser-conformance-matrix.md) | `ctest --test-dir build --output-on-failure` through `test_parser`; fixture anchors in `assets/parser_fixture_matrix_syslog.log` and `assets/parser_fixture_matrix_journalctl_short_full.log` | Reviewer can name the two supported formats and see known/unknown line behavior in fixtures | | Parser coverage is visible | [`parser-coverage-notes.md`](./parser-coverage-notes.md), [`tests/fixtures/parser_matrix/noisy_auth_expected.json`](../tests/fixtures/parser_matrix/noisy_auth_expected.json) | `test_parser` compares noisy-auth coverage output to the checked-in expected summary | Reviewer can see parsed lines, skipped blanks, warnings, failure categories, and unknown-pattern buckets | +| Parser handler dispatch remains robust | [`parser-contract.md`](./parser-contract.md), [`tests/test_parser_properties.cpp`](../tests/test_parser_properties.cpp), [`tests/fuzz/README.md`](../tests/fuzz/README.md) | `test_parser_properties` checks all handler-order permutations and generated failure invariants; Ubuntu CI runs a bounded Clang libFuzzer smoke corpus | Reviewer can see that registry order does not change results and malformed or arbitrary bytes remain bounded by parser result invariants 
| | Unsupported evidence does not silently become detector evidence | [`parser-contract.md`](./parser-contract.md), [`rule-catalog.md`](./rule-catalog.md), [`case-study-linux-auth-bruteforce.md`](./case-study-linux-auth-bruteforce.md) | `test_parser` covers unknown-pattern warnings; `test_detector` covers signal-boundary behavior | Reviewer can explain why unsupported lines remain warnings instead of findings | | Report artifacts are deterministic | [`report-artifacts.md`](./report-artifacts.md), report-contract fixtures under [`tests/fixtures/report_contracts`](../tests/fixtures/report_contracts) | `test_report_contracts` compares generated `report.md`, `report.json`, `findings.csv`, and `warnings.csv` against golden fixtures | Reviewer can regenerate reports and see schema or text changes as explicit snapshot diffs | | Findings are explainable | [`rule-catalog.md`](./rule-catalog.md), [`report-artifacts.md`](./report-artifacts.md) | `test_report` checks JSON finding fields; report-contract fixtures lock `finding_id`, `episode_index`, `rule_id`, `window_start`, `window_end`, `threshold`, `observed_count`, `grouping_key`, `evidence_event_ids`, and `verdict_boundary` | Reviewer can trace a finding from rule context back to source line IDs and see the non-verdict boundary | @@ -31,6 +32,8 @@ Use the smallest command set that answers the review question: | Build and unit/regression tests | `cmake -S . 
-B build && cmake --build build && ctest --test-dir build --output-on-failure` | | Multi-config local test run | `ctest --test-dir build -C Debug --output-on-failure` | | Report-contract snapshot verification | `ctest --test-dir build -C Debug -R report_contracts --output-on-failure` | +| Parser property checks | `ctest --test-dir build -C Debug -R parser_properties --output-on-failure` | +| Bounded parser fuzz smoke (Clang) | See [`tests/fuzz/README.md`](../tests/fuzz/README.md) | | Performance envelope reproduction | `pwsh -File scripts/benchmark-performance-envelope.ps1` | | Fast performance smoke check | `pwsh -File scripts/benchmark-performance-envelope.ps1 -LineCounts 1000 -Runs 1 -WarmupRuns 0 -SkipBuild` | diff --git a/fuzz/parser_fuzz.cpp b/fuzz/parser_fuzz.cpp new file mode 100644 index 0000000..995ed9a --- /dev/null +++ b/fuzz/parser_fuzz.cpp @@ -0,0 +1,47 @@ +#include "parser.hpp" + +#include +#include +#include +#include +#include +#include + +namespace { + +void enforce_result_invariants(const std::optional& event, + const std::string& reason) { // aborts when a parse result violates the harness invariants + if (event.has_value()) { + if (event->event_type == loglens::EventType::Unknown || event->program.empty()) { + std::abort(); + } + return; + } + + if (reason.empty()) { + std::abort(); + } +} + +} // namespace + +extern "C" int LLVMFuzzerTestOneInput(const std::uint8_t* data, std::size_t size) { // libFuzzer entry point: feeds the raw bytes to both parser modes as one line + const std::string line(reinterpret_cast(data), size); + static const std::array parsers{{ // built once and reused across inputs; parse_line is only invoked through const references + loglens::AuthLogParser(loglens::ParserConfig{ + loglens::InputMode::SyslogLegacy, + 2026}), + loglens::AuthLogParser(loglens::ParserConfig{ + loglens::InputMode::JournalctlShortFull, + std::nullopt}), + }}; + + for (const auto& parser : parsers) { + std::string reason; + auto category = loglens::ParserFailureCategory::KnownProgramUnknownMessage; + const auto event = parser.parse_line(line, 1, &reason, &category); + enforce_result_invariants(event, reason); + } + + return 0; +} diff --git a/src/parser/program_dispatch.cpp 
b/src/parser/program_dispatch.cpp index 883212a..d5751e7 100644 --- a/src/parser/program_dispatch.cpp +++ b/src/parser/program_dispatch.cpp @@ -7,19 +7,10 @@ #include "parser/sudo_handlers.hpp" #include -#include namespace loglens::parser_internal { namespace { -using ProgramMatcher = bool (*)(std::string_view program); -using ProgramHandler = HandlerResult (*)(const Event& source); - -struct HandlerRegistration { - ProgramMatcher matches; - ProgramHandler handle; -}; - bool is_sshd(std::string_view program) { return program == "sshd"; } @@ -44,7 +35,7 @@ bool is_su(std::string_view program) { return program == "su"; } -constexpr std::array handler_registry{{ +constexpr std::array handler_registry{{ {is_sshd, handle_sshd_event}, {is_pam_unix, handle_pam_unix_event}, {is_pam_faillock, handle_pam_faillock_event}, @@ -55,8 +46,13 @@ constexpr std::array handler_registry{{ } // namespace -HandlerResult dispatch_program(const Event& source) { - for (const auto& registration : handler_registry) { +std::span program_handler_registry() { + return handler_registry; +} + +HandlerResult dispatch_program(const Event& source, + std::span registry) { + for (const auto& registration : registry) { if (registration.matches(source.program)) { return registration.handle(source); } @@ -65,4 +61,8 @@ HandlerResult dispatch_program(const Event& source) { return classify_unrecognized_event(source); } +HandlerResult dispatch_program(const Event& source) { + return dispatch_program(source, program_handler_registry()); +} + } // namespace loglens::parser_internal diff --git a/src/parser/program_dispatch.hpp b/src/parser/program_dispatch.hpp index ab74726..421ba45 100644 --- a/src/parser/program_dispatch.hpp +++ b/src/parser/program_dispatch.hpp @@ -2,8 +2,22 @@ #include "parser/handler_result.hpp" +#include +#include + namespace loglens::parser_internal { +using ProgramMatcher = bool (*)(std::string_view program); +using ProgramHandler = HandlerResult (*)(const Event& source); + +struct 
ProgramHandlerRegistration { + ProgramMatcher matches; + ProgramHandler handle; +}; + +std::span program_handler_registry(); HandlerResult dispatch_program(const Event& source); +HandlerResult dispatch_program(const Event& source, + std::span registry); } // namespace loglens::parser_internal diff --git a/tests/fuzz/README.md b/tests/fuzz/README.md new file mode 100644 index 0000000..d6e107e --- /dev/null +++ b/tests/fuzz/README.md @@ -0,0 +1,23 @@ +# Parser fuzz harness + +The parser fuzz target is optional and requires Clang with libFuzzer support. +It exercises both supported input modes and treats these result invariants as +crash conditions: + +- every emitted event has a normalized event type and non-empty program +- every rejected line has a non-empty parser failure reason +- arbitrary input must not terminate the parser unexpectedly + +Configure and run a bounded local campaign: + +```bash +CC=clang CXX=clang++ cmake -S . -B build-fuzz \ + -D CMAKE_BUILD_TYPE=RelWithDebInfo \ + -D BUILD_TESTING=OFF \ + -D LOGLENS_BUILD_FUZZERS=ON +cmake --build build-fuzz --target fuzz_parser +./build-fuzz/fuzz_parser -runs=2000 -max_len=1024 -timeout=2 tests/fuzz/corpus/parser +``` + +The checked-in corpus is sanitized and intentionally small. CI uses it only as +a bounded smoke campaign; longer local campaigns can reuse the same target. libFuzzer saves newly discovered inputs into the first corpus directory it is given, so for longer runs pass a scratch directory ahead of `tests/fuzz/corpus/parser` to keep the checked-in seeds unchanged. 
diff --git a/tests/fuzz/corpus/parser/journalctl_sshd b/tests/fuzz/corpus/parser/journalctl_sshd new file mode 100644 index 0000000..0512b0a --- /dev/null +++ b/tests/fuzz/corpus/parser/journalctl_sshd @@ -0,0 +1 @@ +Tue 2026-03-10 08:40:03 UTC example-host sshd[4403]: Invalid user user-d from 203.0.113.62 port 50102 diff --git a/tests/fuzz/corpus/parser/malformed_source_ip b/tests/fuzz/corpus/parser/malformed_source_ip new file mode 100644 index 0000000..8f83fc4 --- /dev/null +++ b/tests/fuzz/corpus/parser/malformed_source_ip @@ -0,0 +1 @@ +Mar 10 08:40:04 example-host sshd[4404]: Failed password for user-e from 203.0.113.999 port 50103 ssh2 diff --git a/tests/fuzz/corpus/parser/pam_faillock b/tests/fuzz/corpus/parser/pam_faillock new file mode 100644 index 0000000..948223d --- /dev/null +++ b/tests/fuzz/corpus/parser/pam_faillock @@ -0,0 +1 @@ +Mar 10 08:40:01 example-host pam_faillock(sshd:auth): Authentication failure for user user-b from 203.0.113.61 diff --git a/tests/fuzz/corpus/parser/sshd_failed_password b/tests/fuzz/corpus/parser/sshd_failed_password new file mode 100644 index 0000000..74ca203 --- /dev/null +++ b/tests/fuzz/corpus/parser/sshd_failed_password @@ -0,0 +1 @@ +Mar 10 08:40:00 example-host sshd[4400]: Failed password for user-a from 203.0.113.60 port 50101 ssh2 diff --git a/tests/fuzz/corpus/parser/sudo_command b/tests/fuzz/corpus/parser/sudo_command new file mode 100644 index 0000000..b755475 --- /dev/null +++ b/tests/fuzz/corpus/parser/sudo_command @@ -0,0 +1 @@ +Mar 10 08:40:02 example-host sudo[4402]: user-c : TTY=pts/1 ; PWD=/home/user/project ; USER=root ; COMMAND=/usr/bin/id diff --git a/tests/fuzz/corpus/parser/unsupported_program b/tests/fuzz/corpus/parser/unsupported_program new file mode 100644 index 0000000..522157a --- /dev/null +++ b/tests/fuzz/corpus/parser/unsupported_program @@ -0,0 +1 @@ +Mar 10 08:40:05 example-host cron[4405]: job completed diff --git a/tests/test_parser_properties.cpp b/tests/test_parser_properties.cpp new 
file mode 100644 index 0000000..c7073b1 --- /dev/null +++ b/tests/test_parser_properties.cpp @@ -0,0 +1,269 @@ +#include "parser.hpp" +#include "parser/program_dispatch.hpp" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace { + +void expect(bool condition, const std::string& message) { + if (!condition) { + throw std::runtime_error(message); + } +} + +loglens::AuthLogParser make_syslog_parser() { + return loglens::AuthLogParser(loglens::ParserConfig{ + loglens::InputMode::SyslogLegacy, + 2026}); +} + +bool events_equal(const loglens::Event& left, const loglens::Event& right) { + return left.timestamp == right.timestamp + && left.hostname == right.hostname + && left.program == right.program + && left.pid == right.pid + && left.message == right.message + && left.source_ip == right.source_ip + && left.username == right.username + && left.event_type == right.event_type + && left.line_number == right.line_number; +} + +bool results_equal(const loglens::parser_internal::HandlerResult& left, + const loglens::parser_internal::HandlerResult& right) { + if (left.matched != right.matched + || left.failure_category != right.failure_category + || left.reason != right.reason + || left.event.has_value() != right.event.has_value()) { + return false; + } + + return !left.event.has_value() || events_equal(*left.event, *right.event); +} + +loglens::Event make_source_event(std::string program, std::string message) { + loglens::Event event; + event.program = std::move(program); + event.message = std::move(message); + event.hostname = "example-host"; + event.line_number = 1; + return event; +} + +std::array representative_registry_events() { + return { + make_source_event( + "sshd", + "Failed password for user-a from 203.0.113.10 port 50101 ssh2"), + make_source_event( + "pam_unix(sshd:auth)", + "authentication failure; rhost=203.0.113.11 user=user-b"), + make_source_event( + "pam_faillock(sshd:auth)", + 
"Authentication failure for user user-c from 203.0.113.12"), + make_source_event( + "pam_sss(sshd:auth)", + "received for user user-d: 7 (Authentication failure)"), + make_source_event( + "sudo", + "user-e : TTY=pts/1 ; PWD=/home/user/project ; USER=root ; COMMAND=/usr/bin/id"), + make_source_event( + "su", + "FAILED SU (to root) user-f on pts/2"), + }; +} + +void test_registry_dispatch_is_order_independent() { + const auto registry = loglens::parser_internal::program_handler_registry(); + const auto sources = representative_registry_events(); + expect(registry.size() == sources.size(), "expected one registry entry per supported program family"); + + for (const auto& source : sources) { + const auto match_count = std::count_if( + registry.begin(), + registry.end(), + [&source](const loglens::parser_internal::ProgramHandlerRegistration& registration) { + return registration.matches(source.program); + }); + expect(match_count == 1, "expected each representative program to match exactly one handler"); + } + + std::vector order(registry.size()); + std::iota(order.begin(), order.end(), 0); + std::size_t permutation_count = 0; + + do { + std::vector permuted; + permuted.reserve(order.size()); + for (const auto index : order) { + permuted.push_back(registry[index]); + } + + for (const auto& source : sources) { + const auto expected = loglens::parser_internal::dispatch_program(source); + const auto actual = loglens::parser_internal::dispatch_program(source, permuted); + expect(results_equal(actual, expected), "expected dispatch result to be registry-order independent"); + } + ++permutation_count; + } while (std::next_permutation(order.begin(), order.end())); + + expect(permutation_count == 720, "expected all six-handler registry permutations to be checked"); +} + +std::uint32_t next_random(std::uint32_t& state) { + state ^= state << 13; + state ^= state >> 17; + state ^= state << 5; + return state; +} + +std::string generated_malformed_ipv4(std::size_t index, 
std::uint32_t& state) { + const auto first = 1U + next_random(state) % 223U; + const auto second = next_random(state) % 256U; + const auto third = next_random(state) % 256U; + const auto fourth = next_random(state) % 256U; + + switch (index % 4) { + case 0: + return std::to_string(first) + "." + std::to_string(second) + "." + + std::to_string(third) + "." + std::to_string(256U + next_random(state) % 744U); + case 1: + return std::to_string(first) + "." + std::to_string(second) + "." + std::to_string(third); + case 2: + return std::to_string(first) + ".invalid." + std::to_string(third) + "." + std::to_string(fourth); + default: + return std::to_string(first) + "." + std::to_string(second) + "." + + std::to_string(third) + "." + std::to_string(fourth) + ","; + } +} + +void test_generated_malformed_source_tokens_keep_failure_taxonomy() { + const auto parser = make_syslog_parser(); + std::uint32_t state = 0x6c6f676cU; + + for (std::size_t index = 0; index < 256; ++index) { + const auto source_ip = generated_malformed_ipv4(index, state); + const auto line = "Mar 10 08:30:00 example-host sshd[4200]: Failed password for user-a from " + + source_ip + " port 50101 ssh2"; + std::string reason; + auto category = loglens::ParserFailureCategory::KnownProgramUnknownMessage; + + const auto event = parser.parse_line(line, index + 1, &reason, &category); + + expect(!event.has_value(), "expected malformed source token not to emit an event"); + expect(category == loglens::ParserFailureCategory::MalformedSourceIp, + "expected malformed source token to keep malformed_source_ip category"); + expect(reason == "malformed source IP", "expected stable malformed source failure reason"); + } +} + +void test_failure_classification_is_stable_across_envelope_variants() { + struct FailureCase { + std::string program; + std::string message; + loglens::ParserFailureCategory category; + std::string reason; + }; + + const std::array cases{{ + {"sshd", "Connection closed by 203.0.113.50 port 50100 
[preauth]", + loglens::ParserFailureCategory::KnownProgramUnknownMessage, + "unrecognized auth pattern: sshd_connection_closed_preauth"}, + {"pam_unix(sshd:session)", "session closed for user user-a", + loglens::ParserFailureCategory::UnsupportedPamVariant, + "unrecognized auth pattern: pam_unix_session_closed"}, + {"pam_faillock(sshd:auth)", "Account temporarily locked for user user-b", + loglens::ParserFailureCategory::UnsupportedPamVariant, + "unrecognized auth pattern: pam_faillock_account_locked"}, + {"pam_sss(sshd:auth)", "User not known to the underlying authentication module", + loglens::ParserFailureCategory::UnsupportedPamVariant, + "unrecognized auth pattern: pam_sss_unknown_user"}, + {"sudo", "user-c : TTY=pts/1 ; PWD=/home/user/project ; USER=root", + loglens::ParserFailureCategory::KnownProgramUnknownMessage, + "unrecognized auth pattern: sudo_other"}, + {"su", "pam_authenticate: Authentication failure", + loglens::ParserFailureCategory::KnownProgramUnknownMessage, + "unrecognized auth pattern: su_other"}, + {"cron", "job completed", + loglens::ParserFailureCategory::UnknownProgram, + "unrecognized auth pattern: program_cron"}, + }}; + + const auto parser = make_syslog_parser(); + for (const auto& test_case : cases) { + for (std::size_t variant = 0; variant < 16; ++variant) { + const auto tag = variant % 2 == 0 + ? 
test_case.program + : test_case.program + "[" + std::to_string(4300 + variant) + "]"; + auto line = "Mar 10 08:31:00 example-host " + tag + ": " + test_case.message; + if (variant % 3 == 0) { + line.push_back('\r'); + } + + std::string reason; + auto category = loglens::ParserFailureCategory::MalformedSourceIp; + const auto event = parser.parse_line(line, variant + 1, &reason, &category); + + expect(!event.has_value(), "expected unsupported pattern not to emit an event"); + expect(category == test_case.category, "expected stable failure category across envelope variants"); + expect(reason == test_case.reason, "expected stable failure reason across envelope variants"); + } + } +} + +void test_deterministic_byte_corpus_never_breaks_result_invariants() { + const std::array parsers{{ + make_syslog_parser(), + loglens::AuthLogParser(loglens::ParserConfig{ + loglens::InputMode::JournalctlShortFull, + std::nullopt}), + }}; + std::uint32_t state = 0x70617273U; + + for (std::size_t index = 0; index < 512; ++index) { + const auto length = static_cast(next_random(state) % 257U); + std::string line(length, '\0'); + for (auto& character : line) { + character = static_cast(next_random(state) & 0xffU); + } + + for (const auto& parser : parsers) { + std::string reason; + auto category = loglens::ParserFailureCategory::KnownProgramUnknownMessage; + const auto event = parser.parse_line(line, index + 1, &reason, &category); + + if (event.has_value()) { + expect(event->event_type != loglens::EventType::Unknown, + "expected every emitted event to have a normalized type"); + expect(!event->program.empty(), "expected every emitted event to have a program"); + } else { + expect(!reason.empty(), "expected every rejected input to have a failure reason"); + } + } + } +} + +} // namespace + +int main() { + try { + test_registry_dispatch_is_order_independent(); + test_generated_malformed_source_tokens_keep_failure_taxonomy(); + test_failure_classification_is_stable_across_envelope_variants(); 
+ test_deterministic_byte_corpus_never_breaks_result_invariants(); + return 0; + } catch (const std::exception& error) { + std::cerr << error.what() << '\n'; + return 1; + } +}