Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,3 +30,32 @@ jobs:

- name: Test
run: ctest --test-dir build --build-config Release --output-on-failure

# Bounded libFuzzer smoke job for the parser.
# - CC/CXX select Clang because the CMake configure step aborts with a
#   FATAL_ERROR for any other compiler when LOGLENS_BUILD_FUZZERS=ON.
# - BUILD_TESTING=OFF keeps the regular unit-test executables out of this build;
#   only the fuzz_parser target is built.
# - The run is capped by libFuzzer flags: -runs (number of executions),
#   -max_len (maximum input size in bytes) and -timeout (seconds allowed per
#   input), seeded from the checked-in corpus directory.
fuzz-smoke:
name: Parser fuzz smoke
runs-on: ubuntu-latest

steps:
- name: Checkout
uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0

- name: Configure fuzz target
env:
CC: clang
CXX: clang++
run: >-
cmake -S . -B build-fuzz
-D CMAKE_BUILD_TYPE=RelWithDebInfo
-D BUILD_TESTING=OFF
-D LOGLENS_BUILD_FUZZERS=ON

- name: Build fuzz target
run: cmake --build build-fuzz --target fuzz_parser --config RelWithDebInfo

- name: Run bounded parser fuzz smoke
run: >-
./build-fuzz/fuzz_parser
-runs=2000
-max_len=1024
-timeout=2
tests/fuzz/corpus/parser
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,11 @@ All notable user-visible changes should be recorded here.
input order and inclusive window-boundary behavior.
- Added parser regression coverage for malformed source-IP token
classification.
- Added deterministic parser property tests for registry-order independence,
generated malformed tokens, failure taxonomy stability, and arbitrary-byte
result invariants.
- Added an optional Clang libFuzzer parser target with a sanitized seed corpus
and a bounded Ubuntu CI smoke campaign.

### Changed

Expand Down
35 changes: 34 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@ set(CMAKE_CXX_STANDARD 20)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)

add_library(loglens_lib
# Opt-in switch (default OFF): when ON, the Clang-only libFuzzer targets are
# added to the build. Enable with -D LOGLENS_BUILD_FUZZERS=ON.
option(LOGLENS_BUILD_FUZZERS "Build Clang libFuzzer targets" OFF)

set(LOGLENS_LIBRARY_SOURCES
src/config.cpp
src/parser.cpp
src/parser/failure_classifier.cpp
Expand All @@ -23,6 +25,8 @@ add_library(loglens_lib
src/report.cpp
)

add_library(loglens_lib ${LOGLENS_LIBRARY_SOURCES})

target_include_directories(loglens_lib
PUBLIC
"${CMAKE_CURRENT_SOURCE_DIR}/src"
Expand All @@ -38,6 +42,10 @@ if(BUILD_TESTING)
target_link_libraries(test_parser PRIVATE loglens_lib)
add_test(NAME parser COMMAND test_parser)

# Deterministic parser property tests, registered with CTest as "parser_properties".
add_executable(
  test_parser_properties
  tests/test_parser_properties.cpp
)
target_link_libraries(
  test_parser_properties
  PRIVATE
    loglens_lib
)
add_test(
  NAME parser_properties
  COMMAND test_parser_properties
)

add_executable(test_detector tests/test_detector.cpp)
target_link_libraries(test_detector PRIVATE loglens_lib)
add_test(NAME detector COMMAND test_detector)
Expand Down Expand Up @@ -65,3 +73,28 @@ if(BUILD_TESTING)
${CMAKE_CURRENT_BINARY_DIR}/report_contract_output
)
endif()

# Optional libFuzzer harness for the parser (enable with -D LOGLENS_BUILD_FUZZERS=ON).
#
# The library sources are compiled a second time into loglens_fuzz_lib so the
# sanitizer/coverage instrumentation stays out of the regular loglens_lib target.
if(LOGLENS_BUILD_FUZZERS)
  # -fsanitize=fuzzer is a Clang feature; fail at configure time with a clear
  # message rather than with an opaque compiler error during the build.
  if(NOT CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
    message(FATAL_ERROR "LOGLENS_BUILD_FUZZERS requires Clang with libFuzzer support")
  endif()

  add_library(loglens_fuzz_lib STATIC ${LOGLENS_LIBRARY_SOURCES})
  target_include_directories(loglens_fuzz_lib PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/src")
  target_compile_options(
    loglens_fuzz_lib
    PRIVATE
      # fuzzer-no-link: instrument for coverage without pulling in libFuzzer's
      # main(); the driver is linked into the fuzz_parser executable instead.
      -fsanitize=fuzzer-no-link,address,undefined
      # UBSan recovers (prints and continues) by default; make every finding
      # fatal so a bounded smoke run cannot pass with undefined behaviour.
      -fno-sanitize-recover=undefined
      -fno-omit-frame-pointer
  )

  add_executable(fuzz_parser fuzz/parser_fuzz.cpp)
  target_link_libraries(fuzz_parser PRIVATE loglens_fuzz_lib)
  target_compile_options(
    fuzz_parser
    PRIVATE
      -fsanitize=fuzzer,address,undefined
      # Keep the harness consistent with the library: UBSan findings abort.
      -fno-sanitize-recover=undefined
      -fno-omit-frame-pointer
  )
  # The sanitizer runtimes and the libFuzzer driver are selected at link time.
  target_link_options(fuzz_parser PRIVATE -fsanitize=fuzzer,address,undefined)
endif()
2 changes: 2 additions & 0 deletions docs/parser-contract.md
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,8 @@ Parsed successes and audit-only events remain reportable but do not count as bru
| Artifact | What it proves |
| --- | --- |
| [`tests/test_parser.cpp`](../tests/test_parser.cpp) | Unit-level parser expectations, malformed-line behavior, mode aliases, fixture-matrix counts, and unknown-pattern buckets |
| [`tests/test_parser_properties.cpp`](../tests/test_parser_properties.cpp) | Deterministic generated checks for handler-registry order independence, malformed source tokens, failure taxonomy stability, and arbitrary-byte result invariants |
| [`tests/fuzz/README.md`](../tests/fuzz/README.md) and [`tests/fuzz/corpus/parser`](../tests/fuzz/corpus/parser) | Optional Clang libFuzzer harness instructions plus a sanitized parser seed corpus used by the bounded CI fuzz smoke campaign |
| [`tests/test_detector.cpp`](../tests/test_detector.cpp) | Detection signal mapping and default counting behavior after parsing |
| [`assets/parser_fixture_matrix_syslog.log`](../assets/parser_fixture_matrix_syslog.log) | Syslog known/unknown parser matrix |
| [`assets/parser_fixture_matrix_journalctl_short_full.log`](../assets/parser_fixture_matrix_journalctl_short_full.log) | Journalctl short-full known/unknown parser matrix |
Expand Down
3 changes: 3 additions & 0 deletions docs/quality-gates.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ The main review principle is:
| --- | --- | --- | --- |
| Supported input formats are explicit | [`parser-contract.md`](./parser-contract.md), [`parser-conformance-matrix.md`](./parser-conformance-matrix.md) | `ctest --test-dir build --output-on-failure` through `test_parser`; fixture anchors in `assets/parser_fixture_matrix_syslog.log` and `assets/parser_fixture_matrix_journalctl_short_full.log` | Reviewer can name the two supported formats and see known/unknown line behavior in fixtures |
| Parser coverage is visible | [`parser-coverage-notes.md`](./parser-coverage-notes.md), [`tests/fixtures/parser_matrix/noisy_auth_expected.json`](../tests/fixtures/parser_matrix/noisy_auth_expected.json) | `test_parser` compares noisy-auth coverage output to the checked-in expected summary | Reviewer can see parsed lines, skipped blanks, warnings, failure categories, and unknown-pattern buckets |
| Parser handler dispatch remains robust | [`parser-contract.md`](./parser-contract.md), [`tests/test_parser_properties.cpp`](../tests/test_parser_properties.cpp), [`tests/fuzz/README.md`](../tests/fuzz/README.md) | `test_parser_properties` checks all handler-order permutations and generated failure invariants; Ubuntu CI runs a bounded Clang libFuzzer smoke corpus | Reviewer can see that registry order does not change results and malformed or arbitrary bytes remain bounded by parser result invariants |
| Unsupported evidence does not silently become detector evidence | [`parser-contract.md`](./parser-contract.md), [`rule-catalog.md`](./rule-catalog.md), [`case-study-linux-auth-bruteforce.md`](./case-study-linux-auth-bruteforce.md) | `test_parser` covers unknown-pattern warnings; `test_detector` covers signal-boundary behavior | Reviewer can explain why unsupported lines remain warnings instead of findings |
| Report artifacts are deterministic | [`report-artifacts.md`](./report-artifacts.md), report-contract fixtures under [`tests/fixtures/report_contracts`](../tests/fixtures/report_contracts) | `test_report_contracts` compares generated `report.md`, `report.json`, `findings.csv`, and `warnings.csv` against golden fixtures | Reviewer can regenerate reports and see schema or text changes as explicit snapshot diffs |
| Findings are explainable | [`rule-catalog.md`](./rule-catalog.md), [`report-artifacts.md`](./report-artifacts.md) | `test_report` checks JSON finding fields; report-contract fixtures lock `finding_id`, `episode_index`, `rule_id`, `window_start`, `window_end`, `threshold`, `observed_count`, `grouping_key`, `evidence_event_ids`, and `verdict_boundary` | Reviewer can trace a finding from rule context back to source line IDs and see the non-verdict boundary |
Expand All @@ -31,6 +32,8 @@ Use the smallest command set that answers the review question:
| Build and unit/regression tests | `cmake -S . -B build && cmake --build build && ctest --test-dir build --output-on-failure` |
| Multi-config local test run | `ctest --test-dir build -C Debug --output-on-failure` |
| Report-contract snapshot verification | `ctest --test-dir build -C Debug -R report_contracts --output-on-failure` |
| Parser property checks | `ctest --test-dir build -C Debug -R parser_properties --output-on-failure` |
| Bounded parser fuzz smoke (Clang) | See [`tests/fuzz/README.md`](../tests/fuzz/README.md) |
| Performance envelope reproduction | `pwsh -File scripts/benchmark-performance-envelope.ps1` |
| Fast performance smoke check | `pwsh -File scripts/benchmark-performance-envelope.ps1 -LineCounts 1000 -Runs 1 -WarmupRuns 0 -SkipBuild` |

Expand Down
47 changes: 47 additions & 0 deletions fuzz/parser_fuzz.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
#include "parser.hpp"

#include <array>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <optional>
#include <string>

namespace {

// Aborts the process (which libFuzzer reports as a crash) when a parse result
// violates the harness invariants:
//   - a rejected line (no event) must come with a non-empty failure reason;
//   - an emitted event must not have EventType::Unknown and must carry a
//     non-empty program name.
void enforce_result_invariants(const std::optional<loglens::Event>& event,
                               const std::string& reason) {
    // Rejection path: only the failure reason is checked.
    if (!event.has_value()) {
        if (reason.empty()) {
            std::abort();
        }
        return;
    }

    // Success path: the event must be classified and attributed to a program.
    const loglens::Event& parsed = *event;
    if (parsed.event_type == loglens::EventType::Unknown || parsed.program.empty()) {
        std::abort();
    }
}

}  // namespace

extern "C" int LLVMFuzzerTestOneInput(const std::uint8_t* data, std::size_t size) {
const std::string line(reinterpret_cast<const char*>(data), size);
const std::array<loglens::AuthLogParser, 2> parsers{{
loglens::AuthLogParser(loglens::ParserConfig{
loglens::InputMode::SyslogLegacy,
2026}),
loglens::AuthLogParser(loglens::ParserConfig{
loglens::InputMode::JournalctlShortFull,
std::nullopt}),
}};

for (const auto& parser : parsers) {
std::string reason;
auto category = loglens::ParserFailureCategory::KnownProgramUnknownMessage;
const auto event = parser.parse_line(line, 1, &reason, &category);
enforce_result_invariants(event, reason);
}

return 0;
}
24 changes: 12 additions & 12 deletions src/parser/program_dispatch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,19 +7,10 @@
#include "parser/sudo_handlers.hpp"

#include <array>
#include <string_view>

namespace loglens::parser_internal {
namespace {

using ProgramMatcher = bool (*)(std::string_view program);
using ProgramHandler = HandlerResult (*)(const Event& source);

struct HandlerRegistration {
ProgramMatcher matches;
ProgramHandler handle;
};

// Returns true only when the program name is exactly "sshd" (case-sensitive,
// no prefix or suffix matching).
bool is_sshd(std::string_view program) {
    constexpr std::string_view expected{"sshd"};
    return expected == program;
}
Expand All @@ -44,7 +35,7 @@ bool is_su(std::string_view program) {
return program == "su";
}

constexpr std::array<HandlerRegistration, 6> handler_registry{{
constexpr std::array<ProgramHandlerRegistration, 6> handler_registry{{
{is_sshd, handle_sshd_event},
{is_pam_unix, handle_pam_unix_event},
{is_pam_faillock, handle_pam_faillock_event},
Expand All @@ -55,8 +46,13 @@ constexpr std::array<HandlerRegistration, 6> handler_registry{{

} // namespace

HandlerResult dispatch_program(const Event& source) {
for (const auto& registration : handler_registry) {
// Exposes the built-in handler table as a read-only view, in registration order.
std::span<const ProgramHandlerRegistration> program_handler_registry() {
    const std::span<const ProgramHandlerRegistration> built_in(handler_registry);
    return built_in;
}

HandlerResult dispatch_program(const Event& source,
std::span<const ProgramHandlerRegistration> registry) {
for (const auto& registration : registry) {
if (registration.matches(source.program)) {
return registration.handle(source);
}
Expand All @@ -65,4 +61,8 @@ HandlerResult dispatch_program(const Event& source) {
return classify_unrecognized_event(source);
}

// Convenience overload: dispatches against the built-in handler registry.
HandlerResult dispatch_program(const Event& source) {
    const auto default_registry = program_handler_registry();
    return dispatch_program(source, default_registry);
}

} // namespace loglens::parser_internal
14 changes: 14 additions & 0 deletions src/parser/program_dispatch.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,22 @@

#include "parser/handler_result.hpp"

#include <span>
#include <string_view>

namespace loglens::parser_internal {

// Predicate deciding whether a handler applies to the given program name.
using ProgramMatcher = bool (*)(std::string_view program);
// Turns a source event into a HandlerResult for a program the matcher accepted.
using ProgramHandler = HandlerResult (*)(const Event& source);

// One registry entry: a matcher paired with the handler to run when it matches.
struct ProgramHandlerRegistration {
ProgramMatcher matches;
ProgramHandler handle;
};

// Returns a read-only view of the built-in handler table defined in
// program_dispatch.cpp.
std::span<const ProgramHandlerRegistration> program_handler_registry();
// Dispatches `source` using the built-in registry.
HandlerResult dispatch_program(const Event& source);
// Dispatches `source` using a caller-supplied registry: entries are tried in
// order and the first one whose matcher accepts `source.program` handles the
// event; if none matches, the event is classified as unrecognized.
HandlerResult dispatch_program(const Event& source,
std::span<const ProgramHandlerRegistration> registry);

} // namespace loglens::parser_internal
23 changes: 23 additions & 0 deletions tests/fuzz/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Parser fuzz harness

The parser fuzz target is optional and requires Clang with libFuzzer support.
It exercises both supported input modes and treats these result invariants as
crash conditions:

- every emitted event has a normalized event type and non-empty program
- every rejected line has a non-empty parser failure reason
- arbitrary input must not terminate the parser unexpectedly

Configure and run a bounded local campaign:

```bash
CC=clang CXX=clang++ cmake -S . -B build-fuzz \
-D CMAKE_BUILD_TYPE=RelWithDebInfo \
-D BUILD_TESTING=OFF \
-D LOGLENS_BUILD_FUZZERS=ON
cmake --build build-fuzz --target fuzz_parser
./build-fuzz/fuzz_parser -runs=2000 -max_len=1024 tests/fuzz/corpus/parser
```

The checked-in corpus is sanitized and intentionally small. CI uses it only as
a bounded smoke campaign; longer local campaigns can reuse the same target.
1 change: 1 addition & 0 deletions tests/fuzz/corpus/parser/journalctl_sshd
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Mon 2026-03-10 08:40:03 UTC example-host sshd[4403]: Invalid user user-d from 203.0.113.62 port 50102
1 change: 1 addition & 0 deletions tests/fuzz/corpus/parser/malformed_source_ip
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Mar 10 08:40:04 example-host sshd[4404]: Failed password for user-e from 203.0.113.999 port 50103 ssh2
1 change: 1 addition & 0 deletions tests/fuzz/corpus/parser/pam_faillock
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Mar 10 08:40:01 example-host pam_faillock(sshd:auth): Authentication failure for user user-b from 203.0.113.61
1 change: 1 addition & 0 deletions tests/fuzz/corpus/parser/sshd_failed_password
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Mar 10 08:40:00 example-host sshd[4400]: Failed password for user-a from 203.0.113.60 port 50101 ssh2
1 change: 1 addition & 0 deletions tests/fuzz/corpus/parser/sudo_command
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Mar 10 08:40:02 example-host sudo[4402]: user-c : TTY=pts/1 ; PWD=/home/user/project ; USER=root ; COMMAND=/usr/bin/id
1 change: 1 addition & 0 deletions tests/fuzz/corpus/parser/unsupported_program
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Mar 10 08:40:05 example-host cron[4405]: job completed
Loading
Loading