diff --git a/CHANGELOG.md b/CHANGELOG.md index 65a4ece..3ff71c8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,7 +10,9 @@ All notable user-visible changes should be recorded here. ### Changed -- None yet. +- Detector rules now emit separate findings for time-separated detection + episodes within the same rule subject instead of collapsing each subject to a + single best window. ### Fixed @@ -18,7 +20,8 @@ All notable user-visible changes should be recorded here. ### Docs -- None yet. +- Documented detection episode semantics in the rule catalog and report artifact + contract notes. ## v0.5.0 diff --git a/README.md b/README.md index 388892c..aa821db 100644 --- a/README.md +++ b/README.md @@ -84,6 +84,11 @@ LogLens currently detects: - One IP trying multiple usernames within 15 minutes - Bursty sudo activity from the same user within 5 minutes +Each rule can emit multiple findings for the same subject when matching +evidence appears in time-separated detector episodes. Report consumers should +use `window_start`, `window_end`, and `evidence_event_ids` rather than assuming +one finding per `rule_id` and subject. + LogLens currently parses and reports these additional auth patterns beyond the core detector inputs: - `Accepted publickey` SSH successes diff --git a/docs/report-artifacts.md b/docs/report-artifacts.md index b746896..f7e0151 100644 --- a/docs/report-artifacts.md +++ b/docs/report-artifacts.md @@ -61,6 +61,11 @@ fixtures explicitly. `evidence_event_ids` are deterministic local event identifiers derived from the source line number, formatted as `line:`. They let reviewers trace a finding back to the normalized input events that satisfied the rule window without implying global event identity. +Consumers should not assume that `rule_id` plus `subject` is unique within a +report. A rule can emit multiple findings for the same subject when matching +evidence appears in time-separated detector episodes. Use `window_start`, +`window_end`, and `evidence_event_ids` to distinguish episode-level findings. + `verdict_boundary` is a stable token that states what the finding must not be read as. It keeps machine-readable findings aligned with LogLens's triage scope: diff --git a/docs/rule-catalog.md b/docs/rule-catalog.md index 1bacfa6..90f6e0a 100644 --- a/docs/rule-catalog.md +++ b/docs/rule-catalog.md @@ -26,6 +26,29 @@ Metadata equivalent: - Default values below match the built-in detector configuration. - The checked-in `assets/sample_config.json` is a tested default-equivalent fixture. +## Detection Episode Semantics + +Within each rule grouping key, LogLens sorts matching signals by timestamp and +source line number. Consecutive signals separated by an idle gap greater than +the rule window start a new episode candidate. + +Inside each episode candidate, the detector keeps the best sliding window for +the rule: + +- `brute_force` and `sudo_burst`: highest event count +- `multi_user_probing`: highest distinct username count, with event count as + the tie-breaker + +Each episode candidate that reaches the configured threshold emits one finding. +The same `rule_id` and `subject` can therefore appear more than once in one +report when the evidence contains time-separated bursts. Review +`window_start`, `window_end`, and `evidence_event_ids` to distinguish those +episodes. + +Episode splitting is a detector reporting model, not an incident boundary. It +does not infer compromise, attribution, causality between rules, or cross-host +correlation. + ## Finding Explainability Fields JSON findings include both the finding conclusion and the rule context used to reach it: @@ -89,7 +112,7 @@ Signals without a source IP are not grouped for this rule. 10 minutes by default. -The detector uses a sliding timestamp window within each source-IP group. +The detector uses the episode semantics above within each source-IP group. ### Threshold @@ -155,7 +178,7 @@ Signals without a source IP are not grouped for this rule. Distinct username cou 15 minutes by default. -The detector uses a sliding timestamp window within each source-IP group. +The detector uses the episode semantics above within each source-IP group. ### Threshold @@ -224,7 +247,7 @@ Signals without a username are not grouped for this rule. 5 minutes by default. -The detector uses a sliding timestamp window within each username group. +The detector uses the episode semantics above within each username group. ### Threshold diff --git a/src/detector.cpp b/src/detector.cpp index 903894f..eb40e0a 100644 --- a/src/detector.cpp +++ b/src/detector.cpp @@ -2,12 +2,29 @@ #include #include +#include namespace loglens { namespace { using SignalGroup = std::unordered_map>; +struct CountWindowSelection { + std::size_t start = 0; + std::size_t end = 0; + std::size_t count = 0; + bool matched = false; +}; + +struct MultiUserWindowSelection { + std::size_t start = 0; + std::size_t end = 0; + std::size_t event_count = 0; + std::size_t distinct_username_count = 0; + std::vector usernames; + bool matched = false; +}; + std::vector sort_signals_by_time(const std::vector& signals) { auto sorted = signals; std::sort(sorted.begin(), sorted.end(), [](const AuthSignal* left, const AuthSignal* right) { @@ -19,6 +36,99 @@ std::vector sort_signals_by_time(const std::vector> activity_segments(const std::vector& ordered, + std::chrono::minutes window) { + std::vector> segments; + if (ordered.empty()) { + return segments; + } + + std::size_t segment_start = 0; + for (std::size_t index = 1; index < ordered.size(); ++index) { + if (ordered[index]->timestamp - ordered[index - 1]->timestamp > window) { + segments.emplace_back(segment_start, index - 1); + segment_start = index; + } + } + + segments.emplace_back(segment_start, ordered.size() - 1); + return segments; +} + +CountWindowSelection best_count_window(const std::vector& ordered, + std::size_t segment_start, + std::size_t segment_end, + std::chrono::minutes window) { + CountWindowSelection selection; + std::size_t start = segment_start; + + for (std::size_t end = segment_start; end <= segment_end; ++end) { + while (start < end && ordered[end]->timestamp - ordered[start]->timestamp > window) { + ++start; + } + + const auto count = end - start + 1; + if (!selection.matched || count > selection.count) { + selection.start = start; + selection.end = end; + selection.count = count; + selection.matched = true; + } + } + + return selection; +} + +MultiUserWindowSelection best_multi_user_window(const std::vector& ordered, + std::size_t segment_start, + std::size_t segment_end, + std::chrono::minutes window) { + MultiUserWindowSelection selection; + std::size_t start = segment_start; + std::unordered_map username_counts; + + for (std::size_t end = segment_start; end <= segment_end; ++end) { + if (!ordered[end]->username.empty()) { + ++username_counts[ordered[end]->username]; + } + + while (start < end && ordered[end]->timestamp - ordered[start]->timestamp > window) { + if (!ordered[start]->username.empty()) { + auto count_it = username_counts.find(ordered[start]->username); + if (count_it != username_counts.end()) { + if (count_it->second == 1) { + username_counts.erase(count_it); + } else { + --count_it->second; + } + } + } + ++start; + } + + const auto distinct_username_count = username_counts.size(); + const auto event_count = end - start + 1; + if (!selection.matched + || distinct_username_count > selection.distinct_username_count + || (distinct_username_count == selection.distinct_username_count + && event_count > selection.event_count)) { + selection.start = start; + selection.end = end; + selection.event_count = event_count; + selection.distinct_username_count = distinct_username_count; + selection.usernames.clear(); + selection.usernames.reserve(username_counts.size()); + for (const auto& [username, _] : username_counts) { + selection.usernames.push_back(username); + } + std::sort(selection.usernames.begin(), selection.usernames.end()); + selection.matched = true; + } + } + + return selection; +} + std::vector evidence_event_ids_for_window(const std::vector& ordered, std::size_t start, std::size_t end) { @@ -153,34 +263,23 @@ std::vector detect_brute_force(const std::vector& signals, for (const auto& [ip, group] : grouped) { const auto ordered = sort_signals_by_time(group); - std::size_t start = 0; - std::size_t best_count = 0; - std::size_t best_start = 0; - std::size_t best_end = 0; - - for (std::size_t end = 0; end < ordered.size(); ++end) { - while (start < end - && ordered[end]->timestamp - ordered[start]->timestamp > config.brute_force.window) { - ++start; + for (const auto& [segment_start, segment_end] : activity_segments(ordered, config.brute_force.window)) { + const auto episode = best_count_window( + ordered, + segment_start, + segment_end, + config.brute_force.window); + + if (episode.matched && episode.count >= config.brute_force.threshold) { + findings.push_back(make_brute_force_finding( + ip, + episode.count, + config.brute_force.threshold, + ordered[episode.start]->timestamp, + ordered[episode.end]->timestamp, + config.brute_force.window, + evidence_event_ids_for_window(ordered, episode.start, episode.end))); } - - const auto count = end - start + 1; - if (count > best_count) { - best_count = count; - best_start = start; - best_end = end; - } - } - - if (best_count >= config.brute_force.threshold) { - findings.push_back(make_brute_force_finding( - ip, - best_count, - config.brute_force.threshold, - ordered[best_start]->timestamp, - ordered[best_end]->timestamp, - config.brute_force.window, - evidence_event_ids_for_window(ordered, best_start, best_end))); } } @@ -193,63 +292,26 @@ std::vector detect_multi_user(const std::vector& signals, c for (const auto& [ip, group] : grouped) { const auto ordered = sort_signals_by_time(group); - std::size_t start = 0; - std::unordered_map username_counts; - std::size_t best_distinct = 0; - std::size_t best_count = 0; - std::size_t best_start = 0; - std::size_t best_end = 0; - std::vector best_usernames; - - for (std::size_t end = 0; end < ordered.size(); ++end) { - if (!ordered[end]->username.empty()) { - ++username_counts[ordered[end]->username]; - } - - while (start < end - && ordered[end]->timestamp - ordered[start]->timestamp > config.multi_user_probing.window) { - if (!ordered[start]->username.empty()) { - auto count_it = username_counts.find(ordered[start]->username); - if (count_it != username_counts.end()) { - if (count_it->second == 1) { - username_counts.erase(count_it); - } else { - --count_it->second; - } - } - } - ++start; - } - - const auto distinct_count = username_counts.size(); - const auto event_count = end - start + 1; - if (distinct_count > best_distinct - || (distinct_count == best_distinct && event_count > best_count)) { - best_distinct = distinct_count; - best_count = event_count; - best_start = start; - best_end = end; - best_usernames.clear(); - best_usernames.reserve(username_counts.size()); - for (const auto& [username, _] : username_counts) { - best_usernames.push_back(username); - } - std::sort(best_usernames.begin(), best_usernames.end()); + for (const auto& [segment_start, segment_end] : activity_segments(ordered, config.multi_user_probing.window)) { + auto episode = best_multi_user_window( + ordered, + segment_start, + segment_end, + config.multi_user_probing.window); + + if (episode.matched && episode.distinct_username_count >= config.multi_user_probing.threshold) { + findings.push_back(make_multi_user_finding( + ip, + episode.event_count, + config.multi_user_probing.threshold, + episode.distinct_username_count, + ordered[episode.start]->timestamp, + ordered[episode.end]->timestamp, + std::move(episode.usernames), + config.multi_user_probing.window, + evidence_event_ids_for_window(ordered, episode.start, episode.end))); } } - - if (best_distinct >= config.multi_user_probing.threshold) { - findings.push_back(make_multi_user_finding( - ip, - best_count, - config.multi_user_probing.threshold, - best_distinct, - ordered[best_start]->timestamp, - ordered[best_end]->timestamp, - best_usernames, - config.multi_user_probing.window, - evidence_event_ids_for_window(ordered, best_start, best_end))); - } } return findings; @@ -261,35 +323,24 @@ std::vector detect_sudo_burst(const std::vector& signals, c for (const auto& [username, group] : grouped) { const auto ordered = sort_signals_by_time(group); - std::size_t start = 0; - std::size_t best_count = 0; - std::size_t best_start = 0; - std::size_t best_end = 0; - - for (std::size_t end = 0; end < ordered.size(); ++end) { - while (start < end - && ordered[end]->timestamp - ordered[start]->timestamp > config.sudo_burst.window) { - ++start; - } - - const auto count = end - start + 1; - if (count > best_count) { - best_count = count; - best_start = start; - best_end = end; + for (const auto& [segment_start, segment_end] : activity_segments(ordered, config.sudo_burst.window)) { + const auto episode = best_count_window( + ordered, + segment_start, + segment_end, + config.sudo_burst.window); + + if (episode.matched && episode.count >= config.sudo_burst.threshold) { + findings.push_back(make_sudo_finding( + username, + episode.count, + config.sudo_burst.threshold, + ordered[episode.start]->timestamp, + ordered[episode.end]->timestamp, + config.sudo_burst.window, + evidence_event_ids_for_window(ordered, episode.start, episode.end))); } } - - if (best_count >= config.sudo_burst.threshold) { - findings.push_back(make_sudo_finding( - username, - best_count, - config.sudo_burst.threshold, - ordered[best_start]->timestamp, - ordered[best_end]->timestamp, - config.sudo_burst.window, - evidence_event_ids_for_window(ordered, best_start, best_end))); - } } return findings; diff --git a/tests/test_detector.cpp b/tests/test_detector.cpp index 8c52554..a85e28f 100644 --- a/tests/test_detector.cpp +++ b/tests/test_detector.cpp @@ -29,6 +29,18 @@ const loglens::Finding* find_finding(const std::vector& findin return it == findings.end() ? nullptr : &(*it); } +std::vector find_findings(const std::vector& findings, + loglens::FindingType type, + const std::string& subject) { + std::vector matches; + for (const auto& finding : findings) { + if (finding.type == type && finding.subject == subject) { + matches.push_back(&finding); + } + } + return matches; +} + const loglens::AuthSignal* find_signal(const std::vector& signals, loglens::AuthSignalKind signal_kind) { const auto it = std::find_if(signals.begin(), signals.end(), [&](const loglens::AuthSignal& signal) { @@ -100,6 +112,43 @@ std::vector build_events() { "Mar 10 08:24:15 example-host sudo: alice : TTY=pts/0 ; PWD=/home/alice ; USER=root ; COMMAND=/usr/bin/vi /etc/ssh/sshd_config\n"); } +std::vector build_two_bruteforce_episode_events() { + return parse_events( + make_syslog_config(), + "Mar 10 09:00:00 example-host sshd[2001]: Failed password for user001 from 203.0.113.10 port 52001 ssh2\n" + "Mar 10 09:01:00 example-host sshd[2002]: Failed password for user001 from 203.0.113.10 port 52002 ssh2\n" + "Mar 10 09:02:00 example-host sshd[2003]: Failed password for user001 from 203.0.113.10 port 52003 ssh2\n" + "Mar 10 09:03:00 example-host sshd[2004]: Failed password for user001 from 203.0.113.10 port 52004 ssh2\n" + "Mar 10 09:04:00 example-host sshd[2005]: Failed password for user001 from 203.0.113.10 port 52005 ssh2\n" + "Mar 10 15:00:00 example-host sshd[2006]: Failed password for user001 from 203.0.113.10 port 53001 ssh2\n" + "Mar 10 15:01:00 example-host sshd[2007]: Failed password for user001 from 203.0.113.10 port 53002 ssh2\n" + "Mar 10 15:02:00 example-host sshd[2008]: Failed password for user001 from 203.0.113.10 port 53003 ssh2\n" + "Mar 10 15:03:00 example-host sshd[2009]: Failed password for user001 from 203.0.113.10 port 53004 ssh2\n" + "Mar 10 15:04:00 example-host sshd[2010]: Failed password for user001 from 203.0.113.10 port 53005 ssh2\n"); +} + +std::vector build_two_multi_user_episode_events() { + return parse_events( + make_syslog_config(), + "Mar 10 09:00:00 example-host sshd[2101]: Failed password for user001 from 203.0.113.20 port 52001 ssh2\n" + "Mar 10 09:01:00 example-host sshd[2102]: Failed password for user002 from 203.0.113.20 port 52002 ssh2\n" + "Mar 10 09:02:00 example-host sshd[2103]: Failed password for user003 from 203.0.113.20 port 52003 ssh2\n" + "Mar 10 15:00:00 example-host sshd[2104]: Failed password for user004 from 203.0.113.20 port 53001 ssh2\n" + "Mar 10 15:01:00 example-host sshd[2105]: Failed password for user005 from 203.0.113.20 port 53002 ssh2\n" + "Mar 10 15:02:00 example-host sshd[2106]: Failed password for user006 from 203.0.113.20 port 53003 ssh2\n"); +} + +std::vector build_two_sudo_episode_events() { + return parse_events( + make_syslog_config(), + "Mar 10 09:00:00 example-host sudo: user001 : TTY=pts/0 ; PWD=/home/user/project ; USER=root ; COMMAND=/usr/bin/systemctl status ssh\n" + "Mar 10 09:01:00 example-host sudo: user001 : TTY=pts/0 ; PWD=/home/user/project ; USER=root ; COMMAND=/usr/bin/journalctl -u ssh\n" + "Mar 10 09:02:00 example-host sudo: user001 : TTY=pts/0 ; PWD=/home/user/project ; USER=root ; COMMAND=/usr/bin/systemctl reload ssh\n" + "Mar 10 15:00:00 example-host sudo: user001 : TTY=pts/0 ; PWD=/home/user/project ; USER=root ; COMMAND=/usr/bin/systemctl status ssh\n" + "Mar 10 15:01:00 example-host sudo: user001 : TTY=pts/0 ; PWD=/home/user/project ; USER=root ; COMMAND=/usr/bin/journalctl -u ssh\n" + "Mar 10 15:02:00 example-host sudo: user001 : TTY=pts/0 ; PWD=/home/user/project ; USER=root ; COMMAND=/usr/bin/systemctl reload ssh\n"); +} + std::vector build_publickey_bruteforce_candidate_events() { return parse_events( make_syslog_config(), @@ -254,6 +303,86 @@ void test_custom_thresholds() { expect(findings.empty(), "expected custom thresholds to suppress findings"); } +void test_bruteforce_emits_multiple_episodes_for_same_source() { + const auto events = build_two_bruteforce_episode_events(); + const loglens::Detector detector; + const auto findings = detector.analyze(events); + + const auto episodes = find_findings(findings, loglens::FindingType::BruteForce, "203.0.113.10"); + expect(episodes.size() == 2, "expected two brute-force episodes for the same source IP"); + expect(episodes[0]->event_count == 5, "expected first brute-force episode count"); + expect(episodes[0]->observed_count == 5, "expected first brute-force observed count"); + expect(loglens::format_timestamp(episodes[0]->first_seen) == "2026-03-10 09:00:00", + "expected first brute-force episode start"); + expect(loglens::format_timestamp(episodes[0]->last_seen) == "2026-03-10 09:04:00", + "expected first brute-force episode end"); + expect((episodes[0]->evidence_event_ids == std::vector{ + "line:1", "line:2", "line:3", "line:4", "line:5"}), + "expected first brute-force episode evidence ids"); + + expect(episodes[1]->event_count == 5, "expected second brute-force episode count"); + expect(episodes[1]->observed_count == 5, "expected second brute-force observed count"); + expect(loglens::format_timestamp(episodes[1]->first_seen) == "2026-03-10 15:00:00", + "expected second brute-force episode start"); + expect(loglens::format_timestamp(episodes[1]->last_seen) == "2026-03-10 15:04:00", + "expected second brute-force episode end"); + expect((episodes[1]->evidence_event_ids == std::vector{ + "line:6", "line:7", "line:8", "line:9", "line:10"}), + "expected second brute-force episode evidence ids"); +} + +void test_multi_user_emits_multiple_episodes_for_same_source() { + const auto events = build_two_multi_user_episode_events(); + const loglens::Detector detector; + const auto findings = detector.analyze(events); + + const auto episodes = find_findings(findings, loglens::FindingType::MultiUserProbing, "203.0.113.20"); + expect(episodes.size() == 2, "expected two multi-user probing episodes for the same source IP"); + expect(episodes[0]->event_count == 3, "expected first multi-user episode event count"); + expect(episodes[0]->observed_count == 3, "expected first multi-user episode distinct username count"); + expect((episodes[0]->usernames == std::vector{"user001", "user002", "user003"}), + "expected first multi-user episode usernames"); + expect(loglens::format_timestamp(episodes[0]->first_seen) == "2026-03-10 09:00:00", + "expected first multi-user episode start"); + expect(loglens::format_timestamp(episodes[0]->last_seen) == "2026-03-10 09:02:00", + "expected first multi-user episode end"); + + expect(episodes[1]->event_count == 3, "expected second multi-user episode event count"); + expect(episodes[1]->observed_count == 3, "expected second multi-user episode distinct username count"); + expect((episodes[1]->usernames == std::vector{"user004", "user005", "user006"}), + "expected second multi-user episode usernames"); + expect(loglens::format_timestamp(episodes[1]->first_seen) == "2026-03-10 15:00:00", + "expected second multi-user episode start"); + expect(loglens::format_timestamp(episodes[1]->last_seen) == "2026-03-10 15:02:00", + "expected second multi-user episode end"); +} + +void test_sudo_burst_emits_multiple_episodes_for_same_user() { + const auto events = build_two_sudo_episode_events(); + const loglens::Detector detector; + const auto findings = detector.analyze(events); + + const auto episodes = find_findings(findings, loglens::FindingType::SudoBurst, "user001"); + expect(episodes.size() == 2, "expected two sudo burst episodes for the same user"); + expect(episodes[0]->event_count == 3, "expected first sudo episode count"); + expect(episodes[0]->observed_count == 3, "expected first sudo episode observed count"); + expect(loglens::format_timestamp(episodes[0]->first_seen) == "2026-03-10 09:00:00", + "expected first sudo episode start"); + expect(loglens::format_timestamp(episodes[0]->last_seen) == "2026-03-10 09:02:00", + "expected first sudo episode end"); + expect((episodes[0]->evidence_event_ids == std::vector{"line:1", "line:2", "line:3"}), + "expected first sudo episode evidence ids"); + + expect(episodes[1]->event_count == 3, "expected second sudo episode count"); + expect(episodes[1]->observed_count == 3, "expected second sudo episode observed count"); + expect(loglens::format_timestamp(episodes[1]->first_seen) == "2026-03-10 15:00:00", + "expected second sudo episode start"); + expect(loglens::format_timestamp(episodes[1]->last_seen) == "2026-03-10 15:02:00", + "expected second sudo episode end"); + expect((episodes[1]->evidence_event_ids == std::vector{"line:4", "line:5", "line:6"}), + "expected second sudo episode evidence ids"); +} + void test_auth_signal_defaults() { const auto events = parse_events( make_syslog_config(), @@ -505,6 +634,9 @@ void test_reject_invalid_config() { int main() { test_default_thresholds(); test_custom_thresholds(); + test_bruteforce_emits_multiple_episodes_for_same_source(); + test_multi_user_emits_multiple_episodes_for_same_source(); + test_sudo_burst_emits_multiple_episodes_for_same_user(); test_auth_signal_defaults(); test_failed_publickey_contributes_to_bruteforce_by_default(); test_accepted_publickey_success_stays_out_of_failure_signals();