From dce6a22bbb5cc38b3635b9d65804012dd5abe207 Mon Sep 17 00:00:00 2001 From: stacknil Date: Sat, 4 Jul 2026 23:17:26 +0800 Subject: [PATCH] refactor(parser): add handler registry --- CHANGELOG.md | 3 + CMakeLists.txt | 9 + docs/parser-conformance-matrix.md | 3 +- docs/parser-contract.md | 18 + src/parser.cpp | 1164 +------------------------ src/parser/failure_classifier.cpp | 236 +++++ src/parser/failure_classifier.hpp | 15 + src/parser/handler_result.hpp | 26 + src/parser/pam_handlers.cpp | 138 +++ src/parser/pam_handlers.hpp | 11 + src/parser/program_dispatch.cpp | 68 ++ src/parser/program_dispatch.hpp | 9 + src/parser/source_envelope_parser.cpp | 68 ++ src/parser/source_envelope_parser.hpp | 14 + src/parser/sshd_handlers.cpp | 264 ++++++ src/parser/sshd_handlers.hpp | 9 + src/parser/su_handlers.cpp | 74 ++ src/parser/su_handlers.hpp | 9 + src/parser/sudo_handlers.cpp | 56 ++ src/parser/sudo_handlers.hpp | 9 + src/parser/text_utils.cpp | 108 +++ src/parser/text_utils.hpp | 16 + src/parser/timestamp_parser.cpp | 248 ++++++ src/parser/timestamp_parser.hpp | 14 + tests/test_parser.cpp | 51 ++ 25 files changed, 1498 insertions(+), 1142 deletions(-) create mode 100644 src/parser/failure_classifier.cpp create mode 100644 src/parser/failure_classifier.hpp create mode 100644 src/parser/handler_result.hpp create mode 100644 src/parser/pam_handlers.cpp create mode 100644 src/parser/pam_handlers.hpp create mode 100644 src/parser/program_dispatch.cpp create mode 100644 src/parser/program_dispatch.hpp create mode 100644 src/parser/source_envelope_parser.cpp create mode 100644 src/parser/source_envelope_parser.hpp create mode 100644 src/parser/sshd_handlers.cpp create mode 100644 src/parser/sshd_handlers.hpp create mode 100644 src/parser/su_handlers.cpp create mode 100644 src/parser/su_handlers.hpp create mode 100644 src/parser/sudo_handlers.cpp create mode 100644 src/parser/sudo_handlers.hpp create mode 100644 src/parser/text_utils.cpp create mode 100644 src/parser/text_utils.hpp create mode 100644 src/parser/timestamp_parser.cpp create mode 100644 src/parser/timestamp_parser.hpp diff --git a/CHANGELOG.md b/CHANGELOG.md index cc675d3..5f3f4ae 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,6 +17,9 @@ All notable user-visible changes should be recorded here. ### Changed +- Refactored parser internals into timestamp, source-envelope, program-dispatch, + program-handler, and failure-classifier modules behind the unchanged + `AuthLogParser` interface. - Detector rules now emit separate findings for time-separated detection episodes within the same rule subject instead of collapsing each subject to a single best window. diff --git a/CMakeLists.txt b/CMakeLists.txt index 0bbbd9e..1fd1406 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -9,6 +9,15 @@ set(CMAKE_CXX_EXTENSIONS OFF) add_library(loglens_lib src/config.cpp src/parser.cpp + src/parser/failure_classifier.cpp + src/parser/pam_handlers.cpp + src/parser/program_dispatch.cpp + src/parser/source_envelope_parser.cpp + src/parser/sshd_handlers.cpp + src/parser/su_handlers.cpp + src/parser/sudo_handlers.cpp + src/parser/text_utils.cpp + src/parser/timestamp_parser.cpp src/signal.cpp src/detector.cpp src/report.cpp diff --git a/docs/parser-conformance-matrix.md b/docs/parser-conformance-matrix.md index 87bdbba..31b9f59 100644 --- a/docs/parser-conformance-matrix.md +++ b/docs/parser-conformance-matrix.md @@ -1,7 +1,8 @@ # Parser Conformance Matrix This matrix documents the parser behavior that LogLens currently treats as -reviewable contract surface. It is derived from `src/parser.cpp`, +reviewable contract surface. It is derived from `src/parser.cpp`, the internal +`src/parser/` handler registry, `src/event.hpp`, `tests/test_parser.cpp`, and the checked-in parser fixture corpus. diff --git a/docs/parser-contract.md b/docs/parser-contract.md index d85d57b..a72c575 100644 --- a/docs/parser-contract.md +++ b/docs/parser-contract.md @@ -47,6 +47,24 @@ Recognized success or audit families include accepted password, accepted publick This is the main trust boundary: unsupported input should remain inspectable, even when it does not produce a finding. +## Internal parsing pipeline + +`AuthLogParser` remains the single public parser interface. Its implementation +is split into internal modules with one-way flow: + +1. `timestamp_parser` parses the selected input mode's timestamp and hostname. +2. `source_envelope_parser` extracts the program tag, optional pid, and raw message. +3. `program_dispatch` selects a registered `sshd`, PAM, `sudo`, or `su` handler. +4. The selected handler either emits a normalized event or returns a structured + failure result. +5. `failure_classifier` assigns malformed-source and unsupported-pattern + telemetry without turning unsupported evidence into an event. + +All registered program handlers return the same internal result shape: +`matched`, optional `event`, `failure_category`, and `reason`. The registry and +handler modules are implementation details; callers continue to depend only on +`AuthLogParser`, `Event`, and the parser telemetry contract. + Parser failure categories are intentionally coarser than unknown-pattern buckets: diff --git a/src/parser.cpp b/src/parser.cpp index 6e7385f..e86012d 100644 --- a/src/parser.cpp +++ b/src/parser.cpp @@ -1,10 +1,11 @@ #include "parser.hpp" +#include "parser/failure_classifier.hpp" +#include "parser/program_dispatch.hpp" +#include "parser/source_envelope_parser.hpp" +#include "parser/text_utils.hpp" + #include -#include -#include -#include -#include #include #include #include @@ -15,1140 +16,15 @@ namespace loglens { namespace { -struct ClockTime { - int hour = 0; - int minute = 0; - int second = 0; -}; - void set_failure(std::string* error, ParserFailureCategory* category, - std::string reason, - ParserFailureCategory failure_category) { + const parser_internal::HandlerResult& result) { if (error != nullptr) { - *error = std::move(reason); + *error = result.reason; } if (category != nullptr) { - *category = failure_category; - } -} - -std::string_view trim_left(std::string_view value) { - while (!value.empty() && std::isspace(static_cast(value.front())) != 0) { - value.remove_prefix(1); - } - return value; -} - -std::string_view trim(std::string_view value) { - value = trim_left(value); - while (!value.empty() && std::isspace(static_cast(value.back())) != 0) { - value.remove_suffix(1); - } - return value; -} - -std::string_view consume_token(std::string_view& input) { - input = trim_left(input); - if (input.empty()) { - return {}; - } - - const auto separator = input.find(' '); - if (separator == std::string_view::npos) { - const auto token = input; - input = {}; - return token; - } - - const auto token = input.substr(0, separator); - input.remove_prefix(separator + 1); - return token; -} - -bool parse_int(std::string_view token, int& value) { - const auto* begin = token.data(); - const auto* end = token.data() + token.size(); - const auto result = std::from_chars(begin, end, value); - return result.ec == std::errc{} && result.ptr == end; -} - -bool is_valid_ipv4_token(std::string_view token) { - int parts = 0; - while (!token.empty()) { - const auto dot = token.find('.'); - const auto part = dot == std::string_view::npos ? token : token.substr(0, dot); - if (part.empty()) { - return false; - } - - int value = 0; - if (!parse_int(part, value) || value < 0 || value > 255) { - return false; - } - - ++parts; - if (dot == std::string_view::npos) { - token = {}; - } else { - token.remove_prefix(dot + 1); - } - } - - return parts == 4; -} - -bool is_valid_ipv6_like_token(std::string_view token) { - if (token.find(':') == std::string_view::npos) { - return false; - } - - bool saw_hex = false; - for (const char character : token) { - if (std::isxdigit(static_cast(character)) != 0) { - saw_hex = true; - continue; - } - if (character == ':' || character == '.') { - continue; - } - return false; - } - - return saw_hex; -} - -bool is_valid_source_ip_token(std::string_view token) { - return is_valid_ipv4_token(token) || is_valid_ipv6_like_token(token); -} - -bool parse_month(std::string_view token, unsigned& month_index) { - static constexpr std::array months = { - "Jan", "Feb", "Mar", "Apr", "May", "Jun", - "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"}; - - for (std::size_t index = 0; index < months.size(); ++index) { - if (months[index] == token) { - month_index = static_cast(index + 1); - return true; - } - } - - return false; -} - -bool parse_clock_token(std::string_view token, ClockTime& time) { - if (token.size() < 8 || token[2] != ':' || token[5] != ':') { - return false; - } - - if (!parse_int(token.substr(0, 2), time.hour) - || !parse_int(token.substr(3, 2), time.minute) - || !parse_int(token.substr(6, 2), time.second)) { - return false; - } - - if (token.size() == 8) { - return time.hour >= 0 && time.hour <= 23 - && time.minute >= 0 && time.minute <= 59 - && time.second >= 0 && time.second <= 59; - } - - if (token[8] != '.' || token.size() == 9) { - return false; - } - - for (std::size_t index = 9; index < token.size(); ++index) { - if (std::isdigit(static_cast(token[index])) == 0) { - return false; - } - } - - return time.hour >= 0 && time.hour <= 23 - && time.minute >= 0 && time.minute <= 59 - && time.second >= 0 && time.second <= 59; -} - -std::optional build_timestamp(int year_value, - unsigned month_index, - int day_value, - const ClockTime& time, - std::chrono::minutes offset = std::chrono::minutes{0}) { - using namespace std::chrono; - - const year_month_day date{year{year_value}, month{month_index}, day{static_cast(day_value)}}; - if (!date.ok()) { - return std::nullopt; - } - - const auto timestamp = sys_days{date} - + hours{time.hour} - + minutes{time.minute} - + seconds{time.second}; - return timestamp - offset; -} - -bool parse_calendar_date_parts(std::string_view token, int& year_value, unsigned& month_index, int& day_value) { - int parsed_month = 0; - if (token.size() != 10 || token[4] != '-' || token[7] != '-') { - return false; - } - - return parse_int(token.substr(0, 4), year_value) - && parse_int(token.substr(5, 2), parsed_month) - && parse_int(token.substr(8, 2), day_value) - && parsed_month >= 1 && parsed_month <= 12 - && (month_index = static_cast(parsed_month), true); -} - -bool parse_timezone_token(std::string_view token, std::chrono::minutes& offset) { - using namespace std::chrono; - - if (token == "UTC" || token == "GMT" || token == "Z") { - offset = minutes{0}; - return true; - } - - if (token.size() != 5 && token.size() != 6) { - return false; - } - - if (token.front() != '+' && token.front() != '-') { - return false; - } - - const bool negative = token.front() == '-'; - const auto digits = token.substr(1); - int parsed_hours = 0; - int minutes_value = 0; - - if (digits.size() == 4) { - if (!parse_int(digits.substr(0, 2), parsed_hours) || !parse_int(digits.substr(2, 2), minutes_value)) { - return false; - } - } else { - if (digits[2] != ':' - || !parse_int(digits.substr(0, 2), parsed_hours) - || !parse_int(digits.substr(3, 2), minutes_value)) { - return false; - } - } - - if (parsed_hours < 0 || parsed_hours > 23 || minutes_value < 0 || minutes_value > 59) { - return false; - } - - offset = std::chrono::hours{parsed_hours} + minutes{minutes_value}; - if (negative) { - offset = -offset; - } - return true; -} - -void parse_program_tag(std::string_view tag, std::string& program, std::optional& pid) { - tag = trim(tag); - const auto open_bracket = tag.find('['); - if (open_bracket == std::string_view::npos || tag.empty() || tag.back() != ']') { - program.assign(tag); - pid.reset(); - return; - } - - const auto pid_token = tag.substr(open_bracket + 1, tag.size() - open_bracket - 2); - int parsed_pid = 0; - if (!parse_int(pid_token, parsed_pid)) { - program.assign(tag); - pid.reset(); - return; - } - - program.assign(tag.substr(0, open_bracket)); - pid = parsed_pid; -} - -bool parse_program_and_message(std::string_view remaining, Event& event, std::string* error) { - const auto delimiter = remaining.find(": "); - const auto fallback_delimiter = remaining.find(':'); - const auto split_position = delimiter != std::string_view::npos ? delimiter : fallback_delimiter; - if (split_position == std::string_view::npos) { - if (error != nullptr) { - *error = "missing program/message delimiter"; - } - return false; - } - - const auto tag = remaining.substr(0, split_position); - const auto message_offset = split_position + (delimiter != std::string_view::npos ? 2 : 1); - const auto message = trim_left(remaining.substr(message_offset)); - - parse_program_tag(tag, event.program, event.pid); - event.message.assign(message); - return true; -} - -std::string extract_token_after(std::string_view input, std::string_view marker) { - const auto marker_position = input.find(marker); - if (marker_position == std::string_view::npos) { - return {}; - } - - auto remaining = input.substr(marker_position + marker.size()); - return std::string(consume_token(remaining)); -} - -std::string extract_kv_value(std::string_view input, std::string_view key) { - std::size_t search_position = 0; - while (search_position < input.size()) { - const auto key_position = input.find(key, search_position); - if (key_position == std::string_view::npos) { - return {}; - } - - if (key_position == 0 - || std::isspace(static_cast(input[key_position - 1])) != 0 - || input[key_position - 1] == ';') { - auto remaining = input.substr(key_position + key.size()); - const auto end = remaining.find_first_of(" ;"); - if (end != std::string_view::npos) { - remaining = remaining.substr(0, end); - } - return std::string(remaining); - } - - search_position = key_position + key.size(); - } - - return {}; -} - -std::string extract_source_ip_after_from(std::string_view message) { - const auto marker_position = message.find(" from "); - if (marker_position == std::string_view::npos) { - return {}; - } - - auto remaining = message.substr(marker_position + std::string_view{" from "}.size()); - const auto first = consume_token(remaining); - if (first.empty()) { - return {}; - } - - if (first == "authenticating") { - const auto second = consume_token(remaining); - if (second == "user") { - static_cast(consume_token(remaining)); - return std::string(consume_token(remaining)); - } - } - - if (first == "invalid" || first == "illegal") { - const auto second = consume_token(remaining); - if (second == "user") { - static_cast(consume_token(remaining)); - return std::string(consume_token(remaining)); - } - } - - if (first == "user") { - static_cast(consume_token(remaining)); - return std::string(consume_token(remaining)); - } - - return std::string(first); -} - -std::string extract_source_ip_candidate(const Event& event) { - auto candidate = extract_source_ip_after_from(event.message); - if (!candidate.empty()) { - return candidate; - } - - candidate = extract_kv_value(event.message, "rhost="); - if (!candidate.empty()) { - return candidate; - } - - if (event.program == "sshd" && event.message.starts_with("Unable to negotiate with ")) { - candidate = extract_token_after(event.message, " with "); - } - - return candidate; -} - -bool has_malformed_source_ip(const Event& event) { - const auto candidate = extract_source_ip_candidate(event); - return !candidate.empty() && !is_valid_source_ip_token(candidate); -} - -std::string sanitize_pattern_label(std::string_view value) { - std::string normalized; - normalized.reserve(value.size()); - - bool previous_was_separator = false; - for (const char character : value) { - if (std::isalnum(static_cast(character)) != 0) { - normalized.push_back(static_cast(std::tolower(static_cast(character)))); - previous_was_separator = false; - continue; - } - - if (!normalized.empty() && !previous_was_separator) { - normalized.push_back('_'); - previous_was_separator = true; - } - } - - while (!normalized.empty() && normalized.back() == '_') { - normalized.pop_back(); - } - - return normalized.empty() ? "unknown_pattern" : normalized; -} - -bool consume_invalid_or_illegal_user_prefix(std::string_view& remaining) { - static constexpr std::string_view invalid_user_prefix = "invalid user "; - static constexpr std::string_view illegal_user_prefix = "illegal user "; - - if (remaining.starts_with(invalid_user_prefix)) { - remaining.remove_prefix(invalid_user_prefix.size()); - return true; - } - - if (remaining.starts_with(illegal_user_prefix)) { - remaining.remove_prefix(illegal_user_prefix.size()); - return true; - } - - return false; -} - -bool parse_ssh_failed_message(std::string_view message, Event& event) { - static constexpr std::string_view failed_password_prefix = "Failed password for "; - static constexpr std::string_view failed_none_prefix = "Failed none for "; - - bool failed_none = false; - std::string_view remaining; - if (message.starts_with(failed_password_prefix)) { - remaining = message.substr(failed_password_prefix.size()); - } else if (message.starts_with(failed_none_prefix)) { - failed_none = true; - remaining = message.substr(failed_none_prefix.size()); - } else { - return false; - } - - const bool invalid_user = consume_invalid_or_illegal_user_prefix(remaining); - - const auto username = consume_token(remaining); - if (username.empty()) { - return false; - } - - if (failed_none && !invalid_user) { - return false; - } - - event.username.assign(username); - event.source_ip = extract_token_after(message, " from "); - event.event_type = invalid_user ? EventType::SshInvalidUser : EventType::SshFailedPassword; - return true; -} - -bool parse_ssh_accepted_message(std::string_view message, Event& event) { - static constexpr std::string_view accepted_prefix = "Accepted password for "; - if (!message.starts_with(accepted_prefix)) { - return false; - } - - auto remaining = message.substr(accepted_prefix.size()); - const auto username = consume_token(remaining); - if (username.empty()) { - return false; - } - - event.username.assign(username); - event.source_ip = extract_token_after(message, " from "); - event.event_type = EventType::SshAcceptedPassword; - return true; -} - -bool parse_ssh_accepted_publickey_message(std::string_view message, Event& event) { - static constexpr std::string_view accepted_prefix = "Accepted publickey for "; - if (!message.starts_with(accepted_prefix)) { - return false; - } - - auto remaining = message.substr(accepted_prefix.size()); - const auto username = consume_token(remaining); - if (username.empty()) { - return false; - } - - event.username.assign(username); - event.source_ip = extract_token_after(message, " from "); - event.event_type = EventType::SshAcceptedPublicKey; - return true; -} - -bool parse_ssh_accepted_keyboard_interactive_message(std::string_view message, Event& event) { - static constexpr std::string_view accepted_prefix = "Accepted keyboard-interactive/pam for "; - if (!message.starts_with(accepted_prefix)) { - return false; - } - - auto remaining = message.substr(accepted_prefix.size()); - const auto username = consume_token(remaining); - if (username.empty()) { - return false; - } - - event.username.assign(username); - event.source_ip = extract_token_after(message, " from "); - event.event_type = EventType::SshAcceptedKeyboardInteractive; - return true; -} - -bool parse_ssh_failed_publickey_message(std::string_view message, Event& event) { - static constexpr std::string_view publickey_prefix = "Failed publickey for "; - if (!message.starts_with(publickey_prefix)) { - return false; - } - - auto remaining = message.substr(publickey_prefix.size()); - consume_invalid_or_illegal_user_prefix(remaining); - - const auto username = consume_token(remaining); - if (username.empty()) { - return false; - } - - event.username.assign(username); - event.source_ip = extract_token_after(message, " from "); - event.event_type = EventType::SshFailedPublicKey; - return true; -} - -bool parse_ssh_failed_keyboard_interactive_message(std::string_view message, Event& event) { - static constexpr std::string_view keyboard_prefix = "Failed keyboard-interactive/pam for "; - if (!message.starts_with(keyboard_prefix)) { - return false; - } - - auto remaining = message.substr(keyboard_prefix.size()); - const bool invalid_user = consume_invalid_or_illegal_user_prefix(remaining); - - const auto username = consume_token(remaining); - if (username.empty()) { - return false; - } - - event.username.assign(username); - event.source_ip = extract_token_after(message, " from "); - event.event_type = invalid_user ? EventType::SshInvalidUser : EventType::SshFailedKeyboardInteractive; - return true; -} - -bool parse_ssh_max_auth_tries_message(std::string_view message, Event& event) { - static constexpr std::string_view max_auth_prefix = "maximum authentication attempts exceeded for "; - static constexpr std::string_view error_prefix = "error: "; - if (message.starts_with(error_prefix)) { - message.remove_prefix(error_prefix.size()); - } - - if (!message.starts_with(max_auth_prefix)) { - return false; - } - - auto remaining = message.substr(max_auth_prefix.size()); - const bool invalid_user = consume_invalid_or_illegal_user_prefix(remaining); - - const auto username = consume_token(remaining); - if (username.empty()) { - return false; - } - - event.username.assign(username); - event.source_ip = extract_token_after(message, " from "); - event.event_type = invalid_user ? EventType::SshInvalidUser : EventType::SshMaxAuthTries; - return true; -} - -bool parse_ssh_pam_auth_failure_message(std::string_view message, Event& event) { - static constexpr std::string_view error_prefix = "error: "; - static constexpr std::string_view pam_auth_prefix = "PAM: Authentication failure for "; - - if (message.starts_with(error_prefix)) { - message.remove_prefix(error_prefix.size()); - } - - if (!message.starts_with(pam_auth_prefix)) { - return false; - } - - auto remaining = message.substr(pam_auth_prefix.size()); - const bool invalid_user = consume_invalid_or_illegal_user_prefix(remaining); - - const auto username = consume_token(remaining); - if (username.empty()) { - return false; - } - - event.username.assign(username); - event.source_ip = extract_token_after(message, " from "); - event.event_type = invalid_user ? EventType::SshInvalidUser : EventType::PamAuthFailure; - return true; -} - -bool parse_ssh_input_userauth_request_message(std::string_view message, Event& event) { - static constexpr std::string_view input_userauth_prefix = "input_userauth_request: "; - if (!message.starts_with(input_userauth_prefix)) { - return false; - } - - auto remaining = message.substr(input_userauth_prefix.size()); - if (!consume_invalid_or_illegal_user_prefix(remaining)) { - return false; - } - - const auto username = consume_token(remaining); - if (username.empty()) { - return false; + *category = result.failure_category; } - - event.username.assign(username); - event.event_type = EventType::SshInvalidUser; - return true; -} - -bool parse_ssh_invalid_user_message(std::string_view message, Event& event) { - static constexpr std::string_view invalid_user_prefix = "Invalid user "; - static constexpr std::string_view illegal_user_prefix = "Illegal user "; - if (!message.starts_with(invalid_user_prefix) && !message.starts_with(illegal_user_prefix)) { - return false; - } - - auto remaining = message.starts_with(invalid_user_prefix) - ? message.substr(invalid_user_prefix.size()) - : message.substr(illegal_user_prefix.size()); - const auto username = consume_token(remaining); - if (username.empty()) { - return false; - } - - event.username.assign(username); - event.source_ip = extract_token_after(message, " from "); - event.event_type = EventType::SshInvalidUser; - return true; -} - -bool parse_pam_named_user_failure_message(std::string_view message, - std::string_view prefix, - Event& event) { - if (!message.starts_with(prefix)) { - return false; - } - - auto remaining = message.substr(prefix.size()); - const auto username = consume_token(remaining); - if (username.empty()) { - return false; - } - - event.username.assign(username); - event.source_ip = extract_token_after(message, " from "); - event.event_type = EventType::PamAuthFailure; - return true; -} - -bool parse_pam_auth_failure_message(std::string_view message, Event& event) { - static constexpr std::string_view auth_failure_prefix = "authentication failure;"; - if (!message.starts_with(auth_failure_prefix)) { - return false; - } - - event.username = extract_kv_value(message, "user="); - event.source_ip = extract_kv_value(message, "rhost="); - event.event_type = EventType::PamAuthFailure; - return true; -} - -bool parse_pam_sss_received_failure_message(std::string_view message, Event& event) { - static constexpr std::string_view received_prefix = "received for user "; - static constexpr std::string_view failure_marker = "(Authentication failure)"; - - if (!message.starts_with(received_prefix) || message.find(failure_marker) == std::string_view::npos) { - return false; - } - - auto remaining = message.substr(received_prefix.size()); - const auto separator = remaining.find(':'); - if (separator == std::string_view::npos) { - return false; - } - - const auto username = trim(remaining.substr(0, separator)); - if (username.empty()) { - return false; - } - - event.username.assign(username); - event.event_type = EventType::PamAuthFailure; - return true; -} - -bool parse_session_opened_message(std::string_view message, Event& event) { - static constexpr std::string_view session_prefix = "session opened for user "; - if (!message.starts_with(session_prefix)) { - return false; - } - - const auto by_position = message.find(" by "); - if (by_position == std::string_view::npos) { - return false; - } - - auto actor = message.substr(by_position + std::string_view{" by "}.size()); - const auto actor_end = actor.find_first_of("( "); - if (actor_end != std::string_view::npos) { - actor = actor.substr(0, actor_end); - } - - actor = trim(actor); - if (actor.empty()) { - return false; - } - - event.username.assign(actor); - event.event_type = EventType::SessionOpened; - return true; -} - -bool parse_sudo_message(std::string_view message, Event& event) { - auto remaining = trim_left(message); - const auto separator = remaining.find(':'); - if (separator == std::string_view::npos) { - return false; - } - - const auto username = trim(remaining.substr(0, separator)); - if (username.empty()) { - return false; - } - - event.username.assign(username); - const auto details = trim_left(remaining.substr(separator + 1)); - if (details.find("incorrect password attempt") != std::string_view::npos) { - event.event_type = EventType::SudoAuthFailure; - return true; - } - - if (details.find("user NOT in sudoers") != std::string_view::npos - || details.find("command not allowed") != std::string_view::npos) { - event.event_type = EventType::SudoPolicyDenied; - return true; - } - - if (details.find("COMMAND=") == std::string_view::npos) { - return false; - } - - event.event_type = EventType::SudoCommand; - return true; -} - -bool parse_su_message(std::string_view message, Event& event) { - static constexpr std::string_view failed_prefix = "FAILED SU (to "; - static constexpr std::string_view success_prefix = "Successful su for "; - - if (message.starts_with(failed_prefix)) { - const auto close_target = message.find(") "); - if (close_target == std::string_view::npos) { - return false; - } - - auto remaining = message.substr(close_target + 2); - const auto location_marker = remaining.find(" on "); - if (location_marker != std::string_view::npos) { - remaining = remaining.substr(0, location_marker); - } - - const auto actor = trim(remaining); - if (actor.empty()) { - return false; - } - - event.username.assign(actor); - event.event_type = EventType::SuAuthFailure; - return true; - } - - if (message.starts_with(success_prefix)) { - const auto by_position = message.find(" by "); - if (by_position == std::string_view::npos) { - return false; - } - - auto actor = message.substr(by_position + std::string_view{" by "}.size()); - const auto actor_end = actor.find_first_of("( "); - if (actor_end != std::string_view::npos) { - actor = actor.substr(0, actor_end); - } - - actor = trim(actor); - if (actor.empty()) { - return false; - } - - event.username.assign(actor); - event.event_type = EventType::SessionOpened; - return true; - } - - return false; -} - -bool parse_pam_faillock_message(std::string_view message, Event& event) { - if (parse_pam_named_user_failure_message(message, "Consecutive login failures for user ", event)) { - return true; - } - - if (parse_pam_named_user_failure_message(message, "Authentication failure for user ", event)) { - return true; - } - - return false; -} - -std::string classify_unknown_pam_faillock_pattern(std::string_view message) { - if (message.starts_with("Account temporarily locked for user ")) { - return "pam_faillock_account_locked"; - } - - if (message.starts_with("User ") && message.find("successfully authenticated") != std::string_view::npos) { - return "pam_faillock_authsucc"; - } - - return "pam_faillock_other"; -} - -std::string classify_unknown_pam_sss_pattern(std::string_view message) { - if (message.find("User not known to the underlying authentication module") != std::string_view::npos) { - return "pam_sss_unknown_user"; - } - - if (message.find("Authentication service cannot retrieve authentication info") != std::string_view::npos) { - return "pam_sss_authinfo_unavail"; - } - - return "pam_sss_other"; -} - -std::string classify_unknown_auth_pattern(const Event& event) { - const auto message = std::string_view{event.message}; - if (event.program == "sshd") { - if ((message.starts_with("Connection closed by ") || message.starts_with("Connection closed by authenticating user ") - || message.starts_with("Connection reset by ")) - && message.find("[preauth]") != std::string_view::npos) { - return "sshd_connection_closed_preauth"; - } - - if (message.starts_with("Timeout, client not responding") - || message.starts_with("Disconnected from ") - || message.starts_with("Received disconnect")) { - return "sshd_timeout_or_disconnection"; - } - - if (message.starts_with("Unable to negotiate with ")) { - return "sshd_negotiation_failure"; - } - - return "sshd_other"; - } - - if (event.program.starts_with("pam_unix(")) { - if (message.starts_with("session closed for user ")) { - return "pam_unix_session_closed"; - } - - return "pam_unix_other"; - } - - if (event.program.starts_with("pam_faillock(")) { - return classify_unknown_pam_faillock_pattern(message); - } - - if (event.program.starts_with("pam_sss(")) { - return classify_unknown_pam_sss_pattern(message); - } - - if (event.program == "sudo") { - return "sudo_other"; - } - - if (event.program == "su") { - return "su_other"; - } - - return "program_" + sanitize_pattern_label(event.program); -} - -bool is_pam_program(std::string_view program) { - return program.starts_with("pam_unix(") - || program.starts_with("pam_faillock(") - || program.starts_with("pam_sss("); -} - -bool is_known_auth_program(std::string_view program) { - return program == "sshd" - || program == "sudo" - || program == "su" - || is_pam_program(program); -} - -ParserFailureCategory failure_category_for_unrecognized_event(const Event& event) { - if (is_pam_program(event.program)) { - return ParserFailureCategory::UnsupportedPamVariant; - } - if (is_known_auth_program(event.program)) { - return ParserFailureCategory::KnownProgramUnknownMessage; - } - return ParserFailureCategory::UnknownProgram; -} - -bool classify_event(Event& event) { - const auto message = std::string_view{event.message}; - if (event.program == "sshd") { - if (parse_ssh_failed_message(message, event)) { - return true; - } - if (parse_ssh_accepted_message(message, event)) { - return true; - } - if (parse_ssh_accepted_publickey_message(message, event)) { - return true; - } - if (parse_ssh_accepted_keyboard_interactive_message(message, event)) { - return true; - } - if (parse_ssh_failed_publickey_message(message, event)) { - return true; - } - if (parse_ssh_failed_keyboard_interactive_message(message, event)) { - return true; - } - if (parse_ssh_max_auth_tries_message(message, event)) { - return true; - } - if (parse_ssh_pam_auth_failure_message(message, event)) { - return true; - } - if (parse_ssh_input_userauth_request_message(message, event)) { - return true; - } - if (parse_ssh_invalid_user_message(message, event)) { - return true; - } - return false; - } - - if (event.program.starts_with("pam_unix(")) { - if (parse_pam_auth_failure_message(message, event)) { - return true; - } - if (parse_session_opened_message(message, event)) { - return true; - } - return false; - } - - if (event.program.starts_with("pam_faillock(")) { - return parse_pam_faillock_message(message, event); - } - - if (event.program.starts_with("pam_sss(")) { - if (parse_pam_auth_failure_message(message, event)) { - return true; - } - if (parse_pam_sss_received_failure_message(message, event)) { - return true; - } - return false; - } - - if (event.program == "sudo") { - return parse_sudo_message(message, event); - } - - if (event.program == "su") { - return parse_su_message(message, event); - } - - return false; -} - -std::string extract_unknown_pattern_key(std::string_view error) { - static constexpr std::string_view unknown_prefix = "unrecognized auth pattern: "; - if (error.starts_with(unknown_prefix)) { - return std::string(error.substr(unknown_prefix.size())); - } - - return sanitize_pattern_label(error); -} - -std::optional parse_syslog_legacy_line(const ParserConfig& config, - std::string_view line, - std::size_t line_number, - std::string* error, - ParserFailureCategory* category) { - if (!config.assumed_year.has_value()) { - set_failure( - error, - category, - "syslog_legacy mode requires assume_year", - ParserFailureCategory::UnknownTimestamp); - return std::nullopt; - } - - auto remaining = line; - const auto month_token = consume_token(remaining); - const auto day_token = consume_token(remaining); - const auto time_token = consume_token(remaining); - const auto hostname_token = consume_token(remaining); - - if (month_token.empty() || day_token.empty() || time_token.empty() || hostname_token.empty()) { - set_failure(error, category, "missing syslog header fields", ParserFailureCategory::UnknownTimestamp); - return std::nullopt; - } - - unsigned month_index = 0; - int day_value = 0; - ClockTime time; - - if (!parse_month(month_token, month_index)) { - set_failure(error, category, "invalid month token", ParserFailureCategory::UnknownTimestamp); - return std::nullopt; - } - - if (!parse_int(day_token, day_value)) { - set_failure(error, category, "invalid day token", ParserFailureCategory::UnknownTimestamp); - return std::nullopt; - } - - if (!parse_clock_token(time_token, time)) { - set_failure(error, category, "invalid time token", ParserFailureCategory::UnknownTimestamp); - return std::nullopt; - } - - const auto timestamp = build_timestamp(*config.assumed_year, month_index, day_value, time); - if (!timestamp.has_value()) { - set_failure(error, category, "invalid calendar date", ParserFailureCategory::UnknownTimestamp); - return std::nullopt; - } - - Event event; - event.timestamp = *timestamp; - event.hostname.assign(hostname_token); - event.line_number = line_number; - - if (!parse_program_and_message(remaining, event, error)) { - if (category != nullptr) { - *category = ParserFailureCategory::UnknownProgram; - } - return std::nullopt; - } - - if (has_malformed_source_ip(event)) { - set_failure(error, category, "malformed source IP", ParserFailureCategory::MalformedSourceIp); - return std::nullopt; - } - - if (!classify_event(event)) { - set_failure( - error, - category, - "unrecognized auth pattern: " + classify_unknown_auth_pattern(event), - failure_category_for_unrecognized_event(event)); - return std::nullopt; - } - - return event; -} - -std::optional parse_journalctl_short_full_line(std::string_view line, - std::size_t line_number, - std::string* error, - ParserFailureCategory* category) { - auto remaining = line; - const auto weekday_token = consume_token(remaining); - const auto date_token = consume_token(remaining); - const auto time_token = consume_token(remaining); - const auto timezone_token = consume_token(remaining); - const auto hostname_token = consume_token(remaining); - - if (weekday_token.empty() || date_token.empty() || time_token.empty() - || timezone_token.empty() || hostname_token.empty()) { - set_failure( - error, - category, - "missing journalctl short-full header fields", - ParserFailureCategory::UnknownTimestamp); - return std::nullopt; - } - - int year_value = 0; - unsigned month_index = 0; - int day_value = 0; - ClockTime time; - std::chrono::minutes timezone_offset{0}; - - if (!parse_calendar_date_parts(date_token, year_value, month_index, day_value)) { - set_failure(error, category, "invalid journalctl date token", ParserFailureCategory::UnknownTimestamp); - return std::nullopt; - } - - if (!parse_clock_token(time_token, time)) { - set_failure(error, category, "invalid time token", ParserFailureCategory::UnknownTimestamp); - return std::nullopt; - } - - if (!parse_timezone_token(timezone_token, timezone_offset)) { - set_failure(error, category, "invalid timezone token", ParserFailureCategory::UnknownTimestamp); - return std::nullopt; - } - - const auto timestamp = build_timestamp(year_value, month_index, day_value, time, timezone_offset); - if (!timestamp.has_value()) { - set_failure(error, category, "invalid calendar date", ParserFailureCategory::UnknownTimestamp); - return std::nullopt; - } - - Event event; - event.timestamp = *timestamp; - event.hostname.assign(hostname_token); - event.line_number = line_number; - - if (!parse_program_and_message(remaining, event, error)) { - if (category != nullptr) { - *category = ParserFailureCategory::UnknownProgram; - } - return std::nullopt; - } - - if (has_malformed_source_ip(event)) { - set_failure(error, category, "malformed source IP", ParserFailureCategory::MalformedSourceIp); - return std::nullopt; - } - - if (!classify_event(event)) { - set_failure( - error, - category, - "unrecognized auth pattern: " + classify_unknown_auth_pattern(event), - failure_category_for_unrecognized_event(event)); - return std::nullopt; - } - - return event; } } // namespace @@ -1207,15 +83,21 @@ std::optional AuthLogParser::parse_line(std::string_view line, *category = ParserFailureCategory::KnownProgramUnknownMessage; } - switch (config_.input_mode) { - case InputMode::SyslogLegacy: - return parse_syslog_legacy_line(config_, line, line_number, error, category); - case InputMode::JournalctlShortFull: - return parse_journalctl_short_full_line(line, line_number, error, category); - default: - set_failure(error, category, "unsupported input mode", ParserFailureCategory::UnknownProgram); + auto result = parser_internal::parse_source_envelope(config_, line, line_number); + if (result.matched && result.event.has_value()) { + if (auto source_ip_failure = parser_internal::classify_source_ip_failure(*result.event)) { + result = std::move(*source_ip_failure); + } else { + result = parser_internal::dispatch_program(*result.event); + } + } + + if (!result.matched || !result.event.has_value()) { + set_failure(error, category, result); return std::nullopt; } + + return std::move(result.event); } ParseReport AuthLogParser::parse_stream(std::istream& input) const { @@ -1233,7 +115,7 @@ ParseReport AuthLogParser::parse_stream(std::istream& input) const { while (std::getline(input, line)) { ++line_number; - if (trim(line).empty()) { + if (parser_internal::trim(line).empty()) { ++result.quality.skipped_blank_lines; continue; } @@ -1252,7 +134,7 @@ ParseReport AuthLogParser::parse_stream(std::istream& input) const { const auto reason = error.empty() ? "unrecognized line" : error; result.warnings.push_back(ParseWarning{line_number, reason, category}); ++result.quality.unparsed_lines; - ++unknown_pattern_counts[extract_unknown_pattern_key(reason)]; + ++unknown_pattern_counts[parser_internal::extract_unknown_pattern_key(reason)]; auto& category_count = failure_category_counts[to_string(category)]; category_count.first = category; ++category_count.second; diff --git a/src/parser/failure_classifier.cpp b/src/parser/failure_classifier.cpp new file mode 100644 index 0000000..e984d0d --- /dev/null +++ b/src/parser/failure_classifier.cpp @@ -0,0 +1,236 @@ +#include "parser/failure_classifier.hpp" + +#include "parser/text_utils.hpp" + +#include + +namespace loglens::parser_internal { +namespace { + +bool is_valid_ipv4_token(std::string_view token) { + int parts = 0; + while (!token.empty()) { + const auto dot = token.find('.'); + const auto part = dot == std::string_view::npos ? token : token.substr(0, dot); + if (part.empty()) { + return false; + } + + int value = 0; + if (!parse_int(part, value) || value < 0 || value > 255) { + return false; + } + + ++parts; + if (dot == std::string_view::npos) { + token = {}; + } else { + token.remove_prefix(dot + 1); + } + } + + return parts == 4; +} + +bool is_valid_ipv6_like_token(std::string_view token) { + if (token.find(':') == std::string_view::npos) { + return false; + } + + bool saw_hex = false; + for (const char character : token) { + if (std::isxdigit(static_cast(character)) != 0) { + saw_hex = true; + continue; + } + if (character == ':' || character == '.') { + continue; + } + return false; + } + + return saw_hex; +} + +bool is_valid_source_ip_token(std::string_view token) { + return is_valid_ipv4_token(token) || is_valid_ipv6_like_token(token); +} + +std::string extract_source_ip_after_from(std::string_view message) { + const auto marker_position = message.find(" from "); + if (marker_position == std::string_view::npos) { + return {}; + } + + auto remaining = message.substr(marker_position + std::string_view{" from "}.size()); + const auto first = consume_token(remaining); + if (first.empty()) { + return {}; + } + + if (first == "authenticating") { + const auto second = consume_token(remaining); + if (second == "user") { + static_cast(consume_token(remaining)); + return std::string(consume_token(remaining)); + } + } + + if (first == "invalid" || first == "illegal") { + const auto second = consume_token(remaining); + if (second == "user") { + static_cast(consume_token(remaining)); + return std::string(consume_token(remaining)); + } + } + + if (first == "user") { + static_cast(consume_token(remaining)); + return std::string(consume_token(remaining)); + } + + return std::string(first); +} + +std::string extract_source_ip_candidate(const Event& event) { + auto candidate = extract_source_ip_after_from(event.message); + if (!candidate.empty()) { + return candidate; + } + + candidate = extract_kv_value(event.message, "rhost="); + if (!candidate.empty()) { + return candidate; + } + + if (event.program == "sshd" && event.message.starts_with("Unable to negotiate with ")) { + candidate = extract_token_after(event.message, " with "); + } + + return candidate; +} + +std::string classify_unknown_pam_faillock_pattern(std::string_view message) { + if (message.starts_with("Account temporarily locked for user ")) { + return "pam_faillock_account_locked"; + } + + if (message.starts_with("User ") && message.find("successfully authenticated") != std::string_view::npos) { + return "pam_faillock_authsucc"; + } + + return "pam_faillock_other"; +} + +std::string classify_unknown_pam_sss_pattern(std::string_view message) { + if (message.find("User not known to the underlying authentication module") != std::string_view::npos) { + return "pam_sss_unknown_user"; + } + + if (message.find("Authentication service cannot retrieve authentication info") != std::string_view::npos) { + return "pam_sss_authinfo_unavail"; + } + + return "pam_sss_other"; +} + +std::string classify_unknown_auth_pattern(const Event& event) { + const auto message = std::string_view{event.message}; + if (event.program == "sshd") { + if ((message.starts_with("Connection closed by ") + || message.starts_with("Connection closed by authenticating user ") + || message.starts_with("Connection reset by ")) + && message.find("[preauth]") != std::string_view::npos) { + return "sshd_connection_closed_preauth"; + } + + if (message.starts_with("Timeout, client not responding") + || message.starts_with("Disconnected from ") + || message.starts_with("Received disconnect")) { + return "sshd_timeout_or_disconnection"; + } + + if (message.starts_with("Unable to negotiate with ")) { + return "sshd_negotiation_failure"; + } + + return "sshd_other"; + } + + if (event.program.starts_with("pam_unix(")) { + if (message.starts_with("session closed for user ")) { + return "pam_unix_session_closed"; + } + + return "pam_unix_other"; + } + + if (event.program.starts_with("pam_faillock(")) { + return classify_unknown_pam_faillock_pattern(message); + } + + if (event.program.starts_with("pam_sss(")) { + return classify_unknown_pam_sss_pattern(message); + } + + if (event.program == "sudo") { + return "sudo_other"; + } + + if (event.program == "su") { + return "su_other"; + } + + return "program_" + sanitize_pattern_label(event.program); +} + +bool is_pam_program(std::string_view program) { + return program.starts_with("pam_unix(") + || program.starts_with("pam_faillock(") + || program.starts_with("pam_sss("); +} + +bool is_known_auth_program(std::string_view program) { + return program == "sshd" + || program == "sudo" + || program == "su" + || is_pam_program(program); +} + +ParserFailureCategory failure_category_for_unrecognized_event(const Event& event) { + if (is_pam_program(event.program)) { + return ParserFailureCategory::UnsupportedPamVariant; + } + if (is_known_auth_program(event.program)) { + return ParserFailureCategory::KnownProgramUnknownMessage; + } + return ParserFailureCategory::UnknownProgram; +} + +} // namespace + +std::optional classify_source_ip_failure(const Event& event) { + const auto candidate = extract_source_ip_candidate(event); + if (candidate.empty() || is_valid_source_ip_token(candidate)) { + return std::nullopt; + } + + return failed_event(ParserFailureCategory::MalformedSourceIp, "malformed source IP"); +} + +HandlerResult classify_unrecognized_event(const Event& event) { + return failed_event( + failure_category_for_unrecognized_event(event), + "unrecognized auth pattern: " + classify_unknown_auth_pattern(event)); +} + +std::string extract_unknown_pattern_key(std::string_view error) { + static constexpr std::string_view unknown_prefix = "unrecognized auth pattern: "; + if (error.starts_with(unknown_prefix)) { + return std::string(error.substr(unknown_prefix.size())); + } + + return sanitize_pattern_label(error); +} + +} // namespace loglens::parser_internal diff --git a/src/parser/failure_classifier.hpp b/src/parser/failure_classifier.hpp new file mode 100644 index 0000000..41b1061 --- /dev/null +++ b/src/parser/failure_classifier.hpp @@ -0,0 +1,15 @@ +#pragma once + +#include "parser/handler_result.hpp" + +#include +#include +#include + +namespace loglens::parser_internal { + +std::optional classify_source_ip_failure(const Event& event); +HandlerResult classify_unrecognized_event(const Event& event); +std::string extract_unknown_pattern_key(std::string_view error); + +} // namespace loglens::parser_internal diff --git a/src/parser/handler_result.hpp b/src/parser/handler_result.hpp new file mode 100644 index 0000000..1220393 --- /dev/null +++ b/src/parser/handler_result.hpp @@ -0,0 +1,26 @@ +#pragma once + +#include "parser.hpp" + +#include +#include +#include + +namespace loglens::parser_internal { + +struct HandlerResult { + bool matched = false; + std::optional event; + ParserFailureCategory failure_category = ParserFailureCategory::KnownProgramUnknownMessage; + std::string reason; +}; + +inline HandlerResult matched_event(Event event) { + return HandlerResult{true, std::move(event), ParserFailureCategory::KnownProgramUnknownMessage, {}}; +} + +inline HandlerResult failed_event(ParserFailureCategory category, std::string reason) { + return HandlerResult{false, std::nullopt, category, std::move(reason)}; +} + +} // namespace loglens::parser_internal diff --git a/src/parser/pam_handlers.cpp b/src/parser/pam_handlers.cpp new file mode 100644 index 0000000..88bbe4c --- /dev/null +++ b/src/parser/pam_handlers.cpp @@ -0,0 +1,138 @@ +#include "parser/pam_handlers.hpp" + +#include "parser/failure_classifier.hpp" +#include "parser/text_utils.hpp" + +#include +#include + +namespace loglens::parser_internal { +namespace { + +bool parse_pam_named_user_failure_message(std::string_view message, + std::string_view prefix, + Event& event) { + if (!message.starts_with(prefix)) { + return false; + } + + auto remaining = message.substr(prefix.size()); + const auto username = consume_token(remaining); + if (username.empty()) { + return false; + } + + event.username.assign(username); + event.source_ip = extract_token_after(message, " from "); + event.event_type = EventType::PamAuthFailure; + return true; +} + +bool parse_pam_auth_failure_message(std::string_view message, Event& event) { + static constexpr std::string_view auth_failure_prefix = "authentication failure;"; + if (!message.starts_with(auth_failure_prefix)) { + return false; + } + + event.username = extract_kv_value(message, "user="); + event.source_ip = extract_kv_value(message, "rhost="); + event.event_type = EventType::PamAuthFailure; + return true; +} + +bool parse_pam_sss_received_failure_message(std::string_view message, Event& event) { + static constexpr std::string_view received_prefix = "received for user "; + static constexpr std::string_view failure_marker = "(Authentication failure)"; + + if (!message.starts_with(received_prefix) || message.find(failure_marker) == std::string_view::npos) { + return false; + } + + auto remaining = message.substr(received_prefix.size()); + const auto separator = remaining.find(':'); + if (separator == std::string_view::npos) { + return false; + } + + const auto username = trim(remaining.substr(0, separator)); + if (username.empty()) { + return false; + } + + event.username.assign(username); + event.event_type = EventType::PamAuthFailure; + return true; +} + +bool parse_session_opened_message(std::string_view message, Event& event) { + static constexpr std::string_view session_prefix = "session opened for user "; + if (!message.starts_with(session_prefix)) { + return false; + } + + const auto by_position = message.find(" by "); + if (by_position == std::string_view::npos) { + return false; + } + + auto actor = message.substr(by_position + std::string_view{" by "}.size()); + const auto actor_end = actor.find_first_of("( "); + if (actor_end != std::string_view::npos) { + actor = actor.substr(0, actor_end); + } + + actor = trim(actor); + if (actor.empty()) { + return false; + } + + event.username.assign(actor); + event.event_type = EventType::SessionOpened; + return true; +} + +bool parse_pam_faillock_message(std::string_view message, Event& event) { + return parse_pam_named_user_failure_message( + message, + "Consecutive login failures for user ", + event) + || parse_pam_named_user_failure_message( + message, + "Authentication failure for user ", + event); +} + +} // namespace + +HandlerResult handle_pam_unix_event(const Event& source) { + Event event = source; + const auto message = std::string_view{event.message}; + if (parse_pam_auth_failure_message(message, event) + || parse_session_opened_message(message, event)) { + return matched_event(std::move(event)); + } + + return classify_unrecognized_event(source); +} + +HandlerResult handle_pam_faillock_event(const Event& source) { + Event event = source; + if (parse_pam_faillock_message(event.message, event)) { + return matched_event(std::move(event)); + } + + return classify_unrecognized_event(source); +} + +HandlerResult handle_pam_sss_event(const Event& source) { + Event event = source; + const auto message = std::string_view{event.message}; + if (parse_pam_auth_failure_message(message, event) + || parse_pam_sss_received_failure_message(message, event)) { + return matched_event(std::move(event)); + } + + return classify_unrecognized_event(source); +} + +} // namespace loglens::parser_internal diff --git a/src/parser/pam_handlers.hpp b/src/parser/pam_handlers.hpp new file mode 100644 index 0000000..c9b16d6 --- /dev/null +++ b/src/parser/pam_handlers.hpp @@ -0,0 +1,11 @@ +#pragma once + +#include "parser/handler_result.hpp" + +namespace loglens::parser_internal { + +HandlerResult handle_pam_unix_event(const Event& source); +HandlerResult handle_pam_faillock_event(const Event& source); +HandlerResult handle_pam_sss_event(const Event& source); + +} // namespace loglens::parser_internal diff --git a/src/parser/program_dispatch.cpp b/src/parser/program_dispatch.cpp new file mode 100644 index 0000000..883212a --- /dev/null +++ b/src/parser/program_dispatch.cpp @@ -0,0 +1,68 @@ +#include "parser/program_dispatch.hpp" + +#include "parser/failure_classifier.hpp" +#include "parser/pam_handlers.hpp" +#include "parser/sshd_handlers.hpp" +#include "parser/su_handlers.hpp" +#include "parser/sudo_handlers.hpp" + +#include +#include + +namespace loglens::parser_internal { +namespace { + +using ProgramMatcher = bool (*)(std::string_view program); +using ProgramHandler = HandlerResult (*)(const Event& source); + +struct HandlerRegistration { + ProgramMatcher matches; + ProgramHandler handle; +}; + +bool is_sshd(std::string_view program) { + return program == "sshd"; +} + +bool is_pam_unix(std::string_view program) { + return program.starts_with("pam_unix("); +} + +bool is_pam_faillock(std::string_view program) { + return program.starts_with("pam_faillock("); +} + +bool is_pam_sss(std::string_view program) { + return program.starts_with("pam_sss("); +} + +bool is_sudo(std::string_view program) { + return program == "sudo"; +} + +bool is_su(std::string_view program) { + return program == "su"; +} + +constexpr std::array handler_registry{{ + {is_sshd, handle_sshd_event}, + {is_pam_unix, handle_pam_unix_event}, + {is_pam_faillock, handle_pam_faillock_event}, + {is_pam_sss, handle_pam_sss_event}, + {is_sudo, handle_sudo_event}, + {is_su, handle_su_event}, +}}; + +} // namespace + +HandlerResult dispatch_program(const Event& source) { + for (const auto& registration : handler_registry) { + if (registration.matches(source.program)) { + return registration.handle(source); + } + } + + return classify_unrecognized_event(source); +} + +} // namespace loglens::parser_internal diff --git a/src/parser/program_dispatch.hpp b/src/parser/program_dispatch.hpp new file mode 100644 index 0000000..ab74726 --- /dev/null +++ b/src/parser/program_dispatch.hpp @@ -0,0 +1,9 @@ +#pragma once + +#include "parser/handler_result.hpp" + +namespace loglens::parser_internal { + +HandlerResult dispatch_program(const Event& source); + +} // namespace loglens::parser_internal diff --git a/src/parser/source_envelope_parser.cpp b/src/parser/source_envelope_parser.cpp new file mode 100644 index 0000000..ee2a8b4 --- /dev/null +++ b/src/parser/source_envelope_parser.cpp @@ -0,0 +1,68 @@ +#include "parser/source_envelope_parser.hpp" + +#include "parser/text_utils.hpp" +#include "parser/timestamp_parser.hpp" + +#include +#include + +namespace loglens::parser_internal { +namespace { + +void parse_program_tag(std::string_view tag, std::string& program, std::optional& pid) { + tag = trim(tag); + const auto open_bracket = tag.find('['); + if (open_bracket == std::string_view::npos || tag.empty() || tag.back() != ']') { + program.assign(tag); + pid.reset(); + return; + } + + const auto pid_token = tag.substr(open_bracket + 1, tag.size() - open_bracket - 2); + int parsed_pid = 0; + if (!parse_int(pid_token, parsed_pid)) { + program.assign(tag); + pid.reset(); + return; + } + + program.assign(tag.substr(0, open_bracket)); + pid = parsed_pid; +} + +HandlerResult parse_program_and_message(std::string_view remaining, Event event) { + const auto delimiter = remaining.find(": "); + const auto fallback_delimiter = remaining.find(':'); + const auto split_position = delimiter != std::string_view::npos ? delimiter : fallback_delimiter; + if (split_position == std::string_view::npos) { + return failed_event(ParserFailureCategory::UnknownProgram, "missing program/message delimiter"); + } + + const auto tag = remaining.substr(0, split_position); + const auto message_offset = split_position + (delimiter != std::string_view::npos ? 2 : 1); + const auto message = trim_left(remaining.substr(message_offset)); + + parse_program_tag(tag, event.program, event.pid); + event.message.assign(message); + return matched_event(std::move(event)); +} + +} // namespace + +HandlerResult parse_source_envelope(const ParserConfig& config, + std::string_view line, + std::size_t line_number) { + if (!line.empty() && line.back() == '\r') { + line.remove_suffix(1); + } + + auto remaining = line; + auto timestamp_result = parse_timestamp_and_hostname(config, remaining, line_number); + if (!timestamp_result.matched || !timestamp_result.event.has_value()) { + return timestamp_result; + } + + return parse_program_and_message(remaining, std::move(*timestamp_result.event)); +} + +} // namespace loglens::parser_internal diff --git a/src/parser/source_envelope_parser.hpp b/src/parser/source_envelope_parser.hpp new file mode 100644 index 0000000..5375213 --- /dev/null +++ b/src/parser/source_envelope_parser.hpp @@ -0,0 +1,14 @@ +#pragma once + +#include "parser/handler_result.hpp" + +#include +#include + +namespace loglens::parser_internal { + +HandlerResult parse_source_envelope(const ParserConfig& config, + std::string_view line, + std::size_t line_number); + +} // namespace loglens::parser_internal diff --git a/src/parser/sshd_handlers.cpp b/src/parser/sshd_handlers.cpp new file mode 100644 index 0000000..8a85cdf --- /dev/null +++ b/src/parser/sshd_handlers.cpp @@ -0,0 +1,264 @@ +#include "parser/sshd_handlers.hpp" + +#include "parser/failure_classifier.hpp" +#include "parser/text_utils.hpp" + +#include +#include + +namespace loglens::parser_internal { +namespace { + +bool consume_invalid_or_illegal_user_prefix(std::string_view& remaining) { + static constexpr std::string_view invalid_user_prefix = "invalid user "; + static constexpr std::string_view illegal_user_prefix = "illegal user "; + + if (remaining.starts_with(invalid_user_prefix)) { + remaining.remove_prefix(invalid_user_prefix.size()); + return true; + } + + if (remaining.starts_with(illegal_user_prefix)) { + remaining.remove_prefix(illegal_user_prefix.size()); + return true; + } + + return false; +} + +bool parse_ssh_failed_message(std::string_view message, Event& event) { + static constexpr std::string_view failed_password_prefix = "Failed password for "; + static constexpr std::string_view failed_none_prefix = "Failed none for "; + + bool failed_none = false; + std::string_view remaining; + if (message.starts_with(failed_password_prefix)) { + remaining = message.substr(failed_password_prefix.size()); + } else if (message.starts_with(failed_none_prefix)) { + failed_none = true; + remaining = message.substr(failed_none_prefix.size()); + } else { + return false; + } + + const bool invalid_user = consume_invalid_or_illegal_user_prefix(remaining); + const auto username = consume_token(remaining); + if (username.empty()) { + return false; + } + + if (failed_none && !invalid_user) { + return false; + } + + event.username.assign(username); + event.source_ip = extract_token_after(message, " from "); + event.event_type = invalid_user ? EventType::SshInvalidUser : EventType::SshFailedPassword; + return true; +} + +bool parse_ssh_accepted_message(std::string_view message, Event& event) { + static constexpr std::string_view accepted_prefix = "Accepted password for "; + if (!message.starts_with(accepted_prefix)) { + return false; + } + + auto remaining = message.substr(accepted_prefix.size()); + const auto username = consume_token(remaining); + if (username.empty()) { + return false; + } + + event.username.assign(username); + event.source_ip = extract_token_after(message, " from "); + event.event_type = EventType::SshAcceptedPassword; + return true; +} + +bool parse_ssh_accepted_publickey_message(std::string_view message, Event& event) { + static constexpr std::string_view accepted_prefix = "Accepted publickey for "; + if (!message.starts_with(accepted_prefix)) { + return false; + } + + auto remaining = message.substr(accepted_prefix.size()); + const auto username = consume_token(remaining); + if (username.empty()) { + return false; + } + + event.username.assign(username); + event.source_ip = extract_token_after(message, " from "); + event.event_type = EventType::SshAcceptedPublicKey; + return true; +} + +bool parse_ssh_accepted_keyboard_interactive_message(std::string_view message, Event& event) { + static constexpr std::string_view accepted_prefix = "Accepted keyboard-interactive/pam for "; + if (!message.starts_with(accepted_prefix)) { + return false; + } + + auto remaining = message.substr(accepted_prefix.size()); + const auto username = consume_token(remaining); + if (username.empty()) { + return false; + } + + event.username.assign(username); + event.source_ip = extract_token_after(message, " from "); + event.event_type = EventType::SshAcceptedKeyboardInteractive; + return true; +} + +bool parse_ssh_failed_publickey_message(std::string_view message, Event& event) { + static constexpr std::string_view publickey_prefix = "Failed publickey for "; + if (!message.starts_with(publickey_prefix)) { + return false; + } + + auto remaining = message.substr(publickey_prefix.size()); + consume_invalid_or_illegal_user_prefix(remaining); + const auto username = consume_token(remaining); + if (username.empty()) { + return false; + } + + event.username.assign(username); + event.source_ip = extract_token_after(message, " from "); + event.event_type = EventType::SshFailedPublicKey; + return true; +} + +bool parse_ssh_failed_keyboard_interactive_message(std::string_view message, Event& event) { + static constexpr std::string_view keyboard_prefix = "Failed keyboard-interactive/pam for "; + if (!message.starts_with(keyboard_prefix)) { + return false; + } + + auto remaining = message.substr(keyboard_prefix.size()); + const bool invalid_user = consume_invalid_or_illegal_user_prefix(remaining); + const auto username = consume_token(remaining); + if (username.empty()) { + return false; + } + + event.username.assign(username); + event.source_ip = extract_token_after(message, " from "); + event.event_type = invalid_user ? EventType::SshInvalidUser : EventType::SshFailedKeyboardInteractive; + return true; +} + +bool parse_ssh_max_auth_tries_message(std::string_view message, Event& event) { + static constexpr std::string_view max_auth_prefix = "maximum authentication attempts exceeded for "; + static constexpr std::string_view error_prefix = "error: "; + if (message.starts_with(error_prefix)) { + message.remove_prefix(error_prefix.size()); + } + + if (!message.starts_with(max_auth_prefix)) { + return false; + } + + auto remaining = message.substr(max_auth_prefix.size()); + const bool invalid_user = consume_invalid_or_illegal_user_prefix(remaining); + const auto username = consume_token(remaining); + if (username.empty()) { + return false; + } + + event.username.assign(username); + event.source_ip = extract_token_after(message, " from "); + event.event_type = invalid_user ? EventType::SshInvalidUser : EventType::SshMaxAuthTries; + return true; +} + +bool parse_ssh_pam_auth_failure_message(std::string_view message, Event& event) { + static constexpr std::string_view error_prefix = "error: "; + static constexpr std::string_view pam_auth_prefix = "PAM: Authentication failure for "; + + if (message.starts_with(error_prefix)) { + message.remove_prefix(error_prefix.size()); + } + if (!message.starts_with(pam_auth_prefix)) { + return false; + } + + auto remaining = message.substr(pam_auth_prefix.size()); + const bool invalid_user = consume_invalid_or_illegal_user_prefix(remaining); + const auto username = consume_token(remaining); + if (username.empty()) { + return false; + } + + event.username.assign(username); + event.source_ip = extract_token_after(message, " from "); + event.event_type = invalid_user ? EventType::SshInvalidUser : EventType::PamAuthFailure; + return true; +} + +bool parse_ssh_input_userauth_request_message(std::string_view message, Event& event) { + static constexpr std::string_view input_userauth_prefix = "input_userauth_request: "; + if (!message.starts_with(input_userauth_prefix)) { + return false; + } + + auto remaining = message.substr(input_userauth_prefix.size()); + if (!consume_invalid_or_illegal_user_prefix(remaining)) { + return false; + } + + const auto username = consume_token(remaining); + if (username.empty()) { + return false; + } + + event.username.assign(username); + event.event_type = EventType::SshInvalidUser; + return true; +} + +bool parse_ssh_invalid_user_message(std::string_view message, Event& event) { + static constexpr std::string_view invalid_user_prefix = "Invalid user "; + static constexpr std::string_view illegal_user_prefix = "Illegal user "; + if (!message.starts_with(invalid_user_prefix) && !message.starts_with(illegal_user_prefix)) { + return false; + } + + auto remaining = message.starts_with(invalid_user_prefix) + ? message.substr(invalid_user_prefix.size()) + : message.substr(illegal_user_prefix.size()); + const auto username = consume_token(remaining); + if (username.empty()) { + return false; + } + + event.username.assign(username); + event.source_ip = extract_token_after(message, " from "); + event.event_type = EventType::SshInvalidUser; + return true; +} + +} // namespace + +HandlerResult handle_sshd_event(const Event& source) { + Event event = source; + const auto message = std::string_view{event.message}; + + if (parse_ssh_failed_message(message, event) + || parse_ssh_accepted_message(message, event) + || parse_ssh_accepted_publickey_message(message, event) + || parse_ssh_accepted_keyboard_interactive_message(message, event) + || parse_ssh_failed_publickey_message(message, event) + || parse_ssh_failed_keyboard_interactive_message(message, event) + || parse_ssh_max_auth_tries_message(message, event) + || parse_ssh_pam_auth_failure_message(message, event) + || parse_ssh_input_userauth_request_message(message, event) + || parse_ssh_invalid_user_message(message, event)) { + return matched_event(std::move(event)); + } + + return classify_unrecognized_event(source); +} + +} // namespace loglens::parser_internal diff --git a/src/parser/sshd_handlers.hpp b/src/parser/sshd_handlers.hpp new file mode 100644 index 0000000..5075e15 --- /dev/null +++ b/src/parser/sshd_handlers.hpp @@ -0,0 +1,9 @@ +#pragma once + +#include "parser/handler_result.hpp" + +namespace loglens::parser_internal { + +HandlerResult handle_sshd_event(const Event& source); + +} // namespace loglens::parser_internal diff --git a/src/parser/su_handlers.cpp b/src/parser/su_handlers.cpp new file mode 100644 index 0000000..117ad47 --- /dev/null +++ b/src/parser/su_handlers.cpp @@ -0,0 +1,74 @@ +#include "parser/su_handlers.hpp" + +#include "parser/failure_classifier.hpp" +#include "parser/text_utils.hpp" + +#include +#include + +namespace loglens::parser_internal { +namespace { + +bool parse_su_message(std::string_view message, Event& event) { + static constexpr std::string_view failed_prefix = "FAILED SU (to "; + static constexpr std::string_view success_prefix = "Successful su for "; + + if (message.starts_with(failed_prefix)) { + const auto close_target = message.find(") "); + if (close_target == std::string_view::npos) { + return false; + } + + auto remaining = message.substr(close_target + 2); + const auto location_marker = remaining.find(" on "); + if (location_marker != std::string_view::npos) { + remaining = remaining.substr(0, location_marker); + } + + const auto actor = trim(remaining); + if (actor.empty()) { + return false; + } + + event.username.assign(actor); + event.event_type = EventType::SuAuthFailure; + return true; + } + + if (message.starts_with(success_prefix)) { + const auto by_position = message.find(" by "); + if (by_position == std::string_view::npos) { + return false; + } + + auto actor = message.substr(by_position + std::string_view{" by "}.size()); + const auto actor_end = actor.find_first_of("( "); + if (actor_end != std::string_view::npos) { + actor = actor.substr(0, actor_end); + } + + actor = trim(actor); + if (actor.empty()) { + return false; + } + + event.username.assign(actor); + event.event_type = EventType::SessionOpened; + return true; + } + + return false; +} + +} // namespace + +HandlerResult handle_su_event(const Event& source) { + Event event = source; + if (parse_su_message(event.message, event)) { + return matched_event(std::move(event)); + } + + return classify_unrecognized_event(source); +} + +} // namespace loglens::parser_internal diff --git a/src/parser/su_handlers.hpp b/src/parser/su_handlers.hpp new file mode 100644 index 0000000..7248ff3 --- /dev/null +++ b/src/parser/su_handlers.hpp @@ -0,0 +1,9 @@ +#pragma once + +#include "parser/handler_result.hpp" + +namespace loglens::parser_internal { + +HandlerResult handle_su_event(const Event& source); + +} // namespace loglens::parser_internal diff --git a/src/parser/sudo_handlers.cpp b/src/parser/sudo_handlers.cpp new file mode 100644 index 0000000..bad42b6 --- /dev/null +++ b/src/parser/sudo_handlers.cpp @@ -0,0 +1,56 @@ +#include "parser/sudo_handlers.hpp" + +#include "parser/failure_classifier.hpp" +#include "parser/text_utils.hpp" + +#include +#include + +namespace loglens::parser_internal { +namespace { + +bool parse_sudo_message(std::string_view message, Event& event) { + auto remaining = trim_left(message); + const auto separator = remaining.find(':'); + if (separator == std::string_view::npos) { + return false; + } + + const auto username = trim(remaining.substr(0, separator)); + if (username.empty()) { + return false; + } + + event.username.assign(username); + const auto details = trim_left(remaining.substr(separator + 1)); + if (details.find("incorrect password attempt") != std::string_view::npos) { + event.event_type = EventType::SudoAuthFailure; + return true; + } + + if (details.find("user NOT in sudoers") != std::string_view::npos + || details.find("command not allowed") != std::string_view::npos) { + event.event_type = EventType::SudoPolicyDenied; + return true; + } + + if (details.find("COMMAND=") == std::string_view::npos) { + return false; + } + + event.event_type = EventType::SudoCommand; + return true; +} + +} // namespace + +HandlerResult handle_sudo_event(const Event& source) { + Event event = source; + if (parse_sudo_message(event.message, event)) { + return matched_event(std::move(event)); + } + + return classify_unrecognized_event(source); +} + +} // namespace loglens::parser_internal diff --git a/src/parser/sudo_handlers.hpp b/src/parser/sudo_handlers.hpp new file mode 100644 index 0000000..3219647 --- /dev/null +++ b/src/parser/sudo_handlers.hpp @@ -0,0 +1,9 @@ +#pragma once + +#include "parser/handler_result.hpp" + +namespace loglens::parser_internal { + +HandlerResult handle_sudo_event(const Event& source); + +} // namespace loglens::parser_internal diff --git a/src/parser/text_utils.cpp b/src/parser/text_utils.cpp new file mode 100644 index 0000000..b068e24 --- /dev/null +++ b/src/parser/text_utils.cpp @@ -0,0 +1,108 @@ +#include "parser/text_utils.hpp" + +#include +#include + +namespace loglens::parser_internal { + +std::string_view trim_left(std::string_view value) { + while (!value.empty() && std::isspace(static_cast(value.front())) != 0) { + value.remove_prefix(1); + } + return value; +} + +std::string_view trim(std::string_view value) { + value = trim_left(value); + while (!value.empty() && std::isspace(static_cast(value.back())) != 0) { + value.remove_suffix(1); + } + return value; +} + +std::string_view consume_token(std::string_view& input) { + input = trim_left(input); + if (input.empty()) { + return {}; + } + + const auto separator = input.find(' '); + if (separator == std::string_view::npos) { + const auto token = input; + input = {}; + return token; + } + + const auto token = input.substr(0, separator); + input.remove_prefix(separator + 1); + return token; +} + +bool parse_int(std::string_view token, int& value) { + const auto* begin = token.data(); + const auto* end = token.data() + token.size(); + const auto result = std::from_chars(begin, end, value); + return result.ec == std::errc{} && result.ptr == end; +} + +std::string extract_token_after(std::string_view input, std::string_view marker) { + const auto marker_position = input.find(marker); + if (marker_position == std::string_view::npos) { + return {}; + } + + auto remaining = input.substr(marker_position + marker.size()); + return std::string(consume_token(remaining)); +} + +std::string extract_kv_value(std::string_view input, std::string_view key) { + std::size_t search_position = 0; + while (search_position < input.size()) { + const auto key_position = input.find(key, search_position); + if (key_position == std::string_view::npos) { + return {}; + } + + if (key_position == 0 + || std::isspace(static_cast(input[key_position - 1])) != 0 + || input[key_position - 1] == ';') { + auto remaining = input.substr(key_position + key.size()); + const auto end = remaining.find_first_of(" ;"); + if (end != std::string_view::npos) { + remaining = remaining.substr(0, end); + } + return std::string(remaining); + } + + search_position = key_position + key.size(); + } + + return {}; +} + +std::string sanitize_pattern_label(std::string_view value) { + std::string normalized; + normalized.reserve(value.size()); + + bool previous_was_separator = false; + for (const char character : value) { + if (std::isalnum(static_cast(character)) != 0) { + normalized.push_back(static_cast(std::tolower(static_cast(character)))); + previous_was_separator = false; + continue; + } + + if (!normalized.empty() && !previous_was_separator) { + normalized.push_back('_'); + previous_was_separator = true; + } + } + + while (!normalized.empty() && normalized.back() == '_') { + normalized.pop_back(); + } + + return normalized.empty() ? "unknown_pattern" : normalized; +} + +} // namespace loglens::parser_internal diff --git a/src/parser/text_utils.hpp b/src/parser/text_utils.hpp new file mode 100644 index 0000000..733c1f5 --- /dev/null +++ b/src/parser/text_utils.hpp @@ -0,0 +1,16 @@ +#pragma once + +#include +#include + +namespace loglens::parser_internal { + +std::string_view trim_left(std::string_view value); +std::string_view trim(std::string_view value); +std::string_view consume_token(std::string_view& input); +bool parse_int(std::string_view token, int& value); +std::string extract_token_after(std::string_view input, std::string_view marker); +std::string extract_kv_value(std::string_view input, std::string_view key); +std::string sanitize_pattern_label(std::string_view value); + +} // namespace loglens::parser_internal diff --git a/src/parser/timestamp_parser.cpp b/src/parser/timestamp_parser.cpp new file mode 100644 index 0000000..1cbb84b --- /dev/null +++ b/src/parser/timestamp_parser.cpp @@ -0,0 +1,248 @@ +#include "parser/timestamp_parser.hpp" + +#include "parser/text_utils.hpp" + +#include +#include +#include +#include +#include + +namespace loglens::parser_internal { +namespace { + +struct ClockTime { + int hour = 0; + int minute = 0; + int second = 0; +}; + +bool parse_month(std::string_view token, unsigned& month_index) { + static constexpr std::array months = { + "Jan", "Feb", "Mar", "Apr", "May", "Jun", + "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"}; + + for (std::size_t index = 0; index < months.size(); ++index) { + if (months[index] == token) { + month_index = static_cast(index + 1); + return true; + } + } + + return false; +} + +bool parse_clock_token(std::string_view token, ClockTime& time) { + if (token.size() < 8 || token[2] != ':' || token[5] != ':') { + return false; + } + + if (!parse_int(token.substr(0, 2), time.hour) + || !parse_int(token.substr(3, 2), time.minute) + || !parse_int(token.substr(6, 2), time.second)) { + return false; + } + + if (token.size() == 8) { + return time.hour >= 0 && time.hour <= 23 + && time.minute >= 0 && time.minute <= 59 + && time.second >= 0 && time.second <= 59; + } + + if (token[8] != '.' || token.size() == 9) { + return false; + } + + for (std::size_t index = 9; index < token.size(); ++index) { + if (std::isdigit(static_cast(token[index])) == 0) { + return false; + } + } + + return time.hour >= 0 && time.hour <= 23 + && time.minute >= 0 && time.minute <= 59 + && time.second >= 0 && time.second <= 59; +} + +std::optional build_timestamp(int year_value, + unsigned month_index, + int day_value, + const ClockTime& time, + std::chrono::minutes offset = std::chrono::minutes{0}) { + using namespace std::chrono; + + const year_month_day date{year{year_value}, month{month_index}, day{static_cast(day_value)}}; + if (!date.ok()) { + return std::nullopt; + } + + const auto timestamp = sys_days{date} + + hours{time.hour} + + minutes{time.minute} + + seconds{time.second}; + return timestamp - offset; +} + +bool parse_calendar_date_parts(std::string_view token, + int& year_value, + unsigned& month_index, + int& day_value) { + int parsed_month = 0; + if (token.size() != 10 || token[4] != '-' || token[7] != '-') { + return false; + } + + return parse_int(token.substr(0, 4), year_value) + && parse_int(token.substr(5, 2), parsed_month) + && parse_int(token.substr(8, 2), day_value) + && parsed_month >= 1 && parsed_month <= 12 + && (month_index = static_cast(parsed_month), true); +} + +bool parse_timezone_token(std::string_view token, std::chrono::minutes& offset) { + using namespace std::chrono; + + if (token == "UTC" || token == "GMT" || token == "Z") { + offset = minutes{0}; + return true; + } + + if (token.size() != 5 && token.size() != 6) { + return false; + } + + if (token.front() != '+' && token.front() != '-') { + return false; + } + + const bool negative = token.front() == '-'; + const auto digits = token.substr(1); + int parsed_hours = 0; + int minutes_value = 0; + + if (digits.size() == 4) { + if (!parse_int(digits.substr(0, 2), parsed_hours) + || !parse_int(digits.substr(2, 2), minutes_value)) { + return false; + } + } else { + if (digits[2] != ':' + || !parse_int(digits.substr(0, 2), parsed_hours) + || !parse_int(digits.substr(3, 2), minutes_value)) { + return false; + } + } + + if (parsed_hours < 0 || parsed_hours > 23 || minutes_value < 0 || minutes_value > 59) { + return false; + } + + offset = std::chrono::hours{parsed_hours} + minutes{minutes_value}; + if (negative) { + offset = -offset; + } + return true; +} + +HandlerResult parse_syslog_timestamp(const ParserConfig& config, + std::string_view& remaining, + std::size_t line_number) { + if (!config.assumed_year.has_value()) { + return failed_event( + ParserFailureCategory::UnknownTimestamp, + "syslog_legacy mode requires assume_year"); + } + + const auto month_token = consume_token(remaining); + const auto day_token = consume_token(remaining); + const auto time_token = consume_token(remaining); + const auto hostname_token = consume_token(remaining); + + if (month_token.empty() || day_token.empty() || time_token.empty() || hostname_token.empty()) { + return failed_event(ParserFailureCategory::UnknownTimestamp, "missing syslog header fields"); + } + + unsigned month_index = 0; + int day_value = 0; + ClockTime time; + + if (!parse_month(month_token, month_index)) { + return failed_event(ParserFailureCategory::UnknownTimestamp, "invalid month token"); + } + if (!parse_int(day_token, day_value)) { + return failed_event(ParserFailureCategory::UnknownTimestamp, "invalid day token"); + } + if (!parse_clock_token(time_token, time)) { + return failed_event(ParserFailureCategory::UnknownTimestamp, "invalid time token"); + } + + const auto timestamp = build_timestamp(*config.assumed_year, month_index, day_value, time); + if (!timestamp.has_value()) { + return failed_event(ParserFailureCategory::UnknownTimestamp, "invalid calendar date"); + } + + Event event; + event.timestamp = *timestamp; + event.hostname.assign(hostname_token); + event.line_number = line_number; + return matched_event(std::move(event)); +} + +HandlerResult parse_journalctl_timestamp(std::string_view& remaining, std::size_t line_number) { + const auto weekday_token = consume_token(remaining); + const auto date_token = consume_token(remaining); + const auto time_token = consume_token(remaining); + const auto timezone_token = consume_token(remaining); + const auto hostname_token = consume_token(remaining); + + if (weekday_token.empty() || date_token.empty() || time_token.empty() + || timezone_token.empty() || hostname_token.empty()) { + return failed_event( + ParserFailureCategory::UnknownTimestamp, + "missing journalctl short-full header fields"); + } + + int year_value = 0; + unsigned month_index = 0; + int day_value = 0; + ClockTime time; + std::chrono::minutes timezone_offset{0}; + + if (!parse_calendar_date_parts(date_token, year_value, month_index, day_value)) { + return failed_event(ParserFailureCategory::UnknownTimestamp, "invalid journalctl date token"); + } + if (!parse_clock_token(time_token, time)) { + return failed_event(ParserFailureCategory::UnknownTimestamp, "invalid time token"); + } + if (!parse_timezone_token(timezone_token, timezone_offset)) { + return failed_event(ParserFailureCategory::UnknownTimestamp, "invalid timezone token"); + } + + const auto timestamp = build_timestamp(year_value, month_index, day_value, time, timezone_offset); + if (!timestamp.has_value()) { + return failed_event(ParserFailureCategory::UnknownTimestamp, "invalid calendar date"); + } + + Event event; + event.timestamp = *timestamp; + event.hostname.assign(hostname_token); + event.line_number = line_number; + return matched_event(std::move(event)); +} + +} // namespace + +HandlerResult parse_timestamp_and_hostname(const ParserConfig& config, + std::string_view& remaining, + std::size_t line_number) { + switch (config.input_mode) { + case InputMode::SyslogLegacy: + return parse_syslog_timestamp(config, remaining, line_number); + case InputMode::JournalctlShortFull: + return parse_journalctl_timestamp(remaining, line_number); + default: + return failed_event(ParserFailureCategory::UnknownProgram, "unsupported input mode"); + } +} + +} // namespace loglens::parser_internal diff --git a/src/parser/timestamp_parser.hpp b/src/parser/timestamp_parser.hpp new file mode 100644 index 0000000..e49211f --- /dev/null +++ b/src/parser/timestamp_parser.hpp @@ -0,0 +1,14 @@ +#pragma once + +#include "parser/handler_result.hpp" + +#include +#include + +namespace loglens::parser_internal { + +HandlerResult parse_timestamp_and_hostname(const ParserConfig& config, + std::string_view& remaining, + std::size_t line_number); + +} // namespace loglens::parser_internal diff --git a/tests/test_parser.cpp b/tests/test_parser.cpp index e3a980d..b412d24 100644 --- a/tests/test_parser.cpp +++ b/tests/test_parser.cpp @@ -1315,6 +1315,55 @@ void test_mixed_auth_corpus_fixture_file() { expect(actual == expected, "expected mixed auth parser coverage artifact to match fixture"); } +void test_program_handler_registry_routes_supported_families() { + struct RegistryCase { + std::string line; + std::string program; + loglens::EventType event_type; + }; + + const std::vector cases{ + {"Mar 10 08:20:01 example-host sshd[4101]: Failed password for user-a from 203.0.113.41 port 50101 ssh2", + "sshd", + loglens::EventType::SshFailedPassword}, + {"Mar 10 08:20:02 example-host pam_unix(sshd:auth): authentication failure; rhost=203.0.113.42 user=user-b", + "pam_unix(sshd:auth)", + loglens::EventType::PamAuthFailure}, + {"Mar 10 08:20:03 example-host pam_faillock(sshd:auth): Authentication failure for user user-c from 203.0.113.43", + "pam_faillock(sshd:auth)", + loglens::EventType::PamAuthFailure}, + {"Mar 10 08:20:04 example-host pam_sss(sshd:auth): received for user user-d: 7 (Authentication failure)", + "pam_sss(sshd:auth)", + loglens::EventType::PamAuthFailure}, + {"Mar 10 08:20:05 example-host sudo[4105]: user-e : TTY=pts/1 ; PWD=/home/user/project ; USER=root ; COMMAND=/usr/bin/id", + "sudo", + loglens::EventType::SudoCommand}, + {"Mar 10 08:20:06 example-host su[4106]: FAILED SU (to root) user-f on pts/2", + "su", + loglens::EventType::SuAuthFailure}, + }; + + const auto parser = make_syslog_parser(); + for (std::size_t index = 0; index < cases.size(); ++index) { + const auto event = parser.parse_line(cases[index].line, index + 1); + expect(event.has_value(), "expected registered program handler to emit an event"); + expect(event->program == cases[index].program, "expected registry to preserve the source program"); + expect(event->event_type == cases[index].event_type, "expected registry to select the matching handler"); + } +} + +void test_parse_stream_accepts_crlf_line_terminator() { + std::istringstream input( + "Mar 10 08:21:01 example-host pam_faillock(sshd:auth): Authentication failure for user user-a from 203.0.113.51\r\n"); + + const auto parser = make_syslog_parser(); + const auto result = parser.parse_stream(input); + + expect(result.events.size() == 1, "expected CRLF input to emit one event"); + expect(result.warnings.empty(), "expected CRLF input not to emit a malformed-IP warning"); + expect(result.events.front().source_ip == "203.0.113.51", "expected CR to be excluded from source IP"); +} + } // namespace int main() { @@ -1368,5 +1417,7 @@ int main() { test_journalctl_fixture_matrix_file(); test_noisy_auth_fixture_matrix_file(); test_mixed_auth_corpus_fixture_file(); + test_program_handler_registry_routes_supported_families(); + test_parse_stream_accepts_crlf_line_terminator(); return 0; }