From 4d049101213f987ec7ea9a95d9d08461c1d37229 Mon Sep 17 00:00:00 2001 From: Ernst Leierzopf Date: Mon, 9 Mar 2026 15:42:26 +0100 Subject: [PATCH 01/16] unfinished draft for NewEventDetector. --- docs/detectors.md | 1 + docs/detectors/new_event.md | 4 + src/detectmatelibrary/detectors/__init__.py | 3 +- .../detectors/new_event_detector.py | 104 ++++++++++++++++++ .../test_detectors/test_new_event_detector.py | 0 5 files changed, 111 insertions(+), 1 deletion(-) create mode 100644 docs/detectors/new_event.md create mode 100644 src/detectmatelibrary/detectors/new_event_detector.py create mode 100644 tests/test_detectors/test_new_event_detector.py diff --git a/docs/detectors.md b/docs/detectors.md index 5dad456..cb987f2 100644 --- a/docs/detectors.md +++ b/docs/detectors.md @@ -87,6 +87,7 @@ List of detectors: * [Random detector](detectors/random_detector.md): Generates random alerts. * [New Value](detectors/new_value.md): Detect new values in the variables in the logs. * [Combo Detector](detectors/combo.md): Detect new combination of variables in the logs. +* [New Event](detectors/new_event.md): Detect new events in the variables in the logs. ## Auto-configuration (optional) diff --git a/docs/detectors/new_event.md b/docs/detectors/new_event.md new file mode 100644 index 0000000..dcfed58 --- /dev/null +++ b/docs/detectors/new_event.md @@ -0,0 +1,4 @@ +TODO PAGE + +TODO: new_event_detector +TODO: test_new_event_detector \ No newline at end of file diff --git a/src/detectmatelibrary/detectors/__init__.py b/src/detectmatelibrary/detectors/__init__.py index 7ca736e..96ecd3c 100644 --- a/src/detectmatelibrary/detectors/__init__.py +++ b/src/detectmatelibrary/detectors/__init__.py @@ -6,5 +6,6 @@ "RandomDetectorConfig", "NewValueDetector", "NewValueDetectorConfig", - "RandomDetector" + "RandomDetector", + "NewEventDetector" ] diff --git a/src/detectmatelibrary/detectors/new_event_detector.py b/src/detectmatelibrary/detectors/new_event_detector.py new file mode 100644 index 0000000..4f0353d --- /dev/null +++ b/src/detectmatelibrary/detectors/new_event_detector.py @@ -0,0 +1,104 @@ +from detectmatelibrary.common._config._compile import generate_detector_config +from detectmatelibrary.common._config._formats import EventsConfig + +from detectmatelibrary.common.detector import CoreDetectorConfig, CoreDetector, get_configured_variables + +from detectmatelibrary.utils.persistency.event_data_structures.trackers.stability.stability_tracker import ( + EventStabilityTracker +) +from detectmatelibrary.utils.persistency.event_persistency import EventPersistency +from detectmatelibrary.utils.data_buffer import BufferMode + +from detectmatelibrary.schemas import ParserSchema, DetectorSchema + +from typing import Any + + +class NewEventDetectorConfig(CoreDetectorConfig): + method_type: str = "new_value_detector" + + events: EventsConfig | dict[str, Any] = {} + + +class NewEventDetector(CoreDetector): + """Detect new values in log data as anomalies based on learned values.""" + + def __init__( + self, + name: str = "NewValueDetector", + config: NewEventDetectorConfig = NewEventDetectorConfig() + ) -> None: + + if isinstance(config, dict): + config = NewEventDetectorConfig.from_dict(config, name) + + super().__init__(name=name, buffer_mode=BufferMode.NO_BUF, config=config) + self.config: NewEventDetectorConfig # type narrowing for IDE + self.persistency = EventPersistency( + event_data_class=EventStabilityTracker, + ) + # auto config checks if individual variables are stable to select combos from + self.auto_conf_persistency = EventPersistency( + event_data_class=EventStabilityTracker + ) + + def train(self, input_: ParserSchema) -> None: # type: ignore + """Train the detector by learning values from the input data.""" + configured_variables = get_configured_variables(input_, self.config.events) + self.persistency.ingest_event( + event_id=input_["EventID"], + event_template=input_["template"], + named_variables=configured_variables + ) + + def detect( + self, input_: ParserSchema, output_: DetectorSchema # type: ignore + ) -> bool: + """Detect new values in the input data.""" + alerts: dict[str, str] = {} + configured_variables = get_configured_variables(input_, self.config.events) + overall_score = 0.0 + + current_event_id = input_["EventID"] + known_events = self.persistency.get_events_data() + + if current_event_id in known_events: + event_tracker = known_events[current_event_id] + for var_name, multi_tracker in event_tracker.get_data().items(): + value = configured_variables.get(var_name) + if value is None: + continue + if value not in multi_tracker.unique_set: + alerts[f"EventID {current_event_id} - {var_name}"] = ( + f"Unknown value: '{value}'" + ) + overall_score += 1.0 + + if overall_score > 0: + output_["score"] = overall_score + output_["description"] = f"{self.name} detects values not encountered in training as anomalies." + output_["alertsObtain"].update(alerts) + return True + + return False + + def configure(self, input_: ParserSchema) -> None: + self.auto_conf_persistency.ingest_event( + event_id=input_["EventID"], + event_template=input_["template"], + variables=input_["variables"], + named_variables=input_["logFormatVariables"], + ) + + def set_configuration(self) -> None: + variables = {} + for event_id, tracker in self.auto_conf_persistency.get_events_data().items(): + stable_vars = tracker.get_variables_by_classification("STABLE") # type: ignore + variables[event_id] = stable_vars + config_dict = generate_detector_config( + variable_selection=variables, + detector_name=self.name, + method_type=self.config.method_type, + ) + # Update the config object from the dictionary instead of replacing it + self.config = NewEventDetectorConfig.from_dict(config_dict, self.name) diff --git a/tests/test_detectors/test_new_event_detector.py b/tests/test_detectors/test_new_event_detector.py new file mode 100644 index 0000000..e69de29 From 8fbb34dd7c9cc8da286440846982f45562df19fe Mon Sep 17 00:00:00 2001 From: Ernst Leierzopf Date: Tue, 10 Mar 2026 06:46:49 +0100 Subject: [PATCH 02/16] unfinished draft NewEventDetector. --- docs/detectors/new_event.md | 5 +- src/detectmatelibrary/common/detector.py | 1 + src/detectmatelibrary/detectors/__init__.py | 4 +- .../detectors/new_event_detector.py | 19 +- .../test_detectors/test_new_event_detector.py | 257 ++++++++++++++++++ 5 files changed, 281 insertions(+), 5 deletions(-) diff --git a/docs/detectors/new_event.md b/docs/detectors/new_event.md index dcfed58..946bdab 100644 --- a/docs/detectors/new_event.md +++ b/docs/detectors/new_event.md @@ -1,4 +1,7 @@ TODO PAGE TODO: new_event_detector -TODO: test_new_event_detector \ No newline at end of file +TODO: test_new_event_detector +- Tests need to be reworked, just copied from new_value_detector + +TODO: pipeline_config_Default.yaml diff --git a/src/detectmatelibrary/common/detector.py b/src/detectmatelibrary/common/detector.py index 18b67b9..e999441 100644 --- a/src/detectmatelibrary/common/detector.py +++ b/src/detectmatelibrary/common/detector.py @@ -52,6 +52,7 @@ def get_configured_variables( event_config = log_variables[event_id] if event_id in log_variables else None if event_config is None: return result + # print(event_id, event_config, log_variables) # Extract template variables by position if hasattr(event_config, "variables"): diff --git a/src/detectmatelibrary/detectors/__init__.py b/src/detectmatelibrary/detectors/__init__.py index 96ecd3c..c10328e 100644 --- a/src/detectmatelibrary/detectors/__init__.py +++ b/src/detectmatelibrary/detectors/__init__.py @@ -1,5 +1,6 @@ from .random_detector import RandomDetector, RandomDetectorConfig from .new_value_detector import NewValueDetector, NewValueDetectorConfig +from .new_event_detector import NewEventDetector, NewEventDetectorConfig __all__ = [ "random_detector", @@ -7,5 +8,6 @@ "NewValueDetector", "NewValueDetectorConfig", "RandomDetector", - "NewEventDetector" + "NewEventDetector", + "NewEventDetectorConfig" ] diff --git a/src/detectmatelibrary/detectors/new_event_detector.py b/src/detectmatelibrary/detectors/new_event_detector.py index 4f0353d..28cd1b4 100644 --- a/src/detectmatelibrary/detectors/new_event_detector.py +++ b/src/detectmatelibrary/detectors/new_event_detector.py @@ -15,7 +15,7 @@ class NewEventDetectorConfig(CoreDetectorConfig): - method_type: str = "new_value_detector" + method_type: str = "new_event_detector" events: EventsConfig | dict[str, Any] = {} @@ -25,7 +25,7 @@ class NewEventDetector(CoreDetector): def __init__( self, - name: str = "NewValueDetector", + name: str = "NewEventDetector", config: NewEventDetectorConfig = NewEventDetectorConfig() ) -> None: @@ -34,6 +34,7 @@ def __init__( super().__init__(name=name, buffer_mode=BufferMode.NO_BUF, config=config) self.config: NewEventDetectorConfig # type narrowing for IDE + #print(self.config, "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA") self.persistency = EventPersistency( event_data_class=EventStabilityTracker, ) @@ -45,6 +46,15 @@ def __init__( def train(self, input_: ParserSchema) -> None: # type: ignore """Train the detector by learning values from the input data.""" configured_variables = get_configured_variables(input_, self.config.events) + #print(input_) + #print(self.config.events) + #print(input_["logFormatVariables"]["Type"], self.config.events) + d = self.config.events[input_["EventID"]] + #print("bbb", d) + #print("ccc", hasattr(d, "header_variables"), d.header_variables.keys()) + #print("ccc", configured_variables) + configured_variables = {k: v for k, v in configured_variables.items() if k in d.header_variables} + print("conf", configured_variables) self.persistency.ingest_event( event_id=input_["EventID"], event_template=input_["template"], @@ -57,10 +67,13 @@ def detect( """Detect new values in the input data.""" alerts: dict[str, str] = {} configured_variables = get_configured_variables(input_, self.config.events) + #print("br", configured_variables) overall_score = 0.0 current_event_id = input_["EventID"] known_events = self.persistency.get_events_data() + #print(input_["logFormatVariables"]["Type"]) + #print(current_event_id, input_) if current_event_id in known_events: event_tracker = known_events[current_event_id] @@ -98,7 +111,7 @@ def set_configuration(self) -> None: config_dict = generate_detector_config( variable_selection=variables, detector_name=self.name, - method_type=self.config.method_type, + method_type=self.config.method_type ) # Update the config object from the dictionary instead of replacing it self.config = NewEventDetectorConfig.from_dict(config_dict, self.name) diff --git a/tests/test_detectors/test_new_event_detector.py b/tests/test_detectors/test_new_event_detector.py index e69de29..2aae691 100644 --- a/tests/test_detectors/test_new_event_detector.py +++ b/tests/test_detectors/test_new_event_detector.py @@ -0,0 +1,257 @@ +"""Tests for NewEventDetector class. + +This module tests the NewEventDetector implementation including: +- Initialization and configuration +- Training functionality to learn known values +- Detection logic for new/unknown values +- Event-specific configuration handling +- Input/output schema validation +""" + +from detectmatelibrary.detectors.new_event_detector import NewEventDetector, BufferMode +from detectmatelibrary.parsers.template_matcher import MatcherParser +from detectmatelibrary.helper.from_to import From +import detectmatelibrary.schemas as schemas + +from detectmatelibrary.utils.aux import time_test_mode + + +# Set time test mode for consistent timestamps +time_test_mode() + + +config = { + "detectors": { + # "CustomInit": { + # "method_type": "new_event_detector", + # "auto_config": False, + # "params": {}, + # "events": { + # 1: { + # "instance1": { + # "params": {}, + # "variables": [{ + # "pos": 0, "name": "sad", "params": {} + # }] + # } + # } + # } + # }, + # "MultipleDetector": { + # "method_type": "new_event_detector", + # "auto_config": False, + # "params": {}, + # "events": { + # 1: { + # "test": { + # "params": {}, + # "variables": [{ + # "pos": 1, "name": "test", "params": {} + # }], + # "header_variables": [{ + # "pos": "level", "params": {} + # }] + # } + # } + # } + # }, + "NewEventDetector": { + "method_type": "new_event_detector", + "auto_config": False, + "params": {}, + # "events": { + # 3: { + # "test": { + # "params": {}, + # #"variables": [{ + # # "pos": 1, "name": "test", "params": {} + # #}], + # "header_variables": [{ + # "pos": "Type", "params": {} + # }] + # } + # } + # } + "events": {} + } + } +} + + +# class TestNewEventDetectorInitialization: +# """Test NewEventDetector initialization and configuration.""" +# +# def test_default_initialization(self): +# """Test detector initialization with default parameters.""" +# detector = NewEventDetector() +# +# assert detector.name == "NewEventDetector" +# assert hasattr(detector, 'config') +# assert detector.data_buffer.mode == BufferMode.NO_BUF +# assert detector.input_schema == schemas.ParserSchema +# assert detector.output_schema == schemas.DetectorSchema +# assert hasattr(detector, 'persistency') +# +# def test_custom_config_initialization(self): +# """Test detector initialization with custom configuration.""" +# detector = NewEventDetector(name="CustomInit", config=config) +# +# assert detector.name == "CustomInit" +# assert hasattr(detector, 'persistency') +# assert isinstance(detector.persistency.events_data, dict) +# +# +# class TestNewEventDetectorTraining: +# """Test NewEventDetector training functionality.""" +# +# def test_train_multiple_values(self): +# """Test training with multiple different values.""" +# detector = NewEventDetector(config=config, name="MultipleDetector") +# # Train with multiple values (only event 1 should be tracked per config) +# for event in range(3): +# for level in ["INFO", "WARNING", "ERROR"]: +# parser_data = schemas.ParserSchema({ +# "parserType": "test", +# "EventID": event, +# "template": "test template", +# "variables": ["0", "assa"], +# "logID": "1", +# "parsedLogID": "1", +# "parserID": "test_parser", +# "log": "test log message", +# "logFormatVariables": {"level": level} +# }) +# detector.train(parser_data) +# +# # Only event 1 should be tracked (based on events config) +# assert len(detector.persistency.events_data) == 1 +# event_data = detector.persistency.get_event_data(1) +# assert event_data is not None +# # Check the level values +# assert "INFO" in event_data["level"].unique_set +# assert "WARNING" in event_data["level"].unique_set +# assert "ERROR" in event_data["level"].unique_set +# # Check the variable at position 1 (named "test") +# assert "assa" in event_data["test"].unique_set +# +# +# class TestNewEventDetectorDetection: +# """Test NewEventDetector detection functionality.""" +# +# def test_detect_known_value_no_alert(self): +# detector = NewEventDetector(config=config, name="MultipleDetector") +# +# # Train with a value +# train_data = schemas.ParserSchema({ +# "parserType": "test", +# "EventID": 1, +# "template": "test template", +# "variables": ["adsasd", "asdasd"], +# "logID": "1", +# "parsedLogID": "1", +# "parserID": "test_parser", +# "log": "test log message", +# "logFormatVariables": {"level": "INFO"} +# }) +# detector.train(train_data) +# +# # Detect with the same value +# test_data = schemas.ParserSchema({ +# "parserType": "test", +# "EventID": 12, +# "template": "test template", +# "variables": ["adsasd"], +# "logID": "2", +# "parsedLogID": "2", +# "parserID": "test_parser", +# "log": "test log message", +# "logFormatVariables": {"level": "CRITICAL"} +# }) +# output = schemas.DetectorSchema() +# +# result = detector.detect(test_data, output) +# +# assert not result +# assert output.score == 0.0 +# +# def test_detect_known_value_alert(self): +# detector = NewEventDetector(config=config, name="MultipleDetector") +# +# # Train with a value +# train_data = schemas.ParserSchema({ +# "parserType": "test", +# "EventID": 1, +# "template": "test template", +# "variables": ["adsasd", "asdasd"], +# "logID": "1", +# "parsedLogID": "1", +# "parserID": "test_parser", +# "log": "test log message", +# "logFormatVariables": {"level": "INFO"} +# }) +# detector.train(train_data) +# +# # Detect with the same value +# test_data = schemas.ParserSchema({ +# "parserType": "test", +# "EventID": 1, +# "template": "test template", +# "variables": ["adsasd", "asdasd"], +# "logID": "2", +# "parsedLogID": "2", +# "parserID": "test_parser", +# "log": "test log message", +# "logFormatVariables": {"level": "CRITICAL"} +# }) +# output = schemas.DetectorSchema() +# +# result = detector.detect(test_data, output) +# +# assert result +# assert output.score == 1.0 +# +# +_PARSER_CONFIG = { + "parsers": { + "MatcherParser": { + "method_type": "matcher_parser", + "auto_config": False, + "log_format": "type= msg=audit(