diff --git a/.github/ISSUE_TEMPLATE/01_bug_report.md b/.github/ISSUE_TEMPLATE/01_bug_report.md new file mode 100644 index 0000000..8495c6c --- /dev/null +++ b/.github/ISSUE_TEMPLATE/01_bug_report.md @@ -0,0 +1,21 @@ +--- +name: 🐜 Bug report +about: If something isn't working 🔧 +--- + +### Subject of the issue +Describe your issue here. + +### Your environment +* Version of detectmate +* Version of python +* Docker or manual installation? + +### Steps to reproduce +Tell us how to reproduce this issue. + +### Expected behaviour +Tell us what should happen + +### Actual behaviour +Tell us what happens instead diff --git a/.github/ISSUE_TEMPLATE/02_feature_request.md b/.github/ISSUE_TEMPLATE/02_feature_request.md new file mode 100644 index 0000000..442b05c --- /dev/null +++ b/.github/ISSUE_TEMPLATE/02_feature_request.md @@ -0,0 +1,20 @@ +--- +name: 🚀 Feature request +about: If you have a feature request 💡 +--- + +**Context** + +What are you trying to do and how would you want to do it differently? Is it something you currently cannot do? Is this related to an issue/problem? + +**Alternatives** + +Can you achieve the same result doing it in an alternative way? Is the alternative considerable? + +**Has the feature been requested before?** + +Please provide a link to the issue. + +**If the feature request is approved, would you be willing to submit a PR?** + +Yes / No _(Help can be provided if you need assistance submitting a PR)_ diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml new file mode 100644 index 0000000..3ba13e0 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -0,0 +1 @@ +blank_issues_enabled: false diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md new file mode 100644 index 0000000..53dec30 --- /dev/null +++ b/.github/pull_request_template.md @@ -0,0 +1,19 @@ +# Task + + +# Description + + + + +# How Has This Been Tested? 
+ + +# Checklist + + +- [ ] This Pull-Request goes to the **development** branch. +- [ ] I have successfully run prek locally. +- [ ] I have added tests to cover my changes. +- [ ] I have linked the issue-id to the task-description. +- [ ] I have performed a self-review of my own code. diff --git a/.gitignore b/.gitignore index 3113ce1..b0a557c 100644 --- a/.gitignore +++ b/.gitignore @@ -199,3 +199,6 @@ cython_debug/ local/ test.ipynb test.py + +# claude code +CLAUDE.md diff --git a/SECURITY.md b/SECURITY.md new file mode 100644 index 0000000..e7cbac5 --- /dev/null +++ b/SECURITY.md @@ -0,0 +1,32 @@ +# Security Policy + +## Supported Versions + +| Version | Supported | +| ------- | ------------------ | +| 1.x.x | :white_check_mark: | +| < 1.0.0 | :x: | + +> [!IMPORTANT] +> Currently DetectMateService is a work in progress and heavily under development. Possible vulnerabilities will not receive any special treatment and can be reported using [GitHub-Issues](https://github.com/ait-detectmate/DetectMateService/issues) + +## Reporting a Vulnerability + +Please email reports about any security related issues you find to aecid@ait.ac.at. This mail is delivered to a small developer team. Your email will be acknowledged within one business day, and you'll receive a more detailed response to your email within 7 days indicating the next steps in handling your report. + +Please use a descriptive subject line for your report email. After the initial reply to your report, our team will endeavor to keep you informed of the progress being made towards a fix and announcement. + +In addition, please include the following information along with your report: + +* Your name and affiliation (if any). +* A description of the technical details of the vulnerabilities. It is very important to let us know how we can reproduce your findings. +* An explanation of who can exploit this vulnerability, and what they gain when doing so -- write an attack scenario. 
This will help us evaluate your report quickly, especially if the issue is complex. +* Whether this vulnerability is public or known to third parties. If it is, please provide details. +* Whether we could mention your name in the changelogs. + +Once an issue is reported we use the following disclosure process: + +* When a report is received, we confirm the issue and determine its severity. +* If we know of specific third-party services or software based on DetectMateService that require mitigation before publication, those projects will be notified. +* Fixes are prepared for the last minor release of the latest major release. +* Patch releases are published for all fixed released versions. diff --git a/config/pipeline_config_default.yaml b/config/pipeline_config_default.yaml index 0475495..4271752 100644 --- a/config/pipeline_config_default.yaml +++ b/config/pipeline_config_default.yaml @@ -68,8 +68,6 @@ detectors: NewValueComboDetector: method_type: new_value_combo_detector auto_config: False - params: - comb_size: 3 events: 1: test: diff --git a/docs/detectors.md b/docs/detectors.md index 3366ab7..7625b9d 100644 --- a/docs/detectors.md +++ b/docs/detectors.md @@ -87,6 +87,7 @@ List of detectors: * [Random detector](detectors/random_detector.md): Generates random alerts. * [New Value](detectors/new_value.md): Detect new values in the variables in the logs. * [Combo Detector](detectors/combo.md): Detect new combination of variables in the logs. +* [New Event](detectors/new_event.md): Detect new events in the variables in the logs. 
## Configuration @@ -192,7 +193,7 @@ The `set_configuration()` method queries the tracker results and generates the f def set_configuration(self): variables = {} for event_id, tracker in self.auto_conf_persistency.get_events_data().items(): - stable_vars = tracker.get_variables_by_classification("STABLE") + stable_vars = tracker.get_features_by_classification("STABLE") variables[event_id] = stable_vars config_dict = generate_detector_config( diff --git a/docs/detectors/combo.md b/docs/detectors/combo.md index 0272668..1440167 100644 --- a/docs/detectors/combo.md +++ b/docs/detectors/combo.md @@ -18,7 +18,7 @@ detectors: method_type: new_value_combo_detector auto_config: False params: - comb_size: 3 + max_combo_size: 3 events: 1: test: diff --git a/docs/detectors/new_event.md b/docs/detectors/new_event.md new file mode 100644 index 0000000..946bdab --- /dev/null +++ b/docs/detectors/new_event.md @@ -0,0 +1,7 @@ +TODO PAGE + +TODO: new_event_detector +TODO: test_new_event_detector +- Tests need to be reworked, just copied from new_value_detector + +TODO: pipeline_config_Default.yaml diff --git a/src/detectmatelibrary/common/_config/_compile.py b/src/detectmatelibrary/common/_config/_compile.py index 5a864e8..0f01d5d 100644 --- a/src/detectmatelibrary/common/_config/_compile.py +++ b/src/detectmatelibrary/common/_config/_compile.py @@ -142,7 +142,7 @@ def generate_detector_config( detector_name: Name of the detector, used as the base instance_id. method_type: Type of detection method (e.g., "new_value_detector"). **additional_params: Additional parameters for the detector's params - dict (e.g., comb_size=3). + dict (e.g., max_combo_size=3). Returns: Dictionary with structure compatible with detector config classes. 
@@ -162,7 +162,7 @@ def generate_detector_config( variable_selection={1: [("username", "src_ip"), ("var_0", "var_1")]}, detector_name="MyDetector", method_type="new_value_combo_detector", - comb_size=2, + max_combo_size=2, ) """ var_pattern = re.compile(r"^var_(\d+)$") diff --git a/src/detectmatelibrary/common/_config/_formats.py b/src/detectmatelibrary/common/_config/_formats.py index 564d5e5..dcee7e2 100644 --- a/src/detectmatelibrary/common/_config/_formats.py +++ b/src/detectmatelibrary/common/_config/_formats.py @@ -6,16 +6,17 @@ # Sub-formats ********************************************************+ class Variable(BaseModel): - pos: int - name: str + pos: str | int + name: str = "" params: Dict[str, Any] = {} def to_dict(self) -> Dict[str, Any]: """Convert Variable to YAML-compatible dictionary.""" result: Dict[str, Any] = { "pos": self.pos, - "name": self.name, } + if self.name: + result["name"] = self.name if self.params: result["params"] = self.params return result @@ -38,7 +39,7 @@ def to_dict(self) -> Dict[str, Any]: class _EventInstance(BaseModel): """Configuration for a specific instance within an event.""" params: Dict[str, Any] = {} - variables: Dict[int, Variable] = {} + variables: Dict[str | int, Variable] = {} header_variables: Dict[str, Header] = {} @classmethod @@ -79,7 +80,7 @@ def _init(cls, instances_dict: Dict[str, Dict[str, Any]]) -> "_EventConfig": return cls(instances=instances) @property - def variables(self) -> Dict[int, Variable]: + def variables(self) -> Dict[str | int, Variable]: """Pass-through to first instance for compatibility.""" if self.instances: return next(iter(self.instances.values())).variables diff --git a/src/detectmatelibrary/common/_core_op/_fit_logic.py b/src/detectmatelibrary/common/_core_op/_fit_logic.py index c702524..7a4aa61 100644 --- a/src/detectmatelibrary/common/_core_op/_fit_logic.py +++ b/src/detectmatelibrary/common/_core_op/_fit_logic.py @@ -74,6 +74,9 @@ def __init__( self._configuration_done = False 
self.config_finished = False + self._training_done = False + self.training_finished = False + self.data_use_configure = data_use_configure self.data_use_training = data_use_training @@ -84,6 +87,13 @@ def finish_config(self) -> bool: return False + def finish_training(self) -> bool: + if self._training_done and not self.training_finished: + self.training_finished = True + return True + + return False + def run(self) -> FitLogicState: if do_configure( data_use_configure=self.data_use_configure, @@ -103,5 +113,7 @@ def run(self) -> FitLogicState: ): self.data_used_train += 1 return FitLogicState.DO_TRAIN + elif self.data_used_train > 0 and not self._training_done: + self._training_done = True return FitLogicState.NOTHING \ No newline at end of file diff --git a/src/detectmatelibrary/common/core.py b/src/detectmatelibrary/common/core.py index 34d5a00..02afb3c 100644 --- a/src/detectmatelibrary/common/core.py +++ b/src/detectmatelibrary/common/core.py @@ -54,6 +54,9 @@ def configure( def set_configuration(self) -> None: pass + def post_train(self) -> None: + pass + def get_config(self) -> Dict[str, Any]: return self.config.get_config() @@ -100,6 +103,9 @@ def process(self, data: BaseSchema | bytes) -> BaseSchema | bytes | None: if fit_state == FitLogicState.DO_TRAIN: logger.info(f"<<{self.name}>> use data for training") self.train(input_=data_buffered) + elif self.fitlogic.finish_training(): + logger.info(f"<<{self.name}>> finalizing training") + self.post_train() output_ = self.output_schema() logger.info(f"<<{self.name}>> processing data") diff --git a/src/detectmatelibrary/common/detector.py b/src/detectmatelibrary/common/detector.py index e224f68..2be7153 100644 --- a/src/detectmatelibrary/common/detector.py +++ b/src/detectmatelibrary/common/detector.py @@ -3,6 +3,7 @@ from detectmatelibrary.utils.data_buffer import ArgsBuffer, BufferMode from detectmatelibrary.utils.aux import get_timestamp +from detectmatelibrary.utils.persistency.event_persistency import 
EventPersistency from detectmatelibrary.schemas import ParserSchema, DetectorSchema @@ -10,6 +11,7 @@ from typing import Dict, List, Optional, Any from detectmatelibrary.utils.time_format_handler import TimeFormatHandler +from tools.logging import logger _time_handler = TimeFormatHandler() @@ -56,7 +58,7 @@ def get_configured_variables( # Extract template variables by position if hasattr(event_config, "variables"): for pos, var in event_config.variables.items(): - if pos < len(input_["variables"]): + if isinstance(pos, int) and pos < len(input_["variables"]): result[var.name] = input_["variables"][pos] # Extract header/log format variables by name @@ -89,6 +91,45 @@ def get_global_variables( return result +def validate_config_coverage( + detector_name: str, + config_events: EventsConfig | dict[str, Any], + persistency: EventPersistency, +) -> None: + """Log warnings when configured EventIDs or variables have no training + data. + + Args: + detector_name: Name of the detector (used in warning messages). + config_events: The detector's events configuration. + persistency: The persistency object populated during training. + """ + config_ids = ( + config_events.events.keys() + if isinstance(config_events, EventsConfig) + else config_events.keys() + ) + if not config_ids: + return + + events_seen = persistency.get_events_seen() + events_with_data = set(persistency.get_events_data().keys()) + + for event_id in config_ids: + if event_id not in events_seen: + logger.warning( + f"[{detector_name}] EventID {event_id!r} is configured but was " + "never observed in training data. Verify that EventIDs in your " + "config match those produced by the parser." + ) + elif event_id not in events_with_data: + logger.warning( + f"[{detector_name}] EventID {event_id!r} was observed in training " + "data but no configured variables were extracted. Verify that " + "variable names/positions in your config match those in the data." 
+ ) + + class CoreDetectorConfig(CoreConfig): component_type: str = "detectors" method_type: str = "core_detector" @@ -158,3 +199,7 @@ def configure( @override def set_configuration(self) -> None: pass + + @override + def post_train(self) -> None: + pass diff --git a/src/detectmatelibrary/detectors/__init__.py b/src/detectmatelibrary/detectors/__init__.py index 7ca736e..c10328e 100644 --- a/src/detectmatelibrary/detectors/__init__.py +++ b/src/detectmatelibrary/detectors/__init__.py @@ -1,10 +1,13 @@ from .random_detector import RandomDetector, RandomDetectorConfig from .new_value_detector import NewValueDetector, NewValueDetectorConfig +from .new_event_detector import NewEventDetector, NewEventDetectorConfig __all__ = [ "random_detector", "RandomDetectorConfig", "NewValueDetector", "NewValueDetectorConfig", - "RandomDetector" + "RandomDetector", + "NewEventDetector", + "NewEventDetectorConfig" ] diff --git a/src/detectmatelibrary/detectors/new_event_detector.py b/src/detectmatelibrary/detectors/new_event_detector.py new file mode 100644 index 0000000..51d345d --- /dev/null +++ b/src/detectmatelibrary/detectors/new_event_detector.py @@ -0,0 +1,105 @@ +from detectmatelibrary.common._config._compile import generate_detector_config +from detectmatelibrary.common._config._formats import EventsConfig + +from detectmatelibrary.common.detector import CoreDetectorConfig, CoreDetector, get_configured_variables + +from detectmatelibrary.utils.persistency.event_data_structures.trackers.stability.stability_tracker import ( + EventStabilityTracker +) +from detectmatelibrary.utils.persistency.event_persistency import EventPersistency +from detectmatelibrary.utils.data_buffer import BufferMode + +from detectmatelibrary.schemas import ParserSchema, DetectorSchema + +from typing import Any + + +class NewEventDetectorConfig(CoreDetectorConfig): + method_type: str = "new_event_detector" + + events: EventsConfig | dict[str, Any] = {} + + +class NewEventDetector(CoreDetector): + 
"""Detect new values in log data as anomalies based on learned values.""" + + def __init__( + self, + name: str = "NewEventDetector", + config: NewEventDetectorConfig = NewEventDetectorConfig() + ) -> None: + + if isinstance(config, dict): + config = NewEventDetectorConfig.from_dict(config, name) + + super().__init__(name=name, buffer_mode=BufferMode.NO_BUF, config=config) + self.config: NewEventDetectorConfig # type narrowing for IDE + self.persistency = EventPersistency( + event_data_class=EventStabilityTracker, + ) + # auto config checks if individual variables are stable to select combos from + self.auto_conf_persistency = EventPersistency( + event_data_class=EventStabilityTracker + ) + + def train(self, input_: ParserSchema) -> None: # type: ignore + """Train the detector by learning values from the input data.""" + configured_variables = get_configured_variables(input_, self.config.events) + self.persistency.ingest_event( + event_id=input_["EventID"], + event_template=input_["template"], + named_variables=configured_variables + ) + + def detect( + self, input_: ParserSchema, output_: DetectorSchema # type: ignore + ) -> bool: + """Detect new values in the input data.""" + alerts: dict[str, str] = {} + configured_variables = get_configured_variables(input_, self.config.events) + overall_score = 0.0 + + current_event_id = input_["EventID"] + known_events = self.persistency.get_events_data() + + if current_event_id in known_events: + event_tracker = known_events[current_event_id] + for var_name, multi_tracker in event_tracker.get_data().items(): + value = configured_variables.get(var_name) + if value is None: + continue + if value not in multi_tracker.unique_set: + alerts[f"EventID {current_event_id} - {var_name}"] = ( + f"Unknown value: '{value}'" + ) + overall_score += 1.0 + + if overall_score > 0: + output_["score"] = overall_score + output_["description"] = f"{self.name} detects values not encountered in training as anomalies." 
+ output_["alertsObtain"].update(alerts) + return True + + return False + + def configure(self, input_: ParserSchema) -> None: # type: ignore + self.auto_conf_persistency.ingest_event( + event_id=input_["EventID"], + event_template=input_["template"], + variables=input_["variables"], + named_variables=input_["logFormatVariables"], + ) + + def set_configuration(self) -> None: + variables = {} + for event_id, tracker in self.auto_conf_persistency.get_events_data().items(): + classified_vars = (tracker.get_features_by_classification("STABLE") + # type: ignore + tracker.get_features_by_classification("STATIC")) # type: ignore + variables[event_id] = classified_vars + config_dict = generate_detector_config( + variable_selection=variables, + detector_name=self.name, + method_type=self.config.method_type + ) + # Update the config object from the dictionary instead of replacing it + self.config = NewEventDetectorConfig.from_dict(config_dict, self.name) diff --git a/src/detectmatelibrary/detectors/new_value_combo_detector.py b/src/detectmatelibrary/detectors/new_value_combo_detector.py index c461526..5f5a781 100644 --- a/src/detectmatelibrary/detectors/new_value_combo_detector.py +++ b/src/detectmatelibrary/detectors/new_value_combo_detector.py @@ -1,10 +1,12 @@ from detectmatelibrary.common._config import generate_detector_config +from detectmatelibrary.common._config._formats import EventsConfig from detectmatelibrary.common.detector import ( CoreDetectorConfig, CoreDetector, get_configured_variables, - get_global_variables + get_global_variables, + validate_config_coverage, ) from detectmatelibrary.utils.data_buffer import BufferMode @@ -19,6 +21,9 @@ from typing import Any, Dict, Sequence, cast, Tuple from itertools import combinations +from typing_extensions import override +from tools.logging import logger + def get_combo(variables: Dict[str, Any]) -> Dict[Tuple[str, ...], Tuple[Any, ...]]: """Get a single combination of all variables as a key-value pair.""" @@ 
-52,7 +57,9 @@ def get_all_possible_combos( class NewValueComboDetectorConfig(CoreDetectorConfig): method_type: str = "new_value_combo_detector" - comb_size: int = 2 + max_combo_size: int = 3 + use_stable_vars: bool = True + use_static_vars: bool = False class NewValueComboDetector(CoreDetector): @@ -144,6 +151,12 @@ def detect( return True return False + @override + def post_train(self) -> None: + config = cast(NewValueComboDetectorConfig, self.config) + if not config.auto_config: + validate_config_coverage(self.name, config.events, self.persistency) + def configure(self, input_: ParserSchema) -> None: # type: ignore """Configure the detector based on the stability of individual variables, then learn value combinations based on that @@ -159,7 +172,7 @@ def configure(self, input_: ParserSchema) -> None: # type: ignore named_variables=input_["logFormatVariables"], ) - def set_configuration(self, max_combo_size: int = 3) -> None: + def set_configuration(self, max_combo_size: int | None = None) -> None: """Set the detector configuration based on the stability of variable combinations. @@ -169,17 +182,18 @@ def set_configuration(self, max_combo_size: int = 3) -> None: 3. Re-ingest all events to learn the stability of these combos (testing all possible combos right away would explode combinatorially). 
""" + config = cast(NewValueComboDetectorConfig, self.config) # run WITH auto_conf_persistency variable_combos = {} for event_id, tracker in self.auto_conf_persistency.get_events_data().items(): - stable_vars = tracker.get_variables_by_classification("STABLE") # type: ignore + stable_vars = tracker.get_features_by_classification("STABLE") # type: ignore if len(stable_vars) > 1: variable_combos[event_id] = stable_vars config_dict = generate_detector_config( variable_selection=variable_combos, detector_name=self.name, method_type=self.config.method_type, - comb_size=max_combo_size + max_combo_size=max_combo_size or config.max_combo_size ) # Update the config object from the dictionary instead of replacing it self.config = NewValueComboDetectorConfig.from_dict(config_dict, self.name) @@ -196,15 +210,28 @@ def set_configuration(self, max_combo_size: int = 3) -> None: # rerun to set final config WITH auto_conf_persistency_combos combo_selection = {} for event_id, tracker in self.auto_conf_persistency_combos.get_events_data().items(): - stable_combos = tracker.get_variables_by_classification("STABLE") # type: ignore + stable_combos = [] + if self.config.use_stable_vars: + stable_combos = tracker.get_features_by_classification("STABLE") # type: ignore + static_combos = [] + if self.config.use_static_vars: + static_combos = tracker.get_features_by_classification("STATIC") # type: ignore + combos = stable_combos + static_combos # Keep combos as tuples - each will become a separate config entry - if len(stable_combos) >= 1: - combo_selection[event_id] = stable_combos + if len(combos) > 0: + combo_selection[event_id] = combos config_dict = generate_detector_config( variable_selection=combo_selection, detector_name=self.name, method_type=self.config.method_type, - comb_size=max_combo_size + max_combo_size=max_combo_size or self.config.max_combo_size ) # Update the config object from the dictionary instead of replacing it self.config = 
NewValueComboDetectorConfig.from_dict(config_dict, self.name) + events = self.config.events + if isinstance(events, EventsConfig) and not events.events: + logger.warning( + f"[{self.name}] auto_config=True generated an empty configuration. " + "No stable variable combinations were found in configure-phase data. " + "The detector will produce no alerts." + ) diff --git a/src/detectmatelibrary/detectors/new_value_detector.py b/src/detectmatelibrary/detectors/new_value_detector.py index e9fe7c6..b7a051f 100644 --- a/src/detectmatelibrary/detectors/new_value_detector.py +++ b/src/detectmatelibrary/detectors/new_value_detector.py @@ -1,10 +1,12 @@ from detectmatelibrary.common._config._compile import generate_detector_config +from detectmatelibrary.common._config._formats import EventsConfig from detectmatelibrary.common.detector import ( CoreDetectorConfig, CoreDetector, get_configured_variables, - get_global_variables + get_global_variables, + validate_config_coverage, ) from detectmatelibrary.utils.persistency.event_data_structures.trackers.stability.stability_tracker import ( EventStabilityTracker @@ -15,10 +17,16 @@ from detectmatelibrary.schemas import ParserSchema, DetectorSchema from detectmatelibrary.constants import GLOBAL_EVENT_ID +from typing_extensions import override +from tools.logging import logger + class NewValueDetectorConfig(CoreDetectorConfig): method_type: str = "new_value_detector" + use_stable_vars: bool = True + use_static_vars: bool = True + class NewValueDetector(CoreDetector): """Detect new values in log data as anomalies based on learned values.""" @@ -109,11 +117,23 @@ def configure(self, input_: ParserSchema) -> None: # type: ignore named_variables=input_["logFormatVariables"], ) + @override + def post_train(self) -> None: + if not self.config.auto_config: + validate_config_coverage(self.name, self.config.events, self.persistency) + def set_configuration(self) -> None: variables = {} for event_id, tracker in 
self.auto_conf_persistency.get_events_data().items(): - stable_vars = tracker.get_variables_by_classification("STABLE") # type: ignore - variables[event_id] = stable_vars + stable = [] + if self.config.use_stable_vars: + stable = tracker.get_features_by_classification("STABLE") # type: ignore + static = [] + if self.config.use_static_vars: + static = tracker.get_features_by_classification("STATIC") # type: ignore + vars_ = stable + static + if len(vars_) > 0: + variables[event_id] = vars_ config_dict = generate_detector_config( variable_selection=variables, detector_name=self.name, @@ -121,3 +141,10 @@ def set_configuration(self) -> None: ) # Update the config object from the dictionary instead of replacing it self.config = NewValueDetectorConfig.from_dict(config_dict, self.name) + events = self.config.events + if isinstance(events, EventsConfig) and not events.events: + logger.warning( + f"[{self.name}] auto_config=True generated an empty configuration. " + "No stable variables were found in configure-phase data. " + "The detector will produce no alerts." 
+ ) diff --git a/src/detectmatelibrary/parsers/template_matcher/_matcher_op.py b/src/detectmatelibrary/parsers/template_matcher/_matcher_op.py index 8760ab2..0c41a05 100644 --- a/src/detectmatelibrary/parsers/template_matcher/_matcher_op.py +++ b/src/detectmatelibrary/parsers/template_matcher/_matcher_op.py @@ -1,8 +1,17 @@ from collections import defaultdict -from typing import Dict, List, Any, Tuple +from typing import Dict, List, Any, Tuple, TypedDict import regex import re +from detectmatelibrary.common._config._formats import ( + EventsConfig, _EventConfig, _EventInstance, Variable +) + + +class TemplateMetadata(TypedDict): + event_id_label: str | None + labels: list[str] + def safe_search(pattern: str, string: str, timeout: int = 1) -> regex.Match[str] | None: """Perform regex search with a timeout to prevent catastrophic @@ -64,6 +73,7 @@ class TemplatesManager: def __init__( self, template_list: list[str], + metadata: dict[int, TemplateMetadata] | None = None, remove_spaces: bool = True, remove_punctuation: bool = True, lowercase: bool = True @@ -96,6 +106,61 @@ def __init__( first = tokens[0] if tokens else "" self._prefix_index[first].append(idx) + _metadata: dict[int, TemplateMetadata] = metadata or {} + self._event_label_to_idx: dict[str, int] = { + m["event_id_label"]: i + for i, m in _metadata.items() + if m["event_id_label"] + } + self._idx_to_var_map: dict[int, dict[str, int]] = { + i: {label: pos for pos, label in enumerate(m["labels"])} + for i, m in _metadata.items() + if m["labels"] + } + + def compile_events_config(self, events_config: EventsConfig) -> EventsConfig: + """Resolve named event IDs and named variable labels to positional + ints. + + Translates user-friendly named format to the internal positional + representation. Returns a new EventsConfig with only int keys + and int positions. 
+ """ + new_events: Dict[Any, _EventConfig] = {} + + for event_key, event_config in events_config.events.items(): + if isinstance(event_key, str) and event_key in self._event_label_to_idx: + resolved_key: str | int = self._event_label_to_idx[event_key] + else: + resolved_key = event_key + + var_map = self._idx_to_var_map.get(resolved_key if isinstance(resolved_key, int) else -1, {}) + + new_instances: Dict[str, _EventInstance] = {} + for instance_id, instance in event_config.instances.items(): + new_vars: Dict[str | int, Variable] = {} + for pos, var in instance.variables.items(): + if isinstance(pos, str): + if pos not in var_map: + raise ValueError( + f"Label '{pos}' not found in template for event '{event_key}'. " + f"Available labels: {list(var_map)}" + ) + resolved_pos = var_map[pos] + new_vars[resolved_pos] = Variable( + pos=resolved_pos, name=pos, params=var.params + ) + else: + new_vars[pos] = var + new_instances[instance_id] = _EventInstance( + params=instance.params, + variables=new_vars, + header_variables=instance.header_variables, + ) + new_events[resolved_key] = _EventConfig(instances=new_instances) + + return EventsConfig(events=new_events) + def candidate_indices(self, s: str) -> Tuple[str, List[int]]: pre_s = self.preprocess(s) candidates = [] @@ -110,17 +175,28 @@ class TemplateMatcher: def __init__( self, template_list: list[str], + metadata: dict[int, TemplateMetadata] | None = None, remove_spaces: bool = True, remove_punctuation: bool = True, lowercase: bool = True ) -> None: self.manager = TemplatesManager( template_list=template_list, + metadata=metadata, remove_spaces=remove_spaces, remove_punctuation=remove_punctuation, lowercase=lowercase ) + def compile_detector_config(self, events_config: EventsConfig) -> EventsConfig: + """Resolve named event IDs and variable labels to positional ints. + + Call once at setup time. Returns a new EventsConfig using the + internal positional representation, compatible with + get_configured_variables(). 
+ """ + return self.manager.compile_events_config(events_config) + @staticmethod def extract_parameters(log: str, template: str) -> tuple[str, ...] | None: """Extract parameters from the log based on the template.""" @@ -129,7 +205,6 @@ def extract_parameters(log: str, template: str) -> tuple[str, ...] | None: pattern_parts_escaped = [re.escape(part) for part in pattern_parts] regex_pattern = "(.*?)".join(pattern_parts_escaped) regex = "^" + regex_pattern + "$" - # matches = re.search(regex, log) matches = safe_search(regex, log, 1) if matches: groups: tuple[str, ...] = matches.groups() diff --git a/src/detectmatelibrary/parsers/template_matcher/_parser.py b/src/detectmatelibrary/parsers/template_matcher/_parser.py index 3192a84..edcae99 100644 --- a/src/detectmatelibrary/parsers/template_matcher/_parser.py +++ b/src/detectmatelibrary/parsers/template_matcher/_parser.py @@ -1,10 +1,13 @@ -from detectmatelibrary.parsers.template_matcher._matcher_op import TemplateMatcher +from detectmatelibrary.parsers.template_matcher._matcher_op import TemplateMatcher, TemplateMetadata from detectmatelibrary.common.parser import CoreParser, CoreParserConfig from detectmatelibrary import schemas from typing import Any import csv import os +import re + +_NAMED_WC_RE = re.compile(r'<([A-Za-z_]\w*)>') class TemplatesNotFoundError(Exception): @@ -15,34 +18,93 @@ class TemplateNoPermissionError(Exception): pass -def load_templates(path: str) -> list[str]: +def _compile_templates( + raw_templates: list[str], + event_id_labels: list[str | None] | None = None, +) -> tuple[list[str], dict[int, TemplateMetadata]]: + """Convert named wildcards to <*> and record label order and event ID + labels. + + Args: + raw_templates: Raw template strings, possibly containing named wildcards. + event_id_labels: Optional per-template event ID labels (from CSV EventId column). + If provided, must have the same length as raw_templates. 
+ + Returns: + compiled: Template strings with only <*> wildcards, ready for TemplatesManager. + metadata: Mapping of template index to TemplateMetadata. + + Raises: + ValueError: If a template mixes <*> and named wildcards. + """ + compiled: list[str] = [] + metadata: dict[int, TemplateMetadata] = {} + + for i, raw in enumerate(raw_templates): + has_anon = "<*>" in raw + labels = _NAMED_WC_RE.findall(raw) + has_named = bool(labels) + + if has_anon and has_named: + raise ValueError( + f"Template mixes <*> and named wildcards: {raw!r}. " + "Use either <*> (positional) or