Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions src/detectmatelibrary/common/_core_op/_fit_logic.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,9 @@ def __init__(
self._configuration_done = False
self.config_finished = False

self._training_done = False
self.training_finished = False

self.data_use_configure = data_use_configure
self.data_use_training = data_use_training

Expand All @@ -84,6 +87,13 @@ def finish_config(self) -> bool:

return False

def finish_training(self) -> bool:
if self._training_done and not self.training_finished:
self.training_finished = True
return True

return False

def run(self) -> FitLogicState:
if do_configure(
data_use_configure=self.data_use_configure,
Expand All @@ -103,5 +113,7 @@ def run(self) -> FitLogicState:
):
self.data_used_train += 1
return FitLogicState.DO_TRAIN
elif self.data_used_train > 0 and not self._training_done:
self._training_done = True

return FitLogicState.NOTHING
6 changes: 6 additions & 0 deletions src/detectmatelibrary/common/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,9 @@ def configure(
def set_configuration(self) -> None:
pass

def post_train(self) -> None:
pass

def get_config(self) -> Dict[str, Any]:
return self.config.get_config()

Expand Down Expand Up @@ -100,6 +103,9 @@ def process(self, data: BaseSchema | bytes) -> BaseSchema | bytes | None:
if fit_state == FitLogicState.DO_TRAIN:
logger.info(f"<<{self.name}>> use data for training")
self.train(input_=data_buffered)
elif self.fitlogic.finish_training():
logger.info(f"<<{self.name}>> finalizing training")
self.post_train()

output_ = self.output_schema()
logger.info(f"<<{self.name}>> processing data")
Expand Down
45 changes: 45 additions & 0 deletions src/detectmatelibrary/common/detector.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,15 @@

from detectmatelibrary.utils.data_buffer import ArgsBuffer, BufferMode
from detectmatelibrary.utils.aux import get_timestamp
from detectmatelibrary.utils.persistency.event_persistency import EventPersistency

from detectmatelibrary.schemas import ParserSchema, DetectorSchema

from typing_extensions import override
from typing import Dict, List, Optional, Any

from detectmatelibrary.utils.time_format_handler import TimeFormatHandler
from tools.logging import logger


_time_handler = TimeFormatHandler()
Expand Down Expand Up @@ -89,6 +91,45 @@ def get_global_variables(
return result


def validate_config_coverage(
detector_name: str,
config_events: EventsConfig | dict[str, Any],
persistency: EventPersistency,
) -> None:
"""Log warnings when configured EventIDs or variables have no training
data.

Args:
detector_name: Name of the detector (used in warning messages).
config_events: The detector's events configuration.
persistency: The persistency object populated during training.
"""
config_ids = (
config_events.events.keys()
if isinstance(config_events, EventsConfig)
else config_events.keys()
)
if not config_ids:
return

events_seen = persistency.get_events_seen()
events_with_data = set(persistency.get_events_data().keys())

for event_id in config_ids:
if event_id not in events_seen:
logger.warning(
f"[{detector_name}] EventID {event_id!r} is configured but was "
"never observed in training data. Verify that EventIDs in your "
"config match those produced by the parser."
)
elif event_id not in events_with_data:
logger.warning(
f"[{detector_name}] EventID {event_id!r} was observed in training "
"data but no configured variables were extracted. Verify that "
"variable names/positions in your config match those in the data."
)


class CoreDetectorConfig(CoreConfig):
component_type: str = "detectors"
method_type: str = "core_detector"
Expand Down Expand Up @@ -158,3 +199,7 @@ def configure(
@override
def set_configuration(self) -> None:
pass

@override
def post_train(self) -> None:
pass
20 changes: 19 additions & 1 deletion src/detectmatelibrary/detectors/new_value_combo_detector.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
from detectmatelibrary.common._config import generate_detector_config
from detectmatelibrary.common._config._formats import EventsConfig

from detectmatelibrary.common.detector import (
CoreDetectorConfig,
CoreDetector,
get_configured_variables,
get_global_variables
get_global_variables,
validate_config_coverage,
)

from detectmatelibrary.utils.data_buffer import BufferMode
Expand All @@ -19,6 +21,9 @@
from typing import Any, Dict, Sequence, cast, Tuple
from itertools import combinations

from typing_extensions import override
from tools.logging import logger


def get_combo(variables: Dict[str, Any]) -> Dict[Tuple[str, ...], Tuple[Any, ...]]:
"""Get a single combination of all variables as a key-value pair."""
Expand Down Expand Up @@ -144,6 +149,12 @@ def detect(
return True
return False

@override
def post_train(self) -> None:
config = cast(NewValueComboDetectorConfig, self.config)
if not config.auto_config:
validate_config_coverage(self.name, config.events, self.persistency)

def configure(self, input_: ParserSchema) -> None: # type: ignore
"""Configure the detector based on the stability of individual
variables, then learn value combinations based on that
Expand Down Expand Up @@ -208,3 +219,10 @@ def set_configuration(self, max_combo_size: int = 3) -> None:
)
# Update the config object from the dictionary instead of replacing it
self.config = NewValueComboDetectorConfig.from_dict(config_dict, self.name)
events = self.config.events
if isinstance(events, EventsConfig) and not events.events:
logger.warning(
f"[{self.name}] auto_config=True generated an empty configuration. "
"No stable variable combinations were found in configure-phase data. "
"The detector will produce no alerts."
)
19 changes: 18 additions & 1 deletion src/detectmatelibrary/detectors/new_value_detector.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
from detectmatelibrary.common._config._compile import generate_detector_config
from detectmatelibrary.common._config._formats import EventsConfig

from detectmatelibrary.common.detector import (
CoreDetectorConfig,
CoreDetector,
get_configured_variables,
get_global_variables
get_global_variables,
validate_config_coverage,
)
from detectmatelibrary.utils.persistency.event_data_structures.trackers.stability.stability_tracker import (
EventStabilityTracker
Expand All @@ -15,6 +17,9 @@
from detectmatelibrary.schemas import ParserSchema, DetectorSchema
from detectmatelibrary.constants import GLOBAL_EVENT_ID

from typing_extensions import override
from tools.logging import logger


class NewValueDetectorConfig(CoreDetectorConfig):
method_type: str = "new_value_detector"
Expand Down Expand Up @@ -109,6 +114,11 @@ def configure(self, input_: ParserSchema) -> None: # type: ignore
named_variables=input_["logFormatVariables"],
)

@override
def post_train(self) -> None:
if not self.config.auto_config:
validate_config_coverage(self.name, self.config.events, self.persistency)

def set_configuration(self) -> None:
variables = {}
for event_id, tracker in self.auto_conf_persistency.get_events_data().items():
Expand All @@ -121,3 +131,10 @@ def set_configuration(self) -> None:
)
# Update the config object from the dictionary instead of replacing it
self.config = NewValueDetectorConfig.from_dict(config_dict, self.name)
events = self.config.events
if isinstance(events, EventsConfig) and not events.events:
logger.warning(
f"[{self.name}] auto_config=True generated an empty configuration. "
"No stable variables were found in configure-phase data. "
"The detector will produce no alerts."
)
7 changes: 7 additions & 0 deletions src/detectmatelibrary/utils/persistency/event_persistency.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ def __init__(
event_data_kwargs: Optional[dict[str, Any]] = None,
):
self.events_data: Dict[int | str, EventDataStructure] = {}
self.events_seen: set[int | str] = set()
self.event_data_class = event_data_class
self.event_data_kwargs = event_data_kwargs or {}
self.variable_blacklist = variable_blacklist or []
Expand All @@ -39,6 +40,7 @@ def ingest_event(
named_variables: Dict[str, Any] = {}
) -> None:
"""Ingest event data into the appropriate EventData store."""
self.events_seen.add(event_id)
if not variables and not named_variables:
return
self.event_templates[event_id] = event_template
Expand All @@ -52,6 +54,11 @@ def ingest_event(
data = data_structure.to_data(all_variables)
data_structure.add_data(data)

def get_events_seen(self) -> set[int | str]:
"""Retrieve all event IDs observed via ingest_event(), regardless of
whether variables were extracted."""
return self.events_seen

def get_event_data(self, event_id: int | str) -> Any | None:
"""Retrieve the data for a specific event ID."""
data_structure = self.events_data.get(event_id)
Expand Down
56 changes: 56 additions & 0 deletions tests/test_common/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -330,3 +330,59 @@ def test_set_configuration_called_once(self) -> None:
component.process(self._make_log(i))

assert component.set_configuration_called == 1


class MockConfigWithPostTrain(CoreConfig):
data_use_training: int | None = 3


class MockComponentWithPostTrain(CoreComponent):
def __init__(self, name: str, config: CoreConfig = MockConfigWithPostTrain()) -> None:
super().__init__(
name=name, type_="Dummy", config=config, input_schema=schemas.LogSchema
)
self.post_train_called: int = 0

def train(self, input_) -> None:
pass

def post_train(self) -> None:
self.post_train_called += 1

def run(self, input_, output_) -> bool:
return False


class TestPostTrain:
def _make_log(self, i: int) -> schemas.LogSchema:
return schemas.LogSchema({
"__version__": "1.0.0",
"logID": str(i),
"logSource": "test",
"hostname": "test_hostname"
})

def test_post_train_called_once_after_training(self) -> None:
component = MockComponentWithPostTrain(name="PostTrain1")
for i in range(10):
component.process(self._make_log(i))
assert component.post_train_called == 1

def test_post_train_not_called_without_training(self) -> None:
component = MockComponentWithPostTrain(name="PostTrain2", config=CoreConfig())
for i in range(10):
component.process(self._make_log(i))
assert component.post_train_called == 0

def test_post_train_called_on_first_detection_item(self) -> None:
"""post_train fires on the item immediately after training ends."""
component = MockComponentWithPostTrain(name="PostTrain3")
# data_use_training=3, so 4th item triggers post_train
for i in range(3):
component.process(self._make_log(i))
assert component.post_train_called == 0
component.process(self._make_log(3))
assert component.post_train_called == 1
# subsequent items don't re-trigger it
component.process(self._make_log(4))
assert component.post_train_called == 1
62 changes: 62 additions & 0 deletions tests/test_common/test_fit_logic.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
"""Tests for FitLogic training lifecycle hooks."""

from detectmatelibrary.common._core_op._fit_logic import (
FitLogic, FitLogicState, TrainState
)


class TestFinishTraining:
"""Test that finish_training() fires exactly once after bounded training."""

def test_finish_training_fires_once_after_bounded_training(self) -> None:
logic = FitLogic(data_use_configure=None, data_use_training=3)
finish_calls = []
for _ in range(6):
logic.run()
finish_calls.append(logic.finish_training())
assert finish_calls.count(True) == 1

def test_finish_training_fires_on_first_nothing_after_training(self) -> None:
logic = FitLogic(data_use_configure=None, data_use_training=2)
states = []
finishes = []
for _ in range(5):
state = logic.run()
states.append(state)
finishes.append(logic.finish_training())
# First two calls are DO_TRAIN, third is first NOTHING
assert states[:2] == [FitLogicState.DO_TRAIN, FitLogicState.DO_TRAIN]
assert states[2] == FitLogicState.NOTHING
assert finishes[2] is True
assert all(not f for f in finishes[:2])
assert all(not f for f in finishes[3:])

def test_finish_training_not_called_without_training(self) -> None:
logic = FitLogic(data_use_configure=None, data_use_training=None)
for _ in range(5):
logic.run()
assert logic.finish_training() is False

def test_finish_training_not_called_during_training(self) -> None:
logic = FitLogic(data_use_configure=None, data_use_training=5)
for _ in range(5):
state = logic.run()
assert state == FitLogicState.DO_TRAIN
assert logic.finish_training() is False

def test_finish_training_not_called_with_keep_training(self) -> None:
logic = FitLogic(data_use_configure=None, data_use_training=None)
logic.train_state = TrainState.KEEP_TRAINING
for _ in range(10):
state = logic.run()
assert state == FitLogicState.DO_TRAIN
assert logic.finish_training() is False

def test_finish_training_after_configure_and_training(self) -> None:
"""finish_training fires correctly even when configure phase precedes training."""
logic = FitLogic(data_use_configure=2, data_use_training=3)
finish_calls = []
for _ in range(8):
logic.run()
finish_calls.append(logic.finish_training())
assert finish_calls.count(True) == 1
Loading
Loading