From b6c2fbe1fbdb089a4bf72d50f7c344374d21cd14 Mon Sep 17 00:00:00 2001 From: stealthwhizz Date: Wed, 18 Mar 2026 22:41:58 +0530 Subject: [PATCH 1/3] fix(detectors): reject non-dict config and handle non-numeric comparisons Bug 020: BaseDetector now raises TypeError at init when config is not a dict, instead of deferring to AttributeError in _validate_config(). Bug 034: ToolCallDetector._check_condition numeric operators (gt, gte, lt, lte) now return False when actual or expected values cannot be converted to float, instead of raising unhandled ValueError. --- finbot/ctf/detectors/base.py | 2 + finbot/ctf/detectors/primitives/tool_call.py | 21 ++++--- tests/unit/ctf/test_base_detector.py | 57 +++++++++++++++++++ tests/unit/ctf/test_tool_call_detector.py | 59 ++++++++++++++++++++ 4 files changed, 131 insertions(+), 8 deletions(-) create mode 100644 tests/unit/ctf/test_base_detector.py create mode 100644 tests/unit/ctf/test_tool_call_detector.py diff --git a/finbot/ctf/detectors/base.py b/finbot/ctf/detectors/base.py index 01b4c114..79325511 100644 --- a/finbot/ctf/detectors/base.py +++ b/finbot/ctf/detectors/base.py @@ -26,6 +26,8 @@ def __init__(self, challenge_id: str, config: dict[str, Any] | None = None): config: Optional detector configuration (detector-specific) """ self.challenge_id = challenge_id + if config is not None and not isinstance(config, dict): + raise TypeError(f"config must be a dict, got {type(config).__name__}") self.config = config or {} self._validate_config() diff --git a/finbot/ctf/detectors/primitives/tool_call.py b/finbot/ctf/detectors/primitives/tool_call.py index f7ea7317..b3bc542f 100644 --- a/finbot/ctf/detectors/primitives/tool_call.py +++ b/finbot/ctf/detectors/primitives/tool_call.py @@ -188,14 +188,19 @@ def _check_condition(self, actual: Any, condition: Any) -> bool: return actual not in expected if op == "contains": return expected in str(actual).lower() - if op == "gt": - return float(actual) > float(expected) - if op == "gte": - return float(actual) >= float(expected) - if op == "lt": - return float(actual) < float(expected) - if op == "lte": - return float(actual) <= float(expected) + if op in ("gt", "gte", "lt", "lte"): + try: + actual_f = float(actual) + expected_f = float(expected) + except (ValueError, TypeError): + return False + if op == "gt": + return actual_f > expected_f + if op == "gte": + return actual_f >= expected_f + if op == "lt": + return actual_f < expected_f + return actual_f <= expected_f if op == "matches": return bool(re.search(expected, str(actual), re.IGNORECASE)) diff --git a/tests/unit/ctf/test_base_detector.py b/tests/unit/ctf/test_base_detector.py new file mode 100644 index 00000000..49bc516a --- /dev/null +++ b/tests/unit/ctf/test_base_detector.py @@ -0,0 +1,57 @@ +"""Tests for BaseDetector config validation. + +Covers Bug 020 (DET-THR-NEG-001): BaseDetector must raise TypeError when +config is not a dict, instead of deferring to an AttributeError later. +""" + +import pytest +from unittest.mock import MagicMock + +from finbot.ctf.detectors.base import BaseDetector +from finbot.ctf.detectors.result import DetectionResult + + +class ConcreteDetector(BaseDetector): + """Minimal concrete subclass for testing the base class.""" + + def get_relevant_event_types(self): + return ["agent.*"] + + async def check_event(self, event, db=None): + return DetectionResult(detected=False) + + +class TestBaseDetectorConfigValidation: + """Bug 020: non-dict config must raise TypeError at init time.""" + + def test_string_config_raises_type_error(self): + with pytest.raises(TypeError, match="config must be a dict"): + ConcreteDetector(challenge_id="c", config="not_a_dict") + + def test_list_config_raises_type_error(self): + with pytest.raises(TypeError, match="config must be a dict"): + ConcreteDetector(challenge_id="c", config=["a", "b"]) + + def test_int_config_raises_type_error(self): + with pytest.raises(TypeError, match="config must be a dict"): + ConcreteDetector(challenge_id="c", config=42) + + def test_bool_config_raises_type_error(self): + with pytest.raises(TypeError, match="config must be a dict"): + ConcreteDetector(challenge_id="c", config=True) + + def test_dict_config_accepted(self): + d = ConcreteDetector(challenge_id="c", config={"key": "value"}) + assert d.config == {"key": "value"} + + def test_none_config_defaults_to_empty_dict(self): + d = ConcreteDetector(challenge_id="c", config=None) + assert d.config == {} + + def test_no_config_defaults_to_empty_dict(self): + d = ConcreteDetector(challenge_id="c") + assert d.config == {} + + def test_empty_dict_config_accepted(self): + d = ConcreteDetector(challenge_id="c", config={}) + assert d.config == {} diff --git a/tests/unit/ctf/test_tool_call_detector.py b/tests/unit/ctf/test_tool_call_detector.py new file mode 100644 index 00000000..a04278f5 --- /dev/null +++ b/tests/unit/ctf/test_tool_call_detector.py @@ -0,0 +1,59 @@ +"""Tests for ToolCallDetector._check_condition numeric operator handling. + +Covers Bug 034 (PRM-TOL-019): _check_condition must return False (not raise +ValueError) when actual value is a non-numeric string and operator is +gt / gte / lt / lte. +""" + +import pytest +from unittest.mock import MagicMock + +from finbot.ctf.detectors.primitives.tool_call import ToolCallDetector + + +@pytest.fixture +def detector(): + d = ToolCallDetector.__new__(ToolCallDetector) + d.config = {"tool_name": "test_tool"} + d.challenge_id = "test" + return d + + +class TestCheckConditionNumericOperators: + """Bug 034: numeric operators should not raise on non-numeric strings.""" + + @pytest.mark.parametrize("op", ["gt", "gte", "lt", "lte"]) + def test_non_numeric_actual_returns_false(self, detector, op): + result = detector._check_condition("not_a_number", {op: 100}) + assert result is False + + @pytest.mark.parametrize("op", ["gt", "gte", "lt", "lte"]) + def test_non_numeric_expected_returns_false(self, detector, op): + result = detector._check_condition(50, {op: "not_a_number"}) + assert result is False + + @pytest.mark.parametrize("op", ["gt", "gte", "lt", "lte"]) + def test_none_actual_returns_false(self, detector, op): + result = detector._check_condition(None, {op: 100}) + assert result is False + + def test_gt_valid_numeric(self, detector): + assert detector._check_condition(200, {"gt": 100}) is True + assert detector._check_condition(50, {"gt": 100}) is False + + def test_gte_valid_numeric(self, detector): + assert detector._check_condition(100, {"gte": 100}) is True + assert detector._check_condition(99, {"gte": 100}) is False + + def test_lt_valid_numeric(self, detector): + assert detector._check_condition(50, {"lt": 100}) is True + assert detector._check_condition(200, {"lt": 100}) is False + + def test_lte_valid_numeric(self, detector): + assert detector._check_condition(100, {"lte": 100}) is True + assert detector._check_condition(101, {"lte": 100}) is False + + def test_string_numeric_actual_works(self, detector): + """String representations of numbers should still compare correctly.""" + assert detector._check_condition("200", {"gt": 100}) is True + assert detector._check_condition("50", {"gt": 100}) is False From d76b263ff7df71949daf5c25ea959c2aea82610c Mon Sep 17 00:00:00 2001 From: stealthwhizz Date: Wed, 18 Mar 2026 22:55:28 +0530 Subject: [PATCH 2/3] fix: address Copilot review feedback on PR #261 - Remove unused MagicMock imports from both test files - Use real ToolCallDetector constructor instead of __new__ bypass - Catch TypeError in create_detector() so non-dict config returns None with a log instead of crashing ChallengeService --- finbot/ctf/detectors/registry.py | 2 +- tests/unit/ctf/test_base_detector.py | 1 - tests/unit/ctf/test_tool_call_detector.py | 6 +----- 3 files changed, 2 insertions(+), 7 deletions(-) diff --git a/finbot/ctf/detectors/registry.py b/finbot/ctf/detectors/registry.py index eb1bbba1..39c0d797 100644 --- a/finbot/ctf/detectors/registry.py +++ b/finbot/ctf/detectors/registry.py @@ -56,7 +56,7 @@ def create_detector( try: detector_class = get_detector_class(detector_class_name) return detector_class(challenge_id=challenge_id, config=config) - except ValueError as e: + except (ValueError, TypeError) as e: logger.error("Failed to create detector: %s", e) return None diff --git a/tests/unit/ctf/test_base_detector.py b/tests/unit/ctf/test_base_detector.py index 49bc516a..73718d28 100644 --- a/tests/unit/ctf/test_base_detector.py +++ b/tests/unit/ctf/test_base_detector.py @@ -5,7 +5,6 @@ """ import pytest -from unittest.mock import MagicMock from finbot.ctf.detectors.base import BaseDetector from finbot.ctf.detectors.result import DetectionResult diff --git a/tests/unit/ctf/test_tool_call_detector.py b/tests/unit/ctf/test_tool_call_detector.py index a04278f5..943fd8f8 100644 --- a/tests/unit/ctf/test_tool_call_detector.py +++ b/tests/unit/ctf/test_tool_call_detector.py @@ -6,17 +6,13 @@ """ import pytest -from unittest.mock import MagicMock from finbot.ctf.detectors.primitives.tool_call import ToolCallDetector @pytest.fixture def detector(): - d = ToolCallDetector.__new__(ToolCallDetector) - d.config = {"tool_name": "test_tool"} - d.challenge_id = "test" - return d + return ToolCallDetector(challenge_id="test", config={"tool_name": "test_tool"}) class TestCheckConditionNumericOperators: From d39ed917d8f1dd62ea5a38cd7207a324c798c041 Mon Sep 17 00:00:00 2001 From: stealthwhizz Date: Wed, 18 Mar 2026 23:01:59 +0530 Subject: [PATCH 3/3] fix(detectors): make contains operator case-insensitive on both sides Bug 033: contains operator lowercased actual but not expected, so search terms like "Gambling" never matched "gambling services". Now both sides are lowercased. Also fixes #130. --- finbot/ctf/detectors/primitives/tool_call.py | 2 +- tests/unit/ctf/test_tool_call_detector.py | 28 +++++++++++++++++--- 2 files changed, 25 insertions(+), 5 deletions(-) diff --git a/finbot/ctf/detectors/primitives/tool_call.py b/finbot/ctf/detectors/primitives/tool_call.py index b3bc542f..bd2addfd 100644 --- a/finbot/ctf/detectors/primitives/tool_call.py +++ b/finbot/ctf/detectors/primitives/tool_call.py @@ -187,7 +187,7 @@ def _check_condition(self, actual: Any, condition: Any) -> bool: if op == "not_in": return actual not in expected if op == "contains": - return expected in str(actual).lower() + return str(expected).lower() in str(actual).lower() if op in ("gt", "gte", "lt", "lte"): try: actual_f = float(actual) diff --git a/tests/unit/ctf/test_tool_call_detector.py b/tests/unit/ctf/test_tool_call_detector.py index 943fd8f8..32227e46 100644 --- a/tests/unit/ctf/test_tool_call_detector.py +++ b/tests/unit/ctf/test_tool_call_detector.py @@ -1,8 +1,9 @@ -"""Tests for ToolCallDetector._check_condition numeric operator handling. +"""Tests for ToolCallDetector._check_condition operator handling. -Covers Bug 034 (PRM-TOL-019): _check_condition must return False (not raise -ValueError) when actual value is a non-numeric string and operator is -gt / gte / lt / lte. +Covers: +- Bug 033 (PRM-TOL-018): contains operator false negative on uppercase expected +- Bug 034 (PRM-TOL-019): numeric operators raise unhandled ValueError on + non-numeric strings """ import pytest @@ -53,3 +54,22 @@ def test_string_numeric_actual_works(self, detector): """String representations of numbers should still compare correctly.""" assert detector._check_condition("200", {"gt": 100}) is True assert detector._check_condition("50", {"gt": 100}) is False + + +class TestCheckConditionContainsOperator: + """Bug 033: contains operator must be case-insensitive on both sides.""" + + def test_uppercase_expected_matches(self, detector): + assert detector._check_condition("gambling services", {"contains": "Gambling"}) is True + + def test_uppercase_actual_matches(self, detector): + assert detector._check_condition("GAMBLING SERVICES", {"contains": "gambling"}) is True + + def test_both_uppercase_matches(self, detector): + assert detector._check_condition("GAMBLING SERVICES", {"contains": "Gambling"}) is True + + def test_no_match_returns_false(self, detector): + assert detector._check_condition("legitimate services", {"contains": "Gambling"}) is False + + def test_exact_case_still_works(self, detector): + assert detector._check_condition("gambling", {"contains": "gambling"}) is True