Skip to content

Commit 9ac4051

Browse files
dugshub and claude committed
feat(core): add validators for DoS protection (CLI-6 Priority 2)
Implements CLI-6 Priority 2 (MEDIUM): DoS protection via depth and size validation.

Validators Added:

1. validate_json_depth(value, max_depth=50)
   - Prevents stack overflow from deeply nested structures
   - Recursively checks dict/list nesting depth
   - Default limit: 50 levels
   - Raises ValidationError if exceeded

2. validate_collection_size(value, max_size=1000)
   - Prevents memory exhaustion from large collections
   - Counts all items recursively (nested dicts/lists)
   - Default limit: 1000 total items
   - Raises ValidationError if exceeded

3. validate_state_value(value)
   - Combined depth + size validation
   - Primary validator for StateValue types
   - Ensures JSON-serializable data is safe

Configuration Constants:
- MAX_JSON_DEPTH = 50 (configurable)
- MAX_COLLECTION_SIZE = 1000 (configurable)

Security Benefits:
- Prevents DoS attacks via deeply nested JSON
- Prevents memory exhaustion from large data structures
- Protects against malicious configuration files
- Safe limits for production environments

Integration:
- Used by SessionState validators (next commit)
- Applied to option_values and variables fields
- Configurable via environment variables (future)

Tests: 27 unit tests covering all validators and edge cases

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
1 parent b5393f0 commit 9ac4051

File tree

2 files changed

+397
-0
lines changed

2 files changed

+397
-0
lines changed
Lines changed: 158 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,158 @@
1+
"""Validation utilities for CLI Patterns core types.
2+
3+
This module provides security-focused validators to prevent DoS attacks
4+
and ensure data integrity:
5+
6+
- JSON depth validation (prevents stack overflow)
7+
- Collection size validation (prevents memory exhaustion)
8+
- StateValue validation (combined depth + size checks)
9+
"""
10+
11+
from __future__ import annotations
12+
13+
from typing import Any
14+
15+
# Configuration constants

#: Maximum nesting depth for JSON-serializable values.
#:
#: This prevents stack overflow during serialization and CPU exhaustion
#: during parsing. Default: 50 levels.
MAX_JSON_DEPTH = 50

#: Maximum total number of items in collections (lists, dicts).
#:
#: This prevents memory exhaustion from excessively large data structures.
#: Default: 1000 items (counting nested items recursively).
MAX_COLLECTION_SIZE = 1000
29+
30+
31+
class ValidationError(Exception):
    """Signal that a value failed a security or integrity check.

    The validators in this module raise this exception when their input
    does not satisfy the limits they enforce.
    """
39+
40+
41+
def validate_json_depth(value: Any, max_depth: int = MAX_JSON_DEPTH) -> None:
    """Validate that a value does not exceed the maximum nesting depth.

    Depth is counted per level of ``dict``/``list`` nesting: a primitive is
    at depth 0, ``{"a": 1}`` and ``[1]`` reach depth 1, ``[[1]]`` reaches
    depth 2. An empty container adds no level of its own. Only ``dict``
    values and ``list`` items are traversed; dict keys and every other type
    are treated as leaves.

    The traversal uses an explicit stack rather than recursion, so the
    validator itself cannot hit the interpreter's recursion limit, whatever
    ``max_depth`` the caller passes.

    Args:
        value: Value to validate.
        max_depth: Maximum allowed nesting depth (default: MAX_JSON_DEPTH).

    Raises:
        ValidationError: If nesting exceeds ``max_depth``.

    Example:
        >>> validate_json_depth({"a": {"b": {"c": 1}}})  # OK
        >>> validate_json_depth([[1]], max_depth=2)  # OK
        >>> validate_json_depth([[1]], max_depth=1)  # Raises ValidationError
    """
    # The root sits at depth 0; a negative limit therefore rejects everything.
    if max_depth < 0:
        raise ValidationError(
            f"JSON nesting too deep: 0 levels "
            f"(maximum: {max_depth})"
        )

    if not isinstance(value, (dict, list)):
        return

    # Each entry is (container, depth of that container). Only containers are
    # queued, so memory stays proportional to the number of nested containers
    # rather than the number of leaves.
    pending: list[tuple[Any, int]] = [(value, 0)]
    while pending:
        container, depth = pending.pop()
        if not container:
            # Empty containers contribute no further nesting.
            continue

        child_depth = depth + 1
        if child_depth > max_depth:
            # Any child of a non-empty container lives one level deeper.
            raise ValidationError(
                f"JSON nesting too deep: {child_depth} levels "
                f"(maximum: {max_depth})"
            )

        children = container.values() if isinstance(container, dict) else container
        pending.extend(
            (child, child_depth)
            for child in children
            if isinstance(child, (dict, list))
        )
82+
83+
84+
def validate_collection_size(value: Any, max_size: int = MAX_COLLECTION_SIZE) -> None:
    """Validate that a value does not contain more than ``max_size`` items.

    Every entry of every ``dict`` and every element of every ``list`` counts
    as one item, at any nesting level. A nested container therefore counts
    once as an item of its parent, plus once for each of its own items.
    Values that are neither ``dict`` nor ``list`` contain no items.

    The traversal is iterative and keeps one running total. A deeply nested
    or self-referencing input is rejected with ``ValidationError`` once the
    total passes the limit, instead of exhausting the interpreter's
    recursion limit. The count in the error message is the running total at
    the moment the limit was crossed.

    Args:
        value: Value to validate (typically a dict or list).
        max_size: Maximum allowed total item count
            (default: MAX_COLLECTION_SIZE).

    Raises:
        ValidationError: If the total item count exceeds ``max_size``.

    Example:
        >>> validate_collection_size([1, 2, 3])  # OK
        >>> validate_collection_size([1] * 10000)  # Raises ValidationError
    """
    total = 0
    pending: list[Any] = [value]
    while pending:
        obj = pending.pop()
        if isinstance(obj, dict):
            children = obj.values()
        elif isinstance(obj, list):
            children = obj
        else:
            # Leaves hold no items of their own.
            continue

        total += len(obj)
        if total > max_size:
            raise ValidationError(
                f"Collection too large: {total} items (maximum: {max_size})"
            )

        # Only containers can add to the total, so leaves are never queued.
        # The check above bounds the queue length by max_size.
        pending.extend(
            child for child in children if isinstance(child, (dict, list))
        )
134+
135+
136+
def validate_state_value(value: Any) -> None:
    """Validate that a StateValue meets the depth and size limits.

    This is the main validation entry point for StateValue types. It runs
    the depth check first and the size check second, each with its
    module-level default limit.

    Checks:
        - Nesting depth within limits (validate_json_depth defaults)
        - Collection size within limits (validate_collection_size defaults)

    Note:
        JSON-serializability is NOT verified here. Both checks only descend
        into ``dict`` and ``list`` objects and treat every other value as a
        leaf, so a non-JSON type passes without error.

    Args:
        value: StateValue to validate.

    Raises:
        ValidationError: If either the depth or the size check fails.

    Example:
        >>> validate_state_value({"user": {"name": "test", "age": 30}})  # OK
        >>> validate_state_value(list(range(100000)))  # Raises ValidationError
    """
    validate_json_depth(value)
    validate_collection_size(value)

tests/unit/core/test_validators.py

Lines changed: 239 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,239 @@
1+
"""Tests for core validators.
2+
3+
This module tests the validation functions that prevent DoS attacks
4+
and ensure data integrity.
5+
"""
6+
7+
from __future__ import annotations

from typing import Any

import pytest

from cli_patterns.core.validators import (
    MAX_COLLECTION_SIZE,
    MAX_JSON_DEPTH,
    ValidationError,
    validate_collection_size,
    validate_json_depth,
    validate_state_value,
)
19+
20+
pytestmark = pytest.mark.unit
21+
22+
23+
class TestDepthValidation:
    """Test JSON depth validation."""

    def test_accepts_shallow_dict(self) -> None:
        """Should accept dict within depth limit."""
        data = {"a": {"b": {"c": 1}}}
        validate_json_depth(data)  # Should not raise

    def test_accepts_shallow_list(self) -> None:
        """Should accept list within depth limit."""
        data = [[[[1]]]]
        validate_json_depth(data)  # Should not raise

    def test_accepts_empty_dict(self) -> None:
        """Should accept empty dict."""
        validate_json_depth({})

    def test_accepts_empty_list(self) -> None:
        """Should accept empty list."""
        validate_json_depth([])

    def test_accepts_primitives(self) -> None:
        """Should accept primitive values."""
        validate_json_depth("string")
        validate_json_depth(123)
        validate_json_depth(45.67)
        validate_json_depth(True)
        validate_json_depth(None)

    def test_rejects_deeply_nested_dict(self) -> None:
        """Should reject dict exceeding depth limit."""
        # Wrap a one-level dict MAX_JSON_DEPTH + 1 more times.
        data: dict[str, Any] = {"value": 1}
        for _ in range(MAX_JSON_DEPTH + 1):
            data = {"nested": data}

        with pytest.raises(ValidationError, match="nesting too deep"):
            validate_json_depth(data)

    def test_rejects_deeply_nested_list(self) -> None:
        """Should reject list exceeding depth limit."""
        data: list[Any] = [1]
        for _ in range(MAX_JSON_DEPTH + 1):
            data = [data]

        with pytest.raises(ValidationError, match="nesting too deep"):
            validate_json_depth(data)

    def test_rejects_mixed_nested_structure(self) -> None:
        """Should reject mixed dict/list exceeding depth."""
        # The seed is already 4 levels deep before wrapping.
        data: Any = [{"nested": [{"deep": 1}]}]
        for _ in range(MAX_JSON_DEPTH):
            data = [data]

        with pytest.raises(ValidationError, match="nesting too deep"):
            validate_json_depth(data)

    def test_custom_depth_limit(self) -> None:
        """Should respect custom depth limit."""
        data = {"a": {"b": {"c": 1}}}

        validate_json_depth(data, max_depth=10)  # OK
        with pytest.raises(ValidationError):
            validate_json_depth(data, max_depth=2)  # Too deep

    def test_depth_counts_correctly(self) -> None:
        """Should count depth correctly for various structures."""
        # Depth 0: primitives
        validate_json_depth(1, max_depth=0)

        # Depth 1: single-level dict/list
        validate_json_depth({"a": 1}, max_depth=1)
        validate_json_depth([1, 2], max_depth=1)
        with pytest.raises(ValidationError, match="nesting too deep"):
            validate_json_depth({"a": 1}, max_depth=0)

        # Depth 2: nested
        validate_json_depth({"a": {"b": 1}}, max_depth=2)
        validate_json_depth([[1]], max_depth=2)
        with pytest.raises(ValidationError, match="nesting too deep"):
            validate_json_depth([[1]], max_depth=1)
100+
101+
102+
class TestSizeValidation:
    """Test collection size validation."""

    def test_accepts_small_dict(self) -> None:
        """Should accept dict within size limit."""
        data = {f"key{i}": i for i in range(100)}
        validate_collection_size(data)  # Should not raise

    def test_accepts_small_list(self) -> None:
        """Should accept list within size limit."""
        data = list(range(100))
        validate_collection_size(data)

    def test_accepts_empty_collections(self) -> None:
        """Should accept empty collections."""
        validate_collection_size({})
        validate_collection_size([])

    def test_accepts_primitives(self) -> None:
        """Should accept primitive values."""
        validate_collection_size("string")
        validate_collection_size(123)
        validate_collection_size(True)
        validate_collection_size(None)

    def test_rejects_large_dict(self) -> None:
        """Should reject dict exceeding size limit."""
        data = {f"key{i}": i for i in range(MAX_COLLECTION_SIZE + 1)}

        with pytest.raises(ValidationError, match="too large"):
            validate_collection_size(data)

    def test_rejects_large_list(self) -> None:
        """Should reject list exceeding size limit."""
        data = list(range(MAX_COLLECTION_SIZE + 1))

        with pytest.raises(ValidationError, match="too large"):
            validate_collection_size(data)

    def test_counts_nested_elements(self) -> None:
        """Should count elements in nested structures."""
        # Create nested structure with many elements
        data = {f"key{i}": list(range(100)) for i in range(20)}
        # Total: 20 keys + 20*100 list items = 2020 elements

        with pytest.raises(ValidationError, match="too large"):
            validate_collection_size(data, max_size=1000)

    def test_counts_deeply_nested(self) -> None:
        """Should count all elements in deeply nested structures."""
        data = {
            "level1": {
                "level2": {"level3": [1, 2, 3, 4, 5]},
                "level2b": [1, 2, 3],
            }
        }
        # Total: 4 dict entries (1 + 2 + 1) + 8 list items (5 + 3) = 12 elements
        validate_collection_size(data, max_size=15)
        validate_collection_size(data, max_size=12)  # Exactly at the limit
        with pytest.raises(ValidationError, match="too large"):
            validate_collection_size(data, max_size=11)  # One below the total

    def test_custom_size_limit(self) -> None:
        """Should respect custom size limit."""
        data = list(range(50))

        validate_collection_size(data, max_size=100)  # OK
        with pytest.raises(ValidationError):
            validate_collection_size(data, max_size=40)  # Too large
168+
169+
170+
class TestStateValueValidation:
    """Test combined state value validation."""

    def test_accepts_valid_state_value(self) -> None:
        """Should accept valid state value."""
        data = {"user": {"name": "test", "age": 30, "tags": ["admin", "user"]}}
        validate_state_value(data)

    def test_rejects_too_deep(self) -> None:
        """Should reject value that's too deep."""
        data: dict[str, Any] = {"value": 1}
        for _ in range(MAX_JSON_DEPTH + 1):
            data = {"nested": data}

        with pytest.raises(ValidationError, match="nesting too deep"):
            validate_state_value(data)

    def test_rejects_too_large(self) -> None:
        """Should reject value that's too large."""
        data = list(range(MAX_COLLECTION_SIZE + 1))

        with pytest.raises(ValidationError, match="too large"):
            validate_state_value(data)

    def test_validates_complex_structures(self) -> None:
        """Should validate complex real-world structures."""
        # Simulate a realistic configuration
        config = {
            "database": {"host": "localhost", "port": 5432, "name": "mydb"},
            "services": [
                {"name": "api", "port": 8000, "workers": 4},
                {"name": "worker", "port": 8001, "workers": 2},
            ],
            "features": {"auth": True, "cache": True, "debug": False},
        }
        validate_state_value(config)  # Should pass
206+
207+
208+
class TestEdgeCases:
    """Test edge cases and boundary conditions."""

    def test_exactly_at_depth_limit(self) -> None:
        """Should accept structure exactly at depth limit."""
        # A primitive wrapped MAX_JSON_DEPTH times sits exactly at the limit.
        data: Any = 1
        for _ in range(MAX_JSON_DEPTH):
            data = {"nested": data}
        validate_json_depth(data)  # Should pass

    def test_exactly_at_size_limit(self) -> None:
        """Should accept collection exactly at size limit."""
        data = list(range(MAX_COLLECTION_SIZE))
        validate_collection_size(data)  # Should pass

    def test_unicode_strings(self) -> None:
        """Should handle unicode strings."""
        data = {"emoji": "🚀", "chinese": "你好", "arabic": "مرحبا"}
        validate_state_value(data)

    def test_mixed_types(self) -> None:
        """Should handle mixed types in collections."""
        data = {
            "string": "value",
            "number": 42,
            "float": 3.14,
            "bool": True,
            "null": None,
            "list": [1, "two", 3.0],
            "dict": {"nested": "data"},
        }
        validate_state_value(data)

0 commit comments

Comments
 (0)