From 2e50e8acd37301e1345fca80c9d0648d084cc60e Mon Sep 17 00:00:00 2001 From: yk0007 Date: Wed, 14 Jan 2026 13:15:03 +0530 Subject: [PATCH 01/12] added sensitive data redactor processor --- docs/processors.md | 217 +++++ src/structlog/processors.py | 521 ++++++++++ tests/processors/test_processors.py | 1380 +++++++++++++++++++++++++++ 3 files changed, 2118 insertions(+) diff --git a/docs/processors.md b/docs/processors.md index 5c6b13b5..7f2c8c81 100644 --- a/docs/processors.md +++ b/docs/processors.md @@ -156,6 +156,223 @@ Advanced log aggregation and analysis tools like [*Logstash*](https://www.elasti For a list of shipped processors, check out the {ref}`API documentation `. +## Redacting Sensitive Data + +When logging in production environments, it's critical to ensure sensitive information like passwords, API keys, personal data, and financial information doesn't end up in your logs. +*structlog* provides the {class}`~structlog.processors.SensitiveDataRedactor` processor to automatically identify and redact sensitive fields from log events. + +### Basic Usage + +```python +import structlog +from structlog.processors import SensitiveDataRedactor + +# Create a redactor for common sensitive fields +redactor = SensitiveDataRedactor( + sensitive_fields=["password", "api_key", "secret", "token"] +) + +structlog.configure( + processors=[ + structlog.stdlib.add_log_level, + redactor, # Place before renderers! 
+ structlog.processors.JSONRenderer(), + ] +) + +log = structlog.get_logger() +log.info("user_login", user="alice", password="secret123") +# Output: {"event": "user_login", "user": "alice", "password": "[REDACTED]", "level": "info"} +``` + +### Pattern Matching + +Instead of listing every possible field name, use glob-style patterns with `*` (matches any sequence) and `?` (matches single character): + +```python +redactor = SensitiveDataRedactor( + sensitive_fields=[ + "*password*", # Matches: password, user_password, password_hash + "api_*", # Matches: api_key, api_secret, api_token + "*_token", # Matches: auth_token, refresh_token, access_token + "*secret*", # Matches: secret, client_secret, secret_key + ] +) +``` + +### Case-Insensitive Matching + +Enable case-insensitive matching when field names may have inconsistent casing: + +```python +redactor = SensitiveDataRedactor( + sensitive_fields=["password", "apikey"], + case_insensitive=True +) +# Now matches: password, PASSWORD, Password, ApiKey, APIKEY, etc. 
+``` + +### Nested Structures + +The redactor automatically traverses nested dictionaries and lists: + +```python +log.info( + "config_loaded", + config={ + "database": { + "host": "localhost", + "password": "db_secret" # Will be redacted + }, + "api_keys": [ + {"service": "stripe", "api_key": "sk_live_xxx"}, # Will be redacted + {"service": "twilio", "api_key": "AC_xxx"} # Will be redacted + ] + } +) +``` + +### Custom Redaction Logic + +For more control over how values are redacted, provide a custom callback: + +```python +def partial_mask(field_name, value, path): + """Show first/last 2 characters for debugging.""" + if isinstance(value, str) and len(value) > 4: + return f"{value[:2]}{'*' * (len(value) - 4)}{value[-2:]}" + return "[REDACTED]" + +redactor = SensitiveDataRedactor( + sensitive_fields=["*password*", "*token*"], + redaction_callback=partial_mask +) + +log.info("auth", password="mysecretpassword") +# Output: {"event": "auth", "password": "my**********rd"} +``` + +The callback receives: +- `field_name`: The name of the field being redacted +- `value`: The original value +- `path`: The full path to the field (e.g., `"config.database.password"`) + +### Compliance Use Cases + +#### GDPR Compliance + +Protect personally identifiable information (PII) in logs: + +```python +import logging + +# Separate audit logger for compliance records +audit_logger = logging.getLogger("gdpr.audit") + +def gdpr_audit(field_name, value, path): + """Log redaction events for GDPR compliance auditing.""" + audit_logger.info( + "PII field redacted", + extra={ + "field_name": field_name, + "field_path": path, + "value_type": type(value).__name__, + } + ) + +gdpr_redactor = SensitiveDataRedactor( + sensitive_fields=[ + # Personal identifiers + "*email*", "*phone*", "*mobile*", + "*name*", "*first_name*", "*last_name*", + # Government IDs + "*ssn*", "*social_security*", "*passport*", + "*national_id*", "*tax_id*", + # Location data + "*address*", "*zip*", "*postal*", + # Dates 
that could identify + "*birth*", "*dob*", + ], + case_insensitive=True, + audit_callback=gdpr_audit, +) +``` + +#### PCI-DSS Compliance + +Protect payment card data: + +```python +def mask_card_number(field_name, value, path): + """PCI-DSS compliant card masking - show only last 4 digits.""" + if "card" in field_name.lower() and isinstance(value, str): + # Remove any spaces/dashes and show last 4 + digits = "".join(c for c in value if c.isdigit()) + if len(digits) >= 4: + return f"****-****-****-{digits[-4:]}" + return "[REDACTED]" + +pci_redactor = SensitiveDataRedactor( + sensitive_fields=[ + "*card*", "*pan*", # Card numbers + "*cvv*", "*cvc*", "*cvn*", # Security codes + "*expir*", # Expiration dates + "*account_number*", # Bank accounts + "*routing*", # Routing numbers + ], + case_insensitive=True, + redaction_callback=mask_card_number, +) +``` + +#### HIPAA Compliance + +Protect health information: + +```python +hipaa_redactor = SensitiveDataRedactor( + sensitive_fields=[ + # Patient identifiers + "*patient_id*", "*medical_record*", "*mrn*", + # Health information + "*diagnosis*", "*prescription*", "*medication*", + "*treatment*", "*procedure*", + # Insurance + "*insurance_id*", "*policy_number*", + # Also include general PII patterns + "*ssn*", "*dob*", "*birth*", + ], + case_insensitive=True, +) +``` + +### Combining Multiple Redactors + +For applications with different compliance requirements, you can chain multiple redactors: + +```python +structlog.configure( + processors=[ + structlog.stdlib.add_log_level, + gdpr_redactor, # GDPR PII protection + pci_redactor, # PCI-DSS payment data + hipaa_redactor, # HIPAA health data + structlog.processors.JSONRenderer(), + ] +) +``` + +### Performance Considerations + +- **Prefer exact matches over patterns** when possible for better performance +- **Use `frozenset` internally** for O(1) exact match lookups +- **Patterns are compiled once** at initialization, not on every log call +- **Place the redactor before 
expensive operations** like JSON serialization + +:::{versionadded} 25.1.0 +::: + + ## Third-Party packages *structlog* was specifically designed to be as composable and reusable as possible, so whatever you're missing: diff --git a/src/structlog/processors.py b/src/structlog/processors.py index 80841561..60cbdd95 100644 --- a/src/structlog/processors.py +++ b/src/structlog/processors.py @@ -55,6 +55,7 @@ "JSONRenderer", "KeyValueRenderer", "LogfmtRenderer", + "SensitiveDataRedactor", "StackInfoRenderer", "TimeStamper", "UnicodeDecoder", @@ -969,3 +970,523 @@ def __call__( event_dict["event"] = replace_by return event_dict + + +def _compile_sensitive_pattern( + pattern: str, case_insensitive: bool +) -> Callable[[str], bool]: + """ + Compile a glob-style pattern into a matcher function. + + Args: + pattern: A glob-style pattern string containing ``*`` and/or ``?``. + case_insensitive: Whether matching should ignore case. + + Returns: + A function that takes a string key and returns True if it matches + the pattern. + + Note: + Uses :func:`fnmatch.translate` to convert glob patterns to regex. + ``*`` matches any sequence of characters (including empty). + ``?`` matches exactly one character. + """ + import fnmatch + import re + + # Convert glob pattern to regex + regex_pattern = fnmatch.translate(pattern) + flags = re.IGNORECASE if case_insensitive else 0 + compiled = re.compile(regex_pattern, flags) + + def matcher(key: str) -> bool: + return compiled.fullmatch(key) is not None + + return matcher + + +#: Type alias for the redaction callback function. +#: +#: The callback signature is ``(field_name, original_value, field_path) -> redacted_value``: +#: +#: - ``field_name`` (str): The name of the field being redacted. +#: - ``original_value`` (Any): The original value before redaction. +#: - ``field_path`` (str): Dot-separated path to the field (e.g., ``"user.credentials.password"``). +#: +#: Returns: +#: The value to use as the replacement (can be any type). 
#: Type alias for the redaction callback function.
#:
#: The callback signature is
#: ``(field_name, original_value, field_path) -> redacted_value``:
#:
#: - ``field_name`` (str): The name of the field being redacted.
#: - ``original_value`` (Any): The original value before redaction.
#: - ``field_path`` (str): Path to the field
#:   (e.g., ``"user.credentials.password"``).
#:
#: Returns the value to use as the replacement (can be any type).
RedactionCallback = Callable[[str, Any, str], Any]

#: Type alias for the audit callback function.
#:
#: The callback signature is
#: ``(field_name, original_value, field_path) -> None``:
#:
#: - ``field_name`` (str): The name of the field being redacted.
#: - ``original_value`` (Any): The original value before redaction.
#: - ``field_path`` (str): Path to the field
#:   (e.g., ``"user.credentials.password"``).
#:
#: The callback is invoked *before* redaction occurs, so it receives the
#: original value.
AuditCallback = Callable[[str, Any, str], None]


class SensitiveDataRedactor:
    """
    Redact sensitive fields from event dictionaries.

    Every key of the event dictionary -- and of any dictionary nested inside
    it, including dictionaries stored in lists and tuples -- is compared
    against the configured field names and patterns. The value of a matching
    key is replaced as a whole by *placeholder* or by the result of
    *redaction_callback*.

    The processor supports:

    - **Exact field matching**: Specify exact field names to redact.
    - **Pattern matching**: Use glob-style patterns (``*`` and ``?``) to match
      field names dynamically.
    - **Case-insensitive matching**: Optionally ignore case when matching
      field names.
    - **Nested structures**: Dictionaries, lists, and tuples are traversed at
      any depth.
    - **Custom redaction**: Provide a callback function for custom redaction
      logic (e.g., partial masking, hashing).
    - **Audit logging**: Get notified about every redaction.

    Args:
        sensitive_fields:
            Field names or glob-style patterns to redact. Any iterable of
            strings is accepted and consumed exactly once; a single string is
            treated as one field name.

            **Exact matches**: Strings without ``*`` or ``?`` match field
            names exactly::

                ["password", "api_key", "ssn"]

            **Glob patterns**: ``*`` matches any sequence of characters and
            ``?`` matches exactly one character::

                ["*password*"]  # password, user_password, password_hash
                ["api_*"]       # api_key, api_secret, api_token
                ["*_token"]     # auth_token, refresh_token
                ["key?"]        # key1, keyA (but not key or key12)

        placeholder:
            The string used to replace redacted values. Ignored if
            *redaction_callback* is provided.

            Examples of common placeholders::

                "[REDACTED]"   # Default, clear indication of redaction
                "***"          # Shorter placeholder
                "<redacted>"   # Alternative marker
                ""             # Empty string (removes value)

        case_insensitive:
            When ``True``, field name matching ignores case::

                # Matches: password, PASSWORD, Password
                SensitiveDataRedactor(["password"], case_insensitive=True)

        redaction_callback:
            An optional callable for custom redaction logic that takes
            precedence over *placeholder*. It is called with:

            - ``field_name`` (str): The name of the field being redacted.
            - ``original_value`` (Any): The original value.
            - ``field_path`` (str): The path to the field
              (e.g., ``"config.database.password"`` or ``"users[0].ssn"``).

            and must return the redacted value.

            Example - partial masking::

                def partial_mask(field_name, value, path):
                    if isinstance(value, str) and len(value) > 4:
                        return value[:2] + "*" * (len(value) - 4) + value[-2:]
                    return "[REDACTED]"

            Example - hash values for correlation::

                import hashlib

                def hash_value(field_name, value, path):
                    h = hashlib.sha256(str(value).encode()).hexdigest()[:8]
                    return f"[HASH:{h}]"

        audit_callback:
            An optional callable invoked for each redacted field with the
            same three arguments as *redaction_callback*. Its return value is
            ignored. It is called *before* the value is redacted, so it
            receives the original value.

            Example - count redactions for metrics::

                from collections import Counter
                redaction_counts = Counter()

                def count_redactions(field_name, value, path):
                    redaction_counts[field_name] += 1

    Raises:
        The processor itself tolerates non-string keys and empty input.
        Exceptions raised by *redaction_callback* or *audit_callback* are
        not caught and propagate to the caller.

    Examples:
        **Basic usage**::

            redactor = SensitiveDataRedactor(
                sensitive_fields=["password", "api_key", "secret"]
            )

            structlog.configure(
                processors=[
                    structlog.stdlib.add_log_level,
                    redactor,  # Add before renderers
                    structlog.processors.JSONRenderer(),
                ]
            )

        **Nested dictionary handling**::

            redactor = SensitiveDataRedactor(sensitive_fields=["password"])
            event = {
                "user": "alice",
                "credentials": {"password": "secret123", "mfa_enabled": True},
            }
            result = redactor(None, None, event)
            # {"user": "alice",
            #  "credentials": {"password": "[REDACTED]", "mfa_enabled": True}}

        **Custom redaction with partial masking**::

            def mask_email(field_name, value, path):
                local, at, domain = str(value).rpartition("@")
                if field_name == "email" and at and local:
                    return f"{local[0]}***@{domain}"
                return "[REDACTED]"

            redactor = SensitiveDataRedactor(
                sensitive_fields=["email", "password"],
                redaction_callback=mask_email,
            )

        **Compliance audit trail**::

            import logging

            audit_logger = logging.getLogger("gdpr.audit")

            def gdpr_audit(field_name, value, path):
                audit_logger.info(
                    "PII field redacted",
                    extra={"field_name": field_name, "field_path": path},
                )

            gdpr_redactor = SensitiveDataRedactor(
                sensitive_fields=["*email*", "*phone*", "*ssn*"],
                case_insensitive=True,
                audit_callback=gdpr_audit,
            )

    Note:
        - Place this processor **before** any renderers (like
          ``JSONRenderer``) so data is redacted before it is serialized.
        - The event dictionary itself is modified in place. Nested
          containers are *not* mutated: a container that holds something to
          redact is replaced by a shallow copy, so objects that the
          application passed to the logger keep their original values.
        - Exact names are looked up in a set; patterns are tried one by one,
          so prefer exact names where possible.
        - Self-referential containers are not supported: traversal recurses
          without a cycle check.
        - Instances can be pickled as long as the callbacks can be pickled.

    .. versionadded:: 25.1.0
    """

    __slots__ = (
        "_audit_callback",
        "_case_insensitive",
        "_exact_fields",
        "_pattern_matchers",
        "_placeholder",
        "_redaction_callback",
        "_sensitive_fields",
    )

    _exact_fields: frozenset[str]
    _pattern_matchers: tuple[Callable[[str], bool], ...]
    _placeholder: str
    _case_insensitive: bool
    _sensitive_fields: tuple[str, ...]
    _redaction_callback: RedactionCallback | None
    _audit_callback: AuditCallback | None

    def __init__(
        self,
        sensitive_fields: Collection[str],
        placeholder: str = "[REDACTED]",
        case_insensitive: bool = False,
        redaction_callback: RedactionCallback | None = None,
        audit_callback: AuditCallback | None = None,
    ) -> None:
        self._configure(
            sensitive_fields,
            placeholder,
            case_insensitive,
            redaction_callback,
            audit_callback,
        )

    def _configure(
        self,
        sensitive_fields: Collection[str],
        placeholder: str,
        case_insensitive: bool,
        redaction_callback: RedactionCallback | None,
        audit_callback: AuditCallback | None,
    ) -> None:
        """
        Set all slots; shared by ``__init__`` and ``__setstate__``.
        """
        # A lone string is itself a collection of one-character strings;
        # iterating it would register "p", "a", "s", ... as field names.
        if isinstance(sensitive_fields, str):
            sensitive_fields = (sensitive_fields,)

        self._placeholder = placeholder
        self._case_insensitive = case_insensitive
        self._redaction_callback = redaction_callback
        self._audit_callback = audit_callback
        # Materialize once: one-shot iterables (generators) must not be
        # iterated a second time below. Also kept as the pickling state,
        # because the compiled matchers are closures and cannot be pickled.
        self._sensitive_fields = tuple(sensitive_fields)

        exact_fields: set[str] = set()
        pattern_matchers: list[Callable[[str], bool]] = []
        for field in self._sensitive_fields:
            if "*" in field or "?" in field:
                pattern_matchers.append(
                    _compile_sensitive_pattern(field, case_insensitive)
                )
            else:
                exact_fields.add(field.lower() if case_insensitive else field)

        self._exact_fields = frozenset(exact_fields)
        self._pattern_matchers = tuple(pattern_matchers)

    def _is_sensitive(self, key: Any) -> bool:
        """
        Return whether *key* matches a sensitive field name or pattern.

        Keys that are not strings never match; nested dictionaries may use
        arbitrary hashable keys and those must not break logging.
        """
        if not isinstance(key, str):
            return False

        # Set lookup first, patterns only if that fails.
        check_key = key.lower() if self._case_insensitive else key
        if check_key in self._exact_fields:
            return True

        # The matchers handle case-insensitivity themselves.
        return any(matcher(key) for matcher in self._pattern_matchers)

    def _get_redacted_value(self, key: str, value: Any, path: str) -> Any:
        """
        Return the replacement for the sensitive field *key*.

        The audit callback (if any) is called first with the original value;
        then either the redaction callback's result or the placeholder is
        returned.
        """
        if self._audit_callback is not None:
            self._audit_callback(key, value, path)

        if self._redaction_callback is not None:
            return self._redaction_callback(key, value, path)

        return self._placeholder

    def __call__(
        self, logger: WrappedLogger, name: str, event_dict: EventDict
    ) -> EventDict:
        """
        Redact sensitive fields of *event_dict* and return it.

        *logger* and *name* are unused. *event_dict* is updated in place;
        nested containers are replaced by redacted copies rather than
        mutated.
        """
        # Only values of existing keys are reassigned, which is safe while
        # iterating over a dict.
        for key, value in event_dict.items():
            replacement = self._redact_item(key, value, "")
            if replacement is not value:
                event_dict[key] = replacement

        return event_dict

    def _redact_item(self, key: Any, value: Any, parent_path: str) -> Any:
        """
        Return the redacted form of one dictionary entry.

        Returns *value* itself if nothing had to be redacted.
        """
        path = f"{parent_path}.{key}" if parent_path else str(key)
        if self._is_sensitive(key):
            return self._get_redacted_value(key, value, path)

        return self._redact_value(value, path)

    def _redact_value(self, value: Any, path: str) -> Any:
        """
        Redact inside *value* if it is a dict, list, or tuple.

        Any other value is returned unchanged.
        """
        if isinstance(value, dict):
            return self._redact_dict(value, path)

        if isinstance(value, (list, tuple)):
            return self._redact_list(value, path)

        return value

    def _redact_dict(
        self, d: dict[str, Any], parent_path: str
    ) -> dict[str, Any]:
        """
        Return *d* with sensitive fields redacted, without mutating *d*.

        Args:
            d: The dictionary to process.

            parent_path: The path to this dictionary's location.

        Returns:
            *d* itself if nothing was redacted, otherwise a shallow copy in
            which the affected values are replaced.
        """
        result = d
        for key, value in d.items():
            replacement = self._redact_item(key, value, parent_path)
            if replacement is not value:
                if result is d:
                    # Copy on first change so the caller's object, which may
                    # be live application data, keeps its real values.
                    result = d.copy()
                result[key] = replacement

        return result

    def _redact_list(
        self, lst: list[Any] | tuple[Any, ...], parent_path: str
    ) -> list[Any] | tuple[Any, ...]:
        """
        Return *lst* with nested sensitive fields redacted.

        Args:
            lst: The list or tuple to process. It is not mutated.

            parent_path: The path to this sequence's location.

        Returns:
            *lst* itself if nothing was redacted, otherwise a new list (or a
            tuple, rebuilt via ``_make`` for named tuples) holding the
            redacted items.
        """
        redacted: list[Any] | None = None
        for index, item in enumerate(lst):
            if not isinstance(item, (dict, list, tuple)):
                continue

            replacement = self._redact_value(item, f"{parent_path}[{index}]")
            if replacement is not item:
                if redacted is None:
                    redacted = list(lst)
                redacted[index] = replacement

        if redacted is None:
            return lst

        if isinstance(lst, tuple):
            # Named tuples take their fields as separate arguments, so they
            # have to be rebuilt through their ``_make`` constructor.
            make = getattr(type(lst), "_make", None)
            return make(redacted) if make is not None else tuple(redacted)

        return redacted

    def __getstate__(self) -> dict[str, Any]:
        """
        Return the constructor arguments as pickling state.

        The compiled matchers are closures and cannot be pickled, so they
        are rebuilt from this state in ``__setstate__``.
        """
        return {
            "sensitive_fields": self._sensitive_fields,
            "placeholder": self._placeholder,
            "case_insensitive": self._case_insensitive,
            "redaction_callback": self._redaction_callback,
            "audit_callback": self._audit_callback,
        }

    def __setstate__(self, state: dict[str, Any]) -> None:
        """
        Rebuild the instance from the state of ``__getstate__``.
        """
        self._configure(
            state["sensitive_fields"],
            state["placeholder"],
            state["case_insensitive"],
            state.get("redaction_callback"),
            state.get("audit_callback"),
        )
+ """ + redactor = SensitiveDataRedactor(sensitive_fields=["password"]) + + assert {"user": "alice", "password": "[REDACTED]"} == redactor( + None, None, {"user": "alice", "password": "s3cr3t"} + ) + + def test_redacts_multiple_sensitive_fields(self): + """ + Multiple sensitive fields are all redacted. + """ + redactor = SensitiveDataRedactor( + sensitive_fields=["password", "api_key", "secret"] + ) + + assert { + "user": "alice", + "password": "[REDACTED]", + "api_key": "[REDACTED]", + "secret": "[REDACTED]", + } == redactor( + None, + None, + { + "user": "alice", + "password": "s3cr3t", + "api_key": "abc123", + "secret": "xyz789", + }, + ) + + def test_custom_placeholder(self): + """ + A custom placeholder can be specified. + """ + redactor = SensitiveDataRedactor( + sensitive_fields=["password"], placeholder="***" + ) + + assert {"password": "***"} == redactor( + None, None, {"password": "s3cr3t"} + ) + + def test_redacts_nested_dict(self): + """ + Sensitive fields in nested dictionaries are redacted. + """ + redactor = SensitiveDataRedactor(sensitive_fields=["api_key"]) + + assert {"config": {"api_key": "[REDACTED]", "timeout": 30}} == redactor( + None, None, {"config": {"api_key": "abc123", "timeout": 30}} + ) + + def test_redacts_deeply_nested_dict(self): + """ + Sensitive fields in deeply nested dictionaries are redacted. + """ + redactor = SensitiveDataRedactor(sensitive_fields=["secret"]) + + assert { + "level1": {"level2": {"level3": {"secret": "[REDACTED]"}}} + } == redactor( + None, + None, + {"level1": {"level2": {"level3": {"secret": "deep_secret"}}}}, + ) + + def test_redacts_in_list_of_dicts(self): + """ + Sensitive fields in dictionaries within lists are redacted. 
+ """ + redactor = SensitiveDataRedactor(sensitive_fields=["password"]) + + assert { + "users": [ + {"name": "alice", "password": "[REDACTED]"}, + {"name": "bob", "password": "[REDACTED]"}, + ] + } == redactor( + None, + None, + { + "users": [ + {"name": "alice", "password": "pass1"}, + {"name": "bob", "password": "pass2"}, + ] + }, + ) + + def test_redacts_nested_lists(self): + """ + Sensitive fields in nested list structures are redacted. + """ + redactor = SensitiveDataRedactor(sensitive_fields=["token"]) + + assert { + "data": [[{"token": "[REDACTED]"}]] + } == redactor( + None, + None, + {"data": [[{"token": "secret_token"}]]}, + ) + + def test_leaves_non_sensitive_fields_unchanged(self): + """ + Non-sensitive fields are not modified. + """ + redactor = SensitiveDataRedactor(sensitive_fields=["password"]) + + assert { + "user": "alice", + "email": "alice@example.com", + "age": 30, + } == redactor( + None, + None, + {"user": "alice", "email": "alice@example.com", "age": 30}, + ) + + def test_empty_sensitive_fields(self): + """ + When no sensitive fields are specified, nothing is redacted. + """ + redactor = SensitiveDataRedactor(sensitive_fields=[]) + + assert {"password": "s3cr3t"} == redactor( + None, None, {"password": "s3cr3t"} + ) + + def test_empty_event_dict(self): + """ + An empty event dict is handled gracefully. + """ + redactor = SensitiveDataRedactor(sensitive_fields=["password"]) + + assert {} == redactor(None, None, {}) + + def test_redacts_various_value_types(self): + """ + Sensitive fields with various value types are all redacted. 
+ """ + redactor = SensitiveDataRedactor(sensitive_fields=["secret"]) + + assert {"secret": "[REDACTED]"} == redactor( + None, None, {"secret": "string"} + ) + assert {"secret": "[REDACTED]"} == redactor( + None, None, {"secret": 12345} + ) + assert {"secret": "[REDACTED]"} == redactor( + None, None, {"secret": ["list", "of", "values"]} + ) + assert {"secret": "[REDACTED]"} == redactor( + None, None, {"secret": {"nested": "dict"}} + ) + assert {"secret": "[REDACTED]"} == redactor( + None, None, {"secret": None} + ) + + def test_pickleable(self): + """ + An instance of SensitiveDataRedactor can be pickled. + """ + redactor = SensitiveDataRedactor( + sensitive_fields=["password", "api_key"], placeholder="***" + ) + pickle.dumps(redactor) + + def test_pattern_star_prefix(self): + """ + Pattern with * prefix matches fields ending with the suffix. + """ + redactor = SensitiveDataRedactor(sensitive_fields=["*_key"]) + + assert { + "api_key": "[REDACTED]", + "secret_key": "[REDACTED]", + "user": "alice", + } == redactor( + None, + None, + {"api_key": "abc123", "secret_key": "xyz789", "user": "alice"}, + ) + + def test_pattern_star_suffix(self): + """ + Pattern with * suffix matches fields starting with the prefix. + """ + redactor = SensitiveDataRedactor(sensitive_fields=["api_*"]) + + assert { + "api_key": "[REDACTED]", + "api_secret": "[REDACTED]", + "api_token_v2": "[REDACTED]", + "user": "alice", + } == redactor( + None, + None, + { + "api_key": "abc", + "api_secret": "xyz", + "api_token_v2": "123", + "user": "alice", + }, + ) + + def test_pattern_star_contains(self): + """ + Pattern with * on both sides matches fields containing the substring. 
+ """ + redactor = SensitiveDataRedactor(sensitive_fields=["*password*"]) + + assert { + "password": "[REDACTED]", + "user_password": "[REDACTED]", + "password_hash": "[REDACTED]", + "old_password_backup": "[REDACTED]", + "username": "alice", + } == redactor( + None, + None, + { + "password": "abc", + "user_password": "xyz", + "password_hash": "hash123", + "old_password_backup": "old", + "username": "alice", + }, + ) + + def test_pattern_question_mark(self): + """ + Pattern with ? matches exactly one character. + """ + redactor = SensitiveDataRedactor(sensitive_fields=["key?"]) + + assert { + "key1": "[REDACTED]", + "key2": "[REDACTED]", + "keyA": "[REDACTED]", + "key": "unchanged", + "key12": "unchanged", + } == redactor( + None, + None, + { + "key1": "a", + "key2": "b", + "keyA": "c", + "key": "unchanged", + "key12": "unchanged", + }, + ) + + def test_multiple_patterns(self): + """ + Multiple patterns can be used together. + """ + redactor = SensitiveDataRedactor( + sensitive_fields=["*password*", "api_*", "*_token"] + ) + + assert { + "user_password": "[REDACTED]", + "api_key": "[REDACTED]", + "auth_token": "[REDACTED]", + "username": "alice", + } == redactor( + None, + None, + { + "user_password": "pass", + "api_key": "key", + "auth_token": "token", + "username": "alice", + }, + ) + + def test_mixed_exact_and_patterns(self): + """ + Exact matches and patterns can be mixed. + """ + redactor = SensitiveDataRedactor( + sensitive_fields=["password", "*_secret", "api_*"] + ) + + assert { + "password": "[REDACTED]", + "user_secret": "[REDACTED]", + "api_key": "[REDACTED]", + "username": "alice", + } == redactor( + None, + None, + { + "password": "pass", + "user_secret": "secret", + "api_key": "key", + "username": "alice", + }, + ) + + def test_case_insensitive_exact_match(self): + """ + Case-insensitive mode matches regardless of case for exact fields. 
+ """ + redactor = SensitiveDataRedactor( + sensitive_fields=["password"], case_insensitive=True + ) + + assert { + "password": "[REDACTED]", + "PASSWORD": "[REDACTED]", + "Password": "[REDACTED]", + "PaSsWoRd": "[REDACTED]", + "username": "alice", + } == redactor( + None, + None, + { + "password": "a", + "PASSWORD": "b", + "Password": "c", + "PaSsWoRd": "d", + "username": "alice", + }, + ) + + def test_case_insensitive_pattern(self): + """ + Case-insensitive mode works with patterns. + """ + redactor = SensitiveDataRedactor( + sensitive_fields=["*password*"], case_insensitive=True + ) + + assert { + "user_password": "[REDACTED]", + "USER_PASSWORD": "[REDACTED]", + "UserPassword": "[REDACTED]", + "username": "alice", + } == redactor( + None, + None, + { + "user_password": "a", + "USER_PASSWORD": "b", + "UserPassword": "c", + "username": "alice", + }, + ) + + def test_case_sensitive_by_default(self): + """ + Case-sensitive matching is the default behavior. + """ + redactor = SensitiveDataRedactor(sensitive_fields=["password"]) + + assert { + "password": "[REDACTED]", + "PASSWORD": "still_visible", + "Password": "also_visible", + } == redactor( + None, + None, + { + "password": "secret", + "PASSWORD": "still_visible", + "Password": "also_visible", + }, + ) + + def test_pattern_in_nested_dict(self): + """ + Patterns work in nested dictionaries. + """ + redactor = SensitiveDataRedactor(sensitive_fields=["*_key"]) + + assert { + "config": { + "api_key": "[REDACTED]", + "secret_key": "[REDACTED]", + "timeout": 30, + } + } == redactor( + None, + None, + { + "config": { + "api_key": "abc", + "secret_key": "xyz", + "timeout": 30, + } + }, + ) + + def test_pattern_in_list_of_dicts(self): + """ + Patterns work in lists of dictionaries. 
+ """ + redactor = SensitiveDataRedactor(sensitive_fields=["*password*"]) + + assert { + "users": [ + {"name": "alice", "user_password": "[REDACTED]"}, + {"name": "bob", "password_hash": "[REDACTED]"}, + ] + } == redactor( + None, + None, + { + "users": [ + {"name": "alice", "user_password": "pass1"}, + {"name": "bob", "password_hash": "hash2"}, + ] + }, + ) + + def test_pickleable_with_patterns(self): + """ + An instance with patterns can be pickled and unpickled. + """ + redactor = SensitiveDataRedactor( + sensitive_fields=["*password*", "api_*"], + placeholder="***", + case_insensitive=True, + ) + pickled = pickle.dumps(redactor) + unpickled = pickle.loads(pickled) + + assert {"user_password": "***", "api_key": "***"} == unpickled( + None, None, {"user_password": "secret", "api_key": "key"} + ) + # Also verify case insensitive works after unpickling + assert {"USER_PASSWORD": "***"} == unpickled( + None, None, {"USER_PASSWORD": "secret"} + ) + + def test_redaction_callback_basic(self): + """ + A custom redaction callback is used instead of placeholder. + """ + + def custom_redactor(field_name, value, path): + return f"" + + redactor = SensitiveDataRedactor( + sensitive_fields=["password"], + redaction_callback=custom_redactor, + ) + + assert {"password": ""} == redactor( + None, None, {"password": "secret"} + ) + + def test_redaction_callback_receives_value(self): + """ + The redaction callback receives the original value. + """ + + def length_redactor(field_name, value, path): + if isinstance(value, str): + return f"[{len(value)} chars]" + return "[REDACTED]" + + redactor = SensitiveDataRedactor( + sensitive_fields=["password"], + redaction_callback=length_redactor, + ) + + assert {"password": "[9 chars]"} == redactor( + None, None, {"password": "secret123"} + ) + + def test_redaction_callback_receives_path(self): + """ + The redaction callback receives the field path. 
+ """ + paths_received = [] + + def path_collector(field_name, value, path): + paths_received.append(path) + return "[REDACTED]" + + redactor = SensitiveDataRedactor( + sensitive_fields=["password"], + redaction_callback=path_collector, + ) + + redactor( + None, + None, + { + "password": "top_level", + "config": {"password": "nested"}, + "users": [{"password": "in_list"}], + }, + ) + + assert "password" in paths_received + assert "config.password" in paths_received + assert "users[0].password" in paths_received + + def test_redaction_callback_with_pattern(self): + """ + Redaction callback works with pattern matching. + """ + + def custom_redactor(field_name, value, path): + return f"***{field_name}***" + + redactor = SensitiveDataRedactor( + sensitive_fields=["*_key"], + redaction_callback=custom_redactor, + ) + + assert { + "api_key": "***api_key***", + "secret_key": "***secret_key***", + } == redactor(None, None, {"api_key": "abc", "secret_key": "xyz"}) + + def test_redaction_callback_overrides_placeholder(self): + """ + When both callback and placeholder are provided, callback takes precedence. + """ + + def custom_redactor(field_name, value, path): + return "CUSTOM" + + redactor = SensitiveDataRedactor( + sensitive_fields=["password"], + placeholder="PLACEHOLDER", + redaction_callback=custom_redactor, + ) + + assert {"password": "CUSTOM"} == redactor( + None, None, {"password": "secret"} + ) + + def test_audit_callback_basic(self): + """ + Audit callback is called for each redacted field. 
+ """ + audit_log = [] + + def audit(field_name, value, path): + audit_log.append({"field": field_name, "value": value, "path": path}) + + redactor = SensitiveDataRedactor( + sensitive_fields=["password"], + audit_callback=audit, + ) + + redactor(None, None, {"user": "alice", "password": "secret"}) + + assert len(audit_log) == 1 + assert audit_log[0]["field"] == "password" + assert audit_log[0]["value"] == "secret" + assert audit_log[0]["path"] == "password" + + def test_audit_callback_multiple_fields(self): + """ + Audit callback is called for each redacted field. + """ + audit_log = [] + + def audit(field_name, value, path): + audit_log.append(path) + + redactor = SensitiveDataRedactor( + sensitive_fields=["password", "api_key"], + audit_callback=audit, + ) + + redactor( + None, None, {"password": "pass", "api_key": "key", "user": "alice"} + ) + + assert len(audit_log) == 2 + assert "password" in audit_log + assert "api_key" in audit_log + + def test_audit_callback_nested_paths(self): + """ + Audit callback receives correct paths for nested fields. + """ + audit_log = [] + + def audit(field_name, value, path): + audit_log.append(path) + + redactor = SensitiveDataRedactor( + sensitive_fields=["secret"], + audit_callback=audit, + ) + + redactor( + None, + None, + { + "secret": "top", + "config": {"database": {"secret": "nested"}}, + "items": [{"secret": "in_list"}, {"secret": "in_list_2"}], + }, + ) + + assert "secret" in audit_log + assert "config.database.secret" in audit_log + assert "items[0].secret" in audit_log + assert "items[1].secret" in audit_log + + def test_audit_callback_called_before_redaction(self): + """ + Audit callback receives the original value before redaction. 
+ """ + original_values = [] + + def audit(field_name, value, path): + original_values.append(value) + + redactor = SensitiveDataRedactor( + sensitive_fields=["password"], + audit_callback=audit, + ) + + result = redactor(None, None, {"password": "my_secret_password"}) + + assert result["password"] == "[REDACTED]" + assert original_values == ["my_secret_password"] + + def test_audit_and_redaction_callbacks_together(self): + """ + Both audit and redaction callbacks can be used together. + """ + audit_log = [] + + def audit(field_name, value, path): + audit_log.append({"field": field_name, "path": path}) + + def custom_redactor(field_name, value, path): + return f"<{field_name}:hidden>" + + redactor = SensitiveDataRedactor( + sensitive_fields=["password"], + redaction_callback=custom_redactor, + audit_callback=audit, + ) + + result = redactor(None, None, {"password": "secret"}) + + assert result == {"password": "<password:hidden>"} + assert audit_log == [{"field": "password", "path": "password"}] + + def test_audit_callback_with_patterns(self): + """ + Audit callback works with pattern matching. + """ + audit_log = [] + + def audit(field_name, value, path): + audit_log.append(field_name) + + redactor = SensitiveDataRedactor( + sensitive_fields=["*_secret"], + audit_callback=audit, + ) + + redactor( + None, + None, + {"db_secret": "abc", "api_secret": "xyz", "username": "alice"}, + ) + + assert "db_secret" in audit_log + assert "api_secret" in audit_log + assert "username" not in audit_log + + def test_field_path_format_deeply_nested(self): + """ + Field paths are correctly formatted for deeply nested structures.
+ """ + paths = [] + + def audit(field_name, value, path): + paths.append(path) + + redactor = SensitiveDataRedactor( + sensitive_fields=["key"], + audit_callback=audit, + ) + + redactor( + None, + None, + { + "a": { + "b": { + "c": {"key": "value"}, + }, + }, + "list": [[[{"key": "nested_list"}]]], + }, + ) + + assert "a.b.c.key" in paths + assert "list[0][0][0].key" in paths + + def test_pickleable_with_callbacks(self): + """ + An instance with callbacks can be pickled (callbacks are preserved). + """ + + def custom_redactor(field_name, value, path): + return "***" + + def audit(field_name, value, path): + pass + + redactor = SensitiveDataRedactor( + sensitive_fields=["password"], + redaction_callback=custom_redactor, + audit_callback=audit, + ) + + pickled = pickle.dumps(redactor) + unpickled = pickle.loads(pickled) + + assert {"password": "***"} == unpickled( + None, None, {"password": "secret"} + ) + + +class TestSensitiveDataRedactorIntegration: + """Integration tests for SensitiveDataRedactor with full processor chains.""" + + def test_with_json_renderer(self): + """ + SensitiveDataRedactor works correctly with JSONRenderer in a chain. + """ + redactor = SensitiveDataRedactor( + sensitive_fields=["password", "api_key", "*_secret"] + ) + renderer = JSONRenderer() + + # Simulate processor chain + event_dict = { + "event": "user_login", + "user": "alice", + "password": "s3cr3t", + "api_key": "abc123", + "db_secret": "xyz789", + } + + # Run through redactor first + redacted = redactor(None, None, event_dict) + # Then through JSON renderer + output = renderer(None, None, redacted) + + # Parse the JSON output + result = json.loads(output) + + assert result["event"] == "user_login" + assert result["user"] == "alice" + assert result["password"] == "[REDACTED]" + assert result["api_key"] == "[REDACTED]" + assert result["db_secret"] == "[REDACTED]" + + def test_with_json_renderer_nested(self): + """ + SensitiveDataRedactor handles nested structures with JSONRenderer. 
+ """ + redactor = SensitiveDataRedactor(sensitive_fields=["*password*", "*token*"]) + renderer = JSONRenderer() + + event_dict = { + "event": "config_loaded", + "config": { + "database": { + "host": "localhost", + "password": "db_pass", + }, + "auth": { + "access_token": "token123", + "refresh_token": "refresh456", + }, + }, + } + + redacted = redactor(None, None, event_dict) + output = renderer(None, None, redacted) + result = json.loads(output) + + assert result["config"]["database"]["host"] == "localhost" + assert result["config"]["database"]["password"] == "[REDACTED]" + assert result["config"]["auth"]["access_token"] == "[REDACTED]" + assert result["config"]["auth"]["refresh_token"] == "[REDACTED]" + + def test_with_json_renderer_list_of_dicts(self): + """ + SensitiveDataRedactor handles lists of dicts with JSONRenderer. + """ + redactor = SensitiveDataRedactor(sensitive_fields=["password", "ssn"]) + renderer = JSONRenderer() + + event_dict = { + "event": "batch_process", + "users": [ + {"name": "alice", "password": "pass1", "ssn": "123-45-6789"}, + {"name": "bob", "password": "pass2", "ssn": "987-65-4321"}, + ], + } + + redacted = redactor(None, None, event_dict) + output = renderer(None, None, redacted) + result = json.loads(output) + + assert result["users"][0]["name"] == "alice" + assert result["users"][0]["password"] == "[REDACTED]" + assert result["users"][0]["ssn"] == "[REDACTED]" + assert result["users"][1]["name"] == "bob" + assert result["users"][1]["password"] == "[REDACTED]" + assert result["users"][1]["ssn"] == "[REDACTED]" + + def test_with_key_value_renderer(self): + """ + SensitiveDataRedactor works with KeyValueRenderer. 
+ """ + from structlog.processors import KeyValueRenderer + + redactor = SensitiveDataRedactor(sensitive_fields=["password"]) + renderer = KeyValueRenderer() + + event_dict = { + "event": "login", + "user": "alice", + "password": "secret", + } + + redacted = redactor(None, None, event_dict) + output = renderer(None, None, redacted) + + assert "password='[REDACTED]'" in output + assert "user='alice'" in output + assert "secret" not in output + + def test_with_custom_callback_and_json_renderer(self): + """ + Custom redaction callback works with JSONRenderer. + """ + + def mask_partial(field_name, value, path): + if isinstance(value, str) and len(value) > 4: + return f"{value[:2]}***{value[-2:]}" + return "[REDACTED]" + + redactor = SensitiveDataRedactor( + sensitive_fields=["password", "api_key"], + redaction_callback=mask_partial, + ) + renderer = JSONRenderer() + + event_dict = { + "event": "auth", + "password": "mysecretpassword", + "api_key": "sk_live_abc123xyz", + } + + redacted = redactor(None, None, event_dict) + output = renderer(None, None, redacted) + result = json.loads(output) + + assert result["password"] == "my***rd" + assert result["api_key"] == "sk***yz" + + def test_with_audit_callback_and_json_renderer(self): + """ + Audit callback is invoked when used with JSONRenderer. 
+ """ + audit_log = [] + + def audit(field_name, value, path): + audit_log.append({"field": field_name, "path": path}) + + redactor = SensitiveDataRedactor( + sensitive_fields=["*password*"], + audit_callback=audit, + ) + renderer = JSONRenderer() + + event_dict = { + "event": "update", + "old_password": "old123", + "new_password": "new456", + "user": "alice", + } + + redacted = redactor(None, None, event_dict) + output = renderer(None, None, redacted) + result = json.loads(output) + + assert result["old_password"] == "[REDACTED]" + assert result["new_password"] == "[REDACTED]" + assert result["user"] == "alice" + assert len(audit_log) == 2 + assert {"field": "old_password", "path": "old_password"} in audit_log + assert {"field": "new_password", "path": "new_password"} in audit_log + + def test_multiple_redactors_in_chain(self): + """ + Multiple SensitiveDataRedactors can be chained together. + """ + # First redactor for auth fields + auth_redactor = SensitiveDataRedactor( + sensitive_fields=["*password*", "*token*"], + placeholder="[AUTH_REDACTED]", + ) + # Second redactor for PII + pii_redactor = SensitiveDataRedactor( + sensitive_fields=["*email*", "*ssn*"], + placeholder="[PII_REDACTED]", + ) + renderer = JSONRenderer() + + event_dict = { + "event": "user_create", + "password": "secret", + "email": "user@example.com", + "ssn": "123-45-6789", + "name": "John", + } + + # Chain: auth_redactor -> pii_redactor -> renderer + step1 = auth_redactor(None, None, event_dict) + step2 = pii_redactor(None, None, step1) + output = renderer(None, None, step2) + result = json.loads(output) + + assert result["password"] == "[AUTH_REDACTED]" + assert result["email"] == "[PII_REDACTED]" + assert result["ssn"] == "[PII_REDACTED]" + assert result["name"] == "John" + + def test_with_add_log_level(self): + """ + SensitiveDataRedactor works with add_log_level processor. 
+ """ + from structlog.processors import add_log_level + + redactor = SensitiveDataRedactor(sensitive_fields=["password"]) + renderer = JSONRenderer() + + event_dict = {"event": "login", "password": "secret"} + + # Simulate: add_log_level -> redactor -> renderer + step1 = add_log_level(None, "info", event_dict) + step2 = redactor(None, "info", step1) + output = renderer(None, "info", step2) + result = json.loads(output) + + assert result["level"] == "info" + assert result["password"] == "[REDACTED]" + + def test_case_insensitive_with_json_renderer(self): + """ + Case-insensitive matching works with JSONRenderer. + """ + redactor = SensitiveDataRedactor( + sensitive_fields=["password", "apikey"], + case_insensitive=True, + ) + renderer = JSONRenderer() + + event_dict = { + "event": "config", + "PASSWORD": "pass1", + "Password": "pass2", + "ApiKey": "key1", + "APIKEY": "key2", + } + + redacted = redactor(None, None, event_dict) + output = renderer(None, None, redacted) + result = json.loads(output) + + assert result["PASSWORD"] == "[REDACTED]" + assert result["Password"] == "[REDACTED]" + assert result["ApiKey"] == "[REDACTED]" + assert result["APIKEY"] == "[REDACTED]" + + def test_preserves_event_key(self): + """ + The 'event' key is preserved and not accidentally redacted. + """ + redactor = SensitiveDataRedactor(sensitive_fields=["password"]) + renderer = JSONRenderer() + + event_dict = { + "event": "user_authenticated", + "password": "secret", + } + + redacted = redactor(None, None, event_dict) + output = renderer(None, None, redacted) + result = json.loads(output) + + assert result["event"] == "user_authenticated" + assert result["password"] == "[REDACTED]" + + def test_with_timestamper(self): + """ + SensitiveDataRedactor works with TimeStamper processor. 
+ """ + from structlog.processors import TimeStamper + + timestamper = TimeStamper(fmt="iso") + redactor = SensitiveDataRedactor(sensitive_fields=["password"]) + renderer = JSONRenderer() + + event_dict = {"event": "login", "password": "secret"} + + # Chain: timestamper -> redactor -> renderer + step1 = timestamper(None, None, event_dict) + step2 = redactor(None, None, step1) + output = renderer(None, None, step2) + result = json.loads(output) + + assert "timestamp" in result + assert result["password"] == "[REDACTED]" + + def test_deeply_nested_with_json_renderer(self): + """ + Deeply nested structures are properly redacted and rendered. + """ + redactor = SensitiveDataRedactor(sensitive_fields=["secret"]) + renderer = JSONRenderer() + + event_dict = { + "event": "deep_config", + "level1": { + "level2": { + "level3": { + "level4": { + "secret": "deep_secret", + "public": "visible", + } + } + } + }, + } + + redacted = redactor(None, None, event_dict) + output = renderer(None, None, redacted) + result = json.loads(output) + + assert result["level1"]["level2"]["level3"]["level4"]["secret"] == "[REDACTED]" + assert result["level1"]["level2"]["level3"]["level4"]["public"] == "visible" + + def test_empty_event_dict_with_renderer(self): + """ + Empty event dict is handled correctly with renderer. + """ + redactor = SensitiveDataRedactor(sensitive_fields=["password"]) + renderer = JSONRenderer() + + event_dict = {} + + redacted = redactor(None, None, event_dict) + output = renderer(None, None, redacted) + result = json.loads(output) + + assert result == {} + + def test_special_characters_in_values(self): + """ + Values with special characters are handled correctly. 
+ """ + redactor = SensitiveDataRedactor(sensitive_fields=["password"]) + renderer = JSONRenderer() + + event_dict = { + "event": "test", + "password": 'secret with "quotes" and \\backslash', + "data": "normal", + } + + redacted = redactor(None, None, event_dict) + output = renderer(None, None, redacted) + result = json.loads(output) + + assert result["password"] == "[REDACTED]" + assert result["data"] == "normal" + + def test_unicode_field_names_and_values(self): + """ + Unicode field names and values are handled correctly. + """ + redactor = SensitiveDataRedactor(sensitive_fields=["密码", "password"]) + renderer = JSONRenderer() + + event_dict = { + "event": "测试", + "密码": "秘密", + "password": "secret", + "user": "用户", + } + + redacted = redactor(None, None, event_dict) + output = renderer(None, None, redacted) + result = json.loads(output) + + assert result["密码"] == "[REDACTED]" + assert result["password"] == "[REDACTED]" + assert result["user"] == "用户" + assert result["event"] == "测试" + + def test_with_console_renderer(self): + """ + SensitiveDataRedactor works with ConsoleRenderer. + """ + from structlog.dev import ConsoleRenderer + + redactor = SensitiveDataRedactor(sensitive_fields=["password", "api_key"]) + renderer = ConsoleRenderer(colors=False) + + event_dict = { + "event": "user_login", + "user": "alice", + "password": "secret123", + "api_key": "sk_live_xxx", + } + + redacted = redactor(None, None, event_dict) + output = renderer(None, None, redacted) + + # ConsoleRenderer returns a string + assert "[REDACTED]" in output + assert "secret123" not in output + assert "sk_live_xxx" not in output + assert "alice" in output + + def test_with_console_renderer_nested(self): + """ + Nested structures work with ConsoleRenderer. 
+ """ + from structlog.dev import ConsoleRenderer + + redactor = SensitiveDataRedactor(sensitive_fields=["*password*"]) + renderer = ConsoleRenderer(colors=False) + + event_dict = { + "event": "config", + "db": {"password": "db_secret"}, + } + + redacted = redactor(None, None, event_dict) + output = renderer(None, None, redacted) + + assert "[REDACTED]" in output + assert "db_secret" not in output + + def test_with_logfmt_renderer(self): + """ + SensitiveDataRedactor works with LogfmtRenderer. + """ + from structlog.processors import LogfmtRenderer + + redactor = SensitiveDataRedactor(sensitive_fields=["password"]) + renderer = LogfmtRenderer() + + event_dict = { + "event": "login", + "user": "alice", + "password": "secret", + } + + redacted = redactor(None, None, event_dict) + output = renderer(None, None, redacted) + + # LogfmtRenderer produces key=value pairs + assert "password=[REDACTED]" in output + assert "user=alice" in output + assert "secret" not in output + + def test_full_structlog_configuration(self): + """ + SensitiveDataRedactor works in a full structlog configuration. 
+ """ + import structlog + from io import StringIO + + output = StringIO() + + redactor = SensitiveDataRedactor( + sensitive_fields=["*password*", "*secret*", "*token*", "*key*"], + case_insensitive=True, + ) + + # Configure structlog with our redactor + structlog.configure( + processors=[ + structlog.stdlib.add_log_level, + redactor, + structlog.processors.JSONRenderer(), + ], + wrapper_class=structlog.stdlib.BoundLogger, + context_class=dict, + logger_factory=structlog.PrintLoggerFactory(file=output), + cache_logger_on_first_use=False, + ) + + log = structlog.get_logger() + log.info( + "user_authenticated", + user="alice", + password="s3cr3t", + api_key="abc123", + session_token="xyz789", + ) + + logged = output.getvalue() + result = json.loads(logged) + + assert result["user"] == "alice" + assert result["password"] == "[REDACTED]" + assert result["api_key"] == "[REDACTED]" + assert result["session_token"] == "[REDACTED]" + assert "s3cr3t" not in logged + assert "abc123" not in logged + assert "xyz789" not in logged + + def test_with_stdlib_logging_integration(self): + """ + SensitiveDataRedactor works with stdlib logging integration. 
+ """ + import logging + from io import StringIO + + import structlog + from structlog.stdlib import ProcessorFormatter + + # Create a string stream to capture output + stream = StringIO() + + # Set up stdlib logging + handler = logging.StreamHandler(stream) + handler.setLevel(logging.DEBUG) + + redactor = SensitiveDataRedactor( + sensitive_fields=["password", "api_key"] + ) + + handler.setFormatter( + ProcessorFormatter( + processor=structlog.processors.JSONRenderer(), + foreign_pre_chain=[ + structlog.stdlib.add_log_level, + redactor, + ], + ) + ) + + logger = logging.getLogger("test_redactor") + logger.handlers = [handler] + logger.setLevel(logging.DEBUG) + + # Log with sensitive data + logger.info( + "login attempt", + extra={"password": "secret", "api_key": "key123", "user": "bob"}, + ) + + logged = stream.getvalue() + + # The sensitive data should be redacted + assert "[REDACTED]" in logged + assert "secret" not in logged + assert "key123" not in logged + + def test_gdpr_compliance_scenario(self): + """ + GDPR compliance scenario with PII redaction and audit trail. 
+ """ + audit_events = [] + + def gdpr_audit(field_name, value, path): + audit_events.append({ + "field": field_name, + "path": path, + "value_type": type(value).__name__, + }) + + gdpr_redactor = SensitiveDataRedactor( + sensitive_fields=[ + "*email*", "*phone*", "*address*", + "*name*", "*ssn*", "*birth*", + ], + case_insensitive=True, + audit_callback=gdpr_audit, + ) + renderer = JSONRenderer() + + event_dict = { + "event": "user_registration", + "user_id": "12345", + "email_address": "user@example.com", + "phone_number": "+1-555-123-4567", + "full_name": "John Doe", + "date_of_birth": "1990-01-15", + "ssn": "123-45-6789", + "preferences": {"newsletter": True}, + } + + redacted = gdpr_redactor(None, None, event_dict) + output = renderer(None, None, redacted) + result = json.loads(output) + + # All PII should be redacted + assert result["email_address"] == "[REDACTED]" + assert result["phone_number"] == "[REDACTED]" + assert result["full_name"] == "[REDACTED]" + assert result["date_of_birth"] == "[REDACTED]" + assert result["ssn"] == "[REDACTED]" + # Non-PII should be preserved + assert result["user_id"] == "12345" + assert result["preferences"]["newsletter"] is True + + # Audit trail should have all redacted fields + assert len(audit_events) == 5 + audit_fields = {e["field"] for e in audit_events} + assert "email_address" in audit_fields + assert "phone_number" in audit_fields + assert "full_name" in audit_fields + assert "date_of_birth" in audit_fields + assert "ssn" in audit_fields + + def test_pci_dss_card_masking_scenario(self): + """ + PCI-DSS compliance scenario with card number masking. 
+ """ + + def mask_card(field_name, value, path): + if "card" in field_name.lower() and isinstance(value, str): + # Show only last 4 digits + digits = "".join(c for c in value if c.isdigit()) + if len(digits) >= 4: + return f"****-****-****-{digits[-4:]}" + return "[REDACTED]" + + pci_redactor = SensitiveDataRedactor( + sensitive_fields=["*card*", "*cvv*", "*cvc*"], + case_insensitive=True, + redaction_callback=mask_card, + ) + renderer = JSONRenderer() + + event_dict = { + "event": "payment_processed", + "transaction_id": "txn_123", + "card_number": "4111-1111-1111-1234", + "card_cvv": "123", + "amount": 99.99, + } + + redacted = pci_redactor(None, None, event_dict) + output = renderer(None, None, redacted) + result = json.loads(output) + + assert result["card_number"] == "****-****-****-1234" + assert result["card_cvv"] == "[REDACTED]" + assert result["transaction_id"] == "txn_123" + assert result["amount"] == 99.99 + # Original card number should not appear + assert "4111" not in output From 8e796b9ac66e0d84af3ccdaae130cbb9ca68d5c3 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 14 Jan 2026 07:53:19 +0000 Subject: [PATCH 02/12] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/structlog/processors.py | 19 +++++----- tests/processors/test_processors.py | 55 ++++++++++++++++++++--------- 2 files changed, 48 insertions(+), 26 deletions(-) diff --git a/src/structlog/processors.py b/src/structlog/processors.py index 60cbdd95..6d0ce5ef 100644 --- a/src/structlog/processors.py +++ b/src/structlog/processors.py @@ -1285,13 +1285,13 @@ def mask_card_number(field_name, value, path): """ __slots__ = ( + "_audit_callback", + "_case_insensitive", "_exact_fields", "_pattern_matchers", "_placeholder", - "_case_insensitive", - "_sensitive_fields", "_redaction_callback", - "_audit_callback", + "_sensitive_fields", ) _exact_fields: frozenset[str] @@ 
-1337,12 +1337,11 @@ def __init__( pattern_matchers.append( _compile_sensitive_pattern(field, case_insensitive) ) + # Exact match - normalize case if needed + elif case_insensitive: + exact_fields.append(field.lower()) else: - # Exact match - normalize case if needed - if case_insensitive: - exact_fields.append(field.lower()) - else: - exact_fields.append(field) + exact_fields.append(field) self._exact_fields = frozenset(exact_fields) self._pattern_matchers = tuple(pattern_matchers) @@ -1420,7 +1419,9 @@ def __call__( """ return self._redact_dict(event_dict, "") - def _redact_dict(self, d: dict[str, Any], parent_path: str) -> dict[str, Any]: + def _redact_dict( + self, d: dict[str, Any], parent_path: str + ) -> dict[str, Any]: """ Recursively redact sensitive fields from a dictionary. diff --git a/tests/processors/test_processors.py b/tests/processors/test_processors.py index be7704c7..bc86e586 100644 --- a/tests/processors/test_processors.py +++ b/tests/processors/test_processors.py @@ -765,7 +765,9 @@ def test_redacts_nested_dict(self): """ redactor = SensitiveDataRedactor(sensitive_fields=["api_key"]) - assert {"config": {"api_key": "[REDACTED]", "timeout": 30}} == redactor( + assert { + "config": {"api_key": "[REDACTED]", "timeout": 30} + } == redactor( None, None, {"config": {"api_key": "abc123", "timeout": 30}} ) @@ -811,9 +813,7 @@ def test_redacts_nested_lists(self): """ redactor = SensitiveDataRedactor(sensitive_fields=["token"]) - assert { - "data": [[{"token": "[REDACTED]"}]] - } == redactor( + assert {"data": [[{"token": "[REDACTED]"}]]} == redactor( None, None, {"data": [[{"token": "secret_token"}]]}, @@ -1262,7 +1262,9 @@ def test_audit_callback_basic(self): audit_log = [] def audit(field_name, value, path): - audit_log.append({"field": field_name, "value": value, "path": path}) + audit_log.append( + {"field": field_name, "value": value, "path": path} + ) redactor = SensitiveDataRedactor( sensitive_fields=["password"], @@ -1487,7 +1489,9 @@ def 
test_with_json_renderer_nested(self): """ SensitiveDataRedactor handles nested structures with JSONRenderer. """ - redactor = SensitiveDataRedactor(sensitive_fields=["*password*", "*token*"]) + redactor = SensitiveDataRedactor( + sensitive_fields=["*password*", "*token*"] + ) renderer = JSONRenderer() event_dict = { @@ -1770,8 +1774,14 @@ def test_deeply_nested_with_json_renderer(self): output = renderer(None, None, redacted) result = json.loads(output) - assert result["level1"]["level2"]["level3"]["level4"]["secret"] == "[REDACTED]" - assert result["level1"]["level2"]["level3"]["level4"]["public"] == "visible" + assert ( + result["level1"]["level2"]["level3"]["level4"]["secret"] + == "[REDACTED]" + ) + assert ( + result["level1"]["level2"]["level3"]["level4"]["public"] + == "visible" + ) def test_empty_event_dict_with_renderer(self): """ @@ -1837,7 +1847,9 @@ def test_with_console_renderer(self): """ from structlog.dev import ConsoleRenderer - redactor = SensitiveDataRedactor(sensitive_fields=["password", "api_key"]) + redactor = SensitiveDataRedactor( + sensitive_fields=["password", "api_key"] + ) renderer = ConsoleRenderer(colors=False) event_dict = { @@ -1903,9 +1915,10 @@ def test_full_structlog_configuration(self): """ SensitiveDataRedactor works in a full structlog configuration. """ - import structlog from io import StringIO + import structlog + output = StringIO() redactor = SensitiveDataRedactor( @@ -1951,9 +1964,11 @@ def test_with_stdlib_logging_integration(self): SensitiveDataRedactor works with stdlib logging integration. 
""" import logging + from io import StringIO import structlog + from structlog.stdlib import ProcessorFormatter # Create a string stream to capture output @@ -2001,16 +2016,22 @@ def test_gdpr_compliance_scenario(self): audit_events = [] def gdpr_audit(field_name, value, path): - audit_events.append({ - "field": field_name, - "path": path, - "value_type": type(value).__name__, - }) + audit_events.append( + { + "field": field_name, + "path": path, + "value_type": type(value).__name__, + } + ) gdpr_redactor = SensitiveDataRedactor( sensitive_fields=[ - "*email*", "*phone*", "*address*", - "*name*", "*ssn*", "*birth*", + "*email*", + "*phone*", + "*address*", + "*name*", + "*ssn*", + "*birth*", ], case_insensitive=True, audit_callback=gdpr_audit, From cad1823cdd280428866661881032d06e9bbbf12e Mon Sep 17 00:00:00 2001 From: yk0007 Date: Wed, 14 Jan 2026 13:30:51 +0530 Subject: [PATCH 03/12] fixed linting issues --- src/structlog/processors.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/structlog/processors.py b/src/structlog/processors.py index 6d0ce5ef..4e6dae44 100644 --- a/src/structlog/processors.py +++ b/src/structlog/processors.py @@ -1367,11 +1367,7 @@ def _is_sensitive(self, key: str) -> bool: return True # Check patterns (slower path - iterate through compiled patterns) - for matcher in self._pattern_matchers: - if matcher(key): - return True - - return False + return any(matcher(key) for matcher in self._pattern_matchers) def _get_redacted_value(self, key: str, value: Any, path: str) -> Any: """ From 8e703b7d2ff8dba4e6b68ac6f60844f17609d2ea Mon Sep 17 00:00:00 2001 From: yk0007 Date: Wed, 14 Jan 2026 13:39:28 +0530 Subject: [PATCH 04/12] fixed type annotations --- src/structlog/processors.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/structlog/processors.py b/src/structlog/processors.py index 4e6dae44..5c76353d 100644 --- a/src/structlog/processors.py +++ b/src/structlog/processors.py @@ -19,7 
+19,7 @@ import threading import time -from collections.abc import Collection, Sequence +from collections.abc import Collection, MutableMapping, MutableSequence, Sequence from types import FrameType, TracebackType from typing import ( Any, @@ -1416,8 +1416,8 @@ def __call__( return self._redact_dict(event_dict, "") def _redact_dict( - self, d: dict[str, Any], parent_path: str - ) -> dict[str, Any]: + self, d: MutableMapping[str, Any], parent_path: str + ) -> MutableMapping[str, Any]: """ Recursively redact sensitive fields from a dictionary. @@ -1438,7 +1438,7 @@ def _redact_dict( d[key] = self._redact_list(value, current_path) return d - def _redact_list(self, lst: list[Any], parent_path: str) -> list[Any]: + def _redact_list(self, lst: MutableSequence[Any], parent_path: str) -> MutableSequence[Any]: """ Recursively redact sensitive fields from items in a list. From b86ccb99a8906f99fd4c7d2e729935e84bd06e3f Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 14 Jan 2026 08:09:39 +0000 Subject: [PATCH 05/12] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/structlog/processors.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/structlog/processors.py b/src/structlog/processors.py index 5c76353d..cc073a4e 100644 --- a/src/structlog/processors.py +++ b/src/structlog/processors.py @@ -19,7 +19,12 @@ import threading import time -from collections.abc import Collection, MutableMapping, MutableSequence, Sequence +from collections.abc import ( + Collection, + MutableMapping, + MutableSequence, + Sequence, +) from types import FrameType, TracebackType from typing import ( Any, @@ -1438,7 +1443,9 @@ def _redact_dict( d[key] = self._redact_list(value, current_path) return d - def _redact_list(self, lst: MutableSequence[Any], parent_path: str) -> MutableSequence[Any]: + def _redact_list( + self, lst: 
MutableSequence[Any], parent_path: str + ) -> MutableSequence[Any]: """ Recursively redact sensitive fields from items in a list. From 32002fcca37a343562ef1c46da1aad9c3907d465 Mon Sep 17 00:00:00 2001 From: yk0007 Date: Wed, 14 Jan 2026 13:50:44 +0530 Subject: [PATCH 06/12] fixed tests and mypy issues --- src/structlog/processors.py | 32 ++++++++++++++++----- tests/processors/test_processors.py | 43 ++++++++++++++++++++++------- 2 files changed, 58 insertions(+), 17 deletions(-) diff --git a/src/structlog/processors.py b/src/structlog/processors.py index cc073a4e..fe3db1bf 100644 --- a/src/structlog/processors.py +++ b/src/structlog/processors.py @@ -1487,10 +1487,28 @@ def __setstate__(self, state: dict[str, Any]) -> None: Args: state: The state dictionary from ``__getstate__``. """ - self.__init__( - sensitive_fields=state["sensitive_fields"], - placeholder=state["placeholder"], - case_insensitive=state["case_insensitive"], - redaction_callback=state.get("redaction_callback"), - audit_callback=state.get("audit_callback"), - ) + sensitive_fields = state["sensitive_fields"] + case_insensitive = state["case_insensitive"] + + self._placeholder = state["placeholder"] + self._case_insensitive = case_insensitive + self._redaction_callback = state.get("redaction_callback") + self._audit_callback = state.get("audit_callback") + self._sensitive_fields = tuple(sensitive_fields) + + # Rebuild exact fields and pattern matchers + exact_fields: list[str] = [] + pattern_matchers: list[Callable[[str], bool]] = [] + + for field in sensitive_fields: + if "*" in field or "?" 
in field: + pattern_matchers.append( + _compile_sensitive_pattern(field, case_insensitive) + ) + elif case_insensitive: + exact_fields.append(field.lower()) + else: + exact_fields.append(field) + + self._exact_fields = frozenset(exact_fields) + self._pattern_matchers = tuple(pattern_matchers) diff --git a/tests/processors/test_processors.py b/tests/processors/test_processors.py index bc86e586..dcf8659c 100644 --- a/tests/processors/test_processors.py +++ b/tests/processors/test_processors.py @@ -712,6 +712,30 @@ def test_replace_by_key_is_optional(self): ) + +def _test_pickle_redactor(field_name, value, path): + return "***" + + +def _test_pickle_audit(field_name, value, path): + pass + + +def _extract_extra(logger, log_method, event_dict): + """ + Extracts extra attributes from LogRecord to event_dict for testing. + """ + record = event_dict.get("_record") + if record: + if hasattr(record, "password"): + event_dict["password"] = record.password + if hasattr(record, "api_key"): + event_dict["api_key"] = record.api_key + if hasattr(record, "user"): + event_dict["user"] = record.user + return event_dict + + class TestSensitiveDataRedactor: def test_redacts_sensitive_field(self): """ @@ -1429,17 +1453,10 @@ def test_pickleable_with_callbacks(self): """ An instance with callbacks can be pickled (callbacks are preserved). 
""" - - def custom_redactor(field_name, value, path): - return "***" - - def audit(field_name, value, path): - pass - redactor = SensitiveDataRedactor( sensitive_fields=["password"], - redaction_callback=custom_redactor, - audit_callback=audit, + redaction_callback=_test_pickle_redactor, + audit_callback=_test_pickle_audit, ) pickled = pickle.dumps(redactor) @@ -1984,10 +2001,16 @@ def test_with_stdlib_logging_integration(self): handler.setFormatter( ProcessorFormatter( - processor=structlog.processors.JSONRenderer(), + # Use foreign_pre_chain for basic setup foreign_pre_chain=[ structlog.stdlib.add_log_level, + ], + # Use main processors list for redaction to ensure it runs + # after extra attributes are merged (which happens in ProcessorFormatter) + processors=[ + _extract_extra, redactor, + structlog.processors.JSONRenderer(), ], ) ) From 0a515badd7cb3490e7282ba210539306ccec27af Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 14 Jan 2026 08:21:48 +0000 Subject: [PATCH 07/12] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tests/processors/test_processors.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/processors/test_processors.py b/tests/processors/test_processors.py index dcf8659c..2637b9b9 100644 --- a/tests/processors/test_processors.py +++ b/tests/processors/test_processors.py @@ -712,7 +712,6 @@ def test_replace_by_key_is_optional(self): ) - def _test_pickle_redactor(field_name, value, path): return "***" From 1687d90e88f0995f87273fe7b021919d77605687 Mon Sep 17 00:00:00 2001 From: yk0007 Date: Wed, 14 Jan 2026 13:55:39 +0530 Subject: [PATCH 08/12] fixed docs build --- docs/api.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/api.rst b/docs/api.rst index 2c16414f..78db51ff 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -226,6 +226,8 @@ API Reference >>> LogfmtRenderer(key_order=["b", "a"], 
bool_as_flag=False)(None, "", event_dict) 'b="[1, 2, 3]" a=42 flag=true' +.. autoclass:: SensitiveDataRedactor + .. autoclass:: EventRenamer .. autofunction:: add_log_level From 9997397b13781027b08b91775c5cc82f294d0916 Mon Sep 17 00:00:00 2001 From: yk0007 Date: Wed, 14 Jan 2026 14:00:41 +0530 Subject: [PATCH 09/12] fixed docstring syntax for sphinx --- src/structlog/processors.py | 32 +++++--------------------------- 1 file changed, 5 insertions(+), 27 deletions(-) diff --git a/src/structlog/processors.py b/src/structlog/processors.py index fe3db1bf..a3d606dd 100644 --- a/src/structlog/processors.py +++ b/src/structlog/processors.py @@ -1012,27 +1012,7 @@ def matcher(key: str) -> bool: #: Type alias for the redaction callback function. #: -#: The callback signature is ``(field_name, original_value, field_path) -> redacted_value``: -#: -#: - ``field_name`` (str): The name of the field being redacted. -#: - ``original_value`` (Any): The original value before redaction. -#: - ``field_path`` (str): Dot-separated path to the field (e.g., ``"user.credentials.password"``). -#: -#: Returns: -#: The value to use as the replacement (can be any type). -RedactionCallback = Callable[[str, Any, str], Any] -#: Type alias for the audit callback function. -#: -#: The callback signature is ``(field_name, original_value, field_path) -> None``: -#: -#: - ``field_name`` (str): The name of the field being redacted. -#: - ``original_value`` (Any): The original value before redaction. -#: - ``field_path`` (str): Dot-separated path to the field (e.g., ``"user.credentials.password"``). -#: -#: The callback is invoked *before* redaction occurs, allowing you to log or -#: record the original value for audit purposes. -AuditCallback = Callable[[str, Any, str], None] class SensitiveDataRedactor: @@ -1164,9 +1144,7 @@ def count_redactions(field_name, value, path): This class uses ``__slots__`` for memory efficiency and does not expose public attributes. 
Use the constructor parameters to configure behavior. - Raises: - This processor does not raise exceptions during normal operation. Invalid - patterns or field names will simply not match any fields. + Examples: **Basic usage**:: @@ -1304,16 +1282,16 @@ def mask_card_number(field_name, value, path): _placeholder: str _case_insensitive: bool _sensitive_fields: tuple[str, ...] - _redaction_callback: RedactionCallback | None - _audit_callback: AuditCallback | None + _redaction_callback: Callable[[str, Any, str], Any] | None + _audit_callback: Callable[[str, Any, str], None] | None def __init__( self, sensitive_fields: Collection[str], placeholder: str = "[REDACTED]", case_insensitive: bool = False, - redaction_callback: RedactionCallback | None = None, - audit_callback: AuditCallback | None = None, + redaction_callback: Callable[[str, Any, str], Any] | None = None, + audit_callback: Callable[[str, Any, str], None] | None = None, ) -> None: """ Initialize the SensitiveDataRedactor processor. From 1ef278ba155a44c64b9a66f30ecab1ee3ac06ded Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 14 Jan 2026 08:31:17 +0000 Subject: [PATCH 10/12] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/structlog/processors.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/structlog/processors.py b/src/structlog/processors.py index a3d606dd..4fadca05 100644 --- a/src/structlog/processors.py +++ b/src/structlog/processors.py @@ -1014,7 +1014,6 @@ def matcher(key: str) -> bool: #: - class SensitiveDataRedactor: """ Redact sensitive fields from event dictionaries. 
From a25c2b04022366ca5fb9d0b4b863494b161b2242 Mon Sep 17 00:00:00 2001 From: yk0007 Date: Wed, 14 Jan 2026 14:12:49 +0530 Subject: [PATCH 11/12] added tests for 100% coverage --- tests/processors/test_processors.py | 51 +++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/tests/processors/test_processors.py b/tests/processors/test_processors.py index 2637b9b9..1bb010c3 100644 --- a/tests/processors/test_processors.py +++ b/tests/processors/test_processors.py @@ -1465,6 +1465,57 @@ def test_pickleable_with_callbacks(self): None, None, {"password": "secret"} ) + def test_pickleable_case_insensitive_exact(self): + """ + An instance with case-insensitive exact matches can be pickled. + """ + redactor = SensitiveDataRedactor( + sensitive_fields=["PASSWORD"], + case_insensitive=True, + ) + + pickled = pickle.dumps(redactor) + unpickled = pickle.loads(pickled) + + assert {"password": "[REDACTED]"} == unpickled( + None, None, {"password": "secret"} + ) + + def test_redacts_nested_lists_deeply(self): + """ + Redacts sensitive fields deeply nested within lists of lists. + """ + redactor = SensitiveDataRedactor(["password"]) + event = { + "users": [ + [{"name": "alice", "password": "secret"}], + {"data": [{"password": "secret"}]} + ] + } + redactor(None, None, event) + assert event["users"][0][0]["password"] == "[REDACTED]" + assert event["users"][1]["data"][0]["password"] == "[REDACTED]" + + def test_redacts_mixed_list_types(self): + """ + Handles lists containing a mix of dicts, lists, and primitives. + Ensures all branches in _redact_list are covered. 
+ """ + redactor = SensitiveDataRedactor(["password"]) + event = { + "data": [ + "string", # Primitive (elif False -> loop) + 123, # Primitive (elif False -> loop) + {"password": "secret"}, # Dict (if True) + ["nested", {"password": "secret"}], # List (elif True) + ] + } + redactor(None, None, event) + assert event["data"][2]["password"] == "[REDACTED]" + assert event["data"][3][1]["password"] == "[REDACTED]" + assert event["data"][0] == "string" + assert event["data"][1] == 123 + class TestSensitiveDataRedactorIntegration: """Integration tests for SensitiveDataRedactor with full processor chains.""" From bbe0b0d6ae50984fe1bc2e5fae4f81c29af5c070 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 14 Jan 2026 08:43:05 +0000 Subject: [PATCH 12/12] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tests/processors/test_processors.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/processors/test_processors.py b/tests/processors/test_processors.py index 1bb010c3..eec2c64d 100644 --- a/tests/processors/test_processors.py +++ b/tests/processors/test_processors.py @@ -1489,7 +1489,7 @@ def test_redacts_nested_lists_deeply(self): event = { "users": [ [{"name": "alice", "password": "secret"}], - {"data": [{"password": "secret"}]} + {"data": [{"password": "secret"}]}, ] } redactor(None, None, event) @@ -1505,7 +1505,7 @@ def test_redacts_mixed_list_types(self): event = { "data": [ "string", # Primitive (elif False -> loop) - 123, # Primitive (elif False -> loop) + 123, # Primitive (elif False -> loop) {"password": "secret"}, # Dict (if True) ["nested", {"password": "secret"}], # List (elif True) ]