From 615244dcfab503faa72b7d397df20f4307e8c700 Mon Sep 17 00:00:00 2001 From: ablaszkiewicz Date: Tue, 23 Jun 2026 13:25:02 +0200 Subject: [PATCH 1/3] feat: entropy-based secret detection for exception code variables Add a last-resort entropy-based detector that redacts high-entropy, secret-looking values (API keys, tokens, strong passwords) sitting in innocuously-named code variables. It runs after the existing name-pattern check and before URL-credential scrubbing. - Known vendor key formats (OpenAI, Anthropic, AWS, Stripe, GitHub, GitLab, Slack, Google, JWT, PEM private keys) are matched directly. - Structured identifiers (UUIDs, Mongo ObjectIds, hashes), object reprs, file paths and URLs are never flagged by the entropy gate; a value that contains a known vendor key format is still redacted. - Exposed as the `code_variables_detect_secrets` option (default True) with a per-context override, threaded through client/contexts. - Tighten the masking size caps to keep capture cost bounded. Co-Authored-By: Claude Opus 4.8 --- .sampo/changesets/entropy-secret-detection.md | 5 + posthog/__init__.py | 21 ++ posthog/client.py | 20 ++ posthog/contexts.py | 29 +++ posthog/exception_utils.py | 168 ++++++++++++++-- posthog/test/test_code_variables.py | 183 +++++++++++++++++- references/public_api_snapshot.txt | 23 ++- 7 files changed, 431 insertions(+), 18 deletions(-) create mode 100644 .sampo/changesets/entropy-secret-detection.md diff --git a/.sampo/changesets/entropy-secret-detection.md b/.sampo/changesets/entropy-secret-detection.md new file mode 100644 index 00000000..738cdc7c --- /dev/null +++ b/.sampo/changesets/entropy-secret-detection.md @@ -0,0 +1,5 @@ +--- +'pypi/posthog': patch +--- + +Detect and redact high-entropy secrets (API keys, tokens, passwords) in exception code variables. Adds the `code_variables_detect_secrets` option (default `True`). 
diff --git a/posthog/__init__.py b/posthog/__init__.py index e2b52858..871b3ceb 100644 --- a/posthog/__init__.py +++ b/posthog/__init__.py @@ -32,6 +32,9 @@ from posthog.contexts import ( set_code_variables_mask_url_credentials_context as inner_set_code_variables_mask_url_credentials_context, ) +from posthog.contexts import ( + set_code_variables_detect_secrets_context as inner_set_code_variables_detect_secrets_context, +) from posthog.contexts import ( set_context_device_id as inner_set_context_device_id, ) @@ -45,6 +48,7 @@ get_tags as inner_get_tags, ) from posthog.exception_utils import ( + DEFAULT_CODE_VARIABLES_DETECT_SECRETS, DEFAULT_CODE_VARIABLES_IGNORE_PATTERNS, DEFAULT_CODE_VARIABLES_MASK_PATTERNS, DEFAULT_CODE_VARIABLES_MASK_URL_CREDENTIALS, @@ -244,6 +248,18 @@ def set_code_variables_mask_url_credentials_context(enabled: bool): return inner_set_code_variables_mask_url_credentials_context(enabled) +def set_code_variables_detect_secrets_context(enabled: bool): + """ + Whether to apply entropy-based secret detection as a last-resort redaction of + high-entropy values (API keys, tokens, strong passwords) in captured code + variables for the current context. + + Category: + Contexts + """ + return inner_set_code_variables_detect_secrets_context(enabled) + + def tag(name: str, value: Any): """ Add a tag to the current context. @@ -321,6 +337,9 @@ def get_tags() -> Dict[str, Any]: code variables. code_variables_ignore_patterns: Variable-name patterns to omit when capturing code variables. + code_variables_detect_secrets: Last-resort entropy-based redaction of + high-entropy secret-looking values (API keys, tokens, strong passwords) + in captured code variables. Defaults to True. in_app_modules: Module/package prefixes treated as in-app frames in captured exceptions. 
enable_exception_autocapture_rate_limiting: Rate limit autocaptured @@ -365,6 +384,7 @@ def get_tags() -> Dict[str, Any]: code_variables_mask_patterns = DEFAULT_CODE_VARIABLES_MASK_PATTERNS code_variables_ignore_patterns = DEFAULT_CODE_VARIABLES_IGNORE_PATTERNS code_variables_mask_url_credentials = DEFAULT_CODE_VARIABLES_MASK_URL_CREDENTIALS +code_variables_detect_secrets = DEFAULT_CODE_VARIABLES_DETECT_SECRETS in_app_modules = None # type: Optional[list[str]] enable_exception_autocapture_rate_limiting = False # type: bool exception_autocapture_bucket_size = ExceptionCapture.DEFAULT_BUCKET_SIZE # type: int @@ -1149,6 +1169,7 @@ def setup() -> Client: code_variables_mask_patterns=code_variables_mask_patterns, code_variables_ignore_patterns=code_variables_ignore_patterns, code_variables_mask_url_credentials=code_variables_mask_url_credentials, + code_variables_detect_secrets=code_variables_detect_secrets, in_app_modules=in_app_modules, enable_exception_autocapture_rate_limiting=enable_exception_autocapture_rate_limiting, exception_autocapture_bucket_size=exception_autocapture_bucket_size, diff --git a/posthog/client.py b/posthog/client.py index c8627892..0ae99bc2 100644 --- a/posthog/client.py +++ b/posthog/client.py @@ -20,6 +20,7 @@ from posthog.contexts import ( _get_current_context, get_capture_exception_code_variables_context, + get_code_variables_detect_secrets_context, get_code_variables_ignore_patterns_context, get_code_variables_mask_patterns_context, get_code_variables_mask_url_credentials_context, @@ -37,6 +38,7 @@ from posthog.exception_capture import ExceptionCapture from posthog._logging import _configure_posthog_logging from posthog.exception_utils import ( + DEFAULT_CODE_VARIABLES_DETECT_SECRETS, DEFAULT_CODE_VARIABLES_IGNORE_PATTERNS, DEFAULT_CODE_VARIABLES_MASK_PATTERNS, DEFAULT_CODE_VARIABLES_MASK_URL_CREDENTIALS, @@ -251,6 +253,7 @@ def __init__( code_variables_mask_patterns=None, code_variables_ignore_patterns=None, 
code_variables_mask_url_credentials=None, + code_variables_detect_secrets=None, in_app_modules: list[str] | None = None, enable_exception_autocapture_rate_limiting=False, exception_autocapture_bucket_size=ExceptionCapture.DEFAULT_BUCKET_SIZE, @@ -319,6 +322,11 @@ def __init__( code_variables_mask_url_credentials: Scrub credentials embedded in URLs/DSNs (e.g. ``user:pass@host``) from captured code variables, regardless of the surrounding variable name. Defaults to True. + code_variables_detect_secrets: Last-resort entropy-based detection that + redacts high-entropy secret-looking values (API keys, tokens, strong + passwords) sitting in innocuously-named variables, after the name and + URL checks. Skips structured ids (UUIDs, ObjectIds, hashes). Defaults + to True. in_app_modules: Module/package prefixes treated as in-app frames in captured exceptions. enable_exception_autocapture_rate_limiting: Rate limit @@ -417,6 +425,11 @@ def __init__( if code_variables_mask_url_credentials is not None else DEFAULT_CODE_VARIABLES_MASK_URL_CREDENTIALS ) + self.code_variables_detect_secrets = ( + code_variables_detect_secrets + if code_variables_detect_secrets is not None + else DEFAULT_CODE_VARIABLES_DETECT_SECRETS + ) self.in_app_modules = in_app_modules if project_root is None: @@ -1399,6 +1412,7 @@ def capture_exception( context_mask_url_credentials = ( get_code_variables_mask_url_credentials_context() ) + context_detect_secrets = get_code_variables_detect_secrets_context() enabled = ( context_enabled @@ -1420,6 +1434,11 @@ def capture_exception( if context_mask_url_credentials is not None else self.code_variables_mask_url_credentials ) + detect_secrets = ( + context_detect_secrets + if context_detect_secrets is not None + else self.code_variables_detect_secrets + ) if enabled: try_attach_code_variables_to_frames( @@ -1428,6 +1447,7 @@ def capture_exception( mask_patterns=mask_patterns, ignore_patterns=ignore_patterns, mask_url_credentials=mask_url_credentials, + 
detect_secrets=detect_secrets, ) if self.log_captured_exceptions: diff --git a/posthog/contexts.py b/posthog/contexts.py index 79323ede..bb643b1f 100644 --- a/posthog/contexts.py +++ b/posthog/contexts.py @@ -27,6 +27,7 @@ def __init__( self.code_variables_mask_patterns: Optional[list] = None self.code_variables_ignore_patterns: Optional[list] = None self.code_variables_mask_url_credentials: Optional[bool] = None + self.code_variables_detect_secrets: Optional[bool] = None def set_session_id(self, session_id: str): self.session_id = session_id @@ -52,6 +53,9 @@ def set_code_variables_ignore_patterns(self, ignore_patterns: list): def set_code_variables_mask_url_credentials(self, enabled: bool): self.code_variables_mask_url_credentials = enabled + def set_code_variables_detect_secrets(self, enabled: bool): + self.code_variables_detect_secrets = enabled + def get_parent(self): return self.parent @@ -113,6 +117,13 @@ def get_code_variables_mask_url_credentials(self) -> Optional[bool]: return self.parent.get_code_variables_mask_url_credentials() return None + def get_code_variables_detect_secrets(self) -> Optional[bool]: + if self.code_variables_detect_secrets is not None: + return self.code_variables_detect_secrets + if self.parent is not None and not self.fresh: + return self.parent.get_code_variables_detect_secrets() + return None + _context_stack: contextvars.ContextVar[Optional[ContextScope]] = contextvars.ContextVar( "posthog_context_stack", default=None @@ -390,6 +401,17 @@ def set_code_variables_mask_url_credentials_context(enabled: bool) -> None: current_context.set_code_variables_mask_url_credentials(enabled) +def set_code_variables_detect_secrets_context(enabled: bool) -> None: + """ + Whether to apply entropy-based secret detection as a last-resort redaction of + high-entropy values (API keys, tokens, strong passwords) in captured code + variables for the current context. 
+ """ + current_context = _get_current_context() + if current_context: + current_context.set_code_variables_detect_secrets(enabled) + + def get_capture_exception_code_variables_context() -> Optional[bool]: current_context = _get_current_context() if current_context: @@ -418,6 +440,13 @@ def get_code_variables_mask_url_credentials_context() -> Optional[bool]: return None +def get_code_variables_detect_secrets_context() -> Optional[bool]: + current_context = _get_current_context() + if current_context: + return current_context.get_code_variables_detect_secrets() + return None + + F = TypeVar("F", bound=Callable[..., Any]) diff --git a/posthog/exception_utils.py b/posthog/exception_utils.py index 5828e68b..be0cdd3a 100644 --- a/posthog/exception_utils.py +++ b/posthog/exception_utils.py @@ -14,6 +14,7 @@ import re import sys import types +from collections import Counter from datetime import datetime from types import FrameType, TracebackType # noqa: F401 from typing import ( # noqa: F401 @@ -72,19 +73,23 @@ DEFAULT_CODE_VARIABLES_MASK_URL_CREDENTIALS = True +# Last-resort entropy-based redaction of secret-looking values, after name/URL masking. +DEFAULT_CODE_VARIABLES_DETECT_SECRETS = True + CODE_VARIABLES_REDACTED_VALUE = "$$_posthog_redacted_based_on_masking_rules_$$" CODE_VARIABLES_TOO_LONG_VALUE = "$$_posthog_value_too_long_$$" -_MAX_VALUE_LENGTH_FOR_PATTERN_MATCH = 5_000 -_MAX_COLLECTION_ITEMS_TO_SCAN = 100 +# Strings longer than this are redacted as "too long" rather than scanned. +_MAX_VALUE_LENGTH_FOR_PATTERN_MATCH = 2_048 +_MAX_COLLECTION_ITEMS_TO_SCAN = 50 _REGEX_METACHARACTERS = frozenset(r"\.^$*+?{}[]|()") # Max recursion depth into nested structures while masking (cycles are guarded separately). -_MAX_MASK_DEPTH = 25 +_MAX_MASK_DEPTH = 12 # Cap on total non-scalar nodes traversed per top-level value; the depth/collection caps # don't bound aggregate work, so this stops a wide-and-deep graph from fanning out. 
-_MAX_TOTAL_NODES_TO_MASK = 200 +_MAX_TOTAL_NODES_TO_MASK = 100 # Matches `user:pass` credentials in URLs/DSNs (e.g. `postgresql://user:pass@host`); the # bounded scheme length avoids catastrophic backtracking. @@ -101,7 +106,7 @@ def _redact_url_credentials(value): ) -DEFAULT_TOTAL_VARIABLES_SIZE_LIMIT = 20 * 1024 +DEFAULT_TOTAL_VARIABLES_SIZE_LIMIT = 10 * 1024 class VariableSizeLimiter: @@ -1069,6 +1074,7 @@ class _MaskingConfig: Tuple[Optional[Pattern], List[Pattern]] ] # variable-name skip matcher mask_url_credentials: bool + detect_secrets: bool = DEFAULT_CODE_VARIABLES_DETECT_SECRETS max_length: int = DEFAULT_MAX_VALUE_LENGTH @classmethod @@ -1077,6 +1083,7 @@ def build( mask_patterns=None, ignore_patterns=None, mask_url_credentials=True, + detect_secrets=DEFAULT_CODE_VARIABLES_DETECT_SECRETS, max_length=DEFAULT_MAX_VALUE_LENGTH, ): # type: (...) -> _MaskingConfig @@ -1084,17 +1091,138 @@ def build( mask=_build_matcher(_compile_patterns(mask_patterns or [])), ignore=_build_matcher(_compile_patterns(ignore_patterns or [])), mask_url_credentials=mask_url_credentials, + detect_secrets=detect_secrets, max_length=max_length, ) +# --- Entropy-based secret detection (last resort) ------------------------------------ +# Catches high-entropy secrets (API keys, tokens, strong passwords) in innocuously-named +# variables, after name/keyword masking. Trade-off: pure-hex strings are treated as +# ids/hashes, so UUIDs/ObjectIds/SHAs are never flagged (and hex-encoded secrets slip by). + +_SECRET_MIN_LENGTH = 16 # also the shortest known vendor format +_SECRET_MIN_ENTROPY_BITS = 3.8 # Shannon bits/char +_SECRET_MIN_CHAR_CLASSES = 3 # of {lower, upper, digit, symbol} + +_HEX_DIGITS = frozenset("0123456789abcdefABCDEF") + +# Punctuation seen in object reprs / structured strings but never in a bare token; keeps +# high-entropy reprs like `Thing(id=UUID('...'))` from being redacted. 
+_SECRET_REJECT_CHARS = frozenset("()[]{}<>'\"`,;") + +_UUID_RE = re.compile( + r"\A[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}\Z" +) + +# Lowercase word-like path segment; two around a '/' marks a filesystem path. +_PATH_WORD_RE = re.compile(r"\A[a-z][a-z.]*\Z") + +# Well-known credential formats, matched regardless of entropy. +_KNOWN_SECRET_PATTERNS = [ + r"sk-ant-[A-Za-z0-9_-]{16,}", # Anthropic + r"sk-(?:proj-)?[A-Za-z0-9_-]{20,}", # OpenAI + r"(?:sk|pk|rk)_(?:live|test)_[A-Za-z0-9]{16,}", # Stripe + r"AKIA[0-9A-Z]{16}", # AWS access key id + r"(?:ASIA|AGPA|AIDA|AROA|AIPA|ANPA|ANVA|ABIA|ACCA)[0-9A-Z]{16}", # other AWS ids + r"gh[pousr]_[A-Za-z0-9]{36}", # GitHub + r"github_pat_[A-Za-z0-9_]{20,}", # GitHub fine-grained PAT + r"glpat-[A-Za-z0-9_-]{20}", # GitLab + r"xox[baprs]-[A-Za-z0-9-]{10,}", # Slack + r"AIza[A-Za-z0-9_-]{35}", # Google + r"eyJ[A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]{6,}", # JWT +] +_KNOWN_SECRET_RE = re.compile("|".join(_KNOWN_SECRET_PATTERNS)) + +_PEM_PRIVATE_KEY_MARKER = "PRIVATE KEY-----" # covers RSA/EC/OpenSSH/PKCS8 +_KNOWN_SECRET_MAX_SCAN_LENGTH = 200 + + +def _looks_like_path_or_url(value): + if "://" in value or "\\" in value: + return True + if "/" in value: + word_segments = 0 + for segment in value.split("/"): + if segment and _PATH_WORD_RE.match(segment): + word_segments += 1 + if word_segments >= 2: + return True + return False + + +def _is_high_entropy_secret(value): + """Generic gate: long, character-class-diverse, high-entropy strings that aren't + structured ids/paths/reprs. Assumes ``len(value) >= _SECRET_MIN_LENGTH``. 
Cost is + dominated by one C-level ``Counter`` pass; everything else runs over distinct chars.""" + if " " in value: # prose; the common benign case, bailed before any scan + return False + if _looks_like_path_or_url(value): + return False + if _UUID_RE.match(value): + return False + + counts = Counter(value) + distinct = counts.keys() + if not _SECRET_REJECT_CHARS.isdisjoint(distinct): + return False + + has_lower = has_upper = has_digit = has_symbol = False + hex_only = True + for ch in distinct: + if ch.isspace(): + return False + if ch.islower(): + has_lower = True + if ch not in _HEX_DIGITS: + hex_only = False + elif ch.isupper(): + has_upper = True + if ch not in _HEX_DIGITS: + hex_only = False + elif ch.isdigit(): + has_digit = True + else: + has_symbol = True + hex_only = False + + if hex_only: # ObjectId / SHA / md5 digest + return False + if (has_lower + has_upper + has_digit + has_symbol) < _SECRET_MIN_CHAR_CLASSES: + return False # identifiers, enums, slugs, numbers + + n = len(value) + entropy = 0.0 + for occurrences in counts.values(): + p = occurrences / n + entropy -= p * math.log2(p) + return entropy >= _SECRET_MIN_ENTROPY_BITS + + +def _looks_like_secret(value): + """Last-resort credential check, layered cheapest first.""" + if _PEM_PRIVATE_KEY_MARKER in value: + return True + n = len(value) + if n < _SECRET_MIN_LENGTH: + return False + if _is_high_entropy_secret(value): + return True + # Known formats the entropy gate misses (e.g. AWS AKIA ids: only two char classes). 
+ if n <= _KNOWN_SECRET_MAX_SCAN_LENGTH: + return _KNOWN_SECRET_RE.search(value) is not None + return False + + def _mask_string(value, config): - """Apply the string masking policy: over-length cap, name/value patterns, then - embedded URL credentials.""" + """Apply the string masking policy: over-length cap, name/value patterns, + entropy-based secret detection, then embedded URL credentials.""" if len(value) > _MAX_VALUE_LENGTH_FOR_PATTERN_MATCH: return CODE_VARIABLES_TOO_LONG_VALUE if _matcher_matches(value, config.mask): return CODE_VARIABLES_REDACTED_VALUE + if config.detect_secrets and _looks_like_secret(value): + return CODE_VARIABLES_REDACTED_VALUE if config.mask_url_credentials: return _redact_url_credentials(value) return value @@ -1220,8 +1348,12 @@ def _mask_value(value, config, seen=None, depth=0): """Turn any Python value into a JSON-safe, masked value. Single source of truth for what gets redacted; the result contains only JSON-native types so it can be handed straight to ``json.dumps``.""" - # Name masking and URL scrubbing are independent toggles; skip only when both are off. - if config.mask is None and not config.mask_url_credentials: + # Nothing to do only when every redaction toggle is off. + if ( + config.mask is None + and not config.mask_url_credentials + and not config.detect_secrets + ): return value # Exact-type fast paths for plain scalars, avoiding the isinstance ladder below. @@ -1410,6 +1542,7 @@ def serialize_code_variables( ignore_patterns=None, max_length=1024, mask_url_credentials=True, + detect_secrets=DEFAULT_CODE_VARIABLES_DETECT_SECRETS, ): """Serialize a single frame's locals. 
Convenience wrapper that builds a one-off config; the hot path builds the config once - see ``attach_code_variables_to_frames``.""" @@ -1417,13 +1550,19 @@ def serialize_code_variables( mask_patterns=mask_patterns, ignore_patterns=ignore_patterns, mask_url_credentials=mask_url_credentials, + detect_secrets=detect_secrets, max_length=max_length, ) return _serialize_frame_variables(frame, limiter, config) def try_attach_code_variables_to_frames( - all_exceptions, exc_info, mask_patterns, ignore_patterns, mask_url_credentials=True + all_exceptions, + exc_info, + mask_patterns, + ignore_patterns, + mask_url_credentials=True, + detect_secrets=DEFAULT_CODE_VARIABLES_DETECT_SECRETS, ): try: attach_code_variables_to_frames( @@ -1432,13 +1571,19 @@ def try_attach_code_variables_to_frames( mask_patterns, ignore_patterns, mask_url_credentials, + detect_secrets, ) except Exception: pass def attach_code_variables_to_frames( - all_exceptions, exc_info, mask_patterns, ignore_patterns, mask_url_credentials=True + all_exceptions, + exc_info, + mask_patterns, + ignore_patterns, + mask_url_credentials=True, + detect_secrets=DEFAULT_CODE_VARIABLES_DETECT_SECRETS, ): exc_type, exc_value, traceback = exc_info @@ -1455,6 +1600,7 @@ def attach_code_variables_to_frames( mask_patterns=mask_patterns, ignore_patterns=ignore_patterns, mask_url_credentials=mask_url_credentials, + detect_secrets=detect_secrets, ) limiter = VariableSizeLimiter() diff --git a/posthog/test/test_code_variables.py b/posthog/test/test_code_variables.py index 6e7d451b..047bdf1a 100644 --- a/posthog/test/test_code_variables.py +++ b/posthog/test/test_code_variables.py @@ -20,6 +20,8 @@ _MaskingConfig, _compile_patterns, _encode_variable, + _is_high_entropy_secret, + _looks_like_secret, _mask_value, _pattern_matches, _redact_url_credentials, @@ -360,8 +362,8 @@ def tree(width, depth): return {} return {f"k{i}": tree(width, depth - 1) for i in range(width)} - assert TOO_LONG in json.dumps(mask(tree(25, 3))) # ~16k nodes, 
over the budget - assert TOO_LONG not in json.dumps(mask(tree(5, 3))) # ~150 nodes, well under + assert TOO_LONG in json.dumps(mask(tree(8, 3))) # ~580 nodes, over the budget + assert TOO_LONG not in json.dumps(mask(tree(4, 2))) # ~20 nodes, well under # --- 6. object traversal ------------------------------------------------------------- @@ -908,3 +910,180 @@ def trigger_error(): ) assert "code_variables" in output assert "p4ssRUNTIME" in output # URL masking disabled -> credential retained + + +# --- entropy-based secret detection (last resort) ------------------------------------ + + +# Synthetic, format-correct fakes (no real credentials). Vendor keys are assembled from +# prefix + body so no complete secret literal lives in source (which trips secret scanners). +def _key(prefix, body): + return prefix + body + + +KNOWN_FORMAT_SECRETS = [ + _key("sk-proj-", "T3BlbkFJabcd1234efgh5678ijkl9012mnop3456qrst7890wxyz"), # OpenAI + _key( + "sk-", "Hf8sJd72hsKbNd83jdH5sQp2T3BlbkFJabcdEFGH1234ijklMNOPqrst" + ), # OpenAI legacy + _key( + "sk-ant-", "api03-aBcDeFgHiJkLmNoPqRsTuVwX0123456789-AbCdEf_gHiJkLmQQ" + ), # Anthropic + _key("AKIA", "IOSFODNN7EXAMPLE"), # AWS access key id (AWS's own doc example) + _key("sk_live_", "4eC39HqLyjWDarjtT1zdp7dc"), # Stripe secret key + _key("pk_live_", "TYooMQauvdEDq54NiTphI7jx"), # Stripe publishable key + _key("ghp_", "16C7e42F292c6912E7710c838347Ae178B4a"), # GitHub PAT + _key( + "github_pat_", "11ABCDEFG0aBcDeFgHiJkL_mNoPqRsTuVwXyZ0123456789abcdef" + ), # GitHub + _key("glpat-", "aB1cD2eF3gH4iJ5kL6mN"), # GitLab PAT + _key( + "xoxb-", "1234567890-1234567890123-AbCdEfGhIjKlMnOpQrStUvWx" + ), # Slack bot token + _key("AIza", "SyD-1a2B3c4D5e6F7g8H9i0JkLmNoPqRsTuVw"), # Google API key + _key( + "eyJ", "hbGciOiJIUzI1NiJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0.dozjgNryP4J3jVmNHl0w5N" + ), # JWT + "-----BEGIN RSA PRIVATE KEY-----\nMIIEpAIBAAKCAQEA1234", # PEM private key + "-----BEGIN OPENSSH PRIVATE KEY-----\nb3BlbnNzaC1rZXkt", # OpenSSH private key 
+] + +HIGH_ENTROPY_SECRETS = [ + "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY", # AWS secret key (no prefix, base64) + "xK9#mP2$vL5nQ8w!", # strong password with symbols + "P@ssw0rd!2024#Secure$Key", # strong password + "xK9mP2vL5nQ8wRtZ", # strong password, no symbols + "n8fK2pQ9vX7mL4wR8tY3uZ6bC1dE5gH", # random mixed-case+digit token + "dGhpc2lzYVNlY3JldFRva2VuMTIzNA==", # base64 blob +] + +NON_SECRETS = [ + # structured high-entropy ids/hashes/encodings -- must never be flagged + "550e8400-e29b-41d4-a716-446655440000", # UUID v4 (lowercase) + "F47AC10B-58CC-4372-A567-0E02B2C3D479", # UUID (uppercase) + "507f1f77bcf86cd799439011", # Mongo ObjectId + "e83c5163316f89bfbde7d9ab23ca2e25604af290", # git SHA-1 + "d41d8cd98f00b204e9800998ecf8427e", # md5 hex digest + "9f86d081884c7d659a2feaa0c55ad015a3bf4f1b2b0b822cd15d6c15b0f00a08", # sha256 hex + "a7F2c9E1b4D8a3C6e0F5b2D9c7A1e4F8", # pure mixed-case hex (treated as an id) + # object reprs / structured strings -- common in code variables, high-entropy but + # must stay readable (regression: a real prod event flagged the first one) + "CheckActivityInput(proxy_record_id=UUID('019df333-e9e2-0000-fa8e-ba3dd4217c09'))", + "", + "ExecuteActivityInput(fn=check_status,args=[Input(id=42,name=widget)])", + # ordinary code values -- must stay readable for debugging + "user_authentication_handler", # snake_case identifier + "getUserByIdAndOrganization", # camelCase identifier + "getUserById2024", # camelCase with digits + "ApplicationConfigurationManager", # PascalCase class name + "PENDING_APPROVAL", # SCREAMING_CASE enum + "created-at-descending", # dashed slug + "application/json", # mime type + "alice.smith@example.com", # email + "the quick brown fox jumps over", # prose (has spaces) + "/usr/local/lib/python3.13/site-packages/posthog/client.py", # unix path + "C:\\Users\\admin\\app\\config.yaml", # windows path + "https://api.example.com/v2/users/12345/orders", # url + "1234567890123456", # long number + "3.141592653589793", # 
float + "2026-06-23T11:11:00.000Z", # ISO timestamp + "v1.2.3-beta.4", # version string + "active", # short word +] + + +class TestSecretDetection: + """The entropy-based last-resort detector: catches vendor keys and strong + random secrets, while leaving ids/hashes/identifiers/paths readable.""" + + @pytest.mark.parametrize("value", KNOWN_FORMAT_SECRETS) + def test_known_vendor_formats_are_detected(self, value): + assert _looks_like_secret(value) is True + + @pytest.mark.parametrize("value", HIGH_ENTROPY_SECRETS) + def test_high_entropy_secrets_are_detected(self, value): + assert _looks_like_secret(value) is True + + @pytest.mark.parametrize("value", NON_SECRETS) + def test_non_secrets_are_not_flagged(self, value): + assert _looks_like_secret(value) is False + + def test_uuids_and_object_ids_are_never_flagged(self): + assert _looks_like_secret("550e8400-e29b-41d4-a716-446655440000") is False + assert _looks_like_secret("507f1f77bcf86cd799439011") is False + + def test_short_strings_bail_cheaply(self): + assert _looks_like_secret("xK9#mP2$") is False + assert _is_high_entropy_secret("xK9#mP2$") is False + + def test_pure_hex_is_treated_as_an_id_not_a_secret(self): + assert _is_high_entropy_secret("d41d8cd98f00b204e9800998ecf8427e") is False + + # -- integration with the masking pipeline -------------------------------------- + + def test_high_entropy_value_in_a_neutral_variable_is_redacted(self): + result = extract(api_response="n8fK2pQ9vX7mL4wR8tY3uZ6bC1dE5gH") + assert result == {"api_response": REDACTED} + + def test_secret_inside_a_nested_structure_is_redacted(self): + out = mask( + {"entries": [_key("sk_live_", "4eC39HqLyjWDarjtT1zdp7dc"), "plain-value"]} + ) + assert out == {"entries": [REDACTED, "plain-value"]} + + def test_object_id_value_survives(self): + result = extract(order_id="507f1f77bcf86cd799439011") + assert result == {"order_id": "507f1f77bcf86cd799439011"} + + def test_detection_can_be_disabled(self): + config = _MaskingConfig.build( + 
list(DEFAULT_CODE_VARIABLES_MASK_PATTERNS), [], detect_secrets=False + ) + secret = "n8fK2pQ9vX7mL4wR8tY3uZ6bC1dE5gH" + assert _mask_value(secret, config) == secret + + def test_detection_is_independent_of_url_scrubbing(self): + config = _MaskingConfig.build( + [], [], mask_url_credentials=False, detect_secrets=True + ) + assert _mask_value("n8fK2pQ9vX7mL4wR8tY3uZ6bC1dE5gH", config) == REDACTED + + def test_end_to_end_neutral_named_secret_is_redacted(self, tmpdir): + # neutral local, no keyword, no known prefix -> only the entropy gate can catch it + output = run_app( + tmpdir, + """ + make_client(capture_exception_code_variables=True) + + def trigger_error(): + handle = os.environ["TEST_TOKEN"] + 1 / 0 + + trigger_error() + """, + env={"TEST_TOKEN": "n8fK2pQ9vX7mL4wR8tY3uZ6bC1dE5gH"}, + ) + assert "code_variables" in output + assert "n8fK2pQ9vX7mL4wR8tY3uZ6bC1dE5gH" not in output + assert REDACTED in output + + def test_end_to_end_detection_disabled_via_client_option(self, tmpdir): + # detection off on the client -> value captured verbatim (full public plumbing) + output = run_app( + tmpdir, + """ + make_client( + capture_exception_code_variables=True, + code_variables_detect_secrets=False, + ) + + def trigger_error(): + handle = os.environ["TEST_TOKEN"] + 1 / 0 + + trigger_error() + """, + env={"TEST_TOKEN": "n8fK2pQ9vX7mL4wR8tY3uZ6bC1dE5gH"}, + ) + assert "code_variables" in output + assert "n8fK2pQ9vX7mL4wR8tY3uZ6bC1dE5gH" in output # detection off -> retained diff --git a/references/public_api_snapshot.txt b/references/public_api_snapshot.txt index d5512f32..163a9cfa 100644 --- a/references/public_api_snapshot.txt +++ b/references/public_api_snapshot.txt @@ -5,6 +5,7 @@ # names. External imports are excluded. 
alias posthog.BeforeSendCallback -> posthog.types.BeforeSendCallback alias posthog.Client -> posthog.client.Client +alias posthog.DEFAULT_CODE_VARIABLES_DETECT_SECRETS -> posthog.exception_utils.DEFAULT_CODE_VARIABLES_DETECT_SECRETS alias posthog.DEFAULT_CODE_VARIABLES_IGNORE_PATTERNS -> posthog.exception_utils.DEFAULT_CODE_VARIABLES_IGNORE_PATTERNS alias posthog.DEFAULT_CODE_VARIABLES_MASK_PATTERNS -> posthog.exception_utils.DEFAULT_CODE_VARIABLES_MASK_PATTERNS alias posthog.DEFAULT_CODE_VARIABLES_MASK_URL_CREDENTIALS -> posthog.exception_utils.DEFAULT_CODE_VARIABLES_MASK_URL_CREDENTIALS @@ -216,6 +217,7 @@ alias posthog.args.SendFeatureFlagsOptions -> posthog.types.SendFeatureFlagsOpti alias posthog.client.AI_EVENTS_ENDPOINT -> posthog.request.AI_EVENTS_ENDPOINT alias posthog.client.APIError -> posthog.request.APIError alias posthog.client.Consumer -> posthog.consumer.Consumer +alias posthog.client.DEFAULT_CODE_VARIABLES_DETECT_SECRETS -> posthog.exception_utils.DEFAULT_CODE_VARIABLES_DETECT_SECRETS alias posthog.client.DEFAULT_CODE_VARIABLES_IGNORE_PATTERNS -> posthog.exception_utils.DEFAULT_CODE_VARIABLES_IGNORE_PATTERNS alias posthog.client.DEFAULT_CODE_VARIABLES_MASK_PATTERNS -> posthog.exception_utils.DEFAULT_CODE_VARIABLES_MASK_PATTERNS alias posthog.client.DEFAULT_CODE_VARIABLES_MASK_URL_CREDENTIALS -> posthog.exception_utils.DEFAULT_CODE_VARIABLES_MASK_URL_CREDENTIALS @@ -255,6 +257,7 @@ alias posthog.client.exceptions_from_error_tuple -> posthog.exception_utils.exce alias posthog.client.flags -> posthog.request.flags alias posthog.client.get -> posthog.request.get alias posthog.client.get_capture_exception_code_variables_context -> posthog.contexts.get_capture_exception_code_variables_context +alias posthog.client.get_code_variables_detect_secrets_context -> posthog.contexts.get_code_variables_detect_secrets_context alias posthog.client.get_code_variables_ignore_patterns_context -> posthog.contexts.get_code_variables_ignore_patterns_context alias 
posthog.client.get_code_variables_mask_patterns_context -> posthog.contexts.get_code_variables_mask_patterns_context alias posthog.client.get_code_variables_mask_url_credentials_context -> posthog.contexts.get_code_variables_mask_url_credentials_context @@ -301,6 +304,7 @@ alias posthog.inner_identify_context -> posthog.contexts.identify_context alias posthog.inner_new_context -> posthog.contexts.new_context alias posthog.inner_scoped -> posthog.contexts.scoped alias posthog.inner_set_capture_exception_code_variables_context -> posthog.contexts.set_capture_exception_code_variables_context +alias posthog.inner_set_code_variables_detect_secrets_context -> posthog.contexts.set_code_variables_detect_secrets_context alias posthog.inner_set_code_variables_ignore_patterns_context -> posthog.contexts.set_code_variables_ignore_patterns_context alias posthog.inner_set_code_variables_mask_patterns_context -> posthog.contexts.set_code_variables_mask_patterns_context alias posthog.inner_set_code_variables_mask_url_credentials_context -> posthog.contexts.set_code_variables_mask_url_credentials_context @@ -456,6 +460,7 @@ attribute posthog.bucketed_rate_limiter.log = logging.getLogger('posthog') attribute posthog.capture_exception_code_variables = False attribute posthog.client.Client.api_key = (project_api_key or '').strip() attribute posthog.client.Client.capture_exception_code_variables = capture_exception_code_variables +attribute posthog.client.Client.code_variables_detect_secrets = code_variables_detect_secrets if code_variables_detect_secrets is not None else DEFAULT_CODE_VARIABLES_DETECT_SECRETS attribute posthog.client.Client.code_variables_ignore_patterns = code_variables_ignore_patterns if code_variables_ignore_patterns is not None else DEFAULT_CODE_VARIABLES_IGNORE_PATTERNS attribute posthog.client.Client.code_variables_mask_patterns = code_variables_mask_patterns if code_variables_mask_patterns is not None else DEFAULT_CODE_VARIABLES_MASK_PATTERNS attribute 
posthog.client.Client.code_variables_mask_url_credentials = code_variables_mask_url_credentials if code_variables_mask_url_credentials is not None else DEFAULT_CODE_VARIABLES_MASK_URL_CREDENTIALS @@ -499,6 +504,7 @@ attribute posthog.client.Client.super_properties = super_properties attribute posthog.client.Client.sync_mode = sync_mode attribute posthog.client.Client.timeout = timeout attribute posthog.client.MAX_DICT_SIZE = 50000 +attribute posthog.code_variables_detect_secrets = DEFAULT_CODE_VARIABLES_DETECT_SECRETS attribute posthog.code_variables_ignore_patterns = DEFAULT_CODE_VARIABLES_IGNORE_PATTERNS attribute posthog.code_variables_mask_patterns = DEFAULT_CODE_VARIABLES_MASK_PATTERNS attribute posthog.code_variables_mask_url_credentials = DEFAULT_CODE_VARIABLES_MASK_URL_CREDENTIALS @@ -522,6 +528,7 @@ attribute posthog.consumer.MAX_MSG_SIZE = 900 * 1024 attribute posthog.contexts.ContextScope.capture_exception_code_variables: Optional[bool] = None attribute posthog.contexts.ContextScope.capture_exceptions = capture_exceptions attribute posthog.contexts.ContextScope.client: Optional[Client] = client +attribute posthog.contexts.ContextScope.code_variables_detect_secrets: Optional[bool] = None attribute posthog.contexts.ContextScope.code_variables_ignore_patterns: Optional[list] = None attribute posthog.contexts.ContextScope.code_variables_mask_patterns: Optional[list] = None attribute posthog.contexts.ContextScope.code_variables_mask_url_credentials: Optional[bool] = None @@ -554,11 +561,12 @@ attribute posthog.exception_utils.AnnotatedValue.value = value attribute posthog.exception_utils.BASE64_ALPHABET = re.compile('^[a-zA-Z0-9/+=]*$') attribute posthog.exception_utils.CODE_VARIABLES_REDACTED_VALUE = '$$_posthog_redacted_based_on_masking_rules_$$' attribute posthog.exception_utils.CODE_VARIABLES_TOO_LONG_VALUE = '$$_posthog_value_too_long_$$' +attribute posthog.exception_utils.DEFAULT_CODE_VARIABLES_DETECT_SECRETS = True attribute 
posthog.exception_utils.DEFAULT_CODE_VARIABLES_IGNORE_PATTERNS = ['^__.*'] attribute posthog.exception_utils.DEFAULT_CODE_VARIABLES_MASK_PATTERNS = ['(?i)password', '(?i)secret', '(?i)passwd', '(?i)pwd', '(?i)api_key', '(?i)apikey', '(?i)auth', '(?i)credentials', '(?i)privatekey', '(?i)private_key', '(?i)token', '(?i)aws_access_key_id', '(?i)_pass', '(?i)sk_', '(?i)jwt', '(?i)connection_string', '(?i)connectionstring', '(?i)conn_str', '(?i)connstr', '(?i)dsn'] attribute posthog.exception_utils.DEFAULT_CODE_VARIABLES_MASK_URL_CREDENTIALS = True attribute posthog.exception_utils.DEFAULT_MAX_VALUE_LENGTH = 1024 -attribute posthog.exception_utils.DEFAULT_TOTAL_VARIABLES_SIZE_LIMIT = 20 * 1024 +attribute posthog.exception_utils.DEFAULT_TOTAL_VARIABLES_SIZE_LIMIT = 10 * 1024 attribute posthog.exception_utils.Event = TypedDict('Event', {'breadcrumbs': Dict[Literal['values'], List[Dict[str, Any]]], 'check_in_id': str, 'contexts': Dict[str, Dict[str, object]], 'dist': str, 'duration': Optional[float], 'environment': str, 'errors': List[Dict[str, Any]], 'event_id': str, 'exception': Dict[Literal['values'], List[Dict[str, Any]]], 'level': LogLevelStr, 'logger': str, 'message': str, 'modules': Dict[str, str], 'monitor_slug': Optional[str], 'platform': Literal['python'], 'profile': object, 'release': str, 'request': Dict[str, object], 'server_name': str, 'spans': List[Dict[str, object]], 'stacktrace': Dict[str, object], 'start_timestamp': datetime, 'status': Optional[str], 'threads': Dict[Literal['values'], List[Dict[str, Any]]], 'timestamp': Optional[datetime], 'transaction': str, 'type': Literal['check_in', 'transaction'], 'user': Dict[str, object], '_metrics_summary': Dict[str, object]}, total=False) attribute posthog.exception_utils.HAS_CHAINED_EXCEPTIONS = hasattr(Exception, '__suppress_context__') attribute posthog.exception_utils.LogLevelStr = Literal['fatal', 'critical', 'error', 'warning', 'info', 'debug'] @@ -754,7 +762,7 @@ class posthog.ai.types.ToolInProgress class 
posthog.args.OptionalCaptureArgs class posthog.args.OptionalSetArgs class posthog.bucketed_rate_limiter.BucketedRateLimiter(bucket_size: Number, refill_rate: Number, refill_interval_seconds: Number, on_bucket_rate_limited: Optional[Callable[[Hashable], None]] = None, clock: Callable[[], float] = time.monotonic) -class posthog.client.Client(project_api_key: str, host=None, debug=False, max_queue_size=10000, send=True, on_error=None, flush_at=100, flush_interval=5.0, gzip=False, max_retries=3, sync_mode=False, timeout=15, thread=1, poll_interval=30, personal_api_key=None, disabled=False, disable_geoip=True, is_server=True, historical_migration=False, feature_flags_request_timeout_seconds=3, super_properties=None, enable_exception_autocapture=False, log_captured_exceptions=False, project_root=None, privacy_mode=False, before_send=None, flag_fallback_cache_url=None, enable_local_evaluation=True, flag_definition_cache_provider: Optional[FlagDefinitionCacheProvider] = None, capture_exception_code_variables=False, code_variables_mask_patterns=None, code_variables_ignore_patterns=None, code_variables_mask_url_credentials=None, in_app_modules: list[str] | None = None, enable_exception_autocapture_rate_limiting=False, exception_autocapture_bucket_size=ExceptionCapture.DEFAULT_BUCKET_SIZE, exception_autocapture_refill_rate=ExceptionCapture.DEFAULT_REFILL_RATE, exception_autocapture_refill_interval_seconds=ExceptionCapture.DEFAULT_REFILL_INTERVAL_SECONDS, _dedicated_ai_endpoint=False) +class posthog.client.Client(project_api_key: str, host=None, debug=False, max_queue_size=10000, send=True, on_error=None, flush_at=100, flush_interval=5.0, gzip=False, max_retries=3, sync_mode=False, timeout=15, thread=1, poll_interval=30, personal_api_key=None, disabled=False, disable_geoip=True, is_server=True, historical_migration=False, feature_flags_request_timeout_seconds=3, super_properties=None, enable_exception_autocapture=False, log_captured_exceptions=False, project_root=None, 
privacy_mode=False, before_send=None, flag_fallback_cache_url=None, enable_local_evaluation=True, flag_definition_cache_provider: Optional[FlagDefinitionCacheProvider] = None, capture_exception_code_variables=False, code_variables_mask_patterns=None, code_variables_ignore_patterns=None, code_variables_mask_url_credentials=None, code_variables_detect_secrets=None, in_app_modules: list[str] | None = None, enable_exception_autocapture_rate_limiting=False, exception_autocapture_bucket_size=ExceptionCapture.DEFAULT_BUCKET_SIZE, exception_autocapture_refill_rate=ExceptionCapture.DEFAULT_REFILL_RATE, exception_autocapture_refill_interval_seconds=ExceptionCapture.DEFAULT_REFILL_INTERVAL_SECONDS, _dedicated_ai_endpoint=False) class posthog.consumer.Consumer(queue, api_key, flush_at=100, host=None, on_error=None, flush_interval=5.0, gzip=False, retries=10, timeout=15, historical_migration=False, dedicated_ai_endpoint=False) class posthog.contexts.ContextScope(parent=None, fresh: bool = False, capture_exceptions: bool = True, client: Optional[Client] = None) class posthog.exception_capture.ExceptionCapture(client: Client, rate_limiting_enabled=False, bucket_size=DEFAULT_BUCKET_SIZE, refill_rate=DEFAULT_REFILL_RATE, refill_interval_seconds=DEFAULT_REFILL_INTERVAL_SECONDS) @@ -873,6 +881,7 @@ function posthog.client.get_identity_state(passed) -> tuple[str, bool] function posthog.client.no_throw(default_return=None) function posthog.client.stringify_id(val) function posthog.contexts.get_capture_exception_code_variables_context() -> Optional[bool] +function posthog.contexts.get_code_variables_detect_secrets_context() -> Optional[bool] function posthog.contexts.get_code_variables_ignore_patterns_context() -> Optional[list] function posthog.contexts.get_code_variables_mask_patterns_context() -> Optional[list] function posthog.contexts.get_code_variables_mask_url_credentials_context() -> Optional[bool] @@ -884,6 +893,7 @@ function posthog.contexts.identify_context(distinct_id: str) 
-> None function posthog.contexts.new_context(fresh: bool = False, capture_exceptions: Optional[bool] = None, client: Optional[Client] = None) function posthog.contexts.scoped(fresh: bool = False, capture_exceptions: Optional[bool] = None) function posthog.contexts.set_capture_exception_code_variables_context(enabled: bool) -> None +function posthog.contexts.set_code_variables_detect_secrets_context(enabled: bool) -> None function posthog.contexts.set_code_variables_ignore_patterns_context(ignore_patterns: list) -> None function posthog.contexts.set_code_variables_mask_patterns_context(mask_patterns: list) -> None function posthog.contexts.set_code_variables_mask_url_credentials_context(enabled: bool) -> None @@ -891,7 +901,7 @@ function posthog.contexts.set_context_device_id(device_id: str) -> None function posthog.contexts.set_context_session(session_id: str) -> None function posthog.contexts.tag(key: str, value: Any) -> None function posthog.evaluate_flags(distinct_id: Optional[ID_TYPES] = None, groups: Optional[Mapping[str, Union[str, int]]] = None, person_properties: Optional[Dict[str, Any]] = None, group_properties: Optional[Dict[str, Dict[str, Any]]] = None, only_evaluate_locally: bool = False, disable_geoip: Optional[bool] = None, flag_keys: Optional[list[str]] = None, device_id: Optional[str] = None) -> FeatureFlagEvaluations -function posthog.exception_utils.attach_code_variables_to_frames(all_exceptions, exc_info, mask_patterns, ignore_patterns, mask_url_credentials=True) +function posthog.exception_utils.attach_code_variables_to_frames(all_exceptions, exc_info, mask_patterns, ignore_patterns, mask_url_credentials=True, detect_secrets=DEFAULT_CODE_VARIABLES_DETECT_SECRETS) function posthog.exception_utils.construct_artificial_traceback(e) function posthog.exception_utils.event_hint_with_exc_info(exc_info=None) function posthog.exception_utils.exc_info_from_error(error) @@ -913,7 +923,7 @@ function posthog.exception_utils.iter_stacks(tb) function 
posthog.exception_utils.mark_exception_as_captured(error, uuid) function posthog.exception_utils.safe_repr(value) function posthog.exception_utils.safe_str(value) -function posthog.exception_utils.serialize_code_variables(frame, limiter, mask_patterns=None, ignore_patterns=None, max_length=1024, mask_url_credentials=True) +function posthog.exception_utils.serialize_code_variables(frame, limiter, mask_patterns=None, ignore_patterns=None, max_length=1024, mask_url_credentials=True, detect_secrets=DEFAULT_CODE_VARIABLES_DETECT_SECRETS) function posthog.exception_utils.serialize_frame(frame, tb_lineno=None, max_value_length=None) function posthog.exception_utils.set_in_app_in_frames(frames, in_app_exclude, in_app_include, project_root=None) function posthog.exception_utils.should_hide_frame(frame: FrameType) -> bool @@ -921,7 +931,7 @@ function posthog.exception_utils.single_exception_from_error_tuple(exc_type, exc function posthog.exception_utils.strip_string(value, max_length=None) function posthog.exception_utils.to_string(value) function posthog.exception_utils.to_timestamp(value) -function posthog.exception_utils.try_attach_code_variables_to_frames(all_exceptions, exc_info, mask_patterns, ignore_patterns, mask_url_credentials=True) +function posthog.exception_utils.try_attach_code_variables_to_frames(all_exceptions, exc_info, mask_patterns, ignore_patterns, mask_url_credentials=True, detect_secrets=DEFAULT_CODE_VARIABLES_DETECT_SECRETS) function posthog.exception_utils.walk_exception_chain(exc_info) function posthog.feature_enabled(key: str, distinct_id: ID_TYPES, groups: Optional[Mapping[str, Union[str, int]]] = None, person_properties: Optional[Dict[str, Any]] = None, group_properties: Optional[Dict[str, Dict[str, Any]]] = None, only_evaluate_locally: bool = False, send_feature_flag_events: bool = True, disable_geoip: Optional[bool] = None, device_id: Optional[str] = None) -> Optional[bool] function posthog.feature_flag_definitions() @@ -967,6 +977,7 @@ function 
posthog.request.set_socket_options(socket_options: Optional[SocketOptio function posthog.scoped(fresh=False, capture_exceptions: Optional[bool] = None) function posthog.set(**kwargs: Unpack[OptionalSetArgs]) -> Optional[str] function posthog.set_capture_exception_code_variables_context(enabled: bool) +function posthog.set_code_variables_detect_secrets_context(enabled: bool) function posthog.set_code_variables_ignore_patterns_context(ignore_patterns: list[str]) function posthog.set_code_variables_mask_patterns_context(mask_patterns: list[str]) function posthog.set_code_variables_mask_url_credentials_context(enabled: bool) @@ -1093,6 +1104,7 @@ method posthog.consumer.Consumer.upload() method posthog.contexts.ContextScope.add_tag(key: str, value: Any) method posthog.contexts.ContextScope.collect_tags() -> Dict[str, Any] method posthog.contexts.ContextScope.get_capture_exception_code_variables() -> Optional[bool] +method posthog.contexts.ContextScope.get_code_variables_detect_secrets() -> Optional[bool] method posthog.contexts.ContextScope.get_code_variables_ignore_patterns() -> Optional[list] method posthog.contexts.ContextScope.get_code_variables_mask_patterns() -> Optional[list] method posthog.contexts.ContextScope.get_code_variables_mask_url_credentials() -> Optional[bool] @@ -1101,6 +1113,7 @@ method posthog.contexts.ContextScope.get_distinct_id() -> Optional[str] method posthog.contexts.ContextScope.get_parent() method posthog.contexts.ContextScope.get_session_id() -> Optional[str] method posthog.contexts.ContextScope.set_capture_exception_code_variables(enabled: bool) +method posthog.contexts.ContextScope.set_code_variables_detect_secrets(enabled: bool) method posthog.contexts.ContextScope.set_code_variables_ignore_patterns(ignore_patterns: list) method posthog.contexts.ContextScope.set_code_variables_mask_patterns(mask_patterns: list) method posthog.contexts.ContextScope.set_code_variables_mask_url_credentials(enabled: bool) From 
5bbafdc751fa1a4183538080f3ecfd399f609dc8 Mon Sep 17 00:00:00 2001 From: ablaszkiewicz Date: Tue, 23 Jun 2026 16:01:33 +0200 Subject: [PATCH 2/3] fix: run secret detection on the repr fallback An opaque object whose __repr__ returns a bare high-entropy token bypassed detection, since _safe_repr only checked the name/keyword mask. Run _looks_like_secret on the rendered repr too; normal reprs (with parens/brackets/ angle-brackets) are rejected by the repr-punctuation guard, so only bare-token reprs are caught. Co-Authored-By: Claude Opus 4.8 --- posthog/exception_utils.py | 3 +++ posthog/test/test_code_variables.py | 20 ++++++++++++++++++++ 2 files changed, 23 insertions(+) diff --git a/posthog/exception_utils.py b/posthog/exception_utils.py index be0cdd3a..e858adef 100644 --- a/posthog/exception_utils.py +++ b/posthog/exception_utils.py @@ -1249,6 +1249,9 @@ def _safe_repr(value, config): return CODE_VARIABLES_REDACTED_VALUE if _matcher_matches(rendered, config.mask): return CODE_VARIABLES_REDACTED_VALUE + # A __repr__ that is itself a bare secret would otherwise bypass detection. 
+ if config.detect_secrets and _looks_like_secret(rendered): + return CODE_VARIABLES_REDACTED_VALUE if config.mask_url_credentials: return _redact_url_credentials(rendered) return rendered diff --git a/posthog/test/test_code_variables.py b/posthog/test/test_code_variables.py index 047bdf1a..9c7c3672 100644 --- a/posthog/test/test_code_variables.py +++ b/posthog/test/test_code_variables.py @@ -1035,6 +1035,26 @@ def test_object_id_value_survives(self): result = extract(order_id="507f1f77bcf86cd799439011") assert result == {"order_id": "507f1f77bcf86cd799439011"} + def test_opaque_object_whose_repr_is_a_secret_is_redacted(self): + # an untraversable object whose __repr__ is a bare token must not bypass + # detection through the repr fallback + class OpaqueToken: + __slots__ = () + + def __repr__(self): + return "n8fK2pQ9vX7mL4wR8tY3uZ6bC1dE5gH" + + assert mask(OpaqueToken()) == REDACTED + + def test_ordinary_object_repr_is_not_redacted(self): + class Thing: + __slots__ = () + + def __repr__(self): + return "" + + assert mask(Thing()) == "" + def test_detection_can_be_disabled(self): config = _MaskingConfig.build( list(DEFAULT_CODE_VARIABLES_MASK_PATTERNS), [], detect_secrets=False From 1d454bc2b40bd792279712914224fc97577e15a6 Mon Sep 17 00:00:00 2001 From: ablaszkiewicz Date: Tue, 23 Jun 2026 18:23:59 +0200 Subject: [PATCH 3/3] feat: expand known secret patterns with more vendor formats Add high-confidence, distinctive-prefix patterns adapted from the gitleaks / detect-secrets rule sets: Hugging Face, Google OAuth, DigitalOcean, Square, Grafana, Twilio, SendGrid, Mailgun, Mailchimp, npm, PyPI, Databricks, Doppler, Postman, Linear, Notion, Shopify, New Relic, and more GitLab/Slack variants. The entropy gate already catches most high-entropy keys generically; the known list mainly adds deterministic guarantees and covers low-character-class formats (e.g. AWS AKIA) the entropy gate rejects. Runs only on the fallback path, so no measurable perf change. 
Co-Authored-By: Claude Opus 4.8 --- posthog/exception_utils.py | 37 +++++++++++++++++++++++++---- posthog/test/test_code_variables.py | 17 +++++++++++++ 2 files changed, 49 insertions(+), 5 deletions(-) diff --git a/posthog/exception_utils.py b/posthog/exception_utils.py index e858adef..ab7775a4 100644 --- a/posthog/exception_utils.py +++ b/posthog/exception_utils.py @@ -1118,18 +1118,45 @@ def build( # Lowercase word-like path segment; two around a '/' marks a filesystem path. _PATH_WORD_RE = re.compile(r"\A[a-z][a-z.]*\Z") -# Well-known credential formats, matched regardless of entropy. +# Well-known credential formats, matched regardless of entropy. High-confidence, +# distinctive-prefix patterns adapted from the gitleaks / detect-secrets rule sets. _KNOWN_SECRET_PATTERNS = [ + # AI / LLM providers r"sk-ant-[A-Za-z0-9_-]{16,}", # Anthropic r"sk-(?:proj-)?[A-Za-z0-9_-]{20,}", # OpenAI - r"(?:sk|pk|rk)_(?:live|test)_[A-Za-z0-9]{16,}", # Stripe + r"hf_[A-Za-z0-9]{34}", # Hugging Face + # Cloud providers r"AKIA[0-9A-Z]{16}", # AWS access key id r"(?:ASIA|AGPA|AIDA|AROA|AIPA|ANPA|ANVA|ABIA|ACCA)[0-9A-Z]{16}", # other AWS ids + r"AIza[A-Za-z0-9_-]{35}", # Google API key + r"ya29\.[A-Za-z0-9_-]{20,}", # Google OAuth token + r"do[opr]_v1_[a-f0-9]{64}", # DigitalOcean + # Payments + r"(?:sk|pk|rk)_(?:live|test)_[A-Za-z0-9]{16,}", # Stripe + r"sq0[a-z]{3}-[A-Za-z0-9_-]{22,43}", # Square + # Source control / CI/CD r"gh[pousr]_[A-Za-z0-9]{36}", # GitHub r"github_pat_[A-Za-z0-9_]{20,}", # GitHub fine-grained PAT - r"glpat-[A-Za-z0-9_-]{20}", # GitLab - r"xox[baprs]-[A-Za-z0-9-]{10,}", # Slack - r"AIza[A-Za-z0-9_-]{35}", # Google + r"gl(?:pat|ptt|rt|soat)-[A-Za-z0-9_-]{20}", # GitLab + r"glsa_[A-Za-z0-9]{32}_[A-Fa-f0-9]{8}", # Grafana service account + # Messaging / email + r"xox[abeoprs]-[A-Za-z0-9-]{10,}", # Slack + r"xapp-[0-9]-[A-Za-z0-9-]{10,}", # Slack app-level + r"SK[0-9a-fA-F]{32}", # Twilio API key + r"SG\.[A-Za-z0-9_-]{22}\.[A-Za-z0-9_-]{43}", # SendGrid + 
r"key-[0-9a-f]{32}", # Mailgun + r"[0-9a-f]{32}-us[0-9]{1,2}", # Mailchimp + # Dev tools / registries / SaaS + r"npm_[A-Za-z0-9]{36}", # npm + r"pypi-AgEI[A-Za-z0-9_-]{50,}", # PyPI upload token + r"dapi[0-9a-f]{32}", # Databricks + r"dp\.pt\.[A-Za-z0-9]{40,}", # Doppler + r"PMAK-[a-f0-9]{24}-[a-f0-9]{34}", # Postman + r"lin_api_[A-Za-z0-9]{40}", # Linear + r"ntn_[A-Za-z0-9]{40,}", # Notion + r"shp(?:at|ca|pa|ss)_[a-fA-F0-9]{32}", # Shopify + r"NR(?:AK|JS|II|MA|RA)-[A-Za-z0-9]{27}", # New Relic + # Tokens with embedded structure r"eyJ[A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]{6,}", # JWT ] _KNOWN_SECRET_RE = re.compile("|".join(_KNOWN_SECRET_PATTERNS)) diff --git a/posthog/test/test_code_variables.py b/posthog/test/test_code_variables.py index 9c7c3672..ad197a9c 100644 --- a/posthog/test/test_code_variables.py +++ b/posthog/test/test_code_variables.py @@ -944,6 +944,23 @@ def _key(prefix, body): _key( "eyJ", "hbGciOiJIUzI1NiJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0.dozjgNryP4J3jVmNHl0w5N" ), # JWT + _key("hf_", "aBcDeFgHiJkLmNoPqRsTuVwXyZ01234567"), # Hugging Face + _key("ya29.", "a0AfH6SMBx1y2z3-_abcDEFghiJKLmnoPQ"), # Google OAuth + _key("sq0atp-", "1a2B3c4D5e6F7g8H9i0JkL"), # Square + _key("glsa_", "aBcDeFgHiJkLmNoPqRsTuVwXyZ012345_a1b2c3d4"), # Grafana + _key("SK", "0123456789abcdef0123456789abcdef"), # Twilio + _key( + "SG.", "aBcDeFgHiJkLmNoPqRsTuV.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQ" + ), # SendGrid + _key("npm_", "aBcDeFgHiJkLmNoPqRsTuVwXyZ0123456789"), # npm + _key("dapi", "0123456789abcdef0123456789abcdef"), # Databricks + _key( + "PMAK-", "0123456789abcdef01234567-0123456789abcdef0123456789abcdef0123" + ), # Postman + _key("lin_api_", "0123456789abcdef0123456789abcdef01234567"), # Linear + _key("shpat_", "0123456789abcdef0123456789abcdef"), # Shopify + _key("NRAK-", "0123456789ABCDEFGHIJKLMNOPQ"), # New Relic + _key("0123456789abcdef0123456789abcdef", "-us12"), # Mailchimp "-----BEGIN RSA PRIVATE KEY-----\nMIIEpAIBAAKCAQEA1234", # PEM private key 
"-----BEGIN OPENSSH PRIVATE KEY-----\nb3BlbnNzaC1rZXkt", # OpenSSH private key ]