From f4d980c72dc2e2a537fad706d92e730d16d84dc3 Mon Sep 17 00:00:00 2001 From: Joe Martin <84011561+joethreepwood@users.noreply.github.com> Date: Fri, 5 Jun 2026 14:20:05 +0100 Subject: [PATCH 1/2] feat: add two robust PostHog PII rules MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extends PII coverage beyond the single posthog_pii_in_capture_call rule (which matches PII by key name in capture/identify): - posthog_pii_in_person_properties: sensitive PII passed to person-property call-sites (register / register_once / setPersonProperties / setPersonPropertiesForFlags). Mirrors the identify() split — email and names are standard person properties and are NOT flagged; only regulated PII (SSN, DOB, postal address, financial, gov ID, medical) is. - posthog_pii_value_in_tracking_call: PII-shaped literal VALUES (email, US SSN, separator-grouped card number) under any property key — catches PII hidden behind innocuous keys like { referrer: 'jane@example.com' }. Both follow house conventions: one rule per file, all six meta fields, no regex lookarounds, same first-object FP-avoidance scoping as the sibling rule. Email values are flagged in capture() only (email is valid in identify/person-properties). Card detection requires separators since YARA-X can't run a Luhn check. 19 tests; full suite green (535). 
Co-Authored-By: Claude Opus 4.8 --- .../posthog_pii_in_person_properties.test.ts | 87 +++++++++++++++++++ ...posthog_pii_value_in_tracking_call.test.ts | 80 +++++++++++++++++ .../posthog_pii_in_person_properties.yar | 36 ++++++++ .../posthog_pii_value_in_tracking_call.yar | 34 ++++++++ 4 files changed, 237 insertions(+) create mode 100644 src/scanner/__tests__/rules/posthog_pii_in_person_properties.test.ts create mode 100644 src/scanner/__tests__/rules/posthog_pii_value_in_tracking_call.test.ts create mode 100644 src/scanner/rules/posthog_pii_in_person_properties.yar create mode 100644 src/scanner/rules/posthog_pii_value_in_tracking_call.yar diff --git a/src/scanner/__tests__/rules/posthog_pii_in_person_properties.test.ts b/src/scanner/__tests__/rules/posthog_pii_in_person_properties.test.ts new file mode 100644 index 0000000..d7435f8 --- /dev/null +++ b/src/scanner/__tests__/rules/posthog_pii_in_person_properties.test.ts @@ -0,0 +1,87 @@ +import { describe, it } from 'vitest'; +import { + expectRuleMatch, + expectRuleDidNotMatch, + expectRuleMetadata, +} from '../helpers.js'; + +const RULE = 'posthog_pii_in_person_properties'; + +describe(RULE, () => { + describe('positive cases – should match', () => { + it('matches an SSN in setPersonProperties()', async () => { + await expectRuleMatch( + `posthog.setPersonProperties({ ssn: userSsn })`, + RULE, + ); + }); + + it('matches a credit card in register()', async () => { + await expectRuleMatch( + `posthog.register({ credit_card: card })`, + RULE, + ); + }); + + it('matches a date of birth in setPersonPropertiesForFlags()', async () => { + await expectRuleMatch( + `posthog.setPersonPropertiesForFlags({ dateOfBirth: dob })`, + RULE, + ); + }); + + it('matches a bank account in register_once()', async () => { + await expectRuleMatch( + `posthog.register_once({ bank_account: acct })`, + RULE, + ); + }); + + it('matches a quoted-key JSON-style config', async () => { + await expectRuleMatch( + `posthog.setPersonProperties({ 
"passport_number": value })`, + RULE, + ); + }); + }); + + describe('negative cases – should NOT match', () => { + it('does NOT match email (a standard person property)', async () => { + await expectRuleDidNotMatch( + `posthog.setPersonProperties({ email: user.email })`, + RULE, + ); + }); + + it('does NOT match a name (a standard person property)', async () => { + await expectRuleDidNotMatch( + `posthog.register({ first_name: user.firstName })`, + RULE, + ); + }); + + it('does NOT match sensitive PII in capture() (handled by the sibling rule)', async () => { + await expectRuleDidNotMatch( + `posthog.capture('checkout', { ssn: userSsn })`, + RULE, + ); + }); + + it('does NOT match a nested $set object (FP-avoidance scoping)', async () => { + await expectRuleDidNotMatch( + `posthog.setPersonProperties({ $set: { ssn: userSsn } })`, + RULE, + ); + }); + }); + + describe('metadata', () => { + it('exposes required metadata fields', async () => { + await expectRuleMetadata( + `posthog.setPersonProperties({ ssn: userSsn })`, + RULE, + { severity: 'high', category: 'posthog_pii', action: 'remediate' }, + ); + }); + }); +}); diff --git a/src/scanner/__tests__/rules/posthog_pii_value_in_tracking_call.test.ts b/src/scanner/__tests__/rules/posthog_pii_value_in_tracking_call.test.ts new file mode 100644 index 0000000..fe400df --- /dev/null +++ b/src/scanner/__tests__/rules/posthog_pii_value_in_tracking_call.test.ts @@ -0,0 +1,80 @@ +import { describe, it } from 'vitest'; +import { + expectRuleMatch, + expectRuleDidNotMatch, + expectRuleMetadata, +} from '../helpers.js'; + +const RULE = 'posthog_pii_value_in_tracking_call'; + +describe(RULE, () => { + describe('positive cases – should match', () => { + it('matches an email literal under an innocuous key in capture()', async () => { + await expectRuleMatch( + `posthog.capture('signup', { referrer: 'jane@example.com' })`, + RULE, + ); + }); + + it('matches a dashed SSN value in capture()', async () => { + await expectRuleMatch( + 
`posthog.capture('kyc', { value: '123-45-6789' })`, + RULE, + ); + }); + + it('matches a dashed SSN value in identify()', async () => { + await expectRuleMatch( + `posthog.identify('user-1', { note: '123-45-6789' })`, + RULE, + ); + }); + + it('matches a space-grouped card number in capture()', async () => { + await expectRuleMatch( + `posthog.capture('payment', { num: '4111 1111 1111 1111' })`, + RULE, + ); + }); + + it('matches a dash-grouped card number in setPersonProperties()', async () => { + await expectRuleMatch( + `posthog.setPersonProperties({ ref: '4111-1111-1111-1111' })`, + RULE, + ); + }); + }); + + describe('negative cases – should NOT match', () => { + it('does NOT match an email value in identify() (the standard pattern)', async () => { + await expectRuleDidNotMatch( + `posthog.identify('user-1', { email: 'jane@example.com' })`, + RULE, + ); + }); + + it('does NOT match an unseparated 16-digit run (no Luhn check available)', async () => { + await expectRuleDidNotMatch( + `posthog.capture('order', { orderId: '4111111111111111' })`, + RULE, + ); + }); + + it('does NOT match a capture call with no PII-shaped value', async () => { + await expectRuleDidNotMatch( + `posthog.capture('clicked', { button: 'subscribe', plan: 'pro' })`, + RULE, + ); + }); + }); + + describe('metadata', () => { + it('exposes required metadata fields', async () => { + await expectRuleMetadata( + `posthog.capture('signup', { referrer: 'jane@example.com' })`, + RULE, + { severity: 'high', category: 'posthog_pii', action: 'remediate' }, + ); + }); + }); +}); diff --git a/src/scanner/rules/posthog_pii_in_person_properties.yar b/src/scanner/rules/posthog_pii_in_person_properties.yar new file mode 100644 index 0000000..95b30be --- /dev/null +++ b/src/scanner/rules/posthog_pii_in_person_properties.yar @@ -0,0 +1,36 @@ +// Detects sensitive PII passed to PostHog person-property calls: +// register(), register_once(), setPersonProperties(), and +// setPersonPropertiesForFlags(). 
Companion to posthog_pii_in_capture_call, +// which only covers capture()/identify(). +// +// Mirrors the identify() half of that rule: email and names are standard, +// expected person properties and are NOT flagged here — only sensitive, +// regulated PII is (SSN, DOB, financial, government ID, medical). +// +// FP avoidance (same as the sibling rule): only top-level keys in the first +// `{ ... }` object match, so a nested `$set: { ssn: ... }` won't. + +rule posthog_pii_in_person_properties +{ + meta: + description = "Sensitive PII (SSN, DOB, financial, government ID, or similar) passed to a PostHog person-property call (register / setPersonProperties)." + remediation = "Remove regulated PII from person properties. Keep only non-sensitive identifying fields in PostHog – https://posthog.com/docs/product-analytics/person-properties" + severity = "high" + category = "posthog_pii" + action = "remediate" + scan_context = "output" + + strings: + $pp_ssn = /\.(register|register_once|setPersonProperties|setPersonPropertiesForFlags)\s*\([^{]*\{[^{}]*\b(social_security_number|socialSecurityNumber|social_security|socialSecurity|ssn)\b['"]?\s*[:=]/i + $pp_dob = /\.(register|register_once|setPersonProperties|setPersonPropertiesForFlags)\s*\([^{]*\{[^{}]*\b(date_of_birth|dateOfBirth|birth_date|birthDate|birthday|dob)\b['"]?\s*[:=]/i + $pp_credit_card = /\.(register|register_once|setPersonProperties|setPersonPropertiesForFlags)\s*\([^{]*\{[^{}]*\b(credit_card|creditCard|card_number|cardNumber|cc_number|ccNumber|cvv|cvc)\b['"]?\s*[:=]/i + $pp_address = /\.(register|register_once|setPersonProperties|setPersonPropertiesForFlags)\s*\([^{]*\{[^{}]*\b(street_address|streetAddress|home_address|homeAddress|billing_address|billingAddress|mailing_address|mailingAddress)\b['"]?\s*[:=]/i + $pp_passport = /\.(register|register_once|setPersonProperties|setPersonPropertiesForFlags)\s*\([^{]*\{[^{}]*\b(passport_number|passportNumber|passport)\b['"]?\s*[:=]/i + $pp_drivers_license = 
/\.(register|register_once|setPersonProperties|setPersonPropertiesForFlags)\s*\([^{]*\{[^{}]*\b(drivers_license|driversLicense|driver_license|driverLicense|license_number|licenseNumber|dl_number|dlNumber)\b['"]?\s*[:=]/i + $pp_bank_account = /\.(register|register_once|setPersonProperties|setPersonPropertiesForFlags)\s*\([^{]*\{[^{}]*\b(bank_account|bankAccount|account_number|accountNumber|routing_number|routingNumber|iban)\b['"]?\s*[:=]/i + $pp_medical = /\.(register|register_once|setPersonProperties|setPersonPropertiesForFlags)\s*\([^{]*\{[^{}]*\b(medical_record|medicalRecord|health_record|healthRecord|patient_id|patientId|mrn)\b['"]?\s*[:=]/i + $pp_gov_id = /\.(register|register_once|setPersonProperties|setPersonPropertiesForFlags)\s*\([^{]*\{[^{}]*\b(national_id|nationalId|tax_id|taxId|citizen_id|citizenId)\b['"]?\s*[:=]/i + + condition: + any of them +} diff --git a/src/scanner/rules/posthog_pii_value_in_tracking_call.yar b/src/scanner/rules/posthog_pii_value_in_tracking_call.yar new file mode 100644 index 0000000..ae250d0 --- /dev/null +++ b/src/scanner/rules/posthog_pii_value_in_tracking_call.yar @@ -0,0 +1,34 @@ +// Detects PII-shaped literal VALUES passed to a PostHog tracking call, +// regardless of the property key. Complements posthog_pii_in_capture_call +// (which matches by key NAME) by catching PII hidden under an innocuous key, +// e.g. `posthog.capture('signup', { referrer: 'jane@example.com' })`. +// +// FP avoidance (same scoping as the key-name rule): only the top-level of the +// first `{ ... }` object is searched. Value patterns are deliberately +// high-precision to keep false positives low: +// - email: real address shape, and only in capture() — an email value in +// identify()/person-properties is the standard, correct pattern. +// - ssn: dashed US SSN (123-45-6789). +// - card: separator-grouped 16-digit PAN. YARA-X cannot run a Luhn check, +// so an unseparated 16-digit run is intentionally NOT matched (too noisy). 
+ +rule posthog_pii_value_in_tracking_call +{ + meta: + description = "A PII-shaped literal value (email address, US SSN, or formatted credit-card number) passed to a PostHog tracking call." + remediation = "Remove the literal PII value from the event. Reference users by distinct ID and keep raw PII out of PostHog – https://posthog.com/docs/product-analytics/person-properties" + severity = "high" + category = "posthog_pii" + action = "remediate" + scan_context = "output" + + strings: + // Email value — capture() only (email is a valid identify/person property). + $val_email = /\.capture\s*\([^{]*\{[^{}]*['"][a-z0-9._%+-]+@[a-z0-9.-]+\.[a-z]{2,}['"]/i + // Sensitive PII values — flagged across every tracking call-site. + $val_ssn = /\.(capture|identify|register|register_once|setPersonProperties|setPersonPropertiesForFlags)\s*\([^{]*\{[^{}]*\b\d{3}-\d{2}-\d{4}\b/ + $val_card = /\.(capture|identify|register|register_once|setPersonProperties|setPersonPropertiesForFlags)\s*\([^{]*\{[^{}]*\b\d{4}[ -]\d{4}[ -]\d{4}[ -]\d{4}\b/ + + condition: + any of them +} From 7ffc9983c40b97a2be526e390b904da1377ba787 Mon Sep 17 00:00:00 2001 From: Joe Martin <84011561+joethreepwood@users.noreply.github.com> Date: Mon, 8 Jun 2026 12:08:46 +0100 Subject: [PATCH 2/2] docs+test: note literal-only limit and cover nested $set for value rule MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Review feedback (PostHog/warlock#33): - Document that posthog_pii_value_in_tracking_call only catches quoted/ literal values, not variables (runtime triage covers the rest). - Add a negative test confirming the rule does not fire on an SSN nested inside a $set object — matching the coverage the person-properties rule already has. 
Co-Authored-By: Claude Opus 4.8 --- .../rules/posthog_pii_value_in_tracking_call.test.ts | 7 +++++++ src/scanner/rules/posthog_pii_value_in_tracking_call.yar | 5 +++++ 2 files changed, 12 insertions(+) diff --git a/src/scanner/__tests__/rules/posthog_pii_value_in_tracking_call.test.ts b/src/scanner/__tests__/rules/posthog_pii_value_in_tracking_call.test.ts index fe400df..040022a 100644 --- a/src/scanner/__tests__/rules/posthog_pii_value_in_tracking_call.test.ts +++ b/src/scanner/__tests__/rules/posthog_pii_value_in_tracking_call.test.ts @@ -66,6 +66,13 @@ describe(RULE, () => { RULE, ); }); + + it('does NOT match an SSN value nested inside a $set object', async () => { + await expectRuleDidNotMatch( + `posthog.setPersonProperties({ $set: { ssn: '123-45-6789' } })`, + RULE, + ); + }); }); describe('metadata', () => { diff --git a/src/scanner/rules/posthog_pii_value_in_tracking_call.yar b/src/scanner/rules/posthog_pii_value_in_tracking_call.yar index ae250d0..897f17c 100644 --- a/src/scanner/rules/posthog_pii_value_in_tracking_call.yar +++ b/src/scanner/rules/posthog_pii_value_in_tracking_call.yar @@ -11,6 +11,11 @@ // - ssn: dashed US SSN (123-45-6789). // - card: separator-grouped 16-digit PAN. YARA-X cannot run a Luhn check, // so an unseparated 16-digit run is intentionally NOT matched (too noisy). +// +// Known limit: only catches quoted/literal values, not variables (e.g. +// `{ email: userEmail }`). Runtime triage catches most of those at the +// payload level; this rule is the static-analysis backstop for hardcoded +// leaks. rule posthog_pii_value_in_tracking_call {