-
Notifications
You must be signed in to change notification settings - Fork 0
feat: add two robust PostHog PII rules #33
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,87 @@ | ||
| import { describe, it } from 'vitest'; | ||
| import { | ||
| expectRuleMatch, | ||
| expectRuleDidNotMatch, | ||
| expectRuleMetadata, | ||
| } from '../helpers.js'; | ||
|
|
||
| /* Name of the rule under test; passed as the rule argument to every helper call in this file. */ const RULE = 'posthog_pii_in_person_properties'; | ||
|
|
||
| /* Suite for the key-name rule. The helpers come from ../helpers.js; presumably they scan the snippet and assert whether RULE fires — confirm there. */ describe(RULE, () => { | ||
| /* Positive cases: each snippet places one sensitive key at the top level of the call's object literal. */ describe('positive cases – should match', () => { | ||
| it('matches an SSN in setPersonProperties()', async () => { | ||
| await expectRuleMatch( | ||
| `posthog.setPersonProperties({ ssn: userSsn })`, | ||
| RULE, | ||
| ); | ||
| }); | ||
|
|
||
| it('matches a credit card in register()', async () => { | ||
| await expectRuleMatch( | ||
| `posthog.register({ credit_card: card })`, | ||
| RULE, | ||
| ); | ||
| }); | ||
|
|
||
| it('matches a date of birth in setPersonPropertiesForFlags()', async () => { | ||
| await expectRuleMatch( | ||
| `posthog.setPersonPropertiesForFlags({ dateOfBirth: dob })`, | ||
| RULE, | ||
| ); | ||
| }); | ||
|
|
||
| it('matches a bank account in register_once()', async () => { | ||
| await expectRuleMatch( | ||
| `posthog.register_once({ bank_account: acct })`, | ||
| RULE, | ||
| ); | ||
| }); | ||
|
|
||
| it('matches a quoted-key JSON-style config', async () => { | ||
| await expectRuleMatch( | ||
| `posthog.setPersonProperties({ "passport_number": value })`, | ||
| RULE, | ||
| ); | ||
| }); | ||
| }); | ||
|
|
||
| /* Negative cases: standard person properties (email, name), a capture() call, and a sensitive key nested inside $set. */ describe('negative cases – should NOT match', () => { | ||
| it('does NOT match email (a standard person property)', async () => { | ||
| await expectRuleDidNotMatch( | ||
| `posthog.setPersonProperties({ email: user.email })`, | ||
| RULE, | ||
| ); | ||
| }); | ||
|
|
||
| it('does NOT match a name (a standard person property)', async () => { | ||
| await expectRuleDidNotMatch( | ||
| `posthog.register({ first_name: user.firstName })`, | ||
| RULE, | ||
| ); | ||
| }); | ||
|
|
||
| it('does NOT match sensitive PII in capture() (handled by the sibling rule)', async () => { | ||
| await expectRuleDidNotMatch( | ||
| `posthog.capture('checkout', { ssn: userSsn })`, | ||
| RULE, | ||
| ); | ||
| }); | ||
|
|
||
| it('does NOT match a nested $set object (FP-avoidance scoping)', async () => { | ||
| await expectRuleDidNotMatch( | ||
| `posthog.setPersonProperties({ $set: { ssn: userSsn } })`, | ||
| RULE, | ||
| ); | ||
| }); | ||
| }); | ||
|
|
||
| /* Metadata: the expected severity/category/action mirror the values in the rule's meta section. */ describe('metadata', () => { | ||
| it('exposes required metadata fields', async () => { | ||
| await expectRuleMetadata( | ||
| `posthog.setPersonProperties({ ssn: userSsn })`, | ||
| RULE, | ||
| { severity: 'high', category: 'posthog_pii', action: 'remediate' }, | ||
| ); | ||
| }); | ||
| }); | ||
| }); |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,87 @@ | ||
| import { describe, it } from 'vitest'; | ||
| import { | ||
| expectRuleMatch, | ||
| expectRuleDidNotMatch, | ||
| expectRuleMetadata, | ||
| } from '../helpers.js'; | ||
|
|
||
| /* Name of the rule under test; passed as the rule argument to every helper call in this file. */ const RULE = 'posthog_pii_value_in_tracking_call'; | ||
|
|
||
| /* Suite for the value-shape rule. The helpers come from ../helpers.js; presumably they scan the snippet and assert whether RULE fires — confirm there. */ describe(RULE, () => { | ||
| /* Positive cases: PII-shaped string literals (email, dashed SSN, space- or dash-grouped 16-digit number) under innocuous keys. */ describe('positive cases – should match', () => { | ||
| it('matches an email literal under an innocuous key in capture()', async () => { | ||
| await expectRuleMatch( | ||
| `posthog.capture('signup', { referrer: 'jane@example.com' })`, | ||
| RULE, | ||
| ); | ||
| }); | ||
|
|
||
| it('matches a dashed SSN value in capture()', async () => { | ||
| await expectRuleMatch( | ||
| `posthog.capture('kyc', { value: '123-45-6789' })`, | ||
| RULE, | ||
| ); | ||
| }); | ||
|
|
||
| it('matches a dashed SSN value in identify()', async () => { | ||
| await expectRuleMatch( | ||
| `posthog.identify('user-1', { note: '123-45-6789' })`, | ||
| RULE, | ||
| ); | ||
| }); | ||
|
|
||
| it('matches a space-grouped card number in capture()', async () => { | ||
| await expectRuleMatch( | ||
| `posthog.capture('payment', { num: '4111 1111 1111 1111' })`, | ||
| RULE, | ||
| ); | ||
| }); | ||
|
|
||
| it('matches a dash-grouped card number in setPersonProperties()', async () => { | ||
| await expectRuleMatch( | ||
| `posthog.setPersonProperties({ ref: '4111-1111-1111-1111' })`, | ||
| RULE, | ||
| ); | ||
| }); | ||
| }); | ||
|
|
||
| /* Negative cases: an email literal in identify(), an unseparated 16-digit string, no PII-shaped value, and an SSN literal nested inside $set. */ describe('negative cases – should NOT match', () => { | ||
| it('does NOT match an email value in identify() (the standard pattern)', async () => { | ||
| await expectRuleDidNotMatch( | ||
| `posthog.identify('user-1', { email: 'jane@example.com' })`, | ||
| RULE, | ||
| ); | ||
| }); | ||
|
|
||
| it('does NOT match an unseparated 16-digit run (no Luhn check available)', async () => { | ||
| await expectRuleDidNotMatch( | ||
| `posthog.capture('order', { orderId: '4111111111111111' })`, | ||
| RULE, | ||
| ); | ||
| }); | ||
|
|
||
| it('does NOT match a capture call with no PII-shaped value', async () => { | ||
| await expectRuleDidNotMatch( | ||
| `posthog.capture('clicked', { button: 'subscribe', plan: 'pro' })`, | ||
| RULE, | ||
| ); | ||
| }); | ||
|
|
||
| it('does NOT match an SSN value nested inside a $set object', async () => { | ||
| await expectRuleDidNotMatch( | ||
| `posthog.setPersonProperties({ $set: { ssn: '123-45-6789' } })`, | ||
| RULE, | ||
| ); | ||
| }); | ||
| }); | ||
|
|
||
| /* Metadata: the expected severity/category/action mirror the values in the rule's meta section. */ describe('metadata', () => { | ||
| it('exposes required metadata fields', async () => { | ||
| await expectRuleMetadata( | ||
| `posthog.capture('signup', { referrer: 'jane@example.com' })`, | ||
| RULE, | ||
| { severity: 'high', category: 'posthog_pii', action: 'remediate' }, | ||
| ); | ||
| }); | ||
| }); | ||
| }); | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,36 @@ | ||
| // Detects sensitive PII passed to PostHog person-property calls: | ||
| // register(), register_once(), setPersonProperties(), and | ||
| // setPersonPropertiesForFlags(). Companion to posthog_pii_in_capture_call, | ||
| // which only covers capture()/identify(). Mirrors the identify() half of | ||
| // that rule: email and names are standard, expected person properties and | ||
| // are NOT flagged here — only sensitive, regulated PII is (SSN, DOB, | ||
| // financial, government ID, medical). | ||
| // | ||
| // FP avoidance (same as the sibling rule): only top-level keys in the first | ||
| // `{ ... }` object after the call's `(` match, so a nested `$set: { ssn: ... }` | ||
| // won't. NOTE(review): `[^{]*` does not stop at `)`, so an object literal after a brace-free call may also match — confirm. | ||
|
|
||
| rule posthog_pii_in_person_properties | ||
| { | ||
| meta: | ||
| description = "Sensitive PII (SSN, DOB, financial, government ID, or similar) passed to a PostHog person-property call (register / setPersonProperties)." | ||
| remediation = "Remove regulated PII from person properties. Keep only non-sensitive identifying fields in PostHog – https://posthog.com/docs/product-analytics/person-properties" | ||
| severity = "high" | ||
| category = "posthog_pii" | ||
| action = "remediate" | ||
| scan_context = "output" | ||
|
|
||
| strings: | ||
| $pp_ssn = /\.(register|register_once|setPersonProperties|setPersonPropertiesForFlags)\s*\([^{]*\{[^{}]*\b(social_security_number|socialSecurityNumber|social_security|socialSecurity|ssn)\b['"]?\s*[:=]/i | ||
| $pp_dob = /\.(register|register_once|setPersonProperties|setPersonPropertiesForFlags)\s*\([^{]*\{[^{}]*\b(date_of_birth|dateOfBirth|birth_date|birthDate|birthday|dob)\b['"]?\s*[:=]/i | ||
| $pp_credit_card = /\.(register|register_once|setPersonProperties|setPersonPropertiesForFlags)\s*\([^{]*\{[^{}]*\b(credit_card|creditCard|card_number|cardNumber|cc_number|ccNumber|cvv|cvc)\b['"]?\s*[:=]/i | ||
| $pp_address = /\.(register|register_once|setPersonProperties|setPersonPropertiesForFlags)\s*\([^{]*\{[^{}]*\b(street_address|streetAddress|home_address|homeAddress|billing_address|billingAddress|mailing_address|mailingAddress)\b['"]?\s*[:=]/i | ||
| $pp_passport = /\.(register|register_once|setPersonProperties|setPersonPropertiesForFlags)\s*\([^{]*\{[^{}]*\b(passport_number|passportNumber|passport)\b['"]?\s*[:=]/i | ||
| $pp_drivers_license = /\.(register|register_once|setPersonProperties|setPersonPropertiesForFlags)\s*\([^{]*\{[^{}]*\b(drivers_license|driversLicense|driver_license|driverLicense|license_number|licenseNumber|dl_number|dlNumber)\b['"]?\s*[:=]/i | ||
| $pp_bank_account = /\.(register|register_once|setPersonProperties|setPersonPropertiesForFlags)\s*\([^{]*\{[^{}]*\b(bank_account|bankAccount|account_number|accountNumber|routing_number|routingNumber|iban)\b['"]?\s*[:=]/i | ||
| $pp_medical = /\.(register|register_once|setPersonProperties|setPersonPropertiesForFlags)\s*\([^{]*\{[^{}]*\b(medical_record|medicalRecord|health_record|healthRecord|patient_id|patientId|mrn)\b['"]?\s*[:=]/i | ||
| $pp_gov_id = /\.(register|register_once|setPersonProperties|setPersonPropertiesForFlags)\s*\([^{]*\{[^{}]*\b(national_id|nationalId|tax_id|taxId|citizen_id|citizenId)\b['"]?\s*[:=]/i | ||
|
|
||
| condition: | ||
| any of them | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,39 @@ | ||
| // Detects PII-shaped literal VALUES passed to a PostHog tracking call, | ||
| // regardless of the property key. Complements posthog_pii_in_capture_call | ||
| // (which matches by key NAME) by catching PII hidden under an innocuous key, | ||
| // e.g. `posthog.capture('signup', { referrer: 'jane@example.com' })`. | ||
| // | ||
| // FP avoidance (same scoping as the key-name rule): only the top-level of the | ||
| // first `{ ... }` object is searched. Value patterns are deliberately | ||
| // high-precision to keep false positives low: | ||
| // - email: real address shape, and only in capture() — an email value in | ||
| // identify()/person-properties is the standard, correct pattern. | ||
|
Comment on lines
+9
to
+10
Collaborator
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Tiny nit: this rule only catches emails written as literal strings, not emails passed as variables. That's fine as a limit if it's intended, but it might be worth adding a comment here for context:
`// Known limit: only catches quoted email values, not variables`
Triage will catch most of this at runtime, but it could potentially be a gap, and having the context is helpful for robots and humans alike!
Author
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Added in 7ffc998 — a 'Known limit' note in the rule header: the rule only catches quoted/literal values, not variables (e.g. `{ email: userEmail }`).
||
| // - ssn: dashed US SSN (123-45-6789). | ||
| // - card: separator-grouped 16-digit PAN. YARA-X cannot run a Luhn check, | ||
| // so an unseparated 16-digit run is intentionally NOT matched (too noisy). | ||
| // | ||
| // Known limit: only catches quoted/literal values, not variables (e.g. | ||
| // `{ email: userEmail }`). Runtime triage catches most of those at the | ||
| // payload level; this rule is the static-analysis backstop for hardcoded | ||
| // leaks. | ||
|
|
||
| rule posthog_pii_value_in_tracking_call | ||
| { | ||
| meta: | ||
| description = "A PII-shaped literal value (email address, US SSN, or formatted credit-card number) passed to a PostHog tracking call." | ||
| remediation = "Remove the literal PII value from the event. Reference users by distinct ID and keep raw PII out of PostHog – https://posthog.com/docs/product-analytics/person-properties" | ||
| severity = "high" | ||
| category = "posthog_pii" | ||
| action = "remediate" | ||
| scan_context = "output" | ||
|
|
||
| strings: | ||
| // Email value — capture() only (email is a valid identify/person property). The address must have a quote character on both sides; matching is case-insensitive (/i). | ||
| $val_email = /\.capture\s*\([^{]*\{[^{}]*['"][a-z0-9._%+-]+@[a-z0-9.-]+\.[a-z]{2,}['"]/i | ||
| // Sensitive PII values — flagged across every tracking call-site. Bounded only by \b, not by quotes, so the digit group may appear anywhere in the top-level object text. | ||
| $val_ssn = /\.(capture|identify|register|register_once|setPersonProperties|setPersonPropertiesForFlags)\s*\([^{]*\{[^{}]*\b\d{3}-\d{2}-\d{4}\b/ | ||
| $val_card = /\.(capture|identify|register|register_once|setPersonProperties|setPersonPropertiesForFlags)\s*\([^{]*\{[^{}]*\b\d{4}[ -]\d{4}[ -]\d{4}[ -]\d{4}\b/ | ||
|
|
||
| condition: | ||
| any of them | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
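Rule 1 has a test that confirms it correctly doesn't fire on `{ $set: { ssn: '...' } }`. Rule 2 behaves the same way, but there's no test for it. Could we add a matching negative test? Something like:

it('does NOT match an SSN value nested inside a $set object', async () => {
  await expectRuleDidNotMatch(
    `posthog.setPersonProperties({ $set: { ssn: '123-45-6789' } })`,
    RULE,
  );
});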
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Added your suggested test verbatim in 7ffc998 — it confirms the value rule stays silent on `setPersonProperties({ $set: { ssn: '123-45-6789' } })`. The nested object isn't matched because the `[^{}]*` scope stops at the inner brace, the same FP-avoidance as rule 1. Suite green (536).