Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 38 additions & 1 deletion internal/qualitygate/publiccontent/rules.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,9 @@ func namedPlaceholderValue(value string) bool {
case "...", "placeholder", "redacted", "<redacted>", "xxxx", "test-secret":
return true
}
return strings.Contains(value, "cli_example") || allXPlaceholder(value)
return strings.Contains(value, "cli_example") ||
allXPlaceholder(value) ||
conventionalNamedPlaceholderValue(value)
}

func allXPlaceholder(value string) bool {
Expand All @@ -81,6 +83,41 @@ func allXPlaceholder(value string) bool {
return true
}

func conventionalNamedPlaceholderValue(value string) bool {
if !delimitedPlaceholderIdentifier(value) {
return false
}
normalized := strings.ReplaceAll(value, "-", "_")
if rest, ok := strings.CutPrefix(normalized, "your_"); ok {
return conventionalCredentialPlaceholderName(rest)
}
if rest, ok := strings.CutSuffix(normalized, "_here"); ok {
return conventionalCredentialPlaceholderName(rest)
}
return false
}

func conventionalCredentialPlaceholderName(value string) bool {
switch value {
case "api_key",
"access_key",
"private_key",
"secret",
"password",
"passwd",
"token",
"webhook",
"access_token",
"refresh_token",
"bearer_token",
"session_token",
"client_secret":
return true
default:
return false
}
}

func urlWithAnglePlaceholder(value string) bool {
if !strings.Contains(value, "://") ||
!strings.Contains(value, "<") ||
Expand Down
54 changes: 47 additions & 7 deletions internal/qualitygate/publiccontent/scan.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
package publiccontent

import (
"encoding/base64"
"encoding/json"
"fmt"
"path/filepath"
"sort"
Expand Down Expand Up @@ -63,12 +65,15 @@
out = append(out, newFinding("public_content_generic_credential", file, lineNo, source, redactAssignment(match[0])))
}
for _, match := range jwtLikeRE.FindAllString(line, -1) {
if isSchemaDottedIdentifier(line, match) {
if !isJWTToken(match) {
continue
}
out = append(out, newFinding("public_content_jwt_like_token", file, lineNo, source, redactToken(match)))
}
for range bearerHeaderRE.FindAllString(line, -1) {
for _, match := range bearerHeaderRE.FindAllString(line, -1) {
if isPlaceholderBearerHeader(match) {
continue
}
out = append(out, newFinding("public_content_bearer_header", file, lineNo, source, "Authorization: Bearer <redacted>"))
}
for _, match := range credentialURLRE.FindAllString(line, -1) {
Expand Down Expand Up @@ -391,10 +396,6 @@
return false
}

func isSchemaDottedIdentifier(line, match string) bool {
return strings.Contains(line, "schema ") && strings.Contains(match, "_")
}

func isNonSecretLiteralValue(value string) bool {
switch strings.ToLower(strings.TrimSpace(strings.Trim(value, `"'`))) {
case "true", "false", "null", "nil", "{", "[":
Expand All @@ -404,6 +405,40 @@
}
}

func isJWTToken(value string) bool {
parts := strings.Split(value, ".")
if len(parts) != 3 {
return false

Check warning on line 411 in internal/qualitygate/publiccontent/scan.go

View check run for this annotation

Codecov / codecov/patch

internal/qualitygate/publiccontent/scan.go#L411

Added line #L411 was not covered by tests
}
header, err := decodeBase64URLSegment(parts[0])
if err != nil || !json.Valid(header) {
return false
}
var fields map[string]interface{}
if err := json.Unmarshal(header, &fields); err != nil {
return false

Check warning on line 419 in internal/qualitygate/publiccontent/scan.go

View check run for this annotation

Codecov / codecov/patch

internal/qualitygate/publiccontent/scan.go#L419

Added line #L419 was not covered by tests
}
alg, ok := fields["alg"].(string)
return ok && alg != ""
}

func decodeBase64URLSegment(value string) ([]byte, error) {
if decoded, err := base64.RawURLEncoding.DecodeString(value); err == nil {
return decoded, nil
}
return base64.URLEncoding.DecodeString(value)
}

func isPlaceholderBearerHeader(match string) bool {
normalized := strings.ToLower(match)
idx := strings.LastIndex(normalized, "bearer ")
if idx < 0 {
return false

Check warning on line 436 in internal/qualitygate/publiccontent/scan.go

View check run for this annotation

Codecov / codecov/patch

internal/qualitygate/publiccontent/scan.go#L436

Added line #L436 was not covered by tests
}
value := strings.TrimSpace(match[idx+len("bearer "):])
return isPlaceholderValue(value)
}
Comment thread
HanShaoshuai-k marked this conversation as resolved.

func isWebhookCredentialKey(key string) bool {
return strings.Contains(strings.ReplaceAll(key, "_", ""), "webhook")
}
Expand Down Expand Up @@ -741,7 +776,12 @@
text = strings.ReplaceAll(text, `<redacted>"`, `<redacted>`)
text = strings.ReplaceAll(text, `<redacted>'`, `<redacted>`)
text = semanticBearerHeaderRE.ReplaceAllString(text, "Authorization: Bearer <redacted>")
text = jwtLikeRE.ReplaceAllString(text, "<jwt-like-token>")
text = jwtLikeRE.ReplaceAllStringFunc(text, func(match string) string {
if isJWTToken(match) {
return "<jwt-like-token>"
}
return match
})
text = credentialURLRE.ReplaceAllStringFunc(text, sanitizeCredentialURL)
return strings.Join(strings.Fields(text), " ")
}
Expand Down
116 changes: 103 additions & 13 deletions internal/qualitygate/publiccontent/scan_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,7 @@ func TestSemanticCandidateCoversRealE2ESemanticCases(t *testing.T) {
}

func TestScanFileDetectsDetectorFingerprintOnlyInPublicRuleFiles(t *testing.T) {
got := ScanFile(".gitleaks.toml", []byte("[[rules]]\nid = \"public"+"-content-leakage\"\n"))
got := ScanFile("testdata/publiccontent/.gitleaks.toml", []byte("[[rules]]\nid = \"public"+"-content-leakage\"\n"))
if !findingRules(got)["public_content_detector_fingerprint"] {
t.Fatalf("expected detector fingerprint finding, got %#v", got)
}
Expand Down Expand Up @@ -549,7 +549,7 @@ func TestScanFileDetectsCredentialURLWithEmptyUsername(t *testing.T) {
}

func TestScanFileAllowsPrivateKeyStateBooleans(t *testing.T) {
got := ScanFile("internal/qualitygate/publiccontent/collect.go", []byte(strings.Join([]string{
got := ScanFile("fixtures/scanner_state.go", []byte(strings.Join([]string{
"inPrivateKey = true",
"inPrivateKey = false",
"hasPrivateKey: false",
Expand Down Expand Up @@ -725,7 +725,7 @@ func TestScanFileAllowsBenignJSONTokenFields(t *testing.T) {
}

func TestScanFileAllowsTestFixtureSecretValues(t *testing.T) {
got := ScanFile("shortcuts/calendar/calendar_meeting_test.go", []byte(`AppID: "test-app", AppSecret: "test-secret", Brand: core.BrandFeishu,`+"\n"))
got := ScanFile("fixtures/calendar_meeting_test.go", []byte(`AppID: "test-app", AppSecret: "test-secret", Brand: core.BrandFeishu,`+"\n"))
for _, item := range got {
if item.Rule == "public_content_generic_credential" {
t.Fatalf("test fixture secret should not be credential finding: %#v", got)
Expand All @@ -734,7 +734,7 @@ func TestScanFileAllowsTestFixtureSecretValues(t *testing.T) {
}

func TestScanFileAllowsRegexpTokenValidators(t *testing.T) {
got := ScanFile("shortcuts/minutes/minutes_detail.go", []byte("var validMinuteTokenDetail = regexp.MustCompile(`^[a-z0-9]+$`)\n"))
got := ScanFile("fixtures/minutes_detail.go", []byte("var validMinuteTokenDetail = regexp.MustCompile(`^[a-z0-9]+$`)\n"))
for _, item := range got {
if item.Rule == "public_content_generic_credential" {
t.Fatalf("regexp token validator should not be credential finding: %#v", got)
Expand All @@ -743,7 +743,7 @@ func TestScanFileAllowsRegexpTokenValidators(t *testing.T) {
}

func TestScanFileAllowsBenignSourceCodeCredentialExpressions(t *testing.T) {
got := ScanFile("cmd/config/binder.go", []byte(strings.Join([]string{
got := ScanFile("fixtures/config_binder.go", []byte(strings.Join([]string{
"AppSecret: stored,",
"AccessToken: result.Token.AccessToken,",
`token := runtime.Str("token")`,
Expand All @@ -756,7 +756,7 @@ func TestScanFileAllowsBenignSourceCodeCredentialExpressions(t *testing.T) {
}

func TestScanFileAllowsPythonArgumentTokens(t *testing.T) {
got := ScanFile("skills/lark-slides/scripts/iconpark_tool.py", []byte(strings.Join([]string{
got := ScanFile("fixtures/iconpark_tool.py", []byte(strings.Join([]string{
"def normalize_token(value: str) -> str:",
" token = rest[index]",
" next_token = rest[index + 1] if index + 1 < len(rest) else None",
Expand All @@ -771,7 +771,7 @@ func TestScanFileAllowsPythonArgumentTokens(t *testing.T) {
}

func TestScanFileAllowsEllipsisCredentialPlaceholders(t *testing.T) {
got := ScanFile("skills/lark-doc/references/lark-doc-fetch.md", []byte(strings.Join([]string{
got := ScanFile("fixtures/lark-doc-fetch.md", []byte(strings.Join([]string{
`<img token="..." url="https://..." width="..." height="..."/>`,
`<sheet token="..." sheet-id="...">`,
}, "\n")+"\n"))
Expand All @@ -783,16 +783,46 @@ func TestScanFileAllowsEllipsisCredentialPlaceholders(t *testing.T) {
}

func TestScanFileAllowsSchemaDottedIdentifiers(t *testing.T) {
got := ScanFile("skills/lark-mail/references/lark-mail-recall.md", []byte("lark-cli schema mail.user_mailbox.sent_messages.get_recall_detail\n"))
got := ScanFile("fixtures/lark-mail-recall.md", []byte("lark-cli schema mail.user_mailbox.sent_messages.get_recall_detail\n"))
for _, item := range got {
if item.Rule == "public_content_jwt_like_token" {
t.Fatalf("schema dotted identifier should not be jwt finding: %#v", got)
}
}
}

func TestScanFileAllowsMarkdownDottedAPIIdentifiers(t *testing.T) {
got := ScanFile("fixtures/mail_api_table.md", []byte(strings.Join([]string{
"| Method | Permission |",
"| --- | --- |",
"| `user_mailbox.sent_messages.get_recall_detail` | `mail:user_mailbox.message:readonly` |",
"| `user_mailbox.allow_sender.batch_create` | `mail:user_mailbox.message:modify` |",
"| `user_mailbox.allow_sender.batch_remove` | `mail:user_mailbox.message:modify` |",
"| `user_mailbox.blocked_sender.batch_create` | `mail:user_mailbox.message:modify` |",
"| `user_mailbox.blocked_sender.batch_remove` | `mail:user_mailbox.message:modify` |",
}, "\n")+"\n"))
for _, item := range got {
if item.Rule == "public_content_jwt_like_token" {
t.Fatalf("markdown dotted API identifier should not be jwt finding: %#v", got)
}
}
}

func TestScanFileAllowsNonJWTDottedTaxonomy(t *testing.T) {
got := ScanFile("docs/api.md", []byte(strings.Join([]string{
"application/vnd.openxmlformats-officedocument.presentationml.presentation",
"corehr:employment.international_assignment.custom_field.apaas_id__c:read",
"user_mailbox.sent_messages.get_recall_detail queries recall detail.",
}, "\n")+"\n"))
for _, item := range got {
if item.Rule == "public_content_jwt_like_token" {
t.Fatalf("non-JWT dotted taxonomy should not be jwt finding: %#v", got)
}
}
}

func TestScanFileAllowsClientTokenIdempotencyExamples(t *testing.T) {
got := ScanFile("skills/idempotency.md", []byte(strings.Join([]string{
got := ScanFile("fixtures/idempotency.md", []byte(strings.Join([]string{
`{"client_token":"1704067200"}`,
`{"client_token":"fe599b60-450f-46ff-b2ef-9f6675625b97"}`,
}, "\n")+"\n"))
Expand All @@ -805,7 +835,7 @@ func TestScanFileAllowsClientTokenIdempotencyExamples(t *testing.T) {

func TestScanFileDetectsCredentialShapedClientTokenValues(t *testing.T) {
stripeLike := "sk_" + "live_1234567890abcdef"
got := ScanFile("skills/idempotency.md", []byte(strings.Join([]string{
got := ScanFile("fixtures/idempotency.md", []byte(strings.Join([]string{
`{"client_token":"` + stripeLike + `"}`,
`{"client_token":"real-client-secret-value"}`,
}, "\n")+"\n"))
Expand All @@ -821,7 +851,7 @@ func TestScanFileDetectsCredentialShapedClientTokenValues(t *testing.T) {
}

func TestScanFileAllowsTokenLikePlaceholderExamples(t *testing.T) {
got := ScanFile("skills/placeholders.md", []byte(strings.Join([]string{
got := ScanFile("fixtures/placeholders.md", []byte(strings.Join([]string{
`{ "block_token": "boardXXXX" }`,
`{ "resource_token": "doc_token_or_url" }`,
`{ "token": "canonical_token" }`,
Expand All @@ -841,7 +871,7 @@ func TestScanFileAllowsTokenLikePlaceholderExamples(t *testing.T) {

func TestScanFileDetectsCredentialShapedTokenLikePlaceholderValues(t *testing.T) {
stripeLike := "sk_" + "live_1234567890abcdef"
got := ScanFile("skills/placeholders.md", []byte(strings.Join([]string{
got := ScanFile("fixtures/placeholders.md", []byte(strings.Join([]string{
`{ "resource_token": "` + stripeLike + `" }`,
`{ "block_token": "real-client-secret-value" }`,
}, "\n")+"\n"))
Expand All @@ -857,7 +887,7 @@ func TestScanFileDetectsCredentialShapedTokenLikePlaceholderValues(t *testing.T)
}

func TestScanFileDetectsNonFixtureMinuteTokenValues(t *testing.T) {
got := ScanFile("shortcuts/minutes/minutes_search_test.go", []byte(`{"token":"minute_real_secret"}`+"\n"))
got := ScanFile("fixtures/minutes_search_test.go", []byte(`{"token":"minute_real_secret"}`+"\n"))
if !findingRules(got)["public_content_generic_credential"] {
t.Fatalf("non-fixture minute token should be credential finding: %#v", got)
}
Expand Down Expand Up @@ -958,6 +988,19 @@ func TestScanFileDetectsJSONBearerHeaders(t *testing.T) {
}
}

func TestScanFileAllowsBearerHeaderPlaceholders(t *testing.T) {
got := ScanFile("docs/auth.md", []byte(strings.Join([]string{
"Authorization: Bearer YOUR_ACCESS_TOKEN",
`{"Authorization":"Bearer ACCESS_TOKEN_HERE"}`,
"Authorization: Bearer <access-token>",
}, "\n")+"\n"))
for _, item := range got {
if item.Rule == "public_content_bearer_header" {
t.Fatalf("bearer placeholder should not be bearer finding: %#v", got)
}
}
}
Comment thread
HanShaoshuai-k marked this conversation as resolved.

func TestSemanticCandidateRedactsJSONBearerHeaders(t *testing.T) {
token := "abcdefghijklmnopqrstuvwxyz"
text := "private launch plan for internal rollout on Friday\n" +
Expand All @@ -975,6 +1018,22 @@ func TestSemanticCandidateRedactsJSONBearerHeaders(t *testing.T) {
}
}

func TestSemanticCandidateKeepsNonJWTDottedTaxonomy(t *testing.T) {
text := "private launch plan for internal rollout on Friday\n" +
"Supported MIME type: application/vnd.openxmlformats-officedocument.presentationml.presentation\n"

got := semanticCandidate("docs/public.md", "file", text, 1)
if len(got) != 1 {
t.Fatalf("semantic candidate len = %d, want 1: %#v", len(got), got)
}
if strings.Contains(got[0].Excerpt, "<jwt-like-token>") {
t.Fatalf("semantic candidate should not redact non-JWT dotted taxonomy: %#v", got[0])
}
if !strings.Contains(got[0].Excerpt, "application/vnd.openxmlformats-officedocument.presentationml.presentation") {
t.Fatalf("semantic candidate should keep non-JWT dotted taxonomy, got %#v", got[0])
}
}

func TestScanFileDetectsCommonProvenanceMarkers(t *testing.T) {
text := strings.Join([]string{
"Generated with automated code assistant",
Expand Down Expand Up @@ -1012,6 +1071,37 @@ func TestScanFileAllowsPercentWrappedPlaceholder(t *testing.T) {
}
}

func TestScanFileAllowsConventionalCredentialPlaceholders(t *testing.T) {
got := ScanFile("docs/config.md", []byte(strings.Join([]string{
"client_secret: YOUR_CLIENT_SECRET",
"api_key: YOUR_API_KEY",
"password: YOUR_PASSWORD",
"access_token: ACCESS_TOKEN_HERE",
}, "\n")+"\n"))
for _, item := range got {
if item.Rule == "public_content_generic_credential" {
t.Fatalf("conventional credential placeholder should not be credential finding: %#v", got)
}
}
}
Comment thread
HanShaoshuai-k marked this conversation as resolved.

func TestScanFileDetectsCredentialShapedPlaceholderLookalikes(t *testing.T) {
stripeLike := "sk_" + "live_1234567890abcdef"
got := ScanFile("docs/config.md", []byte(strings.Join([]string{
"client_secret: " + stripeLike + "_HERE",
"api_key: YOUR_" + stripeLike,
}, "\n")+"\n"))
var count int
for _, item := range got {
if item.Rule == "public_content_generic_credential" {
count++
}
}
if count != 2 {
t.Fatalf("credential-shaped placeholder lookalike findings = %d, want 2: %#v", count, got)
}
}

func TestScanFileDetectsPercentWrappedCredentialValues(t *testing.T) {
stripeLike := "sk_" + "live_1234567890abcdef"
patLike := "gh" + "p_1234567890abcdef1234567890abcdef1234"
Expand Down
Loading