Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions plugin/action/hash/normalize/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,9 @@ We support a set of patterns out of the box.
| 7 | email | `<email>` | test@host1.host2.com |
| 8 | url | `<url>` | https://some.host.com/page1?a=1<br>ws://some.host1.host2.net<br>ftp://login:pass@serv.example.com:21/ |
| 9 | host | `<host>` | www.weather.jp |
| 10 | uuid | `<uuid>` | 7c1811ed-e98f-4c9c-a9f9-58c757ff494f |
| 11 | sha1 | `<sha1>` | a94a8fe5ccb19ba61c4c0873d391e987982fbbd3 |
| 12 | md5 | `<md5>` | 098f6bcd4621d373cade4e832627b4f6 |
| 10 | filepath | `<filepath>` | /home/user/photos |
| 11 | uuid | `<uuid>` | 7c1811ed-e98f-4c9c-a9f9-58c757ff494f |
| 12 | hash | `<hash>` | 48757ec9f04efe7faacec8722f3476339b125a6b6172b8a69ff3aa329e0bd0ff<br>a94a8fe5ccb19ba61c4c0873d391e987982fbbd3<br>098f6bcd4621d373cade4e832627b4f6 |
| 13 | datetime | `<datetime>` | 2025-01-13T10:20:40.999999Z<br>2025-01-13T10:20:40+04:00<br>2025-01-13 10:20:40<br>2025-01-13<br>10:20:40 |
| 14 | ip | `<ip>` | 1.2.3.4<br>01.102.103.104 |
| 15 | duration | `<duration>` | -1m5s<br>1w2d3h4m5s6ms7us8ns |
Expand All @@ -36,4 +36,4 @@ We support a set of patterns out of the box.
### Limitations of the RE language
We use the [lexmachine](https://github.com/timtadh/lexmachine) package to search for tokens according to the described patterns (lexical analysis).

This package doesn't support the full syntax of the RE language. For more information, see [readme](https://github.com/timtadh/lexmachine?tab=readme-ov-file#regular-expressions) section and [grammar](https://github.com/timtadh/lexmachine/blob/master/grammar) file.
This package doesn't support the full regular expression (RE) syntax. For more information, see the [readme](https://github.com/timtadh/lexmachine?tab=readme-ov-file#regular-expressions) section and the [grammar](https://github.com/timtadh/lexmachine/blob/master/grammar) file.
40 changes: 23 additions & 17 deletions plugin/action/hash/normalize/token_normalizer.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,9 @@ const (
pEmail
pUrl
pHost
pFilepath
pUuid
pSha1
pMd5
pHash
pDatetime
pIp
pDuration
Expand All @@ -53,9 +53,9 @@ var patternById = map[string]int{
"email": pEmail,
"url": pUrl,
"host": pHost,
"filepath": pFilepath,
"uuid": pUuid,
"sha1": pSha1,
"md5": pMd5,
"hash": pHash,
"datetime": pDatetime,
"ip": pIp,
"duration": pDuration,
Expand All @@ -75,9 +75,9 @@ var placeholderByPattern = map[int]string{
pEmail: "<email>",
pUrl: "<url>",
pHost: "<host>",
pFilepath: "<filepath>",
pUuid: "<uuid>",
pSha1: "<sha1>",
pMd5: "<md5>",
pHash: "<hash>",
pDatetime: "<datetime>",
pIp: "<ip>",
pDuration: "<duration>",
Expand Down Expand Up @@ -482,6 +482,11 @@ var builtinTokenPatterns = []TokenPattern{

mask: pHost,
},
{
Placeholder: placeholderByPattern[pFilepath],
RE: `([a-zA-Z]:[\\/]|[\\/])[^ \t\n\r]*`,
mask: pFilepath,
},
{
Placeholder: placeholderByPattern[pUuid],
RE: fmt.Sprintf(`%s-%s-%s-%s-%s`,
Expand All @@ -495,21 +500,22 @@ var builtinTokenPatterns = []TokenPattern{
mask: pUuid,
},
{
Placeholder: placeholderByPattern[pSha1],
RE: strings.Repeat(`[0-9a-fA-F]`, 40),

mask: pSha1,
},
{
Placeholder: placeholderByPattern[pMd5],
RE: strings.Repeat(`[0-9a-fA-F]`, 32),
// SHA512, SHA256, SHA1, MD5
Placeholder: placeholderByPattern[pHash],
RE: fmt.Sprintf("(%s)|(%s)|(%s)|(%s)",
strings.Repeat("[0-9a-fA-F]", 128),
strings.Repeat("[0-9a-fA-F]", 64),
strings.Repeat(`[0-9a-fA-F]`, 40),
strings.Repeat(`[0-9a-fA-F]`, 32),
),

mask: pMd5,
mask: pHash,
},
{
// RFC3339, RFC3339Nano, DateTime, DateOnly, TimeOnly
// RFC3339, RFC3339Nano, DateTime, DateOnly, TimeOnly, Go time with monotonic clock
Placeholder: placeholderByPattern[pDatetime],
RE: fmt.Sprintf(`(%s)|(%s)|(%s)`,
RE: fmt.Sprintf(`(%s)|(%s)|(%s)|(%s)`,
`\d\d\d\d-\d\d-\d\d \d\d:\d\d:\d\d\.\d+ [+\-]\d\d\d\d [A-Z]+ m=[+\-]\d+\.\d+`,
`\d\d\d\d-\d\d-\d\dT\d\d:\d\d:\d\d(\.\d+)?(Z|[\+\-]\d\d:\d\d)`,
`\d\d:\d\d:\d\d`,
`\d\d\d\d-\d\d-\d\d( \d\d:\d\d:\d\d)?`,
Expand Down
35 changes: 24 additions & 11 deletions plugin/action/hash/normalize/token_normalizer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -250,20 +250,22 @@ func TestTokenNormalizerBuiltin(t *testing.T) {
want: "some <uuid> here",
},
{
name: "sha1",
inputs: []string{"some a94a8fe5ccb19ba61c4c0873d391e987982fbbd3 here"},
patterns: "sha1",
want: "some <sha1> here",
},
{
name: "md5",
inputs: []string{"some 098f6bcd4621d373cade4e832627b4f6 here"},
patterns: "md5",
want: "some <md5> here",
name: "hash",
inputs: []string{
"some cf83e1357eefb8bdf1542850d66d8007d620e4050b5715dc83f4a921d36ce9ce47d0d13c5d85f2b0ff8318d2877eec2f63b931bd47417a81a538327af927da3e here",
"some 48757ec9f04efe7faacec8722f3476339b125a6b6172b8a69ff3aa329e0bd0ff here",
"some a94a8fe5ccb19ba61c4c0873d391e987982fbbd3 here",
"some 098f6bcd4621d373cade4e832627b4f6 here",
},
patterns: "hash",
want: "some <hash> here",
},
{
name: "datetime",
inputs: []string{
"some 2025-01-13 20:58:04.019973588 +0000 UTC m=+1417512.275697914 here",
"some 2025-01-13 20:58:04.019973588 -0700 MST m=-123.456789012 here",
"some 2025-01-13 20:58:04.019973588 +0300 MSK m=+0.123456789 here",
"some 2025-01-13T10:20:40Z here",
"some 2025-01-13T10:20:40.999999999Z here",
"some 2025-01-13T10:20:40-06:00 here",
Expand Down Expand Up @@ -310,6 +312,15 @@ func TestTokenNormalizerBuiltin(t *testing.T) {
patterns: "duration",
want: "some <duration> here",
},
{
name: "filepath",
inputs: []string{
`some C:\Windows\System32\drivers\etc\hosts here`,
"some /Users/seq-ui/action/playlist here",
},
patterns: "filepath",
want: "some <filepath> here",
},
{
name: "hex",
inputs: []string{
Expand Down Expand Up @@ -366,6 +377,7 @@ func TestTokenNormalizerBuiltin(t *testing.T) {
- request: www.weather.jp
- ip: 1.2.3.4
- email: user@subdomain.domain.org
- file: /home/user/photos

Downloaded from https://some.host.test for 5.5s.
`,
Expand All @@ -379,12 +391,13 @@ func TestTokenNormalizerBuiltin(t *testing.T) {
- <float> milk
- <bool> bananas
- <hex> onions
- <uuid>, <sha1>, <md5>
- <uuid>, <hash>, <hash>

User info:
- request: <host>
- ip: <ip>
- email: <email>
- file: <filepath>

Downloaded from <url> for <duration>.
`,
Expand Down
Loading