From fda2bcc62716139d7b420796f1e84ac27b471de3 Mon Sep 17 00:00:00 2001 From: Sergey Lazarenko Date: Wed, 25 Feb 2026 13:22:09 +0300 Subject: [PATCH 1/3] corrected patterns --- plugin/action/hash/normalize/README.md | 12 ++--- .../action/hash/normalize/token_normalizer.go | 37 ++++++++------ .../hash/normalize/token_normalizer_test.go | 51 +++++++++++++++---- 3 files changed, 67 insertions(+), 33 deletions(-) diff --git a/plugin/action/hash/normalize/README.md b/plugin/action/hash/normalize/README.md index 7eaf796b6..bf3c53108 100644 --- a/plugin/action/hash/normalize/README.md +++ b/plugin/action/hash/normalize/README.md @@ -23,11 +23,11 @@ We support a set of patterns out of the box. | 8 | url | `` | https://some.host.com/page1?a=1
ws://some.host1.host2.net
ftp://login:pass@serv.example.com:21/ | | 9 | host | `<host>` | www.weather.jp | | 10 | uuid | `<uuid>` | 7c1811ed-e98f-4c9c-a9f9-58c757ff494f | -| 11 | sha1 | `<sha1>` | a94a8fe5ccb19ba61c4c0873d391e987982fbbd3 | -| 12 | md5 | `<md5>` | 098f6bcd4621d373cade4e832627b4f6 | -| 13 | datetime | `<datetime>` | 2025-01-13T10:20:40.999999Z
2025-01-13T10:20:40+04:00
2025-01-13 10:20:40
2025-01-13
10:20:40 | -| 14 | ip | `<ip>` | 1.2.3.4
01.102.103.104 | -| 15 | duration | `<duration>` | -1m5s
1w2d3h4m5s6ms7us8ns | +| 11 | hash | `<hash>` | 48757ec9f04efe7faacec8722f3476339b125a6b6172b8a69ff3aa329e0bd0ff
a94a8fe5ccb19ba61c4c0873d391e987982fbbd3
098f6bcd4621d373cade4e832627b4f6 | +| 12 | datetime | `<datetime>` | 2025-01-13T10:20:40.999999Z
2025-01-13T10:20:40+04:00
2025-01-13 10:20:40
2025-01-13
10:20:40 | +| 13 | ip | `<ip>` | 1.2.3.4
01.102.103.104 | +| 14 | duration | `<duration>` | -1m5s
1w2d3h4m5s6ms7us8ns | +| 15 | filepath | `<filepath>` | /home/user/photos | | 16 | hex | `<hex>` | 0x13eb85e69dfbc0758b12acdaae36287d
0X553026A59C | | 17 | float | `<float>` | 100.23
-4.56 | | 18 | int | `<int>` | 100
-200 | @@ -36,4 +36,4 @@ We support a set of patterns out of the box. ### Limitations of the RE language We use the [lexmachine](https://github.com/timtadh/lexmachine) package to search for tokens according to the described patterns (lexical analysis). -This package doesn't support the full syntax of the RE language. For more information, see [readme](https://github.com/timtadh/lexmachine?tab=readme-ov-file#regular-expressions) section and [grammar](https://github.com/timtadh/lexmachine/blob/master/grammar) file. \ No newline at end of file +This package doesn't support the full syntax of the RE language. For more information, see [readme](https://github.com/timtadh/lexmachine?tab=readme-ov-file#regular-expressions) section and [grammar](https://github.com/timtadh/lexmachine/blob/master/grammar) file. diff --git a/plugin/action/hash/normalize/token_normalizer.go b/plugin/action/hash/normalize/token_normalizer.go index cbf4b4dfa..5c1fd93c5 100644 --- a/plugin/action/hash/normalize/token_normalizer.go +++ b/plugin/action/hash/normalize/token_normalizer.go @@ -29,11 +29,11 @@ const ( pUrl pHost pUuid - pSha1 - pMd5 + pHash pDatetime pIp pDuration + pFilepath pHex pFloat pInt @@ -54,11 +54,11 @@ var patternById = map[string]int{ "url": pUrl, "host": pHost, "uuid": pUuid, - "sha1": pSha1, - "md5": pMd5, + "hash": pHash, "datetime": pDatetime, "ip": pIp, "duration": pDuration, + "filepath": pFilepath, "hex": pHex, "float": pFloat, "int": pInt, @@ -76,11 +76,11 @@ var placeholderByPattern = map[int]string{ pUrl: "", pHost: "", pUuid: "", - pSha1: "", - pMd5: "", + pHash: "", pDatetime: "", pIp: "", pDuration: "", + pFilepath: "", pHex: "", pFloat: "", pInt: "", @@ -495,21 +495,21 @@ var builtinTokenPatterns = []TokenPattern{ mask: pUuid, }, { - Placeholder: placeholderByPattern[pSha1], - RE: strings.Repeat(`[0-9a-fA-F]`, 40), - - mask: pSha1, - }, - { - Placeholder: placeholderByPattern[pMd5], - RE: strings.Repeat(`[0-9a-fA-F]`, 32), + // pSha256, pSha1, Md5 + 
Placeholder: placeholderByPattern[pHash], + RE: fmt.Sprintf("(%s)|(%s)|(%s)", + strings.Repeat("[0-9a-fA-F]", 64), + strings.Repeat(`[0-9a-fA-F]`, 40), + strings.Repeat(`[0-9a-fA-F]`, 32), + ), - mask: pMd5, + mask: pHash, }, { // RFC3339, RFC3339Nano, DateTime, DateOnly, TimeOnly Placeholder: placeholderByPattern[pDatetime], - RE: fmt.Sprintf(`(%s)|(%s)|(%s)`, + RE: fmt.Sprintf(`(%s)|(%s)|(%s)|(%s)`, + `\d\d\d\d-\d\d-\d\d \d\d:\d\d:\d\d\.\d+ \+\d\d\d\d UTC m=\+\d+\.\d+`, `\d\d\d\d-\d\d-\d\dT\d\d:\d\d:\d\d(\.\d+)?(Z|[\+\-]\d\d:\d\d)`, `\d\d:\d\d:\d\d`, `\d\d\d\d-\d\d-\d\d( \d\d:\d\d:\d\d)?`, @@ -530,6 +530,11 @@ var builtinTokenPatterns = []TokenPattern{ mask: pDuration, }, + { + Placeholder: placeholderByPattern[pFilepath], + RE: `(/[a-zA-Z0-9-]+)+`, + mask: pFilepath, + }, { Placeholder: placeholderByPattern[pHex], RE: `0[xX][0-9a-fA-F]+`, diff --git a/plugin/action/hash/normalize/token_normalizer_test.go b/plugin/action/hash/normalize/token_normalizer_test.go index 78582193f..05f89d7ed 100644 --- a/plugin/action/hash/normalize/token_normalizer_test.go +++ b/plugin/action/hash/normalize/token_normalizer_test.go @@ -250,20 +250,19 @@ func TestTokenNormalizerBuiltin(t *testing.T) { want: "some here", }, { - name: "sha1", - inputs: []string{"some a94a8fe5ccb19ba61c4c0873d391e987982fbbd3 here"}, - patterns: "sha1", - want: "some here", - }, - { - name: "md5", - inputs: []string{"some 098f6bcd4621d373cade4e832627b4f6 here"}, - patterns: "md5", - want: "some here", + name: "hash", + inputs: []string{ + "some 48757ec9f04efe7faacec8722f3476339b125a6b6172b8a69ff3aa329e0bd0ff here", + "some a94a8fe5ccb19ba61c4c0873d391e987982fbbd3 here", + "some 098f6bcd4621d373cade4e832627b4f6 here", + }, + patterns: "hash", + want: "some here", }, { name: "datetime", inputs: []string{ + "some 2025-01-13 20:58:04.019973588 +0000 UTC m=+1417512.275697914 here", "some 2025-01-13T10:20:40Z here", "some 2025-01-13T10:20:40.999999999Z here", "some 2025-01-13T10:20:40-06:00 here", @@ -310,6 
+309,16 @@ func TestTokenNormalizerBuiltin(t *testing.T) { patterns: "duration", want: "some here", }, + { + name: "filepath", + inputs: []string{ + "some /plugin/action/normalize here", + "some /Users/seq-ui/action/playlist here", + "some /home/user/photos here", + }, + patterns: "filepath", + want: "some here", + }, { name: "hex", inputs: []string{ @@ -366,6 +375,7 @@ func TestTokenNormalizerBuiltin(t *testing.T) { - request: www.weather.jp - ip: 1.2.3.4 - email: user@subdomain.domain.org + - file: /home/user/photos Downloaded from https://some.host.test for 5.5s. `, @@ -379,12 +389,13 @@ func TestTokenNormalizerBuiltin(t *testing.T) { - milk - bananas - onions - - , , + - , , User info: - request: - ip: - email: + - file: Downloaded from for . `, @@ -443,6 +454,24 @@ func TestTokenNormalizerCustom(t *testing.T) { }, want: "some \"asdfasd\" and here", }, + { + name: "entity", + params: TokenNormalizerParams{ + BuiltinPatterns: "no", + CustomPatterns: []TokenPattern{ + { + Placeholder: "", + RE: `[a-zA-Z][a-zA-Z0-9]*(-[a-zA-Z0-9]+)+`, + }, + }, + }, + inputs: []string{ + "some client-123a-456b-789c here", + "some server-88f63-0876-45cf-aabb here", + "some resource-data-container-12345 here", + }, + want: "some here", + }, { name: "custom_with_builtin", params: TokenNormalizerParams{ From 7116a9cf6d52de7a5d284a1201098d98718d0b45 Mon Sep 17 00:00:00 2001 From: Sergey Lazarenko Date: Tue, 10 Mar 2026 01:07:51 +0300 Subject: [PATCH 2/3] correct filepath pattern --- plugin/action/hash/normalize/README.md | 12 ++++----- .../action/hash/normalize/token_normalizer.go | 25 ++++++++++--------- .../hash/normalize/token_normalizer_test.go | 24 +++--------------- 3 files changed, 23 insertions(+), 38 deletions(-) diff --git a/plugin/action/hash/normalize/README.md b/plugin/action/hash/normalize/README.md index bf3c53108..2324d616b 100644 --- a/plugin/action/hash/normalize/README.md +++ b/plugin/action/hash/normalize/README.md @@ -22,12 +22,12 @@ We support a set of patterns 
out of the box. | 7 | email | `` | test@host1.host2.com | | 8 | url | `` | https://some.host.com/page1?a=1
ws://some.host1.host2.net
ftp://login:pass@serv.example.com:21/ | | 9 | host | `<host>` | www.weather.jp | -| 10 | uuid | `<uuid>` | 7c1811ed-e98f-4c9c-a9f9-58c757ff494f | -| 11 | hash | `<hash>` | 48757ec9f04efe7faacec8722f3476339b125a6b6172b8a69ff3aa329e0bd0ff
a94a8fe5ccb19ba61c4c0873d391e987982fbbd3
098f6bcd4621d373cade4e832627b4f6 | -| 12 | datetime | `<datetime>` | 2025-01-13T10:20:40.999999Z
2025-01-13T10:20:40+04:00
2025-01-13 10:20:40
2025-01-13
10:20:40 | -| 13 | ip | `<ip>` | 1.2.3.4
01.102.103.104 | -| 14 | duration | `<duration>` | -1m5s
1w2d3h4m5s6ms7us8ns | -| 15 | filepath | `<filepath>` | /home/user/photos | +| 10 | filepath | `<filepath>` | /home/user/photos | +| 11 | uuid | `<uuid>` | 7c1811ed-e98f-4c9c-a9f9-58c757ff494f | +| 12 | hash | `<hash>` | 48757ec9f04efe7faacec8722f3476339b125a6b6172b8a69ff3aa329e0bd0ff
a94a8fe5ccb19ba61c4c0873d391e987982fbbd3
098f6bcd4621d373cade4e832627b4f6 | +| 13 | datetime | `<datetime>` | 2025-01-13T10:20:40.999999Z
2025-01-13T10:20:40+04:00
2025-01-13 10:20:40
2025-01-13
10:20:40 | +| 14 | ip | `<ip>` | 1.2.3.4
01.102.103.104 | +| 15 | duration | `<duration>` | -1m5s
1w2d3h4m5s6ms7us8ns | | 16 | hex | `<hex>` | 0x13eb85e69dfbc0758b12acdaae36287d
0X553026A59C | | 17 | float | `<float>` | 100.23
-4.56 | | 18 | int | `<int>` | 100
-200 | diff --git a/plugin/action/hash/normalize/token_normalizer.go b/plugin/action/hash/normalize/token_normalizer.go index 5c1fd93c5..9f01a536c 100644 --- a/plugin/action/hash/normalize/token_normalizer.go +++ b/plugin/action/hash/normalize/token_normalizer.go @@ -28,12 +28,12 @@ const ( pEmail pUrl pHost + pFilepath pUuid pHash pDatetime pIp pDuration - pFilepath pHex pFloat pInt @@ -53,12 +53,12 @@ var patternById = map[string]int{ "email": pEmail, "url": pUrl, "host": pHost, + "filepath": pFilepath, "uuid": pUuid, "hash": pHash, "datetime": pDatetime, "ip": pIp, "duration": pDuration, - "filepath": pFilepath, "hex": pHex, "float": pFloat, "int": pInt, @@ -75,12 +75,12 @@ var placeholderByPattern = map[int]string{ pEmail: "", pUrl: "", pHost: "", + pFilepath: "", pUuid: "", pHash: "", pDatetime: "", pIp: "", pDuration: "", - pFilepath: "", pHex: "", pFloat: "", pInt: "", @@ -482,6 +482,11 @@ var builtinTokenPatterns = []TokenPattern{ mask: pHost, }, + { + Placeholder: placeholderByPattern[pFilepath], + RE: `([a-zA-Z]:[\\/]|[\\/])[^ \t\n\r]*`, + mask: pFilepath, + }, { Placeholder: placeholderByPattern[pUuid], RE: fmt.Sprintf(`%s-%s-%s-%s-%s`, @@ -495,9 +500,10 @@ var builtinTokenPatterns = []TokenPattern{ mask: pUuid, }, { - // pSha256, pSha1, Md5 + // SHA512, SHA256, SHA1, MD5 Placeholder: placeholderByPattern[pHash], - RE: fmt.Sprintf("(%s)|(%s)|(%s)", + RE: fmt.Sprintf("(%s)|(%s)|(%s)|(%s)", + strings.Repeat("[0-9a-fA-F]", 128), strings.Repeat("[0-9a-fA-F]", 64), strings.Repeat(`[0-9a-fA-F]`, 40), strings.Repeat(`[0-9a-fA-F]`, 32), @@ -506,10 +512,10 @@ var builtinTokenPatterns = []TokenPattern{ mask: pHash, }, { - // RFC3339, RFC3339Nano, DateTime, DateOnly, TimeOnly + // RFC3339, RFC3339Nano, DateTime, DateOnly, TimeOnly, Go time with monotonic clock Placeholder: placeholderByPattern[pDatetime], RE: fmt.Sprintf(`(%s)|(%s)|(%s)|(%s)`, - `\d\d\d\d-\d\d-\d\d \d\d:\d\d:\d\d\.\d+ \+\d\d\d\d UTC m=\+\d+\.\d+`, + `\d\d\d\d-\d\d-\d\d \d\d:\d\d:\d\d\.\d+ 
[+\-]\d\d\d\d [A-Z]+ m=[+\-]\d+\.\d+`, `\d\d\d\d-\d\d-\d\dT\d\d:\d\d:\d\d(\.\d+)?(Z|[\+\-]\d\d:\d\d)`, `\d\d:\d\d:\d\d`, `\d\d\d\d-\d\d-\d\d( \d\d:\d\d:\d\d)?`, @@ -530,11 +536,6 @@ var builtinTokenPatterns = []TokenPattern{ mask: pDuration, }, - { - Placeholder: placeholderByPattern[pFilepath], - RE: `(/[a-zA-Z0-9-]+)+`, - mask: pFilepath, - }, { Placeholder: placeholderByPattern[pHex], RE: `0[xX][0-9a-fA-F]+`, diff --git a/plugin/action/hash/normalize/token_normalizer_test.go b/plugin/action/hash/normalize/token_normalizer_test.go index 05f89d7ed..b713398e6 100644 --- a/plugin/action/hash/normalize/token_normalizer_test.go +++ b/plugin/action/hash/normalize/token_normalizer_test.go @@ -252,6 +252,7 @@ func TestTokenNormalizerBuiltin(t *testing.T) { { name: "hash", inputs: []string{ + "some cf83e1357eefb8bdf1542850d66d8007d620e4050b5715dc83f4a921d36ce9ce47d0d13c5d85f2b0ff8318d2877eec2f63b931bd47417a81a538327af927da3e here", "some 48757ec9f04efe7faacec8722f3476339b125a6b6172b8a69ff3aa329e0bd0ff here", "some a94a8fe5ccb19ba61c4c0873d391e987982fbbd3 here", "some 098f6bcd4621d373cade4e832627b4f6 here", @@ -263,6 +264,8 @@ func TestTokenNormalizerBuiltin(t *testing.T) { name: "datetime", inputs: []string{ "some 2025-01-13 20:58:04.019973588 +0000 UTC m=+1417512.275697914 here", + "some 2025-01-13 20:58:04.019973588 -0700 MST m=-123.456789012 here", + "some 2025-01-13 20:58:04.019973588 +0300 MSK m=+0.123456789 here", "some 2025-01-13T10:20:40Z here", "some 2025-01-13T10:20:40.999999999Z here", "some 2025-01-13T10:20:40-06:00 here", @@ -312,9 +315,8 @@ func TestTokenNormalizerBuiltin(t *testing.T) { { name: "filepath", inputs: []string{ - "some /plugin/action/normalize here", + `some C:\Windows\System32\drivers\etc\hosts here`, "some /Users/seq-ui/action/playlist here", - "some /home/user/photos here", }, patterns: "filepath", want: "some here", @@ -454,24 +456,6 @@ func TestTokenNormalizerCustom(t *testing.T) { }, want: "some \"asdfasd\" and here", }, - { - name: 
"entity", - params: TokenNormalizerParams{ - BuiltinPatterns: "no", - CustomPatterns: []TokenPattern{ - { - Placeholder: "", - RE: `[a-zA-Z][a-zA-Z0-9]*(-[a-zA-Z0-9]+)+`, - }, - }, - }, - inputs: []string{ - "some client-123a-456b-789c here", - "some server-88f63-0876-45cf-aabb here", - "some resource-data-container-12345 here", - }, - want: "some here", - }, { name: "custom_with_builtin", params: TokenNormalizerParams{ From 98e6598c1b78872772d4ee70fdf106997bad5fbb Mon Sep 17 00:00:00 2001 From: Sergey Lazarenko Date: Tue, 10 Mar 2026 10:52:04 +0300 Subject: [PATCH 3/3] correct test filepath --- plugin/action/hash/normalize/token_normalizer_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugin/action/hash/normalize/token_normalizer_test.go b/plugin/action/hash/normalize/token_normalizer_test.go index b713398e6..8b061569b 100644 --- a/plugin/action/hash/normalize/token_normalizer_test.go +++ b/plugin/action/hash/normalize/token_normalizer_test.go @@ -315,7 +315,7 @@ func TestTokenNormalizerBuiltin(t *testing.T) { { name: "filepath", inputs: []string{ - `some C:\Windows\System32\drivers\etc\hosts here`, + `some C:\\Windows\\System32\\drivers\\etc\\hosts here`, "some /Users/seq-ui/action/playlist here", }, patterns: "filepath",