diff --git a/docs/en/02-configuration.md b/docs/en/02-configuration.md index 7d3eda6..268515d 100644 --- a/docs/en/02-configuration.md +++ b/docs/en/02-configuration.md @@ -513,6 +513,95 @@ Config for `/seqapi` API handlers. Type of field. ++ **`masking`** *`Masking`* *`optional`* + + Masking configuration. + + ⚠ **Experimental feature** + + `Masking` fields: + + + **`masks`** *`[]Mask`* *`required`* + + List of masks. + + + **`process_fields`** *`[]string`* *`default=[]`* + + List of processed event fields. + + > It is wrong to set non-empty ignored fields list and non-empty processed fields list at the same time. + + + **`ignore_fields`** *`[]string`* *`default=[]`* + + List of ignored event fields. + + > It is wrong to set non-empty ignored fields list and non-empty processed fields list at the same time. + + `Mask` fields: + + + **`re`** *`string`* *`required`* + + Regular expression for masking. + + + **`groups`** *`[]int`* *`default=[]`* + + Groups are numbers of masking groups in expression. If set to empty list or the list **contains** `0`, the full expression will be masked. + + + **`mode`** *`string`* *`required`* *`options="mask"|"replace"|"cut"`* + + Masking mode: + - `mask` - asterisks (`*`) are used for masking + - `replace` - `replace_word` is used for masking + - `cut` - masking parts will be cut instead of being replaced + + + **`replace_word`** *`string`* *`default=""`* + + Replacement word used in `mode: replace`. + + > Ignored in other mods. + + + **`process_fields`** *`[]string`* *`default=[]`* + + List of mask-specific processed event fields. + + > It is wrong to set non-empty ignored fields list and non-empty processed fields list at the same time. + + + **`ignore_fields`** *`[]string`* *`default=[]`* + + List of mask-specific ignored event fields. + + > It is wrong to set non-empty ignored fields list and non-empty processed fields list at the same time. + + + **`field_filters`** *`FieldFilterSet`* *`optional`* + + Set of field filters to filter events before masking. + + `FieldFilterSet` fields: + + + **`condition`** *`string`* *`required`* *`options="and"|"or"|"not"`* + + Condition for combining filters. + + + **`filters`** *`[]FieldFilter`* *`required`* + + List of filters. + + > Maximum 1 when `condition: not`. + + `FieldFilter` fields: + + + **`field`** *`string`* *`required`* + + Event field. + + + **`mode`** *`string`* *`required`* *`options="equal"|"contains"|"prefix"|"suffix"`* + + Filter mode. + + + **`values`** *`[]string`* *`required`* + + List of event field values to filter. + ### Error groups **`error_groups`** *`ErrorGroups`* *`optional`* diff --git a/docs/ru/02-configuration.md b/docs/ru/02-configuration.md index 2303d5f..1f30e06 100644 --- a/docs/ru/02-configuration.md +++ b/docs/ru/02-configuration.md @@ -513,6 +513,95 @@ handlers: Тип поля. ++ **`masking`** *`Masking`* *`optional`* + + Конфигурация маскирования данных. + + ⚠ **Экспериментальная функция** + + Поля `Masking`: + + + **`masks`** *`[]Mask`* *`required`* + + Список масок. + + + **`process_fields`** *`[]string`* *`default=[]`* + + Список полей события, которые необходимо маскировать. + + > Параметры `process_fields` и `ignore_fields` не должны задаваться одновременно. + + + **`ignore_fields`** *`[]string`* *`default=[]`* + + Список полей события, которые нет необходимости маскировать. + + > Параметры `process_fields` и `ignore_fields` не должны задаваться одновременно. + + Поля `Mask`: + + + **`re`** *`string`* *`required`* + + Регулярное выражение для маскирования. + + + **`groups`** *`[]int`* *`default=[]`* + + Номера маскируемых групп в выражении. Если задан пустой массив или массив **содержит** `0`, то замаскировано будет все выражение. + + + **`mode`** *`string`* *`required`* *`options="mask"|"replace"|"cut"`* + + Режим маскирования: + - `mask` - используются звездочки (`*`) + - `replace` - используется `replace_word` + - `cut` - маскируемые группы будут вырезаны, а не заменены + + + **`replace_word`** *`string`* *`default=""`* + + Заменяющее слово, используемое в режиме `mode: replace`. + + > Игнорируется в других режимах . + + + **`process_fields`** *`[]string`* *`default=[]`* + + Список специфичных для маски полей события, которые необходимо маскировать. + + > Параметры `process_fields` и `ignore_fields` не должны задаваться одновременно. + + + **`ignore_fields`** *`[]string`* *`default=[]`* + + Список специфичных для маски полей события, которые нет необходимости маскировать. + + > Параметры `process_fields` и `ignore_fields` не должны задаваться одновременно. + + + **`field_filters`** *`FieldFilterSet`* *`optional`* + + Набор фильтров полей для фильтрации событий перед маскировкой. + + Поля `FieldFilterSet`: + + + **`condition`** *`string`* *`required`* *`options="and"|"or"|"not"`* + + Условие для объединения фильтров. + + + **`filters`** *`[]FieldFilter`* *`required`* + + Список фильтров. + + > Максимум 1 при `condition: not`. + + Поля `FieldFilter`: + + + **`field`** *`string`* *`required`* + + Поле события. + + + **`mode`** *`string`* *`required`* *`options="equal"|"contains"|"prefix"|"suffix"`* + + Режим фильтрации. + + + **`values`** *`[]string`* *`required`* + + Список значений поля события для фильтрации. + ### Error groups **`error_groups`** *`ErrorGroups`* *`optional`* diff --git a/internal/api/seqapi/v1/grpc/aggregation.go b/internal/api/seqapi/v1/grpc/aggregation.go index 5682515..3ce2233 100644 --- a/internal/api/seqapi/v1/grpc/aggregation.go +++ b/internal/api/seqapi/v1/grpc/aggregation.go @@ -61,5 +61,34 @@ func (a *API) GetAggregation(ctx context.Context, req *seqapi.GetAggregationRequ if err != nil { return nil, err } + + if a.masker != nil { + buf := make([]string, 0) + for i, agg := range resp.Aggregations { + if agg == nil { + continue + } + + buf = buf[:0] + for _, b := range agg.Buckets { + buf = append(buf, b.GetKey()) + } + + aggReq := req.Aggregations[i] + field := aggReq.Field + if aggReq.GroupBy != "" { + field = aggReq.GroupBy + } + + buf = a.masker.MaskAgg(field, buf) + + for j, key := range buf { + if agg.Buckets[j] != nil { + agg.Buckets[j].Key = key + } + } + } + } + return resp, nil } diff --git a/internal/api/seqapi/v1/grpc/api.go b/internal/api/seqapi/v1/grpc/api.go index 2383eb2..15ab039 100644 --- a/internal/api/seqapi/v1/grpc/api.go +++ b/internal/api/seqapi/v1/grpc/api.go @@ -3,12 +3,16 @@ package grpc import ( "time" + "go.uber.org/zap" + "github.com/ozontech/seq-ui/internal/api/profiles" "github.com/ozontech/seq-ui/internal/app/config" "github.com/ozontech/seq-ui/internal/app/types" "github.com/ozontech/seq-ui/internal/pkg/cache" "github.com/ozontech/seq-ui/internal/pkg/client/seqdb" + "github.com/ozontech/seq-ui/internal/pkg/mask" asyncsearches "github.com/ozontech/seq-ui/internal/pkg/service/async_searches" + "github.com/ozontech/seq-ui/logger" "github.com/ozontech/seq-ui/pkg/seqapi/v1" ) @@ -24,6 +28,7 @@ type API struct { pinnedFields []*seqapi.Field asyncSearches *asyncsearches.Service profiles *profiles.Profiles + masker *mask.Masker } func New( @@ -39,6 +44,11 @@ func New( fCache = newFieldsCache(cfg.FieldsCacheTTL) } + masker, err := mask.New(cfg.Masking) + if err != nil { + logger.Fatal("failed to init masking", zap.Error(err)) + } + return &API{ config: cfg, seqDB: seqDB, @@ -49,6 +59,7 @@ func New( pinnedFields: parsePinnedFields(cfg.PinnedFields), asyncSearches: asyncSearches, profiles: p, + masker: masker, } } diff --git a/internal/api/seqapi/v1/grpc/events.go b/internal/api/seqapi/v1/grpc/events.go index 7a6ce2a..6d6c3af 100644 --- a/internal/api/seqapi/v1/grpc/events.go +++ b/internal/api/seqapi/v1/grpc/events.go @@ -37,5 +37,9 @@ func (a *API) GetEvent(ctx context.Context, req *seqapi.GetEventRequest) (*seqap logger.Error("failed to marshal event proto for caching", zap.String("id", req.Id), zap.Error(err)) } + if a.masker != nil && resp.Event != nil { + a.masker.Mask(resp.Event.Data) + } + return resp, nil } diff --git a/internal/api/seqapi/v1/grpc/search.go b/internal/api/seqapi/v1/grpc/search.go index dd3cebf..d90682d 100644 --- a/internal/api/seqapi/v1/grpc/search.go +++ b/internal/api/seqapi/v1/grpc/search.go @@ -95,5 +95,11 @@ func (a *API) Search(ctx context.Context, req *seqapi.SearchRequest) (*seqapi.Se } } + if a.masker != nil { + for _, e := range resp.Events { + a.masker.Mask(e.Data) + } + } + return resp, nil } diff --git a/internal/api/seqapi/v1/http/aggregation.go b/internal/api/seqapi/v1/http/aggregation.go index f500a53..af682f4 100644 --- a/internal/api/seqapi/v1/http/aggregation.go +++ b/internal/api/seqapi/v1/http/aggregation.go @@ -73,7 +73,31 @@ func (a *API) serveGetAggregation(w http.ResponseWriter, r *http.Request) { return } - wr.WriteJson(getAggregationResponseFromProto(resp)) + getAggResp := getAggregationResponseFromProto(resp) + + if a.masker != nil { + buf := make([]string, 0) + for i, agg := range getAggResp.Aggregations { + buf = buf[:0] + for _, b := range agg.Buckets { + buf = append(buf, b.Key) + } + + aggReq := httpReq.Aggregations[i] + field := aggReq.Field + if aggReq.GroupBy != "" { + field = aggReq.GroupBy + } + + buf = a.masker.MaskAgg(field, buf) + + for j, key := range buf { + getAggResp.Aggregations[i].Buckets[j].Key = key + } + } + } + + wr.WriteJson(getAggResp) } type aggregationFunc string // @name seqapi.v1.AggregationFunc diff --git a/internal/api/seqapi/v1/http/api.go b/internal/api/seqapi/v1/http/api.go index d77539b..116fd62 100644 --- a/internal/api/seqapi/v1/http/api.go +++ b/internal/api/seqapi/v1/http/api.go @@ -5,6 +5,7 @@ import ( "github.com/go-chi/chi/v5" "github.com/gofrs/uuid" + "go.uber.org/zap" "github.com/ozontech/seq-ui/internal/api/profiles" "github.com/ozontech/seq-ui/internal/app/config" @@ -12,7 +13,9 @@ import ( "github.com/ozontech/seq-ui/internal/app/types" "github.com/ozontech/seq-ui/internal/pkg/cache" "github.com/ozontech/seq-ui/internal/pkg/client/seqdb" + "github.com/ozontech/seq-ui/internal/pkg/mask" asyncsearches "github.com/ozontech/seq-ui/internal/pkg/service/async_searches" + "github.com/ozontech/seq-ui/logger" "github.com/ozontech/seq-ui/pkg/seqapi/v1" ) @@ -27,6 +30,7 @@ type API struct { exportLimiter *tokenlimiter.Limiter asyncSearches *asyncsearches.Service profiles *profiles.Profiles + masker *mask.Masker } func New( @@ -42,6 +46,15 @@ func New( fCache = newFieldsCache(cfg.FieldsCacheTTL) } + masker, err := mask.New(cfg.Masking) + if err != nil { + logger.Fatal("failed to init masking", zap.Error(err)) + } + // for export + if masker != nil { + seqDB.WithMasking(masker) + } + return &API{ config: cfg, seqDB: seqDB, @@ -53,6 +66,7 @@ func New( exportLimiter: tokenlimiter.New(cfg.MaxParallelExportRequests), asyncSearches: asyncSearches, profiles: p, + masker: masker, } } diff --git a/internal/api/seqapi/v1/http/events.go b/internal/api/seqapi/v1/http/events.go index 695577b..53ec898 100644 --- a/internal/api/seqapi/v1/http/events.go +++ b/internal/api/seqapi/v1/http/events.go @@ -58,7 +58,12 @@ func (a *API) serveGetEvent(w http.ResponseWriter, r *http.Request) { logger.Error("failed to marshal event proto for caching", zap.String("id", id), zap.Error(err)) } - wr.WriteJson(getEventResponseFromProto(resp)) + eventResp := getEventResponseFromProto(resp) + if a.masker != nil { + a.masker.Mask(eventResp.Event.Data) + } + + wr.WriteJson(eventResp) } type event struct { diff --git a/internal/api/seqapi/v1/http/search.go b/internal/api/seqapi/v1/http/search.go index c686498..d53a4e2 100644 --- a/internal/api/seqapi/v1/http/search.go +++ b/internal/api/seqapi/v1/http/search.go @@ -112,7 +112,14 @@ func (a *API) serveSearch(w http.ResponseWriter, r *http.Request) { } } - wr.WriteJson(searchResponseFromProto(resp, httpReq.WithTotal)) + searchResp := searchResponseFromProto(resp, httpReq.WithTotal) + if a.masker != nil { + for i := range searchResp.Events { + a.masker.Mask(searchResp.Events[i].Data) + } + } + + wr.WriteJson(searchResp) } type order string // @name seqapi.v1.Order diff --git a/internal/app/config/config.go b/internal/app/config/config.go index 4e5322a..6c9318b 100644 --- a/internal/app/config/config.go +++ b/internal/app/config/config.go @@ -12,6 +12,19 @@ import ( const ( ProxyClientModeGRPC = "grpc" + MaskModeMask = "mask" + MaskModeReplace = "replace" + MaskModeCut = "cut" + + FieldFilterConditionAnd = "and" + FieldFilterConditionOr = "or" + FieldFilterConditionNot = "not" + + FieldFilterModeEqual = "equal" + FieldFilterModeContains = "contains" + FieldFilterModePrefix = "prefix" + FieldFilterModeSuffix = "suffix" + minGRPCKeepaliveTime = 10 * time.Second minGRPCKeepaliveTimeout = 1 * time.Second @@ -228,6 +241,36 @@ type SeqAPI struct { LogsLifespanCacheKey string `yaml:"logs_lifespan_cache_key"` LogsLifespanCacheTTL time.Duration `yaml:"logs_lifespan_cache_ttl"` FieldsCacheTTL time.Duration `yaml:"fields_cache_ttl"` + Masking *Masking `yaml:"masking"` +} + +type Masking struct { + Masks []Mask `yaml:"masks"` + ProcessFields []string `yaml:"process_fields"` + IgnoreFields []string `yaml:"ignore_fields"` +} + +type Mask struct { + Re string `yaml:"re"` + Groups []int `yaml:"groups"` + Mode string `yaml:"mode"` // "mask" or "replace" or "cut" + ReplaceWord string `yaml:"replace_word"` // for mode:replace + + ProcessFields []string `yaml:"process_fields"` + IgnoreFields []string `yaml:"ignore_fields"` + + FieldFilters *FieldFilterSet `yaml:"field_filters"` +} + +type FieldFilter struct { + Field string `yaml:"field"` + Mode string `yaml:"mode"` // "equal" or "contains" or "prefix" or "suffix" + Values []string `yaml:"values"` +} + +type FieldFilterSet struct { + Condition string `yaml:"condition"` // "and" or "or" or "not" + Filters []FieldFilter `yaml:"filters"` // max 1 if condition:not } type LogTagsMapping struct { diff --git a/internal/pkg/client/seqdb/client.go b/internal/pkg/client/seqdb/client.go index bbd042a..7230a43 100644 --- a/internal/pkg/client/seqdb/client.go +++ b/internal/pkg/client/seqdb/client.go @@ -5,6 +5,7 @@ import ( "time" "github.com/ozontech/seq-ui/internal/api/httputil" + "github.com/ozontech/seq-ui/internal/pkg/mask" "github.com/ozontech/seq-ui/pkg/seqapi/v1" ) @@ -21,6 +22,9 @@ type Client interface { GetAsyncSearchesList(context.Context, *seqapi.GetAsyncSearchesListRequest, []string) (*seqapi.GetAsyncSearchesListResponse, error) CancelAsyncSearch(context.Context, *seqapi.CancelAsyncSearchRequest) (*seqapi.CancelAsyncSearchResponse, error) DeleteAsyncSearch(context.Context, *seqapi.DeleteAsyncSearchRequest) (*seqapi.DeleteAsyncSearchResponse, error) + + // masking + WithMasking(m *mask.Masker) } type GRPCKeepaliveParams struct { diff --git a/internal/pkg/client/seqdb/export.go b/internal/pkg/client/seqdb/export.go index 8041802..bfb2e5d 100644 --- a/internal/pkg/client/seqdb/export.go +++ b/internal/pkg/client/seqdb/export.go @@ -58,24 +58,40 @@ func (c *GRPCClient) Export(ctx context.Context, req *seqapi.ExportRequest, cw * return err } - if eResp != nil && eResp.Doc != nil { - if req.Format == seqapi.ExportFormat_EXPORT_FORMAT_CSV { - m, err := newMapStringJson(eResp.Doc.Data) - if err != nil { - continue - } - if err := csvWriter.Write(m.getValues(req.Fields, false)); err != nil { - continue - } - } else { - timeJson, _ := json.Marshal(eResp.Doc.Time.AsTime()) - cw.WriteString(fmt.Sprintf(exportJSONLFormat, eResp.Doc.Id, eResp.Doc.Data, timeJson)) + if eResp == nil || eResp.Doc == nil { + continue + } + + if req.Format == seqapi.ExportFormat_EXPORT_FORMAT_CSV { + m, err := newMapStringString(eResp.Doc.Data) + if err != nil { + continue + } + if c.masker != nil { + c.masker.Mask(m) } + if err := csvWriter.Write(m.getValues(req.Fields, false)); err != nil { + continue + } + } else { + timeJson, _ := json.Marshal(eResp.Doc.Time.AsTime()) - if i%batchSize == 0 { - csvWriter.Flush() - cw.Flush() + data := eResp.Doc.Data + if c.masker != nil { + if m, err := newMapStringString(data); err == nil { + c.masker.Mask(m) + if d, err := json.Marshal(m); err == nil { + data = d + } + } } + + cw.WriteString(fmt.Sprintf(exportJSONLFormat, eResp.Doc.Id, data, timeJson)) + } + + if i%batchSize == 0 { + csvWriter.Flush() + cw.Flush() } } diff --git a/internal/pkg/client/seqdb/export_test.go b/internal/pkg/client/seqdb/export_test.go index eefe60c..eedcc29 100644 --- a/internal/pkg/client/seqdb/export_test.go +++ b/internal/pkg/client/seqdb/export_test.go @@ -159,7 +159,7 @@ func Test_GRPCClient_Export(t *testing.T) { docs: []seqproxyapi.Document{ {Id: "test1", Data: []byte(`{"key1":"val1,a","key2":"val2,b","key3":"test \"quoted\""}`), Time: eventTimePB}, }, - wantResp: "key1,key3\r\n\"val1,a\",\"test \\\"\"quoted\\\"\"\"\r\n", + wantResp: "key1,key3\r\n\"val1,a\",\"test \"\"quoted\"\"\"\r\n", }, { name: "err_proxy", diff --git a/internal/pkg/client/seqdb/grpc_client.go b/internal/pkg/client/seqdb/grpc_client.go index 80f2350..2cdddee 100644 --- a/internal/pkg/client/seqdb/grpc_client.go +++ b/internal/pkg/client/seqdb/grpc_client.go @@ -9,10 +9,6 @@ import ( "time" grpc_mw "github.com/grpc-ecosystem/go-grpc-middleware" - "github.com/ozontech/seq-ui/internal/app/types" - "github.com/ozontech/seq-ui/internal/pkg/client/seqdb/seqproxyapi/v1" - "github.com/ozontech/seq-ui/logger" - "github.com/ozontech/seq-ui/metric" "go.uber.org/zap" "google.golang.org/grpc" "google.golang.org/grpc/codes" @@ -20,6 +16,12 @@ import ( "google.golang.org/grpc/keepalive" "google.golang.org/grpc/metadata" "google.golang.org/grpc/status" + + "github.com/ozontech/seq-ui/internal/app/types" + "github.com/ozontech/seq-ui/internal/pkg/client/seqdb/seqproxyapi/v1" + "github.com/ozontech/seq-ui/internal/pkg/mask" + "github.com/ozontech/seq-ui/logger" + "github.com/ozontech/seq-ui/metric" ) type grpcSearchResp interface { @@ -123,6 +125,8 @@ type GRPCClient struct { initialRetryBackoff time.Duration maxRetryBackoff time.Duration reqRetries int + + masker *mask.Masker } func NewGRPCClient(ctx context.Context, params ClientParams) (*GRPCClient, error) { @@ -239,3 +243,7 @@ func (c *GRPCClient) sendRequest(ctx context.Context, reqFn grpcReqFn) (any, err } return grpcResp, err } + +func (c *GRPCClient) WithMasking(m *mask.Masker) { + c.masker = m +} diff --git a/internal/pkg/client/seqdb/map_types.go b/internal/pkg/client/seqdb/map_types.go index 550223a..8ca3a54 100644 --- a/internal/pkg/client/seqdb/map_types.go +++ b/internal/pkg/client/seqdb/map_types.go @@ -1,7 +1,7 @@ package seqdb import ( - "bytes" + "strconv" "strings" jsoniter "github.com/json-iterator/go" @@ -27,16 +27,26 @@ func (m mapStringJson) toStringMap() mapStringString { return mss } -func (m mapStringJson) getValues(keys []string, raw bool) []string { +type mapStringString map[string]string + +func newMapStringString(data []byte) (mapStringString, error) { + m, err := newMapStringJson(data) + if err != nil { + return nil, err + } + return m.toStringMap(), nil +} + +func (m mapStringString) getValues(keys []string, raw bool) []string { values := make([]string, 0, len(keys)) for _, k := range keys { if v, ok := m[k]; ok { - val := marshalRaw(v) if !raw { - val = bytes.TrimPrefix(val, []byte{'"'}) - val = bytes.TrimSuffix(val, []byte{'"'}) + if unq, err := strconv.Unquote(v); err == nil { + v = unq + } } - values = append(values, validateString(string(val))) + values = append(values, v) } else { val := "" if raw { @@ -48,16 +58,6 @@ func (m mapStringJson) getValues(keys []string, raw bool) []string { return values } -type mapStringString map[string]string - -func newMapStringString(data []byte) (mapStringString, error) { - m, err := newMapStringJson(data) - if err != nil { - return nil, err - } - return m.toStringMap(), nil -} - const invalidUTF8Replacement = "�" func validateString(s string) string { diff --git a/internal/pkg/client/seqdb/mock/client.go b/internal/pkg/client/seqdb/mock/client.go index b3f470d..efb9da6 100644 --- a/internal/pkg/client/seqdb/mock/client.go +++ b/internal/pkg/client/seqdb/mock/client.go @@ -14,6 +14,7 @@ import ( reflect "reflect" httputil "github.com/ozontech/seq-ui/internal/api/httputil" + mask "github.com/ozontech/seq-ui/internal/pkg/mask" seqapi "github.com/ozontech/seq-ui/pkg/seqapi/v1" gomock "go.uber.org/mock/gomock" ) @@ -220,3 +221,15 @@ func (mr *MockClientMockRecorder) Status(arg0, arg1 any) *gomock.Call { mr.mock.ctrl.T.Helper() return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Status", reflect.TypeOf((*MockClient)(nil).Status), arg0, arg1) } + +// WithMasking mocks base method. +func (m_2 *MockClient) WithMasking(m *mask.Masker) { + m_2.ctrl.T.Helper() + m_2.ctrl.Call(m_2, "WithMasking", m) +} + +// WithMasking indicates an expected call of WithMasking. +func (mr *MockClientMockRecorder) WithMasking(m any) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "WithMasking", reflect.TypeOf((*MockClient)(nil).WithMasking), m) +} diff --git a/internal/pkg/mask/field_filter.go b/internal/pkg/mask/field_filter.go new file mode 100644 index 0000000..2eba3dc --- /dev/null +++ b/internal/pkg/mask/field_filter.go @@ -0,0 +1,168 @@ +package mask + +import ( + "errors" + "fmt" + "slices" + "strconv" + "strings" + + "github.com/ozontech/seq-ui/internal/app/config" +) + +type filterCondition int8 + +const ( + filterCondUnknown filterCondition = iota + filterCondAnd + filterCondOr + filterCondNot +) + +type fieldFilterSet struct { + cond filterCondition + filters []fieldFilter +} + +func newFieldFilterSet(cfg *config.FieldFilterSet) (*fieldFilterSet, error) { + if cfg == nil { + return nil, nil + } + + if len(cfg.Filters) == 0 { + return nil, errors.New("empty field filters") + } + + var cond filterCondition + switch cfg.Condition { + case config.FieldFilterConditionAnd: + cond = filterCondAnd + case config.FieldFilterConditionOr: + cond = filterCondOr + case config.FieldFilterConditionNot: + cond = filterCondNot + default: + return nil, fmt.Errorf("unknown field filters condition %q", cfg.Condition) + } + + if cond == filterCondNot && len(cfg.Filters) != 1 { + return nil, errors.New("too many filters for 'not' condition") + } + + set := &fieldFilterSet{ + cond: cond, + filters: make([]fieldFilter, 0, len(cfg.Filters)), + } + + for i := range cfg.Filters { + ff, err := newFieldFilter(cfg.Filters[i]) + if err != nil { + return set, fmt.Errorf("failed to init field filter #%d: %w", i, err) + } + set.filters = append(set.filters, ff) + } + + return set, nil +} + +func (f *fieldFilterSet) match(event map[string]string) bool { + if f.cond == filterCondNot { + return !f.filters[0].match(event) + } + + for _, ff := range f.filters { + match := ff.match(event) + if match && f.cond == filterCondOr { + return true + } + if !match && f.cond == filterCondAnd { + return false + } + } + + // for 'and' - all matched, for 'or' - none + return f.cond == filterCondAnd +} + +type filterMode int8 + +const ( + filterModeUnknown filterMode = iota + filterModeEqual + filterModeContains + filterModePrefix + filterModeSuffix +) + +type fieldFilter struct { + mode filterMode + field string + values []string +} + +func newFieldFilter(cfg config.FieldFilter) (fieldFilter, error) { + if cfg.Field == "" { + return fieldFilter{}, errors.New("empty field") + } + + if len(cfg.Values) == 0 { + return fieldFilter{}, errors.New("empty values") + } + + var mode filterMode + switch cfg.Mode { + case config.FieldFilterModeEqual: + mode = filterModeEqual + case config.FieldFilterModeContains: + mode = filterModeContains + case config.FieldFilterModePrefix: + mode = filterModePrefix + case config.FieldFilterModeSuffix: + mode = filterModeSuffix + default: + return fieldFilter{}, fmt.Errorf("unknown mode %q", cfg.Mode) + } + + return fieldFilter{ + mode: mode, + field: cfg.Field, + values: cfg.Values, + }, nil +} + +func (f *fieldFilter) match(event map[string]string) bool { + val, ok := event[f.field] + if !ok { + return false + } + + // some map values are quoted strings + if len(val) >= 2 && val[0] == '"' { + unq, err := strconv.Unquote(val) + if err == nil { + val = unq + } + } + + var fn func(string, string) bool + switch f.mode { + case filterModeEqual: + return slices.Contains(f.values, val) + case filterModeContains: + fn = strings.Contains + case filterModePrefix: + fn = strings.HasPrefix + case filterModeSuffix: + fn = strings.HasSuffix + } + + for i := range f.values { + if len(val) < len(f.values[i]) { + continue + } + if fn(val, f.values[i]) { + return true + } + } + return false +} diff --git a/internal/pkg/mask/field_filter_test.go b/internal/pkg/mask/field_filter_test.go new file mode 100644 index 0000000..4c368bc --- /dev/null +++ b/internal/pkg/mask/field_filter_test.go @@ -0,0 +1,258 @@ +package mask + +import ( + "testing" + + "github.com/stretchr/testify/require" + + "github.com/ozontech/seq-ui/internal/app/config" +) + +func TestFieldFilterSetCreate(t *testing.T) { + tests := []struct { + name string + + cfg *config.FieldFilterSet + noErr bool + }{ + { + name: "ok", + cfg: &config.FieldFilterSet{ + Condition: config.FieldFilterConditionAnd, + Filters: []config.FieldFilter{ + { + Field: "check_f1", + Mode: config.FieldFilterModeEqual, + Values: []string{"test1", "test2"}, + }, + { + Field: "check_f2", + Mode: config.FieldFilterModePrefix, + Values: []string{"pref1_", "pref2_"}, + }, + }, + }, + noErr: true, + }, + { + name: "empty_cfg", + cfg: nil, + noErr: true, + }, + { + name: "empty_filters", + cfg: &config.FieldFilterSet{ + Filters: nil, + }, + }, + { + name: "unknown_condition", + cfg: &config.FieldFilterSet{ + Condition: "unknown", + Filters: []config.FieldFilter{{}}, + }, + }, + { + name: "not_condition_too_many_filters", + cfg: &config.FieldFilterSet{ + Condition: config.FieldFilterConditionNot, + Filters: []config.FieldFilter{{}, {}}, + }, + }, + { + name: "filter_empty_field", + cfg: &config.FieldFilterSet{ + Condition: config.FieldFilterConditionAnd, + Filters: []config.FieldFilter{ + { + Field: "", + }, + }, + }, + }, + { + name: "filter_empty_values", + cfg: &config.FieldFilterSet{ + Condition: config.FieldFilterConditionAnd, + Filters: []config.FieldFilter{ + { + Field: "test", + Values: nil, + }, + }, + }, + }, + { + name: "filter_unknown_mode", + cfg: &config.FieldFilterSet{ + Condition: config.FieldFilterConditionAnd, + Filters: []config.FieldFilter{ + { + Field: "test", + Values: []string{"test1", "test2"}, + Mode: "unknown", + }, + }, + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + _, err := newFieldFilterSet(tt.cfg) + require.Equal(t, tt.noErr, err == nil) + }) + } +} + +func TestFieldFilterMatch(t *testing.T) { + tests := []struct { + name string + + cfg config.FieldFilter + + input map[string]string + matched bool + }{ + { + name: "equal_matched", + cfg: config.FieldFilter{ + Field: "f1", + Mode: config.FieldFilterModeEqual, + Values: []string{"test1", "test2"}, + }, + input: map[string]string{ + "f1": "test1", + }, + matched: true, + }, + { + name: "equal_not_matched", + cfg: config.FieldFilter{ + Field: "f1", + Mode: config.FieldFilterModeEqual, + Values: []string{"test1", "test2"}, + }, + input: map[string]string{ + "f1": "test3", + "f2": "test1", + }, + matched: false, + }, + { + name: "contains_matched", + cfg: config.FieldFilter{ + Field: "f1", + Mode: config.FieldFilterModeContains, + Values: []string{"test1", "test2"}, + }, + input: map[string]string{ + "f1": "sometest1here", + }, + matched: true, + }, + { + name: "contains_not_matched", + cfg: config.FieldFilter{ + Field: "f1", + Mode: config.FieldFilterModeContains, + Values: []string{"test1", "test2"}, + }, + input: map[string]string{ + "f1": "sometest3here", + "f2": "some test1 here", + "f3": "sometest2here", + }, + matched: false, + }, + { + name: "prefix_matched", + cfg: config.FieldFilter{ + Field: "f1", + Mode: config.FieldFilterModePrefix, + Values: []string{"pref1_", "pref2_"}, + }, + input: map[string]string{ + "f1": "pref2_here", + }, + matched: true, + }, + { + name: "prefix_not_matched", + cfg: config.FieldFilter{ + Field: "f1", + Mode: config.FieldFilterModePrefix, + Values: []string{"pref1_", "pref2_"}, + }, + input: map[string]string{ + "f1": "pref1here", + "f2": "pref2_here", + "f3": "pref1_here", + }, + matched: false, + }, + { + name: "suffix_matched", + cfg: config.FieldFilter{ + Field: "f1", + Mode: config.FieldFilterModeSuffix, + Values: []string{"_suff1", "_suff2"}, + }, + input: map[string]string{ + "f1": "here_suff1", + }, + matched: true, + }, + { + name: "suffix_not_matched", + cfg: config.FieldFilter{ + Field: "f1", + Mode: config.FieldFilterModeSuffix, + Values: []string{"_suff1", "_suff2"}, + }, + input: map[string]string{ + "f1": "heresuff1", + "f2": "here_suff2", + "f3": "here_suff1", + }, + matched: false, + }, + { + name: "field_not_found", + cfg: config.FieldFilter{ + Field: "f2", + Mode: config.FieldFilterModeEqual, + Values: []string{"test1", "test2"}, + }, + input: map[string]string{ + "f1": "test1", + }, + matched: false, + }, + { + name: "quoted", + cfg: config.FieldFilter{ + Field: "f1", + Mode: config.FieldFilterModeEqual, + Values: []string{"test1", "test2"}, + }, + input: map[string]string{ + "f1": `"test1"`, + }, + matched: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + ff, err := newFieldFilter(tt.cfg) + require.NoError(t, err) + + require.Equal(t, tt.matched, ff.match(tt.input)) + }) + } +} diff --git a/internal/pkg/mask/fields.go b/internal/pkg/mask/fields.go new file mode 100644 index 0000000..a83a943 --- /dev/null +++ b/internal/pkg/mask/fields.go @@ -0,0 +1,40 @@ +package mask + +import "errors" + +type fieldsMode int8 + +const ( + fieldsModeUnknown fieldsMode = iota + fieldsModeProcess + fieldsModeIgnore +) + +type maskFields struct { + f map[string]struct{} + mode fieldsMode +} + +func parseFields(process, ignore []string) (*maskFields, error) { + newFields := func(fields []string, mode fieldsMode) *maskFields { + m := map[string]struct{}{} + for _, f := range fields { + m[f] = struct{}{} + } + return &maskFields{ + f: m, + mode: mode, + } + } + + switch { + case len(process) > 0 && len(ignore) > 0: + return nil, errors.New("igore and process fields cannot be specified at the same time") + case len(process) == 0 && len(ignore) == 0: + return nil, nil + case len(process) > 0: + return newFields(process, fieldsModeProcess), nil + default: + return newFields(ignore, fieldsModeIgnore), nil + } +} diff --git a/internal/pkg/mask/mask.go b/internal/pkg/mask/mask.go new file mode 100644 index 0000000..f3d4000 --- /dev/null +++ b/internal/pkg/mask/mask.go @@ -0,0 +1,223 @@ +package mask + +import ( + "errors" + "fmt" + "regexp" + "slices" + "strings" + + "github.com/ozontech/seq-ui/internal/app/config" +) + +const maskSymbol = byte('*') + +type maskMode int8 + +const ( + maskModeUnknown maskMode = iota + maskModeMask + maskModeReplace + maskModeCut +) + +type mask struct { + re *regexp.Regexp + groups []int + mode maskMode + fields *maskFields + replaceWord string + + fieldFilters *fieldFilterSet +} + +func compileMasks(cfg []config.Mask, globalFields *maskFields) ([]mask, error) { + m := make([]mask, 0, len(cfg)) + for i := range cfg { + mask, err := compileMask(cfg[i], globalFields) + if err != nil { + return nil, fmt.Errorf("failed to compile mask #%d: %w", i, err) + } + m = append(m, mask) + } + return m, nil +} + +func compileMask(cfg config.Mask, globalFields *maskFields) (mask, error) { + if cfg.Re == "" { + return mask{}, errors.New("empty re") + } + + var mode maskMode + switch cfg.Mode { + case config.MaskModeMask: + mode = maskModeMask + case config.MaskModeReplace: + mode = maskModeReplace + case config.MaskModeCut: + mode = maskModeCut + default: + return mask{}, fmt.Errorf("unknown mask mode %q", cfg.Mode) + } + + if mode == maskModeReplace && cfg.ReplaceWord == "" { + return mask{}, errors.New("empty replace word") + } + + fields, err := parseFields(cfg.ProcessFields, cfg.IgnoreFields) + if err != nil { + return mask{}, fmt.Errorf("failed to parse fields: %w", err) + } + if fields == nil { + fields = globalFields + } + + re, err := regexp.Compile(cfg.Re) + if err != nil { + return mask{}, fmt.Errorf("failed to compile regexp: %w", err) + } + + var groups []int + if groups, err = verifyGroups(cfg.Groups, re.NumSubexp()); err != nil { + return mask{}, fmt.Errorf("failed to verify groups: %w", err) + } + + ff, err := newFieldFilterSet(cfg.FieldFilters) + if err != nil { + return mask{}, fmt.Errorf("failed to init field filters: %w", err) + } + + return mask{ + re: re, + groups: groups, + mode: mode, + fields: fields, + replaceWord: cfg.ReplaceWord, + fieldFilters: ff, + }, nil +} + +func verifyGroups(groups []int, compiledTotal int) ([]int, error) { + if len(groups) == 0 || compiledTotal == 0 || slices.Index(groups, 0) != -1 { + return []int{0}, nil + } + + if len(groups) > compiledTotal { + return nil, errors.New("too many groups") + } + + uniq := make(map[int]struct{}) + for _, g := range groups { + if g < 0 || g > compiledTotal { + return nil, fmt.Errorf("wrong group number %d", g) + } + if _, has := uniq[g]; has { + return nil, errors.New("group numbers must be unique") + } + uniq[g] = struct{}{} + } + + return groups, nil +} + +// processFields returns list of fields that must be processed and +// their presence in the config +func (m *mask) processFields(event map[string]string) ([]string, bool) { + if m.fields == nil { + return nil, false + } + + fields := make([]string, 0) + if m.fields.mode == fieldsModeProcess { + for f := range m.fields.f { + if _, has := event[f]; has { + fields = append(fields, f) + } + } + } else { + for f := range event { + if _, has := m.fields.f[f]; !has { + fields = append(fields, f) + } + } + } + return fields, true +} + +func (m *mask) apply(event map[string]string) { + if m.fieldFilters != nil && !m.fieldFilters.match(event) { + return + } + + fields, exists := m.processFields(event) + + if len(fields) == 0 { + // empty list when fields presented in config + if exists { + return + } + + for f, v := range event { + event[f] = m.maskValue(v) + } + } else { + for _, f := range fields { + event[f] = m.maskValue(event[f]) + } + } +} + +func (m *mask) applyAgg(field string, bucketKeys []string) []string { + if m.fields != nil { + _, has := m.fields.f[field] + if m.fields.mode == fieldsModeProcess && !has || + m.fields.mode == fieldsModeIgnore && has { + return bucketKeys + } + } + + for i := range bucketKeys { + bucketKeys[i] = m.maskValue(bucketKeys[i]) + } + return bucketKeys +} + +func (m *mask) maskValue(val string) string { + if val == "" { + return "" + } + + indexes := m.re.FindAllStringSubmatchIndex(val, -1) + if len(indexes) == 0 { + return val + } + + var sb strings.Builder + prevFinish := 0 + curStart, curFinish := 0, 0 + for _, idx := range indexes { + for _, grp := range m.groups { + curStart = idx[grp*2] + curFinish = idx[grp*2+1] + if curStart < 0 || curFinish < 0 { + continue + } + + sb.WriteString(val[prevFinish:curStart]) + prevFinish = curFinish + + switch m.mode { + case maskModeMask: + count := curFinish - curStart + for range count { + sb.WriteByte(maskSymbol) + } + case maskModeReplace: + sb.WriteString(m.replaceWord) + } + } + } + sb.WriteString(val[prevFinish:]) + + return sb.String() +} diff --git a/internal/pkg/mask/mask_test.go b/internal/pkg/mask/mask_test.go new file mode 100644 index 0000000..afe054e --- /dev/null +++ b/internal/pkg/mask/mask_test.go @@ -0,0 +1,711 @@ +package mask + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/ozontech/seq-ui/internal/app/config" +) + +func TestMaskCompile(t *testing.T) { + tests := []struct { + name string + + cfg config.Mask + noErr bool + }{ + { + name: "ok", + cfg: config.Mask{ + Re: `(\d{3})-(\d{3})-(\d{4})`, + Groups: []int{0}, + Mode: config.MaskModeReplace, + ReplaceWord: "test", + ProcessFields: []string{"f1", "f2"}, + }, + noErr: true, + }, + { + name: "empty_re", + cfg: config.Mask{ + Re: "", + }, + }, + { + name: "bad_re", + cfg: config.Mask{ + Re: "(test", + }, + }, + { + name: "unknown_mask_mode", + cfg: config.Mask{ + Re: "(test)", + Mode: "unknown", + }, + }, + { + name: "empty_replace_word", + cfg: config.Mask{ + Re: "(test)", + Mode: config.MaskModeReplace, + ReplaceWord: "", + }, + }, + { + name: "process_and_ignore", + cfg: config.Mask{ + Re: "(test)", + Mode: config.MaskModeMask, + ProcessFields: []string{"f1", "f2"}, + IgnoreFields: []string{"f3", "f4"}, + }, + }, + { + name: "process_and_ignore", + cfg: config.Mask{ + Re: "(test)", + Mode: config.MaskModeMask, + ProcessFields: []string{"f1", "f2"}, + IgnoreFields: []string{"f3", "f4"}, + }, + }, + { + name: "too_many_groups", + cfg: config.Mask{ + Re: "(test)", + Mode: config.MaskModeMask, + Groups: []int{1, 2}, + }, + }, + { + name: "wrong_group_number_1", + cfg: config.Mask{ + Re: "(test)", + Mode: config.MaskModeMask, + Groups: []int{-1}, + }, + }, + { + name: "wrong_group_number_2", + cfg: config.Mask{ + Re: "(test)", + Mode: config.MaskModeMask, + Groups: []int{10}, + }, + }, + { + name: "group_not_unique", + cfg: config.Mask{ + Re: "(test)-(test2)", + Mode: config.MaskModeMask, + Groups: []int{1, 1}, + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + _, err := compileMask(tt.cfg, nil) + require.Equal(t, tt.noErr, err == nil) + }) + } +} + +func TestMaskApply(t *testing.T) { + tests := []struct { + name string + + cfg config.Mask + globalProcessFields []string + globalIgnoreFields []string + + input map[string]string + want map[string]string + }{ + { + name: "full_mask", + cfg: config.Mask{ + Re: `(\d{3})-(\d{3})-(\d{4})`, + Groups: []int{0}, + Mode: config.MaskModeMask, + }, + input: map[string]string{ + "f1": "my number: 123_456_7890;", + "f2": "my number: 123-456-7890;", + }, + want: map[string]string{ + "f1": "my number: 123_456_7890;", + "f2": "my number: ************;", + }, + }, + { + name: "full_replace", + cfg: config.Mask{ + Re: `(\d{3})-(\d{3})-(\d{4})`, + Groups: []int{0}, + Mode: config.MaskModeReplace, + ReplaceWord: "", + }, + input: map[string]string{ + "f1": "my number: 123_456_7890;", + "f2": "my number: 123-456-7890;", + }, + want: map[string]string{ + "f1": "my number: 123_456_7890;", + "f2": "my number: ;", + }, + }, + { + name: "full_cut", + cfg: config.Mask{ + Re: `(\d{3})-(\d{3})-(\d{4})`, + Groups: []int{0}, + Mode: config.MaskModeCut, + }, + input: map[string]string{ + "f1": "my number: 123_456_7890;", + "f2": "my number: 123-456-7890;", + }, + want: map[string]string{ + "f1": "my number: 123_456_7890;", + "f2": "my number: ;", + }, + }, + { + name: "groups", + cfg: config.Mask{ + Re: `(\d{3})-(\d{3})-(\d{4})`, + Groups: []int{1, 3}, + Mode: config.MaskModeMask, + }, + input: map[string]string{ + "f1": "my number: 123_456_7890;", + "f2": "my number: 123-456-7890;", + }, + want: map[string]string{ + "f1": "my number: 123_456_7890;", + "f2": "my number: ***-456-****;", + }, + }, + { + name: "no_groups", + cfg: config.Mask{ + Re: `(\d{3})-(\d{3})-(\d{4})`, + Mode: config.MaskModeMask, + }, + input: map[string]string{ + "f1": "my number: 123_456_7890;", + "f2": "my number: 123-456-7890;", + }, + want: map[string]string{ + "f1": "my number: 123_456_7890;", + "f2": "my number: ************;", + }, + }, + { + name: "no_groups_in_re", + cfg: config.Mask{ + Re: `\d{3}-\d{3}-\d{4}`, + Groups: []int{1, 3}, + Mode: config.MaskModeMask, + }, + input: map[string]string{ + "f1": "my number: 123_456_7890;", + "f2": "my number: 123-456-7890;", + }, + want: map[string]string{ + "f1": "my number: 123_456_7890;", + "f2": "my number: ************;", + }, + }, + { + name: "groups_with_zero", + cfg: config.Mask{ + Re: `(\d{3})-(\d{3})-(\d{4})`, + Groups: []int{0, 1, 3}, + Mode: config.MaskModeMask, + }, + input: map[string]string{ + "f1": "my number: 123_456_7890;", + "f2": "my number: 123-456-7890;", + }, + want: map[string]string{ + "f1": "my number: 123_456_7890;", + "f2": "my number: ************;", + }, + }, + { + name: "process_local", + cfg: config.Mask{ + Re: `(\d{3})-(\d{3})-(\d{4})`, + Groups: []int{0}, + Mode: config.MaskModeMask, + ProcessFields: []string{"f1"}, + }, + globalProcessFields: []string{"f2"}, + input: map[string]string{ + "f1": "my number: 098-765-4321;", + "f2": "my number: 123-456-7890;", + }, + want: map[string]string{ + "f1": "my number: ************;", + "f2": "my number: 123-456-7890;", + }, + }, + { + name: "process_local_not_exists", + cfg: config.Mask{ + Re: `(\d{3})-(\d{3})-(\d{4})`, + Groups: []int{0}, + Mode: config.MaskModeMask, + ProcessFields: []string{"f3"}, + }, + globalProcessFields: []string{"f2"}, + input: map[string]string{ + "f1": "my number: 098-765-4321;", + "f2": "my number: 123-456-7890;", + }, + want: map[string]string{ + "f1": "my number: 098-765-4321;", + "f2": "my number: 123-456-7890;", + }, + }, + { + name: "process_global", + cfg: config.Mask{ + Re: `(\d{3})-(\d{3})-(\d{4})`, + Groups: []int{0}, + Mode: config.MaskModeMask, + }, + globalProcessFields: []string{"f2"}, + input: map[string]string{ + "f1": "my number: 098-765-4321;", + "f2": "my number: 123-456-7890;", + }, + want: map[string]string{ + "f1": "my number: 098-765-4321;", + "f2": "my number: ************;", + }, + }, + { + name: "process_global_not_exists", + cfg: config.Mask{ + Re: `(\d{3})-(\d{3})-(\d{4})`, + Groups: []int{0}, + Mode: config.MaskModeMask, + }, + globalProcessFields: []string{"f3"}, + input: map[string]string{ + "f1": "my number: 098-765-4321;", + "f2": "my number: 123-456-7890;", + }, + want: map[string]string{ + "f1": "my number: 098-765-4321;", + "f2": "my number: 123-456-7890;", + }, + }, + { + name: "ignore_local", + cfg: config.Mask{ + Re: `(\d{3})-(\d{3})-(\d{4})`, + Groups: []int{0}, + Mode: config.MaskModeMask, + IgnoreFields: []string{"f1"}, + }, + globalIgnoreFields: []string{"f2"}, + input: map[string]string{ + "f1": "my number: 098-765-4321;", + "f2": "my number: 123-456-7890;", + }, + want: map[string]string{ + "f1": "my number: 098-765-4321;", + "f2": "my number: ************;", + }, + }, + { + name: "ignore_local_all", + cfg: config.Mask{ + Re: `(\d{3})-(\d{3})-(\d{4})`, + Groups: []int{0}, + Mode: config.MaskModeMask, + IgnoreFields: []string{"f1", "f2"}, + }, + globalIgnoreFields: []string{"f2"}, + input: map[string]string{ + "f1": "my number: 098-765-4321;", + "f2": "my number: 123-456-7890;", + }, + want: map[string]string{ + "f1": "my number: 098-765-4321;", + "f2": "my number: 123-456-7890;", + }, + }, + { + name: "ignore_global", + cfg: config.Mask{ + Re: `(\d{3})-(\d{3})-(\d{4})`, + Groups: []int{0}, + Mode: config.MaskModeMask, + }, + globalIgnoreFields: []string{"f2"}, + input: map[string]string{ + "f1": "my number: 098-765-4321;", + "f2": "my number: 123-456-7890;", + }, + want: map[string]string{ + "f1": "my number: ************;", + "f2": "my number: 123-456-7890;", + }, + }, + { + name: "ignore_global_all", + cfg: config.Mask{ + Re: `(\d{3})-(\d{3})-(\d{4})`, + Groups: []int{0}, + Mode: config.MaskModeMask, + }, + globalIgnoreFields: []string{"f1", "f2"}, + input: map[string]string{ + "f1": "my number: 098-765-4321;", + "f2": "my number: 123-456-7890;", + }, + want: map[string]string{ + "f1": "my number: 098-765-4321;", + "f2": "my number: 123-456-7890;", + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + var ( + globalFields *maskFields + err error + ) + if len(tt.globalProcessFields) > 0 || len(tt.globalIgnoreFields) > 0 { + globalFields, err = parseFields(tt.globalProcessFields, tt.globalIgnoreFields) + require.NoError(t, err) + } + + mask, err := compileMask(tt.cfg, globalFields) + require.NoError(t, err) + + in := tt.input + mask.apply(in) + + for k, v := range tt.want { + gotV, ok := in[k] + require.True(t, ok) + assert.Equal(t, v, gotV, "wrong value with key %q", k) + } + }) + } +} + +func TestMaskApplyAgg(t *testing.T) { + tests := []struct { + name string + + cfg config.Mask + globalProcessFields []string + globalIgnoreFields []string + + field string + inputBuckets []string + wantBuckets []string + }{ + { + name: "full_mask", + cfg: config.Mask{ + Re: `(\d{3})-(\d{3})-(\d{4})`, + Groups: []int{0}, + Mode: config.MaskModeMask, + }, + inputBuckets: []string{ + "my number: 123_456_7890;", + "my number: 123-456-7890;", + }, + wantBuckets: []string{ + "my number: 123_456_7890;", + "my number: ************;", + }, + }, + { + name: "full_replace", + cfg: config.Mask{ + Re: `(\d{3})-(\d{3})-(\d{4})`, + Groups: []int{0}, + Mode: config.MaskModeReplace, + ReplaceWord: "", + }, + inputBuckets: []string{ + "my number: 123_456_7890;", + "my number: 123-456-7890;", + }, + wantBuckets: []string{ + "my number: 123_456_7890;", + "my number: ;", + }, + }, + { + name: "full_cut", + cfg: config.Mask{ + Re: `(\d{3})-(\d{3})-(\d{4})`, + Groups: []int{0}, + Mode: config.MaskModeCut, + }, + inputBuckets: []string{ + "my number: 123_456_7890;", + "my number: 123-456-7890;", + }, + wantBuckets: []string{ + "my number: 123_456_7890;", + "my number: ;", + }, + }, + { + name: "groups", + cfg: config.Mask{ + Re: `(\d{3})-(\d{3})-(\d{4})`, + Groups: []int{1, 3}, + Mode: config.MaskModeMask, + }, + inputBuckets: []string{ + "my number: 123_456_7890;", + "my number: 123-456-7890;", + }, + wantBuckets: []string{ + "my number: 123_456_7890;", + "my number: ***-456-****;", + }, + }, + { + name: "no_groups", + cfg: config.Mask{ + Re: `(\d{3})-(\d{3})-(\d{4})`, + Mode: config.MaskModeMask, + }, + inputBuckets: []string{ + "my number: 123_456_7890;", + "my number: 123-456-7890;", + }, + wantBuckets: []string{ + "my number: 123_456_7890;", + "my number: ************;", + }, + }, + { + name: "no_groups_in_re", + cfg: config.Mask{ + Re: `\d{3}-\d{3}-\d{4}`, + Groups: []int{1, 3}, + Mode: config.MaskModeMask, + }, + inputBuckets: []string{ + "my number: 123_456_7890;", + "my number: 123-456-7890;", + }, + wantBuckets: []string{ + "my number: 123_456_7890;", + "my number: ************;", + }, + }, + { + name: "groups_with_zero", + cfg: config.Mask{ + Re: `(\d{3})-(\d{3})-(\d{4})`, + Groups: []int{0, 1, 3}, + Mode: config.MaskModeMask, + }, + inputBuckets: []string{ + "my number: 123_456_7890;", + "my number: 123-456-7890;", + }, + wantBuckets: []string{ + "my number: 123_456_7890;", + "my number: ************;", + }, + }, + { + name: "process_local", + cfg: config.Mask{ + Re: `(\d{3})-(\d{3})-(\d{4})`, + Groups: []int{0}, + Mode: config.MaskModeMask, + ProcessFields: []string{"f1"}, + }, + globalProcessFields: []string{"f2"}, + field: "f1", + inputBuckets: []string{ + "my number: 123_456_7890;", + "my number: 123-456-7890;", + }, + wantBuckets: []string{ + "my number: 123_456_7890;", + "my number: ************;", + }, + }, + { + name: "process_local_not_exists", + cfg: config.Mask{ + Re: `(\d{3})-(\d{3})-(\d{4})`, + Groups: []int{0}, + Mode: config.MaskModeMask, + ProcessFields: []string{"f2"}, + }, + globalProcessFields: []string{"f1"}, + field: "f1", + inputBuckets: []string{ + "my number: 123_456_7890;", + "my number: 123-456-7890;", + }, + wantBuckets: []string{ + "my number: 123_456_7890;", + "my number: 123-456-7890;", + }, + }, + { + name: "process_global", + cfg: config.Mask{ + Re: `(\d{3})-(\d{3})-(\d{4})`, + Groups: []int{0}, + Mode: config.MaskModeMask, + }, + globalProcessFields: []string{"f1"}, + field: "f1", + inputBuckets: []string{ + "my number: 123_456_7890;", + "my number: 123-456-7890;", + }, + wantBuckets: []string{ + "my number: 123_456_7890;", + "my number: ************;", + }, + }, + { + name: "process_global_not_exists", + cfg: config.Mask{ + Re: `(\d{3})-(\d{3})-(\d{4})`, + Groups: []int{0}, + Mode: config.MaskModeMask, + }, + globalProcessFields: []string{"f2"}, + field: "f1", + inputBuckets: []string{ + "my number: 123_456_7890;", + "my number: 123-456-7890;", + }, + wantBuckets: []string{ + "my number: 123_456_7890;", + "my number: 123-456-7890;", + }, + }, + { + name: "ignore_local_masked", + cfg: config.Mask{ + Re: `(\d{3})-(\d{3})-(\d{4})`, + Groups: []int{0}, + Mode: config.MaskModeMask, + IgnoreFields: []string{"f2"}, + }, + globalIgnoreFields: []string{"f1"}, + field: "f1", + inputBuckets: []string{ + "my number: 123_456_7890;", + "my number: 123-456-7890;", + }, + wantBuckets: []string{ + "my number: 123_456_7890;", + "my number: ************;", + }, + }, + { + name: "ignore_local_not_masked", + cfg: config.Mask{ + Re: `(\d{3})-(\d{3})-(\d{4})`, + Groups: []int{0}, + Mode: config.MaskModeMask, + IgnoreFields: []string{"f1"}, + }, + globalIgnoreFields: []string{"f2"}, + field: "f1", + inputBuckets: []string{ + "my number: 123_456_7890;", + "my number: 123-456-7890;", + }, + wantBuckets: []string{ + "my number: 123_456_7890;", + "my number: 123-456-7890;", + }, + }, + { + name: "ignore_global_masked", + cfg: config.Mask{ + Re: `(\d{3})-(\d{3})-(\d{4})`, + Groups: []int{0}, + Mode: config.MaskModeMask, + }, + globalIgnoreFields: []string{"f2"}, + field: "f1", + inputBuckets: []string{ + "my number: 123_456_7890;", + "my number: 123-456-7890;", + }, + wantBuckets: []string{ + "my number: 123_456_7890;", + "my number: ************;", + }, + }, + { + name: "ignore_global_not_masked", + cfg: config.Mask{ + Re: `(\d{3})-(\d{3})-(\d{4})`, + Groups: []int{0}, + Mode: config.MaskModeMask, + }, + globalIgnoreFields: []string{"f1"}, + field: "f1", + inputBuckets: []string{ + "my number: 123_456_7890;", + "my number: 123-456-7890;", + }, + wantBuckets: []string{ + "my number: 123_456_7890;", + "my number: 123-456-7890;", + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + var ( + globalFields *maskFields + err error + ) + if len(tt.globalProcessFields) > 0 || len(tt.globalIgnoreFields) > 0 { + globalFields, err = parseFields(tt.globalProcessFields, tt.globalIgnoreFields) + require.NoError(t, err) + } + + mask, err := compileMask(tt.cfg, globalFields) + require.NoError(t, err) + + inBuckets := tt.inputBuckets + inBuckets = mask.applyAgg(tt.field, inBuckets) + + for i, v := range tt.wantBuckets { + gotV := inBuckets[i] + assert.Equal(t, v, gotV, "wrong bucket with index %d", i) + } + }) + } +} diff --git a/internal/pkg/mask/masker.go b/internal/pkg/mask/masker.go new file mode 100644 index 0000000..90adb62 --- /dev/null +++ b/internal/pkg/mask/masker.go @@ -0,0 +1,52 @@ +package mask + +import ( + "fmt" + + "github.com/ozontech/seq-ui/internal/app/config" +) + +type Masker struct { + masks []mask +} + +func New(cfg *config.Masking) (*Masker, error) { + if cfg == nil { + return nil, nil + } + + fields, err := parseFields(cfg.ProcessFields, cfg.IgnoreFields) + if err != nil { + return nil, fmt.Errorf("failed to parse fields: %w", err) + } + + masks, err := compileMasks(cfg.Masks, fields) + if err != nil { + return nil, fmt.Errorf("failed to compile masks: %w", err) + } + + return &Masker{ + masks: masks, + }, nil +} + +func (m *Masker) Mask(event map[string]string) { + if len(event) == 0 { + return + } + + for _, mask := range m.masks { + mask.apply(event) + } +} + +func (m *Masker) MaskAgg(field string, bucketKeys []string) []string { + if len(bucketKeys) == 0 { + return bucketKeys + } + + for _, mask := range m.masks { + bucketKeys = mask.applyAgg(field, bucketKeys) + } + return bucketKeys +} diff --git a/internal/pkg/mask/masker_test.go b/internal/pkg/mask/masker_test.go new file mode 100644 index 0000000..b8da9d2 --- /dev/null +++ b/internal/pkg/mask/masker_test.go @@ -0,0 +1,175 @@ +package mask + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/ozontech/seq-ui/internal/app/config" +) + +func TestMaskerMask(t *testing.T) { + tests := []struct { + name string + + cfg *config.Masking + + input map[string]string + want map[string]string + }{ + { + name: "multiple_full_mask", + cfg: &config.Masking{ + Masks: []config.Mask{ + { + Re: `(\d{3})-(\d{3})-(\d{4})`, + Mode: config.MaskModeMask, + }, + { + Re: `(test)`, + Mode: config.MaskModeMask, + }, + }, + }, + input: map[string]string{ + "f1": "my number: 123_456_7890 test;", + "f2": "my number: 123-456-7890;", + "f3": "my number: 123-456-7890 test;", + }, + want: map[string]string{ + "f1": "my number: 123_456_7890 ****;", + "f2": "my number: ************;", + "f3": "my number: ************ ****;", + }, + }, + { + name: "multiple_full_replace", + cfg: &config.Masking{ + Masks: []config.Mask{ + { + Re: `(\d{3})-(\d{3})-(\d{4})`, + Mode: config.MaskModeReplace, + ReplaceWord: "", + }, + { + Re: `(test)`, + Mode: config.MaskModeReplace, + ReplaceWord: "", + }, + }, + }, + input: map[string]string{ + "f1": "my number: 123_456_7890 test;", + "f2": "my number: 123-456-7890;", + "f3": "my number: 123-456-7890 test;", + }, + want: map[string]string{ + "f1": "my number: 123_456_7890 ;", + "f2": "my number: ;", + "f3": "my number: ;", + }, + }, + { + name: "multiple_full_cut", + cfg: &config.Masking{ + Masks: []config.Mask{ + { + Re: `(\d{3})-(\d{3})-(\d{4})`, + Mode: config.MaskModeCut, + }, + { + Re: `(test)`, + Mode: config.MaskModeCut, + }, + }, + }, + input: map[string]string{ + "f1": "my number: 123_456_7890 test;", + "f2": "my number: 123-456-7890;", + "f3": "my number: 123-456-7890 test;", + }, + want: map[string]string{ + "f1": "my number: 123_456_7890 ;", + "f2": "my number: ;", + "f3": "my number: ;", + }, + }, + { + name: "single_twice_full_mask", + cfg: &config.Masking{ + Masks: []config.Mask{ + { + Re: `(\d{3})-(\d{3})-(\d{4})`, + Mode: config.MaskModeMask, + }, + }, + }, + input: map[string]string{ + "f1": "my number: 123_456_7890 test;", + "f2": "my number: 123-456-7890 123-456-7890;", + }, + want: map[string]string{ + "f1": "my number: 123_456_7890 test;", + "f2": "my number: ************ ************;", + }, + }, + { + name: "single_twice_full_replace", + cfg: &config.Masking{ + Masks: []config.Mask{ + { + Re: `(\d{3})-(\d{3})-(\d{4})`, + Mode: config.MaskModeReplace, + ReplaceWord: "", + }, + }, + }, + input: map[string]string{ + "f1": "my number: 123_456_7890 test;", + "f2": "my number: 123-456-7890 123-456-7890;", + }, + want: map[string]string{ + "f1": "my number: 123_456_7890 test;", + "f2": "my number: ;", + }, + }, + { + name: "single_twice_full_cut", + cfg: &config.Masking{ + Masks: []config.Mask{ + { + Re: `(\d{3})-(\d{3})-(\d{4})`, + Mode: config.MaskModeCut, + }, + }, + }, + input: map[string]string{ + "f1": "my number: 123_456_7890 test;", + "f2": "my number: 123-456-7890 123-456-7890;", + }, + want: map[string]string{ + "f1": "my number: 123_456_7890 test;", + "f2": "my number: ;", + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + m, err := New(tt.cfg) + require.NoError(t, err) + + in := tt.input + m.Mask(in) + + for k, v := range tt.want { + gotV, ok := in[k] + require.True(t, ok) + assert.Equal(t, v, gotV, "wrong value with key %q", k) + } + }) + } +}