From 625487b8bb215c83899a964e4507d9946afce94d Mon Sep 17 00:00:00 2001 From: Andrei Cheboksarov <37665782+cheb0@users.noreply.github.com> Date: Mon, 6 Oct 2025 08:34:07 +0400 Subject: [PATCH 01/48] Fraction tests WIP --- frac/fraction_test.go | 248 ++++++++++++++++++++++++++++++++++++++++++ frac/nop_counter.go | 33 ++++++ 2 files changed, 281 insertions(+) create mode 100644 frac/fraction_test.go create mode 100644 frac/nop_counter.go diff --git a/frac/fraction_test.go b/frac/fraction_test.go new file mode 100644 index 00000000..9bd07893 --- /dev/null +++ b/frac/fraction_test.go @@ -0,0 +1,248 @@ +package frac + +import ( + "os" + "path/filepath" + "sync" + "testing" + "time" + + "github.com/alecthomas/units" + insaneJSON "github.com/ozontech/insane-json" + "github.com/ozontech/seq-db/cache" + "github.com/ozontech/seq-db/frac/sealed/lids" + "github.com/ozontech/seq-db/frac/sealed/seqids" + "github.com/ozontech/seq-db/frac/sealed/token" + "github.com/ozontech/seq-db/seq" + "github.com/ozontech/seq-db/storage" + "github.com/stretchr/testify/suite" +) + +type FractionTestSuite struct { + suite.Suite + tmpDir string + docsCache *cache.Cache[[]byte] + sortCache *cache.Cache[[]byte] + indexCache *IndexCache + readLimiter *storage.ReadLimiter + config *Config + mapping seq.Mapping + + fraction Fraction + + insertDocuments func(doc string) []seq.ID +} + +func (s *FractionTestSuite) SetupSuite() { + var err error + s.tmpDir, err = os.MkdirTemp("", "fraction_test_*") + s.Require().NoError(err) + + s.config = &Config{ + Search: SearchConfig{ + AggLimits: AggLimits{ + MaxFieldTokens: 1000, + MaxGroupTokens: 1000, + MaxTIDsPerFraction: 1000, + }, + }, + SkipSortDocs: false, + KeepMetaFile: false, + } + + s.docsCache = cache.NewCache[[]byte](cache.NewCleaner(uint64(10*units.MiB), nil), nil) + s.sortCache = cache.NewCache[[]byte](cache.NewCleaner(uint64(10*units.MiB), nil), nil) + s.indexCache = &IndexCache{ + MIDs: cache.NewCache[[]byte](cache.NewCleaner(uint64(10*units.MiB), nil), nil), + RIDs: cache.NewCache[[]byte](cache.NewCleaner(uint64(10*units.MiB), nil), nil), + Params: cache.NewCache[seqids.BlockParams](cache.NewCleaner(uint64(10*units.MiB), nil), nil), + LIDs: cache.NewCache[*lids.Block](cache.NewCleaner(uint64(10*units.MiB), nil), nil), + Tokens: cache.NewCache[*token.Block](cache.NewCleaner(uint64(10*units.MiB), nil), nil), + TokenTable: cache.NewCache[token.Table](cache.NewCleaner(uint64(10*units.MiB), nil), nil), + Registry: cache.NewCache[[]byte](cache.NewCleaner(uint64(10*units.MiB), nil), nil), + } + s.readLimiter = storage.NewReadLimiter(2, NopCounter{}) + s.mapping = seq.Mapping{ + "k8s_pod": seq.NewSingleType(seq.TokenizerTypeKeyword, "", 0), + "k8s_namespace": seq.NewSingleType(seq.TokenizerTypeKeyword, "", 0), + "k8s_container": seq.NewSingleType(seq.TokenizerTypeKeyword, "", 0), + "message": seq.NewSingleType(seq.TokenizerTypeText, "", 0), + "level": seq.NewSingleType(seq.TokenizerTypeKeyword, "", 0), + "service": seq.NewSingleType(seq.TokenizerTypeKeyword, "", 0), + "status": seq.NewSingleType(seq.TokenizerTypeKeyword, "", 0), + } +} + +func (s *FractionTestSuite) TearDownTest() { + active, ok := s.fraction.(*Active) + if ok { + active.Release() + } + s.fraction.Suicide() + + err := os.RemoveAll(s.tmpDir) + s.NoError(err, "Failed to remove tmp dir") +} + +func (s *FractionTestSuite) InsertIntoActive(active *Active, docs ...string) []seq.ID { + docProvider := NewDocProvider() + ids := make([]seq.ID, 0, len(docs)) + + for i, docStr := range docs { + docBytes := []byte(docStr) + root := insaneJSON.Spawn() + err := root.DecodeBytes(docBytes) + s.Require().NoError(err, "not a valid JSON", i) + + id := seq.ID{ + MID: seq.MID(time.Now().UnixMilli()) + seq.MID(i*1000), // 1 second apart + RID: seq.RID(i + 1), + } + ids = append(ids, id) + tokens := s.extractTokens(root) + docProvider.Append(docBytes, root, id, tokens) + } + + docsBlock, metasBlock := docProvider.Provide() + + var wg sync.WaitGroup + wg.Add(1) + err := active.Append(docsBlock, metasBlock, &wg) + s.Require().NoError(err, "Append should succeed") + + wg.Wait() + return ids +} + +func (s *FractionTestSuite) extractTokens(root *insaneJSON.Root) []seq.Token { + tokens := make([]seq.Token, 0) + + for fieldName, mappingTypes := range s.mapping { + fieldValue := root.Dig(fieldName) + if fieldValue == nil { + continue + } + + fieldBytes := fieldValue.AsBytes() + if len(fieldBytes) == 0 { + continue + } + + for _, _ = range mappingTypes.All { + tokens = append(tokens, seq.Token{ + Field: []byte(fieldName), + Val: fieldBytes, + }) + } + } + + return tokens +} + +func (s *FractionTestSuite) TestInsertSingleDocument() { + doc := `{"time":14589329034, "message":"single test document","level":"info","service":"test-service","status":"ok"}` + + ids := s.insertDocuments(doc) + + s.True(s.fraction.Contains(ids[0].MID)) +} + +/* +func (s *FractionTestSuite) checkContains(fraction Fraction, ids []seq.ID) { + info := fraction.Info() + s.Equal(uint32(len(ids)), info.DocsTotal, "Fraction should contain %d documents", len(ids)) + + if len(ids) > 0 { + s.True(fraction.Contains(ids[0].MID), "Fraction should contain first document") + s.True(fraction.Contains(ids[len(ids)-1].MID), "Fraction should contain last document") + + s.True(fraction.IsIntersecting(ids[0].MID, ids[len(ids)-1].MID), + "Fraction should intersect with document range") + } +} +*/ + +type ActiveFractionSuite struct { + FractionTestSuite +} + +func (s *ActiveFractionSuite) SetupTest() { + baseName := filepath.Join(s.tmpDir, "test_fraction") + indexer := NewActiveIndexer(4, 10) + indexer.Start() + + active := NewActive( + baseName, + indexer, + s.readLimiter, + s.docsCache, + s.sortCache, + s.config, + ) + + s.fraction = active + s.insertDocuments = func(doc string) []seq.ID { + return s.InsertIntoActive(active, doc) + } +} + +func (s *ActiveFractionSuite) TearDownTest() { + s.FractionTestSuite.TearDownTest() +} + +type SealedFractionSuite struct { + FractionTestSuite +} + +func (s *SealedFractionSuite) SetupTest() { + s.insertDocuments = func(doc string) []seq.ID { + baseFile := filepath.Join(s.tmpDir, "test_fraction") + indexer := NewActiveIndexer(4, 10) + indexer.Start() + + active := NewActive( + baseFile, + indexer, + s.readLimiter, + s.docsCache, + s.sortCache, + s.config, + ) + + ids := s.InsertIntoActive(active, doc) + + sealParams := SealParams{ + IDsZstdLevel: 3, + LIDsZstdLevel: 3, + TokenListZstdLevel: 3, + DocsPositionsZstdLevel: 3, + TokenTableZstdLevel: 3, + DocBlocksZstdLevel: 3, + DocBlockSize: 1024 * 1024, + } + + preloaded, err := Seal(active, sealParams) + s.Require().NoError(err, "Sealing should succeed") + + sealed := NewSealedPreloaded( + baseFile, + preloaded, + s.readLimiter, + s.indexCache, + s.docsCache, + s.config, + ) + s.fraction = sealed + active.Release() + return ids + } +} + +func (s *SealedFractionSuite) TearDownTest() { + s.FractionTestSuite.TearDownTest() +} + +func TestFractionSuites(t *testing.T) { + suite.Run(t, new(ActiveFractionSuite)) + suite.Run(t, new(SealedFractionSuite)) +} diff --git a/frac/nop_counter.go b/frac/nop_counter.go new file mode 100644 index 00000000..c7b8666c --- /dev/null +++ b/frac/nop_counter.go @@ -0,0 +1,33 @@ +package frac + +import ( + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_model/go" +) + +type NopCounter struct { +} + +func (n NopCounter) Desc() *prometheus.Desc { + return nil +} + +func (n NopCounter) Write(metric *io_prometheus_client.Metric) error { + return nil +} + +func (n NopCounter) Describe(descs chan<- *prometheus.Desc) { + +} + +func (n NopCounter) Collect(metrics chan<- prometheus.Metric) { + +} + +func (n NopCounter) Inc() { + +} + +func (n NopCounter) Add(f float64) { + +} From f55468146fc236809dc81d3e6415bf9261a7eea3 Mon Sep 17 00:00:00 2001 From: Andrei Cheboksarov <37665782+cheb0@users.noreply.github.com> Date: Mon, 6 Oct 2025 10:32:57 +0400 Subject: [PATCH 02/48] Use new sealing API --- frac/fraction_test.go | 65 +++++++++++++++++++++++++++++++++---------- 1 file changed, 51 insertions(+), 14 deletions(-) diff --git a/frac/fraction_test.go b/frac/fraction_test.go index 9bd07893..9086585e 100644 --- a/frac/fraction_test.go +++ b/frac/fraction_test.go @@ -10,7 +10,9 @@ import ( "github.com/alecthomas/units" insaneJSON "github.com/ozontech/insane-json" "github.com/ozontech/seq-db/cache" + "github.com/ozontech/seq-db/frac/common" "github.com/ozontech/seq-db/frac/sealed/lids" + "github.com/ozontech/seq-db/frac/sealed/sealing" "github.com/ozontech/seq-db/frac/sealed/seqids" "github.com/ozontech/seq-db/frac/sealed/token" "github.com/ozontech/seq-db/seq" @@ -30,7 +32,7 @@ type FractionTestSuite struct { fraction Fraction - insertDocuments func(doc string) []seq.ID + insertDocuments func(docs ...string) []seq.ID } func (s *FractionTestSuite) SetupSuite() { @@ -74,11 +76,13 @@ func (s *FractionTestSuite) SetupSuite() { } func (s *FractionTestSuite) TearDownTest() { - active, ok := s.fraction.(*Active) - if ok { - active.Release() + if s.fraction != nil { + active, ok := s.fraction.(*Active) + if ok { + active.Release() + } + s.fraction.Suicide() } - s.fraction.Suicide() err := os.RemoveAll(s.tmpDir) s.NoError(err, "Failed to remove tmp dir") @@ -148,6 +152,22 @@ func (s *FractionTestSuite) TestInsertSingleDocument() { } /* +func (s *FractionTestSuite) TestInsertMultipleDocuments() { + docs := []string{ + `{"time":14589329034, "message":"first test document","level":"info","service":"test-service","status":"ok"}`, + `{"time":14589329035, "message":"second test document","level":"error","service":"test-service","status":"fail"}`, + `{"time":14589329036, "message":"third test document","level":"debug","service":"another-service","status":"ok"}`, + } + + ids := s.insertDocuments(docs...) + + s.Len(ids, 3, "Should return 3 document IDs") + s.True(s.fraction.Contains(ids[0].MID), "Fraction should contain first document") + s.True(s.fraction.Contains(ids[1].MID), "Fraction should contain second document") + s.True(s.fraction.Contains(ids[2].MID), "Fraction should contain third document") +} +*/ + func (s *FractionTestSuite) checkContains(fraction Fraction, ids []seq.ID) { info := fraction.Info() s.Equal(uint32(len(ids)), info.DocsTotal, "Fraction should contain %d documents", len(ids)) @@ -160,13 +180,16 @@ func (s *FractionTestSuite) checkContains(fraction Fraction, ids []seq.ID) { "Fraction should intersect with document range") } } -*/ type ActiveFractionSuite struct { FractionTestSuite } func (s *ActiveFractionSuite) SetupTest() { + // TODO setup test + err := os.MkdirAll(s.tmpDir, 0755) + s.Require().NoError(err, "Failed to create tmp dir") + baseName := filepath.Join(s.tmpDir, "test_fraction") indexer := NewActiveIndexer(4, 10) indexer.Start() @@ -181,8 +204,8 @@ func (s *ActiveFractionSuite) SetupTest() { ) s.fraction = active - s.insertDocuments = func(doc string) []seq.ID { - return s.InsertIntoActive(active, doc) + s.insertDocuments = func(docs ...string) []seq.ID { + return s.InsertIntoActive(active, docs...) } } @@ -195,7 +218,11 @@ type SealedFractionSuite struct { } func (s *SealedFractionSuite) SetupTest() { - s.insertDocuments = func(doc string) []seq.ID { + // Ensure tmpDir exists + err := os.MkdirAll(s.tmpDir, 0755) + s.Require().NoError(err, "Failed to create tmp dir") + + s.insertDocuments = func(docs ...string) []seq.ID { baseFile := filepath.Join(s.tmpDir, "test_fraction") indexer := NewActiveIndexer(4, 10) indexer.Start() @@ -209,9 +236,15 @@ func (s *SealedFractionSuite) SetupTest() { s.config, ) - ids := s.InsertIntoActive(active, doc) + ids := s.InsertIntoActive(active, docs...) - sealParams := SealParams{ + if len(ids) == 0 { + // TODO fail test? + active.Release() + return ids + } + + sealParams := common.SealParams{ IDsZstdLevel: 3, LIDsZstdLevel: 3, TokenListZstdLevel: 3, @@ -221,8 +254,12 @@ func (s *SealedFractionSuite) SetupTest() { DocBlockSize: 1024 * 1024, } - preloaded, err := Seal(active, sealParams) - s.Require().NoError(err, "Sealing should succeed") + time.Sleep(100 * time.Millisecond) + + activeSealingSource, err := NewActiveSealingSource(active, sealParams) + s.Require().NoError(err, "Sealing source creation failed") + preloaded, err := sealing.Seal(activeSealingSource, sealParams) + s.Require().NoError(err, "Sealing failed") sealed := NewSealedPreloaded( baseFile, @@ -233,7 +270,7 @@ func (s *SealedFractionSuite) SetupTest() { s.config, ) s.fraction = sealed - active.Release() + // active.Release() return ids } } From 93d2e10d9ae9138fac4ca576f60fd2a139699ca5 Mon Sep 17 00:00:00 2001 From: Andrei Cheboksarov <37665782+cheb0@users.noreply.github.com> Date: Mon, 6 Oct 2025 14:34:55 +0400 Subject: [PATCH 03/48] active and sealed suites work --- frac/fraction_test.go | 160 ++++++++++++++++++++++++++---------------- 1 file changed, 101 insertions(+), 59 deletions(-) diff --git a/frac/fraction_test.go b/frac/fraction_test.go index 9086585e..6dd48a79 100644 --- a/frac/fraction_test.go +++ b/frac/fraction_test.go @@ -36,56 +36,13 @@ type FractionTestSuite struct { } func (s *FractionTestSuite) SetupSuite() { - var err error - s.tmpDir, err = os.MkdirTemp("", "fraction_test_*") - s.Require().NoError(err) - - s.config = &Config{ - Search: SearchConfig{ - AggLimits: AggLimits{ - MaxFieldTokens: 1000, - MaxGroupTokens: 1000, - MaxTIDsPerFraction: 1000, - }, - }, - SkipSortDocs: false, - KeepMetaFile: false, - } - - s.docsCache = cache.NewCache[[]byte](cache.NewCleaner(uint64(10*units.MiB), nil), nil) - s.sortCache = cache.NewCache[[]byte](cache.NewCleaner(uint64(10*units.MiB), nil), nil) - s.indexCache = &IndexCache{ - MIDs: cache.NewCache[[]byte](cache.NewCleaner(uint64(10*units.MiB), nil), nil), - RIDs: cache.NewCache[[]byte](cache.NewCleaner(uint64(10*units.MiB), nil), nil), - Params: cache.NewCache[seqids.BlockParams](cache.NewCleaner(uint64(10*units.MiB), nil), nil), - LIDs: cache.NewCache[*lids.Block](cache.NewCleaner(uint64(10*units.MiB), nil), nil), - Tokens: cache.NewCache[*token.Block](cache.NewCleaner(uint64(10*units.MiB), nil), nil), - TokenTable: cache.NewCache[token.Table](cache.NewCleaner(uint64(10*units.MiB), nil), nil), - Registry: cache.NewCache[[]byte](cache.NewCleaner(uint64(10*units.MiB), nil), nil), - } - s.readLimiter = storage.NewReadLimiter(2, NopCounter{}) - s.mapping = seq.Mapping{ - "k8s_pod": seq.NewSingleType(seq.TokenizerTypeKeyword, "", 0), - "k8s_namespace": seq.NewSingleType(seq.TokenizerTypeKeyword, "", 0), - "k8s_container": seq.NewSingleType(seq.TokenizerTypeKeyword, "", 0), - "message": seq.NewSingleType(seq.TokenizerTypeText, "", 0), - "level": seq.NewSingleType(seq.TokenizerTypeKeyword, "", 0), - "service": seq.NewSingleType(seq.TokenizerTypeKeyword, "", 0), - "status": seq.NewSingleType(seq.TokenizerTypeKeyword, "", 0), - } } -func (s *FractionTestSuite) TearDownTest() { - if s.fraction != nil { - active, ok := s.fraction.(*Active) - if ok { - active.Release() - } - s.fraction.Suicide() - } - - err := os.RemoveAll(s.tmpDir) - s.NoError(err, "Failed to remove tmp dir") +func (s *FractionTestSuite) SetupTest() { + // TODO doesn't work. check + // var err error + // s.tmpDir, err = os.MkdirTemp("", "fraction_test_*") + // s.Require().NoError(err) } func (s *FractionTestSuite) InsertIntoActive(active *Active, docs ...string) []seq.ID { @@ -139,6 +96,10 @@ func (s *FractionTestSuite) extractTokens(root *insaneJSON.Root) []seq.Token { }) } } + tokens = append(tokens, seq.Token{ + Field: []byte("_all_"), + Val: []byte(""), + }) return tokens } @@ -151,7 +112,6 @@ func (s *FractionTestSuite) TestInsertSingleDocument() { s.True(s.fraction.Contains(ids[0].MID)) } -/* func (s *FractionTestSuite) TestInsertMultipleDocuments() { docs := []string{ `{"time":14589329034, "message":"first test document","level":"info","service":"test-service","status":"ok"}`, @@ -166,7 +126,6 @@ func (s *FractionTestSuite) TestInsertMultipleDocuments() { s.True(s.fraction.Contains(ids[1].MID), "Fraction should contain second document") s.True(s.fraction.Contains(ids[2].MID), "Fraction should contain third document") } -*/ func (s *FractionTestSuite) checkContains(fraction Fraction, ids []seq.ID) { info := fraction.Info() @@ -186,9 +145,44 @@ type ActiveFractionSuite struct { } func (s *ActiveFractionSuite) SetupTest() { + s.config = &Config{ + Search: SearchConfig{ + AggLimits: AggLimits{ + MaxFieldTokens: 1000, + MaxGroupTokens: 1000, + MaxTIDsPerFraction: 1000, + }, + }, + SkipSortDocs: true, // TODO enabling will fail tests + KeepMetaFile: false, + } + + s.docsCache = cache.NewCache[[]byte](cache.NewCleaner(uint64(10*units.MiB), nil), nil) + s.sortCache = cache.NewCache[[]byte](cache.NewCleaner(uint64(10*units.MiB), nil), nil) + s.indexCache = &IndexCache{ + MIDs: cache.NewCache[[]byte](cache.NewCleaner(uint64(10*units.MiB), nil), nil), + RIDs: cache.NewCache[[]byte](cache.NewCleaner(uint64(10*units.MiB), nil), nil), + Params: cache.NewCache[seqids.BlockParams](cache.NewCleaner(uint64(10*units.MiB), nil), nil), + LIDs: cache.NewCache[*lids.Block](cache.NewCleaner(uint64(10*units.MiB), nil), nil), + Tokens: cache.NewCache[*token.Block](cache.NewCleaner(uint64(10*units.MiB), nil), nil), + TokenTable: cache.NewCache[token.Table](cache.NewCleaner(uint64(10*units.MiB), nil), nil), + Registry: cache.NewCache[[]byte](cache.NewCleaner(uint64(10*units.MiB), nil), nil), + } + s.readLimiter = storage.NewReadLimiter(2, NopCounter{}) + s.mapping = seq.Mapping{ + "k8s_pod": seq.NewSingleType(seq.TokenizerTypeKeyword, "", 0), + "k8s_namespace": seq.NewSingleType(seq.TokenizerTypeKeyword, "", 0), + "k8s_container": seq.NewSingleType(seq.TokenizerTypeKeyword, "", 0), + "message": seq.NewSingleType(seq.TokenizerTypeText, "", 0), + "level": seq.NewSingleType(seq.TokenizerTypeKeyword, "", 0), + "service": seq.NewSingleType(seq.TokenizerTypeKeyword, "", 0), + "status": seq.NewSingleType(seq.TokenizerTypeKeyword, "", 0), + } + // TODO setup test - err := os.MkdirAll(s.tmpDir, 0755) - s.Require().NoError(err, "Failed to create tmp dir") + var err error + s.tmpDir, err = os.MkdirTemp("", "fraction_test_*") + s.Require().NoError(err) baseName := filepath.Join(s.tmpDir, "test_fraction") indexer := NewActiveIndexer(4, 10) @@ -210,7 +204,16 @@ func (s *ActiveFractionSuite) SetupTest() { } func (s *ActiveFractionSuite) TearDownTest() { - s.FractionTestSuite.TearDownTest() + if s.fraction != nil { + active, ok := s.fraction.(*Active) + if ok { + active.Release() + } + s.fraction.Suicide() + } + + err := os.RemoveAll(s.tmpDir) + s.NoError(err, "Failed to remove tmp dir") } type SealedFractionSuite struct { @@ -218,9 +221,44 @@ type SealedFractionSuite struct { } func (s *SealedFractionSuite) SetupTest() { + s.config = &Config{ + Search: SearchConfig{ + AggLimits: AggLimits{ + MaxFieldTokens: 1000, + MaxGroupTokens: 1000, + MaxTIDsPerFraction: 1000, + }, + }, + SkipSortDocs: true, // TODO enabling will fail tests + KeepMetaFile: false, + } + + s.docsCache = cache.NewCache[[]byte](cache.NewCleaner(uint64(10*units.MiB), nil), nil) + s.sortCache = cache.NewCache[[]byte](cache.NewCleaner(uint64(10*units.MiB), nil), nil) + s.indexCache = &IndexCache{ + MIDs: cache.NewCache[[]byte](cache.NewCleaner(uint64(10*units.MiB), nil), nil), + RIDs: cache.NewCache[[]byte](cache.NewCleaner(uint64(10*units.MiB), nil), nil), + Params: cache.NewCache[seqids.BlockParams](cache.NewCleaner(uint64(10*units.MiB), nil), nil), + LIDs: cache.NewCache[*lids.Block](cache.NewCleaner(uint64(10*units.MiB), nil), nil), + Tokens: cache.NewCache[*token.Block](cache.NewCleaner(uint64(10*units.MiB), nil), nil), + TokenTable: cache.NewCache[token.Table](cache.NewCleaner(uint64(10*units.MiB), nil), nil), + Registry: cache.NewCache[[]byte](cache.NewCleaner(uint64(10*units.MiB), nil), nil), + } + s.readLimiter = storage.NewReadLimiter(2, NopCounter{}) + s.mapping = seq.Mapping{ + "k8s_pod": seq.NewSingleType(seq.TokenizerTypeKeyword, "", 0), + "k8s_namespace": seq.NewSingleType(seq.TokenizerTypeKeyword, "", 0), + "k8s_container": seq.NewSingleType(seq.TokenizerTypeKeyword, "", 0), + "message": seq.NewSingleType(seq.TokenizerTypeText, "", 0), + "level": seq.NewSingleType(seq.TokenizerTypeKeyword, "", 0), + "service": seq.NewSingleType(seq.TokenizerTypeKeyword, "", 0), + "status": seq.NewSingleType(seq.TokenizerTypeKeyword, "", 0), + } + // Ensure tmpDir exists - err := os.MkdirAll(s.tmpDir, 0755) - s.Require().NoError(err, "Failed to create tmp dir") + var err error + s.tmpDir, err = os.MkdirTemp("", "fraction_test_*") + s.Require().NoError(err) s.insertDocuments = func(docs ...string) []seq.ID { baseFile := filepath.Join(s.tmpDir, "test_fraction") @@ -254,10 +292,9 @@ func (s *SealedFractionSuite) SetupTest() { DocBlockSize: 1024 * 1024, } - time.Sleep(100 * time.Millisecond) - activeSealingSource, err := NewActiveSealingSource(active, sealParams) s.Require().NoError(err, "Sealing source creation failed") + preloaded, err := sealing.Seal(activeSealingSource, sealParams) s.Require().NoError(err, "Sealing failed") @@ -270,13 +307,18 @@ func (s *SealedFractionSuite) SetupTest() { s.config, ) s.fraction = sealed - // active.Release() + active.Release() return ids } } func (s *SealedFractionSuite) TearDownTest() { - s.FractionTestSuite.TearDownTest() + if s.fraction != nil { + s.fraction.Suicide() + } + + err := os.RemoveAll(s.tmpDir) + s.NoError(err, "Failed to remove tmp dir") } func TestFractionSuites(t *testing.T) { From d43200019d2a3922647fad3ccde31b58a7d8df69 Mon Sep 17 00:00:00 2001 From: Andrei Cheboksarov <37665782+cheb0@users.noreply.github.com> Date: Mon, 6 Oct 2025 15:50:13 +0400 Subject: [PATCH 04/48] assert search --- frac/fraction_test.go | 104 ++++++++++++++++++++++++++++++------------ 1 file changed, 74 insertions(+), 30 deletions(-) diff --git a/frac/fraction_test.go b/frac/fraction_test.go index 6dd48a79..81a7dfda 100644 --- a/frac/fraction_test.go +++ b/frac/fraction_test.go @@ -1,6 +1,8 @@ package frac import ( + "context" + "math" "os" "path/filepath" "sync" @@ -11,10 +13,12 @@ import ( insaneJSON "github.com/ozontech/insane-json" "github.com/ozontech/seq-db/cache" "github.com/ozontech/seq-db/frac/common" + "github.com/ozontech/seq-db/frac/processor" "github.com/ozontech/seq-db/frac/sealed/lids" "github.com/ozontech/seq-db/frac/sealed/sealing" "github.com/ozontech/seq-db/frac/sealed/seqids" "github.com/ozontech/seq-db/frac/sealed/token" + "github.com/ozontech/seq-db/parser" "github.com/ozontech/seq-db/seq" "github.com/ozontech/seq-db/storage" "github.com/stretchr/testify/suite" @@ -36,6 +40,15 @@ type FractionTestSuite struct { } func (s *FractionTestSuite) SetupSuite() { + s.mapping = seq.Mapping{ + "k8s_pod": seq.NewSingleType(seq.TokenizerTypeKeyword, "", 0), + "k8s_namespace": seq.NewSingleType(seq.TokenizerTypeKeyword, "", 0), + "k8s_container": seq.NewSingleType(seq.TokenizerTypeKeyword, "", 0), + "message": seq.NewSingleType(seq.TokenizerTypeText, "", 0), + "level": seq.NewSingleType(seq.TokenizerTypeKeyword, "", 0), + "service": seq.NewSingleType(seq.TokenizerTypeKeyword, "", 0), + "status": seq.NewSingleType(seq.TokenizerTypeKeyword, "", 0), + } } func (s *FractionTestSuite) SetupTest() { @@ -69,7 +82,7 @@ func (s *FractionTestSuite) InsertIntoActive(active *Active, docs ...string) []s var wg sync.WaitGroup wg.Add(1) err := active.Append(docsBlock, metasBlock, &wg) - s.Require().NoError(err, "Append should succeed") + s.Require().NoError(err, "append to active failed") wg.Wait() return ids @@ -104,27 +117,75 @@ func (s *FractionTestSuite) extractTokens(root *insaneJSON.Root) []seq.Token { return tokens } -func (s *FractionTestSuite) TestInsertSingleDocument() { - doc := `{"time":14589329034, "message":"single test document","level":"info","service":"test-service","status":"ok"}` +func (s *FractionTestSuite) AssertSearch(query string, originalDocs []string, indexes []int) { + seqql, err := parser.ParseSeqQL(query, s.mapping) + s.Require().NoError(err, "failed to parse query: %s", query) - ids := s.insertDocuments(doc) + dp, release := s.fraction.DataProvider(context.Background()) + defer release() - s.True(s.fraction.Contains(ids[0].MID)) + params := processor.SearchParams{ + AST: seqql.Root, + From: seq.MID(0), + To: seq.MID(math.MaxUint64), + Limit: math.MaxInt32, + } + + qpr, err := dp.Search(params) + s.Require().NoError(err, "search failed for query: %s", query) + + s.Require().Equal(len(indexes), qpr.IDs.Len(), + "expected %d documents but found %d for query: %s", len(indexes), qpr.IDs.Len(), query) + + docs, err := dp.Fetch(qpr.IDs.IDs()) + s.Require().NoError(err, "failed to fetch documents for IDs: %v", qpr.IDs.IDs()) + + fetchedDocs := make([]string, 0, len(docs)) + for _, doc := range docs { + fetchedDocs = append(fetchedDocs, string(doc)) + } + + for i, fetchedDoc := range fetchedDocs { + if i < len(indexes) { + expectedDoc := originalDocs[indexes[i]] + s.Require().Equal(expectedDoc, fetchedDoc, + "document at index %d doesn't match expected document at original index %d for query: %s", + i, indexes[i], query) + } + } } -func (s *FractionTestSuite) TestInsertMultipleDocuments() { +func (s *FractionTestSuite) TestContainsDocuments() { docs := []string{ - `{"time":14589329034, "message":"first test document","level":"info","service":"test-service","status":"ok"}`, - `{"time":14589329035, "message":"second test document","level":"error","service":"test-service","status":"fail"}`, - `{"time":14589329036, "message":"third test document","level":"debug","service":"another-service","status":"ok"}`, + `{"time":100, "message":"first test document","level":"info","service":"test-service","status":"ok"}`, + `{"time":101, "message":"second test document","level":"error","service":"test-service","status":"fail"}`, + `{"time":102, "message":"third test document","level":"debug","service":"another-service","status":"ok"}`, } ids := s.insertDocuments(docs...) s.Len(ids, 3, "Should return 3 document IDs") - s.True(s.fraction.Contains(ids[0].MID), "Fraction should contain first document") - s.True(s.fraction.Contains(ids[1].MID), "Fraction should contain second document") - s.True(s.fraction.Contains(ids[2].MID), "Fraction should contain third document") + s.True(s.fraction.Contains(ids[0].MID)) + s.True(s.fraction.Contains(ids[1].MID)) + s.True(s.fraction.Contains(ids[2].MID)) +} + +func (s *FractionTestSuite) TestSearchKeyword() { + docs := []string{ + `{"time":100, "message":"first test document","level":"info","service":"test-service","status":"ok"}`, + `{"time":101, "message":"second test document","level":"error","service":"test-service","status":"fail"}`, + `{"time":102, "message":"third test document","level":"debug","service":"another-service","status":"ok"}`, + } + + ids := s.insertDocuments(docs...) + s.Len(ids, 3, "Should return 3 document IDs") + + s.AssertSearch("level:info", docs, []int{0}) + s.AssertSearch("level:error", docs, []int{1}) + s.AssertSearch("level:debug", docs, []int{2}) + + s.AssertSearch("service:test-service", docs, []int{1, 0}) + s.AssertSearch("service:another-service", docs, []int{2}) } func (s *FractionTestSuite) checkContains(fraction Fraction, ids []seq.ID) { @@ -169,15 +230,6 @@ func (s *ActiveFractionSuite) SetupTest() { Registry: cache.NewCache[[]byte](cache.NewCleaner(uint64(10*units.MiB), nil), nil), } s.readLimiter = storage.NewReadLimiter(2, NopCounter{}) - s.mapping = seq.Mapping{ - "k8s_pod": seq.NewSingleType(seq.TokenizerTypeKeyword, "", 0), - "k8s_namespace": seq.NewSingleType(seq.TokenizerTypeKeyword, "", 0), - "k8s_container": seq.NewSingleType(seq.TokenizerTypeKeyword, "", 0), - "message": seq.NewSingleType(seq.TokenizerTypeText, "", 0), - "level": seq.NewSingleType(seq.TokenizerTypeKeyword, "", 0), - "service": seq.NewSingleType(seq.TokenizerTypeKeyword, "", 0), - "status": seq.NewSingleType(seq.TokenizerTypeKeyword, "", 0), - } // TODO setup test var err error @@ -245,17 +297,9 @@ func (s *SealedFractionSuite) SetupTest() { Registry: cache.NewCache[[]byte](cache.NewCleaner(uint64(10*units.MiB), nil), nil), } s.readLimiter = storage.NewReadLimiter(2, NopCounter{}) - s.mapping = seq.Mapping{ - "k8s_pod": seq.NewSingleType(seq.TokenizerTypeKeyword, "", 0), - "k8s_namespace": seq.NewSingleType(seq.TokenizerTypeKeyword, "", 0), - "k8s_container": seq.NewSingleType(seq.TokenizerTypeKeyword, "", 0), - "message": seq.NewSingleType(seq.TokenizerTypeText, "", 0), - "level": seq.NewSingleType(seq.TokenizerTypeKeyword, "", 0), - "service": seq.NewSingleType(seq.TokenizerTypeKeyword, "", 0), - "status": seq.NewSingleType(seq.TokenizerTypeKeyword, "", 0), - } // Ensure tmpDir exists + // TODO here? var err error s.tmpDir, err = os.MkdirTemp("", "fraction_test_*") s.Require().NoError(err) From f51e17b227967318e1bcfa396dfd7c9dd45bda98 Mon Sep 17 00:00:00 2001 From: Andrei Cheboksarov <37665782+cheb0@users.noreply.github.com> Date: Mon, 6 Oct 2025 19:59:50 +0400 Subject: [PATCH 05/48] fix cache lock issue --- frac/fraction_test.go | 70 ++++++++++++++++++++++--------------------- 1 file changed, 36 insertions(+), 34 deletions(-) diff --git a/frac/fraction_test.go b/frac/fraction_test.go index 81a7dfda..e69e9259 100644 --- a/frac/fraction_test.go +++ b/frac/fraction_test.go @@ -27,7 +27,6 @@ import ( type FractionTestSuite struct { suite.Suite tmpDir string - docsCache *cache.Cache[[]byte] sortCache *cache.Cache[[]byte] indexCache *IndexCache readLimiter *storage.ReadLimiter @@ -40,6 +39,17 @@ type FractionTestSuite struct { } func (s *FractionTestSuite) SetupSuite() { + s.config = &Config{ + Search: SearchConfig{ + AggLimits: AggLimits{ + MaxFieldTokens: 1000, + MaxGroupTokens: 1000, + MaxTIDsPerFraction: 1000, + }, + }, + SkipSortDocs: true, // TODO enabling will fail tests + KeepMetaFile: false, + } s.mapping = seq.Mapping{ "k8s_pod": seq.NewSingleType(seq.TokenizerTypeKeyword, "", 0), "k8s_namespace": seq.NewSingleType(seq.TokenizerTypeKeyword, "", 0), @@ -175,19 +185,37 @@ func (s *FractionTestSuite) TestSearchKeyword() { `{"time":100, "message":"first test document","level":"info","service":"test-service","status":"ok"}`, `{"time":101, "message":"second test document","level":"error","service":"test-service","status":"fail"}`, `{"time":102, "message":"third test document","level":"debug","service":"another-service","status":"ok"}`, + `{"time":103, "message":"fourth test document","level":"info","service":"another-service","status":"ok"}`, } - ids := s.insertDocuments(docs...) - s.Len(ids, 3, "Should return 3 document IDs") + s.insertDocuments(docs...) - s.AssertSearch("level:info", docs, []int{0}) + s.AssertSearch("level:info", docs, []int{3, 0}) s.AssertSearch("level:error", docs, []int{1}) s.AssertSearch("level:debug", docs, []int{2}) s.AssertSearch("service:test-service", docs, []int{1, 0}) - s.AssertSearch("service:another-service", docs, []int{2}) + s.AssertSearch("service:another-service", docs, []int{3, 2}) + + s.AssertSearch("status:ok", docs, []int{3, 2, 0}) + s.AssertSearch("status:fail", docs, []int{1}) } +/* +TODO not working now because we must properly tokenize message +func (s *FractionTestSuite) TestSearchFullText() { + docs := []string{ + `{"time":100, "message":"first test document","level":"info","service":"test-service","status":"ok"}`, + `{"time":101, "message":"second test document","level":"error","service":"test-service","status":"fail"}`, + `{"time":102, "message":"third test document","level":"debug","service":"another-service","status":"ok"}`, + `{"time":103, "message":"fourth test document","level":"info","service":"another-service","status":"ok"}`, + } + + s.insertDocuments(docs...) + + s.AssertSearch("message:document", docs, []int{3, 2, 1, 0}) +}*/ + func (s *FractionTestSuite) checkContains(fraction Fraction, ids []seq.ID) { info := fraction.Info() s.Equal(uint32(len(ids)), info.DocsTotal, "Fraction should contain %d documents", len(ids)) @@ -206,19 +234,6 @@ type ActiveFractionSuite struct { } func (s *ActiveFractionSuite) SetupTest() { - s.config = &Config{ - Search: SearchConfig{ - AggLimits: AggLimits{ - MaxFieldTokens: 1000, - MaxGroupTokens: 1000, - MaxTIDsPerFraction: 1000, - }, - }, - SkipSortDocs: true, // TODO enabling will fail tests - KeepMetaFile: false, - } - - s.docsCache = cache.NewCache[[]byte](cache.NewCleaner(uint64(10*units.MiB), nil), nil) s.sortCache = cache.NewCache[[]byte](cache.NewCleaner(uint64(10*units.MiB), nil), nil) s.indexCache = &IndexCache{ MIDs: cache.NewCache[[]byte](cache.NewCleaner(uint64(10*units.MiB), nil), nil), @@ -244,7 +259,7 @@ func (s *ActiveFractionSuite) SetupTest() { baseName, indexer, s.readLimiter, - s.docsCache, + cache.NewCache[[]byte](cache.NewCleaner(uint64(10*units.MiB), nil), nil), s.sortCache, s.config, ) @@ -273,19 +288,6 @@ type SealedFractionSuite struct { } func (s *SealedFractionSuite) SetupTest() { - s.config = &Config{ - Search: SearchConfig{ - AggLimits: AggLimits{ - MaxFieldTokens: 1000, - MaxGroupTokens: 1000, - MaxTIDsPerFraction: 1000, - }, - }, - SkipSortDocs: true, // TODO enabling will fail tests - KeepMetaFile: false, - } - - s.docsCache = cache.NewCache[[]byte](cache.NewCleaner(uint64(10*units.MiB), nil), nil) s.sortCache = cache.NewCache[[]byte](cache.NewCleaner(uint64(10*units.MiB), nil), nil) s.indexCache = &IndexCache{ MIDs: cache.NewCache[[]byte](cache.NewCleaner(uint64(10*units.MiB), nil), nil), @@ -313,7 +315,7 @@ func (s *SealedFractionSuite) SetupTest() { baseFile, indexer, s.readLimiter, - s.docsCache, + cache.NewCache[[]byte](cache.NewCleaner(uint64(10*units.MiB), nil), nil), s.sortCache, s.config, ) @@ -347,7 +349,7 @@ func (s *SealedFractionSuite) SetupTest() { preloaded, s.readLimiter, s.indexCache, - s.docsCache, + cache.NewCache[[]byte](cache.NewCleaner(uint64(10*units.MiB), nil), nil), s.config, ) s.fraction = sealed From 7299d94bac069bb16da0fcc77eec01564c32ea4e Mon Sep 17 00:00:00 2001 From: Evgenii Guguchkin Date: Tue, 7 Oct 2025 02:50:15 +0300 Subject: [PATCH 06/48] chore: make existing token extraction code reusable --- frac/active_indexer.go | 5 +- frac/doc_provider.go | 117 ------------------ frac/meta_data_collector.go | 139 +--------------------- fracmanager/fetcher_test.go | 4 +- fracmanager/fracmanager_test.go | 9 +- fracmanager/sealed_frac_cache_test.go | 7 +- fracmanager/sealer_test.go | 7 +- {frac => indexer}/compress.go | 14 +-- {proxy/bulk => indexer}/indexer.go | 14 +-- indexer/meta_data.go | 103 ++++++++++++++++ indexer/metrics.go | 39 ++++++ {proxy/bulk => indexer}/processor.go | 89 ++++++++++---- {proxy/bulk => indexer}/processor_test.go | 2 +- indexer/test_doc_provider.go | 135 +++++++++++++++++++++ proxy/bulk/ingestor.go | 126 ++++---------------- proxy/bulk/ingestor_test.go | 65 +++++----- proxy/bulk/metrics.go | 29 +++++ seq/tokenizer.go | 33 ----- storeapi/grpc_v1_test.go | 7 +- tokenizer/exists_tokenizer.go | 4 +- tokenizer/keyword_tokenizer.go | 5 +- tokenizer/keyword_tokenizer_test.go | 34 +++--- tokenizer/meta_token.go | 43 +++++++ tokenizer/path_tokenizer.go | 7 +- tokenizer/path_tokenizer_test.go | 60 +++++----- tokenizer/text_tokenizer.go | 9 +- tokenizer/text_tokenizer_test.go | 94 +++++++-------- tokenizer/tokenizer.go | 4 +- 28 files changed, 607 insertions(+), 597 deletions(-) delete mode 100644 frac/doc_provider.go rename {frac => indexer}/compress.go (76%) rename {proxy/bulk => indexer}/indexer.go (93%) create mode 100644 indexer/meta_data.go create mode 100644 indexer/metrics.go rename {proxy/bulk => indexer}/processor.go (63%) rename {proxy/bulk => indexer}/processor_test.go (99%) create mode 100644 indexer/test_doc_provider.go create mode 100644 proxy/bulk/metrics.go create mode 100644 tokenizer/meta_token.go diff --git a/frac/active_indexer.go b/frac/active_indexer.go index 0594b247..0422c105 100644 --- a/frac/active_indexer.go +++ b/frac/active_indexer.go @@ -7,6 +7,7 @@ import ( "go.uber.org/zap" "github.com/ozontech/seq-db/bytespool" + "github.com/ozontech/seq-db/indexer" "github.com/ozontech/seq-db/logger" "github.com/ozontech/seq-db/metric" "github.com/ozontech/seq-db/metric/stopwatch" @@ -95,7 +96,7 @@ func (ai *ActiveIndexer) Stop() { var metaDataPool = sync.Pool{ New: func() any { - return new(MetaData) + return new(indexer.MetaData) }, } @@ -121,7 +122,7 @@ func (ai *ActiveIndexer) appendWorker(index int) { collector.Init(blockIndex) parsingMetric := sw.Start("metas_parsing") - meta := metaDataPool.Get().(*MetaData) + meta := metaDataPool.Get().(*indexer.MetaData) for len(metasPayload) > 0 { n := binary.LittleEndian.Uint32(metasPayload) metasPayload = metasPayload[4:] diff --git a/frac/doc_provider.go b/frac/doc_provider.go deleted file mode 100644 index bad513c8..00000000 --- a/frac/doc_provider.go +++ /dev/null @@ -1,117 +0,0 @@ -package frac - -import ( - "encoding/binary" - "math/rand" - "time" - - insaneJSON "github.com/ozontech/insane-json" - - "github.com/ozontech/seq-db/consts" - "github.com/ozontech/seq-db/seq" - "github.com/ozontech/seq-db/storage" -) - -type DocProvider struct { - DocCount int - Docs []byte - Metas []byte - buf []byte -} - -func NewDocProvider() *DocProvider { - return &DocProvider{ - Docs: make([]byte, 0), - buf: make([]byte, 4), - } -} - -func (dp *DocProvider) appendDoc(doc []byte) { - dp.DocCount++ - numBuf := make([]byte, 4) - binary.LittleEndian.PutUint32(numBuf, uint32(len(doc))) - dp.Docs = append(dp.Docs, numBuf...) - dp.Docs = append(dp.Docs, doc...) -} - -func (dp *DocProvider) appendMeta(docLen int, id seq.ID, tokens []seq.Token) { - dp.buf = dp.buf[:4] - dp.buf = encodeMeta(dp.buf, tokens, id, docLen) - binary.LittleEndian.PutUint32(dp.buf, uint32(len(dp.buf)-4)) - - dp.Metas = append(dp.Metas, dp.buf...) -} - -func (dp *DocProvider) Append(doc []byte, docRoot *insaneJSON.Root, id seq.ID, tokens []seq.Token) { - if id.MID == 0 { - // this case runs only in the integration tests - t, _ := ExtractDocTime(docRoot) - id = seq.NewID(t, uint64(rand.Int63())) - } - - dp.appendMeta(len(doc), id, tokens) - dp.appendDoc(doc) -} - -func (dp *DocProvider) TryReset() { - dp.DocCount = 0 - dp.Docs = dp.Docs[:0] - dp.Metas = dp.Metas[:0] - -} - -func (dp *DocProvider) Provide() (storage.DocBlock, storage.DocBlock) { - c := GetDocsMetasCompressor(-1, -1) - c.CompressDocsAndMetas(dp.Docs, dp.Metas) - return c.DocsMetas() -} - -func encodeMeta(buf []byte, tokens []seq.Token, id seq.ID, size int) []byte { - metaTokens := make([]MetaToken, 0, len(tokens)) - for _, t := range tokens { - metaTokens = append(metaTokens, MetaToken{ - Key: t.Field, - Value: t.Val, - }) - } - md := MetaData{ - ID: id, - Size: uint32(size), - Tokens: metaTokens, - } - return md.MarshalBinaryTo(buf) -} - -// extractDocTime extract time from doc by supported fields and return that field -// if fields are absent or values are not parsable, zero time and empty string are returned -func extractDocTime(docRoot *insaneJSON.Root) (time.Time, []string) { - var t time.Time - var err error - for _, field := range consts.TimeFields { - timeNode := docRoot.Dig(field...) - if timeNode == nil { - continue - } - - timeVal := timeNode.AsString() - for _, f := range consts.TimeFormats { - t, err = time.Parse(f, timeVal) - if err == nil { - return t, field - } - } - } - - return t, nil -} - -// ExtractDocTime extracts timestamp from doc -// It searches by one of supported field name and parses by supported formats -// If no field was found or not parsable it returns time.Now() -func ExtractDocTime(docRoot *insaneJSON.Root) (time.Time, []string) { - t, f := extractDocTime(docRoot) - if t.IsZero() { - t = time.Now() - } - return t, f -} diff --git a/frac/meta_data_collector.go b/frac/meta_data_collector.go index 26e16e3d..35e81e03 100644 --- a/frac/meta_data_collector.go +++ b/frac/meta_data_collector.go @@ -1,145 +1,14 @@ package frac import ( - "encoding/binary" - "fmt" "math" - "slices" + "github.com/ozontech/seq-db/indexer" "github.com/ozontech/seq-db/seq" + "github.com/ozontech/seq-db/tokenizer" "github.com/ozontech/seq-db/util" ) -type MetaData struct { - ID seq.ID - // Size of an uncompressed document in bytes. - Size uint32 - Tokens []MetaToken -} - -// String used in tests for human-readable output. -func (m MetaData) String() string { - return fmt.Sprintf("ID: %s, Size: %d, Tokens: %s", m.ID, m.Size, m.Tokens) -} - -const metadataMagic = uint16(0x3F7C) // 2 magic bytes of the binary encoded metadata. - -func IsItBinaryEncodedMetaData(b []byte) bool { - if len(b) < 2 { - return false - } - return binary.LittleEndian.Uint16(b) == metadataMagic -} - -func (m *MetaData) MarshalBinaryTo(b []byte) []byte { - // Append "magic bytes" to determine that this is binary encoded metadata. - b = binary.LittleEndian.AppendUint16(b, metadataMagic) - - // Append current binary version of the metadata. - const version = 1 - b = binary.LittleEndian.AppendUint16(b, version) - - // Encode seq.ID. - b = binary.LittleEndian.AppendUint64(b, uint64(m.ID.MID)) - b = binary.LittleEndian.AppendUint64(b, uint64(m.ID.RID)) - - // Encode BlockLength. - b = binary.LittleEndian.AppendUint32(b, m.Size) - - // Encode tokens. - toksLen := len(m.Tokens) - b = binary.LittleEndian.AppendUint32(b, uint32(toksLen)) - for i := 0; i < toksLen; i++ { - b = m.Tokens[i].MarshalBinaryTo(b) - } - - return b -} - -func (m *MetaData) UnmarshalBinary(b []byte) error { - if !IsItBinaryEncodedMetaData(b) { - return fmt.Errorf("invalid metadata magic bytes") - } - b = b[2:] - - version := binary.LittleEndian.Uint16(b) - b = b[2:] - - switch version { - case 1: - return m.unmarshalVersion1(b) - default: - return fmt.Errorf("unimplemented metadata version: %d", version) - } -} - -func (m *MetaData) unmarshalVersion1(b []byte) error { - // Decode seq.ID. - m.ID.MID = seq.MID(binary.LittleEndian.Uint64(b)) - b = b[8:] - m.ID.RID = seq.RID(binary.LittleEndian.Uint64(b)) - b = b[8:] - - // Decode uncompressed document size. - m.Size = binary.LittleEndian.Uint32(b) - b = b[4:] - - // Decode tokens length. - toksLen := binary.LittleEndian.Uint32(b) - b = b[4:] - - // Decode tokens. - m.Tokens = m.Tokens[:0] - m.Tokens = slices.Grow(m.Tokens, int(toksLen)) - var err error - for i := uint32(0); i < toksLen; i++ { - var token MetaToken - b, err = token.UnmarshalBinary(b) - if err != nil { - return err - } - m.Tokens = append(m.Tokens, token) - } - return nil -} - -type MetaToken struct { - Key []byte - Value []byte -} - -func (m *MetaToken) MarshalBinaryTo(b []byte) []byte { - b = binary.LittleEndian.AppendUint32(b, uint32(len(m.Key))) - b = append(b, m.Key...) - b = binary.LittleEndian.AppendUint32(b, uint32(len(m.Value))) - b = append(b, m.Value...) - return b -} - -func (m *MetaToken) UnmarshalBinary(b []byte) ([]byte, error) { - keyLen := binary.LittleEndian.Uint32(b) - b = b[4:] - if int(keyLen) > len(b) { - return nil, fmt.Errorf("malformed key") - } - m.Key = b[:keyLen] - b = b[keyLen:] - - valueLen := binary.LittleEndian.Uint32(b) - b = b[4:] - if int(valueLen) > len(b) { - return nil, fmt.Errorf("malformed value") - } - m.Value = b[:valueLen] - b = b[valueLen:] - return b, nil -} - -// String used in tests for human-readable output. -func (m MetaToken) String() string { - return fmt.Sprintf("(%s: %s)", m.Key, m.Value) -} - // metaDataCollector is a collection of metadata // metaDataCollector can reuse its fields to process many requests in a row one after another // metaDataCollector keep track of the size of its fields to avoid memory leak @@ -187,7 +56,7 @@ func newMetaDataCollector() *metaDataCollector { return &c } -func (c *metaDataCollector) AppendMeta(m MetaData) { +func (c *metaDataCollector) AppendMeta(m indexer.MetaData) { var pos seq.DocPos if m.Size == 0 { // This is a nested document that must point to the parent. @@ -272,7 +141,7 @@ func (c *metaDataCollector) Filter(appended []seq.ID) { c.tokensIndex = tokensIndex } -func (c *metaDataCollector) extractTokens(tokens []MetaToken) { +func (c *metaDataCollector) extractTokens(tokens []tokenizer.MetaToken) { for _, token := range tokens { key, value := token.Key, token.Value pos := len(c.tokensBuf) diff --git a/fracmanager/fetcher_test.go b/fracmanager/fetcher_test.go index 9ebc09e2..199bf1cf 100644 --- a/fracmanager/fetcher_test.go +++ b/fracmanager/fetcher_test.go @@ -6,7 +6,7 @@ import ( "github.com/stretchr/testify/assert" - "github.com/ozontech/seq-db/frac" + "github.com/ozontech/seq-db/indexer" "github.com/ozontech/seq-db/seq" "github.com/ozontech/seq-db/tests/common" ) @@ -24,7 +24,7 @@ func testFetcher(t *testing.T, fetcher *Fetcher, hasHint bool) { fm, err := newFracManagerWithBackgroundStart(t.Context(), config) assert.NoError(t, err) - dp := frac.NewDocProvider() + dp := indexer.NewTestDocProvider() addDummyDoc(t, fm, dp, seq.SimpleID(1)) fm.WaitIdle() info := fm.Active().Info() diff --git a/fracmanager/fracmanager_test.go b/fracmanager/fracmanager_test.go index 989c7b2d..71fd38d2 100644 --- a/fracmanager/fracmanager_test.go +++ b/fracmanager/fracmanager_test.go @@ -12,6 +12,7 @@ import ( "github.com/ozontech/seq-db/frac" "github.com/ozontech/seq-db/frac/common" + "github.com/ozontech/seq-db/indexer" "github.com/ozontech/seq-db/seq" testscommon "github.com/ozontech/seq-db/tests/common" ) @@ -26,16 +27,16 @@ func newFracManagerWithBackgroundStart(ctx context.Context, config *Config) (*Fr return fracManager, nil } -func addDummyDoc(t *testing.T, fm *FracManager, dp *frac.DocProvider, seqID seq.ID) { +func addDummyDoc(t *testing.T, fm *FracManager, dp *indexer.TestDocProvider, seqID seq.ID) { doc := []byte("document") - dp.Append(doc, nil, seqID, seq.Tokens("service:100500", "k8s_pod", "_all_:")) + dp.Append(doc, nil, seqID, "service:100500", "k8s_pod", "_all_:") docs, metas := dp.Provide() err := fm.Append(context.Background(), docs, metas) assert.NoError(t, err) } func MakeSomeFractions(t *testing.T, fm *FracManager) { - dp := frac.NewDocProvider() + dp := indexer.NewTestDocProvider() addDummyDoc(t, fm, dp, seq.SimpleID(1)) fm.seal(fm.rotate()) @@ -114,7 +115,7 @@ func TestMatureMode(t *testing.T) { } id := 1 - dp := frac.NewDocProvider() + dp := indexer.NewTestDocProvider() makeSealedFrac := func(fm *FracManager, docsPerFrac int) { for i := 0; i < docsPerFrac; i++ { addDummyDoc(t, fm, dp, seq.SimpleID(id)) diff --git a/fracmanager/sealed_frac_cache_test.go b/fracmanager/sealed_frac_cache_test.go index 5a1dbdaf..92b44814 100644 --- a/fracmanager/sealed_frac_cache_test.go +++ b/fracmanager/sealed_frac_cache_test.go @@ -14,6 +14,7 @@ import ( "github.com/ozontech/seq-db/consts" "github.com/ozontech/seq-db/frac" "github.com/ozontech/seq-db/frac/common" + "github.com/ozontech/seq-db/indexer" "github.com/ozontech/seq-db/seq" testscommon "github.com/ozontech/seq-db/tests/common" ) @@ -280,7 +281,7 @@ func TestFracInfoSavedToCache(t *testing.T) { }) assert.NoError(t, err) - dp := frac.NewDocProvider() + dp := indexer.NewTestDocProvider() metaRoot := insaneJSON.Spawn() defer insaneJSON.Release(metaRoot) @@ -365,7 +366,7 @@ func TestExtraFractionsRemoved(t *testing.T) { assert.NoError(t, err) - dp := frac.NewDocProvider() + dp := indexer.NewTestDocProvider() infos := map[string]*common.Info{} for i := 1; i < times+1; i++ { @@ -425,7 +426,7 @@ func TestMissingCacheFilesDeleted(t *testing.T) { }) assert.NoError(t, err) - dp := frac.NewDocProvider() + dp := indexer.NewTestDocProvider() metaRoot := insaneJSON.Spawn() defer insaneJSON.Release(metaRoot) diff --git a/fracmanager/sealer_test.go b/fracmanager/sealer_test.go index 7b6baaa0..a159f1f9 100644 --- a/fracmanager/sealer_test.go +++ b/fracmanager/sealer_test.go @@ -21,6 +21,7 @@ import ( "github.com/ozontech/seq-db/frac/common" "github.com/ozontech/seq-db/frac/sealed" "github.com/ozontech/seq-db/frac/sealed/sealing" + "github.com/ozontech/seq-db/indexer" "github.com/ozontech/seq-db/seq" testscommon "github.com/ozontech/seq-db/tests/common" ) @@ -49,7 +50,7 @@ func fillActiveFraction(active *frac.Active) error { k := 0 wg := sync.WaitGroup{} - dp := frac.NewDocProvider() + dp := indexer.NewTestDocProvider() for i := 0; i < muliplier; i++ { dp.TryReset() @@ -66,12 +67,12 @@ func fillActiveFraction(active *frac.Active) error { } id := seq.NewID(time.Now(), uint64(rand.Int63())) - dp.Append(doc, docRoot, id, seq.Tokens( + dp.Append(doc, docRoot, id, "_all_:", "service:service"+strconv.Itoa(rand.Intn(200)), "k8s_pod1:"+strconv.Itoa(k%100000), "k8s_pod2:"+strconv.Itoa(k%1000000), - )) + ) } docs, metas := dp.Provide() wg.Add(1) diff --git a/frac/compress.go b/indexer/compress.go similarity index 76% rename from frac/compress.go rename to indexer/compress.go index 4faf2238..9ce4f88a 100644 --- a/frac/compress.go +++ b/indexer/compress.go @@ -1,24 +1,14 @@ -package frac +package indexer import ( "sync" "github.com/alecthomas/units" - "github.com/prometheus/client_golang/prometheus" - "github.com/prometheus/client_golang/prometheus/promauto" "github.com/ozontech/seq-db/storage" "github.com/ozontech/seq-db/util" ) -var bulkSizeAfterCompression = promauto.NewHistogram(prometheus.HistogramOpts{ - Namespace: "seq_db_ingestor", - Subsystem: "bulk", - Name: "bulk_size_after_compression", - Help: "Bulk request sizes after compression", - Buckets: prometheus.ExponentialBuckets(1024, 2, 16), -}) - type DocsMetasCompressor struct { docsCompressLevel int metaCompressLevel int @@ -53,8 +43,6 @@ func (c *DocsMetasCompressor) CompressDocsAndMetas(docs, meta []byte) { c.docsBuf = storage.CompressDocBlock(docs, c.docsBuf, c.docsCompressLevel) // Compress metas block. c.metaBuf = storage.CompressDocBlock(meta, c.metaBuf, c.metaCompressLevel) - // Set compressed doc block size. - c.metaBuf.SetExt1(uint64(len(c.docsBuf))) bulkSizeAfterCompression.Observe(float64(len(c.docsBuf) + len(c.metaBuf))) } diff --git a/proxy/bulk/indexer.go b/indexer/indexer.go similarity index 93% rename from proxy/bulk/indexer.go rename to indexer/indexer.go index 8eb4eb4e..fd774a80 100644 --- a/proxy/bulk/indexer.go +++ b/indexer/indexer.go @@ -1,4 +1,4 @@ -package bulk +package indexer import ( "bytes" @@ -7,7 +7,6 @@ import ( insaneJSON "github.com/ozontech/insane-json" - "github.com/ozontech/seq-db/frac" "github.com/ozontech/seq-db/seq" "github.com/ozontech/seq-db/tokenizer" ) @@ -18,8 +17,7 @@ import ( type indexer struct { tokenizers map[seq.TokenizerType]tokenizer.Tokenizer mapping seq.Mapping - - metas []frac.MetaData + metas []MetaData } // Index returns a list of metadata of the given json node. @@ -45,7 +43,7 @@ func (i *indexer) Index(node *insaneJSON.Node, id seq.ID, size uint32) { } } -func (i *indexer) Metas() []frac.MetaData { +func (i *indexer) Metas() []MetaData { return i.metas } @@ -109,7 +107,7 @@ func (i *indexer) decodeInternal(n *insaneJSON.Node, id seq.ID, name []byte, met } } -func (i *indexer) index(tokenTypes seq.MappingTypes, tokens []frac.MetaToken, key, value []byte) []frac.MetaToken { +func (i *indexer) index(tokenTypes seq.MappingTypes, tokens []tokenizer.MetaToken, key, value []byte) []tokenizer.MetaToken { for _, tokenType := range tokenTypes.All { if _, has := i.tokenizers[tokenType.TokenizerType]; !has { continue @@ -125,7 +123,7 @@ func (i *indexer) index(tokenTypes seq.MappingTypes, tokens []frac.MetaToken, ke if value != nil { tokens = i.tokenizers[tokenType.TokenizerType].Tokenize(tokens, title, value, tokenType.MaxSize) } - tokens = append(tokens, frac.MetaToken{ + tokens = append(tokens, tokenizer.MetaToken{ Key: seq.ExistsTokenName, Value: title, }) @@ -153,7 +151,7 @@ func (i *indexer) appendMeta(id seq.ID, size uint32) { i.metas[n].ID = id i.metas[n].Size = size - i.metas[n].Tokens = append(i.metas[n].Tokens, frac.MetaToken{ + i.metas[n].Tokens = append(i.metas[n].Tokens, tokenizer.MetaToken{ Key: seq.AllTokenName, Value: []byte{}, }) diff --git a/indexer/meta_data.go b/indexer/meta_data.go new file mode 100644 index 00000000..241f219f --- /dev/null +++ b/indexer/meta_data.go @@ -0,0 +1,103 @@ +package indexer + +import ( + "encoding/binary" + "fmt" + "slices" + + "github.com/ozontech/seq-db/seq" + "github.com/ozontech/seq-db/tokenizer" +) + +type MetaData struct { + ID seq.ID + // Size of an uncompressed document in bytes. + Size uint32 + Tokens []tokenizer.MetaToken +} + +// String used in tests for human-readable output. +func (m MetaData) String() string { + return fmt.Sprintf("ID: %s, Size: %d, Tokens: %s", m.ID, m.Size, m.Tokens) +} + +const metadataMagic = uint16(0x3F7C) // 2 magic bytes of the binary encoded metadata. + +func IsItBinaryEncodedMetaData(b []byte) bool { + if len(b) < 2 { + return false + } + return binary.LittleEndian.Uint16(b) == metadataMagic +} + +func (m *MetaData) MarshalBinaryTo(b []byte) []byte { + // Append "magic bytes" to determine that this is binary encoded metadata. + b = binary.LittleEndian.AppendUint16(b, metadataMagic) + + // Append current binary version of the metadata. + const version = 1 + b = binary.LittleEndian.AppendUint16(b, version) + + // Encode seq.ID. + b = binary.LittleEndian.AppendUint64(b, uint64(m.ID.MID)) + b = binary.LittleEndian.AppendUint64(b, uint64(m.ID.RID)) + + // Encode BlockLength. + b = binary.LittleEndian.AppendUint32(b, m.Size) + + // Encode tokens. + toksLen := len(m.Tokens) + b = binary.LittleEndian.AppendUint32(b, uint32(toksLen)) + for i := 0; i < toksLen; i++ { + b = m.Tokens[i].MarshalBinaryTo(b) + } + + return b +} + +func (m *MetaData) UnmarshalBinary(b []byte) error { + if !IsItBinaryEncodedMetaData(b) { + return fmt.Errorf("invalid metadata magic bytes") + } + b = b[2:] + + version := binary.LittleEndian.Uint16(b) + b = b[2:] + + switch version { + case 1: + return m.unmarshalVersion1(b) + default: + return fmt.Errorf("unimplemented metadata version: %d", version) + } +} + +func (m *MetaData) unmarshalVersion1(b []byte) error { + // Decode seq.ID. + m.ID.MID = seq.MID(binary.LittleEndian.Uint64(b)) + b = b[8:] + m.ID.RID = seq.RID(binary.LittleEndian.Uint64(b)) + b = b[8:] + + // Decode uncompressed document size. + m.Size = binary.LittleEndian.Uint32(b) + b = b[4:] + + // Decode tokens length. + toksLen := binary.LittleEndian.Uint32(b) + b = b[4:] + + // Decode tokens. + m.Tokens = m.Tokens[:0] + m.Tokens = slices.Grow(m.Tokens, int(toksLen)) + var err error + for i := uint32(0); i < toksLen; i++ { + var token tokenizer.MetaToken + b, err = token.UnmarshalBinary(b) + if err != nil { + return err + } + m.Tokens = append(m.Tokens, token) + } + return nil +} diff --git a/indexer/metrics.go b/indexer/metrics.go new file mode 100644 index 00000000..a146de23 --- /dev/null +++ b/indexer/metrics.go @@ -0,0 +1,39 @@ +package indexer + +import ( + "github.com/ozontech/seq-db/metric" + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promauto" +) + +var ( + bulkParseDurationSeconds = promauto.NewHistogram(prometheus.HistogramOpts{ + Namespace: "seq_db_ingestor", + Subsystem: "bulk", + Name: "parse_duration_seconds", + Help: "", + Buckets: metric.SecondsBuckets, + }) + + notAnObjectTotal = promauto.NewCounter(prometheus.CounterOpts{ + Namespace: "seq_db_ingestor", + Subsystem: "bulk", + Name: "not_an_object_errors_total", + Help: "Number of ingestion errors due to incorrect document type", + }) + + bulkTimeErrors = promauto.NewCounterVec(prometheus.CounterOpts{ + Namespace: "seq_db_ingestor", + Subsystem: "bulk", + Name: "time_errors_total", + Help: "errors for time rules violation in events", + }, []string{"cause"}) + + bulkSizeAfterCompression = promauto.NewHistogram(prometheus.HistogramOpts{ + Namespace: "seq_db_ingestor", + Subsystem: "bulk", + Name: "bulk_size_after_compression", + Help: "Bulk request sizes after compression", + Buckets: prometheus.ExponentialBuckets(1024, 2, 16), + }) +) diff --git a/proxy/bulk/processor.go b/indexer/processor.go similarity index 63% rename from proxy/bulk/processor.go rename to indexer/processor.go index 4b3d4a82..dbf7c106 100644 --- a/proxy/bulk/processor.go +++ b/indexer/processor.go @@ -1,38 +1,34 @@ -package bulk +package indexer import ( + "encoding/binary" + "encoding/json" "errors" + "fmt" "math" "math/rand/v2" "time" insaneJSON "github.com/ozontech/insane-json" - "github.com/prometheus/client_golang/prometheus" - "github.com/prometheus/client_golang/prometheus/promauto" + "go.uber.org/zap" "github.com/ozontech/seq-db/consts" - "github.com/ozontech/seq-db/frac" + + "github.com/ozontech/seq-db/logger" "github.com/ozontech/seq-db/seq" "github.com/ozontech/seq-db/tokenizer" "github.com/ozontech/seq-db/util" ) var ( - bulkTimeErrors = promauto.NewCounterVec(prometheus.CounterOpts{ - Namespace: "seq_db_ingestor", - Subsystem: "bulk", - Name: "time_errors_total", - Help: "errors for time rules violation in events", - }, []string{"cause"}) - parseErrors = bulkTimeErrors.WithLabelValues("parse_error") delays = bulkTimeErrors.WithLabelValues("delay") futureDelays = bulkTimeErrors.WithLabelValues("future_delay") ) -// processor accumulates meta and docs from a single bulk +// Processor accumulates meta and docs from a single bulk // returns bulk request ready to be sent to store -type processor struct { +type Processor struct { proxyIndex uint64 drift time.Duration futureDrift time.Duration @@ -46,15 +42,15 @@ func init() { insaneJSON.MapUseThreshold = math.MaxInt32 } -func newBulkProcessor(mapping seq.Mapping, tokenizers map[seq.TokenizerType]tokenizer.Tokenizer, drift, futureDrift time.Duration, index uint64) *processor { - return &processor{ +func NewProcessor(mapping seq.Mapping, tokenizers map[seq.TokenizerType]tokenizer.Tokenizer, drift, futureDrift time.Duration, index uint64) *Processor { + return &Processor{ proxyIndex: index, drift: drift, futureDrift: futureDrift, indexer: &indexer{ tokenizers: tokenizers, mapping: mapping, - metas: []frac.MetaData{}, + metas: []MetaData{}, }, decoder: insaneJSON.Spawn(), } @@ -62,7 +58,7 @@ func newBulkProcessor(mapping seq.Mapping, tokenizers map[seq.TokenizerType]toke var errNotAnObject = errors.New("not an object") -func (p *processor) Process(doc []byte, requestTime time.Time) ([]byte, []frac.MetaData, error) { +func (p *Processor) ProcessDoc(doc []byte, requestTime time.Time) ([]byte, []MetaData, error) { err := p.decoder.DecodeBytes(doc) if err != nil { return nil, nil, err @@ -72,8 +68,7 @@ func (p *processor) Process(doc []byte, requestTime time.Time) ([]byte, []frac.M } docTime, timeField := extractDocTime(p.decoder.Node, requestTime) docDelay := requestTime.Sub(docTime) - if timeField == nil { - // couldn't parse given event time + if timeField == nil { // couldn't parse given event time parseErrors.Inc() } else if documentDelayed(docDelay, p.drift, p.futureDrift) { docTime = requestTime @@ -88,11 +83,11 @@ func (p *processor) Process(doc []byte, requestTime time.Time) ([]byte, []frac.M func documentDelayed(docDelay, drift, futureDrift time.Duration) bool { delayed := false - if docDelay > drift { + if docDelay > drift && drift > 0 { delays.Inc() delayed = true } - if docDelay < 0 && docDelay.Abs() > futureDrift { + if docDelay < 0 && docDelay.Abs() > futureDrift && futureDrift > 0 { futureDelays.Inc() delayed = true } @@ -182,3 +177,55 @@ func parseESTime(t string) (time.Time, bool) { return time.Date(int(year), time.Month(month), int(day), int(hour), int(minute), int(second), int(nsecs), time.UTC), true } + +func (p *Processor) ProcessBulk( + requestTime time.Time, + dstDocs, dstMeta []byte, + readNext func() ([]byte, error), +) (int, []byte, []byte, error) { + parseDuration := time.Duration(0) + + total := 0 + for { + originalDoc, err := readNext() + if err != nil { + return 0, nil, nil, fmt.Errorf("reading next document: %s", err) + } + if originalDoc == nil { + break + } + parseStart := time.Now() + doc, meta, err := p.ProcessDoc(originalDoc, requestTime) + if err != nil { + if errors.Is(err, errNotAnObject) { + logger.Error("unable to process the document because it is not an object", zap.Any("document", json.RawMessage(originalDoc))) + notAnObjectTotal.Inc() + continue + } + return 0, nil, nil, fmt.Errorf("processing doc: %s", err) + } + parseDuration += time.Since(parseStart) + + total++ + dstDocs = binary.LittleEndian.AppendUint32(dstDocs, uint32(len(doc))) + dstDocs = append(dstDocs, doc...) + for _, m := range meta { + dstMeta = marshalAppendMeta(dstMeta, m) + } + } + + bulkParseDurationSeconds.Observe(parseDuration.Seconds()) + + return total, dstDocs, dstMeta, nil +} + +func marshalAppendMeta(dst []byte, meta MetaData) []byte { + metaLenPosition := len(dst) + dst = append(dst, make([]byte, 4)...) + dst = meta.MarshalBinaryTo(dst) + // Metadata length = len(slice after append) - len(slice before append). + metaLen := uint32(len(dst) - metaLenPosition - 4) + // Put metadata length before metadata bytes. + binary.LittleEndian.PutUint32(dst[metaLenPosition:], metaLen) + return dst +} diff --git a/proxy/bulk/processor_test.go b/indexer/processor_test.go similarity index 99% rename from proxy/bulk/processor_test.go rename to indexer/processor_test.go index 9818ea7b..0b25c5f3 100644 --- a/proxy/bulk/processor_test.go +++ b/indexer/processor_test.go @@ -1,4 +1,4 @@ -package bulk +package indexer import ( "testing" diff --git a/indexer/test_doc_provider.go b/indexer/test_doc_provider.go new file mode 100644 index 00000000..316464d2 --- /dev/null +++ b/indexer/test_doc_provider.go @@ -0,0 +1,135 @@ +package indexer + +import ( + "encoding/binary" + "math/rand" + "strings" + "time" + + insaneJSON "github.com/ozontech/insane-json" + + "github.com/ozontech/seq-db/consts" + "github.com/ozontech/seq-db/seq" + "github.com/ozontech/seq-db/storage" + "github.com/ozontech/seq-db/tokenizer" + "github.com/ozontech/seq-db/util" +) + +type TestDocProvider struct { + DocCount int + Docs []byte + Metas []byte + buf []byte +} + +func NewTestDocProvider() *TestDocProvider { + return &TestDocProvider{ + Docs: make([]byte, 0), + buf: make([]byte, 4), + } +} + +func (dp *TestDocProvider) appendDoc(doc []byte) { + dp.DocCount++ + numBuf := make([]byte, 4) + binary.LittleEndian.PutUint32(numBuf, uint32(len(doc))) + dp.Docs = append(dp.Docs, numBuf...) + dp.Docs = append(dp.Docs, doc...) +} + +func (dp *TestDocProvider) appendMeta(docLen int, id seq.ID, tokens []tokenizer.MetaToken) { + dp.buf = dp.buf[:4] + dp.buf = encodeMeta(dp.buf, tokens, id, docLen) + binary.LittleEndian.PutUint32(dp.buf, uint32(len(dp.buf)-4)) + + dp.Metas = append(dp.Metas, dp.buf...) +} + +func (dp *TestDocProvider) Append(doc []byte, docRoot *insaneJSON.Root, id seq.ID, tokensStr ...string) { + tokens := stringsToTokens(tokensStr...) + if id.MID == 0 { + // this case runs only in the integration tests + t, _ := extractDocTimeForTest(docRoot) + id = seq.NewID(t, uint64(rand.Int63())) + } + + dp.appendMeta(len(doc), id, tokens) + dp.appendDoc(doc) +} + +func (dp *TestDocProvider) TryReset() { + dp.DocCount = 0 + dp.Docs = dp.Docs[:0] + dp.Metas = dp.Metas[:0] + +} + +func (dp *TestDocProvider) Provide() (storage.DocBlock, storage.DocBlock) { + c := GetDocsMetasCompressor(-1, -1) + c.CompressDocsAndMetas(dp.Docs, dp.Metas) + return c.DocsMetas() +} + +func encodeMeta(buf []byte, tokens []tokenizer.MetaToken, id seq.ID, size int) []byte { + metaTokens := make([]tokenizer.MetaToken, 0, len(tokens)) + for _, t := range tokens { + metaTokens = append(metaTokens, tokenizer.MetaToken{ + Key: t.Key, + Value: t.Value, + }) + } + md := MetaData{ + ID: id, + Size: uint32(size), + Tokens: metaTokens, + } + return md.MarshalBinaryTo(buf) +} + +// extractDocTimeForTest extracts timestamp from doc +// It searches by one of supported field name and parses by supported formats +// If no field was found or not parsable it returns time.Now() +func extractDocTimeForTest(docRoot *insaneJSON.Root) (time.Time, []string) { + var t time.Time + var f []string +top: + for _, field := range consts.TimeFields { + timeNode := docRoot.Dig(field...) + if timeNode == nil { + continue + } + timeVal := timeNode.AsString() + for _, format := range consts.TimeFormats { + if value, err := time.Parse(format, timeVal); err == nil { + t = value + f = field + break top + } + } + } + + if t.IsZero() { + t = time.Now() + } + return t, f +} + +func stringsToTokens(tokens ...string) []tokenizer.MetaToken { + r := make([]tokenizer.MetaToken, 0) + for _, tokenStr := range tokens { + fieldPos := strings.IndexByte(tokenStr, ':') + var t tokenizer.MetaToken + if fieldPos < 0 { + t = tokenizer.MetaToken{ + Key: util.StringToByteUnsafe(tokenStr), + Value: []byte("some_val")} + } else { + t = tokenizer.MetaToken{ + Key: util.StringToByteUnsafe(tokenStr[:fieldPos]), + Value: util.StringToByteUnsafe(tokenStr[fieldPos+1:]), + } + } + r = append(r, t) + } + return r +} diff --git a/proxy/bulk/ingestor.go b/proxy/bulk/ingestor.go index c19b6f55..b59a5022 100644 --- a/proxy/bulk/ingestor.go +++ b/proxy/bulk/ingestor.go @@ -2,46 +2,22 @@ package bulk import ( "context" - "encoding/binary" - "encoding/json" "errors" - "fmt" "math/rand/v2" "sync" "sync/atomic" "time" - "github.com/ozontech/seq-db/seq" - - "github.com/prometheus/client_golang/prometheus" - "github.com/prometheus/client_golang/prometheus/promauto" - "go.uber.org/zap" - "github.com/ozontech/seq-db/bytespool" "github.com/ozontech/seq-db/consts" - "github.com/ozontech/seq-db/frac" + "github.com/ozontech/seq-db/indexer" "github.com/ozontech/seq-db/logger" "github.com/ozontech/seq-db/metric" "github.com/ozontech/seq-db/network/circuitbreaker" "github.com/ozontech/seq-db/proxy/stores" + "github.com/ozontech/seq-db/seq" "github.com/ozontech/seq-db/tokenizer" -) - -var ( - inflightBulks = promauto.NewGauge(prometheus.GaugeOpts{ - Namespace: "seq_db_ingestor", - Subsystem: "bulk", - Name: "in_flight_queries_total", - Help: "", - }) - - bulkParseDurationSeconds = promauto.NewHistogram(prometheus.HistogramOpts{ - Namespace: "seq_db_ingestor", - Subsystem: "bulk", - Name: "parse_duration_seconds", - Help: "", - Buckets: metric.SecondsBuckets, - }) + "go.uber.org/zap" ) type MappingProvider interface { @@ -142,29 +118,6 @@ func (i *Ingestor) Stop() { var ErrTooManyInflightBulks = errors.New("too many inflight bulks, dropping") -var ( - rateLimitedTotal = promauto.NewCounter(prometheus.CounterOpts{ - Namespace: "seq_db_ingestor", - Name: "rate_limited_total", - Help: "Count of rate limited requests", - }) - - docsWritten = promauto.NewHistogram(prometheus.HistogramOpts{ - Namespace: "seq_db_ingestor", - Subsystem: "bulk", - Name: "docs_written", - Help: "", - Buckets: prometheus.ExponentialBuckets(1, 2, 16), - }) - - notAnObjectTotal = promauto.NewCounter(prometheus.CounterOpts{ - Namespace: "seq_db_ingestor", - Subsystem: "bulk", - Name: "not_an_object_errors_total", - Help: "Number of ingestion errors due to incorrect document type", - }) -) - func (i *Ingestor) ProcessDocuments(ctx context.Context, requestTime time.Time, readNext func() ([]byte, error)) (int, error) { ctx, cancel := context.WithTimeout(ctx, consts.BulkTimeout) defer cancel() @@ -186,10 +139,10 @@ func (i *Ingestor) ProcessDocuments(ctx context.Context, requestTime time.Time, t := time.Now() - compressor := frac.GetDocsMetasCompressor(i.config.DocsZSTDCompressLevel, i.config.MetasZSTDCompressLevel) - defer frac.PutDocMetasCompressor(compressor) + compressor := indexer.GetDocsMetasCompressor(i.config.DocsZSTDCompressLevel, i.config.MetasZSTDCompressLevel) + defer indexer.PutDocMetasCompressor(compressor) - total, err := i.processDocsToCompressor(compressor, requestTime, readNext) + total, docs, metas, err := i.processDocsToCompressor(compressor, requestTime, readNext) if err != nil { return 0, err } @@ -198,8 +151,6 @@ func (i *Ingestor) ProcessDocuments(ctx context.Context, requestTime time.Time, return 0, nil } - docs, metas := compressor.DocsMetas() - metric.IngestorBulkDocProvideDurationSeconds.Observe(time.Since(t).Seconds()) t = time.Now() @@ -228,79 +179,46 @@ var ( ) func (i *Ingestor) processDocsToCompressor( - compressor *frac.DocsMetasCompressor, + compressor *indexer.DocsMetasCompressor, requestTime time.Time, readNext func() ([]byte, error), -) (int, error) { - parseDuration := time.Duration(0) - +) (int, []byte, []byte, error) { proc := i.getProcessor() defer i.putProcessor(proc) binaryDocs := binaryDocsPool.Get().(*bytespool.Buffer) defer binaryDocsPool.Put(binaryDocs) binaryDocs.Reset() + binaryMetas := binaryMetasPool.Get().(*bytespool.Buffer) defer binaryMetasPool.Put(binaryMetas) binaryMetas.Reset() - total := 0 - for { - originalDoc, err := readNext() - if err != nil { - return total, fmt.Errorf("reading next document: %s", err) - } - if originalDoc == nil { - break - } - parseStart := time.Now() - doc, metas, err := proc.Process(originalDoc, requestTime) - if err != nil { - if errors.Is(err, errNotAnObject) { - logger.Error("unable to process the document because it is not an object", zap.Any("document", json.RawMessage(originalDoc))) - notAnObjectTotal.Inc() - continue - } - return total, fmt.Errorf("processing doc: %s", err) - } - parseDuration += time.Since(parseStart) - - binaryDocs.B = binary.LittleEndian.AppendUint32(binaryDocs.B, uint32(len(doc))) - binaryDocs.B = append(binaryDocs.B, doc...) - for _, meta := range metas { - binaryMetas.B = marshalAppendMeta(binaryMetas.B, meta) - } - total++ + var ( + err error + total int + ) + total, binaryDocs.B, binaryMetas.B, err = proc.ProcessBulk(requestTime, binaryDocs.B, binaryMetas.B, readNext) + if err != nil { + return 0, nil, nil, err } - bulkParseDurationSeconds.Observe(parseDuration.Seconds()) - compressor.CompressDocsAndMetas(binaryDocs.B, binaryMetas.B) + docs, metas := compressor.DocsMetas() - return total, nil -} - -func marshalAppendMeta(dst []byte, meta frac.MetaData) []byte { - metaLenPosition := len(dst) - dst = append(dst, make([]byte, 4)...) - dst = meta.MarshalBinaryTo(dst) - // Metadata length = len(slice after append) - len(slice before append). - metaLen := uint32(len(dst) - metaLenPosition - 4) - // Put metadata length before metadata bytes. - binary.LittleEndian.PutUint32(dst[metaLenPosition:], metaLen) - return dst + return total, docs, metas, nil } -func (i *Ingestor) getProcessor() *processor { +func (i *Ingestor) getProcessor() *indexer.Processor { procEface := i.procPool.Get() if procEface != nil { // The proc already initialized with current ingestor config, so we don't need to reinit it. - return procEface.(*processor) + return procEface.(*indexer.Processor) } index := rand.Uint64() % consts.IngestorMaxInstances - return newBulkProcessor(i.config.MappingProvider.GetMapping(), i.tokenizers, i.config.AllowedTimeDrift, i.config.FutureAllowedTimeDrift, index) + return indexer.NewProcessor(i.config.MappingProvider.GetMapping(), i.tokenizers, i.config.AllowedTimeDrift, i.config.FutureAllowedTimeDrift, index) } -func (i *Ingestor) putProcessor(proc *processor) { +func (i *Ingestor) putProcessor(proc *indexer.Processor) { i.procPool.Put(proc) } diff --git a/proxy/bulk/ingestor_test.go b/proxy/bulk/ingestor_test.go index 8800af6a..4361eff4 100644 --- a/proxy/bulk/ingestor_test.go +++ b/proxy/bulk/ingestor_test.go @@ -13,11 +13,12 @@ import ( "github.com/stretchr/testify/require" "github.com/ozontech/seq-db/consts" - "github.com/ozontech/seq-db/frac" + "github.com/ozontech/seq-db/indexer" "github.com/ozontech/seq-db/mappingprovider" "github.com/ozontech/seq-db/packer" "github.com/ozontech/seq-db/seq" "github.com/ozontech/seq-db/storage" + "github.com/ozontech/seq-db/tokenizer" ) func TestProcessDocuments(t *testing.T) { @@ -92,7 +93,7 @@ func TestProcessDocuments(t *testing.T) { type TestPayload struct { InDocs []string ExpDocs []string - ExpMeta []frac.MetaData + ExpMeta []indexer.MetaData } type TestCase struct { Name string @@ -111,10 +112,10 @@ func TestProcessDocuments(t *testing.T) { return TestPayload{ InDocs: []string{"{}"}, ExpDocs: nil, - ExpMeta: []frac.MetaData{{ + ExpMeta: []indexer.MetaData{{ ID: id, Size: 2, - Tokens: []frac.MetaToken{newToken(seq.TokenAll, "")}, + Tokens: []tokenizer.MetaToken{newToken(seq.TokenAll, "")}, }}, } }, @@ -122,7 +123,7 @@ func TestProcessDocuments(t *testing.T) { { Name: "text_with_asterisks", Payload: func() TestPayload { - tk := func(val string) frac.MetaToken { + tk := func(val string) tokenizer.MetaToken { return newToken("message", val) } return TestPayload{ @@ -139,15 +140,15 @@ func TestProcessDocuments(t *testing.T) { `{"message":"postfix asterisk *"}`, }, ExpDocs: nil, - ExpMeta: []frac.MetaData{ - {ID: id, Size: 30, Tokens: []frac.MetaToken{all, tk("*prefix_asterisk"), existsMsg}}, - {ID: id, Size: 31, Tokens: []frac.MetaToken{all, tk("*"), tk("prefix_asterisk"), existsMsg}}, - {ID: id, Size: 28, Tokens: []frac.MetaToken{all, tk("infix*asterisk"), existsMsg}}, - {ID: id, Size: 30, Tokens: []frac.MetaToken{all, tk("infix"), tk("*"), tk("asterisk"), existsMsg}}, - {ID: id, Size: 29, Tokens: []frac.MetaToken{all, tk("infix"), tk("*asterisk"), existsMsg}}, - {ID: id, Size: 29, Tokens: []frac.MetaToken{all, tk("infix*"), tk("asterisk"), existsMsg}}, - {ID: id, Size: 31, Tokens: []frac.MetaToken{all, tk("postfix"), tk("asterisk*"), existsMsg}}, - {ID: id, Size: 32, Tokens: []frac.MetaToken{all, tk("postfix"), tk("asterisk"), tk("*"), existsMsg}}, + ExpMeta: []indexer.MetaData{ + {ID: id, Size: 30, Tokens: []tokenizer.MetaToken{all, tk("*prefix_asterisk"), existsMsg}}, + {ID: id, Size: 31, Tokens: []tokenizer.MetaToken{all, tk("*"), tk("prefix_asterisk"), existsMsg}}, + {ID: id, Size: 28, Tokens: []tokenizer.MetaToken{all, tk("infix*asterisk"), existsMsg}}, + {ID: id, Size: 30, Tokens: []tokenizer.MetaToken{all, tk("infix"), tk("*"), tk("asterisk"), existsMsg}}, + {ID: id, Size: 29, Tokens: []tokenizer.MetaToken{all, tk("infix"), tk("*asterisk"), existsMsg}}, + {ID: id, Size: 29, Tokens: []tokenizer.MetaToken{all, tk("infix*"), tk("asterisk"), existsMsg}}, + {ID: id, Size: 31, Tokens: []tokenizer.MetaToken{all, tk("postfix"), tk("asterisk*"), existsMsg}}, + {ID: id, Size: 32, Tokens: []tokenizer.MetaToken{all, tk("postfix"), tk("asterisk"), tk("*"), existsMsg}}, }, } }, @@ -159,10 +160,10 @@ func TestProcessDocuments(t *testing.T) { return TestPayload{ InDocs: doc, ExpDocs: doc, - ExpMeta: []frac.MetaData{{ + ExpMeta: []indexer.MetaData{{ ID: id, Size: 22, - Tokens: []frac.MetaToken{ + Tokens: []tokenizer.MetaToken{ newToken(seq.TokenAll, ""), newToken(seq.TokenExists, "exists_only"), }, @@ -177,8 +178,8 @@ func TestProcessDocuments(t *testing.T) { return TestPayload{ InDocs: []string{doc}, ExpDocs: []string{doc}, - ExpMeta: []frac.MetaData{ - {ID: id, Size: uint32(len(doc)), Tokens: []frac.MetaToken{ + ExpMeta: []indexer.MetaData{ + {ID: id, Size: uint32(len(doc)), Tokens: []tokenizer.MetaToken{ newToken(seq.TokenAll, ""), newToken("_exists_", "tags.level"), newToken("_exists_", "tags.message"), @@ -197,10 +198,10 @@ func TestProcessDocuments(t *testing.T) { return TestPayload{ InDocs: []string{doc1, doc2, doc3}, ExpDocs: nil, - ExpMeta: []frac.MetaData{ - {ID: id, Size: uint32(len(doc1)), Tokens: []frac.MetaToken{newToken(seq.TokenAll, ""), newToken(seq.TokenExists, "level")}}, - {ID: id, Size: uint32(len(doc2)), Tokens: []frac.MetaToken{newToken(seq.TokenAll, ""), newToken(seq.TokenExists, "message")}}, - {ID: id, Size: uint32(len(doc3)), Tokens: []frac.MetaToken{newToken(seq.TokenAll, ""), newToken(seq.TokenExists, "path")}}, + ExpMeta: []indexer.MetaData{ + {ID: id, Size: uint32(len(doc1)), Tokens: []tokenizer.MetaToken{newToken(seq.TokenAll, ""), newToken(seq.TokenExists, "level")}}, + {ID: id, Size: uint32(len(doc2)), Tokens: []tokenizer.MetaToken{newToken(seq.TokenAll, ""), newToken(seq.TokenExists, "message")}}, + {ID: id, Size: uint32(len(doc3)), Tokens: []tokenizer.MetaToken{newToken(seq.TokenAll, ""), newToken(seq.TokenExists, "path")}}, }, } }, @@ -209,7 +210,7 @@ func TestProcessDocuments(t *testing.T) { Name: "simple_document", Payload: func() TestPayload { const doc = `{"level":"error", "message":" request 🫦 failed! ", "error": "context cancelled", "shard": "1", "path":"http://localhost:8080/example"}` - meta := frac.MetaData{ID: id, Size: uint32(len(doc)), Tokens: []frac.MetaToken{ + meta := indexer.MetaData{ID: id, Size: uint32(len(doc)), Tokens: []tokenizer.MetaToken{ newToken(seq.TokenAll, ""), newToken("level", "error"), newToken(seq.TokenExists, "level"), @@ -231,7 +232,7 @@ func TestProcessDocuments(t *testing.T) { return TestPayload{ InDocs: []string{doc, doc, doc}, ExpDocs: []string{doc, doc, doc}, - ExpMeta: []frac.MetaData{meta, meta, meta}, + ExpMeta: []indexer.MetaData{meta, meta, meta}, } }, }, @@ -323,7 +324,7 @@ func TestProcessDocuments(t *testing.T) { return TestPayload{ InDocs: []string{string(inTrace)}, ExpDocs: []string{string(expTrace)}, - ExpMeta: []frac.MetaData{ + ExpMeta: []indexer.MetaData{ {ID: id, Size: uint32(len(expTrace)), Tokens: buildKeywordTokens( "trace_id", "aaaaaaaaaaabcmadwewubq==", "trace_duration", "137252000", @@ -378,7 +379,7 @@ func TestProcessDocuments(t *testing.T) { }, }) - meta := []frac.MetaData{ + meta := []indexer.MetaData{ {ID: id, Size: uint32(len(doc)), Tokens: buildKeywordTokens()}, {ID: id, Size: 0, Tokens: buildKeywordTokens("spans.span_id", "1", "spans.operation_name", "op1")}, {ID: id, Size: 0, Tokens: buildKeywordTokens("spans.span_id", "2", "spans.operation_name", "op2")}, @@ -432,9 +433,9 @@ func TestProcessDocuments(t *testing.T) { binaryMetas, err := storage.DocBlock(c.metas).DecompressTo(nil) require.NoError(t, err) metasUnpacker := packer.NewBytesUnpacker(binaryMetas) - var gotMetas []frac.MetaData + var gotMetas []indexer.MetaData for metasUnpacker.Len() > 0 { - meta := frac.MetaData{} + meta := indexer.MetaData{} r.NoError(meta.UnmarshalBinary(metasUnpacker.GetBinary())) gotMetas = append(gotMetas, meta) } @@ -527,15 +528,15 @@ func newMapping(mappingType seq.TokenizerType) seq.MappingTypes { return seq.NewSingleType(mappingType, "", int(units.KiB)) } -func newToken(k, v string) frac.MetaToken { - return frac.MetaToken{ +func newToken(k, v string) tokenizer.MetaToken { + return tokenizer.MetaToken{ Key: []byte(k), Value: []byte(v), } } -func buildKeywordTokens(kvs ...string) []frac.MetaToken { - var tokens []frac.MetaToken +func buildKeywordTokens(kvs ...string) []tokenizer.MetaToken { + var tokens []tokenizer.MetaToken tokens = append(tokens, newToken("_all_", "")) diff --git a/proxy/bulk/metrics.go b/proxy/bulk/metrics.go new file mode 100644 index 00000000..23df56a6 --- /dev/null +++ b/proxy/bulk/metrics.go @@ -0,0 +1,29 @@ +package bulk + +import ( + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promauto" +) + +var ( + inflightBulks = promauto.NewGauge(prometheus.GaugeOpts{ + Namespace: "seq_db_ingestor", + Subsystem: "bulk", + Name: "in_flight_queries_total", + Help: "", + }) + + rateLimitedTotal = promauto.NewCounter(prometheus.CounterOpts{ + Namespace: "seq_db_ingestor", + Name: "rate_limited_total", + Help: "Count of rate limited requests", + }) + + docsWritten = promauto.NewHistogram(prometheus.HistogramOpts{ + Namespace: "seq_db_ingestor", + Subsystem: "bulk", + Name: "docs_written", + Help: "", + Buckets: prometheus.ExponentialBuckets(1, 2, 16), + }) +) diff --git a/seq/tokenizer.go b/seq/tokenizer.go index f5a59687..e2ee31f4 100644 --- a/seq/tokenizer.go +++ b/seq/tokenizer.go @@ -1,11 +1,5 @@ package seq -import ( - "strings" - - "github.com/ozontech/seq-db/util" -) - const ( TokenAll = "_all_" TokenExists = "_exists_" @@ -48,30 +42,3 @@ func init() { NamesToTokenTypes[v] = k } } - -type Token struct { - Field []byte - Val []byte -} - -func Tokens(tokens ...string) []Token { - r := make([]Token, 0) - for _, tokenStr := range tokens { - fieldPos := strings.IndexByte(tokenStr, ':') - var t Token - if fieldPos < 0 { - t = Token{ - Field: util.StringToByteUnsafe(tokenStr), - Val: []byte("some_val")} - } else { - t = Token{ - Field: util.StringToByteUnsafe(tokenStr[:fieldPos]), - Val: util.StringToByteUnsafe(tokenStr[fieldPos+1:]), - } - } - - r = append(r, t) - } - - return r -} diff --git a/storeapi/grpc_v1_test.go b/storeapi/grpc_v1_test.go index d0ebc45c..5c699065 100644 --- a/storeapi/grpc_v1_test.go +++ b/storeapi/grpc_v1_test.go @@ -11,8 +11,8 @@ import ( "github.com/stretchr/testify/assert" "github.com/ozontech/seq-db/consts" - "github.com/ozontech/seq-db/frac" "github.com/ozontech/seq-db/fracmanager" + "github.com/ozontech/seq-db/indexer" "github.com/ozontech/seq-db/mappingprovider" "github.com/ozontech/seq-db/pkg/storeapi" "github.com/ozontech/seq-db/seq" @@ -51,12 +51,11 @@ func makeBulkRequest(cnt int) *storeapi.BulkRequest { metaRoot := insaneJSON.Spawn() defer insaneJSON.Release(metaRoot) - dp := frac.NewDocProvider() + dp := indexer.NewTestDocProvider() for i := 0; i < cnt; i++ { id := seq.SimpleID(i + 1) doc := []byte("document") - tokens := seq.Tokens("_all_:", "service:100500", "k8s_pod:"+strconv.Itoa(i)) - dp.Append(doc, nil, id, tokens) + dp.Append(doc, nil, id, "_all_:", "service:100500", "k8s_pod:"+strconv.Itoa(i)) } req := &storeapi.BulkRequest{Count: int64(cnt)} req.Docs, req.Metas = dp.Provide() diff --git a/tokenizer/exists_tokenizer.go b/tokenizer/exists_tokenizer.go index 1069870f..0cdc6bbd 100644 --- a/tokenizer/exists_tokenizer.go +++ b/tokenizer/exists_tokenizer.go @@ -1,13 +1,11 @@ package tokenizer -import "github.com/ozontech/seq-db/frac" - type ExistsTokenizer struct{} func NewExistsTokenizer() *ExistsTokenizer { return &ExistsTokenizer{} } -func (t *ExistsTokenizer) Tokenize(tokens []frac.MetaToken, _, _ []byte, _ int) []frac.MetaToken { +func (t *ExistsTokenizer) Tokenize(tokens []MetaToken, _, _ []byte, _ int) []MetaToken { return tokens } diff --git a/tokenizer/keyword_tokenizer.go b/tokenizer/keyword_tokenizer.go index 7151e0f1..1c893bdf 100644 --- a/tokenizer/keyword_tokenizer.go +++ b/tokenizer/keyword_tokenizer.go @@ -1,7 +1,6 @@ package tokenizer import ( - "github.com/ozontech/seq-db/frac" "github.com/ozontech/seq-db/metric" ) @@ -19,7 +18,7 @@ func NewKeywordTokenizer(maxTokenSize int, caseSensitive, partialIndexing bool) } } -func (t *KeywordTokenizer) Tokenize(tokens []frac.MetaToken, name, value []byte, maxTokenSize int) []frac.MetaToken { +func (t *KeywordTokenizer) Tokenize(tokens []MetaToken, name, value []byte, maxTokenSize int) []MetaToken { if maxTokenSize == 0 { maxTokenSize = t.defaultMaxTokenSize } @@ -34,7 +33,7 @@ func (t *KeywordTokenizer) Tokenize(tokens []frac.MetaToken, name, value []byte, metric.SkippedIndexesBytesKeyword.Add(float64(len(value[maxLength:]))) value = value[:maxLength] - tokens = append(tokens, frac.MetaToken{ + tokens = append(tokens, MetaToken{ Key: name, Value: toLowerIfCaseInsensitive(t.caseSensitive, value), }) diff --git a/tokenizer/keyword_tokenizer_test.go b/tokenizer/keyword_tokenizer_test.go index 1d862e37..6ba1ed20 100644 --- a/tokenizer/keyword_tokenizer_test.go +++ b/tokenizer/keyword_tokenizer_test.go @@ -4,19 +4,17 @@ import ( "testing" "github.com/stretchr/testify/assert" - - "github.com/ozontech/seq-db/frac" ) -func newFracToken(k, v string) frac.MetaToken { - return frac.MetaToken{Key: []byte(k), Value: []byte(v)} +func newMetaToken(k, v string) MetaToken { + return MetaToken{Key: []byte(k), Value: []byte(v)} } func TestKeywordTokenizerEmptyValue(t *testing.T) { tokenizer := NewKeywordTokenizer(10, true, true) - expected := []frac.MetaToken{newFracToken("message", "")} - tokens := tokenizer.Tokenize([]frac.MetaToken{}, []byte("message"), []byte{}, 10) + expected := []MetaToken{newMetaToken("message", "")} + tokens := tokenizer.Tokenize([]MetaToken{}, []byte("message"), []byte{}, 10) assert.Equal(t, expected, tokens) } @@ -25,8 +23,8 @@ func TestKeywordTokenizerSimple1(t *testing.T) { tokenizer := NewKeywordTokenizer(10, true, true) value := []byte("woRld") - expected := []frac.MetaToken{newFracToken("message", "woRld")} - tokens := tokenizer.Tokenize([]frac.MetaToken{}, []byte("message"), value, 10) + expected := []MetaToken{newMetaToken("message", "woRld")} + tokens := tokenizer.Tokenize([]MetaToken{}, []byte("message"), value, 10) assert.Equal(t, expected, tokens) } @@ -36,22 +34,22 @@ func TestKeywordTokenizerMaxLength(t *testing.T) { // maxSize as argument tokenizer := NewKeywordTokenizer(100, true, false) - tokens := tokenizer.Tokenize([]frac.MetaToken{}, []byte("message"), []byte(value), 10) - assert.Equal(t, []frac.MetaToken{}, tokens) + tokens := tokenizer.Tokenize([]MetaToken{}, []byte("message"), []byte(value), 10) + assert.Equal(t, []MetaToken{}, tokens) // default maxSize tokenizer = NewKeywordTokenizer(10, true, false) - tokens = tokenizer.Tokenize([]frac.MetaToken{}, []byte("message"), []byte(value), 0) - assert.Equal(t, []frac.MetaToken{}, tokens) + tokens = tokenizer.Tokenize([]MetaToken{}, []byte("message"), []byte(value), 0) + assert.Equal(t, []MetaToken{}, tokens) } func TestKeywordTokenizerCaseSensitive(t *testing.T) { tokenizer := NewKeywordTokenizer(16, false, true) value := "heLlo WoRld" - tokens := tokenizer.Tokenize([]frac.MetaToken{}, []byte("message"), []byte(value), 16) + tokens := tokenizer.Tokenize([]MetaToken{}, []byte("message"), []byte(value), 16) - assert.Equal(t, []frac.MetaToken{newFracToken("message", "hello world")}, tokens) + assert.Equal(t, []MetaToken{newMetaToken("message", "hello world")}, tokens) } func TestKeywordTokenizerPartialIndexing(t *testing.T) { @@ -61,11 +59,11 @@ func TestKeywordTokenizerPartialIndexing(t *testing.T) { // maxSize as argument tokenizer := NewKeywordTokenizer(100, true, true) - tokens := tokenizer.Tokenize([]frac.MetaToken{}, []byte("message"), []byte(value), maxSize) - assert.Equal(t, []frac.MetaToken{newFracToken("message", value[:maxSize])}, tokens) + tokens := tokenizer.Tokenize([]MetaToken{}, []byte("message"), []byte(value), maxSize) + assert.Equal(t, []MetaToken{newMetaToken("message", value[:maxSize])}, tokens) // default maxSize tokenizer = NewKeywordTokenizer(maxSize, true, true) - tokens = tokenizer.Tokenize([]frac.MetaToken{}, []byte("message"), []byte(value), 0) - assert.Equal(t, []frac.MetaToken{newFracToken("message", value[:maxSize])}, tokens) + tokens = tokenizer.Tokenize([]MetaToken{}, []byte("message"), []byte(value), 0) + assert.Equal(t, []MetaToken{newMetaToken("message", value[:maxSize])}, tokens) } diff --git a/tokenizer/meta_token.go b/tokenizer/meta_token.go new file mode 100644 index 00000000..2929479b --- /dev/null +++ b/tokenizer/meta_token.go @@ -0,0 +1,43 @@ +package tokenizer + +import ( + "encoding/binary" + "fmt" +) + +type MetaToken struct { + Key []byte + Value []byte +} + +func (m *MetaToken) MarshalBinaryTo(b []byte) []byte { + b = binary.LittleEndian.AppendUint32(b, uint32(len(m.Key))) + b = append(b, m.Key...) + b = binary.LittleEndian.AppendUint32(b, uint32(len(m.Value))) + b = append(b, m.Value...) + return b +} + +func (m *MetaToken) UnmarshalBinary(b []byte) ([]byte, error) { + keyLen := binary.LittleEndian.Uint32(b) + b = b[4:] + if int(keyLen) > len(b) { + return nil, fmt.Errorf("malformed key") + } + m.Key = b[:keyLen] + b = b[keyLen:] + + valueLen := binary.LittleEndian.Uint32(b) + b = b[4:] + if int(valueLen) > len(b) { + return nil, fmt.Errorf("malformed value") + } + m.Value = b[:valueLen] + b = b[valueLen:] + return b, nil +} + +// String used in tests for human-readable output. +func (m MetaToken) String() string { + return fmt.Sprintf("(%s: %s)", m.Key, m.Value) +} diff --git a/tokenizer/path_tokenizer.go b/tokenizer/path_tokenizer.go index 3e3b1efc..965cb28f 100644 --- a/tokenizer/path_tokenizer.go +++ b/tokenizer/path_tokenizer.go @@ -3,7 +3,6 @@ package tokenizer import ( "bytes" - "github.com/ozontech/seq-db/frac" "github.com/ozontech/seq-db/metric" ) @@ -29,7 +28,7 @@ func NewPathTokenizer( } } -func (t *PathTokenizer) Tokenize(tokens []frac.MetaToken, name, value []byte, maxTokenSize int) []frac.MetaToken { +func (t *PathTokenizer) Tokenize(tokens []MetaToken, name, value []byte, maxTokenSize int) []MetaToken { if maxTokenSize == 0 { maxTokenSize = t.defaultMaxTokenSize } @@ -57,13 +56,13 @@ func (t *PathTokenizer) Tokenize(tokens []frac.MetaToken, name, value []byte, ma } i += sepIndex - tokens = append(tokens, frac.MetaToken{ + tokens = append(tokens, MetaToken{ Key: name, Value: toLowerIfCaseInsensitive(t.caseSensitive, value[:i]), }) } - tokens = append(tokens, frac.MetaToken{ + tokens = append(tokens, MetaToken{ Key: name, Value: toLowerIfCaseInsensitive(t.caseSensitive, value), }) diff --git a/tokenizer/path_tokenizer_test.go b/tokenizer/path_tokenizer_test.go index 8e6874b0..bf7082f4 100644 --- a/tokenizer/path_tokenizer_test.go +++ b/tokenizer/path_tokenizer_test.go @@ -4,8 +4,6 @@ import ( "testing" "github.com/stretchr/testify/assert" - - "github.com/ozontech/seq-db/frac" ) func TestPathTokenizer(t *testing.T) { @@ -15,24 +13,24 @@ func TestPathTokenizer(t *testing.T) { title, value string maxTokenSize int tokenizer *PathTokenizer - expected []frac.MetaToken + expected []MetaToken }{ { title: "empty value", value: "", maxTokenSize: 100, tokenizer: NewPathTokenizer(100, true, true), - expected: []frac.MetaToken{newFracToken(field, "")}, + expected: []MetaToken{newMetaToken(field, "")}, }, { title: "slashes only", value: "///", maxTokenSize: 100, tokenizer: NewPathTokenizer(100, true, true), - expected: []frac.MetaToken{ - newFracToken(field, "/"), - newFracToken(field, "//"), - newFracToken(field, "///"), + expected: []MetaToken{ + newMetaToken(field, "/"), + newMetaToken(field, "//"), + newMetaToken(field, "///"), }, }, { @@ -40,10 +38,10 @@ func TestPathTokenizer(t *testing.T) { value: "/One/Two/Three", maxTokenSize: 100, tokenizer: NewPathTokenizer(100, true, true), - expected: []frac.MetaToken{ - newFracToken(field, "/One"), - newFracToken(field, "/One/Two"), - newFracToken(field, "/One/Two/Three"), + expected: []MetaToken{ + newMetaToken(field, "/One"), + newMetaToken(field, "/One/Two"), + newMetaToken(field, "/One/Two/Three"), }, }, { @@ -51,11 +49,11 @@ func TestPathTokenizer(t *testing.T) { value: "/One/Two/Three/", maxTokenSize: 100, tokenizer: NewPathTokenizer(100, true, true), - expected: []frac.MetaToken{ - newFracToken(field, "/One"), - newFracToken(field, "/One/Two"), - newFracToken(field, "/One/Two/Three"), - newFracToken(field, "/One/Two/Three/"), + expected: []MetaToken{ + newMetaToken(field, "/One"), + newMetaToken(field, "/One/Two"), + newMetaToken(field, "/One/Two/Three"), + newMetaToken(field, "/One/Two/Three/"), }, }, { @@ -63,24 +61,24 @@ func TestPathTokenizer(t *testing.T) { value: "/one/two/three/", maxTokenSize: 10, tokenizer: NewPathTokenizer(100, true, false), - expected: []frac.MetaToken{}, + expected: []MetaToken{}, }, { title: "max length default", value: "/one/two/three/", maxTokenSize: 0, tokenizer: NewPathTokenizer(10, true, false), - expected: []frac.MetaToken{}, + expected: []MetaToken{}, }, { title: "partial indexing", value: "/one/two/three/", maxTokenSize: 10, tokenizer: NewPathTokenizer(100, true, true), - expected: []frac.MetaToken{ - newFracToken(field, "/one"), - newFracToken(field, "/one/two"), - newFracToken(field, "/one/two/t"), + expected: []MetaToken{ + newMetaToken(field, "/one"), + newMetaToken(field, "/one/two"), + newMetaToken(field, "/one/two/t"), }, }, { @@ -88,10 +86,10 @@ func TestPathTokenizer(t *testing.T) { value: "/one/two/three/", maxTokenSize: 0, tokenizer: NewPathTokenizer(10, true, true), - expected: []frac.MetaToken{ - newFracToken(field, "/one"), - newFracToken(field, "/one/two"), - newFracToken(field, "/one/two/t"), + expected: []MetaToken{ + newMetaToken(field, "/one"), + newMetaToken(field, "/one/two"), + newMetaToken(field, "/one/two/t"), }, }, { @@ -99,16 +97,16 @@ func TestPathTokenizer(t *testing.T) { value: "/OnE/tWo", maxTokenSize: 10, tokenizer: NewPathTokenizer(10, false, true), - expected: []frac.MetaToken{ - newFracToken(field, "/one"), - newFracToken(field, "/one/two"), + expected: []MetaToken{ + newMetaToken(field, "/one"), + newMetaToken(field, "/one/two"), }, }, } for _, tc := range tests { t.Run(tc.title, func(t *testing.T) { - tokens := tc.tokenizer.Tokenize([]frac.MetaToken{}, []byte(field), []byte(tc.value), tc.maxTokenSize) + tokens := tc.tokenizer.Tokenize([]MetaToken{}, []byte(field), []byte(tc.value), tc.maxTokenSize) assert.Equal(t, tc.expected, tokens) }) } diff --git a/tokenizer/text_tokenizer.go b/tokenizer/text_tokenizer.go index 05038d62..6a728a91 100644 --- a/tokenizer/text_tokenizer.go +++ b/tokenizer/text_tokenizer.go @@ -4,7 +4,6 @@ import ( "unicode" "unicode/utf8" - "github.com/ozontech/seq-db/frac" "github.com/ozontech/seq-db/metric" ) @@ -24,7 +23,7 @@ func NewTextTokenizer(maxTokenSize int, caseSensitive, partialIndexing bool, max } } -func (t *TextTokenizer) Tokenize(tokens []frac.MetaToken, name, value []byte, maxFieldValueLength int) []frac.MetaToken { +func (t *TextTokenizer) Tokenize(tokens []MetaToken, name, value []byte, maxFieldValueLength int) []MetaToken { metric.TokenizerIncomingTextLen.Observe(float64(len(value))) if maxFieldValueLength == 0 { @@ -38,7 +37,7 @@ func (t *TextTokenizer) Tokenize(tokens []frac.MetaToken, name, value []byte, ma } if len(value) == 0 { - tokens = append(tokens, frac.MetaToken{Key: name, Value: value}) + tokens = append(tokens, MetaToken{Key: name, Value: value}) return tokens } @@ -85,7 +84,7 @@ func (t *TextTokenizer) Tokenize(tokens []frac.MetaToken, name, value []byte, ma // We can skip the ToLower call if we are sure that there are only ASCII characters and no uppercase letters. token = toLowerTryInplace(token) } - tokens = append(tokens, frac.MetaToken{Key: name, Value: token}) + tokens = append(tokens, MetaToken{Key: name, Value: token}) } hasUpper = false @@ -100,7 +99,7 @@ func (t *TextTokenizer) Tokenize(tokens []frac.MetaToken, name, value []byte, ma if !t.caseSensitive && (asciiOnly && hasUpper || !asciiOnly) { token = toLowerTryInplace(token) } - tokens = append(tokens, frac.MetaToken{Key: name, Value: token}) + tokens = append(tokens, MetaToken{Key: name, Value: token}) return tokens } diff --git a/tokenizer/text_tokenizer_test.go b/tokenizer/text_tokenizer_test.go index 240671c7..b56c82b6 100644 --- a/tokenizer/text_tokenizer_test.go +++ b/tokenizer/text_tokenizer_test.go @@ -6,8 +6,6 @@ import ( "testing" "github.com/stretchr/testify/assert" - - "github.com/ozontech/seq-db/frac" ) const maxTokenSizeDummy = 0 @@ -18,8 +16,8 @@ func TestTokenizeEmptyValue(t *testing.T) { testCase := []byte("") tokenizer := NewTextTokenizer(1000, false, true, 1024) - tokens := tokenizer.Tokenize([]frac.MetaToken{}, []byte("message"), testCase, maxTokenSizeDummy) - expected := []frac.MetaToken{newFracToken("message", "")} + tokens := tokenizer.Tokenize([]MetaToken{}, []byte("message"), testCase, maxTokenSizeDummy) + expected := []MetaToken{newMetaToken("message", "")} assert.Equal(t, expected, tokens) } @@ -29,23 +27,23 @@ func TestTokenizeSimple(t *testing.T) { tokenizer := NewTextTokenizer(1000, false, true, 1024) tokens := tokenizer.Tokenize(nil, []byte("message"), testCase, maxTokenSizeDummy) - assert.Equal(t, newFracToken("message", "arr"), tokens[0]) - assert.Equal(t, newFracToken("message", "hello"), tokens[1]) - assert.Equal(t, newFracToken("message", "world"), tokens[2]) + assert.Equal(t, newMetaToken("message", "arr"), tokens[0]) + assert.Equal(t, newMetaToken("message", "hello"), tokens[1]) + assert.Equal(t, newMetaToken("message", "world"), tokens[2]) } func TestTokenizeSimple2(t *testing.T) { tokenizer := NewTextTokenizer(1000, false, true, 1024) tokens := tokenizer.Tokenize(nil, []byte("message"), bytes.Clone(longDocument), maxTokenSizeDummy) - assert.Equal(t, newFracToken("message", "t1"), tokens[0]) - assert.Equal(t, newFracToken("message", "t2_t3"), tokens[1]) - assert.Equal(t, newFracToken("message", "t4"), tokens[2]) - assert.Equal(t, newFracToken("message", "looooong_t5"), tokens[3]) - assert.Equal(t, newFracToken("message", "readyz"), tokens[4]) - assert.Equal(t, newFracToken("message", "error*"), tokens[5]) - assert.Equal(t, newFracToken("message", "5555"), tokens[6]) - assert.Equal(t, newFracToken("message", "r2"), tokens[7]) + assert.Equal(t, newMetaToken("message", "t1"), tokens[0]) + assert.Equal(t, newMetaToken("message", "t2_t3"), tokens[1]) + assert.Equal(t, newMetaToken("message", "t4"), tokens[2]) + assert.Equal(t, newMetaToken("message", "looooong_t5"), tokens[3]) + assert.Equal(t, newMetaToken("message", "readyz"), tokens[4]) + assert.Equal(t, newMetaToken("message", "error*"), tokens[5]) + assert.Equal(t, newMetaToken("message", "5555"), tokens[6]) + assert.Equal(t, newMetaToken("message", "r2"), tokens[7]) } func TestTokenizePartialDefault(t *testing.T) { @@ -53,9 +51,9 @@ func TestTokenizePartialDefault(t *testing.T) { tokenizer := NewTextTokenizer(maxSize, false, true, maxSize) testCase := []byte(strings.Repeat("1", maxSize+1)) - tokens := tokenizer.Tokenize([]frac.MetaToken{}, []byte("message"), testCase, maxTokenSizeDummy) + tokens := tokenizer.Tokenize([]MetaToken{}, []byte("message"), testCase, maxTokenSizeDummy) - expected := []frac.MetaToken{newFracToken("message", strings.Repeat("1", maxSize))} + expected := []MetaToken{newMetaToken("message", strings.Repeat("1", maxSize))} assert.Equal(t, expected, tokens) } @@ -67,7 +65,7 @@ func TestTokenizePartial(t *testing.T) { tokens := tokenizer.Tokenize(nil, []byte("message"), testCase, maxSize) - expected := []frac.MetaToken{newFracToken("message", strings.Repeat("1", maxSize))} + expected := []MetaToken{newMetaToken("message", strings.Repeat("1", maxSize))} assert.Equal(t, expected, tokens) } @@ -77,9 +75,9 @@ func TestTokenizePartialSkipDefault(t *testing.T) { tokenizer := NewTextTokenizer(maxSize, false, false, maxSize) testCase := []byte(strings.Repeat("1", maxSize+1)) - tokens := tokenizer.Tokenize([]frac.MetaToken{}, []byte("message"), testCase, maxTokenSizeDummy) + tokens := tokenizer.Tokenize([]MetaToken{}, []byte("message"), testCase, maxTokenSizeDummy) - assert.Equal(t, []frac.MetaToken{}, tokens) + assert.Equal(t, []MetaToken{}, tokens) } func TestTokenizePartialSkip(t *testing.T) { @@ -87,22 +85,22 @@ func TestTokenizePartialSkip(t *testing.T) { tokenizer := NewTextTokenizer(maxSize, false, false, 0) testCase := []byte(strings.Repeat("1", maxSize+1)) - tokens := tokenizer.Tokenize([]frac.MetaToken{}, []byte("message"), testCase, maxSize) + tokens := tokenizer.Tokenize([]MetaToken{}, []byte("message"), testCase, maxSize) - assert.Equal(t, []frac.MetaToken{}, tokens) + assert.Equal(t, []MetaToken{}, tokens) } func TestTokenizeDefaultMaxTokenSize(t *testing.T) { tokenizer := NewTextTokenizer(6, false, true, 1024) tokens := tokenizer.Tokenize(nil, []byte("message"), bytes.Clone(longDocument), maxTokenSizeDummy) - assert.Equal(t, newFracToken("message", "t1"), tokens[0]) - assert.Equal(t, newFracToken("message", "t2_t3"), tokens[1]) - assert.Equal(t, newFracToken("message", "t4"), tokens[2]) - assert.Equal(t, newFracToken("message", "readyz"), tokens[3]) - assert.Equal(t, newFracToken("message", "error*"), tokens[4]) - assert.Equal(t, newFracToken("message", "5555"), tokens[5]) - assert.Equal(t, newFracToken("message", "r2"), tokens[6]) + assert.Equal(t, newMetaToken("message", "t1"), tokens[0]) + assert.Equal(t, newMetaToken("message", "t2_t3"), tokens[1]) + assert.Equal(t, newMetaToken("message", "t4"), tokens[2]) + assert.Equal(t, newMetaToken("message", "readyz"), tokens[3]) + assert.Equal(t, newMetaToken("message", "error*"), tokens[4]) + assert.Equal(t, newMetaToken("message", "5555"), tokens[5]) + assert.Equal(t, newMetaToken("message", "r2"), tokens[6]) } func TestTokenizeCaseSensitive(t *testing.T) { @@ -110,14 +108,14 @@ func TestTokenizeCaseSensitive(t *testing.T) { tokens := tokenizer.Tokenize(nil, []byte("message"), bytes.Clone(longDocument), maxTokenSizeDummy) - assert.Equal(t, newFracToken("message", "T1"), tokens[0]) - assert.Equal(t, newFracToken("message", "T2_T3"), tokens[1]) - assert.Equal(t, newFracToken("message", "t4"), tokens[2]) - assert.Equal(t, newFracToken("message", "looooong_t5"), tokens[3]) - assert.Equal(t, newFracToken("message", "readyz"), tokens[4]) - assert.Equal(t, newFracToken("message", "error*"), tokens[5]) - assert.Equal(t, newFracToken("message", "5555"), tokens[6]) - assert.Equal(t, newFracToken("message", "r2"), tokens[7]) + assert.Equal(t, newMetaToken("message", "T1"), tokens[0]) + assert.Equal(t, newMetaToken("message", "T2_T3"), tokens[1]) + assert.Equal(t, newMetaToken("message", "t4"), tokens[2]) + assert.Equal(t, newMetaToken("message", "looooong_t5"), tokens[3]) + assert.Equal(t, newMetaToken("message", "readyz"), tokens[4]) + assert.Equal(t, newMetaToken("message", "error*"), tokens[5]) + assert.Equal(t, newMetaToken("message", "5555"), tokens[6]) + assert.Equal(t, newMetaToken("message", "r2"), tokens[7]) } func TestTokenizeCaseSensitiveAndMaxTokenSize(t *testing.T) { @@ -125,13 +123,13 @@ func TestTokenizeCaseSensitiveAndMaxTokenSize(t *testing.T) { tokens := tokenizer.Tokenize(nil, []byte("message"), bytes.Clone(longDocument), maxTokenSizeDummy) - assert.Equal(t, newFracToken("message", "T1"), tokens[0]) - assert.Equal(t, newFracToken("message", "T2_T3"), tokens[1]) - assert.Equal(t, newFracToken("message", "t4"), tokens[2]) - assert.Equal(t, newFracToken("message", "readyz"), tokens[3]) - assert.Equal(t, newFracToken("message", "error*"), tokens[4]) - assert.Equal(t, newFracToken("message", "5555"), tokens[5]) - assert.Equal(t, newFracToken("message", "r2"), tokens[6]) + assert.Equal(t, newMetaToken("message", "T1"), tokens[0]) + assert.Equal(t, newMetaToken("message", "T2_T3"), tokens[1]) + assert.Equal(t, newMetaToken("message", "t4"), tokens[2]) + assert.Equal(t, newMetaToken("message", "readyz"), tokens[3]) + assert.Equal(t, newMetaToken("message", "error*"), tokens[4]) + assert.Equal(t, newMetaToken("message", "5555"), tokens[5]) + assert.Equal(t, newMetaToken("message", "r2"), tokens[6]) } func TestTokenizeLastTokenLength(t *testing.T) { @@ -140,7 +138,7 @@ func TestTokenizeLastTokenLength(t *testing.T) { tokens := tokenizer.Tokenize(nil, []byte("message"), testCase, maxTokenSizeDummy) assert.Equal(t, 1, len(tokens)) - assert.Equal(t, newFracToken("message", "1"), tokens[0]) + assert.Equal(t, newMetaToken("message", "1"), tokens[0]) } func TestTextTokenizerUTF8(t *testing.T) { @@ -155,14 +153,14 @@ func TestTextTokenizerUTF8(t *testing.T) { tokenizer := NewTextTokenizer(100, true, true, 1024) - tokens := tokenizer.Tokenize([]frac.MetaToken{}, []byte("message"), []byte(in), maxTokenSizeDummy) + tokens := tokenizer.Tokenize([]MetaToken{}, []byte("message"), []byte(in), maxTokenSizeDummy) - expected := []frac.MetaToken{} + expected := []MetaToken{} for _, token := range out { if lowercase { token = strings.ToLower(token) } - expected = append(expected, newFracToken("message", token)) + expected = append(expected, newMetaToken("message", token)) } assert.Equal(t, expected, tokens) } diff --git a/tokenizer/tokenizer.go b/tokenizer/tokenizer.go index 2c201416..9425b378 100644 --- a/tokenizer/tokenizer.go +++ b/tokenizer/tokenizer.go @@ -4,12 +4,10 @@ import ( "bytes" "unicode" "unicode/utf8" - - "github.com/ozontech/seq-db/frac" ) type Tokenizer interface { - Tokenize(tokens []frac.MetaToken, key, value []byte, maxLength int) []frac.MetaToken + Tokenize(tokens []MetaToken, key, value []byte, maxLength int) []MetaToken } func toLowerIfCaseInsensitive(isCaseSensitive bool, x []byte) []byte { From c9fbd403f3c5cc12f822efcbf0ba8ca1d5116efc Mon Sep 17 00:00:00 2001 From: Andrei Cheboksarov <37665782+cheb0@users.noreply.github.com> Date: Tue, 7 Oct 2025 15:22:17 +0400 Subject: [PATCH 07/48] add from, to, limit, offset for quering WIP --- frac/fraction_test.go | 285 +++++++++++++++++++++++++++++++++++++----- 1 file changed, 254 insertions(+), 31 deletions(-) diff --git a/frac/fraction_test.go b/frac/fraction_test.go index e69e9259..dd233e02 100644 --- a/frac/fraction_test.go +++ b/frac/fraction_test.go @@ -5,6 +5,7 @@ import ( "math" "os" "path/filepath" + "strings" "sync" "testing" "time" @@ -58,6 +59,9 @@ func (s *FractionTestSuite) SetupSuite() { "level": seq.NewSingleType(seq.TokenizerTypeKeyword, "", 0), "service": seq.NewSingleType(seq.TokenizerTypeKeyword, "", 0), "status": seq.NewSingleType(seq.TokenizerTypeKeyword, "", 0), + "source": seq.NewSingleType(seq.TokenizerTypeKeyword, "", 0), + "trace_id": seq.NewSingleType(seq.TokenizerTypeKeyword, "", 0), + "spans": seq.NewSingleType(seq.TokenizerTypeKeyword, "", 0), } } @@ -112,11 +116,21 @@ func (s *FractionTestSuite) extractTokens(root *insaneJSON.Root) []seq.Token { continue } - for _, _ = range mappingTypes.All { - tokens = append(tokens, seq.Token{ - Field: []byte(fieldName), - Val: fieldBytes, - }) + for _, mappingType := range mappingTypes.All { + if mappingType.TokenizerType == seq.TokenizerTypeText { + textTokens := tokenizeText(fieldBytes) + for _, tokenStr := range textTokens { + tokens = append(tokens, seq.Token{ + Field: []byte(fieldName), + Val: []byte(tokenStr), + }) + } + } else { + tokens = append(tokens, seq.Token{ + Field: []byte(fieldName), + Val: fieldBytes, + }) + } } } tokens = append(tokens, seq.Token{ @@ -127,25 +141,79 @@ func (s *FractionTestSuite) extractTokens(root *insaneJSON.Root) []seq.Token { return tokens } -func (s *FractionTestSuite) AssertSearch(query string, originalDocs []string, indexes []int) { - seqql, err := parser.ParseSeqQL(query, s.mapping) - s.Require().NoError(err, "failed to parse query: %s", query) +// TODO delete this and replace with proper tokenize +func tokenizeText(text []byte) []string { + if len(text) == 0 { + return nil + } + + var tokens []string + var current strings.Builder + + for i := 0; i < len(text); i++ { + c := text[i] + + if c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '=' { + if current.Len() > 0 { + tokens = append(tokens, strings.ToLower(current.String())) + current.Reset() + } + } else if (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '_' { + current.WriteByte(c) + } + } + + if current.Len() > 0 { + tokens = append(tokens, strings.ToLower(current.String())) + } + + return tokens +} + +func (s *FractionTestSuite) AssertSearch(queryString string, originalDocs []string, indexes []int) { + s.AssertSearchQuery(query(queryString), originalDocs, indexes) +} + +func (s *FractionTestSuite) AssertSearchQuery(query *SearchQuery, originalDocs []string, indexes []int) { + var queryStr string + var from, to seq.MID + var limit int + + queryStr = query.query + if query.from != nil { + from = *query.from + } else { + from = seq.MID(0) + } + if query.to != nil { + to = *query.to + } else { + to = seq.MID(math.MaxUint64) + } + if query.limit != nil { + limit = *query.limit + } else { + limit = math.MaxInt32 + } + + seqql, err := parser.ParseSeqQL(queryStr, s.mapping) + s.Require().NoError(err, "failed to parse query: %s", queryStr) dp, release := s.fraction.DataProvider(context.Background()) defer release() params := processor.SearchParams{ AST: seqql.Root, - From: seq.MID(0), - To: seq.MID(math.MaxUint64), - Limit: math.MaxInt32, + From: from, + To: to, + Limit: limit, } qpr, err := dp.Search(params) - s.Require().NoError(err, "search failed for query: %s", query) + s.Require().NoError(err, "search failed for query: %s", queryStr) s.Require().Equal(len(indexes), qpr.IDs.Len(), - "expected %d documents but found %d for query: %s", len(indexes), qpr.IDs.Len(), query) + "expected %d documents but found %d for query: %s", len(indexes), qpr.IDs.Len(), queryStr) docs, err := dp.Fetch(qpr.IDs.IDs()) s.Require().NoError(err, "failed to fetch documents for IDs: %v", qpr.IDs.IDs()) @@ -160,7 +228,7 @@ func (s *FractionTestSuite) AssertSearch(query string, originalDocs []string, in expectedDoc := originalDocs[indexes[i]] s.Require().Equal(expectedDoc, fetchedDoc, "document at index %d doesn't match expected document at original index %d for query: %s", - i, indexes[i], query) + i, indexes[i], queryStr) } } } @@ -201,32 +269,187 @@ func (s *FractionTestSuite) TestSearchKeyword() { s.AssertSearch("status:fail", docs, []int{1}) } -/* -TODO not working now because we must properly tokenize message -func (s *FractionTestSuite) TestSearchFullText() { +func (s *FractionTestSuite) TestBasicSearch() { docs := []string{ - `{"time":100, "message":"first test document","level":"info","service":"test-service","status":"ok"}`, - `{"time":101, "message":"second test document","level":"error","service":"test-service","status":"fail"}`, - `{"time":102, "message":"third test document","level":"debug","service":"another-service","status":"ok"}`, - `{"time":103, "message":"fourth test document","level":"info","service":"another-service","status":"ok"}`, + `{"timestamp":110,"service":"service_a","message":"first message some text","trace_id":"abcdef","source":"prod01","level":"1"}`, + `{"timestamp":130,"service":"service_b","message":"second message other text","trace_id":"abcdef","source":"prod01","level":"1"}`, + `{"timestamp":140,"service":"service_c","message":"third message other text","trace_id":"aaaaaa","source":"prod02","level":"2"}`, + `{"timestamp":120,"service":"service_a","message":"fourth message some text","trace_id":"bbbbbb","source":"prod01","level":"1"}`, } + s.insertDocuments(docs...) + + s.AssertSearch("service:service_a", docs, []int{3, 0}) + s.AssertSearch("trace_id:abcdef", docs, []int{1, 0}) + s.AssertSearch("level:1", docs, []int{3, 1, 0}) + s.AssertSearch("source:prod01", docs, []int{3, 1, 0}) + s.AssertSearch("source:prod02", docs, []int{2}) + + s.AssertSearch("trace_id:abcd*", docs, []int{1, 0}) + s.AssertSearch("trace_id:a*", docs, []int{2, 1, 0}) + s.AssertSearch("trace_id:a*f", docs, []int{1, 0}) + s.AssertSearch("trace_id:a*a", docs, []int{2}) + s.AssertSearch("service:service*a", docs, []int{3, 0}) +} +func (s *FractionTestSuite) TestSearchNot() { + docs := []string{ + `{"timestamp":100,"message":"bad","level":"1","service":"srv_1","status":"ok"}`, + `{"timestamp":101,"message":"good","level":"2","service":"srv_2","status":"ok"}`, + `{"timestamp":102,"message":"bad","level":"3","service":"srv_3","status":"ok"}`, + `{"timestamp":103,"message":"good","level":"4","service":"srv_4","status":"ok"}`, + `{"timestamp":104,"message":"bad","level":"5","service":"srv_5","status":"ok"}`, + `{"timestamp":105,"message":"good","level":"6","service":"srv_6","status":"ok"}`, + } s.insertDocuments(docs...) - s.AssertSearch("message:document", docs, []int{3, 2, 1, 0}) -}*/ + s.AssertSearch("NOT level:1", docs, []int{5, 4, 3, 2, 1}) + s.AssertSearch("NOT level:2", docs, []int{5, 4, 3, 2, 0}) + s.AssertSearch("NOT level:5", docs, []int{5, 3, 2, 1, 0}) + s.AssertSearch("NOT level:6", docs, []int{4, 3, 2, 1, 0}) + + s.AssertSearch("NOT message:notfound", docs, []int{5, 4, 3, 2, 1, 0}) + s.AssertSearch("NOT service:srv_*", docs, []int{}) + + s.AssertSearch("NOT message:bad", docs, []int{5, 3, 1}) + s.AssertSearch("NOT message:good", docs, []int{4, 2, 0}) + + s.AssertSearch("NOT message:\"good bad\"", docs, []int{5, 4, 3, 2, 1, 0}) + s.AssertSearch("NOT (message:good AND message:bad)", docs, []int{5, 4, 3, 2, 1, 0}) + s.AssertSearch("NOT (message:good OR message:bad)", docs, []int{}) + + s.AssertSearch("NOT message:bad AND message:bad", docs, []int{}) + s.AssertSearch("NOT message:bad AND message:good", docs, []int{5, 3, 1}) + s.AssertSearch("message:good AND NOT message:good", docs, []int{}) + s.AssertSearch("message:bad AND NOT message:good", docs, []int{4, 2, 0}) +} + +func (s *FractionTestSuite) TestWildcardSymbols() { + docs := []string{ + `{"timestamp":110,"service":"first_value","level":"info"}`, + `{"timestamp":120,"service":"second_value","level":"error"}`, + `{"timestamp":130,"service":"third_value","level":"debug"}`, + `{"timestamp":140,"service":"fourth","level":"warn"}`, + } + s.insertDocuments(docs...) + + s.AssertSearch("service:*", docs, []int{3, 2, 1, 0}) + s.AssertSearch("service:first_value", docs, []int{0}) + s.AssertSearch("service:second_value", docs, []int{1}) + s.AssertSearch("service:third_value", docs, []int{2}) + s.AssertSearch("service:fourth", docs, []int{3}) + s.AssertSearch("level:*", docs, []int{3, 2, 1, 0}) + s.AssertSearch("level:info", docs, []int{0}) + s.AssertSearch("level:error", docs, []int{1}) + s.AssertSearch("level:debug", docs, []int{2}) + s.AssertSearch("level:warn", docs, []int{3}) +} -func (s *FractionTestSuite) checkContains(fraction Fraction, ids []seq.ID) { - info := fraction.Info() - s.Equal(uint32(len(ids)), info.DocsTotal, "Fraction should contain %d documents", len(ids)) +func (s *FractionTestSuite) TestFetch() { + docs := []string{ + `{"timestamp":100,"message":"bad","level":"1","trace_id":"0","service":"0","status":"ok"}`, + `{"timestamp":101,"message":"good","level":"2","trace_id":"0","service":"1","status":"ok"}`, + `{"timestamp":102,"message":"bad","level":"3","trace_id":"0","service":"2","status":"ok"}`, + `{"timestamp":103,"message":"good","level":"4","trace_id":"1","service":"0","status":"ok"}`, + `{"timestamp":104,"message":"bad","level":"5","trace_id":"1","service":"1","status":"ok"}`, + `{"timestamp":105,"message":"good","level":"6","trace_id":"1","service":"2","status":"ok"}`, + `{"timestamp":106,"message":"bad","level":"7","trace_id":"2","service":"0","status":"ok"}`, + `{"timestamp":107,"message":"good","level":"8","trace_id":"2","service":"1","status":"ok"}`, + } + + ids := s.insertDocuments(docs...) + s.Require().Equal(8, len(ids)) + + // Test fetching all documents using a simple query + dp, release := s.fraction.DataProvider(context.Background()) + defer release() + + // Use a simple query that matches all documents + seqql, err := parser.ParseSeqQL("_all_:*", s.mapping) + s.Require().NoError(err) + + params := processor.SearchParams{ + AST: seqql.Root, + From: seq.MID(0), + To: seq.MID(math.MaxUint64), + Limit: math.MaxInt32, + } + + qpr, err := dp.Search(params) + s.Require().NoError(err) + s.Require().Equal(8, qpr.IDs.Len()) + + // Test fetching documents by IDs + fetchedDocs, err := dp.Fetch(qpr.IDs.IDs()) + s.Require().NoError(err) + s.Require().Equal(len(qpr.IDs.IDs()), len(fetchedDocs)) +} + +func (s *FractionTestSuite) TestSearchFullText() { + docs := []string{ + `{"timestamp":100,"message":"first test document","level":"info","service":"test-service","status":"ok"}`, + `{"timestamp":101,"message":"second test document","level":"error","service":"test-service","status":"fail"}`, + `{"timestamp":102,"message":"third test document","level":"debug","service":"another-service","status":"ok"}`, + `{"timestamp":103,"message":"fourth test document","level":"info","service":"another-service","status":"ok"}`, + } - if len(ids) > 0 { - s.True(fraction.Contains(ids[0].MID), "Fraction should contain first document") - s.True(fraction.Contains(ids[len(ids)-1].MID), "Fraction should contain last document") + ids := s.insertDocuments(docs...) + s.Require().Equal(4, len(ids)) + + s.AssertSearch("message:document", docs, []int{3, 2, 1, 0}) + s.AssertSearch("message:test", docs, []int{3, 2, 1, 0}) + s.AssertSearch("message:first", docs, []int{0}) + s.AssertSearch("message:second", docs, []int{1}) + s.AssertSearch("message:third", docs, []int{2}) + s.AssertSearch("message:fourth", docs, []int{3}) + s.AssertSearch("message:fivth", docs, []int{}) +} - s.True(fraction.IsIntersecting(ids[0].MID, ids[len(ids)-1].MID), - "Fraction should intersect with document range") +func (s *FractionTestSuite) TestSearchFromTo() { + docs := []string{ + `{"timestamp":100,"message":"first test document","level":"info","service":"test-service","status":"ok"}`, + `{"timestamp":101,"message":"second test document","level":"error","service":"test-service","status":"fail"}`, + `{"timestamp":102,"message":"third test document","level":"debug","service":"another-service","status":"ok"}`, + `{"timestamp":103,"message":"fourth test document","level":"info","service":"another-service","status":"ok"}`, } + + ids := s.insertDocuments(docs...) + s.Require().Equal(4, len(ids)) + + s.AssertSearchQuery(query("level:info").From(0).To(200), docs, []int{3, 0}) +} + +type SearchQuery struct { + query string + from *seq.MID + to *seq.MID + offset *int + limit *int +} + +func query(q string) *SearchQuery { + return &SearchQuery{query: q} +} + +func (sq *SearchQuery) From(timestamp uint64) *SearchQuery { + mid := seq.MID(timestamp) + sq.from = &mid + return sq +} + +func (sq *SearchQuery) To(timestamp uint64) *SearchQuery { + mid := seq.MID(timestamp) + sq.to = &mid + return sq +} + +func (sq *SearchQuery) Offset(offset int) *SearchQuery { + sq.offset = &offset + return sq +} + +func (sq *SearchQuery) Limit(limit int) *SearchQuery { + sq.limit = &limit + return sq } type ActiveFractionSuite struct { From 6764e06f2f3aa4fabf189b1b5ed58d563bc430a4 Mon Sep 17 00:00:00 2001 From: Andrei Cheboksarov <37665782+cheb0@users.noreply.github.com> Date: Tue, 7 Oct 2025 15:54:39 +0400 Subject: [PATCH 08/48] Use tokenizer API --- frac/fraction_test.go | 162 +++++++++++------------------------------- 1 file changed, 43 insertions(+), 119 deletions(-) diff --git a/frac/fraction_test.go b/frac/fraction_test.go index dd233e02..15441302 100644 --- a/frac/fraction_test.go +++ b/frac/fraction_test.go @@ -5,13 +5,11 @@ import ( "math" "os" "path/filepath" - "strings" "sync" "testing" "time" "github.com/alecthomas/units" - insaneJSON "github.com/ozontech/insane-json" "github.com/ozontech/seq-db/cache" "github.com/ozontech/seq-db/frac/common" "github.com/ozontech/seq-db/frac/processor" @@ -19,9 +17,11 @@ import ( "github.com/ozontech/seq-db/frac/sealed/sealing" "github.com/ozontech/seq-db/frac/sealed/seqids" "github.com/ozontech/seq-db/frac/sealed/token" + "github.com/ozontech/seq-db/indexer" "github.com/ozontech/seq-db/parser" "github.com/ozontech/seq-db/seq" "github.com/ozontech/seq-db/storage" + "github.com/ozontech/seq-db/tokenizer" "github.com/stretchr/testify/suite" ) @@ -36,7 +36,7 @@ type FractionTestSuite struct { fraction Fraction - insertDocuments func(docs ...string) []seq.ID + insertDocuments func(docs ...string) } func (s *FractionTestSuite) SetupSuite() { @@ -72,102 +72,39 @@ func (s *FractionTestSuite) SetupTest() { // s.Require().NoError(err) } -func (s *FractionTestSuite) InsertIntoActive(active *Active, docs ...string) []seq.ID { - docProvider := NewDocProvider() - ids := make([]seq.ID, 0, len(docs)) +func (s *FractionTestSuite) InsertIntoActive(active *Active, docs ...string) { + tokenizers := map[seq.TokenizerType]tokenizer.Tokenizer{ + seq.TokenizerTypeKeyword: tokenizer.NewKeywordTokenizer(512, false, true), + seq.TokenizerTypeText: tokenizer.NewTextTokenizer(20, false, true, 4096), + } - for i, docStr := range docs { - docBytes := []byte(docStr) - root := insaneJSON.Spawn() - err := root.DecodeBytes(docBytes) - s.Require().NoError(err, "not a valid JSON", i) + // drift and futureDrift are 0, we can process docs at any timestamps + p := indexer.NewProcessor(s.mapping, tokenizers, 0, 0, 0) - id := seq.ID{ - MID: seq.MID(time.Now().UnixMilli()) + seq.MID(i*1000), // 1 second apart - RID: seq.RID(i + 1), + idx := 0 + readNext := func() ([]byte, error) { + if idx >= len(docs) { + return nil, nil } - ids = append(ids, id) - tokens := s.extractTokens(root) - docProvider.Append(docBytes, root, id, tokens) + d := []byte(docs[idx]) + idx++ + return d, nil } - docsBlock, metasBlock := docProvider.Provide() + _, rawDocs, rawMeta, err := p.ProcessBulk(time.Now(), nil, nil, readNext) + s.Require().NoError(err, "processing bulk failed") + + compressor := indexer.GetDocsMetasCompressor(3, 3) + defer indexer.PutDocMetasCompressor(compressor) + compressor.CompressDocsAndMetas(rawDocs, rawMeta) + docsBlock, metasBlock := compressor.DocsMetas() var wg sync.WaitGroup wg.Add(1) - err := active.Append(docsBlock, metasBlock, &wg) + err = active.Append(docsBlock, metasBlock, &wg) s.Require().NoError(err, "append to active failed") wg.Wait() - return ids -} - -func (s *FractionTestSuite) extractTokens(root *insaneJSON.Root) []seq.Token { - tokens := make([]seq.Token, 0) - - for fieldName, mappingTypes := range s.mapping { - fieldValue := root.Dig(fieldName) - if fieldValue == nil { - continue - } - - fieldBytes := fieldValue.AsBytes() - if len(fieldBytes) == 0 { - continue - } - - for _, mappingType := range mappingTypes.All { - if mappingType.TokenizerType == seq.TokenizerTypeText { - textTokens := tokenizeText(fieldBytes) - for _, tokenStr := range textTokens { - tokens = append(tokens, seq.Token{ - Field: []byte(fieldName), - Val: []byte(tokenStr), - }) - } - } else { - tokens = append(tokens, seq.Token{ - Field: []byte(fieldName), - Val: fieldBytes, - }) - } - } - } - tokens = append(tokens, seq.Token{ - Field: []byte("_all_"), - Val: []byte(""), - }) - - return tokens -} - -// TODO delete this and replace with proper tokenize -func tokenizeText(text []byte) []string { - if len(text) == 0 { - return nil - } - - var tokens []string - var current strings.Builder - - for i := 0; i < len(text); i++ { - c := text[i] - - if c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '=' { - if current.Len() > 0 { - tokens = append(tokens, strings.ToLower(current.String())) - current.Reset() - } - } else if (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '_' { - current.WriteByte(c) - } - } - - if current.Len() > 0 { - tokens = append(tokens, strings.ToLower(current.String())) - } - - return tokens } func (s *FractionTestSuite) AssertSearch(queryString string, originalDocs []string, indexes []int) { @@ -233,21 +170,17 @@ func (s *FractionTestSuite) AssertSearchQuery(query *SearchQuery, originalDocs [ } } -func (s *FractionTestSuite) TestContainsDocuments() { - docs := []string{ - `{"time":100, "message":"first test document","level":"info","service":"test-service","status":"ok"}`, - `{"time":101, "message":"second test document","level":"error","service":"test-service","status":"fail"}`, - `{"time":102, "message":"third test document","level":"debug","service":"another-service","status":"ok"}`, - } - - ids := s.insertDocuments(docs...) - - s.Len(ids, 3, "Should return 3 document IDs") - s.True(s.fraction.Contains(ids[0].MID)) - s.True(s.fraction.Contains(ids[1].MID)) - s.True(s.fraction.Contains(ids[2].MID)) -} +/* + func (s *FractionTestSuite) TestContainsDocuments() { + docs := []string{ + `{"time":100, "message":"first test document","level":"info","service":"test-service","status":"ok"}`, + `{"time":101, "message":"second test document","level":"error","service":"test-service","status":"fail"}`, + `{"time":102, "message":"third test document","level":"debug","service":"another-service","status":"ok"}`, + } + s.insertDocuments(docs...) + } +*/ func (s *FractionTestSuite) TestSearchKeyword() { docs := []string{ `{"time":100, "message":"first test document","level":"info","service":"test-service","status":"ok"}`, @@ -344,7 +277,7 @@ func (s *FractionTestSuite) TestWildcardSymbols() { s.AssertSearch("level:warn", docs, []int{3}) } -func (s *FractionTestSuite) TestFetch() { +/*func (s *FractionTestSuite) TestFetch() { docs := []string{ `{"timestamp":100,"message":"bad","level":"1","trace_id":"0","service":"0","status":"ok"}`, `{"timestamp":101,"message":"good","level":"2","trace_id":"0","service":"1","status":"ok"}`, @@ -382,7 +315,7 @@ func (s *FractionTestSuite) TestFetch() { fetchedDocs, err := dp.Fetch(qpr.IDs.IDs()) s.Require().NoError(err) s.Require().Equal(len(qpr.IDs.IDs()), len(fetchedDocs)) -} +}*/ func (s *FractionTestSuite) TestSearchFullText() { docs := []string{ @@ -392,8 +325,7 @@ func (s *FractionTestSuite) TestSearchFullText() { `{"timestamp":103,"message":"fourth test document","level":"info","service":"another-service","status":"ok"}`, } - ids := s.insertDocuments(docs...) - s.Require().Equal(4, len(ids)) + s.insertDocuments(docs...) s.AssertSearch("message:document", docs, []int{3, 2, 1, 0}) s.AssertSearch("message:test", docs, []int{3, 2, 1, 0}) @@ -412,8 +344,7 @@ func (s *FractionTestSuite) TestSearchFromTo() { `{"timestamp":103,"message":"fourth test document","level":"info","service":"another-service","status":"ok"}`, } - ids := s.insertDocuments(docs...) - s.Require().Equal(4, len(ids)) + s.insertDocuments(docs...) s.AssertSearchQuery(query("level:info").From(0).To(200), docs, []int{3, 0}) } @@ -488,8 +419,8 @@ func (s *ActiveFractionSuite) SetupTest() { ) s.fraction = active - s.insertDocuments = func(docs ...string) []seq.ID { - return s.InsertIntoActive(active, docs...) + s.insertDocuments = func(docs ...string) { + s.InsertIntoActive(active, docs...) } } @@ -529,7 +460,7 @@ func (s *SealedFractionSuite) SetupTest() { s.tmpDir, err = os.MkdirTemp("", "fraction_test_*") s.Require().NoError(err) - s.insertDocuments = func(docs ...string) []seq.ID { + s.insertDocuments = func(docs ...string) { baseFile := filepath.Join(s.tmpDir, "test_fraction") indexer := NewActiveIndexer(4, 10) indexer.Start() @@ -543,13 +474,7 @@ func (s *SealedFractionSuite) SetupTest() { s.config, ) - ids := s.InsertIntoActive(active, docs...) - - if len(ids) == 0 { - // TODO fail test? - active.Release() - return ids - } + s.InsertIntoActive(active, docs...) sealParams := common.SealParams{ IDsZstdLevel: 3, @@ -577,7 +502,6 @@ func (s *SealedFractionSuite) SetupTest() { ) s.fraction = sealed active.Release() - return ids } } From d1dad9af20bfd8d45bc5d5928707f7dc02e03398 Mon Sep 17 00:00:00 2001 From: Andrei Cheboksarov <37665782+cheb0@users.noreply.github.com> Date: Tue, 7 Oct 2025 17:16:34 +0400 Subject: [PATCH 09/48] basic version with from->to search --- frac/fraction_test.go | 74 +++++++++++++++++++------------------------ 1 file changed, 33 insertions(+), 41 deletions(-) diff --git a/frac/fraction_test.go b/frac/fraction_test.go index 15441302..7dda165d 100644 --- a/frac/fraction_test.go +++ b/frac/fraction_test.go @@ -48,7 +48,7 @@ func (s *FractionTestSuite) SetupSuite() { MaxTIDsPerFraction: 1000, }, }, - SkipSortDocs: true, // TODO enabling will fail tests + SkipSortDocs: true, // TODO enabling sorting will fail tests KeepMetaFile: false, } s.mapping = seq.Mapping{ @@ -91,12 +91,12 @@ func (s *FractionTestSuite) InsertIntoActive(active *Active, docs ...string) { return d, nil } - _, rawDocs, rawMeta, err := p.ProcessBulk(time.Now(), nil, nil, readNext) + _, binaryDocs, binaryMeta, err := p.ProcessBulk(time.Now(), nil, nil, readNext) s.Require().NoError(err, "processing bulk failed") compressor := indexer.GetDocsMetasCompressor(3, 3) defer indexer.PutDocMetasCompressor(compressor) - compressor.CompressDocsAndMetas(rawDocs, rawMeta) + compressor.CompressDocsAndMetas(binaryDocs, binaryMeta) docsBlock, metasBlock := compressor.DocsMetas() var wg sync.WaitGroup @@ -170,23 +170,12 @@ func (s *FractionTestSuite) AssertSearchQuery(query *SearchQuery, originalDocs [ } } -/* - func (s *FractionTestSuite) TestContainsDocuments() { - docs := []string{ - `{"time":100, "message":"first test document","level":"info","service":"test-service","status":"ok"}`, - `{"time":101, "message":"second test document","level":"error","service":"test-service","status":"fail"}`, - `{"time":102, "message":"third test document","level":"debug","service":"another-service","status":"ok"}`, - } - - s.insertDocuments(docs...) - } -*/ func (s *FractionTestSuite) TestSearchKeyword() { docs := []string{ - `{"time":100, "message":"first test document","level":"info","service":"test-service","status":"ok"}`, - `{"time":101, "message":"second test document","level":"error","service":"test-service","status":"fail"}`, - `{"time":102, "message":"third test document","level":"debug","service":"another-service","status":"ok"}`, - `{"time":103, "message":"fourth test document","level":"info","service":"another-service","status":"ok"}`, + `{"timestamp":"2000-01-01T13:00:00Z", "message":"first test document","level":"info","service":"test-service","status":"ok"}`, + `{"timestamp":"2000-01-01T13:00:01Z", "message":"second test document","level":"error","service":"test-service","status":"fail"}`, + `{"timestamp":"2000-01-01T13:00:02Z", "message":"third test document","level":"debug","service":"another-service","status":"ok"}`, + `{"timestamp":"2000-01-01T13:00:03Z", "message":"fourth test document","level":"info","service":"another-service","status":"ok"}`, } s.insertDocuments(docs...) @@ -204,10 +193,10 @@ func (s *FractionTestSuite) TestSearchKeyword() { func (s *FractionTestSuite) TestBasicSearch() { docs := []string{ - `{"timestamp":110,"service":"service_a","message":"first message some text","trace_id":"abcdef","source":"prod01","level":"1"}`, - `{"timestamp":130,"service":"service_b","message":"second message other text","trace_id":"abcdef","source":"prod01","level":"1"}`, - `{"timestamp":140,"service":"service_c","message":"third message other text","trace_id":"aaaaaa","source":"prod02","level":"2"}`, - `{"timestamp":120,"service":"service_a","message":"fourth message some text","trace_id":"bbbbbb","source":"prod01","level":"1"}`, + `{"timestamp":"2000-01-01T13:00:25Z","service":"service_a","message":"first message some text","trace_id":"abcdef","source":"prod01","level":"1"}`, + `{"timestamp":"2000-01-01T13:00:32Z","service":"service_b","message":"second message other text","trace_id":"abcdef","source":"prod01","level":"1"}`, + `{"timestamp":"2000-01-01T13:00:43Z","service":"service_c","message":"third message other text","trace_id":"aaaaaa","source":"prod02","level":"2"}`, + `{"timestamp":"2000-01-01T13:00:53Z","service":"service_a","message":"fourth message some text","trace_id":"bbbbbb","source":"prod01","level":"1"}`, } s.insertDocuments(docs...) @@ -226,12 +215,12 @@ func (s *FractionTestSuite) TestBasicSearch() { func (s *FractionTestSuite) TestSearchNot() { docs := []string{ - `{"timestamp":100,"message":"bad","level":"1","service":"srv_1","status":"ok"}`, - `{"timestamp":101,"message":"good","level":"2","service":"srv_2","status":"ok"}`, - `{"timestamp":102,"message":"bad","level":"3","service":"srv_3","status":"ok"}`, - `{"timestamp":103,"message":"good","level":"4","service":"srv_4","status":"ok"}`, - `{"timestamp":104,"message":"bad","level":"5","service":"srv_5","status":"ok"}`, - `{"timestamp":105,"message":"good","level":"6","service":"srv_6","status":"ok"}`, + `{"timestamp":"2000-01-01T13:00:25Z","message":"bad","level":"1","service":"srv_1","status":"ok"}`, + `{"timestamp":"2000-01-01T13:00:26Z","message":"good","level":"2","service":"srv_2","status":"ok"}`, + `{"timestamp":"2000-01-01T13:00:27Z","message":"bad","level":"3","service":"srv_3","status":"ok"}`, + `{"timestamp":"2000-01-01T13:00:28Z","message":"good","level":"4","service":"srv_4","status":"ok"}`, + `{"timestamp":"2000-01-01T13:00:29Z","message":"bad","level":"5","service":"srv_5","status":"ok"}`, + `{"timestamp":"2000-01-01T13:00:30Z","message":"good","level":"6","service":"srv_6","status":"ok"}`, } s.insertDocuments(docs...) @@ -258,10 +247,10 @@ func (s *FractionTestSuite) TestSearchNot() { func (s *FractionTestSuite) TestWildcardSymbols() { docs := []string{ - `{"timestamp":110,"service":"first_value","level":"info"}`, - `{"timestamp":120,"service":"second_value","level":"error"}`, - `{"timestamp":130,"service":"third_value","level":"debug"}`, - `{"timestamp":140,"service":"fourth","level":"warn"}`, + `{"timestamp":"2000-01-01T13:00:27Z","service":"first_value","level":"info"}`, + `{"timestamp":"2000-01-01T13:00:28Z","service":"second_value","level":"error"}`, + `{"timestamp":"2000-01-01T13:00:29Z","service":"third_value","level":"debug"}`, + `{"timestamp":"2000-01-01T13:00:30Z","service":"fourth","level":"warn"}`, } s.insertDocuments(docs...) @@ -319,10 +308,10 @@ func (s *FractionTestSuite) TestWildcardSymbols() { func (s *FractionTestSuite) TestSearchFullText() { docs := []string{ - `{"timestamp":100,"message":"first test document","level":"info","service":"test-service","status":"ok"}`, - `{"timestamp":101,"message":"second test document","level":"error","service":"test-service","status":"fail"}`, - `{"timestamp":102,"message":"third test document","level":"debug","service":"another-service","status":"ok"}`, - `{"timestamp":103,"message":"fourth test document","level":"info","service":"another-service","status":"ok"}`, + `{"timestamp":"2000-01-01T13:00:30Z","message":"first test document","level":"info","service":"test-service","status":"ok"}`, + `{"timestamp":"2000-01-01T13:00:31Z","message":"second test document","level":"error","service":"test-service","status":"fail"}`, + `{"timestamp":"2000-01-01T13:00:32Z","message":"third test document","level":"debug","service":"another-service","status":"ok"}`, + `{"timestamp":"2000-01-01T13:00:33Z","message":"fourth test document","level":"info","service":"another-service","status":"ok"}`, } s.insertDocuments(docs...) @@ -338,15 +327,18 @@ func (s *FractionTestSuite) TestSearchFullText() { func (s *FractionTestSuite) TestSearchFromTo() { docs := []string{ - `{"timestamp":100,"message":"first test document","level":"info","service":"test-service","status":"ok"}`, - `{"timestamp":101,"message":"second test document","level":"error","service":"test-service","status":"fail"}`, - `{"timestamp":102,"message":"third test document","level":"debug","service":"another-service","status":"ok"}`, - `{"timestamp":103,"message":"fourth test document","level":"info","service":"another-service","status":"ok"}`, + `{"timestamp":"2000-01-01T13:00:35Z","message":"first test document","level":"info","service":"test-service","status":"ok"}`, + `{"timestamp":"2000-01-01T13:00:36Z","message":"second test document","level":"error","service":"test-service","status":"fail"}`, + `{"timestamp":"2000-01-01T13:00:37Z","message":"third test document","level":"debug","service":"another-service","status":"ok"}`, + `{"timestamp":"2000-01-01T13:00:38Z","message":"fourth test document","level":"info","service":"another-service","status":"ok"}`, } s.insertDocuments(docs...) - s.AssertSearchQuery(query("level:info").From(0).To(200), docs, []int{3, 0}) + from, _ := time.Parse(time.RFC3339, "2000-01-01T13:00:35Z") + to, _ := time.Parse(time.RFC3339, "2000-01-01T13:00:38Z") + + s.AssertSearchQuery(query("message:document").From(uint64(from.UnixNano()/int64(time.Millisecond))).To(uint64(to.UnixNano()/int64(time.Millisecond))), docs, []int{3, 2, 1, 0}) } type SearchQuery struct { From faa542475c76b83c6e80b4b86d96950b3235dc40 Mon Sep 17 00:00:00 2001 From: Andrei Cheboksarov <37665782+cheb0@users.noreply.github.com> Date: Tue, 7 Oct 2025 20:38:54 +0400 Subject: [PATCH 10/48] Query options --- frac/fraction_test.go | 212 ++++++++++++++++-------------------------- 1 file changed, 82 insertions(+), 130 deletions(-) diff --git a/frac/fraction_test.go b/frac/fraction_test.go index 7dda165d..83a8b0ee 100644 --- a/frac/fraction_test.go +++ b/frac/fraction_test.go @@ -107,69 +107,6 @@ func (s *FractionTestSuite) InsertIntoActive(active *Active, docs ...string) { wg.Wait() } -func (s *FractionTestSuite) AssertSearch(queryString string, originalDocs []string, indexes []int) { - s.AssertSearchQuery(query(queryString), originalDocs, indexes) -} - -func (s *FractionTestSuite) AssertSearchQuery(query *SearchQuery, originalDocs []string, indexes []int) { - var queryStr string - var from, to seq.MID - var limit int - - queryStr = query.query - if query.from != nil { - from = *query.from - } else { - from = seq.MID(0) - } - if query.to != nil { - to = *query.to - } else { - to = seq.MID(math.MaxUint64) - } - if query.limit != nil { - limit = *query.limit - } else { - limit = math.MaxInt32 - } - - seqql, err := parser.ParseSeqQL(queryStr, s.mapping) - s.Require().NoError(err, "failed to parse query: %s", queryStr) - - dp, release := s.fraction.DataProvider(context.Background()) - defer release() - - params := processor.SearchParams{ - AST: seqql.Root, - From: from, - To: to, - Limit: limit, - } - - qpr, err := dp.Search(params) - s.Require().NoError(err, "search failed for query: %s", queryStr) - - s.Require().Equal(len(indexes), qpr.IDs.Len(), - "expected %d documents but found %d for query: %s", len(indexes), qpr.IDs.Len(), queryStr) - - docs, err := dp.Fetch(qpr.IDs.IDs()) - s.Require().NoError(err, "failed to fetch documents for IDs: %v", qpr.IDs.IDs()) - - fetchedDocs := make([]string, 0, len(docs)) - for _, doc := range docs { - fetchedDocs = append(fetchedDocs, string(doc)) - } - - for i, fetchedDoc := range fetchedDocs { - if i < len(indexes) { - expectedDoc := originalDocs[indexes[i]] - s.Require().Equal(expectedDoc, fetchedDoc, - "document at index %d doesn't match expected document at original index %d for query: %s", - i, indexes[i], queryStr) - } - } -} - func (s *FractionTestSuite) TestSearchKeyword() { docs := []string{ `{"timestamp":"2000-01-01T13:00:00Z", "message":"first test document","level":"info","service":"test-service","status":"ok"}`, @@ -222,6 +159,7 @@ func (s *FractionTestSuite) TestSearchNot() { `{"timestamp":"2000-01-01T13:00:29Z","message":"bad","level":"5","service":"srv_5","status":"ok"}`, `{"timestamp":"2000-01-01T13:00:30Z","message":"good","level":"6","service":"srv_6","status":"ok"}`, } + s.insertDocuments(docs...) s.AssertSearch("NOT level:1", docs, []int{5, 4, 3, 2, 1}) @@ -266,46 +204,6 @@ func (s *FractionTestSuite) TestWildcardSymbols() { s.AssertSearch("level:warn", docs, []int{3}) } -/*func (s *FractionTestSuite) TestFetch() { - docs := []string{ - `{"timestamp":100,"message":"bad","level":"1","trace_id":"0","service":"0","status":"ok"}`, - `{"timestamp":101,"message":"good","level":"2","trace_id":"0","service":"1","status":"ok"}`, - `{"timestamp":102,"message":"bad","level":"3","trace_id":"0","service":"2","status":"ok"}`, - `{"timestamp":103,"message":"good","level":"4","trace_id":"1","service":"0","status":"ok"}`, - `{"timestamp":104,"message":"bad","level":"5","trace_id":"1","service":"1","status":"ok"}`, - `{"timestamp":105,"message":"good","level":"6","trace_id":"1","service":"2","status":"ok"}`, - `{"timestamp":106,"message":"bad","level":"7","trace_id":"2","service":"0","status":"ok"}`, - `{"timestamp":107,"message":"good","level":"8","trace_id":"2","service":"1","status":"ok"}`, - } - - ids := s.insertDocuments(docs...) - s.Require().Equal(8, len(ids)) - - // Test fetching all documents using a simple query - dp, release := s.fraction.DataProvider(context.Background()) - defer release() - - // Use a simple query that matches all documents - seqql, err := parser.ParseSeqQL("_all_:*", s.mapping) - s.Require().NoError(err) - - params := processor.SearchParams{ - AST: seqql.Root, - From: seq.MID(0), - To: seq.MID(math.MaxUint64), - Limit: math.MaxInt32, - } - - qpr, err := dp.Search(params) - s.Require().NoError(err) - s.Require().Equal(8, qpr.IDs.Len()) - - // Test fetching documents by IDs - fetchedDocs, err := dp.Fetch(qpr.IDs.IDs()) - s.Require().NoError(err) - s.Require().Equal(len(qpr.IDs.IDs()), len(fetchedDocs)) -}*/ - func (s *FractionTestSuite) TestSearchFullText() { docs := []string{ `{"timestamp":"2000-01-01T13:00:30Z","message":"first test document","level":"info","service":"test-service","status":"ok"}`, @@ -335,44 +233,98 @@ func (s *FractionTestSuite) TestSearchFromTo() { s.insertDocuments(docs...) - from, _ := time.Parse(time.RFC3339, "2000-01-01T13:00:35Z") - to, _ := time.Parse(time.RFC3339, "2000-01-01T13:00:38Z") - - s.AssertSearchQuery(query("message:document").From(uint64(from.UnixNano()/int64(time.Millisecond))).To(uint64(to.UnixNano()/int64(time.Millisecond))), docs, []int{3, 2, 1, 0}) + s.AssertSearch(s.query("message:document", withFrom("2000-01-01T13:00:35Z"), withTo("2000-01-01T13:00:38Z")), docs, []int{3, 2, 1, 0}) + s.AssertSearch(s.query("message:document", withFrom("2000-01-01T13:00:35Z"), withTo("2000-01-01T13:00:37Z")), docs, []int{2, 1, 0}) + s.AssertSearch(s.query("message:document", withFrom("2000-01-01T13:00:36Z"), withTo("2000-01-01T13:00:37Z")), docs, []int{2, 1}) } -type SearchQuery struct { - query string - from *seq.MID - to *seq.MID - offset *int - limit *int +type searchOption func(*processor.SearchParams) error + +func (s *FractionTestSuite) query(queryString string, options ...searchOption) *processor.SearchParams { + seqql, err := parser.ParseSeqQL(queryString, s.mapping) + s.Require().NoError(err, "failed to parse query: %s", queryString) + + params := &processor.SearchParams{ + AST: seqql.Root, + From: seq.MID(0), + To: seq.MID(math.MaxUint64), + Limit: math.MaxInt32, + } + + for _, option := range options { + err := option(params) + s.Require().NoError(err, "option can not be applied") + } + + return params } -func query(q string) *SearchQuery { - return &SearchQuery{query: q} +func withFrom(from string) searchOption { + return func(p *processor.SearchParams) error { + time, err := time.Parse(time.RFC3339, from) + if err != nil { + return err + } + p.From = seq.TimeToMID(time) + return nil + } } -func (sq *SearchQuery) From(timestamp uint64) *SearchQuery { - mid := seq.MID(timestamp) - sq.from = &mid - return sq +func withTo(to string) searchOption { + return func(p *processor.SearchParams) error { + time, err := time.Parse(time.RFC3339, to) + if err != nil { + return err + } + p.To = seq.TimeToMID(time) + return nil + } } -func (sq *SearchQuery) To(timestamp uint64) *SearchQuery { - mid := seq.MID(timestamp) - sq.to = &mid - return sq +func withLimit(limit int) searchOption { + return func(p *processor.SearchParams) error { + p.Limit = limit + return nil + } } -func (sq *SearchQuery) Offset(offset int) *SearchQuery { - sq.offset = &offset - return sq +func (s *FractionTestSuite) AssertSearch(queryObject interface{}, originalDocs []string, indexes []int) { + switch q := queryObject.(type) { + case string: + s.AssertSearchWithSearchParams(s.query(q), originalDocs, indexes) + case *processor.SearchParams: + s.AssertSearchWithSearchParams(q, originalDocs, indexes) + default: + s.Require().Fail("type for query object not supported") + } } -func (sq *SearchQuery) Limit(limit int) *SearchQuery { - sq.limit = &limit - return sq +func (s *FractionTestSuite) AssertSearchWithSearchParams(params *processor.SearchParams, originalDocs []string, indexes []int) { + dp, release := s.fraction.DataProvider(context.Background()) + defer release() + + qpr, err := dp.Search(*params) + s.Require().NoError(err, "search failed for query") + + s.Require().Equal(len(indexes), qpr.IDs.Len(), + "expected %d documents but found %d", len(indexes), qpr.IDs.Len()) + + docs, err := dp.Fetch(qpr.IDs.IDs()) + s.Require().NoError(err, "failed to fetch documents for IDs: %v", qpr.IDs.IDs()) + + fetchedDocs := make([]string, 0, len(docs)) + for _, doc := range docs { + fetchedDocs = append(fetchedDocs, string(doc)) + } + + for i, fetchedDoc := range fetchedDocs { + if i < len(indexes) { + expectedDoc := originalDocs[indexes[i]] + s.Require().Equal(expectedDoc, fetchedDoc, + "document at index %d doesn't match expected document at original index %d", + i, indexes[i]) + } + } } type ActiveFractionSuite struct { @@ -426,7 +378,7 @@ func (s *ActiveFractionSuite) TearDownTest() { } err := os.RemoveAll(s.tmpDir) - s.NoError(err, "Failed to remove tmp dir") + s.NoError(err, "failed to remove tmp dir") } type SealedFractionSuite struct { From e120517336a28bd438595e97fe4aaee1edef74f6 Mon Sep 17 00:00:00 2001 From: Andrei Cheboksarov <37665782+cheb0@users.noreply.github.com> Date: Wed, 8 Oct 2025 11:49:42 +0400 Subject: [PATCH 11/48] Port from-to and wildcard tests --- frac/fraction_test.go | 97 ++++++++---- tests/integration_tests/single_test.go | 208 +++---------------------- 2 files changed, 89 insertions(+), 216 deletions(-) diff --git a/frac/fraction_test.go b/frac/fraction_test.go index 83a8b0ee..30d9aee0 100644 --- a/frac/fraction_test.go +++ b/frac/fraction_test.go @@ -2,6 +2,7 @@ package frac import ( "context" + "fmt" "math" "os" "path/filepath" @@ -185,23 +186,26 @@ func (s *FractionTestSuite) TestSearchNot() { func (s *FractionTestSuite) TestWildcardSymbols() { docs := []string{ - `{"timestamp":"2000-01-01T13:00:27Z","service":"first_value","level":"info"}`, - `{"timestamp":"2000-01-01T13:00:28Z","service":"second_value","level":"error"}`, - `{"timestamp":"2000-01-01T13:00:29Z","service":"third_value","level":"debug"}`, - `{"timestamp":"2000-01-01T13:00:30Z","service":"fourth","level":"warn"}`, + `{"timestamp":"2000-01-01T13:00:00.010Z","message":"first value:****"}`, + `{"timestamp":"2000-01-01T13:00:00.020Z","message":"second value:*******"}`, + `{"timestamp":"2000-01-01T13:00:00.030Z","message":"third value****"}`, + `{"timestamp":"2000-01-01T13:00:00.040Z","message":"fourth ****"}`, } s.insertDocuments(docs...) - s.AssertSearch("service:*", docs, []int{3, 2, 1, 0}) - s.AssertSearch("service:first_value", docs, []int{0}) - s.AssertSearch("service:second_value", docs, []int{1}) - s.AssertSearch("service:third_value", docs, []int{2}) - s.AssertSearch("service:fourth", docs, []int{3}) - s.AssertSearch("level:*", docs, []int{3, 2, 1, 0}) - s.AssertSearch("level:info", docs, []int{0}) - s.AssertSearch("level:error", docs, []int{1}) - s.AssertSearch("level:debug", docs, []int{2}) - s.AssertSearch("level:warn", docs, []int{3}) + s.AssertSearch(`message:*`, docs, []int{3, 2, 1, 0}) + s.AssertSearch(`message:value`, docs, []int{1, 0}) + s.AssertSearch(`message:value*`, docs, []int{2, 1, 0}) + s.AssertSearch(`message:value\*`, docs, []int{}) + s.AssertSearch(`message:value\**`, docs, []int{2}) + s.AssertSearch(`message:*\**`, docs, []int{3, 2, 1, 0}) + s.AssertSearch(`message:*e\**`, docs, []int{2}) + s.AssertSearch(`message:\**`, docs, []int{3, 1, 0}) + s.AssertSearch(`message:\*\*\*\*`, docs, []int{3, 0}) + s.AssertSearch(`message:\*\*\*\**`, docs, []int{3, 1, 0}) + s.AssertSearch(`message:value* AND message:\*\**`, docs, []int{1, 0}) + s.AssertSearch(`message:value* OR message:\*\**`, docs, []int{3, 2, 1, 0}) + } func (s *FractionTestSuite) TestSearchFullText() { @@ -220,32 +224,69 @@ func (s *FractionTestSuite) TestSearchFullText() { s.AssertSearch("message:second", docs, []int{1}) s.AssertSearch("message:third", docs, []int{2}) s.AssertSearch("message:fourth", docs, []int{3}) - s.AssertSearch("message:fivth", docs, []int{}) + s.AssertSearch("message:fifth", docs, []int{}) } func (s *FractionTestSuite) TestSearchFromTo() { docs := []string{ - `{"timestamp":"2000-01-01T13:00:35Z","message":"first test document","level":"info","service":"test-service","status":"ok"}`, - `{"timestamp":"2000-01-01T13:00:36Z","message":"second test document","level":"error","service":"test-service","status":"fail"}`, - `{"timestamp":"2000-01-01T13:00:37Z","message":"third test document","level":"debug","service":"another-service","status":"ok"}`, - `{"timestamp":"2000-01-01T13:00:38Z","message":"fourth test document","level":"info","service":"another-service","status":"ok"}`, + `{"timestamp":"2000-01-01T13:00:00.000Z","message":"bad","level":"1","trace_id":"0","service":"0"}`, + `{"timestamp":"2000-01-01T13:00:00.001Z","message":"good","level":"2","trace_id":"0","service":"1"}`, + `{"timestamp":"2000-01-01T13:00:00.002Z","message":"bad","level":"3","trace_id":"0","service":"2"}`, + `{"timestamp":"2000-01-01T13:00:00.003Z","message":"good","level":"4","trace_id":"1","service":"0"}`, + `{"timestamp":"2000-01-01T13:00:00.004Z","message":"bad","level":"5","trace_id":"1","service":"1"}`, + `{"timestamp":"2000-01-01T13:00:00.005Z","message":"good","level":"6","trace_id":"1","service":"2"}`, + `{"timestamp":"2000-01-01T13:00:00.006Z","message":"bad","level":"7","trace_id":"2","service":"0"}`, + `{"timestamp":"2000-01-01T13:00:00.007Z","message":"good","level":"8","trace_id":"2","service":"1"}`, } s.insertDocuments(docs...) - s.AssertSearch(s.query("message:document", withFrom("2000-01-01T13:00:35Z"), withTo("2000-01-01T13:00:38Z")), docs, []int{3, 2, 1, 0}) - s.AssertSearch(s.query("message:document", withFrom("2000-01-01T13:00:35Z"), withTo("2000-01-01T13:00:37Z")), docs, []int{2, 1, 0}) - s.AssertSearch(s.query("message:document", withFrom("2000-01-01T13:00:36Z"), withTo("2000-01-01T13:00:37Z")), docs, []int{2, 1}) + assertSearch := func(query string, fromOffset, toOffset int, indexes []int) { + s.AssertSearch(s.query( + query, + withFrom(fmt.Sprintf("2000-01-01T13:00:00.%03dZ", fromOffset)), + withTo(fmt.Sprintf("2000-01-01T13:00:00.%03dZ", toOffset))), + docs, indexes) + } + + assertSearch(`message:good`, 0, 7, []int{7, 5, 3, 1}) + assertSearch(`message:bad`, 0, 7, []int{6, 4, 2, 0}) + assertSearch(`message:good`, 0, 6, []int{5, 3, 1}) + assertSearch(`message:bad`, 1, 7, []int{6, 4, 2}) + + assertSearch(`message:good OR message:bad`, 2, 6, []int{6, 5, 4, 3, 2}) + assertSearch(`message:good OR message:bad`, 3, 3, []int{3}) + + assertSearch(`NOT message:notexists`, 0, 7, []int{7, 6, 5, 4, 3, 2, 1, 0}) + assertSearch(`NOT message:notexists`, 0, 6, []int{6, 5, 4, 3, 2, 1, 0}) + assertSearch(`NOT message:notexists`, 1, 7, []int{7, 6, 5, 4, 3, 2, 1}) + assertSearch(`NOT message:notexists`, 1, 6, []int{6, 5, 4, 3, 2, 1}) + + assertSearch(`NOT message:notexists AND message:*`, 1, 6, []int{6, 5, 4, 3, 2, 1}) + assertSearch(`NOT message:notexists AND (message:* OR message:*)`, 1, 6, []int{6, 5, 4, 3, 2, 1}) + assertSearch(`NOT message:notexists AND (message:good OR message:bad)`, 1, 6, []int{6, 5, 4, 3, 2, 1}) + assertSearch(`NOT message:notexists AND message:good`, 1, 6, []int{5, 3, 1}) + + assertSearch(`NOT (message:good OR message:bad)`, 0, 7, []int{}) + assertSearch(`NOT (message:good OR message:bad)`, 1, 6, []int{}) + + assertSearch(`NOT trace_id:0`, 0, 2, []int{}) + assertSearch(`NOT trace_id:0`, 0, 3, []int{3}) + assertSearch(`NOT trace_id:1`, 3, 5, []int{}) + assertSearch(`NOT trace_id:1`, 2, 6, []int{6, 2}) + + assertSearch(`NOT trace_id:0 AND NOT trace_id:2`, 0, 10, []int{5, 4, 3}) + assertSearch(`NOT trace_id:0 AND NOT trace_id:2`, 3, 5, []int{5, 4, 3}) } type searchOption func(*processor.SearchParams) error func (s *FractionTestSuite) query(queryString string, options ...searchOption) *processor.SearchParams { - seqql, err := parser.ParseSeqQL(queryString, s.mapping) + queryAst, err := parser.ParseQuery(queryString, s.mapping) s.Require().NoError(err, "failed to parse query: %s", queryString) params := &processor.SearchParams{ - AST: seqql.Root, + AST: queryAst, From: seq.MID(0), To: seq.MID(math.MaxUint64), Limit: math.MaxInt32, @@ -261,22 +302,22 @@ func (s *FractionTestSuite) query(queryString string, options ...searchOption) * func withFrom(from string) searchOption { return func(p *processor.SearchParams) error { - time, err := time.Parse(time.RFC3339, from) + t, err := time.Parse(time.RFC3339, from) if err != nil { return err } - p.From = seq.TimeToMID(time) + p.From = seq.TimeToMID(t) return nil } } func withTo(to string) searchOption { return func(p *processor.SearchParams) error { - time, err := time.Parse(time.RFC3339, to) + t, err := time.Parse(time.RFC3339, to) if err != nil { return err } - p.To = seq.TimeToMID(time) + p.To = seq.TimeToMID(t) return nil } } diff --git a/tests/integration_tests/single_test.go b/tests/integration_tests/single_test.go index 0b029de4..906dc58b 100644 --- a/tests/integration_tests/single_test.go +++ b/tests/integration_tests/single_test.go @@ -5,7 +5,6 @@ import ( "fmt" "math" "math/rand/v2" - "slices" "sort" "strings" "testing" @@ -18,7 +17,6 @@ import ( "github.com/ozontech/seq-db/proxy/search" "github.com/ozontech/seq-db/proxy/stores" "github.com/ozontech/seq-db/seq" - "github.com/ozontech/seq-db/tests/common" "github.com/ozontech/seq-db/tests/setup" "github.com/ozontech/seq-db/tests/suites" ) @@ -76,19 +74,6 @@ func simpleCases(startTS time.Time) []setup.ExampleDoc { return docs } -func (s *SingleTestSuite) TestBasicSearch() { - startTS := time.Now() - docs := simpleCases(startTS) - docStrs := setup.DocsToStrings(docs) - // order of docs is "2, 1, 3, 0" - // first: order is reversed - // second: doc #3 has smaller timestamp, than #1 and #2, - // so it will be reordered - s.Bulk(docStrs) - - s.assertSearch(docStrs) -} - func (s *SingleTestSuite) TestBasicSearchHotRead() { startTS := time.Now() docs := simpleCases(startTS) @@ -105,7 +90,26 @@ func (s *SingleTestSuite) TestBasicSearchHotRead() { Shards: [][]string{}, Vers: []string{}, } - s.assertSearch(docStrs) + + s.RunFracEnvs(suites.AllFracEnvs, true, func() { + s.AssertSearch(`service: service_a`, docStrs, []int{3, 0}) + s.AssertSearch(`traceID:abcdef`, docStrs, []int{1, 0}) + s.AssertSearch(`level: 1`, docStrs, []int{1, 3, 0}) + + s.AssertSearch(`message: "message text"`, docStrs, []int{2, 1, 3, 0}) + s.AssertSearch(`message: "other text"`, docStrs, []int{2, 1}) + + s.AssertSearch(`traceID: abcd*`, docStrs, []int{1, 0}) + s.AssertSearch(`traceID: a*`, docStrs, []int{2, 1, 0}) + s.AssertSearch(`traceID: a*f`, docStrs, []int{1, 0}) + s.AssertSearch(`traceID: a*a`, docStrs, []int{2}) + s.AssertSearch(`service: service*a`, docStrs, []int{3, 0}) + s.AssertSearch(`message: message\ som*`, docStrs, []int{3, 0}) + + // test limit + s.AssertDocsEqual(docStrs, []int{2, 1}, s.SearchDocs(`message:other`, 2, seq.DocsOrderAsc)) + s.AssertDocsEqual(docStrs, []int{2, 1}, s.SearchDocs(`message:other`, 2, seq.DocsOrderDesc)) + }) } func (s *SingleTestSuite) TestSearchAgg() { @@ -140,28 +144,6 @@ func (s *SingleTestSuite) TestSearchAgg() { }) } -func (s *SingleTestSuite) assertSearch(docStrs []string) { - s.RunFracEnvs(suites.AllFracEnvs, true, func() { - s.AssertSearch(`service: service_a`, docStrs, []int{3, 0}) - s.AssertSearch(`traceID:abcdef`, docStrs, []int{1, 0}) - s.AssertSearch(`level: 1`, docStrs, []int{1, 3, 0}) - - s.AssertSearch(`message: "message text"`, docStrs, []int{2, 1, 3, 0}) - s.AssertSearch(`message: "other text"`, docStrs, []int{2, 1}) - - s.AssertSearch(`traceID: abcd*`, docStrs, []int{1, 0}) - s.AssertSearch(`traceID: a*`, docStrs, []int{2, 1, 0}) - s.AssertSearch(`traceID: a*f`, docStrs, []int{1, 0}) - s.AssertSearch(`traceID: a*a`, docStrs, []int{2}) - s.AssertSearch(`service: service*a`, docStrs, []int{3, 0}) - s.AssertSearch(`message: message\ som*`, docStrs, []int{3, 0}) - - // test limit - s.AssertDocsEqual(docStrs, []int{2, 1}, s.SearchDocs(`message:other`, 2, seq.DocsOrderAsc)) - s.AssertDocsEqual(docStrs, []int{2, 1}, s.SearchDocs(`message:other`, 2, seq.DocsOrderDesc)) - }) -} - func (s *SingleTestSuite) TestSearchNestedIndexOneFraction() { const numDocs = 100 const doc = `{"trace_id": "1", "spans": [{"span_id": "1"}, {"span_id": "2"}]}` @@ -223,41 +205,6 @@ func (s *SingleTestSuite) TestSearchNestedWithAND() { }) } -func (s *SingleTestSuite) TestSearchNot() { - docs := setup.GenerateDocs(6, func(i int, doc *setup.ExampleDoc) { - doc.Message = good - if i%2 == 0 { - doc.Message = bad - } - doc.Level = i + 1 // zero will not write - doc.Service = fmt.Sprintf("srv_%d", i+1) - }) - docStrs := setup.DocsToStrings(docs) - s.Bulk(docStrs) - - s.RunFracEnvs(suites.AllFracEnvs, true, func() { - s.AssertSearch(`NOT level:1`, docStrs, []int{5, 4, 3, 2, 1}) - s.AssertSearch(`NOT level:2`, docStrs, []int{5, 4, 3, 2, 0}) - s.AssertSearch(`NOT level:5`, docStrs, []int{5, 3, 2, 1, 0}) - s.AssertSearch(`NOT level:6`, docStrs, []int{4, 3, 2, 1, 0}) - - s.AssertSearch(`NOT message:notfound`, docStrs, []int{5, 4, 3, 2, 1, 0}) - s.AssertSearch(`NOT service:srv_*`, docStrs, []int{}) - - s.AssertSearch(`NOT message:bad`, docStrs, []int{5, 3, 1}) - s.AssertSearch(`NOT message:good`, docStrs, []int{4, 2, 0}) - - s.AssertSearch(`NOT message:"good bad"`, docStrs, []int{5, 4, 3, 2, 1, 0}) - s.AssertSearch(`NOT (message:good AND message:bad)`, docStrs, []int{5, 4, 3, 2, 1, 0}) - s.AssertSearch(`NOT (message:good OR message:bad)`, docStrs, []int{}) - - s.AssertSearch(`NOT message:bad AND message:bad`, docStrs, []int{}) - s.AssertSearch(`NOT message:bad AND message:good`, docStrs, []int{5, 3, 1}) - s.AssertSearch(`message:good AND NOT message:good`, docStrs, []int{}) - s.AssertSearch(`message:bad AND NOT message:good`, docStrs, []int{4, 2, 0}) - }) -} - type ExampleDocSorting struct { sample []setup.ExampleDoc docStrs []string @@ -332,82 +279,6 @@ func (s *SingleTestSuite) TestFetchHints() { }) } -func (s *SingleTestSuite) TestSearchFromTo() { - docs := setup.GenerateDocs(8, func(i int, doc *setup.ExampleDoc) { - doc.Message = good - if i%2 == 0 { - doc.Message = bad - } - doc.Level = i + 1 // zero will not write - doc.TraceID = fmt.Sprintf("%d", i/3) - doc.Service = fmt.Sprintf("%d", i%3) - }) - start := docs[0].Timestamp - docStrs := setup.DocsToStrings(docs) - s.Bulk(docStrs) - - assertSearch := func(query string, from int, to int, indexes []int) { - fromMID := seq.TimeToMID(start.Add(time.Millisecond * time.Duration(from))) - toMID := seq.TimeToMID(start.Add(time.Millisecond * time.Duration(to))) - - for _, withTotal := range []bool{true, false} { - for _, o := range []seq.DocsOrder{seq.DocsOrderAsc, seq.DocsOrderDesc} { - _, docsStream, _, err := s.Ingestor().SearchIngestor.Search( - context.Background(), - &search.SearchRequest{ - Explain: false, - Q: []byte(query), - Offset: 0, - Size: math.MaxUint32, - Interval: 0, - From: fromMID, - To: toMID, - WithTotal: withTotal, - ShouldFetch: true, - Order: o, - }, - nil, - ) - s.Require().NoError(err) - foundDocs := common.ToStringSlice(search.ReadAll(docsStream)) - if o.IsReverse() { - slices.Reverse(foundDocs) - } - s.AssertDocsEqual(docStrs, indexes, foundDocs) - } - } - } - - s.RunFracEnvs(suites.AllFracEnvs, true, func() { - assertSearch(`message:good`, 0, 7, []int{7, 5, 3, 1}) - assertSearch(`message:bad`, 0, 7, []int{6, 4, 2, 0}) - assertSearch(`message:good`, 0, 6, []int{5, 3, 1}) - assertSearch(`message:bad`, 1, 7, []int{6, 4, 2}) - - assertSearch(`message:good OR message:bad`, 2, 6, []int{6, 5, 4, 3, 2}) - assertSearch(`message:good OR message:bad`, 3, 3, []int{3}) - assertSearch(`NOT message:notexists`, 0, 7, []int{7, 6, 5, 4, 3, 2, 1, 0}) - assertSearch(`NOT message:notexists`, 0, 6, []int{6, 5, 4, 3, 2, 1, 0}) - - assertSearch(`NOT message:notexists`, 1, 7, []int{7, 6, 5, 4, 3, 2, 1}) - assertSearch(`NOT message:notexists`, 1, 6, []int{6, 5, 4, 3, 2, 1}) - assertSearch(`NOT message:notexists AND message:*`, 1, 6, []int{6, 5, 4, 3, 2, 1}) - assertSearch(`NOT message:notexists AND (message:* OR message:*)`, 1, 6, []int{6, 5, 4, 3, 2, 1}) - - assertSearch(`NOT message:notexists AND (message:good OR message:bad)`, 1, 6, []int{6, 5, 4, 3, 2, 1}) - assertSearch(`NOT message:notexists AND message:good`, 1, 6, []int{5, 3, 1}) - assertSearch(`NOT (message:good OR message:bad)`, 0, 7, []int{}) - assertSearch(`NOT (message:good OR message:bad)`, 1, 6, []int{}) - - assertSearch(`NOT traceID:0`, 0, 2, []int{}) - assertSearch(`NOT traceID:0`, 0, 3, []int{3}) - assertSearch(`NOT traceID:1`, 3, 5, []int{}) - assertSearch(`NOT traceID:1`, 2, 6, []int{6, 2}) - assertSearch(`NOT traceID:0 AND NOT traceID:2`, 0, 10, []int{5, 4, 3}) - assertSearch(`NOT traceID:0 AND NOT traceID:2`, 3, 5, []int{5, 4, 3}) - }) -} - func (s *SingleTestSuite) TestFetch() { n := 8 docs := setup.GenerateDocs(n, func(i int, doc *setup.ExampleDoc) { @@ -434,45 +305,6 @@ func (s *SingleTestSuite) TestFetch() { }) } -func (s *SingleTestSuite) TestWildcardSymbols() { - startTS := time.Now() - docs := []setup.ExampleDoc{ - { - Message: "first value:****", - Timestamp: startTS.Add(time.Millisecond * 10), - }, - { - Message: "second value:*******", - Timestamp: startTS.Add(time.Millisecond * 20), - }, - { - Message: "third value****", - Timestamp: startTS.Add(time.Millisecond * 30), - }, - { - Message: "fourth ****", - Timestamp: startTS.Add(time.Millisecond * 40), - }, - } - docStrs := setup.DocsToStrings(docs) - s.Bulk(docStrs) - - s.RunFracEnvs(suites.AllFracEnvs, true, func() { - s.AssertSearch(`message:*`, docStrs, []int{3, 2, 1, 0}) - s.AssertSearch(`message:value`, docStrs, []int{1, 0}) - s.AssertSearch(`message:value*`, docStrs, []int{2, 1, 0}) - s.AssertSearch(`message:value\*`, docStrs, []int{}) - s.AssertSearch(`message:value\**`, docStrs, []int{2}) - s.AssertSearch(`message:*\**`, docStrs, []int{3, 2, 1, 0}) - s.AssertSearch(`message:*e\**`, docStrs, []int{2}) - s.AssertSearch(`message:\**`, docStrs, []int{3, 1, 0}) - s.AssertSearch(`message:\*\*\*\*`, docStrs, []int{3, 0}) - s.AssertSearch(`message:\*\*\*\**`, docStrs, []int{3, 1, 0}) - s.AssertSearch(`message:value* AND message:\*\**`, docStrs, []int{1, 0}) - s.AssertSearch(`message:value* OR message:\*\**`, docStrs, []int{3, 2, 1, 0}) - }) -} - func (s *SingleTestSuite) TestIndexingAllFields() { defer func(m seq.Mapping, enabled bool) { s.Config.Mapping = m From 77e9fe0f9111b51668a7ac2ecf66812c4016fe7d Mon Sep 17 00:00:00 2001 From: Andrei Cheboksarov <37665782+cheb0@users.noreply.github.com> Date: Wed, 8 Oct 2025 12:31:31 +0400 Subject: [PATCH 12/48] path search test, nested search test --- frac/fraction_test.go | 116 ++++++++++++++++--------- tests/integration_tests/single_test.go | 26 ------ 2 files changed, 76 insertions(+), 66 deletions(-) diff --git a/frac/fraction_test.go b/frac/fraction_test.go index 30d9aee0..8b9c2fa1 100644 --- a/frac/fraction_test.go +++ b/frac/fraction_test.go @@ -62,21 +62,38 @@ func (s *FractionTestSuite) SetupSuite() { "status": seq.NewSingleType(seq.TokenizerTypeKeyword, "", 0), "source": seq.NewSingleType(seq.TokenizerTypeKeyword, "", 0), "trace_id": seq.NewSingleType(seq.TokenizerTypeKeyword, "", 0), - "spans": seq.NewSingleType(seq.TokenizerTypeKeyword, "", 0), + "request_uri": seq.NewSingleType(seq.TokenizerTypePath, "", 0), + "spans": seq.NewSingleType(seq.TokenizerTypeNested, "", 0), + "spans.span_id": seq.NewSingleType(seq.TokenizerTypeKeyword, "", 0), + "process": seq.NewSingleType(seq.TokenizerTypeObject, "", 0), + "process.tags": seq.NewSingleType(seq.TokenizerTypeTags, "", 0), + "tags": seq.NewSingleType(seq.TokenizerTypeTags, "", 0), } } -func (s *FractionTestSuite) SetupTest() { - // TODO doesn't work. check - // var err error - // s.tmpDir, err = os.MkdirTemp("", "fraction_test_*") - // s.Require().NoError(err) +func (s *FractionTestSuite) SetupTestCommon() { + var err error + s.tmpDir, err = os.MkdirTemp("", "fraction_test_*") + s.Require().NoError(err) + + s.sortCache = cache.NewCache[[]byte](cache.NewCleaner(uint64(10*units.MiB), nil), nil) + s.indexCache = &IndexCache{ + MIDs: cache.NewCache[[]byte](cache.NewCleaner(uint64(10*units.MiB), nil), nil), + RIDs: cache.NewCache[[]byte](cache.NewCleaner(uint64(10*units.MiB), nil), nil), + Params: cache.NewCache[seqids.BlockParams](cache.NewCleaner(uint64(10*units.MiB), nil), nil), + LIDs: cache.NewCache[*lids.Block](cache.NewCleaner(uint64(10*units.MiB), nil), nil), + Tokens: cache.NewCache[*token.Block](cache.NewCleaner(uint64(10*units.MiB), nil), nil), + TokenTable: cache.NewCache[token.Table](cache.NewCleaner(uint64(10*units.MiB), nil), nil), + Registry: cache.NewCache[[]byte](cache.NewCleaner(uint64(10*units.MiB), nil), nil), + } + s.readLimiter = storage.NewReadLimiter(2, NopCounter{}) } func (s *FractionTestSuite) InsertIntoActive(active *Active, docs ...string) { tokenizers := map[seq.TokenizerType]tokenizer.Tokenizer{ seq.TokenizerTypeKeyword: tokenizer.NewKeywordTokenizer(512, false, true), seq.TokenizerTypeText: tokenizer.NewTextTokenizer(20, false, true, 4096), + seq.TokenizerTypePath: tokenizer.NewPathTokenizer(512, false, true), } // drift and futureDrift are 0, we can process docs at any timestamps @@ -149,6 +166,7 @@ func (s *FractionTestSuite) TestBasicSearch() { s.AssertSearch("trace_id:a*f", docs, []int{1, 0}) s.AssertSearch("trace_id:a*a", docs, []int{2}) s.AssertSearch("service:service*a", docs, []int{3, 0}) + s.AssertSearch("_all_:*", docs, []int{3, 2, 1, 0}) } func (s *FractionTestSuite) TestSearchNot() { @@ -184,7 +202,7 @@ func (s *FractionTestSuite) TestSearchNot() { s.AssertSearch("message:bad AND NOT message:good", docs, []int{4, 2, 0}) } -func (s *FractionTestSuite) TestWildcardSymbols() { +func (s *FractionTestSuite) TestWildcardSymbolsSearch() { docs := []string{ `{"timestamp":"2000-01-01T13:00:00.010Z","message":"first value:****"}`, `{"timestamp":"2000-01-01T13:00:00.020Z","message":"second value:*******"}`, @@ -227,6 +245,54 @@ func (s *FractionTestSuite) TestSearchFullText() { s.AssertSearch("message:fifth", docs, []int{}) } +func (s *FractionTestSuite) TestSearchPath() { + docs := []string{ + `{"timestamp":"2000-01-01T13:00:00.000Z","service":"a","request_uri":"/one"}`, + `{"timestamp":"2000-01-01T13:00:00.001Z","service":"a","request_uri":"/one/two"}`, + `{"timestamp":"2000-01-01T13:00:00.002Z","service":"a","request_uri":"/one/two/three"}`, + `{"timestamp":"2000-01-01T13:00:00.003Z","service":"a","request_uri":"/one/two.three/four"}`, + `{"timestamp":"2000-01-01T13:00:00.004Z","service":"a","request_uri":"/one/two.three/five"}`, + `{"timestamp":"2000-01-01T13:00:00.005Z","service":"a","request_uri":"/one/two/three/"}`, + `{"timestamp":"2000-01-01T13:00:00.006Z","service":"a","request_uri":"/one/two/three/1"}`, + `{"timestamp":"2000-01-01T13:00:00.007Z","service":"a","request_uri":"/one/two/three/2"}`, + `{"timestamp":"2000-01-01T13:00:00.008Z","service":"a","request_uri":"/one/two/three/3/four/"}`, + `{"timestamp":"2000-01-01T13:00:00.009Z","service":"a","request_uri":"/one/four/three/3/"}`, + `{"timestamp":"2000-01-01T13:00:00.010Z","service":"a","request_uri":"/two/one/three/2"}`, + } + + s.insertDocuments(docs...) + + s.AssertSearch("request_uri:/one", docs, []int{9, 8, 7, 6, 5, 4, 3, 2, 1, 0}) + s.AssertSearch("request_uri:/two", docs, []int{10}) + s.AssertSearch("request_uri:/one/two", docs, []int{8, 7, 6, 5, 2, 1}) + s.AssertSearch("request_uri:/one/two/three", docs, []int{8, 7, 6, 5, 2}) + s.AssertSearch("request_uri:/one/two/three/1", docs, []int{6}) + s.AssertSearch("request_uri:/one/two.three", docs, []int{4, 3}) + s.AssertSearch("request_uri:/one/two.three/four", docs, []int{3}) + s.AssertSearch("request_uri:/one/*/three", docs, []int{9, 8, 7, 6, 5, 2}) + s.AssertSearch("request_uri:/two/*/three", docs, []int{10}) + s.AssertSearch("request_uri:*/three/", docs, []int{5}) + s.AssertSearch("request_uri:*/three", docs, []int{10, 9, 8, 7, 6, 5, 2}) +} + +func (s *FractionTestSuite) TestSearchNested() { + docs := []string{ + `{"timestamp":"2000-01-01T13:00:00.000Z","spans":[{"span_id":"1"},{"span_id":"2"}]}`, + `{"timestamp":"2000-01-01T13:00:00.001Z","spans":[{"span_id":"2"},{"span_id":"3"}]}`, + `{"timestamp":"2000-01-01T13:00:00.002Z","spans":[{"span_id":"1"},{"span_id":"3"}]}`, + `{"timestamp":"2000-01-01T13:00:00.003Z","spans":[{"span_id":"4"},{"span_id":"5"}]}`, + } + + s.insertDocuments(docs...) + + s.AssertSearch("spans.span_id:*", docs, []int{3, 2, 1, 0}) + s.AssertSearch("spans.span_id:1", docs, []int{2, 0}) + s.AssertSearch("spans.span_id:2", docs, []int{1, 0}) + s.AssertSearch("spans.span_id:3", docs, []int{2, 1}) + s.AssertSearch("spans.span_id:4", docs, []int{3}) + s.AssertSearch("spans.span_id:5", docs, []int{3}) +} + func (s *FractionTestSuite) TestSearchFromTo() { docs := []string{ `{"timestamp":"2000-01-01T13:00:00.000Z","message":"bad","level":"1","trace_id":"0","service":"0"}`, @@ -373,22 +439,7 @@ type ActiveFractionSuite struct { } func (s *ActiveFractionSuite) SetupTest() { - s.sortCache = cache.NewCache[[]byte](cache.NewCleaner(uint64(10*units.MiB), nil), nil) - s.indexCache = &IndexCache{ - MIDs: cache.NewCache[[]byte](cache.NewCleaner(uint64(10*units.MiB), nil), nil), - RIDs: cache.NewCache[[]byte](cache.NewCleaner(uint64(10*units.MiB), nil), nil), - Params: cache.NewCache[seqids.BlockParams](cache.NewCleaner(uint64(10*units.MiB), nil), nil), - LIDs: cache.NewCache[*lids.Block](cache.NewCleaner(uint64(10*units.MiB), nil), nil), - Tokens: cache.NewCache[*token.Block](cache.NewCleaner(uint64(10*units.MiB), nil), nil), - TokenTable: cache.NewCache[token.Table](cache.NewCleaner(uint64(10*units.MiB), nil), nil), - Registry: cache.NewCache[[]byte](cache.NewCleaner(uint64(10*units.MiB), nil), nil), - } - s.readLimiter = storage.NewReadLimiter(2, NopCounter{}) - - // TODO setup test - var err error - s.tmpDir, err = os.MkdirTemp("", "fraction_test_*") - s.Require().NoError(err) + s.SetupTestCommon() baseName := filepath.Join(s.tmpDir, "test_fraction") indexer := NewActiveIndexer(4, 10) @@ -427,23 +478,7 @@ type SealedFractionSuite struct { } func (s *SealedFractionSuite) SetupTest() { - s.sortCache = cache.NewCache[[]byte](cache.NewCleaner(uint64(10*units.MiB), nil), nil) - s.indexCache = &IndexCache{ - MIDs: cache.NewCache[[]byte](cache.NewCleaner(uint64(10*units.MiB), nil), nil), - RIDs: cache.NewCache[[]byte](cache.NewCleaner(uint64(10*units.MiB), nil), nil), - Params: cache.NewCache[seqids.BlockParams](cache.NewCleaner(uint64(10*units.MiB), nil), nil), - LIDs: cache.NewCache[*lids.Block](cache.NewCleaner(uint64(10*units.MiB), nil), nil), - Tokens: cache.NewCache[*token.Block](cache.NewCleaner(uint64(10*units.MiB), nil), nil), - TokenTable: cache.NewCache[token.Table](cache.NewCleaner(uint64(10*units.MiB), nil), nil), - Registry: cache.NewCache[[]byte](cache.NewCleaner(uint64(10*units.MiB), nil), nil), - } - s.readLimiter = storage.NewReadLimiter(2, NopCounter{}) - - // Ensure tmpDir exists - // TODO here? - var err error - s.tmpDir, err = os.MkdirTemp("", "fraction_test_*") - s.Require().NoError(err) + s.SetupTestCommon() s.insertDocuments = func(docs ...string) { baseFile := filepath.Join(s.tmpDir, "test_fraction") @@ -491,6 +526,7 @@ func (s *SealedFractionSuite) SetupTest() { } func (s *SealedFractionSuite) TearDownTest() { + // TODO if tear down is same as in active, then move it to FractionSuite if s.fraction != nil { s.fraction.Suicide() } diff --git a/tests/integration_tests/single_test.go b/tests/integration_tests/single_test.go index 906dc58b..cb0df22e 100644 --- a/tests/integration_tests/single_test.go +++ b/tests/integration_tests/single_test.go @@ -279,32 +279,6 @@ func (s *SingleTestSuite) TestFetchHints() { }) } -func (s *SingleTestSuite) TestFetch() { - n := 8 - docs := setup.GenerateDocs(n, func(i int, doc *setup.ExampleDoc) { - doc.Message = good - if i%2 == 0 { - doc.Message = bad - } - doc.Level = i + 1 // zero will not write - doc.TraceID = fmt.Sprintf("%d", i/3) - doc.Service = fmt.Sprintf("%d", i%3) - }) - docStrs := setup.DocsToStrings(docs) - s.Bulk(docStrs) - - qpr, _, _, err := s.Env.Search("_all_:*", math.MaxInt32, setup.WithTotal(false), setup.NoFetch()) - s.Assert().NoError(err) - s.Assert().Equal(n, len(qpr.IDs)) - - ids := qpr.IDs.IDs() - s.RunFracEnvs(suites.AllFracEnvs, true, func() { - docs, err := s.Env.Fetch(ids) - s.Assert().NoError(err) - s.Assert().Equal(len(ids), len(docs)) - }) -} - func (s *SingleTestSuite) TestIndexingAllFields() { defer func(m seq.Mapping, enabled bool) { s.Config.Mapping = m From a6071c45c9adbd0e652358b141a08b2c9c0f8bdc Mon Sep 17 00:00:00 2001 From: Andrei Cheboksarov <37665782+cheb0@users.noreply.github.com> Date: Wed, 8 Oct 2025 13:10:35 +0400 Subject: [PATCH 13/48] AND/OR quert test --- frac/fraction_test.go | 102 +++++++++++++++++++------ tests/integration_tests/single_test.go | 29 ------- 2 files changed, 77 insertions(+), 54 deletions(-) diff --git a/frac/fraction_test.go b/frac/fraction_test.go index 8b9c2fa1..d85100fc 100644 --- a/frac/fraction_test.go +++ b/frac/fraction_test.go @@ -6,6 +6,7 @@ import ( "math" "os" "path/filepath" + "slices" "sync" "testing" "time" @@ -34,6 +35,7 @@ type FractionTestSuite struct { readLimiter *storage.ReadLimiter config *Config mapping seq.Mapping + tokenizers map[seq.TokenizerType]tokenizer.Tokenizer fraction Fraction @@ -52,6 +54,11 @@ func (s *FractionTestSuite) SetupSuite() { SkipSortDocs: true, // TODO enabling sorting will fail tests KeepMetaFile: false, } + s.tokenizers = map[seq.TokenizerType]tokenizer.Tokenizer{ + seq.TokenizerTypeKeyword: tokenizer.NewKeywordTokenizer(20, false, true), + seq.TokenizerTypeText: tokenizer.NewTextTokenizer(20, false, true, 100), + seq.TokenizerTypePath: tokenizer.NewPathTokenizer(512, false, true), + } s.mapping = seq.Mapping{ "k8s_pod": seq.NewSingleType(seq.TokenizerTypeKeyword, "", 0), "k8s_namespace": seq.NewSingleType(seq.TokenizerTypeKeyword, "", 0), @@ -90,14 +97,9 @@ func (s *FractionTestSuite) SetupTestCommon() { } func (s *FractionTestSuite) InsertIntoActive(active *Active, docs ...string) { - tokenizers := map[seq.TokenizerType]tokenizer.Tokenizer{ - seq.TokenizerTypeKeyword: tokenizer.NewKeywordTokenizer(512, false, true), - seq.TokenizerTypeText: tokenizer.NewTextTokenizer(20, false, true, 4096), - seq.TokenizerTypePath: tokenizer.NewPathTokenizer(512, false, true), - } // drift and futureDrift are 0, we can process docs at any timestamps - p := indexer.NewProcessor(s.mapping, tokenizers, 0, 0, 0) + processor := indexer.NewProcessor(s.mapping, s.tokenizers, 0, 0, 0) idx := 0 readNext := func() ([]byte, error) { @@ -109,7 +111,7 @@ func (s *FractionTestSuite) InsertIntoActive(active *Active, docs ...string) { return d, nil } - _, binaryDocs, binaryMeta, err := p.ProcessBulk(time.Now(), nil, nil, readNext) + _, binaryDocs, binaryMeta, err := processor.ProcessBulk(time.Now(), nil, nil, readNext) s.Require().NoError(err, "processing bulk failed") compressor := indexer.GetDocsMetasCompressor(3, 3) @@ -275,6 +277,46 @@ func (s *FractionTestSuite) TestSearchPath() { s.AssertSearch("request_uri:*/three", docs, []int{10, 9, 8, 7, 6, 5, 2}) } +func (s *FractionTestSuite) TestSearchANDOR() { + docs := []string{ + `{"timestamp":"2000-01-01T13:00:00.000Z","message":"apple","level":"info","service":"svc_a","status":"ok"}`, + `{"timestamp":"2000-01-01T13:00:00.001Z","message":"apple","level":"error","service":"svc_b","status":"fail"}`, + `{"timestamp":"2000-01-01T13:00:00.002Z","message":"banana","level":"info","service":"svc_a","status":"ok"}`, + `{"timestamp":"2000-01-01T13:00:00.003Z","message":"banana","level":"error","service":"svc_b","status":"fail"}`, + `{"timestamp":"2000-01-01T13:00:00.004Z","message":"cherry","level":"info","service":"svc_c","status":"ok"}`, + `{"timestamp":"2000-01-01T13:00:00.005Z","message":"cherry","level":"warn","service":"svc_c","status":"ok"}`, + } + + s.insertDocuments(docs...) + + s.AssertSearch("message:apple AND level:info", docs, []int{0}) + s.AssertSearch("message:banana AND service:svc_a", docs, []int{2}) + s.AssertSearch("message:cherry AND level:warn", docs, []int{5}) + s.AssertSearch("level:info AND status:ok", docs, []int{4, 2, 0}) + s.AssertSearch("service:svc_a AND status:ok", docs, []int{2, 0}) + + s.AssertSearch("message:apple OR message:banana", docs, []int{3, 2, 1, 0}) + s.AssertSearch("level:error OR level:warn", docs, []int{5, 3, 1}) + s.AssertSearch("service:svc_a OR service:svc_b", docs, []int{3, 2, 1, 0}) + s.AssertSearch("status:fail OR level:warn", docs, []int{5, 3, 1}) + + s.AssertSearch("(message:apple OR message:banana) AND level:info", docs, []int{2, 0}) + s.AssertSearch("message:cherry AND (level:info OR level:warn)", docs, []int{5, 4}) + s.AssertSearch("(service:svc_a OR service:svc_b) AND level:info", docs, []int{2, 0}) + s.AssertSearch("(service:svc_a OR service:svc_b) AND (level:info OR level:error)", docs, []int{3, 2, 1, 0}) + + s.AssertSearch("(message:apple AND level:info) OR (message:banana AND level:error)", docs, []int{3, 0}) + s.AssertSearch("(message:apple OR message:cherry) AND (level:info OR level:error)", docs, []int{4, 1, 0}) + s.AssertSearch("message:* AND (level:info OR level:error) AND status:ok", docs, []int{4, 2, 0}) + + s.AssertSearch("message:apple OR message:notfound", docs, []int{1, 0}) + s.AssertSearch("message:notfound OR message:banana", docs, []int{3, 2}) + + s.AssertSearch("message:apple AND message:banana", docs, []int{}) + s.AssertSearch("level:info AND level:error", docs, []int{}) + s.AssertSearch("service:svc_a AND service:svc_b", docs, []int{}) +} + func (s *FractionTestSuite) TestSearchNested() { docs := []string{ `{"timestamp":"2000-01-01T13:00:00.000Z","spans":[{"span_id":"1"},{"span_id":"2"}]}`, @@ -285,6 +327,7 @@ func (s *FractionTestSuite) TestSearchNested() { s.insertDocuments(docs...) + // Each AssertSearch now tests both desc and asc order s.AssertSearch("spans.span_id:*", docs, []int{3, 2, 1, 0}) s.AssertSearch("spans.span_id:1", docs, []int{2, 0}) s.AssertSearch("spans.span_id:2", docs, []int{1, 0}) @@ -407,30 +450,39 @@ func (s *FractionTestSuite) AssertSearch(queryObject interface{}, originalDocs [ } func (s *FractionTestSuite) AssertSearchWithSearchParams(params *processor.SearchParams, originalDocs []string, indexes []int) { - dp, release := s.fraction.DataProvider(context.Background()) - defer release() + for _, order := range []seq.DocsOrder{seq.DocsOrderDesc, seq.DocsOrderAsc} { + params.Order = order - qpr, err := dp.Search(*params) - s.Require().NoError(err, "search failed for query") + dp, release := s.fraction.DataProvider(context.Background()) - s.Require().Equal(len(indexes), qpr.IDs.Len(), - "expected %d documents but found %d", len(indexes), qpr.IDs.Len()) + qpr, err := dp.Search(*params) + s.Require().NoError(err, "search failed for query with order=%v", order) - docs, err := dp.Fetch(qpr.IDs.IDs()) - s.Require().NoError(err, "failed to fetch documents for IDs: %v", qpr.IDs.IDs()) + s.Require().Equal(len(indexes), qpr.IDs.Len(), + "expected %d docs but found %d with order=%v", len(indexes), qpr.IDs.Len(), order) - fetchedDocs := make([]string, 0, len(docs)) - for _, doc := range docs { - fetchedDocs = append(fetchedDocs, string(doc)) - } + docs, err := dp.Fetch(qpr.IDs.IDs()) + s.Require().NoError(err, "failed to fetch docs for IDs: %v", qpr.IDs.IDs()) - for i, fetchedDoc := range fetchedDocs { - if i < len(indexes) { - expectedDoc := originalDocs[indexes[i]] - s.Require().Equal(expectedDoc, fetchedDoc, - "document at index %d doesn't match expected document at original index %d", - i, indexes[i]) + if order.IsReverse() { + slices.Reverse(docs) } + + fetchedDocs := make([]string, 0, len(docs)) + for _, doc := range docs { + fetchedDocs = append(fetchedDocs, string(doc)) + } + + for i, fetchedDoc := range fetchedDocs { + if i < len(indexes) { + expectedDoc := originalDocs[indexes[i]] + s.Require().Equal(expectedDoc, fetchedDoc, + "doc at index %d doesn't match expected doc at original index %d with order=%v", + i, indexes[i], order) + } + } + + release() } } diff --git a/tests/integration_tests/single_test.go b/tests/integration_tests/single_test.go index cb0df22e..e1c7c7d9 100644 --- a/tests/integration_tests/single_test.go +++ b/tests/integration_tests/single_test.go @@ -21,11 +21,6 @@ import ( "github.com/ozontech/seq-db/tests/suites" ) -const ( - good = "good" - bad = "bad" -) - type SingleTestSuite struct { suites.Single } @@ -144,30 +139,6 @@ func (s *SingleTestSuite) TestSearchAgg() { }) } -func (s *SingleTestSuite) TestSearchNestedIndexOneFraction() { - const numDocs = 100 - const doc = `{"trace_id": "1", "spans": [{"span_id": "1"}, {"span_id": "2"}]}` - - docs := []string{} - for range numDocs { - docs = append(docs, doc) - s.Bulk([]string{doc}) - } - - assertSearch := func(q string, size int) { - s.Assert().Equal(size, len(s.SearchDocs(q, size, seq.DocsOrderAsc))) - s.Assert().Equal(docs[:size], s.SearchDocs(q, size, seq.DocsOrderAsc)) - } - - assertSearch(`spans.span_id:*`, 1) - assertSearch(`spans.span_id:*`, 5) - assertSearch(`spans.span_id:*`, numDocs) - assertSearch(`trace_id:*`, numDocs) - - s.Assert().Equal(0, len(s.SearchDocs(`spans.span_id:*`, 0, seq.DocsOrderAsc))) - s.Assert().Equal(numDocs, len(s.SearchDocs(`spans.span_id:*`, numDocs+1, seq.DocsOrderAsc))) -} - // Test AND tree (sorting issue) func (s *SingleTestSuite) TestSearchNestedWithAND() { const ( From 0bc2b99c05ec36b4f133dbc63199b7f74a93df5c Mon Sep 17 00:00:00 2001 From: Andrei Cheboksarov <37665782+cheb0@users.noreply.github.com> Date: Wed, 8 Oct 2025 18:05:39 +0400 Subject: [PATCH 14/48] aggregation test WIP --- frac/fraction_test.go | 65 +++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 63 insertions(+), 2 deletions(-) diff --git a/frac/fraction_test.go b/frac/fraction_test.go index d85100fc..ba1269a6 100644 --- a/frac/fraction_test.go +++ b/frac/fraction_test.go @@ -211,6 +211,7 @@ func (s *FractionTestSuite) TestWildcardSymbolsSearch() { `{"timestamp":"2000-01-01T13:00:00.030Z","message":"third value****"}`, `{"timestamp":"2000-01-01T13:00:00.040Z","message":"fourth ****"}`, } + s.insertDocuments(docs...) s.AssertSearch(`message:*`, docs, []int{3, 2, 1, 0}) @@ -350,12 +351,12 @@ func (s *FractionTestSuite) TestSearchFromTo() { s.insertDocuments(docs...) - assertSearch := func(query string, fromOffset, toOffset int, indexes []int) { + assertSearch := func(query string, fromOffset, toOffset int, expectedIndexes []int) { s.AssertSearch(s.query( query, withFrom(fmt.Sprintf("2000-01-01T13:00:00.%03dZ", fromOffset)), withTo(fmt.Sprintf("2000-01-01T13:00:00.%03dZ", toOffset))), - docs, indexes) + docs, expectedIndexes) } assertSearch(`message:good`, 0, 7, []int{7, 5, 3, 1}) @@ -388,6 +389,41 @@ func (s *FractionTestSuite) TestSearchFromTo() { assertSearch(`NOT trace_id:0 AND NOT trace_id:2`, 3, 5, []int{5, 4, 3}) } +func (s *FractionTestSuite) TestAgg() { + docs := []string{ + `{"timestamp":"2000-01-01T13:00:00.000Z","message":"bad","level":"1","trace_id":"0","service":"0"}`, + `{"timestamp":"2000-01-01T13:00:01.000Z","message":"good","level":"2","trace_id":"0","service":"1"}`, + `{"timestamp":"2000-01-01T13:00:01.000Z","message":"bad","level":"3","trace_id":"0","service":"2"}`, + `{"timestamp":"2000-01-01T13:00:03.000Z","message":"good","level":"4","trace_id":"1","service":"0"}`, + `{"timestamp":"2000-01-01T13:00:04.000Z","message":"bad","level":"5","trace_id":"1","service":"1"}`, + `{"timestamp":"2000-01-01T13:00:05.000Z","message":"good","level":"6","trace_id":"1","service":"2"}`, + } + + s.insertDocuments(docs...) + + assertSearch := func(query string, agg string, expected []map[string]uint64) { + searchParams := s.query(query) + + err := withAgg(agg)(searchParams) + s.Require().NoError(err, "agg setting up failed") + + dp, release := s.fraction.DataProvider(context.Background()) + defer release() + + qpr, err := dp.Search(*searchParams) + s.Require().NoError(err, "search failed") + + s.Require().Equal(len(expected), len(qpr.Aggs)) + //for i := range expected { + // for bin, hist := range qpr.Aggs[i].SamplesByBin { + // r.Equalf(int64(expected[i][bin.Token]), hist.Total, "failed for token %s", bin) + // } + //} + } + + assertSearch("message:*", "service", []map[string]uint64{}) +} + type searchOption func(*processor.SearchParams) error func (s *FractionTestSuite) query(queryString string, options ...searchOption) *processor.SearchParams { @@ -438,6 +474,31 @@ func withLimit(limit int) searchOption { } } +func withAgg(aggQueries ...any) searchOption { + aggs := make([]processor.AggQuery, 0, len(aggQueries)) + for _, aggQuery := range aggQueries { + switch aggQuery := aggQuery.(type) { + case string: + //searchAll := []parser.Term{{ + // Kind: parser.TermSymbol, Data: "*", + //}} + groupBy := &parser.Literal{ + Field: aggQuery, + Terms: []parser.Term{}, + } + aggs = append(aggs, processor.AggQuery{Field: groupBy, Func: seq.AggFuncCount}) + case processor.AggQuery: + aggs = append(aggs, aggQuery) + default: + panic("unknown query type") + } + } + return func(sp *processor.SearchParams) error { + sp.AggQ = append(sp.AggQ, aggs...) + return nil + } +} + func (s *FractionTestSuite) AssertSearch(queryObject interface{}, originalDocs []string, indexes []int) { switch q := queryObject.(type) { case string: From 607d97ce4c695a46ed9594bc7e234ec569b58512 Mon Sep 17 00:00:00 2001 From: Andrei Cheboksarov <37665782+cheb0@users.noreply.github.com> Date: Thu, 9 Oct 2025 11:03:11 +0400 Subject: [PATCH 15/48] migrate range query test --- frac/fraction_test.go | 137 +++++++++++++++----- tests/integration_tests/integration_test.go | 128 ------------------ 2 files changed, 103 insertions(+), 162 deletions(-) diff --git a/frac/fraction_test.go b/frac/fraction_test.go index ba1269a6..63b141d9 100644 --- a/frac/fraction_test.go +++ b/frac/fraction_test.go @@ -129,10 +129,10 @@ func (s *FractionTestSuite) InsertIntoActive(active *Active, docs ...string) { func (s *FractionTestSuite) TestSearchKeyword() { docs := []string{ - `{"timestamp":"2000-01-01T13:00:00Z", "message":"first test document","level":"info","service":"test-service","status":"ok"}`, - `{"timestamp":"2000-01-01T13:00:01Z", "message":"second test document","level":"error","service":"test-service","status":"fail"}`, - `{"timestamp":"2000-01-01T13:00:02Z", "message":"third test document","level":"debug","service":"another-service","status":"ok"}`, - `{"timestamp":"2000-01-01T13:00:03Z", "message":"fourth test document","level":"info","service":"another-service","status":"ok"}`, + `{"timestamp":"2000-01-01T13:00:00Z", "message":"first test document","level":"info","service":"test","status":"ok"}`, + `{"timestamp":"2000-01-01T13:00:01Z", "message":"second test document","level":"error","service":"test","status":"fail"}`, + `{"timestamp":"2000-01-01T13:00:02Z", "message":"third test document","level":"debug","service":"prod","status":"ok"}`, + `{"timestamp":"2000-01-01T13:00:03Z", "message":"fourth test document","level":"info","status":"ok"}`, } s.insertDocuments(docs...) @@ -141,8 +141,9 @@ func (s *FractionTestSuite) TestSearchKeyword() { s.AssertSearch("level:error", docs, []int{1}) s.AssertSearch("level:debug", docs, []int{2}) - s.AssertSearch("service:test-service", docs, []int{1, 0}) - s.AssertSearch("service:another-service", docs, []int{3, 2}) + s.AssertSearch("service:test", docs, []int{1, 0}) + s.AssertSearch("service:prod", docs, []int{2}) + s.AssertSearch("_exists_:service", docs, []int{2, 1, 0}) s.AssertSearch("status:ok", docs, []int{3, 2, 0}) s.AssertSearch("status:fail", docs, []int{1}) @@ -318,6 +319,80 @@ func (s *FractionTestSuite) TestSearchANDOR() { s.AssertSearch("service:svc_a AND service:svc_b", docs, []int{}) } +func (s *FractionTestSuite) TestSearchRange() { + docs := []string{ + `{"timestamp":"2000-01-01T13:00:00.000Z","service":"test-service","level":"1"}`, + `{"timestamp":"2000-01-01T13:00:00.001Z","service":"test-service","level":"3"}`, + `{"timestamp":"2000-01-01T13:00:00.002Z","service":"test-service","level":"7"}`, + `{"timestamp":"2000-01-01T13:00:00.003Z","service":"test-service","level":"15"}`, + `{"timestamp":"2000-01-01T13:00:00.004Z","service":"test-service","level":"31"}`, + `{"timestamp":"2000-01-01T13:00:00.005Z","service":"test-service","level":"63"}`, + `{"timestamp":"2000-01-01T13:00:00.006Z","service":"test-service","level":"127"}`, + } + + s.insertDocuments(docs...) + + s.AssertSearch("level:[1 TO 3]", docs, []int{1, 0}) + s.AssertSearch(s.seqql("level:[1, 3]"), docs, []int{1, 0}) + s.AssertSearch("level:[0 TO 63]", docs, []int{5, 4, 3, 2, 1, 0}) + s.AssertSearch(s.seqql("level:[0, 63]"), docs, []int{5, 4, 3, 2, 1, 0}) + + s.AssertSearch("level:{0 TO 3}", docs, []int{0}) + s.AssertSearch("level:{-100 TO 100}", docs, []int{5, 4, 3, 2, 1, 0}) + + s.AssertSearch("level:{0 TO 3]", docs, []int{1, 0}) + s.AssertSearch(s.seqql("level:(0, 3]"), docs, []int{1, 0}) + s.AssertSearch("level:[0 TO 3}", docs, []int{0}) + + s.AssertSearch("level:[-100 TO 100]", docs, []int{5, 4, 3, 2, 1, 0}) + + s.AssertSearch("level:[0 TO *]", docs, []int{6, 5, 4, 3, 2, 1, 0}) + s.AssertSearch(s.seqql("level:[0, *]"), docs, []int{6, 5, 4, 3, 2, 1, 0}) + s.AssertSearch("level:[0 TO *}", docs, []int{6, 5, 4, 3, 2, 1, 0}) + s.AssertSearch("level:[31 TO *]", docs, []int{6, 5, 4}) + s.AssertSearch("level:{31 TO *]", docs, []int{6, 5}) + + s.AssertSearch("level:[200 TO 300]", docs, []int{}) + s.AssertSearch("level:{127 TO 200]", docs, []int{}) +} + +func (s *FractionTestSuite) TestSearchIn() { + docs := []string{ + `{"timestamp":"2000-01-01T13:00:00.000Z","message":"starting pod","level":"info","k8s_namespace":"prod","k8s_pod":"proxy-node1"}`, + `{"timestamp":"2000-01-01T13:00:00.001Z","message":"api call failed","level":"error","k8s_namespace":"prod","k8s_pod":"apiserver-master1"}`, + `{"timestamp":"2000-01-01T13:00:00.002Z","message":"scheduling task","level":"info","k8s_namespace":"test","k8s_pod":"scheduler-master1"}`, + `{"timestamp":"2000-01-01T13:00:00.003Z","message":"authentication error","level":"error","k8s_namespace":"test","k8s_pod":"apiserver-master2"}`, + `{"timestamp":"2000-01-01T13:00:00.004Z","message":"network policy applied","level":"info","k8s_namespace":"prod","k8s_pod":"proxy-node2"}`, + `{"timestamp":"2000-01-01T13:00:00.005Z","message":"scheduling completed","level":"info","k8s_namespace":"staging","k8s_pod":"scheduler-master2"}`, + `{"timestamp":"2000-01-01T13:00:00.006Z","message":"connection timeout","level":"error","k8s_namespace":"staging","k8s_pod":"app-backend-1"}`, + `{"timestamp":"2000-01-01T13:00:00.007Z","message":"health check passed","level":"info","k8s_namespace":"prod","k8s_pod":"app-frontend-1"}`, + `{"timestamp":"2000-01-01T13:00:00.008Z","message":"database query slow","level":"warn","k8s_namespace":"prod","k8s_pod":"app-backend-2"}`, + `{"timestamp":"2000-01-01T13:00:00.009Z","message":"cache miss","level":"warn","k8s_namespace":"test","k8s_pod":"app-cache-1"}`, + } + + s.insertDocuments(docs...) + + s.AssertSearch(s.seqql("k8s_namespace:in(prod)"), docs, []int{8, 7, 4, 1, 0}) + s.AssertSearch(s.seqql("k8s_namespace:in(test)"), docs, []int{9, 3, 2}) + s.AssertSearch(s.seqql("k8s_namespace:in(staging)"), docs, []int{6, 5}) + s.AssertSearch(s.seqql("k8s_namespace:in(prod,test)"), docs, []int{9, 8, 7, 4, 3, 2, 1, 0}) + s.AssertSearch(s.seqql("k8s_namespace:in(prod,test,staging)"), docs, []int{9, 8, 7, 6, 5, 4, 3, 2, 1, 0}) + + s.AssertSearch(s.seqql("k8s_pod:in(proxy-*)"), docs, []int{4, 0}) + s.AssertSearch(s.seqql("k8s_pod:in(apiserver-*)"), docs, []int{3, 1}) + s.AssertSearch(s.seqql("k8s_pod:in(scheduler-*)"), docs, []int{5, 2}) + s.AssertSearch(s.seqql("k8s_pod:in(proxy-*,apiserver-*)"), docs, []int{4, 3, 1, 0}) + s.AssertSearch(s.seqql("k8s_pod:in(proxy-*,apiserver-*,scheduler-*)"), docs, []int{5, 4, 3, 2, 1, 0}) + + s.AssertSearch(s.seqql("level:error AND k8s_namespace:in(prod,test)"), docs, []int{3, 1}) + s.AssertSearch(s.seqql("level:error AND k8s_namespace:in(prod,test) AND k8s_pod:in(apiserver-*)"), docs, []int{3, 1}) + + s.AssertSearch( + s.seqql(`level:error AND k8s_namespace:in(prod,test) AND k8s_pod:in(proxy-*,apiserver-*,scheduler-*)`), + docs, + []int{3, 1}) +} + func (s *FractionTestSuite) TestSearchNested() { docs := []string{ `{"timestamp":"2000-01-01T13:00:00.000Z","spans":[{"span_id":"1"},{"span_id":"2"}]}`, @@ -389,7 +464,7 @@ func (s *FractionTestSuite) TestSearchFromTo() { assertSearch(`NOT trace_id:0 AND NOT trace_id:2`, 3, 5, []int{5, 4, 3}) } -func (s *FractionTestSuite) TestAgg() { +/*func (s *FractionTestSuite) TestAgg() { docs := []string{ `{"timestamp":"2000-01-01T13:00:00.000Z","message":"bad","level":"1","trace_id":"0","service":"0"}`, `{"timestamp":"2000-01-01T13:00:01.000Z","message":"good","level":"2","trace_id":"0","service":"1"}`, @@ -404,7 +479,7 @@ func (s *FractionTestSuite) TestAgg() { assertSearch := func(query string, agg string, expected []map[string]uint64) { searchParams := s.query(query) - err := withAgg(agg)(searchParams) + err := withAggBy(agg)(searchParams) s.Require().NoError(err, "agg setting up failed") dp, release := s.fraction.DataProvider(context.Background()) @@ -422,7 +497,7 @@ func (s *FractionTestSuite) TestAgg() { } assertSearch("message:*", "service", []map[string]uint64{}) -} +}*/ type searchOption func(*processor.SearchParams) error @@ -445,6 +520,25 @@ func (s *FractionTestSuite) query(queryString string, options ...searchOption) * return params } +func (s *FractionTestSuite) seqql(queryString string, options ...searchOption) *processor.SearchParams { + queryAst, err := parser.ParseSeqQL(queryString, s.mapping) + s.Require().NoError(err, "failed to parse query: %s", queryString) + + params := &processor.SearchParams{ + AST: queryAst.Root, + From: seq.MID(0), + To: seq.MID(math.MaxUint64), + Limit: math.MaxInt32, + } + + for _, option := range options { + err := option(params) + s.Require().NoError(err, "option can not be applied") + } + + return params +} + func withFrom(from string) searchOption { return func(p *processor.SearchParams) error { t, err := time.Parse(time.RFC3339, from) @@ -474,31 +568,6 @@ func withLimit(limit int) searchOption { } } -func withAgg(aggQueries ...any) searchOption { - aggs := make([]processor.AggQuery, 0, len(aggQueries)) - for _, aggQuery := range aggQueries { - switch aggQuery := aggQuery.(type) { - case string: - //searchAll := []parser.Term{{ - // Kind: parser.TermSymbol, Data: "*", - //}} - groupBy := &parser.Literal{ - Field: aggQuery, - Terms: []parser.Term{}, - } - aggs = append(aggs, processor.AggQuery{Field: groupBy, Func: seq.AggFuncCount}) - case processor.AggQuery: - aggs = append(aggs, aggQuery) - default: - panic("unknown query type") - } - } - return func(sp *processor.SearchParams) error { - sp.AggQ = append(sp.AggQ, aggs...) - return nil - } -} - func (s *FractionTestSuite) AssertSearch(queryObject interface{}, originalDocs []string, indexes []int) { switch q := queryObject.(type) { case string: diff --git a/tests/integration_tests/integration_test.go b/tests/integration_tests/integration_test.go index 2fef4c30..856314ca 100644 --- a/tests/integration_tests/integration_test.go +++ b/tests/integration_tests/integration_test.go @@ -1383,76 +1383,6 @@ func (s *IntegrationTestSuite) TestSeal() { } } -func (s *IntegrationTestSuite) TestSearchRange() { - doc := `{"service": "test-service", "level": "%d"}` - - env := setup.NewTestingEnv(s.Config) - defer env.StopAll() - - origDocs := []string{} - for i := 0; i < 100; i = 2*i + 1 { - origDocs = append(origDocs, fmt.Sprintf(doc, i)) - } - setup.Bulk(s.T(), env.IngestorBulkAddr(), origDocs) - env.WaitIdle() - - tests := []struct { - request string - cnt int - }{ - { - request: "[1 TO 3]", - cnt: 2, - }, - { - request: "[0 TO 3]", - cnt: 3, - }, - { - request: "{0 TO 3}", - cnt: 1, - }, - { - request: "{0 TO 3]", - cnt: 2, - }, - { - request: "[0 TO 3}", - cnt: 2, - }, - { - request: "[0 TO 63]", - cnt: 7, - }, - { - request: "[-100 TO 100]", - cnt: 7, - }, - { - request: "{-100 TO 100}", - cnt: 7, - }, - { - request: "[0 TO *]", - cnt: 7, - }, - { - request: "[0 TO *}", - cnt: 7, - }, - } - - for _, test := range tests { - for _, withTotal := range []bool{true, false} { - req := fmt.Sprintf(`level:%v`, test.request) - qpr, _, _, err := env.Search(req, 1000, setup.WithTotal(withTotal)) - require.NoError(s.T(), err, "should be no errors") - assert.Len(s.T(), qpr.IDs, test.cnt, "wrong doc count") - assert.Equal(s.T(), getTotal(test.cnt, withTotal), qpr.Total, "wrong doc count") - } - } -} - func (s *IntegrationTestSuite) TestQueryErr() { origDocs := []string{ `{"service":"a", "xxxx":"yyyy"}`, @@ -1713,64 +1643,6 @@ func copySlice[V any](src []V) []V { return dst } -func (s *IntegrationTestSuite) TestPathSearch() { - env := setup.NewTestingEnv(s.Config) - defer env.StopAll() - - docs := []string{ - `{"service":"a", "request_uri":"/one"}`, - `{"service":"a", "request_uri":"/one/two"}`, - `{"service":"a", "request_uri":"/one/two/three"}`, - `{"service":"a", "request_uri":"/one/two.three/four"}`, - `{"service":"a", "request_uri":"/one/two.three/five"}`, - `{"service":"a", "request_uri":"/one/two/three/"}`, - `{"service":"a", "request_uri":"/one/two/three/1"}`, - `{"service":"a", "request_uri":"/one/two/three/2"}`, - `{"service":"a", "request_uri":"/one/two/three/3/four/"}`, - `{"service":"a", "request_uri":"/one/four/three/3/"}`, - `{"service":"a", "request_uri":"/two/one/three/2"}`, - } - - setup.Bulk(s.T(), env.IngestorBulkAddr(), docs) - env.WaitIdle() - - tests := []struct { - request string - cnt int - }{ - {request: "/one", cnt: 10}, - {request: "/two", cnt: 1}, - {request: "/one/two", cnt: 6}, - {request: "/one/two/three", cnt: 5}, - {request: "/one/two/three/1", cnt: 1}, - {request: "/one/two.three", cnt: 2}, - {request: "/one/two.three/four", cnt: 1}, - {request: "/one/*/three", cnt: 6}, - {request: "/two/*/three", cnt: 1}, - {request: "*/three/", cnt: 1}, - {request: "*/three", cnt: 7}, - } - - for _, test := range tests { - req := fmt.Sprintf(`request_uri:%v`, test.request) - qpr, _, _, err := env.Search(req, 1000, setup.WithTotal(true)) - require.NoError(s.T(), err, "should be no errors") - assert.Len(s.T(), qpr.IDs, test.cnt, "wrong doc count") - assert.Equal(s.T(), test.cnt, int(qpr.Total), "wrong doc count") - } - - env.WaitIdle() - env.SealAll() - - for _, test := range tests { - req := fmt.Sprintf(`request_uri:%v`, test.request) - qpr, _, _, err := env.Search(req, 1000, setup.WithTotal(true)) - require.NoError(s.T(), err, "should be no errors") - assert.Len(s.T(), qpr.IDs, test.cnt, "wrong doc count") - assert.Equal(s.T(), test.cnt, int(qpr.Total), "wrong doc count") - } -} - func (s *IntegrationTestSuite) TestSearchFieldsWithMultipleTypes() { t := s.T() From c0656eafe9c88a6e276a3d9519a8b53885bb69ad Mon Sep 17 00:00:00 2001 From: Andrei Cheboksarov <37665782+cheb0@users.noreply.github.com> Date: Thu, 9 Oct 2025 11:56:51 +0400 Subject: [PATCH 16/48] basic aggregation test migrated --- frac/fraction_test.go | 106 ++++++++++++++++++++----- tests/integration_tests/single_test.go | 32 -------- 2 files changed, 86 insertions(+), 52 deletions(-) diff --git a/frac/fraction_test.go b/frac/fraction_test.go index 63b141d9..aca7b005 100644 --- a/frac/fraction_test.go +++ b/frac/fraction_test.go @@ -464,24 +464,19 @@ func (s *FractionTestSuite) TestSearchFromTo() { assertSearch(`NOT trace_id:0 AND NOT trace_id:2`, 3, 5, []int{5, 4, 3}) } -/*func (s *FractionTestSuite) TestAgg() { +func (s *FractionTestSuite) TestBasicAggregation() { docs := []string{ - `{"timestamp":"2000-01-01T13:00:00.000Z","message":"bad","level":"1","trace_id":"0","service":"0"}`, - `{"timestamp":"2000-01-01T13:00:01.000Z","message":"good","level":"2","trace_id":"0","service":"1"}`, - `{"timestamp":"2000-01-01T13:00:01.000Z","message":"bad","level":"3","trace_id":"0","service":"2"}`, - `{"timestamp":"2000-01-01T13:00:03.000Z","message":"good","level":"4","trace_id":"1","service":"0"}`, - `{"timestamp":"2000-01-01T13:00:04.000Z","message":"bad","level":"5","trace_id":"1","service":"1"}`, - `{"timestamp":"2000-01-01T13:00:05.000Z","message":"good","level":"6","trace_id":"1","service":"2"}`, + `{"timestamp":"2000-01-01T13:00:00.000Z","message":"bad","level":"1","trace_id":"0","service":"proxy"}`, + `{"timestamp":"2000-01-01T13:00:01.000Z","message":"good","level":"2","trace_id":"0","service":"gateway"}`, + `{"timestamp":"2000-01-01T13:00:01.000Z","message":"bad","level":"3","trace_id":"0","service":"scheduler"}`, + `{"timestamp":"2000-01-01T13:00:03.000Z","message":"good","level":"1","trace_id":"1","service":"proxy"}`, + `{"timestamp":"2000-01-01T13:00:04.000Z","message":"bad","level":"1","trace_id":"1","service":"gateway"}`, + `{"timestamp":"2000-01-01T13:00:05.000Z","message":"good","level":"1","trace_id":"1","service":"gateway"}`, } s.insertDocuments(docs...) - assertSearch := func(query string, agg string, expected []map[string]uint64) { - searchParams := s.query(query) - - err := withAggBy(agg)(searchParams) - s.Require().NoError(err, "agg setting up failed") - + assertAggSearch := func(searchParams *processor.SearchParams, expected []map[string]uint64) { dp, release := s.fraction.DataProvider(context.Background()) defer release() @@ -489,15 +484,44 @@ func (s *FractionTestSuite) TestSearchFromTo() { s.Require().NoError(err, "search failed") s.Require().Equal(len(expected), len(qpr.Aggs)) - //for i := range expected { - // for bin, hist := range qpr.Aggs[i].SamplesByBin { - // r.Equalf(int64(expected[i][bin.Token]), hist.Total, "failed for token %s", bin) - // } - //} + for i := range expected { + for bin, hist := range qpr.Aggs[i].SamplesByBin { + s.Require().Equalf(int64(expected[i][bin.Token]), hist.Total, "failed for token %s", bin) + } + } } - assertSearch("message:*", "service", []map[string]uint64{}) -}*/ + assertAggSearch( + s.query( + "message:*", + withAggQuery(processor.AggQuery{GroupBy: aggField("service")})), + []map[string]uint64{ + {"gateway": 3, "proxy": 2, "scheduler": 1}, + }) + assertAggSearch( + s.query( + "message:good", + withAggQuery(processor.AggQuery{GroupBy: aggField("service")})), + []map[string]uint64{ + {"gateway": 2, "proxy": 1}, + }) + assertAggSearch( + s.query( + "message:*", + withAggQuery(processor.AggQuery{GroupBy: aggField("level")})), + []map[string]uint64{ + {"1": 4, "2": 1, "3": 1}, + }) + assertAggSearch( + s.query( + "message:*", + withAggQuery(processor.AggQuery{GroupBy: aggField("service")}), + withAggQuery(processor.AggQuery{GroupBy: aggField("level")})), + []map[string]uint64{ + {"gateway": 3, "proxy": 2, "scheduler": 1}, + {"1": 4, "2": 1, "3": 1}, + }) +} type searchOption func(*processor.SearchParams) error @@ -568,6 +592,48 @@ func withLimit(limit int) searchOption { } } +func withAgg(aggQueries ...any) searchOption { + aggs := make([]processor.AggQuery, 0, len(aggQueries)) + for _, aggQuery := range aggQueries { + switch aggQuery := aggQuery.(type) { + case string: + searchAll := []parser.Term{{ + Kind: parser.TermSymbol, Data: "*", + }} + groupBy := &parser.Literal{ + Field: aggQuery, + Terms: searchAll, + } + aggs = append(aggs, processor.AggQuery{GroupBy: groupBy, Func: seq.AggFuncCount}) + case processor.AggQuery: + aggs = append(aggs, aggQuery) + default: + panic("unknown query type") + } + } + return func(sp *processor.SearchParams) error { + sp.AggQ = append(sp.AggQ, aggs...) + return nil + } +} + +func aggField(field string) *parser.Literal { + searchAll := []parser.Term{{ + Kind: parser.TermSymbol, Data: "*", + }} + return &parser.Literal{ + Field: field, + Terms: searchAll, + } +} + +func withAggQuery(aggQuery processor.AggQuery) searchOption { + return func(sp *processor.SearchParams) error { + sp.AggQ = append(sp.AggQ, aggQuery) + return nil + } +} + func (s *FractionTestSuite) AssertSearch(queryObject interface{}, originalDocs []string, indexes []int) { switch q := queryObject.(type) { case string: diff --git a/tests/integration_tests/single_test.go b/tests/integration_tests/single_test.go index e1c7c7d9..1691e069 100644 --- a/tests/integration_tests/single_test.go +++ b/tests/integration_tests/single_test.go @@ -107,38 +107,6 @@ func (s *SingleTestSuite) TestBasicSearchHotRead() { }) } -func (s *SingleTestSuite) TestSearchAgg() { - startTS := time.Now() - docs := simpleCases(startTS) - docStrs := setup.DocsToStrings(docs) - s.Bulk(docStrs) - - assertAgg := func(query string, aggQ []any, expected []map[string]uint64) { - r := s.Require() - qpr, _, _, err := s.Env.Search(query, math.MaxInt32, setup.WithAggQuery(aggQ...), setup.WithTotal(false)) - r.NoError(err) - r.Equal(len(expected), len(qpr.Aggs)) - for i := range expected { - for bin, hist := range qpr.Aggs[i].SamplesByBin { - r.Equalf(int64(expected[i][bin.Token]), hist.Total, "failed for token %s", bin) - } - } - } - s.RunFracEnvs(suites.AllFracEnvs, true, func() { - assertAgg("message:message", []any{"service"}, []map[string]uint64{ - {"service_a": 2, "service_b": 1, "service_c": 1}, - }) - assertAgg("message:message", []any{"level"}, []map[string]uint64{ - {"1": 3, "2": 1}, - }) - assertAgg("message:message", []any{"service", "level"}, - []map[string]uint64{ - {"service_a": 2, "service_b": 1, "service_c": 1}, - {"1": 3, "2": 1}, - }) - }) -} - // Test AND tree (sorting issue) func (s *SingleTestSuite) TestSearchNestedWithAND() { const ( From 7b74455aa57f67025efe07712580a33a7cd7c2c4 Mon Sep 17 00:00:00 2001 From: Andrei Cheboksarov <37665782+cheb0@users.noreply.github.com> Date: Thu, 9 Oct 2025 13:10:14 +0400 Subject: [PATCH 17/48] test agg sum --- frac/fraction_test.go | 69 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 68 insertions(+), 1 deletion(-) diff --git a/frac/fraction_test.go b/frac/fraction_test.go index aca7b005..b77be99c 100644 --- a/frac/fraction_test.go +++ b/frac/fraction_test.go @@ -75,6 +75,7 @@ func (s *FractionTestSuite) SetupSuite() { "process": seq.NewSingleType(seq.TokenizerTypeObject, "", 0), "process.tags": seq.NewSingleType(seq.TokenizerTypeTags, "", 0), "tags": seq.NewSingleType(seq.TokenizerTypeTags, "", 0), + "v": seq.NewSingleType(seq.TokenizerTypeKeyword, "", 0), } } @@ -227,7 +228,6 @@ func (s *FractionTestSuite) TestWildcardSymbolsSearch() { s.AssertSearch(`message:\*\*\*\**`, docs, []int{3, 1, 0}) s.AssertSearch(`message:value* AND message:\*\**`, docs, []int{1, 0}) s.AssertSearch(`message:value* OR message:\*\**`, docs, []int{3, 2, 1, 0}) - } func (s *FractionTestSuite) TestSearchFullText() { @@ -523,6 +523,73 @@ func (s *FractionTestSuite) TestBasicAggregation() { }) } +func (s *FractionTestSuite) TestAggSum() { + docs := []string{ + `{"timestamp":"2000-01-01T13:00:00.000Z","service":"sum1","v":1}`, + `{"timestamp":"2000-01-01T13:00:00.001Z","service":"some_log","v":2}`, + `{"timestamp":"2000-01-01T13:00:00.002Z","service":"sum1","v":1}`, + `{"timestamp":"2000-01-01T13:00:00.003Z","service":"sum1","v":-1}`, + `{"timestamp":"2000-01-01T13:00:00.004Z","service":"sum1","v":-0}`, + `{"timestamp":"2000-01-01T13:00:00.005Z","service":"sum1","v":+0}`, + `{"timestamp":"2000-01-01T13:00:00.006Z","service":"sum1","v":0}`, + `{"timestamp":"2000-01-01T13:00:00.007Z","service":"sum1"}`, + `{"timestamp":"2000-01-01T13:00:00.008Z","service":"sum2","v":-1}`, + `{"timestamp":"2000-01-01T13:00:00.009Z","service":"sum2","v":-3}`, + `{"timestamp":"2000-01-01T13:00:00.010Z","service":"sum2","v":-4}`, + `{"timestamp":"2000-01-01T13:00:00.011Z","service":"sum3","v":1}`, + `{"timestamp":"2000-01-01T13:00:00.012Z","service":"sum4","v":99}`, + `{"timestamp":"2000-01-01T13:00:00.013Z","service":"sum4","v":1}`, + `{"timestamp":"2000-01-01T13:00:00.014Z","service":"sum4","v":1}`, + `{"timestamp":"2000-01-01T13:00:00.015Z","service":"sum4","v":1}`, + `{"timestamp":"2000-01-01T13:00:00.016Z","service":"sum4","v":1}`, + `{"timestamp":"2000-01-01T13:00:00.017Z","service":"sum4","v":1}`, + `{"timestamp":"2000-01-01T13:00:00.018Z","service":"sum5","v":1}`, + `{"timestamp":"2000-01-01T13:00:00.019Z","service":"sum5"}`, + } + + s.insertDocuments(docs...) + + dp, release := s.fraction.DataProvider(context.Background()) + defer release() + + searchParams := s.query( + "service:sum*", + withAggQuery(processor.AggQuery{ + Field: aggField("v"), + GroupBy: aggField("service"), + Func: seq.AggFuncSum, + })) + + qpr, err := dp.Search(*searchParams) + s.Require().NoError(err, "search failed") + + aggResults := qpr.Aggregate([]seq.AggregateArgs{{Func: seq.AggFuncSum}}) + s.Require().Equal(1, len(aggResults)) + + expectedBuckets := []seq.AggregationBucket{ + {Name: "sum4", Value: 104, NotExists: 0}, + {Name: "sum1", Value: 1, NotExists: 1}, + {Name: "sum3", Value: 1, NotExists: 0}, + {Name: "sum5", Value: 1, NotExists: 1}, + {Name: "sum2", Value: -8, NotExists: 0}, + } + + s.Require().Equal(len(expectedBuckets), len(aggResults[0].Buckets), "wrong number of buckets") + + for _, expectedBucket := range expectedBuckets { + found := false + for _, gotBucket := range aggResults[0].Buckets { + if gotBucket.Name == expectedBucket.Name { + s.Require().Equal(expectedBucket.Value, gotBucket.Value, "wrong value for bucket %s", expectedBucket.Name) + s.Require().Equal(expectedBucket.NotExists, gotBucket.NotExists, "wrong NotExists for bucket %s", expectedBucket.Name) + found = true + break + } + } + s.Require().True(found, "bucket %s not found in results", expectedBucket.Name) + } +} + type searchOption func(*processor.SearchParams) error func (s *FractionTestSuite) query(queryString string, options ...searchOption) *processor.SearchParams { From 860d5f77a9203de715a75c100963f3dd7d510dd2 Mon Sep 17 00:00:00 2001 From: Andrei Cheboksarov <37665782+cheb0@users.noreply.github.com> Date: Thu, 9 Oct 2025 15:57:30 +0400 Subject: [PATCH 18/48] all aggregation tests are migrated from integration_test.go --- frac/fraction_test.go | 324 ++++++++++++++++---- tests/integration_tests/integration_test.go | 286 ----------------- 2 files changed, 267 insertions(+), 343 deletions(-) diff --git a/frac/fraction_test.go b/frac/fraction_test.go index b77be99c..d88774bd 100644 --- a/frac/fraction_test.go +++ b/frac/fraction_test.go @@ -279,7 +279,7 @@ func (s *FractionTestSuite) TestSearchPath() { s.AssertSearch("request_uri:*/three", docs, []int{10, 9, 8, 7, 6, 5, 2}) } -func (s *FractionTestSuite) TestSearchANDOR() { +func (s *FractionTestSuite) TestSearchAndOr() { docs := []string{ `{"timestamp":"2000-01-01T13:00:00.000Z","message":"apple","level":"info","service":"svc_a","status":"ok"}`, `{"timestamp":"2000-01-01T13:00:00.001Z","message":"apple","level":"error","service":"svc_b","status":"fail"}`, @@ -549,9 +549,6 @@ func (s *FractionTestSuite) TestAggSum() { s.insertDocuments(docs...) - dp, release := s.fraction.DataProvider(context.Background()) - defer release() - searchParams := s.query( "service:sum*", withAggQuery(processor.AggQuery{ @@ -559,13 +556,6 @@ func (s *FractionTestSuite) TestAggSum() { GroupBy: aggField("service"), Func: seq.AggFuncSum, })) - - qpr, err := dp.Search(*searchParams) - s.Require().NoError(err, "search failed") - - aggResults := qpr.Aggregate([]seq.AggregateArgs{{Func: seq.AggFuncSum}}) - s.Require().Equal(1, len(aggResults)) - expectedBuckets := []seq.AggregationBucket{ {Name: "sum4", Value: 104, NotExists: 0}, {Name: "sum1", Value: 1, NotExists: 1}, @@ -573,21 +563,231 @@ func (s *FractionTestSuite) TestAggSum() { {Name: "sum5", Value: 1, NotExists: 1}, {Name: "sum2", Value: -8, NotExists: 0}, } + s.AssertAggregation(searchParams, seq.AggregateArgs{Func: seq.AggFuncSum}, expectedBuckets) +} - s.Require().Equal(len(expectedBuckets), len(aggResults[0].Buckets), "wrong number of buckets") +func (s *FractionTestSuite) TestAggMin() { + docs := []string{ + `{"timestamp":"2000-01-01T13:00:00.000Z","service":"min1","v":1}`, + `{"timestamp":"2000-01-01T13:00:00.001Z","service":"min1","v":2}`, + `{"timestamp":"2000-01-01T13:00:00.002Z","service":"min2","v":3}`, + `{"timestamp":"2000-01-01T13:00:00.003Z","service":"min2","v":"-10"}`, + `{"timestamp":"2000-01-01T13:00:00.004Z","service":"min4"}`, + `{"timestamp":"2000-01-01T13:00:00.005Z","service":"min4"}`, + `{"timestamp":"2000-01-01T13:00:00.006Z","service":"min4"}`, + `{"timestamp":"2000-01-01T13:00:00.007Z","service":"min4"}`, + `{"timestamp":"2000-01-01T13:00:00.008Z","service":"min4"}`, + `{"timestamp":"2000-01-01T13:00:00.009Z","service":"min4"}`, + `{"timestamp":"2000-01-01T13:00:00.010Z","service":"min4"}`, + `{"timestamp":"2000-01-01T13:00:00.011Z","service":null,"v":null}`, + `{"timestamp":"2000-01-01T13:00:00.012Z","v":null}`, + } - for _, expectedBucket := range expectedBuckets { - found := false - for _, gotBucket := range aggResults[0].Buckets { - if gotBucket.Name == expectedBucket.Name { - s.Require().Equal(expectedBucket.Value, gotBucket.Value, "wrong value for bucket %s", expectedBucket.Name) - s.Require().Equal(expectedBucket.NotExists, gotBucket.NotExists, "wrong NotExists for bucket %s", expectedBucket.Name) - found = true - break - } - } - s.Require().True(found, "bucket %s not found in results", expectedBucket.Name) + s.insertDocuments(docs...) + + searchParams := s.query( + "service:min*", + withAggQuery(processor.AggQuery{ + Field: aggField("v"), + GroupBy: aggField("service"), + Func: seq.AggFuncMin, + })) + expectedBuckets := []seq.AggregationBucket{ + {Name: "min4", Value: math.NaN(), NotExists: 7}, + {Name: "min2", Value: -10, NotExists: 0}, + {Name: "min1", Value: 1, NotExists: 0}, + } + s.AssertAggregation(searchParams, seq.AggregateArgs{Func: seq.AggFuncMin}, expectedBuckets) +} + +func (s *FractionTestSuite) TestAggMax() { + docs := []string{ + `{"timestamp":"2000-01-01T13:00:00.000Z","service":"max1","v":1}`, + `{"timestamp":"2000-01-01T13:00:00.001Z","service":"max1","v":2}`, + `{"timestamp":"2000-01-01T13:00:00.002Z","service":"max2","v":3}`, + `{"timestamp":"2000-01-01T13:00:00.003Z","service":"max2","v":"-10"}`, + `{"timestamp":"2000-01-01T13:00:00.004Z","service":"max4"}`, + `{"timestamp":"2000-01-01T13:00:00.005Z","service":"max4"}`, + `{"timestamp":"2000-01-01T13:00:00.006Z","service":null,"v":null}`, + `{"timestamp":"2000-01-01T13:00:00.007Z","v":null}`, + } + + s.insertDocuments(docs...) + + searchParams := s.query( + "service:max*", + withAggQuery(processor.AggQuery{ + Field: aggField("v"), + GroupBy: aggField("service"), + Func: seq.AggFuncMax, + })) + expectedBuckets := []seq.AggregationBucket{ + {Name: "max2", Value: 3, NotExists: 0}, + {Name: "max1", Value: 2, NotExists: 0}, + {Name: "max4", Value: math.NaN(), NotExists: 2}, + } + s.AssertAggregation(searchParams, seq.AggregateArgs{Func: seq.AggFuncMax}, expectedBuckets) +} + +func (s *FractionTestSuite) TestAggQuantile() { + docs := []string{ + `{"timestamp":"2000-01-01T13:00:00.000Z","service":"quantile1","v":1}`, + `{"timestamp":"2000-01-01T13:00:00.001Z","service":"quantile1","v":2}`, + `{"timestamp":"2000-01-01T13:00:00.002Z","service":"quantile1","v":3}`, + `{"timestamp":"2000-01-01T13:00:00.003Z","service":"quantile1","v":4}`, + `{"timestamp":"2000-01-01T13:00:00.004Z","service":"quantile1","v":5}`, + `{"timestamp":"2000-01-01T13:00:00.005Z","service":"quantile1","v":6}`, + `{"timestamp":"2000-01-01T13:00:00.006Z","service":"quantile1","v":7}`, + `{"timestamp":"2000-01-01T13:00:00.007Z","service":"quantile1","v":8}`, + `{"timestamp":"2000-01-01T13:00:00.008Z","service":"quantile1","v":9}`, + `{"timestamp":"2000-01-01T13:00:00.009Z","service":"quantile1","v":10}`, + } + + s.insertDocuments(docs...) + + searchParams := s.query( + "service:quantile*", + withAggQuery(processor.AggQuery{ + Field: aggField("v"), + GroupBy: aggField("service"), + Func: seq.AggFuncQuantile, + Quantiles: []float64{0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.75, 0.8, 0.9, 0.99, 0.999, 0.99999999}, + })) + expectedBuckets := []seq.AggregationBucket{ + { + Name: "quantile1", + Value: 1, + Quantiles: []float64{1, 2, 3, 4, 5, 6, 6, 7, 8, 8, 9, 10, 10, 10}, + NotExists: 0, + }, + } + s.AssertAggregation(searchParams, seq.AggregateArgs{ + Func: seq.AggFuncQuantile, + Quantiles: []float64{0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.75, 0.8, 0.9, 0.99, 0.999, 0.99999999}, + }, expectedBuckets) +} + +func (s *FractionTestSuite) TestAggUnique() { + docs := []string{ + `{"timestamp":"2000-01-01T13:00:00.000Z","service":"some_log","level":2}`, + `{"timestamp":"2000-01-01T13:00:00.001Z","service":"unique1","level":3}`, + `{"timestamp":"2000-01-01T13:00:00.002Z","service":"unique2","level":3}`, + `{"timestamp":"2000-01-01T13:00:00.003Z","service":"unique2","level":3}`, + `{"timestamp":"2000-01-01T13:00:00.004Z","service":"unique3","level":3}`, + `{"timestamp":"2000-01-01T13:00:00.005Z","service":"unique3","level":2}`, + `{"timestamp":"2000-01-01T13:00:00.006Z","service":"unique4","level":3}`, + `{"timestamp":"2000-01-01T13:00:00.007Z","service":"unique4","level":2}`, + `{"timestamp":"2000-01-01T13:00:00.008Z","service":"unique4","level":3}`, + `{"timestamp":"2000-01-01T13:00:00.009Z","service":"unique5","level":3}`, + `{"timestamp":"2000-01-01T13:00:00.010Z","level":3}`, + } + + s.insertDocuments(docs...) + + searchParams := s.query( + "level:3", + withAggQuery(processor.AggQuery{ + GroupBy: aggField("service"), + Func: seq.AggFuncUnique, + })) + expectedBuckets := []seq.AggregationBucket{ + {Name: "unique1", Value: 0, NotExists: 0}, + {Name: "unique2", Value: 0, NotExists: 0}, + {Name: "unique3", Value: 0, NotExists: 0}, + {Name: "unique4", Value: 0, NotExists: 0}, + {Name: "unique5", Value: 0, NotExists: 0}, + } + s.AssertAggregation(searchParams, seq.AggregateArgs{Func: seq.AggFuncUnique}, expectedBuckets) +} + +func (s *FractionTestSuite) TestAggSumWithoutGroupBy() { + docs := []string{ + `{"timestamp":"2000-01-01T13:00:00.000Z","v":1,"service":"sum_without_group_by"}`, + `{"timestamp":"2000-01-01T13:00:00.001Z","v":1,"service":"sum_without_group_by"}`, + `{"timestamp":"2000-01-01T13:00:00.002Z","v":2,"service":"sum_without_group_by"}`, + `{"timestamp":"2000-01-01T13:00:00.003Z","v":1,"service":"sum_without_group_by"}`, + `{"timestamp":"2000-01-01T13:00:00.004Z","v":1,"service":"sum_without_group_by"}`, + `{"timestamp":"2000-01-01T13:00:00.005Z","v":1,"service":"sum_without_group_by"}`, + `{"timestamp":"2000-01-01T13:00:00.006Z","v":1,"service":"sum_without_group_by"}`, + `{"timestamp":"2000-01-01T13:00:00.007Z","v":2,"service":"sum_without_group_by"}`, + `{"timestamp":"2000-01-01T13:00:00.008Z","v":-0,"service":"sum_without_group_by"}`, + `{"timestamp":"2000-01-01T13:00:00.009Z","v":+0,"service":"sum_without_group_by"}`, + `{"timestamp":"2000-01-01T13:00:00.010Z","v":0,"service":"sum_without_group_by"}`, } + + s.insertDocuments(docs...) + + searchParams := s.query( + `service:"sum_without_group_by"`, + withAggQuery(processor.AggQuery{ + Field: aggField("v"), + Func: seq.AggFuncSum, + })) + expectedBuckets := []seq.AggregationBucket{ + {Name: "", Value: 10, NotExists: 0}, + } + s.AssertAggregation(searchParams, seq.AggregateArgs{Func: seq.AggFuncSum}, expectedBuckets) +} + +func (s *FractionTestSuite) TestAggMaxWithoutGroupBy() { + docs := []string{ + `{"timestamp":"2000-01-01T13:00:00.000Z","v":100,"service":"max_without_group_by"}`, + `{"timestamp":"2000-01-01T13:00:00.001Z","v":-200,"service":"max_without_group_by"}`, + `{"timestamp":"2000-01-01T13:00:00.002Z","v":300,"service":"max_without_group_by"}`, + `{"timestamp":"2000-01-01T13:00:00.003Z","v":-300,"service":"max_without_group_by"}`, + } + + s.insertDocuments(docs...) + + searchParams := s.query( + `service:"max_without_group_by"`, + withAggQuery(processor.AggQuery{ + Field: aggField("v"), + Func: seq.AggFuncMax, + })) + expectedBuckets := []seq.AggregationBucket{ + {Name: "", Value: 300, NotExists: 0}, + } + s.AssertAggregation(searchParams, seq.AggregateArgs{Func: seq.AggFuncMax}, expectedBuckets) +} + +func (s *FractionTestSuite) TestAggNotExists() { + docs := []string{ + `{"timestamp":"2000-01-01T13:00:00.000Z","service":"not_exists"}`, + } + + s.insertDocuments(docs...) + + searchParams := s.query( + `service:"not_exists"`, + withAggQuery(processor.AggQuery{ + Field: aggField("v"), + Func: seq.AggFuncAvg, + })) + expectedBuckets := []seq.AggregationBucket{ + {Name: "", Value: math.NaN(), NotExists: 1}, + } + s.AssertAggregation(searchParams, seq.AggregateArgs{Func: seq.AggFuncAvg}, expectedBuckets) +} + +func (s *FractionTestSuite) TestAggAvgWithoutGroupBy() { + docs := []string{ + `{"timestamp":"2000-01-01T13:00:00.000Z","v":200,"service":"avg_without_group_by"}`, + `{"timestamp":"2000-01-01T13:00:00.001Z","v":500,"service":"avg_without_group_by"}`, + } + + s.insertDocuments(docs...) + + searchParams := s.query( + `service:"avg_without_group_by"`, + withAggQuery(processor.AggQuery{ + Field: aggField("v"), + Func: seq.AggFuncAvg, + })) + expectedBuckets := []seq.AggregationBucket{ + {Name: "", Value: 350, NotExists: 0}, + } + s.AssertAggregation(searchParams, seq.AggregateArgs{Func: seq.AggFuncAvg}, expectedBuckets) } type searchOption func(*processor.SearchParams) error @@ -659,31 +859,6 @@ func withLimit(limit int) searchOption { } } -func withAgg(aggQueries ...any) searchOption { - aggs := make([]processor.AggQuery, 0, len(aggQueries)) - for _, aggQuery := range aggQueries { - switch aggQuery := aggQuery.(type) { - case string: - searchAll := []parser.Term{{ - Kind: parser.TermSymbol, Data: "*", - }} - groupBy := &parser.Literal{ - Field: aggQuery, - Terms: searchAll, - } - aggs = append(aggs, processor.AggQuery{GroupBy: groupBy, Func: seq.AggFuncCount}) - case processor.AggQuery: - aggs = append(aggs, aggQuery) - default: - panic("unknown query type") - } - } - return func(sp *processor.SearchParams) error { - sp.AggQ = append(sp.AggQ, aggs...) - return nil - } -} - func aggField(field string) *parser.Literal { searchAll := []parser.Term{{ Kind: parser.TermSymbol, Data: "*", @@ -701,18 +876,18 @@ func withAggQuery(aggQuery processor.AggQuery) searchOption { } } -func (s *FractionTestSuite) AssertSearch(queryObject interface{}, originalDocs []string, indexes []int) { +func (s *FractionTestSuite) AssertSearch(queryObject interface{}, originalDocs []string, expectedIndexes []int) { switch q := queryObject.(type) { case string: - s.AssertSearchWithSearchParams(s.query(q), originalDocs, indexes) + s.AssertSearchWithSearchParams(s.query(q), originalDocs, expectedIndexes) case *processor.SearchParams: - s.AssertSearchWithSearchParams(q, originalDocs, indexes) + s.AssertSearchWithSearchParams(q, originalDocs, expectedIndexes) default: s.Require().Fail("type for query object not supported") } } -func (s *FractionTestSuite) AssertSearchWithSearchParams(params *processor.SearchParams, originalDocs []string, indexes []int) { +func (s *FractionTestSuite) AssertSearchWithSearchParams(params *processor.SearchParams, originalDocs []string, expectedIndexes []int) { for _, order := range []seq.DocsOrder{seq.DocsOrderDesc, seq.DocsOrderAsc} { params.Order = order @@ -721,8 +896,8 @@ func (s *FractionTestSuite) AssertSearchWithSearchParams(params *processor.Searc qpr, err := dp.Search(*params) s.Require().NoError(err, "search failed for query with order=%v", order) - s.Require().Equal(len(indexes), qpr.IDs.Len(), - "expected %d docs but found %d with order=%v", len(indexes), qpr.IDs.Len(), order) + s.Require().Equal(len(expectedIndexes), qpr.IDs.Len(), + "expected %d docs but found %d with order=%v", len(expectedIndexes), qpr.IDs.Len(), order) docs, err := dp.Fetch(qpr.IDs.IDs()) s.Require().NoError(err, "failed to fetch docs for IDs: %v", qpr.IDs.IDs()) @@ -737,11 +912,11 @@ func (s *FractionTestSuite) AssertSearchWithSearchParams(params *processor.Searc } for i, fetchedDoc := range fetchedDocs { - if i < len(indexes) { - expectedDoc := originalDocs[indexes[i]] + if i < len(expectedIndexes) { + expectedDoc := originalDocs[expectedIndexes[i]] s.Require().Equal(expectedDoc, fetchedDoc, "doc at index %d doesn't match expected doc at original index %d with order=%v", - i, indexes[i], order) + i, expectedIndexes[i], order) } } @@ -749,6 +924,41 @@ func (s *FractionTestSuite) AssertSearchWithSearchParams(params *processor.Searc } } +func (s *FractionTestSuite) AssertAggregation( + searchParams *processor.SearchParams, + aggregate seq.AggregateArgs, + expectedBuckets []seq.AggregationBucket) { + + dp, release := s.fraction.DataProvider(context.Background()) + defer release() + + qpr, err := dp.Search(*searchParams) + s.Require().NoError(err, "search failed") + + aggResults := qpr.Aggregate([]seq.AggregateArgs{aggregate}) + s.Require().Equal(1, len(aggResults)) + s.Require().Equal(len(expectedBuckets), len(aggResults[0].Buckets), "wrong number of buckets") + + for _, expectedBucket := range expectedBuckets { + found := false + for _, gotBucket := range aggResults[0].Buckets { + if gotBucket.Name == expectedBucket.Name { + if math.IsNaN(expectedBucket.Value) || math.IsNaN(gotBucket.Value) { + s.Require().Truef(math.IsNaN(expectedBucket.Value) && math.IsNaN(gotBucket.Value), + "wrong value for bucket %s: expected NaN=%v, got NaN=%v", + expectedBucket.Name, math.IsNaN(expectedBucket.Value), math.IsNaN(gotBucket.Value)) + } else { + s.Require().Equal(expectedBucket.Value, gotBucket.Value, "wrong value for bucket %s", expectedBucket.Name) + } + s.Require().Equal(expectedBucket.NotExists, gotBucket.NotExists, "wrong NotExists for bucket %s", expectedBucket.Name) + found = true + break + } + } + s.Require().True(found, "bucket %s not found in results", expectedBucket.Name) + } +} + type ActiveFractionSuite struct { FractionTestSuite } diff --git a/tests/integration_tests/integration_test.go b/tests/integration_tests/integration_test.go index 856314ca..5fe31470 100644 --- a/tests/integration_tests/integration_test.go +++ b/tests/integration_tests/integration_test.go @@ -921,292 +921,6 @@ func sortedTimeBins(hist map[seq.AggBin]*seq.SamplesContainer) []seq.AggBin { return keys } -func (s *IntegrationTestSuite) TestAggStat() { - t := s.T() - - cfg := *s.Config - cfg.Mapping = map[string]seq.MappingTypes{ - "service": seq.NewSingleType(seq.TokenizerTypeKeyword, "", 0), - "v": seq.NewSingleType(seq.TokenizerTypeKeyword, "", 0), - "level": seq.NewSingleType(seq.TokenizerTypeKeyword, "", 0), - } - - type Expected struct { - NotExists int64 - Buckets []seq.AggregationBucket - } - type TestCase struct { - Name string - ToBulk []string - SearchQuery string - AggQuery search.AggQuery - Expected Expected - } - - tcs := []TestCase{ - { - Name: "sum", - ToBulk: []string{ - `{"service": "sum1", "v":1}`, - `{"service": "some_log", "v":2}`, - `{"service": "sum1", "v":1}`, - `{"service": "sum1", "v":-1}`, - `{"service": "sum1", "v":-0}`, - `{"service": "sum1", "v":+0}`, - `{"service": "sum1", "v":0}`, - `{"service": "sum1"}`, - // test negative values - `{"service": "sum2", "v":-1}`, - `{"service": "sum2", "v":-3}`, - `{"service": "sum2", "v":-4}`, - // test same token ("1") repetitions - `{"service": "sum3", "v":1}`, - `{"service": "sum4", "v":99}`, - `{"service": "sum4", "v":1}`, - `{"service": "sum4", "v":1}`, - `{"service": "sum4", "v":1}`, - `{"service": "sum4", "v":1}`, - `{"service": "sum4", "v":1}`, - // test sort - `{"service": "sum5", "v":1}`, - // test not exists - `{"service": "sum5"}`, - }, - SearchQuery: "service:sum*", - AggQuery: search.AggQuery{ - Field: "v", - GroupBy: "service", - Func: seq.AggFuncSum, - }, - Expected: Expected{ - NotExists: 0, - Buckets: []seq.AggregationBucket{ - {Name: "sum4", Value: 104, NotExists: 0}, - {Name: "sum1", Value: 1, NotExists: 1}, - {Name: "sum3", Value: 1, NotExists: 0}, - {Name: "sum5", Value: 1, NotExists: 1}, - {Name: "sum2", Value: -8, NotExists: 0}, - }, - }, - }, - { - Name: "min", - ToBulk: []string{ - `{"service": "min1", "v":1}`, - `{"service": "min1", "v":2}`, - `{"service": "min2", "v":3}`, - `{"service": "min2", "v":"-10"}`, - `{"service": "min4"}`, - `{"service": "min4"}`, - `{"service": "min4"}`, - `{"service": "min4"}`, - `{"service": "min4"}`, - `{"service": "min4"}`, - `{"service": "min4"}`, - `{"service": null, "v":null}`, - `{"v":null}`, - }, - SearchQuery: "service:min*", - AggQuery: search.AggQuery{ - Field: "v", - GroupBy: "service", - Func: seq.AggFuncMin, - }, - Expected: Expected{ - NotExists: 0, - Buckets: []seq.AggregationBucket{ - {Name: "min4", Value: math.NaN(), NotExists: 7}, - {Name: "min2", Value: -10, NotExists: 0}, - {Name: "min1", Value: 1, NotExists: 0}, - }, - }, - }, - { - Name: "max", - ToBulk: []string{ - `{"service": "max1", "v":1}`, - `{"service": "max1", "v":2}`, - `{"service": "max2", "v":3}`, - `{"service": "max2", "v":"-10"}`, - `{"service": "max4"}`, - `{"service": "max4"}`, - `{"service": null, "v":null}`, - `{"v":null}`, - }, - SearchQuery: "service:max*", - AggQuery: search.AggQuery{ - Field: "v", - GroupBy: "service", - Func: seq.AggFuncMax, - }, - Expected: Expected{ - NotExists: 0, - Buckets: []seq.AggregationBucket{ - {Name: "max2", Value: 3, NotExists: 0}, - {Name: "max1", Value: 2, NotExists: 0}, - {Name: "max4", Value: math.NaN(), NotExists: 2}, - }, - }, - }, - { - Name: "quantile", - ToBulk: []string{ - `{"service": "quantile1", "v":1}`, - `{"service": "quantile1", "v":2}`, - `{"service": "quantile1", "v":3}`, - `{"service": "quantile1", "v":4}`, - `{"service": "quantile1", "v":5}`, - `{"service": "quantile1", "v":6}`, - `{"service": "quantile1", "v":7}`, - `{"service": "quantile1", "v":8}`, - `{"service": "quantile1", "v":9}`, - `{"service": "quantile1", "v":10}`, - }, - SearchQuery: "service:quantile*", - AggQuery: search.AggQuery{ - Field: "v", - GroupBy: "service", - Func: seq.AggFuncQuantile, - Quantiles: []float64{0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.75, 0.8, 0.9, 0.99, 0.999, 0.99999999}, - }, - Expected: Expected{ - NotExists: 0, - Buckets: []seq.AggregationBucket{ - { - Name: "quantile1", - Value: 1, - Quantiles: []float64{1, 2, 3, 4, 5, 6, 6, 7, 8, 8, 9, 10, 10, 10}, - NotExists: 0, - }, - }, - }, - }, - { - Name: "unique", - ToBulk: []string{ - `{"service": "some_log", "level": 2}`, - `{"service": "unique1", "level": 3}`, - `{"service": "unique2", "level": 3}`, - `{"service": "unique2", "level": 3}`, - `{"service": "unique3", "level": 3}`, - `{"service": "unique3", "level": 2}`, - `{"service": "unique4", "level": 3}`, - `{"service": "unique4", "level": 2}`, - `{"service": "unique4", "level": 3}`, - `{"service": "unique5", "level": 3}`, - `{"level": 3}`, - }, - SearchQuery: "level:3", - AggQuery: search.AggQuery{ - GroupBy: "service", - Func: seq.AggFuncUnique, - }, - Expected: Expected{ - NotExists: 1, - Buckets: []seq.AggregationBucket{ - {Name: "unique1", Value: 0, NotExists: 0}, - {Name: "unique2", Value: 0, NotExists: 0}, - {Name: "unique3", Value: 0, NotExists: 0}, - {Name: "unique4", Value: 0, NotExists: 0}, - {Name: "unique5", Value: 0, NotExists: 0}, - }, - }, - }, - { - Name: "sum without group_by", - ToBulk: []string{ - `{"v":1, "service":"sum_without_group_by"}`, - `{"v":1, "service":"sum_without_group_by"}`, - `{"v":2, "service":"sum_without_group_by"}`, - `{"v":1, "service":"sum_without_group_by"}`, - `{"v":1, "service":"sum_without_group_by"}`, - `{"v":1, "service":"sum_without_group_by"}`, - `{"v":1, "service":"sum_without_group_by"}`, - `{"v":2, "service":"sum_without_group_by"}`, - `{"v":-0, "service":"sum_without_group_by"}`, - `{"v":+0, "service":"sum_without_group_by"}`, - `{"v":0, "service":"sum_without_group_by"}`, - }, - SearchQuery: `service:"sum_without_group_by"`, - AggQuery: search.AggQuery{Field: "v", Func: seq.AggFuncSum}, - Expected: Expected{NotExists: 0, Buckets: []seq.AggregationBucket{{Name: "", Value: 10, NotExists: 0}}}, - }, - { - Name: "max without group_by", - ToBulk: []string{ - `{"v":100, "service":"max_without_group_by"}`, - `{"v":-200, "service":"max_without_group_by"}`, - `{"v":300, "service":"max_without_group_by"}`, - `{"v":-300, "service":"max_without_group_by"}`, - }, - SearchQuery: `service:"max_without_group_by"`, - AggQuery: search.AggQuery{Field: "v", Func: seq.AggFuncMax}, - Expected: Expected{NotExists: 0, Buckets: []seq.AggregationBucket{{Name: "", Value: 300, NotExists: 0}}}, - }, - { - Name: "check not_exists without group_by", - ToBulk: []string{`{"service":"not_exists_without_group_by"}`}, - SearchQuery: `service:"not_exists_without_group_by"`, - AggQuery: search.AggQuery{Field: "v", Func: seq.AggFuncAvg}, - Expected: Expected{NotExists: 0, Buckets: []seq.AggregationBucket{{Name: "", Value: math.NaN(), NotExists: 1}}}, - }, - { - Name: "avg without group_by", - ToBulk: []string{ - `{"v":200, "service":"avg_without_group_by"}`, - `{"v":500, "service":"avg_without_group_by"}`, - }, - SearchQuery: `service:"avg_without_group_by"`, - AggQuery: search.AggQuery{Field: "v", Func: seq.AggFuncAvg}, - Expected: Expected{NotExists: 0, Buckets: []seq.AggregationBucket{{Name: "", Value: 350, NotExists: 0}}}, - }, - } - - aggregateWithOrder := func(r *require.Assertions, env *setup.TestingEnv, tc *TestCase, order seq.DocsOrder) { - qpr, _, _, err := env.Search(tc.SearchQuery, math.MaxInt32, setup.WithAggQuery(tc.AggQuery), setup.WithOrder(order)) - r.NoError(err) - - gotBuckets := qpr.Aggregate([]seq.AggregateArgs{{Func: tc.AggQuery.Func, Quantiles: tc.AggQuery.Quantiles}}) - - r.Equal(1, len(gotBuckets)) - r.Equal(1, len(qpr.Aggs)) - r.Equal(tc.Expected.NotExists, qpr.Aggs[0].NotExists) - - // Handwritten bucket comparison to ignore NaN values - r.Len(gotBuckets[0].Buckets, len(tc.Expected.Buckets), "wrong bucket count, expected=%v, got=%v", tc.Expected.Buckets, gotBuckets[0]) - for i, expBucket := range tc.Expected.Buckets { - gotBucket := gotBuckets[0].Buckets[i] - if math.IsNaN(expBucket.Value) || math.IsNaN(gotBucket.Value) { - r.Truef(math.IsNaN(expBucket.Value) && math.IsNaN(gotBucket.Value), "wrong bucket value, expected=%v, got=%v", expBucket.Value, gotBucket.Value) - expBucket.Value = 0 - gotBucket.Value = 0 - } - r.EqualValues(expBucket, gotBucket) - } - } - - for i := range tcs { - tc := &tcs[i] - t.Run(tc.Name, func(t *testing.T) { - env := setup.NewTestingEnv(&cfg) - defer env.StopAll() - - setup.Bulk(t, env.IngestorBulkAddr(), tc.ToBulk) - env.WaitIdle() - - t.Run("asc", func(t *testing.T) { - r := require.New(t) - aggregateWithOrder(r, env, tc, seq.DocsOrderAsc) - }) - - t.Run("desc", func(t *testing.T) { - r := require.New(t) - aggregateWithOrder(r, env, tc, seq.DocsOrderDesc) - }) - }) - } -} - func (s *IntegrationTestSuite) TestAggNoTotal() { env := setup.NewTestingEnv(s.Config) defer env.StopAll() From 822fb027260b681711a462e859e86a2372f404fe Mon Sep 17 00:00:00 2001 From: Andrei Cheboksarov <37665782+cheb0@users.noreply.github.com> Date: Thu, 9 Oct 2025 16:51:11 +0400 Subject: [PATCH 19/48] check with/without total --- frac/fraction_test.go | 79 +++++++++++++-------- tests/integration_tests/integration_test.go | 31 -------- 2 files changed, 49 insertions(+), 61 deletions(-) diff --git a/frac/fraction_test.go b/frac/fraction_test.go index d88774bd..f04eb810 100644 --- a/frac/fraction_test.go +++ b/frac/fraction_test.go @@ -403,8 +403,7 @@ func (s *FractionTestSuite) TestSearchNested() { s.insertDocuments(docs...) - // Each AssertSearch now tests both desc and asc order - s.AssertSearch("spans.span_id:*", docs, []int{3, 2, 1, 0}) + s.AssertSearchIgnoreTotal("spans.span_id:*", docs, []int{3, 2, 1, 0}) s.AssertSearch("spans.span_id:1", docs, []int{2, 0}) s.AssertSearch("spans.span_id:2", docs, []int{1, 0}) s.AssertSearch("spans.span_id:3", docs, []int{2, 1}) @@ -876,51 +875,71 @@ func withAggQuery(aggQuery processor.AggQuery) searchOption { } } +func (s *FractionTestSuite) AssertSearchIgnoreTotal(query string, originalDocs []string, expectedIndexes []int) { + s.AssertSearchWithSearchParams(s.query(query), originalDocs, expectedIndexes, false) +} + func (s *FractionTestSuite) AssertSearch(queryObject interface{}, originalDocs []string, expectedIndexes []int) { switch q := queryObject.(type) { case string: - s.AssertSearchWithSearchParams(s.query(q), originalDocs, expectedIndexes) + s.AssertSearchWithSearchParams(s.query(q), originalDocs, expectedIndexes, true) case *processor.SearchParams: - s.AssertSearchWithSearchParams(q, originalDocs, expectedIndexes) + s.AssertSearchWithSearchParams(q, originalDocs, expectedIndexes, true) default: s.Require().Fail("type for query object not supported") } } -func (s *FractionTestSuite) AssertSearchWithSearchParams(params *processor.SearchParams, originalDocs []string, expectedIndexes []int) { - for _, order := range []seq.DocsOrder{seq.DocsOrderDesc, seq.DocsOrderAsc} { - params.Order = order - - dp, release := s.fraction.DataProvider(context.Background()) +func (s *FractionTestSuite) AssertSearchWithSearchParams(params *processor.SearchParams, originalDocs []string, expectedIndexes []int, checkTotal bool) { + var withTotals = []bool{false} + if checkTotal { + withTotals = append(withTotals, true) + } - qpr, err := dp.Search(*params) - s.Require().NoError(err, "search failed for query with order=%v", order) + for _, order := range []seq.DocsOrder{seq.DocsOrderDesc, seq.DocsOrderAsc} { + for _, withTotal := range withTotals { + params.Order = order + params.WithTotal = withTotal + + dp, release := s.fraction.DataProvider(context.Background()) + + qpr, err := dp.Search(*params) + s.Require().NoError(err, "search failed for query with order=%v", order) + if withTotal { + s.Require().Equal( + uint64(len(expectedIndexes)), + qpr.Total, + "total doesn't match. expected: %d, actual: %d", len(expectedIndexes), qpr.Total) + } else { + s.Require().Equal(uint64(0), qpr.Total, "qpr has total but not expected to have") + } - s.Require().Equal(len(expectedIndexes), qpr.IDs.Len(), - "expected %d docs but found %d with order=%v", len(expectedIndexes), qpr.IDs.Len(), order) + s.Require().Equal(len(expectedIndexes), qpr.IDs.Len(), + "expected %d docs but found %d with order=%v", len(expectedIndexes), qpr.IDs.Len(), order) - docs, err := dp.Fetch(qpr.IDs.IDs()) - s.Require().NoError(err, "failed to fetch docs for IDs: %v", qpr.IDs.IDs()) + docs, err := dp.Fetch(qpr.IDs.IDs()) + s.Require().NoError(err, "failed to fetch docs for IDs: %v", qpr.IDs.IDs()) - if order.IsReverse() { - slices.Reverse(docs) - } + if order.IsReverse() { + slices.Reverse(docs) + } - fetchedDocs := make([]string, 0, len(docs)) - for _, doc := range docs { - fetchedDocs = append(fetchedDocs, string(doc)) - } + fetchedDocs := make([]string, 0, len(docs)) + for _, doc := range docs { + fetchedDocs = append(fetchedDocs, string(doc)) + } - for i, fetchedDoc := range fetchedDocs { - if i < len(expectedIndexes) { - expectedDoc := originalDocs[expectedIndexes[i]] - s.Require().Equal(expectedDoc, fetchedDoc, - "doc at index %d doesn't match expected doc at original index %d with order=%v", - i, expectedIndexes[i], order) + for i, fetchedDoc := range fetchedDocs { + if i < len(expectedIndexes) { + expectedDoc := originalDocs[expectedIndexes[i]] + s.Require().Equal(expectedDoc, fetchedDoc, + "doc at index %d doesn't match expected doc at original index %d with order=%v", + i, expectedIndexes[i], order) + } } - } - release() + release() + } } } diff --git a/tests/integration_tests/integration_test.go b/tests/integration_tests/integration_test.go index 5fe31470..68e89b3e 100644 --- a/tests/integration_tests/integration_test.go +++ b/tests/integration_tests/integration_test.go @@ -255,37 +255,6 @@ func (s *IntegrationTestSuite) TestSearchNothing() { assert.Equal(s.T(), uint64(0), qpr.Total, "wrong doc count") } -func (s *IntegrationTestSuite) TestSearchBackwards() { - now := time.Now() - before := now.Add(-5 * time.Hour) - origDocs := []string{ - fmt.Sprintf(`{"service":"a","xxxx":"yyyy","time":%q}`, now.Format(time.RFC3339)), - fmt.Sprintf(`{"service":"a","yyyy":"xxxx","time":%q}`, before.Format(time.RFC3339)), - } - - env := setup.NewTestingEnv(s.Config) - defer env.StopAll() - - setup.Bulk(s.T(), env.IngestorBulkAddr(), origDocs) - env.WaitIdle() - - for _, o := range []seq.DocsOrder{seq.DocsOrderAsc, seq.DocsOrderDesc} { - for _, withTotal := range []bool{true, false} { - qpr, docs, _, err := env.Search(`service:a`, 1000, setup.WithTotal(withTotal), setup.WithOrder(o)) - - if o.IsReverse() { - slices.Reverse(docs) - } - - assert.NoError(s.T(), err, "should be no errors") - assert.Len(s.T(), qpr.IDs, 2, "wrong doc count") - assert.Equal(s.T(), origDocs[0], string(docs[0]), "wrong doc content") - assert.Equal(s.T(), origDocs[1], string(docs[1]), "wrong doc content") - assert.Equal(s.T(), getTotal(2, withTotal), qpr.Total, "wrong doc count") - } - } -} - func (s *IntegrationTestSuite) TestSearchSequence() { docTemplate := `{"service":"a","time":"%s"}` bulks := 16 From a3c38a3558d7645b0d6dc7ff0d6791e5b492ebd4 Mon Sep 17 00:00:00 2001 From: Andrei Cheboksarov <37665782+cheb0@users.noreply.github.com> Date: Thu, 9 Oct 2025 21:13:40 +0400 Subject: [PATCH 20/48] refactor for sealed loaded test suite --- frac/fraction_test.go | 173 +++++++++++++++++++++--------------------- 1 file changed, 85 insertions(+), 88 deletions(-) diff --git a/frac/fraction_test.go b/frac/fraction_test.go index f04eb810..79fa513a 100644 --- a/frac/fraction_test.go +++ b/frac/fraction_test.go @@ -97,35 +97,9 @@ func (s *FractionTestSuite) SetupTestCommon() { s.readLimiter = storage.NewReadLimiter(2, NopCounter{}) } -func (s *FractionTestSuite) InsertIntoActive(active *Active, docs ...string) { - - // drift and futureDrift are 0, we can process docs at any timestamps - processor := indexer.NewProcessor(s.mapping, s.tokenizers, 0, 0, 0) - - idx := 0 - readNext := func() ([]byte, error) { - if idx >= len(docs) { - return nil, nil - } - d := []byte(docs[idx]) - idx++ - return d, nil - } - - _, binaryDocs, binaryMeta, err := processor.ProcessBulk(time.Now(), nil, nil, readNext) - s.Require().NoError(err, "processing bulk failed") - - compressor := indexer.GetDocsMetasCompressor(3, 3) - defer indexer.PutDocMetasCompressor(compressor) - compressor.CompressDocsAndMetas(binaryDocs, binaryMeta) - docsBlock, metasBlock := compressor.DocsMetas() - - var wg sync.WaitGroup - wg.Add(1) - err = active.Append(docsBlock, metasBlock, &wg) - s.Require().NoError(err, "append to active failed") - - wg.Wait() +func (s *FractionTestSuite) TearDownTestCommon() { + err := os.RemoveAll(s.tmpDir) + s.NoError(err, "Failed to remove tmp dir") } func (s *FractionTestSuite) TestSearchKeyword() { @@ -978,29 +952,92 @@ func (s *FractionTestSuite) AssertAggregation( } } -type ActiveFractionSuite struct { - FractionTestSuite -} - -func (s *ActiveFractionSuite) SetupTest() { - s.SetupTestCommon() - +func (s *FractionTestSuite) newActive(docs ...string) *Active { baseName := filepath.Join(s.tmpDir, "test_fraction") - indexer := NewActiveIndexer(4, 10) - indexer.Start() + activeIndexer := NewActiveIndexer(4, 10) + activeIndexer.Start() active := NewActive( baseName, - indexer, + activeIndexer, s.readLimiter, cache.NewCache[[]byte](cache.NewCleaner(uint64(10*units.MiB), nil), nil), s.sortCache, s.config, ) - s.fraction = active + proc := indexer.NewProcessor(s.mapping, s.tokenizers, 0, 0, 0) + + idx := 0 + readNext := func() ([]byte, error) { + if idx >= len(docs) { + return nil, nil + } + d := []byte(docs[idx]) + idx++ + return d, nil + } + + _, binaryDocs, binaryMeta, err := proc.ProcessBulk(time.Now(), nil, nil, readNext) + s.Require().NoError(err, "processing bulk failed") + + compressor := indexer.GetDocsMetasCompressor(3, 3) + defer indexer.PutDocMetasCompressor(compressor) + compressor.CompressDocsAndMetas(binaryDocs, binaryMeta) + docsBlock, metasBlock := compressor.DocsMetas() + + var wg sync.WaitGroup + wg.Add(1) + err = active.Append(docsBlock, metasBlock, &wg) + s.Require().NoError(err, "append to active failed") + + wg.Wait() + return active +} + +func (s *FractionTestSuite) newSealed(docs ...string) *Sealed { + active := s.newActive(docs...) + + sealParams := common.SealParams{ + IDsZstdLevel: 1, // min comression level + LIDsZstdLevel: 1, + TokenListZstdLevel: 1, + DocsPositionsZstdLevel: 1, + TokenTableZstdLevel: 1, + DocBlocksZstdLevel: 1, + DocBlockSize: 128 * int(units.KiB), + } + + activeSealingSource, err := NewActiveSealingSource(active, sealParams) + s.Require().NoError(err, "Sealing source creation failed") + + preloaded, err := sealing.Seal(activeSealingSource, sealParams) + s.Require().NoError(err, "Sealing failed") + + sealed := NewSealedPreloaded( + active.BaseFileName, + preloaded, + s.readLimiter, + s.indexCache, + cache.NewCache[[]byte](cache.NewCleaner(uint64(10*units.MiB), nil), nil), + s.config, + ) + active.Release() + return sealed +} + +/* +ActiveFractionSuite TODO +*/ +type ActiveFractionSuite struct { + FractionTestSuite +} + +func (s *ActiveFractionSuite) SetupTest() { + s.SetupTestCommon() + s.insertDocuments = func(docs ...string) { - s.InsertIntoActive(active, docs...) + s.fraction = s.newActive(docs...) } } @@ -1013,10 +1050,12 @@ func (s *ActiveFractionSuite) TearDownTest() { s.fraction.Suicide() } - err := os.RemoveAll(s.tmpDir) - s.NoError(err, "failed to remove tmp dir") + s.TearDownTestCommon() } +/* +SealedFractionSuite tests TODO comment +*/ type SealedFractionSuite struct { FractionTestSuite } @@ -1025,58 +1064,16 @@ func (s *SealedFractionSuite) SetupTest() { s.SetupTestCommon() s.insertDocuments = func(docs ...string) { - baseFile := filepath.Join(s.tmpDir, "test_fraction") - indexer := NewActiveIndexer(4, 10) - indexer.Start() - - active := NewActive( - baseFile, - indexer, - s.readLimiter, - cache.NewCache[[]byte](cache.NewCleaner(uint64(10*units.MiB), nil), nil), - s.sortCache, - s.config, - ) - - s.InsertIntoActive(active, docs...) - - sealParams := common.SealParams{ - IDsZstdLevel: 3, - LIDsZstdLevel: 3, - TokenListZstdLevel: 3, - DocsPositionsZstdLevel: 3, - TokenTableZstdLevel: 3, - DocBlocksZstdLevel: 3, - DocBlockSize: 1024 * 1024, - } - - activeSealingSource, err := NewActiveSealingSource(active, sealParams) - s.Require().NoError(err, "Sealing source creation failed") - - preloaded, err := sealing.Seal(activeSealingSource, sealParams) - s.Require().NoError(err, "Sealing failed") - - sealed := NewSealedPreloaded( - baseFile, - preloaded, - s.readLimiter, - s.indexCache, - cache.NewCache[[]byte](cache.NewCleaner(uint64(10*units.MiB), nil), nil), - s.config, - ) - s.fraction = sealed - active.Release() + // TODO check if fraction is nil + s.fraction = s.newSealed(docs...) } } func (s *SealedFractionSuite) TearDownTest() { - // TODO if tear down is same as in active, then move it to FractionSuite if s.fraction != nil { s.fraction.Suicide() } - - err := os.RemoveAll(s.tmpDir) - s.NoError(err, "Failed to remove tmp dir") + s.TearDownTestCommon() } func TestFractionSuites(t *testing.T) { From 535a36dd195c0708e39334064621fc54f47661bd Mon Sep 17 00:00:00 2001 From: Andrei Cheboksarov <37665782+cheb0@users.noreply.github.com> Date: Fri, 10 Oct 2025 11:16:11 +0400 Subject: [PATCH 21/48] add sealed loaded fraction test suite --- frac/fraction_test.go | 118 ++++++++++++++++++++++++++++-------------- 1 file changed, 78 insertions(+), 40 deletions(-) diff --git a/frac/fraction_test.go b/frac/fraction_test.go index 79fa513a..fcbc33f7 100644 --- a/frac/fraction_test.go +++ b/frac/fraction_test.go @@ -180,6 +180,46 @@ func (s *FractionTestSuite) TestSearchNot() { s.AssertSearch("message:bad AND NOT message:good", docs, []int{4, 2, 0}) } +func (s *FractionTestSuite) TestSearchAndOr() { + docs := []string{ + `{"timestamp":"2000-01-01T13:00:00.000Z","message":"apple","level":"info","service":"svc_a","status":"ok"}`, + `{"timestamp":"2000-01-01T13:00:00.001Z","message":"apple","level":"error","service":"svc_b","status":"fail"}`, + `{"timestamp":"2000-01-01T13:00:00.002Z","message":"banana","level":"info","service":"svc_a","status":"ok"}`, + `{"timestamp":"2000-01-01T13:00:00.003Z","message":"banana","level":"error","service":"svc_b","status":"fail"}`, + `{"timestamp":"2000-01-01T13:00:00.004Z","message":"cherry","level":"info","service":"svc_c","status":"ok"}`, + `{"timestamp":"2000-01-01T13:00:00.005Z","message":"cherry","level":"warn","service":"svc_c","status":"ok"}`, + } + + s.insertDocuments(docs...) + + s.AssertSearch("message:apple AND level:info", docs, []int{0}) + s.AssertSearch("message:banana AND service:svc_a", docs, []int{2}) + s.AssertSearch("message:cherry AND level:warn", docs, []int{5}) + s.AssertSearch("level:info AND status:ok", docs, []int{4, 2, 0}) + s.AssertSearch("service:svc_a AND status:ok", docs, []int{2, 0}) + + s.AssertSearch("message:apple OR message:banana", docs, []int{3, 2, 1, 0}) + s.AssertSearch("level:error OR level:warn", docs, []int{5, 3, 1}) + s.AssertSearch("service:svc_a OR service:svc_b", docs, []int{3, 2, 1, 0}) + s.AssertSearch("status:fail OR level:warn", docs, []int{5, 3, 1}) + + s.AssertSearch("(message:apple OR message:banana) AND level:info", docs, []int{2, 0}) + s.AssertSearch("message:cherry AND (level:info OR level:warn)", docs, []int{5, 4}) + s.AssertSearch("(service:svc_a OR service:svc_b) AND level:info", docs, []int{2, 0}) + s.AssertSearch("(service:svc_a OR service:svc_b) AND (level:info OR level:error)", docs, []int{3, 2, 1, 0}) + + s.AssertSearch("(message:apple AND level:info) OR (message:banana AND level:error)", docs, []int{3, 0}) + s.AssertSearch("(message:apple OR message:cherry) AND (level:info OR level:error)", docs, []int{4, 1, 0}) + s.AssertSearch("message:* AND (level:info OR level:error) AND status:ok", docs, []int{4, 2, 0}) + + s.AssertSearch("message:apple OR message:notfound", docs, []int{1, 0}) + s.AssertSearch("message:notfound OR message:banana", docs, []int{3, 2}) + + s.AssertSearch("message:apple AND message:banana", docs, []int{}) + s.AssertSearch("level:info AND level:error", docs, []int{}) + s.AssertSearch("service:svc_a AND service:svc_b", docs, []int{}) +} + func (s *FractionTestSuite) TestWildcardSymbolsSearch() { docs := []string{ `{"timestamp":"2000-01-01T13:00:00.010Z","message":"first value:****"}`, @@ -253,46 +293,6 @@ func (s *FractionTestSuite) TestSearchPath() { s.AssertSearch("request_uri:*/three", docs, []int{10, 9, 8, 7, 6, 5, 2}) } -func (s *FractionTestSuite) TestSearchAndOr() { - docs := []string{ - `{"timestamp":"2000-01-01T13:00:00.000Z","message":"apple","level":"info","service":"svc_a","status":"ok"}`, - `{"timestamp":"2000-01-01T13:00:00.001Z","message":"apple","level":"error","service":"svc_b","status":"fail"}`, - `{"timestamp":"2000-01-01T13:00:00.002Z","message":"banana","level":"info","service":"svc_a","status":"ok"}`, - `{"timestamp":"2000-01-01T13:00:00.003Z","message":"banana","level":"error","service":"svc_b","status":"fail"}`, - `{"timestamp":"2000-01-01T13:00:00.004Z","message":"cherry","level":"info","service":"svc_c","status":"ok"}`, - `{"timestamp":"2000-01-01T13:00:00.005Z","message":"cherry","level":"warn","service":"svc_c","status":"ok"}`, - } - - s.insertDocuments(docs...) - - s.AssertSearch("message:apple AND level:info", docs, []int{0}) - s.AssertSearch("message:banana AND service:svc_a", docs, []int{2}) - s.AssertSearch("message:cherry AND level:warn", docs, []int{5}) - s.AssertSearch("level:info AND status:ok", docs, []int{4, 2, 0}) - s.AssertSearch("service:svc_a AND status:ok", docs, []int{2, 0}) - - s.AssertSearch("message:apple OR message:banana", docs, []int{3, 2, 1, 0}) - s.AssertSearch("level:error OR level:warn", docs, []int{5, 3, 1}) - s.AssertSearch("service:svc_a OR service:svc_b", docs, []int{3, 2, 1, 0}) - s.AssertSearch("status:fail OR level:warn", docs, []int{5, 3, 1}) - - s.AssertSearch("(message:apple OR message:banana) AND level:info", docs, []int{2, 0}) - s.AssertSearch("message:cherry AND (level:info OR level:warn)", docs, []int{5, 4}) - s.AssertSearch("(service:svc_a OR service:svc_b) AND level:info", docs, []int{2, 0}) - s.AssertSearch("(service:svc_a OR service:svc_b) AND (level:info OR level:error)", docs, []int{3, 2, 1, 0}) - - s.AssertSearch("(message:apple AND level:info) OR (message:banana AND level:error)", docs, []int{3, 0}) - s.AssertSearch("(message:apple OR message:cherry) AND (level:info OR level:error)", docs, []int{4, 1, 0}) - s.AssertSearch("message:* AND (level:info OR level:error) AND status:ok", docs, []int{4, 2, 0}) - - s.AssertSearch("message:apple OR message:notfound", docs, []int{1, 0}) - s.AssertSearch("message:notfound OR message:banana", docs, []int{3, 2}) - - s.AssertSearch("message:apple AND message:banana", docs, []int{}) - s.AssertSearch("level:info AND level:error", docs, []int{}) - s.AssertSearch("service:svc_a AND service:svc_b", docs, []int{}) -} - func (s *FractionTestSuite) TestSearchRange() { docs := []string{ `{"timestamp":"2000-01-01T13:00:00.000Z","service":"test-service","level":"1"}`, @@ -1076,7 +1076,45 @@ func (s *SealedFractionSuite) TearDownTest() { s.TearDownTestCommon() } +/* +SealedLoadedFractionSuite +*/ +type SealedLoadedFractionSuite struct { + FractionTestSuite +} + +func (s *SealedLoadedFractionSuite) SetupTest() { + s.SetupTestCommon() + + s.insertDocuments = func(docs ...string) { + s.fraction = s.newSealedLoaded(docs...) + } +} + +func (s *SealedLoadedFractionSuite) newSealedLoaded(docs ...string) *Sealed { + sealed := s.newSealed(docs...) + sealed.close("closed") + + sealed = NewSealed( + sealed.BaseFileName, + s.readLimiter, + s.indexCache, + cache.NewCache[[]byte](cache.NewCleaner(uint64(10*units.MiB), nil), nil), + nil, + s.config) + s.fraction = sealed + return sealed +} + +func (s *SealedLoadedFractionSuite) TearDownTest() { + if s.fraction != nil { + s.fraction.Suicide() + } + s.TearDownTestCommon() +} + func TestFractionSuites(t *testing.T) { suite.Run(t, new(ActiveFractionSuite)) suite.Run(t, new(SealedFractionSuite)) + suite.Run(t, new(SealedLoadedFractionSuite)) } From e25da20a1a876284e6578f75ad4a333ed1e0cffe Mon Sep 17 00:00:00 2001 From: Andrei Cheboksarov <37665782+cheb0@users.noreply.github.com> Date: Fri, 10 Oct 2025 12:06:23 +0400 Subject: [PATCH 22/48] TestBasicSearch is not needed --- frac/fraction_test.go | 61 +++++++++++++++++-------------------------- 1 file changed, 24 insertions(+), 37 deletions(-) diff --git a/frac/fraction_test.go b/frac/fraction_test.go index fcbc33f7..01f9bb5c 100644 --- a/frac/fraction_test.go +++ b/frac/fraction_test.go @@ -84,15 +84,15 @@ func (s *FractionTestSuite) SetupTestCommon() { s.tmpDir, err = os.MkdirTemp("", "fraction_test_*") s.Require().NoError(err) - s.sortCache = cache.NewCache[[]byte](cache.NewCleaner(uint64(10*units.MiB), nil), nil) + s.sortCache = cache.NewCache[[]byte](cache.NewCleaner(uint64(units.KiB), nil), nil) s.indexCache = &IndexCache{ - MIDs: cache.NewCache[[]byte](cache.NewCleaner(uint64(10*units.MiB), nil), nil), - RIDs: cache.NewCache[[]byte](cache.NewCleaner(uint64(10*units.MiB), nil), nil), - Params: cache.NewCache[seqids.BlockParams](cache.NewCleaner(uint64(10*units.MiB), nil), nil), - LIDs: cache.NewCache[*lids.Block](cache.NewCleaner(uint64(10*units.MiB), nil), nil), - Tokens: cache.NewCache[*token.Block](cache.NewCleaner(uint64(10*units.MiB), nil), nil), - TokenTable: cache.NewCache[token.Table](cache.NewCleaner(uint64(10*units.MiB), nil), nil), - Registry: cache.NewCache[[]byte](cache.NewCleaner(uint64(10*units.MiB), nil), nil), + MIDs: cache.NewCache[[]byte](cache.NewCleaner(uint64(units.KiB), nil), nil), + RIDs: cache.NewCache[[]byte](cache.NewCleaner(uint64(units.KiB), nil), nil), + Params: cache.NewCache[seqids.BlockParams](cache.NewCleaner(uint64(units.KiB), nil), nil), + LIDs: cache.NewCache[*lids.Block](cache.NewCleaner(uint64(units.KiB), nil), nil), + Tokens: cache.NewCache[*token.Block](cache.NewCleaner(uint64(units.KiB), nil), nil), + TokenTable: cache.NewCache[token.Table](cache.NewCleaner(uint64(units.KiB), nil), nil), + Registry: cache.NewCache[[]byte](cache.NewCleaner(uint64(units.KiB), nil), nil), } s.readLimiter = storage.NewReadLimiter(2, NopCounter{}) } @@ -103,33 +103,12 @@ func (s *FractionTestSuite) TearDownTestCommon() { } func (s *FractionTestSuite) TestSearchKeyword() { - docs := []string{ - `{"timestamp":"2000-01-01T13:00:00Z", "message":"first test document","level":"info","service":"test","status":"ok"}`, - `{"timestamp":"2000-01-01T13:00:01Z", "message":"second test document","level":"error","service":"test","status":"fail"}`, - `{"timestamp":"2000-01-01T13:00:02Z", "message":"third test document","level":"debug","service":"prod","status":"ok"}`, - `{"timestamp":"2000-01-01T13:00:03Z", "message":"fourth test document","level":"info","status":"ok"}`, - } - - s.insertDocuments(docs...) - - s.AssertSearch("level:info", docs, []int{3, 0}) - s.AssertSearch("level:error", docs, []int{1}) - s.AssertSearch("level:debug", docs, []int{2}) - - s.AssertSearch("service:test", docs, []int{1, 0}) - s.AssertSearch("service:prod", docs, []int{2}) - s.AssertSearch("_exists_:service", docs, []int{2, 1, 0}) - - s.AssertSearch("status:ok", docs, []int{3, 2, 0}) - s.AssertSearch("status:fail", docs, []int{1}) -} - -func (s *FractionTestSuite) TestBasicSearch() { docs := []string{ `{"timestamp":"2000-01-01T13:00:25Z","service":"service_a","message":"first message some text","trace_id":"abcdef","source":"prod01","level":"1"}`, `{"timestamp":"2000-01-01T13:00:32Z","service":"service_b","message":"second message other text","trace_id":"abcdef","source":"prod01","level":"1"}`, `{"timestamp":"2000-01-01T13:00:43Z","service":"service_c","message":"third message other text","trace_id":"aaaaaa","source":"prod02","level":"2"}`, `{"timestamp":"2000-01-01T13:00:53Z","service":"service_a","message":"fourth message some text","trace_id":"bbbbbb","source":"prod01","level":"1"}`, + `{"timestamp":"2000-01-01T13:00:54Z","service":"service_c","message":"apple","source":"prod03"}`, } s.insertDocuments(docs...) @@ -144,7 +123,10 @@ func (s *FractionTestSuite) TestBasicSearch() { s.AssertSearch("trace_id:a*f", docs, []int{1, 0}) s.AssertSearch("trace_id:a*a", docs, []int{2}) s.AssertSearch("service:service*a", docs, []int{3, 0}) - s.AssertSearch("_all_:*", docs, []int{3, 2, 1, 0}) + s.AssertSearch("_all_:*", docs, []int{4, 3, 2, 1, 0}) + + s.AssertSearch("_exists_:message", docs, []int{4, 3, 2, 1, 0}) + s.AssertSearch("_exists_:level", docs, []int{3, 2, 1, 0}) } func (s *FractionTestSuite) TestSearchNot() { @@ -261,6 +243,10 @@ func (s *FractionTestSuite) TestSearchFullText() { s.AssertSearch("message:third", docs, []int{2}) s.AssertSearch("message:fourth", docs, []int{3}) s.AssertSearch("message:fifth", docs, []int{}) + + s.AssertSearch(`message:"first test"`, docs, []int{0}) + s.AssertSearch(`message:"first document"`, docs, []int{0}) + s.AssertSearch(`message:"test document"`, docs, []int{3, 2, 1, 0}) } func (s *FractionTestSuite) TestSearchPath() { @@ -961,7 +947,7 @@ func (s *FractionTestSuite) newActive(docs ...string) *Active { baseName, activeIndexer, s.readLimiter, - cache.NewCache[[]byte](cache.NewCleaner(uint64(10*units.MiB), nil), nil), + cache.NewCache[[]byte](cache.NewCleaner(uint64(units.KiB), nil), nil), s.sortCache, s.config, ) @@ -1019,7 +1005,7 @@ func (s *FractionTestSuite) newSealed(docs ...string) *Sealed { preloaded, s.readLimiter, s.indexCache, - cache.NewCache[[]byte](cache.NewCleaner(uint64(10*units.MiB), nil), nil), + cache.NewCache[[]byte](cache.NewCleaner(uint64(units.KiB), nil), nil), s.config, ) active.Release() @@ -1027,7 +1013,7 @@ func (s *FractionTestSuite) newSealed(docs ...string) *Sealed { } /* -ActiveFractionSuite TODO +ActiveFractionSuite run tests for active fraction */ type ActiveFractionSuite struct { FractionTestSuite @@ -1054,7 +1040,7 @@ func (s *ActiveFractionSuite) TearDownTest() { } /* -SealedFractionSuite tests TODO comment +SealedFractionSuite run tests for sealed fraction. Active fraction is created first and then sealed. */ type SealedFractionSuite struct { FractionTestSuite @@ -1077,7 +1063,8 @@ func (s *SealedFractionSuite) TearDownTest() { } /* -SealedLoadedFractionSuite +SealedLoadedFractionSuite run tests for sealed fraction. Active fraction is created first and then sealed. +Sealed fraction is then loaded with sealed.NewSealed call */ type SealedLoadedFractionSuite struct { FractionTestSuite @@ -1099,7 +1086,7 @@ func (s *SealedLoadedFractionSuite) newSealedLoaded(docs ...string) *Sealed { sealed.BaseFileName, s.readLimiter, s.indexCache, - cache.NewCache[[]byte](cache.NewCleaner(uint64(10*units.MiB), nil), nil), + cache.NewCache[[]byte](cache.NewCleaner(uint64(units.KiB), nil), nil), nil, s.config) s.fraction = sealed From af84339fc77e0afe92a52f6b79184450d6b1f10d Mon Sep 17 00:00:00 2001 From: Andrei Cheboksarov <37665782+cheb0@users.noreply.github.com> Date: Fri, 10 Oct 2025 12:16:38 +0400 Subject: [PATCH 23/48] properly release data provider so that Suicide() doesn't deadlock in case of test failure --- frac/fraction_test.go | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/frac/fraction_test.go b/frac/fraction_test.go index 01f9bb5c..e339b7ad 100644 --- a/frac/fraction_test.go +++ b/frac/fraction_test.go @@ -856,13 +856,14 @@ func (s *FractionTestSuite) AssertSearchWithSearchParams(params *processor.Searc withTotals = append(withTotals, true) } + dp, release := s.fraction.DataProvider(context.Background()) + defer release() + for _, order := range []seq.DocsOrder{seq.DocsOrderDesc, seq.DocsOrderAsc} { for _, withTotal := range withTotals { params.Order = order params.WithTotal = withTotal - dp, release := s.fraction.DataProvider(context.Background()) - qpr, err := dp.Search(*params) s.Require().NoError(err, "search failed for query with order=%v", order) if withTotal { @@ -897,8 +898,6 @@ func (s *FractionTestSuite) AssertSearchWithSearchParams(params *processor.Searc i, expectedIndexes[i], order) } } - - release() } } } From 5f1635a9c9120721e23524a6c7a158d34d479695 Mon Sep 17 00:00:00 2001 From: Andrei Cheboksarov <37665782+cheb0@users.noreply.github.com> Date: Fri, 10 Oct 2025 12:32:15 +0400 Subject: [PATCH 24/48] fail test properly if docs inserted more than once --- frac/fraction_test.go | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/frac/fraction_test.go b/frac/fraction_test.go index e339b7ad..060df548 100644 --- a/frac/fraction_test.go +++ b/frac/fraction_test.go @@ -110,6 +110,7 @@ func (s *FractionTestSuite) TestSearchKeyword() { `{"timestamp":"2000-01-01T13:00:53Z","service":"service_a","message":"fourth message some text","trace_id":"bbbbbb","source":"prod01","level":"1"}`, `{"timestamp":"2000-01-01T13:00:54Z","service":"service_c","message":"apple","source":"prod03"}`, } + s.insertDocuments(docs...) s.AssertSearch("service:service_a", docs, []int{3, 0}) @@ -1022,6 +1023,9 @@ func (s *ActiveFractionSuite) SetupTest() { s.SetupTestCommon() s.insertDocuments = func(docs ...string) { + if s.fraction != nil { + s.Require().Fail("can insert docs only once in each test") + } s.fraction = s.newActive(docs...) } } @@ -1031,8 +1035,11 @@ func (s *ActiveFractionSuite) TearDownTest() { active, ok := s.fraction.(*Active) if ok { active.Release() + } else { + s.Require().Fail("fraction is not of Active type") } s.fraction.Suicide() + s.fraction = nil } s.TearDownTestCommon() @@ -1049,7 +1056,9 @@ func (s *SealedFractionSuite) SetupTest() { s.SetupTestCommon() s.insertDocuments = func(docs ...string) { - // TODO check if fraction is nil + if s.fraction != nil { + s.Require().Fail("can insert docs only once in each test") + } s.fraction = s.newSealed(docs...) } } @@ -1057,6 +1066,7 @@ func (s *SealedFractionSuite) SetupTest() { func (s *SealedFractionSuite) TearDownTest() { if s.fraction != nil { s.fraction.Suicide() + s.fraction = nil } s.TearDownTestCommon() } @@ -1073,6 +1083,9 @@ func (s *SealedLoadedFractionSuite) SetupTest() { s.SetupTestCommon() s.insertDocuments = func(docs ...string) { + if s.fraction != nil { + s.Require().Fail("can insert docs only once in each test") + } s.fraction = s.newSealedLoaded(docs...) } } @@ -1095,6 +1108,7 @@ func (s *SealedLoadedFractionSuite) newSealedLoaded(docs ...string) *Sealed { func (s *SealedLoadedFractionSuite) TearDownTest() { if s.fraction != nil { s.fraction.Suicide() + s.fraction = nil } s.TearDownTestCommon() } From ee8498f019ba37d77fc2124896d262be5ece93e1 Mon Sep 17 00:00:00 2001 From: Andrei Cheboksarov <37665782+cheb0@users.noreply.github.com> Date: Fri, 10 Oct 2025 13:44:52 +0400 Subject: [PATCH 25/48] test for frac.Info, fixes --- frac/fraction_test.go | 153 ++++++++++++++++++++++++++---------------- frac/nop_counter.go | 33 --------- 2 files changed, 96 insertions(+), 90 deletions(-) delete mode 100644 frac/nop_counter.go diff --git a/frac/fraction_test.go b/frac/fraction_test.go index 060df548..b2e112ea 100644 --- a/frac/fraction_test.go +++ b/frac/fraction_test.go @@ -29,13 +29,10 @@ import ( type FractionTestSuite struct { suite.Suite - tmpDir string - sortCache *cache.Cache[[]byte] - indexCache *IndexCache - readLimiter *storage.ReadLimiter - config *Config - mapping seq.Mapping - tokenizers map[seq.TokenizerType]tokenizer.Tokenizer + tmpDir string + config *Config + mapping seq.Mapping + tokenizers map[seq.TokenizerType]tokenizer.Tokenizer fraction Fraction @@ -83,18 +80,10 @@ func (s *FractionTestSuite) SetupTestCommon() { var err error s.tmpDir, err = os.MkdirTemp("", "fraction_test_*") s.Require().NoError(err) +} - s.sortCache = cache.NewCache[[]byte](cache.NewCleaner(uint64(units.KiB), nil), nil) - s.indexCache = &IndexCache{ - MIDs: cache.NewCache[[]byte](cache.NewCleaner(uint64(units.KiB), nil), nil), - RIDs: cache.NewCache[[]byte](cache.NewCleaner(uint64(units.KiB), nil), nil), - Params: cache.NewCache[seqids.BlockParams](cache.NewCleaner(uint64(units.KiB), nil), nil), - LIDs: cache.NewCache[*lids.Block](cache.NewCleaner(uint64(units.KiB), nil), nil), - Tokens: cache.NewCache[*token.Block](cache.NewCleaner(uint64(units.KiB), nil), nil), - TokenTable: cache.NewCache[token.Table](cache.NewCleaner(uint64(units.KiB), nil), nil), - Registry: cache.NewCache[[]byte](cache.NewCleaner(uint64(units.KiB), nil), nil), - } - s.readLimiter = storage.NewReadLimiter(2, NopCounter{}) +func newSmallCache[V any]() *cache.Cache[V] { + return cache.NewCache[V](cache.NewCleaner(uint64(units.KiB), nil), nil) } func (s *FractionTestSuite) TearDownTestCommon() { @@ -750,6 +739,41 @@ func (s *FractionTestSuite) TestAggAvgWithoutGroupBy() { s.AssertAggregation(searchParams, seq.AggregateArgs{Func: seq.AggFuncAvg}, expectedBuckets) } +func (s *FractionTestSuite) TestFractionInfo() { + docs := []string{ + `{"timestamp":"2000-01-01T13:00:25Z","service":"service_a","message":"first message some text", "service":"gateway"}`, + `{"timestamp":"2000-01-01T13:00:32Z","service":"service_b","message":"second message other text", "service":"kube-proxy"}`, + `{"timestamp":"2000-01-01T13:00:43Z","service":"service_c","message":"third message other text", "service":"gateway"}`, + `{"timestamp":"2000-01-01T13:00:53Z","service":"service_a","message":"fourth message some text", "service":"kube-proxy"}`, + `{"timestamp":"2000-01-01T13:00:54Z","service":"service_c","message":"apple","service":"kube-scheduler"}`, + } + + s.insertDocuments(docs...) + + info := s.fraction.Info() + + // these checks should not break without a reason + // but if compression/marshalling has changed, expected values can be updated accordingly + s.Require().Equal(uint32(5), info.DocsTotal, "doc total doesn't match") + s.Require().Equal(uint64(234), info.DocsOnDisk, "doc total doesn't match") + s.Require().Equal(uint64(573), info.DocsRaw, "doc total doesn't match") + s.Require().Equal(seq.MID(946731625000), info.From, "from doesn't match") + s.Require().Equal(seq.MID(946731654000), info.To, "from doesn't match") + + switch s.fraction.(type) { + case *Active: + s.Require().True(info.MetaOnDisk > uint64(340) && info.MetaOnDisk < uint64(350), + "meta on disk doesn't match. actual value: %d", info.MetaOnDisk) + s.Require().Equal(uint64(0), info.IndexOnDisk, "index on disk doesn't match") + case *Sealed: + s.Require().Equal(uint64(0), info.MetaOnDisk, "meta on disk doesn't match. actual value") + s.Require().True(info.IndexOnDisk > uint64(1450) && info.IndexOnDisk < uint64(1500), + "index on disk doesn't match. actual value: %d", info.MetaOnDisk) + default: + s.Require().Fail("unsupported fraction type") + } +} + type searchOption func(*processor.SearchParams) error func (s *FractionTestSuite) query(queryString string, options ...searchOption) *processor.SearchParams { @@ -867,20 +891,17 @@ func (s *FractionTestSuite) AssertSearchWithSearchParams(params *processor.Searc qpr, err := dp.Search(*params) s.Require().NoError(err, "search failed for query with order=%v", order) + if withTotal { - s.Require().Equal( - uint64(len(expectedIndexes)), - qpr.Total, - "total doesn't match. expected: %d, actual: %d", len(expectedIndexes), qpr.Total) + s.Require().Equal(uint64(len(expectedIndexes)), qpr.Total, "qpr.total doesn't match") } else { s.Require().Equal(uint64(0), qpr.Total, "qpr has total but not expected to have") } - s.Require().Equal(len(expectedIndexes), qpr.IDs.Len(), - "expected %d docs but found %d with order=%v", len(expectedIndexes), qpr.IDs.Len(), order) + s.Require().Equal(len(expectedIndexes), qpr.IDs.Len(), "doc count doesn't match") docs, err := dp.Fetch(qpr.IDs.IDs()) - s.Require().NoError(err, "failed to fetch docs for IDs: %v", qpr.IDs.IDs()) + s.Require().NoError(err, "failed to fetch docs") if order.IsReverse() { slices.Reverse(docs) @@ -894,9 +915,7 @@ func (s *FractionTestSuite) AssertSearchWithSearchParams(params *processor.Searc for i, fetchedDoc := range fetchedDocs { if i < len(expectedIndexes) { expectedDoc := originalDocs[expectedIndexes[i]] - s.Require().Equal(expectedDoc, fetchedDoc, - "doc at index %d doesn't match expected doc at original index %d with order=%v", - i, expectedIndexes[i], order) + s.Require().Equal(expectedDoc, fetchedDoc, "doc at index %d doesn't match") } } } @@ -916,7 +935,7 @@ func (s *FractionTestSuite) AssertAggregation( aggResults := qpr.Aggregate([]seq.AggregateArgs{aggregate}) s.Require().Equal(1, len(aggResults)) - s.Require().Equal(len(expectedBuckets), len(aggResults[0].Buckets), "wrong number of buckets") + s.Require().Equal(len(expectedBuckets), len(aggResults[0].Buckets), "bucket count doesn't match") for _, expectedBucket := range expectedBuckets { found := false @@ -946,9 +965,9 @@ func (s *FractionTestSuite) newActive(docs ...string) *Active { active := NewActive( baseName, activeIndexer, - s.readLimiter, - cache.NewCache[[]byte](cache.NewCleaner(uint64(units.KiB), nil), nil), - s.sortCache, + storage.NewReadLimiter(1, nil), + newSmallCache[[]byte](), + newSmallCache[[]byte](), s.config, ) @@ -1000,12 +1019,22 @@ func (s *FractionTestSuite) newSealed(docs ...string) *Sealed { preloaded, err := sealing.Seal(activeSealingSource, sealParams) s.Require().NoError(err, "Sealing failed") + indexCache := &IndexCache{ + MIDs: newSmallCache[[]byte](), + RIDs: newSmallCache[[]byte](), + Params: newSmallCache[seqids.BlockParams](), + LIDs: newSmallCache[*lids.Block](), + Tokens: newSmallCache[*token.Block](), + TokenTable: newSmallCache[token.Table](), + Registry: newSmallCache[[]byte](), + } + sealed := NewSealedPreloaded( active.BaseFileName, preloaded, - s.readLimiter, - s.indexCache, - cache.NewCache[[]byte](cache.NewCleaner(uint64(units.KiB), nil), nil), + storage.NewReadLimiter(1, nil), + indexCache, + newSmallCache[[]byte](), s.config, ) active.Release() @@ -1013,24 +1042,24 @@ func (s *FractionTestSuite) newSealed(docs ...string) *Sealed { } /* -ActiveFractionSuite run tests for active fraction +ActiveFractionTestSuite run tests for active fraction */ -type ActiveFractionSuite struct { +type ActiveFractionTestSuite struct { FractionTestSuite } -func (s *ActiveFractionSuite) SetupTest() { +func (s *ActiveFractionTestSuite) SetupTest() { s.SetupTestCommon() s.insertDocuments = func(docs ...string) { if s.fraction != nil { - s.Require().Fail("can insert docs only once in each test") + s.Require().Fail("can insert docs only once") } s.fraction = s.newActive(docs...) } } -func (s *ActiveFractionSuite) TearDownTest() { +func (s *ActiveFractionTestSuite) TearDownTest() { if s.fraction != nil { active, ok := s.fraction.(*Active) if ok { @@ -1046,24 +1075,24 @@ func (s *ActiveFractionSuite) TearDownTest() { } /* -SealedFractionSuite run tests for sealed fraction. Active fraction is created first and then sealed. +SealedFractionTestSuite run tests for sealed fraction. Active fraction is created first and then sealed. */ -type SealedFractionSuite struct { +type SealedFractionTestSuite struct { FractionTestSuite } -func (s *SealedFractionSuite) SetupTest() { +func (s *SealedFractionTestSuite) SetupTest() { s.SetupTestCommon() s.insertDocuments = func(docs ...string) { if s.fraction != nil { - s.Require().Fail("can insert docs only once in each test") + s.Require().Fail("can insert docs only once") } s.fraction = s.newSealed(docs...) } } -func (s *SealedFractionSuite) TearDownTest() { +func (s *SealedFractionTestSuite) TearDownTest() { if s.fraction != nil { s.fraction.Suicide() s.fraction = nil @@ -1072,40 +1101,50 @@ func (s *SealedFractionSuite) TearDownTest() { } /* -SealedLoadedFractionSuite run tests for sealed fraction. Active fraction is created first and then sealed. +SealedLoadedFractionTestSuite run tests for sealed fraction. Active fraction is created first and then sealed. Sealed fraction is then loaded with sealed.NewSealed call */ -type SealedLoadedFractionSuite struct { +type SealedLoadedFractionTestSuite struct { FractionTestSuite } -func (s *SealedLoadedFractionSuite) SetupTest() { +func (s *SealedLoadedFractionTestSuite) SetupTest() { s.SetupTestCommon() s.insertDocuments = func(docs ...string) { if s.fraction != nil { - s.Require().Fail("can insert docs only once in each test") + s.Require().Fail("can insert docs only once") } s.fraction = s.newSealedLoaded(docs...) } } -func (s *SealedLoadedFractionSuite) newSealedLoaded(docs ...string) *Sealed { +func (s *SealedLoadedFractionTestSuite) newSealedLoaded(docs ...string) *Sealed { sealed := s.newSealed(docs...) sealed.close("closed") + indexCache := &IndexCache{ + MIDs: newSmallCache[[]byte](), + RIDs: newSmallCache[[]byte](), + Params: newSmallCache[seqids.BlockParams](), + LIDs: newSmallCache[*lids.Block](), + Tokens: newSmallCache[*token.Block](), + TokenTable: newSmallCache[token.Table](), + Registry: newSmallCache[[]byte](), + } + sealed = NewSealed( sealed.BaseFileName, - s.readLimiter, - s.indexCache, - cache.NewCache[[]byte](cache.NewCleaner(uint64(units.KiB), nil), nil), + storage.NewReadLimiter(1, nil), + indexCache, + newSmallCache[[]byte](), nil, s.config) s.fraction = sealed return sealed } -func (s *SealedLoadedFractionSuite) TearDownTest() { +func (s *SealedLoadedFractionTestSuite) TearDownTest() { if s.fraction != nil { s.fraction.Suicide() s.fraction = nil @@ -1114,7 +1153,7 @@ func (s *SealedLoadedFractionSuite) TearDownTest() { } func TestFractionSuites(t *testing.T) { - suite.Run(t, new(ActiveFractionSuite)) - suite.Run(t, new(SealedFractionSuite)) - suite.Run(t, new(SealedLoadedFractionSuite)) + suite.Run(t, new(ActiveFractionTestSuite)) + suite.Run(t, new(SealedFractionTestSuite)) + suite.Run(t, new(SealedLoadedFractionTestSuite)) } diff --git a/frac/nop_counter.go b/frac/nop_counter.go deleted file mode 100644 index c7b8666c..00000000 --- a/frac/nop_counter.go +++ /dev/null @@ -1,33 +0,0 @@ -package frac - -import ( - "github.com/prometheus/client_golang/prometheus" - "github.com/prometheus/client_model/go" -) - -type NopCounter struct { -} - -func (n NopCounter) Desc() *prometheus.Desc { - return nil -} - -func (n NopCounter) Write(metric *io_prometheus_client.Metric) error { - return nil -} - -func (n NopCounter) Describe(descs chan<- *prometheus.Desc) { - -} - -func (n NopCounter) Collect(metrics chan<- prometheus.Metric) { - -} - -func (n NopCounter) Inc() { - -} - -func (n NopCounter) Add(f float64) { - -} From 6a7094947d3e6e2a5ded226a7c6234acdce19aa8 Mon Sep 17 00:00:00 2001 From: Andrei Cheboksarov <37665782+cheb0@users.noreply.github.com> Date: Fri, 10 Oct 2025 16:46:55 +0400 Subject: [PATCH 26/48] skip sort docs = false --- frac/fraction_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/frac/fraction_test.go b/frac/fraction_test.go index b2e112ea..8480ba90 100644 --- a/frac/fraction_test.go +++ b/frac/fraction_test.go @@ -48,7 +48,7 @@ func (s *FractionTestSuite) SetupSuite() { MaxTIDsPerFraction: 1000, }, }, - SkipSortDocs: true, // TODO enabling sorting will fail tests + SkipSortDocs: false, KeepMetaFile: false, } s.tokenizers = map[seq.TokenizerType]tokenizer.Tokenizer{ @@ -755,7 +755,7 @@ func (s *FractionTestSuite) TestFractionInfo() { // these checks should not break without a reason // but if compression/marshalling has changed, expected values can be updated accordingly s.Require().Equal(uint32(5), info.DocsTotal, "doc total doesn't match") - s.Require().Equal(uint64(234), info.DocsOnDisk, "doc total doesn't match") + s.Require().Equal(uint64(235), info.DocsOnDisk, "doc total doesn't match") s.Require().Equal(uint64(573), info.DocsRaw, "doc total doesn't match") s.Require().Equal(seq.MID(946731625000), info.From, "from doesn't match") s.Require().Equal(seq.MID(946731654000), info.To, "from doesn't match") From 412b945bd4c9df58b1d47e104a503ab882d248d0 Mon Sep 17 00:00:00 2001 From: Evgenii Guguchkin Date: Fri, 10 Oct 2025 15:51:20 +0300 Subject: [PATCH 27/48] fix imports --- indexer/metrics.go | 3 ++- proxy/bulk/ingestor.go | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/indexer/metrics.go b/indexer/metrics.go index a146de23..e91d6fc7 100644 --- a/indexer/metrics.go +++ b/indexer/metrics.go @@ -1,9 +1,10 @@ package indexer import ( - "github.com/ozontech/seq-db/metric" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promauto" + + "github.com/ozontech/seq-db/metric" ) var ( diff --git a/proxy/bulk/ingestor.go b/proxy/bulk/ingestor.go index b59a5022..3f7439e9 100644 --- a/proxy/bulk/ingestor.go +++ b/proxy/bulk/ingestor.go @@ -8,6 +8,8 @@ import ( "sync/atomic" "time" + "go.uber.org/zap" + "github.com/ozontech/seq-db/bytespool" "github.com/ozontech/seq-db/consts" "github.com/ozontech/seq-db/indexer" @@ -17,7 +19,6 @@ import ( "github.com/ozontech/seq-db/proxy/stores" "github.com/ozontech/seq-db/seq" "github.com/ozontech/seq-db/tokenizer" - "go.uber.org/zap" ) type MappingProvider interface { From 4dff0e84d71ab20c5bd12d0cf7e67f7a4df435b7 Mon Sep 17 00:00:00 2001 From: Andrei Cheboksarov <37665782+cheb0@users.noreply.github.com> Date: Mon, 13 Oct 2025 10:47:30 +0400 Subject: [PATCH 28/48] test limit, fraction info test --- frac/fraction_test.go | 51 ++++++++++++++++++++++++++++++++++++++----- 1 file changed, 45 insertions(+), 6 deletions(-) diff --git a/frac/fraction_test.go b/frac/fraction_test.go index 8480ba90..8e5c8240 100644 --- a/frac/fraction_test.go +++ b/frac/fraction_test.go @@ -413,6 +413,31 @@ func (s *FractionTestSuite) TestSearchFromTo() { assertSearch(`NOT trace_id:0 AND NOT trace_id:2`, 3, 5, []int{5, 4, 3}) } +func (s *FractionTestSuite) TestSearchWithLimit() { + docs := []string{ + `{"timestamp":"2000-01-01T13:00:00.000Z","message":"bad","level":"1","trace_id":"0","service":"0"}`, + `{"timestamp":"2000-01-01T13:00:00.001Z","message":"good","level":"2","trace_id":"0","service":"1"}`, + `{"timestamp":"2000-01-01T13:00:00.002Z","message":"bad","level":"3","trace_id":"0","service":"2"}`, + `{"timestamp":"2000-01-01T13:00:00.003Z","message":"good","level":"4","trace_id":"1","service":"0"}`, + `{"timestamp":"2000-01-01T13:00:00.004Z","message":"bad","level":"5","trace_id":"1","service":"1"}`, + `{"timestamp":"2000-01-01T13:00:00.005Z","message":"good","level":"6","trace_id":"1","service":"2"}`, + `{"timestamp":"2000-01-01T13:00:00.006Z","message":"bad","level":"7","trace_id":"2","service":"0"}`, + `{"timestamp":"2000-01-01T13:00:00.007Z","message":"good","level":"8","trace_id":"2","service":"1"}`, + } + + s.insertDocuments(docs...) + + s.AssertSearch(s.query("message:good"), docs, []int{7, 5, 3, 1}) + s.AssertSearch(s.query("message:good", withLimit(3)), docs, []int{7, 5, 3}) + s.AssertSearch(s.query( + "message:good", + withLimit(2), + withFrom("2000-01-01T13:00:00.000Z"), + withTo("2000-01-01T13:00:00.005Z")), + docs, + []int{5, 3}) +} + func (s *FractionTestSuite) TestBasicAggregation() { docs := []string{ `{"timestamp":"2000-01-01T13:00:00.000Z","message":"bad","level":"1","trace_id":"0","service":"proxy"}`, @@ -755,8 +780,9 @@ func (s *FractionTestSuite) TestFractionInfo() { // these checks should not break without a reason // but if compression/marshalling has changed, expected values can be updated accordingly s.Require().Equal(uint32(5), info.DocsTotal, "doc total doesn't match") - s.Require().Equal(uint64(235), info.DocsOnDisk, "doc total doesn't match") - s.Require().Equal(uint64(573), info.DocsRaw, "doc total doesn't match") + s.Require().True(info.DocsOnDisk > uint64(230) && info.DocsOnDisk < uint64(240), + "doc raw doesn't match. actual value: %d", info.DocsOnDisk) + s.Require().Equal(uint64(573), info.DocsRaw, "doc raw doesn't match") s.Require().Equal(seq.MID(946731625000), info.From, "from doesn't match") s.Require().Equal(seq.MID(946731654000), info.To, "from doesn't match") @@ -875,16 +901,29 @@ func (s *FractionTestSuite) AssertSearch(queryObject interface{}, originalDocs [ } } -func (s *FractionTestSuite) AssertSearchWithSearchParams(params *processor.SearchParams, originalDocs []string, expectedIndexes []int, checkTotal bool) { +func (s *FractionTestSuite) AssertSearchWithSearchParams( + params *processor.SearchParams, + originalDocs []string, + expectedIndexes []int, + checkTotal bool) { + var withTotals = []bool{false} - if checkTotal { + + // We can check total only if limit is not set. Otherwise, total returns a count + // of all docs which match the query + if checkTotal && params.Limit == 0 { withTotals = append(withTotals, true) } + var sortOrders = []seq.DocsOrder{seq.DocsOrderDesc} + if params.Limit == 0 { + sortOrders = append(sortOrders, seq.DocsOrderAsc) + } + dp, release := s.fraction.DataProvider(context.Background()) defer release() - for _, order := range []seq.DocsOrder{seq.DocsOrderDesc, seq.DocsOrderAsc} { + for _, order := range sortOrders { for _, withTotal := range withTotals { params.Order = order params.WithTotal = withTotal @@ -915,7 +954,7 @@ func (s *FractionTestSuite) AssertSearchWithSearchParams(params *processor.Searc for i, fetchedDoc := range fetchedDocs { if i < len(expectedIndexes) { expectedDoc := originalDocs[expectedIndexes[i]] - s.Require().Equal(expectedDoc, fetchedDoc, "doc at index %d doesn't match") + s.Require().Equal(expectedDoc, fetchedDoc, "doc at index %d doesn't match", i) } } } From 1f37d71b7cc309131f4746076f2f45f4abb7da9b Mon Sep 17 00:00:00 2001 From: Andrei Cheboksarov <37665782+cheb0@users.noreply.github.com> Date: Mon, 13 Oct 2025 17:50:50 +0400 Subject: [PATCH 29/48] DataProvider interface is now gone --- frac/fraction_test.go | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/frac/fraction_test.go b/frac/fraction_test.go index 8e5c8240..80dc6bd0 100644 --- a/frac/fraction_test.go +++ b/frac/fraction_test.go @@ -451,10 +451,8 @@ func (s *FractionTestSuite) TestBasicAggregation() { s.insertDocuments(docs...) assertAggSearch := func(searchParams *processor.SearchParams, expected []map[string]uint64) { - dp, release := s.fraction.DataProvider(context.Background()) - defer release() - qpr, err := dp.Search(*searchParams) + qpr, err := s.fraction.Search(context.Background(), *searchParams) s.Require().NoError(err, "search failed") s.Require().Equal(len(expected), len(qpr.Aggs)) @@ -920,15 +918,12 @@ func (s *FractionTestSuite) AssertSearchWithSearchParams( sortOrders = append(sortOrders, seq.DocsOrderAsc) } - dp, release := s.fraction.DataProvider(context.Background()) - defer release() - for _, order := range sortOrders { for _, withTotal := range withTotals { params.Order = order params.WithTotal = withTotal - qpr, err := dp.Search(*params) + qpr, err := s.fraction.Search(context.Background(), *params) s.Require().NoError(err, "search failed for query with order=%v", order) if withTotal { @@ -939,7 +934,7 @@ func (s *FractionTestSuite) AssertSearchWithSearchParams( s.Require().Equal(len(expectedIndexes), qpr.IDs.Len(), "doc count doesn't match") - docs, err := dp.Fetch(qpr.IDs.IDs()) + docs, err := s.fraction.Fetch(context.Background(), qpr.IDs.IDs()) s.Require().NoError(err, "failed to fetch docs") if order.IsReverse() { @@ -966,10 +961,7 @@ func (s *FractionTestSuite) AssertAggregation( aggregate seq.AggregateArgs, expectedBuckets []seq.AggregationBucket) { - dp, release := s.fraction.DataProvider(context.Background()) - defer release() - - qpr, err := dp.Search(*searchParams) + qpr, err := s.fraction.Search(context.Background(), *searchParams) s.Require().NoError(err, "search failed") aggResults := qpr.Aggregate([]seq.AggregateArgs{aggregate}) From 82bcebe72c8dc992b20439590b141edb241ec39d Mon Sep 17 00:00:00 2001 From: Andrei Cheboksarov <37665782+cheb0@users.noreply.github.com> Date: Tue, 14 Oct 2025 12:21:53 +0400 Subject: [PATCH 30/48] Add test for histogram --- frac/fraction_test.go | 120 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 120 insertions(+) diff --git a/frac/fraction_test.go b/frac/fraction_test.go index 80dc6bd0..0cc6c9d5 100644 --- a/frac/fraction_test.go +++ b/frac/fraction_test.go @@ -438,6 +438,97 @@ func (s *FractionTestSuite) TestSearchWithLimit() { []int{5, 3}) } +func (s *FractionTestSuite) TestSearchHist() { + docs := []string{ + `{"timestamp":"2000-01-01T13:00:01.549Z","message": "apple banana smoothie"}`, + `{"timestamp":"2000-01-01T13:00:02.690Z","message": "apple banana salad"}`, + `{"timestamp":"2000-01-01T13:00:03.102Z","message": "apple banana pineapple smoothie"}`, + `{"timestamp":"2000-01-01T13:00:03.052Z","message": "apple juice"}`, + `{"timestamp":"2000-01-01T13:00:04.999Z","message": "banana"}`, + `{"timestamp":"2000-01-01T13:00:05.000Z","message": "apple juice"}`, + `{"timestamp":"2000-01-01T13:00:10.777Z","message": "apple banana"}`, + `{"timestamp":"2000-01-01T13:00:15.100Z","message": "apple pie"}`, + `{"timestamp":"2000-01-01T13:00:15.200Z","message": "apple tart"}`, + `{"timestamp":"2000-01-01T13:00:15.300Z","message": "apple crisp"}`, + `{"timestamp":"2000-01-01T13:00:20.500Z","message": "orange juice"}`, + `{"timestamp":"2000-01-01T13:00:25.600Z","message": "apple cider"}`, + } + + s.insertDocuments(docs...) + + s.AssertHist(s.query("message:apple", withHist(1000)), map[string]uint64{ + "2000-01-01T13:00:01.000Z": 1, + "2000-01-01T13:00:02.000Z": 1, + "2000-01-01T13:00:03.000Z": 2, + "2000-01-01T13:00:05.000Z": 1, + "2000-01-01T13:00:10.000Z": 1, + "2000-01-01T13:00:15.000Z": 3, + "2000-01-01T13:00:25.000Z": 1, + }) + s.AssertHist(s.query("message:apple", withHist(3000)), map[string]uint64{ + "2000-01-01T13:00:00.000Z": 2, + "2000-01-01T13:00:03.000Z": 3, + "2000-01-01T13:00:09.000Z": 1, + "2000-01-01T13:00:15.000Z": 3, + "2000-01-01T13:00:24.000Z": 1, + }) + s.AssertHist(s.query("message:*", withHist(1000)), map[string]uint64{ + "2000-01-01T13:00:01.000Z": 1, + "2000-01-01T13:00:02.000Z": 1, + "2000-01-01T13:00:03.000Z": 2, + "2000-01-01T13:00:04.000Z": 1, + "2000-01-01T13:00:05.000Z": 1, + "2000-01-01T13:00:10.000Z": 1, + "2000-01-01T13:00:15.000Z": 3, + "2000-01-01T13:00:20.000Z": 1, + "2000-01-01T13:00:25.000Z": 1, + }) + s.AssertHist(s.query("message:*", withHist(2000)), map[string]uint64{ + "2000-01-01T13:00:00.000Z": 1, + "2000-01-01T13:00:02.000Z": 3, + "2000-01-01T13:00:04.000Z": 2, + "2000-01-01T13:00:10.000Z": 1, + "2000-01-01T13:00:14.000Z": 3, + "2000-01-01T13:00:20.000Z": 1, + "2000-01-01T13:00:24.000Z": 1, + }) + s.AssertHist(s.query( + "message:*", + withFrom("2000-01-01T13:00:03.000Z"), + withTo("2000-01-01T13:00:15.000Z"), + withHist(1000)), + map[string]uint64{ + "2000-01-01T13:00:03.000Z": 2, + "2000-01-01T13:00:04.000Z": 1, + "2000-01-01T13:00:05.000Z": 1, + "2000-01-01T13:00:10.000Z": 1, + }) + s.AssertHist(s.query( + "message:*", + withFrom("2000-01-01T13:00:03.000Z"), + withTo("2000-01-01T13:00:15.000Z"), + withHist(1000)), + map[string]uint64{ + "2000-01-01T13:00:03.000Z": 2, + "2000-01-01T13:00:04.000Z": 1, + "2000-01-01T13:00:05.000Z": 1, + "2000-01-01T13:00:10.000Z": 1, + }) + // Limit doesn't `limit` histogram but only query results + s.AssertHist(s.query( + "message:*", + withFrom("2000-01-01T13:00:03.000Z"), + withTo("2000-01-01T13:00:15.000Z"), + withLimit(1), + withHist(1000)), + map[string]uint64{ + "2000-01-01T13:00:03.000Z": 2, + "2000-01-01T13:00:04.000Z": 1, + "2000-01-01T13:00:05.000Z": 1, + "2000-01-01T13:00:10.000Z": 1, + }) +} + func (s *FractionTestSuite) TestBasicAggregation() { docs := []string{ `{"timestamp":"2000-01-01T13:00:00.000Z","message":"bad","level":"1","trace_id":"0","service":"proxy"}`, @@ -867,6 +958,13 @@ func withLimit(limit int) searchOption { } } +func withHist(histInterval uint64) searchOption { + return func(p *processor.SearchParams) error { + p.HistInterval = histInterval + return nil + } +} + func aggField(field string) *parser.Literal { searchAll := []parser.Term{{ Kind: parser.TermSymbol, Data: "*", @@ -988,6 +1086,28 @@ func (s *FractionTestSuite) AssertAggregation( } } +func (s *FractionTestSuite) AssertHist( + searchParams *processor.SearchParams, + expectedHist map[string]uint64) { + + qpr, err := s.fraction.Search(context.Background(), *searchParams) + s.Require().NoError(err, "search failed") + s.Require().Equal(len(expectedHist), len(qpr.Histogram), "histogram count doesn't match") + + for ts, expectedCount := range expectedHist { + timestamp, err := time.Parse(time.RFC3339, ts) + s.Require().NoError(err, "timestamp parsing failed") + expectedMID := seq.TimeToMID(timestamp) + + actualCount, ok := qpr.Histogram[expectedMID] + if ok { + s.Require().Equal(expectedCount, actualCount, "count at bucket %s doesn't match", ts) + } else { + s.Require().Fail("bucket not found", "bucket %s was not found in qpr.hist", ts) + } + } +} + func (s *FractionTestSuite) newActive(docs ...string) *Active { baseName := filepath.Join(s.tmpDir, "test_fraction") activeIndexer := NewActiveIndexer(4, 10) From af4807720ed013c666c0baa7887ea264d670a26b Mon Sep 17 00:00:00 2001 From: Andrei Cheboksarov <37665782+cheb0@users.noreply.github.com> Date: Tue, 14 Oct 2025 12:23:34 +0400 Subject: [PATCH 31/48] delete unneeded mapping --- frac/fraction_test.go | 3 --- 1 file changed, 3 deletions(-) diff --git a/frac/fraction_test.go b/frac/fraction_test.go index 0cc6c9d5..77bd9ed6 100644 --- a/frac/fraction_test.go +++ b/frac/fraction_test.go @@ -69,9 +69,6 @@ func (s *FractionTestSuite) SetupSuite() { "request_uri": seq.NewSingleType(seq.TokenizerTypePath, "", 0), "spans": seq.NewSingleType(seq.TokenizerTypeNested, "", 0), "spans.span_id": seq.NewSingleType(seq.TokenizerTypeKeyword, "", 0), - "process": seq.NewSingleType(seq.TokenizerTypeObject, "", 0), - "process.tags": seq.NewSingleType(seq.TokenizerTypeTags, "", 0), - "tags": seq.NewSingleType(seq.TokenizerTypeTags, "", 0), "v": seq.NewSingleType(seq.TokenizerTypeKeyword, "", 0), } } From 926c3b5abdaa69fb0401f7396906691db673a0a3 Mon Sep 17 00:00:00 2001 From: Andrei Cheboksarov <37665782+cheb0@users.noreply.github.com> Date: Tue, 14 Oct 2025 12:57:24 +0400 Subject: [PATCH 32/48] add test for ip_range query --- frac/fraction_test.go | 45 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/frac/fraction_test.go b/frac/fraction_test.go index 77bd9ed6..1b03d15d 100644 --- a/frac/fraction_test.go +++ b/frac/fraction_test.go @@ -62,6 +62,7 @@ func (s *FractionTestSuite) SetupSuite() { "k8s_container": seq.NewSingleType(seq.TokenizerTypeKeyword, "", 0), "message": seq.NewSingleType(seq.TokenizerTypeText, "", 0), "level": seq.NewSingleType(seq.TokenizerTypeKeyword, "", 0), + "client_ip": seq.NewSingleType(seq.TokenizerTypeKeyword, "", 0), "service": seq.NewSingleType(seq.TokenizerTypeKeyword, "", 0), "status": seq.NewSingleType(seq.TokenizerTypeKeyword, "", 0), "source": seq.NewSingleType(seq.TokenizerTypeKeyword, "", 0), @@ -303,6 +304,50 @@ func (s *FractionTestSuite) TestSearchRange() { s.AssertSearch("level:{127 TO 200]", docs, []int{}) } +func (s *FractionTestSuite) TestSearchIPRange() { + docs := []string{ + `{"timestamp":"2000-01-01T13:00:00.000Z","service":"gateway-0","level":"1","client_ip":"192.168.31.0"}`, + `{"timestamp":"2000-01-01T13:00:01.000Z","service":"gateway-1","level":"1","client_ip":"192.168.0.1"}`, + `{"timestamp":"2000-01-01T13:00:02.000Z","service":"gateway-2","level":"1","client_ip":"192.168.0.2"}`, + `{"timestamp":"2000-01-01T13:00:03.000Z","service":"gateway-3","level":"1","client_ip":"192.168.0.3"}`, + `{"timestamp":"2000-01-01T13:00:04.000Z","service":"gateway-0","level":"1","client_ip":"192.168.1.0"}`, + `{"timestamp":"2000-01-01T13:00:05.000Z","service":"gateway-1","level":"1","client_ip":"192.168.1.1"}`, + `{"timestamp":"2000-01-01T13:00:06.000Z","service":"gateway-0","level":"1","client_ip":"192.168.1.2"}`, + `{"timestamp":"2000-01-01T13:00:07.000Z","service":"gateway-1","level":"1","client_ip":"192.168.1.255"}`, + `{"timestamp":"2000-01-01T13:00:08.000Z","service":"gateway-3","level":"1","client_ip":"192.168.31.0"}`, + `{"timestamp":"2000-01-01T13:00:09.000Z","service":"api-0","level":"2","client_ip":"172.10.0.1"}`, + `{"timestamp":"2000-01-01T13:00:10.000Z","service":"api-1","level":"2","client_ip":"172.10.0.100"}`, + `{"timestamp":"2000-01-01T13:00:11.000Z","service":"api-2","level":"2","client_ip":"172.10.1.50"}`, + `{"timestamp":"2000-01-01T13:00:12.000Z","service":"api-3","level":"2","client_ip":"172.10.1.200"}`, + `{"timestamp":"2000-01-01T13:00:13.000Z","service":"api-4","level":"2","client_ip":"172.10.2.1"}`, + `{"timestamp":"2000-01-01T13:00:14.000Z","service":"backend-0","level":"3","client_ip":"10.53.0.10"}`, + `{"timestamp":"2000-01-01T13:00:15.000Z","service":"backend-1","level":"3","client_ip":"10.53.0.20"}`, + `{"timestamp":"2000-01-01T13:00:16.000Z","service":"backend-2","level":"3","client_ip":"10.53.1.30"}`, + `{"timestamp":"2000-01-01T13:00:17.000Z","service":"backend-3","level":"3","client_ip":"10.53.1.40"}`, + `{"timestamp":"2000-01-01T13:00:18.000Z","service":"backend-4","level":"3","client_ip":"10.53.2.50"}`, + } + + s.insertDocuments(docs...) + + s.AssertSearch(s.seqql("client_ip:ip_range(192.168.0.0,192.168.0.255)"), docs, []int{3, 2, 1}) + s.AssertSearch(s.seqql("client_ip:ip_range(192.168.1.0,192.168.1.255)"), docs, []int{7, 6, 5, 4}) + s.AssertSearch(s.seqql("client_ip:ip_range(172.10.0.0,172.10.0.255)"), docs, []int{10, 9}) + s.AssertSearch(s.seqql("client_ip:ip_range(172.10.0.0,172.10.255.255)"), docs, []int{13, 12, 11, 10, 9}) + s.AssertSearch(s.seqql("client_ip:ip_range(10.53.0.0,10.53.0.255)"), docs, []int{15, 14}) + s.AssertSearch(s.seqql("client_ip:ip_range(10.53.0.0,10.53.255.255)"), docs, []int{18, 17, 16, 15, 14}) + + s.AssertSearch(s.seqql("client_ip:ip_range(192.168.0.0/24)"), docs, []int{3, 2, 1}) + s.AssertSearch(s.seqql("client_ip:ip_range(192.168.1.0/24)"), docs, []int{7, 6, 5, 4}) + s.AssertSearch(s.seqql("client_ip:ip_range(172.10.0.0/24)"), docs, []int{10, 9}) + s.AssertSearch(s.seqql("client_ip:ip_range(10.53.0.0/24)"), docs, []int{15, 14}) + + s.AssertSearch(s.seqql("client_ip:ip_range(172.10.0.0/16)"), docs, []int{13, 12, 11, 10, 9}) + s.AssertSearch(s.seqql("client_ip:ip_range(10.53.0.0/16)"), docs, []int{18, 17, 16, 15, 14}) + + s.AssertSearch(s.seqql("client_ip:ip_range(192.168.31.0/32)"), docs, []int{8, 0}) + s.AssertSearch(s.seqql("client_ip:ip_range(172.10.0.1/32)"), docs, []int{9}) +} + func (s *FractionTestSuite) TestSearchIn() { docs := []string{ `{"timestamp":"2000-01-01T13:00:00.000Z","message":"starting pod","level":"info","k8s_namespace":"prod","k8s_pod":"proxy-node1"}`, From 0fa38bc59d650cfb8f862b7ba0d270f25ea378a5 Mon Sep 17 00:00:00 2001 From: Andrei Cheboksarov <37665782+cheb0@users.noreply.github.com> Date: Tue, 14 Oct 2025 13:04:41 +0400 Subject: [PATCH 33/48] Fix WithTotal not checked --- frac/fraction_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/frac/fraction_test.go b/frac/fraction_test.go index 1b03d15d..c85bfd5f 100644 --- a/frac/fraction_test.go +++ b/frac/fraction_test.go @@ -919,7 +919,7 @@ func (s *FractionTestSuite) TestFractionInfo() { switch s.fraction.(type) { case *Active: - s.Require().True(info.MetaOnDisk > uint64(340) && info.MetaOnDisk < uint64(350), + s.Require().True(info.MetaOnDisk >= uint64(320) && info.MetaOnDisk <= uint64(350), "meta on disk doesn't match. actual value: %d", info.MetaOnDisk) s.Require().Equal(uint64(0), info.IndexOnDisk, "index on disk doesn't match") case *Sealed: @@ -1049,7 +1049,7 @@ func (s *FractionTestSuite) AssertSearchWithSearchParams( // We can check total only if limit is not set. Otherwise, total returns a count // of all docs which match the query - if checkTotal && params.Limit == 0 { + if checkTotal && params.Limit == math.MaxInt32 { withTotals = append(withTotals, true) } From 29edf4a78a42469a8480e6f3a2301c71dcc5ca9a Mon Sep 17 00:00:00 2001 From: Andrei Cheboksarov <37665782+cheb0@users.noreply.github.com> Date: Tue, 14 Oct 2025 13:09:36 +0400 Subject: [PATCH 34/48] check AscOrder on most queries --- frac/fraction_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/frac/fraction_test.go b/frac/fraction_test.go index c85bfd5f..bebad568 100644 --- a/frac/fraction_test.go +++ b/frac/fraction_test.go @@ -1054,7 +1054,7 @@ func (s *FractionTestSuite) AssertSearchWithSearchParams( } var sortOrders = []seq.DocsOrder{seq.DocsOrderDesc} - if params.Limit == 0 { + if params.Limit == math.MaxInt32 { sortOrders = append(sortOrders, seq.DocsOrderAsc) } From c5ade69649cdbe7a1ebd5e6f05c0779fd638b96e Mon Sep 17 00:00:00 2001 From: Andrei Cheboksarov <37665782+cheb0@users.noreply.github.com> Date: Tue, 14 Oct 2025 13:19:30 +0400 Subject: [PATCH 35/48] collect active indexers and stop after each test, prevent goroutine leak --- frac/fraction_test.go | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/frac/fraction_test.go b/frac/fraction_test.go index bebad568..4873cb15 100644 --- a/frac/fraction_test.go +++ b/frac/fraction_test.go @@ -29,10 +29,11 @@ import ( type FractionTestSuite struct { suite.Suite - tmpDir string - config *Config - mapping seq.Mapping - tokenizers map[seq.TokenizerType]tokenizer.Tokenizer + tmpDir string + config *Config + mapping seq.Mapping + tokenizers map[seq.TokenizerType]tokenizer.Tokenizer + activeIndexers []*ActiveIndexer fraction Fraction @@ -85,6 +86,11 @@ func newSmallCache[V any]() *cache.Cache[V] { } func (s *FractionTestSuite) TearDownTestCommon() { + for _, activeIndexer := range s.activeIndexers { + activeIndexer.Stop() + } + s.activeIndexers = nil + err := os.RemoveAll(s.tmpDir) s.NoError(err, "Failed to remove tmp dir") } @@ -1154,6 +1160,7 @@ func (s *FractionTestSuite) newActive(docs ...string) *Active { baseName := filepath.Join(s.tmpDir, "test_fraction") activeIndexer := NewActiveIndexer(4, 10) activeIndexer.Start() + s.activeIndexers = append(s.activeIndexers, activeIndexer) active := NewActive( baseName, From 93151b6530aea16607a781141f7e9e1ebd6c13fd Mon Sep 17 00:00:00 2001 From: Andrei Cheboksarov <37665782+cheb0@users.noreply.github.com> Date: Tue, 14 Oct 2025 16:51:54 +0400 Subject: [PATCH 36/48] search for multi-bulk insert --- frac/fraction_test.go | 176 +++++++++++++++++++++++++----------------- 1 file changed, 106 insertions(+), 70 deletions(-) diff --git a/frac/fraction_test.go b/frac/fraction_test.go index 4873cb15..0cffd4e9 100644 --- a/frac/fraction_test.go +++ b/frac/fraction_test.go @@ -34,13 +34,14 @@ type FractionTestSuite struct { mapping seq.Mapping tokenizers map[seq.TokenizerType]tokenizer.Tokenizer activeIndexers []*ActiveIndexer + sealParams common.SealParams fraction Fraction - insertDocuments func(docs ...string) + insertDocuments func(docs ...[]string) } -func (s *FractionTestSuite) SetupSuite() { +func (s *FractionTestSuite) SetupTestCommon() { s.config = &Config{ Search: SearchConfig{ AggLimits: AggLimits{ @@ -73,9 +74,16 @@ func (s *FractionTestSuite) SetupSuite() { "spans.span_id": seq.NewSingleType(seq.TokenizerTypeKeyword, "", 0), "v": seq.NewSingleType(seq.TokenizerTypeKeyword, "", 0), } -} + s.sealParams = common.SealParams{ + IDsZstdLevel: 1, + LIDsZstdLevel: 1, + TokenListZstdLevel: 1, + DocsPositionsZstdLevel: 1, + TokenTableZstdLevel: 1, + DocBlocksZstdLevel: 1, + DocBlockSize: 128 * int(units.KiB), + } -func (s *FractionTestSuite) SetupTestCommon() { var err error s.tmpDir, err = os.MkdirTemp("", "fraction_test_*") s.Require().NoError(err) @@ -104,7 +112,7 @@ func (s *FractionTestSuite) TestSearchKeyword() { `{"timestamp":"2000-01-01T13:00:54Z","service":"service_c","message":"apple","source":"prod03"}`, } - s.insertDocuments(docs...) + s.insertDocuments(docs) s.AssertSearch("service:service_a", docs, []int{3, 0}) s.AssertSearch("trace_id:abcdef", docs, []int{1, 0}) @@ -133,7 +141,7 @@ func (s *FractionTestSuite) TestSearchNot() { `{"timestamp":"2000-01-01T13:00:30Z","message":"good","level":"6","service":"srv_6","status":"ok"}`, } - s.insertDocuments(docs...) + s.insertDocuments(docs) s.AssertSearch("NOT level:1", docs, []int{5, 4, 3, 2, 1}) s.AssertSearch("NOT level:2", docs, []int{5, 4, 3, 2, 0}) @@ -166,7 +174,7 @@ func (s *FractionTestSuite) TestSearchAndOr() { `{"timestamp":"2000-01-01T13:00:00.005Z","message":"cherry","level":"warn","service":"svc_c","status":"ok"}`, } - s.insertDocuments(docs...) + s.insertDocuments(docs) s.AssertSearch("message:apple AND level:info", docs, []int{0}) s.AssertSearch("message:banana AND service:svc_a", docs, []int{2}) @@ -204,7 +212,7 @@ func (s *FractionTestSuite) TestWildcardSymbolsSearch() { `{"timestamp":"2000-01-01T13:00:00.040Z","message":"fourth ****"}`, } - s.insertDocuments(docs...) + s.insertDocuments(docs) s.AssertSearch(`message:*`, docs, []int{3, 2, 1, 0}) s.AssertSearch(`message:value`, docs, []int{1, 0}) @@ -228,7 +236,7 @@ func (s *FractionTestSuite) TestSearchFullText() { `{"timestamp":"2000-01-01T13:00:33Z","message":"fourth test document","level":"info","service":"another-service","status":"ok"}`, } - s.insertDocuments(docs...) + s.insertDocuments(docs) s.AssertSearch("message:document", docs, []int{3, 2, 1, 0}) s.AssertSearch("message:test", docs, []int{3, 2, 1, 0}) @@ -258,7 +266,7 @@ func (s *FractionTestSuite) TestSearchPath() { `{"timestamp":"2000-01-01T13:00:00.010Z","service":"a","request_uri":"/two/one/three/2"}`, } - s.insertDocuments(docs...) + s.insertDocuments(docs) s.AssertSearch("request_uri:/one", docs, []int{9, 8, 7, 6, 5, 4, 3, 2, 1, 0}) s.AssertSearch("request_uri:/two", docs, []int{10}) @@ -284,7 +292,7 @@ func (s *FractionTestSuite) TestSearchRange() { `{"timestamp":"2000-01-01T13:00:00.006Z","service":"test-service","level":"127"}`, } - s.insertDocuments(docs...) + s.insertDocuments(docs) s.AssertSearch("level:[1 TO 3]", docs, []int{1, 0}) s.AssertSearch(s.seqql("level:[1, 3]"), docs, []int{1, 0}) @@ -333,7 +341,7 @@ func (s *FractionTestSuite) TestSearchIPRange() { `{"timestamp":"2000-01-01T13:00:18.000Z","service":"backend-4","level":"3","client_ip":"10.53.2.50"}`, } - s.insertDocuments(docs...) + s.insertDocuments(docs) s.AssertSearch(s.seqql("client_ip:ip_range(192.168.0.0,192.168.0.255)"), docs, []int{3, 2, 1}) s.AssertSearch(s.seqql("client_ip:ip_range(192.168.1.0,192.168.1.255)"), docs, []int{7, 6, 5, 4}) @@ -368,7 +376,7 @@ func (s *FractionTestSuite) TestSearchIn() { `{"timestamp":"2000-01-01T13:00:00.009Z","message":"cache miss","level":"warn","k8s_namespace":"test","k8s_pod":"app-cache-1"}`, } - s.insertDocuments(docs...) + s.insertDocuments(docs) s.AssertSearch(s.seqql("k8s_namespace:in(prod)"), docs, []int{8, 7, 4, 1, 0}) s.AssertSearch(s.seqql("k8s_namespace:in(test)"), docs, []int{9, 3, 2}) @@ -399,7 +407,7 @@ func (s *FractionTestSuite) TestSearchNested() { `{"timestamp":"2000-01-01T13:00:00.003Z","spans":[{"span_id":"4"},{"span_id":"5"}]}`, } - s.insertDocuments(docs...) + s.insertDocuments(docs) s.AssertSearchIgnoreTotal("spans.span_id:*", docs, []int{3, 2, 1, 0}) s.AssertSearch("spans.span_id:1", docs, []int{2, 0}) @@ -421,7 +429,7 @@ func (s *FractionTestSuite) TestSearchFromTo() { `{"timestamp":"2000-01-01T13:00:00.007Z","message":"good","level":"8","trace_id":"2","service":"1"}`, } - s.insertDocuments(docs...) + s.insertDocuments(docs) assertSearch := func(query string, fromOffset, toOffset int, expectedIndexes []int) { s.AssertSearch(s.query( @@ -473,7 +481,7 @@ func (s *FractionTestSuite) TestSearchWithLimit() { `{"timestamp":"2000-01-01T13:00:00.007Z","message":"good","level":"8","trace_id":"2","service":"1"}`, } - s.insertDocuments(docs...) + s.insertDocuments(docs) s.AssertSearch(s.query("message:good"), docs, []int{7, 5, 3, 1}) s.AssertSearch(s.query("message:good", withLimit(3)), docs, []int{7, 5, 3}) @@ -502,7 +510,7 @@ func (s *FractionTestSuite) TestSearchHist() { `{"timestamp":"2000-01-01T13:00:25.600Z","message": "apple cider"}`, } - s.insertDocuments(docs...) + s.insertDocuments(docs) s.AssertHist(s.query("message:apple", withHist(1000)), map[string]uint64{ "2000-01-01T13:00:01.000Z": 1, @@ -587,7 +595,7 @@ func (s *FractionTestSuite) TestBasicAggregation() { `{"timestamp":"2000-01-01T13:00:05.000Z","message":"good","level":"1","trace_id":"1","service":"gateway"}`, } - s.insertDocuments(docs...) + s.insertDocuments(docs) assertAggSearch := func(searchParams *processor.SearchParams, expected []map[string]uint64) { @@ -658,7 +666,7 @@ func (s *FractionTestSuite) TestAggSum() { `{"timestamp":"2000-01-01T13:00:00.019Z","service":"sum5"}`, } - s.insertDocuments(docs...) + s.insertDocuments(docs) searchParams := s.query( "service:sum*", @@ -694,7 +702,7 @@ func (s *FractionTestSuite) TestAggMin() { `{"timestamp":"2000-01-01T13:00:00.012Z","v":null}`, } - s.insertDocuments(docs...) + s.insertDocuments(docs) searchParams := s.query( "service:min*", @@ -723,7 +731,7 @@ func (s *FractionTestSuite) TestAggMax() { `{"timestamp":"2000-01-01T13:00:00.007Z","v":null}`, } - s.insertDocuments(docs...) + s.insertDocuments(docs) searchParams := s.query( "service:max*", @@ -754,7 +762,7 @@ func (s *FractionTestSuite) TestAggQuantile() { `{"timestamp":"2000-01-01T13:00:00.009Z","service":"quantile1","v":10}`, } - s.insertDocuments(docs...) + s.insertDocuments(docs) searchParams := s.query( "service:quantile*", @@ -793,7 +801,7 @@ func (s *FractionTestSuite) TestAggUnique() { `{"timestamp":"2000-01-01T13:00:00.010Z","level":3}`, } - s.insertDocuments(docs...) + s.insertDocuments(docs) searchParams := s.query( "level:3", @@ -826,7 +834,7 @@ func (s *FractionTestSuite) TestAggSumWithoutGroupBy() { `{"timestamp":"2000-01-01T13:00:00.010Z","v":0,"service":"sum_without_group_by"}`, } - s.insertDocuments(docs...) + s.insertDocuments(docs) searchParams := s.query( `service:"sum_without_group_by"`, @@ -848,7 +856,7 @@ func (s *FractionTestSuite) TestAggMaxWithoutGroupBy() { `{"timestamp":"2000-01-01T13:00:00.003Z","v":-300,"service":"max_without_group_by"}`, } - s.insertDocuments(docs...) + s.insertDocuments(docs) searchParams := s.query( `service:"max_without_group_by"`, @@ -867,7 +875,7 @@ func (s *FractionTestSuite) TestAggNotExists() { `{"timestamp":"2000-01-01T13:00:00.000Z","service":"not_exists"}`, } - s.insertDocuments(docs...) + s.insertDocuments(docs) searchParams := s.query( `service:"not_exists"`, @@ -887,7 +895,7 @@ func (s *FractionTestSuite) TestAggAvgWithoutGroupBy() { `{"timestamp":"2000-01-01T13:00:00.001Z","v":500,"service":"avg_without_group_by"}`, } - s.insertDocuments(docs...) + s.insertDocuments(docs) searchParams := s.query( `service:"avg_without_group_by"`, @@ -901,6 +909,43 @@ func (s *FractionTestSuite) TestAggAvgWithoutGroupBy() { s.AssertAggregation(searchParams, seq.AggregateArgs{Func: seq.AggFuncAvg}, expectedBuckets) } +func (s *FractionTestSuite) TestSearchMultipleBulks() { + docs := []string{ + `{"timestamp":"2000-01-01T13:00:01Z","service":"service_a","message":"request started","source":"prod01","level":"1"}`, + `{"timestamp":"2000-01-01T13:00:02Z","service":"service_b","message":"processing data","source":"prod03","level":"1"}`, + `{"timestamp":"2000-01-01T13:00:03Z","service":"service_c","message":"database query","source":"prod02","level":"2"}`, + `{"timestamp":"2000-01-01T13:00:04Z","service":"service_a","message":"request completed","source":"prod01","level":"1"}`, + `{"timestamp":"2000-01-01T13:00:05Z","service":"service_c","message":"cache hit","source":"prod03","level":"3"}`, + `{"timestamp":"2000-01-01T13:00:06Z","service":"service_c","message":"processing request","source":"prod01","level":"2"}`, + `{"timestamp":"2000-01-01T13:00:07Z","service":"service_a","message":"request failed","source":"prod02","level":"1"}`, + `{"timestamp":"2000-01-01T13:00:08Z","service":"service_b","message":"processing failed","source":"prod03","level":"4"}`, + `{"timestamp":"2000-01-01T13:00:09Z","service":"service_b","message":"processing retry","source":"prod03","level":"3"}`, + } + var bulk1 []string + var bulk2 []string + var bulk3 []string + docs = append(docs, bulk1...) + docs = append(docs, bulk2...) + docs = append(docs, bulk3...) + for i, doc := range docs { + switch i % 3 { + case 0: + bulk1 = append(bulk1, doc) + case 1: + bulk2 = append(bulk2, doc) + case 2: + bulk3 = append(bulk3, doc) + } + } + + s.insertDocuments(bulk1, bulk2, bulk3) + + s.AssertSearch(s.query("service:service_b"), docs, []int{8, 7, 1}) + s.AssertSearch(s.query("source:prod01"), docs, []int{5, 3, 0}) + s.AssertSearch(s.query("level:4"), docs, []int{7}) + s.AssertSearch(s.query("message:request"), docs, []int{6, 5, 3, 0}) +} + func (s *FractionTestSuite) TestFractionInfo() { docs := []string{ `{"timestamp":"2000-01-01T13:00:25Z","service":"service_a","message":"first message some text", "service":"gateway"}`, @@ -910,7 +955,7 @@ func (s *FractionTestSuite) TestFractionInfo() { `{"timestamp":"2000-01-01T13:00:54Z","service":"service_c","message":"apple","service":"kube-scheduler"}`, } - s.insertDocuments(docs...) + s.insertDocuments(docs) info := s.fraction.Info() @@ -1156,7 +1201,7 @@ func (s *FractionTestSuite) AssertHist( } } -func (s *FractionTestSuite) newActive(docs ...string) *Active { +func (s *FractionTestSuite) newActive(bulks ...[]string) *Active { baseName := filepath.Join(s.tmpDir, "test_fraction") activeIndexer := NewActiveIndexer(4, 10) activeIndexer.Start() @@ -1172,51 +1217,42 @@ func (s *FractionTestSuite) newActive(docs ...string) *Active { ) proc := indexer.NewProcessor(s.mapping, s.tokenizers, 0, 0, 0) + compressor := indexer.GetDocsMetasCompressor(3, 3) + defer indexer.PutDocMetasCompressor(compressor) - idx := 0 - readNext := func() ([]byte, error) { - if idx >= len(docs) { - return nil, nil + for _, docs := range bulks { + idx := 0 + readNext := func() ([]byte, error) { + if idx >= len(docs) { + return nil, nil + } + d := []byte(docs[idx]) + idx++ + return d, nil } - d := []byte(docs[idx]) - idx++ - return d, nil - } - _, binaryDocs, binaryMeta, err := proc.ProcessBulk(time.Now(), nil, nil, readNext) - s.Require().NoError(err, "processing bulk failed") + _, binaryDocs, binaryMeta, err := proc.ProcessBulk(time.Now(), nil, nil, readNext) + s.Require().NoError(err, "processing bulk failed") - compressor := indexer.GetDocsMetasCompressor(3, 3) - defer indexer.PutDocMetasCompressor(compressor) - compressor.CompressDocsAndMetas(binaryDocs, binaryMeta) - docsBlock, metasBlock := compressor.DocsMetas() + compressor.CompressDocsAndMetas(binaryDocs, binaryMeta) + docsBlock, metasBlock := compressor.DocsMetas() - var wg sync.WaitGroup - wg.Add(1) - err = active.Append(docsBlock, metasBlock, &wg) - s.Require().NoError(err, "append to active failed") - - wg.Wait() + var wg sync.WaitGroup + wg.Add(1) + err = active.Append(docsBlock, metasBlock, &wg) + s.Require().NoError(err, "append to active failed") + wg.Wait() + } return active } -func (s *FractionTestSuite) newSealed(docs ...string) *Sealed { - active := s.newActive(docs...) - - sealParams := common.SealParams{ - IDsZstdLevel: 1, // min comression level - LIDsZstdLevel: 1, - TokenListZstdLevel: 1, - DocsPositionsZstdLevel: 1, - TokenTableZstdLevel: 1, - DocBlocksZstdLevel: 1, - DocBlockSize: 128 * int(units.KiB), - } +func (s *FractionTestSuite) newSealed(bulks ...[]string) *Sealed { + active := s.newActive(bulks...) - activeSealingSource, err := NewActiveSealingSource(active, sealParams) + activeSealingSource, err := NewActiveSealingSource(active, s.sealParams) s.Require().NoError(err, "Sealing source creation failed") - preloaded, err := sealing.Seal(activeSealingSource, sealParams) + preloaded, err := sealing.Seal(activeSealingSource, s.sealParams) s.Require().NoError(err, "Sealing failed") indexCache := &IndexCache{ @@ -1251,11 +1287,11 @@ type ActiveFractionTestSuite struct { func (s *ActiveFractionTestSuite) SetupTest() { s.SetupTestCommon() - s.insertDocuments = func(docs ...string) { + s.insertDocuments = func(bulks ...[]string) { if s.fraction != nil { s.Require().Fail("can insert docs only once") } - s.fraction = s.newActive(docs...) + s.fraction = s.newActive(bulks...) } } @@ -1284,7 +1320,7 @@ type SealedFractionTestSuite struct { func (s *SealedFractionTestSuite) SetupTest() { s.SetupTestCommon() - s.insertDocuments = func(docs ...string) { + s.insertDocuments = func(docs ...[]string) { if s.fraction != nil { s.Require().Fail("can insert docs only once") } @@ -1311,16 +1347,16 @@ type SealedLoadedFractionTestSuite struct { func (s *SealedLoadedFractionTestSuite) SetupTest() { s.SetupTestCommon() - s.insertDocuments = func(docs ...string) { + s.insertDocuments = func(bulks ...[]string) { if s.fraction != nil { s.Require().Fail("can insert docs only once") } - s.fraction = s.newSealedLoaded(docs...) + s.fraction = s.newSealedLoaded(bulks...) } } -func (s *SealedLoadedFractionTestSuite) newSealedLoaded(docs ...string) *Sealed { - sealed := s.newSealed(docs...) +func (s *SealedLoadedFractionTestSuite) newSealedLoaded(bulks ...[]string) *Sealed { + sealed := s.newSealed(bulks...) sealed.close("closed") indexCache := &IndexCache{ From 0a28ef73ae0d110e5759a08ccff7421198e80e11 Mon Sep 17 00:00:00 2001 From: Andrei Cheboksarov <37665782+cheb0@users.noreply.github.com> Date: Tue, 14 Oct 2025 16:57:20 +0400 Subject: [PATCH 37/48] shuffle docs in each bulk before insert --- frac/fraction_test.go | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/frac/fraction_test.go b/frac/fraction_test.go index 0cffd4e9..8059d993 100644 --- a/frac/fraction_test.go +++ b/frac/fraction_test.go @@ -4,6 +4,7 @@ import ( "context" "fmt" "math" + "math/rand/v2" "os" "path/filepath" "slices" @@ -962,7 +963,8 @@ func (s *FractionTestSuite) TestFractionInfo() { // these checks should not break without a reason // but if compression/marshalling has changed, expected values can be updated accordingly s.Require().Equal(uint32(5), info.DocsTotal, "doc total doesn't match") - s.Require().True(info.DocsOnDisk > uint64(230) && info.DocsOnDisk < uint64(240), + // it varies depending on params and docs shuffled + s.Require().True(info.DocsOnDisk > uint64(200) && info.DocsOnDisk < uint64(250), "doc raw doesn't match. actual value: %d", info.DocsOnDisk) s.Require().Equal(uint64(573), info.DocsRaw, "doc raw doesn't match") s.Require().Equal(seq.MID(946731625000), info.From, "from doesn't match") @@ -1221,12 +1223,18 @@ func (s *FractionTestSuite) newActive(bulks ...[]string) *Active { defer indexer.PutDocMetasCompressor(compressor) for _, docs := range bulks { + docsCopy := make([]string, len(docs)) + copy(docsCopy, docs) + rand.Shuffle(len(docsCopy), func(i, j int) { + docsCopy[i], docsCopy[j] = docsCopy[j], docsCopy[i] + }) + idx := 0 readNext := func() ([]byte, error) { - if idx >= len(docs) { + if idx >= len(docsCopy) { return nil, nil } - d := []byte(docs[idx]) + d := []byte(docsCopy[idx]) idx++ return d, nil } From ecee05fe28713d9291874b4adcb3e9ee7b26192f Mon Sep 17 00:00:00 2001 From: Andrei Cheboksarov <37665782+cheb0@users.noreply.github.com> Date: Tue, 14 Oct 2025 17:53:47 +0400 Subject: [PATCH 38/48] test search on large 200kbyte frac to add 1.5% of statements coverage --- frac/fraction_test.go | 62 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) diff --git a/frac/fraction_test.go b/frac/fraction_test.go index 8059d993..c1debbcd 100644 --- a/frac/fraction_test.go +++ b/frac/fraction_test.go @@ -8,6 +8,7 @@ import ( "os" "path/filepath" "slices" + "strings" "sync" "testing" "time" @@ -947,6 +948,67 @@ func (s *FractionTestSuite) TestSearchMultipleBulks() { s.AssertSearch(s.query("message:request"), docs, []int{6, 5, 3, 0}) } +// This test checks search on a large frac. Doc count is to 25000 which results in ~200 kbyte docs file (3 doc blocks) +func (s *FractionTestSuite) TestSearchLargeFrac() { + services := []string{"gateway", "proxy", "scheduler"} + messages := []string{ + "request started", "request completed", "processing timed out", + "processing data", "processing failed", "processing retry", + } + + baseTime := time.Date(2000, 1, 1, 13, 0, 0, 0, time.UTC) + + var docs []string + var messageRequestIndexes []int + var serviceGatewayIndexes []int + var level5Indexes []int + + for i := 0; i < 25000; i++ { + service := services[rand.IntN(len(services))] + message := messages[rand.IntN(len(messages))] + level := rand.IntN(6) + timestamp := baseTime.Add(time.Duration(i) * time.Millisecond) + + doc := fmt.Sprintf(`{"timestamp":"%s","service":"%s","message":"%s","level":"%d"}`, + timestamp.Format(time.RFC3339Nano), service, message, level) + docs = append(docs, doc) + + if service == "gateway" { + serviceGatewayIndexes = append(serviceGatewayIndexes, i) + } + if level == 5 { + level5Indexes = append(level5Indexes, i) + } + if strings.Contains(message, "request") { + messageRequestIndexes = append(messageRequestIndexes, i) + } + } + + slices.Reverse(messageRequestIndexes) + slices.Reverse(serviceGatewayIndexes) + slices.Reverse(level5Indexes) + + bulkSize := 1000 + var bulks [][]string + for i := 0; i < len(docs); i += bulkSize { + end := i + bulkSize + if end > len(docs) { + end = len(docs) + } + bulks = append(bulks, docs[i:end]) + } + // docs in each bulk will be shuffled in insertDocuments, now we only shuffle bulks + rand.Shuffle(len(bulks), func(i, j int) { + bulks[i], bulks[j] = bulks[j], bulks[i] + }) + + s.insertDocuments(bulks...) + + s.AssertSearch(s.query("message:request", withLimit(100)), docs, messageRequestIndexes[:100]) + s.AssertSearch(s.query("service:gateway ", withLimit(100)), docs, serviceGatewayIndexes[:100]) + s.AssertSearch(s.query("level:5", withLimit(100)), docs, level5Indexes[:100]) +} + func (s *FractionTestSuite) TestFractionInfo() { docs := []string{ `{"timestamp":"2000-01-01T13:00:25Z","service":"service_a","message":"first message some text", "service":"gateway"}`, From 84a2ecdbd1a7aadf56e77bd8950b97defcd94ee2 Mon Sep 17 00:00:00 2001 From: Andrei Cheboksarov <37665782+cheb0@users.noreply.github.com> Date: Tue, 14 Oct 2025 18:05:59 +0400 Subject: [PATCH 39/48] test large queries on large frac --- frac/fraction_test.go | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/frac/fraction_test.go b/frac/fraction_test.go index c1debbcd..d8c5d2e6 100644 --- a/frac/fraction_test.go +++ b/frac/fraction_test.go @@ -948,7 +948,7 @@ func (s *FractionTestSuite) TestSearchMultipleBulks() { s.AssertSearch(s.query("message:request"), docs, []int{6, 5, 3, 0}) } -// This test checks search on a large frac. Doc count is to 25000 which results in ~200 kbyte docs file (3 doc blocks) +// This test checks search on a large frac. Doc count is set to 25000 which results in ~200 kbyte docs file (3 doc blocks) func (s *FractionTestSuite) TestSearchLargeFrac() { services := []string{"gateway", "proxy", "scheduler"} messages := []string{ @@ -1005,7 +1005,9 @@ func (s *FractionTestSuite) TestSearchLargeFrac() { s.insertDocuments(bulks...) s.AssertSearch(s.query("message:request", withLimit(100)), docs, messageRequestIndexes[:100]) - s.AssertSearch(s.query("service:gateway ", withLimit(100)), docs, serviceGatewayIndexes[:100]) + s.AssertSearch(s.query("service:gateway"), docs, serviceGatewayIndexes) + s.AssertSearch(s.query("service:gateway", withLimit(100)), docs, serviceGatewayIndexes[:100]) + s.AssertSearch(s.query("level:5"), docs, level5Indexes) s.AssertSearch(s.query("level:5", withLimit(100)), docs, level5Indexes[:100]) } @@ -1168,8 +1170,8 @@ func (s *FractionTestSuite) AssertSearchWithSearchParams( withTotals = append(withTotals, true) } - var sortOrders = []seq.DocsOrder{seq.DocsOrderDesc} - if params.Limit == math.MaxInt32 { + var sortOrders = []seq.DocsOrder{params.Order} + if params.Order == seq.DocsOrderDesc && params.Limit == math.MaxInt32 { sortOrders = append(sortOrders, seq.DocsOrderAsc) } From c5b26d3f19aa7789b87eb9ee3f987e6542a26b50 Mon Sep 17 00:00:00 2001 From: Andrei Cheboksarov <37665782+cheb0@users.noreply.github.com> Date: Tue, 14 Oct 2025 18:27:07 +0400 Subject: [PATCH 40/48] mark docs with indexes so that it's easier to read asserts --- frac/fraction_test.go | 202 +++++++++++++++++++++--------------------- 1 file changed, 101 insertions(+), 101 deletions(-) diff --git a/frac/fraction_test.go b/frac/fraction_test.go index d8c5d2e6..41c8dafa 100644 --- a/frac/fraction_test.go +++ b/frac/fraction_test.go @@ -107,11 +107,11 @@ func (s *FractionTestSuite) TearDownTestCommon() { func (s *FractionTestSuite) TestSearchKeyword() { docs := []string{ - `{"timestamp":"2000-01-01T13:00:25Z","service":"service_a","message":"first message some text","trace_id":"abcdef","source":"prod01","level":"1"}`, - `{"timestamp":"2000-01-01T13:00:32Z","service":"service_b","message":"second message other text","trace_id":"abcdef","source":"prod01","level":"1"}`, - `{"timestamp":"2000-01-01T13:00:43Z","service":"service_c","message":"third message other text","trace_id":"aaaaaa","source":"prod02","level":"2"}`, - `{"timestamp":"2000-01-01T13:00:53Z","service":"service_a","message":"fourth message some text","trace_id":"bbbbbb","source":"prod01","level":"1"}`, - `{"timestamp":"2000-01-01T13:00:54Z","service":"service_c","message":"apple","source":"prod03"}`, + /*0*/ `{"timestamp":"2000-01-01T13:00:25Z","service":"service_a","message":"first message some text","trace_id":"abcdef","source":"prod01","level":"1"}`, + /*1*/ `{"timestamp":"2000-01-01T13:00:32Z","service":"service_b","message":"second message other text","trace_id":"abcdef","source":"prod01","level":"1"}`, + /*2*/ `{"timestamp":"2000-01-01T13:00:43Z","service":"service_c","message":"third message other text","trace_id":"aaaaaa","source":"prod02","level":"2"}`, + /*3*/ `{"timestamp":"2000-01-01T13:00:53Z","service":"service_a","message":"fourth message some text","trace_id":"bbbbbb","source":"prod01","level":"1"}`, + /*4*/ `{"timestamp":"2000-01-01T13:00:54Z","service":"service_c","message":"apple","source":"prod03"}`, } s.insertDocuments(docs) @@ -135,12 +135,12 @@ func (s *FractionTestSuite) TestSearchKeyword() { func (s *FractionTestSuite) TestSearchNot() { docs := []string{ - `{"timestamp":"2000-01-01T13:00:25Z","message":"bad","level":"1","service":"srv_1","status":"ok"}`, - `{"timestamp":"2000-01-01T13:00:26Z","message":"good","level":"2","service":"srv_2","status":"ok"}`, - `{"timestamp":"2000-01-01T13:00:27Z","message":"bad","level":"3","service":"srv_3","status":"ok"}`, - `{"timestamp":"2000-01-01T13:00:28Z","message":"good","level":"4","service":"srv_4","status":"ok"}`, - `{"timestamp":"2000-01-01T13:00:29Z","message":"bad","level":"5","service":"srv_5","status":"ok"}`, - `{"timestamp":"2000-01-01T13:00:30Z","message":"good","level":"6","service":"srv_6","status":"ok"}`, + /*0*/ `{"timestamp":"2000-01-01T13:00:25Z","message":"bad","level":"1","service":"srv_1","status":"ok"}`, + /*1*/ `{"timestamp":"2000-01-01T13:00:26Z","message":"good","level":"2","service":"srv_2","status":"ok"}`, + /*2*/ `{"timestamp":"2000-01-01T13:00:27Z","message":"bad","level":"3","service":"srv_3","status":"ok"}`, + /*3*/ `{"timestamp":"2000-01-01T13:00:28Z","message":"good","level":"4","service":"srv_4","status":"ok"}`, + /*4*/ `{"timestamp":"2000-01-01T13:00:29Z","message":"bad","level":"5","service":"srv_5","status":"ok"}`, + /*5*/ `{"timestamp":"2000-01-01T13:00:30Z","message":"good","level":"6","service":"srv_6","status":"ok"}`, } s.insertDocuments(docs) @@ -168,12 +168,12 @@ func (s *FractionTestSuite) TestSearchNot() { func (s *FractionTestSuite) TestSearchAndOr() { docs := []string{ - `{"timestamp":"2000-01-01T13:00:00.000Z","message":"apple","level":"info","service":"svc_a","status":"ok"}`, - `{"timestamp":"2000-01-01T13:00:00.001Z","message":"apple","level":"error","service":"svc_b","status":"fail"}`, - `{"timestamp":"2000-01-01T13:00:00.002Z","message":"banana","level":"info","service":"svc_a","status":"ok"}`, - `{"timestamp":"2000-01-01T13:00:00.003Z","message":"banana","level":"error","service":"svc_b","status":"fail"}`, - `{"timestamp":"2000-01-01T13:00:00.004Z","message":"cherry","level":"info","service":"svc_c","status":"ok"}`, - `{"timestamp":"2000-01-01T13:00:00.005Z","message":"cherry","level":"warn","service":"svc_c","status":"ok"}`, + /*0*/ `{"timestamp":"2000-01-01T13:00:00.000Z","message":"apple","level":"info","service":"svc_a","status":"ok"}`, + /*1*/ `{"timestamp":"2000-01-01T13:00:00.001Z","message":"apple","level":"error","service":"svc_b","status":"fail"}`, + /*2*/ `{"timestamp":"2000-01-01T13:00:00.002Z","message":"banana","level":"info","service":"svc_a","status":"ok"}`, + /*3*/ `{"timestamp":"2000-01-01T13:00:00.003Z","message":"banana","level":"error","service":"svc_b","status":"fail"}`, + /*4*/ `{"timestamp":"2000-01-01T13:00:00.004Z","message":"cherry","level":"info","service":"svc_c","status":"ok"}`, + /*5*/ `{"timestamp":"2000-01-01T13:00:00.005Z","message":"cherry","level":"warn","service":"svc_c","status":"ok"}`, } s.insertDocuments(docs) @@ -208,10 +208,10 @@ func (s *FractionTestSuite) TestSearchAndOr() { func (s *FractionTestSuite) TestWildcardSymbolsSearch() { docs := []string{ - `{"timestamp":"2000-01-01T13:00:00.010Z","message":"first value:****"}`, - `{"timestamp":"2000-01-01T13:00:00.020Z","message":"second value:*******"}`, - `{"timestamp":"2000-01-01T13:00:00.030Z","message":"third value****"}`, - `{"timestamp":"2000-01-01T13:00:00.040Z","message":"fourth ****"}`, + /*0*/ `{"timestamp":"2000-01-01T13:00:00.010Z","message":"first value:****"}`, + /*1*/ `{"timestamp":"2000-01-01T13:00:00.020Z","message":"second value:*******"}`, + /*2*/ `{"timestamp":"2000-01-01T13:00:00.030Z","message":"third value****"}`, + /*3*/ `{"timestamp":"2000-01-01T13:00:00.040Z","message":"fourth ****"}`, } s.insertDocuments(docs) @@ -232,10 +232,10 @@ func (s *FractionTestSuite) TestWildcardSymbolsSearch() { func (s *FractionTestSuite) TestSearchFullText() { docs := []string{ - `{"timestamp":"2000-01-01T13:00:30Z","message":"first test document","level":"info","service":"test-service","status":"ok"}`, - `{"timestamp":"2000-01-01T13:00:31Z","message":"second test document","level":"error","service":"test-service","status":"fail"}`, - `{"timestamp":"2000-01-01T13:00:32Z","message":"third test document","level":"debug","service":"another-service","status":"ok"}`, - `{"timestamp":"2000-01-01T13:00:33Z","message":"fourth test document","level":"info","service":"another-service","status":"ok"}`, + /*0*/ `{"timestamp":"2000-01-01T13:00:30Z","message":"first test document","level":"info","service":"test-service","status":"ok"}`, + /*1*/ `{"timestamp":"2000-01-01T13:00:31Z","message":"second test document","level":"error","service":"test-service","status":"fail"}`, + /*2*/ `{"timestamp":"2000-01-01T13:00:32Z","message":"third test document","level":"debug","service":"another-service","status":"ok"}`, + /*3*/ `{"timestamp":"2000-01-01T13:00:33Z","message":"fourth test document","level":"info","service":"another-service","status":"ok"}`, } s.insertDocuments(docs) @@ -255,17 +255,17 @@ func (s *FractionTestSuite) TestSearchFullText() { func (s *FractionTestSuite) TestSearchPath() { docs := []string{ - `{"timestamp":"2000-01-01T13:00:00.000Z","service":"a","request_uri":"/one"}`, - `{"timestamp":"2000-01-01T13:00:00.001Z","service":"a","request_uri":"/one/two"}`, - `{"timestamp":"2000-01-01T13:00:00.002Z","service":"a","request_uri":"/one/two/three"}`, - `{"timestamp":"2000-01-01T13:00:00.003Z","service":"a","request_uri":"/one/two.three/four"}`, - `{"timestamp":"2000-01-01T13:00:00.004Z","service":"a","request_uri":"/one/two.three/five"}`, - `{"timestamp":"2000-01-01T13:00:00.005Z","service":"a","request_uri":"/one/two/three/"}`, - `{"timestamp":"2000-01-01T13:00:00.006Z","service":"a","request_uri":"/one/two/three/1"}`, - `{"timestamp":"2000-01-01T13:00:00.007Z","service":"a","request_uri":"/one/two/three/2"}`, - `{"timestamp":"2000-01-01T13:00:00.008Z","service":"a","request_uri":"/one/two/three/3/four/"}`, - `{"timestamp":"2000-01-01T13:00:00.009Z","service":"a","request_uri":"/one/four/three/3/"}`, - `{"timestamp":"2000-01-01T13:00:00.010Z","service":"a","request_uri":"/two/one/three/2"}`, + /*0*/ `{"timestamp":"2000-01-01T13:00:00.000Z","service":"a","request_uri":"/one"}`, + /*1*/ `{"timestamp":"2000-01-01T13:00:00.001Z","service":"a","request_uri":"/one/two"}`, + /*2*/ `{"timestamp":"2000-01-01T13:00:00.002Z","service":"a","request_uri":"/one/two/three"}`, + /*3*/ `{"timestamp":"2000-01-01T13:00:00.003Z","service":"a","request_uri":"/one/two.three/four"}`, + /*4*/ `{"timestamp":"2000-01-01T13:00:00.004Z","service":"a","request_uri":"/one/two.three/five"}`, + /*5*/ `{"timestamp":"2000-01-01T13:00:00.005Z","service":"a","request_uri":"/one/two/three/"}`, + /*6*/ `{"timestamp":"2000-01-01T13:00:00.006Z","service":"a","request_uri":"/one/two/three/1"}`, + /*7*/ `{"timestamp":"2000-01-01T13:00:00.007Z","service":"a","request_uri":"/one/two/three/2"}`, + /*8*/ `{"timestamp":"2000-01-01T13:00:00.008Z","service":"a","request_uri":"/one/two/three/3/four/"}`, + /*9*/ `{"timestamp":"2000-01-01T13:00:00.009Z","service":"a","request_uri":"/one/four/three/3/"}`, + /*10*/ `{"timestamp":"2000-01-01T13:00:00.010Z","service":"a","request_uri":"/two/one/three/2"}`, } s.insertDocuments(docs) @@ -285,13 +285,13 @@ func (s *FractionTestSuite) TestSearchPath() { func (s *FractionTestSuite) TestSearchRange() { docs := []string{ - `{"timestamp":"2000-01-01T13:00:00.000Z","service":"test-service","level":"1"}`, - `{"timestamp":"2000-01-01T13:00:00.001Z","service":"test-service","level":"3"}`, - `{"timestamp":"2000-01-01T13:00:00.002Z","service":"test-service","level":"7"}`, - `{"timestamp":"2000-01-01T13:00:00.003Z","service":"test-service","level":"15"}`, - `{"timestamp":"2000-01-01T13:00:00.004Z","service":"test-service","level":"31"}`, - `{"timestamp":"2000-01-01T13:00:00.005Z","service":"test-service","level":"63"}`, - `{"timestamp":"2000-01-01T13:00:00.006Z","service":"test-service","level":"127"}`, + /*0*/ `{"timestamp":"2000-01-01T13:00:00.000Z","service":"test-service","level":"1"}`, + /*1*/ `{"timestamp":"2000-01-01T13:00:00.001Z","service":"test-service","level":"3"}`, + /*2*/ `{"timestamp":"2000-01-01T13:00:00.002Z","service":"test-service","level":"7"}`, + /*3*/ `{"timestamp":"2000-01-01T13:00:00.003Z","service":"test-service","level":"15"}`, + /*4*/ `{"timestamp":"2000-01-01T13:00:00.004Z","service":"test-service","level":"31"}`, + /*5*/ `{"timestamp":"2000-01-01T13:00:00.005Z","service":"test-service","level":"63"}`, + /*6*/ `{"timestamp":"2000-01-01T13:00:00.006Z","service":"test-service","level":"127"}`, } s.insertDocuments(docs) @@ -322,25 +322,25 @@ func (s *FractionTestSuite) TestSearchRange() { func (s *FractionTestSuite) TestSearchIPRange() { docs := []string{ - `{"timestamp":"2000-01-01T13:00:00.000Z","service":"gateway-0","level":"1","client_ip":"192.168.31.0"}`, - `{"timestamp":"2000-01-01T13:00:01.000Z","service":"gateway-1","level":"1","client_ip":"192.168.0.1"}`, - `{"timestamp":"2000-01-01T13:00:02.000Z","service":"gateway-2","level":"1","client_ip":"192.168.0.2"}`, - `{"timestamp":"2000-01-01T13:00:03.000Z","service":"gateway-3","level":"1","client_ip":"192.168.0.3"}`, - `{"timestamp":"2000-01-01T13:00:04.000Z","service":"gateway-0","level":"1","client_ip":"192.168.1.0"}`, - `{"timestamp":"2000-01-01T13:00:05.000Z","service":"gateway-1","level":"1","client_ip":"192.168.1.1"}`, - `{"timestamp":"2000-01-01T13:00:06.000Z","service":"gateway-0","level":"1","client_ip":"192.168.1.2"}`, - `{"timestamp":"2000-01-01T13:00:07.000Z","service":"gateway-1","level":"1","client_ip":"192.168.1.255"}`, - `{"timestamp":"2000-01-01T13:00:08.000Z","service":"gateway-3","level":"1","client_ip":"192.168.31.0"}`, - `{"timestamp":"2000-01-01T13:00:09.000Z","service":"api-0","level":"2","client_ip":"172.10.0.1"}`, - `{"timestamp":"2000-01-01T13:00:10.000Z","service":"api-1","level":"2","client_ip":"172.10.0.100"}`, - `{"timestamp":"2000-01-01T13:00:11.000Z","service":"api-2","level":"2","client_ip":"172.10.1.50"}`, - `{"timestamp":"2000-01-01T13:00:12.000Z","service":"api-3","level":"2","client_ip":"172.10.1.200"}`, - `{"timestamp":"2000-01-01T13:00:13.000Z","service":"api-4","level":"2","client_ip":"172.10.2.1"}`, - `{"timestamp":"2000-01-01T13:00:14.000Z","service":"backend-0","level":"3","client_ip":"10.53.0.10"}`, - `{"timestamp":"2000-01-01T13:00:15.000Z","service":"backend-1","level":"3","client_ip":"10.53.0.20"}`, - `{"timestamp":"2000-01-01T13:00:16.000Z","service":"backend-2","level":"3","client_ip":"10.53.1.30"}`, - `{"timestamp":"2000-01-01T13:00:17.000Z","service":"backend-3","level":"3","client_ip":"10.53.1.40"}`, - `{"timestamp":"2000-01-01T13:00:18.000Z","service":"backend-4","level":"3","client_ip":"10.53.2.50"}`, + /*0*/ `{"timestamp":"2000-01-01T13:00:00.000Z","service":"gateway-0","level":"1","client_ip":"192.168.31.0"}`, + /*1*/ `{"timestamp":"2000-01-01T13:00:01.000Z","service":"gateway-1","level":"1","client_ip":"192.168.0.1"}`, + /*2*/ `{"timestamp":"2000-01-01T13:00:02.000Z","service":"gateway-2","level":"1","client_ip":"192.168.0.2"}`, + /*3*/ `{"timestamp":"2000-01-01T13:00:03.000Z","service":"gateway-3","level":"1","client_ip":"192.168.0.3"}`, + /*4*/ `{"timestamp":"2000-01-01T13:00:04.000Z","service":"gateway-0","level":"1","client_ip":"192.168.1.0"}`, + /*5*/ `{"timestamp":"2000-01-01T13:00:05.000Z","service":"gateway-1","level":"1","client_ip":"192.168.1.1"}`, + /*6*/ `{"timestamp":"2000-01-01T13:00:06.000Z","service":"gateway-0","level":"1","client_ip":"192.168.1.2"}`, + /*7*/ `{"timestamp":"2000-01-01T13:00:07.000Z","service":"gateway-1","level":"1","client_ip":"192.168.1.255"}`, + /*8*/ `{"timestamp":"2000-01-01T13:00:08.000Z","service":"gateway-3","level":"1","client_ip":"192.168.31.0"}`, + /*9*/ `{"timestamp":"2000-01-01T13:00:09.000Z","service":"api-0","level":"2","client_ip":"172.10.0.1"}`, + /*10*/ `{"timestamp":"2000-01-01T13:00:10.000Z","service":"api-1","level":"2","client_ip":"172.10.0.100"}`, + /*11*/ `{"timestamp":"2000-01-01T13:00:11.000Z","service":"api-2","level":"2","client_ip":"172.10.1.50"}`, + /*12*/ `{"timestamp":"2000-01-01T13:00:12.000Z","service":"api-3","level":"2","client_ip":"172.10.1.200"}`, + /*13*/ `{"timestamp":"2000-01-01T13:00:13.000Z","service":"api-4","level":"2","client_ip":"172.10.2.1"}`, + /*14*/ `{"timestamp":"2000-01-01T13:00:14.000Z","service":"backend-0","level":"3","client_ip":"10.53.0.10"}`, + /*15*/ `{"timestamp":"2000-01-01T13:00:15.000Z","service":"backend-1","level":"3","client_ip":"10.53.0.20"}`, + /*16*/ `{"timestamp":"2000-01-01T13:00:16.000Z","service":"backend-2","level":"3","client_ip":"10.53.1.30"}`, + /*17*/ `{"timestamp":"2000-01-01T13:00:17.000Z","service":"backend-3","level":"3","client_ip":"10.53.1.40"}`, + /*18*/ `{"timestamp":"2000-01-01T13:00:18.000Z","service":"backend-4","level":"3","client_ip":"10.53.2.50"}`, } s.insertDocuments(docs) @@ -366,16 +366,16 @@ func (s *FractionTestSuite) TestSearchIPRange() { func (s *FractionTestSuite) TestSearchIn() { docs := []string{ - `{"timestamp":"2000-01-01T13:00:00.000Z","message":"starting pod","level":"info","k8s_namespace":"prod","k8s_pod":"proxy-node1"}`, - `{"timestamp":"2000-01-01T13:00:00.001Z","message":"api call failed","level":"error","k8s_namespace":"prod","k8s_pod":"apiserver-master1"}`, - `{"timestamp":"2000-01-01T13:00:00.002Z","message":"scheduling task","level":"info","k8s_namespace":"test","k8s_pod":"scheduler-master1"}`, - `{"timestamp":"2000-01-01T13:00:00.003Z","message":"authentication error","level":"error","k8s_namespace":"test","k8s_pod":"apiserver-master2"}`, - `{"timestamp":"2000-01-01T13:00:00.004Z","message":"network policy applied","level":"info","k8s_namespace":"prod","k8s_pod":"proxy-node2"}`, - `{"timestamp":"2000-01-01T13:00:00.005Z","message":"scheduling completed","level":"info","k8s_namespace":"staging","k8s_pod":"scheduler-master2"}`, - `{"timestamp":"2000-01-01T13:00:00.006Z","message":"connection timeout","level":"error","k8s_namespace":"staging","k8s_pod":"app-backend-1"}`, - `{"timestamp":"2000-01-01T13:00:00.007Z","message":"health check passed","level":"info","k8s_namespace":"prod","k8s_pod":"app-frontend-1"}`, - `{"timestamp":"2000-01-01T13:00:00.008Z","message":"database query slow","level":"warn","k8s_namespace":"prod","k8s_pod":"app-backend-2"}`, - `{"timestamp":"2000-01-01T13:00:00.009Z","message":"cache miss","level":"warn","k8s_namespace":"test","k8s_pod":"app-cache-1"}`, + /*0*/ `{"timestamp":"2000-01-01T13:00:00.000Z","message":"starting pod","level":"info","k8s_namespace":"prod","k8s_pod":"proxy-node1"}`, + /*1*/ `{"timestamp":"2000-01-01T13:00:00.001Z","message":"api call failed","level":"error","k8s_namespace":"prod","k8s_pod":"apiserver-master1"}`, + /*2*/ `{"timestamp":"2000-01-01T13:00:00.002Z","message":"scheduling task","level":"info","k8s_namespace":"test","k8s_pod":"scheduler-master1"}`, + /*3*/ `{"timestamp":"2000-01-01T13:00:00.003Z","message":"authentication error","level":"error","k8s_namespace":"test","k8s_pod":"apiserver-master2"}`, + /*4*/ `{"timestamp":"2000-01-01T13:00:00.004Z","message":"network policy applied","level":"info","k8s_namespace":"prod","k8s_pod":"proxy-node2"}`, + /*5*/ `{"timestamp":"2000-01-01T13:00:00.005Z","message":"scheduling completed","level":"info","k8s_namespace":"staging","k8s_pod":"scheduler-master2"}`, + /*6*/ `{"timestamp":"2000-01-01T13:00:00.006Z","message":"connection timeout","level":"error","k8s_namespace":"staging","k8s_pod":"app-backend-1"}`, + /*7*/ `{"timestamp":"2000-01-01T13:00:00.007Z","message":"health check passed","level":"info","k8s_namespace":"prod","k8s_pod":"app-frontend-1"}`, + /*8*/ `{"timestamp":"2000-01-01T13:00:00.008Z","message":"database query slow","level":"warn","k8s_namespace":"prod","k8s_pod":"app-backend-2"}`, + /*9*/ `{"timestamp":"2000-01-01T13:00:00.009Z","message":"cache miss","level":"warn","k8s_namespace":"test","k8s_pod":"app-cache-1"}`, } s.insertDocuments(docs) @@ -403,10 +403,10 @@ func (s *FractionTestSuite) TestSearchIn() { func (s *FractionTestSuite) TestSearchNested() { docs := []string{ - `{"timestamp":"2000-01-01T13:00:00.000Z","spans":[{"span_id":"1"},{"span_id":"2"}]}`, - `{"timestamp":"2000-01-01T13:00:00.001Z","spans":[{"span_id":"2"},{"span_id":"3"}]}`, - `{"timestamp":"2000-01-01T13:00:00.002Z","spans":[{"span_id":"1"},{"span_id":"3"}]}`, - `{"timestamp":"2000-01-01T13:00:00.003Z","spans":[{"span_id":"4"},{"span_id":"5"}]}`, + /*0*/ `{"timestamp":"2000-01-01T13:00:00.000Z","spans":[{"span_id":"1"},{"span_id":"2"}]}`, + /*1*/ `{"timestamp":"2000-01-01T13:00:00.001Z","spans":[{"span_id":"2"},{"span_id":"3"}]}`, + /*2*/ `{"timestamp":"2000-01-01T13:00:00.002Z","spans":[{"span_id":"1"},{"span_id":"3"}]}`, + /*3*/ `{"timestamp":"2000-01-01T13:00:00.003Z","spans":[{"span_id":"4"},{"span_id":"5"}]}`, } s.insertDocuments(docs) @@ -421,14 +421,14 @@ func (s *FractionTestSuite) TestSearchNested() { func (s *FractionTestSuite) TestSearchFromTo() { docs := []string{ - `{"timestamp":"2000-01-01T13:00:00.000Z","message":"bad","level":"1","trace_id":"0","service":"0"}`, - `{"timestamp":"2000-01-01T13:00:00.001Z","message":"good","level":"2","trace_id":"0","service":"1"}`, - `{"timestamp":"2000-01-01T13:00:00.002Z","message":"bad","level":"3","trace_id":"0","service":"2"}`, - `{"timestamp":"2000-01-01T13:00:00.003Z","message":"good","level":"4","trace_id":"1","service":"0"}`, - `{"timestamp":"2000-01-01T13:00:00.004Z","message":"bad","level":"5","trace_id":"1","service":"1"}`, - `{"timestamp":"2000-01-01T13:00:00.005Z","message":"good","level":"6","trace_id":"1","service":"2"}`, - `{"timestamp":"2000-01-01T13:00:00.006Z","message":"bad","level":"7","trace_id":"2","service":"0"}`, - `{"timestamp":"2000-01-01T13:00:00.007Z","message":"good","level":"8","trace_id":"2","service":"1"}`, + /*0*/ `{"timestamp":"2000-01-01T13:00:00.000Z","message":"bad","level":"1","trace_id":"0","service":"0"}`, + /*1*/ `{"timestamp":"2000-01-01T13:00:00.001Z","message":"good","level":"2","trace_id":"0","service":"1"}`, + /*2*/ `{"timestamp":"2000-01-01T13:00:00.002Z","message":"bad","level":"3","trace_id":"0","service":"2"}`, + /*3*/ `{"timestamp":"2000-01-01T13:00:00.003Z","message":"good","level":"4","trace_id":"1","service":"0"}`, + /*4*/ `{"timestamp":"2000-01-01T13:00:00.004Z","message":"bad","level":"5","trace_id":"1","service":"1"}`, + /*5*/ `{"timestamp":"2000-01-01T13:00:00.005Z","message":"good","level":"6","trace_id":"1","service":"2"}`, + /*6*/ `{"timestamp":"2000-01-01T13:00:00.006Z","message":"bad","level":"7","trace_id":"2","service":"0"}`, + /*7*/ `{"timestamp":"2000-01-01T13:00:00.007Z","message":"good","level":"8","trace_id":"2","service":"1"}`, } s.insertDocuments(docs) @@ -473,14 +473,14 @@ func (s *FractionTestSuite) TestSearchFromTo() { func (s *FractionTestSuite) TestSearchWithLimit() { docs := []string{ - `{"timestamp":"2000-01-01T13:00:00.000Z","message":"bad","level":"1","trace_id":"0","service":"0"}`, - `{"timestamp":"2000-01-01T13:00:00.001Z","message":"good","level":"2","trace_id":"0","service":"1"}`, - `{"timestamp":"2000-01-01T13:00:00.002Z","message":"bad","level":"3","trace_id":"0","service":"2"}`, - `{"timestamp":"2000-01-01T13:00:00.003Z","message":"good","level":"4","trace_id":"1","service":"0"}`, - `{"timestamp":"2000-01-01T13:00:00.004Z","message":"bad","level":"5","trace_id":"1","service":"1"}`, - `{"timestamp":"2000-01-01T13:00:00.005Z","message":"good","level":"6","trace_id":"1","service":"2"}`, - `{"timestamp":"2000-01-01T13:00:00.006Z","message":"bad","level":"7","trace_id":"2","service":"0"}`, - `{"timestamp":"2000-01-01T13:00:00.007Z","message":"good","level":"8","trace_id":"2","service":"1"}`, + /*0*/ `{"timestamp":"2000-01-01T13:00:00.000Z","message":"bad","level":"1","trace_id":"0","service":"0"}`, + /*1*/ `{"timestamp":"2000-01-01T13:00:00.001Z","message":"good","level":"2","trace_id":"0","service":"1"}`, + /*2*/ `{"timestamp":"2000-01-01T13:00:00.002Z","message":"bad","level":"3","trace_id":"0","service":"2"}`, + /*3*/ `{"timestamp":"2000-01-01T13:00:00.003Z","message":"good","level":"4","trace_id":"1","service":"0"}`, + /*4*/ `{"timestamp":"2000-01-01T13:00:00.004Z","message":"bad","level":"5","trace_id":"1","service":"1"}`, + /*5*/ `{"timestamp":"2000-01-01T13:00:00.005Z","message":"good","level":"6","trace_id":"1","service":"2"}`, + /*6*/ `{"timestamp":"2000-01-01T13:00:00.006Z","message":"bad","level":"7","trace_id":"2","service":"0"}`, + /*7*/ `{"timestamp":"2000-01-01T13:00:00.007Z","message":"good","level":"8","trace_id":"2","service":"1"}`, } s.insertDocuments(docs) @@ -913,15 +913,15 @@ func (s *FractionTestSuite) TestAggAvgWithoutGroupBy() { func (s *FractionTestSuite) TestSearchMultipleBulks() { docs := []string{ - `{"timestamp":"2000-01-01T13:00:01Z","service":"service_a","message":"request started","source":"prod01","level":"1"}`, - `{"timestamp":"2000-01-01T13:00:02Z","service":"service_b","message":"processing data","source":"prod03","level":"1"}`, - `{"timestamp":"2000-01-01T13:00:03Z","service":"service_c","message":"database query","source":"prod02","level":"2"}`, - `{"timestamp":"2000-01-01T13:00:04Z","service":"service_a","message":"request completed","source":"prod01","level":"1"}`, - `{"timestamp":"2000-01-01T13:00:05Z","service":"service_c","message":"cache hit","source":"prod03","level":"3"}`, - `{"timestamp":"2000-01-01T13:00:06Z","service":"service_c","message":"processing request","source":"prod01","level":"2"}`, - `{"timestamp":"2000-01-01T13:00:07Z","service":"service_a","message":"request failed","source":"prod02","level":"1"}`, - `{"timestamp":"2000-01-01T13:00:08Z","service":"service_b","message":"processing failed","source":"prod03","level":"4"}`, - `{"timestamp":"2000-01-01T13:00:09Z","service":"service_b","message":"processing retry","source":"prod03","level":"3"}`, + /*0*/ `{"timestamp":"2000-01-01T13:00:01Z","service":"service_a","message":"request started","source":"prod01","level":"1"}`, + /*1*/ `{"timestamp":"2000-01-01T13:00:02Z","service":"service_b","message":"processing data","source":"prod03","level":"1"}`, + /*2*/ `{"timestamp":"2000-01-01T13:00:03Z","service":"service_c","message":"database query","source":"prod02","level":"2"}`, + /*3*/ `{"timestamp":"2000-01-01T13:00:04Z","service":"service_a","message":"request completed","source":"prod01","level":"1"}`, + /*4*/ `{"timestamp":"2000-01-01T13:00:05Z","service":"service_c","message":"cache hit","source":"prod03","level":"3"}`, + /*5*/ `{"timestamp":"2000-01-01T13:00:06Z","service":"service_c","message":"processing request","source":"prod01","level":"2"}`, + /*6*/ `{"timestamp":"2000-01-01T13:00:07Z","service":"service_a","message":"request failed","source":"prod02","level":"1"}`, + /*7*/ `{"timestamp":"2000-01-01T13:00:08Z","service":"service_b","message":"processing failed","source":"prod03","level":"4"}`, + /*8*/ `{"timestamp":"2000-01-01T13:00:09Z","service":"service_b","message":"processing retry","source":"prod03","level":"3"}`, } var bulk1 []string var bulk2 []string From 81508a03bead689155f2a8982949cb54e9f4e3b4 Mon Sep 17 00:00:00 2001 From: Andrei Cheboksarov <37665782+cheb0@users.noreply.github.com> Date: Thu, 16 Oct 2025 17:14:36 +0400 Subject: [PATCH 41/48] PR fixes --- frac/fraction_test.go | 287 +++++++++++++++++------------------------- 1 file changed, 114 insertions(+), 173 deletions(-) diff --git a/frac/fraction_test.go b/frac/fraction_test.go index 41c8dafa..26ea2048 100644 --- a/frac/fraction_test.go +++ b/frac/fraction_test.go @@ -31,18 +31,27 @@ import ( type FractionTestSuite struct { suite.Suite - tmpDir string - config *Config - mapping seq.Mapping - tokenizers map[seq.TokenizerType]tokenizer.Tokenizer - activeIndexers []*ActiveIndexer - sealParams common.SealParams + tmpDir string + config *Config + mapping seq.Mapping + tokenizers map[seq.TokenizerType]tokenizer.Tokenizer + activeIndexer *ActiveIndexer + sealParams common.SealParams fraction Fraction insertDocuments func(docs ...[]string) } +func (s *FractionTestSuite) SetupSuite() { + s.activeIndexer = NewActiveIndexer(4, 10) + s.activeIndexer.Start() +} + +func (s *FractionTestSuite) TearDownSuite() { + s.activeIndexer.Stop() +} + func (s *FractionTestSuite) SetupTestCommon() { s.config = &Config{ Search: SearchConfig{ @@ -91,16 +100,7 @@ func (s *FractionTestSuite) SetupTestCommon() { s.Require().NoError(err) } -func newSmallCache[V any]() *cache.Cache[V] { - return cache.NewCache[V](cache.NewCleaner(uint64(units.KiB), nil), nil) -} - func (s *FractionTestSuite) TearDownTestCommon() { - for _, activeIndexer := range s.activeIndexers { - activeIndexer.Stop() - } - s.activeIndexers = nil - err := os.RemoveAll(s.tmpDir) s.NoError(err, "Failed to remove tmp dir") } @@ -219,15 +219,15 @@ func (s *FractionTestSuite) TestWildcardSymbolsSearch() { s.AssertSearch(`message:*`, docs, []int{3, 2, 1, 0}) s.AssertSearch(`message:value`, docs, []int{1, 0}) s.AssertSearch(`message:value*`, docs, []int{2, 1, 0}) - s.AssertSearch(`message:value\*`, docs, []int{}) - s.AssertSearch(`message:value\**`, docs, []int{2}) - s.AssertSearch(`message:*\**`, docs, []int{3, 2, 1, 0}) - s.AssertSearch(`message:*e\**`, docs, []int{2}) - s.AssertSearch(`message:\**`, docs, []int{3, 1, 0}) - s.AssertSearch(`message:\*\*\*\*`, docs, []int{3, 0}) - s.AssertSearch(`message:\*\*\*\**`, docs, []int{3, 1, 0}) - s.AssertSearch(`message:value* AND message:\*\**`, docs, []int{1, 0}) - s.AssertSearch(`message:value* OR message:\*\**`, docs, []int{3, 2, 1, 0}) + s.AssertSearch(`message:"value\*"`, docs, []int{}) + s.AssertSearch(`message:"value\**"`, docs, []int{2}) + s.AssertSearch(`message:"*\**"`, docs, []int{3, 2, 1, 0}) + s.AssertSearch(`message:"*e\**"`, docs, []int{2}) + s.AssertSearch(`message:"\**"`, docs, []int{3, 1, 0}) + s.AssertSearch(`message:"\*\*\*\*"`, docs, []int{3, 0}) + s.AssertSearch(`message:"\*\*\*\**"`, docs, []int{3, 1, 0}) + s.AssertSearch(`message:value* AND message:"\*\**"`, docs, []int{1, 0}) + s.AssertSearch(`message:value* OR message:"\*\**"`, docs, []int{3, 2, 1, 0}) } func (s *FractionTestSuite) TestSearchFullText() { @@ -270,17 +270,17 @@ func (s *FractionTestSuite) TestSearchPath() { s.insertDocuments(docs) - s.AssertSearch("request_uri:/one", docs, []int{9, 8, 7, 6, 5, 4, 3, 2, 1, 0}) - s.AssertSearch("request_uri:/two", docs, []int{10}) - s.AssertSearch("request_uri:/one/two", docs, []int{8, 7, 6, 5, 2, 1}) - s.AssertSearch("request_uri:/one/two/three", docs, []int{8, 7, 6, 5, 2}) - s.AssertSearch("request_uri:/one/two/three/1", docs, []int{6}) - s.AssertSearch("request_uri:/one/two.three", docs, []int{4, 3}) - s.AssertSearch("request_uri:/one/two.three/four", docs, []int{3}) - s.AssertSearch("request_uri:/one/*/three", docs, []int{9, 8, 7, 6, 5, 2}) - s.AssertSearch("request_uri:/two/*/three", docs, []int{10}) - s.AssertSearch("request_uri:*/three/", docs, []int{5}) - s.AssertSearch("request_uri:*/three", docs, []int{10, 9, 8, 7, 6, 5, 2}) + s.AssertSearch(`request_uri:"/one"`, docs, []int{9, 8, 7, 6, 5, 4, 3, 2, 1, 0}) + s.AssertSearch(`request_uri:"/two"`, docs, []int{10}) + s.AssertSearch(`request_uri:"/one/two"`, docs, []int{8, 7, 6, 5, 2, 1}) + s.AssertSearch(`request_uri:"/one/two/three"`, docs, []int{8, 7, 6, 5, 2}) + s.AssertSearch(`request_uri:"/one/two/three/1"`, docs, []int{6}) + s.AssertSearch(`request_uri:"/one/two.three"`, docs, []int{4, 3}) + s.AssertSearch(`request_uri:"/one/two.three/four"`, docs, []int{3}) + s.AssertSearch(`request_uri:"/one/*/three"`, docs, []int{9, 8, 7, 6, 5, 2}) + s.AssertSearch(`request_uri:"/two/*/three"`, docs, []int{10}) + s.AssertSearch(`request_uri:"*/three/"`, docs, []int{5}) + s.AssertSearch(`request_uri:"*/three"`, docs, []int{10, 9, 8, 7, 6, 5, 2}) } func (s *FractionTestSuite) TestSearchRange() { @@ -296,28 +296,17 @@ func (s *FractionTestSuite) TestSearchRange() { s.insertDocuments(docs) - s.AssertSearch("level:[1 TO 3]", docs, []int{1, 0}) - s.AssertSearch(s.seqql("level:[1, 3]"), docs, []int{1, 0}) - s.AssertSearch("level:[0 TO 63]", docs, []int{5, 4, 3, 2, 1, 0}) - s.AssertSearch(s.seqql("level:[0, 63]"), docs, []int{5, 4, 3, 2, 1, 0}) - - s.AssertSearch("level:{0 TO 3}", docs, []int{0}) - s.AssertSearch("level:{-100 TO 100}", docs, []int{5, 4, 3, 2, 1, 0}) - - s.AssertSearch("level:{0 TO 3]", docs, []int{1, 0}) - s.AssertSearch(s.seqql("level:(0, 3]"), docs, []int{1, 0}) - s.AssertSearch("level:[0 TO 3}", docs, []int{0}) - - s.AssertSearch("level:[-100 TO 100]", docs, []int{5, 4, 3, 2, 1, 0}) + s.AssertSearch("level:[1, 3]", docs, []int{1, 0}) + s.AssertSearch("level:[0, 63]", docs, []int{5, 4, 3, 2, 1, 0}) + s.AssertSearch("level:[-100, 100]", docs, []int{5, 4, 3, 2, 1, 0}) + s.AssertSearch("level:(0, 3]", docs, []int{1, 0}) - s.AssertSearch("level:[0 TO *]", docs, []int{6, 5, 4, 3, 2, 1, 0}) - s.AssertSearch(s.seqql("level:[0, *]"), docs, []int{6, 5, 4, 3, 2, 1, 0}) - s.AssertSearch("level:[0 TO *}", docs, []int{6, 5, 4, 3, 2, 1, 0}) - s.AssertSearch("level:[31 TO *]", docs, []int{6, 5, 4}) - s.AssertSearch("level:{31 TO *]", docs, []int{6, 5}) + s.AssertSearch("level:[0, *]", docs, []int{6, 5, 4, 3, 2, 1, 0}) + s.AssertSearch("level:[31, *]", docs, []int{6, 5, 4}) + s.AssertSearch("level:(31, *]", docs, []int{6, 5}) - s.AssertSearch("level:[200 TO 300]", docs, []int{}) - s.AssertSearch("level:{127 TO 200]", docs, []int{}) + s.AssertSearch("level:[200, 300]", docs, []int{}) + s.AssertSearch("level:(127, 200]", docs, []int{}) } func (s *FractionTestSuite) TestSearchIPRange() { @@ -345,23 +334,23 @@ func (s *FractionTestSuite) TestSearchIPRange() { s.insertDocuments(docs) - s.AssertSearch(s.seqql("client_ip:ip_range(192.168.0.0,192.168.0.255)"), docs, []int{3, 2, 1}) - s.AssertSearch(s.seqql("client_ip:ip_range(192.168.1.0,192.168.1.255)"), docs, []int{7, 6, 5, 4}) - s.AssertSearch(s.seqql("client_ip:ip_range(172.10.0.0,172.10.0.255)"), docs, []int{10, 9}) - s.AssertSearch(s.seqql("client_ip:ip_range(172.10.0.0,172.10.255.255)"), docs, []int{13, 12, 11, 10, 9}) - s.AssertSearch(s.seqql("client_ip:ip_range(10.53.0.0,10.53.0.255)"), docs, []int{15, 14}) - s.AssertSearch(s.seqql("client_ip:ip_range(10.53.0.0,10.53.255.255)"), docs, []int{18, 17, 16, 15, 14}) + s.AssertSearch("client_ip:ip_range(192.168.0.0,192.168.0.255)", docs, []int{3, 2, 1}) + s.AssertSearch("client_ip:ip_range(192.168.1.0,192.168.1.255)", docs, []int{7, 6, 5, 4}) + s.AssertSearch("client_ip:ip_range(172.10.0.0,172.10.0.255)", docs, []int{10, 9}) + s.AssertSearch("client_ip:ip_range(172.10.0.0,172.10.255.255)", docs, []int{13, 12, 11, 10, 9}) + s.AssertSearch("client_ip:ip_range(10.53.0.0,10.53.0.255)", docs, []int{15, 14}) + s.AssertSearch("client_ip:ip_range(10.53.0.0,10.53.255.255)", docs, []int{18, 17, 16, 15, 14}) - s.AssertSearch(s.seqql("client_ip:ip_range(192.168.0.0/24)"), docs, []int{3, 2, 1}) - s.AssertSearch(s.seqql("client_ip:ip_range(192.168.1.0/24)"), docs, []int{7, 6, 5, 4}) - s.AssertSearch(s.seqql("client_ip:ip_range(172.10.0.0/24)"), docs, []int{10, 9}) - s.AssertSearch(s.seqql("client_ip:ip_range(10.53.0.0/24)"), docs, []int{15, 14}) + s.AssertSearch("client_ip:ip_range(192.168.0.0/24)", docs, []int{3, 2, 1}) + s.AssertSearch("client_ip:ip_range(192.168.1.0/24)", docs, []int{7, 6, 5, 4}) + s.AssertSearch("client_ip:ip_range(172.10.0.0/24)", docs, []int{10, 9}) + s.AssertSearch("client_ip:ip_range(10.53.0.0/24)", docs, []int{15, 14}) - s.AssertSearch(s.seqql("client_ip:ip_range(172.10.0.0/16)"), docs, []int{13, 12, 11, 10, 9}) - s.AssertSearch(s.seqql("client_ip:ip_range(10.53.0.0/16)"), docs, []int{18, 17, 16, 15, 14}) + s.AssertSearch("client_ip:ip_range(172.10.0.0/16)", docs, []int{13, 12, 11, 10, 9}) + s.AssertSearch("client_ip:ip_range(10.53.0.0/16)", docs, []int{18, 17, 16, 15, 14}) - s.AssertSearch(s.seqql("client_ip:ip_range(192.168.31.0/32)"), docs, []int{8, 0}) - s.AssertSearch(s.seqql("client_ip:ip_range(172.10.0.1/32)"), docs, []int{9}) + s.AssertSearch("client_ip:ip_range(192.168.31.0/32)", docs, []int{8, 0}) + s.AssertSearch("client_ip:ip_range(172.10.0.1/32)", docs, []int{9}) } func (s *FractionTestSuite) TestSearchIn() { @@ -380,23 +369,23 @@ func (s *FractionTestSuite) TestSearchIn() { s.insertDocuments(docs) - s.AssertSearch(s.seqql("k8s_namespace:in(prod)"), docs, []int{8, 7, 4, 1, 0}) - s.AssertSearch(s.seqql("k8s_namespace:in(test)"), docs, []int{9, 3, 2}) - s.AssertSearch(s.seqql("k8s_namespace:in(staging)"), docs, []int{6, 5}) - s.AssertSearch(s.seqql("k8s_namespace:in(prod,test)"), docs, []int{9, 8, 7, 4, 3, 2, 1, 0}) - s.AssertSearch(s.seqql("k8s_namespace:in(prod,test,staging)"), docs, []int{9, 8, 7, 6, 5, 4, 3, 2, 1, 0}) + s.AssertSearch("k8s_namespace:in(prod)", docs, []int{8, 7, 4, 1, 0}) + s.AssertSearch("k8s_namespace:in(test)", docs, []int{9, 3, 2}) + s.AssertSearch("k8s_namespace:in(staging)", docs, []int{6, 5}) + s.AssertSearch("k8s_namespace:in(prod,test)", docs, []int{9, 8, 7, 4, 3, 2, 1, 0}) + s.AssertSearch("k8s_namespace:in(prod,test,staging)", docs, []int{9, 8, 7, 6, 5, 4, 3, 2, 1, 0}) - s.AssertSearch(s.seqql("k8s_pod:in(proxy-*)"), docs, []int{4, 0}) - s.AssertSearch(s.seqql("k8s_pod:in(apiserver-*)"), docs, []int{3, 1}) - s.AssertSearch(s.seqql("k8s_pod:in(scheduler-*)"), docs, []int{5, 2}) - s.AssertSearch(s.seqql("k8s_pod:in(proxy-*,apiserver-*)"), docs, []int{4, 3, 1, 0}) - s.AssertSearch(s.seqql("k8s_pod:in(proxy-*,apiserver-*,scheduler-*)"), docs, []int{5, 4, 3, 2, 1, 0}) + s.AssertSearch("k8s_pod:in(proxy-*)", docs, []int{4, 0}) + s.AssertSearch("k8s_pod:in(apiserver-*)", docs, []int{3, 1}) + s.AssertSearch("k8s_pod:in(scheduler-*)", docs, []int{5, 2}) + s.AssertSearch("k8s_pod:in(proxy-*,apiserver-*)", docs, []int{4, 3, 1, 0}) + s.AssertSearch("k8s_pod:in(proxy-*,apiserver-*,scheduler-*)", docs, []int{5, 4, 3, 2, 1, 0}) - s.AssertSearch(s.seqql("level:error AND k8s_namespace:in(prod,test)"), docs, []int{3, 1}) - s.AssertSearch(s.seqql("level:error AND k8s_namespace:in(prod,test) AND k8s_pod:in(apiserver-*)"), docs, []int{3, 1}) + s.AssertSearch("level:error AND k8s_namespace:in(prod,test)", docs, []int{3, 1}) + s.AssertSearch("level:error AND k8s_namespace:in(prod,test) AND k8s_pod:in(apiserver-*)", docs, []int{3, 1}) s.AssertSearch( - s.seqql(`level:error AND k8s_namespace:in(prod,test) AND k8s_pod:in(proxy-*,apiserver-*,scheduler-*)`), + `level:error AND k8s_namespace:in(prod,test) AND k8s_pod:in(proxy-*,apiserver-*,scheduler-*)`, docs, []int{3, 1}) } @@ -411,7 +400,7 @@ func (s *FractionTestSuite) TestSearchNested() { s.insertDocuments(docs) - s.AssertSearchIgnoreTotal("spans.span_id:*", docs, []int{3, 2, 1, 0}) + s.AssertSearch("spans.span_id:*", docs, []int{3, 2, 1, 0}) s.AssertSearch("spans.span_id:1", docs, []int{2, 0}) s.AssertSearch("spans.span_id:2", docs, []int{1, 0}) s.AssertSearch("spans.span_id:3", docs, []int{2, 1}) @@ -1051,25 +1040,6 @@ func (s *FractionTestSuite) TestFractionInfo() { type searchOption func(*processor.SearchParams) error func (s *FractionTestSuite) query(queryString string, options ...searchOption) *processor.SearchParams { - queryAst, err := parser.ParseQuery(queryString, s.mapping) - s.Require().NoError(err, "failed to parse query: %s", queryString) - - params := &processor.SearchParams{ - AST: queryAst, - From: seq.MID(0), - To: seq.MID(math.MaxUint64), - Limit: math.MaxInt32, - } - - for _, option := range options { - err := option(params) - s.Require().NoError(err, "option can not be applied") - } - - return params -} - -func (s *FractionTestSuite) seqql(queryString string, options ...searchOption) *processor.SearchParams { queryAst, err := parser.ParseSeqQL(queryString, s.mapping) s.Require().NoError(err, "failed to parse query: %s", queryString) @@ -1141,16 +1111,12 @@ func withAggQuery(aggQuery processor.AggQuery) searchOption { } } -func (s *FractionTestSuite) AssertSearchIgnoreTotal(query string, originalDocs []string, expectedIndexes []int) { - s.AssertSearchWithSearchParams(s.query(query), originalDocs, expectedIndexes, false) -} - func (s *FractionTestSuite) AssertSearch(queryObject interface{}, originalDocs []string, expectedIndexes []int) { switch q := queryObject.(type) { case string: - s.AssertSearchWithSearchParams(s.query(q), originalDocs, expectedIndexes, true) + s.AssertSearchWithSearchParams(s.query(q), originalDocs, expectedIndexes) case *processor.SearchParams: - s.AssertSearchWithSearchParams(q, originalDocs, expectedIndexes, true) + s.AssertSearchWithSearchParams(q, originalDocs, expectedIndexes) default: s.Require().Fail("type for query object not supported") } @@ -1159,16 +1125,7 @@ func (s *FractionTestSuite) AssertSearch(queryObject interface{}, originalDocs [ func (s *FractionTestSuite) AssertSearchWithSearchParams( params *processor.SearchParams, originalDocs []string, - expectedIndexes []int, - checkTotal bool) { - - var withTotals = []bool{false} - - // We can check total only if limit is not set. Otherwise, total returns a count - // of all docs which match the query - if checkTotal && params.Limit == math.MaxInt32 { - withTotals = append(withTotals, true) - } + expectedIndexes []int) { var sortOrders = []seq.DocsOrder{params.Order} if params.Order == seq.DocsOrderDesc && params.Limit == math.MaxInt32 { @@ -1176,39 +1133,28 @@ func (s *FractionTestSuite) AssertSearchWithSearchParams( } for _, order := range sortOrders { - for _, withTotal := range withTotals { - params.Order = order - params.WithTotal = withTotal - - qpr, err := s.fraction.Search(context.Background(), *params) - s.Require().NoError(err, "search failed for query with order=%v", order) + params.Order = order - if withTotal { - s.Require().Equal(uint64(len(expectedIndexes)), qpr.Total, "qpr.total doesn't match") - } else { - s.Require().Equal(uint64(0), qpr.Total, "qpr has total but not expected to have") - } + qpr, err := s.fraction.Search(context.Background(), *params) + s.Require().NoError(err, "search failed for query with order=%v", order) - s.Require().Equal(len(expectedIndexes), qpr.IDs.Len(), "doc count doesn't match") + s.Require().Equal(len(expectedIndexes), qpr.IDs.Len(), "doc count doesn't match") - docs, err := s.fraction.Fetch(context.Background(), qpr.IDs.IDs()) - s.Require().NoError(err, "failed to fetch docs") + docs, err := s.fraction.Fetch(context.Background(), qpr.IDs.IDs()) + s.Require().NoError(err, "failed to fetch docs") - if order.IsReverse() { - slices.Reverse(docs) - } + if order.IsReverse() { + slices.Reverse(docs) + } - fetchedDocs := make([]string, 0, len(docs)) - for _, doc := range docs { - fetchedDocs = append(fetchedDocs, string(doc)) - } + fetchedDocs := make([]string, 0, len(docs)) + for _, doc := range docs { + fetchedDocs = append(fetchedDocs, string(doc)) + } - for i, fetchedDoc := range fetchedDocs { - if i < len(expectedIndexes) { - expectedDoc := originalDocs[expectedIndexes[i]] - s.Require().Equal(expectedDoc, fetchedDoc, "doc at index %d doesn't match", i) - } - } + for i, fetchedDoc := range fetchedDocs { + expectedDoc := originalDocs[expectedIndexes[i]] + s.Require().Equal(expectedDoc, fetchedDoc, "doc at index %d doesn't match", i) } } } @@ -1269,22 +1215,16 @@ func (s *FractionTestSuite) AssertHist( func (s *FractionTestSuite) newActive(bulks ...[]string) *Active { baseName := filepath.Join(s.tmpDir, "test_fraction") - activeIndexer := NewActiveIndexer(4, 10) - activeIndexer.Start() - s.activeIndexers = append(s.activeIndexers, activeIndexer) - active := NewActive( baseName, - activeIndexer, + s.activeIndexer, storage.NewReadLimiter(1, nil), - newSmallCache[[]byte](), - newSmallCache[[]byte](), + cache.NewCache[[]byte](nil, nil), + cache.NewCache[[]byte](nil, nil), s.config, ) - proc := indexer.NewProcessor(s.mapping, s.tokenizers, 0, 0, 0) - compressor := indexer.GetDocsMetasCompressor(3, 3) - defer indexer.PutDocMetasCompressor(compressor) + var wg sync.WaitGroup for _, docs := range bulks { docsCopy := make([]string, len(docs)) @@ -1303,18 +1243,19 @@ func (s *FractionTestSuite) newActive(bulks ...[]string) *Active { return d, nil } + proc := indexer.NewProcessor(s.mapping, s.tokenizers, 0, 0, 0) + compressor := indexer.GetDocsMetasCompressor(3, 3) _, binaryDocs, binaryMeta, err := proc.ProcessBulk(time.Now(), nil, nil, readNext) s.Require().NoError(err, "processing bulk failed") compressor.CompressDocsAndMetas(binaryDocs, binaryMeta) docsBlock, metasBlock := compressor.DocsMetas() - var wg sync.WaitGroup wg.Add(1) err = active.Append(docsBlock, metasBlock, &wg) s.Require().NoError(err, "append to active failed") - wg.Wait() } + wg.Wait() return active } @@ -1328,13 +1269,13 @@ func (s *FractionTestSuite) newSealed(bulks ...[]string) *Sealed { s.Require().NoError(err, "Sealing failed") indexCache := &IndexCache{ - MIDs: newSmallCache[[]byte](), - RIDs: newSmallCache[[]byte](), - Params: newSmallCache[seqids.BlockParams](), - LIDs: newSmallCache[*lids.Block](), - Tokens: newSmallCache[*token.Block](), - TokenTable: newSmallCache[token.Table](), - Registry: newSmallCache[[]byte](), + MIDs: cache.NewCache[[]byte](nil, nil), + RIDs: cache.NewCache[[]byte](nil, nil), + Params: cache.NewCache[seqids.BlockParams](nil, nil), + LIDs: cache.NewCache[*lids.Block](nil, nil), + Tokens: cache.NewCache[*token.Block](nil, nil), + TokenTable: cache.NewCache[token.Table](nil, nil), + Registry: cache.NewCache[[]byte](nil, nil), } sealed := NewSealedPreloaded( @@ -1342,7 +1283,7 @@ func (s *FractionTestSuite) newSealed(bulks ...[]string) *Sealed { preloaded, storage.NewReadLimiter(1, nil), indexCache, - newSmallCache[[]byte](), + cache.NewCache[[]byte](nil, nil), s.config, ) active.Release() @@ -1432,20 +1373,20 @@ func (s *SealedLoadedFractionTestSuite) newSealedLoaded(bulks ...[]string) *Seal sealed.close("closed") indexCache := &IndexCache{ - MIDs: newSmallCache[[]byte](), - RIDs: newSmallCache[[]byte](), - Params: newSmallCache[seqids.BlockParams](), - LIDs: newSmallCache[*lids.Block](), - Tokens: newSmallCache[*token.Block](), - TokenTable: newSmallCache[token.Table](), - Registry: newSmallCache[[]byte](), + MIDs: cache.NewCache[[]byte](nil, nil), + RIDs: cache.NewCache[[]byte](nil, nil), + Params: cache.NewCache[seqids.BlockParams](nil, nil), + LIDs: cache.NewCache[*lids.Block](nil, nil), + Tokens: cache.NewCache[*token.Block](nil, nil), + TokenTable: cache.NewCache[token.Table](nil, nil), + Registry: cache.NewCache[[]byte](nil, nil), } sealed = NewSealed( sealed.BaseFileName, storage.NewReadLimiter(1, nil), indexCache, - newSmallCache[[]byte](), + cache.NewCache[[]byte](nil, nil), nil, s.config) s.fraction = sealed From a95e0d30472ca537d93f3823d13a6647e0fc7f5d Mon Sep 17 00:00:00 2001 From: Andrei Cheboksarov <37665782+cheb0@users.noreply.github.com> Date: Thu, 16 Oct 2025 19:42:09 +0400 Subject: [PATCH 42/48] adjust limits in fraction info test --- frac/fraction_test.go | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/frac/fraction_test.go b/frac/fraction_test.go index 26ea2048..a97b4bcd 100644 --- a/frac/fraction_test.go +++ b/frac/fraction_test.go @@ -1018,19 +1018,19 @@ func (s *FractionTestSuite) TestFractionInfo() { s.Require().Equal(uint32(5), info.DocsTotal, "doc total doesn't match") // it varies depending on params and docs shuffled s.Require().True(info.DocsOnDisk > uint64(200) && info.DocsOnDisk < uint64(250), - "doc raw doesn't match. actual value: %d", info.DocsOnDisk) + "doc on disk doesn't match. actual value: %d", info.DocsOnDisk) s.Require().Equal(uint64(573), info.DocsRaw, "doc raw doesn't match") s.Require().Equal(seq.MID(946731625000), info.From, "from doesn't match") - s.Require().Equal(seq.MID(946731654000), info.To, "from doesn't match") + s.Require().Equal(seq.MID(946731654000), info.To, "to doesn't match") switch s.fraction.(type) { case *Active: - s.Require().True(info.MetaOnDisk >= uint64(320) && info.MetaOnDisk <= uint64(350), + s.Require().True(info.MetaOnDisk >= uint64(300) && info.MetaOnDisk <= uint64(350), "meta on disk doesn't match. actual value: %d", info.MetaOnDisk) s.Require().Equal(uint64(0), info.IndexOnDisk, "index on disk doesn't match") case *Sealed: s.Require().Equal(uint64(0), info.MetaOnDisk, "meta on disk doesn't match. actual value") - s.Require().True(info.IndexOnDisk > uint64(1450) && info.IndexOnDisk < uint64(1500), + s.Require().True(info.IndexOnDisk > uint64(1450) && info.IndexOnDisk < uint64(1550), "index on disk doesn't match. actual value: %d", info.MetaOnDisk) default: s.Require().Fail("unsupported fraction type") @@ -1137,7 +1137,6 @@ func (s *FractionTestSuite) AssertSearchWithSearchParams( qpr, err := s.fraction.Search(context.Background(), *params) s.Require().NoError(err, "search failed for query with order=%v", order) - s.Require().Equal(len(expectedIndexes), qpr.IDs.Len(), "doc count doesn't match") docs, err := s.fraction.Fetch(context.Background(), qpr.IDs.IDs()) From 54a9da13cebec38b80b2a48902bc2481d7448ab4 Mon Sep 17 00:00:00 2001 From: Andrei Cheboksarov <37665782+cheb0@users.noreply.github.com> Date: Thu, 16 Oct 2025 19:55:26 +0400 Subject: [PATCH 43/48] test with total and limit --- frac/fraction_test.go | 42 +++++++++++++++++++++++++++++++++++++++--- 1 file changed, 39 insertions(+), 3 deletions(-) diff --git a/frac/fraction_test.go b/frac/fraction_test.go index a97b4bcd..17c51965 100644 --- a/frac/fraction_test.go +++ b/frac/fraction_test.go @@ -485,6 +485,35 @@ func (s *FractionTestSuite) TestSearchWithLimit() { []int{5, 3}) } +func (s *FractionTestSuite) TestSearchWithTotal() { + docs := []string{ + `{"timestamp":"2000-01-01T13:00:01.549Z","message": "apple banana smoothie"}`, + `{"timestamp":"2000-01-01T13:00:02.690Z","message": "fruit salad"}`, + `{"timestamp":"2000-01-01T13:00:03.102Z","message": "banana pineapple smoothie"}`, + `{"timestamp":"2000-01-01T13:00:03.052Z","message": "apple juice"}`, + `{"timestamp":"2000-01-01T13:00:04.999Z","message": "banana"}`, + `{"timestamp":"2000-01-01T13:00:05.000Z","message": "apple juice"}`, + `{"timestamp":"2000-01-01T13:00:10.777Z","message": "apple banana"}`, + `{"timestamp":"2000-01-01T13:00:15.100Z","message": "cherry pie"}`, + `{"timestamp":"2000-01-01T13:00:15.200Z","message": "apple tart"}`, + `{"timestamp":"2000-01-01T13:00:15.300Z","message": "bread crisp"}`, + `{"timestamp":"2000-01-01T13:00:20.500Z","message": "orange juice"}`, + `{"timestamp":"2000-01-01T13:00:25.600Z","message": "apple cider"}`, + } + + s.insertDocuments(docs) + + qpr, err := s.fraction.Search(context.Background(), *s.query("message:apple", withLimit(3), withTotal())) + s.Require().NoError(err, "search failed") + s.Require().Equal(uint64(6), qpr.Total) + s.Require().Equal(3, qpr.IDs.Len()) + + qpr, err = s.fraction.Search(context.Background(), *s.query("message:*", withLimit(4), withTotal())) + s.Require().NoError(err, "search failed") + s.Require().Equal(uint64(12), qpr.Total) + s.Require().Equal(4, qpr.IDs.Len()) +} + func (s *FractionTestSuite) TestSearchHist() { docs := []string{ `{"timestamp":"2000-01-01T13:00:01.549Z","message": "apple banana smoothie"}`, @@ -1017,7 +1046,7 @@ func (s *FractionTestSuite) TestFractionInfo() { // but if compression/marshalling has changed, expected values can be updated accordingly s.Require().Equal(uint32(5), info.DocsTotal, "doc total doesn't match") // it varies depending on params and docs shuffled - s.Require().True(info.DocsOnDisk > uint64(200) && info.DocsOnDisk < uint64(250), + s.Require().True(info.DocsOnDisk > uint64(200) && info.DocsOnDisk < uint64(300), "doc on disk doesn't match. actual value: %d", info.DocsOnDisk) s.Require().Equal(uint64(573), info.DocsRaw, "doc raw doesn't match") s.Require().Equal(seq.MID(946731625000), info.From, "from doesn't match") @@ -1025,12 +1054,12 @@ func (s *FractionTestSuite) TestFractionInfo() { switch s.fraction.(type) { case *Active: - s.Require().True(info.MetaOnDisk >= uint64(300) && info.MetaOnDisk <= uint64(350), + s.Require().True(info.MetaOnDisk >= uint64(300) && info.MetaOnDisk <= uint64(400), "meta on disk doesn't match. actual value: %d", info.MetaOnDisk) s.Require().Equal(uint64(0), info.IndexOnDisk, "index on disk doesn't match") case *Sealed: s.Require().Equal(uint64(0), info.MetaOnDisk, "meta on disk doesn't match. actual value") - s.Require().True(info.IndexOnDisk > uint64(1450) && info.IndexOnDisk < uint64(1550), + s.Require().True(info.IndexOnDisk > uint64(1400) && info.IndexOnDisk < uint64(1600), "index on disk doesn't match. actual value: %d", info.MetaOnDisk) default: s.Require().Fail("unsupported fraction type") @@ -1087,6 +1116,13 @@ func withLimit(limit int) searchOption { } } +func withTotal() searchOption { + return func(p *processor.SearchParams) error { + p.WithTotal = true + return nil + } +} + func withHist(histInterval uint64) searchOption { return func(p *processor.SearchParams) error { p.HistInterval = histInterval From 16b59857c11617309b397a749f642e47fedc2b07 Mon Sep 17 00:00:00 2001 From: Andrei Cheboksarov <37665782+cheb0@users.noreply.github.com> Date: Thu, 16 Oct 2025 20:27:21 +0400 Subject: [PATCH 44/48] split suite.Run calls across functions --- frac/fraction_test.go | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/frac/fraction_test.go b/frac/fraction_test.go index 17c51965..d2ce6d50 100644 --- a/frac/fraction_test.go +++ b/frac/fraction_test.go @@ -1436,8 +1436,14 @@ func (s *SealedLoadedFractionTestSuite) TearDownTest() { s.TearDownTestCommon() } -func TestFractionSuites(t *testing.T) { +func TestActiveFractionTestSuite(t *testing.T) { suite.Run(t, new(ActiveFractionTestSuite)) +} + +func TestSealedFractionTestSuite(t *testing.T) { suite.Run(t, new(SealedFractionTestSuite)) +} + +func TestSealedLoadedFractionTestSuite(t *testing.T) { suite.Run(t, new(SealedLoadedFractionTestSuite)) } From 7b19e23dc4bcf5309343686d3c02be61737be82e Mon Sep 17 00:00:00 2001 From: Andrei Cheboksarov <37665782+cheb0@users.noreply.github.com> Date: Wed, 22 Oct 2025 12:15:43 +0400 Subject: [PATCH 45/48] PR fixes --- frac/fraction_test.go | 89 ++++++++++++++++++++++++++++++++----------- 1 file changed, 67 insertions(+), 22 deletions(-) diff --git a/frac/fraction_test.go b/frac/fraction_test.go index d2ce6d50..31d427c0 100644 --- a/frac/fraction_test.go +++ b/frac/fraction_test.go @@ -14,6 +14,8 @@ import ( "time" "github.com/alecthomas/units" + "github.com/stretchr/testify/suite" + "github.com/ozontech/seq-db/cache" "github.com/ozontech/seq-db/frac/common" "github.com/ozontech/seq-db/frac/processor" @@ -26,7 +28,6 @@ import ( "github.com/ozontech/seq-db/seq" "github.com/ozontech/seq-db/storage" "github.com/ozontech/seq-db/tokenizer" - "github.com/stretchr/testify/suite" ) type FractionTestSuite struct { @@ -53,17 +54,7 @@ func (s *FractionTestSuite) TearDownSuite() { } func (s *FractionTestSuite) SetupTestCommon() { - s.config = &Config{ - Search: SearchConfig{ - AggLimits: AggLimits{ - MaxFieldTokens: 1000, - MaxGroupTokens: 1000, - MaxTIDsPerFraction: 1000, - }, - }, - SkipSortDocs: false, - KeepMetaFile: false, - } + s.config = &Config{} s.tokenizers = map[seq.TokenizerType]tokenizer.Tokenizer{ seq.TokenizerTypeKeyword: tokenizer.NewKeywordTokenizer(20, false, true), seq.TokenizerTypeText: tokenizer.NewTextTokenizer(20, false, true, 100), @@ -96,7 +87,7 @@ func (s *FractionTestSuite) SetupTestCommon() { } var err error - s.tmpDir, err = os.MkdirTemp("", "fraction_test_*") + s.tmpDir, err = os.MkdirTemp(os.TempDir(), "fraction_test_*") s.Require().NoError(err) } @@ -1031,11 +1022,11 @@ func (s *FractionTestSuite) TestSearchLargeFrac() { func (s *FractionTestSuite) TestFractionInfo() { docs := []string{ - `{"timestamp":"2000-01-01T13:00:25Z","service":"service_a","message":"first message some text", "service":"gateway"}`, - `{"timestamp":"2000-01-01T13:00:32Z","service":"service_b","message":"second message other text", "service":"kube-proxy"}`, - `{"timestamp":"2000-01-01T13:00:43Z","service":"service_c","message":"third message other text", "service":"gateway"}`, - `{"timestamp":"2000-01-01T13:00:53Z","service":"service_a","message":"fourth message some text", "service":"kube-proxy"}`, - `{"timestamp":"2000-01-01T13:00:54Z","service":"service_c","message":"apple","service":"kube-scheduler"}`, + `{"timestamp":"2000-01-01T13:00:25Z","service":"service_a","message":"first message some text", "container":"gateway"}`, + `{"timestamp":"2000-01-01T13:00:32Z","service":"service_b","message":"second message other text", "container":"kube-proxy"}`, + `{"timestamp":"2000-01-01T13:00:43Z","service":"service_c","message":"third message other text", "container":"gateway"}`, + `{"timestamp":"2000-01-01T13:00:53Z","service":"service_a","message":"fourth message some text", "container":"kube-proxy"}`, + `{"timestamp":"2000-01-01T13:00:54Z","service":"service_c","message":"apple","container":"kube-scheduler"}`, } s.insertDocuments(docs) @@ -1048,13 +1039,13 @@ func (s *FractionTestSuite) TestFractionInfo() { // it varies depending on params and docs shuffled s.Require().True(info.DocsOnDisk > uint64(200) && info.DocsOnDisk < uint64(300), "doc on disk doesn't match. actual value: %d", info.DocsOnDisk) - s.Require().Equal(uint64(573), info.DocsRaw, "doc raw doesn't match") + s.Require().Equal(uint64(583), info.DocsRaw, "doc raw doesn't match") s.Require().Equal(seq.MID(946731625000), info.From, "from doesn't match") s.Require().Equal(seq.MID(946731654000), info.To, "to doesn't match") switch s.fraction.(type) { case *Active: - s.Require().True(info.MetaOnDisk >= uint64(300) && info.MetaOnDisk <= uint64(400), + s.Require().True(info.MetaOnDisk >= uint64(250) && info.MetaOnDisk <= uint64(350), "meta on disk doesn't match. actual value: %d", info.MetaOnDisk) s.Require().Equal(uint64(0), info.IndexOnDisk, "index on disk doesn't match") case *Sealed: @@ -1262,8 +1253,7 @@ func (s *FractionTestSuite) newActive(bulks ...[]string) *Active { var wg sync.WaitGroup for _, docs := range bulks { - docsCopy := make([]string, len(docs)) - copy(docsCopy, docs) + docsCopy := slices.Clone(docs) rand.Shuffle(len(docsCopy), func(i, j int) { docsCopy[i], docsCopy[j] = docsCopy[j], docsCopy[i] }) @@ -1358,6 +1348,57 @@ func (s *ActiveFractionTestSuite) TearDownTest() { s.TearDownTestCommon() } +/* +ActiveReplayedFractionTestSuite run tests for active fraction which was replayed from meta and docs file on disk +*/ +type ActiveReplayedFractionTestSuite struct { + FractionTestSuite +} + +func (s *ActiveReplayedFractionTestSuite) SetupTest() { + s.SetupTestCommon() + // Setting this flags allows to keep meta and docs files on disk after Active.Release() is called + s.config.SkipSortDocs = true + s.config.KeepMetaFile = true + + s.insertDocuments = func(bulks ...[]string) { + if s.fraction != nil { + s.Require().Fail("can insert docs only once") + } + s.fraction = s.Replay(s.newActive(bulks...)) + } +} + +func (s *ActiveReplayedFractionTestSuite) Replay(frac *Active) Fraction { + fracFileName := frac.BaseFileName + frac.Release() + replayedFrac := NewActive( + fracFileName, + s.activeIndexer, + storage.NewReadLimiter(1, nil), + cache.NewCache[[]byte](nil, nil), + cache.NewCache[[]byte](nil, nil), + &Config{}) + err := replayedFrac.Replay(context.Background()) + s.Require().NoError(err, "replay failed") + return replayedFrac +} + +func (s *ActiveReplayedFractionTestSuite) TearDownTest() { + if s.fraction != nil { + active, ok := s.fraction.(*Active) + if ok { + active.Release() + } else { + s.Require().Fail("fraction is not of Active type") + } + s.fraction.Suicide() + s.fraction = nil + } + + s.TearDownTestCommon() +} + /* SealedFractionTestSuite run tests for sealed fraction. Active fraction is created first and then sealed. */ @@ -1440,6 +1481,10 @@ func TestActiveFractionTestSuite(t *testing.T) { suite.Run(t, new(ActiveFractionTestSuite)) } +func TestActiveReplayedFractionTestSuite(t *testing.T) { + suite.Run(t, new(ActiveReplayedFractionTestSuite)) +} + func TestSealedFractionTestSuite(t *testing.T) { suite.Run(t, new(SealedFractionTestSuite)) } From 123e336d09f16486ca9ab99a75768b38941d6965 Mon Sep 17 00:00:00 2001 From: Andrei Cheboksarov <37665782+cheb0@users.noreply.github.com> Date: Wed, 5 Nov 2025 20:55:30 +0400 Subject: [PATCH 46/48] merge main --- tests/integration_tests/single_test.go | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/integration_tests/single_test.go b/tests/integration_tests/single_test.go index 6eecec7f..ed889f3a 100644 --- a/tests/integration_tests/single_test.go +++ b/tests/integration_tests/single_test.go @@ -85,6 +85,7 @@ func (s *SingleTestSuite) TestBasicSearchHotRead() { Shards: [][]string{}, Vers: []string{}, } +} func (s *SingleTestSuite) TestSearchAgg() { startTS := time.Now() From 839ca4ef64240848f667d2c6478c4c3fa70af758 Mon Sep 17 00:00:00 2001 From: Andrei Cheboksarov <37665782+cheb0@users.noreply.github.com> Date: Wed, 5 Nov 2025 21:00:58 +0400 Subject: [PATCH 47/48] fix linter issues --- frac/fraction_test.go | 2 +- tests/integration_tests/single_test.go | 28 -------------------------- 2 files changed, 1 insertion(+), 29 deletions(-) diff --git a/frac/fraction_test.go b/frac/fraction_test.go index 31d427c0..0227654a 100644 --- a/frac/fraction_test.go +++ b/frac/fraction_test.go @@ -978,7 +978,7 @@ func (s *FractionTestSuite) TestSearchLargeFrac() { level := rand.IntN(6) timestamp := baseTime.Add(time.Duration(i) * time.Millisecond) - doc := fmt.Sprintf(`{"timestamp":"%s","service":"%s","message":"%s","level":"%d"}`, + doc := fmt.Sprintf(`{"timestamp":%q,"service":%q,"message":%q,"level":"%d"}`, timestamp.Format(time.RFC3339Nano), service, message, level) docs = append(docs, doc) diff --git a/tests/integration_tests/single_test.go b/tests/integration_tests/single_test.go index ed889f3a..82e37981 100644 --- a/tests/integration_tests/single_test.go +++ b/tests/integration_tests/single_test.go @@ -119,34 +119,6 @@ func (s *SingleTestSuite) TestSearchAgg() { }) } -func (s *SingleTestSuite) assertSearch(docStrs []string) { - tests := []struct { - query string - indexes []int - }{ - {`service: service_a`, []int{3, 0}}, - {`traceID:abcdef`, []int{1, 0}}, - {`level: 1`, []int{1, 3, 0}}, - {`message: "message text"`, []int{2, 1, 3, 0}}, - {`message: "other text"`, []int{2, 1}}, - {`traceID: abcd*`, []int{1, 0}}, - {`traceID: a*`, []int{2, 1, 0}}, - {`traceID: a*f`, []int{1, 0}}, - {`traceID: a*a`, []int{2}}, - {`service: service*a`, []int{3, 0}}, - {`message: "message\ som*"`, []int{3, 0}}, - } - - s.RunFracEnvs(suites.AllFracEnvs, true, func() { - for _, test := range tests { - s.AssertSearch(test.query, docStrs, test.indexes) - } - // test limit - s.AssertDocsEqual(docStrs, []int{2, 1}, s.SearchDocs(`message:other`, 2, seq.DocsOrderAsc)) - s.AssertDocsEqual(docStrs, []int{2, 1}, s.SearchDocs(`message:other`, 2, seq.DocsOrderDesc)) - }) -} - // Test AND tree (sorting issue) func (s *SingleTestSuite) TestSearchNestedWithAND() { const ( From a8ab6c3637a1087ae56bc53d1d941a6483277e31 Mon Sep 17 00:00:00 2001 From: Andrei Cheboksarov <37665782+cheb0@users.noreply.github.com> Date: Wed, 5 Nov 2025 21:15:01 +0400 Subject: [PATCH 48/48] add legacy format range check --- frac/fraction_test.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/frac/fraction_test.go b/frac/fraction_test.go index 0227654a..e621c0b4 100644 --- a/frac/fraction_test.go +++ b/frac/fraction_test.go @@ -288,9 +288,11 @@ func (s *FractionTestSuite) TestSearchRange() { s.insertDocuments(docs) s.AssertSearch("level:[1, 3]", docs, []int{1, 0}) + s.AssertSearch("level:[1 TO 3]", docs, []int{1, 0}) s.AssertSearch("level:[0, 63]", docs, []int{5, 4, 3, 2, 1, 0}) s.AssertSearch("level:[-100, 100]", docs, []int{5, 4, 3, 2, 1, 0}) s.AssertSearch("level:(0, 3]", docs, []int{1, 0}) + s.AssertSearch("level:(0 TO 3]", docs, []int{1, 0}) s.AssertSearch("level:[0, *]", docs, []int{6, 5, 4, 3, 2, 1, 0}) s.AssertSearch("level:[31, *]", docs, []int{6, 5, 4})