From f154b7c3ee1f5d62f054db13128177e283056b23 Mon Sep 17 00:00:00 2001 From: Brian Gavin Date: Tue, 5 Aug 2025 14:12:49 -0700 Subject: [PATCH 1/5] small win by simple preallocating the path and parts slices, and only using 1 path allocation --- cache.go | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/cache.go b/cache.go index 065b8d6..f6b0b36 100644 --- a/cache.go +++ b/cache.go @@ -7,6 +7,7 @@ package schema import ( "errors" "reflect" + "slices" "strconv" "strings" "sync" @@ -48,9 +49,9 @@ func (c *cache) parsePath(p string, t reflect.Type) ([]pathPart, error) { var field *fieldInfo var index64 int64 var err error - parts := make([]pathPart, 0) - path := make([]string, 0) keys := strings.Split(p, ".") + parts := make([]pathPart, 0, len(keys)) + path := make([]string, 0, len(keys)) for i := 0; i < len(keys); i++ { if t.Kind() != reflect.Struct { return nil, errInvalidPath @@ -78,11 +79,11 @@ func (c *cache) parsePath(p string, t reflect.Type) ([]pathPart, error) { return nil, errInvalidPath } parts = append(parts, pathPart{ - path: path, + path: slices.Clip(path), field: field, index: int(index64), }) - path = make([]string, 0) + path = path[len(path):] // Get the next struct type, dropping ptrs. if field.typ.Kind() == reflect.Ptr { @@ -104,10 +105,11 @@ func (c *cache) parsePath(p string, t reflect.Type) ([]pathPart, error) { } // Add the remaining. parts = append(parts, pathPart{ - path: path, + path: slices.Clip(path), field: field, index: -1, }) + parts = slices.Clip(parts) return parts, nil } From 00d5c5ae2f8a52e195d25882116a492cd109b310 Mon Sep 17 00:00:00 2001 From: Brian Gavin Date: Tue, 5 Aug 2025 15:23:33 -0700 Subject: [PATCH 2/5] get rid of strings.Split usage using a custom iter type based on strings.Cut --- cache.go | 46 +++++++++++++++++++++++++++++++++++++--------- 1 file changed, 37 insertions(+), 9 deletions(-) diff --git a/cache.go b/cache.go index f6b0b36..812d2fd 100644 --- a/cache.go +++ b/cache.go @@ -38,6 +38,27 @@ func (c *cache) registerConverter(value interface{}, converterFunc Converter) { c.regconv[reflect.TypeOf(value)] = converterFunc } +type pathIter struct { + rest string +} + +func (p *pathIter) start(path string) (string, bool) { + return p.next(path) +} + +func (p *pathIter) advance() (string, bool) { + return p.next(p.rest) +} + +func (p *pathIter) next(path string) (key string, found bool) { + key, p.rest, found = strings.Cut(path, ".") + // special case for paths without ".". this can be the beginning or end of a path. + if !found && key != "" { + return key, true + } + return key, found +} + // parsePath parses a path in dotted notation verifying that it is a valid // path to a struct field. // @@ -49,33 +70,40 @@ func (c *cache) parsePath(p string, t reflect.Type) ([]pathPart, error) { var field *fieldInfo var index64 int64 var err error - keys := strings.Split(p, ".") - parts := make([]pathPart, 0, len(keys)) - path := make([]string, 0, len(keys)) - for i := 0; i < len(keys); i++ { + // pre-allocate parts and path using the number of ".". + // path will be appended to every valid element of the path, but parts will + // only be appended to for each slice of struct. + n := strings.Count(p, ".") + if n == 0 { + n = 1 + } + parts := make([]pathPart, 0, n) + path := make([]string, 0, n) + var it pathIter + for key, found := it.start(p); found; key, found = it.advance() { if t.Kind() != reflect.Struct { return nil, errInvalidPath } if struc = c.get(t); struc == nil { return nil, errInvalidPath } - if field = struc.get(keys[i]); field == nil { + if field = struc.get(key); field == nil { return nil, errInvalidPath } // Valid field. Append index. path = append(path, field.name) if field.isSliceOfStructs && (!field.unmarshalerInfo.IsValid || (field.unmarshalerInfo.IsValid && field.unmarshalerInfo.IsSliceElement)) { // Parse a special case: slices of structs. - // i+1 must be the slice index. + // next key must be the slice index. // // Now that struct can implements TextUnmarshaler interface, // we don't need to force the struct's fields to appear in the path. // So checking i+2 is not necessary anymore. - i++ - if i+1 > len(keys) { + key, found = it.advance() + if !found { return nil, errInvalidPath } - if index64, err = strconv.ParseInt(keys[i], 10, 0); err != nil { + if index64, err = strconv.ParseInt(key, 10, 0); err != nil { return nil, errInvalidPath } parts = append(parts, pathPart{ From 14f5b216e8fc6cac148b5ca4228d4300d34b96d9 Mon Sep 17 00:00:00 2001 From: Brian Gavin Date: Wed, 6 Aug 2025 13:24:43 -0700 Subject: [PATCH 3/5] fix go 1.20 compat (with notes of helpers in later versions) --- cache.go | 39 ++++++++++++++++++++++++--------------- 1 file changed, 24 insertions(+), 15 deletions(-) diff --git a/cache.go b/cache.go index 812d2fd..3bb962a 100644 --- a/cache.go +++ b/cache.go @@ -7,7 +7,6 @@ package schema import ( "errors" "reflect" - "slices" "strconv" "strings" "sync" @@ -38,25 +37,35 @@ func (c *cache) registerConverter(value interface{}, converterFunc Converter) { c.regconv[reflect.TypeOf(value)] = converterFunc } +// pathIter is used to iterate over the dotted notation argument to (*cache).parsePath. +// internally it utilizes strings.Cut to minimize allocations and garbage generated. +// +// this should be used with a for loop as: +// +// var it pathIter +// for key, ok := it.start(path); ok; key, ok = it.advance() {} type pathIter struct { rest string } +// start initializes the iteration, and returns the first key. func (p *pathIter) start(path string) (string, bool) { return p.next(path) } +// advance advances the iteration to the next key. func (p *pathIter) advance() (string, bool) { return p.next(p.rest) } -func (p *pathIter) next(path string) (key string, found bool) { - key, p.rest, found = strings.Cut(path, ".") - // special case for paths without ".". this can be the beginning or end of a path. - if !found && key != "" { - return key, true - } - return key, found +// next should only be used by (*pathIter).start and (*pathIter).advance. it will cut path, +// and return the "before", storing the "after" for the subsequent call. +// if path does not contain "." and is non-empty, it will return (path, true), storing "". +// if path is empty, it will return ("", false). this means the end of iteration. +func (p *pathIter) next(path string) (key string, ok bool) { + key, p.rest, _ = strings.Cut(path, ".") + ok = key != "" + return key, ok } // parsePath parses a path in dotted notation verifying that it is a valid @@ -74,13 +83,13 @@ func (c *cache) parsePath(p string, t reflect.Type) ([]pathPart, error) { // path will be appended to every valid element of the path, but parts will // only be appended to for each slice of struct. n := strings.Count(p, ".") - if n == 0 { + if n == 0 { // note: go1.21 max() n = 1 } parts := make([]pathPart, 0, n) path := make([]string, 0, n) var it pathIter - for key, found := it.start(p); found; key, found = it.advance() { + for key, ok := it.start(p); ok; key, ok = it.advance() { if t.Kind() != reflect.Struct { return nil, errInvalidPath } @@ -99,15 +108,15 @@ func (c *cache) parsePath(p string, t reflect.Type) ([]pathPart, error) { // Now that struct can implements TextUnmarshaler interface, // we don't need to force the struct's fields to appear in the path. // So checking i+2 is not necessary anymore. - key, found = it.advance() - if !found { + key, ok = it.advance() + if !ok { return nil, errInvalidPath } if index64, err = strconv.ParseInt(key, 10, 0); err != nil { return nil, errInvalidPath } parts = append(parts, pathPart{ - path: slices.Clip(path), + path: path[:len(path):len(path)], // note: go1.21 slices.Clip() field: field, index: int(index64), }) @@ -133,11 +142,11 @@ func (c *cache) parsePath(p string, t reflect.Type) ([]pathPart, error) { } // Add the remaining. parts = append(parts, pathPart{ - path: slices.Clip(path), + path: path[:len(path):len(path)], // note: go1.21 slices.Clip() field: field, index: -1, }) - parts = slices.Clip(parts) + parts = parts[:len(parts):len(parts)] // note: go1.21 slices.Clip() return parts, nil } From 4a4fe33b689097b44f664750fa6dd9811bebf787 Mon Sep 17 00:00:00 2001 From: Brian Gavin Date: Wed, 6 Aug 2025 16:57:03 -0700 Subject: [PATCH 4/5] add sliceofstruct workload benchmark --- decoder_test.go | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/decoder_test.go b/decoder_test.go index d01569e..12555fa 100644 --- a/decoder_test.go +++ b/decoder_test.go @@ -2527,3 +2527,42 @@ func TestDecoder_SetMaxSize(t *testing.T) { } }) } + +type keyvalue struct { + Key string + Value string +} + +type sliceOfStructInput struct { + Attributes []keyvalue + Tags []keyvalue +} + +func BenchmarkSliceOfStruct(b *testing.B) { + d := NewDecoder() + v := map[string][]string{ + "Attributes.0.Key": {"foo0"}, + "Attributes.0.Value": {"bar0"}, + "Attributes.1.Key": {"foo1"}, + "Attributes.1.Value": {"bar1"}, + "Attributes.2.Key": {"foo2"}, + "Attributes.2.Value": {"bar2"}, + "Attributes.3.Key": {"foo3"}, + "Attributes.3.Value": {"bar3"}, + "Attributes.4.Key": {"foo4"}, + "Attributes.4.Value": {"bar4"}, + "Attributes.5.Key": {"foo5"}, + "Attributes.5.Value": {"bar5"}, + "Tags.0.Key": {"baz0"}, + "Tags.0.Value": {"bam0"}, + "Tags.1.Key": {"baz1"}, + "Tags.1.Value": {"bam1"}, + "Tags.2.Key": {"baz2"}, + "Tags.2.Value": {"bam2"}, + } + b.ResetTimer() + for i := 0; i < b.N; i++ { + in := &sliceOfStructInput{} + _ = d.Decode(in, v) + } +} From 7d12f2a445e0e1e55533dc13c72fd58ae8fbaf59 Mon Sep 17 00:00:00 2001 From: Brian Gavin Date: Thu, 7 Aug 2025 20:16:25 -0700 Subject: [PATCH 5/5] fix edge cases for pathIter, test for pathIter --- cache.go | 24 +++++++++++++++--------- decoder_test.go | 26 ++++++++++++++++++++++++++ 2 files changed, 41 insertions(+), 9 deletions(-) diff --git a/cache.go b/cache.go index 3bb962a..3127ca0 100644 --- a/cache.go +++ b/cache.go @@ -45,26 +45,32 @@ func (c *cache) registerConverter(value interface{}, converterFunc Converter) { // var it pathIter // for key, ok := it.start(path); ok; key, ok = it.advance() {} type pathIter struct { - rest string + rest string + lastFound bool } // start initializes the iteration, and returns the first key. func (p *pathIter) start(path string) (string, bool) { - return p.next(path) + // init the iter for the first call to next, so that the first call will either + // cut on "." and return the "before", or it will return path if it does not contain + // ".". + p.rest = path + p.lastFound = true + return p.next() } // advance advances the iteration to the next key. func (p *pathIter) advance() (string, bool) { - return p.next(p.rest) + return p.next() } -// next should only be used by (*pathIter).start and (*pathIter).advance. it will cut path, +// next should only be used by (*pathIter).start and (*pathIter).advance. it will cut p.rest, // and return the "before", storing the "after" for the subsequent call. -// if path does not contain "." and is non-empty, it will return (path, true), storing "". -// if path is empty, it will return ("", false). this means the end of iteration. -func (p *pathIter) next(path string) (key string, ok bool) { - key, p.rest, _ = strings.Cut(path, ".") - ok = key != "" +func (p *pathIter) next() (key string, ok bool) { + var found bool + key, p.rest, found = strings.Cut(p.rest, ".") + ok = found || p.lastFound + p.lastFound = found return key, ok } diff --git a/decoder_test.go b/decoder_test.go index 12555fa..1b0768a 100644 --- a/decoder_test.go +++ b/decoder_test.go @@ -2566,3 +2566,29 @@ func BenchmarkSliceOfStruct(b *testing.B) { _ = d.Decode(in, v) } } + +func TestPathIter(t *testing.T) { + testcases := []string{ + "a", + "a.b", + ".b.", + ".", + "..", + "", + } + for _, tc := range testcases { + t.Run(tc, func(t *testing.T) { + var ( + it pathIter + split = strings.Split(tc, ".") + collect = make([]string, 0, len(split)) + ) + for k, ok := it.start(tc); ok; k, ok = it.advance() { + collect = append(collect, k) + } + if !reflect.DeepEqual(split, collect) { + t.Fatalf("expected: %q | got: %q", split, collect) + } + }) + } +}