From e96084501ac2eebd6503d8c870e1b15a29169075 Mon Sep 17 00:00:00 2001 From: Kevin Gillette Date: Tue, 29 Jul 2025 11:14:35 -0600 Subject: [PATCH 01/10] json: consistent use of encodeNull (should inline well) --- json/encode.go | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/json/encode.go b/json/encode.go index 2a6da07..8fe2867 100644 --- a/json/encode.go +++ b/json/encode.go @@ -241,7 +241,7 @@ func (e encoder) encodeToString(b []byte, p unsafe.Pointer, encode encodeFunc) ( func (e encoder) encodeBytes(b []byte, p unsafe.Pointer) ([]byte, error) { v := *(*[]byte)(p) if v == nil { - return append(b, "null"...), nil + return e.encodeNull(b, nil) } n := base64.StdEncoding.EncodedLen(len(v)) + 2 @@ -299,7 +299,7 @@ func (e encoder) encodeSlice(b []byte, p unsafe.Pointer, size uintptr, t reflect s := (*slice)(p) if s.data == nil && s.len == 0 && s.cap == 0 { - return append(b, "null"...), nil + return e.encodeNull(b, nil) } return e.encodeArray(b, s.data, s.len, size, t, encode) @@ -308,7 +308,7 @@ func (e encoder) encodeSlice(b []byte, p unsafe.Pointer, size uintptr, t reflect func (e encoder) encodeMap(b []byte, p unsafe.Pointer, t reflect.Type, encodeKey, encodeValue encodeFunc, sortKeys sortFunc) ([]byte, error) { m := reflect.NewAt(t, p).Elem() if m.IsNil() { - return append(b, "null"...), nil + return e.encodeNull(b, nil) } keys := m.MapKeys() @@ -363,7 +363,7 @@ var mapslicePool = sync.Pool{ func (e encoder) encodeMapStringInterface(b []byte, p unsafe.Pointer) ([]byte, error) { m := *(*map[string]any)(p) if m == nil { - return append(b, "null"...), nil + return e.encodeNull(b, nil) } if (e.flags & SortMapKeys) == 0 { @@ -441,7 +441,7 @@ func (e encoder) encodeMapStringInterface(b []byte, p unsafe.Pointer) ([]byte, e func (e encoder) encodeMapStringRawMessage(b []byte, p unsafe.Pointer) ([]byte, error) { m := *(*map[string]RawMessage)(p) if m == nil { - return append(b, "null"...), nil + return e.encodeNull(b, nil) } if 
(e.flags & SortMapKeys) == 0 { @@ -520,7 +520,7 @@ func (e encoder) encodeMapStringRawMessage(b []byte, p unsafe.Pointer) ([]byte, func (e encoder) encodeMapStringString(b []byte, p unsafe.Pointer) ([]byte, error) { m := *(*map[string]string)(p) if m == nil { - return append(b, "null"...), nil + return e.encodeNull(b, nil) } if (e.flags & SortMapKeys) == 0 { @@ -586,7 +586,7 @@ func (e encoder) encodeMapStringString(b []byte, p unsafe.Pointer) ([]byte, erro func (e encoder) encodeMapStringStringSlice(b []byte, p unsafe.Pointer) ([]byte, error) { m := *(*map[string][]string)(p) if m == nil { - return append(b, "null"...), nil + return e.encodeNull(b, nil) } stringSize := unsafe.Sizeof("") @@ -667,7 +667,7 @@ func (e encoder) encodeMapStringStringSlice(b []byte, p unsafe.Pointer) ([]byte, func (e encoder) encodeMapStringBool(b []byte, p unsafe.Pointer) ([]byte, error) { m := *(*map[string]bool)(p) if m == nil { - return append(b, "null"...), nil + return e.encodeNull(b, nil) } if (e.flags & SortMapKeys) == 0 { @@ -828,7 +828,7 @@ func (e encoder) encodeRawMessage(b []byte, p unsafe.Pointer) ([]byte, error) { v := *(*RawMessage)(p) if v == nil { - return append(b, "null"...), nil + return e.encodeNull(b, nil) } var s []byte @@ -862,7 +862,7 @@ func (e encoder) encodeJSONMarshaler(b []byte, p unsafe.Pointer, t reflect.Type, switch v.Kind() { case reflect.Ptr, reflect.Interface: if v.IsNil() { - return append(b, "null"...), nil + return e.encodeNull(b, nil) } } From dabc5078ec6efed02ef1425d59a435981a9f2b48 Mon Sep 17 00:00:00 2001 From: Kevin Gillette Date: Mon, 28 Jul 2025 08:58:38 -0600 Subject: [PATCH 02/10] json: internal uses of Append now use new encoder.appendAny This is necessary for cycle detection to work, which had been implemented, yet was broken. Also introduce cachedCodec helper function. 
--- json/codec.go | 11 +++++++++++ json/encode.go | 26 ++++++++++++++++++++++---- json/json.go | 30 ++++-------------------------- 3 files changed, 37 insertions(+), 30 deletions(-) diff --git a/json/codec.go b/json/codec.go index 77fe264..d55104a 100644 --- a/json/codec.go +++ b/json/codec.go @@ -63,6 +63,17 @@ type ( // lookup time for simple types like bool, int, etc.. var cache atomic.Pointer[map[unsafe.Pointer]codec] +func cachedCodec(t reflect.Type) codec { + cache := cacheLoad() + + c, found := cache[typeid(t)] + if !found { + c = constructCachedCodec(t, cache) + } + + return c +} + func cacheLoad() map[unsafe.Pointer]codec { p := cache.Load() if p == nil { diff --git a/json/encode.go b/json/encode.go index 8fe2867..6001a6e 100644 --- a/json/encode.go +++ b/json/encode.go @@ -5,6 +5,7 @@ import ( "fmt" "math" "reflect" + "runtime" "sort" "strconv" "sync" @@ -17,6 +18,23 @@ import ( const hex = "0123456789abcdef" +func (e encoder) appendAny(b []byte, x any) ([]byte, error) { + if x == nil { + // Special case for nil values because it makes the rest of the code + // simpler to assume that it won't be seeing nil pointers. 
+ return e.encodeNull(b, nil) + } + + t := reflect.TypeOf(x) + p := (*iface)(unsafe.Pointer(&x)).ptr + c := cachedCodec(t) + + b, err := c.encode(e, b, p) + runtime.KeepAlive(x) + + return b, err +} + func (e encoder) encodeNull(b []byte, p unsafe.Pointer) ([]byte, error) { return append(b, "null"...), nil } @@ -383,7 +401,7 @@ func (e encoder) encodeMapStringInterface(b []byte, p unsafe.Pointer) ([]byte, e b, _ = e.encodeString(b, unsafe.Pointer(&k)) b = append(b, ':') - b, err = Append(b, v, e.flags) + b, err = e.appendAny(b, v) if err != nil { return b, err } @@ -417,7 +435,7 @@ func (e encoder) encodeMapStringInterface(b []byte, p unsafe.Pointer) ([]byte, e b, _ = e.encodeString(b, unsafe.Pointer(&elem.key)) b = append(b, ':') - b, err = Append(b, elem.val, e.flags) + b, err = e.appendAny(b, elem.val) if err != nil { break } @@ -813,11 +831,11 @@ func (e encoder) encodePointer(b []byte, p unsafe.Pointer, t reflect.Type, encod } func (e encoder) encodeInterface(b []byte, p unsafe.Pointer) ([]byte, error) { - return Append(b, *(*any)(p), e.flags) + return e.appendAny(b, *(*any)(p)) } func (e encoder) encodeMaybeEmptyInterface(b []byte, p unsafe.Pointer, t reflect.Type) ([]byte, error) { - return Append(b, reflect.NewAt(t, p).Elem().Interface(), e.flags) + return e.appendAny(b, reflect.NewAt(t, p).Elem().Interface()) } func (e encoder) encodeUnsupportedTypeError(b []byte, p unsafe.Pointer, t reflect.Type) ([]byte, error) { diff --git a/json/json.go b/json/json.go index 028fd1f..a3138f8 100644 --- a/json/json.go +++ b/json/json.go @@ -6,7 +6,6 @@ import ( "io" "math/bits" "reflect" - "runtime" "sync" "unsafe" ) @@ -194,25 +193,9 @@ func (k Kind) Class() Kind { return Kind(1 << uint(bits.Len(uint(k))-1)) } // Append acts like Marshal but appends the json representation to b instead of // always reallocating a new slice. 
func Append(b []byte, x any, flags AppendFlags) ([]byte, error) { - if x == nil { - // Special case for nil values because it makes the rest of the code - // simpler to assume that it won't be seeing nil pointers. - return append(b, "null"...), nil - } - - t := reflect.TypeOf(x) - p := (*iface)(unsafe.Pointer(&x)).ptr - - cache := cacheLoad() - c, found := cache[typeid(t)] - - if !found { - c = constructCachedCodec(t, cache) - } + e := encoder{flags: flags} - b, err := c.encode(encoder{flags: flags}, b, p) - runtime.KeepAlive(x) - return b, err + return e.appendAny(b, x) } // Escape is a convenience helper to construct an escaped JSON string from s. @@ -330,14 +313,9 @@ func Parse(b []byte, x any, flags ParseFlags) ([]byte, error) { } return r, &InvalidUnmarshalError{Type: t} } - t = t.Elem() - cache := cacheLoad() - c, found := cache[typeid(t)] - - if !found { - c = constructCachedCodec(t, cache) - } + t = t.Elem() + c := cachedCodec(t) r, err := c.decode(d, b, p) return skipSpaces(r), err From f786a42d3bd023c44073eb41a3796178eecd383e Mon Sep 17 00:00:00 2001 From: Kevin Gillette Date: Sun, 27 Jul 2025 22:01:09 -0600 Subject: [PATCH 03/10] json: refactor ref-cycle handling Also set UnsupportedValueError.Value (better stdlib compat). 
--- json/codec.go | 22 ++++++++++-- json/encode.go | 90 ++++++++++++++++++++++++++++++++++++++++++-------- 2 files changed, 95 insertions(+), 17 deletions(-) diff --git a/json/codec.go b/json/codec.go index d55104a..6a6bec6 100644 --- a/json/codec.go +++ b/json/codec.go @@ -10,6 +10,7 @@ import ( "sort" "strconv" "strings" + "sync" "sync/atomic" "time" "unicode" @@ -32,13 +33,28 @@ type codec struct { type encoder struct { flags AppendFlags - // ptrDepth tracks the depth of pointer cycles, when it reaches the value + // refDepth tracks the depth of pointer cycles, when it reaches the value // of startDetectingCyclesAfter, the ptrSeen map is allocated and the // encoder starts tracking pointers it has seen as an attempt to detect // whether it has entered a pointer cycle and needs to error before the // goroutine runs out of stack space. - ptrDepth uint32 - ptrSeen map[unsafe.Pointer]struct{} + // + // This relies on encoder being passed as a value, + // and encoder methods calling each other in a traditional stack + // (not using trampoline techniques), + // since refDepth is never decremented. + refDepth uint32 + refSeen cycleMap +} + +type cycleKey struct { + ptr unsafe.Pointer +} + +type cycleMap map[cycleKey]struct{} + +var cycleMapPool = sync.Pool{ + New: func() any { return make(cycleMap) }, } type decoder struct { diff --git a/json/encode.go b/json/encode.go index 6001a6e..075c763 100644 --- a/json/encode.go +++ b/json/encode.go @@ -812,22 +812,23 @@ func (e encoder) encodeEmbeddedStructPointer(b []byte, p unsafe.Pointer, t refle } func (e encoder) encodePointer(b []byte, p unsafe.Pointer, t reflect.Type, encode encodeFunc) ([]byte, error) { - if p = *(*unsafe.Pointer)(p); p != nil { - if e.ptrDepth++; e.ptrDepth >= startDetectingCyclesAfter { - if _, seen := e.ptrSeen[p]; seen { - // TODO: reconstruct the reflect.Value from p + t so we can set - // the erorr's Value field? 
- return b, &UnsupportedValueError{Str: fmt.Sprintf("encountered a cycle via %s", t)} - } - if e.ptrSeen == nil { - e.ptrSeen = make(map[unsafe.Pointer]struct{}) - } - e.ptrSeen[p] = struct{}{} - defer delete(e.ptrSeen, p) + // p was a pointer to the actual user data pointer: + // dereference it to operate on the user data pointer. + p = *(*unsafe.Pointer)(p) + if p == nil { + return e.encodeNull(b, nil) + } + + if shouldCheckForRefCycle(&e) { + key := cycleKey{ptr: p} + if hasRefCycle(&e, key) { + return b, refCycleError(t, p) } - return encode(e, b, p) + + defer freeRefCycleInfo(&e, key) } - return e.encodeNull(b, nil) + + return encode(e, b, p) } func (e encoder) encodeInterface(b []byte, p unsafe.Pointer) ([]byte, error) { @@ -986,3 +987,64 @@ func appendCompactEscapeHTML(dst []byte, src []byte) []byte { return dst } + +// shouldCheckForRefCycle determines whether checking for reference cycles +// is reasonable to do at this time. +// +// When true, hasRefCycle should be called and a detected cycle handled, +// and then a deferred call to freeRefCycleInfo should be made. +// +// This should only be called from encoder methods that are possible points +// that could directly contribute to a reference cycle. +func shouldCheckForRefCycle(e *encoder) bool { + // Note: do not combine this with hasRefCycle, + // because hasRefCycle is too large to be inlined, + // and a non-inlined depth check leads to ~5%+ benchmark degradation. + e.refDepth++ + return e.refDepth >= startDetectingCyclesAfter +} + +// refCycleError constructs an [UnsupportedValueError]. +func refCycleError(t reflect.Type, p unsafe.Pointer) error { + v := reflect.NewAt(t, p) + return &UnsupportedValueError{ + Value: v, + Str: fmt.Sprintf("encountered a cycle via %s", t), + } +} + +// hasRefCycle reports whether a reference cycle was detected. +// The data pointer passed in should be equivalent to one of: +// +// - A normal Go pointer, e.g.
`unsafe.Pointer(&T)` +// - The pointer to a map header, e.g. `*(*unsafe.Pointer)(&map[K]V)` +// +// Many [encoder] methods accept a pointer-to-a-pointer, +// and so those may need to be dereferenced in order to safely pass them here. +func hasRefCycle(e *encoder, key cycleKey) bool { + _, seen := e.refSeen[key] + if seen { + return true + } + + if e.refSeen == nil { + e.refSeen = cycleMapPool.Get().(cycleMap) + } + + e.refSeen[key] = struct{}{} + + return false +} + +// freeRefCycleInfo performs the cleanup operation for [hasRefCycle]. +// key must be the same value passed into a prior call to hasRefCycle. +func freeRefCycleInfo(e *encoder, key cycleKey) { + delete(e.refSeen, key) + if len(e.refSeen) == 0 { + // There are no remaining elements, + // so we can release this map for later reuse. + m := e.refSeen + e.refSeen = nil + cycleMapPool.Put(m) + } +} From d8717df0b5c52eafe4b77a25342f7e6855836a3f Mon Sep 17 00:00:00 2001 From: Kevin Gillette Date: Sun, 27 Jul 2025 22:40:15 -0600 Subject: [PATCH 04/10] json: support cycle detection involving maps --- json/encode.go | 32 ++++++++++++++++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) diff --git a/json/encode.go b/json/encode.go index 075c763..8401005 100644 --- a/json/encode.go +++ b/json/encode.go @@ -329,15 +329,29 @@ func (e encoder) encodeMap(b []byte, p unsafe.Pointer, t reflect.Type, encodeKey return e.encodeNull(b, nil) } + // hasRefCycle/freeRefCycleInfo expect the map header pointer itself, + // rather than a pointer to the header.
+ p = *(*unsafe.Pointer)(p) + + if shouldCheckForRefCycle(&e) { + key := cycleKey{ptr: p} + if hasRefCycle(&e, key) { + return b, refCycleError(t, p) + } + + defer freeRefCycleInfo(&e, key) + } + keys := m.MapKeys() if sortKeys != nil && (e.flags&SortMapKeys) != 0 { sortKeys(keys) } start := len(b) - var err error b = append(b, '{') + var err error + for i, k := range keys { v := m.MapIndex(k) @@ -384,6 +398,19 @@ func (e encoder) encodeMapStringInterface(b []byte, p unsafe.Pointer) ([]byte, e return e.encodeNull(b, nil) } + // hasRefCycle/freeRefCycleInfo expect the map header pointer itself, + // rather than a pointer to the header. + p = *(*unsafe.Pointer)(p) + + if shouldCheckForRefCycle(&e) { + key := cycleKey{ptr: p} + if hasRefCycle(&e, key) { + return b, refCycleError(mapStringInterfaceType, p) + } + + defer freeRefCycleInfo(&e, key) + } + if (e.flags & SortMapKeys) == 0 { // Optimized code path when the program does not need the map keys to be // sorted. @@ -424,9 +451,10 @@ func (e encoder) encodeMapStringInterface(b []byte, p unsafe.Pointer) ([]byte, e sort.Sort(s) start := len(b) - var err error b = append(b, '{') + var err error + for i, elem := range s.elements { if i != 0 { b = append(b, ',') From 722d2e359db1115f8f85c0f63db48ad1bf88e4cd Mon Sep 17 00:00:00 2001 From: Kevin Gillette Date: Tue, 25 Nov 2025 09:22:39 -0700 Subject: [PATCH 05/10] json: detect slice-based value cycles --- json/codec.go | 1 + json/encode.go | 9 +++++++++ 2 files changed, 10 insertions(+) diff --git a/json/codec.go b/json/codec.go index 6a6bec6..e4b6ab1 100644 --- a/json/codec.go +++ b/json/codec.go @@ -49,6 +49,7 @@ type encoder struct { type cycleKey struct { ptr unsafe.Pointer + len int // 0 for pointers or maps; length for slices or array pointers.
} type cycleMap map[cycleKey]struct{} diff --git a/json/encode.go b/json/encode.go index 8401005..a65dc41 100644 --- a/json/encode.go +++ b/json/encode.go @@ -296,6 +296,15 @@ func (e encoder) encodeTime(b []byte, p unsafe.Pointer) ([]byte, error) { } func (e encoder) encodeArray(b []byte, p unsafe.Pointer, n int, size uintptr, t reflect.Type, encode encodeFunc) ([]byte, error) { + if shouldCheckForRefCycle(&e) { + key := cycleKey{ptr: p} + if hasRefCycle(&e, key) { + return b, refCycleError(t, p) + } + + defer freeRefCycleInfo(&e, key) + } + start := len(b) var err error b = append(b, '[') From b34b8b905cadf2dbc1a42dffffd0ad8643eab0b5 Mon Sep 17 00:00:00 2001 From: Kevin Gillette Date: Thu, 27 Nov 2025 11:56:13 -0700 Subject: [PATCH 06/10] json: remove unnecessary bespoke pointer types The codec construction handles these without issue just like any other pointer-to-something. --- json/codec.go | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/json/codec.go b/json/codec.go index e4b6ab1..07e9cf0 100644 --- a/json/codec.go +++ b/json/codec.go @@ -145,18 +145,6 @@ func constructCodec(t reflect.Type, seen map[reflect.Type]*structType, canAddr b case rawMessageType: c = codec{encode: encoder.encodeRawMessage, decode: decoder.decodeRawMessage} - - case numberPtrType: - c = constructPointerCodec(numberPtrType, nil) - - case durationPtrType: - c = constructPointerCodec(durationPtrType, nil) - - case timePtrType: - c = constructPointerCodec(timePtrType, nil) - - case rawMessagePtrType: - c = constructPointerCodec(rawMessagePtrType, nil) } if c.encode != nil { @@ -1120,11 +1108,6 @@ var ( timeType = reflect.TypeOf(time.Time{}) rawMessageType = reflect.TypeOf(RawMessage(nil)) - numberPtrType = reflect.PointerTo(numberType) - durationPtrType = reflect.PointerTo(durationType) - timePtrType = reflect.PointerTo(timeType) - rawMessagePtrType = reflect.PointerTo(rawMessageType) - sliceInterfaceType = reflect.TypeOf(([]any)(nil)) sliceStringType = 
reflect.TypeOf(([]any)(nil)) mapStringInterfaceType = reflect.TypeOf((map[string]any)(nil)) From 41dfddc949c0a395841a19b76458a56b775aac0f Mon Sep 17 00:00:00 2001 From: Kevin Gillette Date: Thu, 27 Nov 2025 16:08:59 -0700 Subject: [PATCH 07/10] json: seen data structure used in codec building supports non-structs --- json/codec.go | 35 +++++++++++++++++++++++------------ 1 file changed, 23 insertions(+), 12 deletions(-) diff --git a/json/codec.go b/json/codec.go index 07e9cf0..cee292f 100644 --- a/json/codec.go +++ b/json/codec.go @@ -113,7 +113,8 @@ func typeid(t reflect.Type) unsafe.Pointer { } func constructCachedCodec(t reflect.Type, cache map[unsafe.Pointer]codec) codec { - c := constructCodec(t, map[reflect.Type]*structType{}, t.Kind() == reflect.Ptr) + seen := make(seenMap) + c := constructCodec(t, seen, t.Kind() == reflect.Ptr) if inlined(t) { c.encode = constructInlineValueEncodeFunc(c.encode) @@ -123,7 +124,14 @@ func constructCachedCodec(t reflect.Type, cache map[unsafe.Pointer]codec) codec return c } -func constructCodec(t reflect.Type, seen map[reflect.Type]*structType, canAddr bool) (c codec) { +type seenType struct { + *codec + *structType +} + +type seenMap map[reflect.Type]seenType + +func constructCodec(t reflect.Type, seen seenMap, canAddr bool) (c codec) { switch t { case nullType, nil: c = codec{encode: encoder.encodeNull, decode: decoder.decodeNull} @@ -247,7 +255,7 @@ func constructCodec(t reflect.Type, seen map[reflect.Type]*structType, canAddr b return } -func constructStringCodec(t reflect.Type, seen map[reflect.Type]*structType, canAddr bool) codec { +func constructStringCodec(t reflect.Type, seen seenMap, canAddr bool) codec { c := constructCodec(t, seen, canAddr) return codec{ encode: constructStringEncodeFunc(c.encode), @@ -273,7 +281,7 @@ func constructStringToIntDecodeFunc(t reflect.Type, decode decodeFunc) decodeFun } } -func constructArrayCodec(t reflect.Type, seen map[reflect.Type]*structType, canAddr bool) codec { +func 
constructArrayCodec(t reflect.Type, seen seenMap, canAddr bool) codec { e := t.Elem() c := constructCodec(e, seen, canAddr) s := alignedSize(e) @@ -297,7 +305,7 @@ func constructArrayDecodeFunc(size uintptr, t reflect.Type, decode decodeFunc) d } } -func constructSliceCodec(t reflect.Type, seen map[reflect.Type]*structType) codec { +func constructSliceCodec(t reflect.Type, seen seenMap) codec { e := t.Elem() s := alignedSize(e) @@ -364,7 +372,7 @@ func constructSliceDecodeFunc(size uintptr, t reflect.Type, decode decodeFunc) d } } -func constructMapCodec(t reflect.Type, seen map[reflect.Type]*structType) codec { +func constructMapCodec(t reflect.Type, seen seenMap) codec { var sortKeys sortFunc k := t.Key() v := t.Elem() @@ -482,7 +490,7 @@ func constructMapDecodeFunc(t reflect.Type, decodeKey, decodeValue decodeFunc) d } } -func constructStructCodec(t reflect.Type, seen map[reflect.Type]*structType, canAddr bool) codec { +func constructStructCodec(t reflect.Type, seen seenMap, canAddr bool) codec { st := constructStructType(t, seen, canAddr) return codec{ encode: constructStructEncodeFunc(st), @@ -490,10 +498,11 @@ func constructStructCodec(t reflect.Type, seen map[reflect.Type]*structType, can } } -func constructStructType(t reflect.Type, seen map[reflect.Type]*structType, canAddr bool) *structType { +func constructStructType(t reflect.Type, seen seenMap, canAddr bool) *structType { // Used for preventing infinite recursion on types that have pointers to // themselves. 
- st := seen[t] + seenInfo := seen[t] + st := seen[t].structType if st == nil { st = &structType{ @@ -503,7 +512,9 @@ func constructStructType(t reflect.Type, seen map[reflect.Type]*structType, canA typ: t, } - seen[t] = st + seenInfo.structType = st + seen[t] = seenInfo + st.fields = appendStructFields(st.fields, t, 0, seen, canAddr) for i := range st.fields { @@ -563,7 +574,7 @@ func constructEmbeddedStructPointerDecodeFunc(t reflect.Type, unexported bool, o } } -func appendStructFields(fields []structField, t reflect.Type, offset uintptr, seen map[reflect.Type]*structType, canAddr bool) []structField { +func appendStructFields(fields []structField, t reflect.Type, offset uintptr, seen seenMap, canAddr bool) []structField { type embeddedField struct { index int offset uintptr @@ -764,7 +775,7 @@ func encodeKeyFragment(s string, flags AppendFlags) string { return *(*string)(unsafe.Pointer(&b)) } -func constructPointerCodec(t reflect.Type, seen map[reflect.Type]*structType) codec { +func constructPointerCodec(t reflect.Type, seen seenMap) codec { e := t.Elem() c := constructCodec(e, seen, true) return codec{ From 40c79bc233d57b4f5d5539adca364c023f4105f9 Mon Sep 17 00:00:00 2001 From: Kevin Gillette Date: Sun, 27 Jul 2025 22:33:13 -0600 Subject: [PATCH 08/10] json: update cycle checking aspects from stdlib golang_encode_test.go --- json/golang_encode_test.go | 86 +++++++++++++++++++++++++++++++++----- json/golang_shim_test.go | 30 +++++++++++++ 2 files changed, 105 insertions(+), 11 deletions(-) diff --git a/json/golang_encode_test.go b/json/golang_encode_test.go index 5e334a6..86f4f8d 100644 --- a/json/golang_encode_test.go +++ b/json/golang_encode_test.go @@ -136,21 +136,85 @@ func TestEncodeRenamedByteSlice(t *testing.T) { } } -var unsupportedValues = []any{ - math.NaN(), - math.Inf(-1), - math.Inf(1), +type SamePointerNoCycle struct { + Ptr1, Ptr2 *SamePointerNoCycle +} + +var samePointerNoCycle = &SamePointerNoCycle{} + +type PointerCycle struct { + Ptr 
*PointerCycle +} + +var pointerCycle = &PointerCycle{} + +type PointerCycleIndirect struct { + Ptrs []any +} + +type RecursiveSlice []RecursiveSlice + +var ( + pointerCycleIndirect = &PointerCycleIndirect{} + mapCycle = make(map[string]any) + sliceCycle = []any{nil} + sliceNoCycle = []any{nil, nil} + recursiveSliceCycle = []RecursiveSlice{nil} +) + +func init() { + ptr := &SamePointerNoCycle{} + samePointerNoCycle.Ptr1 = ptr + samePointerNoCycle.Ptr2 = ptr + + pointerCycle.Ptr = pointerCycle + pointerCycleIndirect.Ptrs = []any{pointerCycleIndirect} + + mapCycle["x"] = mapCycle + sliceCycle[0] = sliceCycle + sliceNoCycle[1] = sliceNoCycle[:1] + for i := startDetectingCyclesAfter; i > 0; i-- { + sliceNoCycle = []any{sliceNoCycle} + } + recursiveSliceCycle[0] = recursiveSliceCycle +} + +func TestSamePointerNoCycle(t *testing.T) { + if _, err := Marshal(samePointerNoCycle); err != nil { + t.Fatalf("Marshal error: %v", err) + } +} + +func TestSliceNoCycle(t *testing.T) { + if _, err := Marshal(sliceNoCycle); err != nil { + t.Fatalf("Marshal error: %v", err) + } } func TestUnsupportedValues(t *testing.T) { - for _, v := range unsupportedValues { - if _, err := Marshal(v); err != nil { - if _, ok := err.(*UnsupportedValueError); !ok { - t.Errorf("for %v, got %T want UnsupportedValueError", v, err) + tests := []struct { + CaseName + in any + }{ + {Name(""), math.NaN()}, + {Name(""), math.Inf(-1)}, + {Name(""), math.Inf(1)}, + {Name(""), pointerCycle}, + {Name(""), pointerCycleIndirect}, + {Name(""), mapCycle}, + {Name(""), sliceCycle}, + {Name(""), recursiveSliceCycle}, + } + for _, tt := range tests { + t.Run(tt.Name, func(t *testing.T) { + if _, err := Marshal(tt.in); err != nil { + if _, ok := err.(*UnsupportedValueError); !ok { + t.Errorf("%s: Marshal error:\n\tgot: %T\n\twant: %T", tt.Where, err, new(UnsupportedValueError)) + } + } else { + t.Errorf("%s: Marshal error: got nil, want non-nil", tt.Where) } - } else { - t.Errorf("for %v, expected error", v) - } + }) } } 
diff --git a/json/golang_shim_test.go b/json/golang_shim_test.go index 5a19b7f..90e4fa9 100644 --- a/json/golang_shim_test.go +++ b/json/golang_shim_test.go @@ -4,7 +4,10 @@ package json import ( "bytes" + "fmt" + "path" "reflect" + "runtime" "sync" "testing" ) @@ -68,3 +71,30 @@ func errorWithPrefixes(t *testing.T, prefixes []any, format string, elements ... } t.Errorf(fullFormat, allElements...) } + +// ============================================================================= +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// CaseName is a case name annotated with a file and line. +type CaseName struct { + Name string + Where CasePos +} + +// Name annotates a case name with the file and line of the caller. +func Name(s string) (c CaseName) { + c.Name = s + runtime.Callers(2, c.Where.pc[:]) + return c +} + +// CasePos represents a file and line number. +type CasePos struct{ pc [1]uintptr } + +func (pos CasePos) String() string { + frames := runtime.CallersFrames(pos.pc[:]) + frame, _ := frames.Next() + return fmt.Sprintf("%s:%d", path.Base(frame.File), frame.Line) +} From 396975c77dc342569a285b16b62bca86d0bd6a9d Mon Sep 17 00:00:00 2001 From: Kevin Gillette Date: Sun, 30 Nov 2025 00:52:26 -0700 Subject: [PATCH 09/10] json: codec is passed as a pointer --- json/codec.go | 150 ++++++++++++++++++++++++++++---------------------- 1 file changed, 83 insertions(+), 67 deletions(-) diff --git a/json/codec.go b/json/codec.go index cee292f..3852e37 100644 --- a/json/codec.go +++ b/json/codec.go @@ -78,9 +78,9 @@ type ( // Note: using a uintptr as key instead of reflect.Type shaved ~15ns off of // the ~30ns Marhsal/Unmarshal functions which were dominated by the map // lookup time for simple types like bool, int, etc.. 
-var cache atomic.Pointer[map[unsafe.Pointer]codec] +var cache atomic.Pointer[map[unsafe.Pointer]*codec] -func cachedCodec(t reflect.Type) codec { +func cachedCodec(t reflect.Type) *codec { cache := cacheLoad() c, found := cache[typeid(t)] @@ -91,7 +91,7 @@ func cachedCodec(t reflect.Type) codec { return c } -func cacheLoad() map[unsafe.Pointer]codec { +func cacheLoad() map[unsafe.Pointer]*codec { p := cache.Load() if p == nil { return nil @@ -100,8 +100,8 @@ func cacheLoad() map[unsafe.Pointer]codec { return *p } -func cacheStore(typ reflect.Type, cod codec, oldCodecs map[unsafe.Pointer]codec) { - newCodecs := make(map[unsafe.Pointer]codec, len(oldCodecs)+1) +func cacheStore(typ reflect.Type, cod *codec, oldCodecs map[unsafe.Pointer]*codec) { + newCodecs := make(map[unsafe.Pointer]*codec, len(oldCodecs)+1) maps.Copy(newCodecs, oldCodecs) newCodecs[typeid(typ)] = cod @@ -112,7 +112,7 @@ func typeid(t reflect.Type) unsafe.Pointer { return (*iface)(unsafe.Pointer(&t)).ptr } -func constructCachedCodec(t reflect.Type, cache map[unsafe.Pointer]codec) codec { +func constructCachedCodec(t reflect.Type, cache map[unsafe.Pointer]*codec) *codec { seen := make(seenMap) c := constructCodec(t, seen, t.Kind() == reflect.Ptr) @@ -131,79 +131,81 @@ type seenType struct { type seenMap map[reflect.Type]seenType -func constructCodec(t reflect.Type, seen seenMap, canAddr bool) (c codec) { +func constructCodec(t reflect.Type, seen seenMap, canAddr bool) *codec { + var c *codec + switch t { case nullType, nil: - c = codec{encode: encoder.encodeNull, decode: decoder.decodeNull} + c = &codec{encode: encoder.encodeNull, decode: decoder.decodeNull} case numberType: - c = codec{encode: encoder.encodeNumber, decode: decoder.decodeNumber} + c = &codec{encode: encoder.encodeNumber, decode: decoder.decodeNumber} case bytesType: - c = codec{encode: encoder.encodeBytes, decode: decoder.decodeBytes} + c = &codec{encode: encoder.encodeBytes, decode: decoder.decodeBytes} case durationType: - c = 
codec{encode: encoder.encodeDuration, decode: decoder.decodeDuration} + c = &codec{encode: encoder.encodeDuration, decode: decoder.decodeDuration} case timeType: - c = codec{encode: encoder.encodeTime, decode: decoder.decodeTime} + c = &codec{encode: encoder.encodeTime, decode: decoder.decodeTime} case interfaceType: - c = codec{encode: encoder.encodeInterface, decode: decoder.decodeInterface} + c = &codec{encode: encoder.encodeInterface, decode: decoder.decodeInterface} case rawMessageType: - c = codec{encode: encoder.encodeRawMessage, decode: decoder.decodeRawMessage} + c = &codec{encode: encoder.encodeRawMessage, decode: decoder.decodeRawMessage} } - if c.encode != nil { - return + if c != nil { + return c } switch t.Kind() { case reflect.Bool: - c = codec{encode: encoder.encodeBool, decode: decoder.decodeBool} + c = &codec{encode: encoder.encodeBool, decode: decoder.decodeBool} case reflect.Int: - c = codec{encode: encoder.encodeInt, decode: decoder.decodeInt} + c = &codec{encode: encoder.encodeInt, decode: decoder.decodeInt} case reflect.Int8: - c = codec{encode: encoder.encodeInt8, decode: decoder.decodeInt8} + c = &codec{encode: encoder.encodeInt8, decode: decoder.decodeInt8} case reflect.Int16: - c = codec{encode: encoder.encodeInt16, decode: decoder.decodeInt16} + c = &codec{encode: encoder.encodeInt16, decode: decoder.decodeInt16} case reflect.Int32: - c = codec{encode: encoder.encodeInt32, decode: decoder.decodeInt32} + c = &codec{encode: encoder.encodeInt32, decode: decoder.decodeInt32} case reflect.Int64: - c = codec{encode: encoder.encodeInt64, decode: decoder.decodeInt64} + c = &codec{encode: encoder.encodeInt64, decode: decoder.decodeInt64} case reflect.Uint: - c = codec{encode: encoder.encodeUint, decode: decoder.decodeUint} + c = &codec{encode: encoder.encodeUint, decode: decoder.decodeUint} case reflect.Uintptr: - c = codec{encode: encoder.encodeUintptr, decode: decoder.decodeUintptr} + c = &codec{encode: encoder.encodeUintptr, decode: 
decoder.decodeUintptr} case reflect.Uint8: - c = codec{encode: encoder.encodeUint8, decode: decoder.decodeUint8} + c = &codec{encode: encoder.encodeUint8, decode: decoder.decodeUint8} case reflect.Uint16: - c = codec{encode: encoder.encodeUint16, decode: decoder.decodeUint16} + c = &codec{encode: encoder.encodeUint16, decode: decoder.decodeUint16} case reflect.Uint32: - c = codec{encode: encoder.encodeUint32, decode: decoder.decodeUint32} + c = &codec{encode: encoder.encodeUint32, decode: decoder.decodeUint32} case reflect.Uint64: - c = codec{encode: encoder.encodeUint64, decode: decoder.decodeUint64} + c = &codec{encode: encoder.encodeUint64, decode: decoder.decodeUint64} case reflect.Float32: - c = codec{encode: encoder.encodeFloat32, decode: decoder.decodeFloat32} + c = &codec{encode: encoder.encodeFloat32, decode: decoder.decodeFloat32} case reflect.Float64: - c = codec{encode: encoder.encodeFloat64, decode: decoder.decodeFloat64} + c = &codec{encode: encoder.encodeFloat64, decode: decoder.decodeFloat64} case reflect.String: - c = codec{encode: encoder.encodeString, decode: decoder.decodeString} + c = &codec{encode: encoder.encodeString, decode: decoder.decodeString} case reflect.Interface: c = constructInterfaceCodec(t) @@ -252,12 +254,12 @@ func constructCodec(t reflect.Type, seen seenMap, canAddr bool) (c codec) { c.decode = constructTextUnmarshalerDecodeFunc(t, true) } - return + return c } -func constructStringCodec(t reflect.Type, seen seenMap, canAddr bool) codec { +func constructStringCodec(t reflect.Type, seen seenMap, canAddr bool) *codec { c := constructCodec(t, seen, canAddr) - return codec{ + return &codec{ encode: constructStringEncodeFunc(c.encode), decode: constructStringDecodeFunc(c.decode), } @@ -281,11 +283,11 @@ func constructStringToIntDecodeFunc(t reflect.Type, decode decodeFunc) decodeFun } } -func constructArrayCodec(t reflect.Type, seen seenMap, canAddr bool) codec { +func constructArrayCodec(t reflect.Type, seen seenMap, canAddr bool) 
*codec { e := t.Elem() c := constructCodec(e, seen, canAddr) s := alignedSize(e) - return codec{ + return &codec{ encode: constructArrayEncodeFunc(s, t, c.encode), decode: constructArrayDecodeFunc(s, t, c.decode), } @@ -305,7 +307,18 @@ func constructArrayDecodeFunc(size uintptr, t reflect.Type, decode decodeFunc) d } } -func constructSliceCodec(t reflect.Type, seen seenMap) codec { +func constructSliceCodec(t reflect.Type, seen seenMap) *codec { + // Detect slice type recursion (and prevent stack overflow). + seenInfo := seen[t] + c := seenInfo.codec + if c != nil { + return c + } + + c = new(codec) + seenInfo.codec = c + seen[t] = seenInfo + e := t.Elem() s := alignedSize(e) @@ -314,7 +327,6 @@ func constructSliceCodec(t reflect.Type, seen seenMap) codec { // default encoding and decoding behaviors by implementing marshaler and // unmarshaler interfaces. p := reflect.PointerTo(e) - c := codec{} switch { case e.Implements(jsonMarshalerType): @@ -353,11 +365,11 @@ func constructSliceCodec(t reflect.Type, seen seenMap) codec { return c } - c := constructCodec(e, seen, true) - return codec{ - encode: constructSliceEncodeFunc(s, t, c.encode), - decode: constructSliceDecodeFunc(s, t, c.decode), - } + inner := constructCodec(e, seen, true) + c.encode = constructSliceEncodeFunc(s, t, inner.encode) + c.decode = constructSliceDecodeFunc(s, t, inner.decode) + + return c } func constructSliceEncodeFunc(size uintptr, t reflect.Type, encode encodeFunc) encodeFunc { @@ -372,7 +384,7 @@ func constructSliceDecodeFunc(size uintptr, t reflect.Type, decode decodeFunc) d } } -func constructMapCodec(t reflect.Type, seen seenMap) codec { +func constructMapCodec(t reflect.Type, seen seenMap) *codec { var sortKeys sortFunc k := t.Key() v := t.Elem() @@ -380,42 +392,44 @@ func constructMapCodec(t reflect.Type, seen seenMap) codec { // Faster implementations for some common cases. 
switch { case k == stringType && v == interfaceType: - return codec{ + return &codec{ encode: encoder.encodeMapStringInterface, decode: decoder.decodeMapStringInterface, } case k == stringType && v == rawMessageType: - return codec{ + return &codec{ encode: encoder.encodeMapStringRawMessage, decode: decoder.decodeMapStringRawMessage, } case k == stringType && v == stringType: - return codec{ + return &codec{ encode: encoder.encodeMapStringString, decode: decoder.decodeMapStringString, } case k == stringType && v == stringsType: - return codec{ + return &codec{ encode: encoder.encodeMapStringStringSlice, decode: decoder.decodeMapStringStringSlice, } case k == stringType && v == boolType: - return codec{ + return &codec{ encode: encoder.encodeMapStringBool, decode: decoder.decodeMapStringBool, } } - kc := codec{} + var kc *codec vc := constructCodec(v, seen, false) if k.Implements(textMarshalerType) || reflect.PointerTo(k).Implements(textUnmarshalerType) { - kc.encode = constructTextMarshalerEncodeFunc(k, false) - kc.decode = constructTextUnmarshalerDecodeFunc(k, true) + kc = &codec{ + encode: constructTextMarshalerEncodeFunc(k, false), + decode: constructTextUnmarshalerDecodeFunc(k, true), + } sortKeys = func(keys []reflect.Value) { sort.Slice(keys, func(i, j int) bool { @@ -429,8 +443,10 @@ func constructMapCodec(t reflect.Type, seen seenMap) codec { } else { switch k.Kind() { case reflect.String: - kc.encode = encoder.encodeString - kc.decode = decoder.decodeString + kc = &codec{ + encode: encoder.encodeString, + decode: decoder.decodeString, + } sortKeys = func(keys []reflect.Value) { sort.Slice(keys, func(i, j int) bool { return keys[i].String() < keys[j].String() }) @@ -468,7 +484,7 @@ func constructMapCodec(t reflect.Type, seen seenMap) codec { vc.encode = constructInlineValueEncodeFunc(vc.encode) } - return codec{ + return &codec{ encode: constructMapEncodeFunc(t, kc.encode, vc.encode, sortKeys), decode: constructMapDecodeFunc(t, kc.decode, vc.decode), } @@ 
-490,9 +506,9 @@ func constructMapDecodeFunc(t reflect.Type, decodeKey, decodeValue decodeFunc) d } } -func constructStructCodec(t reflect.Type, seen seenMap, canAddr bool) codec { +func constructStructCodec(t reflect.Type, seen seenMap, canAddr bool) *codec { st := constructStructType(t, seen, canAddr) - return codec{ + return &codec{ encode: constructStructEncodeFunc(st), decode: constructStructDecodeFunc(st), } @@ -502,7 +518,7 @@ func constructStructType(t reflect.Type, seen seenMap, canAddr bool) *structType // Used for preventing infinite recursion on types that have pointers to // themselves. seenInfo := seen[t] - st := seen[t].structType + st := seenInfo.structType if st == nil { st = &structType{ @@ -555,8 +571,8 @@ func constructStructDecodeFunc(st *structType) decodeFunc { } } -func constructEmbeddedStructPointerCodec(t reflect.Type, unexported bool, offset uintptr, field codec) codec { - return codec{ +func constructEmbeddedStructPointerCodec(t reflect.Type, unexported bool, offset uintptr, field *codec) *codec { + return &codec{ encode: constructEmbeddedStructPointerEncodeFunc(t, unexported, offset, field.encode), decode: constructEmbeddedStructPointerDecodeFunc(t, unexported, offset, field.decode), } @@ -775,10 +791,10 @@ func encodeKeyFragment(s string, flags AppendFlags) string { return *(*string)(unsafe.Pointer(&b)) } -func constructPointerCodec(t reflect.Type, seen seenMap) codec { +func constructPointerCodec(t reflect.Type, seen seenMap) *codec { e := t.Elem() c := constructCodec(e, seen, true) - return codec{ + return &codec{ encode: constructPointerEncodeFunc(e, c.encode), decode: constructPointerDecodeFunc(e, c.decode), } @@ -796,8 +812,8 @@ func constructPointerDecodeFunc(t reflect.Type, decode decodeFunc) decodeFunc { } } -func constructInterfaceCodec(t reflect.Type) codec { - return codec{ +func constructInterfaceCodec(t reflect.Type) *codec { + return &codec{ encode: constructMaybeEmptyInterfaceEncoderFunc(t), decode: 
constructMaybeEmptyInterfaceDecoderFunc(t), } @@ -815,8 +831,8 @@ func constructMaybeEmptyInterfaceDecoderFunc(t reflect.Type) decodeFunc { } } -func constructUnsupportedTypeCodec(t reflect.Type) codec { - return codec{ +func constructUnsupportedTypeCodec(t reflect.Type) *codec { + return &codec{ encode: constructUnsupportedTypeEncodeFunc(t), decode: constructUnsupportedTypeDecodeFunc(t), } @@ -1000,7 +1016,7 @@ type structType struct { } type structField struct { - codec codec + codec *codec offset uintptr empty emptyFunc tag bool From 9aaaa4b7b1c1cb2b5f733400ac1232083303151e Mon Sep 17 00:00:00 2001 From: Kevin Gillette Date: Sun, 30 Nov 2025 11:58:11 -0700 Subject: [PATCH 10/10] json: pass remaining cycle/unsupported-value tests --- json/codec.go | 15 +++++++++++++++ json/encode.go | 2 +- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/json/codec.go b/json/codec.go index 3852e37..395ff01 100644 --- a/json/codec.go +++ b/json/codec.go @@ -366,6 +366,21 @@ func constructSliceCodec(t reflect.Type, seen seenMap) *codec { } inner := constructCodec(e, seen, true) + + // If encode/decode functions are nil, that means this is a recursive type, + // and so we can use delayed binding to define self-referential calls. 
+ if inner.encode == nil { + inner.encode = func(e encoder, b []byte, p unsafe.Pointer) ([]byte, error) { + return c.encode(e, b, p) + } + } + + if inner.decode == nil { + inner.decode = func(d decoder, b []byte, p unsafe.Pointer) ([]byte, error) { + return c.decode(d, b, p) + } + } + c.encode = constructSliceEncodeFunc(s, t, inner.encode) c.decode = constructSliceDecodeFunc(s, t, inner.decode) diff --git a/json/encode.go b/json/encode.go index a65dc41..791ad0b 100644 --- a/json/encode.go +++ b/json/encode.go @@ -297,7 +297,7 @@ func (e encoder) encodeTime(b []byte, p unsafe.Pointer) ([]byte, error) { func (e encoder) encodeArray(b []byte, p unsafe.Pointer, n int, size uintptr, t reflect.Type, encode encodeFunc) ([]byte, error) { if shouldCheckForRefCycle(&e) { - key := cycleKey{ptr: p} + key := cycleKey{ptr: p, len: n} if hasRefCycle(&e, key) { return b, refCycleError(t, p) }