json: fix cycle detection #153
```diff
@@ -5,6 +5,7 @@ import (
 	"fmt"
 	"math"
 	"reflect"
+	"runtime"
 	"sort"
 	"strconv"
 	"sync"
@@ -17,6 +18,23 @@ import (
 
 const hex = "0123456789abcdef"
 
+func (e encoder) appendAny(b []byte, x any) ([]byte, error) {
+	if x == nil {
+		// Special case for nil values because it makes the rest of the code
+		// simpler to assume that it won't be seeing nil pointers.
+		return e.encodeNull(b, nil)
+	}
+
+	t := reflect.TypeOf(x)
+	p := (*iface)(unsafe.Pointer(&x)).ptr
+	c := cachedCodec(t)
+
+	b, err := c.encode(e, b, p)
+	runtime.KeepAlive(x)
+
+	return b, err
+}
+
+func (e encoder) encodeNull(b []byte, p unsafe.Pointer) ([]byte, error) {
+	return append(b, "null"...), nil
+}
```
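A note on the new `runtime` import: `appendAny` pulls the raw data pointer out of the interface header and then encodes through it, so `runtime.KeepAlive(x)` keeps the interface value reachable until encoding is done. The `iface` type is not part of this diff; it is presumably defined elsewhere in the package along these lines (a sketch, not the actual declaration):

```go
package json

import "unsafe"

// Sketch only: assumed layout of the package's iface helper, mirroring the
// Go runtime's two-word representation of an interface value.
type iface struct {
	typ unsafe.Pointer // pointer to the dynamic type's metadata
	ptr unsafe.Pointer // pointer to the underlying value's data
}
```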
```diff
@@ -241,7 +259,7 @@ func (e encoder) encodeToString(b []byte, p unsafe.Pointer, encode encodeFunc) (
 func (e encoder) encodeBytes(b []byte, p unsafe.Pointer) ([]byte, error) {
 	v := *(*[]byte)(p)
 	if v == nil {
-		return append(b, "null"...), nil
+		return e.encodeNull(b, nil)
 	}
 
 	n := base64.StdEncoding.EncodedLen(len(v)) + 2
@@ -299,7 +317,7 @@ func (e encoder) encodeSlice(b []byte, p unsafe.Pointer, size uintptr, t reflect
 	s := (*slice)(p)
 
 	if s.data == nil && s.len == 0 && s.cap == 0 {
-		return append(b, "null"...), nil
+		return e.encodeNull(b, nil)
 	}
 
 	return e.encodeArray(b, s.data, s.len, size, t, encode)
@@ -308,7 +326,7 @@ func (e encoder) encodeSlice(b []byte, p unsafe.Pointer, size uintptr, t reflect
 func (e encoder) encodeMap(b []byte, p unsafe.Pointer, t reflect.Type, encodeKey, encodeValue encodeFunc, sortKeys sortFunc) ([]byte, error) {
 	m := reflect.NewAt(t, p).Elem()
 	if m.IsNil() {
-		return append(b, "null"...), nil
+		return e.encodeNull(b, nil)
 	}
 
 	keys := m.MapKeys()
@@ -363,7 +381,7 @@ var mapslicePool = sync.Pool{
 func (e encoder) encodeMapStringInterface(b []byte, p unsafe.Pointer) ([]byte, error) {
 	m := *(*map[string]any)(p)
 	if m == nil {
-		return append(b, "null"...), nil
+		return e.encodeNull(b, nil)
 	}
 
 	if (e.flags & SortMapKeys) == 0 {
@@ -383,7 +401,7 @@ func (e encoder) encodeMapStringInterface(b []byte, p unsafe.Pointer) ([]byte, e
 		b, _ = e.encodeString(b, unsafe.Pointer(&k))
 		b = append(b, ':')
 
-		b, err = Append(b, v, e.flags)
+		b, err = e.appendAny(b, v)
 		if err != nil {
 			return b, err
 		}
@@ -417,7 +435,7 @@ func (e encoder) encodeMapStringInterface(b []byte, p unsafe.Pointer) ([]byte, e
 		b, _ = e.encodeString(b, unsafe.Pointer(&elem.key))
 		b = append(b, ':')
 
-		b, err = Append(b, elem.val, e.flags)
+		b, err = e.appendAny(b, elem.val)
 		if err != nil {
 			break
 		}
@@ -441,7 +459,7 @@ func (e encoder) encodeMapStringInterface(b []byte, p unsafe.Pointer) ([]byte, e
 func (e encoder) encodeMapStringRawMessage(b []byte, p unsafe.Pointer) ([]byte, error) {
 	m := *(*map[string]RawMessage)(p)
 	if m == nil {
-		return append(b, "null"...), nil
+		return e.encodeNull(b, nil)
 	}
 
 	if (e.flags & SortMapKeys) == 0 {
@@ -520,7 +538,7 @@ func (e encoder) encodeMapStringRawMessage(b []byte, p unsafe.Pointer) ([]byte,
 func (e encoder) encodeMapStringString(b []byte, p unsafe.Pointer) ([]byte, error) {
 	m := *(*map[string]string)(p)
 	if m == nil {
-		return append(b, "null"...), nil
+		return e.encodeNull(b, nil)
 	}
 
 	if (e.flags & SortMapKeys) == 0 {
@@ -586,7 +604,7 @@ func (e encoder) encodeMapStringString(b []byte, p unsafe.Pointer) ([]byte, erro
 func (e encoder) encodeMapStringStringSlice(b []byte, p unsafe.Pointer) ([]byte, error) {
 	m := *(*map[string][]string)(p)
 	if m == nil {
-		return append(b, "null"...), nil
+		return e.encodeNull(b, nil)
 	}
 
 	stringSize := unsafe.Sizeof("")
@@ -667,7 +685,7 @@ func (e encoder) encodeMapStringStringSlice(b []byte, p unsafe.Pointer) ([]byte,
 func (e encoder) encodeMapStringBool(b []byte, p unsafe.Pointer) ([]byte, error) {
 	m := *(*map[string]bool)(p)
 	if m == nil {
-		return append(b, "null"...), nil
+		return e.encodeNull(b, nil)
 	}
 
 	if (e.flags & SortMapKeys) == 0 {
```
```diff
@@ -794,30 +812,31 @@ func (e encoder) encodeEmbeddedStructPointer(b []byte, p unsafe.Pointer, t refle
 }
 
 func (e encoder) encodePointer(b []byte, p unsafe.Pointer, t reflect.Type, encode encodeFunc) ([]byte, error) {
-	if p = *(*unsafe.Pointer)(p); p != nil {
-		if e.ptrDepth++; e.ptrDepth >= startDetectingCyclesAfter {
-			if _, seen := e.ptrSeen[p]; seen {
-				// TODO: reconstruct the reflect.Value from p + t so we can set
-				// the error's Value field?
-				return b, &UnsupportedValueError{Str: fmt.Sprintf("encountered a cycle via %s", t)}
-			}
-			if e.ptrSeen == nil {
-				e.ptrSeen = make(map[unsafe.Pointer]struct{})
-			}
-			e.ptrSeen[p] = struct{}{}
-			defer delete(e.ptrSeen, p)
-		}
-		return encode(e, b, p)
-	}
-	return e.encodeNull(b, nil)
+	// p was a pointer to the actual user data pointer:
+	// dereference it to operate on the user data pointer.
+	p = *(*unsafe.Pointer)(p)
+	if p == nil {
+		return e.encodeNull(b, nil)
+	}
+
+	if shouldCheckForRefCycle(&e) {
+		key := cycleKey{ptr: p}
+		if hasRefCycle(&e, key) {
+			return b, refCycleError(t, p)
+		}
+
+		defer freeRefCycleInfo(&e, key)
+	}
+
+	return encode(e, b, p)
 }
```
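For illustration only (not part of the diff), this is the kind of value `encodePointer` now rejects. The import path assumes this PR targets the segmentio/encoding repository; the error text comes from `refCycleError`, shown further down:

```go
package main

import (
	"fmt"

	"github.com/segmentio/encoding/json" // assumed module path for this PR
)

type node struct {
	Next *node `json:"next"`
}

func main() {
	n := &node{}
	n.Next = n // self-referential pointer: n -> n -> ...

	_, err := json.Marshal(n)
	fmt.Println(err) // expect an UnsupportedValueError: "encountered a cycle via *main.node"
}
```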
```diff
 func (e encoder) encodeInterface(b []byte, p unsafe.Pointer) ([]byte, error) {
-	return Append(b, *(*any)(p), e.flags)
+	return e.appendAny(b, *(*any)(p))
 }
 
 func (e encoder) encodeMaybeEmptyInterface(b []byte, p unsafe.Pointer, t reflect.Type) ([]byte, error) {
-	return Append(b, reflect.NewAt(t, p).Elem().Interface(), e.flags)
+	return e.appendAny(b, reflect.NewAt(t, p).Elem().Interface())
 }
 
 func (e encoder) encodeUnsupportedTypeError(b []byte, p unsafe.Pointer, t reflect.Type) ([]byte, error) {
@@ -828,7 +847,7 @@ func (e encoder) encodeRawMessage(b []byte, p unsafe.Pointer) ([]byte, error) {
 	v := *(*RawMessage)(p)
 
 	if v == nil {
-		return append(b, "null"...), nil
+		return e.encodeNull(b, nil)
 	}
 
 	var s []byte
@@ -862,7 +881,7 @@ func (e encoder) encodeJSONMarshaler(b []byte, p unsafe.Pointer, t reflect.Type,
 	switch v.Kind() {
 	case reflect.Ptr, reflect.Interface:
 		if v.IsNil() {
-			return append(b, "null"...), nil
+			return e.encodeNull(b, nil)
 		}
 	}
```
```diff
@@ -968,3 +987,64 @@ func appendCompactEscapeHTML(dst []byte, src []byte) []byte {
 
 	return dst
 }
+
+// shouldCheckForRefCycle determines whether checking for reference cycles
+// is reasonable to do at this time.
+//
+// When true, hasRefCycle should be called and any error handled,
+// and then a deferred call to freeRefCycleInfo should be made.
+//
+// This should only be called from encoder methods that are possible points
+// that could directly contribute to a reference cycle.
+func shouldCheckForRefCycle(e *encoder) bool {
+	// Note: do not combine this with hasRefCycle,
+	// because hasRefCycle is too large to be inlined,
+	// and a non-inlined depth check leads to ~5%+ benchmark degradation.
+	e.refDepth++
+	return e.refDepth >= startDetectingCyclesAfter
+}
+
+// refCycleError constructs an [UnsupportedValueError].
+func refCycleError(t reflect.Type, p unsafe.Pointer) error {
+	v := reflect.NewAt(t, p)
+	return &UnsupportedValueError{
+		Value: v,
+		Str:   fmt.Sprintf("encountered a cycle via %s", t),
+	}
+}
+
+// hasRefCycle reports whether a reference cycle was detected.
+// The data pointer used to build the key should be equivalent to one of:
+//
+//   - A normal Go pointer, e.g. `unsafe.Pointer(&T)`
+//   - The pointer to a map header, e.g. `*(*unsafe.Pointer)(&map[K]V)`
+//
+// Many [encoder] methods accept a pointer-to-a-pointer,
+// and so those may need to be dereferenced in order to safely pass them here.
+func hasRefCycle(e *encoder, key cycleKey) bool {
+	_, seen := e.refSeen[key]
+	if seen {
+		return true
+	}
+
+	if e.refSeen == nil {
+		e.refSeen = cycleMapPool.Get().(cycleMap)
+	}
+
+	e.refSeen[key] = struct{}{}
+
+	return false
+}
+
+// freeRefCycleInfo performs the cleanup operation for [hasRefCycle].
+// key must be the same value passed into a prior call to hasRefCycle.
+func freeRefCycleInfo(e *encoder, key cycleKey) {
+	delete(e.refSeen, key)
+	if len(e.refSeen) == 0 {
+		// There are no remaining elements,
+		// so we can release this map for later reuse.
+		m := e.refSeen
+		e.refSeen = nil
+		cycleMapPool.Put(m)
+	}
+}
```
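The hunk above references several declarations the diff does not show: `cycleKey`, `cycleMap`, `cycleMapPool`, `startDetectingCyclesAfter`, and the `refDepth`/`refSeen` fields on `encoder`. For readers following along, here is a rough sketch of their assumed shapes; the threshold value in particular is a guess based on encoding/json's equivalent constant:

```go
package json

import (
	"sync"
	"unsafe"
)

// Sketch only: assumed shapes of the supporting declarations.

// cycleKey identifies a value currently being encoded.
type cycleKey struct {
	ptr unsafe.Pointer
}

type cycleMap = map[cycleKey]struct{}

// Pooled so deep encodes don't allocate a fresh tracking map every time.
var cycleMapPool = sync.Pool{
	New: func() any { return make(cycleMap) },
}

// encoding/json starts checking after 1000 levels of recursion;
// the value used here is an assumption.
const startDetectingCyclesAfter = 1000
```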
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Worth adding some tests for this commit? (Or maybe coming in a later commit?)
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I didn't add bespoke tests here because the borrowed stdlib tests provide very good coverage over this behavior. That's pretty easy because this PR was specifically developed against those stdlib tests, including the behavior in this specific commit. That said, it looks like map cycles have no test coverage. I will add tests for that. |
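A minimal test for that map-cycle case might look something like this sketch (assumed to live in the json package itself, so `Marshal` and `UnsupportedValueError` are unqualified; it is not the test that was actually added):

```go
package json

import "testing"

func TestEncodeMapCycle(t *testing.T) {
	// A map that contains itself must produce an error instead of
	// recursing until the stack overflows.
	m := map[string]any{}
	m["self"] = m

	_, err := Marshal(m)
	if err == nil {
		t.Fatal("expected an error when marshaling a cyclic map, got nil")
	}
	if _, ok := err.(*UnsupportedValueError); !ok {
		t.Fatalf("expected *UnsupportedValueError, got %T: %v", err, err)
	}
}
```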
Reviewer:

Thread safety for this access, or is "single thread assumed" documented somewhere higher up?

Can you fix the typo on line 62 - "Marshal"
Author:

This concurrency behavior was already in place within this json package, and the decision is maintained within this PR despite some minor refactoring around the concept.

Essentially, the "global cache" is stored in an atomic.Pointer wrapping a map. To access the cache, the map is atomically loaded and then read normally. To update the cache, we first create a shallow copy of the map (cached values are never modified), then set our new key(s), then atomically store the new map in place of the old one.

Thus reads concurrent with writes are okay: the reader already has what it needs from whichever map it had loaded.

Concurrent writes are also okay: if both goroutines needed to handle different types which hadn't already been in the map, both will build and use codecs for the immediate operation they're performing, and then the last writer will "win" the race, keeping its particular codec in the cache for next time. The type that lost the race will get regenerated in a future call and eventually end up durably in the cache.

The code comments aptly refer to this as an eventually consistent cache because, over time, with enough repeat calls, there will be fewer and fewer benign races (any type that wins a race never needs to participate in the race again), and eventually every needed type will end up in the cache.
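A stripped-down sketch of that copy-on-write pattern (identifiers here are illustrative, not the package's actual ones):

```go
package codeccache

import (
	"reflect"
	"sync/atomic"
)

type codec struct{ /* encode/decode functions for one type */ }

// cache holds an immutable map: readers load it atomically and read it
// normally; writers publish a whole new map rather than mutating in place.
var cache atomic.Pointer[map[reflect.Type]codec]

func init() {
	m := make(map[reflect.Type]codec)
	cache.Store(&m)
}

func cachedCodec(t reflect.Type) codec {
	if c, ok := (*cache.Load())[t]; ok {
		return c // fast path: atomic load, then a plain map read
	}

	c := buildCodec(t)

	// Copy-on-write update: shallow-copy the current map, add the new
	// entry, and atomically publish the copy. Concurrent writers race
	// benignly: the last Store wins, and losing entries are simply
	// rebuilt on a later call, so the cache is eventually consistent.
	old := *cache.Load()
	next := make(map[reflect.Type]codec, len(old)+1)
	for k, v := range old {
		next[k] = v
	}
	next[t] = c
	cache.Store(&next)

	return c
}

func buildCodec(t reflect.Type) codec {
	// Placeholder: real code would construct encode/decode funcs for t.
	return codec{}
}
```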