Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
123 changes: 106 additions & 17 deletions func.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ import (
"unsafe"

"github.com/ebitengine/purego/internal/strings"
"github.com/ebitengine/purego/internal/xreflect"
)

const (
Expand All @@ -26,6 +25,21 @@ var thePool = sync.Pool{New: func() any {
return new(syscall15Args)
}}

// cachedStructInfo holds a cached struct type and the indices of non-padding fields.
// Both fields are stored together so that a single cached entry carries the
// type and the field positions that belong to it.
type cachedStructInfo struct {
	typ          reflect.Type // the cached struct type
	valueIndices []int        // indices of non-padding fields in the struct
}

// preBundleInfo holds pre-computed bundling information created at registration time
// to avoid per-call cache key construction and sync.Map lookups for Darwin ARM64
// stack argument packing.
//
// NOTE(review): the code that fills and consumes these fields is not visible
// here; the per-field notes below are assumptions to confirm against it.
type preBundleInfo struct {
	key  string           // presumably the cache key that would otherwise be built on every call — confirm
	info cachedStructInfo // cached struct type plus the indices of its non-padding fields
	pool *sync.Pool       // NOTE(review): what this pool hands out is not shown here — confirm
}

// RegisterLibFunc is a wrapper around RegisterFunc that uses the C function returned from Dlsym(handle, name).
// It panics if it can't find the name symbol.
func RegisterLibFunc(fptr any, handle uintptr, name string) {
Expand Down Expand Up @@ -229,6 +243,56 @@ func RegisterFunc(fptr any, cfn uintptr) {
// When callbacks can unpack tightly-packed arguments, this workaround can be removed.
isCallback := isCallbackFunction(cfn)

// Pre-compute arg kinds at registration time to avoid per-call reflect overhead.
type argKind uint8
const (
akUint argKind = iota
akInt
akFloat32
akFloat64
akString
akBool
akPtr
akFunc
akStruct
akVariadic // last arg is []any
)
numIn := ty.NumIn()
argKinds := make([]argKind, numIn)
for i := 0; i < numIn; i++ {
switch ty.In(i).Kind() {
case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
argKinds[i] = akInt
case reflect.Uintptr, reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:
argKinds[i] = akUint
case reflect.Float32:
argKinds[i] = akFloat32
case reflect.Float64:
argKinds[i] = akFloat64
case reflect.String:
argKinds[i] = akString
case reflect.Bool:
argKinds[i] = akBool
case reflect.Func:
argKinds[i] = akFunc
case reflect.Struct:
argKinds[i] = akStruct
case reflect.Ptr, reflect.UnsafePointer, reflect.Slice:
argKinds[i] = akPtr
}
}
// Check if the last arg is variadic []any
if numIn > 0 {
lastIn := ty.In(numIn - 1)
if lastIn.Kind() == reflect.Slice && lastIn.Elem().Kind() == reflect.Interface {
argKinds[numIn-1] = akVariadic
}
}

// Pre-compute bundle info for Darwin ARM64 stack argument packing.
// This avoids per-call cache key construction and sync.Map lookups.
preBundleInfoVal := precomputeBundleInfo(ty)

v := reflect.MakeFunc(ty, func(args []reflect.Value) (results []reflect.Value) {
var sysargs [maxArgs]uintptr
var floats [numOfFloatRegisters]uintptr
Expand Down Expand Up @@ -273,10 +337,6 @@ func RegisterFunc(fptr any, cfn uintptr) {
}

var keepAlive []any
defer func() {
runtime.KeepAlive(keepAlive)
runtime.KeepAlive(args)
}()

var arm64_r8 uintptr
if ty.NumOut() == 1 && ty.Out(0).Kind() == reflect.Struct {
Expand All @@ -295,33 +355,57 @@ func RegisterFunc(fptr any, cfn uintptr) {
}
}
for i, v := range args {
if variadic, ok := xreflect.TypeAssert[[]any](args[i]); ok {
ak := argKinds[i]
// Handle variadic expansion
if ak == akVariadic {
if i != len(args)-1 {
panic("purego: can only expand last parameter")
}
variadic := v.Interface().([]any)
for _, x := range variadic {
keepAlive = addValue(reflect.ValueOf(x), keepAlive, addInt, addFloat, addStack, &numInts, &numFloats, &numStack)
}
continue
}
// Check if we need to start Darwin ARM64 C-style stack packing
// Skip tight packing for callbacks since they still use 8-byte slot unpacking
// TODO: Remove !isCallback condition once callback unpacking supports tight packing
if runtime.GOARCH == "arm64" && runtime.GOOS == "darwin" && !isCallback && shouldBundleStackArgs(v, numInts, numFloats) {
// Collect and separate remaining args into register vs stack
var stackArgsBuf [maxArgs]reflect.Value
stackArgs, newKeepAlive := collectStackArgs(args, i, numInts, numFloats,
keepAlive, addInt, addFloat, addStack, &numInts, &numFloats, &numStack)
keepAlive, addInt, addFloat, addStack, &numInts, &numFloats, &numStack, stackArgsBuf[:])
keepAlive = newKeepAlive

// Bundle stack arguments with C-style packing
bundleStackArgs(stackArgs, addStack)
bundleStackArgsWithInfo(stackArgs, addStack, preBundleInfoVal)
break
}
keepAlive = addValue(v, keepAlive, addInt, addFloat, addStack, &numInts, &numFloats, &numStack)
// Fast dispatch using pre-computed arg kind
switch ak {
case akUint:
addInt(uintptr(v.Uint()))
case akInt:
addInt(uintptr(v.Int()))
case akFloat32:
addFloat(uintptr(math.Float32bits(float32(v.Float()))))
case akFloat64:
addFloat(uintptr(math.Float64bits(v.Float())))
case akString:
ptr := strings.CString(v.String())
keepAlive = append(keepAlive, ptr)
addInt(uintptr(unsafe.Pointer(ptr)))
case akBool:
if v.Bool() {
addInt(1)
} else {
addInt(0)
}
case akPtr:
addInt(v.Pointer())
case akFunc:
addInt(NewCallback(v.Interface()))
case akStruct:
keepAlive = addStruct(v, &numInts, &numFloats, &numStack, addInt, addFloat, addStack, keepAlive)
}
}

syscall := thePool.Get().(*syscall15Args)
defer thePool.Put(syscall)

if runtime.GOARCH == "loong64" || runtime.GOARCH == "riscv64" {
*syscall = syscall15Args{
Expand Down Expand Up @@ -353,6 +437,9 @@ func RegisterFunc(fptr any, cfn uintptr) {
syscall.f1 = syscall.a2 // on amd64 a2 stores the float return. On 32bit platforms floats aren't support
}
if ty.NumOut() == 0 {
thePool.Put(syscall)
runtime.KeepAlive(keepAlive)
runtime.KeepAlive(args)
return nil
}
outType := ty.Out(0)
Expand Down Expand Up @@ -388,13 +475,15 @@ func RegisterFunc(fptr any, cfn uintptr) {
default:
panic("purego: unsupported return kind: " + outType.Kind().String())
}
thePool.Put(syscall)
runtime.KeepAlive(keepAlive)
runtime.KeepAlive(args)
if len(args) > 0 {
// reuse args slice instead of allocating one when possible
args[0] = v
return args[:1]
} else {
return []reflect.Value{v}
}
return []reflect.Value{v}
})
fn.Set(v)
}
Expand Down
8 changes: 7 additions & 1 deletion struct_386.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,17 @@ func structFitsInRegisters(val reflect.Value, tempNumInts, tempNumFloats int) (b
// collectStackArgs is not used on 386.
func collectStackArgs(args []reflect.Value, startIdx int, numInts, numFloats int,
keepAlive []any, addInt, addFloat, addStack func(uintptr),
pNumInts, pNumFloats, pNumStack *int) ([]reflect.Value, []any) {
pNumInts, pNumFloats, pNumStack *int, stackBuf []reflect.Value) ([]reflect.Value, []any) {
panic("purego: collectStackArgs should not be called on 386")
}

// precomputeBundleInfo is the 386 stub: it ignores ty and always returns nil,
// so no pre-computed bundle information is produced by this implementation.
func precomputeBundleInfo(ty reflect.Type) *preBundleInfo {
	return nil
}

// bundleStackArgs is a stub on 386, where it is not used.
// It ignores both arguments and panics unconditionally.
func bundleStackArgs(stackArgs []reflect.Value, addStack func(uintptr)) {
	const msg = "purego: bundleStackArgs should not be called on 386"
	panic(msg)
}

// bundleStackArgsWithInfo is a stub on 386, where it is not used.
// It ignores all of its arguments and panics unconditionally.
func bundleStackArgsWithInfo(stackArgs []reflect.Value, addStack func(uintptr), pre *preBundleInfo) {
	const msg = "purego: bundleStackArgsWithInfo should not be called on 386"
	panic(msg)
}
8 changes: 7 additions & 1 deletion struct_amd64.go
Original file line number Diff line number Diff line change
Expand Up @@ -277,11 +277,17 @@ func structFitsInRegisters(val reflect.Value, tempNumInts, tempNumFloats int) (b
// collectStackArgs is not used on amd64.
func collectStackArgs(args []reflect.Value, startIdx int, numInts, numFloats int,
keepAlive []any, addInt, addFloat, addStack func(uintptr),
pNumInts, pNumFloats, pNumStack *int) ([]reflect.Value, []any) {
pNumInts, pNumFloats, pNumStack *int, stackBuf []reflect.Value) ([]reflect.Value, []any) {
panic("purego: collectStackArgs should not be called on amd64")
}

// precomputeBundleInfo is the amd64 stub: it ignores ty and always returns nil,
// so no pre-computed bundle information is produced by this implementation.
func precomputeBundleInfo(ty reflect.Type) *preBundleInfo {
	return nil
}

// bundleStackArgs is a stub on amd64, where it is not used.
// It ignores both arguments and panics unconditionally.
func bundleStackArgs(stackArgs []reflect.Value, addStack func(uintptr)) {
	const msg = "purego: bundleStackArgs should not be called on amd64"
	panic(msg)
}

// bundleStackArgsWithInfo is a stub on amd64, where it is not used.
// It ignores all of its arguments and panics unconditionally.
func bundleStackArgsWithInfo(stackArgs []reflect.Value, addStack func(uintptr), pre *preBundleInfo) {
	const msg = "purego: bundleStackArgsWithInfo should not be called on amd64"
	panic(msg)
}
8 changes: 7 additions & 1 deletion struct_arm.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,17 @@ func structFitsInRegisters(val reflect.Value, tempNumInts, tempNumFloats int) (b
// collectStackArgs is not used on arm.
func collectStackArgs(args []reflect.Value, startIdx int, numInts, numFloats int,
keepAlive []any, addInt, addFloat, addStack func(uintptr),
pNumInts, pNumFloats, pNumStack *int) ([]reflect.Value, []any) {
pNumInts, pNumFloats, pNumStack *int, stackBuf []reflect.Value) ([]reflect.Value, []any) {
panic("purego: collectStackArgs should not be called on arm")
}

// precomputeBundleInfo is the arm stub: it ignores ty and always returns nil,
// so no pre-computed bundle information is produced by this implementation.
func precomputeBundleInfo(ty reflect.Type) *preBundleInfo {
	return nil
}

// bundleStackArgs is a stub on arm, where it is not used.
// It ignores both arguments and panics unconditionally.
func bundleStackArgs(stackArgs []reflect.Value, addStack func(uintptr)) {
	const msg = "purego: bundleStackArgs should not be called on arm"
	panic(msg)
}

// bundleStackArgsWithInfo is a stub on arm, where it is not used.
// It ignores all of its arguments and panics unconditionally.
func bundleStackArgsWithInfo(stackArgs []reflect.Value, addStack func(uintptr), pre *preBundleInfo) {
	const msg = "purego: bundleStackArgsWithInfo should not be called on arm"
	panic(msg)
}
Loading