diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index e5dc66a..bc78c65 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -94,6 +94,45 @@ jobs:
       - name: cargo clippy -p ordvec-ffi
         run: cargo clippy -p ordvec-ffi --all-targets -- -D warnings
 
+  # ----------------------------------------------------------------------
+  # Repo-local Go wrapper over the C ABI. Build the release static library
+  # first because the cgo wrapper links the source-tree archive directly.
+  # There is no actions/setup-go step: the Go steps run with whatever Go
+  # toolchain is already on the runner's PATH.
+  # ----------------------------------------------------------------------
+  go-wrapper:
+    name: go wrapper (cgo)
+    runs-on: ubuntu-latest
+    steps:
+      - uses: step-security/harden-runner@9af89fc71515a100421586dfdb3dc9c984fbf411 # v2.19.4
+        with:
+          egress-policy: audit
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+        with:
+          persist-credentials: false
+      - uses: dtolnay/rust-toolchain@29eef336d9b2848a0b548edc03f92a220660cdb8 # stable (2026-03-27)
+        with:
+          toolchain: stable
+      - uses: Swatinem/rust-cache@e18b497796c12c097a38f9edb9d0641fb99eee32 # v2
+      - name: cargo build -p ordvec-ffi --release
+        run: cargo build -p ordvec-ffi --release
+      - name: go test
+        working-directory: ordvec-go
+        env:
+          GOCACHE: ${{ runner.temp }}/go-build
+        run: go test -count=1 ./...
+      - name: go test -race
+        working-directory: ordvec-go
+        env:
+          GOCACHE: ${{ runner.temp }}/go-build
+        run: go test -race -count=1 ./...
+      - name: go test (cgocheck2)
+        working-directory: ordvec-go
+        env:
+          GOEXPERIMENT: cgocheck2
+          GOCACHE: ${{ runner.temp }}/go-build-cgocheck2
+        run: go test -count=1 ./...
+
   # ----------------------------------------------------------------------
   # Test matrix across the three GitHub-hosted OSes. Covers the default
   # surface, the experimental feature, the no-default-features build, and a
diff --git a/ordvec-go/README.md b/ordvec-go/README.md
new file mode 100644
index 0000000..8fde2ef
--- /dev/null
+++ b/ordvec-go/README.md
@@ -0,0 +1,21 @@
+# ordvec-go
+
+Thin cgo wrapper over the local `ordvec-ffi` C ABI.
+
+Build the Rust library before running Go tests or linking a Go program:
+
+```sh
+cargo build -p ordvec-ffi --release
+cd ordvec-go
+go test ./...
+go test -race ./...
+GOEXPERIMENT=cgocheck2 go test ./...
+```
+
+`Index.Close` should be called explicitly. A finalizer is installed as a safety
+net, but it is not a resource-management strategy.
+
+Search with `nil` options or `nil` `SearchOptions.Candidates` performs a full
+search. An empty, non-nil `Candidates` slice is treated as an explicit empty
+subset and returns a typed `StatusBadArgument`, matching the C ABI v1
+pointer/count contract.
diff --git a/ordvec-go/go.mod b/ordvec-go/go.mod
new file mode 100644
index 0000000..1903981
--- /dev/null
+++ b/ordvec-go/go.mod
@@ -0,0 +1,4 @@
+module github.com/Fieldnote-Echo/ordvec/ordvec-go
+
+go 1.22
+
diff --git a/ordvec-go/ordvec.go b/ordvec-go/ordvec.go
new file mode 100644
index 0000000..6ca2593
--- /dev/null
+++ b/ordvec-go/ordvec.go
@@ -0,0 +1,388 @@
+// Package ordvec is a thin cgo wrapper over the ordvec-ffi C ABI.
+//
+// Build the Rust static library first (cargo build -p ordvec-ffi --release):
+// the cgo LDFLAGS below link the library from ../target/release.
+package ordvec
+
+/*
+#cgo CFLAGS: -I${SRCDIR}/../ordvec-ffi/include
+#cgo linux LDFLAGS: ${SRCDIR}/../target/release/libordvec_ffi.a -ldl -lm -lpthread
+#cgo darwin LDFLAGS: ${SRCDIR}/../target/release/libordvec_ffi.a -lm -lpthread
+#cgo windows LDFLAGS: -L${SRCDIR}/../target/release -lordvec_ffi -lws2_32 -lbcrypt -luserenv
+#include <stdlib.h>
+#include "ordvec.h"
+
+// Fills ordvec_search_params_t on the C side and forwards to
+// ordvec_index_search, so Go passes every pointer as a plain call argument.
+static ordvec_status_t ordvec_go_index_search(
+    const ordvec_index_t* index,
+    const float* query,
+    uint64_t dim,
+    uint64_t k,
+    const uint32_t* candidate_rows,
+    uint64_t candidate_count,
+    uint64_t user_tag,
+    ordvec_hit_t* hits_out,
+    uint64_t hits_capacity,
+    uint64_t* returned_out,
+    ordvec_search_stats_t* stats_out
+) {
+    ordvec_search_params_t params;
+    ordvec_search_params_init(&params);
+    params.query = query;
+    params.dim = dim;
+    params.k = k;
+    params.candidate_rows = candidate_rows;
+    params.candidate_count = candidate_count;
+    params.user_tag = user_tag;
+    return ordvec_index_search(index, &params, hits_out, hits_capacity, returned_out, stats_out);
+}
+*/
+import "C"
+
+import (
+	"errors"
+	"fmt"
+	"math"
+	"runtime"
+	"strings"
+	"sync"
+	"unsafe"
+)
+
+// Status mirrors the ordvec_status_t codes returned by the C ABI.
+type Status uint32
+
+// Status values, one per ORDVEC_STATUS_* constant in ordvec.h.
+const (
+	StatusOK                   Status = C.ORDVEC_STATUS_OK
+	StatusNullPointer          Status = C.ORDVEC_STATUS_NULL_POINTER
+	StatusBadArgument          Status = C.ORDVEC_STATUS_BAD_ARGUMENT
+	StatusBadStructSize        Status = C.ORDVEC_STATUS_BAD_STRUCT_SIZE
+	StatusUnsupportedFormat    Status = C.ORDVEC_STATUS_UNSUPPORTED_FORMAT
+	StatusCorruptIndex         Status = C.ORDVEC_STATUS_CORRUPT_INDEX
+	StatusIO                   Status = C.ORDVEC_STATUS_IO
+	StatusDimMismatch          Status = C.ORDVEC_STATUS_DIM_MISMATCH
+	StatusNonfiniteQuery       Status = C.ORDVEC_STATUS_NONFINITE_QUERY
+	StatusRowIDOutOfRange      Status = C.ORDVEC_STATUS_ROW_ID_OUT_OF_RANGE
+	StatusBufferTooSmall       Status = C.ORDVEC_STATUS_BUFFER_TOO_SMALL
+	StatusUnsupportedOperation Status = C.ORDVEC_STATUS_UNSUPPORTED_OPERATION
+	StatusPanic                Status = C.ORDVEC_STATUS_PANIC
+	StatusInternal             Status = C.ORDVEC_STATUS_INTERNAL
+)
+
+// Kind mirrors the ORDVEC_INDEX_KIND_* constants describing the index type.
+type Kind uint32
+
+// Index kinds, one per ORDVEC_INDEX_KIND_* constant in ordvec.h.
+const (
+	KindUnknown   Kind = C.ORDVEC_INDEX_KIND_UNKNOWN
+	KindRankQuant Kind = C.ORDVEC_INDEX_KIND_RANK_QUANT
+	KindBitmap    Kind = C.ORDVEC_INDEX_KIND_BITMAP
+)
+
+// ErrClosed is returned by Index methods called after Close.
+var ErrClosed = errors.New("ordvec: index closed")
+
+// StatusError is the typed error for a non-OK status from the C ABI.
+type StatusError struct {
+	Status  Status
+	Message string
+}
+
+// Error formats the status name, followed by Message when it adds detail.
+func (e *StatusError) Error() string {
+	name := e.Status.String()
+	// statusError falls back to the status name when the C side reports no
+	// message; skip it here so the name is not printed twice.
+	if e.Message == "" || e.Message == name {
+		return fmt.Sprintf("ordvec: %s", name)
+	}
+	return fmt.Sprintf("ordvec: %s: %s", name, e.Message)
+}
+
+// String returns the name reported by ordvec_status_name for s.
+func (s Status) String() string {
+	return C.GoString(C.ordvec_status_name(C.ordvec_status_t(s)))
+}
+
+// Info is the Go copy of ordvec_index_info_t, captured once at Load time.
+type Info struct {
+	Kind                Kind
+	FormatVersion       uint32
+	Dim                 uint64
+	BitWidth            uint32
+	NTop                uint32
+	VectorCount         uint64
+	BytesPerVec         uint64
+	SourceFileSizeBytes uint64
+	Capabilities        uint64
+}
+
+// Hit is one search result copied out of an ordvec_hit_t.
+type Hit struct {
+	RowID uint64
+	ID    uint64
+	Score float32
+}
+
+// Stats is the Go copy of the ordvec_search_stats_t filled in by a search.
+type Stats struct {
+	ABIVersion     uint32
+	Kind           Kind
+	Dim            uint64
+	BitWidth       uint32
+	NTop           uint32
+	K              uint64
+	UserTag        uint64
+	VectorCount    uint64
+	CandidateCount uint64
+	ReturnedCount  uint64
+	TotalNS        uint64
+	PrepareNS      uint64
+	ScoreNS        uint64
+	SelectNS       uint64
+	VectorsScored  uint64
+	BytesRead      uint64
+}
+
+// SearchOptions carries the optional inputs to Index.Search.
+type SearchOptions struct {
+	// Candidates restricts the search to these rows. nil means search every
+	// row; an empty non-nil slice is forwarded as an explicit empty subset.
+	Candidates []uint32
+	// UserTag is passed through to the C ABI unchanged.
+	UserTag uint64
+}
+
+// Index is a loaded ordvec index. The mutex guards ptr so that Close cannot
+// free the handle while Info or Search holds the read lock.
+type Index struct {
+	mu   sync.RWMutex
+	ptr  *C.ordvec_index_t
+	info Info
+}
+
+// emptyCandidateSentinel gives Search a non-nil address to pass when
+// Candidates is empty but non-nil, keeping it distinct from "no subset".
+var emptyCandidateSentinel uint32
+
+// statusError converts st into a *StatusError, or nil for StatusOK. The
+// message comes from ordvec_last_error, with the status name as fallback.
+func statusError(st C.ordvec_status_t) error {
+	status := Status(st)
+	if status == StatusOK {
+		return nil
+	}
+	msg := C.GoString(C.ordvec_last_error())
+	if msg == "" {
+		msg = status.String()
+	}
+	return &StatusError{Status: status, Message: msg}
+}
+
+// callStatus runs fn and converts its status with the goroutine locked to
+// one OS thread for the whole sequence.
+// NOTE(review): presumably ordvec_last_error is thread-local; confirm.
+func callStatus(fn func() C.ordvec_status_t) error {
+	runtime.LockOSThread()
+	defer runtime.UnlockOSThread()
+	return statusError(fn())
+}
+
+// Load opens the index file at path and caches its Info. The returned Index
+// should be released with Close; a finalizer is installed only as a backstop.
+// Paths containing a NUL byte are rejected before any C call is made.
+func Load(path string) (*Index, error) {
+	if strings.IndexByte(path, 0) >= 0 {
+		return nil, errors.New("ordvec: path contains null byte")
+	}
+	cpath := C.CString(path)
+	defer C.free(unsafe.Pointer(cpath))
+
+	var out *C.ordvec_index_t
+	err := callStatus(func() C.ordvec_status_t {
+		return C.ordvec_index_load(cpath, 0, &out)
+	})
+	if err != nil {
+		return nil, err
+	}
+	idx := &Index{ptr: out}
+	info, err := idx.infoLocked()
+	if err != nil {
+		C.ordvec_index_free(out)
+		return nil, err
+	}
+	idx.info = info
+	runtime.SetFinalizer(idx, (*Index).finalize)
+	return idx, nil
+}
+
+// finalize is the finalizer installed by Load; Close never fails.
+func (idx *Index) finalize() {
+	_ = idx.Close()
+}
+
+// Close frees the underlying handle. It is idempotent and always returns nil;
+// later Info and Search calls return ErrClosed.
+func (idx *Index) Close() error {
+	idx.mu.Lock()
+	defer idx.mu.Unlock()
+	if idx.ptr == nil {
+		return nil
+	}
+	C.ordvec_index_free(idx.ptr)
+	idx.ptr = nil
+	runtime.SetFinalizer(idx, nil)
+	return nil
+}
+
+// Info returns the metadata cached at Load time, or ErrClosed after Close.
+func (idx *Index) Info() (Info, error) {
+	idx.mu.RLock()
+	defer idx.mu.RUnlock()
+	if idx.ptr == nil {
+		return Info{}, ErrClosed
+	}
+	return idx.info, nil
+}
+
+// infoLocked queries ordvec_index_info for idx.ptr. It takes no lock itself;
+// Load calls it before the Index is shared.
+func (idx *Index) infoLocked() (Info, error) {
+	var ci C.ordvec_index_info_t
+	C.ordvec_index_info_init(&ci)
+	err := callStatus(func() C.ordvec_status_t {
+		return C.ordvec_index_info(idx.ptr, &ci)
+	})
+	runtime.KeepAlive(idx)
+	if err != nil {
+		return Info{}, err
+	}
+	return Info{
+		Kind:                Kind(ci.kind),
+		FormatVersion:       uint32(ci.format_version),
+		Dim:                 uint64(ci.dim),
+		BitWidth:            uint32(ci.bit_width),
+		NTop:                uint32(ci.n_top),
+		VectorCount:         uint64(ci.vector_count),
+		BytesPerVec:         uint64(ci.bytes_per_vec),
+		SourceFileSizeBytes: uint64(ci.source_file_size_bytes),
+		Capabilities:        uint64(ci.capabilities),
+	}, nil
+}
+
+// Search returns up to k hits for query together with the C-side stats.
+//
+// A nil opts or nil opts.Candidates searches every row. A non-nil Candidates
+// slice restricts the search to those rows; when it is empty the C ABI
+// receives a non-nil pointer with count 0. Failures reported by the C ABI
+// are returned as *StatusError, and ErrClosed is returned after Close.
+func (idx *Index) Search(query []float32, k uint64, opts *SearchOptions) ([]Hit, Stats, error) {
+	idx.mu.RLock()
+	defer idx.mu.RUnlock()
+	if idx.ptr == nil {
+		return nil, Stats{}, ErrClosed
+	}
+
+	// The hit buffer never needs more slots than min(k, rows searched).
+	searchSpace := idx.info.VectorCount
+	if opts != nil && opts.Candidates != nil {
+		searchSpace = uint64(len(opts.Candidates))
+	}
+	required := min(k, searchSpace)
+	if required > math.MaxInt {
+		return nil, Stats{}, fmt.Errorf("ordvec: required hit count %d overflows int", required)
+	}
+
+	var pinner runtime.Pinner
+	defer pinner.Unpin()
+
+	var queryPtr *C.float
+	if len(query) > 0 {
+		pinner.Pin(&query[0])
+		queryPtr = (*C.float)(unsafe.Pointer(&query[0]))
+	}
+	var candidateRows *C.uint32_t
+	var candidateCount C.uint64_t
+	var userTag C.uint64_t
+	if opts != nil {
+		userTag = C.uint64_t(opts.UserTag)
+		if opts.Candidates != nil {
+			candidateCount = C.uint64_t(len(opts.Candidates))
+			if len(opts.Candidates) > 0 {
+				pinner.Pin(&opts.Candidates[0])
+				candidateRows = (*C.uint32_t)(unsafe.Pointer(&opts.Candidates[0]))
+			} else {
+				// Empty but non-nil: hand C a valid address with count 0.
+				pinner.Pin(&emptyCandidateSentinel)
+				candidateRows = (*C.uint32_t)(unsafe.Pointer(&emptyCandidateSentinel))
+			}
+		}
+	}
+
+	chits := make([]C.ordvec_hit_t, int(required))
+	var hitsPtr *C.ordvec_hit_t
+	if len(chits) > 0 {
+		hitsPtr = &chits[0]
+	}
+	var returned C.uint64_t
+	var cstats C.ordvec_search_stats_t
+	C.ordvec_search_stats_init(&cstats)
+	err := callStatus(func() C.ordvec_status_t {
+		return C.ordvec_go_index_search(
+			idx.ptr,
+			queryPtr,
+			C.uint64_t(len(query)),
+			C.uint64_t(k),
+			candidateRows,
+			candidateCount,
+			userTag,
+			hitsPtr,
+			C.uint64_t(len(chits)),
+			&returned,
+			&cstats,
+		)
+	})
+	runtime.KeepAlive(query)
+	if opts != nil {
+		runtime.KeepAlive(opts.Candidates)
+	}
+	runtime.KeepAlive(idx)
+	if err != nil {
+		return nil, Stats{}, err
+	}
+	// Never trust a count larger than the buffer we handed over: indexing
+	// chits past its length would panic.
+	if uint64(returned) > uint64(len(chits)) {
+		return nil, Stats{}, fmt.Errorf("ordvec: search returned %d hits for a buffer of %d", uint64(returned), len(chits))
+	}
+
+	hits := make([]Hit, int(returned))
+	for i := range hits {
+		hits[i] = Hit{
+			RowID: uint64(chits[i].row_id),
+			ID:    uint64(chits[i].id),
+			Score: float32(chits[i].score),
+		}
+	}
+	stats := Stats{
+		ABIVersion:     uint32(cstats.abi_version),
+		Kind:           Kind(cstats.kind),
+		Dim:            uint64(cstats.dim),
+		BitWidth:       uint32(cstats.bit_width),
+		NTop:           uint32(cstats.n_top),
+		K:              uint64(cstats.k),
+		UserTag:        uint64(cstats.user_tag),
+		VectorCount:    uint64(cstats.vector_count),
+		CandidateCount: uint64(cstats.candidate_count),
+		ReturnedCount:  uint64(cstats.returned_count),
+		TotalNS:        uint64(cstats.total_ns),
+		PrepareNS:      uint64(cstats.prepare_ns),
+		ScoreNS:        uint64(cstats.score_ns),
+		SelectNS:       uint64(cstats.select_ns),
+		VectorsScored:  uint64(cstats.vectors_scored),
+		BytesRead:      uint64(cstats.bytes_read),
+	}
+	return hits, stats, nil
+}
diff --git a/ordvec-go/ordvec_test.go b/ordvec-go/ordvec_test.go
new file mode 100644
index 0000000..9c70d8c
--- /dev/null
+++ b/ordvec-go/ordvec_test.go
@@ -0,0 +1,228 @@
+package ordvec
+
+import (
+	"encoding/binary"
+	"errors"
+	"math"
+	"os"
+	"path/filepath"
+	"strings"
+	"testing"
+)
+
+// writeRankQuantFixture writes a "TVRQ" file (version 1, 2 bits, header
+// values 16 and 4, then four 4-byte rows) to a temp dir and returns its path.
+func writeRankQuantFixture(t *testing.T) string {
+	t.Helper()
+	path := filepath.Join(t.TempDir(), "fixture.tvrq")
+	var b []byte
+	b = append(b, []byte("TVRQ")...)
+	b = append(b, 1) // version
+	b = append(b, 2) // bits
+	b = binary.LittleEndian.AppendUint32(b, 16)
+	b = binary.LittleEndian.AppendUint32(b, 4)
+	row := []byte{0x00, 0x55, 0xAA, 0xFF}
+	for i := 0; i < 4; i++ {
+		b = append(b, row...)
+	}
+	if err := os.WriteFile(path, b, 0o600); err != nil {
+		t.Fatal(err)
+	}
+	return path
+}
+
+// writeBitmapFixture writes a "TVBM" file (version 1, header values 64, 4, 4,
+// then four uint64 rows with the low four bits set) and returns its path.
+func writeBitmapFixture(t *testing.T) string {
+	t.Helper()
+	path := filepath.Join(t.TempDir(), "fixture.tvbm")
+	var b []byte
+	b = append(b, []byte("TVBM")...)
+	b = append(b, 1) // version
+	b = binary.LittleEndian.AppendUint32(b, 64)
+	b = binary.LittleEndian.AppendUint32(b, 4)
+	b = binary.LittleEndian.AppendUint32(b, 4)
+	row := uint64(0)
+	for i := 0; i < 4; i++ {
+		row |= 1 << i
+	}
+	for i := 0; i < 4; i++ {
+		b = binary.LittleEndian.AppendUint64(b, row)
+	}
+	if err := os.WriteFile(path, b, 0o600); err != nil {
+		t.Fatal(err)
+	}
+	return path
+}
+
+// query16 returns the 16-element query 0, 1, ..., 15.
+func query16() []float32 {
+	q := make([]float32, 16)
+	for i := range q {
+		q[i] = float32(i)
+	}
+	return q
+}
+
+// query64 returns a 64-element query whose first four entries are 10..13.
+func query64() []float32 {
+	q := make([]float32, 64)
+	for i := 0; i < 4; i++ {
+		q[i] = 10 + float32(i)
+	}
+	return q
+}
+
+func TestLoadInfoSearchRankQuant(t *testing.T) {
+	idx, err := Load(writeRankQuantFixture(t))
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer idx.Close()
+
+	info, err := idx.Info()
+	if err != nil {
+		t.Fatal(err)
+	}
+	if info.Kind != KindRankQuant || info.Dim != 16 || info.BitWidth != 2 || info.VectorCount != 4 {
+		t.Fatalf("unexpected info: %+v", info)
+	}
+
+	hits, stats, err := idx.Search(query16(), 2, &SearchOptions{UserTag: 99})
+	if err != nil {
+		t.Fatal(err)
+	}
+	if len(hits) != 2 {
+		t.Fatalf("got %d hits", len(hits))
+	}
+	if hits[0].RowID != 0 || hits[0].ID != hits[0].RowID {
+		t.Fatalf("unexpected first hit: %+v", hits[0])
+	}
+	if stats.UserTag != 99 || stats.CandidateCount != 4 || stats.VectorsScored != 4 || stats.ReturnedCount != 2 {
+		t.Fatalf("unexpected stats: %+v", stats)
+	}
+}
+
+func TestRankQuantSubsetSearchOrdersByRowID(t *testing.T) {
+	idx, err := Load(writeRankQuantFixture(t))
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer idx.Close()
+
+	hits, stats, err := idx.Search(query16(), 2, &SearchOptions{
+		Candidates: []uint32{3, 1, 2},
+		UserTag:    7,
+	})
+	if err != nil {
+		t.Fatal(err)
+	}
+	if len(hits) != 2 {
+		t.Fatalf("got %d hits", len(hits))
+	}
+	if got := []uint64{hits[0].RowID, hits[1].RowID}; got[0] != 1 || got[1] != 2 {
+		t.Fatalf("unexpected row order: %v", got)
+	}
+	if stats.UserTag != 7 || stats.CandidateCount != 3 || stats.VectorsScored != 3 {
+		t.Fatalf("unexpected stats: %+v", stats)
+	}
+}
+
+func TestBitmapSubsetSearchAllowsDuplicateHits(t *testing.T) {
+	idx, err := Load(writeBitmapFixture(t))
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer idx.Close()
+
+	hits, stats, err := idx.Search(query64(), 3, &SearchOptions{
+		Candidates: []uint32{3, 1, 1, 2},
+	})
+	if err != nil {
+		t.Fatal(err)
+	}
+	if len(hits) != 3 {
+		t.Fatalf("got %d hits", len(hits))
+	}
+	got := []uint64{hits[0].RowID, hits[1].RowID, hits[2].RowID}
+	if got[0] != 1 || got[1] != 1 || got[2] != 2 {
+		t.Fatalf("unexpected row order: %v", got)
+	}
+	if stats.Kind != KindBitmap || stats.NTop != 4 || stats.CandidateCount != 4 {
+		t.Fatalf("unexpected stats: %+v", stats)
+	}
+}
+
+func TestNilAndEmptySubsetDistinction(t *testing.T) {
+	idx, err := Load(writeRankQuantFixture(t))
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer idx.Close()
+
+	if _, _, err := idx.Search(query16(), 1, nil); err != nil {
+		t.Fatalf("nil options should mean full search: %v", err)
+	}
+	if _, _, err := idx.Search(query16(), 1, &SearchOptions{}); err != nil {
+		t.Fatalf("nil Candidates should mean full search: %v", err)
+	}
+	_, _, err = idx.Search(query16(), 1, &SearchOptions{Candidates: []uint32{}})
+	var statusErr *StatusError
+	if !errors.As(err, &statusErr) || statusErr.Status != StatusBadArgument {
+		t.Fatalf("empty nonnil candidates should be BAD_ARGUMENT, got %T %[1]v", err)
+	}
+}
+
+func TestTypedStatusErrors(t *testing.T) {
+	idx, err := Load(writeRankQuantFixture(t))
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer idx.Close()
+
+	q := query16()
+	q[0] = float32(math.NaN())
+	_, _, err = idx.Search(q, 1, nil)
+	var statusErr *StatusError
+	if !errors.As(err, &statusErr) {
+		t.Fatalf("expected StatusError, got %T %[1]v", err)
+	}
+	if statusErr.Status != StatusNonfiniteQuery {
+		t.Fatalf("unexpected status: %v", statusErr.Status)
+	}
+
+	_, err = Load(filepath.Join(t.TempDir(), "missing.tvrq"))
+	if !errors.As(err, &statusErr) || statusErr.Status != StatusIO {
+		t.Fatalf("missing file should be IO status, got %T %[1]v", err)
+	}
+}
+
+func TestLoadRejectsNullBytePath(t *testing.T) {
+	_, err := Load("bad\x00path.tvrq")
+	if err == nil || !strings.Contains(err.Error(), "null byte") {
+		t.Fatalf("Load should reject null byte paths, got %v", err)
+	}
+	var statusErr *StatusError
+	if errors.As(err, &statusErr) {
+		t.Fatalf("null byte path should be rejected before C call, got %v", err)
+	}
+}
+
+func TestCloseIsIdempotentAndErrClosed(t *testing.T) {
+	idx, err := Load(writeRankQuantFixture(t))
+	if err != nil {
+		t.Fatal(err)
+	}
+	if err := idx.Close(); err != nil {
+		t.Fatal(err)
+	}
+	if err := idx.Close(); err != nil {
+		t.Fatal(err)
+	}
+	if _, err := idx.Info(); !errors.Is(err, ErrClosed) {
+		t.Fatalf("Info after Close should return ErrClosed, got %v", err)
+	}
+	if _, _, err := idx.Search(query16(), 1, nil); !errors.Is(err, ErrClosed) {
+		t.Fatalf("Search after Close should return ErrClosed, got %v", err)
+	}
+}