From 60a234c103e683121388f9b735aae2b78d9fc0d8 Mon Sep 17 00:00:00 2001 From: lichen2015 Date: Wed, 25 Feb 2026 10:56:42 +0800 Subject: [PATCH 1/7] init branch format c_api.h fix some code format c_api code build rpm fix some code fix some code remove rpm build Add GitHub Actions release workflow for C API add release linux-arm64 remove c api version fix some code fix some code fix some code refact some code format some code format some code format some code add api reference --- .github/workflows/release.yml | 193 + CMakeLists.txt | 6 + examples/CMakeLists.txt | 15 + examples/c_api/CMakeLists.txt | 65 + examples/c_api/basic_example.c | 240 + examples/c_api/collection_schema_example.c | 253 + examples/c_api/doc_example.c | 520 ++ examples/c_api/field_schema_example.c | 282 + examples/c_api/index_example.c | 330 + examples/c_api/optimized_example.c | 302 + pyproject.toml | 8 + src/CMakeLists.txt | 1 + src/binding/python/CMakeLists.txt | 2 +- src/c_api/API_REFERENCE_CN.md | 1843 ++++++ src/c_api/CMakeLists.txt | 168 + src/c_api/c_api.cc | 5735 +++++++++++++++++ src/db/CMakeLists.txt | 5 +- src/include/zvec/c_api.h | 2329 +++++++ src/include/zvec/db/doc.h | 28 + tests/CMakeLists.txt | 1 + tests/c_api/CMakeLists.txt | 28 + tests/c_api/c_api_test.c | 4399 +++++++++++++ tests/c_api/utils.c | 940 +++ tests/c_api/utils.h | 260 + tests/core/algorithm/ivf/ivf_searcher_test.cc | 2 +- 25 files changed, 17950 insertions(+), 5 deletions(-) create mode 100644 .github/workflows/release.yml create mode 100644 examples/CMakeLists.txt create mode 100644 examples/c_api/CMakeLists.txt create mode 100644 examples/c_api/basic_example.c create mode 100644 examples/c_api/collection_schema_example.c create mode 100644 examples/c_api/doc_example.c create mode 100644 examples/c_api/field_schema_example.c create mode 100644 examples/c_api/index_example.c create mode 100644 examples/c_api/optimized_example.c create mode 100644 src/c_api/API_REFERENCE_CN.md create mode 100644 
src/c_api/CMakeLists.txt create mode 100644 src/c_api/c_api.cc create mode 100644 src/include/zvec/c_api.h create mode 100644 tests/c_api/CMakeLists.txt create mode 100644 tests/c_api/c_api_test.c create mode 100644 tests/c_api/utils.c create mode 100644 tests/c_api/utils.h diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 00000000..20a57f88 --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,193 @@ +name: Release + +permissions: + contents: read + +on: + push: + tags: + - 'v*' # Match v0.3.0, v1.0.0, etc. + workflow_dispatch: # Allow manual trigger + +jobs: + # ============================================================================ + # Linux x64 Build + # ============================================================================ + linux-x64: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + submodules: recursive + fetch-depth: 0 + + - name: Install dependencies + run: | + sudo apt-get update + sudo apt-get install -y cmake ninja-build build-essential + + - name: Build libzvec_c_api.so + run: | + cmake -S . -B build -G Ninja \ + -DCMAKE_BUILD_TYPE=Release \ + -DBUILD_PYTHON_BINDINGS=OFF \ + -DBUILD_TOOLS=OFF \ + -DBUILD_EXAMPLES=OFF + cmake --build build --parallel --target zvec_c_api + + - name: Verify library + run: | + echo "=== Library file ===" + ls -lh build/src/c_api/libzvec_c_api.so + echo "=== Check dependencies ===" + ldd build/src/c_api/libzvec_c_api.so || true + + - name: Create tarball + run: | + cp src/include/zvec/c_api.h . + cp build/src/c_api/libzvec_c_api.so . 
+ tar -czvf libzvec-capi-linux-x64.tar.gz \ + c_api.h \ + libzvec_c_api.so + + - name: Upload artifact + uses: actions/upload-artifact@v4 + with: + name: zvec-capi-linux-x64 + path: libzvec-capi-linux-x64.tar.gz + + # ============================================================================ + # Linux ARM64 Build + # ============================================================================ + linux-arm64: + runs-on: ubuntu-24.04-arm + steps: + - uses: actions/checkout@v4 + with: + submodules: recursive + fetch-depth: 0 + + - name: Install dependencies + run: | + sudo apt-get update + sudo apt-get install -y cmake ninja-build build-essential + + - name: Build libzvec_c_api.so (ARM64) + run: | + cmake -S . -B build -G Ninja \ + -DCMAKE_BUILD_TYPE=Release \ + -DBUILD_PYTHON_BINDINGS=OFF \ + -DBUILD_TOOLS=OFF \ + -DBUILD_EXAMPLES=OFF + cmake --build build --parallel --target zvec_c_api + + - name: Verify library + run: | + echo "=== Library file ===" + ls -lh build/src/c_api/libzvec_c_api.so + echo "=== Check dependencies ===" + ldd build/src/c_api/libzvec_c_api.so || true + + - name: Create tarball + run: | + cp src/include/zvec/c_api.h . + cp build/src/c_api/libzvec_c_api.so . + tar -czvf libzvec-capi-linux-arm64.tar.gz \ + c_api.h \ + libzvec_c_api.so + + - name: Upload artifact + uses: actions/upload-artifact@v4 + with: + name: zvec-capi-linux-arm64 + path: libzvec-capi-linux-arm64.tar.gz + + # ============================================================================ + # macOS Universal Build (arm64 + x86_64) + # ============================================================================ + macos-universal: + runs-on: macos-latest + steps: + - uses: actions/checkout@v4 + with: + submodules: recursive + fetch-depth: 0 + + - name: Install dependencies + run: | + brew install cmake ninja + + - name: Build libzvec_c_api.dylib (Universal Binary) + env: + CMAKE_OSX_ARCHITECTURES: "arm64;x86_64" + MACOSX_DEPLOYMENT_TARGET: "11.0" + run: | + cmake -S . 
-B build -G Ninja \ + -DCMAKE_BUILD_TYPE=Release \ + -DCMAKE_OSX_ARCHITECTURES="arm64;x86_64" \ + -DCMAKE_OSX_DEPLOYMENT_TARGET="11.0" \ + -DBUILD_PYTHON_BINDINGS=OFF \ + -DBUILD_TOOLS=OFF \ + -DBUILD_EXAMPLES=OFF + cmake --build build --parallel --target zvec_c_api + + - name: Verify library + run: | + echo "=== Library file ===" + ls -lh build/src/c_api/libzvec_c_api.dylib + echo "=== Check architectures ===" + lipo -archs build/src/c_api/libzvec_c_api.dylib + + - name: Create tarball + run: | + cp src/include/zvec/c_api.h . + cp build/src/c_api/libzvec_c_api.dylib . + tar -czvf libzvec-capi-macos-universal.tar.gz \ + c_api.h \ + libzvec_c_api.dylib + + - name: Upload artifact + uses: actions/upload-artifact@v4 + with: + name: zvec-capi-macos-universal + path: libzvec-capi-macos-universal.tar.gz + + # ============================================================================ + # Upload to GitHub Releases + # ============================================================================ + upload-release: + needs: [linux-x64, linux-arm64, macos-universal] + runs-on: ubuntu-latest + permissions: + contents: write + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + # Download build artifacts for each platform + - uses: actions/download-artifact@v4 + with: + name: zvec-capi-linux-x64 + path: dist/ + + - uses: actions/download-artifact@v4 + with: + name: zvec-capi-linux-arm64 + path: dist/ + + - uses: actions/download-artifact@v4 + with: + name: zvec-capi-macos-universal + path: dist/ + + - name: List artifacts + run: ls -la dist/ + + # Upload to GitHub Releases + - uses: softprops/action-gh-release@v1 + with: + files: dist/*.tar.gz + generate_release_notes: true + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/CMakeLists.txt b/CMakeLists.txt index 52a59754..ad954faa 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -34,9 +34,15 @@ if(DEFINED ENV{USE_OSS_MIRROR} AND NOT "$ENV{USE_OSS_MIRROR}" STREQUAL "") endif() message(STATUS 
"USE_OSS_MIRROR:${USE_OSS_MIRROR}") +option(BUILD_EXAMPLES "Build examples" ON) +message(STATUS "BUILD_EXAMPLES:${BUILD_EXAMPLES}") + cc_directory(thirdparty) cc_directories(src) cc_directories(tests) +if(BUILD_EXAMPLES) + cc_directories(examples) +endif() if(BUILD_TOOLS) cc_directories(tools) diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt new file mode 100644 index 00000000..66e943ad --- /dev/null +++ b/examples/CMakeLists.txt @@ -0,0 +1,15 @@ +# Copyright 2025-present the zvec project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +add_subdirectory(c_api) \ No newline at end of file diff --git a/examples/c_api/CMakeLists.txt b/examples/c_api/CMakeLists.txt new file mode 100644 index 00000000..759f744f --- /dev/null +++ b/examples/c_api/CMakeLists.txt @@ -0,0 +1,65 @@ +# Copyright 2025-present the zvec project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# Basic example +add_executable(c_api_basic_example basic_example.c) +target_link_libraries(c_api_basic_example PRIVATE zvec_c_api) +target_include_directories(c_api_basic_example PRIVATE + ${PROJECT_SOURCE_DIR}/src/include +) +set_target_properties(c_api_basic_example PROPERTIES + RUNTIME_OUTPUT_DIRECTORY ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/examples/c_api +) + + +# Schema example +add_executable(c_api_collection_schema_example collection_schema_example.c) +target_link_libraries(c_api_collection_schema_example PRIVATE zvec_c_api) +target_include_directories(c_api_collection_schema_example PRIVATE + ${PROJECT_SOURCE_DIR}/src/include +) +set_target_properties(c_api_collection_schema_example PROPERTIES + RUNTIME_OUTPUT_DIRECTORY ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/examples/c_api +) + +# Struct document example +add_executable(c_api_doc_example doc_example.c) +target_link_libraries(c_api_doc_example PRIVATE zvec_c_api) +target_include_directories(c_api_doc_example PRIVATE + ${PROJECT_SOURCE_DIR}/src/include +) +set_target_properties(c_api_doc_example PROPERTIES + RUNTIME_OUTPUT_DIRECTORY ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/examples/c_api +) + +# Index example +add_executable(c_api_index_example index_example.c) +target_link_libraries(c_api_index_example PRIVATE zvec_c_api) +set_target_properties(c_api_index_example PROPERTIES + RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin/examples/c_api +) + +# Newly added field schema example +add_executable(c_api_field_schema_example field_schema_example.c) +target_link_libraries(c_api_field_schema_example PRIVATE zvec_c_api) +set_target_properties(c_api_field_schema_example PROPERTIES + RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin/examples/c_api +) + +# Optimized example +add_executable(c_api_optimized_example optimized_example.c) +target_link_libraries(c_api_optimized_example PRIVATE zvec_c_api) +set_target_properties(c_api_optimized_example PROPERTIES + RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin/examples/c_api +) diff 
--git a/examples/c_api/basic_example.c b/examples/c_api/basic_example.c new file mode 100644 index 00000000..e4efbdfd --- /dev/null +++ b/examples/c_api/basic_example.c @@ -0,0 +1,240 @@ +// Copyright 2025-present the zvec project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include "zvec/c_api.h" + +/** + * @brief Print error message and return error code + */ +static ZVecErrorCode handle_error(ZVecErrorCode error, const char *context) { + if (error != ZVEC_OK) { + char *error_msg = NULL; + zvec_get_last_error(&error_msg); + fprintf(stderr, "Error in %s: %d - %s\n", context, error, + error_msg ? 
error_msg : "Unknown error"); + free(error_msg); + } + return error; +} + +/** + * @brief Create a simple test collection using CollectionSchema + */ +static ZVecErrorCode create_simple_test_collection( + ZVecCollection **collection) { + // Create collection schema using C API + ZVecCollectionSchema *schema = + zvec_collection_schema_create("test_collection"); + if (!schema) { + return ZVEC_ERROR_INTERNAL_ERROR; + } + + ZVecErrorCode error = ZVEC_OK; + + // Create index parameters + ZVecInvertIndexParams *invert_params = + zvec_index_params_invert_create(true, false); + ZVecHnswIndexParams *hnsw_params = zvec_index_params_hnsw_create( + ZVEC_METRIC_TYPE_COSINE, ZVEC_QUANTIZE_TYPE_UNDEFINED, 16, 200, 50); + + // Create and add ID field (primary key) + ZVecFieldSchema *id_field = + zvec_field_schema_create("id", ZVEC_DATA_TYPE_STRING, false, 0); + zvec_field_schema_set_invert_index(id_field, invert_params); + error = zvec_collection_schema_add_field(schema, id_field); + if (error != ZVEC_OK) { + zvec_collection_schema_destroy(schema); + zvec_index_params_invert_destroy(invert_params); + zvec_index_params_hnsw_destroy(hnsw_params); + return error; + } + + // Create text field (inverted index) + ZVecFieldSchema *text_field = + zvec_field_schema_create("text", ZVEC_DATA_TYPE_STRING, true, 0); + zvec_field_schema_set_invert_index(text_field, invert_params); + error = zvec_collection_schema_add_field(schema, text_field); + if (error != ZVEC_OK) { + zvec_collection_schema_destroy(schema); + zvec_index_params_invert_destroy(invert_params); + zvec_index_params_hnsw_destroy(hnsw_params); + return error; + } + + // Create embedding field (HNSW index) + ZVecFieldSchema *embedding_field = zvec_field_schema_create( + "embedding", ZVEC_DATA_TYPE_VECTOR_FP32, false, 3); + zvec_field_schema_set_hnsw_index(embedding_field, hnsw_params); + error = zvec_collection_schema_add_field(schema, embedding_field); + if (error != ZVEC_OK) { + zvec_collection_schema_destroy(schema); + 
zvec_index_params_invert_destroy(invert_params); + zvec_index_params_hnsw_destroy(hnsw_params); + return error; + } + + // Use default options + ZVecCollectionOptions options = ZVEC_DEFAULT_OPTIONS(); + + // Create collection using the new API + error = zvec_collection_create_and_open("./test_collection", schema, &options, + collection); + + // Cleanup resources + zvec_collection_schema_destroy(schema); + zvec_index_params_invert_destroy(invert_params); + zvec_index_params_hnsw_destroy(hnsw_params); + + return error; +} + +/** + * @brief Basic C API usage example + */ +int main() { + printf("=== ZVec C API Basic Example ===\n\n"); + + ZVecErrorCode error; + + // Create collection using simplified function + ZVecCollection *collection = NULL; + error = create_simple_test_collection(&collection); + if (handle_error(error, "creating collection") != ZVEC_OK) { + return 1; + } + printf("✓ Collection created successfully\n"); + + // Prepare test data + float vector1[] = {0.1f, 0.2f, 0.3f}; + float vector2[] = {0.4f, 0.5f, 0.6f}; + + ZVecDoc *docs[2]; + for (int i = 0; i < 2; ++i) { + docs[i] = zvec_doc_create(); + if (!docs[i]) { + fprintf(stderr, "Failed to create document %d\n", i); + // Cleanup allocated resources + for (int j = 0; j < i; ++j) { + zvec_doc_destroy(docs[j]); + } + return ZVEC_ERROR_INTERNAL_ERROR; + } + } + + // Manually add fields to document 1 + zvec_doc_set_pk(docs[0], "doc1"); + zvec_doc_add_field_by_value(docs[0], "id", ZVEC_DATA_TYPE_STRING, "doc1", + strlen("doc1")); + zvec_doc_add_field_by_value(docs[0], "text", ZVEC_DATA_TYPE_STRING, + "First document", strlen("First document")); + zvec_doc_add_field_by_value(docs[0], "embedding", ZVEC_DATA_TYPE_VECTOR_FP32, + vector1, 3 * sizeof(float)); + + // Manually add fields to document 2 + zvec_doc_set_pk(docs[1], "doc2"); + zvec_doc_add_field_by_value(docs[1], "id", ZVEC_DATA_TYPE_STRING, "doc2", + strlen("doc2")); + zvec_doc_add_field_by_value(docs[1], "text", ZVEC_DATA_TYPE_STRING, + "Second 
document", strlen("Second document")); + zvec_doc_add_field_by_value(docs[1], "embedding", ZVEC_DATA_TYPE_VECTOR_FP32, + vector2, 3 * sizeof(float)); + + // Insert documents + size_t success_count = 0; + size_t error_count = 0; + error = zvec_collection_insert(collection, (const ZVecDoc **)docs, 2, + &success_count, &error_count); + if (handle_error(error, "inserting documents") != ZVEC_OK) { + zvec_collection_destroy(collection); + return 1; + } + printf("✓ Documents inserted - Success: %zu, Failed: %zu\n", success_count, + error_count); + for (int i = 0; i < 2; ++i) { + zvec_doc_destroy(docs[i]); + } + + // Flush collection + error = zvec_collection_flush(collection); + if (handle_error(error, "flushing collection") != ZVEC_OK) { + printf("Collection flush failed\n"); + } else { + printf("✓ Collection flushed successfully\n"); + } + + // Get collection statistics + ZVecCollectionStats *stats = NULL; + error = zvec_collection_get_stats(collection, &stats); + if (handle_error(error, "getting collection stats") == ZVEC_OK) { + printf("✓ Collection stats - Document count: %llu\n", + (unsigned long long)stats->doc_count); + // Free statistics memory + zvec_collection_stats_destroy(stats); + } + + printf("Testing vector query...\n"); + // Query documents + ZVecVectorQuery query = {0}; + query.field_name = + (ZVecString){.data = "embedding", .length = strlen("embedding")}; + query.query_vector = + (ZVecByteArray){.data = (uint8_t *)vector1, .length = 3 * sizeof(float)}; + query.topk = 10; + query.filter = (ZVecString){.data = "", .length = 0}; + query.include_vector = true; + query.include_doc_id = true; + query.output_fields.strings = NULL; + query.output_fields.count = 0; + + ZVecDoc **results = NULL; + size_t result_count = 0; + error = zvec_collection_query(collection, &query, &results, &result_count); + + if (error != ZVEC_OK) { + char *error_msg = NULL; + zvec_get_last_error(&error_msg); + printf("[ERROR] Query failed: %s\n", + error_msg ? 
error_msg : "Unknown error"); + free(error_msg); + goto cleanup; + } + + printf("✓ Query successful - Returned %zu results\n", result_count); + + // Process query results + for (size_t i = 0; i < result_count && i < 5; ++i) { + const ZVecDoc *doc = results[i]; + const char *pk = zvec_doc_get_pk_copy(doc); + + printf(" Result %zu: PK=%s, DocID=%llu, Score=%.4f\n", i + 1, + pk ? pk : "NULL", (unsigned long long)zvec_doc_get_doc_id(doc), + zvec_doc_get_score(doc)); + + if (pk) { + free((void *)pk); + } + } + + // Free query results memory + zvec_docs_free(results, result_count); + +cleanup: + // Cleanup resources + zvec_collection_destroy(collection); + printf("✓ Example completed\n"); + return 0; +} \ No newline at end of file diff --git a/examples/c_api/collection_schema_example.c b/examples/c_api/collection_schema_example.c new file mode 100644 index 00000000..d69ca989 --- /dev/null +++ b/examples/c_api/collection_schema_example.c @@ -0,0 +1,253 @@ +// Copyright 2025-present the zvec project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include "zvec/c_api.h" + +/** + * @brief Print error message and return error code + */ +static ZVecErrorCode handle_error(ZVecErrorCode error, const char *context) { + if (error != ZVEC_OK) { + char *error_msg = NULL; + zvec_get_last_error(&error_msg); + fprintf(stderr, "Error in %s: %d - %s\n", context, error, + error_msg ? 
error_msg : "Unknown error"); + free(error_msg); + } + return error; +} + +/** + * @brief Collection schema creation and management example + */ +int main() { + printf("=== ZVec Collection Schema Example ===\n\n"); + + ZVecErrorCode error; + + // 1. Create collection schema + ZVecCollectionSchema *schema = + zvec_collection_schema_create("schema_example_collection"); + if (!schema) { + fprintf(stderr, "Failed to create collection schema\n"); + return 1; + } + printf("✓ Collection schema created successfully\n"); + + // 2. Set schema properties + schema->max_doc_count_per_segment = 1000000; + printf("✓ Set max documents per segment: %llu\n", + (unsigned long long)schema->max_doc_count_per_segment); + + // 3. Create index parameters + ZVecInvertIndexParams *invert_params = + zvec_index_params_invert_create(true, false); + ZVecHnswIndexParams *hnsw_params = zvec_index_params_hnsw_create( + ZVEC_METRIC_TYPE_L2, ZVEC_QUANTIZE_TYPE_UNDEFINED, 16, 200, 50); + + if (!invert_params || !hnsw_params) { + fprintf(stderr, "Failed to create index parameters\n"); + zvec_collection_schema_destroy(schema); + return 1; + } + + // 4. Create and add ID field (primary key) + ZVecFieldSchema *id_field = + zvec_field_schema_create("id", ZVEC_DATA_TYPE_STRING, false, 0); + if (!id_field) { + fprintf(stderr, "Failed to create ID field\n"); + zvec_collection_schema_destroy(schema); + zvec_index_params_invert_destroy(invert_params); + zvec_index_params_hnsw_destroy(hnsw_params); + return 1; + } + + error = zvec_collection_schema_add_field(schema, id_field); + if (handle_error(error, "adding ID field") != ZVEC_OK) { + zvec_collection_schema_destroy(schema); + zvec_index_params_invert_destroy(invert_params); + zvec_index_params_hnsw_destroy(hnsw_params); + return 1; + } + printf("✓ ID field added successfully\n"); + + // 5. 
Create and add text field with inverted index + ZVecFieldSchema *text_field = + zvec_field_schema_create("content", ZVEC_DATA_TYPE_STRING, true, 0); + if (!text_field) { + fprintf(stderr, "Failed to create text field\n"); + zvec_collection_schema_destroy(schema); + zvec_index_params_invert_destroy(invert_params); + zvec_index_params_hnsw_destroy(hnsw_params); + return 1; + } + + zvec_field_schema_set_invert_index(text_field, invert_params); + error = zvec_collection_schema_add_field(schema, text_field); + if (handle_error(error, "adding text field") != ZVEC_OK) { + zvec_collection_schema_destroy(schema); + zvec_index_params_invert_destroy(invert_params); + zvec_index_params_hnsw_destroy(hnsw_params); + return 1; + } + printf("✓ Text field with inverted index added successfully\n"); + + // 6. Create and add vector field with HNSW index + ZVecFieldSchema *vector_field = zvec_field_schema_create( + "embedding", ZVEC_DATA_TYPE_VECTOR_FP32, false, 128); + if (!vector_field) { + fprintf(stderr, "Failed to create vector field\n"); + zvec_collection_schema_destroy(schema); + zvec_index_params_invert_destroy(invert_params); + zvec_index_params_hnsw_destroy(hnsw_params); + return 1; + } + + zvec_field_schema_set_hnsw_index(vector_field, hnsw_params); + error = zvec_collection_schema_add_field(schema, vector_field); + if (handle_error(error, "adding vector field") != ZVEC_OK) { + zvec_collection_schema_destroy(schema); + zvec_index_params_invert_destroy(invert_params); + zvec_index_params_hnsw_destroy(hnsw_params); + return 1; + } + printf("✓ Vector field with HNSW index added successfully\n"); + + // 7. Check field count + // Note: This function may not exist in current API, commenting out for now + // size_t field_count = zvec_collection_schema_get_field_count(schema); + // printf("✓ Total field count: %zu\n", field_count); + + // 8. 
Create collection with schema + ZVecCollectionOptions options = ZVEC_DEFAULT_OPTIONS(); + ZVecCollection *collection = NULL; + + error = zvec_collection_create_and_open("./schema_example_collection", schema, + &options, &collection); + if (handle_error(error, "creating collection with schema") != ZVEC_OK) { + zvec_collection_schema_destroy(schema); + zvec_index_params_invert_destroy(invert_params); + zvec_index_params_hnsw_destroy(hnsw_params); + return 1; + } + printf("✓ Collection created successfully with schema\n"); + + // 9. Prepare test data + float vector1[128]; + float vector2[128]; + for (int i = 0; i < 128; i++) { + vector1[i] = (float)(i + 1) / 128.0f; + vector2[i] = (float)(i + 2) / 128.0f; + } + + // 10. Create documents + ZVecDoc *docs[2]; + for (int i = 0; i < 2; i++) { + docs[i] = zvec_doc_create(); + if (!docs[i]) { + fprintf(stderr, "Failed to create document %d\n", i); + // Cleanup + for (int j = 0; j < i; j++) { + zvec_doc_destroy(docs[j]); + } + zvec_collection_destroy(collection); + zvec_collection_schema_destroy(schema); + zvec_index_params_invert_destroy(invert_params); + zvec_index_params_hnsw_destroy(hnsw_params); + return 1; + } + } + + // Add fields to document 1 + zvec_doc_set_pk(docs[0], "doc1"); + zvec_doc_add_field_by_value(docs[0], "id", ZVEC_DATA_TYPE_STRING, "doc1", + strlen("doc1")); + zvec_doc_add_field_by_value(docs[0], "content", ZVEC_DATA_TYPE_STRING, + "First test document", + strlen("First test document")); + zvec_doc_add_field_by_value(docs[0], "embedding", ZVEC_DATA_TYPE_VECTOR_FP32, + vector1, 128 * sizeof(float)); + + // Add fields to document 2 + zvec_doc_set_pk(docs[1], "doc2"); + zvec_doc_add_field_by_value(docs[1], "id", ZVEC_DATA_TYPE_STRING, "doc2", + strlen("doc2")); + zvec_doc_add_field_by_value(docs[1], "content", ZVEC_DATA_TYPE_STRING, + "Second test document", + strlen("Second test document")); + zvec_doc_add_field_by_value(docs[1], "embedding", ZVEC_DATA_TYPE_VECTOR_FP32, + vector2, 128 * sizeof(float)); + + 
// 11. Insert documents + size_t success_count = 0, error_count = 0; + error = zvec_collection_insert(collection, (const ZVecDoc **)docs, 2, + &success_count, &error_count); + if (handle_error(error, "inserting documents") != ZVEC_OK) { + // Cleanup + for (int i = 0; i < 2; i++) { + zvec_doc_destroy(docs[i]); + } + zvec_collection_destroy(collection); + zvec_collection_schema_destroy(schema); + zvec_index_params_invert_destroy(invert_params); + zvec_index_params_hnsw_destroy(hnsw_params); + return 1; + } + printf("✓ Documents inserted - Success: %zu, Failed: %zu\n", success_count, + error_count); + + // Cleanup documents + for (int i = 0; i < 2; i++) { + zvec_doc_destroy(docs[i]); + } + + // 12. Flush collection + error = zvec_collection_flush(collection); + if (handle_error(error, "flushing collection") == ZVEC_OK) { + printf("✓ Collection flushed successfully\n"); + } + + // 13. Query test + ZVecVectorQuery query = {0}; + query.field_name = + (ZVecString){.data = "embedding", .length = strlen("embedding")}; + query.query_vector = (ZVecByteArray){.data = (uint8_t *)vector1, + .length = 128 * sizeof(float)}; + query.topk = 5; + query.filter = (ZVecString){.data = "", .length = 0}; + query.include_vector = true; + query.include_doc_id = true; + query.output_fields.strings = NULL; + query.output_fields.count = 0; + + ZVecDoc **results = NULL; + size_t result_count = 0; + error = zvec_collection_query(collection, &query, &results, &result_count); + if (error == ZVEC_OK) { + printf("✓ Vector query successful - Returned %zu results\n", result_count); + zvec_docs_free(results, result_count); + } + + // 14. 
Cleanup resources + zvec_collection_destroy(collection); + zvec_collection_schema_destroy(schema); + zvec_index_params_invert_destroy(invert_params); + zvec_index_params_hnsw_destroy(hnsw_params); + printf("✓ Schema example completed\n"); + + return 0; +} \ No newline at end of file diff --git a/examples/c_api/doc_example.c b/examples/c_api/doc_example.c new file mode 100644 index 00000000..b0e06624 --- /dev/null +++ b/examples/c_api/doc_example.c @@ -0,0 +1,520 @@ +// Copyright 2025-present the zvec project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include +#include +#include "zvec/c_api.h" + +/** + * @brief Print error message and return error code + */ +static ZVecErrorCode handle_error(ZVecErrorCode error, const char *context) { + if (error != ZVEC_OK) { + char *error_msg = NULL; + zvec_get_last_error(&error_msg); + fprintf(stderr, "Error in %s: %d - %s\n", context, error, + error_msg ? 
error_msg : "Unknown error"); + free(error_msg); + } + return error; +} + +/** + * @brief Create a test document with all data types + * @param doc_index Document index for generating unique data + * @return ZVecDoc* Created document pointer + */ +static ZVecDoc *create_full_type_test_doc(int doc_index) { + ZVecDoc *doc = zvec_doc_create(); + if (!doc) { + fprintf(stderr, "Failed to create document\n"); + return NULL; + } + + // Set primary key + char pk_buffer[32]; + snprintf(pk_buffer, sizeof(pk_buffer), "doc_%d", doc_index); + zvec_doc_set_pk(doc, pk_buffer); + + // Add Id field with inverted index + char id_buffer[32]; + snprintf(id_buffer, sizeof(id_buffer), "id_%d", doc_index); + zvec_doc_add_field_by_value(doc, "id", ZVEC_DATA_TYPE_STRING, id_buffer, + strlen(id_buffer)); + + // Add scalar fields with different data types + // String field + char string_value[64]; + snprintf(string_value, sizeof(string_value), "test_string_%d", doc_index); + zvec_doc_add_field_by_value(doc, "string_field", ZVEC_DATA_TYPE_STRING, + string_value, strlen(string_value)); + + // Boolean field + bool bool_value = (doc_index % 2 == 0); + zvec_doc_add_field_by_value(doc, "bool_field", ZVEC_DATA_TYPE_BOOL, + &bool_value, sizeof(bool_value)); + + // Integer fields + int32_t int32_value = doc_index * 1000; + zvec_doc_add_field_by_value(doc, "int32_field", ZVEC_DATA_TYPE_INT32, + &int32_value, sizeof(int32_value)); + + int64_t int64_value = (int64_t)doc_index * 1000000LL; + zvec_doc_add_field_by_value(doc, "int64_field", ZVEC_DATA_TYPE_INT64, + &int64_value, sizeof(int64_value)); + + // Floating point fields + float float_value = (float)doc_index * 1.5f; + zvec_doc_add_field_by_value(doc, "float_field", ZVEC_DATA_TYPE_FLOAT, + &float_value, sizeof(float_value)); + + double double_value = (double)doc_index * 2.718281828; + zvec_doc_add_field_by_value(doc, "double_field", ZVEC_DATA_TYPE_DOUBLE, + &double_value, sizeof(double_value)); + + // Vector fields with different dimensions + // 
FP32 vector (3D) + float fp32_vector[3] = {(float)doc_index, (float)doc_index * 2.0f, + (float)doc_index * 3.0f}; + zvec_doc_add_field_by_value(doc, "vector_fp32", ZVEC_DATA_TYPE_VECTOR_FP32, + fp32_vector, 3 * sizeof(float)); + + // Larger FP32 vector (16D) + float large_vector[16]; + for (int i = 0; i < 16; i++) { + large_vector[i] = (float)(doc_index * 16 + i) / 256.0f; + } + zvec_doc_add_field_by_value(doc, "large_vector", ZVEC_DATA_TYPE_VECTOR_FP32, + large_vector, 16 * sizeof(float)); + + return doc; +} + +/** + * @brief Compare two documents for equality + */ +static bool compare_documents(const ZVecDoc *doc1, const ZVecDoc *doc2) { + if (!doc1 || !doc2) return false; + + // Compare primary keys + const char *pk1 = zvec_doc_get_pk_pointer(doc1); + const char *pk2 = zvec_doc_get_pk_pointer(doc2); + + if (!pk1 || !pk2 || strcmp(pk1, pk2) != 0) { + return false; + } + + // TODO: Compare other fields and values + + return true; +} + +/** + * @brief Print document fields and their values + * @param doc The document to print + * @param doc_index Document index for identification + */ +static void print_doc(const ZVecDoc *doc, int doc_index) { + if (!doc) { + printf("Document %d: NULL document\n", doc_index); + return; + } + + printf("\n=== Document %d ===\n", doc_index); + + // Print primary key + const char *pk = zvec_doc_get_pk_pointer(doc); + printf("Primary Key: %s\n", pk ? 
pk : "NULL"); + + // Print document ID + uint64_t doc_id = zvec_doc_get_doc_id(doc); + printf("Document ID: %llu\n", (unsigned long long)doc_id); + + // Print score + float score = zvec_doc_get_score(doc); + printf("Score: %.6f\n", score); + + // Print scalar fields + printf("\nScalar Fields:\n"); + + // ID field (using pointer function for strings) + const void *id_value = NULL; + size_t id_size = 0; + ZVecErrorCode error = zvec_doc_get_field_value_pointer( + doc, "id", ZVEC_DATA_TYPE_STRING, &id_value, &id_size); + if (error == ZVEC_OK && id_value) { + printf(" id: %.*s\n", (int)id_size, (const char *)id_value); + } + + // String field (using pointer function for strings) + const void *string_value = NULL; + size_t string_size = 0; + error = zvec_doc_get_field_value_pointer( + doc, "string_field", ZVEC_DATA_TYPE_STRING, &string_value, &string_size); + if (error == ZVEC_OK && string_value) { + printf(" string_field: %.*s\n", (int)string_size, + (const char *)string_value); + } + + // Boolean field + bool bool_value; + error = zvec_doc_get_field_value_basic(doc, "bool_field", ZVEC_DATA_TYPE_BOOL, + &bool_value, sizeof(bool_value)); + if (error == ZVEC_OK) { + printf(" bool_field: %s\n", bool_value ? 
"true" : "false"); + } + + // Int32 field + int32_t int32_value; + error = + zvec_doc_get_field_value_basic(doc, "int32_field", ZVEC_DATA_TYPE_INT32, + &int32_value, sizeof(int32_value)); + if (error == ZVEC_OK) { + printf(" int32_field: %d\n", int32_value); + } + + // Int64 field + int64_t int64_value; + error = + zvec_doc_get_field_value_basic(doc, "int64_field", ZVEC_DATA_TYPE_INT64, + &int64_value, sizeof(int64_value)); + if (error == ZVEC_OK) { + printf(" int64_field: %lld\n", (long long)int64_value); + } + + // Float field + float float_value; + error = + zvec_doc_get_field_value_basic(doc, "float_field", ZVEC_DATA_TYPE_FLOAT, + &float_value, sizeof(float_value)); + if (error == ZVEC_OK) { + printf(" float_field: %.6f\n", float_value); + } + + // Double field + double double_value; + error = + zvec_doc_get_field_value_basic(doc, "double_field", ZVEC_DATA_TYPE_DOUBLE, + &double_value, sizeof(double_value)); + if (error == ZVEC_OK) { + printf(" double_field: %.6f\n", double_value); + } + + // Print vector fields (using copy function for complex types) + printf("\nVector Fields:\n"); + + // FP32 vector (3D) + void *fp32_vector = NULL; + size_t fp32_size = 0; + error = zvec_doc_get_field_value_copy( + doc, "vector_fp32", ZVEC_DATA_TYPE_VECTOR_FP32, &fp32_vector, &fp32_size); + if (error == ZVEC_OK && fp32_vector) { + const float *vec = (const float *)fp32_vector; + size_t dim = fp32_size / sizeof(float); + printf(" vector_fp32 (%zuD): [", dim); + for (size_t i = 0; i < dim && i < 10; i++) { // Limit to first 10 elements + printf("%.3f", vec[i]); + if (i < dim - 1 && i < 9) printf(", "); + } + if (dim > 10) printf(", ..."); + printf("]\n"); + free(fp32_vector); // Free the allocated memory + } + + // Large vector (16D) + void *large_vector = NULL; + size_t large_size = 0; + error = zvec_doc_get_field_value_copy(doc, "large_vector", + ZVEC_DATA_TYPE_VECTOR_FP32, + &large_vector, &large_size); + if (error == ZVEC_OK && large_vector) { + const float *vec = (const 
float *)large_vector; + size_t dim = large_size / sizeof(float); + printf(" large_vector (%zuD): [", dim); + for (size_t i = 0; i < dim && i < 10; i++) { // Limit to first 10 elements + printf("%.3f", vec[i]); + if (i < dim - 1 && i < 9) printf(", "); + } + if (dim > 10) printf(", ..."); + printf("]\n"); + free(large_vector); // Free the allocated memory + } + + printf("==================\n\n"); +} + +/** + * @brief Document creation, manipulation, and query example + */ +int main() { + printf("=== ZVec Document Example ===\n\n"); + + ZVecErrorCode error; + + // 1. Create collection schema for document testing + ZVecCollectionSchema *schema = + zvec_collection_schema_create("doc_example_collection"); + if (!schema) { + fprintf(stderr, "Failed to create collection schema\n"); + return -1; + } + printf("✓ Collection schema created\n"); + + // 2. Create index parameters + ZVecInvertIndexParams *invert_params = + zvec_index_params_invert_create(true, false); + ZVecHnswIndexParams *hnsw_params = zvec_index_params_hnsw_create( + ZVEC_METRIC_TYPE_L2, ZVEC_QUANTIZE_TYPE_UNDEFINED, 16, 200, 50); + + if (!invert_params || !hnsw_params) { + fprintf(stderr, "Failed to create index parameters\n"); + zvec_collection_schema_destroy(schema); + return -1; + } + + // 3. 
Create fields for all data types + printf("Creating fields for all data types...\n"); + + // Id field with inverted index + ZVecFieldSchema *id_field = + zvec_field_schema_create("id", ZVEC_DATA_TYPE_STRING, false, 0); + if (id_field) { + zvec_field_schema_set_invert_index(id_field, invert_params); + error = zvec_collection_schema_add_field(schema, id_field); + if (handle_error(error, "adding ID field") == ZVEC_OK) { + printf("✓ ID field with inverted index added\n"); + } + } + + // Scalar fields + ZVecFieldSchema *string_field = + zvec_field_schema_create("string_field", ZVEC_DATA_TYPE_STRING, true, 0); + ZVecFieldSchema *bool_field = + zvec_field_schema_create("bool_field", ZVEC_DATA_TYPE_BOOL, true, 0); + ZVecFieldSchema *int32_field = + zvec_field_schema_create("int32_field", ZVEC_DATA_TYPE_INT32, true, 0); + ZVecFieldSchema *int64_field = + zvec_field_schema_create("int64_field", ZVEC_DATA_TYPE_INT64, true, 0); + ZVecFieldSchema *float_field = + zvec_field_schema_create("float_field", ZVEC_DATA_TYPE_FLOAT, true, 0); + ZVecFieldSchema *double_field = + zvec_field_schema_create("double_field", ZVEC_DATA_TYPE_DOUBLE, true, 0); + + if (string_field) zvec_collection_schema_add_field(schema, string_field); + if (bool_field) zvec_collection_schema_add_field(schema, bool_field); + if (int32_field) zvec_collection_schema_add_field(schema, int32_field); + if (int64_field) zvec_collection_schema_add_field(schema, int64_field); + if (float_field) zvec_collection_schema_add_field(schema, float_field); + if (double_field) zvec_collection_schema_add_field(schema, double_field); + + // Vector fields + ZVecFieldSchema *vector_fp32_field = zvec_field_schema_create( + "vector_fp32", ZVEC_DATA_TYPE_VECTOR_FP32, false, 3); + ZVecFieldSchema *large_vector_field = zvec_field_schema_create( + "large_vector", ZVEC_DATA_TYPE_VECTOR_FP32, false, 16); + + if (vector_fp32_field) { + zvec_field_schema_set_hnsw_index(vector_fp32_field, hnsw_params); + error = 
zvec_collection_schema_add_field(schema, vector_fp32_field); + if (handle_error(error, "adding vector FP32 field") == ZVEC_OK) { + printf("✓ Vector FP32 field with HNSW index added\n"); + } + } + + if (large_vector_field) { + zvec_field_schema_set_hnsw_index(large_vector_field, hnsw_params); + error = zvec_collection_schema_add_field(schema, large_vector_field); + if (handle_error(error, "adding large vector field") == ZVEC_OK) { + printf("✓ Large vector field with HNSW index added\n"); + } + } + + // 4. Create collection + ZVecCollectionOptions options = ZVEC_DEFAULT_OPTIONS(); + ZVecCollection *collection = NULL; + + error = zvec_collection_create_and_open("./doc_example_collection", schema, + &options, &collection); + if (handle_error(error, "creating collection") != ZVEC_OK) { + zvec_collection_schema_destroy(schema); + zvec_index_params_invert_destroy(invert_params); + zvec_index_params_hnsw_destroy(hnsw_params); + return -1; + } + printf("✓ Collection created successfully\n"); + + // 5. 
Create and insert multiple test documents + printf("Creating and inserting test documents...\n"); + + const int doc_count = 5; + ZVecDoc *test_docs[doc_count]; + + for (int i = 0; i < doc_count; i++) { + test_docs[i] = create_full_type_test_doc(i); + if (!test_docs[i]) { + fprintf(stderr, "Failed to create document %d\n", i); + // Cleanup + for (int j = 0; j < i; j++) { + zvec_doc_destroy(test_docs[j]); + } + goto cleanup; + } + printf("✓ Created document %d with PK: %s\n", i, + zvec_doc_get_pk_pointer(test_docs[i])); + } + + // Print all documents before insertion + printf("\nDocuments before insertion:\n"); + for (int i = 0; i < doc_count; i++) { + print_doc(test_docs[i], i); + } + + // Insert documents + size_t success_count = 0, error_count = 0; + error = zvec_collection_insert(collection, (const ZVecDoc **)test_docs, + doc_count, &success_count, &error_count); + if (handle_error(error, "inserting documents") == ZVEC_OK) { + printf("✓ Documents inserted - Success: %zu, Failed: %zu\n", success_count, + error_count); + } + + // 6. 
Flush collection + error = zvec_collection_flush(collection); + if (handle_error(error, "flushing collection") != ZVEC_OK) { + printf("Warning: Collection flush failed\n"); + } else { + printf("✓ Collection flushed successfully\n"); + } + + // Use the first document's vector for querying + float query_vector[] = {0.0f, 0.0f, 0.0f}; + ZVecVectorQuery query = { + .field_name = + (ZVecString){.data = "vector_fp32", .length = strlen("vector_fp32")}, + .query_vector = (ZVecByteArray){.data = (uint8_t *)query_vector, + .length = 3 * sizeof(float)}, + .topk = 5, + .filter = (ZVecString){.data = "", .length = 0}, + .include_vector = true, + .include_doc_id = true, + .output_fields = {.strings = NULL, .count = 0}}; + + ZVecDoc **query_results = NULL; + size_t result_count = 0; + + error = + zvec_collection_query(collection, &query, &query_results, &result_count); + if (handle_error(error, "querying documents") != ZVEC_OK) { + query_results = NULL; + result_count = 0; + } + + printf("Query returned %zu results\n", result_count); + + // Print query results + printf("\nQuery Results:\n"); + for (size_t i = 0; i < result_count; i++) { + print_doc(query_results[i], i); + } + + // Compare query results + for (size_t i = 0; i < result_count && i < doc_count; i++) { + const char *result_pk = zvec_doc_get_pk_pointer(query_results[i]); + printf("Comparing query result[%zu]: %s\n", i, result_pk); + + // Find matching original document + bool found = false; + for (int j = 0; j < doc_count; j++) { + const char *original_pk = zvec_doc_get_pk_pointer(test_docs[j]); + if (strcmp(result_pk, original_pk) == 0) { + if (compare_documents(test_docs[j], query_results[i])) { + printf("✓ Query result %s matches original document\n", result_pk); + } else { + printf("✗ Query result %s does not match original document\n", + result_pk); + } + found = true; + break; + } + } + + if (!found) { + printf("⚠ Original document not found for: %s\n", result_pk); + } + } + + // 7. 
Filter query test + printf("\n=== Filter Query Test ===\n"); + + // Create filtered query + ZVecVectorQuery filtered_query = query; + filtered_query.filter = + (ZVecString){.data = "string_field = 'string_field_0'", + .length = strlen("string_field = 'string_field_0'")}; + + ZVecDoc **filtered_results = NULL; + size_t filtered_count = 0; + + error = zvec_collection_query(collection, &filtered_query, &filtered_results, + &filtered_count); + if (handle_error(error, "filtered querying") == ZVEC_OK) { + printf("Filtered query returned %zu results\n", filtered_count); + + // Verify filter results + bool filter_correct = true; + for (size_t i = 0; i < filtered_count; i++) { + // Note: Field value access may require different API + // For now, we'll just check that we got results + const char *pk = zvec_doc_get_pk_pointer(filtered_results[i]); + if (strstr(pk, "doc_") == NULL) { + filter_correct = false; + break; + } + } + + if (filter_correct) { + printf("✓ Filter query results are correct\n"); + } else { + printf("✗ Filter query results are incorrect\n"); + } + + if (filtered_results) { + zvec_docs_free(filtered_results, filtered_count); + } + } + + // 8. Cleanup query results + if (query_results) { + zvec_docs_free(query_results, result_count); + } + + // 9. Cleanup documents + for (int i = 0; i < doc_count; i++) { + zvec_doc_destroy(test_docs[i]); + } + + // 10. 
Final cleanup +cleanup: + zvec_collection_destroy(collection); + zvec_collection_schema_destroy(schema); + zvec_index_params_invert_destroy(invert_params); + zvec_index_params_hnsw_destroy(hnsw_params); + + printf("✓ Document example completed\n"); + + return 0; +} \ No newline at end of file diff --git a/examples/c_api/field_schema_example.c b/examples/c_api/field_schema_example.c new file mode 100644 index 00000000..8db81d8d --- /dev/null +++ b/examples/c_api/field_schema_example.c @@ -0,0 +1,282 @@ +// Copyright 2025-present the zvec project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include "zvec/c_api.h" + +/** + * @brief Print error message and return error code + */ +static ZVecErrorCode handle_error(ZVecErrorCode error, const char *context) { + if (error != ZVEC_OK) { + char *error_msg = NULL; + zvec_get_last_error(&error_msg); + fprintf(stderr, "Error in %s: %d - %s\n", context, error, + error_msg ? error_msg : "Unknown error"); + free(error_msg); + } + return error; +} + +/** + * @brief Field schema creation and management example + */ +int main() { + printf("=== ZVec Field Schema Example ===\n\n"); + + ZVecErrorCode error; + + // 1. Create collection schema + ZVecCollectionSchema *schema = + zvec_collection_schema_create("field_example_collection"); + if (!schema) { + fprintf(stderr, "Failed to create collection schema\n"); + return -1; + } + printf("✓ Collection schema created successfully\n"); + + // 2. 
Create different types of index parameters + ZVecInvertIndexParams *invert_params = + zvec_index_params_invert_create(true, false); + ZVecHnswIndexParams *hnsw_params = zvec_index_params_hnsw_create( + ZVEC_METRIC_TYPE_COSINE, ZVEC_QUANTIZE_TYPE_UNDEFINED, 16, 200, 50); + ZVecFlatIndexParams *flat_params = zvec_index_params_flat_create( + ZVEC_METRIC_TYPE_L2, ZVEC_QUANTIZE_TYPE_UNDEFINED); + + if (!invert_params || !hnsw_params || !flat_params) { + fprintf(stderr, "Failed to create index parameters\n"); + zvec_collection_schema_destroy(schema); + return -1; + } + + // 3. Create scalar fields with different data types + printf("Creating scalar fields...\n"); + + // String field with inverted index + ZVecFieldSchema *name_field = + zvec_field_schema_create("name", ZVEC_DATA_TYPE_STRING, false, 0); + if (name_field) { + zvec_field_schema_set_invert_index(name_field, invert_params); + error = zvec_collection_schema_add_field(schema, name_field); + if (handle_error(error, "adding name field") == ZVEC_OK) { + printf("✓ String field 'name' with inverted index added\n"); + } + } + + // Integer field + ZVecFieldSchema *age_field = + zvec_field_schema_create("age", ZVEC_DATA_TYPE_INT32, true, 0); + if (age_field) { + error = zvec_collection_schema_add_field(schema, age_field); + if (handle_error(error, "adding age field") == ZVEC_OK) { + printf("✓ Integer field 'age' added\n"); + } + } + + // Float field + ZVecFieldSchema *score_field = + zvec_field_schema_create("score", ZVEC_DATA_TYPE_FLOAT, true, 0); + if (score_field) { + error = zvec_collection_schema_add_field(schema, score_field); + if (handle_error(error, "adding score field") == ZVEC_OK) { + printf("✓ Float field 'score' added\n"); + } + } + + // Boolean field + ZVecFieldSchema *active_field = + zvec_field_schema_create("active", ZVEC_DATA_TYPE_BOOL, false, 0); + if (active_field) { + error = zvec_collection_schema_add_field(schema, active_field); + if (handle_error(error, "adding active field") == ZVEC_OK) { + 
printf("✓ Boolean field 'active' added\n"); + } + } + + // 4. Create vector fields with different dimensions and indexes + printf("Creating vector fields...\n"); + + // Small dimension vector with HNSW index + ZVecFieldSchema *small_vector_field = zvec_field_schema_create( + "small_vector", ZVEC_DATA_TYPE_VECTOR_FP32, false, 32); + if (small_vector_field) { + zvec_field_schema_set_hnsw_index(small_vector_field, hnsw_params); + error = zvec_collection_schema_add_field(schema, small_vector_field); + if (handle_error(error, "adding small vector field") == ZVEC_OK) { + printf( + "✓ Small vector field 'small_vector' (32D) with HNSW index added\n"); + } + } + + // Medium dimension vector with Flat index + ZVecFieldSchema *medium_vector_field = zvec_field_schema_create( + "medium_vector", ZVEC_DATA_TYPE_VECTOR_FP32, false, 128); + if (medium_vector_field) { + zvec_field_schema_set_flat_index(medium_vector_field, flat_params); + error = zvec_collection_schema_add_field(schema, medium_vector_field); + if (handle_error(error, "adding medium vector field") == ZVEC_OK) { + printf( + "✓ Medium vector field 'medium_vector' (128D) with Flat index " + "added\n"); + } + } + + // Large dimension vector with HNSW index + ZVecFieldSchema *large_vector_field = zvec_field_schema_create( + "large_vector", ZVEC_DATA_TYPE_VECTOR_FP32, false, 512); + if (large_vector_field) { + zvec_field_schema_set_hnsw_index(large_vector_field, hnsw_params); + error = zvec_collection_schema_add_field(schema, large_vector_field); + if (handle_error(error, "adding large vector field") == ZVEC_OK) { + printf( + "✓ Large vector field 'large_vector' (512D) with HNSW index added\n"); + } + } + + // 5. 
Create collection with the schema + ZVecCollectionOptions options = ZVEC_DEFAULT_OPTIONS(); + ZVecCollection *collection = NULL; + + error = zvec_collection_create_and_open("./field_example_collection", schema, + &options, &collection); + if (handle_error(error, "creating collection") != ZVEC_OK) { + zvec_collection_schema_destroy(schema); + zvec_index_params_invert_destroy(invert_params); + zvec_index_params_hnsw_destroy(hnsw_params); + zvec_index_params_flat_destroy(flat_params); + return -1; + } + printf("✓ Collection created successfully\n"); + + // 6. Create test documents with various field types + printf("Creating test documents...\n"); + + ZVecDoc *doc1 = zvec_doc_create(); + ZVecDoc *doc2 = zvec_doc_create(); + + if (!doc1 || !doc2) { + fprintf(stderr, "Failed to create documents\n"); + goto cleanup; + } + + // Document 1 + zvec_doc_set_pk(doc1, "user1"); + zvec_doc_add_field_by_value(doc1, "name", ZVEC_DATA_TYPE_STRING, + "Alice Johnson", strlen("Alice Johnson")); + int32_t age1 = 28; + zvec_doc_add_field_by_value(doc1, "age", ZVEC_DATA_TYPE_INT32, &age1, + sizeof(age1)); + float score1 = 87.5f; + zvec_doc_add_field_by_value(doc1, "score", ZVEC_DATA_TYPE_FLOAT, &score1, + sizeof(score1)); + bool active1 = true; + zvec_doc_add_field_by_value(doc1, "active", ZVEC_DATA_TYPE_BOOL, &active1, + sizeof(active1)); + + // Add vector data + float small_vec1[32]; + float medium_vec1[128]; + float large_vec1[512]; + + for (int i = 0; i < 32; i++) small_vec1[i] = (float)i / 32.0f; + for (int i = 0; i < 128; i++) medium_vec1[i] = (float)i / 128.0f; + for (int i = 0; i < 512; i++) large_vec1[i] = (float)i / 512.0f; + + zvec_doc_add_field_by_value(doc1, "small_vector", ZVEC_DATA_TYPE_VECTOR_FP32, + small_vec1, 32 * sizeof(float)); + zvec_doc_add_field_by_value(doc1, "medium_vector", ZVEC_DATA_TYPE_VECTOR_FP32, + medium_vec1, 128 * sizeof(float)); + zvec_doc_add_field_by_value(doc1, "large_vector", ZVEC_DATA_TYPE_VECTOR_FP32, + large_vec1, 512 * sizeof(float)); + + // 
Document 2 + zvec_doc_set_pk(doc2, "user2"); + zvec_doc_add_field_by_value(doc2, "name", ZVEC_DATA_TYPE_STRING, "Bob Smith", + strlen("Bob Smith")); + int32_t age2 = 35; + zvec_doc_add_field_by_value(doc2, "age", ZVEC_DATA_TYPE_INT32, &age2, + sizeof(age2)); + float score2 = 92.0f; + zvec_doc_add_field_by_value(doc2, "score", ZVEC_DATA_TYPE_FLOAT, &score2, + sizeof(score2)); + bool active2 = false; + zvec_doc_add_field_by_value(doc2, "active", ZVEC_DATA_TYPE_BOOL, &active2, + sizeof(active2)); + + // Add vector data + float small_vec2[32]; + float medium_vec2[128]; + float large_vec2[512]; + + for (int i = 0; i < 32; i++) small_vec2[i] = (float)(32 - i) / 32.0f; + for (int i = 0; i < 128; i++) medium_vec2[i] = (float)(128 - i) / 128.0f; + for (int i = 0; i < 512; i++) large_vec2[i] = (float)(512 - i) / 512.0f; + + zvec_doc_add_field_by_value(doc2, "small_vector", ZVEC_DATA_TYPE_VECTOR_FP32, + small_vec2, 32 * sizeof(float)); + zvec_doc_add_field_by_value(doc2, "medium_vector", ZVEC_DATA_TYPE_VECTOR_FP32, + medium_vec2, 128 * sizeof(float)); + zvec_doc_add_field_by_value(doc2, "large_vector", ZVEC_DATA_TYPE_VECTOR_FP32, + large_vec2, 512 * sizeof(float)); + + // 7. Insert documents + ZVecDoc *docs[] = {doc1, doc2}; + size_t success_count = 0, error_count = 0; + error = zvec_collection_insert(collection, (const ZVecDoc **)docs, 2, + &success_count, &error_count); + if (handle_error(error, "inserting documents") == ZVEC_OK) { + printf("✓ Documents inserted - Success: %zu, Failed: %zu\n", success_count, + error_count); + } + + // 8. 
Flush and test queries + zvec_collection_flush(collection); + printf("✓ Collection flushed\n"); + + // Test vector query on medium vector field + ZVecVectorQuery query = {0}; + query.field_name = + (ZVecString){.data = "medium_vector", .length = strlen("medium_vector")}; + query.query_vector = (ZVecByteArray){.data = (uint8_t *)medium_vec1, + .length = 128 * sizeof(float)}; + query.topk = 2; + query.filter = (ZVecString){.data = "", .length = 0}; + query.include_vector = false; + query.include_doc_id = true; + query.output_fields.strings = NULL; + query.output_fields.count = 0; + + ZVecDoc **results = NULL; + size_t result_count = 0; + error = zvec_collection_query(collection, &query, &results, &result_count); + if (error == ZVEC_OK) { + printf("✓ Vector query successful - Found %zu results\n", result_count); + zvec_docs_free(results, result_count); + } + + // 9. Cleanup +cleanup: + if (doc1) zvec_doc_destroy(doc1); + if (doc2) zvec_doc_destroy(doc2); + zvec_collection_destroy(collection); + zvec_collection_schema_destroy(schema); + zvec_index_params_invert_destroy(invert_params); + zvec_index_params_hnsw_destroy(hnsw_params); + zvec_index_params_flat_destroy(flat_params); + + printf("✓ Field schema example completed\n"); + return 0; +} \ No newline at end of file diff --git a/examples/c_api/index_example.c b/examples/c_api/index_example.c new file mode 100644 index 00000000..f4362ac0 --- /dev/null +++ b/examples/c_api/index_example.c @@ -0,0 +1,330 @@ +// Copyright 2025-present the zvec project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include "zvec/c_api.h" + +/** + * @brief Print error message and return error code + */ +static ZVecErrorCode handle_error(ZVecErrorCode error, const char *context) { + if (error != ZVEC_OK) { + char *error_msg = NULL; + zvec_get_last_error(&error_msg); + fprintf(stderr, "Error in %s: %d - %s\n", context, error, + error_msg ? error_msg : "Unknown error"); + free(error_msg); + } + return error; +} + +/** + * @brief Index creation and management example + */ +int main() { + printf("=== ZVec Index Example ===\n\n"); + + ZVecErrorCode error; + + // 1. Create collection schema + ZVecCollectionSchema *schema = + zvec_collection_schema_create("index_example_collection"); + if (!schema) { + fprintf(stderr, "Failed to create collection schema\n"); + return -1; + } + printf("✓ Collection schema created successfully\n"); + + // 2. Create different index parameter configurations + printf("Creating index parameters...\n"); + + // Inverted index parameters + ZVecInvertIndexParams *invert_params_standard = + zvec_index_params_invert_create(true, false); + ZVecInvertIndexParams *invert_params_extended = + zvec_index_params_invert_create(true, true); + + // HNSW index parameters with different configurations + ZVecHnswIndexParams *hnsw_params_fast = zvec_index_params_hnsw_create( + ZVEC_METRIC_TYPE_L2, ZVEC_QUANTIZE_TYPE_UNDEFINED, 16, 100, 50); + ZVecHnswIndexParams *hnsw_params_balanced = zvec_index_params_hnsw_create( + ZVEC_METRIC_TYPE_COSINE, ZVEC_QUANTIZE_TYPE_UNDEFINED, 32, 200, 100); + ZVecHnswIndexParams *hnsw_params_accurate = zvec_index_params_hnsw_create( + ZVEC_METRIC_TYPE_IP, ZVEC_QUANTIZE_TYPE_UNDEFINED, 64, 400, 200); + + // Flat index parameters + ZVecFlatIndexParams *flat_params_l2 = zvec_index_params_flat_create( + ZVEC_METRIC_TYPE_L2, ZVEC_QUANTIZE_TYPE_UNDEFINED); + ZVecFlatIndexParams *flat_params_cosine = 
zvec_index_params_flat_create( + ZVEC_METRIC_TYPE_COSINE, ZVEC_QUANTIZE_TYPE_UNDEFINED); + + if (!invert_params_standard || !invert_params_extended || !hnsw_params_fast || + !hnsw_params_balanced || !hnsw_params_accurate || !flat_params_l2 || + !flat_params_cosine) { + fprintf(stderr, "Failed to create index parameters\n"); + zvec_collection_schema_destroy(schema); + return -1; + } + + // 3. Create fields with different index types + printf("Creating fields with various index types...\n"); + + // Fields with inverted indexes + ZVecFieldSchema *id_field = + zvec_field_schema_create("id", ZVEC_DATA_TYPE_STRING, false, 0); + if (id_field) { + zvec_field_schema_set_invert_index(id_field, invert_params_standard); + error = zvec_collection_schema_add_field(schema, id_field); + if (handle_error(error, "adding ID field") == ZVEC_OK) { + printf("✓ ID field with standard inverted index added\n"); + } + } + + ZVecFieldSchema *category_field = + zvec_field_schema_create("category", ZVEC_DATA_TYPE_STRING, true, 0); + if (category_field) { + zvec_field_schema_set_invert_index(category_field, invert_params_extended); + error = zvec_collection_schema_add_field(schema, category_field); + if (handle_error(error, "adding category field") == ZVEC_OK) { + printf("✓ Category field with extended inverted index added\n"); + } + } + + // Vector fields with HNSW indexes (different configurations) + ZVecFieldSchema *fast_search_field = zvec_field_schema_create( + "fast_vector", ZVEC_DATA_TYPE_VECTOR_FP32, false, 64); + if (fast_search_field) { + zvec_field_schema_set_hnsw_index(fast_search_field, hnsw_params_fast); + error = zvec_collection_schema_add_field(schema, fast_search_field); + if (handle_error(error, "adding fast search field") == ZVEC_OK) { + printf("✓ Fast search vector field (64D) with HNSW index added\n"); + } + } + + ZVecFieldSchema *balanced_field = zvec_field_schema_create( + "balanced_vector", ZVEC_DATA_TYPE_VECTOR_FP32, false, 128); + if (balanced_field) { + 
zvec_field_schema_set_hnsw_index(balanced_field, hnsw_params_balanced); + error = zvec_collection_schema_add_field(schema, balanced_field); + if (handle_error(error, "adding balanced field") == ZVEC_OK) { + printf("✓ Balanced vector field (128D) with HNSW index added\n"); + } + } + + ZVecFieldSchema *accurate_field = zvec_field_schema_create( + "accurate_vector", ZVEC_DATA_TYPE_VECTOR_FP32, false, 256); + if (accurate_field) { + zvec_field_schema_set_hnsw_index(accurate_field, hnsw_params_accurate); + error = zvec_collection_schema_add_field(schema, accurate_field); + if (handle_error(error, "adding accurate field") == ZVEC_OK) { + printf("✓ Accurate vector field (256D) with HNSW index added\n"); + } + } + + // Vector field with Flat index + ZVecFieldSchema *exact_field = zvec_field_schema_create( + "exact_vector", ZVEC_DATA_TYPE_VECTOR_FP32, false, 32); + if (exact_field) { + zvec_field_schema_set_flat_index(exact_field, flat_params_l2); + error = zvec_collection_schema_add_field(schema, exact_field); + if (handle_error(error, "adding exact field") == ZVEC_OK) { + printf("✓ Exact search vector field (32D) with Flat index added\n"); + } + } + + // 4. Create collection + ZVecCollectionOptions options = ZVEC_DEFAULT_OPTIONS(); + ZVecCollection *collection = NULL; + + error = zvec_collection_create_and_open("./index_example_collection", schema, + &options, &collection); + if (handle_error(error, "creating collection") != ZVEC_OK) { + zvec_collection_schema_destroy(schema); + // Cleanup index parameters + zvec_index_params_invert_destroy(invert_params_standard); + zvec_index_params_invert_destroy(invert_params_extended); + zvec_index_params_hnsw_destroy(hnsw_params_fast); + zvec_index_params_hnsw_destroy(hnsw_params_balanced); + zvec_index_params_hnsw_destroy(hnsw_params_accurate); + zvec_index_params_flat_destroy(flat_params_l2); + zvec_index_params_flat_destroy(flat_params_cosine); + return -1; + } + printf("✓ Collection created successfully\n"); + + // 5. 
Create test data + printf("Creating test documents...\n"); + + ZVecDoc *docs[3]; + for (int i = 0; i < 3; i++) { + docs[i] = zvec_doc_create(); + if (!docs[i]) { + fprintf(stderr, "Failed to create document %d\n", i); + // Cleanup + for (int j = 0; j < i; j++) { + zvec_doc_destroy(docs[j]); + } + goto cleanup; + } + } + + // Prepare vector data + float fast_vec[3][64]; + float balanced_vec[3][128]; + float accurate_vec[3][256]; + float exact_vec[3][32]; + + // Generate different vector patterns for testing + for (int doc_idx = 0; doc_idx < 3; doc_idx++) { + for (int i = 0; i < 64; i++) { + fast_vec[doc_idx][i] = (float)(doc_idx * 64 + i) / (64.0f * 3.0f); + } + for (int i = 0; i < 128; i++) { + balanced_vec[doc_idx][i] = (float)(doc_idx * 128 + i) / (128.0f * 3.0f); + } + for (int i = 0; i < 256; i++) { + accurate_vec[doc_idx][i] = (float)(doc_idx * 256 + i) / (256.0f * 3.0f); + } + for (int i = 0; i < 32; i++) { + exact_vec[doc_idx][i] = (float)(doc_idx * 32 + i) / (32.0f * 3.0f); + } + } + + // Populate documents + for (int i = 0; i < 3; i++) { + char pk[16]; + snprintf(pk, sizeof(pk), "doc%d", i + 1); + zvec_doc_set_pk(docs[i], pk); + + char id_val[16]; + snprintf(id_val, sizeof(id_val), "ID_%d", i + 1); + zvec_doc_add_field_by_value(docs[i], "id", ZVEC_DATA_TYPE_STRING, id_val, + strlen(id_val)); + + char category_val[16]; + snprintf(category_val, sizeof(category_val), "cat_%d", (i % 2) + 1); + zvec_doc_add_field_by_value(docs[i], "category", ZVEC_DATA_TYPE_STRING, + category_val, strlen(category_val)); + + zvec_doc_add_field_by_value(docs[i], "fast_vector", + ZVEC_DATA_TYPE_VECTOR_FP32, fast_vec[i], + 64 * sizeof(float)); + zvec_doc_add_field_by_value(docs[i], "balanced_vector", + ZVEC_DATA_TYPE_VECTOR_FP32, balanced_vec[i], + 128 * sizeof(float)); + zvec_doc_add_field_by_value(docs[i], "accurate_vector", + ZVEC_DATA_TYPE_VECTOR_FP32, accurate_vec[i], + 256 * sizeof(float)); + zvec_doc_add_field_by_value(docs[i], "exact_vector", + ZVEC_DATA_TYPE_VECTOR_FP32, 
exact_vec[i], + 32 * sizeof(float)); + } + + // 6. Insert documents + size_t success_count = 0, error_count = 0; + error = zvec_collection_insert(collection, (const ZVecDoc **)docs, 3, + &success_count, &error_count); + if (handle_error(error, "inserting documents") == ZVEC_OK) { + printf("✓ Documents inserted - Success: %zu, Failed: %zu\n", success_count, + error_count); + } + + // Cleanup documents + for (int i = 0; i < 3; i++) { + zvec_doc_destroy(docs[i]); + } + + // 7. Flush collection to build indexes + error = zvec_collection_flush(collection); + if (handle_error(error, "flushing collection") == ZVEC_OK) { + printf("✓ Collection flushed - indexes built\n"); + } + + // 8. Test different query types + printf("Testing various index queries...\n"); + + // Test HNSW query (balanced) + ZVecVectorQuery hnsw_query = {0}; + hnsw_query.field_name = (ZVecString){.data = "balanced_vector", + .length = strlen("balanced_vector")}; + hnsw_query.query_vector = (ZVecByteArray){.data = (uint8_t *)balanced_vec[0], + .length = 128 * sizeof(float)}; + hnsw_query.topk = 2; + hnsw_query.filter = (ZVecString){.data = "", .length = 0}; + hnsw_query.include_vector = false; + hnsw_query.include_doc_id = true; + hnsw_query.output_fields.strings = NULL; + hnsw_query.output_fields.count = 0; + + ZVecDoc **hnsw_results = NULL; + size_t hnsw_result_count = 0; + error = zvec_collection_query(collection, &hnsw_query, &hnsw_results, + &hnsw_result_count); + if (error == ZVEC_OK) { + printf("✓ HNSW query successful - Found %zu results\n", hnsw_result_count); + zvec_docs_free(hnsw_results, hnsw_result_count); + } + + // Test Flat query (exact) + ZVecVectorQuery flat_query = {0}; + flat_query.field_name = + (ZVecString){.data = "exact_vector", .length = strlen("exact_vector")}; + flat_query.query_vector = (ZVecByteArray){.data = (uint8_t *)exact_vec[0], + .length = 32 * sizeof(float)}; + flat_query.topk = 2; + flat_query.filter = (ZVecString){.data = "", .length = 0}; + flat_query.include_vector 
= false; + flat_query.include_doc_id = true; + flat_query.output_fields.strings = NULL; + flat_query.output_fields.count = 0; + + ZVecDoc **flat_results = NULL; + size_t flat_result_count = 0; + error = zvec_collection_query(collection, &flat_query, &flat_results, + &flat_result_count); + if (error == ZVEC_OK) { + printf("✓ Flat (exact) query successful - Found %zu results\n", + flat_result_count); + zvec_docs_free(flat_results, flat_result_count); + } + + // 9. Performance comparison information + printf("\nIndex Performance Characteristics:\n"); + printf("- Inverted Index: Fast text search, supports filtering\n"); + printf( + "- HNSW Index: Approximate nearest neighbor search, good balance of " + "speed/accuracy\n"); + printf("- Flat Index: Exact search, slower but 100%% accurate\n"); + printf( + "- Trade-off: Speed vs Accuracy - choose based on your requirements\n"); + + // 10. Cleanup +cleanup: + zvec_collection_destroy(collection); + zvec_collection_schema_destroy(schema); + + // Cleanup index parameters + zvec_index_params_invert_destroy(invert_params_standard); + zvec_index_params_invert_destroy(invert_params_extended); + zvec_index_params_hnsw_destroy(hnsw_params_fast); + zvec_index_params_hnsw_destroy(hnsw_params_balanced); + zvec_index_params_hnsw_destroy(hnsw_params_accurate); + zvec_index_params_flat_destroy(flat_params_l2); + zvec_index_params_flat_destroy(flat_params_cosine); + + printf("✓ Index example completed\n"); + return 0; +} \ No newline at end of file diff --git a/examples/c_api/optimized_example.c b/examples/c_api/optimized_example.c new file mode 100644 index 00000000..3441af6e --- /dev/null +++ b/examples/c_api/optimized_example.c @@ -0,0 +1,302 @@ +// Copyright 2025-present the zvec project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
/**
 * @brief Create test vector data
 *
 * Allocates @p dimension floats and fills each with rand() scaled by
 * RAND_MAX, i.e. a value in [0, 1]. The sequence depends on the current
 * rand() state; this function does not seed the generator.
 *
 * @param dimension Number of elements to allocate.
 * @return Heap buffer the caller must release with free(), or NULL when the
 *         byte size would overflow size_t or the allocation fails.
 */
static float *create_test_vector(size_t dimension) {
  // Reject sizes whose byte count would wrap: a wrapped product makes
  // malloc() return a too-small buffer that the fill loop then overruns.
  if (dimension > (size_t)-1 / sizeof(float)) {
    return NULL;
  }

  float *vector = malloc(dimension * sizeof *vector);
  if (!vector) {
    return NULL;
  }

  for (size_t i = 0; i < dimension; i++) {
    vector[i] = (float)rand() / RAND_MAX;
  }

  return vector;
}
Create optimized index parameters + ZVecHnswIndexParams *hnsw_params = zvec_index_params_hnsw_create( + ZVEC_METRIC_TYPE_L2, ZVEC_QUANTIZE_TYPE_UNDEFINED, + 32, // Higher M for better connectivity + 200, // Construction ef for quality + 50 // Search ef for performance + ); + + if (!hnsw_params) { + fprintf(stderr, "Failed to create HNSW parameters\n"); + zvec_collection_schema_destroy(schema); + return -1; + } + + // 3. Create fields with optimized configuration + ZVecFieldSchema *id_field = + zvec_field_schema_create("id", ZVEC_DATA_TYPE_STRING, false, 0); + ZVecFieldSchema *text_field = + zvec_field_schema_create("text", ZVEC_DATA_TYPE_STRING, true, 0); + ZVecFieldSchema *embedding_field = zvec_field_schema_create( + "embedding", ZVEC_DATA_TYPE_VECTOR_FP32, false, 128); + + if (!id_field || !text_field || !embedding_field) { + fprintf(stderr, "Failed to create field schemas\n"); + goto cleanup_params; + } + + // Set indexes + zvec_field_schema_set_hnsw_index(embedding_field, hnsw_params); + + // Add fields to schema + error = zvec_collection_schema_add_field(schema, id_field); + if (handle_error(error, "adding ID field") != ZVEC_OK) goto cleanup_fields; + + error = zvec_collection_schema_add_field(schema, text_field); + if (handle_error(error, "adding text field") != ZVEC_OK) goto cleanup_fields; + + error = zvec_collection_schema_add_field(schema, embedding_field); + if (handle_error(error, "adding embedding field") != ZVEC_OK) + goto cleanup_fields; + + printf("✓ Fields configured with indexes\n"); + + // 4. 
Create collection with optimized options + ZVecCollectionOptions options = ZVEC_DEFAULT_OPTIONS(); + options.enable_mmap = true; // Enable memory mapping for better performance + + ZVecCollection *collection = NULL; + error = zvec_collection_create_and_open("./optimized_example_collection", + schema, &options, &collection); + if (handle_error(error, "creating collection") != ZVEC_OK) { + goto cleanup_fields; + } + printf("✓ Collection created with optimized settings\n"); + + // 5. Bulk insert test data + const size_t DOC_COUNT = 1000; + const size_t BATCH_SIZE = 100; + + printf("Inserting %zu documents in batches of %zu...\n", DOC_COUNT, + BATCH_SIZE); + + clock_t start_time = clock(); + + for (size_t batch_start = 0; batch_start < DOC_COUNT; + batch_start += BATCH_SIZE) { + size_t current_batch_size = (batch_start + BATCH_SIZE > DOC_COUNT) + ? DOC_COUNT - batch_start + : BATCH_SIZE; + + ZVecDoc **batch_docs = malloc(current_batch_size * sizeof(ZVecDoc *)); + if (!batch_docs) { + fprintf(stderr, "Failed to allocate batch documents\n"); + break; + } + + // Create batch documents + for (size_t i = 0; i < current_batch_size; i++) { + batch_docs[i] = zvec_doc_create(); + if (!batch_docs[i]) { + fprintf(stderr, "Failed to create document\n"); + // Cleanup previous documents in batch + for (size_t j = 0; j < i; j++) { + zvec_doc_destroy(batch_docs[j]); + } + free(batch_docs); + goto cleanup_collection; + } + + size_t doc_id = batch_start + i; + char pk[32]; + snprintf(pk, sizeof(pk), "doc_%zu", doc_id); + zvec_doc_set_pk(batch_docs[i], pk); + + // Add ID field + char id_str[32]; + snprintf(id_str, sizeof(id_str), "ID_%zu", doc_id); + zvec_doc_add_field_by_value(batch_docs[i], "id", ZVEC_DATA_TYPE_STRING, + id_str, strlen(id_str)); + + // Add text field + char text_str[64]; + snprintf(text_str, sizeof(text_str), + "Document number %zu with sample text", doc_id); + zvec_doc_add_field_by_value(batch_docs[i], "text", ZVEC_DATA_TYPE_STRING, + text_str, strlen(text_str)); + + 
// Add vector field + float *vector = create_test_vector(128); + if (vector) { + zvec_doc_add_field_by_value(batch_docs[i], "embedding", + ZVEC_DATA_TYPE_VECTOR_FP32, vector, + 128 * sizeof(float)); + free(vector); + } + } + + // Insert batch + size_t success_count, error_count; + error = zvec_collection_insert(collection, (const ZVecDoc **)batch_docs, + current_batch_size, &success_count, + &error_count); + if (handle_error(error, "inserting batch") != ZVEC_OK) { + // Cleanup batch documents + for (size_t i = 0; i < current_batch_size; i++) { + zvec_doc_destroy(batch_docs[i]); + } + free(batch_docs); + goto cleanup_collection; + } + + printf(" Batch %zu-%zu: %zu successful, %zu failed\n", batch_start, + batch_start + current_batch_size - 1, success_count, error_count); + + // Cleanup batch documents + for (size_t i = 0; i < current_batch_size; i++) { + zvec_doc_destroy(batch_docs[i]); + } + free(batch_docs); + } + + clock_t insert_end_time = clock(); + double insert_time = + ((double)(insert_end_time - start_time)) / CLOCKS_PER_SEC; + printf("✓ Bulk insertion completed in %.3f seconds (%.0f docs/sec)\n", + insert_time, DOC_COUNT / insert_time); + + // 6. Flush and optimize collection + printf("Flushing and optimizing collection...\n"); + zvec_collection_flush(collection); + zvec_collection_optimize(collection); + printf("✓ Collection optimized\n"); + + // 7. 
Performance query test + printf("Testing query performance...\n"); + + float *query_vector = create_test_vector(128); + if (!query_vector) { + fprintf(stderr, "Failed to create query vector\n"); + goto cleanup_collection; + } + + ZVecVectorQuery query = {0}; + query.field_name = + (ZVecString){.data = "embedding", .length = strlen("embedding")}; + query.query_vector = (ZVecByteArray){.data = (uint8_t *)query_vector, + .length = 128 * sizeof(float)}; + query.topk = 10; + query.filter = (ZVecString){.data = "", .length = 0}; + query.include_vector = false; + query.include_doc_id = true; + query.output_fields.strings = NULL; + query.output_fields.count = 0; + + const int QUERY_COUNT = 100; + start_time = clock(); + + for (int q = 0; q < QUERY_COUNT; q++) { + ZVecDoc **results = NULL; + size_t result_count = 0; + + error = zvec_collection_query(collection, &query, &results, &result_count); + if (error != ZVEC_OK) { + char *error_msg = NULL; + zvec_get_last_error(&error_msg); + printf("Query %d failed: %s\n", q, + error_msg ? error_msg : "Unknown error"); + free(error_msg); + continue; + } + + if (results) { + zvec_docs_free(results, result_count); + } + } + + clock_t query_end_time = clock(); + double query_time = ((double)(query_end_time - start_time)) / CLOCKS_PER_SEC; + double avg_query_time = (query_time * 1000) / QUERY_COUNT; + + printf("✓ Performance test completed\n"); + printf(" Average query time: %.2f ms\n", avg_query_time); + printf(" Queries per second: %.0f\n", 1000.0 / avg_query_time); + + free(query_vector); + + // 8. Memory usage information + ZVecCollectionStats *stats = NULL; + error = zvec_collection_get_stats(collection, &stats); + if (error == ZVEC_OK && stats) { + printf("Collection Statistics:\n"); + printf(" Document count: %llu\n", (unsigned long long)stats->doc_count); + zvec_collection_stats_destroy(stats); + } + + // 9. 
Cleanup +cleanup_collection: + zvec_collection_destroy(collection); + +cleanup_fields: + // Field schemas are managed by the collection schema, no need to destroy + // individually + +cleanup_params: + zvec_collection_schema_destroy(schema); + zvec_index_params_hnsw_destroy(hnsw_params); + + printf("✓ Optimized example completed\n"); + + return 0; +} \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 486b0b36..fe9c090f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -109,6 +109,14 @@ build-dir = "build" wheel.expand-macos-universal-tags = true wheel.packages = ["python/zvec"] +# Exclude unnecessary files from wheel +wheel.exclude = [ + "**/*.dylib", + "**/*.a", + "lib/cmake/**", + "lib/pkgconfig/**", +] + # Source distribution sdist.include = [ "README.md", diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 00383c99..0f3a85ee 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -9,6 +9,7 @@ cc_directory(ailego) cc_directory(turbo) cc_directory(core) cc_directory(db) +cc_directory(c_api) if(BUILD_PYTHON_BINDINGS) cc_directory(binding) endif() diff --git a/src/binding/python/CMakeLists.txt b/src/binding/python/CMakeLists.txt index 160b25ea..c78aa033 100644 --- a/src/binding/python/CMakeLists.txt +++ b/src/binding/python/CMakeLists.txt @@ -56,4 +56,4 @@ elseif (APPLE) ) endif () -target_include_directories(_zvec PRIVATE ${PYBIND11_INCLUDE_DIR} ${PROJECT_ROOT_DIR}/src ${PROJECT_ROOT_DIR}/src/binding/python/include) +target_include_directories(_zvec PRIVATE ${PYBIND11_INCLUDE_DIR} ${PROJECT_ROOT_DIR}/src ${PROJECT_ROOT_DIR}/src/binding/python/include) \ No newline at end of file diff --git a/src/c_api/API_REFERENCE_CN.md b/src/c_api/API_REFERENCE_CN.md new file mode 100644 index 00000000..115313c7 --- /dev/null +++ b/src/c_api/API_REFERENCE_CN.md @@ -0,0 +1,1843 @@ +# ZVec C API 参考文档 + +**版本**: 0.3.0 +**许可**: Apache License 2.0 + +--- + +## 目录 + +1. [概述](#概述) +2. [快速开始](#快速开始) +3. [版本管理](#版本管理) +4. [错误处理](#错误处理) +5. 
[初始化与关闭](#初始化与关闭) +6. [配置管理](#配置管理) +7. [数据结构](#数据结构) +8. [Schema 管理](#schema-管理) +9. [Collection 管理](#collection-管理) +10. [索引管理](#索引管理) +11. [文档操作](#文档操作) +12. [数据增删改](#数据增删改) +13. [数据查询](#数据查询) +14. [工具函数](#工具函数) +15. [完整示例](#完整示例) + +--- + +## 概述 + +ZVec C API 是 ZVec 向量数据库的 C 语言接口,提供了完整的向量存储、索引和检索功能。本接口采用 C ABI,可与 C、C++、Rust、Go 等语言互操作。 + +### 核心概念 + +| 概念 | 说明 | +|------|------| +| **Collection** | 数据集合,类似数据库中的表 | +| **Schema** | 集合的结构定义,包含字段信息 | +| **Document** | 单条数据记录 | +| **Index** | 字段索引,加速查询 | +| **Field** | 字段,支持标量和向量类型 | + +--- + +## 快速开始 + +### 最小可用示例 + +```c +#include "zvec/c_api.h" +#include <stdio.h> + +int main() { + // 1. 初始化库 + zvec_initialize(NULL); + + // 2. 创建集合 Schema + ZVecCollectionSchema *schema = zvec_collection_schema_create("my_collection"); + ZVecFieldSchema *field = zvec_field_schema_create( + "embedding", ZVEC_DATA_TYPE_VECTOR_FP32, false, 3); + zvec_collection_schema_add_field(schema, field); + + // 3. 创建并打开集合 + ZVecCollection *collection = NULL; + ZVecErrorCode rc = zvec_collection_create_and_open( + "./my_data", schema, NULL, &collection); + + if (rc != ZVEC_OK) { + char *err_msg; + zvec_get_last_error(&err_msg); + printf("Error: %s\n", err_msg); + return 1; + } + + // 4. 创建索引 + ZVecHnswIndexParams *params = zvec_index_params_hnsw_create( + ZVEC_METRIC_TYPE_COSINE, ZVEC_QUANTIZE_TYPE_UNDEFINED, 16, 200, 50); + zvec_collection_create_hnsw_index(collection, "embedding", params); + + // 5. 插入数据 + ZVecDoc *doc = zvec_doc_create(); + zvec_doc_set_pk(doc, "doc_001"); + float vec[] = {0.1f, 0.2f, 0.3f}; + zvec_doc_add_field_by_value(doc, "embedding", + ZVEC_DATA_TYPE_VECTOR_FP32, vec, sizeof(vec)); + + size_t success, errors; + zvec_collection_insert(collection, &doc, 1, &success, &errors); + zvec_doc_destroy(doc); + + // 6. 查询 + ZVecVectorQuery query = ZVEC_VECTOR_QUERY( + "embedding", ZVEC_FLOAT_ARRAY(vec, 3), 10, ""); + ZVecDoc **results; + size_t count; + zvec_collection_query(collection, &query, &results, &count); + + // 7. 
清理 + zvec_docs_free(results, count); + zvec_index_params_hnsw_destroy(params); + zvec_collection_close(collection); + zvec_collection_destroy(collection); + zvec_collection_schema_destroy(schema); + zvec_shutdown(); + + return 0; +} +``` + +--- + +## 版本管理 + +### 获取版本信息 + +```c +// 获取完整版本字符串 +const char *version = zvec_get_version(); +// 输出示例:"0.3.0-g3f8a2b1 (built 2025-05-13 10:30:45)" + +// 获取各部分版本号 +int major = zvec_get_version_major(); // 0 +int minor = zvec_get_version_minor(); // 3 +int patch = zvec_get_version_patch(); // 0 +``` + +### 版本兼容性检查 + +```c +// 检查当前库版本是否满足最低要求 +bool compatible = zvec_check_version(0, 2, 0); +if (!compatible) { + printf("Library version too old!\n"); +} +``` + +| 函数 | 参数 | 返回值 | 说明 | +|------|------|--------|------| +| `zvec_get_version()` | 无 | `const char*` | 获取完整版本字符串 | +| `zvec_get_version_major()` | 无 | `int` | 获取主版本号 | +| `zvec_get_version_minor()` | 无 | `int` | 获取次版本号 | +| `zvec_get_version_patch()` | 无 | `int` | 获取补丁版本号 | +| `zvec_check_version()` | `major, minor, patch` | `bool` | 检查版本兼容性 | + +--- + +## 错误处理 + +### 错误码枚举 + +```c +typedef enum { + ZVEC_OK = 0, // 成功 + ZVEC_ERROR_NOT_FOUND = 1, // 资源未找到 + ZVEC_ERROR_ALREADY_EXISTS = 2, // 资源已存在 + ZVEC_ERROR_INVALID_ARGUMENT = 3, // 无效参数 + ZVEC_ERROR_PERMISSION_DENIED = 4, // 权限拒绝 + ZVEC_ERROR_FAILED_PRECONDITION = 5, // 前置条件失败 + ZVEC_ERROR_RESOURCE_EXHAUSTED = 6, // 资源耗尽 + ZVEC_ERROR_UNAVAILABLE = 7, // 服务不可用 + ZVEC_ERROR_INTERNAL_ERROR = 8, // 内部错误 + ZVEC_ERROR_NOT_SUPPORTED = 9, // 不支持的操作 + ZVEC_ERROR_UNKNOWN = 10 // 未知错误 +} ZVecErrorCode; +``` + +### 获取错误信息 + +```c +// 获取详细错误信息 +ZVecErrorDetails details; +zvec_get_last_error_details(&details); +printf("Error %d: %s\n", details.code, details.message); +printf(" at %s:%d in %s()\n", details.file, details.line, details.function); + +// 获取错误消息字符串 +char *error_msg; +ZVecErrorCode code = zvec_get_last_error(&error_msg); +if (code != ZVEC_OK) { + printf("Error: %s\n", error_msg); + free(error_msg); // 需要调用者释放 +} + +// 清除错误状态 
+zvec_clear_error(); + +// 错误码转字符串 +const char *err_str = zvec_error_code_to_string(ZVEC_ERROR_INVALID_ARGUMENT); +// 返回:"Invalid argument" +``` + +| 函数 | 参数 | 返回值 | 说明 | +|------|------|--------|------| +| `zvec_get_last_error(&msg)` | `char **msg` | `ZVecErrorCode` | 获取最后错误消息 | +| `zvec_get_last_error_details(&details)` | `ZVecErrorDetails*` | `ZVecErrorCode` | 获取详细错误信息 | +| `zvec_clear_error()` | 无 | void | 清除错误状态 | +| `zvec_error_code_to_string(code)` | `ZVecErrorCode` | `const char*` | 错误码转字符串 | + +--- + +## 初始化与关闭 + +### 初始化库 + +```c +// 使用默认配置初始化 +ZVecErrorCode rc = zvec_initialize(NULL); + +// 使用自定义配置初始化 +ZVecConfigData *config = zvec_config_data_create(); +zvec_config_data_set_memory_limit(config, 2UL * 1024 * 1024 * 1024); // 2GB +zvec_config_data_set_query_thread_count(config, 4); +rc = zvec_initialize(config); +zvec_config_data_destroy(config); + +if (rc != ZVEC_OK) { + // 处理初始化失败 +} +``` + +### 关闭库 + +```c +// 关闭前确保所有 Collection 已关闭 +zvec_collection_close(collection); +zvec_collection_destroy(collection); + +// 关闭库,释放所有资源 +ZVecErrorCode rc = zvec_shutdown(); +``` + +### 检查初始化状态 + +```c +bool initialized; +zvec_is_initialized(&initialized); +if (!initialized) { + zvec_initialize(NULL); +} +``` + +| 函数 | 参数 | 返回值 | 说明 | +|------|------|--------|------| +| `zvec_initialize(config)` | `const ZVecConfigData*` | `ZVecErrorCode` | 初始化库 | +| `zvec_shutdown()` | 无 | `ZVecErrorCode` | 关闭库 | +| `zvec_is_initialized(&initialized)` | `bool*` | `ZVecErrorCode` | 检查是否已初始化 | + +--- + +## 配置管理 + +### 配置数据结构 + +```c +typedef struct { + uint64_t memory_limit_bytes; // 内存限制(字节) + + // 日志配置 + ZVecLogType log_type; + void *log_config; // ZVecConsoleLogConfig 或 ZVecFileLogConfig + + // 查询配置 + uint32_t query_thread_count; // 查询线程数 + float invert_to_forward_scan_ratio; // 倒排转正扫比例 + float brute_force_by_keys_ratio; // 暴力检索比例 + + // 优化配置 + uint32_t optimize_thread_count; // 优化线程数 +} ZVecConfigData; +``` + +### 日志配置 + +```c +// 控制台日志配置 +typedef struct { + ZVecLogLevel level; // 
日志级别 +} ZVecConsoleLogConfig; + +// 文件日志配置 +typedef struct { + ZVecLogLevel level; // 日志级别 + ZVecString dir; // 日志目录 + ZVecString basename; // 日志文件基础名 + uint32_t file_size; // 文件大小 (MB) + uint32_t overdue_days; // 过期天数 +} ZVecFileLogConfig; +``` + +### 日志级别 + +```c +typedef enum { + ZVEC_LOG_LEVEL_DEBUG = 0, + ZVEC_LOG_LEVEL_INFO = 1, + ZVEC_LOG_LEVEL_WARN = 2, + ZVEC_LOG_LEVEL_ERROR = 3, + ZVEC_LOG_LEVEL_FATAL = 4 +} ZVecLogLevel; +``` + +### 配置创建与销毁 + +```c +// 创建配置 +ZVecConfigData *config = zvec_config_data_create(); + +// 创建控制台日志配置 +ZVecConsoleLogConfig *console_log = zvec_config_console_log_create( + ZVEC_LOG_LEVEL_INFO); + +// 创建文件日志配置 +ZVecFileLogConfig *file_log = zvec_config_file_log_create( + ZVEC_LOG_LEVEL_DEBUG, + "/var/log/zvec", // 日志目录 + "zvec", // 基础文件名 + 100, // 文件大小 100MB + 30 // 保留 30 天 +); + +// 设置配置 +zvec_config_data_set_memory_limit(config, 1024 * 1024 * 1024); +zvec_config_data_set_log_config(config, ZVEC_LOG_TYPE_CONSOLE, console_log); +zvec_config_data_set_query_thread_count(config, 8); +zvec_config_data_set_optimize_thread_count(config, 4); + +// 销毁配置 +zvec_config_console_log_destroy(console_log); +zvec_config_file_log_destroy(file_log); +zvec_config_data_destroy(config); +``` + +| 函数 | 参数 | 返回值 | 说明 | +|------|------|--------|------| +| `zvec_config_data_create()` | 无 | `ZVecConfigData*` | 创建配置数据 | +| `zvec_config_data_destroy(config)` | `ZVecConfigData*` | void | 销毁配置数据 | +| `zvec_config_data_set_memory_limit(config, bytes)` | config, 字节数 | `ZVecErrorCode` | 设置内存限制 | +| `zvec_config_data_set_log_config(config, type, cfg)` | config, 类型,配置 | `ZVecErrorCode` | 设置日志配置 | +| `zvec_config_data_set_query_thread_count(config, count)` | config, 线程数 | `ZVecErrorCode` | 设置查询线程数 | +| `zvec_config_data_set_optimize_thread_count(config, count)` | config, 线程数 | `ZVecErrorCode` | 设置优化线程数 | +| `zvec_config_console_log_create(level)` | 日志级别 | `ZVecConsoleLogConfig*` | 创建控制台日志配置 | +| `zvec_config_console_log_destroy(cfg)` | 配置指针 | void | 销毁控制台日志配置 | +| 
`zvec_config_file_log_create(...)` | 级别,目录,文件名,大小,天数 | `ZVecFileLogConfig*` | 创建文件日志配置 | +| `zvec_config_file_log_destroy(cfg)` | 配置指针 | void | 销毁文件日志配置 | + +--- + +## 数据结构 + +### 字符串类型 + +```c +// 字符串视图(不拥有内存) +typedef struct { + const char *data; + size_t length; +} ZVecStringView; + +// 可变字符串(拥有内存) +typedef struct { + char *data; + size_t length; + size_t capacity; +} ZVecString; + +// 字符串数组 +typedef struct { + ZVecString *strings; + size_t count; +} ZVecStringArray; +``` + +### 数组类型 + +```c +// Float 数组 +typedef struct { + const float *data; + size_t length; +} ZVecFloatArray; + +// Int64 数组 +typedef struct { + const int64_t *data; + size_t length; +} ZVecInt64Array; + +// 字节数组 +typedef struct { + const uint8_t *data; + size_t length; +} ZVecByteArray; + +// 可变字节数组 +typedef struct { + uint8_t *data; + size_t length; + size_t capacity; +} ZVecMutableByteArray; +``` + +### 字符串操作 + +```c +// 从 C 字符串创建 +ZVecString *str = zvec_string_create("Hello, World!"); + +// 从字符串视图创建 +ZVecStringView view = {"Hello", 5}; +ZVecString *str2 = zvec_string_create_from_view(&view); + +// 创建二进制安全字符串(可包含 null 字节) +uint8_t data[] = {0x00, 0x01, 0x02, 0x03}; +ZVecString *bin_str = zvec_bin_create(data, sizeof(data)); + +// 复制字符串 +ZVecString *copy = zvec_string_copy(str); + +// 获取 C 字符串 +const char *c_str = zvec_string_c_str(str); + +// 获取长度 +size_t len = zvec_string_length(str); + +// 比较字符串 +int cmp = zvec_string_compare(str1, str2); // 返回 -1, 0, 1 + +// 释放字符串 +zvec_free_string(str); +``` + +### 数组操作 + +```c +// 创建字符串数组 +ZVecStringArray *arr = zvec_string_array_create(10); + +// 添加字符串 +zvec_string_array_add(arr, 0, "first"); +zvec_string_array_add(arr, 1, "second"); + +// 销毁字符串数组 +zvec_string_array_destroy(arr); + +// 创建字节数组 +ZVecMutableByteArray *byte_arr = zvec_byte_array_create(1024); +zvec_byte_array_destroy(byte_arr); + +// 创建 float 数组 +ZVecFloatArray *float_arr = zvec_float_array_create(100); +zvec_float_array_destroy(float_arr); + +// 创建 int64 数组 +ZVecInt64Array *int_arr = 
zvec_int64_array_create(50); +zvec_int64_array_destroy(int_arr); + +// 释放 uint8 数组 +zvec_free_uint8_array(uint8_t *array); +``` + +| 函数 | 参数 | 返回值 | 说明 | +|------|------|--------|------| +| `zvec_string_create(str)` | `const char*` | `ZVecString*` | 从 C 字符串创建 | +| `zvec_string_create_from_view(view)` | `ZVecStringView*` | `ZVecString*` | 从视图创建字符串 | +| `zvec_bin_create(data, length)` | `uint8_t*`, size_t | `ZVecString*` | 创建二进制字符串 | +| `zvec_string_copy(str)` | `ZVecString*` | `ZVecString*` | 复制字符串 | +| `zvec_string_c_str(str)` | `ZVecString*` | `const char*` | 获取 C 字符串 | +| `zvec_string_length(str)` | `ZVecString*` | size_t | 获取长度 | +| `zvec_string_compare(s1, s2)` | 两个字符串 | int | 比较字符串 | +| `zvec_free_string(str)` | `ZVecString*` | void | 释放字符串 | +| `zvec_string_array_create(count)` | size_t | `ZVecStringArray*` | 创建字符串数组 | +| `zvec_string_array_add(arr, idx, str)` | arr, 索引,字符串 | void | 添加字符串 | +| `zvec_string_array_destroy(arr)` | `ZVecStringArray*` | void | 销毁字符串数组 | +| `zvec_byte_array_create(capacity)` | size_t | `ZVecMutableByteArray*` | 创建字节数组 | +| `zvec_byte_array_destroy(arr)` | `ZVecMutableByteArray*` | void | 销毁字节数组 | +| `zvec_float_array_create(count)` | size_t | `ZVecFloatArray*` | 创建 float 数组 | +| `zvec_float_array_destroy(arr)` | `ZVecFloatArray*` | void | 销毁 float 数组 | +| `zvec_int64_array_create(count)` | size_t | `ZVecInt64Array*` | 创建 int64 数组 | +| `zvec_int64_array_destroy(arr)` | `ZVecInt64Array*` | void | 销毁 int64 数组 | +| `zvec_free_uint8_array(arr)` | `uint8_t*` | void | 释放 uint8 数组 | + +--- + +## Schema 管理 + +### 数据类型 + +```c +typedef enum { + // 标量类型 + ZVEC_DATA_TYPE_UNDEFINED = 0, + ZVEC_DATA_TYPE_BINARY = 1, + ZVEC_DATA_TYPE_STRING = 2, + ZVEC_DATA_TYPE_BOOL = 3, + ZVEC_DATA_TYPE_INT32 = 4, + ZVEC_DATA_TYPE_INT64 = 5, + ZVEC_DATA_TYPE_UINT32 = 6, + ZVEC_DATA_TYPE_UINT64 = 7, + ZVEC_DATA_TYPE_FLOAT = 8, + ZVEC_DATA_TYPE_DOUBLE = 9, + + // 向量类型 + ZVEC_DATA_TYPE_VECTOR_BINARY32 = 20, + ZVEC_DATA_TYPE_VECTOR_BINARY64 = 21, + 
ZVEC_DATA_TYPE_VECTOR_FP16 = 22, + ZVEC_DATA_TYPE_VECTOR_FP32 = 23, + ZVEC_DATA_TYPE_VECTOR_FP64 = 24, + ZVEC_DATA_TYPE_VECTOR_INT4 = 25, + ZVEC_DATA_TYPE_VECTOR_INT8 = 26, + ZVEC_DATA_TYPE_VECTOR_INT16 = 27, + + // 稀疏向量类型 + ZVEC_DATA_TYPE_SPARSE_VECTOR_FP16 = 30, + ZVEC_DATA_TYPE_SPARSE_VECTOR_FP32 = 31, + + // 数组类型 + ZVEC_DATA_TYPE_ARRAY_BINARY = 40, + ZVEC_DATA_TYPE_ARRAY_STRING = 41, + ZVEC_DATA_TYPE_ARRAY_BOOL = 42, + ZVEC_DATA_TYPE_ARRAY_INT32 = 43, + ZVEC_DATA_TYPE_ARRAY_INT64 = 44, + ZVEC_DATA_TYPE_ARRAY_UINT32 = 45, + ZVEC_DATA_TYPE_ARRAY_UINT64 = 46, + ZVEC_DATA_TYPE_ARRAY_FLOAT = 47, + ZVEC_DATA_TYPE_ARRAY_DOUBLE = 48 +} ZVecDataType; +``` + +### 字段 Schema + +```c +typedef struct { + ZVecString *name; // 字段名 + ZVecDataType data_type; // 数据类型 + bool nullable; // 是否可空 + uint32_t dimension; // 向量维度(仅向量类型使用) + ZVecIndexParams *index_params; // 索引参数 +} ZVecFieldSchema; +``` + +### 创建字段 Schema + +```c +// 创建标量字段 +ZVecFieldSchema *id_field = zvec_field_schema_create( + "id", ZVEC_DATA_TYPE_STRING, false, 0); + +// 创建向量字段(768 维) +ZVecFieldSchema *embedding_field = zvec_field_schema_create( + "embedding", ZVEC_DATA_TYPE_VECTOR_FP32, false, 768); + +// 创建带索引的字段 +ZVecHnswIndexParams *hnsw_params = zvec_index_params_hnsw_create( + ZVEC_METRIC_TYPE_COSINE, ZVEC_QUANTIZE_TYPE_UNDEFINED, 16, 200, 50); +zvec_field_schema_set_hnsw_index(embedding_field, hnsw_params); + +// 或者使用专用函数 +zvec_field_schema_set_invert_index(field, invert_params); +zvec_field_schema_set_hnsw_index(field, hnsw_params); +zvec_field_schema_set_flat_index(field, flat_params); +zvec_field_schema_set_ivf_index(field, ivf_params); + +// 设置索引参数 +zvec_field_schema_set_index_params(field, index_params); + +// 销毁字段 Schema +zvec_field_schema_destroy(field); +zvec_free_field_schema(field); +``` + +### Collection Schema + +```c +typedef struct { + ZVecString *name; // 集合名 + ZVecFieldSchema **fields; // 字段数组 + size_t field_count; // 字段数量 + size_t field_capacity; // 字段容量 + uint64_t max_doc_count_per_segment; // 
每段最大文档数 +} ZVecCollectionSchema; +``` + +### 创建 Collection Schema + +```c +// 创建 Schema +ZVecCollectionSchema *schema = zvec_collection_schema_create("my_collection"); + +// 添加单个字段 +ZVecFieldSchema *field = zvec_field_schema_create( + "title", ZVEC_DATA_TYPE_STRING, false, 0); +zvec_collection_schema_add_field(schema, field); + +// 批量添加字段 +ZVecFieldSchema fields[3] = { + *zvec_field_schema_create("id", ZVEC_DATA_TYPE_STRING, false, 0), + *zvec_field_schema_create("embedding", ZVEC_DATA_TYPE_VECTOR_FP32, false, 768), + *zvec_field_schema_create("timestamp", ZVEC_DATA_TYPE_INT64, true, 0) +}; +zvec_collection_schema_add_fields(schema, fields, 3); + +// 获取字段数量 +size_t count = zvec_collection_schema_get_field_count(schema); + +// 按索引获取字段 +ZVecFieldSchema *f = zvec_collection_schema_get_field(schema, 0); + +// 按名称查找字段 +ZVecFieldSchema *f = zvec_collection_schema_find_field(schema, "embedding"); + +// 删除字段 +zvec_collection_schema_remove_field(schema, "title"); + +// 批量删除字段 +const char *field_names[] = {"field1", "field2"}; +zvec_collection_schema_remove_fields(schema, field_names, 2); + +// 设置每段最大文档数 +zvec_collection_schema_set_max_doc_count_per_segment(schema, 500000); + +// 获取每段最大文档数 +uint64_t max_docs = zvec_collection_schema_get_max_doc_count_per_segment(schema); + +// 验证 Schema +ZVecString *error_msg; +ZVecErrorCode rc = zvec_collection_schema_validate(schema, &error_msg); +if (rc != ZVEC_OK) { + printf("Invalid schema: %s\n", error_msg->data); + zvec_free_string(error_msg); +} + +// 销毁 Schema +zvec_collection_schema_destroy(schema); +``` + +| 函数 | 参数 | 返回值 | 说明 | +|------|------|--------|------| +| `zvec_field_schema_create(name, type, nullable, dim)` | 名,类型,是否可空,维度 | `ZVecFieldSchema*` | 创建字段 Schema | +| `zvec_field_schema_destroy(schema)` | `ZVecFieldSchema*` | void | 销毁字段 Schema | +| `zvec_field_schema_set_index_params(schema, params)` | schema, 索引参数 | `ZVecErrorCode` | 设置索引参数 | +| `zvec_field_schema_set_invert_index(schema, params)` | schema, 倒排参数 | void | 
设置倒排索引 | +| `zvec_field_schema_set_hnsw_index(schema, params)` | schema, HNSW 参数 | void | 设置 HNSW 索引 | +| `zvec_field_schema_set_flat_index(schema, params)` | schema, Flat 参数 | void | 设置 Flat 索引 | +| `zvec_field_schema_set_ivf_index(schema, params)` | schema, IVF 参数 | void | 设置 IVF 索引 | +| `zvec_free_field_schema(schema)` | `ZVecFieldSchema*` | void | 释放字段 Schema | +| `zvec_collection_schema_create(name)` | 集合名 | `ZVecCollectionSchema*` | 创建集合 Schema | +| `zvec_collection_schema_destroy(schema)` | `ZVecCollectionSchema*` | void | 销毁集合 Schema | +| `zvec_collection_schema_add_field(schema, field)` | schema, 字段 | `ZVecErrorCode` | 添加字段 | +| `zvec_collection_schema_add_fields(schema, fields, count)` | schema, 字段数组,数量 | `ZVecErrorCode` | 批量添加字段 | +| `zvec_collection_schema_remove_field(schema, name)` | schema, 字段名 | `ZVecErrorCode` | 删除字段 | +| `zvec_collection_schema_remove_fields(schema, names, count)` | schema, 字段名数组,数量 | `ZVecErrorCode` | 批量删除字段 | +| `zvec_collection_schema_get_field_count(schema)` | `ZVecCollectionSchema*` | size_t | 获取字段数量 | +| `zvec_collection_schema_get_field(schema, index)` | schema, 索引 | `ZVecFieldSchema*` | 按索引获取字段 | +| `zvec_collection_schema_find_field(schema, name)` | schema, 字段名 | `ZVecFieldSchema*` | 按名查找字段 | +| `zvec_collection_schema_validate(schema, &error)` | schema, 错误输出 | `ZVecErrorCode` | 验证 Schema | +| `zvec_collection_schema_set_max_doc_count_per_segment(schema, count)` | schema, 数量 | `ZVecErrorCode` | 设置段最大文档数 | +| `zvec_collection_schema_get_max_doc_count_per_segment(schema)` | `ZVecCollectionSchema*` | uint64_t | 获取段最大文档数 | + +--- + +## Collection 管理 + +### Collection 选项 + +```c +typedef struct { + bool enable_mmap; // 是否启用内存映射 + size_t max_buffer_size; // 最大缓冲区大小 + bool read_only; // 是否只读模式 + uint64_t max_doc_count_per_segment; // 每段最大文档数 +} ZVecCollectionOptions; +``` + +### 创建和打开 Collection + +```c +// 初始化默认选项 +ZVecCollectionOptions options; +zvec_collection_options_init_default(&options); + +// 或使用宏 +ZVecCollectionOptions 
options = ZVEC_DEFAULT_OPTIONS(); + +// 自定义选项 +options.enable_mmap = true; +options.max_buffer_size = 2 * 1024 * 1024; // 2MB +options.read_only = false; +options.max_doc_count_per_segment = 500000; + +// 创建并打开 +ZVecCollection *collection; +ZVecErrorCode rc = zvec_collection_create_and_open( + "/path/to/data", schema, &options, &collection); + +// 打开已有集合 +rc = zvec_collection_open("/path/to/data", &options, &collection); +``` + +### Collection 操作 + +```c +// 关闭集合 +rc = zvec_collection_close(collection); + +// 销毁集合 +rc = zvec_collection_destroy(collection); + +// 刷盘数据 +rc = zvec_collection_flush(collection); + +// 获取 Schema +ZVecCollectionSchema *schema; +rc = zvec_collection_get_schema(collection, &schema); +// 使用后销毁 +zvec_collection_schema_destroy(schema); + +// 获取选项 +ZVecCollectionOptions *options; +rc = zvec_collection_get_options(collection, &options); +// 使用后销毁 +free(options); + +// 获取统计信息 +typedef struct { + uint64_t doc_count; // 文档总数 + ZVecString **index_names; // 索引名数组 + float *index_completeness; // 索引完成度数组 + size_t index_count; // 索引数量 +} ZVecCollectionStats; + +ZVecCollectionStats *stats; +rc = zvec_collection_get_stats(collection, &stats); +printf("Documents: %lu\n", stats->doc_count); +printf("Indexes: %zu\n", stats->index_count); +zvec_collection_stats_destroy(stats); +``` + +| 函数 | 参数 | 返回值 | 说明 | +|------|------|--------|------| +| `zvec_collection_options_init_default(&opts)` | `ZVecCollectionOptions*` | void | 初始化默认选项 | +| `zvec_collection_create_and_open(path, schema, opts, &coll)` | 路径,Schema, 选项,输出 | `ZVecErrorCode` | 创建并打开集合 | +| `zvec_collection_open(path, opts, &coll)` | 路径,选项,输出 | `ZVecErrorCode` | 打开已有集合 | +| `zvec_collection_close(coll)` | `ZVecCollection*` | `ZVecErrorCode` | 关闭集合 | +| `zvec_collection_destroy(coll)` | `ZVecCollection*` | `ZVecErrorCode` | 销毁集合 | +| `zvec_collection_flush(coll)` | `ZVecCollection*` | `ZVecErrorCode` | 刷盘数据 | +| `zvec_collection_get_schema(coll, &schema)` | 集合,输出 | `ZVecErrorCode` | 获取 Schema | +| 
`zvec_collection_get_options(coll, &opts)` | 集合,输出 | `ZVecErrorCode` | 获取选项 | +| `zvec_collection_get_stats(coll, &stats)` | 集合,输出 | `ZVecErrorCode` | 获取统计信息 | +| `zvec_collection_stats_destroy(stats)` | `ZVecCollectionStats*` | void | 销毁统计信息 | + +--- + +## 索引管理 + +### 索引类型 + +```c +typedef enum { + ZVEC_INDEX_TYPE_UNDEFINED = 0, + ZVEC_INDEX_TYPE_HNSW = 1, // HNSW 图索引 + ZVEC_INDEX_TYPE_IVF = 3, // 倒排文件索引 + ZVEC_INDEX_TYPE_FLAT = 4, // 暴力检索 + ZVEC_INDEX_TYPE_INVERT = 10 // 标量倒排索引 +} ZVecIndexType; +``` + +### 距离度量类型 + +```c +typedef enum { + ZVEC_METRIC_TYPE_UNDEFINED = 0, + ZVEC_METRIC_TYPE_L2 = 1, // L2 距离 + ZVEC_METRIC_TYPE_IP = 2, // 内积 + ZVEC_METRIC_TYPE_COSINE = 3, // 余弦相似度 + ZVEC_METRIC_TYPE_MIPSL2 = 4 // L2 内积 +} ZVecMetricType; +``` + +### 量化类型 + +```c +typedef enum { + ZVEC_QUANTIZE_TYPE_UNDEFINED = 0, + ZVEC_QUANTIZE_TYPE_FP16 = 1, // FP16 量化 + ZVEC_QUANTIZE_TYPE_INT8 = 2, // INT8 量化 + ZVEC_QUANTIZE_TYPE_INT4 = 3 // INT4 量化 +} ZVecQuantizeType; +``` + +### HNSW 索引参数 + +```c +typedef struct { + ZVecVectorIndexParams base; // 基类参数 + int m; // 图连接度参数 + int ef_construction; // 构建时探索因子 + int ef_search; // 搜索时探索因子 +} ZVecHnswIndexParams; + +// 创建 HNSW 参数 +ZVecHnswIndexParams *params = zvec_index_params_hnsw_create( + ZVEC_METRIC_TYPE_COSINE, // 距离类型 + ZVEC_QUANTIZE_TYPE_UNDEFINED, // 量化类型 + 16, // m: 图连接度 + 200, // ef_construction: 构建探索因子 + 50 // ef_search: 搜索探索因子 +); + +// 或使用初始化函数 +ZVecHnswIndexParams params; +zvec_index_params_hnsw_init(&params, + ZVEC_METRIC_TYPE_COSINE, 16, 200, 50, ZVEC_QUANTIZE_TYPE_UNDEFINED); + +// 或使用宏 +ZVecHnswIndexParams params = ZVEC_HNSW_PARAMS( + ZVEC_METRIC_TYPE_COSINE, 16, 200, 50, ZVEC_QUANTIZE_TYPE_UNDEFINED); + +zvec_index_params_hnsw_destroy(params); +``` + +### IVF 索引参数 + +```c +typedef struct { + ZVecVectorIndexParams base; // 基类参数 + int n_list; // 聚类中心数量 + int n_iters; // 迭代次数 + bool use_soar; // 是否使用 SOAR 算法 + int n_probe; // 搜索时探测的聚类数 +} ZVecIVFIndexParams; + +// 创建 IVF 参数 +ZVecIVFIndexParams *params = 
zvec_index_params_ivf_create( + ZVEC_METRIC_TYPE_L2, // 距离类型 + ZVEC_QUANTIZE_TYPE_INT8, // 量化类型 + 1024, // n_list: 聚类中心数 + 25, // n_iters: 迭代次数 + true, // use_soar: 使用 SOAR + 20 // n_probe: 探测聚类数 +); + +// 或使用宏 +ZVecIVFIndexParams params = ZVEC_IVF_PARAMS( + ZVEC_METRIC_TYPE_L2, 1024, 25, true, 20, ZVEC_QUANTIZE_TYPE_INT8); + +zvec_index_params_ivf_destroy(params); +``` + +### Flat 索引参数 + +```c +typedef struct { + ZVecVectorIndexParams base; // 基类参数 +} ZVecFlatIndexParams; + +// 创建 Flat 参数 +ZVecFlatIndexParams *params = zvec_index_params_flat_create( + ZVEC_METRIC_TYPE_COSINE, ZVEC_QUANTIZE_TYPE_UNDEFINED); + +// 或使用宏 +ZVecFlatIndexParams params = ZVEC_FLAT_PARAMS( + ZVEC_METRIC_TYPE_COSINE, ZVEC_QUANTIZE_TYPE_UNDEFINED); + +zvec_index_params_flat_destroy(params); +``` + +### 标量倒排索引参数 + +```c +typedef struct { + ZVecBaseIndexParams base; // 基类参数 + bool enable_range_optimization; // 是否启用范围优化 + bool enable_extended_wildcard; // 是否启用通配符 +} ZVecInvertIndexParams; + +// 创建倒排索引参数 +ZVecInvertIndexParams *params = zvec_index_params_invert_create( + true, // enable_range_optimization + false // enable_extended_wildcard +); + +// 或使用宏 +ZVecInvertIndexParams params = ZVEC_INVERT_PARAMS(true, false); + +// 或使用初始化函数 +ZVecInvertIndexParams params; +zvec_index_params_invert_init(&params, true, false); + +zvec_index_params_invert_destroy(params); +``` + +### 创建索引 + +```c +// 通用创建索引函数 +zvec_collection_create_index(collection, "embedding", index_params); + +// 类型安全的创建索引函数 +zvec_collection_create_hnsw_index(collection, "embedding", hnsw_params); +zvec_collection_create_ivf_index(collection, "embedding", ivf_params); +zvec_collection_create_flat_index(collection, "embedding", flat_params); +zvec_collection_create_invert_index(collection, "title", invert_params); + +// 删除索引 +zvec_collection_drop_index(collection, "embedding"); + +// 优化集合(重建索引、合并段) +zvec_collection_optimize(collection); +``` + +| 函数 | 参数 | 返回值 | 说明 | +|------|------|--------|------| +| `zvec_index_params_base_init(params, 
type)` | 参数,类型 | void | 初始化基础参数 | +| `zvec_index_params_invert_init(params, range_opt, wildcard)` | 参数,范围优化,通配符 | void | 初始化倒排参数 | +| `zvec_index_params_vector_init(params, idx, metric, quant)` | 参数,索引类型,度量,量化 | void | 初始化向量索引参数 | +| `zvec_index_params_hnsw_init(params, metric, m, ef_c, ef_s, quant)` | 参数,度量,m, ef_construction, ef_search, 量化 | void | 初始化 HNSW 参数 | +| `zvec_index_params_ivf_init(params, metric, nlist, niters, soar, nprobe, quant)` | 参数,度量,nlist, niters, soar, nprobe, 量化 | void | 初始化 IVF 参数 | +| `zvec_index_params_flat_init(params, metric, quant)` | 参数,度量,量化 | void | 初始化 Flat 参数 | +| `zvec_index_params_invert_create(range_opt, wildcard)` | 范围优化,通配符 | `ZVecInvertIndexParams*` | 创建倒排参数 | +| `zvec_index_params_vector_create(type, metric, quant)` | 类型,度量,量化 | `ZVecVectorIndexParams*` | 创建向量索引参数 | +| `zvec_index_params_hnsw_create(metric, quant, m, ef_c, ef_s)` | 度量,量化,m, ef_construction, ef_search | `ZVecHnswIndexParams*` | 创建 HNSW 参数 | +| `zvec_index_params_ivf_create(metric, quant, nlist, niters, soar, nprobe)` | 度量,量化,nlist, niters, soar, nprobe | `ZVecIVFIndexParams*` | 创建 IVF 参数 | +| `zvec_index_params_flat_create(metric, quant)` | 度量,量化 | `ZVecFlatIndexParams*` | 创建 Flat 参数 | +| `zvec_index_params_invert_destroy(params)` | 参数 | void | 销毁倒排参数 | +| `zvec_index_params_vector_destroy(params)` | 参数 | void | 销毁向量索引参数 | +| `zvec_index_params_hnsw_destroy(params)` | 参数 | void | 销毁 HNSW 参数 | +| `zvec_index_params_ivf_destroy(params)` | 参数 | void | 销毁 IVF 参数 | +| `zvec_index_params_flat_destroy(params)` | 参数 | void | 销毁 Flat 参数 | +| `zvec_collection_create_index(coll, field, params)` | 集合,字段,参数 | `ZVecErrorCode` | 创建索引 | +| `zvec_collection_create_hnsw_index(...)` | 集合,字段,HNSW 参数 | `ZVecErrorCode` | 创建 HNSW 索引 | +| `zvec_collection_create_ivf_index(...)` | 集合,字段,IVF 参数 | `ZVecErrorCode` | 创建 IVF 索引 | +| `zvec_collection_create_flat_index(...)` | 集合,字段,Flat 参数 | `ZVecErrorCode` | 创建 Flat 索引 | +| `zvec_collection_create_invert_index(...)` | 集合,字段,倒排参数 | 
`ZVecErrorCode` | 创建倒排索引 | +| `zvec_collection_drop_index(coll, field)` | 集合,字段名 | `ZVecErrorCode` | 删除索引 | +| `zvec_collection_optimize(coll)` | 集合 | `ZVecErrorCode` | 优化集合 | + +--- + +## 文档操作 + +### 文档结构 + +```c +typedef struct ZVecDoc ZVecDoc; // 不透明指针 + +// 字段值联合 +typedef union { + bool bool_value; + int32_t int32_value; + int64_t int64_value; + uint32_t uint32_value; + uint64_t uint64_value; + float float_value; + double double_value; + ZVecString string_value; + ZVecFloatArray vector_value; + ZVecByteArray binary_value; +} ZVecFieldValue; + +// 文档字段 +typedef struct { + ZVecString name; + ZVecDataType data_type; + ZVecFieldValue value; +} ZVecDocField; +``` + +### 创建和销毁文档 + +```c +// 创建文档 +ZVecDoc *doc = zvec_doc_create(); + +// 清空文档 +zvec_doc_clear(doc); + +// 销毁文档 +zvec_doc_destroy(doc); +``` + +### 设置文档属性 + +```c +// 设置主键 +zvec_doc_set_pk(doc, "doc_001"); + +// 设置文档 ID +zvec_doc_set_doc_id(doc, 12345); + +// 设置分数 +zvec_doc_set_score(doc, 0.95f); + +// 设置操作类型 +typedef enum { + ZVEC_DOC_OP_INSERT = 0, // 插入 + ZVEC_DOC_OP_UPDATE = 1, // 更新 + ZVEC_DOC_OP_UPSERT = 2, // 插入或更新 + ZVEC_DOC_OP_DELETE = 3 // 删除 +} ZVecDocOperator; + +zvec_doc_set_operator(doc, ZVEC_DOC_OP_INSERT); +``` + +### 获取文档属性 + +```c +// 获取文档 ID +uint64_t id = zvec_doc_get_doc_id(doc); + +// 获取分数 +float score = zvec_doc_get_score(doc); + +// 获取操作类型 +ZVecDocOperator op = zvec_doc_get_operator(doc); + +// 获取主键指针(不复制) +const char *pk = zvec_doc_get_pk_pointer(doc); + +// 获取主键副本(需手动释放) +const char *pk = zvec_doc_get_pk_copy(doc); +free((void*)pk); + +// 获取字段数量 +size_t count = zvec_doc_get_field_count(doc); + +// 检查文档是否为空 +bool empty = zvec_doc_is_empty(doc); + +// 检查是否包含字段 +bool has = zvec_doc_has_field(doc, "embedding"); + +// 检查字段是否有值 +bool has_value = zvec_doc_has_field_value(doc, "embedding"); + +// 检查字段是否为 null +bool is_null = zvec_doc_is_field_null(doc, "optional_field"); +``` + +### 添加字段 + +```c +// 按值添加字段 +float embedding[768] = {0.1f, 0.2f, ...}; +zvec_doc_add_field_by_value(doc, 
"embedding", + ZVEC_DATA_TYPE_VECTOR_FP32, embedding, sizeof(embedding)); + +// 添加字符串字段 +const char *title = "Hello World"; +zvec_doc_add_field_by_value(doc, "title", + ZVEC_DATA_TYPE_STRING, title, strlen(title) + 1); + +// 添加整数字段 +int64_t timestamp = 1234567890; +zvec_doc_add_field_by_value(doc, "timestamp", + ZVEC_DATA_TYPE_INT64, &timestamp, sizeof(timestamp)); + +// 按结构添加字段 +ZVecDocField field; +field.name = ZVEC_STRING("score"); +field.data_type = ZVEC_DATA_TYPE_FLOAT; +field.value.float_value = 0.95f; +zvec_doc_add_field_by_struct(doc, &field); + +// 删除字段 +zvec_doc_remove_field(doc, "title"); +``` + +### 获取字段值 + +```c +// 获取基本类型值 +float float_val; +zvec_doc_get_field_value_basic(doc, "score", + ZVEC_DATA_TYPE_FLOAT, &float_val, sizeof(float_val)); + +int64_t int_val; +zvec_doc_get_field_value_basic(doc, "timestamp", + ZVEC_DATA_TYPE_INT64, &int_val, sizeof(int_val)); + +// 获取字段值副本(需手动释放) +void *value; +size_t value_size; + +// 获取字符串 +zvec_doc_get_field_value_copy(doc, "title", ZVEC_DATA_TYPE_STRING, &value, &value_size); +printf("Title: %s\n", (char*)value); +free(value); + +// 获取向量 +zvec_doc_get_field_value_copy(doc, "embedding", ZVEC_DATA_TYPE_VECTOR_FP32, &value, &value_size); +float *vec = (float*)value; +// 使用...
+free(value); + +// 获取二进制数据 +zvec_doc_get_field_value_copy(doc, "data", ZVEC_DATA_TYPE_BINARY, &value, &value_size); +zvec_free_uint8_array((uint8_t*)value); + +// 获取字段值指针(无需释放,数据在文档内) +const void *value; +size_t value_size; +zvec_doc_get_field_value_pointer(doc, "score", ZVEC_DATA_TYPE_FLOAT, &value, &value_size); +float score = *(float*)value; +``` + +### 获取所有字段名 + +```c +char **field_names; +size_t count; +zvec_doc_get_field_names(doc, &field_names, &count); + +for (size_t i = 0; i < count; i++) { + printf("Field %zu: %s\n", i, field_names[i]); +} + +// 释放 +zvec_free_str_array(field_names, count); +``` + +### 序列化/反序列化 + +```c +// 序列化 +uint8_t *data; +size_t size; +ZVecErrorCode rc = zvec_doc_serialize(doc, &data, &size); + +// 保存到文件 +FILE *f = fopen("doc.bin", "wb"); +fwrite(data, 1, size, f); +fclose(f); +zvec_free_uint8_array(data); + +// 从文件读取 +FILE *f = fopen("doc.bin", "rb"); +fseek(f, 0, SEEK_END); +size_t file_size = ftell(f); +fseek(f, 0, SEEK_SET); +uint8_t *buffer = malloc(file_size); +fread(buffer, 1, file_size, f); +fclose(f); + +// 反序列化 +ZVecDoc *new_doc; +rc = zvec_doc_deserialize(buffer, file_size, &new_doc); +free(buffer); + +// 使用... +zvec_doc_destroy(new_doc); +``` + +### 文档合并 + +```c +// 合并两个文档 +ZVecDoc *doc1 = zvec_doc_create(); +ZVecDoc *doc2 = zvec_doc_create(); + +// 设置字段... +zvec_doc_merge(doc1, doc2); // 将 doc2 的字段合并到 doc1 +``` + +### 内存使用 + +```c +size_t bytes = zvec_doc_memory_usage(doc); +printf("Document uses %zu bytes\n", bytes); +``` + +### 验证文档 + +```c +char *error_msg; +ZVecErrorCode rc = zvec_doc_validate(doc, schema, false, &error_msg); +if (rc != ZVEC_OK) { + printf("Invalid document: %s\n", error_msg); + free(error_msg); +} +``` + +### 文档详细信息 + +```c +char *detail_str; +zvec_doc_to_detail_string(doc, &detail_str); +printf("Document: %s\n", detail_str); +free(detail_str); +``` + +### 批量释放文档 + +```c +ZVecDoc **docs = malloc(count * sizeof(ZVecDoc*)); +// 填充 docs... 
+ +// 批量释放 +zvec_docs_free(docs, count); +``` + +| 函数 | 参数 | 返回值 | 说明 | +|------|------|--------|------| +| `zvec_doc_create()` | 无 | `ZVecDoc*` | 创建文档 | +| `zvec_doc_destroy(doc)` | `ZVecDoc*` | void | 销毁文档 | +| `zvec_doc_clear(doc)` | `ZVecDoc*` | void | 清空文档 | +| `zvec_doc_set_pk(doc, pk)` | doc, 主键 | void | 设置主键 | +| `zvec_doc_set_doc_id(doc, id)` | doc, ID | void | 设置文档 ID | +| `zvec_doc_set_score(doc, score)` | doc, 分数 | void | 设置分数 | +| `zvec_doc_set_operator(doc, op)` | doc, 操作类型 | void | 设置操作类型 | +| `zvec_doc_get_doc_id(doc)` | `ZVecDoc*` | uint64_t | 获取文档 ID | +| `zvec_doc_get_score(doc)` | `ZVecDoc*` | float | 获取分数 | +| `zvec_doc_get_operator(doc)` | `ZVecDoc*` | `ZVecDocOperator` | 获取操作类型 | +| `zvec_doc_get_pk_pointer(doc)` | `ZVecDoc*` | `const char*` | 获取主键指针 | +| `zvec_doc_get_pk_copy(doc)` | `ZVecDoc*` | `const char*` | 获取主键副本 | +| `zvec_doc_get_field_count(doc)` | `ZVecDoc*` | size_t | 获取字段数量 | +| `zvec_doc_is_empty(doc)` | `ZVecDoc*` | bool | 检查是否为空 | +| `zvec_doc_has_field(doc, name)` | doc, 字段名 | bool | 检查是否包含字段 | +| `zvec_doc_has_field_value(doc, name)` | doc, 字段名 | bool | 检查字段是否有值 | +| `zvec_doc_is_field_null(doc, name)` | doc, 字段名 | bool | 检查字段是否为 null | +| `zvec_doc_add_field_by_value(doc, name, type, value, size)` | doc, 名,类型,值,大小 | `ZVecErrorCode` | 添加字段 | +| `zvec_doc_add_field_by_struct(doc, field)` | doc, 字段结构 | `ZVecErrorCode` | 按结构添加字段 | +| `zvec_doc_remove_field(doc, name)` | doc, 字段名 | `ZVecErrorCode` | 删除字段 | +| `zvec_doc_get_field_value_basic(doc, name, type, buf, size)` | doc, 名,类型,缓冲区,大小 | `ZVecErrorCode` | 获取基本类型值 | +| `zvec_doc_get_field_value_copy(doc, name, type, &val, &size)` | doc, 名,类型,值输出,大小输出 | `ZVecErrorCode` | 获取字段值副本 | +| `zvec_doc_get_field_value_pointer(doc, name, type, &val, &size)` | doc, 名,类型,值输出,大小输出 | `ZVecErrorCode` | 获取字段值指针 | +| `zvec_doc_get_field_names(doc, &names, &count)` | doc, 名称输出,数量输出 | `ZVecErrorCode` | 获取所有字段名 | +| `zvec_doc_serialize(doc, &data, &size)` | doc, 数据输出,大小输出 | `ZVecErrorCode` | 序列化 | 
+| `zvec_doc_deserialize(data, size, &doc)` | 数据,大小,文档输出 | `ZVecErrorCode` | 反序列化 | +| `zvec_doc_merge(doc, other)` | doc, 源文档 | void | 合并文档 | +| `zvec_doc_memory_usage(doc)` | `ZVecDoc*` | size_t | 获取内存使用 | +| `zvec_doc_validate(doc, schema, is_update, &err)` | doc, schema, 是否更新,错误输出 | `ZVecErrorCode` | 验证文档 | +| `zvec_doc_to_detail_string(doc, &str)` | doc, 字符串输出 | `ZVecErrorCode` | 获取详细信息字符串 | +| `zvec_docs_free(docs, count)` | 文档数组,数量 | void | 批量释放文档 | +| `zvec_free_str_array(arr, count)` | 字符串数组,数量 | void | 释放字符串数组 | + +--- + +## 数据增删改 + +### 插入文档 + +```c +ZVecDoc *docs[3]; +docs[0] = zvec_doc_create(); +docs[1] = zvec_doc_create(); +docs[2] = zvec_doc_create(); + +zvec_doc_set_pk(docs[0], "doc_001"); +zvec_doc_set_pk(docs[1], "doc_002"); +zvec_doc_set_pk(docs[2], "doc_003"); + +// 添加字段... + +size_t success_count, error_count; +ZVecErrorCode rc = zvec_collection_insert(collection, + (const ZVecDoc**)docs, 3, &success_count, &error_count); + +printf("Inserted: %zu, Failed: %zu\n", success_count, error_count); + +// 清理 +zvec_docs_free(docs, 3); +``` + +### 更新文档 + +```c +ZVecDoc *doc = zvec_doc_create(); +zvec_doc_set_pk(doc, "doc_001"); + +// 设置要更新的字段 +float new_embedding[768] = {0.2f, 0.3f, ...}; +zvec_doc_add_field_by_value(doc, "embedding", + ZVEC_DATA_TYPE_VECTOR_FP32, new_embedding, sizeof(new_embedding)); + +size_t success_count, error_count; +ZVecErrorCode rc = zvec_collection_update(collection, + (const ZVecDoc**)&doc, 1, &success_count, &error_count); + +zvec_doc_destroy(doc); +``` + +### 插入或更新(Upsert) + +```c +ZVecDoc *doc = zvec_doc_create(); +zvec_doc_set_pk(doc, "doc_001"); +// 设置字段... 
+ +size_t success_count, error_count; +ZVecErrorCode rc = zvec_collection_upsert(collection, + (const ZVecDoc**)&doc, 1, &success_count, &error_count); + +zvec_doc_destroy(doc); +``` + +### 删除文档 + +```c +// 按主键删除 +const char *pks[] = {"doc_001", "doc_002", "doc_003"}; +size_t success_count, error_count; +ZVecErrorCode rc = zvec_collection_delete(collection, + pks, 3, &success_count, &error_count); + +// 按过滤条件删除 +rc = zvec_collection_delete_by_filter(collection, "category='spam'"); +``` + +| 函数 | 参数 | 返回值 | 说明 | +|------|------|--------|------| +| `zvec_collection_insert(coll, docs, count, &success, &error)` | 集合,文档数组,数量,成功数输出,错误数输出 | `ZVecErrorCode` | 插入文档 | +| `zvec_collection_update(coll, docs, count, &success, &error)` | 集合,文档数组,数量,成功数输出,错误数输出 | `ZVecErrorCode` | 更新文档 | +| `zvec_collection_upsert(coll, docs, count, &success, &error)` | 集合,文档数组,数量,成功数输出,错误数输出 | `ZVecErrorCode` | 插入或更新 | +| `zvec_collection_delete(coll, pks, count, &success, &error)` | 集合,主键数组,数量,成功数输出,错误数输出 | `ZVecErrorCode` | 按主键删除 | +| `zvec_collection_delete_by_filter(coll, filter)` | 集合,过滤表达式 | `ZVecErrorCode` | 按条件删除 | + +--- + +## 数据查询 + +### 向量查询参数 + +```c +typedef struct { + ZVecIndexType index_type; // 索引类型 + float radius; // 搜索半径 + bool is_linear; // 是否线性搜索 + bool is_using_refiner; // 是否使用优化器 +} ZVecQueryParams; +``` + +### HNSW 查询参数 + +```c +typedef struct { + ZVecQueryParams base; + int ef; // 搜索时探索因子 +} ZVecHnswQueryParams; + +// 创建 +ZVecHnswQueryParams *params = zvec_query_params_hnsw_create( + ZVEC_INDEX_TYPE_HNSW, + 100, // ef + 0.0f, // radius + false, // is_linear + true // is_using_refiner +); + +zvec_query_params_hnsw_set_ef(params, 200); +zvec_query_params_hnsw_destroy(params); +``` + +### IVF 查询参数 + +```c +typedef struct { + ZVecQueryParams base; + int nprobe; // 探测聚类数 + float scale_factor; // 缩放因子 +} ZVecIVFQueryParams; + +// 创建 +ZVecIVFQueryParams *params = zvec_query_params_ivf_create( + ZVEC_INDEX_TYPE_IVF, + 20, // nprobe + true, // is_using_refiner + 1.0f // 
scale_factor +); + +zvec_query_params_ivf_set_nprobe(params, 50); +zvec_query_params_ivf_set_scale_factor(params, 1.5f); +zvec_query_params_ivf_destroy(params); +``` + +### Flat 查询参数 + +```c +typedef struct { + ZVecQueryParams base; + float scale_factor; // 缩放因子 +} ZVecFlatQueryParams; + +ZVecFlatQueryParams *params = zvec_query_params_flat_create( + ZVEC_INDEX_TYPE_FLAT, + false, // is_using_refiner + 1.0f // scale_factor +); + +zvec_query_params_flat_destroy(params); +``` + +### 基础查询参数 + +```c +// 创建基础参数 +ZVecQueryParams *params = zvec_query_params_create(ZVEC_INDEX_TYPE_HNSW); + +// 设置属性 +zvec_query_params_set_index_type(params, ZVEC_INDEX_TYPE_HNSW); +zvec_query_params_set_radius(params, 0.5f); +zvec_query_params_set_is_linear(params, true); +zvec_query_params_set_is_using_refiner(params, true); + +zvec_query_params_destroy(params); +``` + +### 向量查询 + +```c +typedef struct { + int topk; // 返回结果数 + ZVecString field_name; // 查询字段名 + ZVecByteArray query_vector; // 查询向量 + ZVecByteArray query_sparse_indices; // 稀疏向量索引 + ZVecByteArray query_sparse_values; // 稀疏向量值 + ZVecString filter; // 过滤表达式 + bool include_vector; // 是否返回向量 + bool include_doc_id; // 是否返回文档 ID + ZVecStringArray output_fields; // 输出字段列表 + ZVecQueryParamsUnion *query_params; // 查询参数 +} ZVecVectorQuery; + +// 使用宏快速创建 +float query_vec[768] = {0.1f, 0.2f, ...}; +ZVecVectorQuery query = ZVEC_VECTOR_QUERY( + "embedding", // 字段名 + ZVEC_FLOAT_ARRAY(query_vec, 768), + 10, // topK + "category='news'" // 过滤条件 +); + +// 手动创建 +ZVecVectorQuery query = { + .topk = 10, + .field_name = ZVEC_STRING("embedding"), + .query_vector = ZVEC_FLOAT_ARRAY(query_vec, 768), + .filter = ZVEC_STRING(""), + .include_vector = true, + .include_doc_id = true, + .output_fields.strings = NULL, + .output_fields.count = 0, + .query_params = NULL +}; + +// 执行查询 +ZVecDoc **results; +size_t result_count; +ZVecErrorCode rc = zvec_collection_query(collection, &query, &results, &result_count); + +if (rc == ZVEC_OK) { + for (size_t i = 0; i < 
result_count; i++) { + const char *pk = zvec_doc_get_pk_pointer(results[i]); + float score = zvec_doc_get_score(results[i]); + printf("Result %zu: pk=%s, score=%f\n", i, pk, score); + } +} + +// 释放结果 +zvec_docs_free(results, result_count); +``` + +### 分组向量查询 + +```c +typedef struct { + ZVecString field_name; // 查询字段名 + ZVecByteArray query_vector; // 查询向量 + ZVecByteArray query_sparse_indices; // 稀疏向量索引 + ZVecByteArray query_sparse_values; // 稀疏向量值 + ZVecString filter; // 过滤表达式 + bool include_vector; // 是否返回向量 + ZVecStringArray output_fields; // 输出字段列表 + ZVecString group_by_field_name; // 分组字段名 + uint32_t group_count; // 分组数量 + uint32_t group_topk; // 每组返回结果数 + ZVecQueryParamsUnion *query_params; // 查询参数 +} ZVecGroupByVectorQuery; + +// 创建分组查询 +ZVecGroupByVectorQuery query = { + .field_name = ZVEC_STRING("embedding"), + .query_vector = ZVEC_FLOAT_ARRAY(query_vec, 768), + .filter = ZVEC_STRING(""), + .include_vector = false, + .group_by_field_name = ZVEC_STRING("category"), + .group_count = 5, + .group_topk = 3, + .query_params = NULL +}; + +// 执行查询 +ZVecDoc **results; +ZVecString **group_values; +size_t result_count; + +ZVecErrorCode rc = zvec_collection_query_by_group( + collection, &query, &results, &group_values, &result_count); + +if (rc == ZVEC_OK) { + for (size_t i = 0; i < result_count; i++) { + printf("Group: %s\n", group_values[i]->data); + // 处理结果... + } +} + +// 释放结果 +zvec_docs_free(results, result_count); +zvec_string_array_destroy((ZVecStringArray*)group_values); +``` + +### 按主键获取 + +```c +const char *pks[] = {"doc_001", "doc_002", "doc_003"}; +ZVecDoc **documents; +size_t found_count; + +ZVecErrorCode rc = zvec_collection_fetch(collection, + pks, 3, &documents, &found_count); + +printf("Found %zu documents\n", found_count); + +// 使用... 
+zvec_docs_free(documents, found_count); +``` + +| 函数 | 参数 | 返回值 | 说明 | +|------|------|--------|------| +| `zvec_query_params_create(type)` | 索引类型 | `ZVecQueryParams*` | 创建查询参数 | +| `zvec_query_params_hnsw_create(type, ef, radius, linear, refiner)` | 类型,ef, 半径,线性,优化器 | `ZVecHnswQueryParams*` | 创建 HNSW 查询参数 | +| `zvec_query_params_ivf_create(type, nprobe, refiner, scale)` | 类型,nprobe, 优化器,缩放因子 | `ZVecIVFQueryParams*` | 创建 IVF 查询参数 | +| `zvec_query_params_flat_create(type, refiner, scale)` | 类型,优化器,缩放因子 | `ZVecFlatQueryParams*` | 创建 Flat 查询参数 | +| `zvec_query_params_union_create(type)` | 索引类型 | `ZVecQueryParamsUnion*` | 创建查询参数联合 | +| `zvec_query_params_destroy(params)` | 参数 | void | 销毁查询参数 | +| `zvec_query_params_hnsw_destroy(params)` | 参数 | void | 销毁 HNSW 查询参数 | +| `zvec_query_params_ivf_destroy(params)` | 参数 | void | 销毁 IVF 查询参数 | +| `zvec_query_params_flat_destroy(params)` | 参数 | void | 销毁 Flat 查询参数 | +| `zvec_query_params_union_destroy(params)` | 参数 | void | 销毁查询参数联合 | +| `zvec_query_params_set_index_type(params, type)` | 参数,类型 | `ZVecErrorCode` | 设置索引类型 | +| `zvec_query_params_set_radius(params, radius)` | 参数,半径 | `ZVecErrorCode` | 设置搜索半径 | +| `zvec_query_params_set_is_linear(params, linear)` | 参数,是否线性 | `ZVecErrorCode` | 设置线性搜索 | +| `zvec_query_params_set_is_using_refiner(params, refiner)` | 参数,是否优化器 | `ZVecErrorCode` | 设置优化器 | +| `zvec_query_params_hnsw_set_ef(params, ef)` | 参数,ef | `ZVecErrorCode` | 设置 ef | +| `zvec_query_params_ivf_set_nprobe(params, nprobe)` | 参数,nprobe | `ZVecErrorCode` | 设置 nprobe | +| `zvec_query_params_ivf_set_scale_factor(params, scale)` | 参数,缩放因子 | `ZVecErrorCode` | 设置缩放因子 | +| `zvec_collection_query(coll, query, &results, &count)` | 集合,查询,结果输出,数量输出 | `ZVecErrorCode` | 向量查询 | +| `zvec_collection_query_by_group(coll, query, &results, &groups, &count)` | 集合,分组查询,结果输出,分组值输出,数量输出 | `ZVecErrorCode` | 分组向量查询 | +| `zvec_collection_fetch(coll, pks, count, &docs, &found)` | 集合,主键数组,数量,文档输出,找到数量 | `ZVecErrorCode` | 按主键获取 | + +--- + +## 工具函数 + 
+### 类型转字符串 + +```c +// 数据类型转字符串 +const char *type_str = zvec_data_type_to_string(ZVEC_DATA_TYPE_VECTOR_FP32); +// 返回:"VECTOR_FP32" + +// 索引类型转字符串 +const char *idx_str = zvec_index_type_to_string(ZVEC_INDEX_TYPE_HNSW); +// 返回:"HNSW" + +// 距离类型转字符串 +const char *metric_str = zvec_metric_type_to_string(ZVEC_METRIC_TYPE_COSINE); +// 返回:"COSINE" + +// 错误码转字符串 +const char *err_str = zvec_error_code_to_string(ZVEC_ERROR_INVALID_ARGUMENT); +// 返回:"Invalid argument" +``` + +| 函数 | 参数 | 返回值 | 说明 | +|------|------|--------|------| +| `zvec_data_type_to_string(type)` | `ZVecDataType` | `const char*` | 数据类型转字符串 | +| `zvec_index_type_to_string(type)` | `ZVecIndexType` | `const char*` | 索引类型转字符串 | +| `zvec_metric_type_to_string(type)` | `ZVecMetricType` | `const char*` | 距离类型转字符串 | +| `zvec_error_code_to_string(code)` | `ZVecErrorCode` | `const char*` | 错误码转字符串 | + +--- + +## 完整示例 + +### 构建可搜索的向量数据库 + +```c +#include "zvec/c_api.h" +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#define DIM 768 +#define DOC_COUNT 1000 + +// 生成随机向量 +void generate_vector(float *vec, size_t dim) { + for (size_t i = 0; i < dim; i++) { + vec[i] = (float)rand() / RAND_MAX; + } +} + +int main() { + ZVecErrorCode rc; + + // ========== 1. 初始化 ========== + printf("Initializing ZVec...\n"); + rc = zvec_initialize(NULL); + if (rc != ZVEC_OK) { + fprintf(stderr, "Failed to initialize: %s\n", + zvec_error_code_to_string(rc)); + return 1; + } + printf("Version: %s\n", zvec_get_version()); + + // ========== 2.
创建 Schema ========== + printf("Creating schema...\n"); + ZVecCollectionSchema *schema = zvec_collection_schema_create("documents"); + + // ID 字段 + ZVecFieldSchema *id_field = zvec_field_schema_create( + "id", ZVEC_DATA_TYPE_STRING, false, 0); + zvec_collection_schema_add_field(schema, id_field); + + // 向量字段 + ZVecFieldSchema *embedding_field = zvec_field_schema_create( + "embedding", ZVEC_DATA_TYPE_VECTOR_FP32, false, DIM); + zvec_collection_schema_add_field(schema, embedding_field); + + // 标题字段 + ZVecFieldSchema *title_field = zvec_field_schema_create( + "title", ZVEC_DATA_TYPE_STRING, true, 0); + ZVecInvertIndexParams *invert_params = zvec_index_params_invert_create( + true, true); // 启用范围优化和通配符 + zvec_field_schema_set_invert_index(title_field, invert_params); + zvec_collection_schema_add_field(schema, title_field); + + // 时间戳字段 + ZVecFieldSchema *ts_field = zvec_field_schema_create( + "timestamp", ZVEC_DATA_TYPE_INT64, true, 0); + zvec_collection_schema_add_field(schema, ts_field); + + // 验证 Schema + ZVecString *error_msg; + rc = zvec_collection_schema_validate(schema, &error_msg); + if (rc != ZVEC_OK) { + fprintf(stderr, "Invalid schema: %s\n", error_msg->data); + zvec_free_string(error_msg); + return 1; + } + + // ========== 3. 创建 Collection ========== + printf("Creating collection...\n"); + ZVecCollection *collection; + ZVecCollectionOptions options = ZVEC_DEFAULT_OPTIONS(); + + rc = zvec_collection_create_and_open( + "./my_vector_db", schema, &options, &collection); + if (rc != ZVEC_OK) { + fprintf(stderr, "Failed to create collection: %s\n", + zvec_error_code_to_string(rc)); + return 1; + } + + // ========== 4. 
创建索引 ========== + printf("Creating HNSW index...\n"); + ZVecHnswIndexParams *hnsw_params = zvec_index_params_hnsw_create( + ZVEC_METRIC_TYPE_COSINE, + ZVEC_QUANTIZE_TYPE_UNDEFINED, + 16, // m + 200, // ef_construction + 50 // ef_search + ); + rc = zvec_collection_create_hnsw_index(collection, "embedding", hnsw_params); + zvec_index_params_hnsw_destroy(hnsw_params); + + // ========== 5. 批量插入数据 ========== + printf("Inserting %d documents...\n", DOC_COUNT); + + ZVecDoc **docs = malloc(DOC_COUNT * sizeof(ZVecDoc*)); + float vectors[DOC_COUNT][DIM]; + + for (int i = 0; i < DOC_COUNT; i++) { + docs[i] = zvec_doc_create(); + + // 设置主键 + char pk[32]; + snprintf(pk, sizeof(pk), "doc_%06d", i); + zvec_doc_set_pk(docs[i], pk); + + // 生成随机向量 + generate_vector(vectors[i], DIM); + zvec_doc_add_field_by_value(docs[i], "embedding", + ZVEC_DATA_TYPE_VECTOR_FP32, vectors[i], sizeof(float) * DIM); + + // 添加标题 + char title[64]; + snprintf(title, sizeof(title), "Document Title %d", i); + zvec_doc_add_field_by_value(docs[i], "title", + ZVEC_DATA_TYPE_STRING, title, strlen(title) + 1); + + // 添加时间戳 + int64_t ts = 1700000000 + i * 1000; + zvec_doc_add_field_by_value(docs[i], "timestamp", + ZVEC_DATA_TYPE_INT64, &ts, sizeof(ts)); + } + + size_t success_count, error_count; + rc = zvec_collection_insert(collection, + (const ZVecDoc**)docs, DOC_COUNT, &success_count, &error_count); + printf("Inserted: %zu, Failed: %zu\n", success_count, error_count); + + // 清理文档 + zvec_docs_free(docs, DOC_COUNT); + free(docs); + + // 刷盘 + zvec_collection_flush(collection); + + // ========== 6. 
查询 ========== + printf("\nPerforming vector search...\n"); + + // 生成查询向量 + float query_vec[DIM]; + generate_vector(query_vec, DIM); + + // 创建查询 + ZVecVectorQuery query = ZVEC_VECTOR_QUERY( + "embedding", + ZVEC_FLOAT_ARRAY(query_vec, DIM), + 10, // topK + "timestamp > 1700500000" // 过滤条件 + ); + + // 执行查询 + ZVecDoc **results; + size_t result_count; + rc = zvec_collection_query(collection, &query, &results, &result_count); + + if (rc == ZVEC_OK) { + printf("Found %zu results:\n", result_count); + for (size_t i = 0; i < result_count; i++) { + const char *pk = zvec_doc_get_pk_pointer(results[i]); + float score = zvec_doc_get_score(results[i]); + + // 获取标题 + const char *title; + size_t title_size; + zvec_doc_get_field_value_copy(results[i], "title", + ZVEC_DATA_TYPE_STRING, (void**)&title, &title_size); + + printf(" [%zu] %s - score: %.4f - title: %s\n", + i, pk, score, title); + free((void*)title); + } + } + + // 释放结果 + zvec_docs_free(results, result_count); + + // ========== 7. 获取统计信息 ========== + printf("\nCollection statistics:\n"); + ZVecCollectionStats *stats; + rc = zvec_collection_get_stats(collection, &stats); + if (rc == ZVEC_OK) { + printf(" Total documents: %lu\n", stats->doc_count); + printf(" Index count: %zu\n", stats->index_count); + for (size_t i = 0; i < stats->index_count; i++) { + printf(" Index %zu: %s (%.1f%% complete)\n", + i, stats->index_names[i]->data, + stats->index_completeness[i] * 100); + } + zvec_collection_stats_destroy(stats); + } + + // ========== 8. 
清理 ========== + printf("\nCleaning up...\n"); + zvec_collection_close(collection); + zvec_collection_destroy(collection); + zvec_collection_schema_destroy(schema); + zvec_shutdown(); + + printf("Done!\n"); + return 0; +} +``` + +### 编译示例 + +```bash +gcc -o example example.c -lzvec_c_api -I./include -L./lib +./example +``` + +--- + +## 附录 + +### 内存管理约定 + +| 创建函数 | 释放函数 | 说明 | +|----------|----------|------| +| `zvec_*_create()` | `zvec_*_destroy()` | 需要成对调用 | +| `zvec_collection_create_and_open()` | `zvec_collection_close()` + `zvec_collection_destroy()` | Collection 生命周期 | +| `zvec_doc_create()` | `zvec_doc_destroy()` | 文档生命周期 | +| `zvec_get_last_error(&msg)` | `free(msg)` | 错误消息需手动释放 | +| `zvec_doc_get_field_value_copy()` | `free()` 或 `zvec_free_uint8_array()` | 字段值副本需释放 | +| 查询返回的 `results` | `zvec_docs_free()` | 查询结果批量释放 | + +### 宏定义速查 + +```c +// 索引参数宏 +ZVEC_HNSW_PARAMS(metric, m, ef_construction, ef_search, quant) +ZVEC_IVF_PARAMS(metric, nlist, niters, soar, nprobe, quant) +ZVEC_FLAT_PARAMS(metric, quant) +ZVEC_INVERT_PARAMS(range_opt, wildcard) + +// 数据结构宏 +ZVEC_STRING(str) +ZVEC_STRING_VIEW(str) +ZVEC_FLOAT_ARRAY(data_ptr, len) +ZVEC_INT64_ARRAY(data_ptr, len) + +// 选项宏 +ZVEC_DEFAULT_OPTIONS() + +// 查询宏 +ZVEC_VECTOR_QUERY(field_name, query_vec, top_k, filter) + +// 文档字段宏 +ZVEC_DOC_FIELD(name, type, value_union) +``` + +### 最佳实践 + +1. **初始化检查**: 总是检查 `zvec_initialize()` 的返回值 +2. **错误处理**: 每次 API 调用后检查返回值,使用 `zvec_get_last_error()` 获取详情 +3. **资源释放**: 确保所有创建的资源都被正确释放 +4. **批量操作**: 使用批量插入/更新/删除提高性能 +5. **索引选择**: + - 小规模数据 (< 10 万): 使用 Flat 索引 + - 中等规模 (10 万 -1000 万): 使用 HNSW 索引 + - 大规模 (> 1000 万): 使用 IVF 索引 +6.
**查询优化**: 合理使用过滤条件减少扫描范围 diff --git a/src/c_api/CMakeLists.txt b/src/c_api/CMakeLists.txt new file mode 100644 index 00000000..565479ab --- /dev/null +++ b/src/c_api/CMakeLists.txt @@ -0,0 +1,168 @@
# Copyright 2025-present the zvec project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake)
include(${PROJECT_ROOT_DIR}/cmake/option.cmake)
include(GNUInstallDirs)

set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)

# C API library source files
set(ZVEC_C_API_SOURCES
    c_api.cc
)

# C API library header files
set(ZVEC_C_API_HEADERS
    ${PROJECT_SOURCE_DIR}/src/include/zvec/c_api.h
)

# =============================================================================
# Build FAT Shared Library (zvec_c_api.so)
# =============================================================================
# BUILD_RELEASE_FAT_LIBS=ON:  Fully self-contained, zero external dependencies
#                             Users only need -lzvec_c_api
# BUILD_RELEASE_FAT_LIBS=OFF: Development mode, third-party libs linked normally
#                             Allows parallel test execution without symbol conflicts
#
# Implementation:
#   - Always embeds zvec_db, zvec_core, zvec_ailego via --whole-archive
#   - For release: also embeds all third-party libs (rocksdb, glog, protobuf, etc.)
#   - Uses --exclude-libs,ALL to hide third-party symbols from export
#
# NOTE(review): the description above does not match the script below. Nothing
# in this file tests BUILD_RELEASE_FAT_LIBS (third-party libraries are embedded
# unconditionally), and no --exclude-libs linker option is passed anywhere.
# Either implement the switch / option or trim this comment.
# =============================================================================
add_library(zvec_c_api SHARED
    ${ZVEC_C_API_SOURCES}
    ${ZVEC_C_API_HEADERS}
)

# Set library properties
set_target_properties(zvec_c_api PROPERTIES
    OUTPUT_NAME "zvec_c_api"
    POSITION_INDEPENDENT_CODE ON
    # Hide all symbols by default, only export C API
    CXX_VISIBILITY_PRESET hidden
    VISIBILITY_INLINES_HIDDEN ON
)

find_package(Threads REQUIRED)

# FAT mode: embed ALL libraries (including third-party) statically
# This creates a truly self-contained library with zero external dependencies
# Users only need to link libzvec_c_api.so without installing any dependencies
if(APPLE)
    # Combine all libraries in a single target_link_libraries call
    target_link_libraries(zvec_c_api
        PRIVATE
            # zvec static libraries
            zvec_db
            zvec_core
            zvec_ailego
            # Third-party libraries
            roaring
            Arrow::arrow_static
            Arrow::parquet_static
            Arrow::arrow_compute
            Arrow::arrow_dataset
            Arrow::arrow_acero
            rocksdb
            glog
            libprotobuf
            antlr4
            sparsehash
            magic_enum
            Threads::Threads
            ${CMAKE_DL_LIBS}
    )

    # Then use target_link_libraries with -force_load on macOS
    # This ensures all symbols from static libraries are included
    # Note: sparsehash and magic_enum are header-only, skip them
    #
    # The $<TARGET_FILE:...> expressions were reconstructed: one entry per
    # non-header-only library listed above, in the same order (13 in total).
    target_link_libraries(zvec_c_api PRIVATE
        -Wl,-force_load,$<TARGET_FILE:zvec_db>
        -Wl,-force_load,$<TARGET_FILE:zvec_core>
        -Wl,-force_load,$<TARGET_FILE:zvec_ailego>
        -Wl,-force_load,$<TARGET_FILE:roaring>
        -Wl,-force_load,$<TARGET_FILE:Arrow::arrow_static>
        -Wl,-force_load,$<TARGET_FILE:Arrow::parquet_static>
        -Wl,-force_load,$<TARGET_FILE:Arrow::arrow_compute>
        -Wl,-force_load,$<TARGET_FILE:Arrow::arrow_dataset>
        -Wl,-force_load,$<TARGET_FILE:Arrow::arrow_acero>
        -Wl,-force_load,$<TARGET_FILE:rocksdb>
        -Wl,-force_load,$<TARGET_FILE:glog>
        -Wl,-force_load,$<TARGET_FILE:libprotobuf>
        -Wl,-force_load,$<TARGET_FILE:antlr4>
    )

else()
    target_link_libraries(zvec_c_api
        PRIVATE
            # Force load all zvec static libraries (extract all objects)
            "-Wl,--whole-archive"
            zvec_db
            zvec_core
            zvec_ailego
            "-Wl,--no-whole-archive"
            # Force load ALL third-party libraries for zero-dependency deployment
            "-Wl,--whole-archive"
            roaring
            Arrow::arrow_static
            Arrow::parquet_static
            Arrow::arrow_compute
            Arrow::arrow_dataset
            Arrow::arrow_acero
            rocksdb
            glog
            libprotobuf
            antlr4
            sparsehash
            magic_enum
            "-Wl,--no-whole-archive"
            Threads::Threads
            ${CMAKE_DL_LIBS}
    )
endif()

# Include directories
# NOTE(review): both PUBLIC entries were reconstructed as the usual
# build-tree / install-tree pair; confirm the paths against upstream.
target_include_directories(zvec_c_api
    PUBLIC
        $<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/src/include>
        $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>
    PRIVATE
        ${PROJECT_SOURCE_DIR}/src
)

# Compile options
# NOTE(review): the two conditions were reconstructed as per-compiler guards
# (GNU, Clang); confirm against upstream.
target_compile_options(zvec_c_api PRIVATE
    $<$<CXX_COMPILER_ID:GNU>:-Wall -Wextra -Wpedantic>
    $<$<CXX_COMPILER_ID:Clang>:-Wall -Wextra -Wpedantic>
)

# =============================================================================
# Installation Rules
# =============================================================================

# Install shared library
install(TARGETS zvec_c_api
    LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
    ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
    RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
    INCLUDES DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
)

# Install headers
install(FILES ${PROJECT_SOURCE_DIR}/src/include/zvec/c_api.h
    DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/zvec
)
diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc new file mode 100644 index 00000000..c26ceee7 --- /dev/null +++ b/src/c_api/c_api.cc @@ -0,0 +1,5735 @@ +// Copyright 2025-present the zvec project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.
+ +#include "zvec/c_api.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +// ============================================================================= +// RAII Helpers and Error Handling Macros +// ============================================================================= + +namespace { + +// RAII guard for malloc-allocated memory +template +struct MallocGuard { + T *ptr; + explicit MallocGuard(T *p = nullptr) : ptr(p) {} + ~MallocGuard() { + if (ptr) std::free(ptr); + } + MallocGuard(const MallocGuard &) = delete; + MallocGuard &operator=(const MallocGuard &) = delete; + MallocGuard(MallocGuard &&other) noexcept : ptr(other.ptr) { + other.ptr = nullptr; + } + MallocGuard &operator=(MallocGuard &&other) noexcept { + if (this != &other) { + if (ptr) std::free(ptr); + ptr = other.ptr; + other.ptr = nullptr; + } + return *this; + } + T *get() const { + return ptr; + } + T *release() { + T *p = ptr; + ptr = nullptr; + return p; + } + T **ptr_ptr() { + return &ptr; + } +}; + +// RAII guard for C++ objects allocated with new +template +struct DeleteGuard { + T *ptr; + explicit DeleteGuard(T *p = nullptr) : ptr(p) {} + ~DeleteGuard() { + delete ptr; + } + DeleteGuard(const DeleteGuard &) = delete; + DeleteGuard &operator=(const DeleteGuard &) = delete; + DeleteGuard(DeleteGuard &&other) noexcept : ptr(other.ptr) { + other.ptr = nullptr; + } + T *get() const { + return ptr; + } + T *release() { + T *p = ptr; + ptr = nullptr; + return p; + } +}; + +// RAII guard for array allocated with new[] +template +struct DeleteArrayGuard { + T *ptr; + explicit DeleteArrayGuard(T *p = nullptr) : ptr(p) {} + ~DeleteArrayGuard() { + delete[] ptr; + } + DeleteArrayGuard(const DeleteArrayGuard &) = delete; + DeleteArrayGuard &operator=(const DeleteArrayGuard &) = delete; + DeleteArrayGuard(DeleteArrayGuard &&other) noexcept : 
ptr(other.ptr) { + other.ptr = nullptr; + } + T *get() const { + return ptr; + } + T *release() { + T *p = ptr; + ptr = nullptr; + return p; + } +}; + +} // namespace + +// Error checking macros - these preserve __LINE__ accuracy +#define ZVEC_CHECK_NOTNULL(ptr, error_code, msg) \ + if (!(ptr)) { \ + set_last_error_details(error_code, msg, __FILE__, __LINE__, __FUNCTION__); \ + return nullptr; \ + } + +#define ZVEC_CHECK_NOTNULL_ERRCODE(ptr, error_code, msg) \ + if (!(ptr)) { \ + set_last_error_details(error_code, msg, __FILE__, __LINE__, __FUNCTION__); \ + return (error_code); \ + } + +#define ZVEC_CHECK_COND(cond, error_code, msg) \ + if (cond) { \ + set_last_error_details(error_code, msg, __FILE__, __LINE__, __FUNCTION__); \ + return nullptr; \ + } + +#define ZVEC_CHECK_COND_ERRCODE(cond, error_code, msg) \ + if (cond) { \ + set_last_error_details(error_code, msg, __FILE__, __LINE__, __FUNCTION__); \ + return (error_code); \ + } + +// For void functions (no return value): +#define ZVEC_TRY_BEGIN_VOID try { +#define ZVEC_CATCH_END_VOID \ + } \ + catch (const std::exception &e) { \ + set_last_error(std::string("Exception: ") + e.what()); \ + } + +// For functions returning pointer - complete try-catch wrapper +// Usage: ZVEC_TRY_RETURN_NULL("error msg", code...) +// Note: Use variadic macro to handle commas in template arguments +#define ZVEC_TRY_RETURN_NULL(msg, ...) \ + try { \ + { __VA_ARGS__ } \ + } catch (const std::bad_alloc &e) { \ + set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, \ + std::string(msg) + ": " + e.what(), __FILE__, \ + __LINE__, __FUNCTION__); \ + return nullptr; \ + } catch (const std::exception &e) { \ + set_last_error_details(ZVEC_ERROR_INTERNAL_ERROR, \ + std::string(msg) + ": " + e.what(), __FILE__, \ + __LINE__, __FUNCTION__); \ + return nullptr; \ + } + +// For functions returning ErrorCode +// Usage: ZVEC_TRY_RETURN_ERROR("error msg", code...) 
+// Note: Use variadic macro to handle commas in template arguments +#define ZVEC_TRY_RETURN_ERROR(msg, ...) \ + try { \ + { __VA_ARGS__ } \ + } catch (const std::bad_alloc &e) { \ + set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, \ + std::string(msg) + ": " + e.what(), __FILE__, \ + __LINE__, __FUNCTION__); \ + return ZVEC_ERROR_RESOURCE_EXHAUSTED; \ + } catch (const std::exception &e) { \ + set_last_error_details(ZVEC_ERROR_INTERNAL_ERROR, \ + std::string(msg) + ": " + e.what(), __FILE__, \ + __LINE__, __FUNCTION__); \ + return ZVEC_ERROR_INTERNAL_ERROR; \ + } + +// For functions returning scalar values (int, float, size_t, etc.) +// Usage: ZVEC_TRY_RETURN_SCALAR("error msg", error_value, code...) +// Note: Use variadic macro to handle commas in template arguments +#define ZVEC_TRY_RETURN_SCALAR(msg, error_val, ...) \ + try { \ + { __VA_ARGS__ } \ + } catch (const std::bad_alloc &e) { \ + set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, \ + std::string(msg) + ": " + e.what(), __FILE__, \ + __LINE__, __FUNCTION__); \ + return (error_val); \ + } catch (const std::exception &e) { \ + set_last_error_details(ZVEC_ERROR_INTERNAL_ERROR, \ + std::string(msg) + ": " + e.what(), __FILE__, \ + __LINE__, __FUNCTION__); \ + return (error_val); \ + } + +// Global status flags +static std::atomic g_initialized{false}; +static std::mutex g_init_mutex; + +// Thread-local storage for error information +static thread_local std::string last_error_message; +static thread_local ZVecErrorDetails last_error_details; + +// Helper function: set error information +static void set_last_error(const std::string &msg) { + last_error_message = msg; + + last_error_details.code = ZVEC_ERROR_UNKNOWN; + last_error_details.message = last_error_message.c_str(); + last_error_details.file = nullptr; + last_error_details.line = 0; + last_error_details.function = nullptr; +} + +// Error setting function with detailed information +static void set_last_error_details(ZVecErrorCode code, const 
std::string &msg, + const char *file = nullptr, int line = 0, + const char *function = nullptr) { + last_error_message = msg; + last_error_details.code = code; + last_error_details.message = last_error_message.c_str(); + last_error_details.file = file; + last_error_details.line = line; + last_error_details.function = function; +} + +// ============================================================================= +// Version information interface implementation +// ============================================================================= + +// Store dynamically generated version information +static std::string g_version_info; +static std::mutex g_version_mutex; + +const char *zvec_get_version(void) { + std::lock_guard lock(g_version_mutex); + + if (g_version_info.empty()) { + ZVEC_TRY_BEGIN_VOID + std::string version = ZVEC_VERSION_STRING; + + // Try to get Git information + std::string git_info; +#ifdef ZVEC_GIT_DESCRIBE + git_info = ZVEC_GIT_DESCRIBE; +#elif defined(ZVEC_GIT_COMMIT_HASH) + git_info = std::string("g") + ZVEC_GIT_COMMIT_HASH; +#endif + + if (!git_info.empty()) { + version += "-" + git_info; + } + + version += + " (built " + std::string(__DATE__) + " " + std::string(__TIME__) + ")"; + + g_version_info = version; + ZVEC_CATCH_END_VOID + } + + return g_version_info.c_str(); +} + +bool zvec_check_version(int major, int minor, int patch) { + if (major < 0 || minor < 0 || patch < 0) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "Version numbers must be non-negative", __FILE__, + __LINE__, __FUNCTION__); + return false; + } + + if (ZVEC_VERSION_MAJOR > major) return true; + if (ZVEC_VERSION_MAJOR < major) return false; + + if (ZVEC_VERSION_MINOR > minor) return true; + if (ZVEC_VERSION_MINOR < minor) return false; + + return ZVEC_VERSION_PATCH >= patch; +} + +int zvec_get_version_major(void) { + return ZVEC_VERSION_MAJOR; +} + +int zvec_get_version_minor(void) { + return ZVEC_VERSION_MINOR; +} + +int zvec_get_version_patch(void) { + return 
ZVEC_VERSION_PATCH; +} + +// ============================================================================= +// String management functions implementation +// ============================================================================= + +ZVecString *zvec_string_create(const char *str) { + if (!str) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "String pointer cannot be null", __FILE__, __LINE__, + __FUNCTION__); + return nullptr; + } + + size_t len = strlen(str); + ZVecString *zstr = static_cast(malloc(sizeof(ZVecString))); + if (!zstr) { + set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to allocate memory for ZVecString", __FILE__, + __LINE__, __FUNCTION__); + return nullptr; + } + + char *data_buffer = static_cast(malloc(len + 1)); + if (!data_buffer) { + free(zstr); + set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to allocate memory for string data", + __FILE__, __LINE__, __FUNCTION__); + return nullptr; + } + + memcpy(data_buffer, str, len + 1); + zstr->data = data_buffer; + zstr->length = len; + zstr->capacity = len + 1; + return zstr; +} + +ZVecString *zvec_string_create_from_view(const ZVecStringView *view) { + if (!view || !view->data) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "String view or data cannot be null", __FILE__, + __LINE__, __FUNCTION__); + return nullptr; + } + + ZVecString *zstr = static_cast(malloc(sizeof(ZVecString))); + if (!zstr) { + set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to allocate memory for ZVecString", __FILE__, + __LINE__, __FUNCTION__); + return nullptr; + } + + char *data_buffer = static_cast(malloc(view->length + 1)); + if (!data_buffer) { + free(zstr); + set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to allocate memory for string data", + __FILE__, __LINE__, __FUNCTION__); + return nullptr; + } + + memcpy(data_buffer, view->data, view->length); + data_buffer[view->length] = '\0'; + zstr->data = data_buffer; + zstr->length = 
view->length; + zstr->capacity = view->length + 1; + + return zstr; +} + +ZVecString *zvec_bin_create(const uint8_t *data, size_t length) { + if (!data) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "Binary data pointer cannot be null", __FILE__, + __LINE__, __FUNCTION__); + return nullptr; + } + + ZVecString *zstr = static_cast(malloc(sizeof(ZVecString))); + if (!zstr) { + set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to allocate memory for ZVecString", __FILE__, + __LINE__, __FUNCTION__); + return nullptr; + } + + char *data_buffer = static_cast(malloc(length + 1)); + if (!data_buffer) { + free(zstr); + set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to allocate memory for binary data", + __FILE__, __LINE__, __FUNCTION__); + return nullptr; + } + + memcpy(data_buffer, data, length); + data_buffer[length] = '\0'; + zstr->data = data_buffer; + zstr->length = length; + zstr->capacity = length + 1; + + return zstr; +} + +ZVecString *zvec_string_copy(const ZVecString *str) { + if (!str || !str->data) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "Source string or data cannot be null", __FILE__, + __LINE__, __FUNCTION__); + return nullptr; + } + + return zvec_string_create(str->data); +} + +const char *zvec_string_c_str(const ZVecString *str) { + if (!str) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "String pointer cannot be null", __FILE__, __LINE__, + __FUNCTION__); + return nullptr; + } + + return str->data; +} + +size_t zvec_string_length(const ZVecString *str) { + if (!str) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "String pointer cannot be null", __FILE__, __LINE__, + __FUNCTION__); + return 0; + } + + return str->length; +} + +int zvec_string_compare(const ZVecString *str1, const ZVecString *str2) { + if (!str1 || !str2) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "String pointers cannot be null", __FILE__, __LINE__, + __FUNCTION__); + return -1; + } + + if 
(!str1->data || !str2->data) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "String data cannot be null", __FILE__, __LINE__, + __FUNCTION__); + return -1; + } + + return strcmp(str1->data, str2->data); +} + +// ============================================================================= +// Configuration-related functions implementation +// ============================================================================= + +ZVecConsoleLogConfig *zvec_config_console_log_create(ZVecLogLevel level) { + ZVecConsoleLogConfig *config = + static_cast(malloc(sizeof(ZVecConsoleLogConfig))); + if (!config) { + set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to allocate memory for ZVecConsoleLogConfig", + __FILE__, __LINE__, __FUNCTION__); + return nullptr; + } + config->level = level; + return config; +} + +ZVecFileLogConfig *zvec_config_file_log_create(ZVecLogLevel level, + const char *dir, + const char *basename, + uint32_t file_size, + uint32_t overdue_days) { + if (!dir || !basename) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "Directory or basename cannot be null", __FILE__, + __LINE__, __FUNCTION__); + return nullptr; + } + + ZVecFileLogConfig *config = + static_cast(malloc(sizeof(ZVecFileLogConfig))); + if (!config) { + set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to allocate memory for ZVecFileLogConfig", + __FILE__, __LINE__, __FUNCTION__); + return nullptr; + } + + config->level = level; + ZVecString *dir_str = zvec_string_create(dir); + ZVecString *basename_str = zvec_string_create(basename); + + if (!dir_str || !basename_str) { + if (dir_str) zvec_free_string(dir_str); + if (basename_str) zvec_free_string(basename_str); + free(config); + set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to create strings for file log config", + __FILE__, __LINE__, __FUNCTION__); + return nullptr; + } + + config->dir = *dir_str; + config->basename = *basename_str; + config->file_size = file_size; + 
config->overdue_days = overdue_days; + + // Free the temporary string wrappers (data is copied by value) + free(dir_str); + free(basename_str); + + return config; +} + +ZVecConfigData *zvec_config_data_create(void) { + ZVecConfigData *config = + static_cast(malloc(sizeof(ZVecConfigData))); + if (!config) { + set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to allocate memory for ZVecConfigData", + __FILE__, __LINE__, __FUNCTION__); + return nullptr; + } + + ZVecConsoleLogConfig *log_config = + zvec_config_console_log_create(ZVEC_LOG_LEVEL_WARN); + if (!log_config) { + free(config); + set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to create console log config", __FILE__, + __LINE__, __FUNCTION__); + return nullptr; + } + config->log_config = log_config; + config->log_type = ZVEC_LOG_TYPE_CONSOLE; + + // Set default values from C++ ConfigData + zvec::GlobalConfig::ConfigData config_data; + config->memory_limit_bytes = config_data.memory_limit_bytes; + config->query_thread_count = config_data.query_thread_count; + config->invert_to_forward_scan_ratio = + config_data.invert_to_forward_scan_ratio; + config->brute_force_by_keys_ratio = config_data.brute_force_by_keys_ratio; + config->optimize_thread_count = config_data.optimize_thread_count; + + return config; +} + +void zvec_config_console_log_destroy(ZVecConsoleLogConfig *config) { + if (config) { + free(config); + } +} + +void zvec_config_file_log_destroy(ZVecFileLogConfig *config) { + if (config) { + if (config->dir.data) free((void *)config->dir.data); + if (config->basename.data) free((void *)config->basename.data); + free(config); + } +} + +void zvec_config_data_destroy(ZVecConfigData *config) { + if (config->log_config) { + if (config->log_type == ZVEC_LOG_TYPE_CONSOLE) { + zvec_config_console_log_destroy( + (ZVecConsoleLogConfig *)config->log_config); + } else { + zvec_config_file_log_destroy((ZVecFileLogConfig *)config->log_config); + } + } + free(config); +} + +ZVecErrorCode 
zvec_config_data_set_memory_limit(ZVecConfigData *config, + uint64_t memory_limit_bytes) { + if (!config) { + set_last_error("Config data pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + config->memory_limit_bytes = memory_limit_bytes; + return ZVEC_OK; +} + +ZVecErrorCode zvec_config_data_set_log_config(ZVecConfigData *config, + ZVecLogType log_type, + void *log_config) { + if (!config || !log_config) { + set_last_error("Config data pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + if (config->log_config) { + if (config->log_type == ZVEC_LOG_TYPE_CONSOLE) { + zvec_config_console_log_destroy( + (ZVecConsoleLogConfig *)config->log_config); + } else { + zvec_config_file_log_destroy((ZVecFileLogConfig *)config->log_config); + } + } + + config->log_type = log_type; + config->log_config = log_config; + return ZVEC_OK; +} + +ZVecErrorCode zvec_config_data_set_query_thread_count(ZVecConfigData *config, + uint32_t thread_count) { + if (!config) { + set_last_error("Config data pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + config->query_thread_count = thread_count; + return ZVEC_OK; +} + +ZVecErrorCode zvec_config_data_set_optimize_thread_count( + ZVecConfigData *config, uint32_t thread_count) { + if (!config) { + set_last_error("Config data pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + config->optimize_thread_count = thread_count; + return ZVEC_OK; +} + + +// ============================================================================= +// Initialization and cleanup interface implementation +// ============================================================================= + +ZVecErrorCode zvec_initialize(const ZVecConfigData *config) { + std::lock_guard lock(g_init_mutex); + + if (g_initialized.load()) { + set_last_error_details(ZVEC_ERROR_ALREADY_EXISTS, + "Library already initialized"); + return ZVEC_ERROR_ALREADY_EXISTS; + } + + ZVEC_TRY_RETURN_ERROR( + "Initialization failed", + // Convert to C++ 
configuration object + if (config) { + zvec::GlobalConfig::ConfigData cpp_config{}; + cpp_config.memory_limit_bytes = config->memory_limit_bytes; + cpp_config.query_thread_count = config->query_thread_count; + cpp_config.invert_to_forward_scan_ratio = + config->invert_to_forward_scan_ratio; + cpp_config.brute_force_by_keys_ratio = + config->brute_force_by_keys_ratio; + cpp_config.optimize_thread_count = config->optimize_thread_count; + + // Set log configuration + if (config->log_config) { + std::shared_ptr log_config; + + switch (config->log_type) { + case ZVEC_LOG_TYPE_CONSOLE: { + ZVecConsoleLogConfig *console_config = + (ZVecConsoleLogConfig *)config->log_config; + auto console_level = static_cast( + console_config->level); + log_config = + std::make_shared( + console_level); + break; + } + case ZVEC_LOG_TYPE_FILE: { + ZVecFileLogConfig *file_config = + (ZVecFileLogConfig *)config->log_config; + auto file_level = + static_cast(file_config->level); + std::string dir(file_config->dir.data, file_config->dir.length); + std::string basename(file_config->basename.data, + file_config->basename.length); + log_config = std::make_shared( + file_level, dir, basename); + break; + } + default: + throw std::runtime_error("Unknown log type"); + } + cpp_config.log_config = log_config; + } + // Initialize global configuration + auto status = zvec::GlobalConfig::Instance().Initialize(cpp_config); + if (!status.ok()) { + set_last_error(status.message()); + return ZVEC_ERROR_INTERNAL_ERROR; + } + } else { + // Initialize with default configuration + zvec::GlobalConfig::ConfigData default_config; + auto status = zvec::GlobalConfig::Instance().Initialize(default_config); + if (!status.ok()) { + set_last_error(status.message()); + return ZVEC_ERROR_INTERNAL_ERROR; + } + } g_initialized.store(true); + return ZVEC_OK;) +} + +ZVecErrorCode zvec_shutdown(void) { + std::lock_guard lock(g_init_mutex); + + if (!g_initialized.load()) { + set_last_error_details(ZVEC_ERROR_FAILED_PRECONDITION, 
+ "Library not initialized"); + return ZVEC_ERROR_FAILED_PRECONDITION; + } + + ZVEC_TRY_RETURN_ERROR("Shutdown failed", g_initialized.store(false); + return ZVEC_OK;) +} + +ZVecErrorCode zvec_is_initialized(bool *initialized) { + if (!initialized) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "Initialized flag pointer cannot be null", __FILE__, + __LINE__, __FUNCTION__); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + *initialized = g_initialized.load(); + return ZVEC_OK; +} + +// ============================================================================= +// Error handling interface implementation +// ============================================================================= + +ZVecErrorCode zvec_get_last_error_details(ZVecErrorDetails *error_details) { + if (!error_details) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "Error details pointer cannot be null", __FILE__, + __LINE__, __FUNCTION__); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + *error_details = last_error_details; + return ZVEC_OK; +} + +void zvec_clear_error(void) { + last_error_message.clear(); + last_error_details = {}; +} + +// Helper functions: convert internal status to error code +static ZVecErrorCode status_to_error_code(const zvec::Status &status) { + if (status.code() < zvec::StatusCode::OK || + status.code() > zvec::StatusCode::UNKNOWN) { + set_last_error("Unexpected status code: " + + std::to_string(static_cast(status.code()))); + return ZVEC_ERROR_UNKNOWN; + } + + return static_cast(status.code()); +} + +// Helper function: handle Expected results +template +static ZVecErrorCode handle_expected_result( + const tl::expected &result, T *out_value = nullptr) { + if (result.has_value()) { + if (out_value) { + *out_value = result.value(); + } + return ZVEC_OK; + } else { + set_last_error(result.error().message()); + return status_to_error_code(result.error()); + } +} + +// Helper function: copy strings +static char *copy_string(const std::string &str) { + if 
(str.empty()) return nullptr; + char *copy = static_cast(malloc(str.length() + 1)); + strcpy(copy, str.c_str()); + return copy; +} + +static zvec::DataType convert_data_type(ZVecDataType zvec_type) { + if (zvec_type < ZVEC_DATA_TYPE_UNDEFINED || + zvec_type > ZVEC_DATA_TYPE_ARRAY_DOUBLE) { + return zvec::DataType::UNDEFINED; + } + + return static_cast(zvec_type); +} + +static ZVecDataType convert_zvec_data_type(zvec::DataType cpp_type) { + if (cpp_type < zvec::DataType::UNDEFINED || + cpp_type > zvec::DataType::ARRAY_DOUBLE) { + return ZVEC_DATA_TYPE_UNDEFINED; + } + + return static_cast(cpp_type); +} + +// Helper function: convert metric type +static zvec::MetricType convert_metric_type(ZVecMetricType metric_type) { + if (metric_type < ZVEC_METRIC_TYPE_UNDEFINED || + metric_type > ZVEC_METRIC_TYPE_MIPSL2) { + return zvec::MetricType::UNDEFINED; + } + + return static_cast(metric_type); +} + +// Helper function: convert ZVecIndexType to internal IndexType +static zvec::IndexType convert_index_type(ZVecIndexType zvec_type) { + if (zvec_type < ZVEC_INDEX_TYPE_UNDEFINED || + zvec_type > ZVEC_INDEX_TYPE_INVERT) { + return zvec::IndexType::UNDEFINED; + } + + return static_cast(zvec_type); +} + +// Helper function: convert ZVecQuantizeType to internal QuantizeType +static zvec::QuantizeType convert_quantize_type(ZVecQuantizeType zvec_type) { + if (zvec_type < ZVEC_QUANTIZE_TYPE_UNDEFINED || + zvec_type > ZVEC_QUANTIZE_TYPE_INT4) { + return zvec::QuantizeType::UNDEFINED; + } + + return static_cast(zvec_type); +} + +// Helper function: set field index params +static zvec::Status set_field_index_params(zvec::FieldSchema::Ptr &field_schema, + const ZVecFieldSchema *zvec_field) { + if (!zvec_field->index_params) { + return zvec::Status::OK(); + } + + switch (zvec_field->index_params->index_type) { + case ZVEC_INDEX_TYPE_HNSW: { + const ZVecHnswIndexParams *params = + &zvec_field->index_params->params.hnsw_params; + auto metric = convert_metric_type(params->base.metric_type); + 
auto quantize = convert_quantize_type(params->base.quantize_type); + auto index_params = std::make_shared( + metric, params->m, params->ef_construction, quantize); + field_schema->set_index_params(index_params); + break; + } + case ZVEC_INDEX_TYPE_FLAT: { + const ZVecFlatIndexParams *params = + &zvec_field->index_params->params.flat_params; + auto metric = convert_metric_type(params->base.metric_type); + auto quantize = convert_quantize_type(params->base.quantize_type); + auto index_params = + std::make_shared(metric, quantize); + field_schema->set_index_params(index_params); + break; + } + case ZVEC_INDEX_TYPE_INVERT: { + const ZVecInvertIndexParams *params = + &zvec_field->index_params->params.invert_params; + auto index_params = std::make_shared( + params->enable_range_optimization, params->enable_extended_wildcard); + field_schema->set_index_params(index_params); + break; + } + default: + break; + } + + return zvec::Status::OK(); +} + +// ============================================================================= +// Memory Management interface implementation +// ============================================================================= + +void zvec_free_string(ZVecString *str) { + if (str) { + if (str->data) { + free((void *)str->data); + } + free(str); + } +} + +ZVecStringArray *zvec_string_array_create(size_t count) { + ZVecStringArray *array = (ZVecStringArray *)malloc(sizeof(ZVecStringArray)); + array->count = count; + array->strings = (ZVecString *)malloc(sizeof(ZVecString) * count); + memset(array->strings, 0, sizeof(ZVecString) * count); + return array; +} + +void zvec_string_array_add(ZVecStringArray *array, size_t idx, + const char *str) { + if (idx >= array->count) return; + size_t len = strlen(str); + array->strings[idx].data = (char *)malloc(len + 1); + memcpy(array->strings[idx].data, str, len + 1); + array->strings[idx].length = len; + array->strings[idx].capacity = len + 1; +} + +void zvec_string_array_destroy(ZVecStringArray *array) { + if 
(!array) return; + for (size_t i = 0; i < array->count; i++) { + free((void *)array->strings[i].data); + } + free(array->strings); + free(array); +} + + +// Byte array helper functions +ZVecMutableByteArray *zvec_byte_array_create(size_t capacity) { + ZVecMutableByteArray *array = + (ZVecMutableByteArray *)malloc(sizeof(ZVecMutableByteArray)); + if (!array) return nullptr; + + array->data = (uint8_t *)malloc(capacity); + if (!array->data) { + free(array); + return nullptr; + } + + array->length = 0; + array->capacity = capacity; + memset(array->data, 0, capacity); + return array; +} + +void zvec_byte_array_destroy(ZVecMutableByteArray *array) { + if (!array) return; + if (array->data) { + free(array->data); + } + free(array); +} + +// Float array helper functions +ZVecFloatArray *zvec_float_array_create(size_t count) { + ZVecFloatArray *array = (ZVecFloatArray *)malloc(sizeof(ZVecFloatArray)); + if (!array) return nullptr; + + array->data = (const float *)malloc(sizeof(float) * count); + if (!array->data) { + free(array); + return nullptr; + } + + array->length = count; + memset((void *)array->data, 0, sizeof(float) * count); + return array; +} + +void zvec_float_array_destroy(ZVecFloatArray *array) { + if (!array) return; + if (array->data) { + free((void *)array->data); + } + free(array); +} + +// Int64 array helper functions +ZVecInt64Array *zvec_int64_array_create(size_t count) { + ZVecInt64Array *array = (ZVecInt64Array *)malloc(sizeof(ZVecInt64Array)); + if (!array) return nullptr; + + array->data = (const int64_t *)malloc(sizeof(int64_t) * count); + if (!array->data) { + free(array); + return nullptr; + } + + array->length = count; + memset((void *)array->data, 0, sizeof(int64_t) * count); + return array; +} + +void zvec_int64_array_destroy(ZVecInt64Array *array) { + if (!array) return; + if (array->data) { + free((void *)array->data); + } + free(array); +} + +void zvec_free_float_array(float *array) { + if (array) { + free(array); + } +} + +void 
zvec_free_str_array(char **array, size_t count) { + if (!array) return; + + // If count is 0, only free the string array itself, don't process internal + // strings + if (count == 0) { + free(array); + return; + } + + for (size_t i = 0; i < count; ++i) { + if (array[i]) { // Only free when string pointer is not null + free(array[i]); + } + } + free(array); +} + +ZVecErrorCode zvec_get_last_error(char **error_msg) { + if (!error_msg) { + set_last_error("Invalid argument: error_msg cannot be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + *error_msg = copy_string(last_error_message); + return ZVEC_OK; +} + +void zvec_free_uint8_array(uint8_t *array) { + if (array) { + free(array); + } +} + +void zvec_free_field_schema(ZVecFieldSchema *field_schema) { + if (field_schema) { + if (field_schema->index_params) { + zvec_index_params_destroy(field_schema->index_params); + } + free(field_schema); + } +} + +// ============================================================================= +// Index parameters management interface implementation +// ============================================================================= + +void zvec_index_params_base_init(ZVecBaseIndexParams *params, + ZVecIndexType index_type) { + if (params) { + params->index_type = index_type; + } +} + +void zvec_index_params_invert_init(ZVecInvertIndexParams *params, + bool enable_range_opt, + bool enable_wildcard) { + if (params) { + zvec_index_params_base_init(¶ms->base, ZVEC_INDEX_TYPE_INVERT); + params->enable_range_optimization = enable_range_opt; + params->enable_extended_wildcard = enable_wildcard; + } +} + +void zvec_index_params_vector_init(ZVecVectorIndexParams *params, + ZVecIndexType index_type, + ZVecMetricType metric_type, + ZVecQuantizeType quantize_type) { + if (params) { + zvec_index_params_base_init(¶ms->base, index_type); + params->metric_type = metric_type; + params->quantize_type = quantize_type; + } +} + +void zvec_index_params_hnsw_init(ZVecHnswIndexParams *params, + 
ZVecMetricType metric_type, int m, + int ef_construction, int ef_search, + ZVecQuantizeType quantize_type) { + if (params) { + zvec_index_params_vector_init(¶ms->base, ZVEC_INDEX_TYPE_HNSW, + metric_type, quantize_type); + params->m = m; + params->ef_construction = ef_construction; + params->ef_search = ef_search; + } +} + +void zvec_index_params_flat_init(ZVecFlatIndexParams *params, + ZVecMetricType metric_type, + ZVecQuantizeType quantize_type) { + if (params) { + zvec_index_params_vector_init(¶ms->base, ZVEC_INDEX_TYPE_FLAT, + metric_type, quantize_type); + } +} + +void zvec_index_params_ivf_init(ZVecIVFIndexParams *params, + ZVecMetricType metric_type, int n_list, + int n_iters, bool use_soar, int n_probe, + ZVecQuantizeType quantize_type) { + if (params) { + zvec_index_params_vector_init(¶ms->base, ZVEC_INDEX_TYPE_IVF, + metric_type, quantize_type); + params->n_list = n_list; + params->n_iters = n_iters; + params->use_soar = use_soar; + params->n_probe = n_probe; + } +} + +void zvec_index_params_init_default(ZVecIndexParams *params, + ZVecIndexType index_type, + ZVecMetricType metric_type) { + if (!params) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "Index params pointer cannot be null", __FILE__, + __LINE__, __FUNCTION__); + return; + } + + params->index_type = index_type; + + switch (index_type) { + case ZVEC_INDEX_TYPE_INVERT: + zvec_index_params_invert_init(¶ms->params.invert_params, false, + false); + break; + + case ZVEC_INDEX_TYPE_HNSW: + zvec_index_params_hnsw_init(¶ms->params.hnsw_params, metric_type, 16, + 200, 50, ZVEC_QUANTIZE_TYPE_UNDEFINED); + break; + + case ZVEC_INDEX_TYPE_FLAT: + zvec_index_params_flat_init(¶ms->params.flat_params, metric_type, + ZVEC_QUANTIZE_TYPE_UNDEFINED); + break; + + case ZVEC_INDEX_TYPE_IVF: + zvec_index_params_ivf_init(¶ms->params.ivf_params, metric_type, 100, + 10, false, 10, ZVEC_QUANTIZE_TYPE_UNDEFINED); + break; + + default: + set_last_error_details(ZVEC_ERROR_NOT_SUPPORTED, "Unsupported index type", 
+ __FILE__, __LINE__, __FUNCTION__); + break; + } +} + +void zvec_index_params_destroy(ZVecIndexParams *params) { + if (params) { + free(params); + } +} + +ZVecInvertIndexParams *zvec_index_params_invert_create(bool enable_range_opt, + bool enable_wildcard) { + ZVecInvertIndexParams *params = static_cast( + malloc(sizeof(ZVecInvertIndexParams))); + if (!params) { + set_last_error_details( + ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to allocate memory for ZVecInvertIndexParams", __FILE__, + __LINE__, __FUNCTION__); + return nullptr; + } + zvec_index_params_base_init(¶ms->base, ZVEC_INDEX_TYPE_INVERT); + params->enable_range_optimization = enable_range_opt; + params->enable_extended_wildcard = enable_wildcard; + return params; +} + +ZVecVectorIndexParams *zvec_index_params_vector_create( + ZVecIndexType index_type, ZVecMetricType metric_type, + ZVecQuantizeType quantize_type) { + ZVecVectorIndexParams *params = static_cast( + malloc(sizeof(ZVecVectorIndexParams))); + if (!params) { + set_last_error_details( + ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to allocate memory for ZVecVectorIndexParams", __FILE__, + __LINE__, __FUNCTION__); + return nullptr; + } + zvec_index_params_base_init(¶ms->base, index_type); + params->metric_type = metric_type; + params->quantize_type = quantize_type; + return params; +} + +ZVecHnswIndexParams *zvec_index_params_hnsw_create( + ZVecMetricType metric_type, ZVecQuantizeType quantize_type, int m, + int ef_construction, int ef_search) { + ZVecHnswIndexParams *params = + static_cast(malloc(sizeof(ZVecHnswIndexParams))); + if (!params) { + set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to allocate memory for ZVecHnswIndexParams", + __FILE__, __LINE__, __FUNCTION__); + return nullptr; + } + zvec_index_params_vector_init(¶ms->base, ZVEC_INDEX_TYPE_HNSW, + metric_type, quantize_type); + params->m = m; + params->ef_construction = ef_construction; + params->ef_search = ef_search; + return params; +} + +ZVecFlatIndexParams 
*zvec_index_params_flat_create( + ZVecMetricType metric_type, ZVecQuantizeType quantize_type) { + ZVecFlatIndexParams *params = + static_cast(malloc(sizeof(ZVecFlatIndexParams))); + if (!params) { + set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to allocate memory for ZVecFlatIndexParams", + __FILE__, __LINE__, __FUNCTION__); + return nullptr; + } + zvec_index_params_vector_init(¶ms->base, ZVEC_INDEX_TYPE_FLAT, + metric_type, quantize_type); + return params; +} + +ZVecIVFIndexParams *zvec_index_params_ivf_create(ZVecMetricType metric_type, + ZVecQuantizeType quantize_type, + int n_list, int n_iters, + bool use_soar, int n_probe) { + ZVecIVFIndexParams *params = + static_cast(malloc(sizeof(ZVecIVFIndexParams))); + if (!params) { + set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to allocate memory for ZVecIVFIndexParams", + __FILE__, __LINE__, __FUNCTION__); + return nullptr; + } + zvec_index_params_vector_init(¶ms->base, ZVEC_INDEX_TYPE_IVF, metric_type, + quantize_type); + params->n_list = n_list; + params->n_iters = n_iters; + params->use_soar = use_soar; + params->n_probe = n_probe; + return params; +} + +void zvec_index_params_invert_destroy(ZVecInvertIndexParams *params) { + if (params) { + free(params); + } +} + +void zvec_index_params_vector_destroy(ZVecVectorIndexParams *params) { + if (params) { + free(params); + } +} + +void zvec_index_params_hnsw_destroy(ZVecHnswIndexParams *params) { + if (params) { + free(params); + } +} + +void zvec_index_params_flat_destroy(ZVecFlatIndexParams *params) { + if (params) { + free(params); + } +} + +void zvec_index_params_ivf_destroy(ZVecIVFIndexParams *params) { + if (params) { + free(params); + } +} + +// ============================================================================= +// FieldSchema management interface implementation +// ============================================================================= + +ZVecFieldSchema *zvec_field_schema_create(const char *name, + 
ZVecDataType data_type, bool nullable, + uint32_t dimension) { + if (!name) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "Field name cannot be null", __FILE__, __LINE__, + __FUNCTION__); + return nullptr; + } + + ZVecFieldSchema *schema = + static_cast(malloc(sizeof(ZVecFieldSchema))); + if (!schema) { + set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to allocate memory for ZVecFieldSchema", + __FILE__, __LINE__, __FUNCTION__); + return nullptr; + } + + schema->name = zvec_string_create(name); + if (!schema->name) { + free(schema); + set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to create string for field name", __FILE__, + __LINE__, __FUNCTION__); + return nullptr; + } + + schema->data_type = data_type; + schema->nullable = nullable; + schema->dimension = dimension; + schema->index_params = nullptr; + + return schema; +} + +void zvec_field_schema_destroy(ZVecFieldSchema *schema) { + if (schema) { + zvec_free_string(schema->name); + if (schema->index_params) { + zvec_index_params_destroy(schema->index_params); + schema->index_params = nullptr; + } + free(schema); + } +} + +ZVecErrorCode zvec_field_schema_set_index_params( + ZVecFieldSchema *schema, const ZVecIndexParams *index_params) { + if (!schema) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "Field schema pointer cannot be null", __FILE__, + __LINE__, __FUNCTION__); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + if (!index_params) { + if (schema->index_params) { + zvec_index_params_destroy(schema->index_params); + free(schema->index_params); + schema->index_params = nullptr; + } + return ZVEC_OK; + } + + if (!schema->index_params) { + schema->index_params = + static_cast(malloc(sizeof(ZVecIndexParams))); + if (!schema->index_params) { + set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to allocate memory for ZVecIndexParams", + __FILE__, __LINE__, __FUNCTION__); + return ZVEC_ERROR_RESOURCE_EXHAUSTED; + } + } + + *schema->index_params = 
*index_params; + + return ZVEC_OK; +} + +void zvec_field_schema_set_invert_index( + ZVecFieldSchema *field_schema, const ZVecInvertIndexParams *invert_params) { + if (field_schema && invert_params) { + if (!field_schema->index_params) { + field_schema->index_params = + static_cast(malloc(sizeof(ZVecIndexParams))); + if (!field_schema->index_params) { + set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to allocate memory for ZVecIndexParams", + __FILE__, __LINE__, __FUNCTION__); + return; + } + } + + field_schema->index_params->index_type = ZVEC_INDEX_TYPE_INVERT; + field_schema->index_params->params.invert_params = *invert_params; + } +} + +void zvec_field_schema_set_hnsw_index(ZVecFieldSchema *field_schema, + const ZVecHnswIndexParams *hnsw_params) { + if (field_schema && hnsw_params) { + if (!field_schema->index_params) { + field_schema->index_params = + static_cast(malloc(sizeof(ZVecIndexParams))); + if (!field_schema->index_params) { + set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to allocate memory for ZVecIndexParams", + __FILE__, __LINE__, __FUNCTION__); + return; + } + } + + field_schema->index_params->index_type = ZVEC_INDEX_TYPE_HNSW; + field_schema->index_params->params.hnsw_params = *hnsw_params; + } +} + +void zvec_field_schema_set_flat_index(ZVecFieldSchema *field_schema, + const ZVecFlatIndexParams *flat_params) { + if (field_schema && flat_params) { + if (!field_schema->index_params) { + field_schema->index_params = + static_cast(malloc(sizeof(ZVecIndexParams))); + if (!field_schema->index_params) { + set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to allocate memory for ZVecIndexParams", + __FILE__, __LINE__, __FUNCTION__); + return; + } + } + + field_schema->index_params->index_type = ZVEC_INDEX_TYPE_FLAT; + field_schema->index_params->params.flat_params = *flat_params; + } +} + +void zvec_field_schema_set_ivf_index(ZVecFieldSchema *field_schema, + const ZVecIVFIndexParams *ivf_params) { + if 
(field_schema && ivf_params) { + if (!field_schema->index_params) { + field_schema->index_params = + static_cast(malloc(sizeof(ZVecIndexParams))); + if (!field_schema->index_params) { + set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to allocate memory for ZVecIndexParams", + __FILE__, __LINE__, __FUNCTION__); + return; + } + } + + field_schema->index_params->index_type = ZVEC_INDEX_TYPE_IVF; + field_schema->index_params->params.ivf_params = *ivf_params; + } +} + +static void zvec_field_schema_cleanup(ZVecFieldSchema *field_schema) { + if (!field_schema) return; + + if (field_schema->index_params) { + zvec_index_params_destroy(field_schema->index_params); + free(field_schema->index_params); + field_schema->index_params = nullptr; + } + + zvec_free_string(field_schema->name); + field_schema->name = nullptr; +} + +// ============================================================================= +// CollectionOptions management interface implementation +// ============================================================================= + +void zvec_collection_options_init_default(ZVecCollectionOptions *options) { + if (!options) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "Collection options pointer cannot be null", + __FILE__, __LINE__, __FUNCTION__); + return; + } + + options->enable_mmap = true; + options->max_buffer_size = zvec::DEFAULT_MAX_BUFFER_SIZE; + options->read_only = false; + options->max_doc_count_per_segment = zvec::MAX_DOC_COUNT_PER_SEGMENT; +} + +// ============================================================================= +// CollectionSchema management interface implementation +// ============================================================================= + +ZVecCollectionSchema *zvec_collection_schema_create(const char *name) { + if (!name) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "Collection name cannot be null", __FILE__, __LINE__, + __FUNCTION__); + return nullptr; + } + + ZVecCollectionSchema 
*schema = + static_cast(malloc(sizeof(ZVecCollectionSchema))); + if (!schema) { + set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to allocate memory for ZVecCollectionSchema", + __FILE__, __LINE__, __FUNCTION__); + return nullptr; + } + + schema->name = zvec_string_create(name); + if (!schema->name) { + free(schema); + set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to create string for collection name", + __FILE__, __LINE__, __FUNCTION__); + return nullptr; + } + + schema->fields = nullptr; + schema->field_count = 0; + schema->field_capacity = 0; + schema->max_doc_count_per_segment = zvec::MAX_DOC_COUNT_PER_SEGMENT; + + return schema; +} + +void zvec_collection_schema_destroy(ZVecCollectionSchema *schema) { + if (schema) { + zvec_free_string(schema->name); + + if (schema->fields) { + for (size_t i = 0; i < schema->field_count; ++i) { + zvec_field_schema_destroy(schema->fields[i]); + } + free(schema->fields); + } + + free(schema); + } +} + +ZVecErrorCode zvec_collection_schema_add_field(ZVecCollectionSchema *schema, + ZVecFieldSchema *field) { + if (!schema) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "Collection schema pointer cannot be null", __FILE__, + __LINE__, __FUNCTION__); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + if (!field || !field->name) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "Field or field name cannot be null", __FILE__, + __LINE__, __FUNCTION__); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + for (size_t i = 0; i < schema->field_count; ++i) { + if (schema->fields[i]->name && field->name && + zvec_string_compare(schema->fields[i]->name, field->name) == 0) { + set_last_error_details( + ZVEC_ERROR_ALREADY_EXISTS, + std::string("Field '") + field->name->data + "' already exists", + __FILE__, __LINE__, __FUNCTION__); + return ZVEC_ERROR_ALREADY_EXISTS; + } + } + + if (schema->field_count >= schema->field_capacity) { + size_t new_capacity = + schema->field_capacity == 0 ? 
8 : schema->field_capacity * 2; + ZVecFieldSchema **new_fields = static_cast( + malloc(new_capacity * sizeof(ZVecFieldSchema *))); + if (!new_fields) { + set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to allocate memory for fields", __FILE__, + __LINE__, __FUNCTION__); + return ZVEC_ERROR_RESOURCE_EXHAUSTED; + } + + for (size_t i = 0; i < schema->field_count; ++i) { + new_fields[i] = schema->fields[i]; + } + + free(schema->fields); + schema->fields = new_fields; + schema->field_capacity = new_capacity; + } + + schema->fields[schema->field_count] = field; + schema->field_count++; + + return ZVEC_OK; +} + +ZVecErrorCode zvec_collection_schema_add_fields(ZVecCollectionSchema *schema, + const ZVecFieldSchema *fields, + size_t field_count) { + if (!schema) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "Collection schema pointer cannot be null", __FILE__, + __LINE__, __FUNCTION__); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + if (!fields && field_count > 0) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "Fields array cannot be null when field_count > 0", + __FILE__, __LINE__, __FUNCTION__); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + if (field_count == 0) { + return ZVEC_OK; + } + + for (size_t i = 0; i < field_count; ++i) { + const ZVecFieldSchema &field = fields[i]; + if (!field.name || !field.name->data || field.name->length == 0) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + std::string("Field at index ") + + std::to_string(i) + " has invalid name", + __FILE__, __LINE__, __FUNCTION__); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + } + + size_t total_needed = schema->field_count + field_count; + if (total_needed > schema->field_capacity) { + size_t new_capacity = schema->field_capacity; + while (new_capacity < total_needed) { + new_capacity = new_capacity == 0 ? 
8 : new_capacity * 2; + } + + ZVecFieldSchema **new_fields = static_cast( + malloc(new_capacity * sizeof(ZVecFieldSchema *))); + if (!new_fields) { + set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to allocate memory for fields", __FILE__, + __LINE__, __FUNCTION__); + return ZVEC_ERROR_RESOURCE_EXHAUSTED; + } + + for (size_t i = 0; i < schema->field_count; ++i) { + new_fields[i] = schema->fields[i]; + } + + free(schema->fields); + schema->fields = new_fields; + schema->field_capacity = new_capacity; + } + + for (size_t i = 0; i < field_count; ++i) { + const ZVecFieldSchema &src_field = fields[i]; + + ZVecFieldSchema *new_field = + static_cast(malloc(sizeof(ZVecFieldSchema))); + if (!new_field) { + set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to allocate memory for new field", + __FILE__, __LINE__, __FUNCTION__); + return ZVEC_ERROR_RESOURCE_EXHAUSTED; + } + + new_field->name = zvec_string_copy(src_field.name); + if (!new_field->name) { + free(new_field); + set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to copy field name", __FILE__, __LINE__, + __FUNCTION__); + return ZVEC_ERROR_RESOURCE_EXHAUSTED; + } + + new_field->data_type = src_field.data_type; + new_field->nullable = src_field.nullable; + new_field->dimension = src_field.dimension; + + if (src_field.index_params) { + new_field->index_params = + static_cast(malloc(sizeof(ZVecIndexParams))); + if (!new_field->index_params) { + zvec_free_string(new_field->name); + free(new_field); + set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to allocate memory for index params", + __FILE__, __LINE__, __FUNCTION__); + return ZVEC_ERROR_RESOURCE_EXHAUSTED; + } + *(new_field->index_params) = *(src_field.index_params); + } else { + new_field->index_params = nullptr; + } + + schema->fields[schema->field_count] = new_field; + schema->field_count++; + } + + return ZVEC_OK; +} + +ZVecErrorCode zvec_collection_schema_remove_field(ZVecCollectionSchema *schema, + 
const char *field_name) { + if (!schema) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "Collection schema pointer cannot be null", __FILE__, + __LINE__, __FUNCTION__); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + if (!field_name) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "Field name cannot be null", __FILE__, __LINE__, + __FUNCTION__); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + for (size_t i = 0; i < schema->field_count; ++i) { + if (schema->fields[i]->name && + strcmp(schema->fields[i]->name->data, field_name) == 0) { + zvec_field_schema_destroy(schema->fields[i]); + + for (size_t j = i; j < schema->field_count - 1; ++j) { + schema->fields[j] = schema->fields[j + 1]; + } + + schema->field_count--; + return ZVEC_OK; + } + } + + set_last_error_details(ZVEC_ERROR_NOT_FOUND, + std::string("Field '") + field_name + "' not found", + __FILE__, __LINE__, __FUNCTION__); + return ZVEC_ERROR_NOT_FOUND; +} + +ZVecErrorCode zvec_collection_schema_remove_fields( + ZVecCollectionSchema *schema, const char *const *field_names, + size_t field_count) { + if (!schema) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "Collection schema pointer cannot be null", __FILE__, + __LINE__, __FUNCTION__); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + if (!field_names && field_count > 0) { + set_last_error_details( + ZVEC_ERROR_INVALID_ARGUMENT, + "Field names array cannot be null when field_count > 0", __FILE__, + __LINE__, __FUNCTION__); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + if (field_count == 0) { + return ZVEC_OK; + } + + for (size_t i = 0; i < field_count; ++i) { + if (!field_names[i]) { + set_last_error_details( + ZVEC_ERROR_INVALID_ARGUMENT, + std::string("Field name at index ") + std::to_string(i) + " is null", + __FILE__, __LINE__, __FUNCTION__); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + } + + std::vector remove_indices; + std::vector not_found_fields; + + for (size_t field_idx = 0; field_idx < field_count; ++field_idx) { + 
std::string target_name(field_names[field_idx]); + bool found = false; + + for (size_t i = 0; i < schema->field_count; ++i) { + if (schema->fields[i]->name && + strcmp(schema->fields[i]->name->data, target_name.c_str()) == 0) { + remove_indices.push_back(i); + found = true; + break; + } + } + + if (!found) { + not_found_fields.push_back(target_name); + } + } + + + if (!not_found_fields.empty()) { + std::string error_msg = "Fields not found: "; + for (size_t i = 0; i < not_found_fields.size(); ++i) { + error_msg += "'" + not_found_fields[i] + "'"; + if (i < not_found_fields.size() - 1) { + error_msg += ", "; + } + } + set_last_error_details(ZVEC_ERROR_NOT_FOUND, error_msg, __FILE__, __LINE__, + __FUNCTION__); + return ZVEC_ERROR_NOT_FOUND; + } + + std::sort(remove_indices.begin(), remove_indices.end(), + std::greater()); + + for (size_t remove_index : remove_indices) { + zvec_field_schema_destroy(schema->fields[remove_index]); + + for (size_t j = remove_index; j < schema->field_count - 1; ++j) { + schema->fields[j] = schema->fields[j + 1]; + } + + schema->field_count--; + } + + return ZVEC_OK; +} + +ZVecFieldSchema *zvec_collection_schema_find_field( + const ZVecCollectionSchema *schema, const char *field_name) { + if (!schema || !field_name) { + return nullptr; + } + + for (size_t i = 0; i < schema->field_count; ++i) { + if (schema->fields[i]->name && + strcmp(schema->fields[i]->name->data, field_name) == 0) { + return schema->fields[i]; + } + } + + return nullptr; +} + +size_t zvec_collection_schema_get_field_count( + const ZVecCollectionSchema *schema) { + if (!schema) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "Collection schema pointer cannot be null", __FILE__, + __LINE__, __FUNCTION__); + return 0; + } + + return schema->field_count; +} + +ZVecFieldSchema *zvec_collection_schema_get_field( + const ZVecCollectionSchema *schema, size_t index) { + if (!schema) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "Collection schema pointer 
cannot be null", __FILE__, + __LINE__, __FUNCTION__); + return nullptr; + } + + if (index >= schema->field_count) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "Field index out of bounds", __FILE__, __LINE__, + __FUNCTION__); + return nullptr; + } + + return schema->fields[index]; +} + +ZVecErrorCode zvec_collection_schema_set_max_doc_count_per_segment( + ZVecCollectionSchema *schema, uint64_t max_doc_count) { + if (!schema) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "Collection schema pointer cannot be null", __FILE__, + __LINE__, __FUNCTION__); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + schema->max_doc_count_per_segment = max_doc_count; + return ZVEC_OK; +} + +uint64_t zvec_collection_schema_get_max_doc_count_per_segment( + const ZVecCollectionSchema *schema) { + if (!schema) return 0; + return schema->max_doc_count_per_segment; +} + +ZVecErrorCode zvec_collection_schema_validate( + const ZVecCollectionSchema *schema, ZVecString **error_msg) { + if (!schema) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "Collection schema pointer cannot be null", __FILE__, + __LINE__, __FUNCTION__); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + if (error_msg) { + *error_msg = nullptr; + } + + if (!schema->name) { + if (error_msg) { + *error_msg = zvec_string_create("Collection name is required"); + } + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "Collection name is required", __FILE__, __LINE__, + __FUNCTION__); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + if (schema->field_count == 0) { + if (error_msg) { + *error_msg = zvec_string_create("At least one field is required"); + } + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "At least one field is required", __FILE__, __LINE__, + __FUNCTION__); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + for (size_t i = 0; i < schema->field_count; ++i) { + auto field = schema->fields[i]; + if (!field) { + if (error_msg) { + *error_msg = zvec_string_create("Null field found"); + } + 
set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, "Null field found", + __FILE__, __LINE__, __FUNCTION__); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + if (!field->name) { + if (error_msg) { + *error_msg = zvec_string_create("Field name is required"); + } + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "Field name is required", __FILE__, __LINE__, + __FUNCTION__); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + } + + return ZVEC_OK; +} + +void zvec_collection_schema_cleanup(ZVecCollectionSchema *schema) { + if (!schema) return; + + ZVEC_TRY_BEGIN_VOID + if (schema->name) { + zvec_free_string(schema->name); + } + + if (schema->fields) { + for (size_t i = 0; i < schema->field_count; ++i) { + zvec_field_schema_cleanup(schema->fields[i]); + } + delete[] schema->fields; + schema->fields = nullptr; + schema->field_count = 0; + } + + schema->max_doc_count_per_segment = 0; + ZVEC_CATCH_END_VOID +} + +// ============================================================================= +// Helper functions +// ============================================================================= + +const char *zvec_error_code_to_string(ZVecErrorCode error_code) { + switch (error_code) { + case ZVEC_OK: + return "OK"; + case ZVEC_ERROR_NOT_FOUND: + return "NOT_FOUND"; + case ZVEC_ERROR_ALREADY_EXISTS: + return "ALREADY_EXISTS"; + case ZVEC_ERROR_INVALID_ARGUMENT: + return "INVALID_ARGUMENT"; + case ZVEC_ERROR_PERMISSION_DENIED: + return "PERMISSION_DENIED"; + case ZVEC_ERROR_FAILED_PRECONDITION: + return "FAILED_PRECONDITION"; + case ZVEC_ERROR_RESOURCE_EXHAUSTED: + return "RESOURCE_EXHAUSTED"; + case ZVEC_ERROR_UNAVAILABLE: + return "UNAVAILABLE"; + case ZVEC_ERROR_INTERNAL_ERROR: + return "INTERNAL_ERROR"; + case ZVEC_ERROR_NOT_SUPPORTED: + return "NOT_SUPPORTED"; + case ZVEC_ERROR_UNKNOWN: + return "UNKNOWN"; + default: + return "UNKNOWN_ERROR_CODE"; + } +} + +const char *zvec_data_type_to_string(ZVecDataType data_type) { + switch (data_type) { + case ZVEC_DATA_TYPE_UNDEFINED: + 
return "UNDEFINED"; + case ZVEC_DATA_TYPE_BINARY: + return "BINARY"; + case ZVEC_DATA_TYPE_STRING: + return "STRING"; + case ZVEC_DATA_TYPE_BOOL: + return "BOOL"; + case ZVEC_DATA_TYPE_INT32: + return "INT32"; + case ZVEC_DATA_TYPE_INT64: + return "INT64"; + case ZVEC_DATA_TYPE_UINT32: + return "UINT32"; + case ZVEC_DATA_TYPE_UINT64: + return "UINT64"; + case ZVEC_DATA_TYPE_FLOAT: + return "FLOAT"; + case ZVEC_DATA_TYPE_DOUBLE: + return "DOUBLE"; + case ZVEC_DATA_TYPE_VECTOR_BINARY32: + return "VECTOR_BINARY32"; + case ZVEC_DATA_TYPE_VECTOR_BINARY64: + return "VECTOR_BINARY64"; + case ZVEC_DATA_TYPE_VECTOR_FP16: + return "VECTOR_FP16"; + case ZVEC_DATA_TYPE_VECTOR_FP32: + return "VECTOR_FP32"; + case ZVEC_DATA_TYPE_VECTOR_FP64: + return "VECTOR_FP64"; + case ZVEC_DATA_TYPE_VECTOR_INT4: + return "VECTOR_INT4"; + case ZVEC_DATA_TYPE_VECTOR_INT8: + return "VECTOR_INT8"; + case ZVEC_DATA_TYPE_VECTOR_INT16: + return "VECTOR_INT16"; + case ZVEC_DATA_TYPE_SPARSE_VECTOR_FP16: + return "SPARSE_VECTOR_FP16"; + case ZVEC_DATA_TYPE_SPARSE_VECTOR_FP32: + return "SPARSE_VECTOR_FP32"; + case ZVEC_DATA_TYPE_ARRAY_BINARY: + return "ARRAY_BINARY"; + case ZVEC_DATA_TYPE_ARRAY_STRING: + return "ARRAY_STRING"; + case ZVEC_DATA_TYPE_ARRAY_BOOL: + return "ARRAY_BOOL"; + case ZVEC_DATA_TYPE_ARRAY_INT32: + return "ARRAY_INT32"; + case ZVEC_DATA_TYPE_ARRAY_INT64: + return "ARRAY_INT64"; + case ZVEC_DATA_TYPE_ARRAY_UINT32: + return "ARRAY_UINT32"; + case ZVEC_DATA_TYPE_ARRAY_UINT64: + return "ARRAY_UINT64"; + case ZVEC_DATA_TYPE_ARRAY_FLOAT: + return "ARRAY_FLOAT"; + case ZVEC_DATA_TYPE_ARRAY_DOUBLE: + return "ARRAY_DOUBLE"; + default: + return "UNKNOWN_DATA_TYPE"; + } +} + /* Maps an index type to a constant name string; values without a case yield "UNKNOWN_INDEX_TYPE". */ +const char *zvec_index_type_to_string(ZVecIndexType index_type) { + switch (index_type) { + case ZVEC_INDEX_TYPE_UNDEFINED: + return "UNDEFINED"; + case ZVEC_INDEX_TYPE_HNSW: + return "HNSW"; + case ZVEC_INDEX_TYPE_IVF: + return "IVF"; + case ZVEC_INDEX_TYPE_FLAT: + return "FLAT"; + case ZVEC_INDEX_TYPE_INVERT: + return
"INVERT"; + default: + return "UNKNOWN_INDEX_TYPE"; + } +} + /* Maps a metric type to a constant name string; values without a case yield "UNKNOWN_METRIC_TYPE". */ +const char *zvec_metric_type_to_string(ZVecMetricType metric_type) { + switch (metric_type) { + case ZVEC_METRIC_TYPE_UNDEFINED: + return "UNDEFINED"; + case ZVEC_METRIC_TYPE_L2: + return "L2"; + case ZVEC_METRIC_TYPE_IP: + return "IP"; + case ZVEC_METRIC_TYPE_COSINE: + return "COSINE"; + case ZVEC_METRIC_TYPE_MIPSL2: + return "MIPSL2"; + default: + return "UNKNOWN_METRIC_TYPE"; + } +} + /* Returns true when the field's data_type is one of the dense VECTOR_* or SPARSE_VECTOR_* types listed below; every other type yields false. */ +bool check_is_vector_field(const ZVecFieldSchema &zvec_field) { + bool is_vector_field = + (zvec_field.data_type == ZVEC_DATA_TYPE_VECTOR_FP32 || + zvec_field.data_type == ZVEC_DATA_TYPE_VECTOR_FP64 || + zvec_field.data_type == ZVEC_DATA_TYPE_VECTOR_FP16 || + zvec_field.data_type == ZVEC_DATA_TYPE_VECTOR_BINARY32 || + zvec_field.data_type == ZVEC_DATA_TYPE_VECTOR_BINARY64 || + zvec_field.data_type == ZVEC_DATA_TYPE_VECTOR_INT4 || + zvec_field.data_type == ZVEC_DATA_TYPE_VECTOR_INT8 || + zvec_field.data_type == ZVEC_DATA_TYPE_VECTOR_INT16 || + zvec_field.data_type == ZVEC_DATA_TYPE_SPARSE_VECTOR_FP32 || + zvec_field.data_type == ZVEC_DATA_TYPE_SPARSE_VECTOR_FP16); + return is_vector_field; +} + +// ============================================================================= +// Doc functions implementation +// ============================================================================= + /* Creates a document handle: a heap-allocated std::shared_ptr reinterpret_cast to ZVecDoc*. NOTE(review): the template arguments on this and the following casts were lost when the patch text was extracted - restore them from the upstream file. Failure handling is done by ZVEC_TRY_RETURN_NULL, defined outside this section. */ +ZVecDoc *zvec_doc_create(void) { + ZVEC_TRY_RETURN_NULL("Failed to create document", { + auto doc_ptr = + new std::shared_ptr(std::make_shared()); + return reinterpret_cast(doc_ptr); + }) +} + /* Deletes the shared_ptr wrapper behind the handle; null is accepted. */ +void zvec_doc_destroy(ZVecDoc *doc) { + if (doc) { + delete reinterpret_cast *>(doc); + } +} + /* Calls clear() on the wrapped document; no-op for a null handle. */ +void zvec_doc_clear(ZVecDoc *doc) { + if (!doc) return; + + ZVEC_TRY_BEGIN_VOID + auto doc_ptr = reinterpret_cast *>(doc); + (*doc_ptr)->clear(); + ZVEC_CATCH_END_VOID +} + /* Destroys the first `count` handles of `docs` and then free()s the array itself; no-op for a null array. */ +void zvec_docs_free(ZVecDoc **docs, size_t count) { + if (!docs) return; + + for (size_t i = 0; i < count; ++i) { + zvec_doc_destroy(docs[i]); + } + + free(docs); +} + /* Sets the primary key from a NUL-terminated string; silently ignored when doc or pk is null. */ +void
zvec_doc_set_pk(ZVecDoc *doc, const char *pk) { + if (!doc || !pk) return; + + ZVEC_TRY_BEGIN_VOID + auto doc_ptr = reinterpret_cast *>(doc); + (*doc_ptr)->set_pk(std::string(pk)); + ZVEC_CATCH_END_VOID +} + /* Forwards doc_id to set_doc_id(); silently ignored for a null handle. */ +void zvec_doc_set_doc_id(ZVecDoc *doc, uint64_t doc_id) { + if (!doc) return; + + ZVEC_TRY_BEGIN_VOID + auto doc_ptr = reinterpret_cast *>(doc); + (*doc_ptr)->set_doc_id(doc_id); + ZVEC_CATCH_END_VOID +} + /* Forwards score to set_score(); silently ignored for a null handle. */ +void zvec_doc_set_score(ZVecDoc *doc, float score) { + if (!doc) return; + + ZVEC_TRY_BEGIN_VOID + auto doc_ptr = reinterpret_cast *>(doc); + (*doc_ptr)->set_score(score); + ZVEC_CATCH_END_VOID +} + /* Forwards the operator, converted with static_cast, to set_operator(); silently ignored for a null handle. NOTE(review): the static_cast target type was lost in extraction. */ +void zvec_doc_set_operator(ZVecDoc *doc, ZVecDocOperator op) { + if (!doc) return; + + ZVEC_TRY_BEGIN_VOID + auto doc_ptr = reinterpret_cast *>(doc); + (*doc_ptr)->set_operator(static_cast(op)); + ZVEC_CATCH_END_VOID +} + +// ============================================================================= +// Document interface implementation +// ============================================================================= + +// Helper function to extract scalar values from raw data /* Requires value_size == sizeof(T); otherwise stores ZVEC_ERROR_INVALID_ARGUMENT in *error_code (when non-null) and returns T{}. *error_code is not written on success. NOTE(review): the template parameter list was lost in extraction. */ +template +T extract_scalar_value(const void *value, size_t value_size, + ZVecErrorCode *error_code) { + if (value_size != sizeof(T)) { + if (error_code) { + *error_code = ZVEC_ERROR_INVALID_ARGUMENT; + } + return T{}; + } + return *static_cast(value); +} + +// Helper function to extract vector values from raw data /* Requires value_size to be a multiple of sizeof(T); otherwise stores ZVEC_ERROR_INVALID_ARGUMENT in *error_code (when non-null) and returns an empty vector. */ +template +std::vector extract_vector_values(const void *value, size_t value_size, + ZVecErrorCode *error_code) { + if (value_size % sizeof(T) != 0) { + if (error_code) { + *error_code = ZVEC_ERROR_INVALID_ARGUMENT; + } + return std::vector(); + } + size_t count = value_size / sizeof(T); + const T *vals = static_cast(value); + return std::vector(vals, vals + count); +} + +// Helper function to extract array values from raw data +template +std::vector extract_array_values(const void *value, size_t value_size, + ZVecErrorCode *error_code) { + if (value_size % sizeof(T) != 0) {
+ if (error_code) { + *error_code = ZVEC_ERROR_INVALID_ARGUMENT; + } + return std::vector(); + } + size_t count = value_size / sizeof(T); + const T *vals = static_cast(value); + return std::vector(vals, vals + count); +} + +// Helper function to handle sparse vector extraction +template +std::pair, std::vector> extract_sparse_vector( + const void *value, size_t value_size, ZVecErrorCode *error_code) { + if (value_size < sizeof(uint32_t)) { + if (error_code) { + *error_code = ZVEC_ERROR_INVALID_ARGUMENT; + } + return std::make_pair(std::vector(), std::vector()); + } + + const uint32_t *data = static_cast(value); + uint32_t nnz = data[0]; + + size_t required_size = + sizeof(uint32_t) + nnz * (sizeof(uint32_t) + sizeof(T)); + if (value_size < required_size) { + if (error_code) { + *error_code = ZVEC_ERROR_INVALID_ARGUMENT; + } + return std::make_pair(std::vector(), std::vector()); + } + + const uint32_t *indices = data + 1; + const T *values = reinterpret_cast(indices + nnz); + + std::vector index_vec(indices, indices + nnz); + std::vector value_vec(values, values + nnz); + + return std::make_pair(std::move(index_vec), std::move(value_vec)); +} + +// Helper function to extract string array from raw data (C-string array) +std::vector extract_string_array(const void *value, + size_t value_size) { + std::vector string_array; + const char *data = static_cast(value); + size_t pos = 0; + + while (pos < value_size) { + size_t str_len = strlen(data + pos); + if (pos + str_len >= value_size) { + break; + } + string_array.emplace_back(data + pos, str_len); + pos += str_len + 1; + } + return string_array; +} + +// Helper function to extract string array from ZVecString** array +std::vector extract_string_array_from_zvec( + ZVecString **zvec_strings, size_t count) { + std::vector string_array; + string_array.reserve(count); + + for (size_t i = 0; i < count; ++i) { + if (zvec_strings[i] && zvec_strings[i]->data) { + string_array.emplace_back(zvec_strings[i]->data, 
zvec_strings[i]->length); + } else { + string_array.emplace_back("", 0); + } + } + + return string_array; +} + +// Helper function to extract binary array from raw data +std::vector extract_binary_array(const void *value, + size_t value_size) { + std::vector binary_array; + const char *data = static_cast(value); + size_t pos = 0; + + while (pos < value_size) { + if (pos + sizeof(uint32_t) > value_size) { + break; + } + uint32_t bin_len = *reinterpret_cast(data + pos); + pos += sizeof(uint32_t); + + if (pos + bin_len > value_size) { + break; + } + binary_array.emplace_back(data + pos, bin_len); + pos += bin_len; + } + return binary_array; +} + +static std::vector convert_zvec_docs_to_internal( + const ZVecDoc **zvec_docs, size_t doc_count) { + std::vector docs; + docs.reserve(doc_count); + + for (size_t i = 0; i < doc_count; ++i) { + docs.push_back( + *(*reinterpret_cast *>(zvec_docs[i]))); + } + + return docs; +} + + +static zvec::Status convert_zvec_collection_schema_to_internal( + const ZVecCollectionSchema *schema, + zvec::CollectionSchema::Ptr &collection_schema) { + std::string coll_name(schema->name->data, schema->name->length); + collection_schema = std::make_shared(coll_name); + collection_schema->set_max_doc_count_per_segment( + schema->max_doc_count_per_segment); + + for (size_t i = 0; i < schema->field_count; ++i) { + const ZVecFieldSchema &zvec_field = *schema->fields[i]; + zvec::DataType data_type = convert_data_type(zvec_field.data_type); + std::string field_name = + std::string(zvec_field.name->data, zvec_field.name->length); + zvec::FieldSchema::Ptr field_schema; + + bool is_vector_field = check_is_vector_field(zvec_field); + + if (is_vector_field) { + field_schema = std::make_shared( + field_name, data_type, zvec_field.dimension, zvec_field.nullable); + } else { + field_schema = std::make_shared(field_name, data_type, + zvec_field.nullable); + } + + if (zvec_field.index_params != nullptr) { + zvec::Status status = 
set_field_index_params(field_schema, &zvec_field); + if (!status.ok()) { + return status; + } + } + /* Conversion stops at the first field the internal schema rejects; that status is returned unchanged. */ + zvec::Status status = collection_schema->add_field(field_schema); + if (!status.ok()) { + return status; + } + } + + return zvec::Status::OK(); +} + /* Builds an internal field schema from the C struct. Returns InvalidArgument when the name is null or the data type converts to zvec::DataType::UNDEFINED. Vector fields always receive index params: the configured HNSW/FLAT/IVF params, or an L2-metric fallback when none are set or the index type is anything else. Non-vector fields receive index params only when an INVERT index is configured. NOTE(review): the class names in the std::make_shared calls were lost when the patch text was extracted - restore them from the upstream file. */ +static zvec::Status convert_zvec_field_schema_to_internal( + const ZVecFieldSchema &zvec_field, zvec::FieldSchema::Ptr &field_schema) { + // Validate input + if (!zvec_field.name) { + return zvec::Status::InvalidArgument("Field name cannot be null"); + } + + zvec::DataType data_type = convert_data_type(zvec_field.data_type); + if (data_type == zvec::DataType::UNDEFINED) { + return zvec::Status::InvalidArgument("Invalid data type"); + } + + std::string field_name(zvec_field.name->data, zvec_field.name->length); + bool is_vector_field = check_is_vector_field(zvec_field); + + if (is_vector_field) { + field_schema = std::make_shared( + field_name, data_type, zvec_field.dimension, zvec_field.nullable); + + if (zvec_field.index_params != nullptr) { + switch (zvec_field.index_params->index_type) { + case ZVEC_INDEX_TYPE_HNSW: { /* NOTE(review): ef_search is not passed to the internal params here - confirm this is intended. */ + auto *params = &zvec_field.index_params->params.hnsw_params; + auto metric = convert_metric_type(params->base.metric_type); + auto quantize = convert_quantize_type(params->base.quantize_type); + auto index_params = std::make_shared( + metric, params->m, params->ef_construction, quantize); + field_schema->set_index_params(index_params); + break; + } + case ZVEC_INDEX_TYPE_FLAT: { + auto *params = &zvec_field.index_params->params.flat_params; + auto metric = convert_metric_type(params->base.metric_type); + auto quantize = convert_quantize_type(params->base.quantize_type); + auto index_params = + std::make_shared(metric, quantize); + field_schema->set_index_params(index_params); + break; + } + case ZVEC_INDEX_TYPE_IVF: { /* NOTE(review): n_probe is not passed to the internal params here - confirm this is intended. */ + auto *params = &zvec_field.index_params->params.ivf_params; + auto metric = convert_metric_type(params->base.metric_type); + auto quantize =
convert_quantize_type(params->base.quantize_type); + auto index_params = std::make_shared( + metric, params->n_list, params->n_iters, params->use_soar, + quantize); + field_schema->set_index_params(index_params); + break; + } + default: /* Any other index type on a vector field falls back to L2-metric params without reporting an error. */ + field_schema->set_index_params( + std::make_shared(zvec::MetricType::L2)); + break; + } + } else { + field_schema->set_index_params( + std::make_shared(zvec::MetricType::L2)); + } + } else { + field_schema = std::make_shared(field_name, data_type, + zvec_field.nullable); + /* Index params of any type other than INVERT are ignored for non-vector fields. */ + if (zvec_field.index_params != nullptr && + zvec_field.index_params->index_type == ZVEC_INDEX_TYPE_INVERT) { + auto *params = &zvec_field.index_params->params.invert_params; + auto index_params = std::make_shared( + params->enable_range_optimization, params->enable_extended_wildcard); + field_schema->set_index_params(index_params); + } + } + + return zvec::Status::OK(); +} + +ZVecErrorCode zvec_doc_add_field_by_value(ZVecDoc *doc, const char *field_name, + ZVecDataType data_type, + const void *value, + size_t value_size) { + if (!doc || !field_name || !value) { + set_last_error("Invalid arguments: null pointer"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + ZVEC_TRY_RETURN_ERROR( + "Failed to add field", + auto doc_ptr = reinterpret_cast *>(doc); + std::string name(field_name); ZVecErrorCode error_code = ZVEC_OK; + + switch (data_type) { + // Scalar types + case ZVEC_DATA_TYPE_BINARY: + case ZVEC_DATA_TYPE_STRING: { + std::string val(static_cast(value), value_size); + (*doc_ptr)->set(name, val); + break; + } + case ZVEC_DATA_TYPE_BOOL: { + bool val = extract_scalar_value(value, value_size, &error_code); + if (error_code != ZVEC_OK) { + set_last_error("Invalid value size for bool type"); + return error_code; + } + (*doc_ptr)->set(name, val); + break; + } + case ZVEC_DATA_TYPE_INT32: { + int32_t val = + extract_scalar_value(value, value_size, &error_code); + if (error_code != ZVEC_OK) { + set_last_error("Invalid value size for int32 type"); + return error_code;
+ } + (*doc_ptr)->set(name, val); + break; + } + case ZVEC_DATA_TYPE_INT64: { + int64_t val = + extract_scalar_value(value, value_size, &error_code); + if (error_code != ZVEC_OK) { + set_last_error("Invalid value size for int64 type"); + return error_code; + } + (*doc_ptr)->set(name, val); + break; + } + case ZVEC_DATA_TYPE_UINT32: { + uint32_t val = + extract_scalar_value(value, value_size, &error_code); + if (error_code != ZVEC_OK) { + set_last_error("Invalid value size for uint32 type"); + return error_code; + } + (*doc_ptr)->set(name, val); + break; + } + case ZVEC_DATA_TYPE_UINT64: { + uint64_t val = + extract_scalar_value(value, value_size, &error_code); + if (error_code != ZVEC_OK) { + set_last_error("Invalid value size for uint64 type"); + return error_code; + } + (*doc_ptr)->set(name, val); + break; + } + case ZVEC_DATA_TYPE_FLOAT: { + float val = + extract_scalar_value(value, value_size, &error_code); + if (error_code != ZVEC_OK) { + set_last_error("Invalid value size for float type"); + return error_code; + } + (*doc_ptr)->set(name, val); + break; + } + case ZVEC_DATA_TYPE_DOUBLE: { + double val = + extract_scalar_value(value, value_size, &error_code); + if (error_code != ZVEC_OK) { + set_last_error("Invalid value size for double type"); + return error_code; + } + (*doc_ptr)->set(name, val); + break; + } + + // Vector types + case ZVEC_DATA_TYPE_VECTOR_BINARY32: { + auto vec = + extract_vector_values(value, value_size, &error_code); + if (error_code != ZVEC_OK) { + set_last_error("Invalid value size for vector_binary32 type"); + return error_code; + } + (*doc_ptr)->set(name, vec); + break; + } + case ZVEC_DATA_TYPE_VECTOR_BINARY64: { + auto vec = + extract_vector_values(value, value_size, &error_code); + if (error_code != ZVEC_OK) { + set_last_error("Invalid value size for vector_binary64 type"); + return error_code; + } + (*doc_ptr)->set(name, vec); + break; + } + case ZVEC_DATA_TYPE_VECTOR_FP32: { + auto vec = + extract_vector_values(value, value_size, 
&error_code); + if (error_code != ZVEC_OK) { + set_last_error("Invalid value size for vector_fp32 type"); + return error_code; + } + (*doc_ptr)->set(name, vec); + break; + } + case ZVEC_DATA_TYPE_VECTOR_FP16: { + auto vec = extract_vector_values(value, value_size, + &error_code); + if (error_code != ZVEC_OK) { + set_last_error("Invalid value size for vector_fp16 type"); + return error_code; + } + (*doc_ptr)->set(name, vec); + break; + } + case ZVEC_DATA_TYPE_VECTOR_FP64: { + auto vec = + extract_vector_values(value, value_size, &error_code); + if (error_code != ZVEC_OK) { + set_last_error("Invalid value size for vector_fp64 type"); + return error_code; + } + (*doc_ptr)->set(name, vec); + break; + } + case ZVEC_DATA_TYPE_VECTOR_INT8: { + auto vec = + extract_vector_values(value, value_size, &error_code); + if (error_code != ZVEC_OK) { + set_last_error("Invalid value size for vector_int8 type"); + return error_code; + } + (*doc_ptr)->set(name, vec); + break; + } + case ZVEC_DATA_TYPE_VECTOR_INT16: { + auto vec = + extract_vector_values(value, value_size, &error_code); + if (error_code != ZVEC_OK) { + set_last_error("Invalid value size for vector_int16 type"); + return error_code; + } + (*doc_ptr)->set(name, vec); + break; + } + case ZVEC_DATA_TYPE_VECTOR_INT4: { + // INT4 vectors are packed - each byte contains 2 int4 values + size_t count = value_size * 2; + const int8_t *packed_vals = static_cast(value); + std::vector vec; + vec.reserve(count); + + // Unpack int4 values + for (size_t i = 0; i < value_size; ++i) { + int8_t byte_val = packed_vals[i]; + // Extract lower 4 bits + vec.push_back(byte_val & 0x0F); + // Extract upper 4 bits + vec.push_back((byte_val >> 4) & 0x0F); + } + (*doc_ptr)->set(name, vec); + break; + } + + // Sparse vector types + case ZVEC_DATA_TYPE_SPARSE_VECTOR_FP16: { + auto sparse_vec = extract_sparse_vector( + value, value_size, &error_code); + if (error_code != ZVEC_OK) { + set_last_error("Invalid sparse vector data size"); + return 
error_code; + } + (*doc_ptr)->set(name, sparse_vec); + break; + } + case ZVEC_DATA_TYPE_SPARSE_VECTOR_FP32: { + auto sparse_vec = + extract_sparse_vector(value, value_size, &error_code); + if (error_code != ZVEC_OK) { + set_last_error("Invalid sparse vector data size"); + return error_code; + } + (*doc_ptr)->set(name, sparse_vec); + break; + } + + // Array types + case ZVEC_DATA_TYPE_ARRAY_BINARY: { + auto binary_array = extract_binary_array(value, value_size); + (*doc_ptr)->set(name, binary_array); + break; + } + case ZVEC_DATA_TYPE_ARRAY_STRING: { + // Check if this is a ZVecString** array or a C-string array + // ZVecString** array has pointer-sized elements + constexpr size_t ptr_size = sizeof(void *); + if (value_size % ptr_size == 0) { + // Likely a ZVecString** array + size_t count = value_size / ptr_size; + ZVecString **zvec_str_array = + reinterpret_cast(const_cast(value)); + auto string_array = + extract_string_array_from_zvec(zvec_str_array, count); + (*doc_ptr)->set(name, string_array); + } else { + // C-string array (null-terminated strings) + auto string_array = extract_string_array(value, value_size); + (*doc_ptr)->set(name, string_array); + } + break; + } + case ZVEC_DATA_TYPE_ARRAY_BOOL: { + auto vec = extract_array_values(value, value_size, &error_code); + if (error_code != ZVEC_OK) { + set_last_error("Invalid value size for array_bool type"); + return error_code; + } + (*doc_ptr)->set(name, vec); + break; + } + case ZVEC_DATA_TYPE_ARRAY_INT32: { + auto vec = + extract_array_values(value, value_size, &error_code); + if (error_code != ZVEC_OK) { + set_last_error("Invalid value size for array_int32 type"); + return error_code; + } + (*doc_ptr)->set(name, vec); + break; + } + case ZVEC_DATA_TYPE_ARRAY_INT64: { + auto vec = + extract_array_values(value, value_size, &error_code); + if (error_code != ZVEC_OK) { + set_last_error("Invalid value size for array_int64 type"); + return error_code; + } + (*doc_ptr)->set(name, vec); + break; + } + case 
ZVEC_DATA_TYPE_ARRAY_UINT32: { + auto vec = + extract_array_values(value, value_size, &error_code); + if (error_code != ZVEC_OK) { + set_last_error("Invalid value size for array_uint32 type"); + return error_code; + } + (*doc_ptr)->set(name, vec); + break; + } + case ZVEC_DATA_TYPE_ARRAY_UINT64: { + auto vec = + extract_array_values(value, value_size, &error_code); + if (error_code != ZVEC_OK) { + set_last_error("Invalid value size for array_uint64 type"); + return error_code; + } + (*doc_ptr)->set(name, vec); + break; + } + case ZVEC_DATA_TYPE_ARRAY_FLOAT: { + auto vec = + extract_array_values(value, value_size, &error_code); + if (error_code != ZVEC_OK) { + set_last_error("Invalid value size for array_float type"); + return error_code; + } + (*doc_ptr)->set(name, vec); + break; + } + case ZVEC_DATA_TYPE_ARRAY_DOUBLE: { + auto vec = + extract_array_values(value, value_size, &error_code); + if (error_code != ZVEC_OK) { + set_last_error("Invalid value size for array_double type"); + return error_code; + } + (*doc_ptr)->set(name, vec); + break; + } + + default: + set_last_error("Unsupported data type: " + std::to_string(data_type)); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + return ZVEC_OK;) +} + +ZVecErrorCode zvec_doc_add_field_by_struct(ZVecDoc *doc, + const ZVecDocField *field) { + if (!doc || !field) { + set_last_error("Invalid arguments: null pointer"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + ZVEC_TRY_RETURN_ERROR( + "Failed to add field", + auto doc_ptr = reinterpret_cast *>(doc); + + std::string name(field->name.data, field->name.length); + + switch (field->data_type) { + // Scalar types (in ZVecDataType order: BINARY, STRING, BOOL, INT32, + // INT64, UINT32, UINT64, FLOAT, DOUBLE) + case ZVEC_DATA_TYPE_BINARY: { + std::string val( + reinterpret_cast(field->value.binary_value.data), + field->value.binary_value.length); + (*doc_ptr)->set(name, val); + break; + } + case ZVEC_DATA_TYPE_STRING: { + std::string val(field->value.string_value.data, + 
field->value.string_value.length); + (*doc_ptr)->set(name, val); + break; + } + case ZVEC_DATA_TYPE_BOOL: { + (*doc_ptr)->set(name, field->value.bool_value); + break; + } + case ZVEC_DATA_TYPE_INT32: { + (*doc_ptr)->set(name, field->value.int32_value); + break; + } + case ZVEC_DATA_TYPE_INT64: { + (*doc_ptr)->set(name, field->value.int64_value); + break; + } + case ZVEC_DATA_TYPE_UINT32: { + (*doc_ptr)->set(name, field->value.uint32_value); + break; + } + case ZVEC_DATA_TYPE_UINT64: { + (*doc_ptr)->set(name, field->value.uint64_value); + break; + } + case ZVEC_DATA_TYPE_FLOAT: { + (*doc_ptr)->set(name, field->value.float_value); + break; + } + case ZVEC_DATA_TYPE_DOUBLE: { + (*doc_ptr)->set(name, field->value.double_value); + break; + } + + // Vector types (in ZVecDataType order: BINARY32, BINARY64, FP16, FP32, + // FP64, INT4, INT8, INT16) + case ZVEC_DATA_TYPE_VECTOR_BINARY32: { + std::vector vec(reinterpret_cast( + field->value.vector_value.data), + reinterpret_cast( + field->value.vector_value.data) + + field->value.vector_value.length); + (*doc_ptr)->set(name, vec); + break; + } + case ZVEC_DATA_TYPE_VECTOR_BINARY64: { + std::vector vec(reinterpret_cast( + field->value.vector_value.data), + reinterpret_cast( + field->value.vector_value.data) + + field->value.vector_value.length); + (*doc_ptr)->set(name, vec); + break; + } + case ZVEC_DATA_TYPE_VECTOR_FP16: { + std::vector vec( + reinterpret_cast( + field->value.vector_value.data), + reinterpret_cast( + field->value.vector_value.data) + + field->value.vector_value.length); + (*doc_ptr)->set(name, vec); + break; + } + case ZVEC_DATA_TYPE_VECTOR_FP32: { + std::vector vec(field->value.vector_value.data, + field->value.vector_value.data + + field->value.vector_value.length); + (*doc_ptr)->set(name, vec); + break; + } + case ZVEC_DATA_TYPE_VECTOR_FP64: { + std::vector vec( + reinterpret_cast(field->value.vector_value.data), + reinterpret_cast(field->value.vector_value.data) + + field->value.vector_value.length); + 
(*doc_ptr)->set(name, vec); + break; + } + case ZVEC_DATA_TYPE_VECTOR_INT4: { + size_t byte_count = (field->value.vector_value.length + 1) / 2; + const int8_t *packed_data = + reinterpret_cast(field->value.vector_value.data); + std::vector vec; + vec.reserve(field->value.vector_value.length); + + for (size_t i = 0; + i < byte_count && vec.size() < field->value.vector_value.length; + ++i) { + int8_t byte_val = packed_data[i]; + // Extract lower 4 bits + vec.push_back(byte_val & 0x0F); + // Extract upper 4 bits + if (vec.size() < field->value.vector_value.length) { + vec.push_back((byte_val >> 4) & 0x0F); + } + } + (*doc_ptr)->set(name, vec); + break; + } + case ZVEC_DATA_TYPE_VECTOR_INT8: { + std::vector vec( + reinterpret_cast(field->value.vector_value.data), + reinterpret_cast(field->value.vector_value.data) + + field->value.vector_value.length); + (*doc_ptr)->set(name, vec); + break; + } + case ZVEC_DATA_TYPE_VECTOR_INT16: { + std::vector vec( + reinterpret_cast(field->value.vector_value.data), + reinterpret_cast( + field->value.vector_value.data) + + field->value.vector_value.length); + (*doc_ptr)->set(name, vec); + break; + } + + // Sparse vector types (in ZVecDataType order: FP16, FP32) + case ZVEC_DATA_TYPE_SPARSE_VECTOR_FP16: { + std::vector vec( + reinterpret_cast( + field->value.vector_value.data), + reinterpret_cast( + field->value.vector_value.data) + + field->value.vector_value.length); + (*doc_ptr)->set(name, vec); + break; + } + case ZVEC_DATA_TYPE_SPARSE_VECTOR_FP32: { + std::vector vec(field->value.vector_value.data, + field->value.vector_value.data + + field->value.vector_value.length); + (*doc_ptr)->set(name, vec); + break; + } + + // Array types (in ZVecDataType order: BINARY, STRING, BOOL, INT32, + // INT64, UINT32, UINT64, FLOAT, DOUBLE) + case ZVEC_DATA_TYPE_ARRAY_BINARY: { + std::vector array_values; + const uint8_t *data_ptr = field->value.binary_value.data; + size_t total_length = field->value.binary_value.length; + size_t offset = 0; + + 
while (offset + sizeof(uint32_t) <= total_length) { + uint32_t elem_length = + *reinterpret_cast(data_ptr + offset); + offset += sizeof(uint32_t); + + if (offset + elem_length <= total_length) { + std::string elem( + reinterpret_cast(data_ptr + offset), + elem_length); + array_values.push_back(elem); + offset += elem_length; + } else { + break; + } + } + (*doc_ptr)->set(name, array_values); + break; + } + case ZVEC_DATA_TYPE_ARRAY_STRING: { + std::vector array_values; + const char *data_ptr = field->value.string_value.data; + size_t total_length = field->value.string_value.length; + size_t offset = 0; + + while (offset < total_length) { + size_t str_len = strlen(data_ptr + offset); + if (str_len > 0 && offset + str_len <= total_length) { + array_values.emplace_back(data_ptr + offset, str_len); + offset += str_len + 1; + } else { + break; + } + } + (*doc_ptr)->set(name, array_values); + break; + } + case ZVEC_DATA_TYPE_ARRAY_BOOL: { + std::vector array_values( + reinterpret_cast(field->value.binary_value.data), + reinterpret_cast(field->value.binary_value.data) + + field->value.binary_value.length); + (*doc_ptr)->set(name, array_values); + break; + } + case ZVEC_DATA_TYPE_ARRAY_INT32: { + std::vector array_values( + reinterpret_cast(field->value.vector_value.data), + reinterpret_cast( + field->value.vector_value.data) + + field->value.vector_value.length); + (*doc_ptr)->set(name, array_values); + break; + } + case ZVEC_DATA_TYPE_ARRAY_INT64: { + std::vector array_values( + reinterpret_cast(field->value.vector_value.data), + reinterpret_cast( + field->value.vector_value.data) + + field->value.vector_value.length); + (*doc_ptr)->set(name, array_values); + break; + } + case ZVEC_DATA_TYPE_ARRAY_UINT32: { + std::vector array_values( + reinterpret_cast( + field->value.vector_value.data), + reinterpret_cast( + field->value.vector_value.data) + + field->value.vector_value.length); + (*doc_ptr)->set(name, array_values); + break; + } + case ZVEC_DATA_TYPE_ARRAY_UINT64: { + 
std::vector array_values( + reinterpret_cast( + field->value.vector_value.data), + reinterpret_cast( + field->value.vector_value.data) + + field->value.vector_value.length); + (*doc_ptr)->set(name, array_values); + break; + } + case ZVEC_DATA_TYPE_ARRAY_FLOAT: { + std::vector array_values(field->value.vector_value.data, + field->value.vector_value.data + + field->value.vector_value.length); + (*doc_ptr)->set(name, array_values); + break; + } + case ZVEC_DATA_TYPE_ARRAY_DOUBLE: { + std::vector array_values( + reinterpret_cast(field->value.vector_value.data), + reinterpret_cast(field->value.vector_value.data) + + field->value.vector_value.length); + (*doc_ptr)->set(name, array_values); + break; + } + + default: + set_last_error("Unsupported data type: " + + std::to_string(field->data_type)); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + return ZVEC_OK;) +} + +const char *zvec_doc_get_pk_pointer(const ZVecDoc *doc) { + if (!doc) return nullptr; + auto doc_ptr = reinterpret_cast *>(doc); + return (*doc_ptr)->pk_ref().data(); +} + +const char *zvec_doc_get_pk_copy(const ZVecDoc *doc) { + if (!doc) return nullptr; + auto doc_ptr = reinterpret_cast *>(doc); + const std::string &pk = (*doc_ptr)->pk_ref(); + if (pk.empty()) return nullptr; + + char *result = static_cast(malloc(pk.length() + 1)); + strcpy(result, pk.c_str()); + return result; +} + +uint64_t zvec_doc_get_doc_id(const ZVecDoc *doc) { + if (!doc) return 0; + + ZVEC_TRY_RETURN_SCALAR( + "Failed to get document ID", 0, + auto doc_ptr = reinterpret_cast *>(doc); + return (*doc_ptr)->doc_id();) +} + +float zvec_doc_get_score(const ZVecDoc *doc) { + if (!doc) return 0.0f; + + ZVEC_TRY_RETURN_SCALAR( + "Failed to get document score", 0.0f, + auto doc_ptr = reinterpret_cast *>(doc); + return (*doc_ptr)->score();) +} + +ZVecDocOperator zvec_doc_get_operator(const ZVecDoc *doc) { + if (!doc) return ZVEC_DOC_OP_INSERT; // default + ZVEC_TRY_RETURN_SCALAR( + "Failed to get document operator", ZVEC_DOC_OP_INSERT, + auto 
doc_ptr = reinterpret_cast *>(doc); + zvec::Operator op = (*doc_ptr)->get_operator(); + return static_cast(op);) +} + +size_t zvec_doc_get_field_count(const ZVecDoc *doc) { + if (!doc) return 0; + + ZVEC_TRY_RETURN_SCALAR( + "Failed to get field count", 0, + auto doc_ptr = reinterpret_cast *>(doc); + return (*doc_ptr)->field_names().size();) +} + +ZVecErrorCode zvec_doc_get_field_value_basic(const ZVecDoc *doc, + const char *field_name, + ZVecDataType field_type, + void *value_buffer, + size_t buffer_size) { + if (!doc || !field_name || !value_buffer) { + set_last_error("Invalid arguments: null pointer"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + ZVEC_TRY_RETURN_ERROR( + "Failed to get field value", + auto doc_ptr = reinterpret_cast *>(doc); + + // Check if field exists + if (!(*doc_ptr)->has(field_name)) { + set_last_error("Field not found in document"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + // Handle basic data types that return values directly + switch (field_type) { + case ZVEC_DATA_TYPE_BOOL: { + if (buffer_size < sizeof(bool)) { + set_last_error("Buffer too small for bool value"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + const bool val = (*doc_ptr)->get_ref(field_name); + *static_cast(value_buffer) = val; + break; + } + case ZVEC_DATA_TYPE_INT32: { + if (buffer_size < sizeof(int32_t)) { + set_last_error("Buffer too small for int32 value"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + const int32_t val = (*doc_ptr)->get_ref(field_name); + *static_cast(value_buffer) = val; + break; + } + case ZVEC_DATA_TYPE_INT64: { + if (buffer_size < sizeof(int64_t)) { + set_last_error("Buffer too small for int64 value"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + const int64_t val = (*doc_ptr)->get_ref(field_name); + *static_cast(value_buffer) = val; + break; + } + case ZVEC_DATA_TYPE_UINT32: { + if (buffer_size < sizeof(uint32_t)) { + set_last_error("Buffer too small for uint32 value"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + const uint32_t val = 
(*doc_ptr)->get_ref(field_name); + *static_cast(value_buffer) = val; + break; + } + case ZVEC_DATA_TYPE_UINT64: { + if (buffer_size < sizeof(uint64_t)) { + set_last_error("Buffer too small for uint64 value"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + const uint64_t val = (*doc_ptr)->get_ref(field_name); + *static_cast(value_buffer) = val; + break; + } + case ZVEC_DATA_TYPE_FLOAT: { + if (buffer_size < sizeof(float)) { + set_last_error("Buffer too small for float value"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + const float val = (*doc_ptr)->get_ref(field_name); + *static_cast(value_buffer) = val; + break; + } + case ZVEC_DATA_TYPE_DOUBLE: { + if (buffer_size < sizeof(double)) { + set_last_error("Buffer too small for double value"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + const double val = (*doc_ptr)->get_ref(field_name); + *static_cast(value_buffer) = val; + break; + } + default: { + set_last_error("Data type not supported for basic value return"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + } + + return ZVEC_OK;) +} + +ZVecErrorCode zvec_doc_get_field_value_copy(const ZVecDoc *doc, + const char *field_name, + ZVecDataType field_type, + void **value, size_t *value_size) { + if (!doc || !field_name || !value || !value_size) { + set_last_error("Invalid arguments: null pointer"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + ZVEC_TRY_RETURN_ERROR( + "Failed to get field value copy", + auto doc_ptr = reinterpret_cast *>(doc); + + // Check if field exists + if (!(*doc_ptr)->has(field_name)) { + set_last_error("Field not found in document"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + // Handle copy-returning data types (allocate new memory) + switch (field_type) { + // Basic types - copy the actual values + case ZVEC_DATA_TYPE_BOOL: { + const bool val = (*doc_ptr)->get_ref(field_name); + void *buffer = malloc(sizeof(bool)); + if (!buffer) { + set_last_error("Memory allocation failed for bool"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + *static_cast(buffer) = 
val; + *value = buffer; + *value_size = sizeof(bool); + break; + } + case ZVEC_DATA_TYPE_INT32: { + const int32_t val = (*doc_ptr)->get_ref(field_name); + void *buffer = malloc(sizeof(int32_t)); + if (!buffer) { + set_last_error("Memory allocation failed for int32"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + *static_cast(buffer) = val; + *value = buffer; + *value_size = sizeof(int32_t); + break; + } + case ZVEC_DATA_TYPE_INT64: { + const int64_t val = (*doc_ptr)->get_ref(field_name); + void *buffer = malloc(sizeof(int64_t)); + if (!buffer) { + set_last_error("Memory allocation failed for int64"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + *static_cast(buffer) = val; + *value = buffer; + *value_size = sizeof(int64_t); + break; + } + case ZVEC_DATA_TYPE_UINT32: { + const uint32_t val = (*doc_ptr)->get_ref(field_name); + void *buffer = malloc(sizeof(uint32_t)); + if (!buffer) { + set_last_error("Memory allocation failed for uint32"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + *static_cast(buffer) = val; + *value = buffer; + *value_size = sizeof(uint32_t); + break; + } + case ZVEC_DATA_TYPE_UINT64: { + const uint64_t val = (*doc_ptr)->get_ref(field_name); + void *buffer = malloc(sizeof(uint64_t)); + if (!buffer) { + set_last_error("Memory allocation failed for uint64"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + *static_cast(buffer) = val; + *value = buffer; + *value_size = sizeof(uint64_t); + break; + } + case ZVEC_DATA_TYPE_FLOAT: { + const float val = (*doc_ptr)->get_ref(field_name); + void *buffer = malloc(sizeof(float)); + if (!buffer) { + set_last_error("Memory allocation failed for float"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + *static_cast(buffer) = val; + *value = buffer; + *value_size = sizeof(float); + break; + } + case ZVEC_DATA_TYPE_DOUBLE: { + const double val = (*doc_ptr)->get_ref(field_name); + void *buffer = malloc(sizeof(double)); + if (!buffer) { + set_last_error("Memory allocation failed for double"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + 
*static_cast(buffer) = val; + *value = buffer; + *value_size = sizeof(double); + break; + } + + // String and binary types - copy the data + case ZVEC_DATA_TYPE_BINARY: + case ZVEC_DATA_TYPE_STRING: { + const std::string &val = (*doc_ptr)->get_ref(field_name); + void *buffer = malloc(val.length()); + if (!buffer) { + set_last_error("Memory allocation failed for string/binary"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + memcpy(buffer, val.data(), val.length()); + *value = buffer; + *value_size = val.length(); + break; + } + + // Vector types - copy the data + case ZVEC_DATA_TYPE_VECTOR_BINARY32: { + const std::vector &val = + (*doc_ptr)->get_ref>(field_name); + size_t total_size = val.size() * sizeof(uint32_t); + void *buffer = malloc(total_size); + if (!buffer) { + set_last_error("Memory allocation failed for uint32 vector"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + memcpy(buffer, val.data(), total_size); + *value = buffer; + *value_size = total_size; + break; + } + case ZVEC_DATA_TYPE_VECTOR_BINARY64: { + const std::vector &val = + (*doc_ptr)->get_ref>(field_name); + size_t total_size = val.size() * sizeof(uint64_t); + void *buffer = malloc(total_size); + if (!buffer) { + set_last_error("Memory allocation failed for uint64 vector"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + memcpy(buffer, val.data(), total_size); + *value = buffer; + *value_size = total_size; + break; + } + case ZVEC_DATA_TYPE_VECTOR_FP16: { + const std::vector &val = + (*doc_ptr)->get_ref>(field_name); + size_t total_size = val.size() * sizeof(zvec::float16_t); + void *buffer = malloc(total_size); + if (!buffer) { + set_last_error("Memory allocation failed for fp16 vector"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + memcpy(buffer, val.data(), total_size); + *value = buffer; + *value_size = total_size; + break; + } + case ZVEC_DATA_TYPE_VECTOR_FP32: { + const std::vector &val = + (*doc_ptr)->get_ref>(field_name); + size_t total_size = val.size() * sizeof(float); + void *buffer = 
malloc(total_size); + if (!buffer) { + set_last_error("Memory allocation failed for fp32 vector"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + memcpy(buffer, val.data(), total_size); + *value = buffer; + *value_size = total_size; + break; + } + case ZVEC_DATA_TYPE_VECTOR_FP64: { + const std::vector &val = + (*doc_ptr)->get_ref>(field_name); + size_t total_size = val.size() * sizeof(double); + void *buffer = malloc(total_size); + if (!buffer) { + set_last_error("Memory allocation failed for fp64 vector"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + memcpy(buffer, val.data(), total_size); + *value = buffer; + *value_size = total_size; + break; + } + case ZVEC_DATA_TYPE_VECTOR_INT4: + case ZVEC_DATA_TYPE_VECTOR_INT8: { + const std::vector &val = + (*doc_ptr)->get_ref>(field_name); + size_t total_size = val.size() * sizeof(int8_t); + void *buffer = malloc(total_size); + if (!buffer) { + set_last_error("Memory allocation failed for int8 vector"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + memcpy(buffer, val.data(), total_size); + *value = buffer; + *value_size = total_size; + break; + } + case ZVEC_DATA_TYPE_VECTOR_INT16: { + const std::vector &val = + (*doc_ptr)->get_ref>(field_name); + size_t total_size = val.size() * sizeof(int16_t); + void *buffer = malloc(total_size); + if (!buffer) { + set_last_error("Memory allocation failed for int16 vector"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + memcpy(buffer, val.data(), total_size); + *value = buffer; + *value_size = total_size; + break; + } + + // Sparse vector types - create flattened representation + case ZVEC_DATA_TYPE_SPARSE_VECTOR_FP16: { + using SparseVecFP16 = + std::pair, std::vector>; + const SparseVecFP16 &sparse_vec = + (*doc_ptr)->get_ref(field_name); + size_t nnz = sparse_vec.first.size(); + size_t total_size = sizeof(size_t) + nnz * (sizeof(uint32_t) + + sizeof(zvec::float16_t)); + void *buffer = malloc(total_size); + if (!buffer) { + set_last_error("Memory allocation failed for sparse vector FP16"); + return 
ZVEC_ERROR_INTERNAL_ERROR; + } + + char *ptr = static_cast(buffer); + *reinterpret_cast(ptr) = nnz; + ptr += sizeof(size_t); + + for (size_t i = 0; i < nnz; ++i) { + *reinterpret_cast(ptr) = sparse_vec.first[i]; + ptr += sizeof(uint32_t); + } + for (size_t i = 0; i < nnz; ++i) { + *reinterpret_cast(ptr) = sparse_vec.second[i]; + ptr += sizeof(zvec::float16_t); + } + + *value = buffer; + *value_size = total_size; + break; + } + case ZVEC_DATA_TYPE_SPARSE_VECTOR_FP32: { + using SparseVecFP32 = + std::pair, std::vector>; + const SparseVecFP32 &sparse_vec = + (*doc_ptr)->get_ref(field_name); + size_t nnz = sparse_vec.first.size(); + size_t total_size = + sizeof(size_t) + nnz * (sizeof(uint32_t) + sizeof(float)); + void *buffer = malloc(total_size); + if (!buffer) { + set_last_error("Memory allocation failed for sparse vector FP32"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + + char *ptr = static_cast(buffer); + *reinterpret_cast(ptr) = nnz; + ptr += sizeof(size_t); + + for (size_t i = 0; i < nnz; ++i) { + *reinterpret_cast(ptr) = sparse_vec.first[i]; + ptr += sizeof(uint32_t); + } + for (size_t i = 0; i < nnz; ++i) { + *reinterpret_cast(ptr) = sparse_vec.second[i]; + ptr += sizeof(float); + } + + *value = buffer; + *value_size = total_size; + break; + } + + // Array types - create serialized representations + case ZVEC_DATA_TYPE_ARRAY_BINARY: { + using BinaryArray = std::vector; + const BinaryArray &array_vals = + (*doc_ptr)->get_ref(field_name); + size_t total_size = 0; + for (const auto &bin_val : array_vals) { + total_size += bin_val.length(); + } + + void *buffer = malloc(total_size); + if (!buffer) { + set_last_error("Memory allocation failed for binary array"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + + char *ptr = static_cast(buffer); + for (const auto &bin_val : array_vals) { + memcpy(ptr, bin_val.data(), bin_val.length()); + ptr += bin_val.length(); + } + + *value = buffer; + *value_size = total_size; + break; + } + case ZVEC_DATA_TYPE_ARRAY_STRING: { + using 
StringArray = std::vector; + const StringArray &array_vals = + (*doc_ptr)->get_ref(field_name); + size_t total_size = 0; + for (const auto &str_val : array_vals) { + total_size += str_val.length() + 1; // +1 for null terminator + } + + void *buffer = malloc(total_size); + if (!buffer) { + set_last_error("Memory allocation failed for string array"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + + char *ptr = static_cast(buffer); + for (const auto &str_val : array_vals) { + memcpy(ptr, str_val.c_str(), str_val.length()); + ptr += str_val.length(); + *ptr = '\0'; + ptr++; + } + + *value = buffer; + *value_size = total_size; + break; + } + case ZVEC_DATA_TYPE_ARRAY_BOOL: { + using BoolArray = std::vector; + const BoolArray &array_vals = + (*doc_ptr)->get_ref(field_name); + size_t byte_count = (array_vals.size() + 7) / 8; + void *buffer = malloc(byte_count); + if (!buffer) { + set_last_error("Memory allocation failed for bool array"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + + uint8_t *bytes = static_cast(buffer); + memset(bytes, 0, byte_count); + + for (size_t i = 0; i < array_vals.size(); ++i) { + if (array_vals[i]) { + bytes[i / 8] |= (1 << (i % 8)); + } + } + + *value = buffer; + *value_size = byte_count; + break; + } + case ZVEC_DATA_TYPE_ARRAY_INT32: { + using Int32Array = std::vector; + const Int32Array &array_vals = + (*doc_ptr)->get_ref(field_name); + size_t total_size = array_vals.size() * sizeof(int32_t); + void *buffer = malloc(total_size); + if (!buffer) { + set_last_error("Memory allocation failed for int32 array"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + + memcpy(buffer, array_vals.data(), total_size); + *value = buffer; + *value_size = total_size; + break; + } + case ZVEC_DATA_TYPE_ARRAY_INT64: { + using Int64Array = std::vector; + const Int64Array &array_vals = + (*doc_ptr)->get_ref(field_name); + size_t total_size = array_vals.size() * sizeof(int64_t); + void *buffer = malloc(total_size); + if (!buffer) { + set_last_error("Memory allocation failed for 
int64 array"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + + memcpy(buffer, array_vals.data(), total_size); + *value = buffer; + *value_size = total_size; + break; + } + case ZVEC_DATA_TYPE_ARRAY_UINT32: { + using UInt32Array = std::vector; + const UInt32Array &array_vals = + (*doc_ptr)->get_ref(field_name); + size_t total_size = array_vals.size() * sizeof(uint32_t); + void *buffer = malloc(total_size); + if (!buffer) { + set_last_error("Memory allocation failed for uint32 array"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + + memcpy(buffer, array_vals.data(), total_size); + *value = buffer; + *value_size = total_size; + break; + } + case ZVEC_DATA_TYPE_ARRAY_UINT64: { + using UInt64Array = std::vector; + const UInt64Array &array_vals = + (*doc_ptr)->get_ref(field_name); + size_t total_size = array_vals.size() * sizeof(uint64_t); + void *buffer = malloc(total_size); + if (!buffer) { + set_last_error("Memory allocation failed for uint64 array"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + + memcpy(buffer, array_vals.data(), total_size); + *value = buffer; + *value_size = total_size; + break; + } + case ZVEC_DATA_TYPE_ARRAY_FLOAT: { + using FloatArray = std::vector; + const FloatArray &array_vals = + (*doc_ptr)->get_ref(field_name); + size_t total_size = array_vals.size() * sizeof(float); + void *buffer = malloc(total_size); + if (!buffer) { + set_last_error("Memory allocation failed for float array"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + + memcpy(buffer, array_vals.data(), total_size); + *value = buffer; + *value_size = total_size; + break; + } + case ZVEC_DATA_TYPE_ARRAY_DOUBLE: { + using DoubleArray = std::vector; + const DoubleArray &array_vals = + (*doc_ptr)->get_ref(field_name); + size_t total_size = array_vals.size() * sizeof(double); + void *buffer = malloc(total_size); + if (!buffer) { + set_last_error("Memory allocation failed for double array"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + + memcpy(buffer, array_vals.data(), total_size); + *value = buffer; + 
*value_size = total_size; + break; + } + default: { + set_last_error("Unknown data type"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + } + + return ZVEC_OK;) +} + +ZVecErrorCode zvec_doc_get_field_value_pointer(const ZVecDoc *doc, + const char *field_name, + ZVecDataType field_type, + const void **value, + size_t *value_size) { + if (!doc || !field_name || !value || !value_size) { + set_last_error("Invalid arguments: null pointer"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + ZVEC_TRY_RETURN_ERROR( + "Failed to get field value pointer", + auto doc_ptr = reinterpret_cast *>(doc); + + // Check if field exists + if (!(*doc_ptr)->has(field_name)) { + set_last_error("Field not found in document"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + // Get field value based on data type + switch (field_type) { + case ZVEC_DATA_TYPE_BINARY: { + const std::string &val = (*doc_ptr)->get_ref(field_name); + *value = val.data(); + *value_size = val.length(); + break; + } + case ZVEC_DATA_TYPE_STRING: { + const std::string &val = (*doc_ptr)->get_ref(field_name); + *value = val.c_str(); + *value_size = val.length(); + break; + } + case ZVEC_DATA_TYPE_BOOL: { + const bool &val = (*doc_ptr)->get_ref(field_name); + *value = &val; + *value_size = sizeof(bool); + break; + } + case ZVEC_DATA_TYPE_INT32: { + const int32_t &val = (*doc_ptr)->get_ref(field_name); + *value = &val; + *value_size = sizeof(int32_t); + break; + } + case ZVEC_DATA_TYPE_INT64: { + const int64_t &val = (*doc_ptr)->get_ref(field_name); + *value = &val; + *value_size = sizeof(int64_t); + break; + } + case ZVEC_DATA_TYPE_UINT32: { + const uint32_t &val = (*doc_ptr)->get_ref(field_name); + *value = &val; + *value_size = sizeof(uint32_t); + break; + } + case ZVEC_DATA_TYPE_UINT64: { + const uint64_t &val = (*doc_ptr)->get_ref(field_name); + *value = &val; + *value_size = sizeof(uint64_t); + break; + } + case ZVEC_DATA_TYPE_FLOAT: { + const float &val = (*doc_ptr)->get_ref(field_name); + *value = &val; + *value_size = 
sizeof(float); + break; + } + case ZVEC_DATA_TYPE_DOUBLE: { + const double &val = (*doc_ptr)->get_ref(field_name); + *value = &val; + *value_size = sizeof(double); + break; + } + case ZVEC_DATA_TYPE_VECTOR_BINARY32: { + const std::vector &val = + (*doc_ptr)->get_ref>(field_name); + *value = val.data(); + *value_size = val.size() * sizeof(uint32_t); + break; + } + case ZVEC_DATA_TYPE_VECTOR_BINARY64: { + const std::vector &val = + (*doc_ptr)->get_ref>(field_name); + *value = val.data(); + *value_size = val.size() * sizeof(uint64_t); + break; + } + case ZVEC_DATA_TYPE_VECTOR_FP16: { + // FP16 vectors typically stored as uint16_t + const std::vector &val = + (*doc_ptr)->get_ref>(field_name); + *value = val.data(); + *value_size = val.size() * sizeof(zvec::float16_t); + break; + } + case ZVEC_DATA_TYPE_VECTOR_FP32: { + const std::vector &val = + (*doc_ptr)->get_ref>(field_name); + *value = val.data(); + *value_size = val.size() * sizeof(float); + break; + } + case ZVEC_DATA_TYPE_VECTOR_FP64: { + const std::vector &val = + (*doc_ptr)->get_ref>(field_name); + *value = val.data(); + *value_size = val.size() * sizeof(double); + break; + } + case ZVEC_DATA_TYPE_VECTOR_INT4: { + // INT4 vectors typically stored as int8_t with 2 values per byte + const std::vector &val = + (*doc_ptr)->get_ref>(field_name); + *value = val.data(); + *value_size = val.size() * sizeof(int8_t); + break; + } + case ZVEC_DATA_TYPE_VECTOR_INT8: { + const std::vector &val = + (*doc_ptr)->get_ref>(field_name); + *value = val.data(); + *value_size = val.size() * sizeof(int8_t); + break; + } + case ZVEC_DATA_TYPE_VECTOR_INT16: { + const std::vector &val = + (*doc_ptr)->get_ref>(field_name); + *value = val.data(); + *value_size = val.size() * sizeof(int16_t); + break; + } + case ZVEC_DATA_TYPE_ARRAY_INT32: { + auto &array_vals = + (*doc_ptr)->get_ref>(field_name); + *value = array_vals.data(); + *value_size = array_vals.size() * sizeof(int32_t); + break; + } + case ZVEC_DATA_TYPE_ARRAY_INT64: { + auto 
&array_vals = + (*doc_ptr)->get_ref>(field_name); + *value = array_vals.data(); + *value_size = array_vals.size() * sizeof(int64_t); + break; + } + case ZVEC_DATA_TYPE_ARRAY_UINT32: { + auto &array_vals = + (*doc_ptr)->get_ref>(field_name); + *value = array_vals.data(); + *value_size = array_vals.size() * sizeof(uint32_t); + break; + } + case ZVEC_DATA_TYPE_ARRAY_UINT64: { + auto &array_vals = + (*doc_ptr)->get_ref>(field_name); + *value = array_vals.data(); + *value_size = array_vals.size() * sizeof(uint64_t); + break; + } + case ZVEC_DATA_TYPE_ARRAY_FLOAT: { + auto &array_vals = + (*doc_ptr)->get_ref>(field_name); + *value = array_vals.data(); + *value_size = array_vals.size() * sizeof(float); + break; + } + case ZVEC_DATA_TYPE_ARRAY_DOUBLE: { + auto &array_vals = + (*doc_ptr)->get_ref>(field_name); + *value = array_vals.data(); + *value_size = array_vals.size() * sizeof(double); + break; + } + default: { + set_last_error("Unknown data type"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + } + + return ZVEC_OK;) +} + +bool zvec_doc_is_empty(const ZVecDoc *doc) { + if (!doc) { + set_last_error("Document pointer is null"); + return true; + } + + ZVEC_TRY_RETURN_SCALAR( + "Failed to check if document is empty", true, + auto doc_ptr = reinterpret_cast *>(doc); + return (*doc_ptr)->is_empty();) +} + +ZVecErrorCode zvec_doc_remove_field(ZVecDoc *doc, const char *field_name) { + if (!doc || !field_name) { + set_last_error("Document pointer or field name is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + ZVEC_TRY_RETURN_ERROR( + "Failed to remove field", + auto doc_ptr = reinterpret_cast *>(doc); + (*doc_ptr)->remove(std::string(field_name)); return ZVEC_OK;) +} + + +bool zvec_doc_has_field(const ZVecDoc *doc, const char *field_name) { + if (!doc || !field_name) { + set_last_error("Document pointer or field name is null"); + return false; + } + + ZVEC_TRY_RETURN_SCALAR( + "Failed to check field existence", false, + auto doc_ptr = reinterpret_cast *>(doc); + return 
(*doc_ptr)->has(std::string(field_name));) +} + +bool zvec_doc_has_field_value(const ZVecDoc *doc, const char *field_name) { + if (!doc || !field_name) { + set_last_error("Document pointer or field name is null"); + return false; + } + + ZVEC_TRY_RETURN_SCALAR( + "Failed to check field value existence", false, + auto doc_ptr = reinterpret_cast *>(doc); + return (*doc_ptr)->has_value(std::string(field_name));) +} + +bool zvec_doc_is_field_null(const ZVecDoc *doc, const char *field_name) { + if (!doc || !field_name) { + set_last_error("Document pointer or field name is null"); + return false; + } + + ZVEC_TRY_RETURN_SCALAR( + "Failed to check if field is null", false, + auto doc_ptr = reinterpret_cast *>(doc); + return (*doc_ptr)->is_null(std::string(field_name));) +} + +ZVecErrorCode zvec_doc_get_field_names(const ZVecDoc *doc, char ***field_names, + size_t *count) { + if (!doc || !field_names || !count) { + set_last_error("Invalid arguments"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + ZVEC_TRY_RETURN_ERROR( + "Failed to get field names", + auto doc_ptr = reinterpret_cast *>(doc); + std::vector names = (*doc_ptr)->field_names(); + + *count = names.size(); + if (*count == 0) { + *field_names = nullptr; + return ZVEC_OK; + } + + *field_names = static_cast(malloc(*count * sizeof(char *))); + if (!*field_names) { + set_last_error("Failed to allocate memory for field names"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + + for (size_t i = 0; i < *count; ++i) { + (*field_names)[i] = copy_string(names[i]); + if (!(*field_names)[i]) { + for (size_t j = 0; j < i; ++j) { + free((*field_names)[j]); + } + free(*field_names); + *field_names = nullptr; + set_last_error("Failed to copy field name"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + } + + return ZVEC_OK;) +} + +ZVecErrorCode zvec_doc_serialize(const ZVecDoc *doc, uint8_t **data, + size_t *size) { + if (!doc || !data || !size) { + set_last_error("Invalid arguments"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + 
ZVEC_TRY_RETURN_ERROR( + "Failed to serialize document", + auto doc_ptr = reinterpret_cast *>(doc); + std::vector serialized_data = (*doc_ptr)->serialize(); + + *size = serialized_data.size(); + if (*size == 0) { + *data = nullptr; + return ZVEC_OK; + } + + *data = static_cast(malloc(*size)); + if (!*data) { + set_last_error("Failed to allocate memory for serialized data"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + + memcpy(*data, serialized_data.data(), *size); + return ZVEC_OK;) +} + +ZVecErrorCode zvec_doc_deserialize(const uint8_t *data, size_t size, + ZVecDoc **doc) { + if (!data || !doc || size == 0) { + set_last_error("Invalid arguments"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + ZVEC_TRY_RETURN_ERROR( + "Failed to deserialize document", + auto deserialized_doc = zvec::Doc::deserialize(data, size); + if (!deserialized_doc) { + set_last_error("Failed to deserialize document"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + + auto doc_ptr = new std::shared_ptr(deserialized_doc); + *doc = reinterpret_cast(doc_ptr); return ZVEC_OK;) +} + +void zvec_doc_merge(ZVecDoc *doc, const ZVecDoc *other) { + if (!doc || !other) { + set_last_error("Document pointers are null"); + return; + } + + ZVEC_TRY_BEGIN_VOID + auto doc_ptr = reinterpret_cast *>(doc); + auto other_ptr = reinterpret_cast *>(other); + (*doc_ptr)->merge(**other_ptr); + ZVEC_CATCH_END_VOID +} + +size_t zvec_doc_memory_usage(const ZVecDoc *doc) { + if (!doc) { + set_last_error("Document pointer is null"); + return 0; + } + + ZVEC_TRY_RETURN_SCALAR( + "Failed to get document memory usage", 0, + auto doc_ptr = reinterpret_cast *>(doc); + return (*doc_ptr)->memory_usage();) +} + +ZVecErrorCode zvec_doc_validate(const ZVecDoc *doc, + const ZVecCollectionSchema *schema, + bool is_update, char **error_msg) { + if (!doc || !schema) { + set_last_error("Document or schema pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + ZVEC_TRY_RETURN_ERROR( + "Failed to validate document", + std::shared_ptr 
schema_ptr = nullptr; + auto status = + convert_zvec_collection_schema_to_internal(schema, schema_ptr); + if (!status.ok()) { + if (error_msg) { + *error_msg = copy_string(status.message()); + } + return status_to_error_code(status); + } + + auto doc_ptr = reinterpret_cast *>(doc); + status = (*doc_ptr)->validate(schema_ptr, is_update); if (!status.ok()) { + if (error_msg) { + *error_msg = copy_string(status.message()); + } + return status_to_error_code(status); + } + + if (error_msg) { *error_msg = nullptr; } return ZVEC_OK;) +} + +ZVecErrorCode zvec_doc_to_detail_string(const ZVecDoc *doc, char **detail_str) { + if (!doc || !detail_str) { + set_last_error("Invalid arguments"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + ZVEC_TRY_RETURN_ERROR( + "Failed to get document detail string", + auto doc_ptr = reinterpret_cast *>(doc); + std::string detail = (*doc_ptr)->to_detail_string(); + *detail_str = copy_string(detail); + + if (!*detail_str && !detail.empty()) { + set_last_error("Failed to copy detail string"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + + return ZVEC_OK;) +} + +// ============================================================================= +// Collection functions implementation +// ============================================================================= + +ZVecErrorCode zvec_collection_create_and_open( + const char *path, const ZVecCollectionSchema *schema, + const ZVecCollectionOptions *options, ZVecCollection **collection) { + ZVEC_TRY_RETURN_ERROR( + "Exception in zvec_collection_create_and_open_with_schema", + if (!path || !schema || !collection) { + set_last_error("Path, schema, or collection cannot be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + std::shared_ptr + schema_ptr = nullptr; + auto status = + convert_zvec_collection_schema_to_internal(schema, schema_ptr); + if (!status.ok()) { + set_last_error(status.message()); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + zvec::CollectionOptions collection_options; + if (options) { 
+ collection_options.enable_mmap_ = options->enable_mmap; + collection_options.max_buffer_size_ = options->max_buffer_size; + collection_options.read_only_ = options->read_only; + } + + auto result = zvec::Collection::CreateAndOpen(path, *schema_ptr, + collection_options); + ZVecErrorCode error_code = handle_expected_result(result); + + if (error_code == ZVEC_OK) { + *collection = reinterpret_cast( + new std::shared_ptr(std::move(result.value()))); + } + + return error_code;) +} + +ZVecErrorCode zvec_collection_open(const char *path, + const ZVecCollectionOptions *options, + ZVecCollection **collection) { + if (!path || !collection) { + set_last_error("Invalid arguments: path and collection cannot be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + ZVEC_TRY_RETURN_ERROR( + "Exception occurred", zvec::CollectionOptions collection_options; + if (options) { + collection_options.enable_mmap_ = options->enable_mmap; + collection_options.max_buffer_size_ = options->max_buffer_size; + collection_options.read_only_ = options->read_only; + } + + auto result = zvec::Collection::Open(path, collection_options); + ZVecErrorCode error_code = handle_expected_result(result); + + if (error_code == ZVEC_OK) { + *collection = reinterpret_cast( + new std::shared_ptr(std::move(result.value()))); + } + + return error_code;) +} + +ZVecErrorCode zvec_collection_close(ZVecCollection *collection) { + if (!collection) { + set_last_error("Invalid argument: collection cannot be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + ZVEC_TRY_RETURN_ERROR( + "Exception occurred", + delete reinterpret_cast *>(collection); + return ZVEC_OK;) +} + +ZVecErrorCode zvec_collection_destroy(ZVecCollection *collection) { + if (!collection) { + set_last_error("Invalid argument: collection cannot be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + ZVEC_TRY_RETURN_ERROR( + "Exception occurred", + auto &coll = + *reinterpret_cast *>(collection); + zvec::Status status = coll->Destroy(); + if 
(!status.ok()) { set_last_error(status.message()); } + + return status_to_error_code(status);) +} + +ZVecErrorCode zvec_collection_flush(ZVecCollection *collection) { + if (!collection) { + set_last_error("Invalid argument: collection cannot be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + ZVEC_TRY_RETURN_ERROR( + "Exception occurred", + auto &coll = + *reinterpret_cast *>(collection); + zvec::Status status = coll->Flush(); + + if (!status.ok()) { set_last_error(status.message()); } + + return status_to_error_code(status);) +} + +ZVecErrorCode zvec_collection_get_schema(const ZVecCollection *collection, + ZVecCollectionSchema **schema) { + if (!collection || !schema) { + set_last_error("Invalid arguments: collection and schema cannot be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + ZVEC_TRY_RETURN_ERROR( + "Exception occurred", + auto &coll = *reinterpret_cast *>( + collection); + auto result = coll->Schema(); + + ZVecErrorCode error_code = handle_expected_result(result); + if (error_code == ZVEC_OK) { + const auto &cpp_schema = result.value(); + + // Create new schema structure + ZVecCollectionSchema *c_schema = static_cast( + malloc(sizeof(ZVecCollectionSchema))); + if (!c_schema) { + set_last_error("Failed to allocate memory for schema"); + return ZVEC_ERROR_RESOURCE_EXHAUSTED; + } + + // Initialize the schema structure + c_schema->name = nullptr; + c_schema->fields = nullptr; + c_schema->field_count = 0; + c_schema->field_capacity = 0; + c_schema->max_doc_count_per_segment = + cpp_schema.max_doc_count_per_segment(); + + // Set collection name + c_schema->name = zvec_string_create(cpp_schema.name().c_str()); + if (!c_schema->name) { + free(c_schema); + set_last_error("Failed to allocate memory for collection name"); + return ZVEC_ERROR_RESOURCE_EXHAUSTED; + } + + // Convert and copy fields + const auto &cpp_fields = cpp_schema.fields(); + c_schema->field_count = cpp_fields.size(); + c_schema->field_capacity = cpp_fields.size(); + + if 
(c_schema->field_count > 0) { + // Allocate array of field pointers + c_schema->fields = static_cast( + malloc(c_schema->field_count * sizeof(ZVecFieldSchema *))); + if (!c_schema->fields) { + zvec_collection_schema_destroy(c_schema); + set_last_error("Failed to allocate memory for fields"); + return ZVEC_ERROR_RESOURCE_EXHAUSTED; + } + + // Initialize all field pointers to nullptr + for (size_t i = 0; i < c_schema->field_count; ++i) { + c_schema->fields[i] = nullptr; + } + + size_t i = 0; + for (const auto &cpp_field : cpp_fields) { + try { + // Create new field schema + c_schema->fields[i] = static_cast( + malloc(sizeof(ZVecFieldSchema))); + if (!c_schema->fields[i]) { + throw std::bad_alloc(); + } + + // Copy field name using zvec_string_create + c_schema->fields[i]->name = + zvec_string_create(cpp_field->name().c_str()); + if (!c_schema->fields[i]->name) { + throw std::bad_alloc(); + } + + // Convert data type + c_schema->fields[i]->data_type = + convert_zvec_data_type(cpp_field->data_type()); + + // Copy dimension for vector fields + c_schema->fields[i]->dimension = cpp_field->dimension(); + + // Copy nullable flag + c_schema->fields[i]->nullable = cpp_field->nullable(); + + // Initialize index parameters + c_schema->fields[i]->index_params = nullptr; + + // Convert index parameters based on the actual type + auto index_params = cpp_field->index_params(); + if (index_params) { + switch (index_params->type()) { + case zvec::IndexType::HNSW: { + // Cast to HnswIndexParams and convert + auto hnsw_params = + std::dynamic_pointer_cast( + index_params); + if (hnsw_params) { + auto c_hnsw_params = static_cast( + malloc(sizeof(ZVecHnswIndexParams))); + if (!c_hnsw_params) { + throw std::bad_alloc(); + } + + // Initialize the base vector index parameters + c_hnsw_params->base.base.index_type = + ZVEC_INDEX_TYPE_HNSW; + c_hnsw_params->base.metric_type = + static_cast( + hnsw_params->metric_type()); + c_hnsw_params->base.quantize_type = + static_cast( + 
hnsw_params->quantize_type()); + + // Set HNSW-specific parameters + c_hnsw_params->m = hnsw_params->m(); + c_hnsw_params->ef_construction = + hnsw_params->ef_construction(); + + // Assign to field schema (using pointer assignment) + c_schema->fields[i]->index_params = + reinterpret_cast(c_hnsw_params); + c_schema->fields[i]->index_params->index_type = + ZVEC_INDEX_TYPE_HNSW; + } + break; + } + + case zvec::IndexType::IVF: { + // Cast to IVFIndexParams and convert + auto ivf_params = + std::dynamic_pointer_cast( + index_params); + if (ivf_params) { + auto c_ivf_params = static_cast( + malloc(sizeof(ZVecIVFIndexParams))); + if (!c_ivf_params) { + throw std::bad_alloc(); + } + + // Initialize the base vector index parameters + c_ivf_params->base.base.index_type = ZVEC_INDEX_TYPE_IVF; + c_ivf_params->base.metric_type = + static_cast( + ivf_params->metric_type()); + c_ivf_params->base.quantize_type = + static_cast( + ivf_params->quantize_type()); + + // Set IVF-specific parameters + c_ivf_params->n_list = ivf_params->n_list(); + c_ivf_params->n_iters = ivf_params->n_iters(); + c_ivf_params->use_soar = ivf_params->use_soar(); + + // Assign to field schema (using pointer assignment) + c_schema->fields[i]->index_params = + reinterpret_cast(c_ivf_params); + c_schema->fields[i]->index_params->index_type = + ZVEC_INDEX_TYPE_IVF; + } + break; + } + + case zvec::IndexType::FLAT: { + // Cast to FlatIndexParams and convert + auto flat_params = + std::dynamic_pointer_cast( + index_params); + if (flat_params) { + auto c_flat_params = static_cast( + malloc(sizeof(ZVecFlatIndexParams))); + if (!c_flat_params) { + throw std::bad_alloc(); + } + + // Initialize the base vector index parameters + c_flat_params->base.base.index_type = + ZVEC_INDEX_TYPE_FLAT; + c_flat_params->base.metric_type = + static_cast( + flat_params->metric_type()); + c_flat_params->base.quantize_type = + static_cast( + flat_params->quantize_type()); + + // Flat index has no additional parameters + + // Assign to 
field schema (using pointer assignment) + c_schema->fields[i]->index_params = + reinterpret_cast(c_flat_params); + c_schema->fields[i]->index_params->index_type = + ZVEC_INDEX_TYPE_FLAT; + } + break; + } + + case zvec::IndexType::INVERT: { + // Cast to InvertIndexParams and convert + auto invert_params = + std::dynamic_pointer_cast( + index_params); + if (invert_params) { + auto c_invert_params = + static_cast( + malloc(sizeof(ZVecInvertIndexParams))); + if (!c_invert_params) { + throw std::bad_alloc(); + } + + // Initialize the base index parameters + c_invert_params->base.index_type = ZVEC_INDEX_TYPE_INVERT; + + // Set Invert-specific parameters + c_invert_params->enable_range_optimization = + invert_params->enable_range_optimization(); + c_invert_params->enable_extended_wildcard = + invert_params->enable_extended_wildcard(); + + // Assign to field schema (using pointer assignment) + c_schema->fields[i]->index_params = + reinterpret_cast(c_invert_params); + c_schema->fields[i]->index_params->index_type = + ZVEC_INDEX_TYPE_INVERT; + } + break; + } + + default: + // For undefined or unsupported index types, set to NULL + c_schema->fields[i]->index_params = nullptr; + c_schema->fields[i]->index_params->index_type = + ZVEC_INDEX_TYPE_UNDEFINED; + break; + } + } else { + // No index parameters, set to NULL + c_schema->fields[i]->index_params = nullptr; + } + } catch (const std::bad_alloc &) { + // Clean up already allocated fields + for (size_t j = 0; j <= i; ++j) { + if (c_schema->fields[j]) { + zvec_field_schema_destroy(c_schema->fields[j]); + } + } + free(c_schema->fields); + zvec_free_string(c_schema->name); + free(c_schema); + set_last_error("Failed to allocate memory for field"); + return ZVEC_ERROR_RESOURCE_EXHAUSTED; + } + + ++i; + } + } + + *schema = c_schema; + } + + return error_code;) +} + +ZVecErrorCode zvec_collection_get_options(const ZVecCollection *collection, + ZVecCollectionOptions **options) { + if (!collection || !options) { + 
set_last_error("Invalid arguments"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + ZVEC_TRY_RETURN_ERROR( + "Failed to get collection options", + auto collection_ptr = + reinterpret_cast *>( + collection); + auto result = (*collection_ptr)->Options(); + + if (!result.has_value()) { + set_last_error("Failed to get collection option: " + + result.error().message()); + return ZVEC_ERROR_INTERNAL_ERROR; + } + + // Create and initialize options structure + *options = static_cast( + malloc(sizeof(ZVecCollectionOptions))); + if (!*options) { + set_last_error("Failed to allocate memory for options"); + return ZVEC_ERROR_RESOURCE_EXHAUSTED; + } + + (*options) + ->enable_mmap = result.value().enable_mmap_; + (*options)->max_buffer_size = result.value().max_buffer_size_; + (*options)->read_only = result.value().read_only_; + (*options)->max_doc_count_per_segment = zvec::MAX_DOC_COUNT_PER_SEGMENT; + + return ZVEC_OK;) +} + +ZVecErrorCode zvec_collection_get_stats(const ZVecCollection *collection, + ZVecCollectionStats **stats) { + if (!collection || !stats) { + set_last_error("Invalid arguments"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + ZVEC_TRY_RETURN_ERROR( + "Failed to get detailed collection stats", + auto collection_ptr = + reinterpret_cast *>( + collection); + auto result = (*collection_ptr)->Stats(); + + if (!result.has_value()) { + set_last_error("Failed to get collection stats: " + + result.error().message()); + return ZVEC_ERROR_INTERNAL_ERROR; + } + + *stats = static_cast( + malloc(sizeof(ZVecCollectionStats))); + if (!*stats) { + set_last_error("Failed to allocate memory for stats"); + return ZVEC_ERROR_RESOURCE_EXHAUSTED; + } + + ZVecErrorCode error_code = handle_expected_result(result); + if (error_code == ZVEC_OK) { + (*stats)->doc_count = result.value().doc_count; + (*stats)->index_count = result.value().index_completeness.size(); + if ((*stats)->index_count > 0) { + (*stats)->index_completeness = static_cast( + malloc((*stats)->index_count * 
sizeof(float))); + (*stats)->index_names = static_cast( + malloc((*stats)->index_count * sizeof(ZVecString *))); + if (!(*stats)->index_completeness || !(*stats)->index_names) { + // Either array allocation failed: release everything so the + // caller never receives a half-built stats object. + free((*stats)->index_completeness); + free((*stats)->index_names); + free(*stats); + *stats = nullptr; + set_last_error("Failed to allocate memory for index stats"); + return ZVEC_ERROR_RESOURCE_EXHAUSTED; + } + int i = 0; + for (auto &[name, completeness] : result.value().index_completeness) { + (*stats)->index_completeness[i] = completeness; + (*stats)->index_names[i] = zvec_string_create(name.c_str()); + i++; + } + } else { + // No indexes: the struct came from malloc, so give both array + // pointers a defined value before zvec_collection_stats_destroy + // inspects and frees them. + (*stats)->index_completeness = nullptr; + (*stats)->index_names = nullptr; + } + } else { + (*stats)->index_completeness = nullptr; + (*stats)->index_names = nullptr; + } + + return error_code;) +} + +ZVecCollectionStats *zvec_collection_stats_create(void) { + ZVecCollectionStats *stats = + static_cast(malloc(sizeof(ZVecCollectionStats))); + if (!stats) { + return nullptr; + } + stats->doc_count = 0; + stats->index_count = 0; + stats->index_completeness = nullptr; + stats->index_names = nullptr; + return stats; +} + +void zvec_collection_stats_destroy(ZVecCollectionStats *stats) { + if (stats) { + if (stats->index_names) { + for (size_t i = 0; i < stats->index_count; ++i) { + zvec_free_string(stats->index_names[i]); + } + free(stats->index_names); + } + + if (stats->index_completeness) { + free(stats->index_completeness); + } + + free(stats); + } +} + +// ============================================================================= +// QueryParams functions implementation +// ============================================================================= + +ZVecQueryParams *zvec_query_params_create(ZVecIndexType index_type) { + ZVecQueryParams *params = + static_cast(malloc(sizeof(ZVecQueryParams))); + if (!params) { + set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to allocate memory for ZVecQueryParams", + __FILE__, __LINE__, __FUNCTION__); + return nullptr; + } + params->index_type = index_type; + params->radius = 0.0f; + params->is_linear = false; + params->is_using_refiner = false; + return params; +} + +ZVecHnswQueryParams *zvec_query_params_hnsw_create(ZVecIndexType index_type, + int ef, float radius, + bool is_linear, + bool is_using_refiner) { + ZVecHnswQueryParams *params = +
static_cast(malloc(sizeof(ZVecHnswQueryParams))); + if (!params) { + set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to allocate memory for ZVecHnswQueryParams", + __FILE__, __LINE__, __FUNCTION__); + return nullptr; + } + params->base.index_type = index_type; + params->base.radius = radius; + params->base.is_linear = is_linear; + params->base.is_using_refiner = is_using_refiner; + params->ef = ef; + return params; +} + +ZVecIVFQueryParams *zvec_query_params_ivf_create(ZVecIndexType index_type, + int nprobe, + bool is_using_refiner, + float scale_factor) { + ZVecIVFQueryParams *params = + static_cast(malloc(sizeof(ZVecIVFQueryParams))); + if (!params) { + set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to allocate memory for ZVecIVFQueryParams", + __FILE__, __LINE__, __FUNCTION__); + return nullptr; + } + // malloc returns indeterminate bytes; zero-fill so that any member not + // assigned below has a defined value instead of garbage. + memset(params, 0, sizeof(*params)); + params->base.index_type = index_type; + params->base.is_using_refiner = is_using_refiner; + params->nprobe = nprobe; + params->scale_factor = scale_factor; + return params; +} + +ZVecFlatQueryParams *zvec_query_params_flat_create(ZVecIndexType index_type, + bool is_using_refiner, + float scale_factor) { + ZVecFlatQueryParams *params = + static_cast(malloc(sizeof(ZVecFlatQueryParams))); + if (!params) { + set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to allocate memory for ZVecFlatQueryParams", + __FILE__, __LINE__, __FUNCTION__); + return nullptr; + } + // Zero-fill first: only some members are assigned below. + memset(params, 0, sizeof(*params)); + params->base.index_type = index_type; + params->base.is_using_refiner = is_using_refiner; + params->scale_factor = scale_factor; + return params; +} + +ZVecQueryParamsUnion *zvec_query_params_union_create(ZVecIndexType index_type) { + ZVecQueryParamsUnion *params = + static_cast(malloc(sizeof(ZVecQueryParamsUnion))); + if (!params) { + set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to allocate memory for ZVecQueryParamsUnion", + __FILE__, __LINE__, __FUNCTION__); + return nullptr; + } + // Zero-fill the whole union so members the per-type defaults below do + // not touch are defined rather than indeterminate. + memset(params, 0, sizeof(*params)); + params->index_type = index_type; + + switch (index_type) { +
case ZVEC_INDEX_TYPE_HNSW: + params->params.hnsw_params.base.index_type = index_type; + params->params.hnsw_params.ef = + zvec::core_interface::kDefaultHnswEfSearch; + break; + case ZVEC_INDEX_TYPE_IVF: + params->params.ivf_params.base.index_type = index_type; + params->params.ivf_params.nprobe = 10; + params->params.ivf_params.scale_factor = 10.0f; + break; + case ZVEC_INDEX_TYPE_FLAT: + params->params.flat_params.base.index_type = index_type; + params->params.flat_params.scale_factor = 10.0f; + break; + default: + params->params.base_params.index_type = index_type; + break; + } + + return params; +} + +void zvec_query_params_destroy(ZVecQueryParams *params) { + if (params) { + free(params); + } +} + +void zvec_query_params_hnsw_destroy(ZVecHnswQueryParams *params) { + if (params) { + free(params); + } +} + +void zvec_query_params_ivf_destroy(ZVecIVFQueryParams *params) { + if (params) { + free(params); + } +} + +void zvec_query_params_flat_destroy(ZVecFlatQueryParams *params) { + if (params) { + free(params); + } +} + +void zvec_query_params_union_destroy(ZVecQueryParamsUnion *params) { + if (params) { + free(params); + } +} + +ZVecErrorCode zvec_query_params_set_index_type(ZVecQueryParams *params, + ZVecIndexType index_type) { + if (!params) { + set_last_error("Query params pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + params->index_type = index_type; + return ZVEC_OK; +} + +ZVecErrorCode zvec_query_params_set_radius(ZVecQueryParams *params, + float radius) { + if (!params) { + set_last_error("Query params pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + params->radius = radius; + return ZVEC_OK; +} + +ZVecErrorCode zvec_query_params_set_is_linear(ZVecQueryParams *params, + bool is_linear) { + if (!params) { + set_last_error("Query params pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + params->is_linear = is_linear; + return ZVEC_OK; +} + +ZVecErrorCode zvec_query_params_set_is_using_refiner(ZVecQueryParams 
*params, + bool is_using_refiner) { + if (!params) { + set_last_error("Query params pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + params->is_using_refiner = is_using_refiner; + return ZVEC_OK; +} + +ZVecErrorCode zvec_query_params_hnsw_set_ef(ZVecHnswQueryParams *params, + int ef) { + if (!params) { + set_last_error("HNSW query params pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + params->ef = ef; + return ZVEC_OK; +} + +ZVecErrorCode zvec_query_params_ivf_set_nprobe(ZVecIVFQueryParams *params, + int nprobe) { + if (!params) { + set_last_error("IVF query params pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + params->nprobe = nprobe; + return ZVEC_OK; +} + +ZVecErrorCode zvec_query_params_ivf_set_scale_factor(ZVecIVFQueryParams *params, + float scale_factor) { + if (!params) { + set_last_error("Query params pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + params->scale_factor = scale_factor; + return ZVEC_OK; +} + + +// ============================================================================= +// Index Interface Implementation +// ============================================================================= + +ZVecErrorCode zvec_collection_create_index( + ZVecCollection *collection, const char *column_name, + const ZVecIndexParams *index_params) { + if (!collection || !column_name || !index_params) { + set_last_error( + "Invalid arguments: collection, column_name, and index_params cannot " + "be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + ZVEC_TRY_RETURN_ERROR("Exception in zvec_collection_create_index", + auto coll_ptr = + reinterpret_cast *>(collection); + std::string field_name_str(column_name); + + switch (index_params->index_type) { + case ZVEC_INDEX_TYPE_INVERT: { + const ZVecInvertIndexParams *invert_params = + &index_params->params.invert_params; + auto cpp_params = std::make_shared( + invert_params->enable_range_optimization, + invert_params->enable_extended_wildcard); + 
auto status = (*coll_ptr)->CreateIndex(field_name_str, cpp_params); + return status_to_error_code(status); +} + +case ZVEC_INDEX_TYPE_HNSW: { + const ZVecHnswIndexParams *hnsw_params = &index_params->params.hnsw_params; + auto metric = convert_metric_type(hnsw_params->base.metric_type); + auto quantize = convert_quantize_type(hnsw_params->base.quantize_type); + auto cpp_params = std::make_shared( + metric, hnsw_params->m, hnsw_params->ef_construction, quantize); + auto status = (*coll_ptr)->CreateIndex(field_name_str, cpp_params); + return status_to_error_code(status); +} + +case ZVEC_INDEX_TYPE_FLAT: { + const ZVecFlatIndexParams *flat_params = &index_params->params.flat_params; + auto metric = convert_metric_type(flat_params->base.metric_type); + auto quantize = convert_quantize_type(flat_params->base.quantize_type); + auto cpp_params = std::make_shared(metric, quantize); + auto status = (*coll_ptr)->CreateIndex(field_name_str, cpp_params); + return status_to_error_code(status); +} + +case ZVEC_INDEX_TYPE_IVF: { + const ZVecIVFIndexParams *ivf_params = &index_params->params.ivf_params; + auto metric = convert_metric_type(ivf_params->base.metric_type); + auto quantize = convert_quantize_type(ivf_params->base.quantize_type); + auto cpp_params = std::make_shared( + metric, ivf_params->n_list, ivf_params->n_iters, ivf_params->use_soar, + quantize); + auto status = (*coll_ptr)->CreateIndex(field_name_str, cpp_params); + return status_to_error_code(status); +} + +default: { + set_last_error("Unsupported index type"); + return ZVEC_ERROR_INVALID_ARGUMENT; +} + } + ) + } + + ZVecErrorCode zvec_collection_create_index_with_params( + ZVecCollection *collection, const char *field_name, + const void *index_params) { + if (!collection || !field_name || !index_params) { + set_last_error("Invalid arguments"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + auto coll_ptr = + reinterpret_cast *>(collection); + std::string field_name_str(field_name); + + const ZVecBaseIndexParams 
*base_params = + static_cast(index_params); + + ZVEC_TRY_RETURN_ERROR("Exception occurred", + switch (base_params->index_type) { + case ZVEC_INDEX_TYPE_INVERT: { + const ZVecInvertIndexParams *invert_params = + static_cast(index_params); + auto cpp_params = std::make_shared( + invert_params->enable_range_optimization, + invert_params->enable_extended_wildcard); + auto status = (*coll_ptr)->CreateIndex(field_name_str, cpp_params); + return status_to_error_code(status); + } + +case ZVEC_INDEX_TYPE_HNSW: { + const ZVecHnswIndexParams *hnsw_params = + static_cast(index_params); + auto metric = convert_metric_type(hnsw_params->base.metric_type); + auto quantize = convert_quantize_type(hnsw_params->base.quantize_type); + auto cpp_params = std::make_shared( + metric, hnsw_params->m, hnsw_params->ef_construction, quantize); + auto status = (*coll_ptr)->CreateIndex(field_name_str, cpp_params); + return status_to_error_code(status); +} + +case ZVEC_INDEX_TYPE_FLAT: { + const ZVecFlatIndexParams *flat_params = + static_cast(index_params); + auto metric = convert_metric_type(flat_params->base.metric_type); + auto quantize = convert_quantize_type(flat_params->base.quantize_type); + auto cpp_params = std::make_shared(metric, quantize); + auto status = (*coll_ptr)->CreateIndex(field_name_str, cpp_params); + return status_to_error_code(status); +} + +case ZVEC_INDEX_TYPE_IVF: { + const ZVecIVFIndexParams *ivf_params = + static_cast(index_params); + auto metric = convert_metric_type(ivf_params->base.metric_type); + auto quantize = convert_quantize_type(ivf_params->base.quantize_type); + auto cpp_params = std::make_shared( + metric, ivf_params->n_list, ivf_params->n_iters, ivf_params->use_soar, + quantize); + auto status = (*coll_ptr)->CreateIndex(field_name_str, cpp_params); + return status_to_error_code(status); +} + +default: { + set_last_error("Unsupported index type"); + return ZVEC_ERROR_INVALID_ARGUMENT; +} + } + ) + } + + ZVecErrorCode zvec_collection_create_hnsw_index( + 
ZVecCollection *collection, const char *field_name, + const ZVecHnswIndexParams *hnsw_params) { + if (!hnsw_params) { + set_last_error("Invalid HNSW parameters"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + return zvec_collection_create_index_with_params(collection, field_name, + hnsw_params); + } + + ZVecErrorCode zvec_collection_create_flat_index( + ZVecCollection *collection, const char *field_name, + const ZVecFlatIndexParams *flat_params) { + if (!flat_params) { + set_last_error("Invalid Flat parameters"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + return zvec_collection_create_index_with_params(collection, field_name, + flat_params); + } + + ZVecErrorCode zvec_collection_create_ivf_index( + ZVecCollection *collection, const char *field_name, + const ZVecIVFIndexParams *ivf_params) { + if (!ivf_params) { + set_last_error("Invalid IVF parameters"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + return zvec_collection_create_index_with_params(collection, field_name, + ivf_params); + } + + ZVecErrorCode zvec_collection_create_invert_index( + ZVecCollection *collection, const char *field_name, + const ZVecInvertIndexParams *invert_params) { + if (!invert_params) { + set_last_error("Invalid Invert parameters"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + return zvec_collection_create_index_with_params(collection, field_name, + invert_params); + } + + ZVecErrorCode zvec_collection_drop_index(ZVecCollection *collection, + const char *column_name) { + if (!collection || !column_name) { + set_last_error( + "Invalid arguments: collection and column_name cannot be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + ZVEC_TRY_RETURN_ERROR( + "Exception occurred", + auto coll_ptr = + reinterpret_cast *>(collection); + zvec::Status status = (*coll_ptr)->DropIndex(column_name); + if (!status.ok()) { set_last_error(status.message()); } + + return status_to_error_code(status);) + } + + ZVecErrorCode zvec_collection_optimize(ZVecCollection *collection) { + if 
(!collection) { + set_last_error("Invalid argument: collection cannot be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + ZVEC_TRY_RETURN_ERROR( + "Exception occurred", + auto coll_ptr = + reinterpret_cast *>(collection); + zvec::Status status = (*coll_ptr)->Optimize(); + if (!status.ok()) { set_last_error(status.message()); } + + return status_to_error_code(status);) + } + + + // ============================================================================= + // Column Interface Implementation + // ============================================================================= + + ZVecErrorCode zvec_collection_add_column(ZVecCollection *collection, + const ZVecFieldSchema *field_schema, + const char *expression) { + if (!collection || !field_schema) { + set_last_error( + "Invalid arguments: collection and field_schema cannot be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + ZVEC_TRY_RETURN_ERROR( + "Exception occurred", + auto coll_ptr = + reinterpret_cast *>(collection); + + zvec::DataType data_type = convert_data_type(field_schema->data_type); + if (data_type == zvec::DataType::UNDEFINED) { + set_last_error("Invalid data type"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + std::string field_name(field_schema->name->data, + field_schema->name->length); + bool is_vector_field = check_is_vector_field(*field_schema); + zvec::FieldSchema::Ptr schema; + if (is_vector_field) { + schema = std::make_shared(field_name, data_type, + field_schema->dimension, + field_schema->nullable); + } else { + schema = std::make_shared(field_name, data_type, + field_schema->nullable); + } + + std::string expr = expression ? 
expression : ""; + zvec::Status status = (*coll_ptr)->AddColumn(schema, expr); + + if (!status.ok()) { set_last_error(status.message()); } + + return status_to_error_code(status);) + } + + ZVecErrorCode zvec_collection_drop_column(ZVecCollection *collection, + const char *column_name) { + if (!collection || !column_name) { + set_last_error( + "Invalid arguments: collection and column_name cannot be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + ZVEC_TRY_RETURN_ERROR( + "Exception occurred", + auto coll_ptr = + reinterpret_cast *>(collection); + zvec::Status status = (*coll_ptr)->DropColumn(column_name); + + if (!status.ok()) { set_last_error(status.message()); } + + return status_to_error_code(status);) + } + + ZVecErrorCode zvec_collection_alter_column( + ZVecCollection *collection, const char *column_name, const char *new_name, + const ZVecFieldSchema *new_schema) { + if (!collection || !column_name) { + set_last_error( + "Invalid arguments: collection and column_name cannot be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + ZVEC_TRY_RETURN_ERROR( + "Exception occurred", + auto coll_ptr = + reinterpret_cast *>(collection); + std::string rename = new_name ? 
new_name : ""; + + zvec::FieldSchema::Ptr schema = nullptr; + if (new_schema) { + auto status = + convert_zvec_field_schema_to_internal(*new_schema, schema); + if (!status.ok()) { + set_last_error(status.message()); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + } + + zvec::Status status = + (*coll_ptr)->AlterColumn(column_name, rename, schema); + if (!status.ok()) { set_last_error(status.message()); } + + return status_to_error_code(status);) + } + + // ============================================================================= + // DML Interface Implementation + // ============================================================================= + + ZVecErrorCode zvec_collection_insert(ZVecCollection *collection, + const ZVecDoc **docs, size_t doc_count, + size_t *success_count, + size_t *error_count) { + if (!collection || !docs || doc_count == 0 || !success_count || + !error_count) { + set_last_error( + "Invalid arguments: collection, docs, doc_count, success_count and " + "error_count cannot be null/zero"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + ZVEC_TRY_RETURN_ERROR( + "Exception in zvec_collection_insert_docs", + auto coll_ptr = + reinterpret_cast *>(collection); + + std::vector internal_docs = + convert_zvec_docs_to_internal(docs, doc_count); + + auto result = (*coll_ptr)->Insert(internal_docs); + ZVecErrorCode error_code = handle_expected_result(result); + + if (error_code == ZVEC_OK) { + *success_count = 0; + *error_count = 0; + for (const auto &status : result.value()) { + if (status.ok()) { + (*success_count)++; + } else { + (*error_count)++; + } + } + } else { + *success_count = 0; + *error_count = doc_count; + } + + return error_code;) + } + + ZVecErrorCode zvec_collection_update(ZVecCollection *collection, + const ZVecDoc **docs, size_t doc_count, + size_t *success_count, + size_t *error_count) { + if (!collection || !docs || doc_count == 0 || !success_count || + !error_count) { + set_last_error( + "Invalid arguments: collection, docs, doc_count, 
success_count and " + "error_count cannot be null/zero"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + ZVEC_TRY_RETURN_ERROR( + "Exception occurred", + auto coll_ptr = + reinterpret_cast *>(collection); + + std::vector internal_docs = + convert_zvec_docs_to_internal(docs, doc_count); + + auto result = (*coll_ptr)->Update(internal_docs); + ZVecErrorCode error_code = handle_expected_result(result); + + if (error_code == ZVEC_OK) { + *success_count = 0; + *error_count = 0; + for (const auto &status : result.value()) { + if (status.ok()) { + (*success_count)++; + } else { + (*error_count)++; + } + } + } + + return error_code;) + } + + ZVecErrorCode zvec_collection_upsert(ZVecCollection *collection, + const ZVecDoc **docs, size_t doc_count, + size_t *success_count, + size_t *error_count) { + if (!collection || !docs || doc_count == 0 || !success_count || + !error_count) { + set_last_error( + "Invalid arguments: collection, docs, doc_count, success_count and " + "error_count cannot be null/zero"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + ZVEC_TRY_RETURN_ERROR( + "Exception occurred", + auto coll_ptr = + reinterpret_cast *>(collection); + + std::vector internal_docs = + convert_zvec_docs_to_internal(docs, doc_count); + + auto result = (*coll_ptr)->Upsert(internal_docs); + ZVecErrorCode error_code = handle_expected_result(result); + + if (error_code == ZVEC_OK) { + *success_count = 0; + *error_count = 0; + for (const auto &status : result.value()) { + if (status.ok()) { + (*success_count)++; + } else { + (*error_count)++; + } + } + } + + return error_code;) + } + + ZVecErrorCode zvec_collection_delete(ZVecCollection *collection, + const char *const *pks, size_t pk_count, + size_t *success_count, + size_t *error_count) { + if (!collection || !pks || pk_count == 0 || !success_count || + !error_count) { + set_last_error( + "Invalid arguments: collection, pks, pk_count, success_count and " + "error_count cannot be null/zero"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + 
+ ZVEC_TRY_RETURN_ERROR( + "Exception occurred", + auto coll_ptr = + reinterpret_cast *>(collection); + + std::vector primary_keys; primary_keys.reserve(pk_count); + for (size_t i = 0; i < pk_count; ++i) { + if (pks[i]) { + primary_keys.emplace_back(pks[i]); + } + } + + auto result = (*coll_ptr)->Delete(primary_keys); + ZVecErrorCode error_code = handle_expected_result(result); + + if (error_code == ZVEC_OK) { + *success_count = 0; + *error_count = 0; + for (const auto &status : result.value()) { + if (status.ok()) { + (*success_count)++; + } else { + (*error_count)++; + } + } + } + + return error_code;) + } + + ZVecErrorCode zvec_collection_delete_by_filter(ZVecCollection *collection, + const char *filter) { + if (!collection || !filter) { + set_last_error("Invalid arguments: collection,filter cannot be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + ZVEC_TRY_RETURN_ERROR( + "Exception occurred", + auto coll_ptr = + reinterpret_cast *>(collection); + + auto status = (*coll_ptr)->DeleteByFilter(filter); if (!status.ok()) { + set_last_error(status.message()); + return status_to_error_code(status); + } return ZVEC_OK;) + } + + // ============================================================================= + // Data query interface implementation + // ============================================================================= + + // Helper function to convert common query parameters + void convert_common_query_params(zvec::VectorQuery &internal_query, + const ZVecVectorQuery *query) { + internal_query.topk_ = query->topk; + internal_query.field_name_ = + std::string(query->field_name.data, query->field_name.length); + internal_query.filter_ = + std::string(query->filter.data, query->filter.length); + internal_query.include_vector_ = query->include_vector; + internal_query.include_doc_id_ = query->include_doc_id; + + // Binary data conversion (query_vector) + if (query->query_vector.data && query->query_vector.length > 0) { + 
internal_query.query_vector_.assign( + reinterpret_cast(query->query_vector.data), + query->query_vector.length); + } + + // Sparse vector data conversion + if (query->query_sparse_indices.data && + query->query_sparse_indices.length > 0) { + internal_query.query_sparse_indices_.assign( + reinterpret_cast(query->query_sparse_indices.data), + query->query_sparse_indices.length); + } + + if (query->query_sparse_values.data && + query->query_sparse_values.length > 0) { + internal_query.query_sparse_values_.assign( + reinterpret_cast(query->query_sparse_values.data), + query->query_sparse_values.length); + } + + // Output fields conversion + if (query->output_fields.count > 0) { + internal_query.output_fields_ = std::vector(); + for (size_t i = 0; i < query->output_fields.count; ++i) { + internal_query.output_fields_->emplace_back( + query->output_fields.strings[i].data, + query->output_fields.strings[i].length); + } + } + } + + // Helper function to convert query parameters + void convert_query_params(zvec::VectorQuery &internal_query, + const ZVecVectorQuery *query) { + convert_common_query_params(internal_query, query); + + // QueryParams conversion + if (query->query_params) { + auto query_params = std::make_shared( + static_cast(query->query_params->index_type)); + + switch (query->query_params->index_type) { + case ZVEC_INDEX_TYPE_HNSW: { + auto hnsw_params = std::make_shared( + query->query_params->params.hnsw_params.ef, + query->query_params->params.hnsw_params.base.radius, + query->query_params->params.hnsw_params.base.is_linear, + query->query_params->params.hnsw_params.base.is_using_refiner); + internal_query.query_params_ = hnsw_params; + break; + } + case ZVEC_INDEX_TYPE_IVF: { + auto ivf_params = std::make_shared( + query->query_params->params.ivf_params.nprobe, + query->query_params->params.ivf_params.base.is_using_refiner, + query->query_params->params.ivf_params.scale_factor); + internal_query.query_params_ = ivf_params; + break; + } + case 
ZVEC_INDEX_TYPE_FLAT: { + auto flat_params = std::make_shared( + query->query_params->params.flat_params.base.is_using_refiner, + query->query_params->params.flat_params.scale_factor); + internal_query.query_params_ = flat_params; + break; + } + default: { + query_params->set_radius( + query->query_params->params.base_params.radius); + query_params->set_is_linear( + query->query_params->params.base_params.is_linear); + query_params->set_is_using_refiner( + query->query_params->params.base_params.is_using_refiner); + internal_query.query_params_ = query_params; + break; + } + } + } + } + + // Helper function to convert group by query parameters + void convert_groupby_query_params(zvec::GroupByVectorQuery &internal_query, + const ZVecGroupByVectorQuery *query) { + internal_query.field_name_ = + std::string(query->field_name.data, query->field_name.length); + internal_query.filter_ = + std::string(query->filter.data, query->filter.length); + internal_query.include_vector_ = query->include_vector; + internal_query.group_by_field_name_ = std::string( + query->group_by_field_name.data, query->group_by_field_name.length); + internal_query.group_count_ = query->group_count; + internal_query.group_topk_ = query->group_topk; + + if (query->query_vector.data && query->query_vector.length > 0) { + internal_query.query_vector_.assign( + reinterpret_cast(query->query_vector.data), + query->query_vector.length); + } + + if (query->query_sparse_indices.data && + query->query_sparse_indices.length > 0) { + internal_query.query_sparse_indices_.assign( + reinterpret_cast(query->query_sparse_indices.data), + query->query_sparse_indices.length); + } + + if (query->query_sparse_values.data && + query->query_sparse_values.length > 0) { + internal_query.query_sparse_values_.assign( + reinterpret_cast(query->query_sparse_values.data), + query->query_sparse_values.length); + } + + if (query->output_fields.count > 0) { + if (!internal_query.output_fields_.has_value()) { + 
internal_query.output_fields_ = std::vector(); + } + for (size_t i = 0; i < query->output_fields.count; ++i) { + internal_query.output_fields_->push_back( + std::string(query->output_fields.strings[i].data, + query->output_fields.strings[i].length)); + } + } + + if (query->query_params) { + auto query_params = std::make_shared( + static_cast(query->query_params->index_type)); + + switch (query->query_params->index_type) { + case ZVEC_INDEX_TYPE_HNSW: { + auto hnsw_params = std::make_shared( + query->query_params->params.hnsw_params.ef, + query->query_params->params.hnsw_params.base.radius, + query->query_params->params.hnsw_params.base.is_linear, + query->query_params->params.hnsw_params.base.is_using_refiner); + internal_query.query_params_ = hnsw_params; + break; + } + case ZVEC_INDEX_TYPE_IVF: { + auto ivf_params = std::make_shared( + query->query_params->params.ivf_params.nprobe, + query->query_params->params.ivf_params.base.is_using_refiner, + query->query_params->params.ivf_params.scale_factor); + internal_query.query_params_ = ivf_params; + break; + } + case ZVEC_INDEX_TYPE_FLAT: { + auto flat_params = std::make_shared( + query->query_params->params.flat_params.base.is_using_refiner, + query->query_params->params.flat_params.scale_factor); + internal_query.query_params_ = flat_params; + break; + } + default: { + query_params->set_radius( + query->query_params->params.base_params.radius); + query_params->set_is_linear( + query->query_params->params.base_params.is_linear); + query_params->set_is_using_refiner( + query->query_params->params.base_params.is_using_refiner); + internal_query.query_params_ = query_params; + break; + } + } + } + } + + // Helper function to convert document results to C API format + ZVecErrorCode convert_document_results( + const std::vector> &query_results, + ZVecDoc ***results, size_t *result_count) { + *result_count = query_results.size(); + *results = + static_cast(malloc(*result_count * sizeof(ZVecDoc *))); + + if (!*results) { + 
set_last_error("Failed to allocate memory for query results"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + + for (size_t i = 0; i < *result_count; ++i) { + const auto &internal_doc = query_results[i]; + // Create new document wrapper + ZVecDoc *c_doc = zvec_doc_create(); + if (!c_doc) { + // Clean up previously allocated documents + for (size_t j = 0; j < i; ++j) { + zvec_doc_destroy((*results)[j]); + } + free(*results); + *results = nullptr; + *result_count = 0; + set_last_error("Failed to create document wrapper"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + + // Copy the C++ document to our wrapper + auto doc_ptr = + reinterpret_cast *>(c_doc); + *(*doc_ptr) = *internal_doc; // Copy assignment + (*results)[i] = c_doc; // Store the pointer, not dereference + } + + return ZVEC_OK; + } + + // Helper function to convert grouped document results to C API format + ZVecErrorCode convert_grouped_document_results( + const std::vector &group_results, ZVecDoc ***results, + ZVecString ***group_by_values, size_t *result_count) { + // Calculate total document count across all groups + size_t total_docs = 0; + for (const auto &group_result : group_results) { + total_docs += group_result.docs_.size(); + } + + // Allocate memory for document pointers and group by values + *result_count = total_docs; + *results = + static_cast(malloc(*result_count * sizeof(ZVecDoc *))); + *group_by_values = static_cast( + malloc(group_results.size() * sizeof(ZVecString *))); + + if (!*results) { + set_last_error("Failed to allocate memory for query results"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + + // Convert C++ grouped results to C API format + size_t doc_index = 0; + for (const auto &group_result : group_results) { + for (const auto &internal_doc : group_result.docs_) { + if (doc_index >= *result_count) { + break; + } + + // Create new document wrapper + ZVecDoc *c_doc = zvec_doc_create(); + if (!c_doc) { + // Clean up previously allocated documents + for (size_t j = 0; j < doc_index; ++j) { + 
zvec_doc_destroy((*results)[j]); + } + free(*results); + *results = nullptr; + *result_count = 0; + set_last_error("Failed to create document wrapper"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + + // Copy the C++ document to our wrapper + auto doc_ptr = + reinterpret_cast *>(c_doc); + *(*doc_ptr) = internal_doc; // Copy assignment + + ZVecString *c_group_value = + zvec_string_create(group_result.group_by_value_.c_str()); + if (!c_group_value) { + for (size_t j = 0; j < doc_index; ++j) { + zvec_doc_destroy((*results)[j]); + zvec_free_string((*group_by_values)[doc_index]); + } + free(*results); + *results = nullptr; + *result_count = 0; + set_last_error("Failed to create string wrapper"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + + (*group_by_values)[doc_index] = c_group_value; + (*results)[doc_index] = c_doc; + ++doc_index; + } + } + + return ZVEC_OK; + } + + // Helper function to convert fetched document results to C API format + ZVecErrorCode convert_fetched_document_results(const zvec::DocPtrMap &doc_map, + ZVecDoc ***results, + size_t *doc_count) { + // Calculate actual document count (some PKs might not exist) + size_t actual_count = 0; + for (const auto &[pk, doc_ptr] : doc_map) { + if (doc_ptr) { + actual_count++; + } + } + + // Allocate memory for document pointers + *doc_count = actual_count; + if (*doc_count == 0) { + *results = nullptr; + return ZVEC_OK; + } + + *results = static_cast(malloc(*doc_count * sizeof(ZVecDoc *))); + if (!*results) { + set_last_error("Failed to allocate memory for document pointers"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + + // Convert C++ DocPtrMap to C ZVecDoc pointer array + size_t index = 0; + for (const auto &[pk, doc_ptr] : doc_map) { + if (doc_ptr && index < *doc_count) { + // Create new document wrapper + ZVecDoc *c_doc = zvec_doc_create(); + if (!c_doc) { + // Clean up previously allocated documents + for (size_t j = 0; j < index; ++j) { + zvec_doc_destroy((*results)[j]); + } + free(*results); + *results = nullptr; + 
*doc_count = 0; + set_last_error("Failed to create document wrapper"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + + // Copy the C++ document to our wrapper + auto cpp_doc_ptr = + reinterpret_cast *>(c_doc); + *(*cpp_doc_ptr) = *doc_ptr; // Copy assignment + + // Set the primary key explicitly + zvec_doc_set_pk(c_doc, pk.c_str()); + + (*results)[index] = c_doc; + ++index; + } + } + + return ZVEC_OK; + } + + ZVecErrorCode zvec_collection_query(const ZVecCollection *collection, + const ZVecVectorQuery *query, + ZVecDoc ***results, + size_t *result_count) { + if (!collection || !query || !results || !result_count) { + set_last_error( + "Invalid arguments: collection, query, results and result_count " + "cannot " + "be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + ZVEC_TRY_RETURN_ERROR( + "Exception occurred", + auto coll_ptr = + reinterpret_cast *>( + collection); + + // Convert query parameters using helper function + zvec::VectorQuery internal_query; + convert_query_params(internal_query, query); + + auto result = (*coll_ptr)->Query(internal_query); + ZVecErrorCode error_code = handle_expected_result(result); + + if (error_code == ZVEC_OK) { + const auto &query_results = result.value(); + error_code = + convert_document_results(query_results, results, result_count); + } else { + *results = nullptr; + *result_count = 0; + } + + return error_code;) + } + + ZVecErrorCode zvec_collection_query_by_group( + const ZVecCollection *collection, const ZVecGroupByVectorQuery *query, + ZVecDoc ***results, ZVecString ***group_by_values, size_t *result_count) { + if (!collection || !query || !results || !group_by_values || + !result_count) { + set_last_error( + "Invalid arguments: collection, query, results, group_by_values and " + "result_count cannot " + "be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + ZVEC_TRY_RETURN_ERROR( + "Exception occurred", + auto coll_ptr = + reinterpret_cast *>( + collection); + + zvec::GroupByVectorQuery internal_query; + 
convert_groupby_query_params(internal_query, query); + + auto result = (*coll_ptr)->GroupByQuery(internal_query); + ZVecErrorCode error_code = handle_expected_result(result); + + if (error_code == ZVEC_OK) { + const auto &group_results = result.value(); + error_code = convert_grouped_document_results( + group_results, results, group_by_values, result_count); + } else { + *results = nullptr; + *group_by_values = nullptr; + *result_count = 0; + } + + return error_code;) + } + + ZVecErrorCode zvec_collection_fetch(ZVecCollection *collection, + const char *const *pks, size_t pk_count, + ZVecDoc ***results, size_t *doc_count) { + if (!collection || !pks || !results || !doc_count) { + set_last_error( + "Invalid arguments: collection, pks, results and doc_count cannot " + "be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + // Handle empty case + if (pk_count == 0) { + *results = nullptr; + *doc_count = 0; + return ZVEC_OK; + } + + ZVEC_TRY_RETURN_ERROR( + "Exception in zvec_collection_fetch", + auto coll_ptr = + reinterpret_cast *>( + collection); + + // Convert C array to C++ vector + std::vector pk_vector; pk_vector.reserve(pk_count); + for (size_t i = 0; i < pk_count; ++i) { + if (pks[i]) { + pk_vector.emplace_back(pks[i]); + } else { + set_last_error("Null primary key at index " + std::to_string(i)); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + } + + // Call C++ fetch method + auto result = (*coll_ptr)->Fetch(pk_vector); + if (!result.has_value()) { + set_last_error("Failed to fetch documents: " + + result.error().message()); + return ZVEC_ERROR_INTERNAL_ERROR; + } + + const auto &doc_map = result.value(); + return convert_fetched_document_results(doc_map, results, doc_count);) + } diff --git a/src/db/CMakeLists.txt b/src/db/CMakeLists.txt index 765a1b4a..0384659b 100644 --- a/src/db/CMakeLists.txt +++ b/src/db/CMakeLists.txt @@ -14,11 +14,10 @@ cc_directory(sqlengine) file(GLOB_RECURSE ALL_DB_SRCS *.cc *.c *.h) cc_library( - NAME zvec_db STATIC STRICT 
SRCS_NO_GLOB + NAME zvec_db STATIC STRICT SRCS_NO_GLOB PACKED SRCS ${ALL_DB_SRCS} ${CMAKE_CURRENT_BINARY_DIR}/proto/zvec.pb.cc INCS . ${CMAKE_CURRENT_BINARY_DIR} - PUBINCS ${PROJECT_ROOT_DIR}/src/include - LIBS + LIBS zvec_ailego zvec_core glog diff --git a/src/include/zvec/c_api.h b/src/include/zvec/c_api.h new file mode 100644 index 00000000..b81ee860 --- /dev/null +++ b/src/include/zvec/c_api.h @@ -0,0 +1,2329 @@ +// Copyright 2025-present the zvec project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef ZVEC_C_API_H +#define ZVEC_C_API_H + +#include +#include +#include +#include + +// ============================================================================= +// API Export Control +// ============================================================================= + +#if defined(_WIN32) || defined(__CYGWIN__) +#ifdef ZVEC_BUILD_SHARED +#define ZVEC_EXPORT __declspec(dllexport) +#elif defined(ZVEC_USE_SHARED) +#define ZVEC_EXPORT __declspec(dllimport) +#else +#define ZVEC_EXPORT +#endif +#define ZVEC_CALL __cdecl +#else +#if __GNUC__ >= 4 +#define ZVEC_EXPORT __attribute__((visibility("default"))) +#else +#define ZVEC_EXPORT +#endif +#define ZVEC_CALL +#endif + +#ifdef __cplusplus +extern "C" { +#endif + + +// ============================================================================= +// Version Information +// ============================================================================= + +/** @brief Major version number */ +#define ZVEC_VERSION_MAJOR 0 + +/** @brief Minor version number */ +#define ZVEC_VERSION_MINOR 3 + +/** @brief Patch version number */ +#define ZVEC_VERSION_PATCH 0 + +/** @brief Full version string */ +#define ZVEC_VERSION_STRING "0.3.0" + +/** + * @brief Get library version information + * + * Return format: "{base_version}[-{git_info}] (built {build_time})" + * Example: "0.3.0-g3f8a2b1 (built 2025-05-13 10:30:45)" + * + * @return const char* Version string, managed internally by the library, caller + * should not free + */ +ZVEC_EXPORT const char *ZVEC_CALL zvec_get_version(void); + +/** + * @brief Check API version compatibility + * + * Check if the current library version meets the specified minimum version + * requirements Following semantic versioning specification: MAJOR.MINOR.PATCH + * + * @param major Required major version number + * @param minor Required minor version number + * @param patch Required patch version number + * @return bool Returns true if compatible, false otherwise + */ +ZVEC_EXPORT bool ZVEC_CALL 
zvec_check_version(int major, int minor, int patch); + +/** + * @brief Get major version number + * + * @return int Major version number + */ +ZVEC_EXPORT int ZVEC_CALL zvec_get_version_major(void); + +/** + * @brief Get minor version number + * + * @return int Minor version number + */ +ZVEC_EXPORT int ZVEC_CALL zvec_get_version_minor(void); + + +/** + * @brief Get patch version number + * + * @return int Patch version number + */ +ZVEC_EXPORT int ZVEC_CALL zvec_get_version_patch(void); + + +// ============================================================================= +// Error Code Definitions +// ============================================================================= + +/** + * @brief ZVec C API error code enumeration + */ +typedef enum { + ZVEC_OK = 0, /**< Success */ + ZVEC_ERROR_NOT_FOUND = 1, /**< Resource not found */ + ZVEC_ERROR_ALREADY_EXISTS = 2, /**< Resource already exists */ + ZVEC_ERROR_INVALID_ARGUMENT = 3, /**< Invalid argument */ + ZVEC_ERROR_PERMISSION_DENIED = 4, /**< Permission denied */ + ZVEC_ERROR_FAILED_PRECONDITION = 5, /**< Failed precondition */ + ZVEC_ERROR_RESOURCE_EXHAUSTED = 6, /**< Resource exhausted */ + ZVEC_ERROR_UNAVAILABLE = 7, /**< Unavailable */ + ZVEC_ERROR_INTERNAL_ERROR = 8, /**< Internal error */ + ZVEC_ERROR_NOT_SUPPORTED = 9, /**< Unsupported operation */ + ZVEC_ERROR_UNKNOWN = 10 /**< Unknown error */ +} ZVecErrorCode; + +/** + * @brief Error details structure + */ +typedef struct { + ZVecErrorCode code; /**< Error code */ + const char *message; /**< Error message */ + const char *file; /**< File where error occurred */ + int line; /**< Line number where error occurred */ + const char *function; /**< Function where error occurred */ +} ZVecErrorDetails; + +/** + * @brief Get detailed information of the last error + * @param[out] error_details Pointer to error details structure + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_get_last_error_details(ZVecErrorDetails 
*error_details); + +/** + * @brief Get last error message + * @param[out] error_msg Returned error message string (needs to be freed by + * calling free) + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_get_last_error(char **error_msg); + +/** + * @brief Clear error status + */ +ZVEC_EXPORT void ZVEC_CALL zvec_clear_error(void); + + +// ============================================================================= +// Basic Data Structures +// ============================================================================= + +/** + * @brief String view structure (does not own memory) + */ +typedef struct { + const char *data; /**< String data pointer */ + size_t length; /**< String length */ +} ZVecStringView; + +/** + * @brief Mutable string structure (owns memory) + */ +typedef struct { + char *data; /**< String data pointer */ + size_t length; /**< String length */ + size_t capacity; /**< Allocated capacity */ +} ZVecString; + +/** + * @brief String array structure + */ +typedef struct { + ZVecString *strings; /**< String array */ + size_t count; /**< String count */ +} ZVecStringArray; + +/** + * @brief Float array structure + */ +typedef struct { + const float *data; + size_t length; +} ZVecFloatArray; + +/** + * @brief Integer array structure + */ +typedef struct { + const int64_t *data; + size_t length; +} ZVecInt64Array; + +/** + * @brief Byte array structure + */ +typedef struct { + const uint8_t *data; /**< Byte data pointer */ + size_t length; /**< Array length */ +} ZVecByteArray; + +/** + * @brief Mutable byte array structure + */ +typedef struct { + uint8_t *data; /**< Byte data pointer */ + size_t length; /**< Current length */ + size_t capacity; /**< Allocated capacity */ +} ZVecMutableByteArray; + +// ============================================================================= +// String management functions +// ============================================================================= + +/** + * @brief Create 
string from C string + * @param str C string + * @return ZVecString* Pointer to the newly created string + */ +ZVEC_EXPORT ZVecString *ZVEC_CALL zvec_string_create(const char *str); + +/** + * @brief Create string from string view + * + * Creates a new ZVecString by copying data from a ZVecStringView. + * The created string owns its memory and must be freed with zvec_free_string(). + * + * @param view Pointer to source string view (must not be NULL) + * @return ZVecString* New string instance on success, NULL on error + * @note Caller is responsible for freeing the returned string + */ +ZVEC_EXPORT ZVecString *ZVEC_CALL +zvec_string_create_from_view(const ZVecStringView *view); + +/** + * @brief Create binary-safe string from raw data + * + * Creates a new ZVecString from raw binary data that may contain null bytes. + * Unlike zvec_string_create(), this function takes explicit length parameter + * and doesn't rely on null-termination. + * The created string owns its memory and must be freed with zvec_free_string(). + * + * @param data Raw binary data pointer (must not be NULL) + * @param length Length of data in bytes + * @return ZVecString* New string instance on success, NULL on error + * @note Caller is responsible for freeing the returned string + * @note This function is suitable for binary data containing null bytes + */ +ZVEC_EXPORT ZVecString *ZVEC_CALL zvec_bin_create(const uint8_t *data, + size_t length); + +/** + * @brief Copy string + * + * Creates a new ZVecString by copying an existing string. + * The created string owns its memory and must be freed with zvec_free_string(). 
+ * + * @param str Pointer to source string (must not be NULL) + * @return ZVecString* New string instance on success, NULL on error + * @note Caller is responsible for freeing the returned string + */ +ZVEC_EXPORT ZVecString *ZVEC_CALL zvec_string_copy(const ZVecString *str); + +/** + * @brief Get C string from ZVecString + * @param str ZVecString pointer + * @return const char* C string + */ +ZVEC_EXPORT const char *ZVEC_CALL zvec_string_c_str(const ZVecString *str); + +/** + * @brief Get string length + * @param str ZVecString pointer + * @return size_t String length + */ +ZVEC_EXPORT size_t ZVEC_CALL zvec_string_length(const ZVecString *str); + +/** + * @brief Compare two strings + * @param str1 First string + * @param str2 Second string + * @return int Comparison result (-1, 0, or 1) + */ +ZVEC_EXPORT int ZVEC_CALL zvec_string_compare(const ZVecString *str1, + const ZVecString *str2); + +/** + * @brief Free string memory + * @param str String pointer to free + */ +ZVEC_EXPORT void ZVEC_CALL zvec_free_string(ZVecString *str); + + +// ============================================================================= +// Array Memory management functions +// ============================================================================= + +/** + * @brief Create a new string array + * @param count Initial number of strings to allocate space for + * @return Pointer to the newly created string array, or NULL on failure + */ +ZVEC_EXPORT ZVecStringArray *ZVEC_CALL zvec_string_array_create(size_t count); + +/** + * @brief Add a string to the string array at specified index + * @param array String array pointer + * @param idx Index position where the string should be added + * @param str Null-terminated C string to add + */ +ZVEC_EXPORT void ZVEC_CALL zvec_string_array_add(ZVecStringArray *array, + size_t idx, const char *str); + +/** + * @brief Destroy string array and free all associated memory + * @param array String array pointer to destroy + */ +ZVEC_EXPORT void 
ZVEC_CALL zvec_string_array_destroy(ZVecStringArray *array); + +/** + * @brief Create a new mutable byte array + * @param capacity Initial capacity in bytes + * @return Pointer to the newly created byte array, or NULL on failure + */ +ZVEC_EXPORT ZVecMutableByteArray *ZVEC_CALL +zvec_byte_array_create(size_t capacity); + + +/** + * @brief Destroy byte array and free all associated memory + * @param array Byte array pointer to destroy + */ +ZVEC_EXPORT void ZVEC_CALL zvec_byte_array_destroy(ZVecMutableByteArray *array); + +/** + * @brief Create a new float array + * @param count Number of floats to allocate space for + * @return Pointer to the newly created float array, or NULL on failure + */ +ZVEC_EXPORT ZVecFloatArray *ZVEC_CALL zvec_float_array_create(size_t count); + +/** + * @brief Destroy float array and free all associated memory + * @param array Float array pointer to destroy + */ +ZVEC_EXPORT void ZVEC_CALL zvec_float_array_destroy(ZVecFloatArray *array); + +/** + * @brief Create a new int64 array + * @param count Number of int64 values to allocate space for + * @return Pointer to the newly created int64 array, or NULL on failure + */ +ZVEC_EXPORT ZVecInt64Array *ZVEC_CALL zvec_int64_array_create(size_t count); + +/** + * @brief Destroy int64 array and free all associated memory + * @param array Int64 array pointer to destroy + */ +ZVEC_EXPORT void ZVEC_CALL zvec_int64_array_destroy(ZVecInt64Array *array); + +/** + * @brief Release uint8_t array memory + * + * @param array uint8_t array pointer + */ +ZVEC_EXPORT void ZVEC_CALL zvec_free_uint8_array(uint8_t *array); + + +// ============================================================================= +// Configuration and Options Structures +// ============================================================================= + +/** + * @brief Log level enumeration + */ +typedef enum { + ZVEC_LOG_LEVEL_DEBUG = 0, + ZVEC_LOG_LEVEL_INFO = 1, + ZVEC_LOG_LEVEL_WARN = 2, + ZVEC_LOG_LEVEL_ERROR = 3, + 
ZVEC_LOG_LEVEL_FATAL = 4 +} ZVecLogLevel; + +/** + * @brief Log type enumeration + */ +typedef enum { ZVEC_LOG_TYPE_CONSOLE = 0, ZVEC_LOG_TYPE_FILE = 1 } ZVecLogType; + +/** + * @brief Console log configuration structure + */ +typedef struct { + ZVecLogLevel level; /**< Log level */ +} ZVecConsoleLogConfig; + +/** + * @brief File log configuration structure + */ +typedef struct { + ZVecLogLevel level; /**< Log level */ + ZVecString dir; /**< Log directory */ + ZVecString basename; /**< Log file base name */ + uint32_t file_size; /**< Log file size (MB) */ + uint32_t overdue_days; /**< Log expiration days */ +} ZVecFileLogConfig; + +/** + * @brief ZVec configuration data structure (corresponds to zvec::ConfigData) + */ +typedef struct { + uint64_t memory_limit_bytes; /**< Memory limit in bytes */ + + // log + ZVecLogType log_type; + void *log_config; /**< Log configuration (ZVecConsoleLogConfig or + ZVecFileLogConfig) */ + + // query + uint32_t query_thread_count; /**< Query thread count */ + float invert_to_forward_scan_ratio; /**< Inverted to forward scan ratio */ + float brute_force_by_keys_ratio; /**< Brute force by keys ratio */ + + // optimize + uint32_t optimize_thread_count; /**< Optimize thread count */ +} ZVecConfigData; + +/** + * @brief Create console log configuration + * @param level Log level + * @return ZVecConsoleLogConfig* Pointer to the newly created console log + * configuration + */ +ZVEC_EXPORT ZVecConsoleLogConfig *ZVEC_CALL +zvec_config_console_log_create(ZVecLogLevel level); + +/** + * @brief Create file log configuration + * @param level Log level + * @param dir Log directory + * @param basename Log file base name + * @param file_size Log file size (MB) + * @param overdue_days Log expiration days + * @return ZVecFileLogConfig* Pointer to the newly created file log + * configuration + */ +ZVEC_EXPORT ZVecFileLogConfig *ZVEC_CALL zvec_config_file_log_create( + ZVecLogLevel level, const char *dir, const char *basename, + uint32_t file_size, 
uint32_t overdue_days); + +/** + * @brief Destroy console log configuration + * @param config Console log configuration pointer + */ +ZVEC_EXPORT void ZVEC_CALL +zvec_config_console_log_destroy(ZVecConsoleLogConfig *config); + +/** + * @brief Destroy file log configuration + * @param config File log configuration pointer + */ +ZVEC_EXPORT void ZVEC_CALL +zvec_config_file_log_destroy(ZVecFileLogConfig *config); + + +/** + * @brief Create configuration data + * @return ZVecConfigData* Pointer to the newly created configuration data + */ +ZVEC_EXPORT ZVecConfigData *ZVEC_CALL zvec_config_data_create(void); + +/** + * @brief Destroy configuration data + * @param config Configuration data pointer + */ +ZVEC_EXPORT void ZVEC_CALL zvec_config_data_destroy(ZVecConfigData *config); + +/** + * @brief Set memory limit in configuration data + * @param config Configuration data pointer + * @param memory_limit_bytes Memory limit in bytes + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_config_data_set_memory_limit( + ZVecConfigData *config, uint64_t memory_limit_bytes); + +/** + * @brief Set log configuration in configuration data + * @param config Configuration data pointer + * @param log_type Log type (console or file) of the supplied log_config + * @param log_config Log configuration; owned by config afterwards, do not free + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_config_data_set_log_config( + ZVecConfigData *config, ZVecLogType log_type, void *log_config); + +/** + * @brief Set query thread count in configuration data + * @param config Configuration data pointer + * @param thread_count Query thread count + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_config_data_set_query_thread_count( + ZVecConfigData *config, uint32_t thread_count); + +/** + * @brief Set optimize thread count in configuration data + * @param config Configuration data pointer + * @param thread_count Optimize thread count + * @return
ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_config_data_set_optimize_thread_count( + ZVecConfigData *config, uint32_t thread_count); + +// ============================================================================= +// Initialization and Cleanup Interface +// ============================================================================= + +/** + * @brief Initialize ZVec library + * @param config Configuration data (optional, NULL means using default + * configuration) + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_initialize(const ZVecConfigData *config); + +/** + * @brief Clean up ZVec library resources + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_shutdown(void); + +/** + * @brief Check if library is initialized + * @param[out] initialized Whether initialized + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_is_initialized(bool *initialized); + +// ============================================================================= +// Data Type Enumerations +// ============================================================================= + +/** + * @brief Data type enumeration + */ +typedef enum { + ZVEC_DATA_TYPE_UNDEFINED = 0, + + ZVEC_DATA_TYPE_BINARY = 1, + ZVEC_DATA_TYPE_STRING = 2, + ZVEC_DATA_TYPE_BOOL = 3, + ZVEC_DATA_TYPE_INT32 = 4, + ZVEC_DATA_TYPE_INT64 = 5, + ZVEC_DATA_TYPE_UINT32 = 6, + ZVEC_DATA_TYPE_UINT64 = 7, + ZVEC_DATA_TYPE_FLOAT = 8, + ZVEC_DATA_TYPE_DOUBLE = 9, + + ZVEC_DATA_TYPE_VECTOR_BINARY32 = 20, + ZVEC_DATA_TYPE_VECTOR_BINARY64 = 21, + ZVEC_DATA_TYPE_VECTOR_FP16 = 22, + ZVEC_DATA_TYPE_VECTOR_FP32 = 23, + ZVEC_DATA_TYPE_VECTOR_FP64 = 24, + ZVEC_DATA_TYPE_VECTOR_INT4 = 25, + ZVEC_DATA_TYPE_VECTOR_INT8 = 26, + ZVEC_DATA_TYPE_VECTOR_INT16 = 27, + + ZVEC_DATA_TYPE_SPARSE_VECTOR_FP16 = 30, + ZVEC_DATA_TYPE_SPARSE_VECTOR_FP32 = 31, + + ZVEC_DATA_TYPE_ARRAY_BINARY = 40, + ZVEC_DATA_TYPE_ARRAY_STRING = 41, + 
ZVEC_DATA_TYPE_ARRAY_BOOL = 42, + ZVEC_DATA_TYPE_ARRAY_INT32 = 43, + ZVEC_DATA_TYPE_ARRAY_INT64 = 44, + ZVEC_DATA_TYPE_ARRAY_UINT32 = 45, + ZVEC_DATA_TYPE_ARRAY_UINT64 = 46, + ZVEC_DATA_TYPE_ARRAY_FLOAT = 47, + ZVEC_DATA_TYPE_ARRAY_DOUBLE = 48 +} ZVecDataType; + +/** + * @brief Index type enumeration + */ +typedef enum { + ZVEC_INDEX_TYPE_UNDEFINED = 0, + ZVEC_INDEX_TYPE_HNSW = 1, + ZVEC_INDEX_TYPE_IVF = 3, + ZVEC_INDEX_TYPE_FLAT = 4, + ZVEC_INDEX_TYPE_INVERT = 10 +} ZVecIndexType; + +/** + * @brief Distance metric type enumeration + */ +typedef enum { + ZVEC_METRIC_TYPE_UNDEFINED = 0, + ZVEC_METRIC_TYPE_L2 = 1, + ZVEC_METRIC_TYPE_IP = 2, + ZVEC_METRIC_TYPE_COSINE = 3, + ZVEC_METRIC_TYPE_MIPSL2 = 4 +} ZVecMetricType; + +/** + * @brief Quantization type enumeration + */ +typedef enum { + ZVEC_QUANTIZE_TYPE_UNDEFINED = 0, + ZVEC_QUANTIZE_TYPE_FP16 = 1, + ZVEC_QUANTIZE_TYPE_INT8 = 2, + ZVEC_QUANTIZE_TYPE_INT4 = 3 +} ZVecQuantizeType; + +// ============================================================================= +// Forward Declarations +// ============================================================================= + +typedef struct ZVecCollection ZVecCollection; + +// ============================================================================= +// Index Parameters Structures +// ============================================================================= + +/** + * @brief Base index parameters structure + */ +typedef struct { + ZVecIndexType index_type; /**< Index type */ +} ZVecBaseIndexParams; + +/** + * @brief Scalar index parameters structure + */ +typedef struct { + ZVecBaseIndexParams base; /**< Inherit base parameters */ + bool enable_range_optimization; /**< Whether to enable range optimization */ + bool enable_extended_wildcard; /**< Whether to enable extended wildcard */ +} ZVecInvertIndexParams; + +/** + * @brief Vector index base parameters structure + */ +typedef struct { + ZVecBaseIndexParams base; /**< Inherit base parameters */ + 
ZVecMetricType metric_type; /**< Distance metric type */ + ZVecQuantizeType quantize_type; /**< Quantization type */ +} ZVecVectorIndexParams; + +/** + * @brief HNSW index parameters structure + */ +typedef struct { + ZVecVectorIndexParams base; /**< Inherit vector index parameters */ + int m; /**< Graph connectivity parameter */ + int ef_construction; /**< Exploration factor during construction */ + int ef_search; /**< Exploration factor during search */ +} ZVecHnswIndexParams; + +/** + * @brief Flat index parameters structure + */ +typedef struct { + ZVecVectorIndexParams base; /**< Inherit vector index parameters */ + // Flat index has no additional parameters +} ZVecFlatIndexParams; + +/** + * @brief IVF index parameters structure + */ +typedef struct { + ZVecVectorIndexParams base; /**< Inherit vector index parameters */ + int n_list; /**< Number of cluster centers */ + int n_iters; /**< Number of iterations */ + bool use_soar; /**< Whether to use SOAR algorithm */ + int n_probe; /**< Number of clusters to probe during search */ +} ZVecIVFIndexParams; + +/** + * @brief Generic index parameters union + */ +typedef struct { + ZVecIndexType index_type; /**< Index type */ + union { + ZVecInvertIndexParams invert_params; /**< Scalar index parameters */ + ZVecHnswIndexParams hnsw_params; /**< HNSW index parameters */ + ZVecFlatIndexParams flat_params; /**< Flat index parameters */ + ZVecIVFIndexParams ivf_params; /**< IVF index parameters */ + } params; +} ZVecIndexParams; + +// ============================================================================= +// Field Schema Structures +// ============================================================================= + +/** + * @brief Field schema structure + */ +typedef struct { + ZVecString *name; /**< Field name */ + ZVecDataType data_type; /**< Data type */ + bool nullable; /**< Whether nullable */ + uint32_t dimension; /**< Vector dimension (only used for vector fields) */ + ZVecIndexParams *index_params; /**< 
Index parameters, NULL means no index */ +} ZVecFieldSchema; + + +// ============================================================================= +// Index Parameters Creation and Destruction Interface +// ============================================================================= + +/** + * @brief Initialize base index parameters + * @param params Base index parameters structure pointer + * @param index_type Index type + */ +ZVEC_EXPORT void ZVEC_CALL zvec_index_params_base_init( + ZVecBaseIndexParams *params, ZVecIndexType index_type); + +/** + * @brief Initialize scalar index parameters + * @param params Scalar index parameters structure pointer + * @param enable_range_opt Whether to enable range optimization + * @param enable_wildcard Whether to enable wildcard expansion + */ +ZVEC_EXPORT void ZVEC_CALL zvec_index_params_invert_init( + ZVecInvertIndexParams *params, bool enable_range_opt, bool enable_wildcard); + +/** + * @brief Initialize vector index parameters + * @param params Vector index parameters structure pointer + * @param index_type Index type + * @param metric_type Metric type + * @param quantize_type Quantization type + */ +ZVEC_EXPORT void ZVEC_CALL zvec_index_params_vector_init( + ZVecVectorIndexParams *params, ZVecIndexType index_type, + ZVecMetricType metric_type, ZVecQuantizeType quantize_type); + +/** + * @brief Initialize HNSW index parameters + * @param params HNSW index parameters structure pointer + * @param metric_type Metric type + * @param m Connectivity parameter + * @param ef_construction Construction exploration factor + * @param ef_search Search exploration factor + * @param quantize_type Quantization type + */ +ZVEC_EXPORT void ZVEC_CALL zvec_index_params_hnsw_init( + ZVecHnswIndexParams *params, ZVecMetricType metric_type, int m, + int ef_construction, int ef_search, ZVecQuantizeType quantize_type); + +/** + * @brief Initialize Flat index parameters + * @param params Flat index parameters structure pointer + * @param 
metric_type Metric type + * @param quantize_type Quantization type + */ +ZVEC_EXPORT void ZVEC_CALL zvec_index_params_flat_init( + ZVecFlatIndexParams *params, ZVecMetricType metric_type, + ZVecQuantizeType quantize_type); + +/** + * @brief Initialize IVF index parameters + * @param params IVF index parameters structure pointer + * @param metric_type Metric type + * @param n_list Number of cluster centers + * @param n_iters Number of iterations + * @param use_soar Whether to use SOAR algorithm + * @param n_probe Search probe count + * @param quantize_type Quantization type + */ +ZVEC_EXPORT void ZVEC_CALL zvec_index_params_ivf_init( + ZVecIVFIndexParams *params, ZVecMetricType metric_type, int n_list, + int n_iters, bool use_soar, int n_probe, ZVecQuantizeType quantize_type); + +/** + * @brief Initialize generic index parameters + * @param params Generic index parameters structure pointer + * @param index_type Index type + * @param metric_type Metric type (only valid for vector indexes) + */ +ZVEC_EXPORT void ZVEC_CALL zvec_index_params_init_default( + ZVecIndexParams *params, ZVecIndexType index_type, + ZVecMetricType metric_type); + +/** + * @brief Destroy index parameters (free internal dynamically allocated memory) + * @param params Index parameters structure pointer + */ +ZVEC_EXPORT void ZVEC_CALL zvec_index_params_destroy(ZVecIndexParams *params); + + +/** + * @brief Create inverted index parameters + * @param enable_range_opt Whether to enable range optimization + * @param enable_wildcard Whether to enable extended wildcard + * @return ZVecInvertIndexParams* Pointer to the newly created index parameters + */ +ZVEC_EXPORT ZVecInvertIndexParams *ZVEC_CALL +zvec_index_params_invert_create(bool enable_range_opt, bool enable_wildcard); + +/** + * @brief Create vector index base parameters + * @param index_type Index type + * @param metric_type Metric type + * @param quantize_type Quantization type + * @return ZVecVectorIndexParams* Pointer to the newly created 
index parameters + */ +ZVEC_EXPORT ZVecVectorIndexParams *ZVEC_CALL zvec_index_params_vector_create( + ZVecIndexType index_type, ZVecMetricType metric_type, + ZVecQuantizeType quantize_type); + +/** + * @brief Create HNSW index parameters + * @param metric_type Metric type + * @param quantize_type Quantization type + * @param m Graph degree parameter + * @param ef_construction Exploration factor during construction + * @param ef_search Exploration factor during search + * + * @return ZVecHnswIndexParams* Pointer to the newly created index parameters + */ +ZVEC_EXPORT ZVecHnswIndexParams *ZVEC_CALL zvec_index_params_hnsw_create( + ZVecMetricType metric_type, ZVecQuantizeType quantize_type, int m, + int ef_construction, int ef_search); + +/** + * @brief Create Flat index parameters + * @param metric_type Metric type + * @param quantize_type Quantization type + * @return ZVecFlatIndexParams* Pointer to the newly created index parameters + */ +ZVEC_EXPORT ZVecFlatIndexParams *ZVEC_CALL zvec_index_params_flat_create( + ZVecMetricType metric_type, ZVecQuantizeType quantize_type); + +/** + * @brief Create IVF index parameters + * @param metric_type Metric type + * @param quantize_type Quantization type + * @param n_list Number of cluster centers + * @param n_iters Number of iterations + * @param use_soar Whether to use SOAR algorithm + * @param n_probe Number of clusters to probe during search + * @return ZVecIVFIndexParams* Pointer to the newly created index parameters + */ +ZVEC_EXPORT ZVecIVFIndexParams *ZVEC_CALL zvec_index_params_ivf_create( + ZVecMetricType metric_type, ZVecQuantizeType quantize_type, int n_list, + int n_iters, bool use_soar, int n_probe); + + +/** + * @brief Destroy inverted index parameters + * @param params Index parameters pointer + */ +ZVEC_EXPORT void ZVEC_CALL +zvec_index_params_invert_destroy(ZVecInvertIndexParams *params); + +/** + * @brief Destroy vector index parameters + * @param params Index parameters pointer + */ +ZVEC_EXPORT void
ZVEC_CALL +zvec_index_params_vector_destroy(ZVecVectorIndexParams *params); + +/** + * @brief Destroy HNSW index parameters + * @param params Index parameters pointer + */ +ZVEC_EXPORT void ZVEC_CALL +zvec_index_params_hnsw_destroy(ZVecHnswIndexParams *params); + +/** + * @brief Destroy Flat index parameters + * @param params Index parameters pointer + */ +ZVEC_EXPORT void ZVEC_CALL +zvec_index_params_flat_destroy(ZVecFlatIndexParams *params); + +/** + * @brief Destroy IVF index parameters + * @param params Index parameters pointer + */ +ZVEC_EXPORT void ZVEC_CALL +zvec_index_params_ivf_destroy(ZVecIVFIndexParams *params); + + +// ============================================================================= +// Query Parameters Structures +// ============================================================================= + +/** + * @brief Base query parameters structure (corresponds to zvec::QueryParams) + */ +typedef struct { + ZVecIndexType index_type; /**< Index type */ + float radius; /**< Search radius */ + bool is_linear; /**< Whether linear search */ + bool is_using_refiner; /**< Whether using refiner */ +} ZVecQueryParams; + +/** + * @brief HNSW query parameters structure (corresponds to zvec::HnswQueryParams) + */ +typedef struct { + ZVecQueryParams base; /**< Inherit base query parameters */ + int ef; /**< Exploration factor during search */ +} ZVecHnswQueryParams; + +/** + * @brief IVF query parameters structure (corresponds to zvec::IVFQueryParams) + */ +typedef struct { + ZVecQueryParams base; /**< Inherit base query parameters */ + int nprobe; /**< Number of clusters to probe during search */ + float scale_factor; /**< Scale factor */ +} ZVecIVFQueryParams; + +/** + * @brief Flat query parameters structure (corresponds to zvec::FlatQueryParams) + */ +typedef struct { + ZVecQueryParams base; /**< Inherit base query parameters */ + float scale_factor; /**< Scale factor */ +} ZVecFlatQueryParams; + +/** + * @brief Query parameters union (supports query 
parameters for different index + * types) + */ +typedef struct { + ZVecIndexType index_type; /**< Index type, used to distinguish the parameter + type stored in the union */ + union { + ZVecQueryParams base_params; /**< Base query parameters */ + ZVecHnswQueryParams hnsw_params; /**< HNSW query parameters */ + ZVecIVFQueryParams ivf_params; /**< IVF query parameters */ + ZVecFlatQueryParams flat_params; /**< Flat query parameters */ + } params; +} ZVecQueryParamsUnion; + +// ============================================================================= +// Query Structures (Updated Version, Including QueryParams) +// ============================================================================= + +/** + * @brief Vector query structure (aligned with zvec::VectorQuery, includes + * QueryParams) + */ +typedef struct { + int topk; /**< Number of results to return */ + ZVecString field_name; /**< Query field name */ + ZVecByteArray query_vector; /**< Query vector (binary data) */ + ZVecByteArray + query_sparse_indices; /**< Sparse vector indices (binary data) */ + ZVecByteArray query_sparse_values; /**< Sparse vector values (binary data) */ + ZVecString filter; /**< Filter expression */ + bool include_vector; /**< Whether to include vector data */ + bool include_doc_id; /**< Whether to include document ID */ + ZVecStringArray output_fields; /**< Output field list (NULL means all) */ + ZVecQueryParamsUnion *query_params; /**< Query parameters (optional, NULL + means using default parameters) */ +} ZVecVectorQuery; + +/** + * @brief Grouped vector query structure (aligned with zvec::GroupByVectorQuery, + * includes QueryParams) + */ +typedef struct { + ZVecString field_name; /**< Query field name */ + ZVecByteArray query_vector; /**< Query vector (binary data) */ + ZVecByteArray + query_sparse_indices; /**< Sparse vector indices (binary data) */ + ZVecByteArray query_sparse_values; /**< Sparse vector values (binary data) */ + ZVecString filter; /**< Filter expression */ + 
bool include_vector; /**< Whether to include vector data */ + ZVecStringArray output_fields; /**< Output field list */ + ZVecString group_by_field_name; /**< Group by field name */ + uint32_t group_count; /**< Number of groups */ + uint32_t group_topk; /**< Number of results to return per group */ + ZVecQueryParamsUnion *query_params; /**< Query parameters (optional, NULL + means using default parameters) */ +} ZVecGroupByVectorQuery; + + +// ============================================================================= +// Query Parameters Management Functions +// ============================================================================= + +/** + * @brief Create base query parameters + * @param index_type Index type + * @return ZVecQueryParams* Pointer to the newly created query parameters + */ +ZVEC_EXPORT ZVecQueryParams *ZVEC_CALL +zvec_query_params_create(ZVecIndexType index_type); + +/** + * @brief Create HNSW query parameters + * @param index_type Index type (should be ZVEC_INDEX_TYPE_HNSW) + * @param ef Exploration factor during search + * @param radius Search radius + * @param is_linear Whether linear search + * @param is_using_refiner Whether using refiner + * @return ZVecHnswQueryParams* Pointer to the newly created HNSW query + * parameters + */ +ZVEC_EXPORT ZVecHnswQueryParams *ZVEC_CALL +zvec_query_params_hnsw_create(ZVecIndexType index_type, int ef, float radius, + bool is_linear, bool is_using_refiner); + +/** + * @brief Create IVF query parameters + * @param index_type Index type (should be ZVEC_INDEX_TYPE_IVF) + * @param nprobe Number of clusters to probe during search + * @param is_using_refiner Whether using refiner + * @param scale_factor Scale factor + * @return ZVecIVFQueryParams* Pointer to the newly created IVF query parameters + */ +ZVEC_EXPORT ZVecIVFQueryParams *ZVEC_CALL +zvec_query_params_ivf_create(ZVecIndexType index_type, int nprobe, + bool is_using_refiner, float scale_factor); + +/** + * @brief Create Flat query parameters + * 
@param index_type Index type (should be ZVEC_INDEX_TYPE_FLAT) + * @param is_using_refiner Whether using refiner + * @param scale_factor Scale factor + * @return ZVecFlatQueryParams* Pointer to the newly created Flat query + * parameters + */ +ZVEC_EXPORT ZVecFlatQueryParams *ZVEC_CALL zvec_query_params_flat_create( + ZVecIndexType index_type, bool is_using_refiner, float scale_factor); + +/** + * @brief Create query parameters union + * @param index_type Index type + * @return ZVecQueryParamsUnion* Pointer to the newly created query parameters + * union + */ +ZVEC_EXPORT ZVecQueryParamsUnion *ZVEC_CALL +zvec_query_params_union_create(ZVecIndexType index_type); + + +/** + * @brief Destroy base query parameters + * @param params query parameters pointer + */ +ZVEC_EXPORT void ZVEC_CALL zvec_query_params_destroy(ZVecQueryParams *params); + +/** + * @brief Destroy HNSW query parameters + * @param params HNSW query parameters pointer + */ +ZVEC_EXPORT void ZVEC_CALL +zvec_query_params_hnsw_destroy(ZVecHnswQueryParams *params); + +/** + * @brief Destroy IVF query parameters + * @param params IVF query parameters pointer + */ +ZVEC_EXPORT void ZVEC_CALL +zvec_query_params_ivf_destroy(ZVecIVFQueryParams *params); + +/** + * @brief Destroy Flat query parameters + * @param params Flat query parameters pointer + */ +ZVEC_EXPORT void ZVEC_CALL +zvec_query_params_flat_destroy(ZVecFlatQueryParams *params); + +/** + * @brief Destroy query parameters union + * @param params Query parameters union pointer + */ +ZVEC_EXPORT void ZVEC_CALL +zvec_query_params_union_destroy(ZVecQueryParamsUnion *params); + +/** + * @brief Set query parameters index type + * @param params Query parameters pointer + * @param index_type Index type + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_query_params_set_index_type( + ZVecQueryParams *params, ZVecIndexType index_type); + +/** + * @brief Set search radius for query parameters + * @param params Query parameters 
pointer + * @param radius Search radius + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_query_params_set_radius(ZVecQueryParams *params, float radius); + +/** + * @brief Set whether to use linear search for query parameters + * @param params Query parameters pointer + * @param is_linear Whether to use linear search + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_query_params_set_is_linear(ZVecQueryParams *params, bool is_linear); + +/** + * @brief Set whether to use refiner for query parameters + * @param params Query parameters pointer + * @param is_using_refiner Whether to use refiner + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_query_params_set_is_using_refiner( + ZVecQueryParams *params, bool is_using_refiner); + +/** + * @brief Set exploration factor for HNSW query parameters + * @param params HNSW query parameters pointer + * @param ef Exploration factor + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_query_params_hnsw_set_ef(ZVecHnswQueryParams *params, int ef); + +/** + * @brief Set number of probe clusters for IVF query parameters + * @param params IVF query parameters pointer + * @param nprobe Number of probe clusters + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_query_params_ivf_set_nprobe(ZVecIVFQueryParams *params, int nprobe); + +/** + * @brief Set scale factor for IVF/Flat query parameters + * @param params IVF or Flat query parameters pointer + * @param scale_factor Scale factor + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_query_params_ivf_set_scale_factor( + ZVecIVFQueryParams *params, float scale_factor); + +/** + * @brief Collection options structure + */ +typedef struct { + bool enable_mmap; /**< Whether to enable memory mapping */ + size_t max_buffer_size; /**< Maximum buffer size */ + bool read_only; /**< Whether read-only mode */ + uint64_t 
max_doc_count_per_segment; /**< Maximum document count per segment */ +} ZVecCollectionOptions; + + +/** + * @brief Collection statistics structure + */ +typedef struct { + uint64_t doc_count; /**< Total document count */ + ZVecString **index_names; /**< Index name array */ + float *index_completeness; /**< Index completeness array */ + size_t index_count; /**< Index name count */ +} ZVecCollectionStats; + + +/** + * @brief Create field schema + * @param name Field name + * @param data_type Data type + * @param nullable Whether nullable + * @param dimension Vector dimension + * @return ZVecFieldSchema* Pointer to the newly created field schema + */ +ZVEC_EXPORT ZVecFieldSchema *ZVEC_CALL +zvec_field_schema_create(const char *name, ZVecDataType data_type, + bool nullable, uint32_t dimension); + +/** + * @brief Destroy field schema + * @param schema Field schema pointer + */ +ZVEC_EXPORT void ZVEC_CALL zvec_field_schema_destroy(ZVecFieldSchema *schema); + +/** + * @brief Set index parameters for field + * @param schema Field schema pointer + * @param index_params Index parameters pointer + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_field_schema_set_index_params( + ZVecFieldSchema *schema, const ZVecIndexParams *index_params); + + +/** + * @brief Set inverted index parameters for field schema + * @param field_schema Field schema pointer + * @param invert_params Inverted index parameters pointer + */ +ZVEC_EXPORT void ZVEC_CALL zvec_field_schema_set_invert_index( + ZVecFieldSchema *field_schema, const ZVecInvertIndexParams *invert_params); + +/** + * @brief Set HNSW index parameters for field schema + * @param field_schema Field schema pointer + * @param hnsw_params HNSW index parameters pointer + */ +ZVEC_EXPORT void ZVEC_CALL zvec_field_schema_set_hnsw_index( + ZVecFieldSchema *field_schema, const ZVecHnswIndexParams *hnsw_params); + +/** + * @brief Set Flat index parameters for field schema + * @param field_schema Field schema 
pointer + * @param flat_params Flat index parameters pointer + */ +ZVEC_EXPORT void ZVEC_CALL zvec_field_schema_set_flat_index( + ZVecFieldSchema *field_schema, const ZVecFlatIndexParams *flat_params); + +/** + * @brief Set IVF index parameters for field schema + * @param field_schema Field schema pointer + * @param ivf_params IVF index parameters pointer + */ +ZVEC_EXPORT void ZVEC_CALL zvec_field_schema_set_ivf_index( + ZVecFieldSchema *field_schema, const ZVecIVFIndexParams *ivf_params); + + +// ============================================================================= +// Collection Schema Structures +// ============================================================================= + +/** + * @brief Collection schema structure + */ +typedef struct { + ZVecString *name; /**< Collection name */ + ZVecFieldSchema **fields; /**< Field array */ + size_t field_count; /**< Field count */ + size_t field_capacity; /**< Field array capacity */ + uint64_t max_doc_count_per_segment; /**< Maximum document count per segment */ +} ZVecCollectionSchema; + +/** + * @brief Create collection schema + * @param name Collection name + * @return ZVecCollectionSchema* Pointer to the newly created collection schema + */ +ZVEC_EXPORT ZVecCollectionSchema *ZVEC_CALL +zvec_collection_schema_create(const char *name); + +/** + * @brief Destroy collection schema + * @param schema Collection schema pointer + */ +ZVEC_EXPORT void ZVEC_CALL +zvec_collection_schema_destroy(ZVecCollectionSchema *schema); + +/** + * @brief Add field to collection schema + * @param schema Collection schema pointer + * @param field Field schema pointer (function takes ownership) + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_schema_add_field( + ZVecCollectionSchema *schema, ZVecFieldSchema *field); + +/** + * @brief Add multiple fields to collection schema at once + * + * @param schema Collection schema pointer + * @param fields Array of fields to add + * @param 
field_count Number of fields to add + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_schema_add_fields( + ZVecCollectionSchema *schema, const ZVecFieldSchema *fields, + size_t field_count); + +/** + * @brief Remove field + * @param schema Collection schema pointer + * @param field_name Field name + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_schema_remove_field( + ZVecCollectionSchema *schema, const char *field_name); + +/** + * @brief Remove multiple fields from collection schema at once + * + * @param schema Collection schema pointer + * @param field_names Array of field names to remove + * @param field_count Number of fields to remove + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_schema_remove_fields( + ZVecCollectionSchema *schema, const char *const *field_names, + size_t field_count); + +/** + * @brief Get field count + * + * @param schema Collection schema pointer + * @return size_t Field count + */ +ZVEC_EXPORT size_t ZVEC_CALL +zvec_collection_schema_get_field_count(const ZVecCollectionSchema *schema); + +/** + * @brief Find field + * @param schema Collection schema pointer + * @param field_name Field name + * @return ZVecFieldSchema* Field schema pointer, returns NULL if not found + */ +ZVEC_EXPORT ZVecFieldSchema *ZVEC_CALL zvec_collection_schema_find_field( + const ZVecCollectionSchema *schema, const char *field_name); + +/** + * @brief Validate collection schema + * @param schema Collection schema pointer + * @param[out] error_msg Error message (needs to be freed by calling + * zvec_free_string) + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_schema_validate( + const ZVecCollectionSchema *schema, ZVecString **error_msg); + + +/** + * @brief Get field by index + * @param schema Collection schema pointer + * @param index Field index + * @return ZVecFieldSchema* Field 
schema pointer + */ +ZVEC_EXPORT ZVecFieldSchema *ZVEC_CALL zvec_collection_schema_get_field( + const ZVecCollectionSchema *schema, size_t index); + +/** + * @brief Set maximum document count per segment + * @param schema Collection schema pointer + * @param max_doc_count Maximum document count + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_collection_schema_set_max_doc_count_per_segment( + ZVecCollectionSchema *schema, uint64_t max_doc_count); + +/** + * @brief Get maximum document count per segment of collection schema + * + * @param schema Collection schema pointer + * @return uint64_t Maximum document count per segment + */ +ZVEC_EXPORT uint64_t ZVEC_CALL +zvec_collection_schema_get_max_doc_count_per_segment( + const ZVecCollectionSchema *schema); + + +// ============================================================================= +// Collection Management Functions +// ============================================================================= + +/** + * @brief Create and open collection + * @param path Collection path + * @param schema Collection schema pointer + * @param options Collection options pointer (NULL uses default options) + * @param[out] collection Returned collection handle + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_create_and_open( + const char *path, const ZVecCollectionSchema *schema, + const ZVecCollectionOptions *options, ZVecCollection **collection); + + +/** + * @brief Open existing collection + * @param path Collection path + * @param options Collection options pointer (NULL uses default options) + * @param[out] collection Returned collection handle + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_collection_open(const char *path, const ZVecCollectionOptions *options, + ZVecCollection **collection); + + +/** + * @brief Close collection + * @param collection Collection handle + * @return ZVecErrorCode Error code 
+ */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_collection_close(ZVecCollection *collection); + + +/** + * @brief Destroy collection + * + * @param collection Collection handle + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_collection_destroy(ZVecCollection *collection); + +/** + * @brief Flush collection data to disk + * @param collection Collection handle + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_collection_flush(ZVecCollection *collection); + +/** + * @brief Get collection schema + * @param collection Collection handle + * @param[out] schema + * Returned collection schema pointer (needs to be freed by calling + * zvec_collection_schema_destroy) + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_get_schema( + const ZVecCollection *collection, ZVecCollectionSchema **schema); + + +/** + * @brief Initialize default collection options + * @param options Collection options structure pointer + */ +ZVEC_EXPORT void ZVEC_CALL +zvec_collection_options_init_default(ZVecCollectionOptions *options); + +/** + * @brief Get collection options + * @param collection Collection handle + * @param[out] options + * Returned collection options pointer (needs to be freed by calling + * zvec_collection_options_destroy) + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_get_options( + const ZVecCollection *collection, ZVecCollectionOptions **options); + +/** + * @brief Get collection statistics + * @param collection Collection handle + * @param[out] stats + * Returned statistics pointer (needs to be freed by calling + * zvec_collection_stats_destroy) + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_get_stats( + const ZVecCollection *collection, ZVecCollectionStats **stats); + +/** + * @brief Destroy collection statistics + * @param stats Statistics pointer + */ +ZVEC_EXPORT 
void ZVEC_CALL +zvec_collection_stats_destroy(ZVecCollectionStats *stats); + +/** + * @brief Free field schema memory + * + * @param field_schema Field schema pointer to be freed + */ +ZVEC_EXPORT void ZVEC_CALL +zvec_free_field_schema(ZVecFieldSchema *field_schema); + + +// ============================================================================= +// Index Management Interface +// ============================================================================= + +/** + * @brief Create index + * + * @param collection Collection handle + * @param field_name Field name + * @param index_params Index parameters + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_collection_create_index(ZVecCollection *collection, const char *field_name, + const ZVecIndexParams *index_params); + +/** + * @brief Create index for collection field (using specific type parameters) + * @param collection Collection handle + * @param field_name Field name + * @param index_params Index parameters (select appropriate structure based on + * index type) + * @return Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_create_index_with_params( + ZVecCollection *collection, const char *field_name, + const void + *index_params); // Determine specific type based on index_type field + +/** + * @brief Create HNSW index for collection field + * @param collection Collection handle + * @param field_name Field name + * @param hnsw_params HNSW index parameters + * @return Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_create_hnsw_index( + ZVecCollection *collection, const char *field_name, + const ZVecHnswIndexParams *hnsw_params); + +/** + * @brief Create Flat index for collection field + * @param collection Collection handle + * @param field_name Field name + * @param flat_params Flat index parameters + * @return Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_create_flat_index( + ZVecCollection *collection, const 
char *field_name, + const ZVecFlatIndexParams *flat_params); + +/** + * @brief Create IVF index for collection field + * @param collection Collection handle + * @param field_name Field name + * @param ivf_params IVF index parameters + * @return Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_create_ivf_index( + ZVecCollection *collection, const char *field_name, + const ZVecIVFIndexParams *ivf_params); + +/** + * @brief Create scalar index for collection field + * @param collection Collection handle + * @param field_name Field name + * @param invert_params Scalar index parameters + * @return Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_create_invert_index( + ZVecCollection *collection, const char *field_name, + const ZVecInvertIndexParams *invert_params); + +/** + * @brief Drop index + * @param collection Collection handle + * @param field_name Field name + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_collection_drop_index(ZVecCollection *collection, const char *field_name); + +/** + * @brief Optimize collection (rebuild indexes, merge segments, etc.) 
+ * @param collection Collection handle + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_collection_optimize(ZVecCollection *collection); + +// ============================================================================= +// Column Management Interface (DDL) +// ============================================================================= + +/** + * @brief Add column + * @param collection Collection handle + * @param field_schema Field schema pointer + * @param default_expression Default value expression (can be NULL) + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_add_column( + ZVecCollection *collection, const ZVecFieldSchema *field_schema, + const char *default_expression); + +/** + * @brief Drop column + * @param collection Collection handle + * @param field_name Field name + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_collection_drop_column(ZVecCollection *collection, const char *field_name); + +/** + * @brief Alter column + * @param collection Collection handle + * @param old_name Original field name + * @param new_name New field name (can be NULL to indicate no renaming) + * @param new_schema New field schema (can be NULL to indicate no schema + * modification) + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_alter_column( + ZVecCollection *collection, const char *old_name, const char *new_name, + const ZVecFieldSchema *new_schema); + + +/** + * @brief Document structure (opaque pointer mode) + * Internal implementation details are not visible to the outside, and + * operations are performed through API functions + */ +typedef struct ZVecDoc ZVecDoc; + +// ============================================================================= +// Data Manipulation Interface (DML) +// ============================================================================= + +/** + * @brief Insert documents into 
collection + * @param collection Collection handle + * @param docs Document array + * @param doc_count Document count + * @param[out] success_count Number of successfully inserted documents + * @param[out] error_count Number of failed insertions + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_insert( + ZVecCollection *collection, const ZVecDoc **docs, size_t doc_count, + size_t *success_count, size_t *error_count); + +/** + * @brief Update documents in collection + * @param collection Collection handle + * @param docs Document array + * @param doc_count Document count + * @param[out] success_count Number of successfully updated documents + * @param[out] error_count Number of failed updates + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_update( + ZVecCollection *collection, const ZVecDoc **docs, size_t doc_count, + size_t *success_count, size_t *error_count); + +/** + * @brief Insert or update documents in collection (upsert operation) + * @param collection Collection handle + * @param docs Document array + * @param doc_count Document count + * @param[out] success_count Number of successful operations + * @param[out] error_count Number of failed operations + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_upsert( + ZVecCollection *collection, const ZVecDoc **docs, size_t doc_count, + size_t *success_count, size_t *error_count); + +/** + * @brief Delete documents from collection + * @param collection Collection handle + * @param pks Primary key array + * @param pk_count Primary key count + * @param[out] success_count Number of successfully deleted documents + * @param[out] error_count Number of failed deletions + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_delete( + ZVecCollection *collection, const char *const *pks, size_t pk_count, + size_t *success_count, size_t *error_count); + 
+/** + * @brief Delete documents by filter condition + * @param collection Collection handle + * @param filter Filter expression + * @note The number of deleted documents is not reported by this call + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_delete_by_filter( + ZVecCollection *collection, const char *filter); + +// ============================================================================= +// Data Query Interface (DQL) +// ============================================================================= + +/** + * @brief Vector similarity search + * @param collection Collection handle + * @param query Query parameters pointer + * @param[out] results Returned document array (needs to be freed by calling + * zvec_docs_free) + * @param[out] result_count Number of returned results + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_query( + const ZVecCollection *collection, const ZVecVectorQuery *query, + ZVecDoc ***results, size_t *result_count); + +/** + * @brief Grouped vector similarity search + * @param collection Collection handle + * @param query Grouped query parameters pointer + * @param[out] results Returned document array (needs to be freed by calling + * zvec_docs_free) + * @param[out] group_by_values Returned group by field values array (needs to be + * freed by calling zvec_string_array_destroy) + * @param[out] result_count Number of returned results + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_query_by_group( + const ZVecCollection *collection, const ZVecGroupByVectorQuery *query, + ZVecDoc ***results, ZVecString ***group_by_values, size_t *result_count); + +/** + * @brief Fetch documents by primary keys + * @param collection Collection handle + * @param primary_keys Primary key array + * @param count Number of primary keys + * @param[out] documents Returned document array (needs to be freed by calling + * zvec_docs_free) + 
* @param[out] found_count Number of found documents + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_fetch( + ZVecCollection *collection, const char *const *primary_keys, size_t count, + ZVecDoc ***documents, size_t *found_count); + +// ============================================================================= +// Document Related Structures +// ============================================================================= + +/** + * @brief Document field value union + */ +typedef union { + bool bool_value; + int32_t int32_value; + int64_t int64_value; + uint32_t uint32_value; + uint64_t uint64_value; + float float_value; + double double_value; + ZVecString string_value; + ZVecFloatArray vector_value; + ZVecByteArray binary_value; /**< Binary data value */ +} ZVecFieldValue; + +/** + * @brief Document field structure + */ +typedef struct { + ZVecString name; ///< Field name + ZVecDataType data_type; ///< Data type + ZVecFieldValue value; ///< Field value +} ZVecDocField; + +/** + * @brief Document operator enumeration + */ +typedef enum { + ZVEC_DOC_OP_INSERT = 0, ///< Insert operation + ZVEC_DOC_OP_UPDATE = 1, ///< Update operation + ZVEC_DOC_OP_UPSERT = 2, ///< Insert or update operation + ZVEC_DOC_OP_DELETE = 3 ///< Delete operation +} ZVecDocOperator; + + +// ============================================================================= +// Data Manipulation Interface (DML) +// ============================================================================= + +/** + * @brief Create a new document object + * + * @return ZVecDoc* Pointer to the newly created document object, returns NULL + * on failure + */ +ZVEC_EXPORT ZVecDoc *ZVEC_CALL zvec_doc_create(void); + +/** + * @brief Destroy the document object and release all resources + * + * @param doc Pointer to the document object + */ +ZVEC_EXPORT void ZVEC_CALL zvec_doc_destroy(ZVecDoc *doc); + +/** + * @brief Clear the document object + * + * @param doc Pointer to 
the document object + */ +ZVEC_EXPORT void ZVEC_CALL zvec_doc_clear(ZVecDoc *doc); + +/** + * @brief Add field to document by value + * + * @param doc Document object pointer + * @param field_name Field name + * @param data_type Data type + * @param value Value pointer + * @param value_size Value size + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_doc_add_field_by_value( + ZVecDoc *doc, const char *field_name, ZVecDataType data_type, + const void *value, size_t value_size); + +/** + * @brief Add field to document by structure + * + * @param doc Document object pointer + * @param field Field structure pointer + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_doc_add_field_by_struct(ZVecDoc *doc, const ZVecDocField *field); + +/** + * @brief Remove field from document + * + * @param doc Document structure pointer + * @param field_name Field name + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_doc_remove_field(ZVecDoc *doc, const char *field_name); + + +/** + * @brief Batch release document array + * + * @param documents Document pointer array + * @param count Document count + */ +ZVEC_EXPORT void ZVEC_CALL zvec_docs_free(ZVecDoc **documents, size_t count); + +/** + * @brief Set document primary key + * + * @param doc Pointer to the document structure + * @param pk Primary key string + */ +ZVEC_EXPORT void ZVEC_CALL zvec_doc_set_pk(ZVecDoc *doc, const char *pk); + +/** + * @brief Set document ID + * + * @param doc Document structure pointer + * @param doc_id Document ID + */ +ZVEC_EXPORT void ZVEC_CALL zvec_doc_set_doc_id(ZVecDoc *doc, uint64_t doc_id); + +/** + * @brief Set document score + * + * @param doc Document structure pointer + * @param score Score value + */ +ZVEC_EXPORT void ZVEC_CALL zvec_doc_set_score(ZVecDoc *doc, float score); + +/** + * @brief Set document operator + * + * @param doc Document structure pointer + * @param op Operator + */ 
+ZVEC_EXPORT void ZVEC_CALL zvec_doc_set_operator(ZVecDoc *doc, + ZVecDocOperator op); + +/** + * @brief Get document ID + * + * @param doc Document structure pointer + * @return uint64_t Document ID + */ +ZVEC_EXPORT uint64_t ZVEC_CALL zvec_doc_get_doc_id(const ZVecDoc *doc); + +/** + * @brief Get document score + * + * @param doc Document structure pointer + * @return float Score value + */ +ZVEC_EXPORT float ZVEC_CALL zvec_doc_get_score(const ZVecDoc *doc); + +/** + * @brief Get document operator + * + * @param doc Document structure pointer + * @return ZVecDocOperator Operator + */ +ZVEC_EXPORT ZVecDocOperator ZVEC_CALL zvec_doc_get_operator(const ZVecDoc *doc); + +/** + * @brief Get document field count + * + * @param doc Document structure pointer + * @return size_t Field count + */ +ZVEC_EXPORT size_t ZVEC_CALL zvec_doc_get_field_count(const ZVecDoc *doc); + + +/** + * @brief Get document primary key pointer (no copy) + * + * @param doc Document object pointer + * @return const char* Primary key string pointer, returns NULL if not set + */ +ZVEC_EXPORT const char *ZVEC_CALL zvec_doc_get_pk_pointer(const ZVecDoc *doc); + +/** + * @brief Get document primary key copy (needs manual release) + * + * @param doc Document object pointer + * @return const char* Primary key string copy, needs to call free() to release, + * returns NULL if not set + */ +ZVEC_EXPORT const char *ZVEC_CALL zvec_doc_get_pk_copy(const ZVecDoc *doc); + +/** + * @brief Get field value (basic type returned directly) + * + * Supports basic numeric data types: BOOL, INT32, INT64, UINT32, UINT64, + * FLOAT, DOUBLE. The value is copied directly into the provided buffer. + * For STRING, BINARY, and VECTOR types, use zvec_doc_get_field_value_copy + * or zvec_doc_get_field_value_pointer instead. 
+ * + * @param doc Document object pointer + * @param field_name Field name + * @param field_type Field type (must be a basic numeric type) + * @param value_buffer Output buffer to receive the value + * @param buffer_size Size of the output buffer + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_doc_get_field_value_basic( + const ZVecDoc *doc, const char *field_name, ZVecDataType field_type, + void *value_buffer, size_t buffer_size); + +/** + * @brief Get field value copy (allocate new memory) + * + * Supports all data types including: + * - Basic types: BOOL, INT32, INT64, UINT32, UINT64, FLOAT, DOUBLE + * - String types: STRING, BINARY + * - Vector types: VECTOR_FP32, VECTOR_FP64, VECTOR_FP16, VECTOR_INT4, + * VECTOR_INT8, VECTOR_INT16, VECTOR_BINARY32, VECTOR_BINARY64 + * - Sparse vector types: SPARSE_VECTOR_FP32, SPARSE_VECTOR_FP16 + * - Array types: ARRAY_STRING, ARRAY_BINARY, ARRAY_BOOL, ARRAY_INT32, + * ARRAY_INT64, ARRAY_UINT32, ARRAY_UINT64, ARRAY_FLOAT, ARRAY_DOUBLE + * + * The returned value pointer must be manually freed using appropriate + * deallocation functions (free() for basic types and strings, + * zvec_free_uint8_array() for binary data). 
+ * + * @param doc Document object pointer + * @param field_name Field name + * @param field_type Field type + * @param[out] value Returned value pointer (needs manual release) + * @param[out] value_size Returned value size + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_doc_get_field_value_copy( + const ZVecDoc *doc, const char *field_name, ZVecDataType field_type, + void **value, size_t *value_size); + +/** + * @brief Get field value pointer (data remains in document) + * + * Supports data types where direct pointer access is safe: + * - Basic types: BOOL, INT32, INT64, UINT32, UINT64, FLOAT, DOUBLE + * - String types: STRING (returns null-terminated C string), BINARY + * - Vector types: VECTOR_FP32, VECTOR_FP64, VECTOR_FP16, VECTOR_INT4, + * VECTOR_INT8, VECTOR_INT16, VECTOR_BINARY32, VECTOR_BINARY64 + * - Array types: ARRAY_INT32, ARRAY_INT64, ARRAY_UINT32, ARRAY_UINT64, + * ARRAY_FLOAT, ARRAY_DOUBLE + * + * The returned pointer points to data within the document object and + * does not require manual memory management. The pointer remains valid + * as long as the document exists. 
+ * + * @param doc Document object pointer + * @param field_name Field name + * @param field_type Field type + * @param[out] value Returned value pointer (points to document-internal data) + * @param[out] value_size Returned value size + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_doc_get_field_value_pointer( + const ZVecDoc *doc, const char *field_name, ZVecDataType field_type, + const void **value, size_t *value_size); + +/** + * @brief Check if document is empty + * + * @param doc Document object pointer + * @return bool Returns true if document is empty, otherwise returns false + */ +ZVEC_EXPORT bool ZVEC_CALL zvec_doc_is_empty(const ZVecDoc *doc); + +/** + * @brief Check if document contains specified field + * + * @param doc Document object pointer + * @param field_name Field name + * @return bool Returns true if field exists, otherwise returns false + */ +ZVEC_EXPORT bool ZVEC_CALL zvec_doc_has_field(const ZVecDoc *doc, + const char *field_name); + +/** + * @brief Check if document field has value + * + * @param doc Document object pointer + * @param field_name Field name + * @return bool Returns true if field has value, otherwise returns false + */ +ZVEC_EXPORT bool ZVEC_CALL zvec_doc_has_field_value(const ZVecDoc *doc, + const char *field_name); + +/** + * @brief Check if document field is null + * + * @param doc Document object pointer + * @param field_name Field name + * @return bool Returns true if field is null, otherwise returns false + */ +ZVEC_EXPORT bool ZVEC_CALL zvec_doc_is_field_null(const ZVecDoc *doc, + const char *field_name); + +/** + * @brief Get all field names of document + * + * @param doc Document object pointer + * @param[out] field_names + * Returned field name array (needs to call zvec_free_str_array to release) + * @param[out] count Returned field count + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_doc_get_field_names( + const ZVecDoc *doc, char 
***field_names, size_t *count); + +/** + * @brief Release string array memory + * + * @param array String array pointer + * @param count Array element count + */ +ZVEC_EXPORT void ZVEC_CALL zvec_free_str_array(char **array, size_t count); + +/** + * @brief Serialize document + * + * @param doc Document object pointer + * @param[out] data Returned serialized data (needs to call + * zvec_free_uint8_array to release) + * @param[out] size Returned data size + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_doc_serialize(const ZVecDoc *doc, + uint8_t **data, + size_t *size); + +/** + * @brief Deserialize document + * + * @param data Serialized data + * @param size Data size + * @param[out] doc Returned document object pointer (needs to call + * zvec_doc_destroy to release) + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_doc_deserialize(const uint8_t *data, + size_t size, + ZVecDoc **doc); + +/** + * @brief Merge two documents + * + * @param doc Target document object pointer + * @param other Source document object pointer + */ +ZVEC_EXPORT void ZVEC_CALL zvec_doc_merge(ZVecDoc *doc, const ZVecDoc *other); + +/** + * @brief Get document memory usage + * + * @param doc Document object pointer + * @return size_t Memory usage (bytes) + */ +ZVEC_EXPORT size_t ZVEC_CALL zvec_doc_memory_usage(const ZVecDoc *doc); + +/** + * @brief Validate document against Schema + * + * @param doc Document object pointer + * @param schema Schema object pointer + * @param is_update Whether it's an update operation + * @param[out] error_msg Error message (needs manual release) + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_doc_validate(const ZVecDoc *doc, const ZVecCollectionSchema *schema, + bool is_update, char **error_msg); + +/** + * @brief Get detailed string representation of document + * + * @param doc Document object pointer + * @param[out] detail_str Returned detailed string 
(needs manual release) + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_doc_to_detail_string(const ZVecDoc *doc, char **detail_str); + +/** + * @brief Free docs array memory + * @param docs Document array pointer + * @param count Document count + */ +ZVEC_EXPORT void ZVEC_CALL zvec_docs_free(ZVecDoc **docs, size_t count); + + +// ============================================================================= +// Utility Functions +// ============================================================================= + +/** + * @brief Convert error code to description string + * @param error_code Error code + * @return const char* Error description string + */ +ZVEC_EXPORT const char *ZVEC_CALL +zvec_error_code_to_string(ZVecErrorCode error_code); + +/** + * @brief Convert data type to string + * @param data_type Data type + * @return const char* Data type string + */ +ZVEC_EXPORT const char *ZVEC_CALL +zvec_data_type_to_string(ZVecDataType data_type); + +/** + * @brief Convert index type to string + * @param index_type Index type + * @return const char* Index type string + */ +ZVEC_EXPORT const char *ZVEC_CALL +zvec_index_type_to_string(ZVecIndexType index_type); + +/** + * @brief Convert metric type to string + * @param metric_type Metric type + * @return const char* Metric type string + */ +const char *zvec_metric_type_to_string(ZVecMetricType metric_type); + + +// ============================================================================= +// Helper Functions +// ============================================================================= + +/** + * @brief Simplified HNSW index parameters initialization macro + * @param metric Distance metric type + * @param m_ Connectivity parameter + * @param ef_construction Exploration factor during construction + * @param ef_search Exploration factor during search + * @param quant Quantization type + * + * Usage example: + * ZVecHnswIndexParams params = ZVEC_HNSW_PARAMS(ZVEC_METRIC_TYPE_COSINE, 
16, + * 200, 50, ZVEC_QUANTIZE_TYPE_UNDEFINED); + */ +#define ZVEC_HNSW_PARAMS(metric, m_, ef_construction, ef_search, quant) \ + (ZVecHnswIndexParams) { \ + .base.base.index_type = ZVEC_INDEX_TYPE_HNSW, .base.metric_type = metric, \ + .base.quantize_type = quant, .m = m_, .ef_construction = ef_construction, \ + .ef_search = ef_search \ + } + +/** + * @brief Simplified inverted index parameters initialization macro + * @param range_opt Whether to enable range optimization + * @param wildcard Whether to enable wildcard expansion + * + * Usage example: + * ZVecInvertIndexParams params = ZVEC_INVERT_PARAMS(true, false); + */ +#define ZVEC_INVERT_PARAMS(range_opt, wildcard) \ + (ZVecInvertIndexParams) { \ + .base.index_type = ZVEC_INDEX_TYPE_INVERT, \ + .enable_range_optimization = range_opt, \ + .enable_extended_wildcard = wildcard \ + } + +/** + * @brief Simplified Flat index parameters initialization macro + * @param metric Distance metric type + * @param quant Quantization type + */ +#define ZVEC_FLAT_PARAMS(metric, quant) \ + (ZVecFlatIndexParams) { \ + .base.index_type = ZVEC_INDEX_TYPE_FLAT, .base.metric_type = metric, \ + .base.quantize_type = quant \ + } + +/** + * @brief Simplified IVF index parameters initialization macro + * @param metric Distance metric type + * @param nlist Number of cluster centers + * @param niters Number of iterations + * @param soar Whether to use SOAR algorithm + * @param nprobe Number of clusters to probe during search + * @param quant Quantization type + */ +#define ZVEC_IVF_PARAMS(metric, nlist, niters, soar, nprobe, quant) \ + (ZVecIVFIndexParams) { \ + .base.index_type = ZVEC_INDEX_TYPE_IVF, .base.metric_type = metric, \ + .base.quantize_type = quant, .n_list = nlist, .n_iters = niters, \ + .use_soar = soar, .n_probe = nprobe \ + } + +/** + * @brief Simplified string initialization macro + * @param str String content + * + * Usage example: + * ZVecString name = ZVEC_STRING("my_collection"); + */ +#define ZVEC_STRING(str) \ + 
(ZVecString) { \ + .data = str, .length = strlen(str) \ + } + +/** + * @brief Simplified string view initialization macro + * @param str String content + * + * Usage example: + * ZVecStringView name = ZVEC_STRING_VIEW("my_collection"); + */ +#define ZVEC_STRING_VIEW(str) \ + (ZVecStringView) { \ + .data = str, .length = strlen(str) \ + } + +// Has been replaced by the new ZVEC_STRING_VIEW macro + +/** + * @brief Simplified float array initialization macro + * @param data_ptr Float array pointer + * @param len Array length + * + * Usage example: + * float vectors[] = {0.1f, 0.2f, 0.3f}; + * ZVecFloatArray vec_array = ZVEC_FLOAT_ARRAY(vectors, 3); + */ +#define ZVEC_FLOAT_ARRAY(data_ptr, len) \ + (ZVecFloatArray) { \ + .data = data_ptr, .length = len \ + } + +/** + * @brief Simplified integer array initialization macro + * @param data_ptr Integer array pointer + * @param len Array length + */ +#define ZVEC_INT64_ARRAY(data_ptr, len) \ + (ZVecInt64Array) { \ + .data = data_ptr, .length = len \ + } + +/** + * @brief Simplified collection options initialization macro (using default + * values) + * + * Usage example: + * ZVecCollectionOptions opts = ZVEC_DEFAULT_OPTIONS(); + */ +#define ZVEC_DEFAULT_OPTIONS() \ + (ZVecCollectionOptions) { \ + .enable_mmap = true, .max_buffer_size = 1048576, .read_only = false, \ + .max_doc_count_per_segment = 1000000 \ + } + +/** + * @brief Simplified vector query initialization macro + * @param field_name_str Query field name + * @param query_vec Query vector array + * @param top_k Number of results to return + * @param filter_str Filter condition string + * + * Usage example: + * ZVecVectorQuery query = ZVEC_VECTOR_QUERY("embedding", query_vectors, 10, + * ""); + */ +#define ZVEC_VECTOR_QUERY(field_name_str, query_vec, top_k, filter_str) \ + (ZVecVectorQuery) { \ + .field_name = ZVEC_STRING(field_name_str), .query_vector = query_vec, \ + .topk = top_k, .filter = ZVEC_STRING(filter_str), .include_vector = 1, \ + .include_doc_id = 1 \ + 
} + +/** + * @brief Simplified document field initialization macro + * @param name_str Field name + * @param type Data type + * @param value_union Field value union + * + * Usage example: + * ZVecDocField field = ZVEC_DOC_FIELD("id", ZVEC_DATA_TYPE_STRING, + * {.string_value = ZVEC_STRING("doc1")}); + */ +#define ZVEC_DOC_FIELD(name_str, type, value_union) \ + (ZVecDocField) { \ + .name = ZVEC_STRING(name_str), .data_type = type, .value = value_union \ + } + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // ZVEC_C_API_H diff --git a/src/include/zvec/db/doc.h b/src/include/zvec/db/doc.h index fa056053..e6d13c86 100644 --- a/src/include/zvec/db/doc.h +++ b/src/include/zvec/db/doc.h @@ -68,6 +68,10 @@ class Doc { return pk_; } + const std::string &pk_ref() const { + return pk_; + } + void set_score(float score) { score_ = score; } @@ -103,6 +107,10 @@ class Doc { return op_; } + Operator get_operator() const { + return op_; + } + // Set field value template bool set(const std::string &field_name, T value) { @@ -232,6 +240,26 @@ class Doc { return std::nullopt; } + // Get field value as const reference, throws exception if field doesn't exist + // or type mismatches + template + const T &get_ref(const std::string &field_name) const { + auto it = fields_.find(field_name); + if (it == fields_.end()) { + throw std::runtime_error("Field '" + field_name + "' not found"); + } + + if (std::holds_alternative(it->second)) { + throw std::runtime_error("Field '" + field_name + "' is null"); + } + + try { + return std::get(it->second); + } catch (const std::bad_variant_access &) { + throw std::runtime_error("Field '" + field_name + "' type mismatch"); + } + } + void remove(const std::string &field_name) { fields_.erase(field_name); } diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 03250f1c..e1ffc326 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -4,3 +4,4 @@ include(${PROJECT_ROOT_DIR}/cmake/option.cmake) cc_directories(ailego) 
cc_directories(db) cc_directories(core) +cc_directories(c_api) \ No newline at end of file diff --git a/tests/c_api/CMakeLists.txt b/tests/c_api/CMakeLists.txt new file mode 100644 index 00000000..ad2f62e1 --- /dev/null +++ b/tests/c_api/CMakeLists.txt @@ -0,0 +1,28 @@ +# Copyright 2025-present the zvec project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +include(${CMAKE_SOURCE_DIR}/cmake/bazel.cmake) + +file(GLOB_RECURSE ALL_TEST_SRCS *_test.c) + +foreach(CC_SRCS ${ALL_TEST_SRCS}) + get_filename_component(CC_TARGET ${CC_SRCS} NAME_WE) + cc_gtest( + NAME ${CC_TARGET} + STRICT + LIBS zvec_c_api + SRCS ${CC_SRCS} utils.c + INCS . .. ../../src + ) +endforeach() diff --git a/tests/c_api/c_api_test.c b/tests/c_api/c_api_test.c new file mode 100644 index 00000000..18465110 --- /dev/null +++ b/tests/c_api/c_api_test.c @@ -0,0 +1,4399 @@ +// Copyright 2025-present the zvec project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "zvec/c_api.h" +#include +#include +#include +#include +#include +#include +#ifdef _POSIX_C_SOURCE +#include +#endif +#include +#include "utils.h" + +// ============================================================================= +// Test helper macro definitions +// ============================================================================= + +static int test_count = 0; +static int passed_count = 0; +static int current_test_passed = 1; // Track if current test function passes + +#define TEST_START() \ + do { \ + printf("Running test: %s\n", __func__); \ + test_count++; \ + current_test_passed = 1; \ + } while (0) + +#define TEST_ASSERT(condition) \ + do { \ + if (condition) { \ + printf(" ✓ PASS\n"); \ + } else { \ + printf(" ✗ FAIL at line %d\n", __LINE__); \ + current_test_passed = 0; \ + } \ + } while (0) + +#define TEST_END() \ + do { \ + if (current_test_passed) { \ + passed_count++; \ + } \ + } while (0) + +// ============================================================================= +// Helper functions tests +// ============================================================================= + +void test_version_functions(void) { + TEST_START(); + + // Test version retrieval functions + const char *version = zvec_get_version(); + TEST_ASSERT(version != NULL); + + // Test version component retrieval + int major = zvec_get_version_major(); + int minor = zvec_get_version_minor(); + int patch = zvec_get_version_patch(); + + TEST_ASSERT(major >= 0); + TEST_ASSERT(minor >= 0); + TEST_ASSERT(patch >= 0); + + TEST_ASSERT(zvec_check_version(major, minor, patch)); + + // Test version checking functions + bool compatible = zvec_check_version(0, 3, 0); + TEST_ASSERT(compatible == true); + + bool not_compatible = zvec_check_version(99, 99, 99); + TEST_ASSERT(not_compatible == false); + + TEST_END(); +} + +void test_error_handling_functions(void) { + TEST_START(); + + char *error_msg = NULL; + ZVecErrorCode err = zvec_get_last_error(&error_msg); + 
TEST_ASSERT(err == ZVEC_OK); + + if (error_msg) { + free(error_msg); + } + + // Test error clearing + zvec_clear_error(); + + // Test error details retrieval + ZVecErrorDetails error_details = {0}; + err = zvec_get_last_error_details(&error_details); + TEST_ASSERT(err == ZVEC_OK); + + TEST_END(); +} + +void test_zvec_config() { + TEST_START(); + + // Test 1: Console log config creation and destruction + ZVecConsoleLogConfig *console_config = + zvec_config_console_log_create(ZVEC_LOG_LEVEL_INFO); + TEST_ASSERT(console_config != NULL); + if (console_config) { + TEST_ASSERT(console_config->level == ZVEC_LOG_LEVEL_INFO); + zvec_config_console_log_destroy(console_config); + } + + // Test 2: File log config creation and destruction + ZVecFileLogConfig *file_config = zvec_config_file_log_create( + ZVEC_LOG_LEVEL_WARN, "./logs", "test_log", 100, 7); + TEST_ASSERT(file_config != NULL); + if (file_config) { + TEST_ASSERT(file_config->level == ZVEC_LOG_LEVEL_WARN); + TEST_ASSERT(strcmp(file_config->dir.data, "./logs") == 0); + TEST_ASSERT(strcmp(file_config->basename.data, "test_log") == 0); + TEST_ASSERT(file_config->file_size == 100); + TEST_ASSERT(file_config->overdue_days == 7); + zvec_config_file_log_destroy(file_config); + } + + // Test 3: File log config edge cases + ZVecFileLogConfig *empty_file_config = + zvec_config_file_log_create(ZVEC_LOG_LEVEL_INFO, "", "", 0, 0); + TEST_ASSERT(empty_file_config != NULL); + if (empty_file_config) { + TEST_ASSERT(empty_file_config->level == ZVEC_LOG_LEVEL_INFO); + TEST_ASSERT(strcmp(empty_file_config->dir.data, "") == 0); + TEST_ASSERT(strcmp(empty_file_config->basename.data, "") == 0); + TEST_ASSERT(empty_file_config->file_size == 0); + TEST_ASSERT(empty_file_config->overdue_days == 0); + zvec_config_file_log_destroy(empty_file_config); + } + + // Test 4: Log config creation with console type + ZVecConsoleLogConfig *temp_console = + zvec_config_console_log_create(ZVEC_LOG_LEVEL_ERROR); + TEST_ASSERT(temp_console != NULL); + if 
(temp_console) { + zvec_config_console_log_destroy(temp_console); + } + + // Test 5: Log config creation with file type + ZVecFileLogConfig *temp_file = zvec_config_file_log_create( + ZVEC_LOG_LEVEL_DEBUG, "./logs", "app", 50, 30); + TEST_ASSERT(temp_file != NULL); + TEST_ASSERT(temp_file->level == ZVEC_LOG_LEVEL_DEBUG); + TEST_ASSERT(strcmp(temp_file->dir.data, "./logs") == 0); + TEST_ASSERT(strcmp(temp_file->basename.data, "app") == 0); + TEST_ASSERT(temp_file->file_size == 50); + TEST_ASSERT(temp_file->overdue_days == 30); + + zvec_config_file_log_destroy(temp_file); + + // Test 6: Config data creation and basic operations + ZVecConfigData *config_data = zvec_config_data_create(); + TEST_ASSERT(config_data != NULL); + if (config_data) { + // Test initial values + TEST_ASSERT(config_data->log_config != NULL); + TEST_ASSERT(config_data->log_type == ZVEC_LOG_TYPE_CONSOLE); + + // Test memory limit setting + ZVecErrorCode err = + zvec_config_data_set_memory_limit(config_data, 1024 * 1024 * 1024); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(config_data->memory_limit_bytes == 1024 * 1024 * 1024); + + // Test thread count settings + err = zvec_config_data_set_query_thread_count(config_data, 8); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(config_data->query_thread_count == 8); + + err = zvec_config_data_set_optimize_thread_count(config_data, 4); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(config_data->optimize_thread_count == 4); + + // Test log config replacement + TEST_ASSERT(config_data->log_type == ZVEC_LOG_TYPE_CONSOLE); + TEST_ASSERT(config_data->log_config != NULL); + + ZVecFileLogConfig *new_file = zvec_config_file_log_create( + ZVEC_LOG_LEVEL_DEBUG, "./logs", "app", 50, 30); + TEST_ASSERT(new_file != NULL); + zvec_config_data_set_log_config(config_data, ZVEC_LOG_TYPE_FILE, new_file); + TEST_ASSERT(config_data->log_type == ZVEC_LOG_TYPE_FILE); + TEST_ASSERT(config_data->log_config != NULL); + + zvec_config_data_destroy(config_data); + } + + // Test 7: Edge 
cases and error conditions + // Test NULL pointer handling + ZVecErrorCode err = zvec_config_data_set_memory_limit(NULL, 1024); + TEST_ASSERT(err == ZVEC_ERROR_INVALID_ARGUMENT); + + err = zvec_config_data_set_log_config(NULL, ZVEC_LOG_TYPE_CONSOLE, NULL); + TEST_ASSERT(err == ZVEC_ERROR_INVALID_ARGUMENT); + + err = zvec_config_data_set_query_thread_count(NULL, 1); + TEST_ASSERT(err == ZVEC_ERROR_INVALID_ARGUMENT); + + err = zvec_config_data_set_optimize_thread_count(NULL, 1); + TEST_ASSERT(err == ZVEC_ERROR_INVALID_ARGUMENT); + + // Test boundary values + ZVecConfigData *boundary_config = zvec_config_data_create(); + if (boundary_config) { + // Test zero values + err = zvec_config_data_set_memory_limit(boundary_config, 0); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(boundary_config->memory_limit_bytes == 0); + + // Test maximum values + err = zvec_config_data_set_memory_limit(boundary_config, UINT64_MAX); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(boundary_config->memory_limit_bytes == UINT64_MAX); + + // Test zero thread counts + err = zvec_config_data_set_query_thread_count(boundary_config, 0); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(boundary_config->query_thread_count == 0); + + err = zvec_config_data_set_optimize_thread_count(boundary_config, 0); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(boundary_config->optimize_thread_count == 0); + + zvec_config_data_destroy(boundary_config); + } + + // Test 8: Memory leak prevention - double destroy safety + ZVecConfigData *double_destroy_test = zvec_config_data_create(); + if (double_destroy_test) { + zvec_config_data_destroy(double_destroy_test); + } + + TEST_END(); +} + +void test_zvec_initialize() { + TEST_START(); + + ZVecConfigData *config = zvec_config_data_create(); + TEST_ASSERT(config != NULL); + if (config) { + TEST_ASSERT(config->log_config != NULL); + TEST_ASSERT(config->log_type == ZVEC_LOG_TYPE_CONSOLE); + } + ZVecErrorCode err = zvec_initialize(config); + TEST_ASSERT(err == ZVEC_OK); + bool 
is_initialized = false; + zvec_is_initialized(&is_initialized); + TEST_ASSERT(is_initialized); + + TEST_END(); +} + +// ============================================================================= +// Schema-related tests +// ============================================================================= + +void test_schema_basic_operations(void) { + TEST_START(); + + // Test 1: Basic Schema creation and destruction + ZVecCollectionSchema *schema = zvec_collection_schema_create("demo"); + TEST_ASSERT(schema != NULL); + TEST_ASSERT(schema->name != NULL); + TEST_ASSERT(strcmp(schema->name->data, "demo") == 0); + TEST_ASSERT(schema->field_count == 0); + TEST_ASSERT(schema->fields == NULL); + TEST_ASSERT(schema->max_doc_count_per_segment > 0); + + // Test 2: Schema field count operations + size_t initial_count = zvec_collection_schema_get_field_count(schema); + TEST_ASSERT(initial_count == 0); + + // Test 3: Adding fields to schema + ZVecFieldSchema *id_field = + zvec_field_schema_create("id", ZVEC_DATA_TYPE_INT64, false, 0); + ZVecErrorCode err = zvec_collection_schema_add_field(schema, id_field); + TEST_ASSERT(err == ZVEC_OK); + + size_t count_after_add = zvec_collection_schema_get_field_count(schema); + TEST_ASSERT(count_after_add == 1); + + // Test 4: Finding fields in schema + const ZVecFieldSchema *found_field = + zvec_collection_schema_find_field(schema, "id"); + TEST_ASSERT(found_field != NULL); + TEST_ASSERT(strcmp(found_field->name->data, "id") == 0); + TEST_ASSERT(found_field->data_type == ZVEC_DATA_TYPE_INT64); + + // Test 5: Getting field by index + ZVecFieldSchema *indexed_field = zvec_collection_schema_get_field(schema, 0); + TEST_ASSERT(indexed_field != NULL); + TEST_ASSERT(strcmp(indexed_field->name->data, "id") == 0); + + // Test 6: Adding multiple fields + ZVecFieldSchema fields_to_add[2]; + ZVecFieldSchema *name_field = + zvec_field_schema_create("name", ZVEC_DATA_TYPE_STRING, false, 0); + ZVecFieldSchema *age_field = + 
zvec_field_schema_create("age", ZVEC_DATA_TYPE_INT32, true, 0); + + fields_to_add[0] = *name_field; + fields_to_add[1] = *age_field; + + err = zvec_collection_schema_add_fields(schema, fields_to_add, 2); + TEST_ASSERT(err == ZVEC_OK); + + size_t count_after_multi_add = zvec_collection_schema_get_field_count(schema); + TEST_ASSERT(count_after_multi_add == 3); + + // Test 7: Finding newly added fields + const ZVecFieldSchema *name_found = + zvec_collection_schema_find_field(schema, "name"); + TEST_ASSERT(name_found != NULL); + TEST_ASSERT(strcmp(name_found->name->data, "name") == 0); + + const ZVecFieldSchema *age_found = + zvec_collection_schema_find_field(schema, "age"); + TEST_ASSERT(age_found != NULL); + TEST_ASSERT(strcmp(age_found->name->data, "age") == 0); + + // Test 8: Setting and getting max doc count + err = zvec_collection_schema_set_max_doc_count_per_segment(schema, 10000); + TEST_ASSERT(err == ZVEC_OK); + + uint64_t max_doc_count = + zvec_collection_schema_get_max_doc_count_per_segment(schema); + TEST_ASSERT(max_doc_count == 10000); + + // Test 9: Schema validation + ZVecString *validation_error = NULL; + err = zvec_collection_schema_validate(schema, &validation_error); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(validation_error == NULL); + + // Test 10: Removing single field + err = zvec_collection_schema_remove_field(schema, "age"); + TEST_ASSERT(err == ZVEC_OK); + + size_t count_after_remove = zvec_collection_schema_get_field_count(schema); + TEST_ASSERT(count_after_remove == 2); + + const ZVecFieldSchema *removed_field = + zvec_collection_schema_find_field(schema, "age"); + TEST_ASSERT(removed_field == NULL); + + // Test 11: Removing multiple fields + const char *fields_to_remove[] = {"name", "id"}; + err = zvec_collection_schema_remove_fields(schema, fields_to_remove, 2); + TEST_ASSERT(err == ZVEC_OK); + + size_t final_count = zvec_collection_schema_get_field_count(schema); + TEST_ASSERT(final_count == 0); + + // Test 12: Schema cleanup + 
zvec_collection_schema_destroy(schema); + + TEST_END(); +} + +void test_schema_edge_cases(void) { + TEST_START(); + + // Test 1: NULL parameter handling for schema creation + ZVecCollectionSchema *null_schema = zvec_collection_schema_create(NULL); + TEST_ASSERT(null_schema == NULL); + + // Test 2: Empty string schema name + ZVecCollectionSchema *empty_schema = zvec_collection_schema_create(""); + TEST_ASSERT(empty_schema != NULL); + TEST_ASSERT(empty_schema->name != NULL); + TEST_ASSERT(strcmp(empty_schema->name->data, "") == 0); + zvec_collection_schema_destroy(empty_schema); + + // Test 3: Very long schema name + char long_name[1024]; + memset(long_name, 'a', 1023); + long_name[1023] = '\0'; + ZVecCollectionSchema *long_schema = zvec_collection_schema_create(long_name); + TEST_ASSERT(long_schema != NULL); + TEST_ASSERT(long_schema->name != NULL); + TEST_ASSERT(strlen(long_schema->name->data) == 1023); + zvec_collection_schema_destroy(long_schema); + + // Test 4: NULL schema parameter handling for all functions + ZVecErrorCode err; + size_t count = zvec_collection_schema_get_field_count(NULL); + TEST_ASSERT(count == 0); + + const ZVecFieldSchema *null_field = + zvec_collection_schema_find_field(NULL, "test"); + TEST_ASSERT(null_field == NULL); + + ZVecFieldSchema *null_indexed_field = + zvec_collection_schema_get_field(NULL, 0); + TEST_ASSERT(null_indexed_field == NULL); + + uint64_t null_max_doc_count = + zvec_collection_schema_get_max_doc_count_per_segment(NULL); + TEST_ASSERT(null_max_doc_count == 0); + + err = zvec_collection_schema_set_max_doc_count_per_segment(NULL, 1000); + TEST_ASSERT(err == ZVEC_ERROR_INVALID_ARGUMENT); + + ZVecString *null_validation_error = NULL; + err = zvec_collection_schema_validate(NULL, &null_validation_error); + TEST_ASSERT(err == ZVEC_ERROR_INVALID_ARGUMENT); + TEST_ASSERT(null_validation_error == NULL); + + err = zvec_collection_schema_add_field(NULL, NULL); + TEST_ASSERT(err == ZVEC_ERROR_INVALID_ARGUMENT); + + err = 
zvec_collection_schema_add_fields(NULL, NULL, 0); + TEST_ASSERT(err == ZVEC_ERROR_INVALID_ARGUMENT); + + err = zvec_collection_schema_remove_field(NULL, "test"); + TEST_ASSERT(err == ZVEC_ERROR_INVALID_ARGUMENT); + + const char *null_field_names[] = {NULL}; + err = zvec_collection_schema_remove_fields(NULL, null_field_names, 1); + TEST_ASSERT(err == ZVEC_ERROR_INVALID_ARGUMENT); + + // Test 5: Working with valid schema for edge cases + ZVecCollectionSchema *schema = zvec_collection_schema_create("edge_test"); + TEST_ASSERT(schema != NULL); + + // Test 6: Adding NULL field to schema + err = zvec_collection_schema_add_field(schema, NULL); + TEST_ASSERT(err == ZVEC_ERROR_INVALID_ARGUMENT); + + // Test 7: Adding fields with NULL array + err = zvec_collection_schema_add_fields(schema, NULL, 5); + TEST_ASSERT(err == ZVEC_ERROR_INVALID_ARGUMENT); + + // Test 8: Adding zero fields + err = zvec_collection_schema_add_fields(schema, NULL, 0); + TEST_ASSERT(err == ZVEC_OK); + + // Test 9: Finding field with NULL name + const ZVecFieldSchema *null_name_field = + zvec_collection_schema_find_field(schema, NULL); + TEST_ASSERT(null_name_field == NULL); + + // Test 10: Finding non-existent field + const ZVecFieldSchema *nonexistent_field = + zvec_collection_schema_find_field(schema, "nonexistent"); + TEST_ASSERT(nonexistent_field == NULL); + + // Test 11: Getting field with invalid index + ZVecFieldSchema *invalid_index_field = + zvec_collection_schema_get_field(schema, 1000); + TEST_ASSERT(invalid_index_field == NULL); + + // Test 12: Getting field from empty schema with index 0 + ZVecFieldSchema *zero_index_field = + zvec_collection_schema_get_field(schema, 0); + TEST_ASSERT(zero_index_field == NULL); + + // Test 13: Removing field with NULL name + err = zvec_collection_schema_remove_field(schema, NULL); + TEST_ASSERT(err == ZVEC_ERROR_INVALID_ARGUMENT); + + // Test 14: Removing non-existent field + err = zvec_collection_schema_remove_field(schema, "nonexistent"); + 
TEST_ASSERT(err == ZVEC_ERROR_NOT_FOUND); + + // Test 15: Removing fields with NULL array + err = zvec_collection_schema_remove_fields(schema, NULL, 5); + TEST_ASSERT(err == ZVEC_ERROR_INVALID_ARGUMENT); + + // Test 16: Removing zero fields + err = zvec_collection_schema_remove_fields(schema, NULL, 0); + TEST_ASSERT(err == ZVEC_OK); + + // Test 17: Setting extremely large max doc count + err = + zvec_collection_schema_set_max_doc_count_per_segment(schema, UINT64_MAX); + TEST_ASSERT(err == ZVEC_OK); + uint64_t retrieved_max_count = + zvec_collection_schema_get_max_doc_count_per_segment(schema); + TEST_ASSERT(retrieved_max_count == UINT64_MAX); + + // Test 18: Setting zero max doc count + err = zvec_collection_schema_set_max_doc_count_per_segment(schema, 0); + TEST_ASSERT(err == ZVEC_OK); + uint64_t zero_max_count = + zvec_collection_schema_get_max_doc_count_per_segment(schema); + TEST_ASSERT(zero_max_count == 0); + + // Test 19: Schema validation with empty schema + ZVecString *empty_validation_error = NULL; + err = zvec_collection_schema_validate(schema, &empty_validation_error); + TEST_ASSERT(err == ZVEC_ERROR_INVALID_ARGUMENT); + + // Test 20: Add duplicate field names + ZVecFieldSchema *first_id = + zvec_field_schema_create("duplicate_id", ZVEC_DATA_TYPE_INT64, false, 0); + ZVecFieldSchema *second_id = + zvec_field_schema_create("duplicate_id", ZVEC_DATA_TYPE_STRING, false, 0); + + err = zvec_collection_schema_add_field(schema, first_id); + TEST_ASSERT(err == ZVEC_OK); + + err = zvec_collection_schema_add_field(schema, second_id); + TEST_ASSERT(err == ZVEC_ERROR_ALREADY_EXISTS); + zvec_field_schema_destroy(second_id); + + // Verify fields + size_t field_count = zvec_collection_schema_get_field_count(schema); + TEST_ASSERT(field_count == 1); + + // Test 21: Cleanup + zvec_collection_schema_destroy(schema); + + TEST_END(); +} + +void test_schema_field_operations(void) { + TEST_START(); + + ZVecCollectionSchema *schema = zvec_test_create_temp_schema(); + 
TEST_ASSERT(schema != NULL); + + if (schema) { + // Test field count + size_t initial_count = zvec_collection_schema_get_field_count(schema); + TEST_ASSERT(initial_count == 5); + + // Test finding non-existent field + const ZVecFieldSchema *nonexistent = + zvec_collection_schema_find_field(schema, "nonexistent"); + TEST_ASSERT(nonexistent == NULL); + + // Test finding existing field + const ZVecFieldSchema *id_field = + zvec_collection_schema_find_field(schema, "id"); + TEST_ASSERT(id_field != NULL); + if (id_field) { + TEST_ASSERT(strcmp(id_field->name->data, "id") == 0); + TEST_ASSERT(id_field->data_type == ZVEC_DATA_TYPE_INT64); + } + + zvec_collection_schema_destroy(schema); + } + + TEST_END(); +} + +void test_normal_schema_creation(void) { + TEST_START(); + + ZVecCollectionSchema *schema = + zvec_test_create_normal_schema(false, "test_normal", NULL, NULL, 1000); + TEST_ASSERT(schema != NULL); + + if (schema) { + TEST_ASSERT(strcmp(schema->name->data, "test_normal") == 0); + + // Verify field count + size_t field_count = zvec_collection_schema_get_field_count(schema); + TEST_ASSERT(field_count > 0); + + zvec_collection_schema_destroy(schema); + } + + TEST_END(); +} + +void test_schema_with_indexes(void) { + TEST_START(); + + // Test Schema with scalar index + ZVecCollectionSchema *scalar_index_schema = + zvec_test_create_schema_with_scalar_index(true, true, + "scalar_index_test"); + TEST_ASSERT(scalar_index_schema != NULL); + if (scalar_index_schema) { + zvec_collection_schema_destroy(scalar_index_schema); + } + + // Test Schema with vector index + ZVecCollectionSchema *vector_index_schema = + zvec_test_create_schema_with_vector_index(false, "vector_index_test", + NULL); + TEST_ASSERT(vector_index_schema != NULL); + if (vector_index_schema) { + zvec_collection_schema_destroy(vector_index_schema); + } + + TEST_END(); +} + +void test_schema_max_doc_count(void) { + TEST_START(); + + // Test 1: Setting max doc count to a valid value + ZVecCollectionSchema *schema = 
zvec_collection_schema_create("max_doc_test"); + TEST_ASSERT(schema != NULL); + + ZVecErrorCode err = + zvec_collection_schema_set_max_doc_count_per_segment(schema, 1000); + TEST_ASSERT(err == ZVEC_OK); + + uint64_t max_doc_count = + zvec_collection_schema_get_max_doc_count_per_segment(schema); + TEST_ASSERT(max_doc_count == 1000); + + zvec_collection_schema_destroy(schema); + + // Test 2: Setting max doc count to zero + schema = zvec_collection_schema_create("max_doc_test"); + TEST_ASSERT(schema != NULL); + + err = zvec_collection_schema_set_max_doc_count_per_segment(schema, 0); + TEST_ASSERT(err == ZVEC_OK); + + max_doc_count = zvec_collection_schema_get_max_doc_count_per_segment(schema); + TEST_ASSERT(max_doc_count == 0); + + zvec_collection_schema_destroy(schema); + + // Test 3: Setting max doc count to maximum value + schema = zvec_collection_schema_create("max_doc_test"); + TEST_ASSERT(schema != NULL); + + err = + zvec_collection_schema_set_max_doc_count_per_segment(schema, UINT64_MAX); + TEST_ASSERT(err == ZVEC_OK); + + max_doc_count = zvec_collection_schema_get_max_doc_count_per_segment(schema); + TEST_ASSERT(max_doc_count == UINT64_MAX); + + zvec_collection_schema_destroy(schema); + + TEST_END(); +} + +// ============================================================================= +// Collection-related tests +// ============================================================================= + +void test_collection_basic_operations(void) { + TEST_START(); + + // Create temporary directory + char temp_dir[] = "/tmp/zvec_test_collection_basic_operations"; + + ZVecCollectionSchema *schema = zvec_test_create_temp_schema(); + TEST_ASSERT(schema != NULL); + + if (schema) { + ZVecCollection *collection = NULL; + ZVecErrorCode err = + zvec_collection_create_and_open(temp_dir, schema, NULL, &collection); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(collection != NULL); + + if (collection) { + // Test collection operations + ZVecDoc *doc1 = zvec_test_create_doc(1, 
schema, NULL); + ZVecDoc *doc2 = zvec_test_create_doc(2, schema, NULL); + ZVecDoc *doc3 = zvec_test_create_doc(3, schema, NULL); + + TEST_ASSERT(doc1 != NULL); + TEST_ASSERT(doc2 != NULL); + TEST_ASSERT(doc3 != NULL); + + if (doc1 && doc2 && doc3) { + ZVecDoc *docs[] = {doc1, doc2, doc3}; + size_t success_count, error_count; + + // Test insert operation + err = zvec_collection_insert(collection, (const ZVecDoc **)docs, 3, + &success_count, &error_count); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(success_count == 3); + TEST_ASSERT(error_count == 0); + + // Test update operation + zvec_doc_set_score(doc1, 0.95f); + ZVecDoc *update_docs[] = {doc1}; + err = zvec_collection_update(collection, (const ZVecDoc **)update_docs, + 1, &success_count, &error_count); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(success_count == 1); + TEST_ASSERT(error_count == 0); + + // Test upsert operation + zvec_doc_set_pk(doc3, "pk_3_modified"); + ZVecDoc *upsert_docs[] = {doc3}; + err = zvec_collection_upsert(collection, (const ZVecDoc **)upsert_docs, + 1, &success_count, &error_count); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(success_count == 1); + TEST_ASSERT(error_count == 0); + + // Test delete operation by primary keys + const char *pks[] = {"pk_1", "pk_2"}; + err = zvec_collection_delete(collection, pks, 2, &success_count, + &error_count); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(success_count == 2); + TEST_ASSERT(error_count == 0); + + // Test delete by filter + err = zvec_collection_delete_by_filter(collection, "id > 0"); + TEST_ASSERT(err == ZVEC_OK); + + // Clean up documents + zvec_doc_destroy(doc1); + zvec_doc_destroy(doc2); + zvec_doc_destroy(doc3); + } + + // Test collection flush + err = zvec_collection_flush(collection); + TEST_ASSERT(err == ZVEC_OK); + + // Test collection optimization + err = zvec_collection_optimize(collection); + TEST_ASSERT(err == ZVEC_OK); + + zvec_collection_destroy(collection); + } + + zvec_collection_schema_destroy(schema); + } + + // 
Clean up temporary directory + char cmd[256]; + snprintf(cmd, sizeof(cmd), "rm -rf %s", temp_dir); + system(cmd); + + TEST_END(); +} + +void test_collection_edge_cases(void) { + TEST_START(); + + char temp_dir[] = "/tmp/zvec_test_collection_edge_cases"; + + ZVecCollectionSchema *schema = zvec_test_create_temp_schema(); + TEST_ASSERT(schema != NULL); + + if (schema) { + ZVecCollection *collection = NULL; + + // Test empty name collection + ZVecErrorCode err = + zvec_collection_create_and_open(temp_dir, schema, NULL, &collection); + TEST_ASSERT(err == ZVEC_OK); + if (collection) { + zvec_collection_destroy(collection); + collection = NULL; + } + + // Test long name collection + char long_name[256]; + memset(long_name, 'a', 255); + long_name[255] = '\0'; + + char long_path[512]; + snprintf(long_path, sizeof(long_path), "%s/%s", temp_dir, + "very_long_collection_name_that_tests_path_limits"); + + err = zvec_collection_create_and_open(long_path, schema, NULL, &collection); + TEST_ASSERT(err == ZVEC_OK); + if (collection) { + zvec_collection_destroy(collection); + collection = NULL; + } + + // Test NULL name集合 + err = zvec_collection_create_and_open(temp_dir, schema, NULL, &collection); + TEST_ASSERT(err != ZVEC_OK); + + zvec_collection_schema_destroy(schema); + } + + // Clean up temporary directory + char cmd[256]; + snprintf(cmd, sizeof(cmd), "rm -rf %s", temp_dir); + system(cmd); + + TEST_END(); +} + +void test_collection_delete_by_filter(void) { + TEST_START(); + + char temp_dir[] = "/tmp/zvec_test_collection_delete_by_filter"; + + ZVecCollectionSchema *schema = zvec_test_create_temp_schema(); + TEST_ASSERT(schema != NULL); + + if (schema) { + ZVecCollection *collection = NULL; + ZVecErrorCode err = + zvec_collection_create_and_open(temp_dir, schema, NULL, &collection); + TEST_ASSERT(err == ZVEC_OK); + + if (collection) { + // Test normal deletion filtering + err = zvec_collection_delete_by_filter(collection, "id > 1"); + TEST_ASSERT(err == ZVEC_OK); + + // Test NULL 
filter + err = zvec_collection_delete_by_filter(collection, NULL); + TEST_ASSERT(err != ZVEC_OK); + + // Test empty string filter + err = zvec_collection_delete_by_filter(collection, ""); + TEST_ASSERT(err == ZVEC_OK); + + zvec_collection_destroy(collection); + } + + zvec_collection_schema_destroy(schema); + } + + // Clean up temporary directory + char cmd[256]; + snprintf(cmd, sizeof(cmd), "rm -rf %s", temp_dir); + system(cmd); + + TEST_END(); +} + +void test_collection_stats(void) { + TEST_START(); + + char temp_dir[] = "/tmp/zvec_test_collection_stats"; + + ZVecCollectionSchema *schema = zvec_test_create_temp_schema(); + TEST_ASSERT(schema != NULL); + + if (schema) { + ZVecCollection *collection = NULL; + ZVecErrorCode err = + zvec_collection_create_and_open(temp_dir, schema, NULL, &collection); + TEST_ASSERT(err == ZVEC_OK); + + if (collection) { + ZVecCollectionStats *stats = NULL; + err = zvec_collection_get_stats(collection, &stats); + TEST_ASSERT(err == ZVEC_OK); + + if (stats) { + // Basic validation of statistics + TEST_ASSERT(stats->doc_count == + 0); // New collection should have no documents + zvec_collection_stats_destroy(stats); + } + + zvec_collection_destroy(collection); + } + + zvec_collection_schema_destroy(schema); + } + + // Clean up temporary directory + char cmd[256]; + snprintf(cmd, sizeof(cmd), "rm -rf %s", temp_dir); + system(cmd); + + TEST_END(); +} + +// ============================================================================= +// Field-related tests +// ============================================================================= + +void test_field_schema_functions(void) { + TEST_START(); + + // Test scalar field creation + ZVecFieldSchema scalar_field = {0}; + ZVecString name1 = {0}; + name1.data = "test_field"; + name1.length = 10; + scalar_field.name = &name1; + scalar_field.data_type = ZVEC_DATA_TYPE_STRING; + scalar_field.nullable = true; + scalar_field.dimension = 0; + + TEST_ASSERT(strcmp(scalar_field.name->data, 
"test_field") == 0); + TEST_ASSERT(scalar_field.data_type == ZVEC_DATA_TYPE_STRING); + TEST_ASSERT(scalar_field.nullable == true); + + // Test vector field creation + ZVecFieldSchema vector_field = {0}; + ZVecString name2 = {0}; + name2.data = "vec_field"; + name2.length = 9; + vector_field.name = &name2; + vector_field.data_type = ZVEC_DATA_TYPE_VECTOR_FP32; + vector_field.nullable = false; + vector_field.dimension = 128; + + TEST_ASSERT(strcmp(vector_field.name->data, "vec_field") == 0); + TEST_ASSERT(vector_field.data_type == ZVEC_DATA_TYPE_VECTOR_FP32); + TEST_ASSERT(vector_field.dimension == 128); + + // Test sparse vector field creation + ZVecFieldSchema sparse_field = {0}; + ZVecString name3 = {0}; + name3.data = "sparse_field"; + name3.length = 12; + sparse_field.name = &name3; + sparse_field.data_type = ZVEC_DATA_TYPE_SPARSE_VECTOR_FP32; + sparse_field.nullable = false; + sparse_field.dimension = 0; + + TEST_ASSERT(strcmp(sparse_field.name->data, "sparse_field") == 0); + TEST_ASSERT(sparse_field.data_type == ZVEC_DATA_TYPE_SPARSE_VECTOR_FP32); + + TEST_END(); +} + +void test_field_helper_functions(void) { + TEST_START(); + + // Test scalar field helper functions + ZVecInvertIndexParams *invert_params = + zvec_test_create_default_invert_params(true); + ZVecFieldSchema *scalar_field = zvec_test_create_scalar_field( + "test_scalar", ZVEC_DATA_TYPE_INT32, true, invert_params); + TEST_ASSERT(scalar_field != NULL); + if (scalar_field) { + TEST_ASSERT(strcmp(scalar_field->name->data, "test_scalar") == 0); + TEST_ASSERT(scalar_field->data_type == ZVEC_DATA_TYPE_INT32); + free(scalar_field); + } + if (invert_params) { + free(invert_params); + } + + // Test vector field helper functions + ZVecHnswIndexParams *hnsw_params = zvec_test_create_default_hnsw_params(); + ZVecFieldSchema *vector_field = zvec_test_create_vector_field( + "test_vector", ZVEC_DATA_TYPE_VECTOR_FP32, 128, false, hnsw_params); + TEST_ASSERT(vector_field != NULL); + if (vector_field) { + 
TEST_ASSERT(strcmp(vector_field->name->data, "test_vector") == 0); + TEST_ASSERT(vector_field->data_type == ZVEC_DATA_TYPE_VECTOR_FP32); + TEST_ASSERT(vector_field->dimension == 128); + free(vector_field); + } + if (hnsw_params) { + free(hnsw_params); + } + + TEST_END(); +} + +// ============================================================================= +// Document-related tests +// ============================================================================= + +void test_doc_creation(void) { + TEST_START(); + + ZVecCollectionSchema *schema = zvec_test_create_temp_schema(); + TEST_ASSERT(schema != NULL); + + if (schema) { + // Test complete document creation + ZVecDoc *doc = zvec_test_create_doc(1, schema, NULL); + TEST_ASSERT(doc != NULL); + if (doc) { + zvec_doc_destroy(doc); + } + + // Test null value document creation + ZVecDoc *null_doc = zvec_test_create_doc_null(2, schema, NULL); + TEST_ASSERT(null_doc != NULL); + if (null_doc) { + zvec_doc_destroy(null_doc); + } + + zvec_collection_schema_destroy(schema); + } + + TEST_END(); +} + +void test_doc_primary_key(void) { + TEST_START(); + + // Test primary key generation + char *pk = zvec_test_make_pk(12345); + TEST_ASSERT(pk != NULL); + if (pk) { + TEST_ASSERT(strcmp(pk, "pk_12345") == 0); + free(pk); + } + + TEST_END(); +} + +// Test for zvec_doc_add_field_by_value - covers all data types +void test_doc_add_field_by_value(void) { + TEST_START(); + + ZVecDoc *doc = zvec_doc_create(); + TEST_ASSERT(doc != NULL); + + if (!doc) { + TEST_END(); + return; + } + + // Scalar types + // BINARY + const char *binary_data = "binary"; + ZVecErrorCode err = + zvec_doc_add_field_by_value(doc, "binary_field", ZVEC_DATA_TYPE_BINARY, + binary_data, strlen(binary_data)); + TEST_ASSERT(err == ZVEC_OK); + + // STRING + const char *string_data = "hello"; + err = zvec_doc_add_field_by_value(doc, "string_field", ZVEC_DATA_TYPE_STRING, + string_data, strlen(string_data)); + TEST_ASSERT(err == ZVEC_OK); + + // BOOL + bool bool_val = 
true; + err = zvec_doc_add_field_by_value(doc, "bool_field", ZVEC_DATA_TYPE_BOOL, + &bool_val, sizeof(bool_val)); + TEST_ASSERT(err == ZVEC_OK); + + // INT32 + int32_t int32_val = -12345; + err = zvec_doc_add_field_by_value(doc, "int32_field", ZVEC_DATA_TYPE_INT32, + &int32_val, sizeof(int32_val)); + TEST_ASSERT(err == ZVEC_OK); + + // INT64 + int64_t int64_val = -9876543210LL; + err = zvec_doc_add_field_by_value(doc, "int64_field", ZVEC_DATA_TYPE_INT64, + &int64_val, sizeof(int64_val)); + TEST_ASSERT(err == ZVEC_OK); + + // UINT32 + uint32_t uint32_val = 4294967295U; + err = zvec_doc_add_field_by_value(doc, "uint32_field", ZVEC_DATA_TYPE_UINT32, + &uint32_val, sizeof(uint32_val)); + TEST_ASSERT(err == ZVEC_OK); + + // UINT64 + uint64_t uint64_val = 18446744073709551615ULL; + err = zvec_doc_add_field_by_value(doc, "uint64_field", ZVEC_DATA_TYPE_UINT64, + &uint64_val, sizeof(uint64_val)); + TEST_ASSERT(err == ZVEC_OK); + + // FLOAT + float float_val = 3.14159f; + err = zvec_doc_add_field_by_value(doc, "float_field", ZVEC_DATA_TYPE_FLOAT, + &float_val, sizeof(float_val)); + TEST_ASSERT(err == ZVEC_OK); + + // DOUBLE + double double_val = 3.14159265358979; + err = zvec_doc_add_field_by_value(doc, "double_field", ZVEC_DATA_TYPE_DOUBLE, + &double_val, sizeof(double_val)); + TEST_ASSERT(err == ZVEC_OK); + + // Vector types + // VECTOR_BINARY32 + uint32_t binary32_vec[] = {0xFFFFFFFF, 0x00000000, 0xAAAAAAAA, 0x55555555}; + err = zvec_doc_add_field_by_value(doc, "binary32_vec_field", + ZVEC_DATA_TYPE_VECTOR_BINARY32, + binary32_vec, sizeof(binary32_vec)); + TEST_ASSERT(err == ZVEC_OK); + + // VECTOR_BINARY64 + uint64_t binary64_vec[] = {0xFFFFFFFFFFFFFFFFULL, 0x0000000000000000ULL}; + err = zvec_doc_add_field_by_value(doc, "binary64_vec_field", + ZVEC_DATA_TYPE_VECTOR_BINARY64, + binary64_vec, sizeof(binary64_vec)); + TEST_ASSERT(err == ZVEC_OK); + + // VECTOR_FP16 + uint16_t fp16_vec[] = {0x3C00, 0x4000, 0xC000, 0x8000}; + err = zvec_doc_add_field_by_value(doc, 
"fp16_vec_field", + ZVEC_DATA_TYPE_VECTOR_FP16, fp16_vec, + sizeof(fp16_vec)); + TEST_ASSERT(err == ZVEC_OK); + + // VECTOR_FP32 + float fp32_vec[] = {1.0f, -2.0f, 3.5f, -4.5f}; + err = zvec_doc_add_field_by_value(doc, "fp32_vec_field", + ZVEC_DATA_TYPE_VECTOR_FP32, fp32_vec, + sizeof(fp32_vec)); + TEST_ASSERT(err == ZVEC_OK); + + // VECTOR_FP64 + double fp64_vec[] = {1.1, -2.2, 3.3, -4.4}; + err = zvec_doc_add_field_by_value(doc, "fp64_vec_field", + ZVEC_DATA_TYPE_VECTOR_FP64, fp64_vec, + sizeof(fp64_vec)); + TEST_ASSERT(err == ZVEC_OK); + + // VECTOR_INT4 (packed - each byte contains 2 values) + int8_t int4_vec[] = {0x12, 0x34, 0x56, 0x78, 0x9A, 0xBC, 0xDE, 0xF0}; + err = zvec_doc_add_field_by_value(doc, "int4_vec_field", + ZVEC_DATA_TYPE_VECTOR_INT4, int4_vec, + sizeof(int4_vec)); + TEST_ASSERT(err == ZVEC_OK); + + // VECTOR_INT8 + int8_t int8_vec[] = {-128, -1, 0, 1, 127}; + err = zvec_doc_add_field_by_value(doc, "int8_vec_field", + ZVEC_DATA_TYPE_VECTOR_INT8, int8_vec, + sizeof(int8_vec)); + TEST_ASSERT(err == ZVEC_OK); + + // VECTOR_INT16 + int16_t int16_vec[] = {-32768, -1, 0, 1, 32767}; + err = zvec_doc_add_field_by_value(doc, "int16_vec_field", + ZVEC_DATA_TYPE_VECTOR_INT16, int16_vec, + sizeof(int16_vec)); + TEST_ASSERT(err == ZVEC_OK); + + // Sparse vector types + // SPARSE_VECTOR_FP16 - format: [nnz(size_t)][indices...][values...] 
+ size_t sparse_fp16_nnz = 3; + uint32_t sparse_fp16_indices[] = {0, 5, 10}; + uint16_t sparse_fp16_values[] = {0x3C00, 0x4000, 0xC000}; + size_t sparse_fp16_size = sizeof(sparse_fp16_nnz) + + sizeof(sparse_fp16_indices) + + sizeof(sparse_fp16_values); + char *sparse_fp16_buffer = (char *)malloc(sparse_fp16_size); + memcpy(sparse_fp16_buffer, &sparse_fp16_nnz, sizeof(sparse_fp16_nnz)); + memcpy(sparse_fp16_buffer + sizeof(sparse_fp16_nnz), sparse_fp16_indices, + sizeof(sparse_fp16_indices)); + memcpy(sparse_fp16_buffer + sizeof(sparse_fp16_nnz) + + sizeof(sparse_fp16_indices), + sparse_fp16_values, sizeof(sparse_fp16_values)); + err = zvec_doc_add_field_by_value(doc, "sparse_fp16_field", + ZVEC_DATA_TYPE_SPARSE_VECTOR_FP16, + sparse_fp16_buffer, sparse_fp16_size); + TEST_ASSERT(err == ZVEC_OK); + free(sparse_fp16_buffer); + + // SPARSE_VECTOR_FP32 + size_t sparse_fp32_nnz = 3; + uint32_t sparse_fp32_indices[] = {2, 7, 15}; + float sparse_fp32_values[] = {1.5f, -2.5f, 3.5f}; + size_t sparse_fp32_size = sizeof(sparse_fp32_nnz) + + sizeof(sparse_fp32_indices) + + sizeof(sparse_fp32_values); + char *sparse_fp32_buffer = (char *)malloc(sparse_fp32_size); + memcpy(sparse_fp32_buffer, &sparse_fp32_nnz, sizeof(sparse_fp32_nnz)); + memcpy(sparse_fp32_buffer + sizeof(sparse_fp32_nnz), sparse_fp32_indices, + sizeof(sparse_fp32_indices)); + memcpy(sparse_fp32_buffer + sizeof(sparse_fp32_nnz) + + sizeof(sparse_fp32_indices), + sparse_fp32_values, sizeof(sparse_fp32_values)); + err = zvec_doc_add_field_by_value(doc, "sparse_fp32_field", + ZVEC_DATA_TYPE_SPARSE_VECTOR_FP32, + sparse_fp32_buffer, sparse_fp32_size); + TEST_ASSERT(err == ZVEC_OK); + free(sparse_fp32_buffer); + + // Array types + // ARRAY_BINARY - format: [length(uint32_t)][data][length][data]... 
+ uint8_t array_bin_data[] = { + 1, 0, 0, 0, 0x01, // length=1, data=0x01 + 2, 0, 0, 0, 0x02, 0x03, // length=2, data=0x02,0x03 + 2, 0, 0, 0, 0x04, 0x05 // length=2, data=0x04,0x05 + }; + err = zvec_doc_add_field_by_value(doc, "array_binary_field", + ZVEC_DATA_TYPE_ARRAY_BINARY, array_bin_data, + sizeof(array_bin_data)); + TEST_ASSERT(err == ZVEC_OK); + + // ARRAY_STRING - null-terminated strings + const char *array_str_data[] = {"str1", "str2", "str3"}; + ZVecString *array_zvec_str[3]; + for (int i = 0; i < 3; i++) { + array_zvec_str[i] = zvec_string_create(array_str_data[i]); + } + err = zvec_doc_add_field_by_value(doc, "array_string_field", + ZVEC_DATA_TYPE_ARRAY_STRING, array_zvec_str, + sizeof(array_zvec_str)); + TEST_ASSERT(err == ZVEC_OK); + + // ARRAY_BOOL + bool array_bool_data[] = {true, false, true, false}; + err = zvec_doc_add_field_by_value(doc, "array_bool_field", + ZVEC_DATA_TYPE_ARRAY_BOOL, array_bool_data, + sizeof(array_bool_data)); + TEST_ASSERT(err == ZVEC_OK); + + // ARRAY_INT32 + int32_t array_int32_data[] = {-100, -50, 0, 50, 100}; + err = zvec_doc_add_field_by_value(doc, "array_int32_field", + ZVEC_DATA_TYPE_ARRAY_INT32, + array_int32_data, sizeof(array_int32_data)); + TEST_ASSERT(err == ZVEC_OK); + + // ARRAY_INT64 + int64_t array_int64_data[] = {-1000000, -500000, 0, 500000, 1000000}; + err = zvec_doc_add_field_by_value(doc, "array_int64_field", + ZVEC_DATA_TYPE_ARRAY_INT64, + array_int64_data, sizeof(array_int64_data)); + TEST_ASSERT(err == ZVEC_OK); + + // ARRAY_UINT32 + uint32_t array_uint32_data[] = {0, 100, 1000, 10000, 4294967295U}; + err = zvec_doc_add_field_by_value( + doc, "array_uint32_field", ZVEC_DATA_TYPE_ARRAY_UINT32, array_uint32_data, + sizeof(array_uint32_data)); + TEST_ASSERT(err == ZVEC_OK); + + // ARRAY_UINT64 + uint64_t array_uint64_data[] = {0, 100, 1000, 10000, 18446744073709551615ULL}; + err = zvec_doc_add_field_by_value( + doc, "array_uint64_field", ZVEC_DATA_TYPE_ARRAY_UINT64, array_uint64_data, + 
sizeof(array_uint64_data)); + TEST_ASSERT(err == ZVEC_OK); + + // ARRAY_FLOAT + float array_float_data[] = {-1.5f, -0.5f, 0.0f, 0.5f, 1.5f}; + err = zvec_doc_add_field_by_value(doc, "array_float_field", + ZVEC_DATA_TYPE_ARRAY_FLOAT, + array_float_data, sizeof(array_float_data)); + TEST_ASSERT(err == ZVEC_OK); + + // ARRAY_DOUBLE + double array_double_data[] = {-1.1, -0.1, 0.0, 0.1, 1.1}; + err = zvec_doc_add_field_by_value( + doc, "array_double_field", ZVEC_DATA_TYPE_ARRAY_DOUBLE, array_double_data, + sizeof(array_double_data)); + TEST_ASSERT(err == ZVEC_OK); + + // Verify we can retrieve some of the values + void *result = NULL; + size_t result_size = 0; + err = zvec_doc_get_field_value_copy(doc, "int32_field", ZVEC_DATA_TYPE_INT32, + &result, &result_size); + TEST_ASSERT(err == ZVEC_OK && result_size == sizeof(int32_t)); + if (result) { + TEST_ASSERT(*(int32_t *)result == -12345); + free(result); + } + + err = zvec_doc_get_field_value_copy(doc, "float_field", ZVEC_DATA_TYPE_FLOAT, + &result, &result_size); + TEST_ASSERT(err == ZVEC_OK && result_size == sizeof(float)); + if (result) { + TEST_ASSERT(fabs(*(float *)result - 3.14159f) < 0.0001f); + free(result); + } + + zvec_doc_destroy(doc); + TEST_END(); +} + +// Test for zvec_doc_add_field_by_struct - covers all data types +void test_doc_add_field_by_struct(void) { + TEST_START(); + + ZVecDoc *doc = zvec_doc_create(); + TEST_ASSERT(doc != NULL); + + if (!doc) { + TEST_END(); + return; + } + + ZVecErrorCode err; + ZVecDocField field; + + // Scalar types + // BINARY + memset(&field, 0, sizeof(field)); + field.name.data = "binary_field"; + field.name.length = strlen("binary_field"); + field.data_type = ZVEC_DATA_TYPE_BINARY; + uint8_t binary_data[] = {0x01, 0x02, 0x03, 0x04}; + field.value.binary_value.data = binary_data; + field.value.binary_value.length = sizeof(binary_data); + err = zvec_doc_add_field_by_struct(doc, &field); + TEST_ASSERT(err == ZVEC_OK); + + // STRING + memset(&field, 0, sizeof(field)); + 
field.name.data = "string_field"; + field.name.length = strlen("string_field"); + field.data_type = ZVEC_DATA_TYPE_STRING; + const char *string_data = "hello world"; + field.value.string_value.data = (char *)string_data; + field.value.string_value.length = strlen(string_data); + err = zvec_doc_add_field_by_struct(doc, &field); + TEST_ASSERT(err == ZVEC_OK); + + // BOOL + memset(&field, 0, sizeof(field)); + field.name.data = "bool_field"; + field.name.length = strlen("bool_field"); + field.data_type = ZVEC_DATA_TYPE_BOOL; + field.value.bool_value = true; + err = zvec_doc_add_field_by_struct(doc, &field); + TEST_ASSERT(err == ZVEC_OK); + + // INT32 + memset(&field, 0, sizeof(field)); + field.name.data = "int32_field"; + field.name.length = strlen("int32_field"); + field.data_type = ZVEC_DATA_TYPE_INT32; + field.value.int32_value = -12345; + err = zvec_doc_add_field_by_struct(doc, &field); + TEST_ASSERT(err == ZVEC_OK); + + // INT64 + memset(&field, 0, sizeof(field)); + field.name.data = "int64_field"; + field.name.length = strlen("int64_field"); + field.data_type = ZVEC_DATA_TYPE_INT64; + field.value.int64_value = -9876543210LL; + err = zvec_doc_add_field_by_struct(doc, &field); + TEST_ASSERT(err == ZVEC_OK); + + // UINT32 + memset(&field, 0, sizeof(field)); + field.name.data = "uint32_field"; + field.name.length = strlen("uint32_field"); + field.data_type = ZVEC_DATA_TYPE_UINT32; + field.value.uint32_value = 4294967295U; + err = zvec_doc_add_field_by_struct(doc, &field); + TEST_ASSERT(err == ZVEC_OK); + + // UINT64 + memset(&field, 0, sizeof(field)); + field.name.data = "uint64_field"; + field.name.length = strlen("uint64_field"); + field.data_type = ZVEC_DATA_TYPE_UINT64; + field.value.uint64_value = 18446744073709551615ULL; + err = zvec_doc_add_field_by_struct(doc, &field); + TEST_ASSERT(err == ZVEC_OK); + + // FLOAT + memset(&field, 0, sizeof(field)); + field.name.data = "float_field"; + field.name.length = strlen("float_field"); + field.data_type = 
ZVEC_DATA_TYPE_FLOAT; + field.value.float_value = 3.14159f; + err = zvec_doc_add_field_by_struct(doc, &field); + TEST_ASSERT(err == ZVEC_OK); + + // DOUBLE + memset(&field, 0, sizeof(field)); + field.name.data = "double_field"; + field.name.length = strlen("double_field"); + field.data_type = ZVEC_DATA_TYPE_DOUBLE; + field.value.double_value = 3.14159265358979; + err = zvec_doc_add_field_by_struct(doc, &field); + TEST_ASSERT(err == ZVEC_OK); + + // VECTOR_BINARY32 + memset(&field, 0, sizeof(field)); + field.name.data = "binary32_vec_field"; + field.name.length = strlen("binary32_vec_field"); + field.data_type = ZVEC_DATA_TYPE_VECTOR_BINARY32; + uint32_t binary32_vec[] = {0xFFFFFFFF, 0x00000000, 0xAAAAAAAA, 0x55555555}; + field.value.vector_value.data = (const float *)binary32_vec; + field.value.vector_value.length = sizeof(binary32_vec) / sizeof(uint32_t); + err = zvec_doc_add_field_by_struct(doc, &field); + TEST_ASSERT(err == ZVEC_OK); + + // VECTOR_BINARY64 + memset(&field, 0, sizeof(field)); + field.name.data = "binary64_vec_field"; + field.name.length = strlen("binary64_vec_field"); + field.data_type = ZVEC_DATA_TYPE_VECTOR_BINARY64; + uint64_t binary64_vec[] = {0xFFFFFFFFFFFFFFFFULL, 0x0000000000000000ULL}; + field.value.vector_value.data = (const float *)binary64_vec; + field.value.vector_value.length = sizeof(binary64_vec) / sizeof(uint64_t); + err = zvec_doc_add_field_by_struct(doc, &field); + TEST_ASSERT(err == ZVEC_OK); + + // VECTOR_FP16 + memset(&field, 0, sizeof(field)); + field.name.data = "fp16_vec_field"; + field.name.length = strlen("fp16_vec_field"); + field.data_type = ZVEC_DATA_TYPE_VECTOR_FP16; + uint16_t fp16_vec[] = {0x3C00, 0x4000, 0xC000, 0x8000}; + field.value.vector_value.data = (const float *)fp16_vec; + field.value.vector_value.length = sizeof(fp16_vec) / sizeof(uint16_t); + err = zvec_doc_add_field_by_struct(doc, &field); + TEST_ASSERT(err == ZVEC_OK); + + // VECTOR_FP32 + memset(&field, 0, sizeof(field)); + field.name.data = 
"fp32_vec_field"; + field.name.length = strlen("fp32_vec_field"); + field.data_type = ZVEC_DATA_TYPE_VECTOR_FP32; + float fp32_vec[] = {1.0f, -2.0f, 3.5f, -4.5f}; + field.value.vector_value.data = fp32_vec; + field.value.vector_value.length = sizeof(fp32_vec) / sizeof(float); + err = zvec_doc_add_field_by_struct(doc, &field); + TEST_ASSERT(err == ZVEC_OK); + + // VECTOR_FP64 + memset(&field, 0, sizeof(field)); + field.name.data = "fp64_vec_field"; + field.name.length = strlen("fp64_vec_field"); + field.data_type = ZVEC_DATA_TYPE_VECTOR_FP64; + double fp64_vec[] = {1.1, -2.2, 3.3, -4.4}; + field.value.vector_value.data = (const float *)fp64_vec; + field.value.vector_value.length = sizeof(fp64_vec) / sizeof(double); + err = zvec_doc_add_field_by_struct(doc, &field); + TEST_ASSERT(err == ZVEC_OK); + + // VECTOR_INT4 + memset(&field, 0, sizeof(field)); + field.name.data = "int4_vec_field"; + field.name.length = strlen("int4_vec_field"); + field.data_type = ZVEC_DATA_TYPE_VECTOR_INT4; + int8_t int4_vec[] = {0x12, 0x34, 0x56, 0x78}; + field.value.vector_value.data = (const float *)int4_vec; + field.value.vector_value.length = + sizeof(int4_vec) * 2; // Each byte contains 2 values + err = zvec_doc_add_field_by_struct(doc, &field); + TEST_ASSERT(err == ZVEC_OK); + + // VECTOR_INT8 + memset(&field, 0, sizeof(field)); + field.name.data = "int8_vec_field"; + field.name.length = strlen("int8_vec_field"); + field.data_type = ZVEC_DATA_TYPE_VECTOR_INT8; + int8_t int8_vec[] = {-128, -1, 0, 1, 127}; + field.value.vector_value.data = (const float *)int8_vec; + field.value.vector_value.length = sizeof(int8_vec) / sizeof(int8_t); + err = zvec_doc_add_field_by_struct(doc, &field); + TEST_ASSERT(err == ZVEC_OK); + + // VECTOR_INT16 + memset(&field, 0, sizeof(field)); + field.name.data = "int16_vec_field"; + field.name.length = strlen("int16_vec_field"); + field.data_type = ZVEC_DATA_TYPE_VECTOR_INT16; + int16_t int16_vec[] = {-32768, -1, 0, 1, 32767}; + field.value.vector_value.data = 
(const float *)int16_vec; + field.value.vector_value.length = sizeof(int16_vec) / sizeof(int16_t); + err = zvec_doc_add_field_by_struct(doc, &field); + TEST_ASSERT(err == ZVEC_OK); + + // Sparse vector types + // SPARSE_VECTOR_FP16 + memset(&field, 0, sizeof(field)); + field.name.data = "sparse_fp16_field"; + field.name.length = strlen("sparse_fp16_field"); + field.data_type = ZVEC_DATA_TYPE_SPARSE_VECTOR_FP16; + uint16_t sparse_fp16_values[] = {0x3C00, 0x4000, 0xC000}; + field.value.vector_value.data = (const float *)sparse_fp16_values; + field.value.vector_value.length = + sizeof(sparse_fp16_values) / sizeof(uint16_t); + err = zvec_doc_add_field_by_struct(doc, &field); + TEST_ASSERT(err == ZVEC_OK); + + // SPARSE_VECTOR_FP32 + memset(&field, 0, sizeof(field)); + field.name.data = "sparse_fp32_field"; + field.name.length = strlen("sparse_fp32_field"); + field.data_type = ZVEC_DATA_TYPE_SPARSE_VECTOR_FP32; + float sparse_fp32_values[] = {1.5f, -2.5f, 3.5f}; + field.value.vector_value.data = sparse_fp32_values; + field.value.vector_value.length = sizeof(sparse_fp32_values) / sizeof(float); + err = zvec_doc_add_field_by_struct(doc, &field); + TEST_ASSERT(err == ZVEC_OK); + + // Array types + // ARRAY_BINARY + memset(&field, 0, sizeof(field)); + field.name.data = "array_binary_field"; + field.name.length = strlen("array_binary_field"); + field.data_type = ZVEC_DATA_TYPE_ARRAY_BINARY; + uint8_t array_bin_data[] = { + 1, 0, 0, 0, 0x01, // length=1, data=0x01 + 2, 0, 0, 0, 0x02, 0x03, // length=2, data=0x02,0x03 + 2, 0, 0, 0, 0x04, 0x05 // length=2, data=0x04,0x05 + }; + field.value.binary_value.data = array_bin_data; + field.value.binary_value.length = sizeof(array_bin_data); + err = zvec_doc_add_field_by_struct(doc, &field); + TEST_ASSERT(err == ZVEC_OK); + + // ARRAY_STRING + memset(&field, 0, sizeof(field)); + field.name.data = "array_string_field"; + field.name.length = strlen("array_string_field"); + field.data_type = ZVEC_DATA_TYPE_ARRAY_STRING; + const char 
array_string_data[] = "str1\0str2\0str3\0"; + field.value.string_value.data = (char *)array_string_data; + field.value.string_value.length = sizeof(array_string_data); + err = zvec_doc_add_field_by_struct(doc, &field); + TEST_ASSERT(err == ZVEC_OK); + + // ARRAY_BOOL + memset(&field, 0, sizeof(field)); + field.name.data = "array_bool_field"; + field.name.length = strlen("array_bool_field"); + field.data_type = ZVEC_DATA_TYPE_ARRAY_BOOL; + bool array_bool_data[] = {true, false, true, false}; + field.value.binary_value.data = (const uint8_t *)array_bool_data; + field.value.binary_value.length = sizeof(array_bool_data); + err = zvec_doc_add_field_by_struct(doc, &field); + TEST_ASSERT(err == ZVEC_OK); + + // ARRAY_INT32 + memset(&field, 0, sizeof(field)); + field.name.data = "array_int32_field"; + field.name.length = strlen("array_int32_field"); + field.data_type = ZVEC_DATA_TYPE_ARRAY_INT32; + int32_t array_int32_data[] = {-100, -50, 0, 50, 100}; + field.value.vector_value.data = (const float *)array_int32_data; + field.value.vector_value.length = sizeof(array_int32_data) / sizeof(int32_t); + err = zvec_doc_add_field_by_struct(doc, &field); + TEST_ASSERT(err == ZVEC_OK); + + // ARRAY_INT64 + memset(&field, 0, sizeof(field)); + field.name.data = "array_int64_field"; + field.name.length = strlen("array_int64_field"); + field.data_type = ZVEC_DATA_TYPE_ARRAY_INT64; + int64_t array_int64_data[] = {-1000000, -500000, 0, 500000, 1000000}; + field.value.vector_value.data = (const float *)array_int64_data; + field.value.vector_value.length = sizeof(array_int64_data) / sizeof(int64_t); + err = zvec_doc_add_field_by_struct(doc, &field); + TEST_ASSERT(err == ZVEC_OK); + + // ARRAY_UINT32 + memset(&field, 0, sizeof(field)); + field.name.data = "array_uint32_field"; + field.name.length = strlen("array_uint32_field"); + field.data_type = ZVEC_DATA_TYPE_ARRAY_UINT32; + uint32_t array_uint32_data[] = {0, 100, 1000, 10000, 4294967295U}; + field.value.vector_value.data = (const float 
*)array_uint32_data; + field.value.vector_value.length = + sizeof(array_uint32_data) / sizeof(uint32_t); + err = zvec_doc_add_field_by_struct(doc, &field); + TEST_ASSERT(err == ZVEC_OK); + + // ARRAY_UINT64 + memset(&field, 0, sizeof(field)); + field.name.data = "array_uint64_field"; + field.name.length = strlen("array_uint64_field"); + field.data_type = ZVEC_DATA_TYPE_ARRAY_UINT64; + uint64_t array_uint64_data[] = {0, 100, 1000, 10000, 18446744073709551615ULL}; + field.value.vector_value.data = (const float *)array_uint64_data; + field.value.vector_value.length = + sizeof(array_uint64_data) / sizeof(uint64_t); + err = zvec_doc_add_field_by_struct(doc, &field); + TEST_ASSERT(err == ZVEC_OK); + + // ARRAY_FLOAT + memset(&field, 0, sizeof(field)); + field.name.data = "array_float_field"; + field.name.length = strlen("array_float_field"); + field.data_type = ZVEC_DATA_TYPE_ARRAY_FLOAT; + float array_float_data[] = {-1.5f, -0.5f, 0.0f, 0.5f, 1.5f}; + field.value.vector_value.data = array_float_data; + field.value.vector_value.length = sizeof(array_float_data) / sizeof(float); + err = zvec_doc_add_field_by_struct(doc, &field); + TEST_ASSERT(err == ZVEC_OK); + + // ARRAY_DOUBLE + memset(&field, 0, sizeof(field)); + field.name.data = "array_double_field"; + field.name.length = strlen("array_double_field"); + field.data_type = ZVEC_DATA_TYPE_ARRAY_DOUBLE; + double array_double_data[] = {-1.1, -0.1, 0.0, 0.1, 1.1}; + field.value.vector_value.data = (const float *)array_double_data; + field.value.vector_value.length = sizeof(array_double_data) / sizeof(double); + err = zvec_doc_add_field_by_struct(doc, &field); + TEST_ASSERT(err == ZVEC_OK); + + // Verify we can retrieve some of the values + void *result = NULL; + size_t result_size = 0; + + err = zvec_doc_get_field_value_copy(doc, "int32_field", ZVEC_DATA_TYPE_INT32, + &result, &result_size); + TEST_ASSERT(err == ZVEC_OK && result_size == sizeof(int32_t)); + if (result) { + TEST_ASSERT(*(int32_t *)result == -12345); + 
free(result); + } + + err = zvec_doc_get_field_value_copy(doc, "float_field", ZVEC_DATA_TYPE_FLOAT, + &result, &result_size); + TEST_ASSERT(err == ZVEC_OK && result_size == sizeof(float)); + if (result) { + TEST_ASSERT(fabs(*(float *)result - 3.14159f) < 0.0001f); + free(result); + } + + zvec_doc_destroy(doc); + TEST_END(); +} + +void test_doc_basic_operations(void); +void test_doc_get_field_value_basic(void); +void test_doc_get_field_value_copy(void); +void test_doc_get_field_value_pointer(void); +void test_doc_field_operations(void); +void test_doc_error_conditions(void); +void test_doc_serialization(void); +void test_doc_add_field_by_value(void); +void test_doc_add_field_by_struct(void); + +void test_doc_functions(void) { + test_doc_basic_operations(); + test_doc_get_field_value_basic(); + test_doc_get_field_value_copy(); + test_doc_get_field_value_pointer(); + test_doc_field_operations(); + test_doc_error_conditions(); + test_doc_serialization(); +} + +void test_doc_basic_operations(void) { + TEST_START(); + + // Create test document + ZVecDoc *doc = zvec_doc_create(); + TEST_ASSERT(doc != NULL); + + // Test primary key operations + zvec_doc_set_pk(doc, "test_doc_complete"); + const char *pk = zvec_doc_get_pk_pointer(doc); + TEST_ASSERT(pk != NULL); + TEST_ASSERT(strcmp(pk, "test_doc_complete") == 0); + + // Test document ID and score operations + zvec_doc_set_doc_id(doc, 99999); + uint64_t doc_id = zvec_doc_get_doc_id(doc); + TEST_ASSERT(doc_id == 99999); + + zvec_doc_set_score(doc, 0.95f); + float score = zvec_doc_get_score(doc); + TEST_ASSERT(score == 0.95f); + + // Test operator operations + zvec_doc_set_operator(doc, ZVEC_DOC_OP_INSERT); + ZVecDocOperator op = zvec_doc_get_operator(doc); + TEST_ASSERT(op == ZVEC_DOC_OP_INSERT); + + zvec_doc_destroy(doc); + + TEST_END(); +} + +void test_doc_get_field_value_basic(void) { + TEST_START(); + + ZVecDoc *doc = zvec_doc_create(); + TEST_ASSERT(doc != NULL); + + ZVecErrorCode err; + + printf( + "=== Testing 
zvec_doc_get_field_value_basic with all supported types " + "===\n"); + + // BOOL type + ZVecDocField bool_field; + bool_field.name.data = "bool_field"; + bool_field.name.length = strlen("bool_field"); + bool_field.data_type = ZVEC_DATA_TYPE_BOOL; + bool_field.value.bool_value = true; + err = zvec_doc_add_field_by_struct(doc, &bool_field); + TEST_ASSERT(err == ZVEC_OK); + + bool bool_result; + err = zvec_doc_get_field_value_basic(doc, "bool_field", ZVEC_DATA_TYPE_BOOL, + &bool_result, sizeof(bool_result)); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(bool_result == true); + + // INT32 type + ZVecDocField int32_field; + int32_field.name.data = "int32_field"; + int32_field.name.length = strlen("int32_field"); + int32_field.data_type = ZVEC_DATA_TYPE_INT32; + int32_field.value.int32_value = -2147483648; // Min int32 + err = zvec_doc_add_field_by_struct(doc, &int32_field); + TEST_ASSERT(err == ZVEC_OK); + + int32_t int32_result; + err = zvec_doc_get_field_value_basic(doc, "int32_field", ZVEC_DATA_TYPE_INT32, + &int32_result, sizeof(int32_result)); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(int32_result == -2147483648); + + // INT64 type + ZVecDocField int64_field; + int64_field.name.data = "int64_field"; + int64_field.name.length = strlen("int64_field"); + int64_field.data_type = ZVEC_DATA_TYPE_INT64; + int64_field.value.int64_value = 9223372036854775807LL; // Max int64 + err = zvec_doc_add_field_by_struct(doc, &int64_field); + TEST_ASSERT(err == ZVEC_OK); + + int64_t int64_result; + err = zvec_doc_get_field_value_basic(doc, "int64_field", ZVEC_DATA_TYPE_INT64, + &int64_result, sizeof(int64_result)); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(int64_result == 9223372036854775807LL); + + // UINT32 type + ZVecDocField uint32_field; + uint32_field.name.data = "uint32_field"; + uint32_field.name.length = strlen("uint32_field"); + uint32_field.data_type = ZVEC_DATA_TYPE_UINT32; + uint32_field.value.uint32_value = 4294967295U; // Max uint32 + err = 
zvec_doc_add_field_by_struct(doc, &uint32_field); + TEST_ASSERT(err == ZVEC_OK); + + uint32_t uint32_result; + err = + zvec_doc_get_field_value_basic(doc, "uint32_field", ZVEC_DATA_TYPE_UINT32, + &uint32_result, sizeof(uint32_result)); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(uint32_result == 4294967295U); + + // UINT64 type + ZVecDocField uint64_field; + uint64_field.name.data = "uint64_field"; + uint64_field.name.length = strlen("uint64_field"); + uint64_field.data_type = ZVEC_DATA_TYPE_UINT64; + uint64_field.value.uint64_value = 18446744073709551615ULL; // Max uint64 + err = zvec_doc_add_field_by_struct(doc, &uint64_field); + TEST_ASSERT(err == ZVEC_OK); + + uint64_t uint64_result; + err = + zvec_doc_get_field_value_basic(doc, "uint64_field", ZVEC_DATA_TYPE_UINT64, + &uint64_result, sizeof(uint64_result)); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(uint64_result == 18446744073709551615ULL); + + // FLOAT type + ZVecDocField float_field; + float_field.name.data = "float_field"; + float_field.name.length = strlen("float_field"); + float_field.data_type = ZVEC_DATA_TYPE_FLOAT; + float_field.value.float_value = 3.14159265359f; + err = zvec_doc_add_field_by_struct(doc, &float_field); + TEST_ASSERT(err == ZVEC_OK); + + float float_result; + err = zvec_doc_get_field_value_basic(doc, "float_field", ZVEC_DATA_TYPE_FLOAT, + &float_result, sizeof(float_result)); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(fabsf(float_result - 3.14159265359f) < 1e-6f); + + // DOUBLE type + ZVecDocField double_field; + double_field.name.data = "double_field"; + double_field.name.length = strlen("double_field"); + double_field.data_type = ZVEC_DATA_TYPE_DOUBLE; + double_field.value.double_value = 2.71828182845904523536; + err = zvec_doc_add_field_by_struct(doc, &double_field); + TEST_ASSERT(err == ZVEC_OK); + + double double_result; + err = + zvec_doc_get_field_value_basic(doc, "double_field", ZVEC_DATA_TYPE_DOUBLE, + &double_result, sizeof(double_result)); + TEST_ASSERT(err == 
ZVEC_OK); + TEST_ASSERT(fabs(double_result - 2.71828182845904523536) < 1e-15); + + zvec_doc_destroy(doc); + + TEST_END(); +} + +void test_doc_get_field_value_copy(void) { + TEST_START(); + + ZVecDoc *doc = zvec_doc_create(); + TEST_ASSERT(doc != NULL); + + ZVecErrorCode err; + + printf( + "=== Testing zvec_doc_get_field_value_copy with all supported types " + "===\n"); + + // Basic scalar types first + bool bool_val = true; + err = zvec_doc_add_field_by_value(doc, "bool_field2", ZVEC_DATA_TYPE_BOOL, + &bool_val, sizeof(bool_val)); + TEST_ASSERT(err == ZVEC_OK); + + void *bool_copy_result; + size_t bool_copy_size; + err = zvec_doc_get_field_value_copy(doc, "bool_field2", ZVEC_DATA_TYPE_BOOL, + &bool_copy_result, &bool_copy_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(bool_copy_result != NULL); + TEST_ASSERT(bool_copy_size == sizeof(bool)); + TEST_ASSERT(*(bool *)bool_copy_result == true); + free(bool_copy_result); + + int32_t int32_val = -12345; + err = zvec_doc_add_field_by_value(doc, "int32_field2", ZVEC_DATA_TYPE_INT32, + &int32_val, sizeof(int32_val)); + TEST_ASSERT(err == ZVEC_OK); + + void *int32_copy_result; + size_t int32_copy_size; + err = zvec_doc_get_field_value_copy(doc, "int32_field2", ZVEC_DATA_TYPE_INT32, + &int32_copy_result, &int32_copy_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(int32_copy_result != NULL); + TEST_ASSERT(int32_copy_size == sizeof(int32_t)); + TEST_ASSERT(*(int32_t *)int32_copy_result == -12345); + free(int32_copy_result); + + int64_t int64_val = -9223372036854775807LL; + err = zvec_doc_add_field_by_value(doc, "int64_field2", ZVEC_DATA_TYPE_INT64, + &int64_val, sizeof(int64_val)); + TEST_ASSERT(err == ZVEC_OK); + + void *int64_copy_result; + size_t int64_copy_size; + err = zvec_doc_get_field_value_copy(doc, "int64_field2", ZVEC_DATA_TYPE_INT64, + &int64_copy_result, &int64_copy_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(int64_copy_result != NULL); + TEST_ASSERT(int64_copy_size == sizeof(int64_t)); + 
TEST_ASSERT(*(int64_t *)int64_copy_result == -9223372036854775807LL); + free(int64_copy_result); + + uint32_t uint32_val = 4000000000U; + err = zvec_doc_add_field_by_value(doc, "uint32_field2", ZVEC_DATA_TYPE_UINT32, + &uint32_val, sizeof(uint32_val)); + TEST_ASSERT(err == ZVEC_OK); + + void *uint32_copy_result; + size_t uint32_copy_size; + err = + zvec_doc_get_field_value_copy(doc, "uint32_field2", ZVEC_DATA_TYPE_UINT32, + &uint32_copy_result, &uint32_copy_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(uint32_copy_result != NULL); + TEST_ASSERT(uint32_copy_size == sizeof(uint32_t)); + TEST_ASSERT(*(uint32_t *)uint32_copy_result == 4000000000U); + free(uint32_copy_result); + + uint64_t uint64_val = 18000000000000000000ULL; + err = zvec_doc_add_field_by_value(doc, "uint64_field2", ZVEC_DATA_TYPE_UINT64, + &uint64_val, sizeof(uint64_val)); + TEST_ASSERT(err == ZVEC_OK); + + void *uint64_copy_result; + size_t uint64_copy_size; + err = + zvec_doc_get_field_value_copy(doc, "uint64_field2", ZVEC_DATA_TYPE_UINT64, + &uint64_copy_result, &uint64_copy_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(uint64_copy_result != NULL); + TEST_ASSERT(uint64_copy_size == sizeof(uint64_t)); + TEST_ASSERT(*(uint64_t *)uint64_copy_result == 18000000000000000000ULL); + free(uint64_copy_result); + + float float_val = 3.14159265f; + err = zvec_doc_add_field_by_value(doc, "float_field2", ZVEC_DATA_TYPE_FLOAT, + &float_val, sizeof(float_val)); + TEST_ASSERT(err == ZVEC_OK); + + void *float_copy_result; + size_t float_copy_size; + err = zvec_doc_get_field_value_copy(doc, "float_field2", ZVEC_DATA_TYPE_FLOAT, + &float_copy_result, &float_copy_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(float_copy_result != NULL); + TEST_ASSERT(float_copy_size == sizeof(float)); + TEST_ASSERT(fabs(*(float *)float_copy_result - 3.14159265f) < 1e-6f); + free(float_copy_result); + + double double_val = 2.718281828459045; + err = zvec_doc_add_field_by_value(doc, "double_field2", 
ZVEC_DATA_TYPE_DOUBLE, + &double_val, sizeof(double_val)); + TEST_ASSERT(err == ZVEC_OK); + + void *double_copy_result; + size_t double_copy_size; + err = + zvec_doc_get_field_value_copy(doc, "double_field2", ZVEC_DATA_TYPE_DOUBLE, + &double_copy_result, &double_copy_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(double_copy_result != NULL); + TEST_ASSERT(double_copy_size == sizeof(double)); + TEST_ASSERT(fabs(*(double *)double_copy_result - 2.718281828459045) < 1e-15); + free(double_copy_result); + + // String and binary types + ZVecDocField string_field; + string_field.name.data = "string_field"; + string_field.name.length = strlen("string_field"); + string_field.data_type = ZVEC_DATA_TYPE_STRING; + string_field.value.string_value = *zvec_string_create("Hello, 世界!"); + err = zvec_doc_add_field_by_struct(doc, &string_field); + TEST_ASSERT(err == ZVEC_OK); + + void *string_result; + size_t string_size; + err = zvec_doc_get_field_value_copy( + doc, "string_field", ZVEC_DATA_TYPE_STRING, &string_result, &string_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(string_result != NULL); + TEST_ASSERT(string_size == strlen("Hello, 世界!")); + TEST_ASSERT(memcmp(string_result, "Hello, 世界!", string_size) == 0); + free(string_result); + + ZVecDocField binary_field; + binary_field.name.data = "binary_field"; + binary_field.name.length = strlen("binary_field"); + binary_field.data_type = ZVEC_DATA_TYPE_BINARY; + uint8_t binary_data[] = {0x00, 0x01, 0x02, 0xFF, 0xFE, 0xFD}; + binary_field.value.string_value = + *zvec_bin_create(binary_data, sizeof(binary_data)); + err = zvec_doc_add_field_by_struct(doc, &binary_field); + TEST_ASSERT(err == ZVEC_OK); + + void *binary_result; + size_t binary_size; + err = zvec_doc_get_field_value_copy( + doc, "binary_field", ZVEC_DATA_TYPE_BINARY, &binary_result, &binary_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(binary_result != NULL); + TEST_ASSERT(binary_size == 6); + TEST_ASSERT(memcmp(binary_result, "\x00\x01\x02\xFF\xFE\xFD", 
binary_size) == + 0); + free(binary_result); + + // VECTOR_FP32 type + float test_vector[] = {1.1f, 2.2f, 3.3f, 4.4f, 5.5f}; + ZVecDocField fp32_vec_field; + fp32_vec_field.name.data = "fp32_vec_field"; + fp32_vec_field.name.length = strlen("fp32_vec_field"); + fp32_vec_field.data_type = ZVEC_DATA_TYPE_VECTOR_FP32; + fp32_vec_field.value.vector_value.data = test_vector; + fp32_vec_field.value.vector_value.length = 5; + err = zvec_doc_add_field_by_struct(doc, &fp32_vec_field); + TEST_ASSERT(err == ZVEC_OK); + + void *fp32_vec_result; + size_t fp32_vec_size; + err = zvec_doc_get_field_value_copy(doc, "fp32_vec_field", + ZVEC_DATA_TYPE_VECTOR_FP32, + &fp32_vec_result, &fp32_vec_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(fp32_vec_result != NULL); + TEST_ASSERT(fp32_vec_size == 5 * sizeof(float)); + TEST_ASSERT(memcmp(fp32_vec_result, test_vector, fp32_vec_size) == 0); + free(fp32_vec_result); + + // VECTOR_FP16 type (16-bit float vector) + uint16_t fp16_data[] = {0x3C00, 0x4000, 0x4200, + 0x4400}; // FP16: 1.0, 2.0, 3.0, 4.0 + err = zvec_doc_add_field_by_value(doc, "fp16_vec_field", + ZVEC_DATA_TYPE_VECTOR_FP16, fp16_data, + sizeof(fp16_data)); + TEST_ASSERT(err == ZVEC_OK); + + void *fp16_result; + size_t fp16_size; + err = zvec_doc_get_field_value_copy(doc, "fp16_vec_field", + ZVEC_DATA_TYPE_VECTOR_FP16, &fp16_result, + &fp16_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(fp16_result != NULL); + TEST_ASSERT(fp16_size == sizeof(fp16_data)); + TEST_ASSERT(memcmp(fp16_result, fp16_data, fp16_size) == 0); + free(fp16_result); + + // VECTOR_INT8 type + int8_t int8_data[] = {-128, -1, 0, 1, 127}; + err = zvec_doc_add_field_by_value(doc, "int8_vec_field", + ZVEC_DATA_TYPE_VECTOR_INT8, int8_data, + sizeof(int8_data)); + TEST_ASSERT(err == ZVEC_OK); + + void *int8_result; + size_t int8_size; + err = zvec_doc_get_field_value_copy(doc, "int8_vec_field", + ZVEC_DATA_TYPE_VECTOR_INT8, &int8_result, + &int8_size); + TEST_ASSERT(err == ZVEC_OK); + 
TEST_ASSERT(int8_result != NULL); + TEST_ASSERT(int8_size == sizeof(int8_data)); + TEST_ASSERT(memcmp(int8_result, int8_data, int8_size) == 0); + free(int8_result); + + // VECTOR_BINARY32 type (32-bit aligned binary vector) + uint8_t bin32_data[] = {0xAA, 0x55, 0xAA, 0x55}; + err = zvec_doc_add_field_by_value(doc, "bin32_vec_field", + ZVEC_DATA_TYPE_VECTOR_BINARY32, bin32_data, + sizeof(bin32_data)); + TEST_ASSERT(err == ZVEC_OK); + + void *bin32_result; + size_t bin32_size; + err = zvec_doc_get_field_value_copy(doc, "bin32_vec_field", + ZVEC_DATA_TYPE_VECTOR_BINARY32, + &bin32_result, &bin32_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(bin32_result != NULL); + TEST_ASSERT(bin32_size == sizeof(bin32_data)); + TEST_ASSERT(memcmp(bin32_result, bin32_data, bin32_size) == 0); + free(bin32_result); + + // VECTOR_BINARY64 type (64-bit aligned binary vector) + uint64_t bin64_data[] = {0xAA55AA55AA55AA55ULL, 0x55AA55AA55AA55AAULL}; + err = zvec_doc_add_field_by_value(doc, "bin64_vec_field", + ZVEC_DATA_TYPE_VECTOR_BINARY64, bin64_data, + sizeof(bin64_data)); + TEST_ASSERT(err == ZVEC_OK); + + void *bin64_result; + size_t bin64_size; + err = zvec_doc_get_field_value_copy(doc, "bin64_vec_field", + ZVEC_DATA_TYPE_VECTOR_BINARY64, + &bin64_result, &bin64_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(bin64_result != NULL); + TEST_ASSERT(bin64_size == sizeof(bin64_data)); + TEST_ASSERT(memcmp(bin64_result, bin64_data, bin64_size) == 0); + free(bin64_result); + + // VECTOR_FP64 type (double precision vector) + double fp64_data[] = {1.1, 2.2, 3.3, 4.4}; + err = zvec_doc_add_field_by_value(doc, "fp64_vec_field", + ZVEC_DATA_TYPE_VECTOR_FP64, fp64_data, + sizeof(fp64_data)); + TEST_ASSERT(err == ZVEC_OK); + + void *fp64_result; + size_t fp64_size; + err = zvec_doc_get_field_value_copy(doc, "fp64_vec_field", + ZVEC_DATA_TYPE_VECTOR_FP64, &fp64_result, + &fp64_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(fp64_result != NULL); + TEST_ASSERT(fp64_size == 
sizeof(fp64_data)); + TEST_ASSERT(memcmp(fp64_result, fp64_data, fp64_size) == 0); + free(fp64_result); + + // VECTOR_INT16 type + int16_t int16_data[] = {-32768, -1, 0, 1, 32767}; + err = zvec_doc_add_field_by_value(doc, "int16_vec_field", + ZVEC_DATA_TYPE_VECTOR_INT16, int16_data, + sizeof(int16_data)); + TEST_ASSERT(err == ZVEC_OK); + + void *int16_result; + size_t int16_size; + err = zvec_doc_get_field_value_copy(doc, "int16_vec_field", + ZVEC_DATA_TYPE_VECTOR_INT16, + &int16_result, &int16_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(int16_result != NULL); + TEST_ASSERT(int16_size == sizeof(int16_data)); + TEST_ASSERT(memcmp(int16_result, int16_data, int16_size) == 0); + free(int16_result); + + // SPARSE_VECTOR_FP16 type - format: [nnz(uint32_t)][indices...][values...] + uint32_t sparse_fp16_nnz = 3; + size_t sparse_fp16_size_input = + sizeof(uint32_t) + + sparse_fp16_nnz * (sizeof(uint32_t) + sizeof(uint16_t)); + void *sparse_fp16_input = malloc(sparse_fp16_size_input); + uint32_t *fp16_nnz_ptr = (uint32_t *)sparse_fp16_input; + *fp16_nnz_ptr = sparse_fp16_nnz; + uint32_t *fp16_indices = + (uint32_t *)((char *)sparse_fp16_input + sizeof(uint32_t)); + uint16_t *fp16_values = + (uint16_t *)((char *)sparse_fp16_input + sizeof(uint32_t) + + sparse_fp16_nnz * sizeof(uint32_t)); + fp16_indices[0] = 0; + fp16_indices[1] = 5; + fp16_indices[2] = 10; + fp16_values[0] = 0x3C00; + fp16_values[1] = 0x4000; + fp16_values[2] = 0x4200; // FP16: 1.0, 2.0, 3.0 + err = zvec_doc_add_field_by_value(doc, "sparse_fp16_field", + ZVEC_DATA_TYPE_SPARSE_VECTOR_FP16, + sparse_fp16_input, sparse_fp16_size_input); + TEST_ASSERT(err == ZVEC_OK); + free(sparse_fp16_input); + + void *sparse_fp16_result; + size_t sparse_fp16_result_size; + err = zvec_doc_get_field_value_copy( + doc, "sparse_fp16_field", ZVEC_DATA_TYPE_SPARSE_VECTOR_FP16, + &sparse_fp16_result, &sparse_fp16_result_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(sparse_fp16_result != NULL); + // Sparse vector 
format: [nnz(size_t)][indices...][values...] + size_t retrieved_nnz = *(size_t *)sparse_fp16_result; + TEST_ASSERT(retrieved_nnz == 3); + uint32_t *retrieved_fp16_indices = + (uint32_t *)((char *)sparse_fp16_result + sizeof(size_t)); + uint16_t *retrieved_fp16_vals = + (uint16_t *)((char *)sparse_fp16_result + sizeof(size_t) + + retrieved_nnz * sizeof(uint32_t)); + TEST_ASSERT(retrieved_fp16_indices[0] == 0); + TEST_ASSERT(retrieved_fp16_indices[1] == 5); + TEST_ASSERT(retrieved_fp16_indices[2] == 10); + TEST_ASSERT(retrieved_fp16_vals[0] == 0x3C00); + TEST_ASSERT(retrieved_fp16_vals[1] == 0x4000); + TEST_ASSERT(retrieved_fp16_vals[2] == 0x4200); + free(sparse_fp16_result); + + // SPARSE_VECTOR_FP32 type - format: [nnz(uint32_t)][indices...][values...] + uint32_t sparse_fp32_nnz = 4; + size_t sparse_fp32_size_input = + sizeof(uint32_t) + sparse_fp32_nnz * (sizeof(uint32_t) + sizeof(float)); + void *sparse_fp32_input = malloc(sparse_fp32_size_input); + uint32_t *fp32_nnz_ptr = (uint32_t *)sparse_fp32_input; + *fp32_nnz_ptr = sparse_fp32_nnz; + uint32_t *fp32_indices = + (uint32_t *)((char *)sparse_fp32_input + sizeof(uint32_t)); + float *fp32_values = (float *)((char *)sparse_fp32_input + sizeof(uint32_t) + + sparse_fp32_nnz * sizeof(uint32_t)); + fp32_indices[0] = 2; + fp32_indices[1] = 7; + fp32_indices[2] = 15; + fp32_indices[3] = 20; + fp32_values[0] = 1.5f; + fp32_values[1] = 2.5f; + fp32_values[2] = 3.5f; + fp32_values[3] = 4.5f; + err = zvec_doc_add_field_by_value(doc, "sparse_fp32_field", + ZVEC_DATA_TYPE_SPARSE_VECTOR_FP32, + sparse_fp32_input, sparse_fp32_size_input); + TEST_ASSERT(err == ZVEC_OK); + free(sparse_fp32_input); + + void *sparse_fp32_result; + size_t sparse_fp32_result_size; + err = zvec_doc_get_field_value_copy( + doc, "sparse_fp32_field", ZVEC_DATA_TYPE_SPARSE_VECTOR_FP32, + &sparse_fp32_result, &sparse_fp32_result_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(sparse_fp32_result != NULL); + retrieved_nnz = *(size_t 
*)sparse_fp32_result; + TEST_ASSERT(retrieved_nnz == 4); + uint32_t *retrieved_fp32_indices = + (uint32_t *)((char *)sparse_fp32_result + sizeof(size_t)); + float *retrieved_fp32_vals = + (float *)((char *)sparse_fp32_result + sizeof(size_t) + + retrieved_nnz * sizeof(uint32_t)); + TEST_ASSERT(retrieved_fp32_indices[0] == 2); + TEST_ASSERT(retrieved_fp32_indices[1] == 7); + TEST_ASSERT(retrieved_fp32_indices[2] == 15); + TEST_ASSERT(retrieved_fp32_indices[3] == 20); + TEST_ASSERT(fabs(retrieved_fp32_vals[0] - 1.5f) < 1e-5f); + TEST_ASSERT(fabs(retrieved_fp32_vals[1] - 2.5f) < 1e-5f); + TEST_ASSERT(fabs(retrieved_fp32_vals[2] - 3.5f) < 1e-5f); + TEST_ASSERT(fabs(retrieved_fp32_vals[3] - 4.5f) < 1e-5f); + free(sparse_fp32_result); + + // ARRAY_BINARY type + // Format: [length(uint32_t)][data][length][data]... + uint8_t array_bin_data[] = { + 1, 0, 0, 0, 0x01, // length=1, data=0x01 + 2, 0, 0, 0, 0x02, 0x03, // length=2, data=0x02,0x03 + 2, 0, 0, 0, 0x04, 0x05 // length=2, data=0x04,0x05 + }; + err = zvec_doc_add_field_by_value(doc, "array_binary_field", + ZVEC_DATA_TYPE_ARRAY_BINARY, array_bin_data, + sizeof(array_bin_data)); + TEST_ASSERT(err == ZVEC_OK); + void *array_binary_result; + size_t array_binary_size; + err = zvec_doc_get_field_value_copy(doc, "array_binary_field", + ZVEC_DATA_TYPE_ARRAY_BINARY, + &array_binary_result, &array_binary_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(array_binary_result != NULL); + // The result is a contiguous buffer of binary data without length prefixes + TEST_ASSERT(array_binary_size == 5); // 1 + 2 + 2 bytes + const uint8_t *result_bytes = (const uint8_t *)array_binary_result; + TEST_ASSERT(result_bytes[0] == 0x01); + TEST_ASSERT(result_bytes[1] == 0x02); + TEST_ASSERT(result_bytes[2] == 0x03); + TEST_ASSERT(result_bytes[3] == 0x04); + TEST_ASSERT(result_bytes[4] == 0x05); + free(array_binary_result); + + + // ARRAY_STRING type + const char *array_str_data[] = {"str1", "str2", "str3"}; + ZVecString *array_zvec_str[3]; 
+ for (int i = 0; i < 3; i++) { + array_zvec_str[i] = zvec_string_create(array_str_data[i]); + } + err = zvec_doc_add_field_by_value(doc, "array_string_field", + ZVEC_DATA_TYPE_ARRAY_STRING, array_zvec_str, + sizeof(array_zvec_str)); + TEST_ASSERT(err == ZVEC_OK); + + void *array_string_result; + size_t array_string_size; + err = zvec_doc_get_field_value_copy(doc, "array_string_field", + ZVEC_DATA_TYPE_ARRAY_STRING, + &array_string_result, &array_string_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(array_string_result != NULL); + free(array_string_result); + for (int i = 0; i < 3; i++) { + zvec_free_string(array_zvec_str[i]); + } + + free(string_field.value.string_value.data); + + // ARRAY_BOOL type + bool array_bool_data[] = {true, false, true, false, true}; + err = zvec_doc_add_field_by_value(doc, "array_bool_field", + ZVEC_DATA_TYPE_ARRAY_BOOL, array_bool_data, + sizeof(array_bool_data)); + TEST_ASSERT(err == ZVEC_OK); + + void *array_bool_result; + size_t array_bool_size; + err = zvec_doc_get_field_value_copy(doc, "array_bool_field", + ZVEC_DATA_TYPE_ARRAY_BOOL, + &array_bool_result, &array_bool_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(array_bool_result != NULL); + // Verify the bit-packed bool array + uint8_t *bool_bytes = (uint8_t *)array_bool_result; + TEST_ASSERT((bool_bytes[0] & 0x01) != 0); // index 0: true + TEST_ASSERT((bool_bytes[0] & 0x02) == 0); // index 1: false + TEST_ASSERT((bool_bytes[0] & 0x04) != 0); // index 2: true + TEST_ASSERT((bool_bytes[0] & 0x08) == 0); // index 3: false + TEST_ASSERT((bool_bytes[0] & 0x10) != 0); // index 4: true + free(array_bool_result); + + // ARRAY_INT32 type + int32_t array_int32_data[] = {100, 200, 300}; + err = zvec_doc_add_field_by_value(doc, "array_int32_field", + ZVEC_DATA_TYPE_ARRAY_INT32, + array_int32_data, sizeof(array_int32_data)); + TEST_ASSERT(err == ZVEC_OK); + + void *array_int32_result; + size_t array_int32_size; + err = zvec_doc_get_field_value_copy(doc, "array_int32_field", + 
ZVEC_DATA_TYPE_ARRAY_INT32, + &array_int32_result, &array_int32_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(array_int32_result != NULL); + TEST_ASSERT(array_int32_size == sizeof(array_int32_data)); + TEST_ASSERT(((int32_t *)array_int32_result)[0] == 100); + TEST_ASSERT(((int32_t *)array_int32_result)[1] == 200); + TEST_ASSERT(((int32_t *)array_int32_result)[2] == 300); + free(array_int32_result); + + // ARRAY_INT64 type + int64_t array_int64_data[] = {-9223372036854775807LL, 0, + 9223372036854775807LL}; + err = zvec_doc_add_field_by_value(doc, "array_int64_field", + ZVEC_DATA_TYPE_ARRAY_INT64, + array_int64_data, sizeof(array_int64_data)); + TEST_ASSERT(err == ZVEC_OK); + + void *array_int64_result; + size_t array_int64_size; + err = zvec_doc_get_field_value_copy(doc, "array_int64_field", + ZVEC_DATA_TYPE_ARRAY_INT64, + &array_int64_result, &array_int64_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(array_int64_result != NULL); + TEST_ASSERT(array_int64_size == sizeof(array_int64_data)); + TEST_ASSERT(((int64_t *)array_int64_result)[0] == -9223372036854775807LL); + TEST_ASSERT(((int64_t *)array_int64_result)[1] == 0); + TEST_ASSERT(((int64_t *)array_int64_result)[2] == 9223372036854775807LL); + free(array_int64_result); + + // ARRAY_UINT32 type + uint32_t array_uint32_data[] = {0U, 1000000U, 4000000000U}; + err = zvec_doc_add_field_by_value( + doc, "array_uint32_field", ZVEC_DATA_TYPE_ARRAY_UINT32, array_uint32_data, + sizeof(array_uint32_data)); + TEST_ASSERT(err == ZVEC_OK); + + void *array_uint32_result; + size_t array_uint32_size; + err = zvec_doc_get_field_value_copy(doc, "array_uint32_field", + ZVEC_DATA_TYPE_ARRAY_UINT32, + &array_uint32_result, &array_uint32_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(array_uint32_result != NULL); + TEST_ASSERT(array_uint32_size == sizeof(array_uint32_data)); + TEST_ASSERT(((uint32_t *)array_uint32_result)[0] == 0U); + TEST_ASSERT(((uint32_t *)array_uint32_result)[1] == 1000000U); + 
TEST_ASSERT(((uint32_t *)array_uint32_result)[2] == 4000000000U); + free(array_uint32_result); + + // ARRAY_UINT64 type + uint64_t array_uint64_data[] = {0ULL, 1000000000000ULL, + 18000000000000000000ULL}; + err = zvec_doc_add_field_by_value( + doc, "array_uint64_field", ZVEC_DATA_TYPE_ARRAY_UINT64, array_uint64_data, + sizeof(array_uint64_data)); + TEST_ASSERT(err == ZVEC_OK); + + void *array_uint64_result; + size_t array_uint64_size; + err = zvec_doc_get_field_value_copy(doc, "array_uint64_field", + ZVEC_DATA_TYPE_ARRAY_UINT64, + &array_uint64_result, &array_uint64_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(array_uint64_result != NULL); + TEST_ASSERT(array_uint64_size == sizeof(array_uint64_data)); + TEST_ASSERT(((uint64_t *)array_uint64_result)[0] == 0ULL); + TEST_ASSERT(((uint64_t *)array_uint64_result)[1] == 1000000000000ULL); + TEST_ASSERT(((uint64_t *)array_uint64_result)[2] == 18000000000000000000ULL); + free(array_uint64_result); + + // ARRAY_FLOAT type + float array_float_data[] = {1.5f, 2.5f, 3.5f}; + err = zvec_doc_add_field_by_value(doc, "array_float_field", + ZVEC_DATA_TYPE_ARRAY_FLOAT, + array_float_data, sizeof(array_float_data)); + TEST_ASSERT(err == ZVEC_OK); + + void *array_float_result; + size_t array_float_size; + err = zvec_doc_get_field_value_copy(doc, "array_float_field", + ZVEC_DATA_TYPE_ARRAY_FLOAT, + &array_float_result, &array_float_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(array_float_result != NULL); + TEST_ASSERT(array_float_size == sizeof(array_float_data)); + TEST_ASSERT(((float *)array_float_result)[0] == 1.5f); + TEST_ASSERT(((float *)array_float_result)[1] == 2.5f); + TEST_ASSERT(((float *)array_float_result)[2] == 3.5f); + free(array_float_result); + + // ARRAY_DOUBLE type + double array_double_data[] = {1.111111, 2.222222, 3.333333}; + err = zvec_doc_add_field_by_value( + doc, "array_double_field", ZVEC_DATA_TYPE_ARRAY_DOUBLE, array_double_data, + sizeof(array_double_data)); + TEST_ASSERT(err == ZVEC_OK); + + 
void *array_double_result; + size_t array_double_size; + err = zvec_doc_get_field_value_copy(doc, "array_double_field", + ZVEC_DATA_TYPE_ARRAY_DOUBLE, + &array_double_result, &array_double_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(array_double_result != NULL); + TEST_ASSERT(array_double_size == sizeof(array_double_data)); + TEST_ASSERT(fabs(((double *)array_double_result)[0] - 1.111111) < 1e-10); + TEST_ASSERT(fabs(((double *)array_double_result)[1] - 2.222222) < 1e-10); + TEST_ASSERT(fabs(((double *)array_double_result)[2] - 3.333333) < 1e-10); + free(array_double_result); + + + free(binary_field.value.string_value.data); + zvec_doc_destroy(doc); + + TEST_END(); +} + +void test_doc_get_field_value_pointer(void) { + TEST_START(); + + ZVecDoc *doc = zvec_doc_create(); + TEST_ASSERT(doc != NULL); + + ZVecErrorCode err; + + // Add fields for pointer testing + ZVecDocField bool_field; + bool_field.name.data = "bool_field"; + bool_field.name.length = strlen("bool_field"); + bool_field.data_type = ZVEC_DATA_TYPE_BOOL; + bool_field.value.bool_value = true; + err = zvec_doc_add_field_by_struct(doc, &bool_field); + TEST_ASSERT(err == ZVEC_OK); + + ZVecDocField int32_field; + int32_field.name.data = "int32_field"; + int32_field.name.length = strlen("int32_field"); + int32_field.data_type = ZVEC_DATA_TYPE_INT32; + int32_field.value.int32_value = -2147483648; + err = zvec_doc_add_field_by_struct(doc, &int32_field); + TEST_ASSERT(err == ZVEC_OK); + + ZVecDocField string_field; + string_field.name.data = "string_field"; + string_field.name.length = strlen("string_field"); + string_field.data_type = ZVEC_DATA_TYPE_STRING; + string_field.value.string_value = *zvec_string_create("Hello, 世界!"); + err = zvec_doc_add_field_by_struct(doc, &string_field); + TEST_ASSERT(err == ZVEC_OK); + + ZVecDocField binary_field; + binary_field.name.data = "binary_field"; + binary_field.name.length = strlen("binary_field"); + binary_field.data_type = ZVEC_DATA_TYPE_BINARY; + uint8_t 
binary_data[] = {0x00, 0x01, 0x02, 0xFF, 0xFE, 0xFD}; + binary_field.value.string_value = + *zvec_bin_create(binary_data, sizeof(binary_data)); + err = zvec_doc_add_field_by_struct(doc, &binary_field); + TEST_ASSERT(err == ZVEC_OK); + + float test_vector[] = {1.1f, 2.2f, 3.3f, 4.4f, 5.5f}; + ZVecDocField fp32_vec_field; + fp32_vec_field.name.data = "fp32_vec_field"; + fp32_vec_field.name.length = strlen("fp32_vec_field"); + fp32_vec_field.data_type = ZVEC_DATA_TYPE_VECTOR_FP32; + fp32_vec_field.value.vector_value.data = test_vector; + fp32_vec_field.value.vector_value.length = 5; + err = zvec_doc_add_field_by_struct(doc, &fp32_vec_field); + TEST_ASSERT(err == ZVEC_OK); + + // Add more fields for comprehensive pointer testing + int64_t int64_val = -9223372036854775807LL; + err = + zvec_doc_add_field_by_value(doc, "int64_field_ptr", ZVEC_DATA_TYPE_INT64, + &int64_val, sizeof(int64_val)); + TEST_ASSERT(err == ZVEC_OK); + + uint32_t uint32_val = 4000000000U; + err = zvec_doc_add_field_by_value(doc, "uint32_field_ptr", + ZVEC_DATA_TYPE_UINT32, &uint32_val, + sizeof(uint32_val)); + TEST_ASSERT(err == ZVEC_OK); + + uint64_t uint64_val = 18000000000000000000ULL; + err = zvec_doc_add_field_by_value(doc, "uint64_field_ptr", + ZVEC_DATA_TYPE_UINT64, &uint64_val, + sizeof(uint64_val)); + TEST_ASSERT(err == ZVEC_OK); + + float float_val = 3.14159265f; + err = + zvec_doc_add_field_by_value(doc, "float_field_ptr", ZVEC_DATA_TYPE_FLOAT, + &float_val, sizeof(float_val)); + TEST_ASSERT(err == ZVEC_OK); + + double double_val = 2.718281828459045; + err = zvec_doc_add_field_by_value(doc, "double_field_ptr", + ZVEC_DATA_TYPE_DOUBLE, &double_val, + sizeof(double_val)); + TEST_ASSERT(err == ZVEC_OK); + + // VECTOR_BINARY64 + uint64_t bin64_vec_data[] = {0xAA55AA55AA55AA55ULL, 0x55AA55AA55AA55AAULL}; + err = zvec_doc_add_field_by_value(doc, "bin64_vec_field_ptr", + ZVEC_DATA_TYPE_VECTOR_BINARY64, + bin64_vec_data, sizeof(bin64_vec_data)); + TEST_ASSERT(err == ZVEC_OK); + + // VECTOR_FP16 + 
uint16_t fp16_vec_data[] = {0x3C00, 0x4000, 0x4200, 0x4400}; + err = zvec_doc_add_field_by_value(doc, "fp16_vec_field_ptr", + ZVEC_DATA_TYPE_VECTOR_FP16, fp16_vec_data, + sizeof(fp16_vec_data)); + TEST_ASSERT(err == ZVEC_OK); + + // VECTOR_FP64 + double fp64_vec_data[] = {1.1, 2.2, 3.3, 4.4}; + err = zvec_doc_add_field_by_value(doc, "fp64_vec_field_ptr", + ZVEC_DATA_TYPE_VECTOR_FP64, fp64_vec_data, + sizeof(fp64_vec_data)); + TEST_ASSERT(err == ZVEC_OK); + + // VECTOR_INT8 + int8_t int8_vec_data[] = {-128, -1, 0, 1, 127}; + err = zvec_doc_add_field_by_value(doc, "int8_vec_field_ptr", + ZVEC_DATA_TYPE_VECTOR_INT8, int8_vec_data, + sizeof(int8_vec_data)); + TEST_ASSERT(err == ZVEC_OK); + + // VECTOR_INT16 + int16_t int16_vec_data[] = {-32768, -1, 0, 1, 32767}; + err = zvec_doc_add_field_by_value(doc, "int16_vec_field_ptr", + ZVEC_DATA_TYPE_VECTOR_INT16, int16_vec_data, + sizeof(int16_vec_data)); + TEST_ASSERT(err == ZVEC_OK); + + // ARRAY_INT32 + int32_t array_int32_data[] = {100, 200, 300}; + err = zvec_doc_add_field_by_value(doc, "array_int32_field_ptr", + ZVEC_DATA_TYPE_ARRAY_INT32, + array_int32_data, sizeof(array_int32_data)); + TEST_ASSERT(err == ZVEC_OK); + + // ARRAY_INT64 + int64_t array_int64_data[] = {-9223372036854775807LL, 0, + 9223372036854775807LL}; + err = zvec_doc_add_field_by_value(doc, "array_int64_field_ptr", + ZVEC_DATA_TYPE_ARRAY_INT64, + array_int64_data, sizeof(array_int64_data)); + TEST_ASSERT(err == ZVEC_OK); + + // ARRAY_UINT32 + uint32_t array_uint32_data[] = {0U, 1000000U, 4000000000U}; + err = zvec_doc_add_field_by_value( + doc, "array_uint32_field_ptr", ZVEC_DATA_TYPE_ARRAY_UINT32, + array_uint32_data, sizeof(array_uint32_data)); + TEST_ASSERT(err == ZVEC_OK); + + // ARRAY_UINT64 + uint64_t array_uint64_data[] = {0ULL, 1000000000000ULL, + 18000000000000000000ULL}; + err = zvec_doc_add_field_by_value( + doc, "array_uint64_field_ptr", ZVEC_DATA_TYPE_ARRAY_UINT64, + array_uint64_data, sizeof(array_uint64_data)); + TEST_ASSERT(err == 
ZVEC_OK); + + // ARRAY_FLOAT + float array_float_data[] = {1.5f, 2.5f, 3.5f}; + err = zvec_doc_add_field_by_value(doc, "array_float_field_ptr", + ZVEC_DATA_TYPE_ARRAY_FLOAT, + array_float_data, sizeof(array_float_data)); + TEST_ASSERT(err == ZVEC_OK); + + // ARRAY_DOUBLE + double array_double_data[] = {1.111111, 2.222222, 3.333333}; + err = zvec_doc_add_field_by_value( + doc, "array_double_field_ptr", ZVEC_DATA_TYPE_ARRAY_DOUBLE, + array_double_data, sizeof(array_double_data)); + TEST_ASSERT(err == ZVEC_OK); + + printf( + "=== Testing zvec_doc_get_field_value_pointer with all supported types " + "===\n"); + + // Test pointer access to BOOL + const void *bool_ptr; + size_t bool_ptr_size; + err = zvec_doc_get_field_value_pointer(doc, "bool_field", ZVEC_DATA_TYPE_BOOL, + &bool_ptr, &bool_ptr_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(bool_ptr != NULL); + TEST_ASSERT(bool_ptr_size == sizeof(bool)); + TEST_ASSERT(*(const bool *)bool_ptr == true); + + // Test pointer access to INT32 + const void *int32_ptr; + size_t int32_ptr_size; + err = zvec_doc_get_field_value_pointer( + doc, "int32_field", ZVEC_DATA_TYPE_INT32, &int32_ptr, &int32_ptr_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(int32_ptr != NULL); + TEST_ASSERT(int32_ptr_size == sizeof(int32_t)); + TEST_ASSERT(*(const int32_t *)int32_ptr == -2147483648); + + // Test pointer access to STRING + const void *string_ptr; + size_t string_ptr_size; + err = zvec_doc_get_field_value_pointer(doc, "string_field", + ZVEC_DATA_TYPE_STRING, &string_ptr, + &string_ptr_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(string_ptr != NULL); + TEST_ASSERT(string_ptr_size == strlen("Hello, 世界!")); + TEST_ASSERT(memcmp(string_ptr, "Hello, 世界!", string_ptr_size) == 0); + + // Test pointer access to BINARY + const void *binary_ptr; + size_t binary_ptr_size; + err = zvec_doc_get_field_value_pointer(doc, "binary_field", + ZVEC_DATA_TYPE_BINARY, &binary_ptr, + &binary_ptr_size); + TEST_ASSERT(err == ZVEC_OK); + 
TEST_ASSERT(binary_ptr != NULL); + TEST_ASSERT(binary_ptr_size == 6); + TEST_ASSERT(memcmp(binary_ptr, "\x00\x01\x02\xFF\xFE\xFD", binary_ptr_size) == + 0); + + // Test pointer access to VECTOR_FP32 + const void *fp32_vec_ptr; + size_t fp32_vec_ptr_size; + err = zvec_doc_get_field_value_pointer(doc, "fp32_vec_field", + ZVEC_DATA_TYPE_VECTOR_FP32, + &fp32_vec_ptr, &fp32_vec_ptr_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(fp32_vec_ptr != NULL); + TEST_ASSERT(fp32_vec_ptr_size == 5 * sizeof(float)); + TEST_ASSERT(memcmp(fp32_vec_ptr, test_vector, fp32_vec_ptr_size) == 0); + + // Test pointer access to INT64 + const void *int64_ptr; + size_t int64_ptr_size; + err = zvec_doc_get_field_value_pointer(doc, "int64_field_ptr", + ZVEC_DATA_TYPE_INT64, &int64_ptr, + &int64_ptr_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(int64_ptr != NULL); + TEST_ASSERT(int64_ptr_size == sizeof(int64_t)); + TEST_ASSERT(*(const int64_t *)int64_ptr == -9223372036854775807LL); + + // Test pointer access to UINT32 + const void *uint32_ptr; + size_t uint32_ptr_size; + err = zvec_doc_get_field_value_pointer(doc, "uint32_field_ptr", + ZVEC_DATA_TYPE_UINT32, &uint32_ptr, + &uint32_ptr_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(uint32_ptr != NULL); + TEST_ASSERT(uint32_ptr_size == sizeof(uint32_t)); + TEST_ASSERT(*(const uint32_t *)uint32_ptr == 4000000000U); + + // Test pointer access to UINT64 + const void *uint64_ptr; + size_t uint64_ptr_size; + err = zvec_doc_get_field_value_pointer(doc, "uint64_field_ptr", + ZVEC_DATA_TYPE_UINT64, &uint64_ptr, + &uint64_ptr_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(uint64_ptr != NULL); + TEST_ASSERT(uint64_ptr_size == sizeof(uint64_t)); + TEST_ASSERT(*(const uint64_t *)uint64_ptr == 18000000000000000000ULL); + + // Test pointer access to FLOAT + const void *float_ptr; + size_t float_ptr_size; + err = zvec_doc_get_field_value_pointer(doc, "float_field_ptr", + ZVEC_DATA_TYPE_FLOAT, &float_ptr, + &float_ptr_size); + TEST_ASSERT(err 
== ZVEC_OK); + TEST_ASSERT(float_ptr != NULL); + TEST_ASSERT(float_ptr_size == sizeof(float)); + TEST_ASSERT(fabs(*(const float *)float_ptr - 3.14159265f) < 1e-6f); + + // Test pointer access to DOUBLE + const void *double_ptr; + size_t double_ptr_size; + err = zvec_doc_get_field_value_pointer(doc, "double_field_ptr", + ZVEC_DATA_TYPE_DOUBLE, &double_ptr, + &double_ptr_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(double_ptr != NULL); + TEST_ASSERT(double_ptr_size == sizeof(double)); + TEST_ASSERT(fabs(*(const double *)double_ptr - 2.718281828459045) < 1e-15); + + // Test pointer access to VECTOR_BINARY64 + const void *bin64_vec_ptr; + size_t bin64_vec_ptr_size; + err = zvec_doc_get_field_value_pointer(doc, "bin64_vec_field_ptr", + ZVEC_DATA_TYPE_VECTOR_BINARY64, + &bin64_vec_ptr, &bin64_vec_ptr_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(bin64_vec_ptr != NULL); + TEST_ASSERT(bin64_vec_ptr_size == sizeof(bin64_vec_data)); + TEST_ASSERT(memcmp(bin64_vec_ptr, bin64_vec_data, bin64_vec_ptr_size) == 0); + + // Test pointer access to VECTOR_FP16 + const void *fp16_vec_ptr; + size_t fp16_vec_ptr_size; + err = zvec_doc_get_field_value_pointer(doc, "fp16_vec_field_ptr", + ZVEC_DATA_TYPE_VECTOR_FP16, + &fp16_vec_ptr, &fp16_vec_ptr_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(fp16_vec_ptr != NULL); + TEST_ASSERT(fp16_vec_ptr_size == sizeof(fp16_vec_data)); + TEST_ASSERT(memcmp(fp16_vec_ptr, fp16_vec_data, fp16_vec_ptr_size) == 0); + + // Test pointer access to VECTOR_FP64 + const void *fp64_vec_ptr; + size_t fp64_vec_ptr_size; + err = zvec_doc_get_field_value_pointer(doc, "fp64_vec_field_ptr", + ZVEC_DATA_TYPE_VECTOR_FP64, + &fp64_vec_ptr, &fp64_vec_ptr_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(fp64_vec_ptr != NULL); + TEST_ASSERT(fp64_vec_ptr_size == sizeof(fp64_vec_data)); + TEST_ASSERT(memcmp(fp64_vec_ptr, fp64_vec_data, fp64_vec_ptr_size) == 0); + + // Test pointer access to VECTOR_INT8 + const void *int8_vec_ptr; + size_t 
int8_vec_ptr_size; + err = zvec_doc_get_field_value_pointer(doc, "int8_vec_field_ptr", + ZVEC_DATA_TYPE_VECTOR_INT8, + &int8_vec_ptr, &int8_vec_ptr_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(int8_vec_ptr != NULL); + TEST_ASSERT(int8_vec_ptr_size == sizeof(int8_vec_data)); + TEST_ASSERT(memcmp(int8_vec_ptr, int8_vec_data, int8_vec_ptr_size) == 0); + + // Test pointer access to VECTOR_INT16 + const void *int16_vec_ptr; + size_t int16_vec_ptr_size; + err = zvec_doc_get_field_value_pointer(doc, "int16_vec_field_ptr", + ZVEC_DATA_TYPE_VECTOR_INT16, + &int16_vec_ptr, &int16_vec_ptr_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(int16_vec_ptr != NULL); + TEST_ASSERT(int16_vec_ptr_size == sizeof(int16_vec_data)); + TEST_ASSERT(memcmp(int16_vec_ptr, int16_vec_data, int16_vec_ptr_size) == 0); + + // Test pointer access to ARRAY_INT32 + const void *array_int32_ptr; + size_t array_int32_ptr_size; + err = zvec_doc_get_field_value_pointer( + doc, "array_int32_field_ptr", ZVEC_DATA_TYPE_ARRAY_INT32, + &array_int32_ptr, &array_int32_ptr_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(array_int32_ptr != NULL); + TEST_ASSERT(array_int32_ptr_size == sizeof(array_int32_data)); + TEST_ASSERT(((const int32_t *)array_int32_ptr)[0] == 100); + TEST_ASSERT(((const int32_t *)array_int32_ptr)[1] == 200); + TEST_ASSERT(((const int32_t *)array_int32_ptr)[2] == 300); + + // Test pointer access to ARRAY_INT64 + const void *array_int64_ptr; + size_t array_int64_ptr_size; + err = zvec_doc_get_field_value_pointer( + doc, "array_int64_field_ptr", ZVEC_DATA_TYPE_ARRAY_INT64, + &array_int64_ptr, &array_int64_ptr_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(array_int64_ptr != NULL); + TEST_ASSERT(array_int64_ptr_size == sizeof(array_int64_data)); + TEST_ASSERT(((const int64_t *)array_int64_ptr)[0] == -9223372036854775807LL); + TEST_ASSERT(((const int64_t *)array_int64_ptr)[1] == 0); + TEST_ASSERT(((const int64_t *)array_int64_ptr)[2] == 9223372036854775807LL); + + // Test 
pointer access to ARRAY_UINT32 + const void *array_uint32_ptr; + size_t array_uint32_ptr_size; + err = zvec_doc_get_field_value_pointer( + doc, "array_uint32_field_ptr", ZVEC_DATA_TYPE_ARRAY_UINT32, + &array_uint32_ptr, &array_uint32_ptr_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(array_uint32_ptr != NULL); + TEST_ASSERT(array_uint32_ptr_size == sizeof(array_uint32_data)); + TEST_ASSERT(((const uint32_t *)array_uint32_ptr)[0] == 0U); + TEST_ASSERT(((const uint32_t *)array_uint32_ptr)[1] == 1000000U); + TEST_ASSERT(((const uint32_t *)array_uint32_ptr)[2] == 4000000000U); + + // Test pointer access to ARRAY_UINT64 + const void *array_uint64_ptr; + size_t array_uint64_ptr_size; + err = zvec_doc_get_field_value_pointer( + doc, "array_uint64_field_ptr", ZVEC_DATA_TYPE_ARRAY_UINT64, + &array_uint64_ptr, &array_uint64_ptr_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(array_uint64_ptr != NULL); + TEST_ASSERT(array_uint64_ptr_size == sizeof(array_uint64_data)); + TEST_ASSERT(((const uint64_t *)array_uint64_ptr)[0] == 0ULL); + TEST_ASSERT(((const uint64_t *)array_uint64_ptr)[1] == 1000000000000ULL); + TEST_ASSERT(((const uint64_t *)array_uint64_ptr)[2] == + 18000000000000000000ULL); + + // Test pointer access to ARRAY_FLOAT + const void *array_float_ptr; + size_t array_float_ptr_size; + err = zvec_doc_get_field_value_pointer( + doc, "array_float_field_ptr", ZVEC_DATA_TYPE_ARRAY_FLOAT, + &array_float_ptr, &array_float_ptr_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(array_float_ptr != NULL); + TEST_ASSERT(array_float_ptr_size == sizeof(array_float_data)); + TEST_ASSERT(((const float *)array_float_ptr)[0] == 1.5f); + TEST_ASSERT(((const float *)array_float_ptr)[1] == 2.5f); + TEST_ASSERT(((const float *)array_float_ptr)[2] == 3.5f); + + // Test pointer access to ARRAY_DOUBLE + const void *array_double_ptr; + size_t array_double_ptr_size; + err = zvec_doc_get_field_value_pointer( + doc, "array_double_field_ptr", ZVEC_DATA_TYPE_ARRAY_DOUBLE, + 
&array_double_ptr, &array_double_ptr_size);
+  TEST_ASSERT(err == ZVEC_OK);
+  TEST_ASSERT(array_double_ptr != NULL);
+  TEST_ASSERT(array_double_ptr_size == sizeof(array_double_data));
+  TEST_ASSERT(fabs(((const double *)array_double_ptr)[0] - 1.111111) < 1e-10);
+  TEST_ASSERT(fabs(((const double *)array_double_ptr)[1] - 2.222222) < 1e-10);
+  TEST_ASSERT(fabs(((const double *)array_double_ptr)[2] - 3.333333) < 1e-10);
+
+  free(string_field.value.string_value.data);
+  free(binary_field.value.string_value.data);
+  zvec_doc_destroy(doc);
+
+  TEST_END();
+}
+
+// Checks document field bookkeeping (count, existence, null state and name
+// enumeration) after adding one bool, one int32 and one string field.
+void test_doc_field_operations(void) {
+  TEST_START();
+
+  ZVecDoc *doc = zvec_doc_create();
+  TEST_ASSERT(doc != NULL);
+
+  ZVecErrorCode err;
+
+  // Add some fields. The structs are zeroed so that no member is handed to
+  // the API with an indeterminate value.
+  ZVecDocField bool_field = {0};
+  bool_field.name.data = "bool_field";
+  bool_field.name.length = strlen("bool_field");
+  bool_field.data_type = ZVEC_DATA_TYPE_BOOL;
+  bool_field.value.bool_value = true;
+  err = zvec_doc_add_field_by_struct(doc, &bool_field);
+  TEST_ASSERT(err == ZVEC_OK);
+
+  ZVecDocField int32_field = {0};
+  int32_field.name.data = "int32_field";
+  int32_field.name.length = strlen("int32_field");
+  int32_field.data_type = ZVEC_DATA_TYPE_INT32;
+  int32_field.value.int32_value = -2147483648;
+  err = zvec_doc_add_field_by_struct(doc, &int32_field);
+  TEST_ASSERT(err == ZVEC_OK);
+
+  // Keep the handle returned by zvec_string_create: dereferencing the call
+  // expression directly would crash on a NULL result and would leave no way
+  // to release the ZVecString object itself.
+  ZVecString *hello = zvec_string_create("Hello");
+  TEST_ASSERT(hello != NULL);
+
+  ZVecDocField string_field = {0};
+  string_field.name.data = "string_field";
+  string_field.name.length = strlen("string_field");
+  string_field.data_type = ZVEC_DATA_TYPE_STRING;
+  if (hello != NULL) {
+    string_field.value.string_value = *hello;
+    err = zvec_doc_add_field_by_struct(doc, &string_field);
+    TEST_ASSERT(err == ZVEC_OK);
+  }
+
+  // Test field count
+  size_t field_count = zvec_doc_get_field_count(doc);
+  TEST_ASSERT(field_count >= 3);
+
+  // Test field existence checks
+  TEST_ASSERT(zvec_doc_has_field(doc, "bool_field") == true);
+  TEST_ASSERT(zvec_doc_has_field(doc, "int32_field") == true);
+  TEST_ASSERT(zvec_doc_has_field(doc, "string_field") == true);
+  TEST_ASSERT(zvec_doc_has_field(doc, "nonexistent") == false);
+
+  TEST_ASSERT(zvec_doc_has_field_value(doc, "bool_field") == true);
+  TEST_ASSERT(zvec_doc_is_field_null(doc, "bool_field") == false);
+  TEST_ASSERT(zvec_doc_is_field_null(doc, "nonexistent") == false);
+
+  // Test field names retrieval. Out-params start in a known state so a failed
+  // call cannot make the loop below walk an indeterminate pointer.
+  char **field_names = NULL;
+  size_t name_count = 0;
+  err = zvec_doc_get_field_names(doc, &field_names, &name_count);
+  TEST_ASSERT(err == ZVEC_OK);
+  TEST_ASSERT(name_count >= 3);
+  TEST_ASSERT(field_names != NULL);
+
+  // Verify some expected fields are present
+  bool found_key_fields = false;
+  if (field_names != NULL) {
+    for (size_t i = 0; i < name_count; i++) {
+      if (strcmp(field_names[i], "bool_field") == 0 ||
+          strcmp(field_names[i], "int32_field") == 0 ||
+          strcmp(field_names[i], "string_field") == 0) {
+        found_key_fields = true;
+        break;
+      }
+    }
+  }
+  TEST_ASSERT(found_key_fields == true);
+
+  if (field_names != NULL) {
+    zvec_free_str_array(field_names, name_count);
+  }
+  // zvec_free_string is the release call this file pairs with
+  // zvec_string_create; it replaces the bare free() of the data pointer.
+  if (hello != NULL) {
+    zvec_free_string(hello);
+  }
+  zvec_doc_destroy(doc);
+
+  TEST_END();
+}
+
+// Verifies that the three value getters report an error for a missing field
+// and for a field accessed with the wrong data type.
+void test_doc_error_conditions(void) {
+  TEST_START();
+
+  ZVecDoc *doc = zvec_doc_create();
+  TEST_ASSERT(doc != NULL);
+
+  ZVecErrorCode err;
+
+  // Add a field for error testing. The wrong-type checks below are only
+  // meaningful if this field really exists, so the result is asserted.
+  ZVecDocField bool_field = {0};
+  bool_field.name.data = "bool_field";
+  bool_field.name.length = strlen("bool_field");
+  bool_field.data_type = ZVEC_DATA_TYPE_BOOL;
+  bool_field.value.bool_value = true;
+  err = zvec_doc_add_field_by_struct(doc, &bool_field);
+  TEST_ASSERT(err == ZVEC_OK);
+
+  const void *dummy_ptr = NULL;
+  size_t dummy_ptr_size = 0;
+  int32_t int32_result = 0;
+  void *string_result = NULL;
+  size_t string_size = 0;
+
+  printf("=== Testing error conditions ===\n");
+
+  // Test non-existent field
+  err =
+      zvec_doc_get_field_value_basic(doc, "missing_field", ZVEC_DATA_TYPE_INT32,
+                                     &int32_result, sizeof(int32_result));
+  TEST_ASSERT(err != ZVEC_OK);
+
+  err =
+      zvec_doc_get_field_value_copy(doc, "missing_field", ZVEC_DATA_TYPE_STRING,
+                                    &string_result, &string_size);
+  TEST_ASSERT(err != ZVEC_OK);
+
+  err = zvec_doc_get_field_value_pointer(
+      doc, "missing_field", ZVEC_DATA_TYPE_FLOAT, &dummy_ptr, &dummy_ptr_size);
+  TEST_ASSERT(err != ZVEC_OK);
+
+  // Test wrong data type access
+  err = zvec_doc_get_field_value_basic(doc, "bool_field", ZVEC_DATA_TYPE_INT32,
+                                       &int32_result, sizeof(int32_result));
+  TEST_ASSERT(err != ZVEC_OK);
+
+  err = zvec_doc_get_field_value_copy(doc, "bool_field", ZVEC_DATA_TYPE_STRING,
+                                      &string_result, &string_size);
+  TEST_ASSERT(err != ZVEC_OK);
+
+  err = zvec_doc_get_field_value_pointer(
+      doc, "bool_field", ZVEC_DATA_TYPE_FLOAT, &dummy_ptr, &dummy_ptr_size);
+  TEST_ASSERT(err != ZVEC_OK);
+
+  zvec_doc_destroy(doc);
+
+  TEST_END();
+}
+
+// Round-trips a two-field document through zvec_doc_serialize and
+// zvec_doc_deserialize, then compares field count and the int32 value.
+void test_doc_serialization(void) {
+  TEST_START();
+
+  ZVecDoc *doc = zvec_doc_create();
+  TEST_ASSERT(doc != NULL);
+
+  ZVecErrorCode err;
+
+  // Add fields for serialization testing
+  ZVecDocField int32_field = {0};
+  int32_field.name.data = "int32_field";
+  int32_field.name.length = strlen("int32_field");
+  int32_field.data_type = ZVEC_DATA_TYPE_INT32;
+  int32_field.value.int32_value = -2147483648;
+  err = zvec_doc_add_field_by_struct(doc, &int32_field);
+  TEST_ASSERT(err == ZVEC_OK);
+
+  // The string handle is kept so it can be NULL-checked and released as a
+  // whole instead of leaking the ZVecString object.
+  ZVecString *text = zvec_string_create("Serialization Test");
+  TEST_ASSERT(text != NULL);
+
+  ZVecDocField string_field = {0};
+  string_field.name.data = "string_field";
+  string_field.name.length = strlen("string_field");
+  string_field.data_type = ZVEC_DATA_TYPE_STRING;
+  if (text != NULL) {
+    string_field.value.string_value = *text;
+    err = zvec_doc_add_field_by_struct(doc, &string_field);
+    TEST_ASSERT(err == ZVEC_OK);
+  }
+
+  printf("=== Testing document serialization ===\n");
+
+  uint8_t *serialized_data = NULL;
+  size_t data_size = 0;
+  err = zvec_doc_serialize(doc, &serialized_data, &data_size);
+  TEST_ASSERT(err == ZVEC_OK);
+  TEST_ASSERT(serialized_data != NULL);
+  TEST_ASSERT(data_size > 0);
+
+  ZVecDoc *deserialized_doc = NULL;
+  err = zvec_doc_deserialize(serialized_data, data_size, &deserialized_doc);
+  TEST_ASSERT(err == ZVEC_OK);
+  TEST_ASSERT(deserialized_doc != NULL);
+
+  // Verify deserialized document has same field count
+  size_t field_count = zvec_doc_get_field_count(doc);
+  size_t deserialized_field_count = zvec_doc_get_field_count(deserialized_doc);
+  TEST_ASSERT(deserialized_field_count == field_count);
+
+  // Test a field from deserialized document
+  int32_t deserialized_int32 = 0;
+  err = zvec_doc_get_field_value_basic(
+      deserialized_doc, "int32_field", ZVEC_DATA_TYPE_INT32,
+      &deserialized_int32, sizeof(deserialized_int32));
+  TEST_ASSERT(err == ZVEC_OK);
+  TEST_ASSERT(deserialized_int32 == -2147483648);
+
+  zvec_free_uint8_array(serialized_data);
+  if (text != NULL) {
+    zvec_free_string(text);
+  }
+  zvec_doc_destroy(deserialized_doc);
+  zvec_doc_destroy(doc);
+
+  TEST_END();
+}
+
+// =============================================================================
+// Index parameter tests
+// =============================================================================
+
+// Checks that the test-utility factories for HNSW, Flat and inverted index
+// parameters each return a non-NULL object.
+void test_index_params(void) {
+  TEST_START();
+
+  // Test HNSW parameter creation
+  ZVecHnswIndexParams *hnsw_params = zvec_test_create_default_hnsw_params();
+  TEST_ASSERT(hnsw_params != NULL);
+  free(hnsw_params);  // free(NULL) is a no-op, so no guard is needed
+
+  // Test Flat parameter creation
+  ZVecFlatIndexParams *flat_params = zvec_test_create_default_flat_params();
+  TEST_ASSERT(flat_params != NULL);
+  free(flat_params);
+
+  // Test scalar index parameter creation
+  ZVecInvertIndexParams *invert_params =
+      zvec_test_create_default_invert_params(true);
+  TEST_ASSERT(invert_params != NULL);
+  free(invert_params);
+
+  TEST_END();
+}
+
+// =============================================================================
+// Memory management tests
+// =============================================================================
+// Covers ZVecString creation, copy, comparison and creation from a view,
+// including a view whose payload contains an embedded NUL byte.
+void test_zvec_string_functions(void) {
+  TEST_START();
+
+  // Test string creation and basic operations
+  ZVecString *str1 = zvec_string_create("Hello World");
+  TEST_ASSERT(str1 != NULL);
+  TEST_ASSERT(zvec_string_length(str1) == 11);
+  TEST_ASSERT(strcmp(zvec_string_c_str(str1), "Hello World") == 0);
+
+  // Test string copy
+  ZVecString *str2 = zvec_string_copy(str1);
+  TEST_ASSERT(str2 != NULL);
+  TEST_ASSERT(zvec_string_length(str2) == 11);
+  TEST_ASSERT(strcmp(zvec_string_c_str(str2), "Hello World") == 0);
+
+  // Test string comparison
+  int cmp_result = zvec_string_compare(str1, str2);
+  TEST_ASSERT(cmp_result == 0);
+
+  ZVecString *str3 = zvec_string_create("Hello");
+  TEST_ASSERT(str3 != NULL);
+  TEST_ASSERT(zvec_string_compare(str1, str3) > 0);
+
+  // Test string creation from view
+  ZVecStringView view = {"Hello View", 10};
+  ZVecString *str4 = zvec_string_create_from_view(&view);
+  TEST_ASSERT(str4 != NULL);
+  TEST_ASSERT(zvec_string_length(str4) == 10);
+  TEST_ASSERT(strcmp(zvec_string_c_str(str4), "Hello View") == 0);
+
+  // Test string view with embedded null bytes
+  char binary_data[] = {'H', 'e', 'l', 'l', 'o', '\0', 'W', 'o', 'r', 'l', 'd'};
+  ZVecStringView binary_view = {binary_data, 11};
+  ZVecString *str5 = zvec_string_create_from_view(&binary_view);
+  TEST_ASSERT(str5 != NULL);
+  TEST_ASSERT(zvec_string_length(str5) == 11);
+  // Note: strcmp will stop at first null byte, so we need to compare manually
+  TEST_ASSERT(memcmp(zvec_string_c_str(str5), binary_data, 11) == 0);
+
+  // Cleanup
+  zvec_free_string(str1);
+  zvec_free_string(str2);
+  zvec_free_string(str3);
+  zvec_free_string(str4);
+  zvec_free_string(str5);
+
+  TEST_END();
+}
+
+// Initialises each stack-allocated index-parameter struct through its
+// zvec_index_params_*_init function and checks the members that were set.
+void test_index_params_functions(void) {
+  TEST_START();
+
+  // Test base index params
+  ZVecBaseIndexParams base_params;
+  zvec_index_params_base_init(&base_params, ZVEC_INDEX_TYPE_HNSW);
+  TEST_ASSERT(base_params.index_type == ZVEC_INDEX_TYPE_HNSW);
+
+  // Test invert index params
+  ZVecInvertIndexParams invert_params;
+  zvec_index_params_invert_init(&invert_params, true, false);
+  TEST_ASSERT(invert_params.base.index_type == ZVEC_INDEX_TYPE_INVERT);
+  TEST_ASSERT(invert_params.enable_range_optimization == true);
+  TEST_ASSERT(invert_params.enable_extended_wildcard == false);
+
+  // Test vector index params
+  ZVecVectorIndexParams vector_params;
+  zvec_index_params_vector_init(&vector_params, ZVEC_INDEX_TYPE_HNSW,
+                                ZVEC_METRIC_TYPE_L2,
+                                ZVEC_QUANTIZE_TYPE_UNDEFINED);
+  TEST_ASSERT(vector_params.base.index_type == ZVEC_INDEX_TYPE_HNSW);
+  TEST_ASSERT(vector_params.metric_type == ZVEC_METRIC_TYPE_L2);
+  TEST_ASSERT(vector_params.quantize_type == ZVEC_QUANTIZE_TYPE_UNDEFINED);
+
+  // Test HNSW index params
+  ZVecHnswIndexParams hnsw_params;
+  zvec_index_params_hnsw_init(&hnsw_params, ZVEC_METRIC_TYPE_COSINE, 16, 200,
+                              50, ZVEC_QUANTIZE_TYPE_UNDEFINED);
+  TEST_ASSERT(hnsw_params.base.base.index_type == ZVEC_INDEX_TYPE_HNSW);
+  TEST_ASSERT(hnsw_params.base.metric_type == ZVEC_METRIC_TYPE_COSINE);
+  TEST_ASSERT(hnsw_params.m == 16);
+  TEST_ASSERT(hnsw_params.ef_construction == 200);
+  TEST_ASSERT(hnsw_params.ef_search == 50);
+
+  // Test Flat index params
+  ZVecFlatIndexParams flat_params;
+  zvec_index_params_flat_init(&flat_params, ZVEC_METRIC_TYPE_IP,
+                              ZVEC_QUANTIZE_TYPE_UNDEFINED);
+  TEST_ASSERT(flat_params.base.base.index_type == ZVEC_INDEX_TYPE_FLAT);
+  TEST_ASSERT(flat_params.base.metric_type == ZVEC_METRIC_TYPE_IP);
+
+  // Test IVF index params
+  ZVecIVFIndexParams ivf_params;
+  zvec_index_params_ivf_init(&ivf_params, ZVEC_METRIC_TYPE_L2, 100, 10, true, 5,
+                             ZVEC_QUANTIZE_TYPE_UNDEFINED);
+  TEST_ASSERT(ivf_params.base.base.index_type == ZVEC_INDEX_TYPE_IVF);
+  TEST_ASSERT(ivf_params.n_list == 100);
+  TEST_ASSERT(ivf_params.n_iters == 10);
+  TEST_ASSERT(ivf_params.use_soar == true);
+  TEST_ASSERT(ivf_params.n_probe == 5);
+
+  TEST_END();
+}
+
+// Checks that the enum-to-string helpers return non-NULL (and, for the first
+// value of each kind, non-empty) strings.
+void test_utility_functions(void) {
+  TEST_START();
+
+  // Test error code to string conversion
+  const char *error_str = zvec_error_code_to_string(ZVEC_OK);
+  TEST_ASSERT(error_str != NULL);
+  TEST_ASSERT(strlen(error_str) > 0);
+
+  error_str = zvec_error_code_to_string(ZVEC_ERROR_INVALID_ARGUMENT);
+  TEST_ASSERT(error_str != NULL);
+
+  // Test data type to string conversion
+  const char *data_type_str = zvec_data_type_to_string(ZVEC_DATA_TYPE_INT32);
+  TEST_ASSERT(data_type_str != NULL);
+  TEST_ASSERT(strlen(data_type_str) > 0);
+
+  data_type_str = zvec_data_type_to_string(ZVEC_DATA_TYPE_STRING);
+  TEST_ASSERT(data_type_str != NULL);
+
+  // Test index type to string conversion
+  const char *index_type_str = zvec_index_type_to_string(ZVEC_INDEX_TYPE_HNSW);
+  TEST_ASSERT(index_type_str != NULL);
+  TEST_ASSERT(strlen(index_type_str) > 0);
+
+  index_type_str = zvec_index_type_to_string(ZVEC_INDEX_TYPE_INVERT);
+  TEST_ASSERT(index_type_str != NULL);
+
+  TEST_END();
+}
+
+// Smoke test: one ZVecString is created and released again.
+void test_memory_management_functions(void) {
+  TEST_START();
+
+  // Test string allocation and deallocation
+  ZVecString *str = zvec_string_create("Test String");
+  TEST_ASSERT(str != NULL);
+  zvec_free_string(str);
+
+  TEST_END();
+}
+
+// Creates every query-parameter flavour, exercises the setters and destroys
+// the objects; the type-specific destroy functions are also called with NULL.
+void test_query_params_functions(void) {
+  TEST_START();
+
+  // Test basic query parameters creation and destruction
+  ZVecQueryParams *base_params = zvec_query_params_create(ZVEC_INDEX_TYPE_HNSW);
+  TEST_ASSERT(base_params != NULL);
+
+  // Test union query parameters
+  ZVecQueryParamsUnion *union_params =
+      zvec_query_params_union_create(ZVEC_INDEX_TYPE_HNSW);
+  TEST_ASSERT(union_params != NULL);
+
+  // Test HNSW query parameters
+  ZVecHnswQueryParams *hnsw_params = zvec_query_params_hnsw_create(
+      ZVEC_INDEX_TYPE_HNSW, 50, 0.5f, false, true);
+  TEST_ASSERT(hnsw_params != NULL);
+
+  // Test IVF query parameters
+  ZVecIVFQueryParams *ivf_params =
+      zvec_query_params_ivf_create(ZVEC_INDEX_TYPE_IVF, 10, true, 1.5f);
+  TEST_ASSERT(ivf_params != NULL);
+
+  // Test Flat query parameters
+  ZVecFlatQueryParams *flat_params =
+      zvec_query_params_flat_create(ZVEC_INDEX_TYPE_FLAT, false, 2.0f);
+  TEST_ASSERT(flat_params != NULL);
+
+  // Test setting various parameters on base query params
+  ZVecErrorCode err;
+
+  // Test index type setting
+  err = zvec_query_params_set_index_type(base_params, ZVEC_INDEX_TYPE_IVF);
+  TEST_ASSERT(err == ZVEC_OK);
+
+  // Test radius setting
+  err = zvec_query_params_set_radius(base_params, 0.8f);
+  TEST_ASSERT(err == ZVEC_OK);
+
+  // Test linear search setting
+  err = zvec_query_params_set_is_linear(base_params, false);
+  TEST_ASSERT(err == ZVEC_OK);
+
+  // Test refiner setting
+  err = zvec_query_params_set_is_using_refiner(base_params, true);
+  TEST_ASSERT(err == ZVEC_OK);
+
+  // Test HNSW-specific parameters
+  err = zvec_query_params_hnsw_set_ef(hnsw_params, 75);
+  TEST_ASSERT(err == ZVEC_OK);
+
+  // Test IVF-specific parameters
+  err = zvec_query_params_ivf_set_nprobe(ivf_params, 15);
+  TEST_ASSERT(err == ZVEC_OK);
+
+  // Test IVF scale factor setting
+  err = zvec_query_params_ivf_set_scale_factor(ivf_params, 2.5f);
+  TEST_ASSERT(err == ZVEC_OK);
+
+  // Test destruction of valid parameters
+  zvec_query_params_destroy(base_params);
+  zvec_query_params_hnsw_destroy(hnsw_params);
+  zvec_query_params_ivf_destroy(ivf_params);
+  zvec_query_params_flat_destroy(flat_params);
+  zvec_query_params_union_destroy(union_params);
+
+
+  // Test boundary cases - null pointer handling
+  zvec_query_params_hnsw_destroy(NULL);
+  zvec_query_params_ivf_destroy(NULL);
+  zvec_query_params_flat_destroy(NULL);
+  zvec_query_params_union_destroy(NULL);
+
+
+  TEST_END();
+}
+
+// Opens a fresh collection, checks that its statistics report zero documents
+// and that zvec_collection_get_stats rejects NULL arguments.
+void test_collection_stats_functions(void) {
+  TEST_START();
+
+  char temp_dir[] = "/tmp/zvec_test_collection_stats_functions";
+
+  ZVecCollectionSchema *schema = zvec_test_create_temp_schema();
+  TEST_ASSERT(schema != NULL);
+
+  if (schema) {
+    ZVecCollection *collection = NULL;
+    ZVecErrorCode err =
+        zvec_collection_create_and_open(temp_dir, schema, NULL, &collection);
+    TEST_ASSERT(err == ZVEC_OK);
+
+    if (collection) {
+      ZVecCollectionStats *stats = NULL;
+
+      // Test normal statistics retrieval
+      err = zvec_collection_get_stats(collection, &stats);
+      TEST_ASSERT(err == ZVEC_OK);
+
+      if (stats) {
+        TEST_ASSERT(stats->doc_count == 0);
+        zvec_collection_stats_destroy(stats);
+        // Do not keep a dangling pointer around for the calls below.
+        stats = NULL;
+      }
+
+      // Test NULL parameters
+      err = zvec_collection_get_stats(NULL, &stats);
+      TEST_ASSERT(err != ZVEC_OK);
+
+      err = zvec_collection_get_stats(collection, NULL);
+      TEST_ASSERT(err != ZVEC_OK);
+
+      // Test statistics destruction boundary cases
+      zvec_collection_stats_destroy(NULL);
+      zvec_collection_destroy(collection);
+    }
+
+    zvec_collection_schema_destroy(schema);
+  }
+
+  // Clean up temporary directory
+  char cmd[256];
+  snprintf(cmd, sizeof(cmd), "rm -rf %s", temp_dir);
+  system(cmd);
+
+  TEST_END();
+}
+
+// Feeds invalid argument combinations (NULL collection, NULL arrays, zero
+// counts, NULL counters) to insert/update/upsert/delete and expects errors.
+void test_collection_dml_functions(void) {
+  TEST_START();
+
+  char temp_dir[] = "/tmp/zvec_test_collection_dml";
+
+  ZVecCollectionSchema *schema = zvec_test_create_temp_schema();
+  TEST_ASSERT(schema != NULL);
+
+  if (schema) {
+    ZVecCollection *collection = NULL;
+    ZVecErrorCode err =
+        zvec_collection_create_and_open(temp_dir, schema, NULL, &collection);
+    TEST_ASSERT(err == ZVEC_OK);
+    TEST_ASSERT(collection != NULL);
+
+    if (collection) {
+      // Test insertion function boundary cases
+      size_t success_count = 0;
+      size_t error_count = 0;
+
+      // Test NULL collection
+      err = zvec_collection_insert(NULL, NULL, 0, &success_count, &error_count);
+      TEST_ASSERT(err != ZVEC_OK);
+
+      // Test NULL document array
+      err = zvec_collection_insert(collection, NULL, 0, &success_count,
+                                   &error_count);
+      TEST_ASSERT(err != ZVEC_OK);
+
+      // Test zero document count. The array is NULL-initialised: one call
+      // below passes it with a count of 1, so its element must not be
+      // indeterminate.
+      ZVecDoc *empty_docs[1] = {NULL};
+      err = zvec_collection_insert(collection, (const ZVecDoc **)empty_docs, 0,
+                                   &success_count, &error_count);
+      TEST_ASSERT(err != ZVEC_OK);
+
+      // Test NULL count pointer
+      err = zvec_collection_insert(collection, (const ZVecDoc **)empty_docs, 1,
+                                   NULL, &error_count);
+      TEST_ASSERT(err != ZVEC_OK);
+
+      // Test update function boundary cases
+      err = zvec_collection_update(NULL, NULL, 0, &success_count, &error_count);
+      TEST_ASSERT(err != ZVEC_OK);
+
+      err = zvec_collection_update(collection, NULL, 0, &success_count,
+                                   &error_count);
+      TEST_ASSERT(err != ZVEC_OK);
+
+      err = zvec_collection_update(collection, (const ZVecDoc **)empty_docs, 0,
+                                   NULL, &error_count);
+      TEST_ASSERT(err != ZVEC_OK);
+
+      // Test upsert function boundary cases
+      err = zvec_collection_upsert(NULL, NULL, 0, &success_count, &error_count);
+      TEST_ASSERT(err != ZVEC_OK);
+
+      err = zvec_collection_upsert(collection, NULL, 0, &success_count,
+                                   &error_count);
+      TEST_ASSERT(err != ZVEC_OK);
+
+      err = zvec_collection_upsert(collection, (const ZVecDoc **)empty_docs, 0,
+                                   NULL, &error_count);
+      TEST_ASSERT(err != ZVEC_OK);
+
+      // Test deletion function boundary cases
+      const char *pks[1] = {NULL};
+      err = zvec_collection_delete(NULL, NULL, 0, &success_count, &error_count);
+      TEST_ASSERT(err != ZVEC_OK);
+
+      err = zvec_collection_delete(collection, NULL, 0, &success_count,
+                                   &error_count);
+      TEST_ASSERT(err != ZVEC_OK);
+
+      err = zvec_collection_delete(collection, pks, 0, NULL, &error_count);
+      TEST_ASSERT(err != ZVEC_OK);
+
+      // Test deletion by filter boundary cases
+      err = zvec_collection_delete_by_filter(NULL, NULL);
+      TEST_ASSERT(err != ZVEC_OK);
+
+      err = zvec_collection_delete_by_filter(collection, NULL);
+      TEST_ASSERT(err != ZVEC_OK);
+
+      zvec_collection_destroy(collection);
+    }
+
+    zvec_collection_schema_destroy(schema);
+  }
+
+  // Clean up temporary directory
+  char cmd[256];
+  snprintf(cmd, sizeof(cmd), "rm -rf %s", temp_dir);
+  system(cmd);
+
+  TEST_END();
+}
+
+// =============================================================================
+// Actual Query Execution Tests
+// =============================================================================
+
+// Inserts four 4-dimensional vectors into an HNSW-indexed collection and runs
+// a plain top-k query followed by the same query with an "id > 2" filter.
+void test_actual_vector_queries(void) {
+  TEST_START();
+
+  char temp_dir[] = "/tmp/zvec_test_actual_queries";
+
+  // Create schema with vector field
+  ZVecCollectionSchema *schema = zvec_collection_schema_create("query_test");
+  TEST_ASSERT(schema != NULL);
+
+  if (schema) {
+    // Add ID field
+    ZVecFieldSchema *id_field =
+        zvec_field_schema_create("id", ZVEC_DATA_TYPE_INT64, false, 0);
+    zvec_collection_schema_add_field(schema, id_field);
+
+    // Add vector field with HNSW index
+    ZVecHnswIndexParams *hnsw_params = zvec_index_params_hnsw_create(
+        ZVEC_METRIC_TYPE_L2, ZVEC_QUANTIZE_TYPE_UNDEFINED, 16, 100, 50);
+    ZVecFieldSchema *vec_field = zvec_field_schema_create(
+        "embedding", ZVEC_DATA_TYPE_VECTOR_FP32, false, 4);
+    zvec_field_schema_set_hnsw_index(vec_field, hnsw_params);
+    zvec_collection_schema_add_field(schema, vec_field);
+
+    ZVecCollection *collection = NULL;
+    ZVecErrorCode err =
+        zvec_collection_create_and_open(temp_dir, schema, NULL, &collection);
+    TEST_ASSERT(err == ZVEC_OK);
+    TEST_ASSERT(collection != NULL);
+
+    if (collection) {
+      // Insert test documents
+      float vec1[] = {1.0f, 0.0f, 0.0f, 0.0f};
+      float vec2[] = {0.0f, 1.0f, 0.0f, 0.0f};
+      float vec3[] = {0.0f, 0.0f, 1.0f, 0.0f};
+      float vec4[] = {0.7f, 0.7f, 0.0f, 0.0f};  // Similar to vec1 and vec2
+
+      ZVecDoc *docs[4];
+      for (int i = 0; i < 4; i++) {
+        docs[i] = zvec_doc_create();
+        zvec_doc_set_pk(docs[i], zvec_test_make_pk(i + 1));
+        zvec_doc_add_field_by_value(docs[i], "id", ZVEC_DATA_TYPE_INT64,
+                                    &(int64_t){i + 1}, sizeof(int64_t));
+      }
+
+      zvec_doc_add_field_by_value(
+          docs[0], "embedding", ZVEC_DATA_TYPE_VECTOR_FP32, vec1, sizeof(vec1));
+      zvec_doc_add_field_by_value(
+          docs[1], "embedding", ZVEC_DATA_TYPE_VECTOR_FP32, vec2, sizeof(vec2));
+      zvec_doc_add_field_by_value(
+          docs[2], "embedding", ZVEC_DATA_TYPE_VECTOR_FP32, vec3, sizeof(vec3));
+      zvec_doc_add_field_by_value(
+          docs[3], "embedding", ZVEC_DATA_TYPE_VECTOR_FP32, vec4, sizeof(vec4));
+
+      size_t success_count = 0;
+      size_t error_count = 0;
+      err = zvec_collection_insert(collection, (const ZVecDoc **)docs, 4,
+                                   &success_count, &error_count);
+      TEST_ASSERT(err == ZVEC_OK);
+      TEST_ASSERT(success_count == 4);
+      TEST_ASSERT(error_count == 0);
+
+      // Flush collection to build index
+      zvec_collection_flush(collection);
+
+      // Test 1: Basic vector search
+      ZVecVectorQuery query1 = {0};
+      query1.field_name = (ZVecString){.data = "embedding", .length = 9};
+      query1.query_vector =
+          (ZVecByteArray){.data = (uint8_t *)vec1, .length = sizeof(vec1)};
+      query1.topk = 3;
+      query1.include_vector = true;
+      query1.include_doc_id = true;
+
+      ZVecDoc **results = NULL;
+      size_t result_count = 0;
+      err = zvec_collection_query(collection, &query1, &results, &result_count);
+      TEST_ASSERT(err == ZVEC_OK);
+      TEST_ASSERT(result_count > 0);
+      TEST_ASSERT(results != NULL);
+
+      // First result should be vec1 itself (distance ~0)
+      if (result_count > 0) {
+        float score = zvec_doc_get_score(results[0]);
+        TEST_ASSERT(score < 0.001f);  // Very small distance
+      }
+
+      zvec_docs_free(results, result_count);
+      // Reset the out-params: if the next query fails without touching them,
+      // the loop and the second free must not see the released array again.
+      results = NULL;
+      result_count = 0;
+
+      // Test 2: Search with filter
+      ZVecVectorQuery query2 = query1;
+      query2.filter = (ZVecString){.data = "id > 2", .length = 6};
+
+      err = zvec_collection_query(collection, &query2, &results, &result_count);
+      TEST_ASSERT(err == ZVEC_OK);
+
+      // Should only return documents with id > 2
+      for (size_t i = 0; results != NULL && i < result_count; i++) {
+        int64_t id = 0;
+        err = zvec_doc_get_field_value_basic(results[i], "id",
+                                             ZVEC_DATA_TYPE_INT64, &id,
+                                             sizeof(id));
+        TEST_ASSERT(err == ZVEC_OK);
+        TEST_ASSERT(id > 2);
+      }
+
+      if (results != NULL) {
+        zvec_docs_free(results, result_count);
+      }
+
+      // Cleanup documents
+      for (int i = 0; i < 4; i++) {
+        zvec_doc_destroy(docs[i]);
+      }
+
+      zvec_collection_destroy(collection);
+    }
+
+    zvec_collection_schema_destroy(schema);
+    zvec_index_params_hnsw_destroy(hnsw_params);
+  }
+
+  // Clean up
+  char cmd[256];
+  snprintf(cmd, sizeof(cmd), "rm -rf %s", temp_dir);
+  system(cmd);
+
+  TEST_END();
+}
+
+// Creates an HNSW index on "dense" and an inverted index on "name", drops the
+// latter again and finally runs zvec_collection_optimize.
+void test_index_creation_and_management(void) {
+  TEST_START();
+
+  char temp_dir[] = "/tmp/zvec_test_index_management";
+
+  ZVecCollectionSchema *schema = zvec_test_create_temp_schema();
+  TEST_ASSERT(schema != NULL);
+
+  if (schema) {
+    ZVecCollection *collection = NULL;
+    ZVecErrorCode err =
+        zvec_collection_create_and_open(temp_dir, schema, NULL, &collection);
+    TEST_ASSERT(err == ZVEC_OK);
+    TEST_ASSERT(collection != NULL);
+
+    if (collection) {
+      // Test 1: Create HNSW index
+      ZVecHnswIndexParams *hnsw_params = zvec_index_params_hnsw_create(
+          ZVEC_METRIC_TYPE_COSINE, ZVEC_QUANTIZE_TYPE_UNDEFINED, 16, 100, 50);
+      TEST_ASSERT(hnsw_params != NULL);
+
+      err = zvec_collection_create_hnsw_index(collection, "dense", hnsw_params);
+      TEST_ASSERT(err == ZVEC_OK);
+
+      // Test 2: Create scalar index
+      ZVecInvertIndexParams *invert_params =
+          zvec_index_params_invert_create(true, false);
+      TEST_ASSERT(invert_params != NULL);
+
+      err = zvec_collection_create_invert_index(collection, "name",
+                                                invert_params);
+      TEST_ASSERT(err == ZVEC_OK);
+
+      err = zvec_collection_drop_index(collection, "name");
+      TEST_ASSERT(err == ZVEC_OK);
+
+      // Test 3: Optimize collection
+      err = zvec_collection_optimize(collection);
+      TEST_ASSERT(err == ZVEC_OK);
+
+      zvec_collection_destroy(collection);
+      zvec_index_params_hnsw_destroy(hnsw_params);
+      zvec_index_params_invert_destroy(invert_params);
+    }
+
+    zvec_collection_schema_destroy(schema);
+  }
+
+  // Clean up
+  char cmd[256];
+  snprintf(cmd, sizeof(cmd), "rm -rf %s", temp_dir);
+  system(cmd);
+
+  TEST_END();
+}
+
+// Adds, alters and drops a column on an open collection and checks the schema
+// field count after the add and after the drop.
+void test_collection_ddl_operations(void) {
+  TEST_START();
+
+  char temp_dir[] = "/tmp/zvec_test_collection_ddl";
+
+  ZVecCollectionSchema *schema = zvec_test_create_temp_schema();
+  TEST_ASSERT(schema != NULL);
+
+  if (schema) {
+    // Read the baseline only after the NULL check on schema.
+    size_t field_count = zvec_collection_schema_get_field_count(schema);
+
+    ZVecCollection *collection = NULL;
+    ZVecErrorCode err =
+        zvec_collection_create_and_open(temp_dir, schema, NULL, &collection);
+    TEST_ASSERT(err == ZVEC_OK);
+    TEST_ASSERT(collection != NULL);
+
+    if (collection) {
+      // Test 1: Add new column
+      ZVecFieldSchema *new_field =
+          zvec_field_schema_create("new_int32", ZVEC_DATA_TYPE_INT32, true, 0);
+      TEST_ASSERT(new_field != NULL);
+
+      err = zvec_collection_add_column(collection, new_field, NULL);
+      TEST_ASSERT(err == ZVEC_OK);
+
+      // Test 2: Get collection schema and verify field count
+      ZVecCollectionSchema *retrieved_schema = NULL;
+      err = zvec_collection_get_schema(collection, &retrieved_schema);
+      TEST_ASSERT(err == ZVEC_OK);
+      TEST_ASSERT(retrieved_schema != NULL);
+
+      size_t new_field_count =
+          zvec_collection_schema_get_field_count(retrieved_schema);
+      TEST_ASSERT((field_count + 1) == new_field_count);
+
+      // Release this snapshot now; the pointer is reused for a second
+      // snapshot below and would otherwise be overwritten without a destroy.
+      zvec_collection_schema_destroy(retrieved_schema);
+      retrieved_schema = NULL;
+
+      // Test 3: Alter column
+      ZVecFieldSchema *alter_field =
+          zvec_field_schema_create("new_float", ZVEC_DATA_TYPE_FLOAT, true, 0);
+      TEST_ASSERT(alter_field != NULL);
+
+      err = zvec_collection_alter_column(collection, "new_int32", "",
+                                         alter_field);
+      TEST_ASSERT(err == ZVEC_OK);
+
+      // Test 4: Drop column
+      err = zvec_collection_drop_column(collection, "new_float");
+      TEST_ASSERT(err == ZVEC_OK);
+
+      // Test 5: Verify field count after drop
+      err = zvec_collection_get_schema(collection, &retrieved_schema);
+      TEST_ASSERT(err == ZVEC_OK);
+      TEST_ASSERT(retrieved_schema != NULL);
+      if (retrieved_schema != NULL) {
+        new_field_count =
+            zvec_collection_schema_get_field_count(retrieved_schema);
+        TEST_ASSERT(new_field_count == field_count);
+        zvec_collection_schema_destroy(retrieved_schema);
+      }
+
+      zvec_field_schema_destroy(new_field);
+      zvec_field_schema_destroy(alter_field);
+
+      zvec_collection_destroy(collection);
+    }
+
+    zvec_collection_schema_destroy(schema);
+  }
+
+  // Clean up
+  char cmd[256];
+  snprintf(cmd, sizeof(cmd), "rm -rf %s", temp_dir);
+  system(cmd);
+
+  TEST_END();
+}
+
+// Creates a scalar and a vector field schema, checks the stored attributes
+// and attaches HNSW index parameters to the vector field.
+void test_field_ddl_operations(void) {
+  TEST_START();
+
+  // Test field schema creation with various configurations
+  ZVecFieldSchema *field1 =
+      zvec_field_schema_create("test_field1", ZVEC_DATA_TYPE_STRING, false, 0);
+  TEST_ASSERT(field1 != NULL);
+  TEST_ASSERT(strcmp(field1->name->data, "test_field1") == 0);
+  TEST_ASSERT(field1->data_type == ZVEC_DATA_TYPE_STRING);
+  TEST_ASSERT(field1->nullable == false);
+  TEST_ASSERT(field1->dimension == 0);
+
+  ZVecFieldSchema *field2 = zvec_field_schema_create(
+      "test_field2", ZVEC_DATA_TYPE_VECTOR_FP32, true, 128);
+  TEST_ASSERT(field2 != NULL);
+  TEST_ASSERT(field2->data_type == ZVEC_DATA_TYPE_VECTOR_FP32);
+  TEST_ASSERT(field2->nullable == true);
+  TEST_ASSERT(field2->dimension == 128);
+
+  // Test index parameter setting
+  ZVecHnswIndexParams *hnsw_params = zvec_index_params_hnsw_create(
+      ZVEC_METRIC_TYPE_L2, ZVEC_QUANTIZE_TYPE_UNDEFINED, 16, 100, 50);
+  TEST_ASSERT(hnsw_params != NULL);
+
+  ZVecErrorCode err = zvec_field_schema_set_index_params(
+      field2, (ZVecIndexParams *)hnsw_params);
+  TEST_ASSERT(err == ZVEC_OK);
+
+  // Cleanup
+  zvec_field_schema_destroy(field1);
+  zvec_field_schema_destroy(field2);
+  zvec_index_params_hnsw_destroy(hnsw_params);
+
+  TEST_END();
+}
+
+void test_performance_benchmarks(void) {
+  TEST_START();
+
+  char temp_dir[] = "/tmp/zvec_test_performance";
+
+  ZVecCollectionSchema *schema = zvec_collection_schema_create("perf_test");
+  TEST_ASSERT(schema != NULL);
+
+  if (schema) {
+    // Create simple schema for performance testing
+    ZVecFieldSchema *id_field =
+        zvec_field_schema_create("id", ZVEC_DATA_TYPE_INT64, false, 0);
+    zvec_collection_schema_add_field(schema, id_field);
+
+    ZVecFieldSchema *vec_field =
+        zvec_field_schema_create("vec", ZVEC_DATA_TYPE_VECTOR_FP32, false, 128);
+    ZVecHnswIndexParams *hnsw_params = zvec_index_params_hnsw_create(
+        ZVEC_METRIC_TYPE_L2, ZVEC_QUANTIZE_TYPE_UNDEFINED, 16, 100, 50);
+    zvec_field_schema_set_hnsw_index(vec_field, hnsw_params);
+    zvec_collection_schema_add_field(schema, vec_field);
+
+    ZVecCollection *collection = NULL;
+    ZVecErrorCode err =
+        zvec_collection_create_and_open(temp_dir, schema, NULL, &collection);
+    TEST_ASSERT(err == ZVEC_OK);
+
+    TEST_ASSERT(collection != NULL);
+
+    if (collection) {
+      const size_t BATCH_SIZE = 1000;
+      const size_t TOTAL_DOCS = 10000;
+
+      // Test bulk insertion performance
+#ifdef _POSIX_C_SOURCE
+      struct timeval start_time, end_time;
+      gettimeofday(&start_time, NULL);
+#else
+      clock_t start_clock = clock();
+#endif
+
+      for (size_t batch_start = 0; batch_start < TOTAL_DOCS;
+           batch_start += BATCH_SIZE) {
+        ZVecDoc *batch_docs[BATCH_SIZE];
+        size_t current_batch_size = (batch_start + BATCH_SIZE > TOTAL_DOCS)
+                                        ?
TOTAL_DOCS - batch_start + : BATCH_SIZE; + + // Create batch of documents + for (size_t i = 0; i < current_batch_size; i++) { + batch_docs[i] = zvec_doc_create(); + zvec_doc_set_pk(batch_docs[i], zvec_test_make_pk(batch_start + i)); + + int64_t id = batch_start + i; + zvec_doc_add_field_by_value(batch_docs[i], "id", ZVEC_DATA_TYPE_INT64, + &id, sizeof(id)); + + // Create random vector + float vec[128]; + for (int j = 0; j < 128; j++) { + vec[j] = (float)rand() / RAND_MAX; + } + zvec_doc_add_field_by_value(batch_docs[i], "vec", + ZVEC_DATA_TYPE_VECTOR_FP32, vec, + sizeof(vec)); + } + + // Insert batch + size_t success_count, error_count; + err = zvec_collection_insert(collection, (const ZVecDoc **)batch_docs, + current_batch_size, &success_count, + &error_count); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(success_count == current_batch_size); + TEST_ASSERT(error_count == 0); + + // Cleanup batch documents + for (size_t i = 0; i < current_batch_size; i++) { + zvec_doc_destroy(batch_docs[i]); + } + } + +#ifdef _POSIX_C_SOURCE + gettimeofday(&end_time, NULL); + double insert_time = (end_time.tv_sec - start_time.tv_sec) + + (end_time.tv_usec - start_time.tv_usec) / 1000000.0; +#else + clock_t end_clock = clock(); + double insert_time = ((double)(end_clock - start_clock)) / CLOCKS_PER_SEC; +#endif + printf(" Inserted %zu documents in %.3f seconds (%.0f docs/sec)\n", + TOTAL_DOCS, insert_time, TOTAL_DOCS / insert_time); + + // Flush and optimize + zvec_collection_flush(collection); + zvec_collection_optimize(collection); + + // Test query performance + float query_vec[128]; + for (int i = 0; i < 128; i++) { + query_vec[i] = (float)rand() / RAND_MAX; + } + + ZVecVectorQuery query = {0}; + query.field_name = (ZVecString){.data = "vec", .length = 3}; + query.query_vector = (ZVecByteArray){.data = (uint8_t *)query_vec, + .length = sizeof(query_vec)}; + query.topk = 10; + query.include_vector = false; + query.include_doc_id = true; + + const int QUERY_COUNT = 100; +#ifdef 
_POSIX_C_SOURCE + struct timeval query_start_time, query_end_time; + gettimeofday(&query_start_time, NULL); +#else + clock_t query_start_clock = clock(); +#endif + + for (int q = 0; q < QUERY_COUNT; q++) { + ZVecDoc **results = NULL; + size_t result_count = 0; + + err = + zvec_collection_query(collection, &query, &results, &result_count); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(result_count <= 10); + + zvec_docs_free(results, result_count); + } + +#ifdef _POSIX_C_SOURCE + gettimeofday(&query_end_time, NULL); + double query_time = + (query_end_time.tv_sec - query_start_time.tv_sec) + + (query_end_time.tv_usec - query_start_time.tv_usec) / 1000000.0; +#else + clock_t query_end_clock = clock(); + double query_time = + ((double)(query_end_clock - query_start_clock)) / CLOCKS_PER_SEC; +#endif + double avg_query_time = + (query_time * 1000) / QUERY_COUNT; // ms per query + printf(" Average query time: %.2f ms\n", avg_query_time); + + zvec_collection_destroy(collection); + zvec_index_params_hnsw_destroy(hnsw_params); + } + + zvec_collection_schema_destroy(schema); + } + + // Clean up + char cmd[256]; + snprintf(cmd, sizeof(cmd), "rm -rf %s", temp_dir); + system(cmd); + + TEST_END(); +} + +// ============================================================================= +// Additional tests for uncovered API functions +// ============================================================================= + +void test_zvec_shutdown(void) { + TEST_START(); + + // Test shutdown + ZVecErrorCode err = zvec_shutdown(); + TEST_ASSERT(err == ZVEC_OK); + + // Re-initialize for other tests + ZVecConfigData *config = zvec_config_data_create(); + TEST_ASSERT(config != NULL); + err = zvec_initialize(config); + TEST_ASSERT(err == ZVEC_OK); + zvec_config_data_destroy(config); + + TEST_END(); +} + +void test_index_params_creation_functions(void) { + TEST_START(); + + // Test zvec_index_params_init_default + ZVecIndexParams params; + zvec_index_params_init_default(¶ms, ZVEC_INDEX_TYPE_HNSW, 
+ ZVEC_METRIC_TYPE_COSINE); + TEST_ASSERT(params.index_type == ZVEC_INDEX_TYPE_HNSW); + + // Test zvec_index_params_vector_create + ZVecVectorIndexParams *vector_params = zvec_index_params_vector_create( + ZVEC_INDEX_TYPE_HNSW, ZVEC_METRIC_TYPE_L2, ZVEC_QUANTIZE_TYPE_FP16); + TEST_ASSERT(vector_params != NULL); + TEST_ASSERT(vector_params->base.index_type == ZVEC_INDEX_TYPE_HNSW); + TEST_ASSERT(vector_params->metric_type == ZVEC_METRIC_TYPE_L2); + TEST_ASSERT(vector_params->quantize_type == ZVEC_QUANTIZE_TYPE_FP16); + if (vector_params) { + zvec_index_params_vector_destroy(vector_params); + } + + // Test zvec_index_params_ivf_create + ZVecIVFIndexParams *ivf_params = zvec_index_params_ivf_create( + ZVEC_METRIC_TYPE_L2, ZVEC_QUANTIZE_TYPE_INT8, 100, 10, true, 5); + TEST_ASSERT(ivf_params != NULL); + TEST_ASSERT(ivf_params->base.base.index_type == ZVEC_INDEX_TYPE_IVF); + TEST_ASSERT(ivf_params->base.metric_type == ZVEC_METRIC_TYPE_L2); + TEST_ASSERT(ivf_params->n_list == 100); + TEST_ASSERT(ivf_params->n_iters == 10); + TEST_ASSERT(ivf_params->use_soar == true); + TEST_ASSERT(ivf_params->n_probe == 5); + if (ivf_params) { + zvec_index_params_ivf_destroy(ivf_params); + } + + // Test zvec_index_params_vector_destroy + ZVecVectorIndexParams *vector_params2 = zvec_index_params_vector_create( + ZVEC_INDEX_TYPE_FLAT, ZVEC_METRIC_TYPE_IP, ZVEC_QUANTIZE_TYPE_UNDEFINED); + TEST_ASSERT(vector_params2 != NULL); + zvec_index_params_vector_destroy(vector_params2); + + TEST_END(); +} + +void test_collection_advanced_index_functions(void) { + TEST_START(); + + const char *temp_dir = "/tmp/zvec_test_advanced_index"; + zvec_test_delete_dir(temp_dir); + + // Create schema + ZVecCollectionSchema *schema = + zvec_collection_schema_create("test_collection"); + TEST_ASSERT(schema != NULL); + + if (schema) { + // Add fields + ZVecFieldSchema *id_field = + zvec_field_schema_create("id", ZVEC_DATA_TYPE_INT64, false, 0); + ZVecFieldSchema *vec_field = + zvec_field_schema_create("vec", 
ZVEC_DATA_TYPE_VECTOR_FP32, false, 128); + zvec_collection_schema_add_field(schema, id_field); + zvec_collection_schema_add_field(schema, vec_field); + + ZVecCollectionOptions options = ZVEC_DEFAULT_OPTIONS(); + options.max_doc_count_per_segment = 1000; + ZVecCollection *collection = NULL; + + ZVecErrorCode err = zvec_collection_create_and_open(temp_dir, schema, + &options, &collection); + TEST_ASSERT(err == ZVEC_OK); + + if (collection) { + // Test zvec_collection_create_flat_index + ZVecFlatIndexParams *flat_params = zvec_index_params_flat_create( + ZVEC_METRIC_TYPE_L2, ZVEC_QUANTIZE_TYPE_UNDEFINED); + TEST_ASSERT(flat_params != NULL); + err = zvec_collection_create_flat_index(collection, "vec", flat_params); + TEST_ASSERT(err == ZVEC_OK); + zvec_index_params_flat_destroy(flat_params); + + // Test zvec_collection_create_ivf_index + ZVecIVFIndexParams *ivf_params = zvec_index_params_ivf_create( + ZVEC_METRIC_TYPE_L2, ZVEC_QUANTIZE_TYPE_INT8, 100, 10, true, 5); + TEST_ASSERT(ivf_params != NULL); + err = zvec_collection_drop_index(collection, + "vec"); // Drop previous index first + TEST_ASSERT(err == ZVEC_OK); + err = zvec_collection_create_ivf_index(collection, "vec", ivf_params); + TEST_ASSERT(err == ZVEC_OK); + zvec_index_params_ivf_destroy(ivf_params); + + // Test zvec_collection_create_index_with_params + ZVecHnswIndexParams *hnsw_params = zvec_index_params_hnsw_create( + ZVEC_METRIC_TYPE_COSINE, ZVEC_QUANTIZE_TYPE_FP16, 16, 100, 50); + TEST_ASSERT(hnsw_params != NULL); + err = zvec_collection_drop_index(collection, "vec"); + TEST_ASSERT(err == ZVEC_OK); + err = zvec_collection_create_index_with_params(collection, "vec", + hnsw_params); + TEST_ASSERT(err == ZVEC_OK); + zvec_index_params_hnsw_destroy(hnsw_params); + + // Test zvec_field_schema_set_ivf_index + ZVecFieldSchema *new_vec_field = zvec_field_schema_create( + "vec2", ZVEC_DATA_TYPE_VECTOR_FP32, false, 128); + TEST_ASSERT(new_vec_field != NULL); + ZVecIVFIndexParams *ivf_params2 = 
zvec_index_params_ivf_create( + ZVEC_METRIC_TYPE_IP, ZVEC_QUANTIZE_TYPE_UNDEFINED, 50, 5, false, 3); + TEST_ASSERT(ivf_params2 != NULL); + zvec_field_schema_set_ivf_index(new_vec_field, ivf_params2); + TEST_ASSERT(new_vec_field->index_params != NULL); + zvec_index_params_ivf_destroy(ivf_params2); + zvec_field_schema_destroy(new_vec_field); + + zvec_collection_destroy(collection); + } + zvec_collection_schema_destroy(schema); + } + + zvec_test_delete_dir(temp_dir); + TEST_END(); +} + +void test_collection_query_functions(void) { + TEST_START(); + + const char *temp_dir = "/tmp/zvec_test_query_funcs"; + zvec_test_delete_dir(temp_dir); + + // Create schema and collection + ZVecCollectionSchema *schema = zvec_collection_schema_create("query_test"); + ZVecHnswIndexParams *hnsw_params = zvec_index_params_hnsw_create( + ZVEC_METRIC_TYPE_L2, ZVEC_QUANTIZE_TYPE_UNDEFINED, 16, 100, 50); + + ZVecFieldSchema *name_field = + zvec_field_schema_create("name", ZVEC_DATA_TYPE_STRING, false, 0); + ZVecFieldSchema *vec_field = + zvec_field_schema_create("vec", ZVEC_DATA_TYPE_VECTOR_FP32, false, 4); + zvec_field_schema_set_hnsw_index(vec_field, hnsw_params); + + zvec_collection_schema_add_field(schema, name_field); + zvec_collection_schema_add_field(schema, vec_field); + + ZVecCollection *collection = NULL; + ZVecErrorCode err = + zvec_collection_create_and_open(temp_dir, schema, NULL, &collection); + TEST_ASSERT(err == ZVEC_OK); + + if (collection) { + // Insert test documents + ZVecDoc *doc1 = zvec_doc_create(); + zvec_doc_set_pk(doc1, "doc1"); + float vec1[4] = {1.0f, 0.0f, 0.0f, 0.0f}; + zvec_doc_add_field_by_value(doc1, "vec", ZVEC_DATA_TYPE_VECTOR_FP32, vec1, + sizeof(vec1)); + zvec_doc_add_field_by_value(doc1, "name", ZVEC_DATA_TYPE_STRING, + "document1", 9); + + ZVecDoc *doc2 = zvec_doc_create(); + zvec_doc_set_pk(doc2, "doc2"); + float vec2[4] = {0.0f, 1.0f, 0.0f, 0.0f}; + zvec_doc_add_field_by_value(doc2, "vec", ZVEC_DATA_TYPE_VECTOR_FP32, vec2, + sizeof(vec2)); + 
zvec_doc_add_field_by_value(doc2, "name", ZVEC_DATA_TYPE_STRING, + "document2", 9); + + ZVecDoc *docs[] = {doc1, doc2}; + size_t success_count, error_count; + err = zvec_collection_insert(collection, (const ZVecDoc **)docs, 2, + &success_count, &error_count); + TEST_ASSERT(err == ZVEC_OK); + + zvec_collection_flush(collection); + zvec_collection_optimize(collection); + + // Test zvec_collection_fetch + const char *pks[] = {"doc1", "doc2"}; + ZVecDoc **results = NULL; + size_t found_count = 0; + err = zvec_collection_fetch(collection, pks, 2, &results, &found_count); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(found_count == 2); + zvec_docs_free(results, found_count); + + // Test zvec_collection_query_by_group + ZVecGroupByVectorQuery group_query = {0}; + group_query.field_name = ZVEC_STRING("vec"); + float query_vec[4] = {0.5f, 0.5f, 0.0f, 0.0f}; + group_query.query_vector.data = (uint8_t *)query_vec; + group_query.query_vector.length = sizeof(query_vec); + group_query.group_by_field_name = ZVEC_STRING("name"); + group_query.group_count = 2; + group_query.group_topk = 1; + group_query.include_vector = false; + + ZVecStringArray output_fields = {0}; + output_fields.count = 1; + output_fields.strings = + (ZVecString *)malloc(sizeof(ZVecString) * output_fields.count); + output_fields.strings[0] = ZVEC_STRING("name"); + group_query.output_fields = output_fields; + + ZVecDoc **group_results = NULL; + ZVecString **group_values = NULL; + size_t group_result_count = 0; + err = + zvec_collection_query_by_group(collection, &group_query, &group_results, + &group_values, &group_result_count); + TEST_ASSERT(err == ZVEC_OK); + if (group_results) { + zvec_docs_free(group_results, group_result_count); + } + if (group_values) { + for (size_t i = 0; i < group_result_count; i++) { + zvec_free_string(group_values[i]); + } + free(group_values); + } + + free(output_fields.strings); + + // Test zvec_collection_get_options + ZVecCollectionOptions *options = NULL; + err = 
zvec_collection_get_options(collection, &options); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(options != NULL); + free(options); + + zvec_collection_destroy(collection); + zvec_doc_destroy(doc1); + zvec_doc_destroy(doc2); + } + + zvec_index_params_hnsw_destroy(hnsw_params); + zvec_collection_schema_destroy(schema); + zvec_test_delete_dir(temp_dir); + + TEST_END(); +} + +void test_doc_advanced_functions(void) { + TEST_START(); + + // Test zvec_doc_clear + ZVecDoc *doc = zvec_doc_create(); + zvec_doc_set_pk(doc, "test_pk"); + zvec_doc_add_field_by_value(doc, "field1", ZVEC_DATA_TYPE_INT32, + &(int32_t){100}, sizeof(int32_t)); + TEST_ASSERT(zvec_doc_get_field_count(doc) > 0); + zvec_doc_clear(doc); + TEST_ASSERT(zvec_doc_get_field_count(doc) == 0); + + // Test zvec_doc_get_pk_copy + zvec_doc_set_pk(doc, "test_pk_copy"); + const char *pk_copy = zvec_doc_get_pk_copy(doc); + TEST_ASSERT(pk_copy != NULL); + TEST_ASSERT(strcmp(pk_copy, "test_pk_copy") == 0); + free((void *)pk_copy); + + // Test zvec_doc_is_empty + ZVecDoc *empty_doc = zvec_doc_create(); + TEST_ASSERT(zvec_doc_is_empty(empty_doc) == true); + zvec_doc_add_field_by_value(empty_doc, "test", ZVEC_DATA_TYPE_INT32, + &(int32_t){1}, sizeof(int32_t)); + TEST_ASSERT(zvec_doc_is_empty(empty_doc) == false); + zvec_doc_destroy(empty_doc); + + // Test zvec_doc_memory_usage + ZVecDoc *mem_doc = zvec_doc_create(); + zvec_doc_set_pk(mem_doc, "memory_test"); + char large_data[1024]; + memset(large_data, 'A', sizeof(large_data)); + zvec_doc_add_field_by_value(mem_doc, "large_field", ZVEC_DATA_TYPE_STRING, + large_data, sizeof(large_data)); + size_t mem_usage = zvec_doc_memory_usage(mem_doc); + TEST_ASSERT(mem_usage > 0); + zvec_doc_destroy(mem_doc); + + // Test zvec_doc_merge + ZVecDoc *doc1 = zvec_doc_create(); + zvec_doc_set_pk(doc1, "merge_test"); + zvec_doc_add_field_by_value(doc1, "field1", ZVEC_DATA_TYPE_INT32, + &(int32_t){100}, sizeof(int32_t)); + + ZVecDoc *doc2 = zvec_doc_create(); + 
zvec_doc_add_field_by_value(doc2, "field2", ZVEC_DATA_TYPE_STRING, "merged", + 6); + + zvec_doc_merge(doc1, doc2); + TEST_ASSERT(zvec_doc_has_field(doc1, "field1") == true); + TEST_ASSERT(zvec_doc_has_field(doc1, "field2") == true); + + zvec_doc_destroy(doc1); + zvec_doc_destroy(doc2); + + // Test zvec_doc_validate + ZVecCollectionSchema *schema = zvec_collection_schema_create("validate_test"); + ZVecFieldSchema *val_field = + zvec_field_schema_create("test_field", ZVEC_DATA_TYPE_INT32, false, 0); + zvec_collection_schema_add_field(schema, val_field); + + ZVecDoc *val_doc = zvec_doc_create(); + zvec_doc_set_pk(val_doc, "test_pk"); + zvec_doc_add_field_by_value(val_doc, "test_field", ZVEC_DATA_TYPE_INT32, + &(int32_t){42}, sizeof(int32_t)); + + char *error_msg = NULL; + ZVecErrorCode err = zvec_doc_validate(val_doc, schema, false, &error_msg); + TEST_ASSERT(err == ZVEC_OK); + if (error_msg) { + free(error_msg); + } + + zvec_doc_destroy(val_doc); + zvec_collection_schema_destroy(schema); + zvec_doc_destroy(doc); + + // Test zvec_doc_to_detail_string + ZVecDoc *detail_doc = zvec_doc_create(); + zvec_doc_set_pk(detail_doc, "detail_test"); + zvec_doc_add_field_by_value(detail_doc, "int_field", ZVEC_DATA_TYPE_INT32, + &(int32_t){12345}, sizeof(int32_t)); + zvec_doc_add_field_by_value(detail_doc, "str_field", ZVEC_DATA_TYPE_STRING, + "hello", 5); + + char *detail_str = NULL; + err = zvec_doc_to_detail_string(detail_doc, &detail_str); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(detail_str != NULL); + printf(" Document detail: %s\n", detail_str); + free(detail_str); + + zvec_doc_destroy(detail_doc); + + TEST_END(); +} + +void test_array_memory_functions(void) { + TEST_START(); + + // Test ZVecStringArray + ZVecStringArray *str_array = zvec_string_array_create(3); + TEST_ASSERT(str_array != NULL); + if (str_array) { + TEST_ASSERT(str_array->count == 3); + TEST_ASSERT(str_array->strings != NULL); + + // Add strings at specific indices + zvec_string_array_add(str_array, 0, 
"string1"); + zvec_string_array_add(str_array, 1, "string2"); + zvec_string_array_add(str_array, 2, "string3"); + + // Verify strings were added + TEST_ASSERT(strcmp(str_array->strings[0].data, "string1") == 0); + TEST_ASSERT(strcmp(str_array->strings[1].data, "string2") == 0); + TEST_ASSERT(strcmp(str_array->strings[2].data, "string3") == 0); + zvec_string_array_destroy(str_array); + } + + // Test ZVecMutableByteArray + ZVecMutableByteArray *byte_array = zvec_byte_array_create(1024); + TEST_ASSERT(byte_array != NULL); + if (byte_array) { + TEST_ASSERT(byte_array->capacity == 1024); + TEST_ASSERT(byte_array->length == 0); + TEST_ASSERT(byte_array->data != NULL); + + // Write some data + byte_array->data[0] = 0x01; + byte_array->data[1] = 0x02; + byte_array->data[2] = 0x03; + byte_array->length = 3; + + TEST_ASSERT(byte_array->length == 3); + TEST_ASSERT(byte_array->data[0] == 0x01); + TEST_ASSERT(byte_array->data[1] == 0x02); + TEST_ASSERT(byte_array->data[2] == 0x03); + + zvec_byte_array_destroy(byte_array); + } + + // Test ZVecFloatArray + ZVecFloatArray *float_array = zvec_float_array_create(10); + TEST_ASSERT(float_array != NULL); + if (float_array) { + TEST_ASSERT(float_array->length == 10); + TEST_ASSERT(float_array->data != NULL); + + // Note: Data is initialized to 0 by zvec_float_array_create + // The const qualifier indicates this is typically used for read-only access + // For testing, we verify the allocation succeeded and length is correct + TEST_ASSERT(float_array->data[0] == 0.0f); + TEST_ASSERT(float_array->data[9] == 0.0f); + + zvec_float_array_destroy(float_array); + } + + // Test ZVecInt64Array + ZVecInt64Array *int64_array = zvec_int64_array_create(5); + TEST_ASSERT(int64_array != NULL); + if (int64_array) { + TEST_ASSERT(int64_array->length == 5); + TEST_ASSERT(int64_array->data != NULL); + + // Note: Data is initialized to 0 by zvec_int64_array_create + // The const qualifier indicates this is typically used for read-only access + 
TEST_ASSERT(int64_array->data[0] == 0); + TEST_ASSERT(int64_array->data[4] == 0); + + zvec_int64_array_destroy(int64_array); + } + + // Test edge case: create with zero size + ZVecMutableByteArray *zero_array = zvec_byte_array_create(0); + TEST_ASSERT(zero_array != NULL); + if (zero_array) { + zvec_byte_array_destroy(zero_array); + } + + TEST_END(); +} + +void test_index_params_destruction(void) { + TEST_START(); + + // Test zvec_index_params_invert_destroy + ZVecInvertIndexParams *invert_params = + zvec_index_params_invert_create(true, false); + TEST_ASSERT(invert_params != NULL); + zvec_index_params_invert_destroy(invert_params); + + TEST_END(); +} + +// ============================================================================= +// Main function +// ============================================================================= + +int main(void) { + printf("Starting comprehensive C API tests...\n\n"); + + // Clean up previous test directories + printf("Cleaning up previous test directories...\n"); + system("rm -rf /tmp/zvec_test_*"); + printf("Cleanup completed.\n\n"); + + test_version_functions(); + test_error_handling_functions(); + test_zvec_config(); + test_zvec_initialize(); + test_zvec_string_functions(); + + // Schema-related tests + test_schema_basic_operations(); + test_schema_edge_cases(); + test_schema_field_operations(); + test_normal_schema_creation(); + test_schema_with_indexes(); + test_schema_max_doc_count(); + + // Field-related tests + test_field_schema_functions(); + test_field_helper_functions(); + test_field_ddl_operations(); + + // Collection-related tests + test_collection_basic_operations(); + test_collection_edge_cases(); + test_collection_delete_by_filter(); + test_collection_stats(); + test_collection_stats_functions(); + test_collection_dml_functions(); + test_collection_ddl_operations(); + + // Doc-related tests + test_doc_creation(); + test_doc_primary_key(); + test_doc_basic_operations(); + test_doc_get_field_value_basic(); + 
test_doc_get_field_value_copy(); + test_doc_get_field_value_pointer(); + test_doc_field_operations(); + test_doc_error_conditions(); + test_doc_serialization(); + test_doc_add_field_by_value(); + test_doc_add_field_by_struct(); + + // Index tests + test_index_params(); + test_index_params_functions(); + test_index_creation_and_management(); + + // Query tests + test_query_params_functions(); + test_actual_vector_queries(); + + // Performance tests + // test_performance_benchmarks(); + + // Utility function tests + test_utility_functions(); + + // Memory management tests + test_memory_management_functions(); + + // Additional API coverage tests + test_zvec_shutdown(); + test_index_params_creation_functions(); + test_collection_advanced_index_functions(); + test_collection_query_functions(); + test_doc_advanced_functions(); + test_array_memory_functions(); + test_index_params_destruction(); + + printf("\n=== Comprehensive Test Summary ===\n"); + printf("Total tests: %d\n", test_count); + printf("Passed: %d\n", passed_count); + printf("Failed: %d\n", test_count - passed_count); + + return test_count == passed_count ? 0 : 1; +} diff --git a/tests/c_api/utils.c b/tests/c_api/utils.c new file mode 100644 index 00000000..66c932a4 --- /dev/null +++ b/tests/c_api/utils.c @@ -0,0 +1,940 @@ +// Copyright 2025-present the zvec project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "utils.h" +#include +#include +#include +#include + +// ============================================================================= +// Internal Helper Functions +// ============================================================================= + +static char *strdup_safe(const char *str) { + if (!str) return NULL; + size_t len = strlen(str) + 1; + char *copy = (char *)malloc(len); + if (copy) { + memcpy(copy, str, len); + } + return copy; +} + +// ============================================================================= +// Schema Creation Helper Functions Implementation +// ============================================================================= + +ZVecCollectionSchema *zvec_test_create_temp_schema(void) { + // Create collection schema using C API + ZVecCollectionSchema *schema = zvec_collection_schema_create("demo"); + schema->max_doc_count_per_segment = 1000; + + // Create index parameters using C API + ZVecInvertIndexParams *invert_params = + zvec_index_params_invert_create(true, true); + ZVecHnswIndexParams *dense_hnsw_params = zvec_index_params_hnsw_create( + ZVEC_METRIC_TYPE_L2, 16, 100, 50, ZVEC_QUANTIZE_TYPE_UNDEFINED); + ZVecHnswIndexParams *sparse_hnsw_params = zvec_index_params_hnsw_create( + ZVEC_METRIC_TYPE_IP, 16, 100, 50, ZVEC_QUANTIZE_TYPE_UNDEFINED); + + + // Create and add fields + ZVecFieldSchema *id_field = + zvec_field_schema_create("id", ZVEC_DATA_TYPE_INT64, false, 0); + zvec_field_schema_set_invert_index(id_field, invert_params); + zvec_collection_schema_add_field(schema, id_field); + + // Create name field (inverted index without optimization) + ZVecInvertIndexParams *name_invert_params = + zvec_index_params_invert_create(false, false); + ZVecFieldSchema *name_field = + zvec_field_schema_create("name", ZVEC_DATA_TYPE_STRING, false, 0); + zvec_field_schema_set_invert_index(name_field, name_invert_params); + zvec_collection_schema_add_field(schema, name_field); + + // Create weight field (no index) + ZVecFieldSchema 
*weight_field = + zvec_field_schema_create("weight", ZVEC_DATA_TYPE_FLOAT, true, 0); + zvec_collection_schema_add_field(schema, weight_field); + + // Create dense field (HNSW index) + ZVecFieldSchema *dense_field = + zvec_field_schema_create("dense", ZVEC_DATA_TYPE_VECTOR_FP32, false, 128); + zvec_field_schema_set_hnsw_index(dense_field, dense_hnsw_params); + zvec_collection_schema_add_field(schema, dense_field); + + // Create sparse field (HNSW index) + ZVecFieldSchema *sparse_field = zvec_field_schema_create( + "sparse", ZVEC_DATA_TYPE_SPARSE_VECTOR_FP32, false, 0); + zvec_field_schema_set_hnsw_index(sparse_field, sparse_hnsw_params); + zvec_collection_schema_add_field(schema, sparse_field); + + return schema; +} + +ZVecCollectionSchema *zvec_test_create_scalar_schema(void) { + // Create collection schema using C API + ZVecCollectionSchema *schema = zvec_collection_schema_create("demo"); + + // Create fields + ZVecFieldSchema *int32_field = + zvec_field_schema_create("int32", ZVEC_DATA_TYPE_INT32, false, 0); + zvec_collection_schema_add_field(schema, int32_field); + + ZVecFieldSchema *string_field = + zvec_field_schema_create("string", ZVEC_DATA_TYPE_STRING, false, 0); + zvec_collection_schema_add_field(schema, string_field); + + return schema; +} + +ZVecCollectionSchema *zvec_test_create_normal_schema( + bool nullable, const char *name, + const ZVecInvertIndexParams *scalar_index_params, + const ZVecHnswIndexParams *vector_index_params, uint64_t max_doc_count) { + // Create collection schema using C API + ZVecCollectionSchema *schema = + zvec_collection_schema_create(name ? 
name : "demo"); + schema->max_doc_count_per_segment = max_doc_count; + + // Create scalar fields (8) + const char *scalar_names[] = {"int32", "string", "uint32", "bool", + "float", "double", "int64", "uint64"}; + ZVecDataType scalar_types[] = {ZVEC_DATA_TYPE_INT32, ZVEC_DATA_TYPE_STRING, + ZVEC_DATA_TYPE_UINT32, ZVEC_DATA_TYPE_BOOL, + ZVEC_DATA_TYPE_FLOAT, ZVEC_DATA_TYPE_DOUBLE, + ZVEC_DATA_TYPE_INT64, ZVEC_DATA_TYPE_UINT64}; + + for (int i = 0; i < 8; i++) { + ZVecFieldSchema *field = + zvec_field_schema_create(scalar_names[i], scalar_types[i], nullable, 0); + if (scalar_index_params) { + zvec_field_schema_set_invert_index( + field, (ZVecInvertIndexParams *)scalar_index_params); + } + zvec_collection_schema_add_field(schema, field); + } + + // Create array fields (8) + const char *array_names[] = {"array_int32", "array_string", "array_uint32", + "array_bool", "array_float", "array_double", + "array_int64", "array_uint64"}; + ZVecDataType array_types[] = { + ZVEC_DATA_TYPE_ARRAY_INT32, ZVEC_DATA_TYPE_ARRAY_STRING, + ZVEC_DATA_TYPE_ARRAY_UINT32, ZVEC_DATA_TYPE_ARRAY_BOOL, + ZVEC_DATA_TYPE_ARRAY_FLOAT, ZVEC_DATA_TYPE_ARRAY_DOUBLE, + ZVEC_DATA_TYPE_ARRAY_INT64, ZVEC_DATA_TYPE_ARRAY_UINT64}; + + for (int i = 0; i < 8; i++) { + ZVecFieldSchema *field = + zvec_field_schema_create(array_names[i], array_types[i], nullable, 0); + if (scalar_index_params) { + zvec_field_schema_set_invert_index( + field, (ZVecInvertIndexParams *)scalar_index_params); + } + zvec_collection_schema_add_field(schema, field); + } + + // Create vector fields (5) + // dense vectors + ZVecFieldSchema *dense_fp32 = zvec_field_schema_create( + "dense_fp32", ZVEC_DATA_TYPE_VECTOR_FP32, false, 128); + if (vector_index_params) { + zvec_field_schema_set_hnsw_index( + dense_fp32, (ZVecHnswIndexParams *)vector_index_params); + } + zvec_collection_schema_add_field(schema, dense_fp32); + + ZVecFieldSchema *dense_fp16 = zvec_field_schema_create( + "dense_fp16", ZVEC_DATA_TYPE_VECTOR_FP16, false, 128); + 
ZVecFlatIndexParams *flat_params1 = zvec_index_params_flat_create( + ZVEC_METRIC_TYPE_L2, ZVEC_QUANTIZE_TYPE_UNDEFINED); + zvec_field_schema_set_flat_index(dense_fp16, flat_params1); + zvec_collection_schema_add_field(schema, dense_fp16); + + ZVecFieldSchema *dense_int8 = zvec_field_schema_create( + "dense_int8", ZVEC_DATA_TYPE_VECTOR_INT8, false, 128); + ZVecFlatIndexParams *flat_params2 = zvec_index_params_flat_create( + ZVEC_METRIC_TYPE_L2, ZVEC_QUANTIZE_TYPE_UNDEFINED); + zvec_field_schema_set_flat_index(dense_int8, flat_params2); + zvec_collection_schema_add_field(schema, dense_int8); + + // sparse vectors + ZVecFieldSchema *sparse_fp32 = zvec_field_schema_create( + "sparse_fp32", ZVEC_DATA_TYPE_SPARSE_VECTOR_FP32, false, 0); + if (vector_index_params) { + zvec_field_schema_set_hnsw_index( + sparse_fp32, (ZVecHnswIndexParams *)vector_index_params); + } + zvec_collection_schema_add_field(schema, sparse_fp32); + + ZVecFieldSchema *sparse_fp16 = zvec_field_schema_create( + "sparse_fp16", ZVEC_DATA_TYPE_SPARSE_VECTOR_FP16, false, 0); + ZVecFlatIndexParams *flat_params3 = zvec_index_params_flat_create( + ZVEC_METRIC_TYPE_L2, ZVEC_QUANTIZE_TYPE_UNDEFINED); + zvec_field_schema_set_flat_index(sparse_fp16, flat_params3); + zvec_collection_schema_add_field(schema, sparse_fp16); + + return schema; +} + +ZVecCollectionSchema *zvec_test_create_schema_with_scalar_index( + bool nullable, bool enable_optimize, const char *name) { + ZVecInvertIndexParams *invert_params = + zvec_test_create_default_invert_params(enable_optimize); + ZVecCollectionSchema *schema = + zvec_test_create_normal_schema(nullable, name, invert_params, NULL, 1000); + free(invert_params); + return schema; +} + +ZVecCollectionSchema *zvec_test_create_schema_with_vector_index( + bool nullable, const char *name, + const ZVecHnswIndexParams *vector_index_params) { + ZVecHnswIndexParams *default_params = NULL; + if (!vector_index_params) { + default_params = zvec_test_create_default_hnsw_params(); + } + + 
ZVecCollectionSchema *schema = zvec_test_create_normal_schema( + nullable, name, NULL, + vector_index_params ? vector_index_params : default_params, 1000); + + if (default_params) { + free(default_params); + } + + return schema; +} + +ZVecCollectionSchema *zvec_test_create_schema_with_max_doc_count( + uint64_t doc_count) { + return zvec_test_create_normal_schema(false, "demo", NULL, NULL, doc_count); +} + +// ============================================================================= +// Document Creation Helper Functions Implementation +// ============================================================================= + +char *zvec_test_make_pk(uint64_t doc_id) { + char *pk = (char *)malloc(32); // Sufficiently large buffer + if (pk) { + snprintf(pk, 32, "pk_%llu", (unsigned long long)doc_id); + } + return pk; +} + +uint64_t zvec_test_extract_doc_id(const char *pk) { + if (!pk || strlen(pk) < 4) return 0; + return strtoull(pk + 3, NULL, 10); +} + +ZVecDoc *zvec_test_create_doc(uint64_t doc_id, + const ZVecCollectionSchema *schema, + const char *pk) { + if (!schema) return NULL; + ZVecDoc *doc = zvec_doc_create(); + if (!doc) return NULL; + + // Set primary key + char *primary_key = pk ? 
strdup_safe(pk) : zvec_test_make_pk(doc_id); + if (primary_key) { + zvec_doc_set_pk(doc, primary_key); + free(primary_key); + } + + // Create test data for each field + for (size_t i = 0; i < schema->field_count; i++) { + // Fix type mismatch issue - remove address operator + const ZVecFieldSchema *field = schema->fields[i]; + // Remove unused variable + // ZVecErrorCode err = ZVEC_OK; + + switch (field->data_type) { + case ZVEC_DATA_TYPE_BINARY: { + char binary_str[32]; + snprintf(binary_str, sizeof(binary_str), "binary_%llu", + (unsigned long long)doc_id); + zvec_doc_add_field_by_value(doc, field->name->data, field->data_type, + binary_str, strlen(binary_str)); + break; + } + case ZVEC_DATA_TYPE_BOOL: { + zvec_doc_add_field_by_value(doc, field->name->data, field->data_type, + &(bool){doc_id % 10 == 0}, sizeof(bool)); + break; + } + case ZVEC_DATA_TYPE_INT32: { + zvec_doc_add_field_by_value(doc, field->name->data, field->data_type, + &(int32_t){(int32_t)doc_id}, + sizeof(int32_t)); + break; + } + case ZVEC_DATA_TYPE_INT64: { + zvec_doc_add_field_by_value(doc, field->name->data, field->data_type, + &(int64_t){(int64_t)doc_id}, + sizeof(int64_t)); + break; + } + case ZVEC_DATA_TYPE_UINT32: { + zvec_doc_add_field_by_value(doc, field->name->data, field->data_type, + &(uint32_t){(uint32_t)doc_id}, + sizeof(uint32_t)); + break; + } + case ZVEC_DATA_TYPE_UINT64: { + zvec_doc_add_field_by_value(doc, field->name->data, field->data_type, + &(uint64_t){(uint64_t)doc_id}, + sizeof(uint64_t)); + break; + } + case ZVEC_DATA_TYPE_FLOAT: { + zvec_doc_add_field_by_value(doc, field->name->data, field->data_type, + &(float){(float)doc_id}, sizeof(float)); + break; + } + case ZVEC_DATA_TYPE_DOUBLE: { + zvec_doc_add_field_by_value(doc, field->name->data, field->data_type, + &(double){(double)doc_id}, sizeof(double)); + break; + } + case ZVEC_DATA_TYPE_STRING: { + char string_val[64]; + snprintf(string_val, sizeof(string_val), "value_%llu", + (unsigned long long)doc_id); + 
zvec_doc_add_field_by_value(doc, field->name->data, field->data_type, + string_val, strlen(string_val)); + break; + } + case ZVEC_DATA_TYPE_ARRAY_BOOL: { + bool bool_array[10]; + for (int j = 0; j < 10; j++) { + bool_array[j] = (doc_id + j) % 2 == 0; + } + zvec_doc_add_field_by_value(doc, field->name->data, field->data_type, + bool_array, sizeof(bool_array)); + break; + } + case ZVEC_DATA_TYPE_ARRAY_INT32: { + int32_t int32_array[10]; + for (int j = 0; j < 10; j++) { + int32_array[j] = (int32_t)doc_id; + } + zvec_doc_add_field_by_value(doc, field->name->data, field->data_type, + int32_array, sizeof(int32_array)); + break; + } + case ZVEC_DATA_TYPE_ARRAY_INT64: { + int64_t int64_array[10]; + for (int j = 0; j < 10; j++) { + int64_array[j] = (int64_t)doc_id; + } + zvec_doc_add_field_by_value(doc, field->name->data, field->data_type, + int64_array, sizeof(int64_array)); + break; + } + case ZVEC_DATA_TYPE_ARRAY_UINT32: { + uint32_t uint32_array[10]; + for (int j = 0; j < 10; j++) { + uint32_array[j] = (uint32_t)doc_id; + } + zvec_doc_add_field_by_value(doc, field->name->data, field->data_type, + uint32_array, sizeof(uint32_array)); + break; + } + case ZVEC_DATA_TYPE_ARRAY_UINT64: { + uint64_t uint64_array[10]; + for (int j = 0; j < 10; j++) { + uint64_array[j] = (uint64_t)doc_id; + } + zvec_doc_add_field_by_value(doc, field->name->data, field->data_type, + uint64_array, sizeof(uint64_array)); + break; + } + case ZVEC_DATA_TYPE_ARRAY_FLOAT: { + float float_array[10]; + for (int j = 0; j < 10; j++) { + float_array[j] = (float)doc_id; + } + zvec_doc_add_field_by_value(doc, field->name->data, field->data_type, + float_array, sizeof(float_array)); + break; + } + case ZVEC_DATA_TYPE_ARRAY_DOUBLE: { + double double_array[10]; + for (int j = 0; j < 10; j++) { + double_array[j] = (double)doc_id; + } + zvec_doc_add_field_by_value(doc, field->name->data, field->data_type, + double_array, sizeof(double_array)); + break; + } + case ZVEC_DATA_TYPE_ARRAY_STRING: { + // String arrays 
need special handling + char string_data[256]; + size_t offset = 0; + for (int j = 0; j < 10; j++) { + char temp_str[32]; + snprintf(temp_str, sizeof(temp_str), "value_%llu_%d", + (unsigned long long)doc_id, j); + size_t len = strlen(temp_str); + if (offset + len + 1 < sizeof(string_data)) { + strcpy(string_data + offset, temp_str); + offset += len + 1; + } + } + zvec_doc_add_field_by_value(doc, field->name->data, field->data_type, + string_data, offset); + break; + } + case ZVEC_DATA_TYPE_VECTOR_BINARY32: { + uint32_t *vector_data = + (uint32_t *)malloc(field->dimension * sizeof(uint32_t)); + if (vector_data) { + for (uint32_t j = 0; j < field->dimension; j++) { + vector_data[j] = (uint32_t)(doc_id + j); + } + zvec_doc_add_field_by_value(doc, field->name->data, field->data_type, + vector_data, + field->dimension * sizeof(uint32_t)); + free(vector_data); + } + break; + } + case ZVEC_DATA_TYPE_VECTOR_BINARY64: { + uint64_t *vector_data = + (uint64_t *)malloc(field->dimension * sizeof(uint64_t)); + if (vector_data) { + for (uint32_t j = 0; j < field->dimension; j++) { + vector_data[j] = (uint64_t)(doc_id + j); + } + zvec_doc_add_field_by_value(doc, field->name->data, field->data_type, + vector_data, + field->dimension * sizeof(uint64_t)); + free(vector_data); + } + break; + } + case ZVEC_DATA_TYPE_VECTOR_FP32: { + float *vector_data = (float *)malloc(field->dimension * sizeof(float)); + if (vector_data) { + for (uint32_t j = 0; j < field->dimension; j++) { + vector_data[j] = (float)(doc_id + j * 0.1); + } + zvec_doc_add_field_by_value(doc, field->name->data, field->data_type, + vector_data, + field->dimension * sizeof(float)); + free(vector_data); + } + break; + } + case ZVEC_DATA_TYPE_VECTOR_FP64: { + double *vector_data = + (double *)malloc(field->dimension * sizeof(double)); + if (vector_data) { + for (uint32_t j = 0; j < field->dimension; j++) { + vector_data[j] = (double)(doc_id + j * 0.1); + } + zvec_doc_add_field_by_value(doc, field->name->data, 
field->data_type, + vector_data, + field->dimension * sizeof(double)); + free(vector_data); + } + break; + } + case ZVEC_DATA_TYPE_VECTOR_FP16: { + // FP16 needs special handling, simplified to FP32 here + float *vector_data = (float *)malloc(field->dimension * sizeof(float)); + if (vector_data) { + for (uint32_t j = 0; j < field->dimension; j++) { + vector_data[j] = (float)(doc_id + j * 0.1); + } + zvec_doc_add_field_by_value(doc, field->name->data, field->data_type, + vector_data, + field->dimension * sizeof(float)); + free(vector_data); + } + break; + } + case ZVEC_DATA_TYPE_VECTOR_INT8: { + int8_t *vector_data = + (int8_t *)malloc(field->dimension * sizeof(int8_t)); + if (vector_data) { + for (uint32_t j = 0; j < field->dimension; j++) { + vector_data[j] = (int8_t)((doc_id + j) % 256); + } + zvec_doc_add_field_by_value(doc, field->name->data, field->data_type, + vector_data, + field->dimension * sizeof(int8_t)); + free(vector_data); + } + break; + } + case ZVEC_DATA_TYPE_VECTOR_INT16: { + int16_t *vector_data = + (int16_t *)malloc(field->dimension * sizeof(int16_t)); + if (vector_data) { + for (uint32_t j = 0; j < field->dimension; j++) { + vector_data[j] = (int16_t)((doc_id + j) % 65536); + } + zvec_doc_add_field_by_value(doc, field->name->data, field->data_type, + vector_data, + field->dimension * sizeof(int16_t)); + free(vector_data); + } + break; + } + case ZVEC_DATA_TYPE_SPARSE_VECTOR_FP32: { + // Sparse vectors need special handling + uint32_t nnz = field->dimension > 0 + ? 
field->dimension / 10 + : 10; // Number of non-zero elements + size_t sparse_size = + sizeof(uint32_t) + nnz * (sizeof(uint32_t) + sizeof(float)); + void *sparse_data = malloc(sparse_size); + if (sparse_data) { + uint32_t *data_ptr = (uint32_t *)sparse_data; + *data_ptr = nnz; // Set number of non-zero elements + uint32_t *indices = data_ptr + 1; + float *values = (float *)(indices + nnz); + for (uint32_t j = 0; j < nnz; j++) { + indices[j] = j * 10; // Index + values[j] = (float)(doc_id + j * 0.1); // Value + } + zvec_doc_add_field_by_value(doc, field->name->data, field->data_type, + sparse_data, sparse_size); + free(sparse_data); + } + break; + } + case ZVEC_DATA_TYPE_SPARSE_VECTOR_FP16: { + // Sparse FP16 vectors, simplified handling + uint32_t nnz = field->dimension > 0 ? field->dimension / 10 : 10; + size_t sparse_size = + sizeof(uint32_t) + + nnz * (sizeof(uint32_t) + + sizeof(float)); // Still use float for storage + void *sparse_data = malloc(sparse_size); + if (sparse_data) { + uint32_t *data_ptr = (uint32_t *)sparse_data; + *data_ptr = nnz; + uint32_t *indices = data_ptr + 1; + float *values = (float *)(indices + nnz); + for (uint32_t j = 0; j < nnz; j++) { + indices[j] = j * 10; + values[j] = (float)(doc_id + j * 0.1); + } + zvec_doc_add_field_by_value(doc, field->name->data, field->data_type, + sparse_data, sparse_size); + free(sparse_data); + } + break; + } + + default: + // Unsupported data type + break; + } + + // Remove reference to removed variable err + /* + if (err != ZVEC_OK) { + // Error handling: continue processing other fields + } + */ + } + + return doc; +} + +ZVecDoc *zvec_test_create_doc_null(uint64_t doc_id, + const ZVecCollectionSchema *schema, + const char *pk) { + // Reuse create_doc function, but only process vector fields + ZVecDoc *doc = zvec_doc_create(); + if (!doc) return NULL; + + // Set primary key + char *primary_key = pk ? 
strdup_safe(pk) : zvec_test_make_pk(doc_id); + if (primary_key) { + zvec_doc_set_pk(doc, primary_key); + free(primary_key); + } + + // Only create data for vector fields + for (size_t i = 0; i < schema->field_count; i++) { + const ZVecFieldSchema *field = schema->fields[i]; + + // Only process specific vector type fields + if (field->data_type != ZVEC_DATA_TYPE_VECTOR_FP32 && + field->data_type != ZVEC_DATA_TYPE_VECTOR_FP16 && + field->data_type != ZVEC_DATA_TYPE_VECTOR_INT8 && + field->data_type != ZVEC_DATA_TYPE_SPARSE_VECTOR_FP32 && + field->data_type != ZVEC_DATA_TYPE_SPARSE_VECTOR_FP16) { + continue; + } + + ZVecErrorCode err = ZVEC_OK; + + switch (field->data_type) { + case ZVEC_DATA_TYPE_VECTOR_FP32: { + float *vector_data = (float *)malloc(field->dimension * sizeof(float)); + if (vector_data) { + for (uint32_t j = 0; j < field->dimension; j++) { + vector_data[j] = (float)(doc_id + j * 0.1); + } + err = zvec_doc_add_field_by_value(doc, field->name->data, + field->data_type, vector_data, + field->dimension * sizeof(float)); + free(vector_data); + } + break; + } + case ZVEC_DATA_TYPE_VECTOR_FP64: { + double *vector_data = + (double *)malloc(field->dimension * sizeof(double)); + if (vector_data) { + for (uint32_t j = 0; j < field->dimension; j++) { + vector_data[j] = (double)(doc_id + j * 0.1); + } + err = zvec_doc_add_field_by_value(doc, field->name->data, + field->data_type, vector_data, + field->dimension * sizeof(double)); + free(vector_data); + } + break; + } + case ZVEC_DATA_TYPE_VECTOR_FP16: { + float *vector_data = (float *)malloc(field->dimension * sizeof(float)); + if (vector_data) { + for (uint32_t j = 0; j < field->dimension; j++) { + vector_data[j] = (float)(doc_id + j * 0.1); + } + err = zvec_doc_add_field_by_value(doc, field->name->data, + field->data_type, vector_data, + field->dimension * sizeof(float)); + free(vector_data); + } + break; + } + case ZVEC_DATA_TYPE_VECTOR_INT8: { + int8_t *vector_data = + (int8_t *)malloc(field->dimension * 
sizeof(int8_t)); + if (vector_data) { + for (uint32_t j = 0; j < field->dimension; j++) { + vector_data[j] = (int8_t)(doc_id % 128); + } + err = zvec_doc_add_field_by_value(doc, field->name->data, + field->data_type, vector_data, + field->dimension * sizeof(int8_t)); + free(vector_data); + } + break; + } + case ZVEC_DATA_TYPE_VECTOR_INT16: { + int16_t *vector_data = + (int16_t *)malloc(field->dimension * sizeof(int16_t)); + if (vector_data) { + for (uint32_t j = 0; j < field->dimension; j++) { + vector_data[j] = (int16_t)(doc_id % 32768); + } + err = zvec_doc_add_field_by_value(doc, field->name->data, + field->data_type, vector_data, + field->dimension * sizeof(int16_t)); + free(vector_data); + } + break; + } + case ZVEC_DATA_TYPE_SPARSE_VECTOR_FP16: + case ZVEC_DATA_TYPE_SPARSE_VECTOR_FP32: { + const size_t nnz = 100; + size_t sparse_size = + sizeof(size_t) + nnz * (sizeof(uint32_t) + sizeof(float)); + char *sparse_data = (char *)malloc(sparse_size); + if (sparse_data) { + char *ptr = sparse_data; + *((size_t *)ptr) = nnz; + ptr += sizeof(size_t); + + for (size_t j = 0; j < nnz; j++) { + *((uint32_t *)ptr) = (uint32_t)j; + ptr += sizeof(uint32_t); + *((float *)ptr) = (float)(doc_id + j * 0.1); + ptr += sizeof(float); + } + err = zvec_doc_add_field_by_value(doc, field->name->data, + field->data_type, sparse_data, + sparse_size); + free(sparse_data); + } + break; + } + default: + break; + } + + + if (err != ZVEC_OK) { + zvec_doc_destroy(doc); + return NULL; + } + } + + return doc; +} + +ZVecDoc *zvec_test_create_doc_with_fields(uint64_t doc_id, + const char **field_names, + const ZVecDataType *field_types, + size_t field_count, const char *pk) { + ZVecDoc *doc = zvec_doc_create(); + if (!doc) return NULL; + + // Set primary key + char *primary_key = pk ? 
strdup_safe(pk) : zvec_test_make_pk(doc_id); + if (primary_key) { + zvec_doc_set_pk(doc, primary_key); + free(primary_key); + } + + // Create data for specified fields + for (size_t i = 0; i < field_count; i++) { + ZVecErrorCode err = ZVEC_OK; + + switch (field_types[i]) { + case ZVEC_DATA_TYPE_INT32: + err = zvec_doc_add_field_by_value(doc, field_names[i], field_types[i], + &(int32_t){(int32_t)doc_id}, + sizeof(int32_t)); + break; + case ZVEC_DATA_TYPE_STRING: { + char string_val[64]; + snprintf(string_val, sizeof(string_val), "value_%llu", + (unsigned long long)doc_id); + err = zvec_doc_add_field_by_value(doc, field_names[i], field_types[i], + string_val, strlen(string_val)); + break; + } + case ZVEC_DATA_TYPE_FLOAT: + err = + zvec_doc_add_field_by_value(doc, field_names[i], field_types[i], + &(float){(float)doc_id}, sizeof(float)); + break; + case ZVEC_DATA_TYPE_VECTOR_FP32: { + float vector_data[128]; + for (int j = 0; j < 128; j++) { + vector_data[j] = (float)(doc_id + j * 0.1); + } + err = zvec_doc_add_field_by_value(doc, field_names[i], field_types[i], + vector_data, sizeof(vector_data)); + break; + } + default: + // Other types can be added here + break; + } + + if (err != ZVEC_OK) { + zvec_doc_destroy(doc); + return NULL; + } + } + + return doc; +} + +// ============================================================================= +// Index Parameter Creation Helper Functions Implementation +// ============================================================================= + +ZVecHnswIndexParams *zvec_test_create_default_hnsw_params(void) { + ZVecHnswIndexParams *params = + (ZVecHnswIndexParams *)malloc(sizeof(ZVecHnswIndexParams)); + if (!params) return NULL; + + params->base.base.index_type = ZVEC_INDEX_TYPE_HNSW; + params->base.metric_type = ZVEC_METRIC_TYPE_IP; + params->base.quantize_type = ZVEC_QUANTIZE_TYPE_UNDEFINED; + params->m = 16; + params->ef_construction = 100; + + return params; +} + +ZVecFlatIndexParams 
*zvec_test_create_default_flat_params(void) { + ZVecFlatIndexParams *params = + (ZVecFlatIndexParams *)malloc(sizeof(ZVecFlatIndexParams)); + if (!params) return NULL; + + params->base.base.index_type = ZVEC_INDEX_TYPE_FLAT; + params->base.metric_type = ZVEC_METRIC_TYPE_IP; + params->base.quantize_type = ZVEC_QUANTIZE_TYPE_UNDEFINED; + + return params; +} + +ZVecInvertIndexParams *zvec_test_create_default_invert_params( + bool enable_optimize) { + ZVecInvertIndexParams *params = + (ZVecInvertIndexParams *)malloc(sizeof(ZVecInvertIndexParams)); + if (!params) return NULL; + + params->base.index_type = ZVEC_INDEX_TYPE_INVERT; + params->enable_range_optimization = enable_optimize; + params->enable_extended_wildcard = enable_optimize; + + return params; +} + +// ============================================================================= +// Field Schema Creation Helper Functions Implementation +// ============================================================================= + +ZVecFieldSchema *zvec_test_create_scalar_field( + const char *name, ZVecDataType data_type, bool nullable, + const ZVecInvertIndexParams *invert_params) { + ZVecFieldSchema *field = (ZVecFieldSchema *)malloc(sizeof(ZVecFieldSchema)); + if (!field) return NULL; + + field->name = (ZVecString *)malloc(sizeof(ZVecString)); + if (!field->name) { + free(field); + return NULL; + } + // Fix const qualifier issue - create string copy + field->name->data = name ? strdup(name) : NULL; + field->name->length = name ? strlen(name) : 0; + field->name->capacity = name ? strlen(name) + 1 : 0; + field->data_type = data_type; + field->nullable = nullable; + field->dimension = 0; + field->index_params = invert_params ? 
(ZVecIndexParams *)invert_params : NULL; + + return field; +} + +ZVecFieldSchema *zvec_test_create_vector_field( + const char *name, ZVecDataType data_type, uint32_t dimension, bool nullable, + const ZVecHnswIndexParams *vector_index_params) { + ZVecFieldSchema *field = (ZVecFieldSchema *)malloc(sizeof(ZVecFieldSchema)); + if (!field) return NULL; + + field->name = (ZVecString *)malloc(sizeof(ZVecString)); + if (!field->name) { + free(field); + return NULL; + } + // Fix const qualifier issue - create string copy + field->name->data = name ? strdup(name) : NULL; + field->name->length = name ? strlen(name) : 0; + field->name->capacity = name ? strlen(name) + 1 : 0; + field->data_type = data_type; + field->nullable = nullable; + field->dimension = dimension; + field->index_params = + vector_index_params ? (ZVecIndexParams *)vector_index_params : NULL; + + return field; +} + +ZVecFieldSchema *zvec_test_create_sparse_vector_field( + const char *name, ZVecDataType data_type, bool nullable, + const ZVecHnswIndexParams *vector_index_params) { + ZVecFieldSchema *field = (ZVecFieldSchema *)malloc(sizeof(ZVecFieldSchema)); + if (!field) return NULL; + + field->name = (ZVecString *)malloc(sizeof(ZVecString)); + if (!field->name) { + free(field); + return NULL; + } + // Fix const qualifier issue - create string copy + field->name->data = name ? strdup(name) : NULL; + field->name->length = name ? strlen(name) : 0; + field->name->capacity = name ? strlen(name) + 1 : 0; + field->data_type = data_type; + field->nullable = nullable; + field->dimension = 0; // Sparse vectors don't need fixed dimension + field->index_params = + vector_index_params ? 
(ZVecIndexParams *)vector_index_params : NULL; + + return field; +} + +// ============================================================================= +// Memory Management Helper Functions Implementation +// ============================================================================= + +void zvec_test_free_field_schemas(ZVecFieldSchema *fields, size_t count) { + if (!fields) return; + + for (size_t i = 0; i < count; i++) { + if (fields[i].name) { + // Free string memory allocated by strdup + if (fields[i].name->data) { + free(fields[i].name->data); + } + free(fields[i].name); + } + // Free index parameter memory + if (fields[i].index_params) { + zvec_index_params_destroy(fields[i].index_params); + free(fields[i].index_params); + } + } + free(fields); +} + +void zvec_test_free_strings(char **strings, size_t count) { + if (!strings) return; + + for (size_t i = 0; i < count; i++) { + if (strings[i]) { + free(strings[i]); + } + } + + free(strings); +} + +// ============================================================================= +// File System Helper Functions Implementation +// ============================================================================= + +/** + * @brief Delete directory and all its contents (wrapper function) + * + * @param dir_path Directory path + * @return int 0 for success, -1 for failure + */ +int zvec_test_delete_dir(const char *dir_path) { + if (!dir_path) { + return -1; + } + +#ifdef _WIN32 + // Windows platform implementation + char cmd[1024]; + snprintf(cmd, sizeof(cmd), "rd /s /q \"%s\" >nul 2>&1", dir_path); + int result = system(cmd); + return (result == 0) ? 0 : -1; +#else + // Unix/Linux/macOS platform implementation + char cmd[1024]; + snprintf(cmd, sizeof(cmd), "rm -rf \"%s\" 2>/dev/null", dir_path); + int result = system(cmd); + return (result == 0) ? 
0 : -1; +#endif +} diff --git a/tests/c_api/utils.h b/tests/c_api/utils.h new file mode 100644 index 00000000..63e5e314 --- /dev/null +++ b/tests/c_api/utils.h @@ -0,0 +1,260 @@ +// Copyright 2025-present the zvec project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ZVEC_TESTS_C_API_UTILS_H +#define ZVEC_TESTS_C_API_UTILS_H + +#include +#include +#include +#include "zvec/c_api.h" + +#ifdef __cplusplus +extern "C" { +#endif + +// ============================================================================= +// Schema Creation Helper Functions +// ============================================================================= + +/** + * @brief Create temporary test schema + * Contains basic scalar fields and vector fields + * + * @return ZVecCollectionSchema* Created schema pointer, needs to be released by + * calling zvec_collection_schema_cleanup + */ +ZVecCollectionSchema *zvec_test_create_temp_schema(void); + +/** + * @brief Create pure scalar schema + * Contains only scalar fields (int32, string) + * + * @return ZVecCollectionSchema* Created schema pointer + */ +ZVecCollectionSchema *zvec_test_create_scalar_schema(void); + +/** + * @brief Create full-featured schema + * Contains all supported data type fields + * + * @param nullable Whether to allow null values + * @param name Schema name + * @param scalar_index_params Scalar index parameters (can be NULL) + * @param vector_index_params Vector index parameters (can be NULL) + * @param 
max_doc_count Maximum documents per segment + * @return ZVecCollectionSchema* Created schema pointer + */ +ZVecCollectionSchema *zvec_test_create_normal_schema( + bool nullable, const char *name, + const ZVecInvertIndexParams *scalar_index_params, + const ZVecHnswIndexParams *vector_index_params, uint64_t max_doc_count); + +/** + * @brief Create schema with scalar index + * + * @param nullable Whether to allow null values + * @param enable_optimize Whether to enable optimization + * @param name Schema name + * @return ZVecCollectionSchema* Created schema pointer + */ +ZVecCollectionSchema *zvec_test_create_schema_with_scalar_index( + bool nullable, bool enable_optimize, const char *name); + +/** + * @brief Create schema with vector index + * + * @param nullable Whether to allow null values + * @param name Schema name + * @param vector_index_params Vector index parameters (can be NULL, uses default + * HNSW parameters) + * @return ZVecCollectionSchema* Created schema pointer + */ +ZVecCollectionSchema *zvec_test_create_schema_with_vector_index( + bool nullable, const char *name, + const ZVecHnswIndexParams *vector_index_params); + +/** + * @brief Create schema with specified maximum document count + * + * @param doc_count Maximum documents per segment + * @return ZVecCollectionSchema* Created schema pointer + */ +ZVecCollectionSchema *zvec_test_create_schema_with_max_doc_count( + uint64_t doc_count); + +// ============================================================================= +// Document Creation Helper Functions +// ============================================================================= + +/** + * @brief Generate primary key based on document ID + * + * @param doc_id Document ID + * @return char* Generated primary key string, needs to be released by calling + * free() + */ +char *zvec_test_make_pk(uint64_t doc_id); + +/** + * @brief Create complete document + * Create corresponding test data for each field according to schema + * + * @param doc_id 
Document ID + * @param schema Schema pointer + * @param pk Primary key (can be NULL, auto-generated) + * @return ZVecDoc* Created document pointer, needs to be released by calling + * zvec_doc_destroy + */ +ZVecDoc *zvec_test_create_doc(uint64_t doc_id, + const ZVecCollectionSchema *schema, + const char *pk); + +/** + * @brief Create partial null document + * Only set values for vector fields, keep scalar fields as null + * + * @param doc_id Document ID + * @param schema Schema pointer + * @param pk Primary key (can be NULL, auto-generated) + * @return ZVecDoc* Created document pointer + */ +ZVecDoc *zvec_test_create_doc_null(uint64_t doc_id, + const ZVecCollectionSchema *schema, + const char *pk); + +/** + * @brief Create document with specified fields + * Only create data for specified fields + * + * @param doc_id Document ID + * @param field_names Field name array + * @param field_types Field type array + * @param field_count Number of fields + * @param pk Primary key (can be NULL, auto-generated) + * @return ZVecDoc* Created document pointer + */ +ZVecDoc *zvec_test_create_doc_with_fields(uint64_t doc_id, + const char **field_names, + const ZVecDataType *field_types, + size_t field_count, const char *pk); + +// ============================================================================= +// Index Parameter Creation Helper Functions +// ============================================================================= + +/** + * @brief Create default HNSW index parameters + * + * @return ZVecHnswIndexParams* Created parameter pointer, needs to be released + * by calling free() + */ +ZVecHnswIndexParams *zvec_test_create_default_hnsw_params(void); + +/** + * @brief Create default Flat index parameters + * + * @return ZVecFlatIndexParams* Created parameter pointer, needs to be released + * by calling free() + */ +ZVecFlatIndexParams *zvec_test_create_default_flat_params(void); + +/** + * @brief Create default scalar index parameters + * + * @param enable_optimize 
Whether to enable optimization + * @return ZVecInvertIndexParams* Created parameter pointer, needs to be + * released by calling free() + */ +ZVecInvertIndexParams *zvec_test_create_default_invert_params( + bool enable_optimize); + +// ============================================================================= +// Field Schema Creation Helper Functions +// ============================================================================= + +/** + * @brief Create scalar field schema + * + * @param name Field name + * @param data_type Data type + * @param nullable Whether to allow null values + * @param invert_params Scalar index parameters (can be NULL) + * @return ZVecFieldSchema* Created field schema pointer, needs to be released + * by calling free() + */ +ZVecFieldSchema *zvec_test_create_scalar_field( + const char *name, ZVecDataType data_type, bool nullable, + const ZVecInvertIndexParams *invert_params); + +/** + * @brief Create vector field schema + * + * @param name Field name + * @param data_type Data type + * @param dimension Vector dimension + * @param nullable Whether to allow null values + * @param vector_index_params Vector index parameters (can be NULL) + * @return ZVecFieldSchema* Created field schema pointer + */ +ZVecFieldSchema *zvec_test_create_vector_field( + const char *name, ZVecDataType data_type, uint32_t dimension, bool nullable, + const ZVecHnswIndexParams *vector_index_params); + +/** + * @brief Create sparse vector field schema + * + * @param name Field name + * @param data_type Data type + * @param nullable Whether to allow null values + * @param vector_index_params Vector index parameters (can be NULL) + * @return ZVecFieldSchema* Created field schema pointer + */ +ZVecFieldSchema *zvec_test_create_sparse_vector_field( + const char *name, ZVecDataType data_type, bool nullable, + const ZVecHnswIndexParams *vector_index_params); + +// ============================================================================= +// Memory Management Helper 
Functions +// ============================================================================= + +/** + * @brief Free field schema array + * + * @param fields Field array pointer + * @param count Number of fields + */ +void zvec_test_free_field_schemas(ZVecFieldSchema *fields, size_t count); + +/** + * @brief Free string array + * + * @param strings String array pointer + * @param count Number of strings + */ +void zvec_test_free_strings(char **strings, size_t count); + +/** + * @brief Delete directory and all its contents + * + * @param dir_path Directory path + * @return int 0 for success, -1 for failure + */ +int zvec_test_delete_dir(const char *dir_path); + +#ifdef __cplusplus +} +#endif + +#endif // ZVEC_TESTS_C_API_UTILS_H \ No newline at end of file diff --git a/tests/core/algorithm/ivf/ivf_searcher_test.cc b/tests/core/algorithm/ivf/ivf_searcher_test.cc index 9911e0e2..75d5df1c 100644 --- a/tests/core/algorithm/ivf/ivf_searcher_test.cc +++ b/tests/core/algorithm/ivf/ivf_searcher_test.cc @@ -392,7 +392,7 @@ TEST_F(IVFSearcherTest, TestSimpleCosine) { { size_t topk = 33; context->set_topk(topk); - + std::string new_vec; IndexQueryMeta new_meta; ASSERT_EQ(0, reformer->convert(query.data(), qmeta, &new_vec, &new_meta)); From e1cac50f0b43b1f84c194b6fab9dafedb2d251d0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Donny/=EA=B0=95=EB=8F=99=EC=9C=A4?= Date: Wed, 18 Mar 2026 13:40:29 +0900 Subject: [PATCH 2/7] feat(c-api): add nullable/doc-result APIs for agency migration (#234) --- src/c_api/c_api.cc | 257 ++++++++++++++++++++++++++++++++++++++- src/include/zvec/c_api.h | 98 +++++++++++++++ tests/c_api/c_api_test.c | 167 +++++++++++++++++++++++++ 3 files changed, 521 insertions(+), 1 deletion(-) diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index c26ceee7..81e534b6 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -798,6 +798,73 @@ static char *copy_string(const std::string &str) { return copy; } +// Helper function: free write results returned by detailed 
DML APIs. +static void free_write_results_internal(ZVecWriteResult *results, + size_t result_count) { + if (!results) { + return; + } + for (size_t i = 0; i < result_count; ++i) { + if (results[i].pk) { + free((void *)results[i].pk); + results[i].pk = nullptr; + } + if (results[i].message) { + free((void *)results[i].message); + results[i].message = nullptr; + } + } + free(results); +} + +// Helper function: convert per-doc statuses to C API write result array. +static ZVecErrorCode build_write_results( + const std::vector &statuses, + const std::vector &pks, ZVecWriteResult **results, + size_t *result_count) { + if (!results || !result_count) { + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + *result_count = statuses.size(); + if (*result_count == 0) { + *results = nullptr; + return ZVEC_OK; + } + + *results = static_cast( + calloc(*result_count, sizeof(ZVecWriteResult))); + if (!*results) { + set_last_error("Failed to allocate memory for write results"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + + for (size_t i = 0; i < *result_count; ++i) { + const std::string pk = i < pks.size() ? 
pks[i] : std::string(); + const std::string message = statuses[i].message(); + (*results)[i].pk = copy_string(pk); + (*results)[i].message = copy_string(message); + (*results)[i].code = status_to_error_code(statuses[i]); + } + + return ZVEC_OK; +} + +static std::vector collect_doc_pks(const ZVecDoc **docs, + size_t doc_count) { + std::vector pks; + pks.reserve(doc_count); + for (size_t i = 0; i < doc_count; ++i) { + if (!docs[i]) { + pks.emplace_back(""); + continue; + } + auto doc_ptr = reinterpret_cast *>(docs[i]); + pks.emplace_back((*doc_ptr)->pk_ref()); + } + return pks; +} + static zvec::DataType convert_data_type(ZVecDataType zvec_type) { if (zvec_type < ZVEC_DATA_TYPE_UNDEFINED || zvec_type > ZVEC_DATA_TYPE_ARRAY_DOUBLE) { @@ -1044,6 +1111,12 @@ void zvec_free_uint8_array(uint8_t *array) { } } +void zvec_free_ptr(void *ptr) { + if (ptr) { + free(ptr); + } +} + void zvec_free_field_schema(ZVecFieldSchema *field_schema) { if (field_schema) { if (field_schema->index_params) { @@ -2139,6 +2212,10 @@ void zvec_docs_free(ZVecDoc **docs, size_t count) { free(docs); } +void zvec_write_results_free(ZVecWriteResult *results, size_t result_count) { + free_write_results_internal(results, result_count); +} + void zvec_doc_set_pk(ZVecDoc *doc, const char *pk) { if (!doc || !pk) return; @@ -2175,6 +2252,18 @@ void zvec_doc_set_operator(ZVecDoc *doc, ZVecDocOperator op) { ZVEC_CATCH_END_VOID } +ZVecErrorCode zvec_doc_set_field_null(ZVecDoc *doc, const char *field_name) { + if (!doc || !field_name) { + set_last_error("Invalid arguments: null pointer"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + ZVEC_TRY_RETURN_ERROR( + "Failed to set null field", + auto doc_ptr = reinterpret_cast *>(doc); + (*doc_ptr)->set_null(std::string(field_name)); return ZVEC_OK;) +} + // ============================================================================= // Document interface implementation // ============================================================================= @@ -5124,6 
+5213,38 @@ default: { return error_code;) } + ZVecErrorCode zvec_collection_insert_with_results( + ZVecCollection *collection, const ZVecDoc **docs, size_t doc_count, + ZVecWriteResult **results, size_t *result_count) { + if (!collection || !docs || doc_count == 0 || !results || !result_count) { + set_last_error( + "Invalid arguments: collection, docs, doc_count, results and " + "result_count cannot be null/zero"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + *results = nullptr; + *result_count = 0; + + ZVEC_TRY_RETURN_ERROR( + "Exception in zvec_collection_insert_with_results", + auto coll_ptr = + reinterpret_cast *>(collection); + + std::vector internal_docs = + convert_zvec_docs_to_internal(docs, doc_count); + std::vector pks = collect_doc_pks(docs, doc_count); + + auto result = (*coll_ptr)->Insert(internal_docs); + ZVecErrorCode error_code = handle_expected_result(result); + + if (error_code != ZVEC_OK) { + return error_code; + } + + return build_write_results(result.value(), pks, results, result_count);) + } + ZVecErrorCode zvec_collection_update(ZVecCollection *collection, const ZVecDoc **docs, size_t doc_count, size_t *success_count, @@ -5162,6 +5283,38 @@ default: { return error_code;) } + ZVecErrorCode zvec_collection_update_with_results( + ZVecCollection *collection, const ZVecDoc **docs, size_t doc_count, + ZVecWriteResult **results, size_t *result_count) { + if (!collection || !docs || doc_count == 0 || !results || !result_count) { + set_last_error( + "Invalid arguments: collection, docs, doc_count, results and " + "result_count cannot be null/zero"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + *results = nullptr; + *result_count = 0; + + ZVEC_TRY_RETURN_ERROR( + "Exception in zvec_collection_update_with_results", + auto coll_ptr = + reinterpret_cast *>(collection); + + std::vector internal_docs = + convert_zvec_docs_to_internal(docs, doc_count); + std::vector pks = collect_doc_pks(docs, doc_count); + + auto result = 
(*coll_ptr)->Update(internal_docs); + ZVecErrorCode error_code = handle_expected_result(result); + + if (error_code != ZVEC_OK) { + return error_code; + } + + return build_write_results(result.value(), pks, results, result_count);) + } + ZVecErrorCode zvec_collection_upsert(ZVecCollection *collection, const ZVecDoc **docs, size_t doc_count, size_t *success_count, @@ -5200,6 +5353,38 @@ default: { return error_code;) } + ZVecErrorCode zvec_collection_upsert_with_results( + ZVecCollection *collection, const ZVecDoc **docs, size_t doc_count, + ZVecWriteResult **results, size_t *result_count) { + if (!collection || !docs || doc_count == 0 || !results || !result_count) { + set_last_error( + "Invalid arguments: collection, docs, doc_count, results and " + "result_count cannot be null/zero"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + *results = nullptr; + *result_count = 0; + + ZVEC_TRY_RETURN_ERROR( + "Exception in zvec_collection_upsert_with_results", + auto coll_ptr = + reinterpret_cast *>(collection); + + std::vector internal_docs = + convert_zvec_docs_to_internal(docs, doc_count); + std::vector pks = collect_doc_pks(docs, doc_count); + + auto result = (*coll_ptr)->Upsert(internal_docs); + ZVecErrorCode error_code = handle_expected_result(result); + + if (error_code != ZVEC_OK) { + return error_code; + } + + return build_write_results(result.value(), pks, results, result_count);) + } + ZVecErrorCode zvec_collection_delete(ZVecCollection *collection, const char *const *pks, size_t pk_count, size_t *success_count, @@ -5242,6 +5427,44 @@ default: { return error_code;) } + ZVecErrorCode zvec_collection_delete_with_results( + ZVecCollection *collection, const char *const *pks, size_t pk_count, + ZVecWriteResult **results, size_t *result_count) { + if (!collection || !pks || pk_count == 0 || !results || !result_count) { + set_last_error( + "Invalid arguments: collection, pks, pk_count, results and " + "result_count cannot be null/zero"); + return 
ZVEC_ERROR_INVALID_ARGUMENT; + } + + *results = nullptr; + *result_count = 0; + + ZVEC_TRY_RETURN_ERROR( + "Exception in zvec_collection_delete_with_results", + auto coll_ptr = + reinterpret_cast *>(collection); + + std::vector primary_keys; primary_keys.reserve(pk_count); + for (size_t i = 0; i < pk_count; ++i) { + if (pks[i]) { + primary_keys.emplace_back(pks[i]); + } else { + primary_keys.emplace_back(""); + } + } + + auto result = (*coll_ptr)->Delete(primary_keys); + ZVecErrorCode error_code = handle_expected_result(result); + + if (error_code != ZVEC_OK) { + return error_code; + } + + return build_write_results(result.value(), primary_keys, results, + result_count);) + } + ZVecErrorCode zvec_collection_delete_by_filter(ZVecCollection *collection, const char *filter) { if (!collection || !filter) { @@ -5556,6 +5779,34 @@ default: { } // Helper function to convert fetched document results to C API format + static void normalize_nullable_fields_for_fetch( + const zvec::CollectionSchema &schema, zvec::DocPtrMap &doc_map) { + std::vector nullable_fields; + nullable_fields.reserve(schema.fields().size()); + + for (const auto &field : schema.fields()) { + if (field && field->nullable()) { + nullable_fields.push_back(field->name()); + } + } + + if (nullable_fields.empty()) { + return; + } + + for (auto &[_, doc_ptr] : doc_map) { + if (!doc_ptr) { + continue; + } + + for (const auto &field_name : nullable_fields) { + if (!doc_ptr->has(field_name)) { + doc_ptr->set_null(field_name); + } + } + } + } + ZVecErrorCode convert_fetched_document_results(const zvec::DocPtrMap &doc_map, ZVecDoc ***results, size_t *doc_count) { @@ -5730,6 +5981,10 @@ default: { return ZVEC_ERROR_INTERNAL_ERROR; } - const auto &doc_map = result.value(); + auto doc_map = result.value(); + auto schema_result = (*coll_ptr)->Schema(); + if (schema_result.has_value()) { + normalize_nullable_fields_for_fetch(schema_result.value(), doc_map); + } return convert_fetched_document_results(doc_map, results, 
doc_count);) } diff --git a/src/include/zvec/c_api.h b/src/include/zvec/c_api.h index b81ee860..1d0c1337 100644 --- a/src/include/zvec/c_api.h +++ b/src/include/zvec/c_api.h @@ -383,6 +383,16 @@ ZVEC_EXPORT void ZVEC_CALL zvec_int64_array_destroy(ZVecInt64Array *array); */ ZVEC_EXPORT void ZVEC_CALL zvec_free_uint8_array(uint8_t *array); +/** + * @brief Free heap memory allocated by zvec C API. + * + * Use this helper for pointer-returning APIs that document malloc-allocated + * buffers. This avoids allocator mismatch across DLL boundaries. + * + * @param ptr Memory pointer returned by zvec C API + */ +ZVEC_EXPORT void ZVEC_CALL zvec_free_ptr(void *ptr); + // ============================================================================= // Configuration and Options Structures @@ -1618,6 +1628,15 @@ ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_alter_column( */ typedef struct ZVecDoc ZVecDoc; +/** + * @brief Per-document status returned by detailed DML APIs. + */ +typedef struct { + const char *pk; /**< Primary key (allocated by API) */ + ZVecErrorCode code; /**< Per-document status code */ + const char *message; /**< Per-document status message (allocated by API) */ +} ZVecWriteResult; + // ============================================================================= // Data Manipulation Interface (DML) // ============================================================================= @@ -1635,6 +1654,21 @@ ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_insert( ZVecCollection *collection, const ZVecDoc **docs, size_t doc_count, size_t *success_count, size_t *error_count); +/** + * @brief Insert documents and return per-document statuses. 
+ * + * @param collection Collection handle + * @param docs Document array + * @param doc_count Document count + * @param[out] results Per-document result array (free with + * zvec_write_results_free) + * @param[out] result_count Number of result entries + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_insert_with_results( + ZVecCollection *collection, const ZVecDoc **docs, size_t doc_count, + ZVecWriteResult **results, size_t *result_count); + /** * @brief Update documents in collection * @param collection Collection handle @@ -1648,6 +1682,21 @@ ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_update( ZVecCollection *collection, const ZVecDoc **docs, size_t doc_count, size_t *success_count, size_t *error_count); +/** + * @brief Update documents and return per-document statuses. + * + * @param collection Collection handle + * @param docs Document array + * @param doc_count Document count + * @param[out] results Per-document result array (free with + * zvec_write_results_free) + * @param[out] result_count Number of result entries + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_update_with_results( + ZVecCollection *collection, const ZVecDoc **docs, size_t doc_count, + ZVecWriteResult **results, size_t *result_count); + /** * @brief Insert or update documents in collection (upsert operation) * @param collection Collection handle @@ -1661,6 +1710,21 @@ ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_upsert( ZVecCollection *collection, const ZVecDoc **docs, size_t doc_count, size_t *success_count, size_t *error_count); +/** + * @brief Upsert documents and return per-document statuses. 
+ * + * @param collection Collection handle + * @param docs Document array + * @param doc_count Document count + * @param[out] results Per-document result array (free with + * zvec_write_results_free) + * @param[out] result_count Number of result entries + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_upsert_with_results( + ZVecCollection *collection, const ZVecDoc **docs, size_t doc_count, + ZVecWriteResult **results, size_t *result_count); + /** * @brief Delete documents from collection * @param collection Collection handle @@ -1674,6 +1738,30 @@ ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_delete( ZVecCollection *collection, const char *const *pks, size_t pk_count, size_t *success_count, size_t *error_count); +/** + * @brief Delete documents by PK and return per-document statuses. + * + * @param collection Collection handle + * @param pks Primary key array + * @param pk_count Primary key count + * @param[out] results Per-document result array (free with + * zvec_write_results_free) + * @param[out] result_count Number of result entries + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_delete_with_results( + ZVecCollection *collection, const char *const *pks, size_t pk_count, + ZVecWriteResult **results, size_t *result_count); + +/** + * @brief Free result arrays returned by detailed DML APIs. + * + * @param results Result array pointer + * @param result_count Number of entries in result array + */ +ZVEC_EXPORT void ZVEC_CALL +zvec_write_results_free(ZVecWriteResult *results, size_t result_count); + /** * @brief Delete documents by filter condition * @param collection Collection handle @@ -1872,6 +1960,16 @@ ZVEC_EXPORT void ZVEC_CALL zvec_doc_set_score(ZVecDoc *doc, float score); ZVEC_EXPORT void ZVEC_CALL zvec_doc_set_operator(ZVecDoc *doc, ZVecDocOperator op); +/** + * @brief Explicitly mark a document field as null. 
+ * + * @param doc Document structure pointer + * @param field_name Field name + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_doc_set_field_null(ZVecDoc *doc, const char *field_name); + /** * @brief Get document ID * diff --git a/tests/c_api/c_api_test.c b/tests/c_api/c_api_test.c index 18465110..0c553e1e 100644 --- a/tests/c_api/c_api_test.c +++ b/tests/c_api/c_api_test.c @@ -1625,6 +1625,7 @@ void test_doc_add_field_by_struct(void) { } void test_doc_basic_operations(void); +void test_doc_null_field_api(void); void test_doc_get_field_value_basic(void); void test_doc_get_field_value_copy(void); void test_doc_get_field_value_pointer(void); @@ -1636,6 +1637,7 @@ void test_doc_add_field_by_struct(void); void test_doc_functions(void) { test_doc_basic_operations(); + test_doc_null_field_api(); test_doc_get_field_value_basic(); test_doc_get_field_value_copy(); test_doc_get_field_value_pointer(); @@ -1676,6 +1678,31 @@ void test_doc_basic_operations(void) { TEST_END(); } +void test_doc_null_field_api(void) { + TEST_START(); + + ZVecDoc *doc = zvec_doc_create(); + TEST_ASSERT(doc != NULL); + if (!doc) { + TEST_END(); + return; + } + + ZVecErrorCode err = zvec_doc_set_field_null(doc, "nullable_field"); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(zvec_doc_has_field(doc, "nullable_field") == true); + TEST_ASSERT(zvec_doc_has_field_value(doc, "nullable_field") == false); + TEST_ASSERT(zvec_doc_is_field_null(doc, "nullable_field") == true); + + err = zvec_doc_set_field_null(NULL, "nullable_field"); + TEST_ASSERT(err == ZVEC_ERROR_INVALID_ARGUMENT); + err = zvec_doc_set_field_null(doc, NULL); + TEST_ASSERT(err == ZVEC_ERROR_INVALID_ARGUMENT); + + zvec_doc_destroy(doc); + TEST_END(); +} + void test_doc_get_field_value_basic(void) { TEST_START(); @@ -3178,6 +3205,10 @@ void test_memory_management_functions(void) { TEST_ASSERT(str != NULL); zvec_free_string(str); + void *buffer = malloc(64); + TEST_ASSERT(buffer != NULL); + 
zvec_free_ptr(buffer); + TEST_END(); } @@ -3388,6 +3419,46 @@ void test_collection_dml_functions(void) { err = zvec_collection_delete_by_filter(collection, NULL); TEST_ASSERT(err != ZVEC_OK); + // Test detailed DML result APIs + ZVecDoc *result_doc = zvec_test_create_doc(101, schema, NULL); + TEST_ASSERT(result_doc != NULL); + if (result_doc) { + ZVecDoc *result_docs[] = {result_doc}; + ZVecWriteResult *results = NULL; + size_t result_count = 0; + + err = zvec_collection_upsert_with_results( + collection, (const ZVecDoc **)result_docs, 1, &results, + &result_count); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(result_count == 1); + if (results && result_count == 1) { + TEST_ASSERT(results[0].pk != NULL); + if (results[0].pk) { + TEST_ASSERT(strcmp(results[0].pk, "pk_101") == 0); + } + TEST_ASSERT(results[0].code == ZVEC_OK); + zvec_write_results_free(results, result_count); + } + + const char *delete_pks[] = {"pk_101"}; + results = NULL; + result_count = 0; + err = zvec_collection_delete_with_results(collection, delete_pks, 1, + &results, &result_count); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(result_count == 1); + if (results && result_count == 1) { + TEST_ASSERT(results[0].pk != NULL); + if (results[0].pk) { + TEST_ASSERT(strcmp(results[0].pk, "pk_101") == 0); + } + zvec_write_results_free(results, result_count); + } + + zvec_doc_destroy(result_doc); + } + zvec_collection_destroy(collection); } @@ -3402,6 +3473,100 @@ void test_collection_dml_functions(void) { TEST_END(); } +void test_collection_nullable_roundtrip(void) { + TEST_START(); + + char temp_dir[] = "/tmp/zvec_test_collection_nullable_roundtrip"; + zvec_test_delete_dir(temp_dir); + + ZVecCollectionSchema *schema = zvec_test_create_temp_schema(); + TEST_ASSERT(schema != NULL); + if (!schema) { + TEST_END(); + return; + } + + ZVecCollection *collection = NULL; + ZVecErrorCode err = + zvec_collection_create_and_open(temp_dir, schema, NULL, &collection); + TEST_ASSERT(err == ZVEC_OK); + 
TEST_ASSERT(collection != NULL); + + if (collection) { + ZVecDoc *doc = zvec_doc_create(); + TEST_ASSERT(doc != NULL); + if (doc) { + zvec_doc_set_pk(doc, "pk_nullable"); + + int64_t id = 77; + err = zvec_doc_add_field_by_value(doc, "id", ZVEC_DATA_TYPE_INT64, &id, + sizeof(id)); + TEST_ASSERT(err == ZVEC_OK); + + const char *name = "nullable"; + err = zvec_doc_add_field_by_value(doc, "name", ZVEC_DATA_TYPE_STRING, + name, strlen(name)); + TEST_ASSERT(err == ZVEC_OK); + + // "weight" in temp schema is nullable. + err = zvec_doc_set_field_null(doc, "weight"); + TEST_ASSERT(err == ZVEC_OK); + + float dense[128]; + for (size_t i = 0; i < 128; ++i) { + dense[i] = (float)i / 128.0f; + } + err = zvec_doc_add_field_by_value(doc, "dense", ZVEC_DATA_TYPE_VECTOR_FP32, + dense, sizeof(dense)); + TEST_ASSERT(err == ZVEC_OK); + + uint32_t nnz = 3; + uint32_t sparse_indices[] = {1, 5, 9}; + float sparse_values[] = {0.2f, 0.5f, 0.9f}; + char sparse_buffer[sizeof(nnz) + sizeof(sparse_indices) + + sizeof(sparse_values)]; + memcpy(sparse_buffer, &nnz, sizeof(nnz)); + memcpy(sparse_buffer + sizeof(nnz), sparse_indices, sizeof(sparse_indices)); + memcpy(sparse_buffer + sizeof(nnz) + sizeof(sparse_indices), sparse_values, + sizeof(sparse_values)); + err = zvec_doc_add_field_by_value( + doc, "sparse", ZVEC_DATA_TYPE_SPARSE_VECTOR_FP32, sparse_buffer, + sizeof(sparse_buffer)); + TEST_ASSERT(err == ZVEC_OK); + + ZVecDoc *docs[] = {doc}; + size_t success_count = 0; + size_t error_count = 0; + err = zvec_collection_upsert(collection, (const ZVecDoc **)docs, 1, + &success_count, &error_count); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(success_count == 1); + TEST_ASSERT(error_count == 0); + + const char *pks[] = {"pk_nullable"}; + ZVecDoc **fetched = NULL; + size_t fetched_count = 0; + err = zvec_collection_fetch(collection, pks, 1, &fetched, &fetched_count); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(fetched_count == 1); + if (fetched && fetched_count == 1) { + 
TEST_ASSERT(zvec_doc_has_field(fetched[0], "weight") == true); + TEST_ASSERT(zvec_doc_has_field_value(fetched[0], "weight") == false); + TEST_ASSERT(zvec_doc_is_field_null(fetched[0], "weight") == true); + } + zvec_docs_free(fetched, fetched_count); + zvec_doc_destroy(doc); + } + + zvec_collection_destroy(collection); + } + + zvec_collection_schema_destroy(schema); + zvec_test_delete_dir(temp_dir); + + TEST_END(); +} + // ============================================================================= // Actual Query Execution Tests // ============================================================================= @@ -4348,12 +4513,14 @@ int main(void) { test_collection_stats(); test_collection_stats_functions(); test_collection_dml_functions(); + test_collection_nullable_roundtrip(); test_collection_ddl_operations(); // Doc-related tests test_doc_creation(); test_doc_primary_key(); test_doc_basic_operations(); + test_doc_null_field_api(); test_doc_get_field_value_basic(); test_doc_get_field_value_copy(); test_doc_get_field_value_pointer(); From 58bfcf15c39ed4a7181ad98a42a78990cac7035c Mon Sep 17 00:00:00 2001 From: lc285652 Date: Wed, 18 Mar 2026 12:48:50 +0800 Subject: [PATCH 3/7] format some code --- src/c_api/c_api.cc | 54 ++++++++++++++++++++-------------------- src/include/zvec/c_api.h | 4 +-- tests/c_api/c_api_test.c | 23 +++++++++-------- 3 files changed, 41 insertions(+), 40 deletions(-) diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index 81e534b6..5f58e8c6 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -859,7 +859,8 @@ static std::vector collect_doc_pks(const ZVecDoc **docs, pks.emplace_back(""); continue; } - auto doc_ptr = reinterpret_cast *>(docs[i]); + auto doc_ptr = + reinterpret_cast *>(docs[i]); pks.emplace_back((*doc_ptr)->pk_ref()); } return pks; @@ -5213,9 +5214,11 @@ default: { return error_code;) } - ZVecErrorCode zvec_collection_insert_with_results( - ZVecCollection *collection, const ZVecDoc **docs, size_t doc_count, - 
ZVecWriteResult **results, size_t *result_count) { + ZVecErrorCode zvec_collection_insert_with_results(ZVecCollection *collection, + const ZVecDoc **docs, + size_t doc_count, + ZVecWriteResult **results, + size_t *result_count) { if (!collection || !docs || doc_count == 0 || !results || !result_count) { set_last_error( "Invalid arguments: collection, docs, doc_count, results and " @@ -5238,9 +5241,7 @@ default: { auto result = (*coll_ptr)->Insert(internal_docs); ZVecErrorCode error_code = handle_expected_result(result); - if (error_code != ZVEC_OK) { - return error_code; - } + if (error_code != ZVEC_OK) { return error_code; } return build_write_results(result.value(), pks, results, result_count);) } @@ -5283,9 +5284,11 @@ default: { return error_code;) } - ZVecErrorCode zvec_collection_update_with_results( - ZVecCollection *collection, const ZVecDoc **docs, size_t doc_count, - ZVecWriteResult **results, size_t *result_count) { + ZVecErrorCode zvec_collection_update_with_results(ZVecCollection *collection, + const ZVecDoc **docs, + size_t doc_count, + ZVecWriteResult **results, + size_t *result_count) { if (!collection || !docs || doc_count == 0 || !results || !result_count) { set_last_error( "Invalid arguments: collection, docs, doc_count, results and " @@ -5308,9 +5311,7 @@ default: { auto result = (*coll_ptr)->Update(internal_docs); ZVecErrorCode error_code = handle_expected_result(result); - if (error_code != ZVEC_OK) { - return error_code; - } + if (error_code != ZVEC_OK) { return error_code; } return build_write_results(result.value(), pks, results, result_count);) } @@ -5353,9 +5354,11 @@ default: { return error_code;) } - ZVecErrorCode zvec_collection_upsert_with_results( - ZVecCollection *collection, const ZVecDoc **docs, size_t doc_count, - ZVecWriteResult **results, size_t *result_count) { + ZVecErrorCode zvec_collection_upsert_with_results(ZVecCollection *collection, + const ZVecDoc **docs, + size_t doc_count, + ZVecWriteResult **results, + size_t 
*result_count) { if (!collection || !docs || doc_count == 0 || !results || !result_count) { set_last_error( "Invalid arguments: collection, docs, doc_count, results and " @@ -5378,9 +5381,7 @@ default: { auto result = (*coll_ptr)->Upsert(internal_docs); ZVecErrorCode error_code = handle_expected_result(result); - if (error_code != ZVEC_OK) { - return error_code; - } + if (error_code != ZVEC_OK) { return error_code; } return build_write_results(result.value(), pks, results, result_count);) } @@ -5427,9 +5428,11 @@ default: { return error_code;) } - ZVecErrorCode zvec_collection_delete_with_results( - ZVecCollection *collection, const char *const *pks, size_t pk_count, - ZVecWriteResult **results, size_t *result_count) { + ZVecErrorCode zvec_collection_delete_with_results(ZVecCollection *collection, + const char *const *pks, + size_t pk_count, + ZVecWriteResult **results, + size_t *result_count) { if (!collection || !pks || pk_count == 0 || !results || !result_count) { set_last_error( "Invalid arguments: collection, pks, pk_count, results and " @@ -5457,9 +5460,7 @@ default: { auto result = (*coll_ptr)->Delete(primary_keys); ZVecErrorCode error_code = handle_expected_result(result); - if (error_code != ZVEC_OK) { - return error_code; - } + if (error_code != ZVEC_OK) { return error_code; } return build_write_results(result.value(), primary_keys, results, result_count);) @@ -5985,6 +5986,5 @@ default: { auto schema_result = (*coll_ptr)->Schema(); if (schema_result.has_value()) { normalize_nullable_fields_for_fetch(schema_result.value(), doc_map); - } - return convert_fetched_document_results(doc_map, results, doc_count);) + } return convert_fetched_document_results(doc_map, results, doc_count);) } diff --git a/src/include/zvec/c_api.h b/src/include/zvec/c_api.h index 1d0c1337..95bec9c5 100644 --- a/src/include/zvec/c_api.h +++ b/src/include/zvec/c_api.h @@ -1759,8 +1759,8 @@ ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_delete_with_results( * @param results 
Result array pointer * @param result_count Number of entries in result array */ -ZVEC_EXPORT void ZVEC_CALL -zvec_write_results_free(ZVecWriteResult *results, size_t result_count); +ZVEC_EXPORT void ZVEC_CALL zvec_write_results_free(ZVecWriteResult *results, + size_t result_count); /** * @brief Delete documents by filter condition diff --git a/tests/c_api/c_api_test.c b/tests/c_api/c_api_test.c index 0c553e1e..f3da89b6 100644 --- a/tests/c_api/c_api_test.c +++ b/tests/c_api/c_api_test.c @@ -3427,9 +3427,9 @@ void test_collection_dml_functions(void) { ZVecWriteResult *results = NULL; size_t result_count = 0; - err = zvec_collection_upsert_with_results( - collection, (const ZVecDoc **)result_docs, 1, &results, - &result_count); + err = zvec_collection_upsert_with_results(collection, + (const ZVecDoc **)result_docs, + 1, &results, &result_count); TEST_ASSERT(err == ZVEC_OK); TEST_ASSERT(result_count == 1); if (results && result_count == 1) { @@ -3516,8 +3516,8 @@ void test_collection_nullable_roundtrip(void) { for (size_t i = 0; i < 128; ++i) { dense[i] = (float)i / 128.0f; } - err = zvec_doc_add_field_by_value(doc, "dense", ZVEC_DATA_TYPE_VECTOR_FP32, - dense, sizeof(dense)); + err = zvec_doc_add_field_by_value( + doc, "dense", ZVEC_DATA_TYPE_VECTOR_FP32, dense, sizeof(dense)); TEST_ASSERT(err == ZVEC_OK); uint32_t nnz = 3; @@ -3526,12 +3526,13 @@ void test_collection_nullable_roundtrip(void) { char sparse_buffer[sizeof(nnz) + sizeof(sparse_indices) + sizeof(sparse_values)]; memcpy(sparse_buffer, &nnz, sizeof(nnz)); - memcpy(sparse_buffer + sizeof(nnz), sparse_indices, sizeof(sparse_indices)); - memcpy(sparse_buffer + sizeof(nnz) + sizeof(sparse_indices), sparse_values, - sizeof(sparse_values)); - err = zvec_doc_add_field_by_value( - doc, "sparse", ZVEC_DATA_TYPE_SPARSE_VECTOR_FP32, sparse_buffer, - sizeof(sparse_buffer)); + memcpy(sparse_buffer + sizeof(nnz), sparse_indices, + sizeof(sparse_indices)); + memcpy(sparse_buffer + sizeof(nnz) + sizeof(sparse_indices), + 
sparse_values, sizeof(sparse_values)); + err = zvec_doc_add_field_by_value(doc, "sparse", + ZVEC_DATA_TYPE_SPARSE_VECTOR_FP32, + sparse_buffer, sizeof(sparse_buffer)); TEST_ASSERT(err == ZVEC_OK); ZVecDoc *docs[] = {doc}; From adb62ef9d39535110c39d577a67e706e88684833 Mon Sep 17 00:00:00 2001 From: lc285652 Date: Thu, 19 Mar 2026 09:52:26 +0800 Subject: [PATCH 4/7] Flattened index parameters structure --- examples/c_api/basic_example.c | 21 +- examples/c_api/collection_schema_example.c | 32 +- examples/c_api/doc_example.c | 16 +- examples/c_api/field_schema_example.c | 23 +- examples/c_api/index_example.c | 49 +- examples/c_api/optimized_example.c | 12 +- src/c_api/c_api.cc | 696 ++++++--------------- src/include/zvec/c_api.h | 387 ++++-------- tests/c_api/c_api_test.c | 321 +++++----- tests/c_api/utils.c | 128 ++-- tests/c_api/utils.h | 29 +- 11 files changed, 596 insertions(+), 1118 deletions(-) diff --git a/examples/c_api/basic_example.c b/examples/c_api/basic_example.c index e4efbdfd..2e912248 100644 --- a/examples/c_api/basic_example.c +++ b/examples/c_api/basic_example.c @@ -45,11 +45,14 @@ static ZVecErrorCode create_simple_test_collection( ZVecErrorCode error = ZVEC_OK; - // Create index parameters - ZVecInvertIndexParams *invert_params = - zvec_index_params_invert_create(true, false); - ZVecHnswIndexParams *hnsw_params = zvec_index_params_hnsw_create( - ZVEC_METRIC_TYPE_COSINE, ZVEC_QUANTIZE_TYPE_UNDEFINED, 16, 200, 50); + // Create index parameters using new macros + // clang-format off + ZVecIndexParams invert_params_val = ZVEC_INVERT_PARAMS(true, false); + ZVecIndexParams hnsw_params_val = ZVEC_HNSW_PARAMS( + ZVEC_METRIC_TYPE_COSINE, 16, 200, 50, ZVEC_QUANTIZE_TYPE_UNDEFINED); + // clang-format on + ZVecIndexParams *invert_params = &invert_params_val; + ZVecIndexParams *hnsw_params = &hnsw_params_val; // Create and add ID field (primary key) ZVecFieldSchema *id_field = @@ -58,8 +61,6 @@ static ZVecErrorCode create_simple_test_collection( error = 
zvec_collection_schema_add_field(schema, id_field); if (error != ZVEC_OK) { zvec_collection_schema_destroy(schema); - zvec_index_params_invert_destroy(invert_params); - zvec_index_params_hnsw_destroy(hnsw_params); return error; } @@ -70,8 +71,6 @@ static ZVecErrorCode create_simple_test_collection( error = zvec_collection_schema_add_field(schema, text_field); if (error != ZVEC_OK) { zvec_collection_schema_destroy(schema); - zvec_index_params_invert_destroy(invert_params); - zvec_index_params_hnsw_destroy(hnsw_params); return error; } @@ -82,8 +81,6 @@ static ZVecErrorCode create_simple_test_collection( error = zvec_collection_schema_add_field(schema, embedding_field); if (error != ZVEC_OK) { zvec_collection_schema_destroy(schema); - zvec_index_params_invert_destroy(invert_params); - zvec_index_params_hnsw_destroy(hnsw_params); return error; } @@ -96,8 +93,6 @@ static ZVecErrorCode create_simple_test_collection( // Cleanup resources zvec_collection_schema_destroy(schema); - zvec_index_params_invert_destroy(invert_params); - zvec_index_params_hnsw_destroy(hnsw_params); return error; } diff --git a/examples/c_api/collection_schema_example.c b/examples/c_api/collection_schema_example.c index d69ca989..183cc270 100644 --- a/examples/c_api/collection_schema_example.c +++ b/examples/c_api/collection_schema_example.c @@ -54,10 +54,14 @@ int main() { (unsigned long long)schema->max_doc_count_per_segment); // 3. 
Create index parameters - ZVecInvertIndexParams *invert_params = - zvec_index_params_invert_create(true, false); - ZVecHnswIndexParams *hnsw_params = zvec_index_params_hnsw_create( - ZVEC_METRIC_TYPE_L2, ZVEC_QUANTIZE_TYPE_UNDEFINED, 16, 200, 50); + // clang-format off + ZVecIndexParams invert_params_val = ZVEC_INVERT_PARAMS(true, false); + // clang-format on + ZVecIndexParams *invert_params = &invert_params_val; + // clang-format off + ZVecIndexParams hnsw_params_val = ZVEC_HNSW_PARAMS(ZVEC_METRIC_TYPE_L2, 16, 200, 50, ZVEC_QUANTIZE_TYPE_UNDEFINED); + // clang-format on + ZVecIndexParams *hnsw_params = &hnsw_params_val; if (!invert_params || !hnsw_params) { fprintf(stderr, "Failed to create index parameters\n"); @@ -71,16 +75,12 @@ int main() { if (!id_field) { fprintf(stderr, "Failed to create ID field\n"); zvec_collection_schema_destroy(schema); - zvec_index_params_invert_destroy(invert_params); - zvec_index_params_hnsw_destroy(hnsw_params); return 1; } error = zvec_collection_schema_add_field(schema, id_field); if (handle_error(error, "adding ID field") != ZVEC_OK) { zvec_collection_schema_destroy(schema); - zvec_index_params_invert_destroy(invert_params); - zvec_index_params_hnsw_destroy(hnsw_params); return 1; } printf("✓ ID field added successfully\n"); @@ -91,8 +91,6 @@ int main() { if (!text_field) { fprintf(stderr, "Failed to create text field\n"); zvec_collection_schema_destroy(schema); - zvec_index_params_invert_destroy(invert_params); - zvec_index_params_hnsw_destroy(hnsw_params); return 1; } @@ -100,8 +98,6 @@ int main() { error = zvec_collection_schema_add_field(schema, text_field); if (handle_error(error, "adding text field") != ZVEC_OK) { zvec_collection_schema_destroy(schema); - zvec_index_params_invert_destroy(invert_params); - zvec_index_params_hnsw_destroy(hnsw_params); return 1; } printf("✓ Text field with inverted index added successfully\n"); @@ -112,8 +108,6 @@ int main() { if (!vector_field) { fprintf(stderr, "Failed to create vector 
field\n"); zvec_collection_schema_destroy(schema); - zvec_index_params_invert_destroy(invert_params); - zvec_index_params_hnsw_destroy(hnsw_params); return 1; } @@ -121,8 +115,6 @@ int main() { error = zvec_collection_schema_add_field(schema, vector_field); if (handle_error(error, "adding vector field") != ZVEC_OK) { zvec_collection_schema_destroy(schema); - zvec_index_params_invert_destroy(invert_params); - zvec_index_params_hnsw_destroy(hnsw_params); return 1; } printf("✓ Vector field with HNSW index added successfully\n"); @@ -140,8 +132,6 @@ int main() { &options, &collection); if (handle_error(error, "creating collection with schema") != ZVEC_OK) { zvec_collection_schema_destroy(schema); - zvec_index_params_invert_destroy(invert_params); - zvec_index_params_hnsw_destroy(hnsw_params); return 1; } printf("✓ Collection created successfully with schema\n"); @@ -166,8 +156,6 @@ int main() { } zvec_collection_destroy(collection); zvec_collection_schema_destroy(schema); - zvec_index_params_invert_destroy(invert_params); - zvec_index_params_hnsw_destroy(hnsw_params); return 1; } } @@ -203,8 +191,6 @@ int main() { } zvec_collection_destroy(collection); zvec_collection_schema_destroy(schema); - zvec_index_params_invert_destroy(invert_params); - zvec_index_params_hnsw_destroy(hnsw_params); return 1; } printf("✓ Documents inserted - Success: %zu, Failed: %zu\n", success_count, @@ -245,8 +231,6 @@ int main() { // 14. Cleanup resources zvec_collection_destroy(collection); zvec_collection_schema_destroy(schema); - zvec_index_params_invert_destroy(invert_params); - zvec_index_params_hnsw_destroy(hnsw_params); printf("✓ Schema example completed\n"); return 0; diff --git a/examples/c_api/doc_example.c b/examples/c_api/doc_example.c index b0e06624..8d8574bb 100644 --- a/examples/c_api/doc_example.c +++ b/examples/c_api/doc_example.c @@ -275,10 +275,14 @@ int main() { printf("✓ Collection schema created\n"); // 2. 
Create index parameters - ZVecInvertIndexParams *invert_params = - zvec_index_params_invert_create(true, false); - ZVecHnswIndexParams *hnsw_params = zvec_index_params_hnsw_create( - ZVEC_METRIC_TYPE_L2, ZVEC_QUANTIZE_TYPE_UNDEFINED, 16, 200, 50); + // clang-format off + ZVecIndexParams invert_params_val = ZVEC_INVERT_PARAMS(true, false); + // clang-format on + ZVecIndexParams *invert_params = &invert_params_val; + // clang-format off + ZVecIndexParams hnsw_params_val = ZVEC_HNSW_PARAMS(ZVEC_METRIC_TYPE_L2, 16, 200, 50, ZVEC_QUANTIZE_TYPE_UNDEFINED); + // clang-format on + ZVecIndexParams *hnsw_params = &hnsw_params_val; if (!invert_params || !hnsw_params) { fprintf(stderr, "Failed to create index parameters\n"); @@ -351,8 +355,6 @@ int main() { &options, &collection); if (handle_error(error, "creating collection") != ZVEC_OK) { zvec_collection_schema_destroy(schema); - zvec_index_params_invert_destroy(invert_params); - zvec_index_params_hnsw_destroy(hnsw_params); return -1; } printf("✓ Collection created successfully\n"); @@ -511,8 +513,6 @@ int main() { cleanup: zvec_collection_destroy(collection); zvec_collection_schema_destroy(schema); - zvec_index_params_invert_destroy(invert_params); - zvec_index_params_hnsw_destroy(hnsw_params); printf("✓ Document example completed\n"); diff --git a/examples/c_api/field_schema_example.c b/examples/c_api/field_schema_example.c index 8db81d8d..c41d0817 100644 --- a/examples/c_api/field_schema_example.c +++ b/examples/c_api/field_schema_example.c @@ -49,12 +49,19 @@ int main() { printf("✓ Collection schema created successfully\n"); // 2. 
Create different types of index parameters - ZVecInvertIndexParams *invert_params = - zvec_index_params_invert_create(true, false); - ZVecHnswIndexParams *hnsw_params = zvec_index_params_hnsw_create( - ZVEC_METRIC_TYPE_COSINE, ZVEC_QUANTIZE_TYPE_UNDEFINED, 16, 200, 50); - ZVecFlatIndexParams *flat_params = zvec_index_params_flat_create( + // clang-format off + ZVecIndexParams invert_params_val = ZVEC_INVERT_PARAMS(true, false); + // clang-format on + ZVecIndexParams *invert_params = &invert_params_val; + // clang-format off + ZVecIndexParams hnsw_params_val = ZVEC_HNSW_PARAMS(ZVEC_METRIC_TYPE_COSINE, 16, 200, 50, ZVEC_QUANTIZE_TYPE_UNDEFINED); + // clang-format on + ZVecIndexParams *hnsw_params = &hnsw_params_val; + // clang-format off + ZVecIndexParams flat_params_val = ZVEC_FLAT_PARAMS( ZVEC_METRIC_TYPE_L2, ZVEC_QUANTIZE_TYPE_UNDEFINED); + // clang-format on + ZVecIndexParams *flat_params = &flat_params_val; if (!invert_params || !hnsw_params || !flat_params) { fprintf(stderr, "Failed to create index parameters\n"); @@ -154,9 +161,6 @@ int main() { &options, &collection); if (handle_error(error, "creating collection") != ZVEC_OK) { zvec_collection_schema_destroy(schema); - zvec_index_params_invert_destroy(invert_params); - zvec_index_params_hnsw_destroy(hnsw_params); - zvec_index_params_flat_destroy(flat_params); return -1; } printf("✓ Collection created successfully\n"); @@ -273,9 +277,6 @@ int main() { if (doc2) zvec_doc_destroy(doc2); zvec_collection_destroy(collection); zvec_collection_schema_destroy(schema); - zvec_index_params_invert_destroy(invert_params); - zvec_index_params_hnsw_destroy(hnsw_params); - zvec_index_params_flat_destroy(flat_params); printf("✓ Field schema example completed\n"); return 0; diff --git a/examples/c_api/index_example.c b/examples/c_api/index_example.c index f4362ac0..72877263 100644 --- a/examples/c_api/index_example.c +++ b/examples/c_api/index_example.c @@ -52,24 +52,35 @@ int main() { printf("Creating index parameters...\n"); 
// Inverted index parameters - ZVecInvertIndexParams *invert_params_standard = - zvec_index_params_invert_create(true, false); - ZVecInvertIndexParams *invert_params_extended = - zvec_index_params_invert_create(true, true); + // clang-format off + ZVecIndexParams invert_params_standard_val = ZVEC_INVERT_PARAMS(true, false); + ZVecIndexParams invert_params_extended_val = ZVEC_INVERT_PARAMS(true, true); + // clang-format on + ZVecIndexParams *invert_params_standard = &invert_params_standard_val; + ZVecIndexParams *invert_params_extended = &invert_params_extended_val; // HNSW index parameters with different configurations - ZVecHnswIndexParams *hnsw_params_fast = zvec_index_params_hnsw_create( - ZVEC_METRIC_TYPE_L2, ZVEC_QUANTIZE_TYPE_UNDEFINED, 16, 100, 50); - ZVecHnswIndexParams *hnsw_params_balanced = zvec_index_params_hnsw_create( - ZVEC_METRIC_TYPE_COSINE, ZVEC_QUANTIZE_TYPE_UNDEFINED, 32, 200, 100); - ZVecHnswIndexParams *hnsw_params_accurate = zvec_index_params_hnsw_create( - ZVEC_METRIC_TYPE_IP, ZVEC_QUANTIZE_TYPE_UNDEFINED, 64, 400, 200); + // clang-format off + ZVecIndexParams hnsw_params_fast_val = ZVEC_HNSW_PARAMS( + ZVEC_METRIC_TYPE_L2, 16, 100, 50, ZVEC_QUANTIZE_TYPE_UNDEFINED); + ZVecIndexParams hnsw_params_balanced_val = ZVEC_HNSW_PARAMS( + ZVEC_METRIC_TYPE_COSINE, 32, 200, 100, ZVEC_QUANTIZE_TYPE_UNDEFINED); + ZVecIndexParams hnsw_params_accurate_val = ZVEC_HNSW_PARAMS( + ZVEC_METRIC_TYPE_IP, 64, 400, 200, ZVEC_QUANTIZE_TYPE_UNDEFINED); + // clang-format on + ZVecIndexParams *hnsw_params_fast = &hnsw_params_fast_val; + ZVecIndexParams *hnsw_params_balanced = &hnsw_params_balanced_val; + ZVecIndexParams *hnsw_params_accurate = &hnsw_params_accurate_val; // Flat index parameters - ZVecFlatIndexParams *flat_params_l2 = zvec_index_params_flat_create( + // clang-format off + ZVecIndexParams flat_params_l2_val = ZVEC_FLAT_PARAMS( ZVEC_METRIC_TYPE_L2, ZVEC_QUANTIZE_TYPE_UNDEFINED); - ZVecFlatIndexParams *flat_params_cosine = zvec_index_params_flat_create( + 
ZVecIndexParams flat_params_cosine_val = ZVEC_FLAT_PARAMS( ZVEC_METRIC_TYPE_COSINE, ZVEC_QUANTIZE_TYPE_UNDEFINED); + // clang-format on + ZVecIndexParams *flat_params_l2 = &flat_params_l2_val; + ZVecIndexParams *flat_params_cosine = &flat_params_cosine_val; if (!invert_params_standard || !invert_params_extended || !hnsw_params_fast || !hnsw_params_balanced || !hnsw_params_accurate || !flat_params_l2 || @@ -154,13 +165,6 @@ int main() { if (handle_error(error, "creating collection") != ZVEC_OK) { zvec_collection_schema_destroy(schema); // Cleanup index parameters - zvec_index_params_invert_destroy(invert_params_standard); - zvec_index_params_invert_destroy(invert_params_extended); - zvec_index_params_hnsw_destroy(hnsw_params_fast); - zvec_index_params_hnsw_destroy(hnsw_params_balanced); - zvec_index_params_hnsw_destroy(hnsw_params_accurate); - zvec_index_params_flat_destroy(flat_params_l2); - zvec_index_params_flat_destroy(flat_params_cosine); return -1; } printf("✓ Collection created successfully\n"); @@ -317,13 +321,6 @@ int main() { zvec_collection_schema_destroy(schema); // Cleanup index parameters - zvec_index_params_invert_destroy(invert_params_standard); - zvec_index_params_invert_destroy(invert_params_extended); - zvec_index_params_hnsw_destroy(hnsw_params_fast); - zvec_index_params_hnsw_destroy(hnsw_params_balanced); - zvec_index_params_hnsw_destroy(hnsw_params_accurate); - zvec_index_params_flat_destroy(flat_params_l2); - zvec_index_params_flat_destroy(flat_params_cosine); printf("✓ Index example completed\n"); return 0; diff --git a/examples/c_api/optimized_example.c b/examples/c_api/optimized_example.c index 3441af6e..2f87c93d 100644 --- a/examples/c_api/optimized_example.c +++ b/examples/c_api/optimized_example.c @@ -70,12 +70,11 @@ int main() { printf("✓ Collection schema created\n"); // 2. 
Create optimized index parameters - ZVecHnswIndexParams *hnsw_params = zvec_index_params_hnsw_create( - ZVEC_METRIC_TYPE_L2, ZVEC_QUANTIZE_TYPE_UNDEFINED, - 32, // Higher M for better connectivity - 200, // Construction ef for quality - 50 // Search ef for performance - ); + // clang-format off + ZVecIndexParams hnsw_params_val = ZVEC_HNSW_PARAMS( + ZVEC_METRIC_TYPE_L2, 32, 200, 50, ZVEC_QUANTIZE_TYPE_UNDEFINED); + // clang-format on + ZVecIndexParams *hnsw_params = &hnsw_params_val; if (!hnsw_params) { fprintf(stderr, "Failed to create HNSW parameters\n"); @@ -294,7 +293,6 @@ int main() { cleanup_params: zvec_collection_schema_destroy(schema); - zvec_index_params_hnsw_destroy(hnsw_params); printf("✓ Optimized example completed\n"); diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index 5f58e8c6..20834a20 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -917,36 +917,42 @@ static zvec::QuantizeType convert_quantize_type(ZVecQuantizeType zvec_type) { // Helper function: set field index params static zvec::Status set_field_index_params(zvec::FieldSchema::Ptr &field_schema, const ZVecFieldSchema *zvec_field) { - if (!zvec_field->index_params) { + if (!zvec_field->has_index) { return zvec::Status::OK(); } - switch (zvec_field->index_params->index_type) { + const ZVecIndexParams *params = &zvec_field->index_params; + + switch (params->index_type) { case ZVEC_INDEX_TYPE_HNSW: { - const ZVecHnswIndexParams *params = - &zvec_field->index_params->params.hnsw_params; - auto metric = convert_metric_type(params->base.metric_type); - auto quantize = convert_quantize_type(params->base.quantize_type); + auto metric = convert_metric_type(params->metric_type); + auto quantize = convert_quantize_type(params->quantize_type); auto index_params = std::make_shared( - metric, params->m, params->ef_construction, quantize); + metric, params->hnsw.m, params->hnsw.ef_construction, quantize); field_schema->set_index_params(index_params); break; } case ZVEC_INDEX_TYPE_FLAT: { - 
const ZVecFlatIndexParams *params = - &zvec_field->index_params->params.flat_params; - auto metric = convert_metric_type(params->base.metric_type); - auto quantize = convert_quantize_type(params->base.quantize_type); + auto metric = convert_metric_type(params->metric_type); + auto quantize = convert_quantize_type(params->quantize_type); auto index_params = std::make_shared(metric, quantize); field_schema->set_index_params(index_params); break; } case ZVEC_INDEX_TYPE_INVERT: { - const ZVecInvertIndexParams *params = - &zvec_field->index_params->params.invert_params; auto index_params = std::make_shared( - params->enable_range_optimization, params->enable_extended_wildcard); + params->invert.enable_range_optimization, + params->invert.enable_extended_wildcard); + field_schema->set_index_params(index_params); + break; + } + case ZVEC_INDEX_TYPE_IVF: { + auto metric = convert_metric_type(params->metric_type); + auto quantize = convert_quantize_type(params->quantize_type); + auto index_params = std::make_shared( + metric, params->ivf.n_list, params->ivf.n_iters, params->ivf.use_soar, + quantize); field_schema->set_index_params(index_params); break; } @@ -1120,9 +1126,7 @@ void zvec_free_ptr(void *ptr) { void zvec_free_field_schema(ZVecFieldSchema *field_schema) { if (field_schema) { - if (field_schema->index_params) { - zvec_index_params_destroy(field_schema->index_params); - } + // index_params is embedded, no need to free free(field_schema); } } @@ -1131,73 +1135,8 @@ void zvec_free_field_schema(ZVecFieldSchema *field_schema) { // Index parameters management interface implementation // ============================================================================= -void zvec_index_params_base_init(ZVecBaseIndexParams *params, - ZVecIndexType index_type) { - if (params) { - params->index_type = index_type; - } -} - -void zvec_index_params_invert_init(ZVecInvertIndexParams *params, - bool enable_range_opt, - bool enable_wildcard) { - if (params) { - 
zvec_index_params_base_init(¶ms->base, ZVEC_INDEX_TYPE_INVERT); - params->enable_range_optimization = enable_range_opt; - params->enable_extended_wildcard = enable_wildcard; - } -} - -void zvec_index_params_vector_init(ZVecVectorIndexParams *params, - ZVecIndexType index_type, - ZVecMetricType metric_type, - ZVecQuantizeType quantize_type) { - if (params) { - zvec_index_params_base_init(¶ms->base, index_type); - params->metric_type = metric_type; - params->quantize_type = quantize_type; - } -} - -void zvec_index_params_hnsw_init(ZVecHnswIndexParams *params, - ZVecMetricType metric_type, int m, - int ef_construction, int ef_search, - ZVecQuantizeType quantize_type) { - if (params) { - zvec_index_params_vector_init(¶ms->base, ZVEC_INDEX_TYPE_HNSW, - metric_type, quantize_type); - params->m = m; - params->ef_construction = ef_construction; - params->ef_search = ef_search; - } -} - -void zvec_index_params_flat_init(ZVecFlatIndexParams *params, - ZVecMetricType metric_type, - ZVecQuantizeType quantize_type) { - if (params) { - zvec_index_params_vector_init(¶ms->base, ZVEC_INDEX_TYPE_FLAT, - metric_type, quantize_type); - } -} - -void zvec_index_params_ivf_init(ZVecIVFIndexParams *params, - ZVecMetricType metric_type, int n_list, - int n_iters, bool use_soar, int n_probe, - ZVecQuantizeType quantize_type) { - if (params) { - zvec_index_params_vector_init(¶ms->base, ZVEC_INDEX_TYPE_IVF, - metric_type, quantize_type); - params->n_list = n_list; - params->n_iters = n_iters; - params->use_soar = use_soar; - params->n_probe = n_probe; - } -} - -void zvec_index_params_init_default(ZVecIndexParams *params, - ZVecIndexType index_type, - ZVecMetricType metric_type) { +void zvec_index_params_init(ZVecIndexParams *params, ZVecIndexType index_type, + ZVecMetricType metric_type) { if (!params) { set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, "Index params pointer cannot be null", __FILE__, @@ -1205,27 +1144,35 @@ void zvec_index_params_init_default(ZVecIndexParams *params, 
return; } + // Zero-initialize the entire structure + memset(params, 0, sizeof(ZVecIndexParams)); + params->index_type = index_type; + params->metric_type = metric_type; + params->quantize_type = ZVEC_QUANTIZE_TYPE_UNDEFINED; + // Set default values based on index type switch (index_type) { case ZVEC_INDEX_TYPE_INVERT: - zvec_index_params_invert_init(¶ms->params.invert_params, false, - false); + params->invert.enable_range_optimization = false; + params->invert.enable_extended_wildcard = false; break; case ZVEC_INDEX_TYPE_HNSW: - zvec_index_params_hnsw_init(¶ms->params.hnsw_params, metric_type, 16, - 200, 50, ZVEC_QUANTIZE_TYPE_UNDEFINED); + params->hnsw.m = 16; + params->hnsw.ef_construction = 200; + params->hnsw.ef_search = 50; break; case ZVEC_INDEX_TYPE_FLAT: - zvec_index_params_flat_init(¶ms->params.flat_params, metric_type, - ZVEC_QUANTIZE_TYPE_UNDEFINED); + // No additional parameters for Flat break; case ZVEC_INDEX_TYPE_IVF: - zvec_index_params_ivf_init(¶ms->params.ivf_params, metric_type, 100, - 10, false, 10, ZVEC_QUANTIZE_TYPE_UNDEFINED); + params->ivf.n_list = 100; + params->ivf.n_iters = 10; + params->ivf.use_soar = false; + params->ivf.n_probe = 10; break; default: @@ -1235,130 +1182,43 @@ void zvec_index_params_init_default(ZVecIndexParams *params, } } -void zvec_index_params_destroy(ZVecIndexParams *params) { - if (params) { - free(params); - } -} - -ZVecInvertIndexParams *zvec_index_params_invert_create(bool enable_range_opt, - bool enable_wildcard) { - ZVecInvertIndexParams *params = static_cast( - malloc(sizeof(ZVecInvertIndexParams))); - if (!params) { - set_last_error_details( - ZVEC_ERROR_RESOURCE_EXHAUSTED, - "Failed to allocate memory for ZVecInvertIndexParams", __FILE__, - __LINE__, __FUNCTION__); - return nullptr; - } - zvec_index_params_base_init(¶ms->base, ZVEC_INDEX_TYPE_INVERT); - params->enable_range_optimization = enable_range_opt; - params->enable_extended_wildcard = enable_wildcard; - return params; -} - -ZVecVectorIndexParams 
*zvec_index_params_vector_create( - ZVecIndexType index_type, ZVecMetricType metric_type, - ZVecQuantizeType quantize_type) { - ZVecVectorIndexParams *params = static_cast( - malloc(sizeof(ZVecVectorIndexParams))); - if (!params) { - set_last_error_details( - ZVEC_ERROR_RESOURCE_EXHAUSTED, - "Failed to allocate memory for ZVecVectorIndexParams", __FILE__, - __LINE__, __FUNCTION__); - return nullptr; - } - zvec_index_params_base_init(¶ms->base, index_type); - params->metric_type = metric_type; - params->quantize_type = quantize_type; - return params; -} - -ZVecHnswIndexParams *zvec_index_params_hnsw_create( - ZVecMetricType metric_type, ZVecQuantizeType quantize_type, int m, - int ef_construction, int ef_search) { - ZVecHnswIndexParams *params = - static_cast(malloc(sizeof(ZVecHnswIndexParams))); - if (!params) { - set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, - "Failed to allocate memory for ZVecHnswIndexParams", - __FILE__, __LINE__, __FUNCTION__); - return nullptr; - } - zvec_index_params_vector_init(¶ms->base, ZVEC_INDEX_TYPE_HNSW, - metric_type, quantize_type); - params->m = m; - params->ef_construction = ef_construction; - params->ef_search = ef_search; - return params; -} - -ZVecFlatIndexParams *zvec_index_params_flat_create( - ZVecMetricType metric_type, ZVecQuantizeType quantize_type) { - ZVecFlatIndexParams *params = - static_cast(malloc(sizeof(ZVecFlatIndexParams))); - if (!params) { - set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, - "Failed to allocate memory for ZVecFlatIndexParams", - __FILE__, __LINE__, __FUNCTION__); - return nullptr; - } - zvec_index_params_vector_init(¶ms->base, ZVEC_INDEX_TYPE_FLAT, - metric_type, quantize_type); - return params; -} - -ZVecIVFIndexParams *zvec_index_params_ivf_create(ZVecMetricType metric_type, - ZVecQuantizeType quantize_type, - int n_list, int n_iters, - bool use_soar, int n_probe) { - ZVecIVFIndexParams *params = - static_cast(malloc(sizeof(ZVecIVFIndexParams))); - if (!params) { - 
set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, - "Failed to allocate memory for ZVecIVFIndexParams", - __FILE__, __LINE__, __FUNCTION__); - return nullptr; - } - zvec_index_params_vector_init(¶ms->base, ZVEC_INDEX_TYPE_IVF, metric_type, - quantize_type); - params->n_list = n_list; - params->n_iters = n_iters; - params->use_soar = use_soar; - params->n_probe = n_probe; - return params; -} - -void zvec_index_params_invert_destroy(ZVecInvertIndexParams *params) { - if (params) { - free(params); - } -} - -void zvec_index_params_vector_destroy(ZVecVectorIndexParams *params) { - if (params) { - free(params); - } -} - -void zvec_index_params_hnsw_destroy(ZVecHnswIndexParams *params) { - if (params) { - free(params); +void zvec_index_params_set_hnsw(ZVecIndexParams *params, int m, + int ef_construction, int ef_search) { + if (!params || params->index_type != ZVEC_INDEX_TYPE_HNSW) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "Invalid params or not HNSW index type", __FILE__, + __LINE__, __FUNCTION__); + return; } + params->hnsw.m = m; + params->hnsw.ef_construction = ef_construction; + params->hnsw.ef_search = ef_search; } -void zvec_index_params_flat_destroy(ZVecFlatIndexParams *params) { - if (params) { - free(params); +void zvec_index_params_set_ivf(ZVecIndexParams *params, int n_list, int n_iters, + bool use_soar, int n_probe) { + if (!params || params->index_type != ZVEC_INDEX_TYPE_IVF) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "Invalid params or not IVF index type", __FILE__, + __LINE__, __FUNCTION__); + return; } + params->ivf.n_list = n_list; + params->ivf.n_iters = n_iters; + params->ivf.use_soar = use_soar; + params->ivf.n_probe = n_probe; } -void zvec_index_params_ivf_destroy(ZVecIVFIndexParams *params) { - if (params) { - free(params); +void zvec_index_params_set_invert(ZVecIndexParams *params, + bool enable_range_opt, bool enable_wildcard) { + if (!params || params->index_type != ZVEC_INDEX_TYPE_INVERT) { + 
set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "Invalid params or not INVERT index type", __FILE__, + __LINE__, __FUNCTION__); + return; } + params->invert.enable_range_optimization = enable_range_opt; + params->invert.enable_extended_wildcard = enable_wildcard; } // ============================================================================= @@ -1396,7 +1256,8 @@ ZVecFieldSchema *zvec_field_schema_create(const char *name, schema->data_type = data_type; schema->nullable = nullable; schema->dimension = dimension; - schema->index_params = nullptr; + memset(&schema->index_params, 0, sizeof(ZVecIndexParams)); + schema->has_index = false; return schema; } @@ -1404,10 +1265,7 @@ ZVecFieldSchema *zvec_field_schema_create(const char *name, void zvec_field_schema_destroy(ZVecFieldSchema *schema) { if (schema) { zvec_free_string(schema->name); - if (schema->index_params) { - zvec_index_params_destroy(schema->index_params); - schema->index_params = nullptr; - } + // index_params is embedded, no need to free free(schema); } } @@ -1422,115 +1280,57 @@ ZVecErrorCode zvec_field_schema_set_index_params( } if (!index_params) { - if (schema->index_params) { - zvec_index_params_destroy(schema->index_params); - free(schema->index_params); - schema->index_params = nullptr; - } + memset(&schema->index_params, 0, sizeof(ZVecIndexParams)); + schema->has_index = false; return ZVEC_OK; } - if (!schema->index_params) { - schema->index_params = - static_cast(malloc(sizeof(ZVecIndexParams))); - if (!schema->index_params) { - set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, - "Failed to allocate memory for ZVecIndexParams", - __FILE__, __LINE__, __FUNCTION__); - return ZVEC_ERROR_RESOURCE_EXHAUSTED; - } - } - - *schema->index_params = *index_params; + schema->index_params = *index_params; + schema->has_index = true; return ZVEC_OK; } -void zvec_field_schema_set_invert_index( - ZVecFieldSchema *field_schema, const ZVecInvertIndexParams *invert_params) { +void 
zvec_field_schema_set_invert_index(ZVecFieldSchema *field_schema, + const ZVecIndexParams *invert_params) { if (field_schema && invert_params) { - if (!field_schema->index_params) { - field_schema->index_params = - static_cast(malloc(sizeof(ZVecIndexParams))); - if (!field_schema->index_params) { - set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, - "Failed to allocate memory for ZVecIndexParams", - __FILE__, __LINE__, __FUNCTION__); - return; - } - } - - field_schema->index_params->index_type = ZVEC_INDEX_TYPE_INVERT; - field_schema->index_params->params.invert_params = *invert_params; + field_schema->index_params = *invert_params; + field_schema->index_params.index_type = ZVEC_INDEX_TYPE_INVERT; + field_schema->has_index = true; } } void zvec_field_schema_set_hnsw_index(ZVecFieldSchema *field_schema, - const ZVecHnswIndexParams *hnsw_params) { + const ZVecIndexParams *hnsw_params) { if (field_schema && hnsw_params) { - if (!field_schema->index_params) { - field_schema->index_params = - static_cast(malloc(sizeof(ZVecIndexParams))); - if (!field_schema->index_params) { - set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, - "Failed to allocate memory for ZVecIndexParams", - __FILE__, __LINE__, __FUNCTION__); - return; - } - } - - field_schema->index_params->index_type = ZVEC_INDEX_TYPE_HNSW; - field_schema->index_params->params.hnsw_params = *hnsw_params; + field_schema->index_params = *hnsw_params; + field_schema->index_params.index_type = ZVEC_INDEX_TYPE_HNSW; + field_schema->has_index = true; } } void zvec_field_schema_set_flat_index(ZVecFieldSchema *field_schema, - const ZVecFlatIndexParams *flat_params) { + const ZVecIndexParams *flat_params) { if (field_schema && flat_params) { - if (!field_schema->index_params) { - field_schema->index_params = - static_cast(malloc(sizeof(ZVecIndexParams))); - if (!field_schema->index_params) { - set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, - "Failed to allocate memory for ZVecIndexParams", - __FILE__, 
__LINE__, __FUNCTION__); - return; - } - } - - field_schema->index_params->index_type = ZVEC_INDEX_TYPE_FLAT; - field_schema->index_params->params.flat_params = *flat_params; + field_schema->index_params = *flat_params; + field_schema->index_params.index_type = ZVEC_INDEX_TYPE_FLAT; + field_schema->has_index = true; } } void zvec_field_schema_set_ivf_index(ZVecFieldSchema *field_schema, - const ZVecIVFIndexParams *ivf_params) { + const ZVecIndexParams *ivf_params) { if (field_schema && ivf_params) { - if (!field_schema->index_params) { - field_schema->index_params = - static_cast(malloc(sizeof(ZVecIndexParams))); - if (!field_schema->index_params) { - set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, - "Failed to allocate memory for ZVecIndexParams", - __FILE__, __LINE__, __FUNCTION__); - return; - } - } - - field_schema->index_params->index_type = ZVEC_INDEX_TYPE_IVF; - field_schema->index_params->params.ivf_params = *ivf_params; + field_schema->index_params = *ivf_params; + field_schema->index_params.index_type = ZVEC_INDEX_TYPE_IVF; + field_schema->has_index = true; } } static void zvec_field_schema_cleanup(ZVecFieldSchema *field_schema) { if (!field_schema) return; - if (field_schema->index_params) { - zvec_index_params_destroy(field_schema->index_params); - free(field_schema->index_params); - field_schema->index_params = nullptr; - } - + // index_params is embedded, no need to free zvec_free_string(field_schema->name); field_schema->name = nullptr; } @@ -1741,22 +1541,8 @@ ZVecErrorCode zvec_collection_schema_add_fields(ZVecCollectionSchema *schema, new_field->data_type = src_field.data_type; new_field->nullable = src_field.nullable; new_field->dimension = src_field.dimension; - - if (src_field.index_params) { - new_field->index_params = - static_cast(malloc(sizeof(ZVecIndexParams))); - if (!new_field->index_params) { - zvec_free_string(new_field->name); - free(new_field); - set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, - "Failed to allocate memory 
for index params", - __FILE__, __LINE__, __FUNCTION__); - return ZVEC_ERROR_RESOURCE_EXHAUSTED; - } - *(new_field->index_params) = *(src_field.index_params); - } else { - new_field->index_params = nullptr; - } + new_field->index_params = src_field.index_params; + new_field->has_index = src_field.has_index; schema->fields[schema->field_count] = new_field; schema->field_count++; @@ -2441,7 +2227,7 @@ static zvec::Status convert_zvec_collection_schema_to_internal( zvec_field.nullable); } - if (zvec_field.index_params != nullptr) { + if (zvec_field.has_index) { zvec::Status status = set_field_index_params(field_schema, &zvec_field); if (!status.ok()) { return status; @@ -2476,33 +2262,38 @@ static zvec::Status convert_zvec_field_schema_to_internal( field_schema = std::make_shared( field_name, data_type, zvec_field.dimension, zvec_field.nullable); - if (zvec_field.index_params != nullptr) { - switch (zvec_field.index_params->index_type) { + if (zvec_field.has_index) { + switch (zvec_field.index_params.index_type) { case ZVEC_INDEX_TYPE_HNSW: { - auto *params = &zvec_field.index_params->params.hnsw_params; - auto metric = convert_metric_type(params->base.metric_type); - auto quantize = convert_quantize_type(params->base.quantize_type); + auto metric = + convert_metric_type(zvec_field.index_params.metric_type); + auto quantize = + convert_quantize_type(zvec_field.index_params.quantize_type); auto index_params = std::make_shared( - metric, params->m, params->ef_construction, quantize); + metric, zvec_field.index_params.hnsw.m, + zvec_field.index_params.hnsw.ef_construction, quantize); field_schema->set_index_params(index_params); break; } case ZVEC_INDEX_TYPE_FLAT: { - auto *params = &zvec_field.index_params->params.flat_params; - auto metric = convert_metric_type(params->base.metric_type); - auto quantize = convert_quantize_type(params->base.quantize_type); + auto metric = + convert_metric_type(zvec_field.index_params.metric_type); + auto quantize = + 
convert_quantize_type(zvec_field.index_params.quantize_type); auto index_params = std::make_shared(metric, quantize); field_schema->set_index_params(index_params); break; } case ZVEC_INDEX_TYPE_IVF: { - auto *params = &zvec_field.index_params->params.ivf_params; - auto metric = convert_metric_type(params->base.metric_type); - auto quantize = convert_quantize_type(params->base.quantize_type); + auto metric = + convert_metric_type(zvec_field.index_params.metric_type); + auto quantize = + convert_quantize_type(zvec_field.index_params.quantize_type); auto index_params = std::make_shared( - metric, params->n_list, params->n_iters, params->use_soar, - quantize); + metric, zvec_field.index_params.ivf.n_list, + zvec_field.index_params.ivf.n_iters, + zvec_field.index_params.ivf.use_soar, quantize); field_schema->set_index_params(index_params); break; } @@ -2519,11 +2310,11 @@ static zvec::Status convert_zvec_field_schema_to_internal( field_schema = std::make_shared(field_name, data_type, zvec_field.nullable); - if (zvec_field.index_params != nullptr && - zvec_field.index_params->index_type == ZVEC_INDEX_TYPE_INVERT) { - auto *params = &zvec_field.index_params->params.invert_params; + if (zvec_field.has_index && + zvec_field.index_params.index_type == ZVEC_INDEX_TYPE_INVERT) { auto index_params = std::make_shared( - params->enable_range_optimization, params->enable_extended_wildcard); + zvec_field.index_params.invert.enable_range_optimization, + zvec_field.index_params.invert.enable_extended_wildcard); field_schema->set_index_params(index_params); } } @@ -4341,158 +4132,105 @@ ZVecErrorCode zvec_collection_get_schema(const ZVecCollection *collection, // Copy nullable flag c_schema->fields[i]->nullable = cpp_field->nullable(); - // Initialize index parameters - c_schema->fields[i]->index_params = nullptr; + // Initialize index parameters (embedded, not pointer) + memset(&c_schema->fields[i]->index_params, 0, + sizeof(ZVecIndexParams)); + c_schema->fields[i]->has_index = 
false; // Convert index parameters based on the actual type auto index_params = cpp_field->index_params(); if (index_params) { switch (index_params->type()) { case zvec::IndexType::HNSW: { - // Cast to HnswIndexParams and convert auto hnsw_params = std::dynamic_pointer_cast( index_params); if (hnsw_params) { - auto c_hnsw_params = static_cast( - malloc(sizeof(ZVecHnswIndexParams))); - if (!c_hnsw_params) { - throw std::bad_alloc(); - } - - // Initialize the base vector index parameters - c_hnsw_params->base.base.index_type = + c_schema->fields[i]->index_params.index_type = ZVEC_INDEX_TYPE_HNSW; - c_hnsw_params->base.metric_type = + c_schema->fields[i]->index_params.metric_type = static_cast( hnsw_params->metric_type()); - c_hnsw_params->base.quantize_type = + c_schema->fields[i]->index_params.quantize_type = static_cast( hnsw_params->quantize_type()); - - // Set HNSW-specific parameters - c_hnsw_params->m = hnsw_params->m(); - c_hnsw_params->ef_construction = + c_schema->fields[i]->index_params.hnsw.m = + hnsw_params->m(); + c_schema->fields[i]->index_params.hnsw.ef_construction = hnsw_params->ef_construction(); - - // Assign to field schema (using pointer assignment) - c_schema->fields[i]->index_params = - reinterpret_cast(c_hnsw_params); - c_schema->fields[i]->index_params->index_type = - ZVEC_INDEX_TYPE_HNSW; + c_schema->fields[i]->has_index = true; } break; } case zvec::IndexType::IVF: { - // Cast to IVFIndexParams and convert auto ivf_params = std::dynamic_pointer_cast( index_params); if (ivf_params) { - auto c_ivf_params = static_cast( - malloc(sizeof(ZVecIVFIndexParams))); - if (!c_ivf_params) { - throw std::bad_alloc(); - } - - // Initialize the base vector index parameters - c_ivf_params->base.base.index_type = ZVEC_INDEX_TYPE_IVF; - c_ivf_params->base.metric_type = + c_schema->fields[i]->index_params.index_type = + ZVEC_INDEX_TYPE_IVF; + c_schema->fields[i]->index_params.metric_type = static_cast( ivf_params->metric_type()); - 
c_ivf_params->base.quantize_type = + c_schema->fields[i]->index_params.quantize_type = static_cast( ivf_params->quantize_type()); - - // Set IVF-specific parameters - c_ivf_params->n_list = ivf_params->n_list(); - c_ivf_params->n_iters = ivf_params->n_iters(); - c_ivf_params->use_soar = ivf_params->use_soar(); - - // Assign to field schema (using pointer assignment) - c_schema->fields[i]->index_params = - reinterpret_cast(c_ivf_params); - c_schema->fields[i]->index_params->index_type = - ZVEC_INDEX_TYPE_IVF; + c_schema->fields[i]->index_params.ivf.n_list = + ivf_params->n_list(); + c_schema->fields[i]->index_params.ivf.n_iters = + ivf_params->n_iters(); + c_schema->fields[i]->index_params.ivf.use_soar = + ivf_params->use_soar(); + c_schema->fields[i]->has_index = true; } break; } case zvec::IndexType::FLAT: { - // Cast to FlatIndexParams and convert auto flat_params = std::dynamic_pointer_cast( index_params); if (flat_params) { - auto c_flat_params = static_cast( - malloc(sizeof(ZVecFlatIndexParams))); - if (!c_flat_params) { - throw std::bad_alloc(); - } - - // Initialize the base vector index parameters - c_flat_params->base.base.index_type = + c_schema->fields[i]->index_params.index_type = ZVEC_INDEX_TYPE_FLAT; - c_flat_params->base.metric_type = + c_schema->fields[i]->index_params.metric_type = static_cast( flat_params->metric_type()); - c_flat_params->base.quantize_type = + c_schema->fields[i]->index_params.quantize_type = static_cast( flat_params->quantize_type()); - - // Flat index has no additional parameters - - // Assign to field schema (using pointer assignment) - c_schema->fields[i]->index_params = - reinterpret_cast(c_flat_params); - c_schema->fields[i]->index_params->index_type = - ZVEC_INDEX_TYPE_FLAT; + c_schema->fields[i]->has_index = true; } break; } case zvec::IndexType::INVERT: { - // Cast to InvertIndexParams and convert auto invert_params = std::dynamic_pointer_cast( index_params); if (invert_params) { - auto c_invert_params = - static_cast( - 
malloc(sizeof(ZVecInvertIndexParams))); - if (!c_invert_params) { - throw std::bad_alloc(); - } - - // Initialize the base index parameters - c_invert_params->base.index_type = ZVEC_INDEX_TYPE_INVERT; - - // Set Invert-specific parameters - c_invert_params->enable_range_optimization = + c_schema->fields[i]->index_params.index_type = + ZVEC_INDEX_TYPE_INVERT; + c_schema->fields[i] + ->index_params.invert.enable_range_optimization = invert_params->enable_range_optimization(); - c_invert_params->enable_extended_wildcard = + c_schema->fields[i] + ->index_params.invert.enable_extended_wildcard = invert_params->enable_extended_wildcard(); - - // Assign to field schema (using pointer assignment) - c_schema->fields[i]->index_params = - reinterpret_cast(c_invert_params); - c_schema->fields[i]->index_params->index_type = - ZVEC_INDEX_TYPE_INVERT; + c_schema->fields[i]->has_index = true; } break; } default: - // For undefined or unsupported index types, set to NULL - c_schema->fields[i]->index_params = nullptr; - c_schema->fields[i]->index_params->index_type = - ZVEC_INDEX_TYPE_UNDEFINED; + // For undefined or unsupported index types + c_schema->fields[i]->has_index = false; break; } } else { - // No index parameters, set to NULL - c_schema->fields[i]->index_params = nullptr; + // No index parameters + c_schema->fields[i]->has_index = false; } } catch (const std::bad_alloc &) { // Clean up already allocated fields @@ -4875,109 +4613,37 @@ ZVecErrorCode zvec_collection_create_index( switch (index_params->index_type) { case ZVEC_INDEX_TYPE_INVERT: { - const ZVecInvertIndexParams *invert_params = - &index_params->params.invert_params; auto cpp_params = std::make_shared( - invert_params->enable_range_optimization, - invert_params->enable_extended_wildcard); + index_params->invert.enable_range_optimization, + index_params->invert.enable_extended_wildcard); auto status = (*coll_ptr)->CreateIndex(field_name_str, cpp_params); return status_to_error_code(status); } case 
ZVEC_INDEX_TYPE_HNSW: { - const ZVecHnswIndexParams *hnsw_params = &index_params->params.hnsw_params; - auto metric = convert_metric_type(hnsw_params->base.metric_type); - auto quantize = convert_quantize_type(hnsw_params->base.quantize_type); + auto metric = convert_metric_type(index_params->metric_type); + auto quantize = convert_quantize_type(index_params->quantize_type); auto cpp_params = std::make_shared( - metric, hnsw_params->m, hnsw_params->ef_construction, quantize); - auto status = (*coll_ptr)->CreateIndex(field_name_str, cpp_params); - return status_to_error_code(status); -} - -case ZVEC_INDEX_TYPE_FLAT: { - const ZVecFlatIndexParams *flat_params = &index_params->params.flat_params; - auto metric = convert_metric_type(flat_params->base.metric_type); - auto quantize = convert_quantize_type(flat_params->base.quantize_type); - auto cpp_params = std::make_shared(metric, quantize); - auto status = (*coll_ptr)->CreateIndex(field_name_str, cpp_params); - return status_to_error_code(status); -} - -case ZVEC_INDEX_TYPE_IVF: { - const ZVecIVFIndexParams *ivf_params = &index_params->params.ivf_params; - auto metric = convert_metric_type(ivf_params->base.metric_type); - auto quantize = convert_quantize_type(ivf_params->base.quantize_type); - auto cpp_params = std::make_shared( - metric, ivf_params->n_list, ivf_params->n_iters, ivf_params->use_soar, + metric, index_params->hnsw.m, index_params->hnsw.ef_construction, quantize); auto status = (*coll_ptr)->CreateIndex(field_name_str, cpp_params); return status_to_error_code(status); } -default: { - set_last_error("Unsupported index type"); - return ZVEC_ERROR_INVALID_ARGUMENT; -} - } - ) - } - - ZVecErrorCode zvec_collection_create_index_with_params( - ZVecCollection *collection, const char *field_name, - const void *index_params) { - if (!collection || !field_name || !index_params) { - set_last_error("Invalid arguments"); - return ZVEC_ERROR_INVALID_ARGUMENT; - } - - auto coll_ptr = - reinterpret_cast *>(collection); - 
std::string field_name_str(field_name); - - const ZVecBaseIndexParams *base_params = - static_cast(index_params); - - ZVEC_TRY_RETURN_ERROR("Exception occurred", - switch (base_params->index_type) { - case ZVEC_INDEX_TYPE_INVERT: { - const ZVecInvertIndexParams *invert_params = - static_cast(index_params); - auto cpp_params = std::make_shared( - invert_params->enable_range_optimization, - invert_params->enable_extended_wildcard); - auto status = (*coll_ptr)->CreateIndex(field_name_str, cpp_params); - return status_to_error_code(status); - } - -case ZVEC_INDEX_TYPE_HNSW: { - const ZVecHnswIndexParams *hnsw_params = - static_cast(index_params); - auto metric = convert_metric_type(hnsw_params->base.metric_type); - auto quantize = convert_quantize_type(hnsw_params->base.quantize_type); - auto cpp_params = std::make_shared( - metric, hnsw_params->m, hnsw_params->ef_construction, quantize); - auto status = (*coll_ptr)->CreateIndex(field_name_str, cpp_params); - return status_to_error_code(status); -} - case ZVEC_INDEX_TYPE_FLAT: { - const ZVecFlatIndexParams *flat_params = - static_cast(index_params); - auto metric = convert_metric_type(flat_params->base.metric_type); - auto quantize = convert_quantize_type(flat_params->base.quantize_type); + auto metric = convert_metric_type(index_params->metric_type); + auto quantize = convert_quantize_type(index_params->quantize_type); auto cpp_params = std::make_shared(metric, quantize); auto status = (*coll_ptr)->CreateIndex(field_name_str, cpp_params); return status_to_error_code(status); } case ZVEC_INDEX_TYPE_IVF: { - const ZVecIVFIndexParams *ivf_params = - static_cast(index_params); - auto metric = convert_metric_type(ivf_params->base.metric_type); - auto quantize = convert_quantize_type(ivf_params->base.quantize_type); + auto metric = convert_metric_type(index_params->metric_type); + auto quantize = convert_quantize_type(index_params->quantize_type); auto cpp_params = std::make_shared( - metric, ivf_params->n_list, 
ivf_params->n_iters, ivf_params->use_soar, - quantize); + metric, index_params->ivf.n_list, index_params->ivf.n_iters, + index_params->ivf.use_soar, quantize); auto status = (*coll_ptr)->CreateIndex(field_name_str, cpp_params); return status_to_error_code(status); } @@ -4990,52 +4656,46 @@ default: { ) } + // Legacy function - kept for backward compatibility, just calls + // zvec_collection_create_index ZVecErrorCode zvec_collection_create_hnsw_index( ZVecCollection *collection, const char *field_name, - const ZVecHnswIndexParams *hnsw_params) { + const ZVecIndexParams *hnsw_params) { if (!hnsw_params) { set_last_error("Invalid HNSW parameters"); return ZVEC_ERROR_INVALID_ARGUMENT; } - - return zvec_collection_create_index_with_params(collection, field_name, - hnsw_params); + return zvec_collection_create_index(collection, field_name, hnsw_params); } ZVecErrorCode zvec_collection_create_flat_index( ZVecCollection *collection, const char *field_name, - const ZVecFlatIndexParams *flat_params) { + const ZVecIndexParams *flat_params) { if (!flat_params) { set_last_error("Invalid Flat parameters"); return ZVEC_ERROR_INVALID_ARGUMENT; } - - return zvec_collection_create_index_with_params(collection, field_name, - flat_params); + return zvec_collection_create_index(collection, field_name, flat_params); } ZVecErrorCode zvec_collection_create_ivf_index( ZVecCollection *collection, const char *field_name, - const ZVecIVFIndexParams *ivf_params) { + const ZVecIndexParams *ivf_params) { if (!ivf_params) { set_last_error("Invalid IVF parameters"); return ZVEC_ERROR_INVALID_ARGUMENT; } - - return zvec_collection_create_index_with_params(collection, field_name, - ivf_params); + return zvec_collection_create_index(collection, field_name, ivf_params); } ZVecErrorCode zvec_collection_create_invert_index( ZVecCollection *collection, const char *field_name, - const ZVecInvertIndexParams *invert_params) { + const ZVecIndexParams *invert_params) { if (!invert_params) { 
set_last_error("Invalid Invert parameters"); return ZVEC_ERROR_INVALID_ARGUMENT; } - - return zvec_collection_create_index_with_params(collection, field_name, - invert_params); + return zvec_collection_create_index(collection, field_name, invert_params); } ZVecErrorCode zvec_collection_drop_index(ZVecCollection *collection, diff --git a/src/include/zvec/c_api.h b/src/include/zvec/c_api.h index 95bec9c5..e6496734 100644 --- a/src/include/zvec/c_api.h +++ b/src/include/zvec/c_api.h @@ -651,70 +651,46 @@ typedef struct ZVecCollection ZVecCollection; // ============================================================================= /** - * @brief Base index parameters structure - */ -typedef struct { - ZVecIndexType index_type; /**< Index type */ -} ZVecBaseIndexParams; - -/** - * @brief Scalar index parameters structure - */ -typedef struct { - ZVecBaseIndexParams base; /**< Inherit base parameters */ - bool enable_range_optimization; /**< Whether to enable range optimization */ - bool enable_extended_wildcard; /**< Whether to enable extended wildcard */ -} ZVecInvertIndexParams; - -/** - * @brief Vector index base parameters structure - */ -typedef struct { - ZVecBaseIndexParams base; /**< Inherit base parameters */ - ZVecMetricType metric_type; /**< Distance metric type */ - ZVecQuantizeType quantize_type; /**< Quantization type */ -} ZVecVectorIndexParams; - -/** - * @brief HNSW index parameters structure - */ -typedef struct { - ZVecVectorIndexParams base; /**< Inherit vector index parameters */ - int m; /**< Graph connectivity parameter */ - int ef_construction; /**< Exploration factor during construction */ - int ef_search; /**< Exploration factor during search */ -} ZVecHnswIndexParams; - -/** - * @brief Flat index parameters structure - */ -typedef struct { - ZVecVectorIndexParams base; /**< Inherit vector index parameters */ - // Flat index has no additional parameters -} ZVecFlatIndexParams; - -/** - * @brief IVF index parameters structure + * @brief Flattened 
index parameters structure + * + * Uses a union to store specific parameters for different index types, + * avoiding C++-style inheritance nesting. Supports stack allocation, + * reducing malloc/free overhead. */ typedef struct { - ZVecVectorIndexParams base; /**< Inherit vector index parameters */ - int n_list; /**< Number of cluster centers */ - int n_iters; /**< Number of iterations */ - bool use_soar; /**< Whether to use SOAR algorithm */ - int n_probe; /**< Number of clusters to probe during search */ -} ZVecIVFIndexParams; + ZVecIndexType index_type; /**< Index type */ + ZVecMetricType metric_type; /**< Distance metric type (for vector indexes) */ + ZVecQuantizeType quantize_type; /**< Quantization type (for vector indexes) */ -/** - * @brief Generic index parameters union - */ -typedef struct { - ZVecIndexType index_type; /**< Index type */ union { - ZVecInvertIndexParams invert_params; /**< Scalar index parameters */ - ZVecHnswIndexParams hnsw_params; /**< HNSW index parameters */ - ZVecFlatIndexParams flat_params; /**< Flat index parameters */ - ZVecIVFIndexParams ivf_params; /**< IVF index parameters */ - } params; + /** @brief Inverted index specific parameters */ + struct { + bool enable_range_optimization; /**< Whether to enable range optimization + */ + bool enable_extended_wildcard; /**< Whether to enable extended wildcard */ + } invert; + + /** @brief HNSW index specific parameters */ + struct { + int m; /**< Graph connectivity parameter */ + int ef_construction; /**< Exploration factor during construction */ + int ef_search; /**< Exploration factor during search */ + } hnsw; + + /** @brief IVF index specific parameters */ + struct { + int n_list; /**< Number of cluster centers */ + int n_iters; /**< Number of iterations */ + bool use_soar; /**< Whether to use SOAR algorithm */ + int n_probe; /**< Number of clusters to probe during search */ + } ivf; + + /** @brief Flat index has no additional parameters, + * reserved for alignment */ + struct { + 
int _reserved; + } flat; + }; } ZVecIndexParams; // ============================================================================= @@ -729,189 +705,59 @@ typedef struct { ZVecDataType data_type; /**< Data type */ bool nullable; /**< Whether nullable */ uint32_t dimension; /**< Vector dimension (only used for vector fields) */ - ZVecIndexParams *index_params; /**< Index parameters, NULL means no index */ + ZVecIndexParams index_params; /**< Index parameters (embedded, not pointer) */ + bool has_index; /**< Whether this field has an index */ } ZVecFieldSchema; // ============================================================================= -// Index Parameters Creation and Destruction Interface +// Index Parameters Interface // ============================================================================= /** - * @brief Initialize base index parameters - * @param params Base index parameters structure pointer - * @param index_type Index type - */ -ZVEC_EXPORT void ZVEC_CALL zvec_index_params_base_init( - ZVecBaseIndexParams *params, ZVecIndexType index_type); - -/** - * @brief Initialize scalar index parameters - * @param params Scalar index parameters structure pointer - * @param enable_range_opt Whether to enable range optimization - * @param enable_wildcard Whether to enable wildcard expansion - */ -ZVEC_EXPORT void ZVEC_CALL zvec_index_params_invert_init( - ZVecInvertIndexParams *params, bool enable_range_opt, bool enable_wildcard); - -/** - * @brief Initialize vector index parameters - * @param params Vector index parameters structure pointer + * @brief Initialize index parameters with default values based on index type + * @param params Index parameters structure pointer * @param index_type Index type - * @param metric_type Metric type - * @param quantize_type Quantization type + * @param metric_type Metric type (for vector indexes) */ -ZVEC_EXPORT void ZVEC_CALL zvec_index_params_vector_init( - ZVecVectorIndexParams *params, ZVecIndexType index_type, - 
ZVecMetricType metric_type, ZVecQuantizeType quantize_type); +ZVEC_EXPORT void ZVEC_CALL zvec_index_params_init(ZVecIndexParams *params, + ZVecIndexType index_type, + ZVecMetricType metric_type); /** - * @brief Initialize HNSW index parameters - * @param params HNSW index parameters structure pointer - * @param metric_type Metric type - * @param m Connectivity parameter + * @brief Set HNSW specific parameters + * @param params Index parameters structure pointer (must be HNSW type) + * @param m Graph connectivity parameter * @param ef_construction Construction exploration factor * @param ef_search Search exploration factor - * @param quantize_type Quantization type */ -ZVEC_EXPORT void ZVEC_CALL zvec_index_params_hnsw_init( - ZVecHnswIndexParams *params, ZVecMetricType metric_type, int m, - int ef_construction, int ef_search, ZVecQuantizeType quantize_type); +ZVEC_EXPORT void ZVEC_CALL zvec_index_params_set_hnsw(ZVecIndexParams *params, + int m, + int ef_construction, + int ef_search); /** - * @brief Initialize Flat index parameters - * @param params Flat index parameters structure pointer - * @param metric_type Metric type - * @param quantize_type Quantization type - */ -ZVEC_EXPORT void ZVEC_CALL zvec_index_params_flat_init( - ZVecFlatIndexParams *params, ZVecMetricType metric_type, - ZVecQuantizeType quantize_type); - -/** - * @brief Initialize IVF index parameters - * @param params IVF index parameters structure pointer - * @param metric_type Metric type + * @brief Set IVF specific parameters + * @param params Index parameters structure pointer (must be IVF type) * @param n_list Number of cluster centers * @param n_iters Number of iterations * @param use_soar Whether to use SOAR algorithm * @param n_probe Search probe count - * @param quantize_type Quantization type */ -ZVEC_EXPORT void ZVEC_CALL zvec_index_params_ivf_init( - ZVecIVFIndexParams *params, ZVecMetricType metric_type, int n_list, - int n_iters, bool use_soar, int n_probe, ZVecQuantizeType 
quantize_type); +ZVEC_EXPORT void ZVEC_CALL zvec_index_params_set_ivf(ZVecIndexParams *params, + int n_list, int n_iters, + bool use_soar, + int n_probe); /** - * @brief Initialize generic index parameters - * @param params Generic index parameters structure pointer - * @param index_type Index type - * @param metric_type Metric type (only valid for vector indexes) - */ -ZVEC_EXPORT void ZVEC_CALL zvec_index_params_init_default( - ZVecIndexParams *params, ZVecIndexType index_type, - ZVecMetricType metric_type); - -/** - * @brief Destroy index parameters (free internal dynamically allocated memory) - * @param params Index parameters structure pointer - */ -ZVEC_EXPORT void ZVEC_CALL zvec_index_params_destroy(ZVecIndexParams *params); - - -/** - * @brief Create inverted index parameters + * @brief Set invert index specific parameters + * @param params Index parameters structure pointer (must be INVERT type) * @param enable_range_opt Whether to enable range optimization * @param enable_wildcard Whether to enable extended wildcard - * @return ZVecInvertIndexParams* Pointer to the newly created index parameters - */ -ZVEC_EXPORT ZVecInvertIndexParams *ZVEC_CALL -zvec_index_params_invert_create(bool enable_range_opt, bool enable_wildcard); - -/** - * @brief Create vector index base parameters - * @param index_type Index type - * @param metric_type Metric type - * @param quantize_type Quantization type - * @return ZVecVectorIndexParams* Pointer to the newly created index parameters - */ -ZVEC_EXPORT ZVecVectorIndexParams *ZVEC_CALL zvec_index_params_vector_create( - ZVecIndexType index_type, ZVecMetricType metric_type, - ZVecQuantizeType quantize_type); - -/** - * @brief Create HNSW index parameters - * @param metric_type Metric type - * @param quantize_type Quantization type - * @param m Graph degree parameter - * @param ef_construction Exploration factor during construction - * @param ef_search Exploration factor during search - - * @return ZVecHnswIndexParams* Pointer 
to the newly created index parameters - */ -ZVEC_EXPORT ZVecHnswIndexParams *ZVEC_CALL zvec_index_params_hnsw_create( - ZVecMetricType metric_type, ZVecQuantizeType quantize_type, int m, - int ef_construction, int ef_search); - -/** - * @brief Create Flat index parameters - * @param metric_type Metric type - * @param quantize_type Quantization type - * @return ZVecFlatIndexParams* Pointer to the newly created index parameters - */ -ZVEC_EXPORT ZVecFlatIndexParams *ZVEC_CALL zvec_index_params_flat_create( - ZVecMetricType metric_type, ZVecQuantizeType quantize_type); - -/** - * @brief Create IVF index parameters - * @param metric_type Metric type - * @param n_list Number of cluster centers - * @param n_iters Number of iterations - * @param use_soar Whether to use SOAR algorithm - * @param n_probe Number of clusters to probe during search - * @param quantize_type Quantization type - * @return ZVecIVFIndexParams* Pointer to the newly created index parameters - */ -ZVEC_EXPORT ZVecIVFIndexParams *ZVEC_CALL zvec_index_params_ivf_create( - ZVecMetricType metric_type, ZVecQuantizeType quantize_type, int n_list, - int n_iters, bool use_soar, int n_probe); - - -/** - * @brief Destroy inverted index parameters - * @param params Index parameters pointer - */ -ZVEC_EXPORT void ZVEC_CALL -zvec_index_params_invert_destroy(ZVecInvertIndexParams *params); - -/** - * @brief Destroy vector index parameters - * @param params Index parameters pointer */ -ZVEC_EXPORT void ZVEC_CALL -zvec_index_params_vector_destroy(ZVecVectorIndexParams *params); - -/** - * @brief Destroy HNSW index parameters - * @param params Index parameters pointer - */ -ZVEC_EXPORT void ZVEC_CALL -zvec_index_params_hnsw_destroy(ZVecHnswIndexParams *params); - -/** - * @brief Destroy Flat index parameters - * @param params Index parameters pointer - */ -ZVEC_EXPORT void ZVEC_CALL -zvec_index_params_flat_destroy(ZVecFlatIndexParams *params); - -/** - * @brief Destroy IVF index parameters - * @param params Index 
parameters pointer - */ -ZVEC_EXPORT void ZVEC_CALL -zvec_index_params_ivf_destroy(ZVecIVFIndexParams *params); - +ZVEC_EXPORT void ZVEC_CALL zvec_index_params_set_invert(ZVecIndexParams *params, + bool enable_range_opt, + bool enable_wildcard); // ============================================================================= // Query Parameters Structures @@ -1223,7 +1069,7 @@ ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_field_schema_set_index_params( * @param invert_params Inverted index parameters pointer */ ZVEC_EXPORT void ZVEC_CALL zvec_field_schema_set_invert_index( - ZVecFieldSchema *field_schema, const ZVecInvertIndexParams *invert_params); + ZVecFieldSchema *field_schema, const ZVecIndexParams *invert_params); /** * @brief Set HNSW index parameters for field schema @@ -1231,7 +1077,7 @@ ZVEC_EXPORT void ZVEC_CALL zvec_field_schema_set_invert_index( * @param hnsw_params HNSW index parameters pointer */ ZVEC_EXPORT void ZVEC_CALL zvec_field_schema_set_hnsw_index( - ZVecFieldSchema *field_schema, const ZVecHnswIndexParams *hnsw_params); + ZVecFieldSchema *field_schema, const ZVecIndexParams *hnsw_params); /** * @brief Set Flat index parameters for field schema @@ -1239,7 +1085,7 @@ ZVEC_EXPORT void ZVEC_CALL zvec_field_schema_set_hnsw_index( * @param flat_params Flat index parameters pointer */ ZVEC_EXPORT void ZVEC_CALL zvec_field_schema_set_flat_index( - ZVecFieldSchema *field_schema, const ZVecFlatIndexParams *flat_params); + ZVecFieldSchema *field_schema, const ZVecIndexParams *flat_params); /** * @brief Set IVF index parameters for field schema @@ -1247,7 +1093,7 @@ ZVEC_EXPORT void ZVEC_CALL zvec_field_schema_set_flat_index( * @param ivf_params IVF index parameters pointer */ ZVEC_EXPORT void ZVEC_CALL zvec_field_schema_set_ivf_index( - ZVecFieldSchema *field_schema, const ZVecIVFIndexParams *ivf_params); + ZVecFieldSchema *field_schema, const ZVecIndexParams *ivf_params); // ============================================================================= @@ 
-1509,19 +1355,6 @@ ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_create_index(ZVecCollection *collection, const char *field_name, const ZVecIndexParams *index_params); -/** - * @brief Create index for collection field (using specific type parameters) - * @param collection Collection handle - * @param field_name Field name - * @param index_params Index parameters (select appropriate structure based on - * index type) - * @return Error code - */ -ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_create_index_with_params( - ZVecCollection *collection, const char *field_name, - const void - *index_params); // Determine specific type based on index_type field - /** * @brief Create HNSW index for collection field * @param collection Collection handle @@ -1531,7 +1364,7 @@ ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_create_index_with_params( */ ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_create_hnsw_index( ZVecCollection *collection, const char *field_name, - const ZVecHnswIndexParams *hnsw_params); + const ZVecIndexParams *hnsw_params); /** * @brief Create Flat index for collection field @@ -1542,7 +1375,7 @@ ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_create_hnsw_index( */ ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_create_flat_index( ZVecCollection *collection, const char *field_name, - const ZVecFlatIndexParams *flat_params); + const ZVecIndexParams *flat_params); /** * @brief Create IVF index for collection field @@ -1553,7 +1386,7 @@ ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_create_flat_index( */ ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_create_ivf_index( ZVecCollection *collection, const char *field_name, - const ZVecIVFIndexParams *ivf_params); + const ZVecIndexParams *ivf_params); /** * @brief Create scalar index for collection field @@ -1564,7 +1397,7 @@ ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_create_ivf_index( */ ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_create_invert_index( 
ZVecCollection *collection, const char *field_name, - const ZVecInvertIndexParams *invert_params); + const ZVecIndexParams *invert_params); /** * @brief Drop index @@ -2265,22 +2098,28 @@ const char *zvec_metric_type_to_string(ZVecMetricType metric_type); /** * @brief Simplified HNSW index parameters initialization macro - * @param metric Distance metric type - * @param m_ Connectivity parameter - * @param ef_construction Exploration factor during construction - * @param ef_search Exploration factor during search - * @param quant Quantization type + * @param _metric Distance metric type + * @param _m Connectivity parameter + * @param _ef_construction Exploration factor during construction + * @param _ef_search Exploration factor during search + * @param _quant Quantization type * * Usage example: - * ZVecHnswIndexParams params = ZVEC_HNSW_PARAMS(ZVEC_METRIC_TYPE_COSINE, 16, - * 200, 50, ZVEC_QUANTIZE_TYPE_UNDEFINED); - */ -#define ZVEC_HNSW_PARAMS(metric, m_, ef_construction, ef_search, quant) \ - (ZVecHnswIndexParams) { \ - .base.base.index_type = ZVEC_INDEX_TYPE_HNSW, .base.metric_type = metric, \ - .base.quantize_type = quant, .m = m_, .ef_construction = ef_construction, \ - .ef_search = ef_search \ - } + * @code + * ZVecIndexParams params = ZVEC_HNSW_PARAMS( + * ZVEC_METRIC_TYPE_COSINE, 16, 200, 50, ZVEC_QUANTIZE_TYPE_UNDEFINED); + * @endcode + */ +// clang-format off +#define ZVEC_HNSW_PARAMS(_metric, _m, _ef_construction, _ef_search, _quant) \ + ((ZVecIndexParams){ \ + .index_type = ZVEC_INDEX_TYPE_HNSW, \ + .metric_type = (_metric), \ + .quantize_type = (_quant), \ + .hnsw.m = (_m), \ + .hnsw.ef_construction = (_ef_construction), \ + .hnsw.ef_search = (_ef_search) }) +// clang-format on /** * @brief Simplified inverted index parameters initialization macro @@ -2288,25 +2127,28 @@ const char *zvec_metric_type_to_string(ZVecMetricType metric_type); * @param wildcard Whether to enable wildcard expansion * * Usage example: - * ZVecInvertIndexParams params = 
ZVEC_INVERT_PARAMS(true, false); + * ZVecIndexParams params = ZVEC_INVERT_PARAMS(true, false); */ -#define ZVEC_INVERT_PARAMS(range_opt, wildcard) \ - (ZVecInvertIndexParams) { \ - .base.index_type = ZVEC_INDEX_TYPE_INVERT, \ - .enable_range_optimization = range_opt, \ - .enable_extended_wildcard = wildcard \ - } +// clang-format off +#define ZVEC_INVERT_PARAMS(_range_opt, _wildcard) \ + ((ZVecIndexParams){ \ + .index_type = ZVEC_INDEX_TYPE_INVERT, \ + .invert.enable_range_optimization = (_range_opt), \ + .invert.enable_extended_wildcard = (_wildcard) }) +// clang-format on /** * @brief Simplified Flat index parameters initialization macro * @param metric Distance metric type * @param quant Quantization type */ -#define ZVEC_FLAT_PARAMS(metric, quant) \ - (ZVecFlatIndexParams) { \ - .base.index_type = ZVEC_INDEX_TYPE_FLAT, .base.metric_type = metric, \ - .base.quantize_type = quant \ - } +// clang-format off +#define ZVEC_FLAT_PARAMS(_metric, _quant) \ + ((ZVecIndexParams){ \ + .index_type = ZVEC_INDEX_TYPE_FLAT, \ + .metric_type = (_metric), \ + .quantize_type = (_quant) }) +// clang-format on /** * @brief Simplified IVF index parameters initialization macro @@ -2317,12 +2159,17 @@ const char *zvec_metric_type_to_string(ZVecMetricType metric_type); * @param nprobe Number of clusters to probe during search * @param quant Quantization type */ -#define ZVEC_IVF_PARAMS(metric, nlist, niters, soar, nprobe, quant) \ - (ZVecIVFIndexParams) { \ - .base.index_type = ZVEC_INDEX_TYPE_IVF, .base.metric_type = metric, \ - .base.quantize_type = quant, .n_list = nlist, .n_iters = niters, \ - .use_soar = soar, .n_probe = nprobe \ - } +// clang-format off +#define ZVEC_IVF_PARAMS(_metric, _nlist, _niters, _soar, _nprobe, _quant) \ + ((ZVecIndexParams){ \ + .index_type = ZVEC_INDEX_TYPE_IVF, \ + .metric_type = (_metric), \ + .quantize_type = (_quant), \ + .ivf.n_list = (_nlist), \ + .ivf.n_iters = (_niters), \ + .ivf.use_soar = (_soar), \ + .ivf.n_probe = (_nprobe) }) +// 
clang-format on /** * @brief Simplified string initialization macro diff --git a/tests/c_api/c_api_test.c b/tests/c_api/c_api_test.c index f3da89b6..a442d191 100644 --- a/tests/c_api/c_api_test.c +++ b/tests/c_api/c_api_test.c @@ -942,8 +942,7 @@ void test_field_helper_functions(void) { TEST_START(); // Test scalar field helper functions - ZVecInvertIndexParams *invert_params = - zvec_test_create_default_invert_params(true); + ZVecIndexParams *invert_params = zvec_test_create_default_invert_params(true); ZVecFieldSchema *scalar_field = zvec_test_create_scalar_field( "test_scalar", ZVEC_DATA_TYPE_INT32, true, invert_params); TEST_ASSERT(scalar_field != NULL); @@ -957,7 +956,7 @@ void test_field_helper_functions(void) { } // Test vector field helper functions - ZVecHnswIndexParams *hnsw_params = zvec_test_create_default_hnsw_params(); + ZVecIndexParams *hnsw_params = zvec_test_create_default_hnsw_params(); ZVecFieldSchema *vector_field = zvec_test_create_vector_field( "test_vector", ZVEC_DATA_TYPE_VECTOR_FP32, 128, false, hnsw_params); TEST_ASSERT(vector_field != NULL); @@ -3038,22 +3037,21 @@ void test_index_params(void) { TEST_START(); // Test HNSW parameter creation - ZVecHnswIndexParams *hnsw_params = zvec_test_create_default_hnsw_params(); + ZVecIndexParams *hnsw_params = zvec_test_create_default_hnsw_params(); TEST_ASSERT(hnsw_params != NULL); if (hnsw_params) { free(hnsw_params); } // Test Flat parameter creation - ZVecFlatIndexParams *flat_params = zvec_test_create_default_flat_params(); + ZVecIndexParams *flat_params = zvec_test_create_default_flat_params(); TEST_ASSERT(flat_params != NULL); if (flat_params) { free(flat_params); } // Test scalar index parameter creation - ZVecInvertIndexParams *invert_params = - zvec_test_create_default_invert_params(true); + ZVecIndexParams *invert_params = zvec_test_create_default_invert_params(true); TEST_ASSERT(invert_params != NULL); if (invert_params) { free(invert_params); @@ -3116,53 +3114,90 @@ void 
test_zvec_string_functions(void) { void test_index_params_functions(void) { TEST_START(); - // Test base index params - ZVecBaseIndexParams base_params; - zvec_index_params_base_init(&base_params, ZVEC_INDEX_TYPE_HNSW); - TEST_ASSERT(base_params.index_type == ZVEC_INDEX_TYPE_HNSW); + // Test index params with new flat structure + // clang-format off + ZVecIndexParams hnsw_params = ZVEC_HNSW_PARAMS(ZVEC_METRIC_TYPE_COSINE, 16, 200, 50, ZVEC_QUANTIZE_TYPE_UNDEFINED); + // clang-format on + TEST_ASSERT(hnsw_params.index_type == ZVEC_INDEX_TYPE_HNSW); + TEST_ASSERT(hnsw_params.metric_type == ZVEC_METRIC_TYPE_COSINE); + TEST_ASSERT(hnsw_params.hnsw.m == 16); + TEST_ASSERT(hnsw_params.hnsw.ef_construction == 200); + TEST_ASSERT(hnsw_params.hnsw.ef_search == 50); // Test invert index params - ZVecInvertIndexParams invert_params; - zvec_index_params_invert_init(&invert_params, true, false); - TEST_ASSERT(invert_params.base.index_type == ZVEC_INDEX_TYPE_INVERT); - TEST_ASSERT(invert_params.enable_range_optimization == true); - TEST_ASSERT(invert_params.enable_extended_wildcard == false); - - // Test vector index params - ZVecVectorIndexParams vector_params; - zvec_index_params_vector_init(&vector_params, ZVEC_INDEX_TYPE_HNSW, - ZVEC_METRIC_TYPE_L2, - ZVEC_QUANTIZE_TYPE_UNDEFINED); - TEST_ASSERT(vector_params.base.index_type == ZVEC_INDEX_TYPE_HNSW); - TEST_ASSERT(vector_params.metric_type == ZVEC_METRIC_TYPE_L2); - TEST_ASSERT(vector_params.quantize_type == ZVEC_QUANTIZE_TYPE_UNDEFINED); - - // Test HNSW index params - ZVecHnswIndexParams hnsw_params; - zvec_index_params_hnsw_init(&hnsw_params, ZVEC_METRIC_TYPE_COSINE, 16, 200, - 50, ZVEC_QUANTIZE_TYPE_UNDEFINED); - TEST_ASSERT(hnsw_params.base.base.index_type == ZVEC_INDEX_TYPE_HNSW); - TEST_ASSERT(hnsw_params.base.metric_type == ZVEC_METRIC_TYPE_COSINE); - TEST_ASSERT(hnsw_params.m == 16); - TEST_ASSERT(hnsw_params.ef_construction == 200); - TEST_ASSERT(hnsw_params.ef_search == 50); - - // Test Flat index params - 
ZVecFlatIndexParams flat_params; - zvec_index_params_flat_init(&flat_params, ZVEC_METRIC_TYPE_IP, - ZVEC_QUANTIZE_TYPE_UNDEFINED); - TEST_ASSERT(flat_params.base.base.index_type == ZVEC_INDEX_TYPE_FLAT); - TEST_ASSERT(flat_params.base.metric_type == ZVEC_METRIC_TYPE_IP); + // clang-format off + ZVecIndexParams invert_params = ZVEC_INVERT_PARAMS(true, false); + // clang-format on + TEST_ASSERT(invert_params.index_type == ZVEC_INDEX_TYPE_INVERT); + TEST_ASSERT(invert_params.invert.enable_range_optimization == true); + TEST_ASSERT(invert_params.invert.enable_extended_wildcard == false); + + // Test flat index params + // clang-format off + ZVecIndexParams flat_params = + ZVEC_FLAT_PARAMS(ZVEC_METRIC_TYPE_IP, ZVEC_QUANTIZE_TYPE_UNDEFINED); + // clang-format on + TEST_ASSERT(flat_params.index_type == ZVEC_INDEX_TYPE_FLAT); + TEST_ASSERT(flat_params.metric_type == ZVEC_METRIC_TYPE_IP); // Test IVF index params - ZVecIVFIndexParams ivf_params; - zvec_index_params_ivf_init(&ivf_params, ZVEC_METRIC_TYPE_L2, 100, 10, true, 5, - ZVEC_QUANTIZE_TYPE_UNDEFINED); - TEST_ASSERT(ivf_params.base.base.index_type == ZVEC_INDEX_TYPE_IVF); - TEST_ASSERT(ivf_params.n_list == 100); - TEST_ASSERT(ivf_params.n_iters == 10); - TEST_ASSERT(ivf_params.use_soar == true); - TEST_ASSERT(ivf_params.n_probe == 5); + // clang-format off + ZVecIndexParams ivf_params = ZVEC_IVF_PARAMS(ZVEC_METRIC_TYPE_L2, 100, 10, true, 5, ZVEC_QUANTIZE_TYPE_UNDEFINED); + // clang-format on + TEST_ASSERT(ivf_params.index_type == ZVEC_INDEX_TYPE_IVF); + TEST_ASSERT(ivf_params.metric_type == ZVEC_METRIC_TYPE_L2); + TEST_ASSERT(ivf_params.ivf.n_list == 100); + TEST_ASSERT(ivf_params.ivf.n_iters == 10); + TEST_ASSERT(ivf_params.ivf.use_soar == true); + TEST_ASSERT(ivf_params.ivf.n_probe == 5); + + TEST_END(); +} + +void test_index_params_api_functions(void) { + TEST_START(); + + ZVecIndexParams params; + ZVecErrorCode error; + + // Test zvec_index_params_init for HNSW + zvec_index_params_init(¶ms, ZVEC_INDEX_TYPE_HNSW, + 
ZVEC_METRIC_TYPE_COSINE); + TEST_ASSERT(params.index_type == ZVEC_INDEX_TYPE_HNSW); + TEST_ASSERT(params.metric_type == ZVEC_METRIC_TYPE_COSINE); + + // Test zvec_index_params_set_hnsw + zvec_index_params_set_hnsw(¶ms, 32, 300, 150); + TEST_ASSERT(params.hnsw.m == 32); + TEST_ASSERT(params.hnsw.ef_construction == 300); + TEST_ASSERT(params.hnsw.ef_search == 150); + + // Test zvec_index_params_init for IVF + zvec_index_params_init(¶ms, ZVEC_INDEX_TYPE_IVF, ZVEC_METRIC_TYPE_L2); + TEST_ASSERT(params.index_type == ZVEC_INDEX_TYPE_IVF); + TEST_ASSERT(params.metric_type == ZVEC_METRIC_TYPE_L2); + + // Test zvec_index_params_set_ivf + zvec_index_params_set_ivf(¶ms, 200, 20, true, 10); + TEST_ASSERT(params.ivf.n_list == 200); + TEST_ASSERT(params.ivf.n_iters == 20); + TEST_ASSERT(params.ivf.use_soar == true); + TEST_ASSERT(params.ivf.n_probe == 10); + + // Test zvec_index_params_init for INVERT + zvec_index_params_init(¶ms, ZVEC_INDEX_TYPE_INVERT, + ZVEC_METRIC_TYPE_UNDEFINED); + TEST_ASSERT(params.index_type == ZVEC_INDEX_TYPE_INVERT); + + // Test zvec_index_params_set_invert + zvec_index_params_set_invert(¶ms, true, true); + TEST_ASSERT(params.invert.enable_range_optimization == true); + TEST_ASSERT(params.invert.enable_extended_wildcard == true); + + // Test zvec_index_params_init for FLAT + zvec_index_params_init(¶ms, ZVEC_INDEX_TYPE_FLAT, ZVEC_METRIC_TYPE_IP); + TEST_ASSERT(params.index_type == ZVEC_INDEX_TYPE_FLAT); + TEST_ASSERT(params.metric_type == ZVEC_METRIC_TYPE_IP); TEST_END(); } @@ -3588,11 +3623,12 @@ void test_actual_vector_queries(void) { zvec_collection_schema_add_field(schema, id_field); // Add vector field with HNSW index - ZVecHnswIndexParams *hnsw_params = zvec_index_params_hnsw_create( - ZVEC_METRIC_TYPE_L2, ZVEC_QUANTIZE_TYPE_UNDEFINED, 16, 100, 50); + // clang-format off + ZVecIndexParams hnsw_params = ZVEC_HNSW_PARAMS(ZVEC_METRIC_TYPE_L2, 16, 100, 50, ZVEC_QUANTIZE_TYPE_UNDEFINED); + // clang-format on ZVecFieldSchema *vec_field = 
zvec_field_schema_create( "embedding", ZVEC_DATA_TYPE_VECTOR_FP32, false, 4); - zvec_field_schema_set_hnsw_index(vec_field, hnsw_params); + zvec_field_schema_set_hnsw_index(vec_field, &hnsw_params); zvec_collection_schema_add_field(schema, vec_field); ZVecCollection *collection = NULL; @@ -3685,7 +3721,6 @@ void test_actual_vector_queries(void) { } zvec_collection_schema_destroy(schema); - zvec_index_params_hnsw_destroy(hnsw_params); } // Clean up @@ -3713,20 +3748,21 @@ void test_index_creation_and_management(void) { if (collection) { // Test 1: Create HNSW index - ZVecHnswIndexParams *hnsw_params = zvec_index_params_hnsw_create( - ZVEC_METRIC_TYPE_COSINE, ZVEC_QUANTIZE_TYPE_UNDEFINED, 16, 100, 50); - TEST_ASSERT(hnsw_params != NULL); + // clang-format off + ZVecIndexParams hnsw_params = ZVEC_HNSW_PARAMS(ZVEC_METRIC_TYPE_COSINE, 16, 100, 50, ZVEC_QUANTIZE_TYPE_UNDEFINED); + // clang-format on - err = zvec_collection_create_hnsw_index(collection, "dense", hnsw_params); + err = + zvec_collection_create_hnsw_index(collection, "dense", &hnsw_params); TEST_ASSERT(err == ZVEC_OK); // Test 2: Create scalar index - ZVecInvertIndexParams *invert_params = - zvec_index_params_invert_create(true, false); - TEST_ASSERT(invert_params != NULL); + // clang-format off + ZVecIndexParams invert_params = ZVEC_INVERT_PARAMS(true, false); + // clang-format on err = zvec_collection_create_invert_index(collection, "name", - invert_params); + &invert_params); TEST_ASSERT(err == ZVEC_OK); err = zvec_collection_drop_index(collection, "name"); @@ -3737,8 +3773,6 @@ void test_index_creation_and_management(void) { TEST_ASSERT(err == ZVEC_OK); zvec_collection_destroy(collection); - zvec_index_params_hnsw_destroy(hnsw_params); - zvec_index_params_invert_destroy(invert_params); } zvec_collection_schema_destroy(schema); @@ -3846,18 +3880,16 @@ void test_field_ddl_operations(void) { TEST_ASSERT(field2->dimension == 128); // Test index parameter setting - ZVecHnswIndexParams *hnsw_params = 
zvec_index_params_hnsw_create( - ZVEC_METRIC_TYPE_L2, ZVEC_QUANTIZE_TYPE_UNDEFINED, 16, 100, 50); - TEST_ASSERT(hnsw_params != NULL); + // clang-format off + ZVecIndexParams hnsw_params = ZVEC_HNSW_PARAMS(ZVEC_METRIC_TYPE_L2, 16, 100, 50, ZVEC_QUANTIZE_TYPE_UNDEFINED); + // clang-format on - ZVecErrorCode err = zvec_field_schema_set_index_params( - field2, (ZVecIndexParams *)hnsw_params); + ZVecErrorCode err = zvec_field_schema_set_index_params(field2, &hnsw_params); TEST_ASSERT(err == ZVEC_OK); // Cleanup zvec_field_schema_destroy(field1); zvec_field_schema_destroy(field2); - zvec_index_params_hnsw_destroy(hnsw_params); TEST_END(); } @@ -3878,9 +3910,10 @@ void test_performance_benchmarks(void) { ZVecFieldSchema *vec_field = zvec_field_schema_create("vec", ZVEC_DATA_TYPE_VECTOR_FP32, false, 128); - ZVecHnswIndexParams *hnsw_params = zvec_index_params_hnsw_create( - ZVEC_METRIC_TYPE_L2, ZVEC_QUANTIZE_TYPE_UNDEFINED, 16, 100, 50); - zvec_field_schema_set_hnsw_index(vec_field, hnsw_params); + // clang-format off + ZVecIndexParams hnsw_params = ZVEC_HNSW_PARAMS(ZVEC_METRIC_TYPE_L2, 16, 100, 50, ZVEC_QUANTIZE_TYPE_UNDEFINED); /* NOLINT */ + // clang-format on + zvec_field_schema_set_hnsw_index(vec_field, &hnsw_params); zvec_collection_schema_add_field(schema, vec_field); ZVecCollection *collection = NULL; @@ -4007,7 +4040,6 @@ void test_performance_benchmarks(void) { printf(" Average query time: %.2f ms\n", avg_query_time); zvec_collection_destroy(collection); - zvec_index_params_hnsw_destroy(hnsw_params); } zvec_collection_schema_destroy(schema); @@ -4045,42 +4077,44 @@ void test_zvec_shutdown(void) { void test_index_params_creation_functions(void) { TEST_START(); - // Test zvec_index_params_init_default - ZVecIndexParams params; - zvec_index_params_init_default(¶ms, ZVEC_INDEX_TYPE_HNSW, - ZVEC_METRIC_TYPE_COSINE); - TEST_ASSERT(params.index_type == ZVEC_INDEX_TYPE_HNSW); - - // Test zvec_index_params_vector_create - ZVecVectorIndexParams *vector_params = 
zvec_index_params_vector_create( - ZVEC_INDEX_TYPE_HNSW, ZVEC_METRIC_TYPE_L2, ZVEC_QUANTIZE_TYPE_FP16); - TEST_ASSERT(vector_params != NULL); - TEST_ASSERT(vector_params->base.index_type == ZVEC_INDEX_TYPE_HNSW); - TEST_ASSERT(vector_params->metric_type == ZVEC_METRIC_TYPE_L2); - TEST_ASSERT(vector_params->quantize_type == ZVEC_QUANTIZE_TYPE_FP16); - if (vector_params) { - zvec_index_params_vector_destroy(vector_params); - } - - // Test zvec_index_params_ivf_create - ZVecIVFIndexParams *ivf_params = zvec_index_params_ivf_create( - ZVEC_METRIC_TYPE_L2, ZVEC_QUANTIZE_TYPE_INT8, 100, 10, true, 5); - TEST_ASSERT(ivf_params != NULL); - TEST_ASSERT(ivf_params->base.base.index_type == ZVEC_INDEX_TYPE_IVF); - TEST_ASSERT(ivf_params->base.metric_type == ZVEC_METRIC_TYPE_L2); - TEST_ASSERT(ivf_params->n_list == 100); - TEST_ASSERT(ivf_params->n_iters == 10); - TEST_ASSERT(ivf_params->use_soar == true); - TEST_ASSERT(ivf_params->n_probe == 5); - if (ivf_params) { - zvec_index_params_ivf_destroy(ivf_params); - } - - // Test zvec_index_params_vector_destroy - ZVecVectorIndexParams *vector_params2 = zvec_index_params_vector_create( - ZVEC_INDEX_TYPE_FLAT, ZVEC_METRIC_TYPE_IP, ZVEC_QUANTIZE_TYPE_UNDEFINED); - TEST_ASSERT(vector_params2 != NULL); - zvec_index_params_vector_destroy(vector_params2); + // Test HNSW parameters using macro + // clang-format off + ZVecIndexParams hnsw_params = ZVEC_HNSW_PARAMS(ZVEC_METRIC_TYPE_COSINE, 16, 100, 50, ZVEC_QUANTIZE_TYPE_UNDEFINED); + // clang-format on + TEST_ASSERT(hnsw_params.index_type == ZVEC_INDEX_TYPE_HNSW); + TEST_ASSERT(hnsw_params.metric_type == ZVEC_METRIC_TYPE_COSINE); + TEST_ASSERT(hnsw_params.hnsw.m == 16); + TEST_ASSERT(hnsw_params.hnsw.ef_construction == 100); + TEST_ASSERT(hnsw_params.hnsw.ef_search == 50); + + // Test IVF parameters using macro + // clang-format off + ZVecIndexParams ivf_params = ZVEC_IVF_PARAMS(ZVEC_METRIC_TYPE_L2, 100, 10, true, 5, ZVEC_QUANTIZE_TYPE_INT8); + // clang-format on + 
TEST_ASSERT(ivf_params.index_type == ZVEC_INDEX_TYPE_IVF); + TEST_ASSERT(ivf_params.metric_type == ZVEC_METRIC_TYPE_L2); + TEST_ASSERT(ivf_params.ivf.n_list == 100); + TEST_ASSERT(ivf_params.ivf.n_iters == 10); + TEST_ASSERT(ivf_params.ivf.use_soar == true); + TEST_ASSERT(ivf_params.ivf.n_probe == 5); + + // Test Flat parameters using macro + // clang-format off + // clang-format off + ZVecIndexParams flat_params = + ZVEC_FLAT_PARAMS(ZVEC_METRIC_TYPE_IP, ZVEC_QUANTIZE_TYPE_UNDEFINED); + // clang-format on + // clang-format on + TEST_ASSERT(flat_params.index_type == ZVEC_INDEX_TYPE_FLAT); + TEST_ASSERT(flat_params.metric_type == ZVEC_METRIC_TYPE_IP); + + // Test Invert parameters using macro + // clang-format off + ZVecIndexParams invert_params = ZVEC_INVERT_PARAMS(true, false); + // clang-format on + TEST_ASSERT(invert_params.index_type == ZVEC_INDEX_TYPE_INVERT); + TEST_ASSERT(invert_params.invert.enable_range_optimization == true); + TEST_ASSERT(invert_params.invert.enable_extended_wildcard == false); TEST_END(); } @@ -4115,45 +4149,42 @@ void test_collection_advanced_index_functions(void) { if (collection) { // Test zvec_collection_create_flat_index - ZVecFlatIndexParams *flat_params = zvec_index_params_flat_create( - ZVEC_METRIC_TYPE_L2, ZVEC_QUANTIZE_TYPE_UNDEFINED); - TEST_ASSERT(flat_params != NULL); - err = zvec_collection_create_flat_index(collection, "vec", flat_params); + // clang-format off + ZVecIndexParams flat_params = + ZVEC_FLAT_PARAMS(ZVEC_METRIC_TYPE_L2, ZVEC_QUANTIZE_TYPE_UNDEFINED); + // clang-format on + err = zvec_collection_create_flat_index(collection, "vec", &flat_params); TEST_ASSERT(err == ZVEC_OK); - zvec_index_params_flat_destroy(flat_params); // Test zvec_collection_create_ivf_index - ZVecIVFIndexParams *ivf_params = zvec_index_params_ivf_create( - ZVEC_METRIC_TYPE_L2, ZVEC_QUANTIZE_TYPE_INT8, 100, 10, true, 5); - TEST_ASSERT(ivf_params != NULL); + // clang-format off + ZVecIndexParams ivf_params = ZVEC_IVF_PARAMS(ZVEC_METRIC_TYPE_L2, 
100, 10, true, 5, ZVEC_QUANTIZE_TYPE_INT8); + // clang-format on err = zvec_collection_drop_index(collection, "vec"); // Drop previous index first TEST_ASSERT(err == ZVEC_OK); - err = zvec_collection_create_ivf_index(collection, "vec", ivf_params); + err = zvec_collection_create_ivf_index(collection, "vec", &ivf_params); TEST_ASSERT(err == ZVEC_OK); - zvec_index_params_ivf_destroy(ivf_params); - // Test zvec_collection_create_index_with_params - ZVecHnswIndexParams *hnsw_params = zvec_index_params_hnsw_create( - ZVEC_METRIC_TYPE_COSINE, ZVEC_QUANTIZE_TYPE_FP16, 16, 100, 50); - TEST_ASSERT(hnsw_params != NULL); - err = zvec_collection_drop_index(collection, "vec"); + // Test zvec_collection_create_hnsw_index + // clang-format off + ZVecIndexParams hnsw_params = ZVEC_HNSW_PARAMS(ZVEC_METRIC_TYPE_COSINE, 16, 100, 50, ZVEC_QUANTIZE_TYPE_FP16); + // clang-format on + err = zvec_collection_drop_index(collection, + "vec"); // Drop previous index first TEST_ASSERT(err == ZVEC_OK); - err = zvec_collection_create_index_with_params(collection, "vec", - hnsw_params); + err = zvec_collection_create_hnsw_index(collection, "vec", &hnsw_params); TEST_ASSERT(err == ZVEC_OK); - zvec_index_params_hnsw_destroy(hnsw_params); // Test zvec_field_schema_set_ivf_index ZVecFieldSchema *new_vec_field = zvec_field_schema_create( "vec2", ZVEC_DATA_TYPE_VECTOR_FP32, false, 128); TEST_ASSERT(new_vec_field != NULL); - ZVecIVFIndexParams *ivf_params2 = zvec_index_params_ivf_create( - ZVEC_METRIC_TYPE_IP, ZVEC_QUANTIZE_TYPE_UNDEFINED, 50, 5, false, 3); - TEST_ASSERT(ivf_params2 != NULL); - zvec_field_schema_set_ivf_index(new_vec_field, ivf_params2); - TEST_ASSERT(new_vec_field->index_params != NULL); - zvec_index_params_ivf_destroy(ivf_params2); + // clang-format off + ZVecIndexParams ivf_params2 = ZVEC_IVF_PARAMS(ZVEC_METRIC_TYPE_IP, 50, 5, false, 3, ZVEC_QUANTIZE_TYPE_UNDEFINED); + // clang-format on + zvec_field_schema_set_ivf_index(new_vec_field, &ivf_params2); + 
TEST_ASSERT(new_vec_field->has_index == true); zvec_field_schema_destroy(new_vec_field); zvec_collection_destroy(collection); @@ -4173,14 +4204,15 @@ void test_collection_query_functions(void) { // Create schema and collection ZVecCollectionSchema *schema = zvec_collection_schema_create("query_test"); - ZVecHnswIndexParams *hnsw_params = zvec_index_params_hnsw_create( - ZVEC_METRIC_TYPE_L2, ZVEC_QUANTIZE_TYPE_UNDEFINED, 16, 100, 50); + // clang-format off + ZVecIndexParams hnsw_params = ZVEC_HNSW_PARAMS(ZVEC_METRIC_TYPE_L2, 16, 100, 50, ZVEC_QUANTIZE_TYPE_UNDEFINED); + // clang-format on ZVecFieldSchema *name_field = zvec_field_schema_create("name", ZVEC_DATA_TYPE_STRING, false, 0); ZVecFieldSchema *vec_field = zvec_field_schema_create("vec", ZVEC_DATA_TYPE_VECTOR_FP32, false, 4); - zvec_field_schema_set_hnsw_index(vec_field, hnsw_params); + zvec_field_schema_set_hnsw_index(vec_field, &hnsw_params); zvec_collection_schema_add_field(schema, name_field); zvec_collection_schema_add_field(schema, vec_field); @@ -4275,7 +4307,6 @@ void test_collection_query_functions(void) { zvec_doc_destroy(doc2); } - zvec_index_params_hnsw_destroy(hnsw_params); zvec_collection_schema_destroy(schema); zvec_test_delete_dir(temp_dir); @@ -4464,18 +4495,6 @@ void test_array_memory_functions(void) { TEST_END(); } -void test_index_params_destruction(void) { - TEST_START(); - - // Test zvec_index_params_invert_destroy - ZVecInvertIndexParams *invert_params = - zvec_index_params_invert_create(true, false); - TEST_ASSERT(invert_params != NULL); - zvec_index_params_invert_destroy(invert_params); - - TEST_END(); -} - // ============================================================================= // Main function // ============================================================================= @@ -4534,6 +4553,7 @@ int main(void) { // Index tests test_index_params(); test_index_params_functions(); + test_index_params_api_functions(); test_index_creation_and_management(); // Query tests @@ -4556,7 
+4576,6 @@ int main(void) { test_collection_query_functions(); test_doc_advanced_functions(); test_array_memory_functions(); - test_index_params_destruction(); printf("\n=== Comprehensive Test Summary ===\n"); printf("Total tests: %d\n", test_count); diff --git a/tests/c_api/utils.c b/tests/c_api/utils.c index 66c932a4..7d287761 100644 --- a/tests/c_api/utils.c +++ b/tests/c_api/utils.c @@ -41,27 +41,25 @@ ZVecCollectionSchema *zvec_test_create_temp_schema(void) { ZVecCollectionSchema *schema = zvec_collection_schema_create("demo"); schema->max_doc_count_per_segment = 1000; - // Create index parameters using C API - ZVecInvertIndexParams *invert_params = - zvec_index_params_invert_create(true, true); - ZVecHnswIndexParams *dense_hnsw_params = zvec_index_params_hnsw_create( + // Create index parameters using C API (using new flat structure with macros) + ZVecIndexParams invert_params = ZVEC_INVERT_PARAMS(true, true); + ZVecIndexParams dense_hnsw_params = ZVEC_HNSW_PARAMS( ZVEC_METRIC_TYPE_L2, 16, 100, 50, ZVEC_QUANTIZE_TYPE_UNDEFINED); - ZVecHnswIndexParams *sparse_hnsw_params = zvec_index_params_hnsw_create( + ZVecIndexParams sparse_hnsw_params = ZVEC_HNSW_PARAMS( ZVEC_METRIC_TYPE_IP, 16, 100, 50, ZVEC_QUANTIZE_TYPE_UNDEFINED); // Create and add fields ZVecFieldSchema *id_field = zvec_field_schema_create("id", ZVEC_DATA_TYPE_INT64, false, 0); - zvec_field_schema_set_invert_index(id_field, invert_params); + zvec_field_schema_set_invert_index(id_field, &invert_params); zvec_collection_schema_add_field(schema, id_field); // Create name field (inverted index without optimization) - ZVecInvertIndexParams *name_invert_params = - zvec_index_params_invert_create(false, false); + ZVecIndexParams name_invert_params = ZVEC_INVERT_PARAMS(false, false); ZVecFieldSchema *name_field = zvec_field_schema_create("name", ZVEC_DATA_TYPE_STRING, false, 0); - zvec_field_schema_set_invert_index(name_field, name_invert_params); + zvec_field_schema_set_invert_index(name_field, 
&name_invert_params); zvec_collection_schema_add_field(schema, name_field); // Create weight field (no index) @@ -72,13 +70,13 @@ ZVecCollectionSchema *zvec_test_create_temp_schema(void) { // Create dense field (HNSW index) ZVecFieldSchema *dense_field = zvec_field_schema_create("dense", ZVEC_DATA_TYPE_VECTOR_FP32, false, 128); - zvec_field_schema_set_hnsw_index(dense_field, dense_hnsw_params); + zvec_field_schema_set_hnsw_index(dense_field, &dense_hnsw_params); zvec_collection_schema_add_field(schema, dense_field); // Create sparse field (HNSW index) ZVecFieldSchema *sparse_field = zvec_field_schema_create( "sparse", ZVEC_DATA_TYPE_SPARSE_VECTOR_FP32, false, 0); - zvec_field_schema_set_hnsw_index(sparse_field, sparse_hnsw_params); + zvec_field_schema_set_hnsw_index(sparse_field, &sparse_hnsw_params); zvec_collection_schema_add_field(schema, sparse_field); return schema; @@ -101,9 +99,8 @@ ZVecCollectionSchema *zvec_test_create_scalar_schema(void) { } ZVecCollectionSchema *zvec_test_create_normal_schema( - bool nullable, const char *name, - const ZVecInvertIndexParams *scalar_index_params, - const ZVecHnswIndexParams *vector_index_params, uint64_t max_doc_count) { + bool nullable, const char *name, const ZVecIndexParams *scalar_index_params, + const ZVecIndexParams *vector_index_params, uint64_t max_doc_count) { // Create collection schema using C API ZVecCollectionSchema *schema = zvec_collection_schema_create(name ? 
name : "demo"); @@ -121,8 +118,7 @@ ZVecCollectionSchema *zvec_test_create_normal_schema( ZVecFieldSchema *field = zvec_field_schema_create(scalar_names[i], scalar_types[i], nullable, 0); if (scalar_index_params) { - zvec_field_schema_set_invert_index( - field, (ZVecInvertIndexParams *)scalar_index_params); + zvec_field_schema_set_invert_index(field, scalar_index_params); } zvec_collection_schema_add_field(schema, field); } @@ -141,8 +137,7 @@ ZVecCollectionSchema *zvec_test_create_normal_schema( ZVecFieldSchema *field = zvec_field_schema_create(array_names[i], array_types[i], nullable, 0); if (scalar_index_params) { - zvec_field_schema_set_invert_index( - field, (ZVecInvertIndexParams *)scalar_index_params); + zvec_field_schema_set_invert_index(field, scalar_index_params); } zvec_collection_schema_add_field(schema, field); } @@ -152,39 +147,37 @@ ZVecCollectionSchema *zvec_test_create_normal_schema( ZVecFieldSchema *dense_fp32 = zvec_field_schema_create( "dense_fp32", ZVEC_DATA_TYPE_VECTOR_FP32, false, 128); if (vector_index_params) { - zvec_field_schema_set_hnsw_index( - dense_fp32, (ZVecHnswIndexParams *)vector_index_params); + zvec_field_schema_set_hnsw_index(dense_fp32, vector_index_params); } zvec_collection_schema_add_field(schema, dense_fp32); ZVecFieldSchema *dense_fp16 = zvec_field_schema_create( "dense_fp16", ZVEC_DATA_TYPE_VECTOR_FP16, false, 128); - ZVecFlatIndexParams *flat_params1 = zvec_index_params_flat_create( - ZVEC_METRIC_TYPE_L2, ZVEC_QUANTIZE_TYPE_UNDEFINED); - zvec_field_schema_set_flat_index(dense_fp16, flat_params1); + ZVecIndexParams flat_params1 = + ZVEC_FLAT_PARAMS(ZVEC_METRIC_TYPE_L2, ZVEC_QUANTIZE_TYPE_UNDEFINED); + zvec_field_schema_set_flat_index(dense_fp16, &flat_params1); zvec_collection_schema_add_field(schema, dense_fp16); ZVecFieldSchema *dense_int8 = zvec_field_schema_create( "dense_int8", ZVEC_DATA_TYPE_VECTOR_INT8, false, 128); - ZVecFlatIndexParams *flat_params2 = zvec_index_params_flat_create( - ZVEC_METRIC_TYPE_L2, 
ZVEC_QUANTIZE_TYPE_UNDEFINED); - zvec_field_schema_set_flat_index(dense_int8, flat_params2); + ZVecIndexParams flat_params2 = + ZVEC_FLAT_PARAMS(ZVEC_METRIC_TYPE_L2, ZVEC_QUANTIZE_TYPE_UNDEFINED); + zvec_field_schema_set_flat_index(dense_int8, &flat_params2); zvec_collection_schema_add_field(schema, dense_int8); // sparse vectors ZVecFieldSchema *sparse_fp32 = zvec_field_schema_create( "sparse_fp32", ZVEC_DATA_TYPE_SPARSE_VECTOR_FP32, false, 0); if (vector_index_params) { - zvec_field_schema_set_hnsw_index( - sparse_fp32, (ZVecHnswIndexParams *)vector_index_params); + zvec_field_schema_set_hnsw_index(sparse_fp32, vector_index_params); } zvec_collection_schema_add_field(schema, sparse_fp32); ZVecFieldSchema *sparse_fp16 = zvec_field_schema_create( "sparse_fp16", ZVEC_DATA_TYPE_SPARSE_VECTOR_FP16, false, 0); - ZVecFlatIndexParams *flat_params3 = zvec_index_params_flat_create( - ZVEC_METRIC_TYPE_L2, ZVEC_QUANTIZE_TYPE_UNDEFINED); - zvec_field_schema_set_flat_index(sparse_fp16, flat_params3); + ZVecIndexParams flat_params3 = + ZVEC_FLAT_PARAMS(ZVEC_METRIC_TYPE_L2, ZVEC_QUANTIZE_TYPE_UNDEFINED); + zvec_field_schema_set_flat_index(sparse_fp16, &flat_params3); zvec_collection_schema_add_field(schema, sparse_fp16); return schema; @@ -192,7 +185,7 @@ ZVecCollectionSchema *zvec_test_create_normal_schema( ZVecCollectionSchema *zvec_test_create_schema_with_scalar_index( bool nullable, bool enable_optimize, const char *name) { - ZVecInvertIndexParams *invert_params = + ZVecIndexParams *invert_params = zvec_test_create_default_invert_params(enable_optimize); ZVecCollectionSchema *schema = zvec_test_create_normal_schema(nullable, name, invert_params, NULL, 1000); @@ -202,8 +195,8 @@ ZVecCollectionSchema *zvec_test_create_schema_with_scalar_index( ZVecCollectionSchema *zvec_test_create_schema_with_vector_index( bool nullable, const char *name, - const ZVecHnswIndexParams *vector_index_params) { - ZVecHnswIndexParams *default_params = NULL; + const ZVecIndexParams 
*vector_index_params) { + ZVecIndexParams *default_params = NULL; if (!vector_index_params) { default_params = zvec_test_create_default_hnsw_params(); } @@ -759,41 +752,30 @@ ZVecDoc *zvec_test_create_doc_with_fields(uint64_t doc_id, // Index Parameter Creation Helper Functions Implementation // ============================================================================= -ZVecHnswIndexParams *zvec_test_create_default_hnsw_params(void) { - ZVecHnswIndexParams *params = - (ZVecHnswIndexParams *)malloc(sizeof(ZVecHnswIndexParams)); +ZVecIndexParams *zvec_test_create_default_hnsw_params(void) { + ZVecIndexParams *params = (ZVecIndexParams *)malloc(sizeof(ZVecIndexParams)); if (!params) return NULL; - params->base.base.index_type = ZVEC_INDEX_TYPE_HNSW; - params->base.metric_type = ZVEC_METRIC_TYPE_IP; - params->base.quantize_type = ZVEC_QUANTIZE_TYPE_UNDEFINED; - params->m = 16; - params->ef_construction = 100; + *params = ZVEC_HNSW_PARAMS(ZVEC_METRIC_TYPE_IP, 16, 100, 50, + ZVEC_QUANTIZE_TYPE_UNDEFINED); return params; } -ZVecFlatIndexParams *zvec_test_create_default_flat_params(void) { - ZVecFlatIndexParams *params = - (ZVecFlatIndexParams *)malloc(sizeof(ZVecFlatIndexParams)); +ZVecIndexParams *zvec_test_create_default_flat_params(void) { + ZVecIndexParams *params = (ZVecIndexParams *)malloc(sizeof(ZVecIndexParams)); if (!params) return NULL; - params->base.base.index_type = ZVEC_INDEX_TYPE_FLAT; - params->base.metric_type = ZVEC_METRIC_TYPE_IP; - params->base.quantize_type = ZVEC_QUANTIZE_TYPE_UNDEFINED; + *params = ZVEC_FLAT_PARAMS(ZVEC_METRIC_TYPE_IP, ZVEC_QUANTIZE_TYPE_UNDEFINED); return params; } -ZVecInvertIndexParams *zvec_test_create_default_invert_params( - bool enable_optimize) { - ZVecInvertIndexParams *params = - (ZVecInvertIndexParams *)malloc(sizeof(ZVecInvertIndexParams)); +ZVecIndexParams *zvec_test_create_default_invert_params(bool enable_optimize) { + ZVecIndexParams *params = (ZVecIndexParams *)malloc(sizeof(ZVecIndexParams)); if (!params) return 
NULL; - params->base.index_type = ZVEC_INDEX_TYPE_INVERT; - params->enable_range_optimization = enable_optimize; - params->enable_extended_wildcard = enable_optimize; + *params = ZVEC_INVERT_PARAMS(enable_optimize, enable_optimize); return params; } @@ -804,7 +786,7 @@ ZVecInvertIndexParams *zvec_test_create_default_invert_params( ZVecFieldSchema *zvec_test_create_scalar_field( const char *name, ZVecDataType data_type, bool nullable, - const ZVecInvertIndexParams *invert_params) { + const ZVecIndexParams *invert_params) { ZVecFieldSchema *field = (ZVecFieldSchema *)malloc(sizeof(ZVecFieldSchema)); if (!field) return NULL; @@ -813,21 +795,23 @@ ZVecFieldSchema *zvec_test_create_scalar_field( free(field); return NULL; } - // Fix const qualifier issue - create string copy field->name->data = name ? strdup(name) : NULL; field->name->length = name ? strlen(name) : 0; field->name->capacity = name ? strlen(name) + 1 : 0; field->data_type = data_type; field->nullable = nullable; field->dimension = 0; - field->index_params = invert_params ? (ZVecIndexParams *)invert_params : NULL; + field->has_index = (invert_params != NULL); + if (invert_params) { + field->index_params = *invert_params; + } return field; } ZVecFieldSchema *zvec_test_create_vector_field( const char *name, ZVecDataType data_type, uint32_t dimension, bool nullable, - const ZVecHnswIndexParams *vector_index_params) { + const ZVecIndexParams *vector_index_params) { ZVecFieldSchema *field = (ZVecFieldSchema *)malloc(sizeof(ZVecFieldSchema)); if (!field) return NULL; @@ -836,22 +820,23 @@ ZVecFieldSchema *zvec_test_create_vector_field( free(field); return NULL; } - // Fix const qualifier issue - create string copy field->name->data = name ? strdup(name) : NULL; field->name->length = name ? strlen(name) : 0; field->name->capacity = name ? strlen(name) + 1 : 0; field->data_type = data_type; field->nullable = nullable; field->dimension = dimension; - field->index_params = - vector_index_params ? 
(ZVecIndexParams *)vector_index_params : NULL; + field->has_index = (vector_index_params != NULL); + if (vector_index_params) { + field->index_params = *vector_index_params; + } return field; } ZVecFieldSchema *zvec_test_create_sparse_vector_field( const char *name, ZVecDataType data_type, bool nullable, - const ZVecHnswIndexParams *vector_index_params) { + const ZVecIndexParams *vector_index_params) { ZVecFieldSchema *field = (ZVecFieldSchema *)malloc(sizeof(ZVecFieldSchema)); if (!field) return NULL; @@ -860,15 +845,16 @@ ZVecFieldSchema *zvec_test_create_sparse_vector_field( free(field); return NULL; } - // Fix const qualifier issue - create string copy field->name->data = name ? strdup(name) : NULL; field->name->length = name ? strlen(name) : 0; field->name->capacity = name ? strlen(name) + 1 : 0; field->data_type = data_type; field->nullable = nullable; - field->dimension = 0; // Sparse vectors don't need fixed dimension - field->index_params = - vector_index_params ? (ZVecIndexParams *)vector_index_params : NULL; + field->dimension = 0; + field->has_index = (vector_index_params != NULL); + if (vector_index_params) { + field->index_params = *vector_index_params; + } return field; } @@ -882,17 +868,13 @@ void zvec_test_free_field_schemas(ZVecFieldSchema *fields, size_t count) { for (size_t i = 0; i < count; i++) { if (fields[i].name) { - // Free string memory allocated by strdup if (fields[i].name->data) { free(fields[i].name->data); } free(fields[i].name); } - // Free index parameter memory - if (fields[i].index_params) { - zvec_index_params_destroy(fields[i].index_params); - free(fields[i].index_params); - } + // Note: index_params is now an embedded value, not a pointer + // It will be freed automatically when the struct is freed } free(fields); } diff --git a/tests/c_api/utils.h b/tests/c_api/utils.h index 63e5e314..0e9b42b7 100644 --- a/tests/c_api/utils.h +++ b/tests/c_api/utils.h @@ -57,9 +57,8 @@ ZVecCollectionSchema 
*zvec_test_create_scalar_schema(void); * @return ZVecCollectionSchema* Created schema pointer */ ZVecCollectionSchema *zvec_test_create_normal_schema( - bool nullable, const char *name, - const ZVecInvertIndexParams *scalar_index_params, - const ZVecHnswIndexParams *vector_index_params, uint64_t max_doc_count); + bool nullable, const char *name, const ZVecIndexParams *scalar_index_params, + const ZVecIndexParams *vector_index_params, uint64_t max_doc_count); /** * @brief Create schema with scalar index @@ -83,7 +82,7 @@ ZVecCollectionSchema *zvec_test_create_schema_with_scalar_index( */ ZVecCollectionSchema *zvec_test_create_schema_with_vector_index( bool nullable, const char *name, - const ZVecHnswIndexParams *vector_index_params); + const ZVecIndexParams *vector_index_params); /** * @brief Create schema with specified maximum document count @@ -157,28 +156,24 @@ ZVecDoc *zvec_test_create_doc_with_fields(uint64_t doc_id, /** * @brief Create default HNSW index parameters * - * @return ZVecHnswIndexParams* Created parameter pointer, needs to be released - * by calling free() + * @return ZVecIndexParams* Created parameter pointer */ -ZVecHnswIndexParams *zvec_test_create_default_hnsw_params(void); +ZVecIndexParams *zvec_test_create_default_hnsw_params(void); /** * @brief Create default Flat index parameters * - * @return ZVecFlatIndexParams* Created parameter pointer, needs to be released - * by calling free() + * @return ZVecIndexParams* Created parameter pointer */ -ZVecFlatIndexParams *zvec_test_create_default_flat_params(void); +ZVecIndexParams *zvec_test_create_default_flat_params(void); /** * @brief Create default scalar index parameters * * @param enable_optimize Whether to enable optimization - * @return ZVecInvertIndexParams* Created parameter pointer, needs to be - * released by calling free() + * @return ZVecIndexParams* Created parameter pointer */ -ZVecInvertIndexParams *zvec_test_create_default_invert_params( - bool enable_optimize); +ZVecIndexParams 
*zvec_test_create_default_invert_params(bool enable_optimize); // ============================================================================= // Field Schema Creation Helper Functions @@ -196,7 +191,7 @@ ZVecInvertIndexParams *zvec_test_create_default_invert_params( */ ZVecFieldSchema *zvec_test_create_scalar_field( const char *name, ZVecDataType data_type, bool nullable, - const ZVecInvertIndexParams *invert_params); + const ZVecIndexParams *invert_params); /** * @brief Create vector field schema @@ -210,7 +205,7 @@ ZVecFieldSchema *zvec_test_create_scalar_field( */ ZVecFieldSchema *zvec_test_create_vector_field( const char *name, ZVecDataType data_type, uint32_t dimension, bool nullable, - const ZVecHnswIndexParams *vector_index_params); + const ZVecIndexParams *vector_index_params); /** * @brief Create sparse vector field schema @@ -223,7 +218,7 @@ ZVecFieldSchema *zvec_test_create_vector_field( */ ZVecFieldSchema *zvec_test_create_sparse_vector_field( const char *name, ZVecDataType data_type, bool nullable, - const ZVecHnswIndexParams *vector_index_params); + const ZVecIndexParams *vector_index_params); // ============================================================================= // Memory Management Helper Functions From 6b5d58fbba900b67ac45d6e813fa2ff344807085 Mon Sep 17 00:00:00 2001 From: lc285652 Date: Sun, 22 Mar 2026 17:23:08 +0800 Subject: [PATCH 5/7] refact c api code --- CMakeLists.txt | 11 +- examples/CMakeLists.txt | 15 - examples/{c_api => c}/CMakeLists.txt | 12 +- examples/{c_api => c}/basic_example.c | 0 .../{c_api => c}/collection_schema_example.c | 0 examples/{c_api => c}/doc_example.c | 0 examples/{c_api => c}/field_schema_example.c | 0 examples/{c_api => c}/index_example.c | 0 examples/{c_api => c}/optimized_example.c | 0 src/CMakeLists.txt | 5 +- src/binding/CMakeLists.txt | 10 +- src/{c_api => binding/c}/CMakeLists.txt | 38 +- src/{c_api => binding/c}/c_api.cc | 584 +++--- src/c_api/API_REFERENCE_CN.md | 1843 ----------------- 
src/db/CMakeLists.txt | 1 + src/include/zvec/c_api.h | 27 +- src/include/zvec/version.h.in | 16 + tests/CMakeLists.txt | 4 +- tests/{c_api => c}/CMakeLists.txt | 2 +- tests/{c_api => c}/c_api_test.c | 32 +- tests/{c_api => c}/utils.c | 0 tests/{c_api => c}/utils.h | 0 .../flat_sparse/flat_sparse_builder_test.cc | 2 +- .../metric/quantized_integer_metric_test.cc | 4 +- 24 files changed, 353 insertions(+), 2253 deletions(-) delete mode 100644 examples/CMakeLists.txt rename examples/{c_api => c}/CMakeLists.txt (90%) rename examples/{c_api => c}/basic_example.c (100%) rename examples/{c_api => c}/collection_schema_example.c (100%) rename examples/{c_api => c}/doc_example.c (100%) rename examples/{c_api => c}/field_schema_example.c (100%) rename examples/{c_api => c}/index_example.c (100%) rename examples/{c_api => c}/optimized_example.c (100%) rename src/{c_api => binding/c}/CMakeLists.txt (80%) rename src/{c_api => binding/c}/c_api.cc (90%) delete mode 100644 src/c_api/API_REFERENCE_CN.md create mode 100644 src/include/zvec/version.h.in rename tests/{c_api => c}/CMakeLists.txt (94%) rename tests/{c_api => c}/c_api_test.c (99%) rename tests/{c_api => c}/utils.c (100%) rename tests/{c_api => c}/utils.h (100%) diff --git a/CMakeLists.txt b/CMakeLists.txt index ad954faa..7730c84b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -21,10 +21,15 @@ include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake) include_directories(${PROJECT_ROOT_DIR}/src/include) include_directories(${PROJECT_ROOT_DIR}/src) +# Add generated headers to global include path +include_directories(${PROJECT_BINARY_DIR}/src/generated) option(BUILD_PYTHON_BINDINGS "Build Python bindings using pybind11" OFF) message(STATUS "BUILD_PYTHON_BINDINGS:${BUILD_PYTHON_BINDINGS}") +option(BUILD_C_BINDINGS "Build C bindings" ON) +message(STATUS "BUILD_C_BINDINGS:${BUILD_C_BINDINGS}") + option(BUILD_TOOLS "Build tools" ON) message(STATUS "BUILD_TOOLS:${BUILD_TOOLS}") @@ -34,15 +39,9 @@ if(DEFINED ENV{USE_OSS_MIRROR} AND NOT 
"$ENV{USE_OSS_MIRROR}" STREQUAL "") endif() message(STATUS "USE_OSS_MIRROR:${USE_OSS_MIRROR}") -option(BUILD_EXAMPLES "Build examples" ON) -message(STATUS "BUILD_EXAMPLES:${BUILD_EXAMPLES}") - cc_directory(thirdparty) cc_directories(src) cc_directories(tests) -if(BUILD_EXAMPLES) - cc_directories(examples) -endif() if(BUILD_TOOLS) cc_directories(tools) diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt deleted file mode 100644 index 66e943ad..00000000 --- a/examples/CMakeLists.txt +++ /dev/null @@ -1,15 +0,0 @@ -# Copyright 2025-present the zvec project -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -add_subdirectory(c_api) \ No newline at end of file diff --git a/examples/c_api/CMakeLists.txt b/examples/c/CMakeLists.txt similarity index 90% rename from examples/c_api/CMakeLists.txt rename to examples/c/CMakeLists.txt index 759f744f..476b42c2 100644 --- a/examples/c_api/CMakeLists.txt +++ b/examples/c/CMakeLists.txt @@ -19,7 +19,7 @@ target_include_directories(c_api_basic_example PRIVATE ${PROJECT_SOURCE_DIR}/src/include ) set_target_properties(c_api_basic_example PROPERTIES - RUNTIME_OUTPUT_DIRECTORY ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/examples/c_api + RUNTIME_OUTPUT_DIRECTORY ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/examples/c ) @@ -30,7 +30,7 @@ target_include_directories(c_api_collection_schema_example PRIVATE ${PROJECT_SOURCE_DIR}/src/include ) set_target_properties(c_api_collection_schema_example PROPERTIES - RUNTIME_OUTPUT_DIRECTORY ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/examples/c_api + RUNTIME_OUTPUT_DIRECTORY ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/examples/c ) # Struct document example @@ -40,26 +40,26 @@ target_include_directories(c_api_doc_example PRIVATE ${PROJECT_SOURCE_DIR}/src/include ) set_target_properties(c_api_doc_example PROPERTIES - RUNTIME_OUTPUT_DIRECTORY ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/examples/c_api + RUNTIME_OUTPUT_DIRECTORY ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/examples/c ) # Index example add_executable(c_api_index_example index_example.c) target_link_libraries(c_api_index_example PRIVATE zvec_c_api) set_target_properties(c_api_index_example PROPERTIES - RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin/examples/c_api + RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin/examples/c ) # Newly added field schema example add_executable(c_api_field_schema_example field_schema_example.c) target_link_libraries(c_api_field_schema_example PRIVATE zvec_c_api) set_target_properties(c_api_field_schema_example PROPERTIES - RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin/examples/c_api + RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin/examples/c ) # Optimized 
example add_executable(c_api_optimized_example optimized_example.c) target_link_libraries(c_api_optimized_example PRIVATE zvec_c_api) set_target_properties(c_api_optimized_example PROPERTIES - RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin/examples/c_api + RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin/examples/c ) diff --git a/examples/c_api/basic_example.c b/examples/c/basic_example.c similarity index 100% rename from examples/c_api/basic_example.c rename to examples/c/basic_example.c diff --git a/examples/c_api/collection_schema_example.c b/examples/c/collection_schema_example.c similarity index 100% rename from examples/c_api/collection_schema_example.c rename to examples/c/collection_schema_example.c diff --git a/examples/c_api/doc_example.c b/examples/c/doc_example.c similarity index 100% rename from examples/c_api/doc_example.c rename to examples/c/doc_example.c diff --git a/examples/c_api/field_schema_example.c b/examples/c/field_schema_example.c similarity index 100% rename from examples/c_api/field_schema_example.c rename to examples/c/field_schema_example.c diff --git a/examples/c_api/index_example.c b/examples/c/index_example.c similarity index 100% rename from examples/c_api/index_example.c rename to examples/c/index_example.c diff --git a/examples/c_api/optimized_example.c b/examples/c/optimized_example.c similarity index 100% rename from examples/c_api/optimized_example.c rename to examples/c/optimized_example.c diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 0f3a85ee..c9747a00 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -9,7 +9,4 @@ cc_directory(ailego) cc_directory(turbo) cc_directory(core) cc_directory(db) -cc_directory(c_api) -if(BUILD_PYTHON_BINDINGS) - cc_directory(binding) -endif() +cc_directory(binding) diff --git a/src/binding/CMakeLists.txt b/src/binding/CMakeLists.txt index 7dab04ad..700d0811 100644 --- a/src/binding/CMakeLists.txt +++ b/src/binding/CMakeLists.txt @@ -4,5 +4,11 @@ 
include(${PROJECT_ROOT_DIR}/cmake/option.cmake) # Retrieve version from git repository git_version(ZVEC_VERSION ${CMAKE_CURRENT_SOURCE_DIR}) -# Add repository -cc_directory(python) \ No newline at end of file +# Add repositories +if(BUILD_C_BINDINGS) + cc_directory(c) +endif() + +if(BUILD_PYTHON_BINDINGS) + cc_directory(python) +endif() \ No newline at end of file diff --git a/src/c_api/CMakeLists.txt b/src/binding/c/CMakeLists.txt similarity index 80% rename from src/c_api/CMakeLists.txt rename to src/binding/c/CMakeLists.txt index 565479ab..0a714584 100644 --- a/src/c_api/CMakeLists.txt +++ b/src/binding/c/CMakeLists.txt @@ -16,6 +16,35 @@ include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake) include(${PROJECT_ROOT_DIR}/cmake/option.cmake) include(GNUInstallDirs) +# Retrieve version from git repository and generate version header +git_version(ZVEC_VERSION ${CMAKE_CURRENT_SOURCE_DIR}) + +# Debug: print version variables +message(STATUS "ZVEC_VERSION: ${ZVEC_VERSION}") + +# Parse version string to extract major.minor.patch +# Format: vX.Y.Z-commit-hash or vX.Y.Z +if(ZVEC_VERSION MATCHES "^v([0-9]+)\\.([0-9]+)\\.([0-9]+)") + set(ZVEC_VERSION_MAJOR "${CMAKE_MATCH_1}") + set(ZVEC_VERSION_MINOR "${CMAKE_MATCH_2}") + set(ZVEC_VERSION_PATCH "${CMAKE_MATCH_3}") + set(ZVEC_VERSION_STRING "${ZVEC_VERSION}") +else() + # Default version if parsing fails + set(ZVEC_VERSION_MAJOR 0) + set(ZVEC_VERSION_MINOR 2) + set(ZVEC_VERSION_PATCH 1) + set(ZVEC_VERSION_STRING "${ZVEC_VERSION_MAJOR}.${ZVEC_VERSION_MINOR}.${ZVEC_VERSION_PATCH}") +endif() + +message(STATUS "Parsed version: ${ZVEC_VERSION_MAJOR}.${ZVEC_VERSION_MINOR}.${ZVEC_VERSION_PATCH} (${ZVEC_VERSION_STRING})") + +# Configure version header file +configure_file( + ${PROJECT_SOURCE_DIR}/src/include/zvec/version.h.in + ${PROJECT_BINARY_DIR}/src/generated/zvec_version.h +) + set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CXX_EXTENSIONS OFF) @@ -63,7 +92,7 @@ find_package(Threads REQUIRED) # This creates a 
truly self-contained library with zero external dependencies # Users only need to link libzvec_c_api.so without installing any dependencies if(APPLE) - # Combine all libraries in a single target_link_libraries call + # First, link all libraries normally target_link_libraries(zvec_c_api PRIVATE # zvec static libraries @@ -87,10 +116,9 @@ if(APPLE) ${CMAKE_DL_LIBS} ) - # Then use target_link_libraries with -force_load on macOS - # This ensures all symbols from static libraries are included - # Note: sparsehash and magic_enum are header-only, skip them - target_link_libraries(zvec_c_api PRIVATE + # Then, apply -force_load to ensure all symbols are included + # Note: This may cause duplicate library warnings, but they are harmless + target_link_options(zvec_c_api PRIVATE -Wl,-force_load,$ -Wl,-force_load,$ -Wl,-force_load,$ diff --git a/src/c_api/c_api.cc b/src/binding/c/c_api.cc similarity index 90% rename from src/c_api/c_api.cc rename to src/binding/c/c_api.cc index 20834a20..3588f2ab 100644 --- a/src/c_api/c_api.cc +++ b/src/binding/c/c_api.cc @@ -122,90 +122,88 @@ struct DeleteArrayGuard { } // namespace // Error checking macros - these preserve __LINE__ accuracy -#define ZVEC_CHECK_NOTNULL(ptr, error_code, msg) \ - if (!(ptr)) { \ - set_last_error_details(error_code, msg, __FILE__, __LINE__, __FUNCTION__); \ - return nullptr; \ +// Simplified macro for setting error with automatic file/line/function info +#define SET_LAST_ERROR(code, msg) \ + set_last_error_details(code, msg, __FILE__, __LINE__, __FUNCTION__) + +#define ZVEC_CHECK_NOTNULL(ptr, error_code, msg) \ + if (!(ptr)) { \ + SET_LAST_ERROR(error_code, msg); \ + return nullptr; \ } -#define ZVEC_CHECK_NOTNULL_ERRCODE(ptr, error_code, msg) \ - if (!(ptr)) { \ - set_last_error_details(error_code, msg, __FILE__, __LINE__, __FUNCTION__); \ - return (error_code); \ +#define ZVEC_CHECK_NOTNULL_ERRCODE(ptr, error_code, msg) \ + if (!(ptr)) { \ + SET_LAST_ERROR(error_code, msg); \ + return (error_code); \ } 
-#define ZVEC_CHECK_COND(cond, error_code, msg) \ - if (cond) { \ - set_last_error_details(error_code, msg, __FILE__, __LINE__, __FUNCTION__); \ - return nullptr; \ +#define ZVEC_CHECK_COND(cond, error_code, msg) \ + if (cond) { \ + SET_LAST_ERROR(error_code, msg); \ + return nullptr; \ } -#define ZVEC_CHECK_COND_ERRCODE(cond, error_code, msg) \ - if (cond) { \ - set_last_error_details(error_code, msg, __FILE__, __LINE__, __FUNCTION__); \ - return (error_code); \ +#define ZVEC_CHECK_COND_ERRCODE(cond, error_code, msg) \ + if (cond) { \ + SET_LAST_ERROR(error_code, msg); \ + return (error_code); \ } // For void functions (no return value): #define ZVEC_TRY_BEGIN_VOID try { -#define ZVEC_CATCH_END_VOID \ - } \ - catch (const std::exception &e) { \ - set_last_error(std::string("Exception: ") + e.what()); \ +#define ZVEC_CATCH_END_VOID \ + } \ + catch (const std::exception &e) { \ + SET_LAST_ERROR(ZVEC_ERROR_UNKNOWN, std::string("Exception: ") + e.what()); \ } // For functions returning pointer - complete try-catch wrapper // Usage: ZVEC_TRY_RETURN_NULL("error msg", code...) // Note: Use variadic macro to handle commas in template arguments -#define ZVEC_TRY_RETURN_NULL(msg, ...) \ - try { \ - { __VA_ARGS__ } \ - } catch (const std::bad_alloc &e) { \ - set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, \ - std::string(msg) + ": " + e.what(), __FILE__, \ - __LINE__, __FUNCTION__); \ - return nullptr; \ - } catch (const std::exception &e) { \ - set_last_error_details(ZVEC_ERROR_INTERNAL_ERROR, \ - std::string(msg) + ": " + e.what(), __FILE__, \ - __LINE__, __FUNCTION__); \ - return nullptr; \ +#define ZVEC_TRY_RETURN_NULL(msg, ...) 
\ + try { \ + { __VA_ARGS__ } \ + } catch (const std::bad_alloc &e) { \ + SET_LAST_ERROR(ZVEC_ERROR_RESOURCE_EXHAUSTED, \ + std::string(msg) + ": " + e.what()); \ + return nullptr; \ + } catch (const std::exception &e) { \ + SET_LAST_ERROR(ZVEC_ERROR_INTERNAL_ERROR, \ + std::string(msg) + ": " + e.what()); \ + return nullptr; \ } // For functions returning ErrorCode // Usage: ZVEC_TRY_RETURN_ERROR("error msg", code...) // Note: Use variadic macro to handle commas in template arguments -#define ZVEC_TRY_RETURN_ERROR(msg, ...) \ - try { \ - { __VA_ARGS__ } \ - } catch (const std::bad_alloc &e) { \ - set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, \ - std::string(msg) + ": " + e.what(), __FILE__, \ - __LINE__, __FUNCTION__); \ - return ZVEC_ERROR_RESOURCE_EXHAUSTED; \ - } catch (const std::exception &e) { \ - set_last_error_details(ZVEC_ERROR_INTERNAL_ERROR, \ - std::string(msg) + ": " + e.what(), __FILE__, \ - __LINE__, __FUNCTION__); \ - return ZVEC_ERROR_INTERNAL_ERROR; \ +#define ZVEC_TRY_RETURN_ERROR(msg, ...) \ + try { \ + { __VA_ARGS__ } \ + } catch (const std::bad_alloc &e) { \ + SET_LAST_ERROR(ZVEC_ERROR_RESOURCE_EXHAUSTED, \ + std::string(msg) + ": " + e.what()); \ + return ZVEC_ERROR_RESOURCE_EXHAUSTED; \ + } catch (const std::exception &e) { \ + SET_LAST_ERROR(ZVEC_ERROR_INTERNAL_ERROR, \ + std::string(msg) + ": " + e.what()); \ + return ZVEC_ERROR_INTERNAL_ERROR; \ } // For functions returning scalar values (int, float, size_t, etc.) // Usage: ZVEC_TRY_RETURN_SCALAR("error msg", error_value, code...) // Note: Use variadic macro to handle commas in template arguments -#define ZVEC_TRY_RETURN_SCALAR(msg, error_val, ...) 
\ - try { \ - { __VA_ARGS__ } \ - } catch (const std::bad_alloc &e) { \ - set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, \ - std::string(msg) + ": " + e.what(), __FILE__, \ - __LINE__, __FUNCTION__); \ - return (error_val); \ - } catch (const std::exception &e) { \ - set_last_error_details(ZVEC_ERROR_INTERNAL_ERROR, \ - std::string(msg) + ": " + e.what(), __FILE__, \ - __LINE__, __FUNCTION__); \ - return (error_val); \ +#define ZVEC_TRY_RETURN_SCALAR(msg, error_val, ...) \ + try { \ + { __VA_ARGS__ } \ + } catch (const std::bad_alloc &e) { \ + SET_LAST_ERROR(ZVEC_ERROR_RESOURCE_EXHAUSTED, \ + std::string(msg) + ": " + e.what()); \ + return (error_val); \ + } catch (const std::exception &e) { \ + SET_LAST_ERROR(ZVEC_ERROR_INTERNAL_ERROR, \ + std::string(msg) + ": " + e.what()); \ + return (error_val); \ } // Global status flags @@ -278,9 +276,8 @@ const char *zvec_get_version(void) { bool zvec_check_version(int major, int minor, int patch) { if (major < 0 || minor < 0 || patch < 0) { - set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, - "Version numbers must be non-negative", __FILE__, - __LINE__, __FUNCTION__); + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Version numbers must be non-negative"); return false; } @@ -311,27 +308,24 @@ int zvec_get_version_patch(void) { ZVecString *zvec_string_create(const char *str) { if (!str) { - set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, - "String pointer cannot be null", __FILE__, __LINE__, - __FUNCTION__); + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "String pointer cannot be null"); return nullptr; } size_t len = strlen(str); ZVecString *zstr = static_cast(malloc(sizeof(ZVecString))); if (!zstr) { - set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, - "Failed to allocate memory for ZVecString", __FILE__, - __LINE__, __FUNCTION__); + SET_LAST_ERROR(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to allocate memory for ZVecString"); return nullptr; } char *data_buffer = static_cast(malloc(len + 1)); if 
(!data_buffer) { free(zstr); - set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, - "Failed to allocate memory for string data", - __FILE__, __LINE__, __FUNCTION__); + SET_LAST_ERROR(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to allocate memory for string data"); return nullptr; } @@ -344,26 +338,23 @@ ZVecString *zvec_string_create(const char *str) { ZVecString *zvec_string_create_from_view(const ZVecStringView *view) { if (!view || !view->data) { - set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, - "String view or data cannot be null", __FILE__, - __LINE__, __FUNCTION__); + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "String view or data cannot be null"); return nullptr; } ZVecString *zstr = static_cast(malloc(sizeof(ZVecString))); if (!zstr) { - set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, - "Failed to allocate memory for ZVecString", __FILE__, - __LINE__, __FUNCTION__); + SET_LAST_ERROR(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to allocate memory for ZVecString"); return nullptr; } char *data_buffer = static_cast(malloc(view->length + 1)); if (!data_buffer) { free(zstr); - set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, - "Failed to allocate memory for string data", - __FILE__, __LINE__, __FUNCTION__); + SET_LAST_ERROR(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to allocate memory for string data"); return nullptr; } @@ -378,26 +369,23 @@ ZVecString *zvec_string_create_from_view(const ZVecStringView *view) { ZVecString *zvec_bin_create(const uint8_t *data, size_t length) { if (!data) { - set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, - "Binary data pointer cannot be null", __FILE__, - __LINE__, __FUNCTION__); + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Binary data pointer cannot be null"); return nullptr; } ZVecString *zstr = static_cast(malloc(sizeof(ZVecString))); if (!zstr) { - set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, - "Failed to allocate memory for ZVecString", __FILE__, - __LINE__, __FUNCTION__); + 
SET_LAST_ERROR(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to allocate memory for ZVecString"); return nullptr; } char *data_buffer = static_cast(malloc(length + 1)); if (!data_buffer) { free(zstr); - set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, - "Failed to allocate memory for binary data", - __FILE__, __LINE__, __FUNCTION__); + SET_LAST_ERROR(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to allocate memory for binary data"); return nullptr; } @@ -412,9 +400,8 @@ ZVecString *zvec_bin_create(const uint8_t *data, size_t length) { ZVecString *zvec_string_copy(const ZVecString *str) { if (!str || !str->data) { - set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, - "Source string or data cannot be null", __FILE__, - __LINE__, __FUNCTION__); + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Source string or data cannot be null"); return nullptr; } @@ -423,9 +410,8 @@ ZVecString *zvec_string_copy(const ZVecString *str) { const char *zvec_string_c_str(const ZVecString *str) { if (!str) { - set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, - "String pointer cannot be null", __FILE__, __LINE__, - __FUNCTION__); + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "String pointer cannot be null"); return nullptr; } @@ -434,9 +420,8 @@ const char *zvec_string_c_str(const ZVecString *str) { size_t zvec_string_length(const ZVecString *str) { if (!str) { - set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, - "String pointer cannot be null", __FILE__, __LINE__, - __FUNCTION__); + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "String pointer cannot be null"); return 0; } @@ -445,16 +430,13 @@ size_t zvec_string_length(const ZVecString *str) { int zvec_string_compare(const ZVecString *str1, const ZVecString *str2) { if (!str1 || !str2) { - set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, - "String pointers cannot be null", __FILE__, __LINE__, - __FUNCTION__); + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "String pointers cannot be null"); return -1; } if (!str1->data || 
!str2->data) { - set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, - "String data cannot be null", __FILE__, __LINE__, - __FUNCTION__); + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, "String data cannot be null"); return -1; } @@ -469,9 +451,8 @@ ZVecConsoleLogConfig *zvec_config_console_log_create(ZVecLogLevel level) { ZVecConsoleLogConfig *config = static_cast(malloc(sizeof(ZVecConsoleLogConfig))); if (!config) { - set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, - "Failed to allocate memory for ZVecConsoleLogConfig", - __FILE__, __LINE__, __FUNCTION__); + SET_LAST_ERROR(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to allocate memory for ZVecConsoleLogConfig"); return nullptr; } config->level = level; @@ -484,18 +465,16 @@ ZVecFileLogConfig *zvec_config_file_log_create(ZVecLogLevel level, uint32_t file_size, uint32_t overdue_days) { if (!dir || !basename) { - set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, - "Directory or basename cannot be null", __FILE__, - __LINE__, __FUNCTION__); + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Directory or basename cannot be null"); return nullptr; } ZVecFileLogConfig *config = static_cast(malloc(sizeof(ZVecFileLogConfig))); if (!config) { - set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, - "Failed to allocate memory for ZVecFileLogConfig", - __FILE__, __LINE__, __FUNCTION__); + SET_LAST_ERROR(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to allocate memory for ZVecFileLogConfig"); return nullptr; } @@ -507,9 +486,8 @@ ZVecFileLogConfig *zvec_config_file_log_create(ZVecLogLevel level, if (dir_str) zvec_free_string(dir_str); if (basename_str) zvec_free_string(basename_str); free(config); - set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, - "Failed to create strings for file log config", - __FILE__, __LINE__, __FUNCTION__); + SET_LAST_ERROR(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to create strings for file log config"); return nullptr; } @@ -529,9 +507,8 @@ ZVecConfigData *zvec_config_data_create(void) { 
ZVecConfigData *config = static_cast(malloc(sizeof(ZVecConfigData))); if (!config) { - set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, - "Failed to allocate memory for ZVecConfigData", - __FILE__, __LINE__, __FUNCTION__); + SET_LAST_ERROR(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to allocate memory for ZVecConfigData"); return nullptr; } @@ -539,9 +516,8 @@ ZVecConfigData *zvec_config_data_create(void) { zvec_config_console_log_create(ZVEC_LOG_LEVEL_WARN); if (!log_config) { free(config); - set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, - "Failed to create console log config", __FILE__, - __LINE__, __FUNCTION__); + SET_LAST_ERROR(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to create console log config"); return nullptr; } config->log_config = log_config; @@ -560,15 +536,13 @@ ZVecConfigData *zvec_config_data_create(void) { } void zvec_config_console_log_destroy(ZVecConsoleLogConfig *config) { - if (config) { - free(config); - } + free(config); } void zvec_config_file_log_destroy(ZVecFileLogConfig *config) { if (config) { - if (config->dir.data) free((void *)config->dir.data); - if (config->basename.data) free((void *)config->basename.data); + if (config->dir.data) free(config->dir.data); + if (config->basename.data) free(config->basename.data); free(config); } } @@ -649,16 +623,16 @@ ZVecErrorCode zvec_initialize(const ZVecConfigData *config) { std::lock_guard lock(g_init_mutex); if (g_initialized.load()) { - set_last_error_details(ZVEC_ERROR_ALREADY_EXISTS, - "Library already initialized"); + SET_LAST_ERROR(ZVEC_ERROR_ALREADY_EXISTS, "Library already initialized"); return ZVEC_ERROR_ALREADY_EXISTS; } ZVEC_TRY_RETURN_ERROR( "Initialization failed", // Convert to C++ configuration object + zvec::GlobalConfig::ConfigData cpp_config{}; + if (config) { - zvec::GlobalConfig::ConfigData cpp_config{}; cpp_config.memory_limit_bytes = config->memory_limit_bytes; cpp_config.query_thread_count = config->query_thread_count; cpp_config.invert_to_forward_scan_ratio = @@ 
-699,21 +673,19 @@ ZVecErrorCode zvec_initialize(const ZVecConfigData *config) { } cpp_config.log_config = log_config; } - // Initialize global configuration - auto status = zvec::GlobalConfig::Instance().Initialize(cpp_config); - if (!status.ok()) { - set_last_error(status.message()); - return ZVEC_ERROR_INTERNAL_ERROR; - } } else { // Initialize with default configuration - zvec::GlobalConfig::ConfigData default_config; - auto status = zvec::GlobalConfig::Instance().Initialize(default_config); - if (!status.ok()) { - set_last_error(status.message()); - return ZVEC_ERROR_INTERNAL_ERROR; - } - } g_initialized.store(true); + cpp_config = zvec::GlobalConfig::ConfigData{}; + } + + // Initialize global configuration + auto status = zvec::GlobalConfig::Instance().Initialize(cpp_config); + if (!status.ok()) { + set_last_error(status.message()); + return ZVEC_ERROR_INTERNAL_ERROR; + } + + g_initialized.store(true); return ZVEC_OK;) } @@ -721,8 +693,7 @@ ZVecErrorCode zvec_shutdown(void) { std::lock_guard lock(g_init_mutex); if (!g_initialized.load()) { - set_last_error_details(ZVEC_ERROR_FAILED_PRECONDITION, - "Library not initialized"); + SET_LAST_ERROR(ZVEC_ERROR_FAILED_PRECONDITION, "Library not initialized"); return ZVEC_ERROR_FAILED_PRECONDITION; } @@ -730,16 +701,8 @@ ZVecErrorCode zvec_shutdown(void) { return ZVEC_OK;) } -ZVecErrorCode zvec_is_initialized(bool *initialized) { - if (!initialized) { - set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, - "Initialized flag pointer cannot be null", __FILE__, - __LINE__, __FUNCTION__); - return ZVEC_ERROR_INVALID_ARGUMENT; - } - - *initialized = g_initialized.load(); - return ZVEC_OK; +bool zvec_is_initialized(void) { + return g_initialized.load(); } // ============================================================================= @@ -748,9 +711,8 @@ ZVecErrorCode zvec_is_initialized(bool *initialized) { ZVecErrorCode zvec_get_last_error_details(ZVecErrorDetails *error_details) { if (!error_details) { - 
set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, - "Error details pointer cannot be null", __FILE__, - __LINE__, __FUNCTION__); + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Error details pointer cannot be null"); return ZVEC_ERROR_INVALID_ARGUMENT; } @@ -793,8 +755,11 @@ static ZVecErrorCode handle_expected_result( // Helper function: copy strings static char *copy_string(const std::string &str) { if (str.empty()) return nullptr; - char *copy = static_cast(malloc(str.length() + 1)); - strcpy(copy, str.c_str()); + size_t len = str.length(); + char *copy = static_cast(malloc(len + 1)); + if (!copy) return nullptr; + strncpy(copy, str.c_str(), len); + copy[len] = '\0'; // Ensure null-termination return copy; } @@ -805,10 +770,7 @@ static void free_write_results_internal(ZVecWriteResult *results, return; } for (size_t i = 0; i < result_count; ++i) { - if (results[i].pk) { - free((void *)results[i].pk); - results[i].pk = nullptr; - } + // pk is not stored (ordered style), only free message if (results[i].message) { free((void *)results[i].message); results[i].message = nullptr; @@ -839,10 +801,10 @@ static ZVecErrorCode build_write_results( return ZVEC_ERROR_INTERNAL_ERROR; } + // Use ordered style: result index corresponds to input index. + // No need to store pk in result, caller can access by index. for (size_t i = 0; i < *result_count; ++i) { - const std::string pk = i < pks.size() ? 
pks[i] : std::string(); const std::string message = statuses[i].message(); - (*results)[i].pk = copy_string(pk); (*results)[i].message = copy_string(message); (*results)[i].code = status_to_error_code(statuses[i]); } @@ -1138,9 +1100,8 @@ void zvec_free_field_schema(ZVecFieldSchema *field_schema) { void zvec_index_params_init(ZVecIndexParams *params, ZVecIndexType index_type, ZVecMetricType metric_type) { if (!params) { - set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, - "Index params pointer cannot be null", __FILE__, - __LINE__, __FUNCTION__); + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Index params pointer cannot be null"); return; } @@ -1176,8 +1137,7 @@ void zvec_index_params_init(ZVecIndexParams *params, ZVecIndexType index_type, break; default: - set_last_error_details(ZVEC_ERROR_NOT_SUPPORTED, "Unsupported index type", - __FILE__, __LINE__, __FUNCTION__); + SET_LAST_ERROR(ZVEC_ERROR_NOT_SUPPORTED, "Unsupported index type"); break; } } @@ -1185,9 +1145,8 @@ void zvec_index_params_init(ZVecIndexParams *params, ZVecIndexType index_type, void zvec_index_params_set_hnsw(ZVecIndexParams *params, int m, int ef_construction, int ef_search) { if (!params || params->index_type != ZVEC_INDEX_TYPE_HNSW) { - set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, - "Invalid params or not HNSW index type", __FILE__, - __LINE__, __FUNCTION__); + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Invalid params or not HNSW index type"); return; } params->hnsw.m = m; @@ -1198,9 +1157,8 @@ void zvec_index_params_set_hnsw(ZVecIndexParams *params, int m, void zvec_index_params_set_ivf(ZVecIndexParams *params, int n_list, int n_iters, bool use_soar, int n_probe) { if (!params || params->index_type != ZVEC_INDEX_TYPE_IVF) { - set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, - "Invalid params or not IVF index type", __FILE__, - __LINE__, __FUNCTION__); + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Invalid params or not IVF index type"); return; } params->ivf.n_list = 
n_list; @@ -1212,9 +1170,8 @@ void zvec_index_params_set_ivf(ZVecIndexParams *params, int n_list, int n_iters, void zvec_index_params_set_invert(ZVecIndexParams *params, bool enable_range_opt, bool enable_wildcard) { if (!params || params->index_type != ZVEC_INDEX_TYPE_INVERT) { - set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, - "Invalid params or not INVERT index type", __FILE__, - __LINE__, __FUNCTION__); + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Invalid params or not INVERT index type"); return; } params->invert.enable_range_optimization = enable_range_opt; @@ -1229,27 +1186,23 @@ ZVecFieldSchema *zvec_field_schema_create(const char *name, ZVecDataType data_type, bool nullable, uint32_t dimension) { if (!name) { - set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, - "Field name cannot be null", __FILE__, __LINE__, - __FUNCTION__); + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, "Field name cannot be null"); return nullptr; } ZVecFieldSchema *schema = static_cast(malloc(sizeof(ZVecFieldSchema))); if (!schema) { - set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, - "Failed to allocate memory for ZVecFieldSchema", - __FILE__, __LINE__, __FUNCTION__); + SET_LAST_ERROR(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to allocate memory for ZVecFieldSchema"); return nullptr; } schema->name = zvec_string_create(name); if (!schema->name) { free(schema); - set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, - "Failed to create string for field name", __FILE__, - __LINE__, __FUNCTION__); + SET_LAST_ERROR(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to create string for field name"); return nullptr; } @@ -1273,9 +1226,8 @@ void zvec_field_schema_destroy(ZVecFieldSchema *schema) { ZVecErrorCode zvec_field_schema_set_index_params( ZVecFieldSchema *schema, const ZVecIndexParams *index_params) { if (!schema) { - set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, - "Field schema pointer cannot be null", __FILE__, - __LINE__, __FUNCTION__); + 
SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Field schema pointer cannot be null"); return ZVEC_ERROR_INVALID_ARGUMENT; } @@ -1341,9 +1293,8 @@ static void zvec_field_schema_cleanup(ZVecFieldSchema *field_schema) { void zvec_collection_options_init_default(ZVecCollectionOptions *options) { if (!options) { - set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, - "Collection options pointer cannot be null", - __FILE__, __LINE__, __FUNCTION__); + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Collection options pointer cannot be null"); return; } @@ -1359,27 +1310,24 @@ void zvec_collection_options_init_default(ZVecCollectionOptions *options) { ZVecCollectionSchema *zvec_collection_schema_create(const char *name) { if (!name) { - set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, - "Collection name cannot be null", __FILE__, __LINE__, - __FUNCTION__); + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Collection name cannot be null"); return nullptr; } ZVecCollectionSchema *schema = static_cast(malloc(sizeof(ZVecCollectionSchema))); if (!schema) { - set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, - "Failed to allocate memory for ZVecCollectionSchema", - __FILE__, __LINE__, __FUNCTION__); + SET_LAST_ERROR(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to allocate memory for ZVecCollectionSchema"); return nullptr; } schema->name = zvec_string_create(name); if (!schema->name) { free(schema); - set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, - "Failed to create string for collection name", - __FILE__, __LINE__, __FUNCTION__); + SET_LAST_ERROR(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to create string for collection name"); return nullptr; } @@ -1409,26 +1357,23 @@ void zvec_collection_schema_destroy(ZVecCollectionSchema *schema) { ZVecErrorCode zvec_collection_schema_add_field(ZVecCollectionSchema *schema, ZVecFieldSchema *field) { if (!schema) { - set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, - "Collection schema pointer cannot be null", __FILE__, - __LINE__, 
__FUNCTION__); + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Collection schema pointer cannot be null"); return ZVEC_ERROR_INVALID_ARGUMENT; } if (!field || !field->name) { - set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, - "Field or field name cannot be null", __FILE__, - __LINE__, __FUNCTION__); + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Field or field name cannot be null"); return ZVEC_ERROR_INVALID_ARGUMENT; } for (size_t i = 0; i < schema->field_count; ++i) { if (schema->fields[i]->name && field->name && zvec_string_compare(schema->fields[i]->name, field->name) == 0) { - set_last_error_details( + SET_LAST_ERROR( ZVEC_ERROR_ALREADY_EXISTS, - std::string("Field '") + field->name->data + "' already exists", - __FILE__, __LINE__, __FUNCTION__); + std::string("Field '") + field->name->data + "' already exists"); return ZVEC_ERROR_ALREADY_EXISTS; } } @@ -1439,9 +1384,8 @@ ZVecErrorCode zvec_collection_schema_add_field(ZVecCollectionSchema *schema, ZVecFieldSchema **new_fields = static_cast( malloc(new_capacity * sizeof(ZVecFieldSchema *))); if (!new_fields) { - set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, - "Failed to allocate memory for fields", __FILE__, - __LINE__, __FUNCTION__); + SET_LAST_ERROR(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to allocate memory for fields"); return ZVEC_ERROR_RESOURCE_EXHAUSTED; } @@ -1464,16 +1408,14 @@ ZVecErrorCode zvec_collection_schema_add_fields(ZVecCollectionSchema *schema, const ZVecFieldSchema *fields, size_t field_count) { if (!schema) { - set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, - "Collection schema pointer cannot be null", __FILE__, - __LINE__, __FUNCTION__); + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Collection schema pointer cannot be null"); return ZVEC_ERROR_INVALID_ARGUMENT; } if (!fields && field_count > 0) { - set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, - "Fields array cannot be null when field_count > 0", - __FILE__, __LINE__, __FUNCTION__); + 
SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Fields array cannot be null when field_count > 0"); return ZVEC_ERROR_INVALID_ARGUMENT; } @@ -1484,10 +1426,9 @@ ZVecErrorCode zvec_collection_schema_add_fields(ZVecCollectionSchema *schema, for (size_t i = 0; i < field_count; ++i) { const ZVecFieldSchema &field = fields[i]; if (!field.name || !field.name->data || field.name->length == 0) { - set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, - std::string("Field at index ") + - std::to_string(i) + " has invalid name", - __FILE__, __LINE__, __FUNCTION__); + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + std::string("Field at index ") + std::to_string(i) + + " has invalid name"); return ZVEC_ERROR_INVALID_ARGUMENT; } } @@ -1502,9 +1443,8 @@ ZVecErrorCode zvec_collection_schema_add_fields(ZVecCollectionSchema *schema, ZVecFieldSchema **new_fields = static_cast( malloc(new_capacity * sizeof(ZVecFieldSchema *))); if (!new_fields) { - set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, - "Failed to allocate memory for fields", __FILE__, - __LINE__, __FUNCTION__); + SET_LAST_ERROR(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to allocate memory for fields"); return ZVEC_ERROR_RESOURCE_EXHAUSTED; } @@ -1523,18 +1463,16 @@ ZVecErrorCode zvec_collection_schema_add_fields(ZVecCollectionSchema *schema, ZVecFieldSchema *new_field = static_cast(malloc(sizeof(ZVecFieldSchema))); if (!new_field) { - set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, - "Failed to allocate memory for new field", - __FILE__, __LINE__, __FUNCTION__); + SET_LAST_ERROR(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to allocate memory for new field"); return ZVEC_ERROR_RESOURCE_EXHAUSTED; } new_field->name = zvec_string_copy(src_field.name); if (!new_field->name) { free(new_field); - set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, - "Failed to copy field name", __FILE__, __LINE__, - __FUNCTION__); + SET_LAST_ERROR(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to copy field name"); return 
ZVEC_ERROR_RESOURCE_EXHAUSTED; } @@ -1554,16 +1492,13 @@ ZVecErrorCode zvec_collection_schema_add_fields(ZVecCollectionSchema *schema, ZVecErrorCode zvec_collection_schema_remove_field(ZVecCollectionSchema *schema, const char *field_name) { if (!schema) { - set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, - "Collection schema pointer cannot be null", __FILE__, - __LINE__, __FUNCTION__); + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Collection schema pointer cannot be null"); return ZVEC_ERROR_INVALID_ARGUMENT; } if (!field_name) { - set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, - "Field name cannot be null", __FILE__, __LINE__, - __FUNCTION__); + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, "Field name cannot be null"); return ZVEC_ERROR_INVALID_ARGUMENT; } @@ -1581,9 +1516,8 @@ ZVecErrorCode zvec_collection_schema_remove_field(ZVecCollectionSchema *schema, } } - set_last_error_details(ZVEC_ERROR_NOT_FOUND, - std::string("Field '") + field_name + "' not found", - __FILE__, __LINE__, __FUNCTION__); + SET_LAST_ERROR(ZVEC_ERROR_NOT_FOUND, + std::string("Field '") + field_name + "' not found"); return ZVEC_ERROR_NOT_FOUND; } @@ -1591,17 +1525,14 @@ ZVecErrorCode zvec_collection_schema_remove_fields( ZVecCollectionSchema *schema, const char *const *field_names, size_t field_count) { if (!schema) { - set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, - "Collection schema pointer cannot be null", __FILE__, - __LINE__, __FUNCTION__); + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Collection schema pointer cannot be null"); return ZVEC_ERROR_INVALID_ARGUMENT; } if (!field_names && field_count > 0) { - set_last_error_details( - ZVEC_ERROR_INVALID_ARGUMENT, - "Field names array cannot be null when field_count > 0", __FILE__, - __LINE__, __FUNCTION__); + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Field names array cannot be null when field_count > 0"); return ZVEC_ERROR_INVALID_ARGUMENT; } @@ -1611,10 +1542,9 @@ ZVecErrorCode 
zvec_collection_schema_remove_fields( for (size_t i = 0; i < field_count; ++i) { if (!field_names[i]) { - set_last_error_details( + SET_LAST_ERROR( ZVEC_ERROR_INVALID_ARGUMENT, - std::string("Field name at index ") + std::to_string(i) + " is null", - __FILE__, __LINE__, __FUNCTION__); + std::string("Field name at index ") + std::to_string(i) + " is null"); return ZVEC_ERROR_INVALID_ARGUMENT; } } @@ -1649,8 +1579,7 @@ ZVecErrorCode zvec_collection_schema_remove_fields( error_msg += ", "; } } - set_last_error_details(ZVEC_ERROR_NOT_FOUND, error_msg, __FILE__, __LINE__, - __FUNCTION__); + SET_LAST_ERROR(ZVEC_ERROR_NOT_FOUND, error_msg); return ZVEC_ERROR_NOT_FOUND; } @@ -1689,9 +1618,8 @@ ZVecFieldSchema *zvec_collection_schema_find_field( size_t zvec_collection_schema_get_field_count( const ZVecCollectionSchema *schema) { if (!schema) { - set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, - "Collection schema pointer cannot be null", __FILE__, - __LINE__, __FUNCTION__); + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Collection schema pointer cannot be null"); return 0; } @@ -1701,16 +1629,13 @@ size_t zvec_collection_schema_get_field_count( ZVecFieldSchema *zvec_collection_schema_get_field( const ZVecCollectionSchema *schema, size_t index) { if (!schema) { - set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, - "Collection schema pointer cannot be null", __FILE__, - __LINE__, __FUNCTION__); + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Collection schema pointer cannot be null"); return nullptr; } if (index >= schema->field_count) { - set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, - "Field index out of bounds", __FILE__, __LINE__, - __FUNCTION__); + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, "Field index out of bounds"); return nullptr; } @@ -1720,9 +1645,8 @@ ZVecFieldSchema *zvec_collection_schema_get_field( ZVecErrorCode zvec_collection_schema_set_max_doc_count_per_segment( ZVecCollectionSchema *schema, uint64_t max_doc_count) { if (!schema) { - 
set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, - "Collection schema pointer cannot be null", __FILE__, - __LINE__, __FUNCTION__); + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Collection schema pointer cannot be null"); return ZVEC_ERROR_INVALID_ARGUMENT; } @@ -1739,9 +1663,8 @@ uint64_t zvec_collection_schema_get_max_doc_count_per_segment( ZVecErrorCode zvec_collection_schema_validate( const ZVecCollectionSchema *schema, ZVecString **error_msg) { if (!schema) { - set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, - "Collection schema pointer cannot be null", __FILE__, - __LINE__, __FUNCTION__); + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Collection schema pointer cannot be null"); return ZVEC_ERROR_INVALID_ARGUMENT; } @@ -1753,9 +1676,7 @@ ZVecErrorCode zvec_collection_schema_validate( if (error_msg) { *error_msg = zvec_string_create("Collection name is required"); } - set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, - "Collection name is required", __FILE__, __LINE__, - __FUNCTION__); + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, "Collection name is required"); return ZVEC_ERROR_INVALID_ARGUMENT; } @@ -1763,9 +1684,8 @@ ZVecErrorCode zvec_collection_schema_validate( if (error_msg) { *error_msg = zvec_string_create("At least one field is required"); } - set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, - "At least one field is required", __FILE__, __LINE__, - __FUNCTION__); + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "At least one field is required"); return ZVEC_ERROR_INVALID_ARGUMENT; } @@ -1775,8 +1695,7 @@ ZVecErrorCode zvec_collection_schema_validate( if (error_msg) { *error_msg = zvec_string_create("Null field found"); } - set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, "Null field found", - __FILE__, __LINE__, __FUNCTION__); + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, "Null field found"); return ZVEC_ERROR_INVALID_ARGUMENT; } @@ -1784,9 +1703,7 @@ ZVecErrorCode zvec_collection_schema_validate( if (error_msg) { *error_msg = 
zvec_string_create("Field name is required"); } - set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, - "Field name is required", __FILE__, __LINE__, - __FUNCTION__); + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, "Field name is required"); return ZVEC_ERROR_INVALID_ARGUMENT; } } @@ -2422,7 +2339,7 @@ ZVecErrorCode zvec_doc_add_field_by_value(ZVecDoc *doc, const char *field_name, set_last_error("Invalid value size for vector_binary32 type"); return error_code; } - (*doc_ptr)->set(name, vec); + (*doc_ptr)->set(name, std::move(vec)); break; } case ZVEC_DATA_TYPE_VECTOR_BINARY64: { @@ -2432,7 +2349,7 @@ ZVecErrorCode zvec_doc_add_field_by_value(ZVecDoc *doc, const char *field_name, set_last_error("Invalid value size for vector_binary64 type"); return error_code; } - (*doc_ptr)->set(name, vec); + (*doc_ptr)->set(name, std::move(vec)); break; } case ZVEC_DATA_TYPE_VECTOR_FP32: { @@ -2442,7 +2359,7 @@ ZVecErrorCode zvec_doc_add_field_by_value(ZVecDoc *doc, const char *field_name, set_last_error("Invalid value size for vector_fp32 type"); return error_code; } - (*doc_ptr)->set(name, vec); + (*doc_ptr)->set(name, std::move(vec)); break; } case ZVEC_DATA_TYPE_VECTOR_FP16: { @@ -2452,7 +2369,7 @@ ZVecErrorCode zvec_doc_add_field_by_value(ZVecDoc *doc, const char *field_name, set_last_error("Invalid value size for vector_fp16 type"); return error_code; } - (*doc_ptr)->set(name, vec); + (*doc_ptr)->set(name, std::move(vec)); break; } case ZVEC_DATA_TYPE_VECTOR_FP64: { @@ -2462,7 +2379,7 @@ ZVecErrorCode zvec_doc_add_field_by_value(ZVecDoc *doc, const char *field_name, set_last_error("Invalid value size for vector_fp64 type"); return error_code; } - (*doc_ptr)->set(name, vec); + (*doc_ptr)->set(name, std::move(vec)); break; } case ZVEC_DATA_TYPE_VECTOR_INT8: { @@ -2472,7 +2389,7 @@ ZVecErrorCode zvec_doc_add_field_by_value(ZVecDoc *doc, const char *field_name, set_last_error("Invalid value size for vector_int8 type"); return error_code; } - (*doc_ptr)->set(name, vec); + 
(*doc_ptr)->set(name, std::move(vec)); break; } case ZVEC_DATA_TYPE_VECTOR_INT16: { @@ -2482,7 +2399,7 @@ ZVecErrorCode zvec_doc_add_field_by_value(ZVecDoc *doc, const char *field_name, set_last_error("Invalid value size for vector_int16 type"); return error_code; } - (*doc_ptr)->set(name, vec); + (*doc_ptr)->set(name, std::move(vec)); break; } case ZVEC_DATA_TYPE_VECTOR_INT4: { @@ -2500,7 +2417,7 @@ ZVecErrorCode zvec_doc_add_field_by_value(ZVecDoc *doc, const char *field_name, // Extract upper 4 bits vec.push_back((byte_val >> 4) & 0x0F); } - (*doc_ptr)->set(name, vec); + (*doc_ptr)->set(name, std::move(vec)); break; } @@ -2512,7 +2429,7 @@ ZVecErrorCode zvec_doc_add_field_by_value(ZVecDoc *doc, const char *field_name, set_last_error("Invalid sparse vector data size"); return error_code; } - (*doc_ptr)->set(name, sparse_vec); + (*doc_ptr)->set(name, std::move(sparse_vec)); break; } case ZVEC_DATA_TYPE_SPARSE_VECTOR_FP32: { @@ -2522,14 +2439,14 @@ ZVecErrorCode zvec_doc_add_field_by_value(ZVecDoc *doc, const char *field_name, set_last_error("Invalid sparse vector data size"); return error_code; } - (*doc_ptr)->set(name, sparse_vec); + (*doc_ptr)->set(name, std::move(sparse_vec)); break; } // Array types case ZVEC_DATA_TYPE_ARRAY_BINARY: { auto binary_array = extract_binary_array(value, value_size); - (*doc_ptr)->set(name, binary_array); + (*doc_ptr)->set(name, std::move(binary_array)); break; } case ZVEC_DATA_TYPE_ARRAY_STRING: { @@ -2543,11 +2460,11 @@ ZVecErrorCode zvec_doc_add_field_by_value(ZVecDoc *doc, const char *field_name, reinterpret_cast(const_cast(value)); auto string_array = extract_string_array_from_zvec(zvec_str_array, count); - (*doc_ptr)->set(name, string_array); + (*doc_ptr)->set(name, std::move(string_array)); } else { // C-string array (null-terminated strings) auto string_array = extract_string_array(value, value_size); - (*doc_ptr)->set(name, string_array); + (*doc_ptr)->set(name, std::move(string_array)); } break; } @@ -2557,7 +2474,7 @@ 
ZVecErrorCode zvec_doc_add_field_by_value(ZVecDoc *doc, const char *field_name, set_last_error("Invalid value size for array_bool type"); return error_code; } - (*doc_ptr)->set(name, vec); + (*doc_ptr)->set(name, std::move(vec)); break; } case ZVEC_DATA_TYPE_ARRAY_INT32: { @@ -2567,7 +2484,7 @@ ZVecErrorCode zvec_doc_add_field_by_value(ZVecDoc *doc, const char *field_name, set_last_error("Invalid value size for array_int32 type"); return error_code; } - (*doc_ptr)->set(name, vec); + (*doc_ptr)->set(name, std::move(vec)); break; } case ZVEC_DATA_TYPE_ARRAY_INT64: { @@ -2577,7 +2494,7 @@ ZVecErrorCode zvec_doc_add_field_by_value(ZVecDoc *doc, const char *field_name, set_last_error("Invalid value size for array_int64 type"); return error_code; } - (*doc_ptr)->set(name, vec); + (*doc_ptr)->set(name, std::move(vec)); break; } case ZVEC_DATA_TYPE_ARRAY_UINT32: { @@ -2587,7 +2504,7 @@ ZVecErrorCode zvec_doc_add_field_by_value(ZVecDoc *doc, const char *field_name, set_last_error("Invalid value size for array_uint32 type"); return error_code; } - (*doc_ptr)->set(name, vec); + (*doc_ptr)->set(name, std::move(vec)); break; } case ZVEC_DATA_TYPE_ARRAY_UINT64: { @@ -2597,7 +2514,7 @@ ZVecErrorCode zvec_doc_add_field_by_value(ZVecDoc *doc, const char *field_name, set_last_error("Invalid value size for array_uint64 type"); return error_code; } - (*doc_ptr)->set(name, vec); + (*doc_ptr)->set(name, std::move(vec)); break; } case ZVEC_DATA_TYPE_ARRAY_FLOAT: { @@ -2607,7 +2524,7 @@ ZVecErrorCode zvec_doc_add_field_by_value(ZVecDoc *doc, const char *field_name, set_last_error("Invalid value size for array_float type"); return error_code; } - (*doc_ptr)->set(name, vec); + (*doc_ptr)->set(name, std::move(vec)); break; } case ZVEC_DATA_TYPE_ARRAY_DOUBLE: { @@ -2617,7 +2534,7 @@ ZVecErrorCode zvec_doc_add_field_by_value(ZVecDoc *doc, const char *field_name, set_last_error("Invalid value size for array_double type"); return error_code; } - (*doc_ptr)->set(name, vec); + 
(*doc_ptr)->set(name, std::move(vec)); break; } @@ -2695,7 +2612,7 @@ ZVecErrorCode zvec_doc_add_field_by_struct(ZVecDoc *doc, reinterpret_cast( field->value.vector_value.data) + field->value.vector_value.length); - (*doc_ptr)->set(name, vec); + (*doc_ptr)->set(name, std::move(vec)); break; } case ZVEC_DATA_TYPE_VECTOR_BINARY64: { @@ -2704,7 +2621,7 @@ ZVecErrorCode zvec_doc_add_field_by_struct(ZVecDoc *doc, reinterpret_cast( field->value.vector_value.data) + field->value.vector_value.length); - (*doc_ptr)->set(name, vec); + (*doc_ptr)->set(name, std::move(vec)); break; } case ZVEC_DATA_TYPE_VECTOR_FP16: { @@ -2714,14 +2631,14 @@ ZVecErrorCode zvec_doc_add_field_by_struct(ZVecDoc *doc, reinterpret_cast( field->value.vector_value.data) + field->value.vector_value.length); - (*doc_ptr)->set(name, vec); + (*doc_ptr)->set(name, std::move(vec)); break; } case ZVEC_DATA_TYPE_VECTOR_FP32: { std::vector vec(field->value.vector_value.data, field->value.vector_value.data + field->value.vector_value.length); - (*doc_ptr)->set(name, vec); + (*doc_ptr)->set(name, std::move(vec)); break; } case ZVEC_DATA_TYPE_VECTOR_FP64: { @@ -2729,7 +2646,7 @@ ZVecErrorCode zvec_doc_add_field_by_struct(ZVecDoc *doc, reinterpret_cast(field->value.vector_value.data), reinterpret_cast(field->value.vector_value.data) + field->value.vector_value.length); - (*doc_ptr)->set(name, vec); + (*doc_ptr)->set(name, std::move(vec)); break; } case ZVEC_DATA_TYPE_VECTOR_INT4: { @@ -2750,7 +2667,7 @@ ZVecErrorCode zvec_doc_add_field_by_struct(ZVecDoc *doc, vec.push_back((byte_val >> 4) & 0x0F); } } - (*doc_ptr)->set(name, vec); + (*doc_ptr)->set(name, std::move(vec)); break; } case ZVEC_DATA_TYPE_VECTOR_INT8: { @@ -2758,7 +2675,7 @@ ZVecErrorCode zvec_doc_add_field_by_struct(ZVecDoc *doc, reinterpret_cast(field->value.vector_value.data), reinterpret_cast(field->value.vector_value.data) + field->value.vector_value.length); - (*doc_ptr)->set(name, vec); + (*doc_ptr)->set(name, std::move(vec)); break; } case 
ZVEC_DATA_TYPE_VECTOR_INT16: { @@ -2767,7 +2684,7 @@ ZVecErrorCode zvec_doc_add_field_by_struct(ZVecDoc *doc, reinterpret_cast( field->value.vector_value.data) + field->value.vector_value.length); - (*doc_ptr)->set(name, vec); + (*doc_ptr)->set(name, std::move(vec)); break; } @@ -2779,14 +2696,14 @@ ZVecErrorCode zvec_doc_add_field_by_struct(ZVecDoc *doc, reinterpret_cast( field->value.vector_value.data) + field->value.vector_value.length); - (*doc_ptr)->set(name, vec); + (*doc_ptr)->set(name, std::move(vec)); break; } case ZVEC_DATA_TYPE_SPARSE_VECTOR_FP32: { std::vector vec(field->value.vector_value.data, field->value.vector_value.data + field->value.vector_value.length); - (*doc_ptr)->set(name, vec); + (*doc_ptr)->set(name, std::move(vec)); break; } @@ -2813,7 +2730,7 @@ ZVecErrorCode zvec_doc_add_field_by_struct(ZVecDoc *doc, break; } } - (*doc_ptr)->set(name, array_values); + (*doc_ptr)->set(name, std::move(array_values)); break; } case ZVEC_DATA_TYPE_ARRAY_STRING: { @@ -2831,7 +2748,7 @@ ZVecErrorCode zvec_doc_add_field_by_struct(ZVecDoc *doc, break; } } - (*doc_ptr)->set(name, array_values); + (*doc_ptr)->set(name, std::move(array_values)); break; } case ZVEC_DATA_TYPE_ARRAY_BOOL: { @@ -2839,7 +2756,7 @@ ZVecErrorCode zvec_doc_add_field_by_struct(ZVecDoc *doc, reinterpret_cast(field->value.binary_value.data), reinterpret_cast(field->value.binary_value.data) + field->value.binary_value.length); - (*doc_ptr)->set(name, array_values); + (*doc_ptr)->set(name, std::move(array_values)); break; } case ZVEC_DATA_TYPE_ARRAY_INT32: { @@ -2848,7 +2765,7 @@ ZVecErrorCode zvec_doc_add_field_by_struct(ZVecDoc *doc, reinterpret_cast( field->value.vector_value.data) + field->value.vector_value.length); - (*doc_ptr)->set(name, array_values); + (*doc_ptr)->set(name, std::move(array_values)); break; } case ZVEC_DATA_TYPE_ARRAY_INT64: { @@ -2857,7 +2774,7 @@ ZVecErrorCode zvec_doc_add_field_by_struct(ZVecDoc *doc, reinterpret_cast( field->value.vector_value.data) + 
field->value.vector_value.length); - (*doc_ptr)->set(name, array_values); + (*doc_ptr)->set(name, std::move(array_values)); break; } case ZVEC_DATA_TYPE_ARRAY_UINT32: { @@ -2867,7 +2784,7 @@ ZVecErrorCode zvec_doc_add_field_by_struct(ZVecDoc *doc, reinterpret_cast( field->value.vector_value.data) + field->value.vector_value.length); - (*doc_ptr)->set(name, array_values); + (*doc_ptr)->set(name, std::move(array_values)); break; } case ZVEC_DATA_TYPE_ARRAY_UINT64: { @@ -2877,14 +2794,14 @@ ZVecErrorCode zvec_doc_add_field_by_struct(ZVecDoc *doc, reinterpret_cast( field->value.vector_value.data) + field->value.vector_value.length); - (*doc_ptr)->set(name, array_values); + (*doc_ptr)->set(name, std::move(array_values)); break; } case ZVEC_DATA_TYPE_ARRAY_FLOAT: { std::vector array_values(field->value.vector_value.data, field->value.vector_value.data + field->value.vector_value.length); - (*doc_ptr)->set(name, array_values); + (*doc_ptr)->set(name, std::move(array_values)); break; } case ZVEC_DATA_TYPE_ARRAY_DOUBLE: { @@ -2892,7 +2809,7 @@ ZVecErrorCode zvec_doc_add_field_by_struct(ZVecDoc *doc, reinterpret_cast(field->value.vector_value.data), reinterpret_cast(field->value.vector_value.data) + field->value.vector_value.length); - (*doc_ptr)->set(name, array_values); + (*doc_ptr)->set(name, std::move(array_values)); break; } @@ -4382,9 +4299,8 @@ ZVecQueryParams *zvec_query_params_create(ZVecIndexType index_type) { ZVecQueryParams *params = static_cast(malloc(sizeof(ZVecQueryParams))); if (!params) { - set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, - "Failed to allocate memory for ZVecQueryParams", - __FILE__, __LINE__, __FUNCTION__); + SET_LAST_ERROR(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to allocate memory for ZVecQueryParams"); return nullptr; } params->index_type = index_type; @@ -4401,9 +4317,8 @@ ZVecHnswQueryParams *zvec_query_params_hnsw_create(ZVecIndexType index_type, ZVecHnswQueryParams *params = static_cast(malloc(sizeof(ZVecHnswQueryParams))); if 
(!params) { - set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, - "Failed to allocate memory for ZVecHnswQueryParams", - __FILE__, __LINE__, __FUNCTION__); + SET_LAST_ERROR(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to allocate memory for ZVecHnswQueryParams"); return nullptr; } params->base.index_type = index_type; @@ -4421,9 +4336,8 @@ ZVecIVFQueryParams *zvec_query_params_ivf_create(ZVecIndexType index_type, ZVecIVFQueryParams *params = static_cast(malloc(sizeof(ZVecIVFQueryParams))); if (!params) { - set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, - "Failed to allocate memory for ZVecIVFQueryParams", - __FILE__, __LINE__, __FUNCTION__); + SET_LAST_ERROR(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to allocate memory for ZVecIVFQueryParams"); return nullptr; } params->base.index_type = index_type; @@ -4439,9 +4353,8 @@ ZVecFlatQueryParams *zvec_query_params_flat_create(ZVecIndexType index_type, ZVecFlatQueryParams *params = static_cast(malloc(sizeof(ZVecFlatQueryParams))); if (!params) { - set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, - "Failed to allocate memory for ZVecFlatQueryParams", - __FILE__, __LINE__, __FUNCTION__); + SET_LAST_ERROR(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to allocate memory for ZVecFlatQueryParams"); return nullptr; } params->base.index_type = index_type; @@ -4454,9 +4367,8 @@ ZVecQueryParamsUnion *zvec_query_params_union_create(ZVecIndexType index_type) { ZVecQueryParamsUnion *params = static_cast(malloc(sizeof(ZVecQueryParamsUnion))); if (!params) { - set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, - "Failed to allocate memory for ZVecQueryParamsUnion", - __FILE__, __LINE__, __FUNCTION__); + SET_LAST_ERROR(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to allocate memory for ZVecQueryParamsUnion"); return nullptr; } params->index_type = index_type; diff --git a/src/c_api/API_REFERENCE_CN.md b/src/c_api/API_REFERENCE_CN.md deleted file mode 100644 index 115313c7..00000000 --- a/src/c_api/API_REFERENCE_CN.md +++ /dev/null 
@@ -1,1843 +0,0 @@ -# ZVec C API 参考文档 - -**版本**: 0.3.0 -**许可**: Apache License 2.0 - ---- - -## 目录 - -1. [概述](#概述) -2. [快速开始](#快速开始) -3. [版本管理](#版本管理) -4. [错误处理](#错误处理) -5. [初始化与关闭](#初始化与关闭) -6. [配置管理](#配置管理) -7. [数据结构](#数据结构) -8. [Schema 管理](#schema-管理) -9. [Collection 管理](#collection-管理) -10. [索引管理](#索引管理) -11. [文档操作](#文档操作) -12. [数据增删改](#数据增删改) -13. [数据查询](#数据查询) -14. [工具函数](#工具函数) -15. [完整示例](#完整示例) - ---- - -## 概述 - -ZVec C API 是 ZVec 向量数据库的 C 语言接口,提供了完整的向量存储、索引和检索功能。本接口采用 C ABI,可与 C、C++、Rust、Go 等语言互操作。 - -### 核心概念 - -| 概念 | 说明 | -|------|------| -| **Collection** | 数据集合,类似数据库中的表 | -| **Schema** | 集合的结构定义,包含字段信息 | -| **Document** | 单条数据记录 | -| **Index** | 字段索引,加速查询 | -| **Field** | 字段,支持标量和向量类型 | - ---- - -## 快速开始 - -### 最小可用示例 - -```c -#include "zvec/c_api.h" -#include <stdio.h> - -int main() { - // 1. 初始化库 - zvec_initialize(NULL); - - // 2. 创建集合 Schema - ZVecCollectionSchema *schema = zvec_collection_schema_create("my_collection"); - ZVecFieldSchema *field = zvec_field_schema_create( - "embedding", ZVEC_DATA_TYPE_VECTOR_FP32, false, 3); - zvec_collection_schema_add_field(schema, field); - - // 3. 创建并打开集合 - ZVecCollection *collection = NULL; - ZVecErrorCode rc = zvec_collection_create_and_open( - "./my_data", schema, NULL, &collection); - - if (rc != ZVEC_OK) { - char *err_msg; - zvec_get_last_error(&err_msg); - printf("Error: %s\n", err_msg); - return 1; - } - - // 4. 创建索引 - ZVecHnswIndexParams *params = zvec_index_params_hnsw_create( - ZVEC_METRIC_TYPE_COSINE, ZVEC_QUANTIZE_TYPE_UNDEFINED, 16, 200, 50); - zvec_collection_create_hnsw_index(collection, "embedding", params); - - // 5. 插入数据 - ZVecDoc *doc = zvec_doc_create(); - zvec_doc_set_pk(doc, "doc_001"); - float vec[] = {0.1f, 0.2f, 0.3f}; - zvec_doc_add_field_by_value(doc, "embedding", - ZVEC_DATA_TYPE_VECTOR_FP32, vec, sizeof(vec)); - - size_t success, errors; - zvec_collection_insert(collection, &doc, 1, &success, &errors); - zvec_doc_destroy(doc); - - // 6.
查询 - ZVecVectorQuery query = ZVEC_VECTOR_QUERY( - "embedding", ZVEC_FLOAT_ARRAY(vec, 3), 10, ""); - ZVecDoc **results; - size_t count; - zvec_collection_query(collection, &query, &results, &count); - - // 7. 清理 - zvec_docs_free(results, count); - zvec_index_params_hnsw_destroy(params); - zvec_collection_close(collection); - zvec_collection_destroy(collection); - zvec_collection_schema_destroy(schema); - zvec_shutdown(); - - return 0; -} -``` - ---- - -## 版本管理 - -### 获取版本信息 - -```c -// 获取完整版本字符串 -const char *version = zvec_get_version(); -// 输出示例:"0.3.0-g3f8a2b1 (built 2025-05-13 10:30:45)" - -// 获取各部分版本号 -int major = zvec_get_version_major(); // 0 -int minor = zvec_get_version_minor(); // 3 -int patch = zvec_get_version_patch(); // 0 -``` - -### 版本兼容性检查 - -```c -// 检查当前库版本是否满足最低要求 -bool compatible = zvec_check_version(0, 2, 0); -if (!compatible) { - printf("Library version too old!\n"); -} -``` - -| 函数 | 参数 | 返回值 | 说明 | -|------|------|--------|------| -| `zvec_get_version()` | 无 | `const char*` | 获取完整版本字符串 | -| `zvec_get_version_major()` | 无 | `int` | 获取主版本号 | -| `zvec_get_version_minor()` | 无 | `int` | 获取次版本号 | -| `zvec_get_version_patch()` | 无 | `int` | 获取补丁版本号 | -| `zvec_check_version()` | `major, minor, patch` | `bool` | 检查版本兼容性 | - ---- - -## 错误处理 - -### 错误码枚举 - -```c -typedef enum { - ZVEC_OK = 0, // 成功 - ZVEC_ERROR_NOT_FOUND = 1, // 资源未找到 - ZVEC_ERROR_ALREADY_EXISTS = 2, // 资源已存在 - ZVEC_ERROR_INVALID_ARGUMENT = 3, // 无效参数 - ZVEC_ERROR_PERMISSION_DENIED = 4, // 权限拒绝 - ZVEC_ERROR_FAILED_PRECONDITION = 5, // 前置条件失败 - ZVEC_ERROR_RESOURCE_EXHAUSTED = 6, // 资源耗尽 - ZVEC_ERROR_UNAVAILABLE = 7, // 服务不可用 - ZVEC_ERROR_INTERNAL_ERROR = 8, // 内部错误 - ZVEC_ERROR_NOT_SUPPORTED = 9, // 不支持的操作 - ZVEC_ERROR_UNKNOWN = 10 // 未知错误 -} ZVecErrorCode; -``` - -### 获取错误信息 - -```c -// 获取详细错误信息 -ZVecErrorDetails details; -zvec_get_last_error_details(&details); -printf("Error %d: %s\n", details.code, details.message); -printf(" at %s:%d in %s()\n", details.file, details.line, 
details.function); - -// 获取错误消息字符串 -char *error_msg; -ZVecErrorCode code = zvec_get_last_error(&error_msg); -if (code != ZVEC_OK) { - printf("Error: %s\n", error_msg); - free(error_msg); // 需要调用者释放 -} - -// 清除错误状态 -zvec_clear_error(); - -// 错误码转字符串 -const char *err_str = zvec_error_code_to_string(ZVEC_ERROR_INVALID_ARGUMENT); -// 返回:"Invalid argument" -``` - -| 函数 | 参数 | 返回值 | 说明 | -|------|------|--------|------| -| `zvec_get_last_error(&msg)` | `char **msg` | `ZVecErrorCode` | 获取最后错误消息 | -| `zvec_get_last_error_details(&details)` | `ZVecErrorDetails*` | `ZVecErrorCode` | 获取详细错误信息 | -| `zvec_clear_error()` | 无 | void | 清除错误状态 | -| `zvec_error_code_to_string(code)` | `ZVecErrorCode` | `const char*` | 错误码转字符串 | - ---- - -## 初始化与关闭 - -### 初始化库 - -```c -// 使用默认配置初始化 -ZVecErrorCode rc = zvec_initialize(NULL); - -// 使用自定义配置初始化 -ZVecConfigData *config = zvec_config_data_create(); -zvec_config_data_set_memory_limit(config, 2UL * 1024 * 1024 * 1024); // 2GB -zvec_config_data_set_query_thread_count(config, 4); -rc = zvec_initialize(config); -zvec_config_data_destroy(config); - -if (rc != ZVEC_OK) { - // 处理初始化失败 -} -``` - -### 关闭库 - -```c -// 关闭前确保所有 Collection 已关闭 -zvec_collection_close(collection); -zvec_collection_destroy(collection); - -// 关闭库,释放所有资源 -ZVecErrorCode rc = zvec_shutdown(); -``` - -### 检查初始化状态 - -```c -bool initialized; -zvec_is_initialized(&initialized); -if (!initialized) { - zvec_initialize(NULL); -} -``` - -| 函数 | 参数 | 返回值 | 说明 | -|------|------|--------|------| -| `zvec_initialize(config)` | `const ZVecConfigData*` | `ZVecErrorCode` | 初始化库 | -| `zvec_shutdown()` | 无 | `ZVecErrorCode` | 关闭库 | -| `zvec_is_initialized(&initialized)` | `bool*` | `ZVecErrorCode` | 检查是否已初始化 | - ---- - -## 配置管理 - -### 配置数据结构 - -```c -typedef struct { - uint64_t memory_limit_bytes; // 内存限制(字节) - - // 日志配置 - ZVecLogType log_type; - void *log_config; // ZVecConsoleLogConfig 或 ZVecFileLogConfig - - // 查询配置 - uint32_t query_thread_count; // 查询线程数 - float 
invert_to_forward_scan_ratio; // 倒排转正扫比例 - float brute_force_by_keys_ratio; // 暴力检索比例 - - // 优化配置 - uint32_t optimize_thread_count; // 优化线程数 -} ZVecConfigData; -``` - -### 日志配置 - -```c -// 控制台日志配置 -typedef struct { - ZVecLogLevel level; // 日志级别 -} ZVecConsoleLogConfig; - -// 文件日志配置 -typedef struct { - ZVecLogLevel level; // 日志级别 - ZVecString dir; // 日志目录 - ZVecString basename; // 日志文件基础名 - uint32_t file_size; // 文件大小 (MB) - uint32_t overdue_days; // 过期天数 -} ZVecFileLogConfig; -``` - -### 日志级别 - -```c -typedef enum { - ZVEC_LOG_LEVEL_DEBUG = 0, - ZVEC_LOG_LEVEL_INFO = 1, - ZVEC_LOG_LEVEL_WARN = 2, - ZVEC_LOG_LEVEL_ERROR = 3, - ZVEC_LOG_LEVEL_FATAL = 4 -} ZVecLogLevel; -``` - -### 配置创建与销毁 - -```c -// 创建配置 -ZVecConfigData *config = zvec_config_data_create(); - -// 创建控制台日志配置 -ZVecConsoleLogConfig *console_log = zvec_config_console_log_create( - ZVEC_LOG_LEVEL_INFO); - -// 创建文件日志配置 -ZVecFileLogConfig *file_log = zvec_config_file_log_create( - ZVEC_LOG_LEVEL_DEBUG, - "/var/log/zvec", // 日志目录 - "zvec", // 基础文件名 - 100, // 文件大小 100MB - 30 // 保留 30 天 -); - -// 设置配置 -zvec_config_data_set_memory_limit(config, 1024 * 1024 * 1024); -zvec_config_data_set_log_config(config, ZVEC_LOG_TYPE_CONSOLE, console_log); -zvec_config_data_set_query_thread_count(config, 8); -zvec_config_data_set_optimize_thread_count(config, 4); - -// 销毁配置 -zvec_config_console_log_destroy(console_log); -zvec_config_file_log_destroy(file_log); -zvec_config_data_destroy(config); -``` - -| 函数 | 参数 | 返回值 | 说明 | -|------|------|--------|------| -| `zvec_config_data_create()` | 无 | `ZVecConfigData*` | 创建配置数据 | -| `zvec_config_data_destroy(config)` | `ZVecConfigData*` | void | 销毁配置数据 | -| `zvec_config_data_set_memory_limit(config, bytes)` | config, 字节数 | `ZVecErrorCode` | 设置内存限制 | -| `zvec_config_data_set_log_config(config, type, cfg)` | config, 类型,配置 | `ZVecErrorCode` | 设置日志配置 | -| `zvec_config_data_set_query_thread_count(config, count)` | config, 线程数 | `ZVecErrorCode` | 设置查询线程数 | -| 
`zvec_config_data_set_optimize_thread_count(config, count)` | config, 线程数 | `ZVecErrorCode` | 设置优化线程数 | -| `zvec_config_console_log_create(level)` | 日志级别 | `ZVecConsoleLogConfig*` | 创建控制台日志配置 | -| `zvec_config_console_log_destroy(cfg)` | 配置指针 | void | 销毁控制台日志配置 | -| `zvec_config_file_log_create(...)` | 级别,目录,文件名,大小,天数 | `ZVecFileLogConfig*` | 创建文件日志配置 | -| `zvec_config_file_log_destroy(cfg)` | 配置指针 | void | 销毁文件日志配置 | - ---- - -## 数据结构 - -### 字符串类型 - -```c -// 字符串视图(不拥有内存) -typedef struct { - const char *data; - size_t length; -} ZVecStringView; - -// 可变字符串(拥有内存) -typedef struct { - char *data; - size_t length; - size_t capacity; -} ZVecString; - -// 字符串数组 -typedef struct { - ZVecString *strings; - size_t count; -} ZVecStringArray; -``` - -### 数组类型 - -```c -// Float 数组 -typedef struct { - const float *data; - size_t length; -} ZVecFloatArray; - -// Int64 数组 -typedef struct { - const int64_t *data; - size_t length; -} ZVecInt64Array; - -// 字节数组 -typedef struct { - const uint8_t *data; - size_t length; -} ZVecByteArray; - -// 可变字节数组 -typedef struct { - uint8_t *data; - size_t length; - size_t capacity; -} ZVecMutableByteArray; -``` - -### 字符串操作 - -```c -// 从 C 字符串创建 -ZVecString *str = zvec_string_create("Hello, World!"); - -// 从字符串视图创建 -ZVecStringView view = {"Hello", 5}; -ZVecString *str2 = zvec_string_create_from_view(&view); - -// 创建二进制安全字符串(可包含 null 字节) -uint8_t data[] = {0x00, 0x01, 0x02, 0x03}; -ZVecString *bin_str = zvec_bin_create(data, sizeof(data)); - -// 复制字符串 -ZVecString *copy = zvec_string_copy(str); - -// 获取 C 字符串 -const char *c_str = zvec_string_c_str(str); - -// 获取长度 -size_t len = zvec_string_length(str); - -// 比较字符串 -int cmp = zvec_string_compare(str1, str2); // 返回 -1, 0, 1 - -// 释放字符串 -zvec_free_string(str); -``` - -### 数组操作 - -```c -// 创建字符串数组 -ZVecStringArray *arr = zvec_string_array_create(10); - -// 添加字符串 -zvec_string_array_add(arr, 0, "first"); -zvec_string_array_add(arr, 1, "second"); - -// 销毁字符串数组 -zvec_string_array_destroy(arr); - -// 创建字节数组 
-ZVecMutableByteArray *byte_arr = zvec_byte_array_create(1024); -zvec_byte_array_destroy(byte_arr); - -// 创建 float 数组 -ZVecFloatArray *float_arr = zvec_float_array_create(100); -zvec_float_array_destroy(float_arr); - -// 创建 int64 数组 -ZVecInt64Array *int_arr = zvec_int64_array_create(50); -zvec_int64_array_destroy(int_arr); - -// 释放 uint8 数组 -zvec_free_uint8_array(uint8_t *array); -``` - -| 函数 | 参数 | 返回值 | 说明 | -|------|------|--------|------| -| `zvec_string_create(str)` | `const char*` | `ZVecString*` | 从 C 字符串创建 | -| `zvec_string_create_from_view(view)` | `ZVecStringView*` | `ZVecString*` | 从视图创建字符串 | -| `zvec_bin_create(data, length)` | `uint8_t*`, size_t | `ZVecString*` | 创建二进制字符串 | -| `zvec_string_copy(str)` | `ZVecString*` | `ZVecString*` | 复制字符串 | -| `zvec_string_c_str(str)` | `ZVecString*` | `const char*` | 获取 C 字符串 | -| `zvec_string_length(str)` | `ZVecString*` | size_t | 获取长度 | -| `zvec_string_compare(s1, s2)` | 两个字符串 | int | 比较字符串 | -| `zvec_free_string(str)` | `ZVecString*` | void | 释放字符串 | -| `zvec_string_array_create(count)` | size_t | `ZVecStringArray*` | 创建字符串数组 | -| `zvec_string_array_add(arr, idx, str)` | arr, 索引,字符串 | void | 添加字符串 | -| `zvec_string_array_destroy(arr)` | `ZVecStringArray*` | void | 销毁字符串数组 | -| `zvec_byte_array_create(capacity)` | size_t | `ZVecMutableByteArray*` | 创建字节数组 | -| `zvec_byte_array_destroy(arr)` | `ZVecMutableByteArray*` | void | 销毁字节数组 | -| `zvec_float_array_create(count)` | size_t | `ZVecFloatArray*` | 创建 float 数组 | -| `zvec_float_array_destroy(arr)` | `ZVecFloatArray*` | void | 销毁 float 数组 | -| `zvec_int64_array_create(count)` | size_t | `ZVecInt64Array*` | 创建 int64 数组 | -| `zvec_int64_array_destroy(arr)` | `ZVecInt64Array*` | void | 销毁 int64 数组 | -| `zvec_free_uint8_array(arr)` | `uint8_t*` | void | 释放 uint8 数组 | - ---- - -## Schema 管理 - -### 数据类型 - -```c -typedef enum { - // 标量类型 - ZVEC_DATA_TYPE_UNDEFINED = 0, - ZVEC_DATA_TYPE_BINARY = 1, - ZVEC_DATA_TYPE_STRING = 2, - ZVEC_DATA_TYPE_BOOL = 3, - 
ZVEC_DATA_TYPE_INT32 = 4, - ZVEC_DATA_TYPE_INT64 = 5, - ZVEC_DATA_TYPE_UINT32 = 6, - ZVEC_DATA_TYPE_UINT64 = 7, - ZVEC_DATA_TYPE_FLOAT = 8, - ZVEC_DATA_TYPE_DOUBLE = 9, - - // 向量类型 - ZVEC_DATA_TYPE_VECTOR_BINARY32 = 20, - ZVEC_DATA_TYPE_VECTOR_BINARY64 = 21, - ZVEC_DATA_TYPE_VECTOR_FP16 = 22, - ZVEC_DATA_TYPE_VECTOR_FP32 = 23, - ZVEC_DATA_TYPE_VECTOR_FP64 = 24, - ZVEC_DATA_TYPE_VECTOR_INT4 = 25, - ZVEC_DATA_TYPE_VECTOR_INT8 = 26, - ZVEC_DATA_TYPE_VECTOR_INT16 = 27, - - // 稀疏向量类型 - ZVEC_DATA_TYPE_SPARSE_VECTOR_FP16 = 30, - ZVEC_DATA_TYPE_SPARSE_VECTOR_FP32 = 31, - - // 数组类型 - ZVEC_DATA_TYPE_ARRAY_BINARY = 40, - ZVEC_DATA_TYPE_ARRAY_STRING = 41, - ZVEC_DATA_TYPE_ARRAY_BOOL = 42, - ZVEC_DATA_TYPE_ARRAY_INT32 = 43, - ZVEC_DATA_TYPE_ARRAY_INT64 = 44, - ZVEC_DATA_TYPE_ARRAY_UINT32 = 45, - ZVEC_DATA_TYPE_ARRAY_UINT64 = 46, - ZVEC_DATA_TYPE_ARRAY_FLOAT = 47, - ZVEC_DATA_TYPE_ARRAY_DOUBLE = 48 -} ZVecDataType; -``` - -### 字段 Schema - -```c -typedef struct { - ZVecString *name; // 字段名 - ZVecDataType data_type; // 数据类型 - bool nullable; // 是否可空 - uint32_t dimension; // 向量维度(仅向量类型使用) - ZVecIndexParams *index_params; // 索引参数 -} ZVecFieldSchema; -``` - -### 创建字段 Schema - -```c -// 创建标量字段 -ZVecFieldSchema *id_field = zvec_field_schema_create( - "id", ZVEC_DATA_TYPE_STRING, false, 0); - -// 创建向量字段(768 维) -ZVecFieldSchema *embedding_field = zvec_field_schema_create( - "embedding", ZVEC_DATA_TYPE_VECTOR_FP32, false, 768); - -// 创建带索引的字段 -ZVecHnswIndexParams *hnsw_params = zvec_index_params_hnsw_create( - ZVEC_METRIC_TYPE_COSINE, ZVEC_QUANTIZE_TYPE_UNDEFINED, 16, 200, 50); -zvec_field_schema_set_hnsw_index(embedding_field, hnsw_params); - -// 或者使用专用函数 -zvec_field_schema_set_invert_index(field, invert_params); -zvec_field_schema_set_hnsw_index(field, hnsw_params); -zvec_field_schema_set_flat_index(field, flat_params); -zvec_field_schema_set_ivf_index(field, ivf_params); - -// 设置索引参数 -zvec_field_schema_set_index_params(field, index_params); - -// 销毁字段 Schema 
-zvec_field_schema_destroy(field); -zvec_free_field_schema(field); -``` - -### Collection Schema - -```c -typedef struct { - ZVecString *name; // 集合名 - ZVecFieldSchema **fields; // 字段数组 - size_t field_count; // 字段数量 - size_t field_capacity; // 字段容量 - uint64_t max_doc_count_per_segment; // 每段最大文档数 -} ZVecCollectionSchema; -``` - -### 创建 Collection Schema - -```c -// 创建 Schema -ZVecCollectionSchema *schema = zvec_collection_schema_create("my_collection"); - -// 添加单个字段 -ZVecFieldSchema *field = zvec_field_schema_create( - "title", ZVEC_DATA_TYPE_STRING, false, 0); -zvec_collection_schema_add_field(schema, field); - -// 批量添加字段 -ZVecFieldSchema fields[3] = { - *zvec_field_schema_create("id", ZVEC_DATA_TYPE_STRING, false, 0), - *zvec_field_schema_create("embedding", ZVEC_DATA_TYPE_VECTOR_FP32, false, 768), - *zvec_field_schema_create("timestamp", ZVEC_DATA_TYPE_INT64, true, 0) -}; -zvec_collection_schema_add_fields(schema, fields, 3); - -// 获取字段数量 -size_t count = zvec_collection_schema_get_field_count(schema); - -// 按索引获取字段 -ZVecFieldSchema *f = zvec_collection_schema_get_field(schema, 0); - -// 按名称查找字段 -ZVecFieldSchema *f = zvec_collection_schema_find_field(schema, "embedding"); - -// 删除字段 -zvec_collection_schema_remove_field(schema, "title"); - -// 批量删除字段 -const char *field_names[] = {"field1", "field2"}; -zvec_collection_schema_remove_fields(schema, field_names, 2); - -// 设置每段最大文档数 -zvec_collection_schema_set_max_doc_count_per_segment(schema, 500000); - -// 获取每段最大文档数 -uint64_t max_docs = zvec_collection_schema_get_max_doc_count_per_segment(schema); - -// 验证 Schema -ZVecString *error_msg; -ZVecErrorCode rc = zvec_collection_schema_validate(schema, &error_msg); -if (rc != ZVEC_OK) { - printf("Invalid schema: %s\n", error_msg->data); - zvec_free_string(error_msg); -} - -// 销毁 Schema -zvec_collection_schema_destroy(schema); -``` - -| 函数 | 参数 | 返回值 | 说明 | -|------|------|--------|------| -| `zvec_field_schema_create(name, type, nullable, dim)` | 名,类型,是否可空,维度 | 
`ZVecFieldSchema*` | 创建字段 Schema | -| `zvec_field_schema_destroy(schema)` | `ZVecFieldSchema*` | void | 销毁字段 Schema | -| `zvec_field_schema_set_index_params(schema, params)` | schema, 索引参数 | `ZVecErrorCode` | 设置索引参数 | -| `zvec_field_schema_set_invert_index(schema, params)` | schema, 倒排参数 | void | 设置倒排索引 | -| `zvec_field_schema_set_hnsw_index(schema, params)` | schema, HNSW 参数 | void | 设置 HNSW 索引 | -| `zvec_field_schema_set_flat_index(schema, params)` | schema, Flat 参数 | void | 设置 Flat 索引 | -| `zvec_field_schema_set_ivf_index(schema, params)` | schema, IVF 参数 | void | 设置 IVF 索引 | -| `zvec_free_field_schema(schema)` | `ZVecFieldSchema*` | void | 释放字段 Schema | -| `zvec_collection_schema_create(name)` | 集合名 | `ZVecCollectionSchema*` | 创建集合 Schema | -| `zvec_collection_schema_destroy(schema)` | `ZVecCollectionSchema*` | void | 销毁集合 Schema | -| `zvec_collection_schema_add_field(schema, field)` | schema, 字段 | `ZVecErrorCode` | 添加字段 | -| `zvec_collection_schema_add_fields(schema, fields, count)` | schema, 字段数组,数量 | `ZVecErrorCode` | 批量添加字段 | -| `zvec_collection_schema_remove_field(schema, name)` | schema, 字段名 | `ZVecErrorCode` | 删除字段 | -| `zvec_collection_schema_remove_fields(schema, names, count)` | schema, 字段名数组,数量 | `ZVecErrorCode` | 批量删除字段 | -| `zvec_collection_schema_get_field_count(schema)` | `ZVecCollectionSchema*` | size_t | 获取字段数量 | -| `zvec_collection_schema_get_field(schema, index)` | schema, 索引 | `ZVecFieldSchema*` | 按索引获取字段 | -| `zvec_collection_schema_find_field(schema, name)` | schema, 字段名 | `ZVecFieldSchema*` | 按名查找字段 | -| `zvec_collection_schema_validate(schema, &error)` | schema, 错误输出 | `ZVecErrorCode` | 验证 Schema | -| `zvec_collection_schema_set_max_doc_count_per_segment(schema, count)` | schema, 数量 | `ZVecErrorCode` | 设置段最大文档数 | -| `zvec_collection_schema_get_max_doc_count_per_segment(schema)` | `ZVecCollectionSchema*` | uint64_t | 获取段最大文档数 | - ---- - -## Collection 管理 - -### Collection 选项 - -```c -typedef struct { - bool enable_mmap; // 是否启用内存映射 - 
size_t max_buffer_size; // 最大缓冲区大小 - bool read_only; // 是否只读模式 - uint64_t max_doc_count_per_segment; // 每段最大文档数 -} ZVecCollectionOptions; -``` - -### 创建和打开 Collection - -```c -// 初始化默认选项 -ZVecCollectionOptions options; -zvec_collection_options_init_default(&options); - -// 或使用宏 -ZVecCollectionOptions options = ZVEC_DEFAULT_OPTIONS(); - -// 自定义选项 -options.enable_mmap = true; -options.max_buffer_size = 2 * 1024 * 1024; // 2MB -options.read_only = false; -options.max_doc_count_per_segment = 500000; - -// 创建并打开 -ZVecCollection *collection; -ZVecErrorCode rc = zvec_collection_create_and_open( - "/path/to/data", schema, &options, &collection); - -// 打开已有集合 -rc = zvec_collection_open("/path/to/data", &options, &collection); -``` - -### Collection 操作 - -```c -// 关闭集合 -rc = zvec_collection_close(collection); - -// 销毁集合 -rc = zvec_collection_destroy(collection); - -// 刷盘数据 -rc = zvec_collection_flush(collection); - -// 获取 Schema -ZVecCollectionSchema *schema; -rc = zvec_collection_get_schema(collection, &schema); -// 使用后销毁 -zvec_collection_schema_destroy(schema); - -// 获取选项 -ZVecCollectionOptions *options; -rc = zvec_collection_get_options(collection, &options); -// 使用后销毁 -free(options); - -// 获取统计信息 -typedef struct { - uint64_t doc_count; // 文档总数 - ZVecString **index_names; // 索引名数组 - float *index_completeness; // 索引完成度数组 - size_t index_count; // 索引数量 -} ZVecCollectionStats; - -ZVecCollectionStats *stats; -rc = zvec_collection_get_stats(collection, &stats); -printf("Documents: %lu\n", stats->doc_count); -printf("Indexes: %zu\n", stats->index_count); -zvec_collection_stats_destroy(stats); -``` - -| 函数 | 参数 | 返回值 | 说明 | -|------|------|--------|------| -| `zvec_collection_options_init_default(&opts)` | `ZVecCollectionOptions*` | void | 初始化默认选项 | -| `zvec_collection_create_and_open(path, schema, opts, &coll)` | 路径,Schema, 选项,输出 | `ZVecErrorCode` | 创建并打开集合 | -| `zvec_collection_open(path, opts, &coll)` | 路径,选项,输出 | `ZVecErrorCode` | 打开已有集合 | -| `zvec_collection_close(coll)` | 
`ZVecCollection*` | `ZVecErrorCode` | 关闭集合 | -| `zvec_collection_destroy(coll)` | `ZVecCollection*` | `ZVecErrorCode` | 销毁集合 | -| `zvec_collection_flush(coll)` | `ZVecCollection*` | `ZVecErrorCode` | 刷盘数据 | -| `zvec_collection_get_schema(coll, &schema)` | 集合,输出 | `ZVecErrorCode` | 获取 Schema | -| `zvec_collection_get_options(coll, &opts)` | 集合,输出 | `ZVecErrorCode` | 获取选项 | -| `zvec_collection_get_stats(coll, &stats)` | 集合,输出 | `ZVecErrorCode` | 获取统计信息 | -| `zvec_collection_stats_destroy(stats)` | `ZVecCollectionStats*` | void | 销毁统计信息 | - ---- - -## 索引管理 - -### 索引类型 - -```c -typedef enum { - ZVEC_INDEX_TYPE_UNDEFINED = 0, - ZVEC_INDEX_TYPE_HNSW = 1, // HNSW 图索引 - ZVEC_INDEX_TYPE_IVF = 3, // 倒排文件索引 - ZVEC_INDEX_TYPE_FLAT = 4, // 暴力检索 - ZVEC_INDEX_TYPE_INVERT = 10 // 标量倒排索引 -} ZVecIndexType; -``` - -### 距离度量类型 - -```c -typedef enum { - ZVEC_METRIC_TYPE_UNDEFINED = 0, - ZVEC_METRIC_TYPE_L2 = 1, // L2 距离 - ZVEC_METRIC_TYPE_IP = 2, // 内积 - ZVEC_METRIC_TYPE_COSINE = 3, // 余弦相似度 - ZVEC_METRIC_TYPE_MIPSL2 = 4 // L2 内积 -} ZVecMetricType; -``` - -### 量化类型 - -```c -typedef enum { - ZVEC_QUANTIZE_TYPE_UNDEFINED = 0, - ZVEC_QUANTIZE_TYPE_FP16 = 1, // FP16 量化 - ZVEC_QUANTIZE_TYPE_INT8 = 2, // INT8 量化 - ZVEC_QUANTIZE_TYPE_INT4 = 3 // INT4 量化 -} ZVecQuantizeType; -``` - -### HNSW 索引参数 - -```c -typedef struct { - ZVecVectorIndexParams base; // 基类参数 - int m; // 图连接度参数 - int ef_construction; // 构建时探索因子 - int ef_search; // 搜索时探索因子 -} ZVecHnswIndexParams; - -// 创建 HNSW 参数 -ZVecHnswIndexParams *params = zvec_index_params_hnsw_create( - ZVEC_METRIC_TYPE_COSINE, // 距离类型 - ZVEC_QUANTIZE_TYPE_UNDEFINED, // 量化类型 - 16, // m: 图连接度 - 200, // ef_construction: 构建探索因子 - 50 // ef_search: 搜索探索因子 -); - -// 或使用初始化函数 -ZVecHnswIndexParams params; -zvec_index_params_hnsw_init(&params, - ZVEC_METRIC_TYPE_COSINE, 16, 200, 50, ZVEC_QUANTIZE_TYPE_UNDEFINED); - -// 或使用宏 -ZVecHnswIndexParams params = ZVEC_HNSW_PARAMS( - ZVEC_METRIC_TYPE_COSINE, 16, 200, 50, ZVEC_QUANTIZE_TYPE_UNDEFINED); - 
-zvec_index_params_hnsw_destroy(params); -``` - -### IVF 索引参数 - -```c -typedef struct { - ZVecVectorIndexParams base; // 基类参数 - int n_list; // 聚类中心数量 - int n_iters; // 迭代次数 - bool use_soar; // 是否使用 SOAR 算法 - int n_probe; // 搜索时探测的聚类数 -} ZVecIVFIndexParams; - -// 创建 IVF 参数 -ZVecIVFIndexParams *params = zvec_index_params_ivf_create( - ZVEC_METRIC_TYPE_L2, // 距离类型 - ZVEC_QUANTIZE_TYPE_INT8, // 量化类型 - 1024, // n_list: 聚类中心数 - 25, // n_iters: 迭代次数 - true, // use_soar: 使用 SOAR - 20 // n_probe: 探测聚类数 -); - -// 或使用宏 -ZVecIVFIndexParams params = ZVEC_IVF_PARAMS( - ZVEC_METRIC_TYPE_L2, 1024, 25, true, 20, ZVEC_QUANTIZE_TYPE_INT8); - -zvec_index_params_ivf_destroy(params); -``` - -### Flat 索引参数 - -```c -typedef struct { - ZVecVectorIndexParams base; // 基类参数 -} ZVecFlatIndexParams; - -// 创建 Flat 参数 -ZVecFlatIndexParams *params = zvec_index_params_flat_create( - ZVEC_METRIC_TYPE_COSINE, ZVEC_QUANTIZE_TYPE_UNDEFINED); - -// 或使用宏 -ZVecFlatIndexParams params = ZVEC_FLAT_PARAMS( - ZVEC_METRIC_TYPE_COSINE, ZVEC_QUANTIZE_TYPE_UNDEFINED); - -zvec_index_params_flat_destroy(params); -``` - -### 标量倒排索引参数 - -```c -typedef struct { - ZVecBaseIndexParams base; // 基类参数 - bool enable_range_optimization; // 是否启用范围优化 - bool enable_extended_wildcard; // 是否启用通配符 -} ZVecInvertIndexParams; - -// 创建倒排索引参数 -ZVecInvertIndexParams *params = zvec_index_params_invert_create( - true, // enable_range_optimization - false // enable_extended_wildcard -); - -// 或使用宏 -ZVecInvertIndexParams params = ZVEC_INVERT_PARAMS(true, false); - -// 或使用初始化函数 -ZVecInvertIndexParams params; -zvec_index_params_invert_init(&params, true, false); - -zvec_index_params_invert_destroy(params); -``` - -### 创建索引 - -```c -// 通用创建索引函数 -zvec_collection_create_index(collection, "embedding", index_params); - -// 类型安全的创建索引函数 -zvec_collection_create_hnsw_index(collection, "embedding", hnsw_params); -zvec_collection_create_ivf_index(collection, "embedding", ivf_params); -zvec_collection_create_flat_index(collection, "embedding", flat_params); 
-zvec_collection_create_invert_index(collection, "title", invert_params); - -// 删除索引 -zvec_collection_drop_index(collection, "embedding"); - -// 优化集合(重建索引、合并段) -zvec_collection_optimize(collection); -``` - -| 函数 | 参数 | 返回值 | 说明 | -|------|------|--------|------| -| `zvec_index_params_base_init(params, type)` | 参数,类型 | void | 初始化基础参数 | -| `zvec_index_params_invert_init(params, range_opt, wildcard)` | 参数,范围优化,通配符 | void | 初始化倒排参数 | -| `zvec_index_params_vector_init(params, idx, metric, quant)` | 参数,索引类型,度量,量化 | void | 初始化向量索引参数 | -| `zvec_index_params_hnsw_init(params, metric, m, ef_c, ef_s, quant)` | 参数,度量,m, ef_construction, ef_search, 量化 | void | 初始化 HNSW 参数 | -| `zvec_index_params_ivf_init(params, metric, nlist, niters, soar, nprobe, quant)` | 参数,度量,nlist, niters, soar, nprobe, 量化 | void | 初始化 IVF 参数 | -| `zvec_index_params_flat_init(params, metric, quant)` | 参数,度量,量化 | void | 初始化 Flat 参数 | -| `zvec_index_params_invert_create(range_opt, wildcard)` | 范围优化,通配符 | `ZVecInvertIndexParams*` | 创建倒排参数 | -| `zvec_index_params_vector_create(type, metric, quant)` | 类型,度量,量化 | `ZVecVectorIndexParams*` | 创建向量索引参数 | -| `zvec_index_params_hnsw_create(metric, quant, m, ef_c, ef_s)` | 度量,量化,m, ef_construction, ef_search | `ZVecHnswIndexParams*` | 创建 HNSW 参数 | -| `zvec_index_params_ivf_create(metric, quant, nlist, niters, soar, nprobe)` | 度量,量化,nlist, niters, soar, nprobe | `ZVecIVFIndexParams*` | 创建 IVF 参数 | -| `zvec_index_params_flat_create(metric, quant)` | 度量,量化 | `ZVecFlatIndexParams*` | 创建 Flat 参数 | -| `zvec_index_params_invert_destroy(params)` | 参数 | void | 销毁倒排参数 | -| `zvec_index_params_vector_destroy(params)` | 参数 | void | 销毁向量索引参数 | -| `zvec_index_params_hnsw_destroy(params)` | 参数 | void | 销毁 HNSW 参数 | -| `zvec_index_params_ivf_destroy(params)` | 参数 | void | 销毁 IVF 参数 | -| `zvec_index_params_flat_destroy(params)` | 参数 | void | 销毁 Flat 参数 | -| `zvec_collection_create_index(coll, field, params)` | 集合,字段,参数 | `ZVecErrorCode` | 创建索引 | -| 
`zvec_collection_create_hnsw_index(...)` | 集合,字段,HNSW 参数 | `ZVecErrorCode` | 创建 HNSW 索引 | -| `zvec_collection_create_ivf_index(...)` | 集合,字段,IVF 参数 | `ZVecErrorCode` | 创建 IVF 索引 | -| `zvec_collection_create_flat_index(...)` | 集合,字段,Flat 参数 | `ZVecErrorCode` | 创建 Flat 索引 | -| `zvec_collection_create_invert_index(...)` | 集合,字段,倒排参数 | `ZVecErrorCode` | 创建倒排索引 | -| `zvec_collection_drop_index(coll, field)` | 集合,字段名 | `ZVecErrorCode` | 删除索引 | -| `zvec_collection_optimize(coll)` | 集合 | `ZVecErrorCode` | 优化集合 | - ---- - -## 文档操作 - -### 文档结构 - -```c -typedef struct ZVecDoc ZVecDoc; // 不透明指针 - -// 字段值联合 -typedef union { - bool bool_value; - int32_t int32_value; - int64_t int64_value; - uint32_t uint32_value; - uint64_t uint64_value; - float float_value; - double double_value; - ZVecString string_value; - ZVecFloatArray vector_value; - ZVecByteArray binary_value; -} ZVecFieldValue; - -// 文档字段 -typedef struct { - ZVecString name; - ZVecDataType data_type; - ZVecFieldValue value; -} ZVecDocField; -``` - -### 创建和销毁文档 - -```c -// 创建文档 -ZVecDoc *doc = zvec_doc_create(); - -// 清空文档 -zvec_doc_clear(doc); - -// 销毁文档 -zvec_doc_destroy(doc); -``` - -### 设置文档属性 - -```c -// 设置主键 -zvec_doc_set_pk(doc, "doc_001"); - -// 设置文档 ID -zvec_doc_set_doc_id(doc, 12345); - -// 设置分数 -zvec_doc_set_score(doc, 0.95f); - -// 设置操作类型 -typedef enum { - ZVEC_DOC_OP_INSERT = 0, // 插入 - ZVEC_DOC_OP_UPDATE = 1, // 更新 - ZVEC_DOC_OP_UPSERT = 2, // 插入或更新 - ZVEC_DOC_OP_DELETE = 3 // 删除 -} ZVecDocOperator; - -zvec_doc_set_operator(doc, ZVEC_DOC_OP_INSERT); -``` - -### 获取文档属性 - -```c -// 获取文档 ID -uint64_t id = zvec_doc_get_doc_id(doc); - -// 获取分数 -float score = zvec_doc_get_score(doc); - -// 获取操作类型 -ZVecDocOperator op = zvec_doc_get_operator(doc); - -// 获取主键指针(不复制) -const char *pk = zvec_doc_get_pk_pointer(doc); - -// 获取主键副本(需手动释放) -const char *pk = zvec_doc_get_pk_copy(doc); -free((void*)pk); - -// 获取字段数量 -size_t count = zvec_doc_get_field_count(doc); - -// 检查文档是否为空 -bool empty = zvec_doc_is_empty(doc); - -// 
检查是否包含字段 -bool has = zvec_doc_has_field(doc, "embedding"); - -// 检查字段是否有值 -bool has_value = zvec_doc_has_field_value(doc, "embedding"); - -// 检查字段是否为 null -bool is_null = zvec_doc_is_field_null(doc, "optional_field"); -``` - -### 添加字段 - -```c -// 按值添加字段 -float embedding[768] = {0.1f, 0.2f, ...}; -zvec_doc_add_field_by_value(doc, "embedding", - ZVEC_DATA_TYPE_VECTOR_FP32, embedding, sizeof(embedding)); - -// 添加字符串字段 -const char *title = "Hello World"; -zvec_doc_add_field_by_value(doc, "title", - ZVEC_DATA_TYPE_STRING, title, strlen(title) + 1); - -// 添加整数字段 -int64_t timestamp = 1234567890; -zvec_doc_add_field_by_value(doc, "timestamp", - ZVEC_DATA_TYPE_INT64, &timestamp, sizeof(timestamp)); - -// 按结构添加字段 -ZVecDocField field; -field.name = ZVEC_STRING("score"); -field.data_type = ZVEC_DATA_TYPE_FLOAT; -field.value.float_value = 0.95f; -zvec_doc_add_field_by_struct(doc, &field); - -// 删除字段 -zvec_doc_remove_field(doc, "title"); -``` - -### 获取字段值 - -```c -// 获取基本类型值 -float float_val; -zvec_doc_get_field_value_basic(doc, "score", - ZVEC_DATA_TYPE_FLOAT, &float_val, sizeof(float_val)); - -int64_t int_val; -zvec_doc_get_field_value_basic(doc, "timestamp", - ZVEC_DATA_TYPE_INT64, &int_val, sizeof(int_val)); - -// 获取字段值副本(需手动释放) -void *value; -size_t value_size; - -// 获取字符串 -zvec_doc_get_field_value_copy(doc, "title", ZVEC_DATA_TYPE_STRING, &value, &value_size); -printf("Title: %s\n", (char*)value); -free(value); - -// 获取向量 -zvec_doc_get_field_value_copy(doc, "embedding", ZVEC_DATA_TYPE_VECTOR_FP32, &value, &value_size); -float *vec = (float*)value; -// 使用... 
-free(value); - -// 获取二进制数据 -zvec_doc_get_field_value_copy(doc, "data", ZVEC_DATA_TYPE_BINARY, &value, &value_size); -zvec_free_uint8_array((uint8_t*)value); - -// 获取字段值指针(无需释放,数据在文档内) -const void *value; -size_t value_size; -zvec_doc_get_field_value_pointer(doc, "score", ZVEC_DATA_TYPE_FLOAT, &value, &value_size); -float score = *(float*)value; -``` - -### 获取所有字段名 - -```c -char **field_names; -size_t count; -zvec_doc_get_field_names(doc, &field_names, &count); - -for (size_t i = 0; i < count; i++) { - printf("Field %zu: %s\n", i, field_names[i]); -} - -// 释放 -zvec_free_str_array(field_names, count); -``` - -### 序列化/反序列化 - -```c -// 序列化 -uint8_t *data; -size_t size; -ZVecErrorCode rc = zvec_doc_serialize(doc, &data, &size); - -// 保存到文件 -FILE *f = fopen("doc.bin", "wb"); -fwrite(data, 1, size, f); -fclose(f); -zvec_free_uint8_array(data); - -// 从文件读取 -FILE *f = fopen("doc.bin", "rb"); -fseek(f, 0, SEEK_END); -size_t file_size = ftell(f); -fseek(f, 0, SEEK_SET); -uint8_t *buffer = malloc(file_size); -fread(buffer, 1, file_size, f); -fclose(f); - -// 反序列化 -ZVecDoc *new_doc; -rc = zvec_doc_deserialize(buffer, file_size, &new_doc); -free(buffer); - -// 使用... -zvec_doc_destroy(new_doc); -``` - -### 文档合并 - -```c -// 合并两个文档 -ZVecDoc *doc1 = zvec_doc_create(); -ZVecDoc *doc2 = zvec_doc_create(); - -// 设置字段... -zvec_doc_merge(doc1, doc2); // 将 doc2 的字段合并到 doc1 -``` - -### 内存使用 - -```c -size_t bytes = zvec_doc_memory_usage(doc); -printf("Document uses %zu bytes\n", bytes); -``` - -### 验证文档 - -```c -char *error_msg; -ZVecErrorCode rc = zvec_doc_validate(doc, schema, false, &error_msg); -if (rc != ZVEC_OK) { - printf("Invalid document: %s\n", error_msg); - free(error_msg); -} -``` - -### 文档详细信息 - -```c -char *detail_str; -zvec_doc_to_detail_string(doc, &detail_str); -printf("Document: %s\n", detail_str); -free(detail_str); -``` - -### 批量释放文档 - -```c -ZVecDoc **docs = malloc(count * sizeof(ZVecDoc*)); -// 填充 docs... 
- -// 批量释放 -zvec_docs_free(docs, count); -``` - -| 函数 | 参数 | 返回值 | 说明 | -|------|------|--------|------| -| `zvec_doc_create()` | 无 | `ZVecDoc*` | 创建文档 | -| `zvec_doc_destroy(doc)` | `ZVecDoc*` | void | 销毁文档 | -| `zvec_doc_clear(doc)` | `ZVecDoc*` | void | 清空文档 | -| `zvec_doc_set_pk(doc, pk)` | doc, 主键 | void | 设置主键 | -| `zvec_doc_set_doc_id(doc, id)` | doc, ID | void | 设置文档 ID | -| `zvec_doc_set_score(doc, score)` | doc, 分数 | void | 设置分数 | -| `zvec_doc_set_operator(doc, op)` | doc, 操作类型 | void | 设置操作类型 | -| `zvec_doc_get_doc_id(doc)` | `ZVecDoc*` | uint64_t | 获取文档 ID | -| `zvec_doc_get_score(doc)` | `ZVecDoc*` | float | 获取分数 | -| `zvec_doc_get_operator(doc)` | `ZVecDoc*` | `ZVecDocOperator` | 获取操作类型 | -| `zvec_doc_get_pk_pointer(doc)` | `ZVecDoc*` | `const char*` | 获取主键指针 | -| `zvec_doc_get_pk_copy(doc)` | `ZVecDoc*` | `const char*` | 获取主键副本 | -| `zvec_doc_get_field_count(doc)` | `ZVecDoc*` | size_t | 获取字段数量 | -| `zvec_doc_is_empty(doc)` | `ZVecDoc*` | bool | 检查是否为空 | -| `zvec_doc_has_field(doc, name)` | doc, 字段名 | bool | 检查是否包含字段 | -| `zvec_doc_has_field_value(doc, name)` | doc, 字段名 | bool | 检查字段是否有值 | -| `zvec_doc_is_field_null(doc, name)` | doc, 字段名 | bool | 检查字段是否为 null | -| `zvec_doc_add_field_by_value(doc, name, type, value, size)` | doc, 名,类型,值,大小 | `ZVecErrorCode` | 添加字段 | -| `zvec_doc_add_field_by_struct(doc, field)` | doc, 字段结构 | `ZVecErrorCode` | 按结构添加字段 | -| `zvec_doc_remove_field(doc, name)` | doc, 字段名 | `ZVecErrorCode` | 删除字段 | -| `zvec_doc_get_field_value_basic(doc, name, type, buf, size)` | doc, 名,类型,缓冲区,大小 | `ZVecErrorCode` | 获取基本类型值 | -| `zvec_doc_get_field_value_copy(doc, name, type, &val, &size)` | doc, 名,类型,值输出,大小输出 | `ZVecErrorCode` | 获取字段值副本 | -| `zvec_doc_get_field_value_pointer(doc, name, type, &val, &size)` | doc, 名,类型,值输出,大小输出 | `ZVecErrorCode` | 获取字段值指针 | -| `zvec_doc_get_field_names(doc, &names, &count)` | doc, 名称输出,数量输出 | `ZVecErrorCode` | 获取所有字段名 | -| `zvec_doc_serialize(doc, &data, &size)` | doc, 数据输出,大小输出 | `ZVecErrorCode` | 序列化 | 
-| `zvec_doc_deserialize(data, size, &doc)` | 数据,大小,文档输出 | `ZVecErrorCode` | 反序列化 | -| `zvec_doc_merge(doc, other)` | doc, 源文档 | void | 合并文档 | -| `zvec_doc_memory_usage(doc)` | `ZVecDoc*` | size_t | 获取内存使用 | -| `zvec_doc_validate(doc, schema, is_update, &err)` | doc, schema, 是否更新,错误输出 | `ZVecErrorCode` | 验证文档 | -| `zvec_doc_to_detail_string(doc, &str)` | doc, 字符串输出 | `ZVecErrorCode` | 获取详细信息字符串 | -| `zvec_docs_free(docs, count)` | 文档数组,数量 | void | 批量释放文档 | -| `zvec_free_str_array(arr, count)` | 字符串数组,数量 | void | 释放字符串数组 | - ---- - -## 数据增删改 - -### 插入文档 - -```c -ZVecDoc *docs[3]; -docs[0] = zvec_doc_create(); -docs[1] = zvec_doc_create(); -docs[2] = zvec_doc_create(); - -zvec_doc_set_pk(docs[0], "doc_001"); -zvec_doc_set_pk(docs[1], "doc_002"); -zvec_doc_set_pk(docs[2], "doc_003"); - -// 添加字段... - -size_t success_count, error_count; -ZVecErrorCode rc = zvec_collection_insert(collection, - (const ZVecDoc**)docs, 3, &success_count, &error_count); - -printf("Inserted: %zu, Failed: %zu\n", success_count, error_count); - -// 清理 -zvec_docs_free(docs, 3); -``` - -### 更新文档 - -```c -ZVecDoc *doc = zvec_doc_create(); -zvec_doc_set_pk(doc, "doc_001"); - -// 设置要更新的字段 -float new_embedding[768] = {0.2f, 0.3f, ...}; -zvec_doc_add_field_by_value(doc, "embedding", - ZVEC_DATA_TYPE_VECTOR_FP32, new_embedding, sizeof(new_embedding)); - -size_t success_count, error_count; -ZVecErrorCode rc = zvec_collection_update(collection, - (const ZVecDoc**)&doc, 1, &success_count, &error_count); - -zvec_doc_destroy(doc); -``` - -### 插入或更新(Upsert) - -```c -ZVecDoc *doc = zvec_doc_create(); -zvec_doc_set_pk(doc, "doc_001"); -// 设置字段... 
- -size_t success_count, error_count; -ZVecErrorCode rc = zvec_collection_upsert(collection, - (const ZVecDoc**)&doc, 1, &success_count, &error_count); - -zvec_doc_destroy(doc); -``` - -### 删除文档 - -```c -// 按主键删除 -const char *pks[] = {"doc_001", "doc_002", "doc_003"}; -size_t success_count, error_count; -ZVecErrorCode rc = zvec_collection_delete(collection, - pks, 3, &success_count, &error_count); - -// 按过滤条件删除 -rc = zvec_collection_delete_by_filter(collection, "category='spam'"); -``` - -| 函数 | 参数 | 返回值 | 说明 | -|------|------|--------|------| -| `zvec_collection_insert(coll, docs, count, &success, &error)` | 集合,文档数组,数量,成功数输出,错误数输出 | `ZVecErrorCode` | 插入文档 | -| `zvec_collection_update(coll, docs, count, &success, &error)` | 集合,文档数组,数量,成功数输出,错误数输出 | `ZVecErrorCode` | 更新文档 | -| `zvec_collection_upsert(coll, docs, count, &success, &error)` | 集合,文档数组,数量,成功数输出,错误数输出 | `ZVecErrorCode` | 插入或更新 | -| `zvec_collection_delete(coll, pks, count, &success, &error)` | 集合,主键数组,数量,成功数输出,错误数输出 | `ZVecErrorCode` | 按主键删除 | -| `zvec_collection_delete_by_filter(coll, filter)` | 集合,过滤表达式 | `ZVecErrorCode` | 按条件删除 | - ---- - -## 数据查询 - -### 向量查询参数 - -```c -typedef struct { - ZVecIndexType index_type; // 索引类型 - float radius; // 搜索半径 - bool is_linear; // 是否线性搜索 - bool is_using_refiner; // 是否使用优化器 -} ZVecQueryParams; -``` - -### HNSW 查询参数 - -```c -typedef struct { - ZVecQueryParams base; - int ef; // 搜索时探索因子 -} ZVecHnswQueryParams; - -// 创建 -ZVecHnswQueryParams *params = zvec_query_params_hnsw_create( - ZVEC_INDEX_TYPE_HNSW, - 100, // ef - 0.0f, // radius - false, // is_linear - true // is_using_refiner -); - -zvec_query_params_hnsw_set_ef(params, 200); -zvec_query_params_hnsw_destroy(params); -``` - -### IVF 查询参数 - -```c -typedef struct { - ZVecQueryParams base; - int nprobe; // 探测聚类数 - float scale_factor; // 缩放因子 -} ZVecIVFQueryParams; - -// 创建 -ZVecIVFQueryParams *params = zvec_query_params_ivf_create( - ZVEC_INDEX_TYPE_IVF, - 20, // nprobe - true, // is_using_refiner - 1.0f // 
scale_factor -); - -zvec_query_params_ivf_set_nprobe(params, 50); -zvec_query_params_ivf_set_scale_factor(params, 1.5f); -zvec_query_params_ivf_destroy(params); -``` - -### Flat 查询参数 - -```c -typedef struct { - ZVecQueryParams base; - float scale_factor; // 缩放因子 -} ZVecFlatQueryParams; - -ZVecFlatQueryParams *params = zvec_query_params_flat_create( - ZVEC_INDEX_TYPE_FLAT, - false, // is_using_refiner - 1.0f // scale_factor -); - -zvec_query_params_flat_destroy(params); -``` - -### 基础查询参数 - -```c -// 创建基础参数 -ZVecQueryParams *params = zvec_query_params_create(ZVEC_INDEX_TYPE_HNSW); - -// 设置属性 -zvec_query_params_set_index_type(params, ZVEC_INDEX_TYPE_HNSW); -zvec_query_params_set_radius(params, 0.5f); -zvec_query_params_set_is_linear(params, true); -zvec_query_params_set_is_using_refiner(params, true); - -zvec_query_params_destroy(params); -``` - -### 向量查询 - -```c -typedef struct { - int topk; // 返回结果数 - ZVecString field_name; // 查询字段名 - ZVecByteArray query_vector; // 查询向量 - ZVecByteArray query_sparse_indices; // 稀疏向量索引 - ZVecByteArray query_sparse_values; // 稀疏向量值 - ZVecString filter; // 过滤表达式 - bool include_vector; // 是否返回向量 - bool include_doc_id; // 是否返回文档 ID - ZVecStringArray output_fields; // 输出字段列表 - ZVecQueryParamsUnion *query_params; // 查询参数 -} ZVecVectorQuery; - -// 使用宏快速创建 -float query_vec[768] = {0.1f, 0.2f, ...}; -ZVecVectorQuery query = ZVEC_VECTOR_QUERY( - "embedding", // 字段名 - ZVEC_FLOAT_ARRAY(query_vec, 768), - 10, // topK - "category='news'" // 过滤条件 -); - -// 手动创建 -ZVecVectorQuery query = { - .topk = 10, - .field_name = ZVEC_STRING("embedding"), - .query_vector = ZVEC_FLOAT_ARRAY(query_vec, 768), - .filter = ZVEC_STRING(""), - .include_vector = true, - .include_doc_id = true, - .output_fields.strings = NULL, - .output_fields.count = 0, - .query_params = NULL -}; - -// 执行查询 -ZVecDoc **results; -size_t result_count; -ZVecErrorCode rc = zvec_collection_query(collection, &query, &results, &result_count); - -if (rc == ZVEC_OK) { - for (size_t i = 0; i < 
result_count; i++) { - const char *pk = zvec_doc_get_pk_pointer(results[i]); - float score = zvec_doc_get_score(results[i]); - printf("Result %zu: pk=%s, score=%f\n", i, pk, score); - } -} - -// 释放结果 -zvec_docs_free(results, result_count); -``` - -### 分组向量查询 - -```c -typedef struct { - ZVecString field_name; // 查询字段名 - ZVecByteArray query_vector; // 查询向量 - ZVecByteArray query_sparse_indices; // 稀疏向量索引 - ZVecByteArray query_sparse_values; // 稀疏向量值 - ZVecString filter; // 过滤表达式 - bool include_vector; // 是否返回向量 - ZVecStringArray output_fields; // 输出字段列表 - ZVecString group_by_field_name; // 分组字段名 - uint32_t group_count; // 分组数量 - uint32_t group_topk; // 每组返回结果数 - ZVecQueryParamsUnion *query_params; // 查询参数 -} ZVecGroupByVectorQuery; - -// 创建分组查询 -ZVecGroupByVectorQuery query = { - .field_name = ZVEC_STRING("embedding"), - .query_vector = ZVEC_FLOAT_ARRAY(query_vec, 768), - .filter = ZVEC_STRING(""), - .include_vector = false, - .group_by_field_name = ZVEC_STRING("category"), - .group_count = 5, - .group_topk = 3, - .query_params = NULL -}; - -// 执行查询 -ZVecDoc **results; -ZVecString **group_values; -size_t result_count; - -ZVecErrorCode rc = zvec_collection_query_by_group( - collection, &query, &results, &group_values, &result_count); - -if (rc == ZVEC_OK) { - for (size_t i = 0; i < result_count; i++) { - printf("Group: %s\n", group_values[i]->data); - // 处理结果... - } -} - -// 释放结果 -zvec_docs_free(results, result_count); -zvec_string_array_destroy((ZVecStringArray*)group_values); -``` - -### 按主键获取 - -```c -const char *pks[] = {"doc_001", "doc_002", "doc_003"}; -ZVecDoc **documents; -size_t found_count; - -ZVecErrorCode rc = zvec_collection_fetch(collection, - pks, 3, &documents, &found_count); - -printf("Found %zu documents\n", found_count); - -// 使用... 
-zvec_docs_free(documents, found_count); -``` - -| 函数 | 参数 | 返回值 | 说明 | -|------|------|--------|------| -| `zvec_query_params_create(type)` | 索引类型 | `ZVecQueryParams*` | 创建查询参数 | -| `zvec_query_params_hnsw_create(type, ef, radius, linear, refiner)` | 类型,ef, 半径,线性,优化器 | `ZVecHnswQueryParams*` | 创建 HNSW 查询参数 | -| `zvec_query_params_ivf_create(type, nprobe, refiner, scale)` | 类型,nprobe, 优化器,缩放因子 | `ZVecIVFQueryParams*` | 创建 IVF 查询参数 | -| `zvec_query_params_flat_create(type, refiner, scale)` | 类型,优化器,缩放因子 | `ZVecFlatQueryParams*` | 创建 Flat 查询参数 | -| `zvec_query_params_union_create(type)` | 索引类型 | `ZVecQueryParamsUnion*` | 创建查询参数联合 | -| `zvec_query_params_destroy(params)` | 参数 | void | 销毁查询参数 | -| `zvec_query_params_hnsw_destroy(params)` | 参数 | void | 销毁 HNSW 查询参数 | -| `zvec_query_params_ivf_destroy(params)` | 参数 | void | 销毁 IVF 查询参数 | -| `zvec_query_params_flat_destroy(params)` | 参数 | void | 销毁 Flat 查询参数 | -| `zvec_query_params_union_destroy(params)` | 参数 | void | 销毁查询参数联合 | -| `zvec_query_params_set_index_type(params, type)` | 参数,类型 | `ZVecErrorCode` | 设置索引类型 | -| `zvec_query_params_set_radius(params, radius)` | 参数,半径 | `ZVecErrorCode` | 设置搜索半径 | -| `zvec_query_params_set_is_linear(params, linear)` | 参数,是否线性 | `ZVecErrorCode` | 设置线性搜索 | -| `zvec_query_params_set_is_using_refiner(params, refiner)` | 参数,是否优化器 | `ZVecErrorCode` | 设置优化器 | -| `zvec_query_params_hnsw_set_ef(params, ef)` | 参数,ef | `ZVecErrorCode` | 设置 ef | -| `zvec_query_params_ivf_set_nprobe(params, nprobe)` | 参数,nprobe | `ZVecErrorCode` | 设置 nprobe | -| `zvec_query_params_ivf_set_scale_factor(params, scale)` | 参数,缩放因子 | `ZVecErrorCode` | 设置缩放因子 | -| `zvec_collection_query(coll, query, &results, &count)` | 集合,查询,结果输出,数量输出 | `ZVecErrorCode` | 向量查询 | -| `zvec_collection_query_by_group(coll, query, &results, &groups, &count)` | 集合,分组查询,结果输出,分组值输出,数量输出 | `ZVecErrorCode` | 分组向量查询 | -| `zvec_collection_fetch(coll, pks, count, &docs, &found)` | 集合,主键数组,数量,文档输出,找到数量 | `ZVecErrorCode` | 按主键获取 | - ---- - -## 工具函数 - 
-### 类型转字符串 - -```c -// 数据类型转字符串 -const char *type_str = zvec_data_type_to_string(ZVEC_DATA_TYPE_VECTOR_FP32); -// 返回:"VECTOR_FP32" - -// 索引类型转字符串 -const char *idx_str = zvec_index_type_to_string(ZVEC_INDEX_TYPE_HNSW); -// 返回:"HNSW" - -// 距离类型转字符串 -const char *metric_str = zvec_metric_type_to_string(ZVEC_METRIC_TYPE_COSINE); -// 返回:"COSINE" - -// 错误码转字符串 -const char *err_str = zvec_error_code_to_string(ZVEC_ERROR_INVALID_ARGUMENT); -// 返回:"Invalid argument" -``` - -| 函数 | 参数 | 返回值 | 说明 | -|------|------|--------|------| -| `zvec_data_type_to_string(type)` | `ZVecDataType` | `const char*` | 数据类型转字符串 | -| `zvec_index_type_to_string(type)` | `ZVecIndexType` | `const char*` | 索引类型转字符串 | -| `zvec_metric_type_to_string(type)` | `ZVecMetricType` | `const char*` | 距离类型转字符串 | -| `zvec_error_code_to_string(code)` | `ZVecErrorCode` | `const char*` | 错误码转字符串 | - ---- - -## 完整示例 - -### 构建可搜索的向量数据库 - -```c -#include "zvec/c_api.h" -#include -#include - -#define DIM 768 -#define DOC_COUNT 1000 - -// 生成随机向量 -void generate_vector(float *vec, size_t dim) { - for (size_t i = 0; i < dim; i++) { - vec[i] = (float)rand() / RAND_MAX; - } -} - -int main() { - ZVecErrorCode rc; - - // ========== 1. 初始化 ========== - printf("Initializing ZVec...\n"); - rc = zvec_initialize(NULL); - if (rc != ZVEC_OK) { - fprintf(stderr, "Failed to initialize: %s\n", - zvec_error_code_to_string(rc)); - return 1; - } - printf("Version: %s\n", zvec_get_version()); - - // ========== 2. 
创建 Schema ========== - printf("Creating schema...\n"); - ZVecCollectionSchema *schema = zvec_collection_schema_create("documents"); - - // ID 字段 - ZVecFieldSchema *id_field = zvec_field_schema_create( - "id", ZVEC_DATA_TYPE_STRING, false, 0); - zvec_collection_schema_add_field(schema, id_field); - - // 向量字段 - ZVecFieldSchema *embedding_field = zvec_field_schema_create( - "embedding", ZVEC_DATA_TYPE_VECTOR_FP32, false, DIM); - zvec_collection_schema_add_field(schema, embedding_field); - - // 标题字段 - ZVecFieldSchema *title_field = zvec_field_schema_create( - "title", ZVEC_DATA_TYPE_STRING, true, 0); - ZVecInvertIndexParams *invert_params = zvec_index_params_invert_create( - true, true); // 启用范围优化和通配符 - zvec_field_schema_set_invert_index(title_field, invert_params); - zvec_collection_schema_add_field(schema, title_field); - - // 时间戳字段 - ZVecFieldSchema *ts_field = zvec_field_schema_create( - "timestamp", ZVEC_DATA_TYPE_INT64, true, 0); - zvec_collection_schema_add_field(schema, ts_field); - - // 验证 Schema - ZVecString *error_msg; - rc = zvec_collection_schema_validate(schema, &error_msg); - if (rc != ZVEC_OK) { - fprintf(stderr, "Invalid schema: %s\n", error_msg->data); - zvec_free_string(error_msg); - return 1; - } - - // ========== 3. 创建 Collection ========== - printf("Creating collection...\n"); - ZVecCollection *collection; - ZVecCollectionOptions options = ZVEC_DEFAULT_OPTIONS(); - - rc = zvec_collection_create_and_open( - "./my_vector_db", schema, &options, &collection); - if (rc != ZVEC_OK) { - fprintf(stderr, "Failed to create collection: %s\n", - zvec_error_code_to_string(rc)); - return 1; - } - - // ========== 4. 
创建索引 ========== - printf("Creating HNSW index...\n"); - ZVecHnswIndexParams *hnsw_params = zvec_index_params_hnsw_create( - ZVEC_METRIC_TYPE_COSINE, - ZVEC_QUANTIZE_TYPE_UNDEFINED, - 16, // m - 200, // ef_construction - 50 // ef_search - ); - rc = zvec_collection_create_hnsw_index(collection, "embedding", hnsw_params); - zvec_index_params_hnsw_destroy(hnsw_params); - - // ========== 5. 批量插入数据 ========== - printf("Inserting %d documents...\n", DOC_COUNT); - - ZVecDoc **docs = malloc(DOC_COUNT * sizeof(ZVecDoc*)); - float vectors[DOC_COUNT][DIM]; - - for (int i = 0; i < DOC_COUNT; i++) { - docs[i] = zvec_doc_create(); - - // 设置主键 - char pk[32]; - snprintf(pk, sizeof(pk), "doc_%06d", i); - zvec_doc_set_pk(docs[i], pk); - - // 生成随机向量 - generate_vector(vectors[i], DIM); - zvec_doc_add_field_by_value(docs[i], "embedding", - ZVEC_DATA_TYPE_VECTOR_FP32, vectors[i], sizeof(float) * DIM); - - // 添加标题 - char title[64]; - snprintf(title, sizeof(title), "Document Title %d", i); - zvec_doc_add_field_by_value(docs[i], "title", - ZVEC_DATA_TYPE_STRING, title, strlen(title) + 1); - - // 添加时间戳 - int64_t ts = 1700000000 + i * 1000; - zvec_doc_add_field_by_value(docs[i], "timestamp", - ZVEC_DATA_TYPE_INT64, &ts, sizeof(ts)); - } - - size_t success_count, error_count; - rc = zvec_collection_insert(collection, - (const ZVecDoc**)docs, DOC_COUNT, &success_count, &error_count); - printf("Inserted: %zu, Failed: %zu\n", success_count, error_count); - - // 清理文档 - zvec_docs_free(docs, DOC_COUNT); - free(docs); - - // 刷盘 - zvec_collection_flush(collection); - - // ========== 6. 
查询 ========== - printf("\nPerforming vector search...\n"); - - // 生成查询向量 - float query_vec[DIM]; - generate_vector(query_vec, DIM); - - // 创建查询 - ZVecVectorQuery query = ZVEC_VECTOR_QUERY( - "embedding", - ZVEC_FLOAT_ARRAY(query_vec, DIM), - 10, // topK - "timestamp > 1700500000" // 过滤条件 - ); - - // 执行查询 - ZVecDoc **results; - size_t result_count; - rc = zvec_collection_query(collection, &query, &results, &result_count); - - if (rc == ZVEC_OK) { - printf("Found %zu results:\n", result_count); - for (size_t i = 0; i < result_count; i++) { - const char *pk = zvec_doc_get_pk_pointer(results[i]); - float score = zvec_doc_get_score(results[i]); - - // 获取标题 - const char *title; - size_t title_size; - zvec_doc_get_field_value_copy(results[i], "title", - ZVEC_DATA_TYPE_STRING, (void**)&title, &title_size); - - printf(" [%zu] %s - score: %.4f - title: %s\n", - i, pk, score, title); - free((void*)title); - } - } - - // 释放结果 - zvec_docs_free(results, result_count); - - // ========== 7. 获取统计信息 ========== - printf("\nCollection statistics:\n"); - ZVecCollectionStats *stats; - rc = zvec_collection_get_stats(collection, &stats); - if (rc == ZVEC_OK) { - printf(" Total documents: %lu\n", stats->doc_count); - printf(" Index count: %zu\n", stats->index_count); - for (size_t i = 0; i < stats->index_count; i++) { - printf(" Index %zu: %s (%.1f%% complete)\n", - i, stats->index_names[i]->data, - stats->index_completeness[i] * 100); - } - zvec_collection_stats_destroy(stats); - } - - // ========== 8. 
清理 ========== - printf("\nCleaning up...\n"); - zvec_collection_close(collection); - zvec_collection_destroy(collection); - zvec_collection_schema_destroy(schema); - zvec_shutdown(); - - printf("Done!\n"); - return 0; -} -``` - -### 编译示例 - -```bash -gcc -o example example.c -lzvec -I./include -L./lib -./example -``` - ---- - -## 附录 - -### 内存管理约定 - -| 创建函数 | 释放函数 | 说明 | -|----------|----------|------| -| `zvec_*_create()` | `zvec_*_destroy()` | 需要成对调用 | -| `zvec_collection_create_and_open()` | `zvec_collection_close()` + `zvec_collection_destroy()` | Collection 生命周期 | -| `zvec_doc_create()` | `zvec_doc_destroy()` | 文档生命周期 | -| `zvec_get_last_error(&msg)` | `free(msg)` | 错误消息需手动释放 | -| `zvec_doc_get_field_value_copy()` | `free()` 或 `zvec_free_uint8_array()` | 字段值副本需释放 | -| 查询返回的 `results` | `zvec_docs_free()` | 查询结果批量释放 | - -### 宏定义速查 - -```c -// 索引参数宏 -ZVEC_HNSW_PARAMS(metric, m, ef_construction, ef_search, quant) -ZVEC_IVF_PARAMS(metric, nlist, niters, soar, nprobe, quant) -ZVEC_FLAT_PARAMS(metric, quant) -ZVEC_INVERT_PARAMS(range_opt, wildcard) - -// 数据结构宏 -ZVEC_STRING(str) -ZVEC_STRING_VIEW(str) -ZVEC_FLOAT_ARRAY(data_ptr, len) -ZVEC_INT64_ARRAY(data_ptr, len) - -// 选项宏 -ZVEC_DEFAULT_OPTIONS() - -// 查询宏 -ZVEC_VECTOR_QUERY(field_name, query_vec, top_k, filter) - -// 文档字段宏 -ZVEC_DOC_FIELD(name, type, value_union) -``` - -### 最佳实践 - -1. **初始化检查**: 总是检查 `zvec_initialize()` 的返回值 -2. **错误处理**: 每次 API 调用后检查返回值,使用 `zvec_get_last_error()` 获取详情 -3. **资源释放**: 确保所有创建的资源都被正确释放 -4. **批量操作**: 使用批量插入/更新/删除提高性能 -5. **索引选择**: - - 小规模数据 (< 10 万): 使用 Flat 索引 - - 中等规模 (10 万 -1000 万): 使用 HNSW 索引 - - 大规模 (> 1000 万): 使用 IVF 索引 -6. **查询优化**: 合理使用过滤条件减少扫描范围 diff --git a/src/db/CMakeLists.txt b/src/db/CMakeLists.txt index 0384659b..b2689278 100644 --- a/src/db/CMakeLists.txt +++ b/src/db/CMakeLists.txt @@ -17,6 +17,7 @@ cc_library( NAME zvec_db STATIC STRICT SRCS_NO_GLOB PACKED SRCS ${ALL_DB_SRCS} ${CMAKE_CURRENT_BINARY_DIR}/proto/zvec.pb.cc INCS . 
${CMAKE_CURRENT_BINARY_DIR} + PUBINCS ${PROJECT_ROOT_DIR}/src/include LIBS zvec_ailego zvec_core diff --git a/src/include/zvec/c_api.h b/src/include/zvec/c_api.h index e6496734..96e99b8b 100644 --- a/src/include/zvec/c_api.h +++ b/src/include/zvec/c_api.h @@ -20,6 +20,13 @@ #include #include +// Include generated version header +#if defined(__has_include) && __has_include() +#include +#else +#include "zvec_version.h" +#endif + // ============================================================================= // API Export Control // ============================================================================= @@ -51,18 +58,6 @@ extern "C" { // Version Information // ============================================================================= -/** @brief Major version number */ -#define ZVEC_VERSION_MAJOR 0 - -/** @brief Minor version number */ -#define ZVEC_VERSION_MINOR 3 - -/** @brief Patch version number */ -#define ZVEC_VERSION_PATCH 0 - -/** @brief Full version string */ -#define ZVEC_VERSION_STRING "0.3.0" - /** * @brief Get library version information * @@ -560,10 +555,9 @@ ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_shutdown(void); /** * @brief Check if library is initialized - * @param[out] initialized Whether initialized - * @return ZVecErrorCode Error code + * @return true if initialized, false otherwise */ -ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_is_initialized(bool *initialized); +ZVEC_EXPORT bool ZVEC_CALL zvec_is_initialized(void); // ============================================================================= // Data Type Enumerations @@ -1463,9 +1457,10 @@ typedef struct ZVecDoc ZVecDoc; /** * @brief Per-document status returned by detailed DML APIs. + * @note Uses ordered style: result index corresponds to input document index. + * Caller should access pk by index from the original input array. 
*/ typedef struct { - const char *pk; /**< Primary key (allocated by API) */ ZVecErrorCode code; /**< Per-document status code */ const char *message; /**< Per-document status message (allocated by API) */ } ZVecWriteResult; diff --git a/src/include/zvec/version.h.in b/src/include/zvec/version.h.in new file mode 100644 index 00000000..2d92b728 --- /dev/null +++ b/src/include/zvec/version.h.in @@ -0,0 +1,16 @@ +#ifndef ZVEC_VERSION_H +#define ZVEC_VERSION_H + +/** @brief Major version number */ +#define ZVEC_VERSION_MAJOR @ZVEC_VERSION_MAJOR@ + +/** @brief Minor version number */ +#define ZVEC_VERSION_MINOR @ZVEC_VERSION_MINOR@ + +/** @brief Patch version number */ +#define ZVEC_VERSION_PATCH @ZVEC_VERSION_PATCH@ + +/** @brief Full version string */ +#define ZVEC_VERSION_STRING "@ZVEC_VERSION_STRING@" + +#endif // ZVEC_VERSION_H diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index e1ffc326..7308514b 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -4,4 +4,6 @@ include(${PROJECT_ROOT_DIR}/cmake/option.cmake) cc_directories(ailego) cc_directories(db) cc_directories(core) -cc_directories(c_api) \ No newline at end of file +if(BUILD_C_BINDINGS) + cc_directories(c) +endif() diff --git a/tests/c_api/CMakeLists.txt b/tests/c/CMakeLists.txt similarity index 94% rename from tests/c_api/CMakeLists.txt rename to tests/c/CMakeLists.txt index ad2f62e1..b5e461a2 100644 --- a/tests/c_api/CMakeLists.txt +++ b/tests/c/CMakeLists.txt @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-include(${CMAKE_SOURCE_DIR}/cmake/bazel.cmake) +include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake) file(GLOB_RECURSE ALL_TEST_SRCS *_test.c) diff --git a/tests/c_api/c_api_test.c b/tests/c/c_api_test.c similarity index 99% rename from tests/c_api/c_api_test.c rename to tests/c/c_api_test.c index a442d191..0088cfef 100644 --- a/tests/c_api/c_api_test.c +++ b/tests/c/c_api_test.c @@ -67,25 +67,38 @@ void test_version_functions(void) { // Test version retrieval functions const char *version = zvec_get_version(); TEST_ASSERT(version != NULL); + printf(" Version string: %s\n", version); // Test version component retrieval int major = zvec_get_version_major(); int minor = zvec_get_version_minor(); int patch = zvec_get_version_patch(); + printf(" Version components: %d.%d.%d\n", major, minor, patch); TEST_ASSERT(major >= 0); TEST_ASSERT(minor >= 0); TEST_ASSERT(patch >= 0); + // Test version compatibility check with current version (should pass) TEST_ASSERT(zvec_check_version(major, minor, patch)); - // Test version checking functions - bool compatible = zvec_check_version(0, 3, 0); - TEST_ASSERT(compatible == true); + // Test with older version (should pass - current is newer) + if (minor > 0) { + TEST_ASSERT(zvec_check_version(major, minor - 1, patch)); + } + if (major > 0) { + TEST_ASSERT(zvec_check_version(major - 1, minor, patch)); + } + // Test with much newer version (should fail - current is older) bool not_compatible = zvec_check_version(99, 99, 99); TEST_ASSERT(not_compatible == false); + // Test with invalid negative versions (should fail and set error) + TEST_ASSERT(zvec_check_version(-1, 0, 0) == false); + TEST_ASSERT(zvec_check_version(0, -1, 0) == false); + TEST_ASSERT(zvec_check_version(0, 0, -1) == false); + TEST_END(); } @@ -265,9 +278,7 @@ void test_zvec_initialize() { } ZVecErrorCode err = zvec_initialize(config); TEST_ASSERT(err == ZVEC_OK); - bool is_initialized = false; - zvec_is_initialized(&is_initialized); - TEST_ASSERT(is_initialized); + 
TEST_ASSERT(zvec_is_initialized()); TEST_END(); } @@ -3158,7 +3169,6 @@ void test_index_params_api_functions(void) { TEST_START(); ZVecIndexParams params; - ZVecErrorCode error; // Test zvec_index_params_init for HNSW zvec_index_params_init(&params, ZVEC_INDEX_TYPE_HNSW, @@ -3468,10 +3478,6 @@ void test_collection_dml_functions(void) { TEST_ASSERT(err == ZVEC_OK); TEST_ASSERT(result_count == 1); if (results && result_count == 1) { - TEST_ASSERT(results[0].pk != NULL); - if (results[0].pk) { - TEST_ASSERT(strcmp(results[0].pk, "pk_101") == 0); - } TEST_ASSERT(results[0].code == ZVEC_OK); zvec_write_results_free(results, result_count); } @@ -3484,10 +3490,6 @@ void test_collection_dml_functions(void) { TEST_ASSERT(err == ZVEC_OK); TEST_ASSERT(result_count == 1); if (results && result_count == 1) { - TEST_ASSERT(results[0].pk != NULL); - if (results[0].pk) { - TEST_ASSERT(strcmp(results[0].pk, "pk_101") == 0); - } zvec_write_results_free(results, result_count); } diff --git a/tests/c_api/utils.c b/tests/c/utils.c similarity index 100% rename from tests/c_api/utils.c rename to tests/c/utils.c diff --git a/tests/c_api/utils.h b/tests/c/utils.h similarity index 100% rename from tests/c_api/utils.h rename to tests/c/utils.h diff --git a/tests/core/algorithm/flat_sparse/flat_sparse_builder_test.cc b/tests/core/algorithm/flat_sparse/flat_sparse_builder_test.cc index c89d086b..59dcb574 100644 --- a/tests/core/algorithm/flat_sparse/flat_sparse_builder_test.cc +++ b/tests/core/algorithm/flat_sparse/flat_sparse_builder_test.cc @@ -257,7 +257,7 @@ TEST_F(FlatSparseBuilderTest, TestHalfFloatConverter) { ASSERT_EQ(0UL, stats.discarded_count()); ASSERT_EQ(0UL, stats.trained_costtime()); ASSERT_EQ(stats.built_costtime(), 0UL); - //ASSERT_GT(stats.dumped_costtime(), 0UL); + // ASSERT_GT(stats.dumped_costtime(), 0UL); // cleanup and rebuild ASSERT_EQ(0, builder->cleanup()); diff --git a/tests/core/metric/quantized_integer_metric_test.cc b/tests/core/metric/quantized_integer_metric_test.cc
index 501d8c7b..835a07fb 100644 --- a/tests/core/metric/quantized_integer_metric_test.cc +++ b/tests/core/metric/quantized_integer_metric_test.cc @@ -251,7 +251,7 @@ void TestDistanceMatrixInt8(const std::string &metric_name) { const size_t batch_size = M; const size_t query_size = N; - size_t dimension = (std::uniform_int_distribution(1, 65))(gen)*4; + size_t dimension = (std::uniform_int_distribution(1, 65))(gen) * 4; auto holder = GetHolder(dimension, batch_size, dist); IndexMeta meta(IndexMeta::DT_FP32, dimension); meta.set_metric(metric_name, 0, Params()); @@ -453,7 +453,7 @@ void TestDistanceMatrixInt4(const std::string &metric_name) { const size_t batch_size = M; const size_t query_size = N; - size_t dimension = (std::uniform_int_distribution(1, 65))(gen)*8; + size_t dimension = (std::uniform_int_distribution(1, 65))(gen) * 8; auto holder = GetHolder(dimension, batch_size, dist); IndexMeta meta(IndexMeta::DT_FP32, dimension); meta.set_metric(metric_name, 0, Params()); From 4e5c052cfba6aab3e35efc1c96fbfccc258d2bce Mon Sep 17 00:00:00 2001 From: lc285652 Date: Mon, 23 Mar 2026 14:16:00 +0800 Subject: [PATCH 6/7] remove RAII guard --- src/binding/c/c_api.cc | 88 ------------------------------------------ 1 file changed, 88 deletions(-) diff --git a/src/binding/c/c_api.cc b/src/binding/c/c_api.cc index 3588f2ab..43718882 100644 --- a/src/binding/c/c_api.cc +++ b/src/binding/c/c_api.cc @@ -33,94 +33,6 @@ #include #include -// ============================================================================= -// RAII Helpers and Error Handling Macros -// ============================================================================= - -namespace { - -// RAII guard for malloc-allocated memory -template -struct MallocGuard { - T *ptr; - explicit MallocGuard(T *p = nullptr) : ptr(p) {} - ~MallocGuard() { - if (ptr) std::free(ptr); - } - MallocGuard(const MallocGuard &) = delete; - MallocGuard &operator=(const MallocGuard &) = delete; - MallocGuard(MallocGuard &&other) 
noexcept : ptr(other.ptr) { - other.ptr = nullptr; - } - MallocGuard &operator=(MallocGuard &&other) noexcept { - if (this != &other) { - if (ptr) std::free(ptr); - ptr = other.ptr; - other.ptr = nullptr; - } - return *this; - } - T *get() const { - return ptr; - } - T *release() { - T *p = ptr; - ptr = nullptr; - return p; - } - T **ptr_ptr() { - return &ptr; - } -}; - -// RAII guard for C++ objects allocated with new -template -struct DeleteGuard { - T *ptr; - explicit DeleteGuard(T *p = nullptr) : ptr(p) {} - ~DeleteGuard() { - delete ptr; - } - DeleteGuard(const DeleteGuard &) = delete; - DeleteGuard &operator=(const DeleteGuard &) = delete; - DeleteGuard(DeleteGuard &&other) noexcept : ptr(other.ptr) { - other.ptr = nullptr; - } - T *get() const { - return ptr; - } - T *release() { - T *p = ptr; - ptr = nullptr; - return p; - } -}; - -// RAII guard for array allocated with new[] -template -struct DeleteArrayGuard { - T *ptr; - explicit DeleteArrayGuard(T *p = nullptr) : ptr(p) {} - ~DeleteArrayGuard() { - delete[] ptr; - } - DeleteArrayGuard(const DeleteArrayGuard &) = delete; - DeleteArrayGuard &operator=(const DeleteArrayGuard &) = delete; - DeleteArrayGuard(DeleteArrayGuard &&other) noexcept : ptr(other.ptr) { - other.ptr = nullptr; - } - T *get() const { - return ptr; - } - T *release() { - T *p = ptr; - ptr = nullptr; - return p; - } -}; - -} // namespace - // Error checking macros - these preserve __LINE__ accuracy // Simplified macro for setting error with automatic file/line/function info #define SET_LAST_ERROR(code, msg) \ From 6b6be5b656c39e8d9784ded29e1879c7d7c15f92 Mon Sep 17 00:00:00 2001 From: lc285652 Date: Mon, 23 Mar 2026 20:34:22 +0800 Subject: [PATCH 7/7] refact use opaque pointer pattern --- examples/c/basic_example.c | 76 +- examples/c/collection_schema_example.c | 70 +- examples/c/doc_example.c | 72 +- examples/c/field_schema_example.c | 80 +- examples/c/index_example.c | 180 +- examples/c/optimized_example.c | 53 +- 
src/binding/c/c_api.cc | 3333 ++++++++++++++++++------ src/include/zvec/c_api.h | 1573 ++++++++--- tests/c/c_api_test.c | 964 ++++--- tests/c/utils.c | 396 +-- tests/c/utils.h | 8 - 11 files changed, 4985 insertions(+), 1820 deletions(-) diff --git a/examples/c/basic_example.c b/examples/c/basic_example.c index 2e912248..43767e8f 100644 --- a/examples/c/basic_example.c +++ b/examples/c/basic_example.c @@ -45,14 +45,23 @@ static ZVecErrorCode create_simple_test_collection( ZVecErrorCode error = ZVEC_OK; - // Create index parameters using new macros - // clang-format off - ZVecIndexParams invert_params_val = ZVEC_INVERT_PARAMS(true, false); - ZVecIndexParams hnsw_params_val = ZVEC_HNSW_PARAMS( - ZVEC_METRIC_TYPE_COSINE, 16, 200, 50, ZVEC_QUANTIZE_TYPE_UNDEFINED); - // clang-format on - ZVecIndexParams *invert_params = &invert_params_val; - ZVecIndexParams *hnsw_params = &hnsw_params_val; + // Create index parameters using new API + ZVecIndexParams *invert_params = + zvec_index_params_create(ZVEC_INDEX_TYPE_INVERT); + if (!invert_params) { + zvec_collection_schema_destroy(schema); + return ZVEC_ERROR_RESOURCE_EXHAUSTED; + } + zvec_index_params_set_invert_params(invert_params, true, false); + + ZVecIndexParams *hnsw_params = zvec_index_params_create(ZVEC_INDEX_TYPE_HNSW); + if (!hnsw_params) { + zvec_index_params_destroy(invert_params); + zvec_collection_schema_destroy(schema); + return ZVEC_ERROR_RESOURCE_EXHAUSTED; + } + zvec_index_params_set_metric_type(hnsw_params, ZVEC_METRIC_TYPE_COSINE); + zvec_index_params_set_hnsw_params(hnsw_params, 16, 200); // Create and add ID field (primary key) ZVecFieldSchema *id_field = @@ -60,6 +69,8 @@ static ZVecErrorCode create_simple_test_collection( zvec_field_schema_set_invert_index(id_field, invert_params); error = zvec_collection_schema_add_field(schema, id_field); if (error != ZVEC_OK) { + zvec_index_params_destroy(invert_params); + zvec_index_params_destroy(hnsw_params); zvec_collection_schema_destroy(schema); return error; 
} @@ -70,6 +81,8 @@ static ZVecErrorCode create_simple_test_collection( zvec_field_schema_set_invert_index(text_field, invert_params); error = zvec_collection_schema_add_field(schema, text_field); if (error != ZVEC_OK) { + zvec_index_params_destroy(invert_params); + zvec_index_params_destroy(hnsw_params); zvec_collection_schema_destroy(schema); return error; } @@ -80,18 +93,29 @@ static ZVecErrorCode create_simple_test_collection( zvec_field_schema_set_hnsw_index(embedding_field, hnsw_params); error = zvec_collection_schema_add_field(schema, embedding_field); if (error != ZVEC_OK) { + zvec_index_params_destroy(invert_params); + zvec_index_params_destroy(hnsw_params); zvec_collection_schema_destroy(schema); return error; } + // Cleanup index parameters (they have been copied to the field schemas) + zvec_index_params_destroy(invert_params); + zvec_index_params_destroy(hnsw_params); + // Use default options - ZVecCollectionOptions options = ZVEC_DEFAULT_OPTIONS(); + ZVecCollectionOptions *options = zvec_collection_options_create(); + if (!options) { + zvec_collection_schema_destroy(schema); + return ZVEC_ERROR_RESOURCE_EXHAUSTED; + } // Create collection using the new API - error = zvec_collection_create_and_open("./test_collection", schema, &options, + error = zvec_collection_create_and_open("./test_collection", schema, options, collection); // Cleanup resources + zvec_collection_options_destroy(options); zvec_collection_schema_destroy(schema); return error; @@ -176,28 +200,31 @@ int main() { error = zvec_collection_get_stats(collection, &stats); if (handle_error(error, "getting collection stats") == ZVEC_OK) { printf("✓ Collection stats - Document count: %llu\n", - (unsigned long long)stats->doc_count); + (unsigned long long)zvec_collection_stats_get_doc_count(stats)); // Free statistics memory zvec_collection_stats_destroy(stats); } printf("Testing vector query...\n"); // Query documents - ZVecVectorQuery query = {0}; - query.field_name = - (ZVecString){.data = 
"embedding", .length = strlen("embedding")}; - query.query_vector = - (ZVecByteArray){.data = (uint8_t *)vector1, .length = 3 * sizeof(float)}; - query.topk = 10; - query.filter = (ZVecString){.data = "", .length = 0}; - query.include_vector = true; - query.include_doc_id = true; - query.output_fields.strings = NULL; - query.output_fields.count = 0; + ZVecVectorQuery *query = zvec_vector_query_create(); + if (!query) { + fprintf(stderr, "Failed to create vector query\n"); + zvec_collection_destroy(collection); + return 1; + } + + zvec_vector_query_set_field_name(query, "embedding"); + zvec_vector_query_set_query_vector(query, vector1, 3 * sizeof(float)); + zvec_vector_query_set_topk(query, 10); + zvec_vector_query_set_filter(query, ""); + zvec_vector_query_set_include_vector(query, true); + zvec_vector_query_set_include_doc_id(query, true); ZVecDoc **results = NULL; size_t result_count = 0; - error = zvec_collection_query(collection, &query, &results, &result_count); + error = zvec_collection_query(collection, (const ZVecVectorQuery *)query, + &results, &result_count); if (error != ZVEC_OK) { char *error_msg = NULL; @@ -205,9 +232,12 @@ int main() { printf("[ERROR] Query failed: %s\n", error_msg ? error_msg : "Unknown error"); free(error_msg); + zvec_vector_query_destroy(query); goto cleanup; } + zvec_vector_query_destroy(query); + printf("✓ Query successful - Returned %zu results\n", result_count); // Process query results diff --git a/examples/c/collection_schema_example.c b/examples/c/collection_schema_example.c index 183cc270..2d49b6d0 100644 --- a/examples/c/collection_schema_example.c +++ b/examples/c/collection_schema_example.c @@ -49,25 +49,30 @@ int main() { printf("✓ Collection schema created successfully\n"); // 2. 
Set schema properties - schema->max_doc_count_per_segment = 1000000; + zvec_collection_schema_set_max_doc_count_per_segment(schema, 1000000); printf("✓ Set max documents per segment: %llu\n", - (unsigned long long)schema->max_doc_count_per_segment); + (unsigned long long) + zvec_collection_schema_get_max_doc_count_per_segment(schema)); // 3. Create index parameters - // clang-format off - ZVecIndexParams invert_params_val = ZVEC_INVERT_PARAMS(true, false); - // clang-format on - ZVecIndexParams *invert_params = &invert_params_val; - // clang-format off - ZVecIndexParams hnsw_params_val = ZVEC_HNSW_PARAMS(ZVEC_METRIC_TYPE_L2, 16, 200, 50, ZVEC_QUANTIZE_TYPE_UNDEFINED); - // clang-format on - ZVecIndexParams *hnsw_params = &hnsw_params_val; - - if (!invert_params || !hnsw_params) { - fprintf(stderr, "Failed to create index parameters\n"); + ZVecIndexParams *invert_params = + zvec_index_params_create(ZVEC_INDEX_TYPE_INVERT); + if (!invert_params) { + fprintf(stderr, "Failed to create invert index parameters\n"); zvec_collection_schema_destroy(schema); return 1; } + zvec_index_params_set_invert_params(invert_params, true, false); + + ZVecIndexParams *hnsw_params = zvec_index_params_create(ZVEC_INDEX_TYPE_HNSW); + if (!hnsw_params) { + fprintf(stderr, "Failed to create HNSW index parameters\n"); + zvec_index_params_destroy(invert_params); + zvec_collection_schema_destroy(schema); + return 1; + } + zvec_index_params_set_metric_type(hnsw_params, ZVEC_METRIC_TYPE_L2); + zvec_index_params_set_hnsw_params(hnsw_params, 16, 200); // 4. Create and add ID field (primary key) ZVecFieldSchema *id_field = @@ -125,15 +130,22 @@ int main() { // printf("✓ Total field count: %zu\n", field_count); // 8. 
Create collection with schema - ZVecCollectionOptions options = ZVEC_DEFAULT_OPTIONS(); + ZVecCollectionOptions *options = zvec_collection_options_create(); + if (!options) { + fprintf(stderr, "Failed to create collection options\n"); + zvec_collection_schema_destroy(schema); + return 1; + } ZVecCollection *collection = NULL; error = zvec_collection_create_and_open("./schema_example_collection", schema, - &options, &collection); + options, &collection); if (handle_error(error, "creating collection with schema") != ZVEC_OK) { + zvec_collection_options_destroy(options); zvec_collection_schema_destroy(schema); return 1; } + zvec_collection_options_destroy(options); printf("✓ Collection created successfully with schema\n"); // 9. Prepare test data @@ -208,25 +220,29 @@ int main() { } // 13. Query test - ZVecVectorQuery query = {0}; - query.field_name = - (ZVecString){.data = "embedding", .length = strlen("embedding")}; - query.query_vector = (ZVecByteArray){.data = (uint8_t *)vector1, - .length = 128 * sizeof(float)}; - query.topk = 5; - query.filter = (ZVecString){.data = "", .length = 0}; - query.include_vector = true; - query.include_doc_id = true; - query.output_fields.strings = NULL; - query.output_fields.count = 0; + ZVecVectorQuery *query = zvec_vector_query_create(); + if (!query) { + fprintf(stderr, "Failed to create vector query\n"); + zvec_collection_destroy(collection); + zvec_collection_schema_destroy(schema); + return 1; + } + zvec_vector_query_set_field_name(query, "embedding"); + zvec_vector_query_set_query_vector(query, vector1, 128 * sizeof(float)); + zvec_vector_query_set_topk(query, 5); + zvec_vector_query_set_filter(query, ""); + zvec_vector_query_set_include_vector(query, true); + zvec_vector_query_set_include_doc_id(query, true); ZVecDoc **results = NULL; size_t result_count = 0; - error = zvec_collection_query(collection, &query, &results, &result_count); + error = zvec_collection_query(collection, (const ZVecVectorQuery *)query, + &results, 
&result_count); if (error == ZVEC_OK) { printf("✓ Vector query successful - Returned %zu results\n", result_count); zvec_docs_free(results, result_count); } + zvec_vector_query_destroy(query); // 14. Cleanup resources zvec_collection_destroy(collection); diff --git a/examples/c/doc_example.c b/examples/c/doc_example.c index 8d8574bb..a3cc05ed 100644 --- a/examples/c/doc_example.c +++ b/examples/c/doc_example.c @@ -275,20 +275,24 @@ int main() { printf("✓ Collection schema created\n"); // 2. Create index parameters - // clang-format off - ZVecIndexParams invert_params_val = ZVEC_INVERT_PARAMS(true, false); - // clang-format on - ZVecIndexParams *invert_params = &invert_params_val; - // clang-format off - ZVecIndexParams hnsw_params_val = ZVEC_HNSW_PARAMS(ZVEC_METRIC_TYPE_L2, 16, 200, 50, ZVEC_QUANTIZE_TYPE_UNDEFINED); - // clang-format on - ZVecIndexParams *hnsw_params = &hnsw_params_val; - - if (!invert_params || !hnsw_params) { - fprintf(stderr, "Failed to create index parameters\n"); + ZVecIndexParams *invert_params = + zvec_index_params_create(ZVEC_INDEX_TYPE_INVERT); + if (!invert_params) { + fprintf(stderr, "Failed to create invert index parameters\n"); zvec_collection_schema_destroy(schema); return -1; } + zvec_index_params_set_invert_params(invert_params, true, false); + + ZVecIndexParams *hnsw_params = zvec_index_params_create(ZVEC_INDEX_TYPE_HNSW); + if (!hnsw_params) { + fprintf(stderr, "Failed to create HNSW index parameters\n"); + zvec_index_params_destroy(invert_params); + zvec_collection_schema_destroy(schema); + return -1; + } + zvec_index_params_set_metric_type(hnsw_params, ZVEC_METRIC_TYPE_L2); + zvec_index_params_set_hnsw_params(hnsw_params, 16, 200); // 3. Create fields for all data types printf("Creating fields for all data types...\n"); @@ -348,11 +352,17 @@ int main() { } // 4. 
Create collection - ZVecCollectionOptions options = ZVEC_DEFAULT_OPTIONS(); + ZVecCollectionOptions *options = zvec_collection_options_create(); + if (!options) { + fprintf(stderr, "Failed to create collection options\n"); + zvec_collection_schema_destroy(schema); + return -1; + } ZVecCollection *collection = NULL; error = zvec_collection_create_and_open("./doc_example_collection", schema, - &options, &collection); + options, &collection); + zvec_collection_options_destroy(options); if (handle_error(error, "creating collection") != ZVEC_OK) { zvec_collection_schema_destroy(schema); return -1; @@ -404,22 +414,25 @@ int main() { // Use the first document's vector for querying float query_vector[] = {0.0f, 0.0f, 0.0f}; - ZVecVectorQuery query = { - .field_name = - (ZVecString){.data = "vector_fp32", .length = strlen("vector_fp32")}, - .query_vector = (ZVecByteArray){.data = (uint8_t *)query_vector, - .length = 3 * sizeof(float)}, - .topk = 5, - .filter = (ZVecString){.data = "", .length = 0}, - .include_vector = true, - .include_doc_id = true, - .output_fields = {.strings = NULL, .count = 0}}; + ZVecVectorQuery *query = zvec_vector_query_create(); + if (!query) { + fprintf(stderr, "Failed to create vector query\n"); + zvec_collection_destroy(collection); + zvec_collection_schema_destroy(schema); + return -1; + } + zvec_vector_query_set_field_name(query, "vector_fp32"); + zvec_vector_query_set_query_vector(query, query_vector, 3 * sizeof(float)); + zvec_vector_query_set_topk(query, 5); + zvec_vector_query_set_filter(query, ""); + zvec_vector_query_set_include_vector(query, true); + zvec_vector_query_set_include_doc_id(query, true); ZVecDoc **query_results = NULL; size_t result_count = 0; - error = - zvec_collection_query(collection, &query, &query_results, &result_count); + error = zvec_collection_query(collection, (const ZVecVectorQuery *)query, + &query_results, &result_count); if (handle_error(error, "querying documents") != ZVEC_OK) { query_results = NULL; 
result_count = 0; @@ -463,16 +476,13 @@ int main() { printf("\n=== Filter Query Test ===\n"); // Create filtered query - ZVecVectorQuery filtered_query = query; - filtered_query.filter = - (ZVecString){.data = "string_field = 'string_field_0'", - .length = strlen("string_field = 'string_field_0'")}; + zvec_vector_query_set_filter(query, "string_field = 'string_field_0'"); ZVecDoc **filtered_results = NULL; size_t filtered_count = 0; - error = zvec_collection_query(collection, &filtered_query, &filtered_results, - &filtered_count); + error = zvec_collection_query(collection, (const ZVecVectorQuery *)query, + &filtered_results, &filtered_count); if (handle_error(error, "filtered querying") == ZVEC_OK) { printf("Filtered query returned %zu results\n", filtered_count); diff --git a/examples/c/field_schema_example.c b/examples/c/field_schema_example.c index c41d0817..ea73f42a 100644 --- a/examples/c/field_schema_example.c +++ b/examples/c/field_schema_example.c @@ -49,22 +49,40 @@ int main() { printf("✓ Collection schema created successfully\n"); // 2. 
Create different types of index parameters - // clang-format off - ZVecIndexParams invert_params_val = ZVEC_INVERT_PARAMS(true, false); - // clang-format on - ZVecIndexParams *invert_params = &invert_params_val; - // clang-format off - ZVecIndexParams hnsw_params_val = ZVEC_HNSW_PARAMS(ZVEC_METRIC_TYPE_COSINE, 16, 200, 50, ZVEC_QUANTIZE_TYPE_UNDEFINED); - // clang-format on - ZVecIndexParams *hnsw_params = &hnsw_params_val; - // clang-format off - ZVecIndexParams flat_params_val = ZVEC_FLAT_PARAMS( - ZVEC_METRIC_TYPE_L2, ZVEC_QUANTIZE_TYPE_UNDEFINED); - // clang-format on - ZVecIndexParams *flat_params = &flat_params_val; + ZVecIndexParams *invert_params = + zvec_index_params_create(ZVEC_INDEX_TYPE_INVERT); + if (!invert_params) { + fprintf(stderr, "Failed to create invert index parameters\n"); + zvec_collection_schema_destroy(schema); + return -1; + } + zvec_index_params_set_invert_params(invert_params, true, false); + + ZVecIndexParams *hnsw_params = zvec_index_params_create(ZVEC_INDEX_TYPE_HNSW); + if (!hnsw_params) { + fprintf(stderr, "Failed to create HNSW index parameters\n"); + zvec_index_params_destroy(invert_params); + zvec_collection_schema_destroy(schema); + return -1; + } + zvec_index_params_set_metric_type(hnsw_params, ZVEC_METRIC_TYPE_COSINE); + zvec_index_params_set_hnsw_params(hnsw_params, 16, 200); + + ZVecIndexParams *flat_params = zvec_index_params_create(ZVEC_INDEX_TYPE_FLAT); + if (!flat_params) { + fprintf(stderr, "Failed to create Flat index parameters\n"); + zvec_index_params_destroy(invert_params); + zvec_index_params_destroy(hnsw_params); + zvec_collection_schema_destroy(schema); + return -1; + } + zvec_index_params_set_metric_type(flat_params, ZVEC_METRIC_TYPE_L2); if (!invert_params || !hnsw_params || !flat_params) { fprintf(stderr, "Failed to create index parameters\n"); + zvec_index_params_destroy(invert_params); + zvec_index_params_destroy(hnsw_params); + zvec_index_params_destroy(flat_params); zvec_collection_schema_destroy(schema); 
return -1; } @@ -154,11 +172,17 @@ int main() { } // 5. Create collection with the schema - ZVecCollectionOptions options = ZVEC_DEFAULT_OPTIONS(); + ZVecCollectionOptions *options = zvec_collection_options_create(); + if (!options) { + fprintf(stderr, "Failed to create collection options\n"); + zvec_collection_schema_destroy(schema); + return -1; + } ZVecCollection *collection = NULL; error = zvec_collection_create_and_open("./field_example_collection", schema, - &options, &collection); + options, &collection); + zvec_collection_options_destroy(options); if (handle_error(error, "creating collection") != ZVEC_OK) { zvec_collection_schema_destroy(schema); return -1; @@ -251,25 +275,27 @@ int main() { printf("✓ Collection flushed\n"); // Test vector query on medium vector field - ZVecVectorQuery query = {0}; - query.field_name = - (ZVecString){.data = "medium_vector", .length = strlen("medium_vector")}; - query.query_vector = (ZVecByteArray){.data = (uint8_t *)medium_vec1, - .length = 128 * sizeof(float)}; - query.topk = 2; - query.filter = (ZVecString){.data = "", .length = 0}; - query.include_vector = false; - query.include_doc_id = true; - query.output_fields.strings = NULL; - query.output_fields.count = 0; + ZVecVectorQuery *query = zvec_vector_query_create(); + if (!query) { + fprintf(stderr, "Failed to create vector query\n"); + goto cleanup; + } + zvec_vector_query_set_field_name(query, "medium_vector"); + zvec_vector_query_set_query_vector(query, medium_vec1, 128 * sizeof(float)); + zvec_vector_query_set_topk(query, 2); + zvec_vector_query_set_filter(query, ""); + zvec_vector_query_set_include_vector(query, false); + zvec_vector_query_set_include_doc_id(query, true); ZVecDoc **results = NULL; size_t result_count = 0; - error = zvec_collection_query(collection, &query, &results, &result_count); + error = zvec_collection_query(collection, (const ZVecVectorQuery *)query, + &results, &result_count); if (error == ZVEC_OK) { printf("✓ Vector query successful - 
Found %zu results\n", result_count); zvec_docs_free(results, result_count); } + zvec_vector_query_destroy(query); // 9. Cleanup cleanup: diff --git a/examples/c/index_example.c b/examples/c/index_example.c index 72877263..7187901c 100644 --- a/examples/c/index_example.c +++ b/examples/c/index_example.c @@ -52,43 +52,96 @@ int main() { printf("Creating index parameters...\n"); // Inverted index parameters - // clang-format off - ZVecIndexParams invert_params_standard_val = ZVEC_INVERT_PARAMS(true, false); - ZVecIndexParams invert_params_extended_val = ZVEC_INVERT_PARAMS(true, true); - // clang-format on - ZVecIndexParams *invert_params_standard = &invert_params_standard_val; - ZVecIndexParams *invert_params_extended = &invert_params_extended_val; + ZVecIndexParams *invert_params_standard = + zvec_index_params_create(ZVEC_INDEX_TYPE_INVERT); + if (!invert_params_standard) { + fprintf(stderr, "Failed to create invert index parameters (standard)\n"); + zvec_collection_schema_destroy(schema); + return -1; + } + zvec_index_params_set_invert_params(invert_params_standard, true, false); + + ZVecIndexParams *invert_params_extended = + zvec_index_params_create(ZVEC_INDEX_TYPE_INVERT); + if (!invert_params_extended) { + fprintf(stderr, "Failed to create invert index parameters (extended)\n"); + zvec_index_params_destroy(invert_params_standard); + zvec_collection_schema_destroy(schema); + return -1; + } + zvec_index_params_set_invert_params(invert_params_extended, true, true); // HNSW index parameters with different configurations - // clang-format off - ZVecIndexParams hnsw_params_fast_val = ZVEC_HNSW_PARAMS( - ZVEC_METRIC_TYPE_L2, 16, 100, 50, ZVEC_QUANTIZE_TYPE_UNDEFINED); - ZVecIndexParams hnsw_params_balanced_val = ZVEC_HNSW_PARAMS( - ZVEC_METRIC_TYPE_COSINE, 32, 200, 100, ZVEC_QUANTIZE_TYPE_UNDEFINED); - ZVecIndexParams hnsw_params_accurate_val = ZVEC_HNSW_PARAMS( - ZVEC_METRIC_TYPE_IP, 64, 400, 200, ZVEC_QUANTIZE_TYPE_UNDEFINED); - // clang-format on - ZVecIndexParams 
*hnsw_params_fast = &hnsw_params_fast_val; - ZVecIndexParams *hnsw_params_balanced = &hnsw_params_balanced_val; - ZVecIndexParams *hnsw_params_accurate = &hnsw_params_accurate_val; + ZVecIndexParams *hnsw_params_fast = + zvec_index_params_create(ZVEC_INDEX_TYPE_HNSW); + if (!hnsw_params_fast) { + fprintf(stderr, "Failed to create HNSW index parameters (fast)\n"); + zvec_index_params_destroy(invert_params_standard); + zvec_index_params_destroy(invert_params_extended); + zvec_collection_schema_destroy(schema); + return -1; + } + zvec_index_params_set_metric_type(hnsw_params_fast, ZVEC_METRIC_TYPE_L2); + zvec_index_params_set_hnsw_params(hnsw_params_fast, 16, 100); + + ZVecIndexParams *hnsw_params_balanced = + zvec_index_params_create(ZVEC_INDEX_TYPE_HNSW); + if (!hnsw_params_balanced) { + fprintf(stderr, "Failed to create HNSW index parameters (balanced)\n"); + zvec_index_params_destroy(invert_params_standard); + zvec_index_params_destroy(invert_params_extended); + zvec_index_params_destroy(hnsw_params_fast); + zvec_collection_schema_destroy(schema); + return -1; + } + zvec_index_params_set_metric_type(hnsw_params_balanced, + ZVEC_METRIC_TYPE_COSINE); + zvec_index_params_set_hnsw_params(hnsw_params_balanced, 32, 200); + + ZVecIndexParams *hnsw_params_accurate = + zvec_index_params_create(ZVEC_INDEX_TYPE_HNSW); + if (!hnsw_params_accurate) { + fprintf(stderr, "Failed to create HNSW index parameters (accurate)\n"); + zvec_index_params_destroy(invert_params_standard); + zvec_index_params_destroy(invert_params_extended); + zvec_index_params_destroy(hnsw_params_fast); + zvec_index_params_destroy(hnsw_params_balanced); + zvec_collection_schema_destroy(schema); + return -1; + } + zvec_index_params_set_metric_type(hnsw_params_accurate, ZVEC_METRIC_TYPE_IP); + zvec_index_params_set_hnsw_params(hnsw_params_accurate, 64, 400); // Flat index parameters - // clang-format off - ZVecIndexParams flat_params_l2_val = ZVEC_FLAT_PARAMS( - ZVEC_METRIC_TYPE_L2, 
ZVEC_QUANTIZE_TYPE_UNDEFINED); - ZVecIndexParams flat_params_cosine_val = ZVEC_FLAT_PARAMS( - ZVEC_METRIC_TYPE_COSINE, ZVEC_QUANTIZE_TYPE_UNDEFINED); - // clang-format on - ZVecIndexParams *flat_params_l2 = &flat_params_l2_val; - ZVecIndexParams *flat_params_cosine = &flat_params_cosine_val; - - if (!invert_params_standard || !invert_params_extended || !hnsw_params_fast || - !hnsw_params_balanced || !hnsw_params_accurate || !flat_params_l2 || - !flat_params_cosine) { - fprintf(stderr, "Failed to create index parameters\n"); + ZVecIndexParams *flat_params_l2 = + zvec_index_params_create(ZVEC_INDEX_TYPE_FLAT); + if (!flat_params_l2) { + fprintf(stderr, "Failed to create Flat index parameters (L2)\n"); + zvec_index_params_destroy(invert_params_standard); + zvec_index_params_destroy(invert_params_extended); + zvec_index_params_destroy(hnsw_params_fast); + zvec_index_params_destroy(hnsw_params_balanced); + zvec_index_params_destroy(hnsw_params_accurate); + zvec_collection_schema_destroy(schema); + return -1; + } + zvec_index_params_set_metric_type(flat_params_l2, ZVEC_METRIC_TYPE_L2); + + ZVecIndexParams *flat_params_cosine = + zvec_index_params_create(ZVEC_INDEX_TYPE_FLAT); + if (!flat_params_cosine) { + fprintf(stderr, "Failed to create Flat index parameters (cosine)\n"); + zvec_index_params_destroy(invert_params_standard); + zvec_index_params_destroy(invert_params_extended); + zvec_index_params_destroy(hnsw_params_fast); + zvec_index_params_destroy(hnsw_params_balanced); + zvec_index_params_destroy(hnsw_params_accurate); + zvec_index_params_destroy(flat_params_l2); zvec_collection_schema_destroy(schema); return -1; } + zvec_index_params_set_metric_type(flat_params_cosine, + ZVEC_METRIC_TYPE_COSINE); // 3. Create fields with different index types printf("Creating fields with various index types...\n"); @@ -157,14 +210,19 @@ int main() { } // 4. 
Create collection - ZVecCollectionOptions options = ZVEC_DEFAULT_OPTIONS(); + ZVecCollectionOptions *options = zvec_collection_options_create(); + if (!options) { + fprintf(stderr, "Failed to create collection options\n"); + zvec_collection_schema_destroy(schema); + return -1; + } ZVecCollection *collection = NULL; error = zvec_collection_create_and_open("./index_example_collection", schema, - &options, &collection); + options, &collection); + zvec_collection_options_destroy(options); if (handle_error(error, "creating collection") != ZVEC_OK) { zvec_collection_schema_destroy(schema); - // Cleanup index parameters return -1; } printf("✓ Collection created successfully\n"); @@ -261,49 +319,53 @@ int main() { printf("Testing various index queries...\n"); // Test HNSW query (balanced) - ZVecVectorQuery hnsw_query = {0}; - hnsw_query.field_name = (ZVecString){.data = "balanced_vector", - .length = strlen("balanced_vector")}; - hnsw_query.query_vector = (ZVecByteArray){.data = (uint8_t *)balanced_vec[0], - .length = 128 * sizeof(float)}; - hnsw_query.topk = 2; - hnsw_query.filter = (ZVecString){.data = "", .length = 0}; - hnsw_query.include_vector = false; - hnsw_query.include_doc_id = true; - hnsw_query.output_fields.strings = NULL; - hnsw_query.output_fields.count = 0; + ZVecVectorQuery *hnsw_query = zvec_vector_query_create(); + if (!hnsw_query) { + fprintf(stderr, "Failed to create HNSW query\n"); + goto cleanup; + } + zvec_vector_query_set_field_name(hnsw_query, "balanced_vector"); + zvec_vector_query_set_query_vector(hnsw_query, balanced_vec[0], + 128 * sizeof(float)); + zvec_vector_query_set_topk(hnsw_query, 2); + zvec_vector_query_set_filter(hnsw_query, ""); + zvec_vector_query_set_include_vector(hnsw_query, false); + zvec_vector_query_set_include_doc_id(hnsw_query, true); ZVecDoc **hnsw_results = NULL; size_t hnsw_result_count = 0; - error = zvec_collection_query(collection, &hnsw_query, &hnsw_results, - &hnsw_result_count); + error = 
zvec_collection_query(collection, (const ZVecVectorQuery *)hnsw_query, + &hnsw_results, &hnsw_result_count); if (error == ZVEC_OK) { printf("✓ HNSW query successful - Found %zu results\n", hnsw_result_count); zvec_docs_free(hnsw_results, hnsw_result_count); } + zvec_vector_query_destroy(hnsw_query); // Test Flat query (exact) - ZVecVectorQuery flat_query = {0}; - flat_query.field_name = - (ZVecString){.data = "exact_vector", .length = strlen("exact_vector")}; - flat_query.query_vector = (ZVecByteArray){.data = (uint8_t *)exact_vec[0], - .length = 32 * sizeof(float)}; - flat_query.topk = 2; - flat_query.filter = (ZVecString){.data = "", .length = 0}; - flat_query.include_vector = false; - flat_query.include_doc_id = true; - flat_query.output_fields.strings = NULL; - flat_query.output_fields.count = 0; + ZVecVectorQuery *flat_query = zvec_vector_query_create(); + if (!flat_query) { + fprintf(stderr, "Failed to create Flat query\n"); + goto cleanup; + } + zvec_vector_query_set_field_name(flat_query, "exact_vector"); + zvec_vector_query_set_query_vector(flat_query, exact_vec[0], + 32 * sizeof(float)); + zvec_vector_query_set_topk(flat_query, 2); + zvec_vector_query_set_filter(flat_query, ""); + zvec_vector_query_set_include_vector(flat_query, false); + zvec_vector_query_set_include_doc_id(flat_query, true); ZVecDoc **flat_results = NULL; size_t flat_result_count = 0; - error = zvec_collection_query(collection, &flat_query, &flat_results, - &flat_result_count); + error = zvec_collection_query(collection, (const ZVecVectorQuery *)flat_query, + &flat_results, &flat_result_count); if (error == ZVEC_OK) { printf("✓ Flat (exact) query successful - Found %zu results\n", flat_result_count); zvec_docs_free(flat_results, flat_result_count); } + zvec_vector_query_destroy(flat_query); // 9. 
Performance comparison information printf("\nIndex Performance Characteristics:\n"); diff --git a/examples/c/optimized_example.c b/examples/c/optimized_example.c index 2f87c93d..86513797 100644 --- a/examples/c/optimized_example.c +++ b/examples/c/optimized_example.c @@ -70,17 +70,14 @@ int main() { printf("✓ Collection schema created\n"); // 2. Create optimized index parameters - // clang-format off - ZVecIndexParams hnsw_params_val = ZVEC_HNSW_PARAMS( - ZVEC_METRIC_TYPE_L2, 32, 200, 50, ZVEC_QUANTIZE_TYPE_UNDEFINED); - // clang-format on - ZVecIndexParams *hnsw_params = &hnsw_params_val; - + ZVecIndexParams *hnsw_params = zvec_index_params_create(ZVEC_INDEX_TYPE_HNSW); if (!hnsw_params) { - fprintf(stderr, "Failed to create HNSW parameters\n"); + fprintf(stderr, "Failed to create HNSW index parameters\n"); zvec_collection_schema_destroy(schema); return -1; } + zvec_index_params_set_metric_type(hnsw_params, ZVEC_METRIC_TYPE_L2); + zvec_index_params_set_hnsw_params(hnsw_params, 32, 200); // 3. Create fields with optimized configuration ZVecFieldSchema *id_field = @@ -112,12 +109,18 @@ int main() { printf("✓ Fields configured with indexes\n"); // 4. 
Create collection with optimized options - ZVecCollectionOptions options = ZVEC_DEFAULT_OPTIONS(); - options.enable_mmap = true; // Enable memory mapping for better performance + ZVecCollectionOptions *options = zvec_collection_options_create(); + if (!options) { + fprintf(stderr, "Failed to create collection options\n"); + goto cleanup_fields; + } + zvec_collection_options_set_enable_mmap( + options, true); // Enable memory mapping for better performance ZVecCollection *collection = NULL; error = zvec_collection_create_and_open("./optimized_example_collection", - schema, &options, &collection); + schema, options, &collection); + zvec_collection_options_destroy(options); if (handle_error(error, "creating collection") != ZVEC_OK) { goto cleanup_fields; } @@ -230,17 +233,18 @@ int main() { goto cleanup_collection; } - ZVecVectorQuery query = {0}; - query.field_name = - (ZVecString){.data = "embedding", .length = strlen("embedding")}; - query.query_vector = (ZVecByteArray){.data = (uint8_t *)query_vector, - .length = 128 * sizeof(float)}; - query.topk = 10; - query.filter = (ZVecString){.data = "", .length = 0}; - query.include_vector = false; - query.include_doc_id = true; - query.output_fields.strings = NULL; - query.output_fields.count = 0; + ZVecVectorQuery *query = zvec_vector_query_create(); + if (!query) { + fprintf(stderr, "Failed to create vector query\n"); + free(query_vector); + goto cleanup_collection; + } + zvec_vector_query_set_field_name(query, "embedding"); + zvec_vector_query_set_query_vector(query, query_vector, 128 * sizeof(float)); + zvec_vector_query_set_topk(query, 10); + zvec_vector_query_set_filter(query, ""); + zvec_vector_query_set_include_vector(query, false); + zvec_vector_query_set_include_doc_id(query, true); const int QUERY_COUNT = 100; start_time = clock(); @@ -249,7 +253,8 @@ int main() { ZVecDoc **results = NULL; size_t result_count = 0; - error = zvec_collection_query(collection, &query, &results, &result_count); + error = 
zvec_collection_query(collection, (const ZVecVectorQuery *)query, + &results, &result_count); if (error != ZVEC_OK) { char *error_msg = NULL; zvec_get_last_error(&error_msg); @@ -273,13 +278,15 @@ int main() { printf(" Queries per second: %.0f\n", 1000.0 / avg_query_time); free(query_vector); + zvec_vector_query_destroy(query); // 8. Memory usage information ZVecCollectionStats *stats = NULL; error = zvec_collection_get_stats(collection, &stats); if (error == ZVEC_OK && stats) { printf("Collection Statistics:\n"); - printf(" Document count: %llu\n", (unsigned long long)stats->doc_count); + printf(" Document count: %llu\n", + (unsigned long long)zvec_collection_stats_get_doc_count(stats)); zvec_collection_stats_destroy(stats); } diff --git a/src/binding/c/c_api.cc b/src/binding/c/c_api.cc index 43718882..427f6323 100644 --- a/src/binding/c/c_api.cc +++ b/src/binding/c/c_api.cc @@ -70,6 +70,16 @@ SET_LAST_ERROR(ZVEC_ERROR_UNKNOWN, std::string("Exception: ") + e.what()); \ } +// For functions returning ZVecErrorCode - complete try-catch wrapper +#define ZVEC_TRY_BEGIN_CODE ZVEC_TRY_BEGIN_VOID +#define ZVEC_CATCH_END_CODE(code_on_error) \ + } \ + catch (const std::exception &e) { \ + SET_LAST_ERROR(ZVEC_ERROR_UNKNOWN, std::string("Exception: ") + e.what()); \ + return code_on_error; \ + } \ + return ZVEC_OK; + // For functions returning pointer - complete try-catch wrapper // Usage: ZVEC_TRY_RETURN_NULL("error msg", code...) 
// Note: Use variadic macro to handle commas in template arguments @@ -359,6 +369,37 @@ int zvec_string_compare(const ZVecString *str1, const ZVecString *str2) { // Configuration-related functions implementation // ============================================================================= +// Internal structure - Console log configuration +struct ZVecConsoleLogConfig { + ZVecLogLevel level; +}; + +// Internal structure - File log configuration +struct ZVecFileLogConfig { + ZVecLogLevel level; + ZVecString *dir; + ZVecString *basename; + uint32_t file_size; + uint32_t overdue_days; +}; + +// Internal structure - Configuration data +struct ZVecConfigData { + uint64_t memory_limit_bytes; + + // log + ZVecLogType log_type; + void *log_config; // ZVecConsoleLogConfig* or ZVecFileLogConfig* + + // query + uint32_t query_thread_count; + float invert_to_forward_scan_ratio; + float brute_force_by_keys_ratio; + + // optimize + uint32_t optimize_thread_count; +}; + ZVecConsoleLogConfig *zvec_config_console_log_create(ZVecLogLevel level) { ZVecConsoleLogConfig *config = static_cast(malloc(sizeof(ZVecConsoleLogConfig))); @@ -391,30 +432,158 @@ ZVecFileLogConfig *zvec_config_file_log_create(ZVecLogLevel level, } config->level = level; - ZVecString *dir_str = zvec_string_create(dir); - ZVecString *basename_str = zvec_string_create(basename); + config->dir = zvec_string_create(dir); + config->basename = zvec_string_create(basename); - if (!dir_str || !basename_str) { - if (dir_str) zvec_free_string(dir_str); - if (basename_str) zvec_free_string(basename_str); + if (!config->dir || !config->basename) { + if (config->dir) zvec_free_string(config->dir); + if (config->basename) zvec_free_string(config->basename); free(config); SET_LAST_ERROR(ZVEC_ERROR_RESOURCE_EXHAUSTED, "Failed to create strings for file log config"); return nullptr; } - config->dir = *dir_str; - config->basename = *basename_str; config->file_size = file_size; config->overdue_days = overdue_days; - // Free the 
temporary string wrappers (data is copied by value) - free(dir_str); - free(basename_str); - return config; } +void zvec_config_console_log_destroy(ZVecConsoleLogConfig *config) { + free(const_cast(config)); +} + +void zvec_config_file_log_destroy(ZVecFileLogConfig *config) { + if (config) { + if (config->dir) zvec_free_string(config->dir); + if (config->basename) zvec_free_string(config->basename); + free(const_cast(config)); + } +} + +ZVecLogLevel zvec_config_console_log_get_level( + const ZVecConsoleLogConfig *config) { + if (!config) { + return ZVEC_LOG_LEVEL_WARN; + } + return config->level; +} + +ZVecErrorCode zvec_config_console_log_set_level(ZVecConsoleLogConfig *config, + ZVecLogLevel level) { + if (!config) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, "Config pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + config->level = level; + return ZVEC_OK; +} + +ZVecLogLevel zvec_config_file_log_get_level(const ZVecFileLogConfig *config) { + if (!config) { + return ZVEC_LOG_LEVEL_WARN; + } + return config->level; +} + +ZVecErrorCode zvec_config_file_log_set_level(ZVecFileLogConfig *config, + ZVecLogLevel level) { + if (!config) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, "Config pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + config->level = level; + return ZVEC_OK; +} + +const char *zvec_config_file_log_get_dir(const ZVecFileLogConfig *config) { + if (!config || !config->dir) { + return nullptr; + } + return config->dir->data; +} + +ZVecErrorCode zvec_config_file_log_set_dir(ZVecFileLogConfig *config, + const char *dir) { + if (!config || !dir) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Config or dir pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + if (config->dir) { + zvec_free_string(config->dir); + } + config->dir = zvec_string_create(dir); + if (!config->dir) { + SET_LAST_ERROR(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to create dir string"); + return ZVEC_ERROR_RESOURCE_EXHAUSTED; + } + return ZVEC_OK; 
+} + +const char *zvec_config_file_log_get_basename(const ZVecFileLogConfig *config) { + if (!config || !config->basename) { + return nullptr; + } + return config->basename->data; +} + +ZVecErrorCode zvec_config_file_log_set_basename(ZVecFileLogConfig *config, + const char *basename) { + if (!config || !basename) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Config or basename pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + if (config->basename) { + zvec_free_string(config->basename); + } + config->basename = zvec_string_create(basename); + if (!config->basename) { + SET_LAST_ERROR(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to create basename string"); + return ZVEC_ERROR_RESOURCE_EXHAUSTED; + } + return ZVEC_OK; +} + +uint32_t zvec_config_file_log_get_file_size(const ZVecFileLogConfig *config) { + if (!config) { + return 0; + } + return config->file_size; +} + +ZVecErrorCode zvec_config_file_log_set_file_size(ZVecFileLogConfig *config, + uint32_t file_size) { + if (!config) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, "Config pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + config->file_size = file_size; + return ZVEC_OK; +} + +uint32_t zvec_config_file_log_get_overdue_days( + const ZVecFileLogConfig *config) { + if (!config) { + return 0; + } + return config->overdue_days; +} + +ZVecErrorCode zvec_config_file_log_set_overdue_days(ZVecFileLogConfig *config, + uint32_t days) { + if (!config) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, "Config pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + config->overdue_days = days; + return ZVEC_OK; +} + ZVecConfigData *zvec_config_data_create(void) { ZVecConfigData *config = static_cast(malloc(sizeof(ZVecConfigData))); @@ -424,6 +593,7 @@ ZVecConfigData *zvec_config_data_create(void) { return nullptr; } + // Create default console log config ZVecConsoleLogConfig *log_config = zvec_config_console_log_create(ZVEC_LOG_LEVEL_WARN); if (!log_config) { @@ -447,55 +617,54 @@ 
ZVecConfigData *zvec_config_data_create(void) { return config; } -void zvec_config_console_log_destroy(ZVecConsoleLogConfig *config) { - free(config); -} - -void zvec_config_file_log_destroy(ZVecFileLogConfig *config) { - if (config) { - if (config->dir.data) free(config->dir.data); - if (config->basename.data) free(config->basename.data); - free(config); - } -} - void zvec_config_data_destroy(ZVecConfigData *config) { - if (config->log_config) { - if (config->log_type == ZVEC_LOG_TYPE_CONSOLE) { - zvec_config_console_log_destroy( - (ZVecConsoleLogConfig *)config->log_config); - } else { - zvec_config_file_log_destroy((ZVecFileLogConfig *)config->log_config); + if (config) { + if (config->log_config) { + if (config->log_type == ZVEC_LOG_TYPE_CONSOLE) { + zvec_config_console_log_destroy( + static_cast(config->log_config)); + } else { + zvec_config_file_log_destroy( + static_cast(config->log_config)); + } } + free(config); } - free(config); } ZVecErrorCode zvec_config_data_set_memory_limit(ZVecConfigData *config, uint64_t memory_limit_bytes) { if (!config) { - set_last_error("Config data pointer is null"); + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, "Config pointer is null"); return ZVEC_ERROR_INVALID_ARGUMENT; } - config->memory_limit_bytes = memory_limit_bytes; return ZVEC_OK; } +uint64_t zvec_config_data_get_memory_limit(const ZVecConfigData *config) { + if (!config) { + return 0; + } + return config->memory_limit_bytes; +} + ZVecErrorCode zvec_config_data_set_log_config(ZVecConfigData *config, ZVecLogType log_type, void *log_config) { if (!config || !log_config) { - set_last_error("Config data pointer is null"); + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Config or log_config pointer is null"); return ZVEC_ERROR_INVALID_ARGUMENT; } if (config->log_config) { if (config->log_type == ZVEC_LOG_TYPE_CONSOLE) { zvec_config_console_log_destroy( - (ZVecConsoleLogConfig *)config->log_config); + static_cast(config->log_config)); } else { - 
zvec_config_file_log_destroy((ZVecFileLogConfig *)config->log_config); + zvec_config_file_log_destroy( + static_cast(config->log_config)); } } @@ -504,28 +673,100 @@ ZVecErrorCode zvec_config_data_set_log_config(ZVecConfigData *config, return ZVEC_OK; } +ZVecLogType zvec_config_data_get_log_type(const ZVecConfigData *config) { + if (!config) { + return ZVEC_LOG_TYPE_CONSOLE; + } + return config->log_type; +} + +ZVecConsoleLogConfig *zvec_config_data_get_console_log_config( + const ZVecConfigData *config) { + if (!config || config->log_type != ZVEC_LOG_TYPE_CONSOLE) { + return nullptr; + } + return static_cast(config->log_config); +} + +ZVecFileLogConfig *zvec_config_data_get_file_log_config( + const ZVecConfigData *config) { + if (!config || config->log_type != ZVEC_LOG_TYPE_FILE) { + return nullptr; + } + return static_cast(config->log_config); +} + ZVecErrorCode zvec_config_data_set_query_thread_count(ZVecConfigData *config, uint32_t thread_count) { if (!config) { - set_last_error("Config data pointer is null"); + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, "Config pointer is null"); return ZVEC_ERROR_INVALID_ARGUMENT; } - config->query_thread_count = thread_count; return ZVEC_OK; } +uint32_t zvec_config_data_get_query_thread_count(const ZVecConfigData *config) { + if (!config) { + return 1; + } + return config->query_thread_count; +} + +ZVecErrorCode zvec_config_data_set_invert_to_forward_scan_ratio( + ZVecConfigData *config, float ratio) { + if (!config) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, "Config pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + config->invert_to_forward_scan_ratio = ratio; + return ZVEC_OK; +} + +float zvec_config_data_get_invert_to_forward_scan_ratio( + const ZVecConfigData *config) { + if (!config) { + return 0.0f; + } + return config->invert_to_forward_scan_ratio; +} + +ZVecErrorCode zvec_config_data_set_brute_force_by_keys_ratio( + ZVecConfigData *config, float ratio) { + if (!config) { + 
SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, "Config pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + config->brute_force_by_keys_ratio = ratio; + return ZVEC_OK; +} + +float zvec_config_data_get_brute_force_by_keys_ratio( + const ZVecConfigData *config) { + if (!config) { + return 0.0f; + } + return config->brute_force_by_keys_ratio; +} + ZVecErrorCode zvec_config_data_set_optimize_thread_count( ZVecConfigData *config, uint32_t thread_count) { if (!config) { - set_last_error("Config data pointer is null"); + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, "Config pointer is null"); return ZVEC_ERROR_INVALID_ARGUMENT; } - config->optimize_thread_count = thread_count; return ZVEC_OK; } +uint32_t zvec_config_data_get_optimize_thread_count( + const ZVecConfigData *config) { + if (!config) { + return 1; + } + return config->optimize_thread_count; +} + // ============================================================================= // Initialization and cleanup interface implementation @@ -545,45 +786,56 @@ ZVecErrorCode zvec_initialize(const ZVecConfigData *config) { zvec::GlobalConfig::ConfigData cpp_config{}; if (config) { - cpp_config.memory_limit_bytes = config->memory_limit_bytes; - cpp_config.query_thread_count = config->query_thread_count; + cpp_config.memory_limit_bytes = + zvec_config_data_get_memory_limit(config); + cpp_config.query_thread_count = + zvec_config_data_get_query_thread_count(config); cpp_config.invert_to_forward_scan_ratio = - config->invert_to_forward_scan_ratio; + zvec_config_data_get_invert_to_forward_scan_ratio(config); cpp_config.brute_force_by_keys_ratio = - config->brute_force_by_keys_ratio; - cpp_config.optimize_thread_count = config->optimize_thread_count; + zvec_config_data_get_brute_force_by_keys_ratio(config); + cpp_config.optimize_thread_count = + zvec_config_data_get_optimize_thread_count(config); // Set log configuration - if (config->log_config) { - std::shared_ptr log_config; + void *log_config = 
zvec_config_data_get_console_log_config(config); + if (!log_config) { + log_config = zvec_config_data_get_file_log_config(config); + } - switch (config->log_type) { + if (log_config) { + std::shared_ptr cpp_log_config; + + switch (zvec_config_data_get_log_type(config)) { case ZVEC_LOG_TYPE_CONSOLE: { ZVecConsoleLogConfig *console_config = - (ZVecConsoleLogConfig *)config->log_config; + static_cast(log_config); auto console_level = static_cast( - console_config->level); - log_config = + zvec_config_console_log_get_level(console_config)); + cpp_log_config = std::make_shared( console_level); break; } case ZVEC_LOG_TYPE_FILE: { ZVecFileLogConfig *file_config = - (ZVecFileLogConfig *)config->log_config; - auto file_level = - static_cast(file_config->level); - std::string dir(file_config->dir.data, file_config->dir.length); - std::string basename(file_config->basename.data, - file_config->basename.length); - log_config = std::make_shared( - file_level, dir, basename); + static_cast(log_config); + auto file_level = static_cast( + zvec_config_file_log_get_level(file_config)); + std::string dir(zvec_config_file_log_get_dir(file_config)); + std::string basename( + zvec_config_file_log_get_basename(file_config)); + cpp_log_config = + std::make_shared( + file_level, dir, basename, + zvec_config_file_log_get_file_size(file_config), + zvec_config_file_log_get_overdue_days(file_config)); break; } default: throw std::runtime_error("Unknown log type"); } - cpp_config.log_config = log_config; + cpp_config.log_config = cpp_log_config; } } else { // Initialize with default configuration @@ -788,50 +1040,42 @@ static zvec::QuantizeType convert_quantize_type(ZVecQuantizeType zvec_type) { return static_cast(zvec_type); } +// Forward declaration: convert C index params to C++ +static std::shared_ptr convert_c_index_params_to_cpp( + const ZVecIndexParams *params); + // Helper function: set field index params static zvec::Status set_field_index_params(zvec::FieldSchema::Ptr &field_schema, 
const ZVecFieldSchema *zvec_field) { - if (!zvec_field->has_index) { + if (!zvec_field_schema_has_index(zvec_field)) { return zvec::Status::OK(); } - const ZVecIndexParams *params = &zvec_field->index_params; + // Get the index params using getter - we need to access internal struct + // For this internal function, we can access the struct members since it's in + // the implementation We'll add a friend-like internal getter + ZVecIndexParams *index_params = nullptr; + // Use a hack to get the index_params - cast to access internal member + // This is safe because we're in the implementation file + struct InternalFieldSchema { + ZVecString *name; + ZVecDataType data_type; + bool nullable; + uint32_t dimension; + ZVecIndexParams *index_params; + bool has_index; + }; + index_params = + reinterpret_cast(zvec_field)->index_params; - switch (params->index_type) { - case ZVEC_INDEX_TYPE_HNSW: { - auto metric = convert_metric_type(params->metric_type); - auto quantize = convert_quantize_type(params->quantize_type); - auto index_params = std::make_shared( - metric, params->hnsw.m, params->hnsw.ef_construction, quantize); - field_schema->set_index_params(index_params); - break; - } - case ZVEC_INDEX_TYPE_FLAT: { - auto metric = convert_metric_type(params->metric_type); - auto quantize = convert_quantize_type(params->quantize_type); - auto index_params = - std::make_shared(metric, quantize); - field_schema->set_index_params(index_params); - break; - } - case ZVEC_INDEX_TYPE_INVERT: { - auto index_params = std::make_shared( - params->invert.enable_range_optimization, - params->invert.enable_extended_wildcard); - field_schema->set_index_params(index_params); - break; - } - case ZVEC_INDEX_TYPE_IVF: { - auto metric = convert_metric_type(params->metric_type); - auto quantize = convert_quantize_type(params->quantize_type); - auto index_params = std::make_shared( - metric, params->ivf.n_list, params->ivf.n_iters, params->ivf.use_soar, - quantize); - 
field_schema->set_index_params(index_params); - break; - } - default: - break; + if (!index_params) { + return zvec::Status::OK(); + } + + // Use the conversion helper function + auto cpp_params = convert_c_index_params_to_cpp(index_params); + if (cpp_params) { + field_schema->set_index_params(cpp_params); } return zvec::Status::OK(); @@ -858,6 +1102,18 @@ ZVecStringArray *zvec_string_array_create(size_t count) { return array; } +ZVecStringArray *zvec_string_array_create_from_strings(const char **strings, + size_t count) { + if (!strings || count == 0) { + return nullptr; + } + ZVecStringArray *array = zvec_string_array_create(count); + for (size_t i = 0; i < count; ++i) { + zvec_string_array_add(array, i, strings[i]); + } + return array; +} + void zvec_string_array_add(ZVecStringArray *array, size_t idx, const char *str) { if (idx >= array->count) return; @@ -1006,122 +1262,514 @@ void zvec_free_field_schema(ZVecFieldSchema *field_schema) { } // ============================================================================= -// Index parameters management interface implementation +// Index parameters management interface implementation (deprecated) +// These are deprecated in favor of the opaque pointer API // ============================================================================= +// Deprecated: Use zvec_index_params_create() instead void zvec_index_params_init(ZVecIndexParams *params, ZVecIndexType index_type, ZVecMetricType metric_type) { - if (!params) { - SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, - "Index params pointer cannot be null"); - return; - } - - // Zero-initialize the entire structure - memset(params, 0, sizeof(ZVecIndexParams)); - - params->index_type = index_type; - params->metric_type = metric_type; - params->quantize_type = ZVEC_QUANTIZE_TYPE_UNDEFINED; - - // Set default values based on index type - switch (index_type) { - case ZVEC_INDEX_TYPE_INVERT: - params->invert.enable_range_optimization = false; - 
params->invert.enable_extended_wildcard = false; - break; - - case ZVEC_INDEX_TYPE_HNSW: - params->hnsw.m = 16; - params->hnsw.ef_construction = 200; - params->hnsw.ef_search = 50; - break; - - case ZVEC_INDEX_TYPE_FLAT: - // No additional parameters for Flat - break; - - case ZVEC_INDEX_TYPE_IVF: - params->ivf.n_list = 100; - params->ivf.n_iters = 10; - params->ivf.use_soar = false; - params->ivf.n_probe = 10; - break; - - default: - SET_LAST_ERROR(ZVEC_ERROR_NOT_SUPPORTED, "Unsupported index type"); - break; - } + // This function is deprecated and should not be used + // Use zvec_index_params_create() instead + SET_LAST_ERROR( + ZVEC_ERROR_NOT_SUPPORTED, + "zvec_index_params_init is deprecated. Use zvec_index_params_create()"); } +// Deprecated: Use zvec_index_params_set_hnsw_params() instead void zvec_index_params_set_hnsw(ZVecIndexParams *params, int m, int ef_construction, int ef_search) { - if (!params || params->index_type != ZVEC_INDEX_TYPE_HNSW) { - SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, - "Invalid params or not HNSW index type"); - return; - } - params->hnsw.m = m; - params->hnsw.ef_construction = ef_construction; - params->hnsw.ef_search = ef_search; + SET_LAST_ERROR(ZVEC_ERROR_NOT_SUPPORTED, + "zvec_index_params_set_hnsw is deprecated. Use " + "zvec_index_params_set_hnsw_params()"); } +// Deprecated: Use zvec_index_params_set_ivf_params() instead void zvec_index_params_set_ivf(ZVecIndexParams *params, int n_list, int n_iters, bool use_soar, int n_probe) { - if (!params || params->index_type != ZVEC_INDEX_TYPE_IVF) { - SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, - "Invalid params or not IVF index type"); - return; - } - params->ivf.n_list = n_list; - params->ivf.n_iters = n_iters; - params->ivf.use_soar = use_soar; - params->ivf.n_probe = n_probe; + SET_LAST_ERROR(ZVEC_ERROR_NOT_SUPPORTED, + "zvec_index_params_set_ivf is deprecated. 
Use " + "zvec_index_params_set_ivf_params()"); } +// Deprecated: Use zvec_index_params_set_invert_params() instead void zvec_index_params_set_invert(ZVecIndexParams *params, bool enable_range_opt, bool enable_wildcard) { - if (!params || params->index_type != ZVEC_INDEX_TYPE_INVERT) { - SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, - "Invalid params or not INVERT index type"); - return; - } - params->invert.enable_range_optimization = enable_range_opt; - params->invert.enable_extended_wildcard = enable_wildcard; + SET_LAST_ERROR(ZVEC_ERROR_NOT_SUPPORTED, + "zvec_index_params_set_invert is deprecated. Use " + "zvec_index_params_set_invert_params()"); } // ============================================================================= -// FieldSchema management interface implementation +// ZVecIndexParams opaque pointer implementation // ============================================================================= -ZVecFieldSchema *zvec_field_schema_create(const char *name, - ZVecDataType data_type, bool nullable, - uint32_t dimension) { - if (!name) { - SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, "Field name cannot be null"); - return nullptr; - } +// Internal structure - holds C++ shared_ptr +struct ZVecIndexParams { + std::shared_ptr cpp_params; + ZVecIndexType index_type; + ZVecMetricType metric_type; + ZVecQuantizeType quantize_type; + + // Type-specific storage (only one is active based on index_type) + struct { + bool enable_range_optimization; + bool enable_extended_wildcard; + } invert; + + struct { + int m; + int ef_construction; + } hnsw; + + struct { + int n_list; + int n_iters; + bool use_soar; + } ivf; +}; - ZVecFieldSchema *schema = - static_cast(malloc(sizeof(ZVecFieldSchema))); - if (!schema) { - SET_LAST_ERROR(ZVEC_ERROR_RESOURCE_EXHAUSTED, - "Failed to allocate memory for ZVecFieldSchema"); - return nullptr; - } +// ============================================================================= +// ZVecFieldSchema opaque pointer implementation +// 
============================================================================= - schema->name = zvec_string_create(name); - if (!schema->name) { - free(schema); - SET_LAST_ERROR(ZVEC_ERROR_RESOURCE_EXHAUSTED, - "Failed to create string for field name"); - return nullptr; - } +// Internal structure - field schema with private members +struct ZVecFieldSchema { + ZVecString *name; + ZVecDataType data_type; + bool nullable; + uint32_t dimension; + ZVecIndexParams *index_params; // Owned by field schema + bool has_index; +}; + +// Internal structure - collection schema with private members +struct ZVecCollectionSchema { + ZVecString *name; + ZVecFieldSchema **fields; + size_t field_count; + size_t field_capacity; + uint64_t max_doc_count_per_segment; +}; - schema->data_type = data_type; +// ============================================================================= +// Configuration structures opaque pointer implementation +// ============================================================================= + +// Internal structure - QueryParams (base) +struct ZVecQueryParams { + ZVecIndexType index_type; + float radius; + bool is_linear; + bool is_using_refiner; +}; + +// Internal structure - HnswQueryParams +struct ZVecHnswQueryParams { + ZVecQueryParams base; + int ef; +}; + +// Internal structure - IVFQueryParams +struct ZVecIVFQueryParams { + ZVecQueryParams base; + int nprobe; + float scale_factor; +}; + +// Internal structure - FlatQueryParams +struct ZVecFlatQueryParams { + ZVecQueryParams base; + float scale_factor; +}; + +// Internal structure - VectorQuery +struct ZVecVectorQuery { + int topk; + ZVecString *field_name; + ZVecByteArray query_vector; + ZVecByteArray query_sparse_indices; + ZVecByteArray query_sparse_values; + ZVecString *filter; + bool include_vector; + bool include_doc_id; + ZVecStringArray *output_fields; + void *query_params; // Type-specific params (HnswQueryParams*, + // IVFQueryParams*, etc.) 
+ ZVecIndexType params_type; // To track the type of query_params +}; + +// Internal structure - GroupByVectorQuery +struct ZVecGroupByVectorQuery { + ZVecString *field_name; + ZVecByteArray query_vector; + ZVecByteArray query_sparse_indices; + ZVecByteArray query_sparse_values; + ZVecString *filter; + bool include_vector; + ZVecStringArray *output_fields; + ZVecString *group_by_field_name; + uint32_t group_count; + uint32_t group_topk; + void *query_params; // Type-specific params + ZVecIndexType params_type; // To track the type of query_params +}; + +// Internal structure - CollectionOptions +struct ZVecCollectionOptions { + bool enable_mmap; + size_t max_buffer_size; + bool read_only; + uint64_t max_doc_count_per_segment; +}; + +// Internal structure - CollectionStats +struct ZVecCollectionStats { + uint64_t doc_count; + ZVecString **index_names; + float *index_completeness; + size_t index_count; +}; + +ZVecIndexParams *zvec_index_params_create(ZVecIndexType index_type) { + ZVEC_TRY_RETURN_NULL( + "Failed to create ZVecIndexParams", + ZVecIndexParams *params = new ZVecIndexParams(); + params->index_type = index_type; + params->metric_type = ZVEC_METRIC_TYPE_L2; // Default + params->quantize_type = ZVEC_QUANTIZE_TYPE_UNDEFINED; + + // Initialize type-specific params with defaults + memset(¶ms->invert, 0, sizeof(params->invert)); + memset(¶ms->hnsw, 0, sizeof(params->hnsw)); + memset(¶ms->ivf, 0, sizeof(params->ivf)); + + // Set defaults based on index type + switch (index_type) { + case ZVEC_INDEX_TYPE_INVERT: + params->invert.enable_range_optimization = true; + params->invert.enable_extended_wildcard = false; + break; + case ZVEC_INDEX_TYPE_HNSW: + params->hnsw.m = 16; + params->hnsw.ef_construction = 200; + break; + case ZVEC_INDEX_TYPE_IVF: + params->ivf.n_list = 100; + params->ivf.n_iters = 10; + params->ivf.use_soar = false; + break; + case ZVEC_INDEX_TYPE_FLAT: + default: + break; + } + + return params;) + + return nullptr; +} + +void 
zvec_index_params_destroy(ZVecIndexParams *params) { + if (params) { + delete params; + } +} + +ZVecErrorCode zvec_index_params_set_metric_type(ZVecIndexParams *params, + ZVecMetricType metric_type) { + if (!params) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Index params pointer cannot be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + params->metric_type = metric_type; + return ZVEC_OK; +} + +ZVecMetricType zvec_index_params_get_metric_type( + const ZVecIndexParams *params) { + if (!params) { + return ZVEC_METRIC_TYPE_L2; // Default + } + return params->metric_type; +} + +ZVecErrorCode zvec_index_params_set_quantize_type( + ZVecIndexParams *params, ZVecQuantizeType quantize_type) { + if (!params) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Index params pointer cannot be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + params->quantize_type = quantize_type; + return ZVEC_OK; +} + +ZVecQuantizeType zvec_index_params_get_quantize_type( + const ZVecIndexParams *params) { + if (!params) { + return ZVEC_QUANTIZE_TYPE_UNDEFINED; + } + return params->quantize_type; +} + +ZVecIndexType zvec_index_params_get_type(const ZVecIndexParams *params) { + if (!params) { + return ZVEC_INDEX_TYPE_FLAT; // Default + } + return params->index_type; +} + +ZVecErrorCode zvec_index_params_set_hnsw_params(ZVecIndexParams *params, int m, + int ef_construction) { + if (!params || params->index_type != ZVEC_INDEX_TYPE_HNSW) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Invalid params or not HNSW index type"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + params->hnsw.m = m; + params->hnsw.ef_construction = ef_construction; + return ZVEC_OK; +} + +ZVecErrorCode zvec_index_params_get_hnsw_params(const ZVecIndexParams *params, + int *out_m, + int *out_ef_construction) { + if (!params || params->index_type != ZVEC_INDEX_TYPE_HNSW) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Invalid params or not HNSW index type"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + if 
(out_m) *out_m = params->hnsw.m; + if (out_ef_construction) *out_ef_construction = params->hnsw.ef_construction; + return ZVEC_OK; +} + +ZVecErrorCode zvec_index_params_set_ivf_params(ZVecIndexParams *params, + int n_list, int n_iters, + bool use_soar) { + if (!params || params->index_type != ZVEC_INDEX_TYPE_IVF) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Invalid params or not IVF index type"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + params->ivf.n_list = n_list; + params->ivf.n_iters = n_iters; + params->ivf.use_soar = use_soar; + return ZVEC_OK; +} + +ZVecErrorCode zvec_index_params_get_ivf_params(const ZVecIndexParams *params, + int *out_n_list, + int *out_n_iters, + bool *out_use_soar) { + if (!params || params->index_type != ZVEC_INDEX_TYPE_IVF) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Invalid params or not IVF index type"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + if (out_n_list) *out_n_list = params->ivf.n_list; + if (out_n_iters) *out_n_iters = params->ivf.n_iters; + if (out_use_soar) *out_use_soar = params->ivf.use_soar; + return ZVEC_OK; +} + +ZVecErrorCode zvec_index_params_set_invert_params(ZVecIndexParams *params, + bool enable_range_opt, + bool enable_wildcard) { + if (!params || params->index_type != ZVEC_INDEX_TYPE_INVERT) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Invalid params or not INVERT index type"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + params->invert.enable_range_optimization = enable_range_opt; + params->invert.enable_extended_wildcard = enable_wildcard; + return ZVEC_OK; +} + +ZVecErrorCode zvec_index_params_get_invert_params(const ZVecIndexParams *params, + bool *out_enable_range_opt, + bool *out_enable_wildcard) { + if (!params || params->index_type != ZVEC_INDEX_TYPE_INVERT) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Invalid params or not INVERT index type"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + if (out_enable_range_opt) + *out_enable_range_opt = 
params->invert.enable_range_optimization; + if (out_enable_wildcard) + *out_enable_wildcard = params->invert.enable_extended_wildcard; + return ZVEC_OK; +} + +// Helper function to convert C++ IndexParams to C ZVecIndexParams +static ZVecIndexParams *convert_cpp_index_params_to_c( + const std::shared_ptr &cpp_params) { + if (!cpp_params) { + return nullptr; + } + + ZVecIndexType c_type; + switch (cpp_params->type()) { + case zvec::IndexType::HNSW: + c_type = ZVEC_INDEX_TYPE_HNSW; + break; + case zvec::IndexType::IVF: + c_type = ZVEC_INDEX_TYPE_IVF; + break; + case zvec::IndexType::FLAT: + c_type = ZVEC_INDEX_TYPE_FLAT; + break; + case zvec::IndexType::INVERT: + c_type = ZVEC_INDEX_TYPE_INVERT; + break; + default: + c_type = ZVEC_INDEX_TYPE_FLAT; + break; + } + + ZVecIndexParams *params = zvec_index_params_create(c_type); + if (!params) return nullptr; + + params->cpp_params = cpp_params; + + // Extract metric and quantize types from VectorIndexParams if applicable + if (cpp_params->is_vector_index_type()) { + auto *vec_params = + dynamic_cast(cpp_params.get()); + if (vec_params) { + switch (vec_params->metric_type()) { + case zvec::MetricType::L2: + params->metric_type = ZVEC_METRIC_TYPE_L2; + break; + case zvec::MetricType::IP: + params->metric_type = ZVEC_METRIC_TYPE_IP; + break; + case zvec::MetricType::COSINE: + params->metric_type = ZVEC_METRIC_TYPE_COSINE; + break; + default: + params->metric_type = ZVEC_METRIC_TYPE_L2; + break; + } + // Note: quantize_type would need similar mapping if used + } + } + + // Extract type-specific parameters + switch (c_type) { + case ZVEC_INDEX_TYPE_HNSW: { + auto *hnsw = + dynamic_cast(cpp_params.get()); + if (hnsw) { + params->hnsw.m = hnsw->m(); + params->hnsw.ef_construction = hnsw->ef_construction(); + } + break; + } + case ZVEC_INDEX_TYPE_IVF: { + auto *ivf = dynamic_cast(cpp_params.get()); + if (ivf) { + params->ivf.n_list = ivf->n_list(); + params->ivf.n_iters = ivf->n_iters(); + params->ivf.use_soar = ivf->use_soar(); 
+ } + break; + } + case ZVEC_INDEX_TYPE_INVERT: { + auto *invert = + dynamic_cast(cpp_params.get()); + if (invert) { + params->invert.enable_range_optimization = + invert->enable_range_optimization(); + params->invert.enable_extended_wildcard = + invert->enable_extended_wildcard(); + } + break; + } + default: + break; + } + + return params; +} + +// Helper function to convert C ZVecIndexParams to C++ IndexParams +static std::shared_ptr convert_c_index_params_to_cpp( + const ZVecIndexParams *params) { + if (!params) { + return nullptr; + } + + zvec::MetricType metric = zvec::MetricType::L2; + switch (params->metric_type) { + case ZVEC_METRIC_TYPE_L2: + metric = zvec::MetricType::L2; + break; + case ZVEC_METRIC_TYPE_IP: + metric = zvec::MetricType::IP; + break; + case ZVEC_METRIC_TYPE_COSINE: + metric = zvec::MetricType::COSINE; + break; + default: + metric = zvec::MetricType::L2; + break; + } + + zvec::QuantizeType quantize = zvec::QuantizeType::UNDEFINED; + // Add quantize type mapping if needed + + switch (params->index_type) { + case ZVEC_INDEX_TYPE_HNSW: + return std::make_shared( + metric, params->hnsw.m, params->hnsw.ef_construction, quantize); + case ZVEC_INDEX_TYPE_IVF: + return std::make_shared( + metric, params->ivf.n_list, params->ivf.n_iters, params->ivf.use_soar, + quantize); + case ZVEC_INDEX_TYPE_FLAT: + return std::make_shared(metric, quantize); + case ZVEC_INDEX_TYPE_INVERT: + return std::make_shared( + params->invert.enable_range_optimization, + params->invert.enable_extended_wildcard); + default: + return std::make_shared(zvec::MetricType::L2); + } +} + +// ============================================================================= +// FieldSchema management interface implementation +// ============================================================================= + +ZVecFieldSchema *zvec_field_schema_create(const char *name, + ZVecDataType data_type, bool nullable, + uint32_t dimension) { + if (!name) { + 
SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, "Field name cannot be null"); + return nullptr; + } + + ZVecFieldSchema *schema = new ZVecFieldSchema(); + if (!schema) { + SET_LAST_ERROR(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to allocate memory for ZVecFieldSchema"); + return nullptr; + } + + schema->name = zvec_string_create(name); + if (!schema->name) { + delete schema; + SET_LAST_ERROR(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to create string for field name"); + return nullptr; + } + + schema->data_type = data_type; schema->nullable = nullable; schema->dimension = dimension; - memset(&schema->index_params, 0, sizeof(ZVecIndexParams)); + schema->index_params = nullptr; schema->has_index = false; return schema; @@ -1130,9 +1778,239 @@ ZVecFieldSchema *zvec_field_schema_create(const char *name, void zvec_field_schema_destroy(ZVecFieldSchema *schema) { if (schema) { zvec_free_string(schema->name); - // index_params is embedded, no need to free - free(schema); + if (schema->index_params) { + zvec_index_params_destroy(schema->index_params); + } + delete schema; + } +} + +// Getter functions +const char *zvec_field_schema_get_name(const ZVecFieldSchema *schema) { + if (!schema) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Field schema pointer cannot be null"); + return nullptr; + } + return zvec_string_c_str(schema->name); +} + +ZVecDataType zvec_field_schema_get_data_type(const ZVecFieldSchema *schema) { + if (!schema) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Field schema pointer cannot be null"); + return ZVEC_DATA_TYPE_UNDEFINED; } + return schema->data_type; +} + +bool zvec_field_schema_is_nullable(const ZVecFieldSchema *schema) { + if (!schema) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Field schema pointer cannot be null"); + return false; + } + return schema->nullable; +} + +ZVecErrorCode zvec_field_schema_set_nullable(ZVecFieldSchema *schema, + bool nullable) { + if (!schema) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Field schema 
pointer cannot be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + schema->nullable = nullable; + return ZVEC_OK; +} + +uint32_t zvec_field_schema_get_dimension(const ZVecFieldSchema *schema) { + if (!schema) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Field schema pointer cannot be null"); + return 0; + } + return schema->dimension; +} + +ZVecErrorCode zvec_field_schema_set_dimension(ZVecFieldSchema *schema, + uint32_t dimension) { + if (!schema) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Field schema pointer cannot be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + schema->dimension = dimension; + return ZVEC_OK; +} + +bool zvec_field_schema_has_index(const ZVecFieldSchema *schema) { + if (!schema) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Field schema pointer cannot be null"); + return false; + } + return schema->has_index; +} + +ZVecIndexType zvec_field_schema_get_index_type(const ZVecFieldSchema *schema) { + if (!schema) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Field schema pointer cannot be null"); + return ZVEC_INDEX_TYPE_UNDEFINED; + } + if (!schema->index_params) { + return ZVEC_INDEX_TYPE_UNDEFINED; + } + return schema->index_params->index_type; +} + +const ZVecIndexParams *zvec_field_schema_get_index_params( + const ZVecFieldSchema *schema) { + if (!schema) { + return nullptr; + } + return schema->index_params; +} + +bool zvec_field_schema_is_vector_field(const ZVecFieldSchema *schema) { + if (!schema) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Field schema pointer cannot be null"); + return false; + } + ZVecDataType data_type = schema->data_type; + return (data_type == ZVEC_DATA_TYPE_VECTOR_FP32 || + data_type == ZVEC_DATA_TYPE_VECTOR_FP64 || + data_type == ZVEC_DATA_TYPE_VECTOR_FP16 || + data_type == ZVEC_DATA_TYPE_VECTOR_BINARY32 || + data_type == ZVEC_DATA_TYPE_VECTOR_BINARY64 || + data_type == ZVEC_DATA_TYPE_VECTOR_INT4 || + data_type == ZVEC_DATA_TYPE_VECTOR_INT8 || + data_type == 
ZVEC_DATA_TYPE_VECTOR_INT16 || + data_type == ZVEC_DATA_TYPE_SPARSE_VECTOR_FP32 || + data_type == ZVEC_DATA_TYPE_SPARSE_VECTOR_FP16); +} + +bool zvec_field_schema_is_dense_vector(const ZVecFieldSchema *schema) { + if (!schema) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Field schema pointer cannot be null"); + return false; + } + ZVecDataType data_type = schema->data_type; + return (data_type == ZVEC_DATA_TYPE_VECTOR_FP32 || + data_type == ZVEC_DATA_TYPE_VECTOR_FP64 || + data_type == ZVEC_DATA_TYPE_VECTOR_FP16 || + data_type == ZVEC_DATA_TYPE_VECTOR_BINARY32 || + data_type == ZVEC_DATA_TYPE_VECTOR_BINARY64 || + data_type == ZVEC_DATA_TYPE_VECTOR_INT4 || + data_type == ZVEC_DATA_TYPE_VECTOR_INT8 || + data_type == ZVEC_DATA_TYPE_VECTOR_INT16); +} + +bool zvec_field_schema_is_sparse_vector(const ZVecFieldSchema *schema) { + if (!schema) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Field schema pointer cannot be null"); + return false; + } + ZVecDataType data_type = schema->data_type; + return (data_type == ZVEC_DATA_TYPE_SPARSE_VECTOR_FP32 || + data_type == ZVEC_DATA_TYPE_SPARSE_VECTOR_FP16); +} + +bool zvec_field_schema_is_array_type(const ZVecFieldSchema *schema) { + if (!schema) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Field schema pointer cannot be null"); + return false; + } + ZVecDataType data_type = schema->data_type; + return (data_type == ZVEC_DATA_TYPE_ARRAY_BINARY || + data_type == ZVEC_DATA_TYPE_ARRAY_STRING || + data_type == ZVEC_DATA_TYPE_ARRAY_BOOL || + data_type == ZVEC_DATA_TYPE_ARRAY_INT32 || + data_type == ZVEC_DATA_TYPE_ARRAY_INT64 || + data_type == ZVEC_DATA_TYPE_ARRAY_UINT32 || + data_type == ZVEC_DATA_TYPE_ARRAY_UINT64 || + data_type == ZVEC_DATA_TYPE_ARRAY_FLOAT || + data_type == ZVEC_DATA_TYPE_ARRAY_DOUBLE); +} + +ZVecDataType zvec_field_schema_get_element_data_type( + const ZVecFieldSchema *schema) { + if (!schema) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Field schema pointer cannot be null"); + return 
ZVEC_DATA_TYPE_UNDEFINED; + } + ZVecDataType data_type = schema->data_type; + switch (data_type) { + case ZVEC_DATA_TYPE_ARRAY_BINARY: + return ZVEC_DATA_TYPE_BINARY; + case ZVEC_DATA_TYPE_ARRAY_STRING: + return ZVEC_DATA_TYPE_STRING; + case ZVEC_DATA_TYPE_ARRAY_BOOL: + return ZVEC_DATA_TYPE_BOOL; + case ZVEC_DATA_TYPE_ARRAY_INT32: + return ZVEC_DATA_TYPE_INT32; + case ZVEC_DATA_TYPE_ARRAY_INT64: + return ZVEC_DATA_TYPE_INT64; + case ZVEC_DATA_TYPE_ARRAY_UINT32: + return ZVEC_DATA_TYPE_UINT32; + case ZVEC_DATA_TYPE_ARRAY_UINT64: + return ZVEC_DATA_TYPE_UINT64; + case ZVEC_DATA_TYPE_ARRAY_FLOAT: + return ZVEC_DATA_TYPE_FLOAT; + case ZVEC_DATA_TYPE_ARRAY_DOUBLE: + return ZVEC_DATA_TYPE_DOUBLE; + default: + return data_type; + } +} + +bool zvec_field_schema_has_invert_index(const ZVecFieldSchema *schema) { + if (!schema) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Field schema pointer cannot be null"); + return false; + } + // Invert index is for non-vector fields with index + if (zvec_field_schema_is_vector_field(schema)) { + return false; + } + return schema->has_index && schema->index_params && + schema->index_params->index_type == ZVEC_INDEX_TYPE_INVERT; +} + +// Helper function to check if a data type is a vector type +bool zvec_is_vector_data_type(ZVecDataType data_type) { + return (data_type == ZVEC_DATA_TYPE_VECTOR_FP32 || + data_type == ZVEC_DATA_TYPE_VECTOR_FP64 || + data_type == ZVEC_DATA_TYPE_VECTOR_FP16 || + data_type == ZVEC_DATA_TYPE_VECTOR_BINARY32 || + data_type == ZVEC_DATA_TYPE_VECTOR_BINARY64 || + data_type == ZVEC_DATA_TYPE_VECTOR_INT4 || + data_type == ZVEC_DATA_TYPE_VECTOR_INT8 || + data_type == ZVEC_DATA_TYPE_VECTOR_INT16 || + data_type == ZVEC_DATA_TYPE_SPARSE_VECTOR_FP32 || + data_type == ZVEC_DATA_TYPE_SPARSE_VECTOR_FP16); +} + +ZVecErrorCode zvec_field_schema_clear_index(ZVecFieldSchema *schema) { + if (!schema) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Field schema pointer cannot be null"); + return 
ZVEC_ERROR_INVALID_ARGUMENT; + } + + if (schema->index_params) { + zvec_index_params_destroy(schema->index_params); + schema->index_params = nullptr; + } + schema->has_index = false; + return ZVEC_OK; } ZVecErrorCode zvec_field_schema_set_index_params( @@ -1144,76 +2022,283 @@ ZVecErrorCode zvec_field_schema_set_index_params( } if (!index_params) { - memset(&schema->index_params, 0, sizeof(ZVecIndexParams)); + if (schema->index_params) { + zvec_index_params_destroy(schema->index_params); + schema->index_params = nullptr; + } schema->has_index = false; return ZVEC_OK; } - schema->index_params = *index_params; - schema->has_index = true; + // Clone the index_params (create a new copy) + if (schema->index_params) { + zvec_index_params_destroy(schema->index_params); + } + schema->index_params = zvec_index_params_create(index_params->index_type); + if (!schema->index_params) { + SET_LAST_ERROR(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to clone index params"); + return ZVEC_ERROR_RESOURCE_EXHAUSTED; + } + // Copy all fields using getter/setter API + ZVecErrorCode err = ZVEC_OK; + err = zvec_index_params_set_metric_type(schema->index_params, + index_params->metric_type); + if (err != ZVEC_OK) return err; + + err = zvec_index_params_set_quantize_type(schema->index_params, + index_params->quantize_type); + if (err != ZVEC_OK) return err; + + // Copy type-specific params + switch (index_params->index_type) { + case ZVEC_INDEX_TYPE_INVERT: + err = zvec_index_params_set_invert_params( + schema->index_params, index_params->invert.enable_range_optimization, + index_params->invert.enable_extended_wildcard); + break; + case ZVEC_INDEX_TYPE_HNSW: + err = zvec_index_params_set_hnsw_params( + schema->index_params, index_params->hnsw.m, + index_params->hnsw.ef_construction); + break; + case ZVEC_INDEX_TYPE_IVF: + err = zvec_index_params_set_ivf_params( + schema->index_params, index_params->ivf.n_list, + index_params->ivf.n_iters, index_params->ivf.use_soar); + break; + case 
ZVEC_INDEX_TYPE_FLAT: + default: + break; + } + + if (err != ZVEC_OK) return err; + + schema->has_index = true; return ZVEC_OK; } void zvec_field_schema_set_invert_index(ZVecFieldSchema *field_schema, const ZVecIndexParams *invert_params) { if (field_schema && invert_params) { - field_schema->index_params = *invert_params; - field_schema->index_params.index_type = ZVEC_INDEX_TYPE_INVERT; - field_schema->has_index = true; + if (field_schema->index_params) { + zvec_index_params_destroy(field_schema->index_params); + } + field_schema->index_params = + zvec_index_params_create(ZVEC_INDEX_TYPE_INVERT); + if (field_schema->index_params) { + field_schema->index_params->index_type = ZVEC_INDEX_TYPE_INVERT; + field_schema->index_params->metric_type = invert_params->metric_type; + field_schema->index_params->quantize_type = invert_params->quantize_type; + field_schema->index_params->invert.enable_range_optimization = + invert_params->invert.enable_range_optimization; + field_schema->index_params->invert.enable_extended_wildcard = + invert_params->invert.enable_extended_wildcard; + field_schema->has_index = true; + } } } void zvec_field_schema_set_hnsw_index(ZVecFieldSchema *field_schema, const ZVecIndexParams *hnsw_params) { if (field_schema && hnsw_params) { - field_schema->index_params = *hnsw_params; - field_schema->index_params.index_type = ZVEC_INDEX_TYPE_HNSW; - field_schema->has_index = true; + if (field_schema->index_params) { + zvec_index_params_destroy(field_schema->index_params); + } + field_schema->index_params = zvec_index_params_create(ZVEC_INDEX_TYPE_HNSW); + if (field_schema->index_params) { + field_schema->index_params->index_type = ZVEC_INDEX_TYPE_HNSW; + field_schema->index_params->metric_type = hnsw_params->metric_type; + field_schema->index_params->quantize_type = hnsw_params->quantize_type; + field_schema->index_params->hnsw.m = hnsw_params->hnsw.m; + field_schema->index_params->hnsw.ef_construction = + hnsw_params->hnsw.ef_construction; + 
field_schema->has_index = true; + } } } void zvec_field_schema_set_flat_index(ZVecFieldSchema *field_schema, const ZVecIndexParams *flat_params) { if (field_schema && flat_params) { - field_schema->index_params = *flat_params; - field_schema->index_params.index_type = ZVEC_INDEX_TYPE_FLAT; - field_schema->has_index = true; + if (field_schema->index_params) { + zvec_index_params_destroy(field_schema->index_params); + } + field_schema->index_params = zvec_index_params_create(ZVEC_INDEX_TYPE_FLAT); + if (field_schema->index_params) { + field_schema->index_params->index_type = ZVEC_INDEX_TYPE_FLAT; + field_schema->index_params->metric_type = flat_params->metric_type; + field_schema->index_params->quantize_type = flat_params->quantize_type; + field_schema->has_index = true; + } + } +} + +void zvec_field_schema_set_ivf_index(ZVecFieldSchema *field_schema, + const ZVecIndexParams *ivf_params) { + if (field_schema && ivf_params) { + if (field_schema->index_params) { + zvec_index_params_destroy(field_schema->index_params); + } + field_schema->index_params = zvec_index_params_create(ZVEC_INDEX_TYPE_IVF); + if (field_schema->index_params) { + field_schema->index_params->index_type = ZVEC_INDEX_TYPE_IVF; + field_schema->index_params->metric_type = ivf_params->metric_type; + field_schema->index_params->quantize_type = ivf_params->quantize_type; + field_schema->index_params->ivf.n_list = ivf_params->ivf.n_list; + field_schema->index_params->ivf.n_iters = ivf_params->ivf.n_iters; + field_schema->index_params->ivf.use_soar = ivf_params->ivf.use_soar; + field_schema->has_index = true; + } + } +} + +static void zvec_field_schema_cleanup(ZVecFieldSchema *field_schema) { + if (!field_schema) return; + + zvec_free_string(field_schema->name); + field_schema->name = nullptr; + if (field_schema->index_params) { + zvec_index_params_destroy(field_schema->index_params); + field_schema->index_params = nullptr; + } +} + +// 
============================================================================= +// CollectionOptions management interface implementation +// ============================================================================= + +// ============================================================================= +// CollectionOptions functions implementation +// ============================================================================= + +ZVecCollectionOptions *zvec_collection_options_create(void) { + ZVEC_TRY_RETURN_NULL( + "Failed to create ZVecCollectionOptions", + ZVecCollectionOptions *options = new ZVecCollectionOptions(); + options->enable_mmap = true; + options->max_buffer_size = zvec::DEFAULT_MAX_BUFFER_SIZE; + options->read_only = false; + options->max_doc_count_per_segment = zvec::MAX_DOC_COUNT_PER_SEGMENT; + return options;) + return nullptr; +} + +void zvec_collection_options_destroy(ZVecCollectionOptions *options) { + if (options) { + delete options; + } +} + +ZVecErrorCode zvec_collection_options_set_enable_mmap( + ZVecCollectionOptions *options, bool enable) { + if (!options) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Collection options pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + options->enable_mmap = enable; + return ZVEC_OK; +} + +bool zvec_collection_options_get_enable_mmap( + const ZVecCollectionOptions *options) { + if (!options) { + return true; // Default + } + return options->enable_mmap; +} + +ZVecErrorCode zvec_collection_options_set_max_buffer_size( + ZVecCollectionOptions *options, size_t size) { + if (!options) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Collection options pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + options->max_buffer_size = size; + return ZVEC_OK; +} + +size_t zvec_collection_options_get_max_buffer_size( + const ZVecCollectionOptions *options) { + if (!options) { + return zvec::DEFAULT_MAX_BUFFER_SIZE; // Default + } + return options->max_buffer_size; +} + +ZVecErrorCode 
zvec_collection_options_set_read_only( + ZVecCollectionOptions *options, bool read_only) { + if (!options) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Collection options pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + options->read_only = read_only; + return ZVEC_OK; +} + +bool zvec_collection_options_get_read_only( + const ZVecCollectionOptions *options) { + if (!options) { + return false; // Default + } + return options->read_only; +} + +ZVecErrorCode zvec_collection_options_set_max_doc_count_per_segment( + ZVecCollectionOptions *options, uint64_t count) { + if (!options) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Collection options pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; } + options->max_doc_count_per_segment = count; + return ZVEC_OK; } -void zvec_field_schema_set_ivf_index(ZVecFieldSchema *field_schema, - const ZVecIndexParams *ivf_params) { - if (field_schema && ivf_params) { - field_schema->index_params = *ivf_params; - field_schema->index_params.index_type = ZVEC_INDEX_TYPE_IVF; - field_schema->has_index = true; +uint64_t zvec_collection_options_get_max_doc_count_per_segment( + const ZVecCollectionOptions *options) { + if (!options) { + return zvec::MAX_DOC_COUNT_PER_SEGMENT; // Default } + return options->max_doc_count_per_segment; } -static void zvec_field_schema_cleanup(ZVecFieldSchema *field_schema) { - if (!field_schema) return; +// ============================================================================= +// CollectionStats functions implementation +// ============================================================================= - // index_params is embedded, no need to free - zvec_free_string(field_schema->name); - field_schema->name = nullptr; +uint64_t zvec_collection_stats_get_doc_count(const ZVecCollectionStats *stats) { + if (!stats) { + return 0; + } + return stats->doc_count; } -// ============================================================================= -// CollectionOptions management 
interface implementation -// ============================================================================= +size_t zvec_collection_stats_get_index_count(const ZVecCollectionStats *stats) { + if (!stats) { + return 0; + } + return stats->index_count; +} -void zvec_collection_options_init_default(ZVecCollectionOptions *options) { - if (!options) { - SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, - "Collection options pointer cannot be null"); - return; +const char *zvec_collection_stats_get_index_name( + const ZVecCollectionStats *stats, size_t index) { + if (!stats || !stats->index_names || index >= stats->index_count) { + return nullptr; } + return stats->index_names[index]->data; +} - options->enable_mmap = true; - options->max_buffer_size = zvec::DEFAULT_MAX_BUFFER_SIZE; - options->read_only = false; - options->max_doc_count_per_segment = zvec::MAX_DOC_COUNT_PER_SEGMENT; +float zvec_collection_stats_get_index_completeness( + const ZVecCollectionStats *stats, size_t index) { + if (!stats || !stats->index_completeness || index >= stats->index_count) { + return 0.0f; + } + return stats->index_completeness[index]; } // ============================================================================= @@ -1227,8 +2312,7 @@ ZVecCollectionSchema *zvec_collection_schema_create(const char *name) { return nullptr; } - ZVecCollectionSchema *schema = - static_cast(malloc(sizeof(ZVecCollectionSchema))); + ZVecCollectionSchema *schema = new ZVecCollectionSchema(); if (!schema) { SET_LAST_ERROR(ZVEC_ERROR_RESOURCE_EXHAUSTED, "Failed to allocate memory for ZVecCollectionSchema"); @@ -1237,7 +2321,7 @@ ZVecCollectionSchema *zvec_collection_schema_create(const char *name) { schema->name = zvec_string_create(name); if (!schema->name) { - free(schema); + delete schema; SET_LAST_ERROR(ZVEC_ERROR_RESOURCE_EXHAUSTED, "Failed to create string for collection name"); return nullptr; @@ -1262,8 +2346,18 @@ void zvec_collection_schema_destroy(ZVecCollectionSchema *schema) { free(schema->fields); } 
- free(schema); + delete schema; + } +} + +const char *zvec_collection_schema_get_name( + const ZVecCollectionSchema *schema) { + if (!schema) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Collection schema pointer cannot be null"); + return nullptr; } + return zvec_string_c_str(schema->name); } ZVecErrorCode zvec_collection_schema_add_field(ZVecCollectionSchema *schema, @@ -1274,18 +2368,22 @@ ZVecErrorCode zvec_collection_schema_add_field(ZVecCollectionSchema *schema, return ZVEC_ERROR_INVALID_ARGUMENT; } - if (!field || !field->name) { - SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, - "Field or field name cannot be null"); + if (!field) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, "Field pointer cannot be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + const char *field_name = zvec_field_schema_get_name(field); + if (!field_name) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, "Field name cannot be null"); return ZVEC_ERROR_INVALID_ARGUMENT; } for (size_t i = 0; i < schema->field_count; ++i) { - if (schema->fields[i]->name && field->name && - zvec_string_compare(schema->fields[i]->name, field->name) == 0) { - SET_LAST_ERROR( - ZVEC_ERROR_ALREADY_EXISTS, - std::string("Field '") + field->name->data + "' already exists"); + const char *existing_name = zvec_field_schema_get_name(schema->fields[i]); + if (existing_name && strcmp(existing_name, field_name) == 0) { + SET_LAST_ERROR(ZVEC_ERROR_ALREADY_EXISTS, + std::string("Field '") + field_name + "' already exists"); return ZVEC_ERROR_ALREADY_EXISTS; } } @@ -1316,9 +2414,9 @@ ZVecErrorCode zvec_collection_schema_add_field(ZVecCollectionSchema *schema, return ZVEC_OK; } -ZVecErrorCode zvec_collection_schema_add_fields(ZVecCollectionSchema *schema, - const ZVecFieldSchema *fields, - size_t field_count) { +ZVecErrorCode zvec_collection_schema_add_fields( + ZVecCollectionSchema *schema, const ZVecFieldSchema *const *fields, + size_t field_count) { if (!schema) { SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, 
"Collection schema pointer cannot be null"); @@ -1335,9 +2433,16 @@ ZVecErrorCode zvec_collection_schema_add_fields(ZVecCollectionSchema *schema, return ZVEC_OK; } + // Validate all fields first for (size_t i = 0; i < field_count; ++i) { - const ZVecFieldSchema &field = fields[i]; - if (!field.name || !field.name->data || field.name->length == 0) { + if (!fields[i]) { + SET_LAST_ERROR( + ZVEC_ERROR_INVALID_ARGUMENT, + std::string("Field at index ") + std::to_string(i) + " is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + const char *field_name = zvec_field_schema_get_name(fields[i]); + if (!field_name || strlen(field_name) == 0) { SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, std::string("Field at index ") + std::to_string(i) + " has invalid name"); @@ -1369,31 +2474,48 @@ ZVecErrorCode zvec_collection_schema_add_fields(ZVecCollectionSchema *schema, schema->field_capacity = new_capacity; } + // Clone each field and add to schema for (size_t i = 0; i < field_count; ++i) { - const ZVecFieldSchema &src_field = fields[i]; + const ZVecFieldSchema *src_field = fields[i]; + const char *field_name = zvec_field_schema_get_name(src_field); + ZVecDataType data_type = zvec_field_schema_get_data_type(src_field); + bool nullable = zvec_field_schema_is_nullable(src_field); + uint32_t dimension = zvec_field_schema_get_dimension(src_field); + // Create a new field with the same properties ZVecFieldSchema *new_field = - static_cast(malloc(sizeof(ZVecFieldSchema))); + zvec_field_schema_create(field_name, data_type, nullable, dimension); if (!new_field) { + // Clean up previously created fields + for (size_t j = 0; j < i; ++j) { + zvec_field_schema_destroy( + schema->fields[schema->field_count - (i - j)]); + } SET_LAST_ERROR(ZVEC_ERROR_RESOURCE_EXHAUSTED, - "Failed to allocate memory for new field"); + "Failed to create new field"); return ZVEC_ERROR_RESOURCE_EXHAUSTED; } - new_field->name = zvec_string_copy(src_field.name); - if (!new_field->name) { - free(new_field); - 
SET_LAST_ERROR(ZVEC_ERROR_RESOURCE_EXHAUSTED, - "Failed to copy field name"); - return ZVEC_ERROR_RESOURCE_EXHAUSTED; + // Copy index params if present + if (zvec_field_schema_has_index(src_field)) { + // Internal access: we need to get the index_params pointer + // Use the same hack as in set_field_index_params + struct InternalFieldSchema { + ZVecString *name; + ZVecDataType data_type; + bool nullable; + uint32_t dimension; + ZVecIndexParams *index_params; + bool has_index; + }; + const ZVecIndexParams *src_index_params = + reinterpret_cast(src_field) + ->index_params; + if (src_index_params) { + zvec_field_schema_set_index_params(new_field, src_index_params); + } } - new_field->data_type = src_field.data_type; - new_field->nullable = src_field.nullable; - new_field->dimension = src_field.dimension; - new_field->index_params = src_field.index_params; - new_field->has_index = src_field.has_index; - schema->fields[schema->field_count] = new_field; schema->field_count++; } @@ -1469,8 +2591,8 @@ ZVecErrorCode zvec_collection_schema_remove_fields( bool found = false; for (size_t i = 0; i < schema->field_count; ++i) { - if (schema->fields[i]->name && - strcmp(schema->fields[i]->name->data, target_name.c_str()) == 0) { + const char *current_name = zvec_field_schema_get_name(schema->fields[i]); + if (current_name && strcmp(current_name, target_name.c_str()) == 0) { remove_indices.push_back(i); found = true; break; @@ -1491,136 +2613,557 @@ ZVecErrorCode zvec_collection_schema_remove_fields( error_msg += ", "; } } - SET_LAST_ERROR(ZVEC_ERROR_NOT_FOUND, error_msg); - return ZVEC_ERROR_NOT_FOUND; + SET_LAST_ERROR(ZVEC_ERROR_NOT_FOUND, error_msg); + return ZVEC_ERROR_NOT_FOUND; + } + + std::sort(remove_indices.begin(), remove_indices.end(), + std::greater()); + + for (size_t remove_index : remove_indices) { + zvec_field_schema_destroy(schema->fields[remove_index]); + + for (size_t j = remove_index; j < schema->field_count - 1; ++j) { + schema->fields[j] = schema->fields[j + 
1]; + } + + schema->field_count--; + } + + return ZVEC_OK; +} + +ZVecFieldSchema *zvec_collection_schema_find_field( + const ZVecCollectionSchema *schema, const char *field_name) { + if (!schema || !field_name) { + return nullptr; + } + + for (size_t i = 0; i < schema->field_count; ++i) { + const char *current_name = zvec_field_schema_get_name(schema->fields[i]); + if (current_name && strcmp(current_name, field_name) == 0) { + return schema->fields[i]; + } + } + + return nullptr; +} + +size_t zvec_collection_schema_get_field_count( + const ZVecCollectionSchema *schema) { + if (!schema) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Collection schema pointer cannot be null"); + return 0; + } + + return schema->field_count; +} + +ZVecFieldSchema *zvec_collection_schema_get_field( + const ZVecCollectionSchema *schema, size_t index) { + if (!schema) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Collection schema pointer cannot be null"); + return nullptr; + } + + if (index >= schema->field_count) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, "Field index out of bounds"); + return nullptr; + } + + return schema->fields[index]; +} + +ZVecErrorCode zvec_collection_schema_set_max_doc_count_per_segment( + ZVecCollectionSchema *schema, uint64_t max_doc_count) { + if (!schema) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Collection schema pointer cannot be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + schema->max_doc_count_per_segment = max_doc_count; + return ZVEC_OK; +} + +uint64_t zvec_collection_schema_get_max_doc_count_per_segment( + const ZVecCollectionSchema *schema) { + if (!schema) return 0; + return schema->max_doc_count_per_segment; +} + +ZVecErrorCode zvec_collection_schema_validate( + const ZVecCollectionSchema *schema, ZVecString **error_msg) { + if (!schema) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Collection schema pointer cannot be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + if (error_msg) { + *error_msg = nullptr; 
+ } + + if (!schema->name) { + if (error_msg) { + *error_msg = zvec_string_create("Collection name is required"); + } + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, "Collection name is required"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + if (schema->field_count == 0) { + if (error_msg) { + *error_msg = zvec_string_create("At least one field is required"); + } + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "At least one field is required"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + for (size_t i = 0; i < schema->field_count; ++i) { + auto field = schema->fields[i]; + if (!field) { + if (error_msg) { + *error_msg = zvec_string_create("Null field found"); + } + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, "Null field found"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + const char *field_name = zvec_field_schema_get_name(field); + if (!field_name || strlen(field_name) == 0) { + if (error_msg) { + *error_msg = zvec_string_create("Field name is required"); + } + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, "Field name is required"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + } + + return ZVEC_OK; +} + +ZVecErrorCode zvec_collection_schema_set_name(ZVecCollectionSchema *schema, + const char *name) { + if (!schema) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Collection schema pointer cannot be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + if (!name) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, "Name cannot be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + ZVEC_TRY_BEGIN_VOID + if (schema->name) { + zvec_free_string(schema->name); + } + schema->name = zvec_string_create(name); + ZVEC_CATCH_END_VOID + + return ZVEC_OK; +} + +bool zvec_collection_schema_has_field(const ZVecCollectionSchema *schema, + const char *field_name) { + if (!schema || !field_name) { + return false; + } + + for (size_t i = 0; i < schema->field_count; ++i) { + const char *name = zvec_field_schema_get_name(schema->fields[i]); + if (name && strcmp(name, field_name) == 0) { 
+ return true; + } + } + return false; +} + +ZVecErrorCode zvec_collection_schema_alter_field( + ZVecCollectionSchema *schema, const char *field_name, + const ZVecFieldSchema *new_field) { + if (!schema) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Collection schema pointer cannot be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + if (!field_name) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, "Field name cannot be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + if (!new_field) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, "New field cannot be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + ZVEC_TRY_BEGIN_CODE + // Find the field + for (size_t i = 0; i < schema->field_count; ++i) { + const char *name = zvec_field_schema_get_name(schema->fields[i]); + if (name && strcmp(name, field_name) == 0) { + // Clone the new field + ZVecFieldSchema *cloned = + zvec_field_schema_create(zvec_field_schema_get_name(new_field), + zvec_field_schema_get_data_type(new_field), + zvec_field_schema_is_nullable(new_field), + zvec_field_schema_get_dimension(new_field)); + + if (zvec_field_schema_has_index(new_field)) { + ZVecIndexType idx_type = zvec_field_schema_get_index_type(new_field); + ZVecIndexParams *cloned_params = zvec_index_params_create(idx_type); + const ZVecIndexParams *src_params = + zvec_field_schema_get_index_params(new_field); + + // Copy index parameters + switch (idx_type) { + case ZVEC_INDEX_TYPE_INVERT: { + bool enable_opt; + bool enable_wildcard; + zvec_index_params_get_invert_params(src_params, &enable_opt, + &enable_wildcard); + zvec_index_params_set_invert_params(cloned_params, enable_opt, + enable_wildcard); + break; + } + case ZVEC_INDEX_TYPE_HNSW: { + int m, ef_const; + zvec_index_params_get_hnsw_params(src_params, &m, &ef_const); + zvec_index_params_set_hnsw_params(cloned_params, m, ef_const); + break; + } + case ZVEC_INDEX_TYPE_IVF: { + int n_list, n_iters; + bool use_soar; + zvec_index_params_get_ivf_params(src_params, &n_list, 
&n_iters, + &use_soar); + zvec_index_params_set_ivf_params(cloned_params, n_list, n_iters, + use_soar); + break; + } + default: + break; + } + + zvec_field_schema_set_index_params(cloned, cloned_params); + zvec_index_params_destroy(cloned_params); + } + + // Destroy old field and replace with new one + zvec_field_schema_destroy(schema->fields[i]); + schema->fields[i] = cloned; + return ZVEC_OK; + } + } + + SET_LAST_ERROR(ZVEC_ERROR_NOT_FOUND, "Field not found"); + return ZVEC_ERROR_NOT_FOUND; + ZVEC_CATCH_END_CODE(ZVEC_ERROR_UNKNOWN) +} + +ZVecFieldSchema *zvec_collection_schema_get_forward_field( + const ZVecCollectionSchema *schema, const char *field_name) { + if (!schema || !field_name) { + return nullptr; + } + + for (size_t i = 0; i < schema->field_count; ++i) { + ZVecFieldSchema *field = schema->fields[i]; + const char *name = zvec_field_schema_get_name(field); + if (name && strcmp(name, field_name) == 0) { + // Check if it's a scalar field (not vector) + ZVecDataType data_type = zvec_field_schema_get_data_type(field); + if (!zvec_is_vector_data_type(data_type)) { + return field; + } + } + } + return nullptr; +} + +ZVecFieldSchema *zvec_collection_schema_get_vector_field( + const ZVecCollectionSchema *schema, const char *field_name) { + if (!schema || !field_name) { + return nullptr; + } + + for (size_t i = 0; i < schema->field_count; ++i) { + ZVecFieldSchema *field = schema->fields[i]; + const char *name = zvec_field_schema_get_name(field); + if (name && strcmp(name, field_name) == 0) { + // Check if it's a vector field + ZVecDataType data_type = zvec_field_schema_get_data_type(field); + if (zvec_is_vector_data_type(data_type)) { + return field; + } + } } + return nullptr; +} - std::sort(remove_indices.begin(), remove_indices.end(), - std::greater()); - - for (size_t remove_index : remove_indices) { - zvec_field_schema_destroy(schema->fields[remove_index]); +ZVecErrorCode zvec_collection_schema_get_forward_fields( + const ZVecCollectionSchema *schema, 
ZVecFieldSchema ***fields, + size_t *count) { + if (!schema || !fields || !count) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Schema, fields, and count cannot be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } - for (size_t j = remove_index; j < schema->field_count - 1; ++j) { - schema->fields[j] = schema->fields[j + 1]; + ZVEC_TRY_BEGIN_VOID + // Count scalar fields + size_t scalar_count = 0; + for (size_t i = 0; i < schema->field_count; ++i) { + ZVecDataType data_type = zvec_field_schema_get_data_type(schema->fields[i]); + if (!zvec_is_vector_data_type(data_type)) { + scalar_count++; } + } - schema->field_count--; + *fields = + (ZVecFieldSchema **)malloc(scalar_count * sizeof(ZVecFieldSchema *)); + if (!*fields) { + SET_LAST_ERROR(ZVEC_ERROR_RESOURCE_EXHAUSTED, "Failed to allocate memory"); + return ZVEC_ERROR_RESOURCE_EXHAUSTED; + } + + // Fill the array + size_t idx = 0; + for (size_t i = 0; i < schema->field_count; ++i) { + ZVecDataType data_type = zvec_field_schema_get_data_type(schema->fields[i]); + if (!zvec_is_vector_data_type(data_type)) { + (*fields)[idx++] = schema->fields[i]; + } } + *count = scalar_count; + ZVEC_CATCH_END_VOID + return ZVEC_OK; } -ZVecFieldSchema *zvec_collection_schema_find_field( - const ZVecCollectionSchema *schema, const char *field_name) { - if (!schema || !field_name) { - return nullptr; +ZVecErrorCode zvec_collection_schema_get_forward_fields_with_index( + const ZVecCollectionSchema *schema, ZVecFieldSchema ***fields, + size_t *count) { + if (!schema || !fields || !count) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Schema, fields, and count cannot be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; } + ZVEC_TRY_BEGIN_VOID + // Count scalar fields with index + size_t indexed_count = 0; for (size_t i = 0; i < schema->field_count; ++i) { - if (schema->fields[i]->name && - strcmp(schema->fields[i]->name->data, field_name) == 0) { - return schema->fields[i]; + ZVecFieldSchema *field = schema->fields[i]; + ZVecDataType 
data_type = zvec_field_schema_get_data_type(field); + if (!zvec_is_vector_data_type(data_type) && + zvec_field_schema_has_index(field)) { + indexed_count++; } } - return nullptr; -} + *fields = + (ZVecFieldSchema **)malloc(indexed_count * sizeof(ZVecFieldSchema *)); + if (!*fields) { + SET_LAST_ERROR(ZVEC_ERROR_RESOURCE_EXHAUSTED, "Failed to allocate memory"); + return ZVEC_ERROR_RESOURCE_EXHAUSTED; + } -size_t zvec_collection_schema_get_field_count( - const ZVecCollectionSchema *schema) { - if (!schema) { - SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, - "Collection schema pointer cannot be null"); - return 0; + // Fill the array + size_t idx = 0; + for (size_t i = 0; i < schema->field_count; ++i) { + ZVecFieldSchema *field = schema->fields[i]; + ZVecDataType data_type = zvec_field_schema_get_data_type(field); + if (!zvec_is_vector_data_type(data_type) && + zvec_field_schema_has_index(field)) { + (*fields)[idx++] = field; + } } - return schema->field_count; + *count = indexed_count; + ZVEC_CATCH_END_VOID + + return ZVEC_OK; } -ZVecFieldSchema *zvec_collection_schema_get_field( - const ZVecCollectionSchema *schema, size_t index) { - if (!schema) { +ZVecErrorCode zvec_collection_schema_get_all_field_names( + const ZVecCollectionSchema *schema, const char ***names, size_t *count) { + if (!schema || !names || !count) { SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, - "Collection schema pointer cannot be null"); - return nullptr; + "Schema, names, and count cannot be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; } - if (index >= schema->field_count) { - SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, "Field index out of bounds"); - return nullptr; + ZVEC_TRY_BEGIN_VOID + *count = schema->field_count; + *names = (const char **)malloc(schema->field_count * sizeof(const char *)); + if (!*names) { + SET_LAST_ERROR(ZVEC_ERROR_RESOURCE_EXHAUSTED, "Failed to allocate memory"); + return ZVEC_ERROR_RESOURCE_EXHAUSTED; } - return schema->fields[index]; + for (size_t i = 0; i < 
schema->field_count; ++i) { + (*names)[i] = zvec_field_schema_get_name(schema->fields[i]); + } + + ZVEC_CATCH_END_VOID + + return ZVEC_OK; } -ZVecErrorCode zvec_collection_schema_set_max_doc_count_per_segment( - ZVecCollectionSchema *schema, uint64_t max_doc_count) { - if (!schema) { +ZVecErrorCode zvec_collection_schema_get_vector_fields( + const ZVecCollectionSchema *schema, ZVecFieldSchema ***fields, + size_t *count) { + if (!schema || !fields || !count) { SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, - "Collection schema pointer cannot be null"); + "Schema, fields, and count cannot be null"); return ZVEC_ERROR_INVALID_ARGUMENT; } - schema->max_doc_count_per_segment = max_doc_count; + ZVEC_TRY_BEGIN_VOID + // Count vector fields + size_t vector_count = 0; + for (size_t i = 0; i < schema->field_count; ++i) { + ZVecDataType data_type = zvec_field_schema_get_data_type(schema->fields[i]); + if (zvec_is_vector_data_type(data_type)) { + vector_count++; + } + } + + *fields = + (ZVecFieldSchema **)malloc(vector_count * sizeof(ZVecFieldSchema *)); + if (!*fields) { + SET_LAST_ERROR(ZVEC_ERROR_RESOURCE_EXHAUSTED, "Failed to allocate memory"); + return ZVEC_ERROR_RESOURCE_EXHAUSTED; + } + + // Fill the array + size_t idx = 0; + for (size_t i = 0; i < schema->field_count; ++i) { + ZVecDataType data_type = zvec_field_schema_get_data_type(schema->fields[i]); + if (zvec_is_vector_data_type(data_type)) { + (*fields)[idx++] = schema->fields[i]; + } + } + + *count = vector_count; + ZVEC_CATCH_END_VOID + return ZVEC_OK; } -uint64_t zvec_collection_schema_get_max_doc_count_per_segment( - const ZVecCollectionSchema *schema) { - if (!schema) return 0; - return schema->max_doc_count_per_segment; +bool zvec_collection_schema_has_index(const ZVecCollectionSchema *schema, + const char *field_name) { + if (!schema || !field_name) { + return false; + } + + for (size_t i = 0; i < schema->field_count; ++i) { + ZVecFieldSchema *field = schema->fields[i]; + const char *name = 
zvec_field_schema_get_name(field); + if (name && strcmp(name, field_name) == 0) { + return zvec_field_schema_has_index(field); + } + } + return false; } -ZVecErrorCode zvec_collection_schema_validate( - const ZVecCollectionSchema *schema, ZVecString **error_msg) { +ZVecErrorCode zvec_collection_schema_add_index( + ZVecCollectionSchema *schema, const char *field_name, + const ZVecIndexParams *index_params) { if (!schema) { SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, "Collection schema pointer cannot be null"); return ZVEC_ERROR_INVALID_ARGUMENT; } - - if (error_msg) { - *error_msg = nullptr; + if (!field_name) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, "Field name cannot be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; } - - if (!schema->name) { - if (error_msg) { - *error_msg = zvec_string_create("Collection name is required"); - } - SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, "Collection name is required"); + if (!index_params) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, "Index params cannot be null"); return ZVEC_ERROR_INVALID_ARGUMENT; } - if (schema->field_count == 0) { - if (error_msg) { - *error_msg = zvec_string_create("At least one field is required"); + ZVEC_TRY_BEGIN_CODE + for (size_t i = 0; i < schema->field_count; ++i) { + ZVecFieldSchema *field = schema->fields[i]; + const char *name = zvec_field_schema_get_name(field); + if (name && strcmp(name, field_name) == 0) { + // Clone the index params + ZVecIndexType idx_type = zvec_index_params_get_type(index_params); + ZVecIndexParams *cloned_params = zvec_index_params_create(idx_type); + + // Copy parameters based on type + switch (idx_type) { + case ZVEC_INDEX_TYPE_INVERT: { + bool enable_opt, enable_wildcard; + zvec_index_params_get_invert_params(index_params, &enable_opt, + &enable_wildcard); + zvec_index_params_set_invert_params(cloned_params, enable_opt, + enable_wildcard); + break; + } + case ZVEC_INDEX_TYPE_HNSW: { + int m, ef_const; + zvec_index_params_get_hnsw_params(index_params, &m, 
&ef_const); + zvec_index_params_set_hnsw_params(cloned_params, m, ef_const); + break; + } + case ZVEC_INDEX_TYPE_IVF: { + int n_list, n_iters; + bool use_soar; + zvec_index_params_get_ivf_params(index_params, &n_list, &n_iters, + &use_soar); + zvec_index_params_set_ivf_params(cloned_params, n_list, n_iters, + use_soar); + break; + } + default: + break; + } + + zvec_field_schema_set_index_params(field, cloned_params); + zvec_index_params_destroy(cloned_params); + return ZVEC_OK; } + } + + SET_LAST_ERROR(ZVEC_ERROR_NOT_FOUND, "Field not found"); + return ZVEC_ERROR_NOT_FOUND; + ZVEC_CATCH_END_CODE(ZVEC_ERROR_UNKNOWN) +} + +ZVecErrorCode zvec_collection_schema_drop_index(ZVecCollectionSchema *schema, + const char *field_name) { + if (!schema) { SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, - "At least one field is required"); + "Collection schema pointer cannot be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + if (!field_name) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, "Field name cannot be null"); return ZVEC_ERROR_INVALID_ARGUMENT; } + ZVEC_TRY_BEGIN_CODE for (size_t i = 0; i < schema->field_count; ++i) { - auto field = schema->fields[i]; - if (!field) { - if (error_msg) { - *error_msg = zvec_string_create("Null field found"); - } - SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, "Null field found"); - return ZVEC_ERROR_INVALID_ARGUMENT; - } - - if (!field->name) { - if (error_msg) { - *error_msg = zvec_string_create("Field name is required"); - } - SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, "Field name is required"); - return ZVEC_ERROR_INVALID_ARGUMENT; + ZVecFieldSchema *field = schema->fields[i]; + const char *name = zvec_field_schema_get_name(field); + if (name && strcmp(name, field_name) == 0) { + // Clear the index + zvec_field_schema_clear_index(field); + return ZVEC_OK; } } - return ZVEC_OK; + SET_LAST_ERROR(ZVEC_ERROR_NOT_FOUND, "Field not found"); + return ZVEC_ERROR_NOT_FOUND; + ZVEC_CATCH_END_CODE(ZVEC_ERROR_UNKNOWN) } void 
zvec_collection_schema_cleanup(ZVecCollectionSchema *schema) { @@ -1633,9 +3176,9 @@ void zvec_collection_schema_cleanup(ZVecCollectionSchema *schema) { if (schema->fields) { for (size_t i = 0; i < schema->field_count; ++i) { - zvec_field_schema_cleanup(schema->fields[i]); + zvec_field_schema_destroy(schema->fields[i]); } - delete[] schema->fields; + free(schema->fields); schema->fields = nullptr; schema->field_count = 0; } @@ -1777,17 +3320,17 @@ const char *zvec_metric_type_to_string(ZVecMetricType metric_type) { } bool check_is_vector_field(const ZVecFieldSchema &zvec_field) { - bool is_vector_field = - (zvec_field.data_type == ZVEC_DATA_TYPE_VECTOR_FP32 || - zvec_field.data_type == ZVEC_DATA_TYPE_VECTOR_FP64 || - zvec_field.data_type == ZVEC_DATA_TYPE_VECTOR_FP16 || - zvec_field.data_type == ZVEC_DATA_TYPE_VECTOR_BINARY32 || - zvec_field.data_type == ZVEC_DATA_TYPE_VECTOR_BINARY64 || - zvec_field.data_type == ZVEC_DATA_TYPE_VECTOR_INT4 || - zvec_field.data_type == ZVEC_DATA_TYPE_VECTOR_INT8 || - zvec_field.data_type == ZVEC_DATA_TYPE_VECTOR_INT16 || - zvec_field.data_type == ZVEC_DATA_TYPE_SPARSE_VECTOR_FP32 || - zvec_field.data_type == ZVEC_DATA_TYPE_SPARSE_VECTOR_FP16); + ZVecDataType data_type = zvec_field_schema_get_data_type(&zvec_field); + bool is_vector_field = (data_type == ZVEC_DATA_TYPE_VECTOR_FP32 || + data_type == ZVEC_DATA_TYPE_VECTOR_FP64 || + data_type == ZVEC_DATA_TYPE_VECTOR_FP16 || + data_type == ZVEC_DATA_TYPE_VECTOR_BINARY32 || + data_type == ZVEC_DATA_TYPE_VECTOR_BINARY64 || + data_type == ZVEC_DATA_TYPE_VECTOR_INT4 || + data_type == ZVEC_DATA_TYPE_VECTOR_INT8 || + data_type == ZVEC_DATA_TYPE_VECTOR_INT16 || + data_type == ZVEC_DATA_TYPE_SPARSE_VECTOR_FP32 || + data_type == ZVEC_DATA_TYPE_SPARSE_VECTOR_FP16); return is_vector_field; } @@ -2034,30 +3577,33 @@ static std::vector convert_zvec_docs_to_internal( static zvec::Status convert_zvec_collection_schema_to_internal( const ZVecCollectionSchema *schema, zvec::CollectionSchema::Ptr 
&collection_schema) { - std::string coll_name(schema->name->data, schema->name->length); + std::string coll_name(zvec_string_c_str(schema->name), + zvec_string_length(schema->name)); collection_schema = std::make_shared(coll_name); collection_schema->set_max_doc_count_per_segment( schema->max_doc_count_per_segment); for (size_t i = 0; i < schema->field_count; ++i) { - const ZVecFieldSchema &zvec_field = *schema->fields[i]; - zvec::DataType data_type = convert_data_type(zvec_field.data_type); - std::string field_name = - std::string(zvec_field.name->data, zvec_field.name->length); + const ZVecFieldSchema *zvec_field = schema->fields[i]; + ZVecDataType field_data_type = zvec_field_schema_get_data_type(zvec_field); + zvec::DataType data_type = convert_data_type(field_data_type); + std::string field_name = zvec_field_schema_get_name(zvec_field); + bool nullable = zvec_field_schema_is_nullable(zvec_field); + uint32_t dimension = zvec_field_schema_get_dimension(zvec_field); zvec::FieldSchema::Ptr field_schema; - bool is_vector_field = check_is_vector_field(zvec_field); + bool is_vector_field = check_is_vector_field(*zvec_field); if (is_vector_field) { - field_schema = std::make_shared( - field_name, data_type, zvec_field.dimension, zvec_field.nullable); - } else { field_schema = std::make_shared(field_name, data_type, - zvec_field.nullable); + dimension, nullable); + } else { + field_schema = + std::make_shared(field_name, data_type, nullable); } - if (zvec_field.has_index) { - zvec::Status status = set_field_index_params(field_schema, &zvec_field); + if (zvec_field_schema_has_index(zvec_field)) { + zvec::Status status = set_field_index_params(field_schema, zvec_field); if (!status.ok()) { return status; } @@ -2073,78 +3619,121 @@ static zvec::Status convert_zvec_collection_schema_to_internal( } static zvec::Status convert_zvec_field_schema_to_internal( - const ZVecFieldSchema &zvec_field, zvec::FieldSchema::Ptr &field_schema) { + const ZVecFieldSchema *zvec_field, 
zvec::FieldSchema::Ptr &field_schema) { // Validate input - if (!zvec_field.name) { + if (!zvec_field) { + return zvec::Status::InvalidArgument("Field schema cannot be null"); + } + + const char *field_name_cstr = zvec_field_schema_get_name(zvec_field); + if (!field_name_cstr) { return zvec::Status::InvalidArgument("Field name cannot be null"); } - zvec::DataType data_type = convert_data_type(zvec_field.data_type); - if (data_type == zvec::DataType::UNDEFINED) { + ZVecDataType data_type = zvec_field_schema_get_data_type(zvec_field); + zvec::DataType data_type_internal = convert_data_type(data_type); + if (data_type_internal == zvec::DataType::UNDEFINED) { return zvec::Status::InvalidArgument("Invalid data type"); } - std::string field_name(zvec_field.name->data, zvec_field.name->length); - bool is_vector_field = check_is_vector_field(zvec_field); + std::string field_name(field_name_cstr); + bool nullable = zvec_field_schema_is_nullable(zvec_field); + uint32_t dimension = zvec_field_schema_get_dimension(zvec_field); + bool is_vector_field = check_is_vector_field(*zvec_field); if (is_vector_field) { field_schema = std::make_shared( - field_name, data_type, zvec_field.dimension, zvec_field.nullable); - - if (zvec_field.has_index) { - switch (zvec_field.index_params.index_type) { - case ZVEC_INDEX_TYPE_HNSW: { - auto metric = - convert_metric_type(zvec_field.index_params.metric_type); - auto quantize = - convert_quantize_type(zvec_field.index_params.quantize_type); - auto index_params = std::make_shared( - metric, zvec_field.index_params.hnsw.m, - zvec_field.index_params.hnsw.ef_construction, quantize); - field_schema->set_index_params(index_params); - break; - } - case ZVEC_INDEX_TYPE_FLAT: { - auto metric = - convert_metric_type(zvec_field.index_params.metric_type); - auto quantize = - convert_quantize_type(zvec_field.index_params.quantize_type); - auto index_params = - std::make_shared(metric, quantize); - field_schema->set_index_params(index_params); - break; - } - 
case ZVEC_INDEX_TYPE_IVF: { - auto metric = - convert_metric_type(zvec_field.index_params.metric_type); - auto quantize = - convert_quantize_type(zvec_field.index_params.quantize_type); - auto index_params = std::make_shared( - metric, zvec_field.index_params.ivf.n_list, - zvec_field.index_params.ivf.n_iters, - zvec_field.index_params.ivf.use_soar, quantize); - field_schema->set_index_params(index_params); - break; + field_name, data_type_internal, dimension, nullable); + + if (zvec_field_schema_has_index(zvec_field)) { + // Internal access to index_params + struct InternalFieldSchema { + ZVecString *name; + ZVecDataType data_type; + bool nullable; + uint32_t dimension; + ZVecIndexParams *index_params; + bool has_index; + }; + const ZVecIndexParams *index_params = + reinterpret_cast(zvec_field) + ->index_params; + + if (index_params) { + ZVecIndexType index_type = zvec_index_params_get_type(index_params); + ZVecMetricType metric_type = + zvec_index_params_get_metric_type(index_params); + ZVecQuantizeType quantize_type = + zvec_index_params_get_quantize_type(index_params); + + auto metric = convert_metric_type(metric_type); + auto quantize = convert_quantize_type(quantize_type); + + switch (index_type) { + case ZVEC_INDEX_TYPE_HNSW: { + int m, ef_construction; + zvec_index_params_get_hnsw_params(index_params, &m, + &ef_construction); + auto hnsw_params = std::make_shared( + metric, m, ef_construction, quantize); + field_schema->set_index_params(hnsw_params); + break; + } + case ZVEC_INDEX_TYPE_FLAT: { + auto flat_params = + std::make_shared(metric, quantize); + field_schema->set_index_params(flat_params); + break; + } + case ZVEC_INDEX_TYPE_IVF: { + int n_list, n_iters; + bool use_soar; + zvec_index_params_get_ivf_params(index_params, &n_list, &n_iters, + &use_soar); + auto ivf_params = std::make_shared( + metric, n_list, n_iters, use_soar, quantize); + field_schema->set_index_params(ivf_params); + break; + } + default: + field_schema->set_index_params( + 
std::make_shared(zvec::MetricType::L2)); + break; } - default: - field_schema->set_index_params( - std::make_shared(zvec::MetricType::L2)); - break; + } else { + field_schema->set_index_params( + std::make_shared(zvec::MetricType::L2)); } } else { field_schema->set_index_params( std::make_shared(zvec::MetricType::L2)); } } else { - field_schema = std::make_shared(field_name, data_type, - zvec_field.nullable); - - if (zvec_field.has_index && - zvec_field.index_params.index_type == ZVEC_INDEX_TYPE_INVERT) { - auto index_params = std::make_shared( - zvec_field.index_params.invert.enable_range_optimization, - zvec_field.index_params.invert.enable_extended_wildcard); - field_schema->set_index_params(index_params); + field_schema = std::make_shared( + field_name, data_type_internal, nullable); + + if (zvec_field_schema_has_index(zvec_field)) { + struct InternalFieldSchema { + ZVecString *name; + ZVecDataType data_type; + bool nullable; + uint32_t dimension; + ZVecIndexParams *index_params; + bool has_index; + }; + const ZVecIndexParams *index_params = + reinterpret_cast(zvec_field) + ->index_params; + + if (index_params && + zvec_index_params_get_type(index_params) == ZVEC_INDEX_TYPE_INVERT) { + bool enable_range_opt, enable_wildcard; + zvec_index_params_get_invert_params(index_params, &enable_range_opt, + &enable_wildcard); + auto invert_params = std::make_shared( + enable_range_opt, enable_wildcard); + field_schema->set_index_params(invert_params); + } } } @@ -3961,105 +5550,19 @@ ZVecErrorCode zvec_collection_get_schema(const ZVecCollection *collection, // Copy nullable flag c_schema->fields[i]->nullable = cpp_field->nullable(); - // Initialize index parameters (embedded, not pointer) - memset(&c_schema->fields[i]->index_params, 0, - sizeof(ZVecIndexParams)); + // Initialize index parameters to nullptr + c_schema->fields[i]->index_params = nullptr; c_schema->fields[i]->has_index = false; // Convert index parameters based on the actual type auto index_params = 
cpp_field->index_params(); if (index_params) { - switch (index_params->type()) { - case zvec::IndexType::HNSW: { - auto hnsw_params = - std::dynamic_pointer_cast( - index_params); - if (hnsw_params) { - c_schema->fields[i]->index_params.index_type = - ZVEC_INDEX_TYPE_HNSW; - c_schema->fields[i]->index_params.metric_type = - static_cast( - hnsw_params->metric_type()); - c_schema->fields[i]->index_params.quantize_type = - static_cast( - hnsw_params->quantize_type()); - c_schema->fields[i]->index_params.hnsw.m = - hnsw_params->m(); - c_schema->fields[i]->index_params.hnsw.ef_construction = - hnsw_params->ef_construction(); - c_schema->fields[i]->has_index = true; - } - break; - } - - case zvec::IndexType::IVF: { - auto ivf_params = - std::dynamic_pointer_cast( - index_params); - if (ivf_params) { - c_schema->fields[i]->index_params.index_type = - ZVEC_INDEX_TYPE_IVF; - c_schema->fields[i]->index_params.metric_type = - static_cast( - ivf_params->metric_type()); - c_schema->fields[i]->index_params.quantize_type = - static_cast( - ivf_params->quantize_type()); - c_schema->fields[i]->index_params.ivf.n_list = - ivf_params->n_list(); - c_schema->fields[i]->index_params.ivf.n_iters = - ivf_params->n_iters(); - c_schema->fields[i]->index_params.ivf.use_soar = - ivf_params->use_soar(); - c_schema->fields[i]->has_index = true; - } - break; - } - - case zvec::IndexType::FLAT: { - auto flat_params = - std::dynamic_pointer_cast( - index_params); - if (flat_params) { - c_schema->fields[i]->index_params.index_type = - ZVEC_INDEX_TYPE_FLAT; - c_schema->fields[i]->index_params.metric_type = - static_cast( - flat_params->metric_type()); - c_schema->fields[i]->index_params.quantize_type = - static_cast( - flat_params->quantize_type()); - c_schema->fields[i]->has_index = true; - } - break; - } - - case zvec::IndexType::INVERT: { - auto invert_params = - std::dynamic_pointer_cast( - index_params); - if (invert_params) { - c_schema->fields[i]->index_params.index_type = - 
ZVEC_INDEX_TYPE_INVERT; - c_schema->fields[i] - ->index_params.invert.enable_range_optimization = - invert_params->enable_range_optimization(); - c_schema->fields[i] - ->index_params.invert.enable_extended_wildcard = - invert_params->enable_extended_wildcard(); - c_schema->fields[i]->has_index = true; - } - break; - } - - default: - // For undefined or unsupported index types - c_schema->fields[i]->has_index = false; - break; + // Use helper function to convert C++ index params to C + c_schema->fields[i]->index_params = + convert_cpp_index_params_to_c(index_params); + if (c_schema->fields[i]->index_params) { + c_schema->fields[i]->has_index = true; } - } else { - // No index parameters - c_schema->fields[i]->has_index = false; } } catch (const std::bad_alloc &) { // Clean up already allocated fields @@ -4105,9 +5608,8 @@ ZVecErrorCode zvec_collection_get_options(const ZVecCollection *collection, return ZVEC_ERROR_INTERNAL_ERROR; } - // Create and initialize options structure - *options = static_cast( - malloc(sizeof(ZVecCollectionOptions))); + // Create and initialize options using new + *options = new ZVecCollectionOptions(); if (!*options) { set_last_error("Failed to allocate memory for options"); return ZVEC_ERROR_RESOURCE_EXHAUSTED; @@ -4142,8 +5644,7 @@ ZVecErrorCode zvec_collection_get_stats(const ZVecCollection *collection, return ZVEC_ERROR_INTERNAL_ERROR; } - *stats = static_cast( - malloc(sizeof(ZVecCollectionStats))); + *stats = new ZVecCollectionStats(); if (!*stats) { set_last_error("Failed to allocate memory for stats"); return ZVEC_ERROR_RESOURCE_EXHAUSTED; @@ -4173,248 +5674,601 @@ ZVecErrorCode zvec_collection_get_stats(const ZVecCollection *collection, return error_code;) } -ZVecCollectionStats *zvec_collection_stats_create(void) { - ZVecCollectionStats *stats = - static_cast(malloc(sizeof(ZVecCollectionStats))); - if (!stats) { - return nullptr; +void zvec_collection_stats_destroy(ZVecCollectionStats *stats) { + if (stats) { + if 
(stats->index_names) { + for (size_t i = 0; i < stats->index_count; ++i) { + zvec_free_string(stats->index_names[i]); + } + free(stats->index_names); + } + + if (stats->index_completeness) { + free(stats->index_completeness); + } + + free(stats); + } +} + +// ============================================================================= +// QueryParams functions implementation +// ============================================================================= + +ZVecQueryParams *zvec_query_params_create(ZVecIndexType index_type) { + ZVEC_TRY_RETURN_NULL("Failed to create ZVecQueryParams", + ZVecQueryParams *params = new ZVecQueryParams(); + params->index_type = index_type; params->radius = 0.0f; + params->is_linear = false; + params->is_using_refiner = false; return params;) + return nullptr; +} + +void zvec_query_params_destroy(ZVecQueryParams *params) { + if (params) { + delete params; + } +} + +ZVecErrorCode zvec_query_params_set_index_type(ZVecQueryParams *params, + ZVecIndexType index_type) { + if (!params) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, "Query params pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + params->index_type = index_type; + return ZVEC_OK; +} + +ZVecIndexType zvec_query_params_get_index_type(const ZVecQueryParams *params) { + if (!params) { + return ZVEC_INDEX_TYPE_UNDEFINED; + } + return params->index_type; +} + +ZVecErrorCode zvec_query_params_set_radius(ZVecQueryParams *params, + float radius) { + if (!params) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, "Query params pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + params->radius = radius; + return ZVEC_OK; +} + +float zvec_query_params_get_radius(const ZVecQueryParams *params) { + if (!params) { + return 0.0f; + } + return params->radius; +} + +ZVecErrorCode zvec_query_params_set_is_linear(ZVecQueryParams *params, + bool is_linear) { + if (!params) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, "Query params pointer is null"); + return 
ZVEC_ERROR_INVALID_ARGUMENT; + } + params->is_linear = is_linear; + return ZVEC_OK; +} + +bool zvec_query_params_get_is_linear(const ZVecQueryParams *params) { + if (!params) { + return false; + } + return params->is_linear; +} + +ZVecErrorCode zvec_query_params_set_is_using_refiner(ZVecQueryParams *params, + bool is_using_refiner) { + if (!params) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, "Query params pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + params->is_using_refiner = is_using_refiner; + return ZVEC_OK; +} + +bool zvec_query_params_get_is_using_refiner(const ZVecQueryParams *params) { + if (!params) { + return false; } - stats->doc_count = 0; - stats->index_count = 0; - stats->index_completeness = nullptr; - stats->index_names = nullptr; - return stats; + return params->is_using_refiner; } -void zvec_collection_stats_destroy(ZVecCollectionStats *stats) { - if (stats) { - if (stats->index_names) { - for (size_t i = 0; i < stats->index_count; ++i) { - zvec_free_string(stats->index_names[i]); - } - free(stats->index_names); - } +// ============================================================================= +// HnswQueryParams functions implementation +// ============================================================================= - if (stats->index_completeness) { - free(stats->index_completeness); - } +ZVecHnswQueryParams *zvec_query_params_hnsw_create(int ef, float radius, + bool is_linear, + bool is_using_refiner) { + ZVEC_TRY_RETURN_NULL("Failed to create ZVecHnswQueryParams", + ZVecHnswQueryParams *params = new ZVecHnswQueryParams(); + params->base.index_type = ZVEC_INDEX_TYPE_HNSW; + params->base.radius = radius; + params->base.is_linear = is_linear; + params->base.is_using_refiner = is_using_refiner; + params->ef = ef; return params;) + return nullptr; +} - free(stats); +void zvec_query_params_hnsw_destroy(ZVecHnswQueryParams *params) { + if (params) { + delete params; } } -// 
============================================================================= -// QueryParams functions implementation -// ============================================================================= - -ZVecQueryParams *zvec_query_params_create(ZVecIndexType index_type) { - ZVecQueryParams *params = - static_cast(malloc(sizeof(ZVecQueryParams))); +ZVecErrorCode zvec_query_params_hnsw_set_ef(ZVecHnswQueryParams *params, + int ef) { if (!params) { - SET_LAST_ERROR(ZVEC_ERROR_RESOURCE_EXHAUSTED, - "Failed to allocate memory for ZVecQueryParams"); - return nullptr; + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "HNSW query params pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; } - params->index_type = index_type; - params->radius = 0.0f; - params->is_linear = false; - params->is_using_refiner = false; - return params; + params->ef = ef; + return ZVEC_OK; } -ZVecHnswQueryParams *zvec_query_params_hnsw_create(ZVecIndexType index_type, - int ef, float radius, - bool is_linear, - bool is_using_refiner) { - ZVecHnswQueryParams *params = - static_cast(malloc(sizeof(ZVecHnswQueryParams))); +int zvec_query_params_hnsw_get_ef(const ZVecHnswQueryParams *params) { if (!params) { - SET_LAST_ERROR(ZVEC_ERROR_RESOURCE_EXHAUSTED, - "Failed to allocate memory for ZVecHnswQueryParams"); - return nullptr; + return zvec::core_interface::kDefaultHnswEfSearch; } - params->base.index_type = index_type; - params->base.radius = radius; - params->base.is_linear = is_linear; - params->base.is_using_refiner = is_using_refiner; - params->ef = ef; - return params; + return params->ef; } -ZVecIVFQueryParams *zvec_query_params_ivf_create(ZVecIndexType index_type, - int nprobe, +// ============================================================================= +// IVFQueryParams functions implementation +// ============================================================================= + +ZVecIVFQueryParams *zvec_query_params_ivf_create(int nprobe, bool is_using_refiner, float scale_factor) { 
- ZVecIVFQueryParams *params = - static_cast(malloc(sizeof(ZVecIVFQueryParams))); + ZVEC_TRY_RETURN_NULL("Failed to create ZVecIVFQueryParams", + ZVecIVFQueryParams *params = new ZVecIVFQueryParams(); + params->base.index_type = ZVEC_INDEX_TYPE_IVF; + params->base.is_using_refiner = is_using_refiner; + params->nprobe = nprobe; + params->scale_factor = scale_factor; return params;) + return nullptr; +} + +void zvec_query_params_ivf_destroy(ZVecIVFQueryParams *params) { + if (params) { + delete params; + } +} + +ZVecErrorCode zvec_query_params_ivf_set_nprobe(ZVecIVFQueryParams *params, + int nprobe) { if (!params) { - SET_LAST_ERROR(ZVEC_ERROR_RESOURCE_EXHAUSTED, - "Failed to allocate memory for ZVecIVFQueryParams"); - return nullptr; + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "IVF query params pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; } - params->base.index_type = index_type; - params->base.is_using_refiner = is_using_refiner; params->nprobe = nprobe; + return ZVEC_OK; +} + +int zvec_query_params_ivf_get_nprobe(const ZVecIVFQueryParams *params) { + if (!params) { + return 10; + } + return params->nprobe; +} + +ZVecErrorCode zvec_query_params_ivf_set_scale_factor(ZVecIVFQueryParams *params, + float scale_factor) { + if (!params) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "IVF query params pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } params->scale_factor = scale_factor; - return params; + return ZVEC_OK; +} + +float zvec_query_params_ivf_get_scale_factor(const ZVecIVFQueryParams *params) { + if (!params) { + return 10.0f; + } + return params->scale_factor; } -ZVecFlatQueryParams *zvec_query_params_flat_create(ZVecIndexType index_type, - bool is_using_refiner, +// ============================================================================= +// FlatQueryParams functions implementation +// ============================================================================= + +ZVecFlatQueryParams *zvec_query_params_flat_create(bool 
is_using_refiner, float scale_factor) { - ZVecFlatQueryParams *params = - static_cast(malloc(sizeof(ZVecFlatQueryParams))); + ZVEC_TRY_RETURN_NULL("Failed to create ZVecFlatQueryParams", + ZVecFlatQueryParams *params = new ZVecFlatQueryParams(); + params->base.index_type = ZVEC_INDEX_TYPE_FLAT; + params->base.is_using_refiner = is_using_refiner; + params->scale_factor = scale_factor; return params;) + return nullptr; +} + +void zvec_query_params_flat_destroy(ZVecFlatQueryParams *params) { + if (params) { + delete params; + } +} + +ZVecErrorCode zvec_query_params_flat_set_scale_factor( + ZVecFlatQueryParams *params, float scale_factor) { if (!params) { - SET_LAST_ERROR(ZVEC_ERROR_RESOURCE_EXHAUSTED, - "Failed to allocate memory for ZVecFlatQueryParams"); - return nullptr; + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Flat query params pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; } - params->base.index_type = index_type; - params->base.is_using_refiner = is_using_refiner; params->scale_factor = scale_factor; - return params; + return ZVEC_OK; } -ZVecQueryParamsUnion *zvec_query_params_union_create(ZVecIndexType index_type) { - ZVecQueryParamsUnion *params = - static_cast(malloc(sizeof(ZVecQueryParamsUnion))); +float zvec_query_params_flat_get_scale_factor( + const ZVecFlatQueryParams *params) { if (!params) { - SET_LAST_ERROR(ZVEC_ERROR_RESOURCE_EXHAUSTED, - "Failed to allocate memory for ZVecQueryParamsUnion"); - return nullptr; + return 10.0f; } - params->index_type = index_type; + return params->scale_factor; +} - switch (index_type) { - case ZVEC_INDEX_TYPE_HNSW: - params->params.hnsw_params.base.index_type = index_type; - params->params.hnsw_params.ef = - zvec::core_interface::kDefaultHnswEfSearch; - break; - case ZVEC_INDEX_TYPE_IVF: - params->params.ivf_params.base.index_type = index_type; - params->params.ivf_params.nprobe = 10; - params->params.ivf_params.scale_factor = 10.0f; - break; - case ZVEC_INDEX_TYPE_FLAT: - 
params->params.flat_params.base.index_type = index_type; - params->params.flat_params.scale_factor = 10.0f; - break; - default: - params->params.base_params.index_type = index_type; - break; - } +// ============================================================================= +// VectorQuery and GroupByVectorQuery functions implementation +// ============================================================================= - return params; +ZVecVectorQuery *zvec_vector_query_create(void) { + ZVEC_TRY_RETURN_NULL( + "Failed to create ZVecVectorQuery", + ZVecVectorQuery *query = new ZVecVectorQuery(); + query->topk = 10; query->field_name = nullptr; + query->query_vector.data = nullptr; query->query_vector.length = 0; + query->query_sparse_indices.data = nullptr; + query->query_sparse_indices.length = 0; + query->query_sparse_values.data = nullptr; + query->query_sparse_values.length = 0; query->filter = nullptr; + query->include_vector = false; query->include_doc_id = true; + query->output_fields = nullptr; query->query_params = nullptr; + query->params_type = ZVEC_INDEX_TYPE_UNDEFINED; return query;) + return nullptr; } -void zvec_query_params_destroy(ZVecQueryParams *params) { - if (params) { - free(params); +void zvec_vector_query_destroy(ZVecVectorQuery *query) { + if (query) { + if (query->field_name) { + zvec_free_string(query->field_name); + } + if (query->filter) { + zvec_free_string(query->filter); + } + if (query->output_fields) { + zvec_string_array_destroy(query->output_fields); + } + if (query->query_params) { + // Delete type-specific params based on params_type + switch (query->params_type) { + case ZVEC_INDEX_TYPE_HNSW: + delete static_cast(query->query_params); + break; + case ZVEC_INDEX_TYPE_IVF: + delete static_cast(query->query_params); + break; + case ZVEC_INDEX_TYPE_FLAT: + delete static_cast(query->query_params); + break; + default: + delete static_cast(query->query_params); + break; + } + } + delete query; } } -void 
zvec_query_params_hnsw_destroy(ZVecHnswQueryParams *params) { - if (params) { - free(params); +ZVecErrorCode zvec_vector_query_set_topk(ZVecVectorQuery *query, int topk) { + if (!query) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, "Vector query pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; } + query->topk = topk; + return ZVEC_OK; } -void zvec_query_params_ivf_destroy(ZVecIVFQueryParams *params) { - if (params) { - free(params); +int zvec_vector_query_get_topk(const ZVecVectorQuery *query) { + if (!query) { + return 10; } + return query->topk; } -void zvec_query_params_flat_destroy(ZVecFlatQueryParams *params) { - if (params) { - free(params); +ZVecErrorCode zvec_vector_query_set_field_name(ZVecVectorQuery *query, + const char *field_name) { + if (!query) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, "Vector query pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + if (query->field_name) { + zvec_free_string(query->field_name); } + query->field_name = zvec_string_create(field_name); + return ZVEC_OK; } -void zvec_query_params_union_destroy(ZVecQueryParamsUnion *params) { - if (params) { - free(params); +const char *zvec_vector_query_get_field_name(const ZVecVectorQuery *query) { + if (!query || !query->field_name) { + return nullptr; } + return query->field_name->data; } -ZVecErrorCode zvec_query_params_set_index_type(ZVecQueryParams *params, - ZVecIndexType index_type) { - if (!params) { - set_last_error("Query params pointer is null"); +ZVecErrorCode zvec_vector_query_set_query_vector(ZVecVectorQuery *query, + const void *data, + size_t size) { + if (!query) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, "Vector query pointer is null"); return ZVEC_ERROR_INVALID_ARGUMENT; } + query->query_vector.data = (const uint8_t *)data; + query->query_vector.length = size; + return ZVEC_OK; +} - params->index_type = index_type; +ZVecErrorCode zvec_vector_query_set_filter(ZVecVectorQuery *query, + const char *filter) { + if (!query) { + 
SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, "Vector query pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + if (query->filter) { + zvec_free_string(query->filter); + } + if (filter && strlen(filter) > 0) { + query->filter = zvec_string_create(filter); + } else { + query->filter = nullptr; + } return ZVEC_OK; } -ZVecErrorCode zvec_query_params_set_radius(ZVecQueryParams *params, - float radius) { - if (!params) { - set_last_error("Query params pointer is null"); +const char *zvec_vector_query_get_filter(const ZVecVectorQuery *query) { + if (!query || !query->filter) { + return nullptr; + } + return query->filter->data; +} + +ZVecErrorCode zvec_vector_query_set_include_vector(ZVecVectorQuery *query, + bool include) { + if (!query) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, "Vector query pointer is null"); return ZVEC_ERROR_INVALID_ARGUMENT; } + query->include_vector = include; + return ZVEC_OK; +} - params->radius = radius; +ZVecErrorCode zvec_vector_query_set_include_doc_id(ZVecVectorQuery *query, + bool include) { + if (!query) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, "Vector query pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + query->include_doc_id = include; return ZVEC_OK; } -ZVecErrorCode zvec_query_params_set_is_linear(ZVecQueryParams *params, - bool is_linear) { - if (!params) { - set_last_error("Query params pointer is null"); +ZVecErrorCode zvec_vector_query_set_output_fields(ZVecVectorQuery *query, + const char **fields, + size_t count) { + if (!query) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, "Vector query pointer is null"); return ZVEC_ERROR_INVALID_ARGUMENT; } + if (query->output_fields) { + zvec_string_array_destroy(query->output_fields); + } + if (fields && count > 0) { + query->output_fields = zvec_string_array_create_from_strings(fields, count); + } else { + query->output_fields = nullptr; + } + return ZVEC_OK; +} - params->is_linear = is_linear; +ZVecErrorCode 
zvec_vector_query_set_query_params(ZVecVectorQuery *query, + void *params) { + if (!query) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, "Vector query pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + // Note: We don't delete old params here, caller should manage lifetime + query->query_params = params; + // Set params_type based on the type of params (caller should ensure + // consistency) For now, we assume params is one of the known types + if (params) { + // We can't automatically determine the type, so we'll need to trust the + // caller to set the correct type via a separate call if needed + query->params_type = ZVEC_INDEX_TYPE_UNDEFINED; + } return ZVEC_OK; } -ZVecErrorCode zvec_query_params_set_is_using_refiner(ZVecQueryParams *params, - bool is_using_refiner) { - if (!params) { - set_last_error("Query params pointer is null"); +// GroupByVectorQuery functions + +ZVecGroupByVectorQuery *zvec_group_by_vector_query_create(void) { + ZVEC_TRY_RETURN_NULL( + "Failed to create ZVecGroupByVectorQuery", + ZVecGroupByVectorQuery *query = new ZVecGroupByVectorQuery(); + query->field_name = nullptr; query->query_vector.data = nullptr; + query->query_vector.length = 0; + query->query_sparse_indices.data = nullptr; + query->query_sparse_indices.length = 0; + query->query_sparse_values.data = nullptr; + query->query_sparse_values.length = 0; query->filter = nullptr; + query->include_vector = false; query->output_fields = nullptr; + query->group_by_field_name = nullptr; query->group_count = 0; + query->group_topk = 0; query->query_params = nullptr; + query->params_type = ZVEC_INDEX_TYPE_UNDEFINED; return query;) + return nullptr; +} + +void zvec_group_by_vector_query_destroy(ZVecGroupByVectorQuery *query) { + if (query) { + if (query->field_name) { + zvec_free_string(query->field_name); + } + if (query->filter) { + zvec_free_string(query->filter); + } + if (query->output_fields) { + zvec_string_array_destroy(query->output_fields); + } + if 
(query->group_by_field_name) { + zvec_free_string(query->group_by_field_name); + } + if (query->query_params) { + switch (query->params_type) { + case ZVEC_INDEX_TYPE_HNSW: + delete static_cast(query->query_params); + break; + case ZVEC_INDEX_TYPE_IVF: + delete static_cast(query->query_params); + break; + case ZVEC_INDEX_TYPE_FLAT: + delete static_cast(query->query_params); + break; + default: + delete static_cast(query->query_params); + break; + } + } + delete query; + } +} + +ZVecErrorCode zvec_group_by_vector_query_set_field_name( + ZVecGroupByVectorQuery *query, const char *field_name) { + if (!query) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Group by vector query pointer is null"); return ZVEC_ERROR_INVALID_ARGUMENT; } + if (query->field_name) { + zvec_free_string(query->field_name); + } + query->field_name = zvec_string_create(field_name); + return ZVEC_OK; +} - params->is_using_refiner = is_using_refiner; +ZVecErrorCode zvec_group_by_vector_query_set_group_by_field_name( + ZVecGroupByVectorQuery *query, const char *field_name) { + if (!query) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Group by vector query pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + if (query->group_by_field_name) { + zvec_free_string(query->group_by_field_name); + } + query->group_by_field_name = zvec_string_create(field_name); return ZVEC_OK; } -ZVecErrorCode zvec_query_params_hnsw_set_ef(ZVecHnswQueryParams *params, - int ef) { - if (!params) { - set_last_error("HNSW query params pointer is null"); +ZVecErrorCode zvec_group_by_vector_query_set_group_count( + ZVecGroupByVectorQuery *query, uint32_t count) { + if (!query) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Group by vector query pointer is null"); return ZVEC_ERROR_INVALID_ARGUMENT; } + query->group_count = count; + return ZVEC_OK; +} - params->ef = ef; +ZVecErrorCode zvec_group_by_vector_query_set_group_topk( + ZVecGroupByVectorQuery *query, uint32_t topk) { + if (!query) { + 
SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Group by vector query pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + query->group_topk = topk; return ZVEC_OK; } -ZVecErrorCode zvec_query_params_ivf_set_nprobe(ZVecIVFQueryParams *params, - int nprobe) { - if (!params) { - set_last_error("IVF query params pointer is null"); +ZVecErrorCode zvec_group_by_vector_query_set_query_vector( + ZVecGroupByVectorQuery *query, const void *data, size_t size) { + if (!query) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Group by vector query pointer is null"); return ZVEC_ERROR_INVALID_ARGUMENT; } + query->query_vector.data = (const uint8_t *)data; + query->query_vector.length = size; + return ZVEC_OK; +} - params->nprobe = nprobe; +ZVecErrorCode zvec_group_by_vector_query_set_filter( + ZVecGroupByVectorQuery *query, const char *filter) { + if (!query) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Group by vector query pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + if (query->filter) { + zvec_free_string(query->filter); + } + if (filter && strlen(filter) > 0) { + query->filter = zvec_string_create(filter); + } else { + query->filter = nullptr; + } return ZVEC_OK; } -ZVecErrorCode zvec_query_params_ivf_set_scale_factor(ZVecIVFQueryParams *params, - float scale_factor) { - if (!params) { - set_last_error("Query params pointer is null"); +ZVecErrorCode zvec_group_by_vector_query_set_include_vector( + ZVecGroupByVectorQuery *query, bool include) { + if (!query) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Group by vector query pointer is null"); return ZVEC_ERROR_INVALID_ARGUMENT; } + query->include_vector = include; + return ZVEC_OK; +} - params->scale_factor = scale_factor; +ZVecErrorCode zvec_group_by_vector_query_set_output_fields( + ZVecGroupByVectorQuery *query, const char **fields, size_t count) { + if (!query) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Group by vector query pointer is null"); + return 
ZVEC_ERROR_INVALID_ARGUMENT; + } + if (query->output_fields) { + zvec_string_array_destroy(query->output_fields); + } + if (fields && count > 0) { + query->output_fields = zvec_string_array_create_from_strings(fields, count); + } else { + query->output_fields = nullptr; + } return ZVEC_OK; } +ZVecErrorCode zvec_group_by_vector_query_set_query_params( + ZVecGroupByVectorQuery *query, void *params) { + if (!query) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Group by vector query pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + query->query_params = params; + query->params_type = ZVEC_INDEX_TYPE_UNDEFINED; + return ZVEC_OK; +} // ============================================================================= // Index Interface Implementation @@ -4575,23 +6429,25 @@ default: { auto coll_ptr = reinterpret_cast *>(collection); - zvec::DataType data_type = convert_data_type(field_schema->data_type); + zvec::DataType data_type = + convert_data_type(zvec_field_schema_get_data_type(field_schema)); if (data_type == zvec::DataType::UNDEFINED) { set_last_error("Invalid data type"); return ZVEC_ERROR_INVALID_ARGUMENT; } - std::string field_name(field_schema->name->data, - field_schema->name->length); + std::string field_name(zvec_field_schema_get_name(field_schema)); bool is_vector_field = check_is_vector_field(*field_schema); zvec::FieldSchema::Ptr schema; if (is_vector_field) { - schema = std::make_shared(field_name, data_type, - field_schema->dimension, - field_schema->nullable); + schema = std::make_shared( + field_name, data_type, + zvec_field_schema_get_dimension(field_schema), + zvec_field_schema_is_nullable(field_schema)); } else { - schema = std::make_shared(field_name, data_type, - field_schema->nullable); + schema = std::make_shared( + field_name, data_type, + zvec_field_schema_is_nullable(field_schema)); } std::string expr = expression ? 
expression : ""; @@ -4639,7 +6495,7 @@ default: { zvec::FieldSchema::Ptr schema = nullptr; if (new_schema) { auto status = - convert_zvec_field_schema_to_internal(*new_schema, schema); + convert_zvec_field_schema_to_internal(new_schema, schema); if (!status.ok()) { set_last_error(status.message()); return ZVEC_ERROR_INVALID_ARGUMENT; @@ -4977,9 +6833,12 @@ default: { const ZVecVectorQuery *query) { internal_query.topk_ = query->topk; internal_query.field_name_ = - std::string(query->field_name.data, query->field_name.length); + query->field_name + ? std::string(query->field_name->data, query->field_name->length) + : ""; internal_query.filter_ = - std::string(query->filter.data, query->filter.length); + query->filter ? std::string(query->filter->data, query->filter->length) + : ""; internal_query.include_vector_ = query->include_vector; internal_query.include_doc_id_ = query->include_doc_id; @@ -5006,12 +6865,12 @@ default: { } // Output fields conversion - if (query->output_fields.count > 0) { + if (query->output_fields && query->output_fields->count > 0) { internal_query.output_fields_ = std::vector(); - for (size_t i = 0; i < query->output_fields.count; ++i) { + for (size_t i = 0; i < query->output_fields->count; ++i) { internal_query.output_fields_->emplace_back( - query->output_fields.strings[i].data, - query->output_fields.strings[i].length); + query->output_fields->strings[i].data, + query->output_fields->strings[i].length); } } } @@ -5023,42 +6882,42 @@ default: { // QueryParams conversion if (query->query_params) { - auto query_params = std::make_shared( - static_cast(query->query_params->index_type)); - - switch (query->query_params->index_type) { + switch (query->params_type) { case ZVEC_INDEX_TYPE_HNSW: { - auto hnsw_params = std::make_shared( - query->query_params->params.hnsw_params.ef, - query->query_params->params.hnsw_params.base.radius, - query->query_params->params.hnsw_params.base.is_linear, - 
query->query_params->params.hnsw_params.base.is_using_refiner); - internal_query.query_params_ = hnsw_params; + auto hnsw_params = + static_cast(query->query_params); + auto internal_params = std::make_shared( + hnsw_params->ef, hnsw_params->base.radius, + hnsw_params->base.is_linear, hnsw_params->base.is_using_refiner); + internal_query.query_params_ = internal_params; break; } case ZVEC_INDEX_TYPE_IVF: { - auto ivf_params = std::make_shared( - query->query_params->params.ivf_params.nprobe, - query->query_params->params.ivf_params.base.is_using_refiner, - query->query_params->params.ivf_params.scale_factor); - internal_query.query_params_ = ivf_params; + auto ivf_params = + static_cast(query->query_params); + auto internal_params = std::make_shared( + ivf_params->nprobe, ivf_params->base.is_using_refiner, + ivf_params->scale_factor); + internal_query.query_params_ = internal_params; break; } case ZVEC_INDEX_TYPE_FLAT: { - auto flat_params = std::make_shared( - query->query_params->params.flat_params.base.is_using_refiner, - query->query_params->params.flat_params.scale_factor); - internal_query.query_params_ = flat_params; + auto flat_params = + static_cast(query->query_params); + auto internal_params = std::make_shared( + flat_params->base.is_using_refiner, flat_params->scale_factor); + internal_query.query_params_ = internal_params; break; } default: { - query_params->set_radius( - query->query_params->params.base_params.radius); - query_params->set_is_linear( - query->query_params->params.base_params.is_linear); - query_params->set_is_using_refiner( - query->query_params->params.base_params.is_using_refiner); - internal_query.query_params_ = query_params; + auto base_params = + static_cast(query->query_params); + auto internal_params = std::make_shared( + static_cast(base_params->index_type)); + internal_params->set_radius(base_params->radius); + internal_params->set_is_linear(base_params->is_linear); + 
internal_params->set_is_using_refiner(base_params->is_using_refiner); + internal_query.query_params_ = internal_params; break; } } @@ -5069,12 +6928,18 @@ default: { void convert_groupby_query_params(zvec::GroupByVectorQuery &internal_query, const ZVecGroupByVectorQuery *query) { internal_query.field_name_ = - std::string(query->field_name.data, query->field_name.length); + query->field_name + ? std::string(query->field_name->data, query->field_name->length) + : ""; internal_query.filter_ = - std::string(query->filter.data, query->filter.length); + query->filter ? std::string(query->filter->data, query->filter->length) + : ""; internal_query.include_vector_ = query->include_vector; - internal_query.group_by_field_name_ = std::string( - query->group_by_field_name.data, query->group_by_field_name.length); + internal_query.group_by_field_name_ = + query->group_by_field_name + ? std::string(query->group_by_field_name->data, + query->group_by_field_name->length) + : ""; internal_query.group_count_ = query->group_count; internal_query.group_topk_ = query->group_topk; @@ -5098,54 +6963,54 @@ default: { query->query_sparse_values.length); } - if (query->output_fields.count > 0) { + if (query->output_fields && query->output_fields->count > 0) { if (!internal_query.output_fields_.has_value()) { internal_query.output_fields_ = std::vector(); } - for (size_t i = 0; i < query->output_fields.count; ++i) { + for (size_t i = 0; i < query->output_fields->count; ++i) { internal_query.output_fields_->push_back( - std::string(query->output_fields.strings[i].data, - query->output_fields.strings[i].length)); + std::string(query->output_fields->strings[i].data, + query->output_fields->strings[i].length)); } } if (query->query_params) { - auto query_params = std::make_shared( - static_cast(query->query_params->index_type)); - - switch (query->query_params->index_type) { + switch (query->params_type) { case ZVEC_INDEX_TYPE_HNSW: { - auto hnsw_params = std::make_shared( - 
query->query_params->params.hnsw_params.ef, - query->query_params->params.hnsw_params.base.radius, - query->query_params->params.hnsw_params.base.is_linear, - query->query_params->params.hnsw_params.base.is_using_refiner); - internal_query.query_params_ = hnsw_params; + auto hnsw_params = + static_cast(query->query_params); + auto internal_params = std::make_shared( + hnsw_params->ef, hnsw_params->base.radius, + hnsw_params->base.is_linear, hnsw_params->base.is_using_refiner); + internal_query.query_params_ = internal_params; break; } case ZVEC_INDEX_TYPE_IVF: { - auto ivf_params = std::make_shared( - query->query_params->params.ivf_params.nprobe, - query->query_params->params.ivf_params.base.is_using_refiner, - query->query_params->params.ivf_params.scale_factor); - internal_query.query_params_ = ivf_params; + auto ivf_params = + static_cast(query->query_params); + auto internal_params = std::make_shared( + ivf_params->nprobe, ivf_params->base.is_using_refiner, + ivf_params->scale_factor); + internal_query.query_params_ = internal_params; break; } case ZVEC_INDEX_TYPE_FLAT: { - auto flat_params = std::make_shared( - query->query_params->params.flat_params.base.is_using_refiner, - query->query_params->params.flat_params.scale_factor); - internal_query.query_params_ = flat_params; + auto flat_params = + static_cast(query->query_params); + auto internal_params = std::make_shared( + flat_params->base.is_using_refiner, flat_params->scale_factor); + internal_query.query_params_ = internal_params; break; } default: { - query_params->set_radius( - query->query_params->params.base_params.radius); - query_params->set_is_linear( - query->query_params->params.base_params.is_linear); - query_params->set_is_using_refiner( - query->query_params->params.base_params.is_using_refiner); - internal_query.query_params_ = query_params; + auto base_params = + static_cast(query->query_params); + auto internal_params = std::make_shared( + static_cast(base_params->index_type)); + 
internal_params->set_radius(base_params->radius); + internal_params->set_is_linear(base_params->is_linear); + internal_params->set_is_using_refiner(base_params->is_using_refiner); + internal_query.query_params_ = internal_params; break; } } diff --git a/src/include/zvec/c_api.h b/src/include/zvec/c_api.h index 96e99b8b..775012bd 100644 --- a/src/include/zvec/c_api.h +++ b/src/include/zvec/c_api.h @@ -409,43 +409,37 @@ typedef enum { */ typedef enum { ZVEC_LOG_TYPE_CONSOLE = 0, ZVEC_LOG_TYPE_FILE = 1 } ZVecLogType; +// ============================================================================= +// Configuration Structures (Opaque Pointer Pattern) +// ============================================================================= + /** - * @brief Console log configuration structure + * @brief Console log configuration (opaque pointer) + * Corresponds to zvec::GlobalConfig::ConsoleLogConfig + * Use zvec_config_console_log_create() to create and + * zvec_config_console_log_destroy() to destroy */ -typedef struct { - ZVecLogLevel level; /**< Log level */ -} ZVecConsoleLogConfig; +typedef struct ZVecConsoleLogConfig ZVecConsoleLogConfig; /** - * @brief File log configuration structure + * @brief File log configuration (opaque pointer) + * Corresponds to zvec::GlobalConfig::FileLogConfig + * Use zvec_config_file_log_create() to create and + * zvec_config_file_log_destroy() to destroy */ -typedef struct { - ZVecLogLevel level; /**< Log level */ - ZVecString dir; /**< Log directory */ - ZVecString basename; /**< Log file base name */ - uint32_t file_size; /**< Log file size (MB) */ - uint32_t overdue_days; /**< Log expiration days */ -} ZVecFileLogConfig; +typedef struct ZVecFileLogConfig ZVecFileLogConfig; /** - * @brief ZVec configuration data structure (corresponds to zvec::ConfigData) + * @brief Configuration data (opaque pointer) + * Corresponds to zvec::GlobalConfig::ConfigData + * Use zvec_config_data_create() to create and + * zvec_config_data_destroy() to destroy 
*/ -typedef struct { - uint64_t memory_limit_bytes; /**< Memory limit in bytes */ - - // log - ZVecLogType log_type; - void *log_config; /**< Log configuration (ZVecConsoleLogConfig or - ZVecFileLogConfig) */ - - // query - uint32_t query_thread_count; /**< Query thread count */ - float invert_to_forward_scan_ratio; /**< Inverted to forward scan ratio */ - float brute_force_by_keys_ratio; /**< Brute force by keys ratio */ +typedef struct ZVecConfigData ZVecConfigData; - // optimize - uint32_t optimize_thread_count; /**< Optimize thread count */ -} ZVecConfigData; +// ============================================================================= +// Log Configuration Management Functions +// ============================================================================= /** * @brief Create console log configuration @@ -484,6 +478,111 @@ zvec_config_console_log_destroy(ZVecConsoleLogConfig *config); ZVEC_EXPORT void ZVEC_CALL zvec_config_file_log_destroy(ZVecFileLogConfig *config); +/** + * @brief Get log level from console log config + * @param config Console log configuration pointer + * @return ZVecLogLevel Log level + */ +ZVEC_EXPORT ZVecLogLevel ZVEC_CALL +zvec_config_console_log_get_level(const ZVecConsoleLogConfig *config); + +/** + * @brief Set log level in console log config + * @param config Console log configuration pointer + * @param level Log level + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_config_console_log_set_level( + ZVecConsoleLogConfig *config, ZVecLogLevel level); + +/** + * @brief Get log level from file log config + * @param config File log configuration pointer + * @return ZVecLogLevel Log level + */ +ZVEC_EXPORT ZVecLogLevel ZVEC_CALL +zvec_config_file_log_get_level(const ZVecFileLogConfig *config); + +/** + * @brief Set log level in file log config + * @param config File log configuration pointer + * @param level Log level + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL 
+zvec_config_file_log_set_level(ZVecFileLogConfig *config, ZVecLogLevel level); + +/** + * @brief Get log directory from file log config + * @param config File log configuration pointer + * @return const char* Log directory (owned by config, do not free) + */ +ZVEC_EXPORT const char *ZVEC_CALL +zvec_config_file_log_get_dir(const ZVecFileLogConfig *config); + +/** + * @brief Set log directory in file log config + * @param config File log configuration pointer + * @param dir Log directory + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_config_file_log_set_dir(ZVecFileLogConfig *config, const char *dir); + +/** + * @brief Get log file basename from file log config + * @param config File log configuration pointer + * @return const char* Log file basename (owned by config, do not free) + */ +ZVEC_EXPORT const char *ZVEC_CALL +zvec_config_file_log_get_basename(const ZVecFileLogConfig *config); + +/** + * @brief Set log file basename in file log config + * @param config File log configuration pointer + * @param basename Log file basename + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_config_file_log_set_basename( + ZVecFileLogConfig *config, const char *basename); + +/** + * @brief Get log file size from file log config + * @param config File log configuration pointer + * @return uint32_t Log file size in MB + */ +ZVEC_EXPORT uint32_t ZVEC_CALL +zvec_config_file_log_get_file_size(const ZVecFileLogConfig *config); + +/** + * @brief Set log file size in file log config + * @param config File log configuration pointer + * @param file_size Log file size in MB + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_config_file_log_set_file_size( + ZVecFileLogConfig *config, uint32_t file_size); + +/** + * @brief Get log overdue days from file log config + * @param config File log configuration pointer + * @return uint32_t Log overdue days + */ +ZVEC_EXPORT uint32_t ZVEC_CALL 
+zvec_config_file_log_get_overdue_days(const ZVecFileLogConfig *config); + +/** + * @brief Set log overdue days in file log config + * @param config File log configuration pointer + * @param days Log overdue days + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_config_file_log_set_overdue_days(ZVecFileLogConfig *config, uint32_t days); + +// ============================================================================= +// Configuration Data Management Functions +// ============================================================================= /** * @brief Create configuration data @@ -506,9 +605,18 @@ ZVEC_EXPORT void ZVEC_CALL zvec_config_data_destroy(ZVecConfigData *config); ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_config_data_set_memory_limit( ZVecConfigData *config, uint64_t memory_limit_bytes); +/** + * @brief Get memory limit from configuration data + * @param config Configuration data pointer + * @return uint64_t Memory limit in bytes + */ +ZVEC_EXPORT uint64_t ZVEC_CALL +zvec_config_data_get_memory_limit(const ZVecConfigData *config); + /** * @brief Set log configuration in configuration data * @param config Configuration data pointer + * @param log_type Log type (console or file) * @param log_config Log configuration pointer (ownership is transferred to * config, do not free separately) * @return ZVecErrorCode Error code @@ -516,6 +624,32 @@ ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_config_data_set_memory_limit( ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_config_data_set_log_config( ZVecConfigData *config, ZVecLogType log_type, void *log_config); +/** + * @brief Get log type from configuration data + * @param config Configuration data pointer + * @return ZVecLogType Log type + */ +ZVEC_EXPORT ZVecLogType ZVEC_CALL +zvec_config_data_get_log_type(const ZVecConfigData *config); + +/** + * @brief Get console log config from configuration data + * @param config Configuration data pointer + * @return ZVecConsoleLogConfig* Console 
log configuration (owned by config, do + * not destroy) + */ +ZVEC_EXPORT ZVecConsoleLogConfig *ZVEC_CALL +zvec_config_data_get_console_log_config(const ZVecConfigData *config); + +/** + * @brief Get file log config from configuration data + * @param config Configuration data pointer + * @return ZVecFileLogConfig* File log configuration (owned by config, do not + * destroy) + */ +ZVEC_EXPORT ZVecFileLogConfig *ZVEC_CALL +zvec_config_data_get_file_log_config(const ZVecConfigData *config); + /** * @brief Set query thread count in configuration data * @param config Configuration data pointer @@ -525,6 +659,50 @@ ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_config_data_set_log_config( ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_config_data_set_query_thread_count( ZVecConfigData *config, uint32_t thread_count); +/** + * @brief Get query thread count from configuration data + * @param config Configuration data pointer + * @return uint32_t Query thread count + */ +ZVEC_EXPORT uint32_t ZVEC_CALL +zvec_config_data_get_query_thread_count(const ZVecConfigData *config); + +/** + * @brief Set invert to forward scan ratio in configuration data + * @param config Configuration data pointer + * @param ratio Invert to forward scan ratio + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_config_data_set_invert_to_forward_scan_ratio(ZVecConfigData *config, + float ratio); + +/** + * @brief Get invert to forward scan ratio from configuration data + * @param config Configuration data pointer + * @return float Invert to forward scan ratio + */ +ZVEC_EXPORT float ZVEC_CALL +zvec_config_data_get_invert_to_forward_scan_ratio(const ZVecConfigData *config); + +/** + * @brief Set brute force by keys ratio in configuration data + * @param config Configuration data pointer + * @param ratio Brute force by keys ratio + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_config_data_set_brute_force_by_keys_ratio(ZVecConfigData *config, + 
float ratio); + +/** + * @brief Get brute force by keys ratio from configuration data + * @param config Configuration data pointer + * @return float Brute force by keys ratio + */ +ZVEC_EXPORT float ZVEC_CALL +zvec_config_data_get_brute_force_by_keys_ratio(const ZVecConfigData *config); + /** * @brief Set optimize thread count in configuration data * @param config Configuration data pointer @@ -534,6 +712,14 @@ ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_config_data_set_query_thread_count( ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_config_data_set_optimize_thread_count( ZVecConfigData *config, uint32_t thread_count); +/** + * @brief Get optimize thread count from configuration data + * @param config Configuration data pointer + * @return uint32_t Optimize thread count + */ +ZVEC_EXPORT uint32_t ZVEC_CALL +zvec_config_data_get_optimize_thread_count(const ZVecConfigData *config); + // ============================================================================= // Initialization and Cleanup Interface // ============================================================================= @@ -641,411 +827,960 @@ typedef enum { typedef struct ZVecCollection ZVecCollection; // ============================================================================= -// Index Parameters Structures +// Index Parameters Structures (Opaque Pointer Pattern) +// ============================================================================= + +/** + * @brief Index parameters (opaque pointer) + * + * Use zvec_index_params_create() to create and zvec_index_params_destroy() to + * destroy. Specific parameters are set via type-specific setter functions. 
+ */ +typedef struct ZVecIndexParams ZVecIndexParams; + +// ============================================================================= +// Field Schema Structures (Opaque Pointer Pattern) +// ============================================================================= + +/** + * @brief Field schema (opaque pointer) + * + * Use zvec_field_schema_create() to create and zvec_field_schema_destroy() to + * destroy. Fields are accessed via getter/setter functions. + */ +typedef struct ZVecFieldSchema ZVecFieldSchema; + + +// ============================================================================= +// Index Parameters Interface +// ============================================================================= + +/** + * @brief Create index parameters + * @param index_type Index type + * @return Pointer to newly created ZVecIndexParams, or NULL on error + */ +ZVEC_EXPORT ZVecIndexParams *ZVEC_CALL +zvec_index_params_create(ZVecIndexType index_type); + +/** + * @brief Destroy index parameters + * @param params Index parameters to destroy + */ +ZVEC_EXPORT void ZVEC_CALL zvec_index_params_destroy(ZVecIndexParams *params); + +/** + * @brief Get index type + * @param params Index parameters (must not be NULL) + * @return Index type + */ +ZVEC_EXPORT ZVecIndexType ZVEC_CALL +zvec_index_params_get_type(const ZVecIndexParams *params); + +/** + * @brief Set metric type (for vector indexes) + * @param params Index parameters (must be vector index type) + * @param metric_type Metric type + * @return ZVEC_OK on success, error code on failure + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_index_params_set_metric_type( + ZVecIndexParams *params, ZVecMetricType metric_type); + +/** + * @brief Get metric type + * @param params Index parameters (must not be NULL) + * @return Metric type + */ +ZVEC_EXPORT ZVecMetricType ZVEC_CALL +zvec_index_params_get_metric_type(const ZVecIndexParams *params); + +/** + * @brief Set quantize type (for vector indexes) + * @param params Index 
parameters (must be vector index type) + * @param quantize_type Quantize type + * @return ZVEC_OK on success, error code on failure + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_index_params_set_quantize_type( + ZVecIndexParams *params, ZVecQuantizeType quantize_type); + +/** + * @brief Get quantize type + * @param params Index parameters (must not be NULL) + * @return Quantize type + */ +ZVEC_EXPORT ZVecQuantizeType ZVEC_CALL +zvec_index_params_get_quantize_type(const ZVecIndexParams *params); + +/** + * @brief Set HNSW specific parameters + * @param params Index parameters (must be HNSW type) + * @param m Graph connectivity parameter + * @param ef_construction Construction exploration factor + * @return ZVEC_OK on success, error code on failure + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_index_params_set_hnsw_params( + ZVecIndexParams *params, int m, int ef_construction); + +/** + * @brief Get HNSW m parameter + * @param params Index parameters (must not be NULL) + * @return m parameter + */ +ZVEC_EXPORT int ZVEC_CALL +zvec_index_params_get_hnsw_m(const ZVecIndexParams *params); + +/** + * @brief Get HNSW ef_construction parameter + * @param params Index parameters (must not be NULL) + * @return ef_construction parameter + */ +ZVEC_EXPORT int ZVEC_CALL +zvec_index_params_get_hnsw_ef_construction(const ZVecIndexParams *params); + +/** + * @brief Get HNSW parameters (all at once) + * @param params Index parameters (must not be NULL) + * @param out_m Output parameter for m + * @param out_ef_construction Output parameter for ef_construction + * @return ZVEC_OK on success, error code on failure + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_index_params_get_hnsw_params( + const ZVecIndexParams *params, int *out_m, int *out_ef_construction); + +/** + * @brief Set IVF specific parameters + * @param params Index parameters (must be IVF type) + * @param n_list Number of cluster centers + * @param n_iters Number of iterations + * @param use_soar Whether to use SOAR 
algorithm + * @return ZVEC_OK on success, error code on failure + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_index_params_set_ivf_params( + ZVecIndexParams *params, int n_list, int n_iters, bool use_soar); + +/** + * @brief Get IVF parameters (all at once) + * @param params Index parameters (must not be NULL) + * @param out_n_list Output parameter for n_list + * @param out_n_iters Output parameter for n_iters + * @param out_use_soar Output parameter for use_soar + * @return ZVEC_OK on success, error code on failure + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_index_params_get_ivf_params(const ZVecIndexParams *params, int *out_n_list, + int *out_n_iters, bool *out_use_soar); + +/** + * @brief Get invert index parameters (all at once) + * @param params Index parameters (must not be NULL) + * @param out_enable_range_opt Output parameter for enable_range_optimization + * @param out_enable_wildcard Output parameter for enable_extended_wildcard + * @return ZVEC_OK on success, error code on failure + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_index_params_get_invert_params( + const ZVecIndexParams *params, bool *out_enable_range_opt, + bool *out_enable_wildcard); + +/** + * @brief Set invert index specific parameters + * @param params Index parameters (must be INVERT type) + * @param enable_range_opt Whether to enable range optimization + * @param enable_wildcard Whether to enable extended wildcard + * @return ZVEC_OK on success, error code on failure + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_index_params_set_invert_params( + ZVecIndexParams *params, bool enable_range_opt, bool enable_wildcard); + +// ============================================================================= +// Query Parameters Structures (Opaque Pointer Pattern) // ============================================================================= /** - * @brief Flattened index parameters structure - * - * Uses a union to store specific parameters for different index types, - * avoiding 
C++-style inheritance nesting. Supports stack allocation, - * reducing malloc/free overhead. + * @brief Base query parameters (opaque pointer) + * Corresponds to zvec::QueryParams + * Use zvec_query_params_create() to create and zvec_query_params_destroy() to + * destroy + */ +typedef struct ZVecQueryParams ZVecQueryParams; + +/** + * @brief HNSW query parameters (opaque pointer) + * Corresponds to zvec::HnswQueryParams + * Use zvec_query_params_hnsw_create() to create + */ +typedef struct ZVecHnswQueryParams ZVecHnswQueryParams; + +/** + * @brief IVF query parameters (opaque pointer) + * Corresponds to zvec::IVFQueryParams + * Use zvec_query_params_ivf_create() to create + */ +typedef struct ZVecIVFQueryParams ZVecIVFQueryParams; + +/** + * @brief Flat query parameters (opaque pointer) + * Corresponds to zvec::FlatQueryParams + * Use zvec_query_params_flat_create() to create + */ +typedef struct ZVecFlatQueryParams ZVecFlatQueryParams; + +// Deprecated: ZVecQueryParamsUnion is no longer needed +// Use specific query params types directly instead + +// ============================================================================= +// Query Structures (Updated to use opaque pointer QueryParams) +// ============================================================================= + +/** + * @brief Vector query structure (opaque pointer) + * Aligned with zvec::VectorQuery + * Use zvec_vector_query_create() to create and zvec_vector_query_destroy() to + * destroy + */ +typedef struct ZVecVectorQuery ZVecVectorQuery; + +/** + * @brief Grouped vector query structure (opaque pointer) + * Aligned with zvec::GroupByVectorQuery + * Use zvec_group_by_vector_query_create() to create and + * zvec_group_by_vector_query_destroy() to destroy + */ +typedef struct ZVecGroupByVectorQuery ZVecGroupByVectorQuery; + + +// ============================================================================= +// Query Parameters Management Functions +// 
============================================================================= + +// ----------------------------------------------------------------------------- +// ZVecQueryParams (Base Query Parameters) +// ----------------------------------------------------------------------------- + +/** + * @brief Create base query parameters + * @param index_type Index type + * @return ZVecQueryParams* Pointer to the newly created query parameters + */ +ZVEC_EXPORT ZVecQueryParams *ZVEC_CALL +zvec_query_params_create(ZVecIndexType index_type); + +/** + * @brief Destroy base query parameters + * @param params Query parameters pointer + */ +ZVEC_EXPORT void ZVEC_CALL zvec_query_params_destroy(ZVecQueryParams *params); + +/** + * @brief Set index type + * @param params Query parameters pointer + * @param index_type Index type + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_query_params_set_index_type( + ZVecQueryParams *params, ZVecIndexType index_type); + +/** + * @brief Get index type + * @param params Query parameters pointer + * @return ZVecIndexType Index type + */ +ZVEC_EXPORT ZVecIndexType ZVEC_CALL +zvec_query_params_get_index_type(const ZVecQueryParams *params); + +/** + * @brief Set search radius + * @param params Query parameters pointer + * @param radius Search radius + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_query_params_set_radius(ZVecQueryParams *params, float radius); + +/** + * @brief Get search radius + * @param params Query parameters pointer + * @return float Search radius + */ +ZVEC_EXPORT float ZVEC_CALL +zvec_query_params_get_radius(const ZVecQueryParams *params); + +/** + * @brief Set linear search mode + * @param params Query parameters pointer + * @param is_linear Whether linear search + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_query_params_set_is_linear(ZVecQueryParams *params, bool is_linear); + +/** + * @brief Get linear search 
mode + * @param params Query parameters pointer + * @return bool Whether linear search + */ +ZVEC_EXPORT bool ZVEC_CALL +zvec_query_params_get_is_linear(const ZVecQueryParams *params); + +/** + * @brief Set whether to use refiner + * @param params Query parameters pointer + * @param is_using_refiner Whether to use refiner + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_query_params_set_is_using_refiner( + ZVecQueryParams *params, bool is_using_refiner); + +/** + * @brief Get whether to use refiner + * @param params Query parameters pointer + * @return bool Whether to use refiner + */ +ZVEC_EXPORT bool ZVEC_CALL +zvec_query_params_get_is_using_refiner(const ZVecQueryParams *params); + +// ----------------------------------------------------------------------------- +// ZVecHnswQueryParams (HNSW Query Parameters) +// ----------------------------------------------------------------------------- + +/** + * @brief Create HNSW query parameters + * @param ef Exploration factor during search (default: 40) + * @param radius Search radius (default: 0.0) + * @param is_linear Whether linear search (default: false) + * @param is_using_refiner Whether using refiner (default: false) + * @return ZVecHnswQueryParams* Pointer to the newly created HNSW query + * parameters + */ +ZVEC_EXPORT ZVecHnswQueryParams *ZVEC_CALL zvec_query_params_hnsw_create( + int ef, float radius, bool is_linear, bool is_using_refiner); + +/** + * @brief Destroy HNSW query parameters + * @param params HNSW query parameters pointer + */ +ZVEC_EXPORT void ZVEC_CALL +zvec_query_params_hnsw_destroy(ZVecHnswQueryParams *params); + +/** + * @brief Set exploration factor + * @param params HNSW query parameters pointer + * @param ef Exploration factor + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_query_params_hnsw_set_ef(ZVecHnswQueryParams *params, int ef); + +/** + * @brief Get exploration factor + * @param params HNSW query parameters 
pointer + * @return int Exploration factor + */ +ZVEC_EXPORT int ZVEC_CALL +zvec_query_params_hnsw_get_ef(const ZVecHnswQueryParams *params); + +// ----------------------------------------------------------------------------- +// ZVecIVFQueryParams (IVF Query Parameters) +// ----------------------------------------------------------------------------- + +/** + * @brief Create IVF query parameters + * @param nprobe Number of clusters to probe (default: 10) + * @param is_using_refiner Whether using refiner (default: false) + * @param scale_factor Scale factor (default: 10.0) + * @return ZVecIVFQueryParams* Pointer to the newly created IVF query parameters + */ +ZVEC_EXPORT ZVecIVFQueryParams *ZVEC_CALL zvec_query_params_ivf_create( + int nprobe, bool is_using_refiner, float scale_factor); + +/** + * @brief Destroy IVF query parameters + * @param params IVF query parameters pointer + */ +ZVEC_EXPORT void ZVEC_CALL +zvec_query_params_ivf_destroy(ZVecIVFQueryParams *params); + +/** + * @brief Set number of probe clusters + * @param params IVF query parameters pointer + * @param nprobe Number of probe clusters + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_query_params_ivf_set_nprobe(ZVecIVFQueryParams *params, int nprobe); + +/** + * @brief Get number of probe clusters + * @param params IVF query parameters pointer + * @return int Number of probe clusters + */ +ZVEC_EXPORT int ZVEC_CALL +zvec_query_params_ivf_get_nprobe(const ZVecIVFQueryParams *params); + +/** + * @brief Set scale factor + * @param params IVF query parameters pointer + * @param scale_factor Scale factor + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_query_params_ivf_set_scale_factor( + ZVecIVFQueryParams *params, float scale_factor); + +/** + * @brief Get scale factor + * @param params IVF query parameters pointer + * @return float Scale factor + */ +ZVEC_EXPORT float ZVEC_CALL +zvec_query_params_ivf_get_scale_factor(const 
ZVecIVFQueryParams *params); + +// ----------------------------------------------------------------------------- +// ZVecFlatQueryParams (Flat Query Parameters) +// ----------------------------------------------------------------------------- + +/** + * @brief Create Flat query parameters + * @param is_using_refiner Whether using refiner (default: false) + * @param scale_factor Scale factor (default: 10.0) + * @return ZVecFlatQueryParams* Pointer to the newly created Flat query + * parameters + */ +ZVEC_EXPORT ZVecFlatQueryParams *ZVEC_CALL +zvec_query_params_flat_create(bool is_using_refiner, float scale_factor); + +/** + * @brief Destroy Flat query parameters + * @param params Flat query parameters pointer + */ +ZVEC_EXPORT void ZVEC_CALL +zvec_query_params_flat_destroy(ZVecFlatQueryParams *params); + +/** + * @brief Set scale factor + * @param params Flat query parameters pointer + * @param scale_factor Scale factor + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_query_params_flat_set_scale_factor( + ZVecFlatQueryParams *params, float scale_factor); + +/** + * @brief Get scale factor + * @param params Flat query parameters pointer + * @return float Scale factor + */ +ZVEC_EXPORT float ZVEC_CALL +zvec_query_params_flat_get_scale_factor(const ZVecFlatQueryParams *params); + +// ----------------------------------------------------------------------------- +// ZVecVectorQuery (Vector Query) +// ----------------------------------------------------------------------------- + +/** + * @brief Create vector query + * @return ZVecVectorQuery* Pointer to the newly created vector query + */ +ZVEC_EXPORT ZVecVectorQuery *ZVEC_CALL zvec_vector_query_create(void); + +/** + * @brief Destroy vector query + * @param query Vector query pointer + */ +ZVEC_EXPORT void ZVEC_CALL zvec_vector_query_destroy(ZVecVectorQuery *query); + +/** + * @brief Set topk (number of results to return) + * @param query Vector query pointer + * @param topk Number of 
results + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_vector_query_set_topk(ZVecVectorQuery *query, int topk); + +/** + * @brief Get topk + * @param query Vector query pointer + * @return int Number of results + */ +ZVEC_EXPORT int ZVEC_CALL +zvec_vector_query_get_topk(const ZVecVectorQuery *query); + +/** + * @brief Set field name + * @param query Vector query pointer + * @param field_name Field name + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_vector_query_set_field_name( + ZVecVectorQuery *query, const char *field_name); + +/** + * @brief Get field name + * @param query Vector query pointer + * @return const char* Field name (owned by query, do not free) + */ +ZVEC_EXPORT const char *ZVEC_CALL +zvec_vector_query_get_field_name(const ZVecVectorQuery *query); + +/** + * @brief Set query vector data + * @param query Vector query pointer + * @param data Vector data pointer + * @param size Data size in bytes + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_vector_query_set_query_vector( + ZVecVectorQuery *query, const void *data, size_t size); + +/** + * @brief Set filter expression + * @param query Vector query pointer + * @param filter Filter expression string + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_vector_query_set_filter(ZVecVectorQuery *query, const char *filter); + +/** + * @brief Get filter expression + * @param query Vector query pointer + * @return const char* Filter expression (owned by query, do not free) + */ +ZVEC_EXPORT const char *ZVEC_CALL +zvec_vector_query_get_filter(const ZVecVectorQuery *query); + +/** + * @brief Set whether to include vector data in results + * @param query Vector query pointer + * @param include Whether to include vector + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_vector_query_set_include_vector(ZVecVectorQuery *query, bool include); 
+ +/** + * @brief Set whether to include doc ID in results + * @param query Vector query pointer + * @param include Whether to include doc ID + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_vector_query_set_include_doc_id(ZVecVectorQuery *query, bool include); + +/** + * @brief Set output fields + * @param query Vector query pointer + * @param fields Array of field names + * @param count Number of fields + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_vector_query_set_output_fields( + ZVecVectorQuery *query, const char **fields, size_t count); + +/** + * @brief Set query parameters (takes ownership) + * @param query Vector query pointer + * @param params Query parameters pointer (type-specific: ZVecHnswQueryParams*, + * etc.) + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_vector_query_set_query_params(ZVecVectorQuery *query, void *params); + +// ----------------------------------------------------------------------------- +// ZVecGroupByVectorQuery (Group By Vector Query) +// ----------------------------------------------------------------------------- + +/** + * @brief Create group by vector query + * @return ZVecGroupByVectorQuery* Pointer to the newly created group by vector + * query + */ +ZVEC_EXPORT ZVecGroupByVectorQuery *ZVEC_CALL +zvec_group_by_vector_query_create(void); + +/** + * @brief Destroy group by vector query + * @param query Group by vector query pointer + */ +ZVEC_EXPORT void ZVEC_CALL +zvec_group_by_vector_query_destroy(ZVecGroupByVectorQuery *query); + +/** + * @brief Set field name + * @param query Group by vector query pointer + * @param field_name Field name + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_group_by_vector_query_set_field_name( + ZVecGroupByVectorQuery *query, const char *field_name); + +/** + * @brief Set group by field name + * @param query Group by vector query pointer + * 
@param field_name Group by field name + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_group_by_vector_query_set_group_by_field_name( + ZVecGroupByVectorQuery *query, const char *field_name); + +/** + * @brief Set group count + * @param query Group by vector query pointer + * @param count Number of groups + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_group_by_vector_query_set_group_count( + ZVecGroupByVectorQuery *query, uint32_t count); + +/** + * @brief Set group topk + * @param query Group by vector query pointer + * @param topk Number of results per group + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_group_by_vector_query_set_group_topk( + ZVecGroupByVectorQuery *query, uint32_t topk); + +/** + * @brief Set query vector data + * @param query Group by vector query pointer + * @param data Vector data pointer + * @param size Data size in bytes + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_group_by_vector_query_set_query_vector( + ZVecGroupByVectorQuery *query, const void *data, size_t size); + +/** + * @brief Set filter expression + * @param query Group by vector query pointer + * @param filter Filter expression string + * @return ZVecErrorCode Error code */ -typedef struct { - ZVecIndexType index_type; /**< Index type */ - ZVecMetricType metric_type; /**< Distance metric type (for vector indexes) */ - ZVecQuantizeType quantize_type; /**< Quantization type (for vector indexes) */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_group_by_vector_query_set_filter( + ZVecGroupByVectorQuery *query, const char *filter); - union { - /** @brief Inverted index specific parameters */ - struct { - bool enable_range_optimization; /**< Whether to enable range optimization - */ - bool enable_extended_wildcard; /**< Whether to enable extended wildcard */ - } invert; +/** + * @brief Set whether to include vector data in results + * @param 
query Group by vector query pointer + * @param include Whether to include vectors + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_group_by_vector_query_set_include_vector(ZVecGroupByVectorQuery *query, + bool include); - /** @brief HNSW index specific parameters */ - struct { - int m; /**< Graph connectivity parameter */ - int ef_construction; /**< Exploration factor during construction */ - int ef_search; /**< Exploration factor during search */ - } hnsw; +/** + * @brief Set output fields + * @param query Group by vector query pointer + * @param fields Array of field names + * @param count Number of fields + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_group_by_vector_query_set_output_fields(ZVecGroupByVectorQuery *query, + const char **fields, size_t count); - /** @brief IVF index specific parameters */ - struct { - int n_list; /**< Number of cluster centers */ - int n_iters; /**< Number of iterations */ - bool use_soar; /**< Whether to use SOAR algorithm */ - int n_probe; /**< Number of clusters to probe during search */ - } ivf; +/** + * @brief Set query parameters (takes ownership) + * @param query Group by vector query pointer + * @param params Query parameters pointer + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_group_by_vector_query_set_query_params( + ZVecGroupByVectorQuery *query, void *params); - /** @brief Flat index has no additional parameters, - * reserved for alignment */ - struct { - int _reserved; - } flat; - }; -} ZVecIndexParams; +// Deprecated macros (use create/destroy functions instead) +// #define ZVEC_VECTOR_QUERY(...) 
- Deprecated, use zvec_vector_query_create() +// and setters #define ZVEC_DEFAULT_OPTIONS() - Deprecated, use +// zvec_collection_options_create() and setters // ============================================================================= -// Field Schema Structures +// Collection Options and Statistics (Opaque Pointer Pattern) // ============================================================================= /** - * @brief Field schema structure + * @brief Collection options (opaque pointer) + * Use zvec_collection_options_create() to create and + * zvec_collection_options_destroy() to destroy */ -typedef struct { - ZVecString *name; /**< Field name */ - ZVecDataType data_type; /**< Data type */ - bool nullable; /**< Whether nullable */ - uint32_t dimension; /**< Vector dimension (only used for vector fields) */ - ZVecIndexParams index_params; /**< Index parameters (embedded, not pointer) */ - bool has_index; /**< Whether this field has an index */ -} ZVecFieldSchema; +typedef struct ZVecCollectionOptions ZVecCollectionOptions; +/** + * @brief Collection statistics (opaque pointer) + * Use zvec_collection_stats_get functions to access fields + */ +typedef struct ZVecCollectionStats ZVecCollectionStats; // ============================================================================= -// Index Parameters Interface +// Collection Options Management Functions // ============================================================================= /** - * @brief Initialize index parameters with default values based on index type - * @param params Index parameters structure pointer - * @param index_type Index type - * @param metric_type Metric type (for vector indexes) + * @brief Create collection options + * @return ZVecCollectionOptions* Pointer to the newly created collection + * options */ -ZVEC_EXPORT void ZVEC_CALL zvec_index_params_init(ZVecIndexParams *params, - ZVecIndexType index_type, - ZVecMetricType metric_type); +ZVEC_EXPORT ZVecCollectionOptions *ZVEC_CALL 
+zvec_collection_options_create(void); /** - * @brief Set HNSW specific parameters - * @param params Index parameters structure pointer (must be HNSW type) - * @param m Graph connectivity parameter - * @param ef_construction Construction exploration factor - * @param ef_search Search exploration factor + * @brief Destroy collection options + * @param options Collection options pointer */ -ZVEC_EXPORT void ZVEC_CALL zvec_index_params_set_hnsw(ZVecIndexParams *params, - int m, - int ef_construction, - int ef_search); +ZVEC_EXPORT void ZVEC_CALL +zvec_collection_options_destroy(ZVecCollectionOptions *options); /** - * @brief Set IVF specific parameters - * @param params Index parameters structure pointer (must be IVF type) - * @param n_list Number of cluster centers - * @param n_iters Number of iterations - * @param use_soar Whether to use SOAR algorithm - * @param n_probe Search probe count + * @brief Set whether to enable memory mapping + * @param options Collection options pointer + * @param enable Whether to enable mmap + * @return ZVecErrorCode Error code */ -ZVEC_EXPORT void ZVEC_CALL zvec_index_params_set_ivf(ZVecIndexParams *params, - int n_list, int n_iters, - bool use_soar, - int n_probe); +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_options_set_enable_mmap( + ZVecCollectionOptions *options, bool enable); /** - * @brief Set invert index specific parameters - * @param params Index parameters structure pointer (must be INVERT type) - * @param enable_range_opt Whether to enable range optimization - * @param enable_wildcard Whether to enable extended wildcard + * @brief Get whether to enable memory mapping + * @param options Collection options pointer + * @return bool Whether mmap is enabled */ -ZVEC_EXPORT void ZVEC_CALL zvec_index_params_set_invert(ZVecIndexParams *params, - bool enable_range_opt, - bool enable_wildcard); - -// ============================================================================= -// Query Parameters Structures -// 
============================================================================= +ZVEC_EXPORT bool ZVEC_CALL +zvec_collection_options_get_enable_mmap(const ZVecCollectionOptions *options); /** - * @brief Base query parameters structure (corresponds to zvec::QueryParams) + * @brief Set maximum buffer size + * @param options Collection options pointer + * @param size Maximum buffer size in bytes + * @return ZVecErrorCode Error code */ -typedef struct { - ZVecIndexType index_type; /**< Index type */ - float radius; /**< Search radius */ - bool is_linear; /**< Whether linear search */ - bool is_using_refiner; /**< Whether using refiner */ -} ZVecQueryParams; +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_options_set_max_buffer_size( + ZVecCollectionOptions *options, size_t size); /** - * @brief HNSW query parameters structure (corresponds to zvec::HnswQueryParams) + * @brief Get maximum buffer size + * @param options Collection options pointer + * @return size_t Maximum buffer size in bytes */ -typedef struct { - ZVecQueryParams base; /**< Inherit base query parameters */ - int ef; /**< Exploration factor during search */ -} ZVecHnswQueryParams; +ZVEC_EXPORT size_t ZVEC_CALL zvec_collection_options_get_max_buffer_size( + const ZVecCollectionOptions *options); /** - * @brief IVF query parameters structure (corresponds to zvec::IVFQueryParams) + * @brief Set whether read-only mode + * @param options Collection options pointer + * @param read_only Whether read-only + * @return ZVecErrorCode Error code */ -typedef struct { - ZVecQueryParams base; /**< Inherit base query parameters */ - int nprobe; /**< Number of clusters to probe during search */ - float scale_factor; /**< Scale factor */ -} ZVecIVFQueryParams; +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_options_set_read_only( + ZVecCollectionOptions *options, bool read_only); /** - * @brief Flat query parameters structure (corresponds to zvec::FlatQueryParams) + * @brief Get whether read-only mode + * @param 
options Collection options pointer + * @return bool Whether read-only mode */ -typedef struct { - ZVecQueryParams base; /**< Inherit base query parameters */ - float scale_factor; /**< Scale factor */ -} ZVecFlatQueryParams; +ZVEC_EXPORT bool ZVEC_CALL +zvec_collection_options_get_read_only(const ZVecCollectionOptions *options); /** - * @brief Query parameters union (supports query parameters for different index - * types) + * @brief Set maximum document count per segment + * @param options Collection options pointer + * @param count Maximum document count + * @return ZVecErrorCode Error code */ -typedef struct { - ZVecIndexType index_type; /**< Index type, used to distinguish the parameter - type stored in the union */ - union { - ZVecQueryParams base_params; /**< Base query parameters */ - ZVecHnswQueryParams hnsw_params; /**< HNSW query parameters */ - ZVecIVFQueryParams ivf_params; /**< IVF query parameters */ - ZVecFlatQueryParams flat_params; /**< Flat query parameters */ - } params; -} ZVecQueryParamsUnion; - -// ============================================================================= -// Query Structures (Updated Version, Including QueryParams) -// ============================================================================= +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_collection_options_set_max_doc_count_per_segment( + ZVecCollectionOptions *options, uint64_t count); /** - * @brief Vector query structure (aligned with zvec::VectorQuery, includes - * QueryParams) - */ -typedef struct { - int topk; /**< Number of results to return */ - ZVecString field_name; /**< Query field name */ - ZVecByteArray query_vector; /**< Query vector (binary data) */ - ZVecByteArray - query_sparse_indices; /**< Sparse vector indices (binary data) */ - ZVecByteArray query_sparse_values; /**< Sparse vector values (binary data) */ - ZVecString filter; /**< Filter expression */ - bool include_vector; /**< Whether to include vector data */ - bool include_doc_id; /**< Whether to 
include document ID */ - ZVecStringArray output_fields; /**< Output field list (NULL means all) */ - ZVecQueryParamsUnion *query_params; /**< Query parameters (optional, NULL - means using default parameters) */ -} ZVecVectorQuery; - -/** - * @brief Grouped vector query structure (aligned with zvec::GroupByVectorQuery, - * includes QueryParams) + * @brief Get maximum document count per segment + * @param options Collection options pointer + * @return uint64_t Maximum document count per segment */ -typedef struct { - ZVecString field_name; /**< Query field name */ - ZVecByteArray query_vector; /**< Query vector (binary data) */ - ZVecByteArray - query_sparse_indices; /**< Sparse vector indices (binary data) */ - ZVecByteArray query_sparse_values; /**< Sparse vector values (binary data) */ - ZVecString filter; /**< Filter expression */ - bool include_vector; /**< Whether to include vector data */ - ZVecStringArray output_fields; /**< Output field list */ - ZVecString group_by_field_name; /**< Group by field name */ - uint32_t group_count; /**< Number of groups */ - uint32_t group_topk; /**< Number of results to return per group */ - ZVecQueryParamsUnion *query_params; /**< Query parameters (optional, NULL - means using default parameters) */ -} ZVecGroupByVectorQuery; - +ZVEC_EXPORT uint64_t ZVEC_CALL +zvec_collection_options_get_max_doc_count_per_segment( + const ZVecCollectionOptions *options); // ============================================================================= -// Query Parameters Management Functions +// Collection Statistics Management Functions // ============================================================================= /** - * @brief Create base query parameters - * @param index_type Index type - * @return ZVecQueryParams* Pointer to the newly created query parameters - */ -ZVEC_EXPORT ZVecQueryParams *ZVEC_CALL -zvec_query_params_create(ZVecIndexType index_type); - -/** - * @brief Create HNSW query parameters - * @param index_type Index type 
(should be ZVEC_INDEX_TYPE_HNSW) - * @param ef Exploration factor during search - * @param radius Search radius - * @param is_linear Whether linear search - * @param is_using_refiner Whether using refiner - * @return ZVecHnswQueryParams* Pointer to the newly created HNSW query - * parameters + * @brief Get document count from collection stats + * @param stats Collection statistics pointer + * @return uint64_t Document count */ -ZVEC_EXPORT ZVecHnswQueryParams *ZVEC_CALL -zvec_query_params_hnsw_create(ZVecIndexType index_type, int ef, float radius, - bool is_linear, bool is_using_refiner); +ZVEC_EXPORT uint64_t ZVEC_CALL +zvec_collection_stats_get_doc_count(const ZVecCollectionStats *stats); /** - * @brief Create IVF query parameters - * @param index_type Index type (should be ZVEC_INDEX_TYPE_IVF) - * @param nprobe Number of clusters to probe during search - * @param is_using_refiner Whether using refiner - * @param scale_factor Scale factor - * @return ZVecIVFQueryParams* Pointer to the newly created IVF query parameters + * @brief Get index count from collection stats + * @param stats Collection statistics pointer + * @return size_t Number of indexes */ -ZVEC_EXPORT ZVecIVFQueryParams *ZVEC_CALL -zvec_query_params_ivf_create(ZVecIndexType index_type, int nprobe, - bool is_using_refiner, float scale_factor); +ZVEC_EXPORT size_t ZVEC_CALL +zvec_collection_stats_get_index_count(const ZVecCollectionStats *stats); /** - * @brief Create Flat query parameters - * @param index_type Index type (should be ZVEC_INDEX_TYPE_FLAT) - * @param is_using_refiner Whether using refiner - * @param scale_factor Scale factor - * @return ZVecFlatQueryParams* Pointer to the newly created Flat query - * parameters + * @brief Get index name at specified index + * @param stats Collection statistics pointer + * @param index Index of the index name + * @return const char* Index name (owned by stats, do not free) */ -ZVEC_EXPORT ZVecFlatQueryParams *ZVEC_CALL zvec_query_params_flat_create( - 
ZVecIndexType index_type, bool is_using_refiner, float scale_factor); +ZVEC_EXPORT const char *ZVEC_CALL zvec_collection_stats_get_index_name( + const ZVecCollectionStats *stats, size_t index); /** - * @brief Create query parameters union - * @param index_type Index type - * @return ZVecQueryParamsUnion* Pointer to the newly created query parameters - * union + * @brief Get index completeness at specified index + * @param stats Collection statistics pointer + * @param index Index of the completeness value + * @return float Index completeness */ -ZVEC_EXPORT ZVecQueryParamsUnion *ZVEC_CALL -zvec_query_params_union_create(ZVecIndexType index_type); +ZVEC_EXPORT float ZVEC_CALL zvec_collection_stats_get_index_completeness( + const ZVecCollectionStats *stats, size_t index); /** - * @brief Destroy base query parameters - * @param params query parameters pointer + * @brief Create field schema + * @param name Field name + * @param data_type Data type + * @param nullable Whether nullable + * @param dimension Vector dimension + * @return ZVecFieldSchema* Pointer to the newly created field schema */ -ZVEC_EXPORT void ZVEC_CALL zvec_query_params_destroy(ZVecQueryParams *params); +ZVEC_EXPORT ZVecFieldSchema *ZVEC_CALL +zvec_field_schema_create(const char *name, ZVecDataType data_type, + bool nullable, uint32_t dimension); /** - * @brief Destroy HNSW query parameters - * @param params HNSW query parameters pointer + * @brief Destroy field schema + * @param schema Field schema pointer */ -ZVEC_EXPORT void ZVEC_CALL -zvec_query_params_hnsw_destroy(ZVecHnswQueryParams *params); +ZVEC_EXPORT void ZVEC_CALL zvec_field_schema_destroy(ZVecFieldSchema *schema); /** - * @brief Destroy IVF query parameters - * @param params IVF query parameters pointer + * @brief Get field name + * @param schema Field schema pointer (must not be NULL) + * @return const char* Field name (owned by schema, do not free) */ -ZVEC_EXPORT void ZVEC_CALL -zvec_query_params_ivf_destroy(ZVecIVFQueryParams 
*params); +ZVEC_EXPORT const char *ZVEC_CALL +zvec_field_schema_get_name(const ZVecFieldSchema *schema); /** - * @brief Destroy Flat query parameters - * @param params Flat query parameters pointer + * @brief Get field data type + * @param schema Field schema pointer (must not be NULL) + * @return ZVecDataType Data type */ -ZVEC_EXPORT void ZVEC_CALL -zvec_query_params_flat_destroy(ZVecFlatQueryParams *params); +ZVEC_EXPORT ZVecDataType ZVEC_CALL +zvec_field_schema_get_data_type(const ZVecFieldSchema *schema); /** - * @brief Destroy query parameters union - * @param params Query parameters union pointer + * @brief Check if field is nullable + * @param schema Field schema pointer (must not be NULL) + * @return bool true if nullable, false otherwise */ -ZVEC_EXPORT void ZVEC_CALL -zvec_query_params_union_destroy(ZVecQueryParamsUnion *params); +ZVEC_EXPORT bool ZVEC_CALL +zvec_field_schema_is_nullable(const ZVecFieldSchema *schema); /** - * @brief Set query parameters index type - * @param params Query parameters pointer - * @param index_type Index type + * @brief Set field nullable + * @param schema Field schema pointer + * @param nullable Whether nullable * @return ZVecErrorCode Error code */ -ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_query_params_set_index_type( - ZVecQueryParams *params, ZVecIndexType index_type); +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_field_schema_set_nullable(ZVecFieldSchema *schema, bool nullable); /** - * @brief Set search radius for query parameters - * @param params Query parameters pointer - * @param radius Search radius - * @return ZVecErrorCode Error code + * @brief Get field dimension (for vector fields) + * @param schema Field schema pointer (must not be NULL) + * @return uint32_t Dimension value */ -ZVEC_EXPORT ZVecErrorCode ZVEC_CALL -zvec_query_params_set_radius(ZVecQueryParams *params, float radius); +ZVEC_EXPORT uint32_t ZVEC_CALL +zvec_field_schema_get_dimension(const ZVecFieldSchema *schema); /** - * @brief Set scale factor 
for query parameters - * @param params Query parameters pointer - * @param scale_factor Scale factor + * @brief Set field dimension (for vector fields) + * @param schema Field schema pointer + * @param dimension Dimension value * @return ZVecErrorCode Error code */ ZVEC_EXPORT ZVecErrorCode ZVEC_CALL -zvec_query_params_set_is_linear(ZVecQueryParams *params, bool is_linear); +zvec_field_schema_set_dimension(ZVecFieldSchema *schema, uint32_t dimension); /** - * @brief Set whether to use refiner for query parameters - * @param params Query parameters pointer - * @param is_using_refiner Whether to use refiner - * @return ZVecErrorCode Error code + * @brief Check if field has index + * @param schema Field schema pointer (must not be NULL) + * @return bool true if has index, false otherwise */ -ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_query_params_set_is_using_refiner( - ZVecQueryParams *params, bool is_using_refiner); +ZVEC_EXPORT bool ZVEC_CALL +zvec_field_schema_has_index(const ZVecFieldSchema *schema); /** - * @brief Set exploration factor for HNSW query parameters - * @param params HNSW query parameters pointer - * @param ef Exploration factor - * @return ZVecErrorCode Error code + * @brief Get index type of the field + * @param schema Field schema pointer (must not be NULL) + * @return ZVecIndexType Index type, ZVEC_INDEX_TYPE_UNDEFINED if no index */ -ZVEC_EXPORT ZVecErrorCode ZVEC_CALL -zvec_query_params_hnsw_set_ef(ZVecHnswQueryParams *params, int ef); +ZVEC_EXPORT ZVecIndexType ZVEC_CALL +zvec_field_schema_get_index_type(const ZVecFieldSchema *schema); /** - * @brief Set number of probe clusters for IVF query parameters - * @param params IVF query parameters pointer - * @param nprobe Number of probe clusters - * @return ZVecErrorCode Error code + * @brief Get index params of the field (returns pointer owned by the field + * schema, do not destroy) + * @param schema Field schema pointer (must not be NULL) + * @return ZVecIndexParams* Index params pointer, NULL 
if no index */ -ZVEC_EXPORT ZVecErrorCode ZVEC_CALL -zvec_query_params_ivf_set_nprobe(ZVecIVFQueryParams *params, int nprobe); +ZVEC_EXPORT const ZVecIndexParams *ZVEC_CALL +zvec_field_schema_get_index_params(const ZVecFieldSchema *schema); /** - * @brief Set scale factor for IVF/Flat query parameters - * @param params IVF or Flat query parameters pointer - * @param scale_factor Scale factor - * @return ZVecErrorCode Error code + * @brief Check if field is a vector field (dense or sparse) + * @param schema Field schema pointer (must not be NULL) + * @return bool true if vector field, false otherwise */ -ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_query_params_ivf_set_scale_factor( - ZVecIVFQueryParams *params, float scale_factor); +ZVEC_EXPORT bool ZVEC_CALL +zvec_field_schema_is_vector_field(const ZVecFieldSchema *schema); /** - * @brief Collection options structure + * @brief Check if field is a dense vector field + * @param schema Field schema pointer (must not be NULL) + * @return bool true if dense vector field, false otherwise */ -typedef struct { - bool enable_mmap; /**< Whether to enable memory mapping */ - size_t max_buffer_size; /**< Maximum buffer size */ - bool read_only; /**< Whether read-only mode */ - uint64_t max_doc_count_per_segment; /**< Maximum document count per segment */ -} ZVecCollectionOptions; - +ZVEC_EXPORT bool ZVEC_CALL +zvec_field_schema_is_dense_vector(const ZVecFieldSchema *schema); /** - * @brief Collection statistics structure + * @brief Check if field is a sparse vector field + * @param schema Field schema pointer (must not be NULL) + * @return bool true if sparse vector field, false otherwise */ -typedef struct { - uint64_t doc_count; /**< Total document count */ - ZVecString **index_names; /**< Index name array */ - float *index_completeness; /**< Index completeness array */ - size_t index_count; /**< Index name count */ -} ZVecCollectionStats; +ZVEC_EXPORT bool ZVEC_CALL +zvec_field_schema_is_sparse_vector(const ZVecFieldSchema 
*schema); +/** + * @brief Check if field is an array type + * @param schema Field schema pointer (must not be NULL) + * @return bool true if array type, false otherwise + */ +ZVEC_EXPORT bool ZVEC_CALL +zvec_field_schema_is_array_type(const ZVecFieldSchema *schema); /** - * @brief Create field schema - * @param name Field name - * @param data_type Data type - * @param nullable Whether nullable - * @param dimension Vector dimension - * @return ZVecFieldSchema* Pointer to the newly created field schema + * @brief Get element data type for array fields + * @param schema Field schema pointer (must not be NULL) + * @return ZVecDataType Element data type, or original type if not array */ -ZVEC_EXPORT ZVecFieldSchema *ZVEC_CALL -zvec_field_schema_create(const char *name, ZVecDataType data_type, - bool nullable, uint32_t dimension); +ZVEC_EXPORT ZVecDataType ZVEC_CALL +zvec_field_schema_get_element_data_type(const ZVecFieldSchema *schema); /** - * @brief Destroy field schema - * @param schema Field schema pointer + * @brief Check if field has inverted index (for scalar fields) + * @param schema Field schema pointer (must not be NULL) + * @return bool true if has inverted index, false otherwise */ -ZVEC_EXPORT void ZVEC_CALL zvec_field_schema_destroy(ZVecFieldSchema *schema); +ZVEC_EXPORT bool ZVEC_CALL +zvec_field_schema_has_invert_index(const ZVecFieldSchema *schema); /** * @brief Set index parameters for field @@ -1056,9 +1791,8 @@ ZVEC_EXPORT void ZVEC_CALL zvec_field_schema_destroy(ZVecFieldSchema *schema); ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_field_schema_set_index_params( ZVecFieldSchema *schema, const ZVecIndexParams *index_params); - /** - * @brief Set inverted index parameters for field schema + * @brief Set inverted index parameters for field schema (convenience function) * @param field_schema Field schema pointer * @param invert_params Inverted index parameters pointer */ @@ -1066,7 +1800,7 @@ ZVEC_EXPORT void ZVEC_CALL zvec_field_schema_set_invert_index( 
ZVecFieldSchema *field_schema, const ZVecIndexParams *invert_params); /** - * @brief Set HNSW index parameters for field schema + * @brief Set HNSW index parameters for field schema (convenience function) * @param field_schema Field schema pointer * @param hnsw_params HNSW index parameters pointer */ @@ -1074,7 +1808,7 @@ ZVEC_EXPORT void ZVEC_CALL zvec_field_schema_set_hnsw_index( ZVecFieldSchema *field_schema, const ZVecIndexParams *hnsw_params); /** - * @brief Set Flat index parameters for field schema + * @brief Set Flat index parameters for field schema (convenience function) * @param field_schema Field schema pointer * @param flat_params Flat index parameters pointer */ @@ -1082,7 +1816,15 @@ ZVEC_EXPORT void ZVEC_CALL zvec_field_schema_set_flat_index( ZVecFieldSchema *field_schema, const ZVecIndexParams *flat_params); /** - * @brief Set IVF index parameters for field schema + * @brief Clear index from field schema + * @param schema Field schema pointer + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_field_schema_clear_index(ZVecFieldSchema *schema); + +/** + * @brief Set IVF index parameters for field schema (convenience function) * @param field_schema Field schema pointer * @param ivf_params IVF index parameters pointer */ @@ -1095,15 +1837,13 @@ ZVEC_EXPORT void ZVEC_CALL zvec_field_schema_set_ivf_index( // ============================================================================= /** - * @brief Collection schema structure + * @brief Collection schema (opaque pointer) + * + * Use zvec_collection_schema_create() to create and + * zvec_collection_schema_destroy() to destroy. Fields are accessed via + * getter/setter functions. 
*/ -typedef struct { - ZVecString *name; /**< Collection name */ - ZVecFieldSchema **fields; /**< Field array */ - size_t field_count; /**< Field count */ - size_t field_capacity; /**< Field array capacity */ - uint64_t max_doc_count_per_segment; /**< Maximum document count per segment */ -} ZVecCollectionSchema; +typedef struct ZVecCollectionSchema ZVecCollectionSchema; /** * @brief Create collection schema @@ -1120,6 +1860,14 @@ zvec_collection_schema_create(const char *name); ZVEC_EXPORT void ZVEC_CALL zvec_collection_schema_destroy(ZVecCollectionSchema *schema); +/** + * @brief Get collection schema name + * @param schema Collection schema pointer (must not be NULL) + * @return const char* Collection name (owned by schema, do not free) + */ +ZVEC_EXPORT const char *ZVEC_CALL +zvec_collection_schema_get_name(const ZVecCollectionSchema *schema); + /** * @brief Add field to collection schema * @param schema Collection schema pointer @@ -1133,12 +1881,12 @@ ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_schema_add_field( * @brief Add multiple fields to collection schema at once * * @param schema Collection schema pointer - * @param fields Array of fields to add + * @param fields Array of field pointers to add * @param field_count Number of fields to add * @return ZVecErrorCode Error code */ ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_schema_add_fields( - ZVecCollectionSchema *schema, const ZVecFieldSchema *fields, + ZVecCollectionSchema *schema, const ZVecFieldSchema *const *fields, size_t field_count); /** @@ -1220,6 +1968,129 @@ ZVEC_EXPORT uint64_t ZVEC_CALL zvec_collection_schema_get_max_doc_count_per_segment( const ZVecCollectionSchema *schema); +/** + * @brief Set collection schema name + * @param schema Collection schema pointer + * @param name New collection name + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_collection_schema_set_name(ZVecCollectionSchema *schema, const char *name); + +/** + * @brief Check 
if field exists in schema + * @param schema Collection schema pointer + * @param field_name Field name to check + * @return true if field exists, false otherwise + */ +ZVEC_EXPORT bool ZVEC_CALL zvec_collection_schema_has_field( + const ZVecCollectionSchema *schema, const char *field_name); + +/** + * @brief Alter field schema + * @param schema Collection schema pointer + * @param field_name Name of field to alter + * @param new_field New field schema with updated properties + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_schema_alter_field( + ZVecCollectionSchema *schema, const char *field_name, + const ZVecFieldSchema *new_field); + +/** + * @brief Get forward (scalar) field by name + * @param schema Collection schema pointer + * @param field_name Field name + * @return ZVecFieldSchema* Field schema pointer, NULL if not found or not + * scalar + */ +ZVEC_EXPORT ZVecFieldSchema *ZVEC_CALL zvec_collection_schema_get_forward_field( + const ZVecCollectionSchema *schema, const char *field_name); + +/** + * @brief Get vector field by name + * @param schema Collection schema pointer + * @param field_name Field name + * @return ZVecFieldSchema* Field schema pointer, NULL if not found or not + * vector + */ +ZVEC_EXPORT ZVecFieldSchema *ZVEC_CALL zvec_collection_schema_get_vector_field( + const ZVecCollectionSchema *schema, const char *field_name); + +/** + * @brief Get all forward (scalar) fields + * @param schema Collection schema pointer + * @param[out] fields Output array of field pointers (owned by caller, do not + * destroy) + * @param[out] count Number of fields + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_schema_get_forward_fields( + const ZVecCollectionSchema *schema, ZVecFieldSchema ***fields, + size_t *count); + +/** + * @brief Get all forward fields with index + * @param schema Collection schema pointer + * @param[out] fields Output array of field pointers + * 
@param[out] count Number of fields + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_collection_schema_get_forward_fields_with_index( + const ZVecCollectionSchema *schema, ZVecFieldSchema ***fields, + size_t *count); + +/** + * @brief Get all field names + * @param schema Collection schema pointer + * @param[out] names Output array of field names (owned by caller, do not free) + * @param[out] count Number of field names + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_schema_get_all_field_names( + const ZVecCollectionSchema *schema, const char ***names, size_t *count); + +/** + * @brief Get all vector fields + * @param schema Collection schema pointer + * @param[out] fields Output array of field pointers + * @param[out] count Number of fields + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_schema_get_vector_fields( + const ZVecCollectionSchema *schema, ZVecFieldSchema ***fields, + size_t *count); + +/** + * @brief Check if field has index + * @param schema Collection schema pointer + * @param field_name Field name + * @return true if field has index, false otherwise + */ +ZVEC_EXPORT bool ZVEC_CALL zvec_collection_schema_has_index( + const ZVecCollectionSchema *schema, const char *field_name); + +/** + * @brief Add index to field + * @param schema Collection schema pointer + * @param field_name Field name to add index to + * @param index_params Index parameters + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_schema_add_index( + ZVecCollectionSchema *schema, const char *field_name, + const ZVecIndexParams *index_params); + +/** + * @brief Drop index from field + * @param schema Collection schema pointer + * @param field_name Field name to drop index from + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_schema_drop_index( + ZVecCollectionSchema
*schema, const char *field_name); + // ============================================================================= // Collection Management Functions @@ -1288,13 +2159,6 @@ ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_get_schema( const ZVecCollection *collection, ZVecCollectionSchema **schema); -/** - * @brief Initialize default collection options - * @param options Collection options structure pointer - */ -ZVEC_EXPORT void ZVEC_CALL -zvec_collection_options_init_default(ZVecCollectionOptions *options); - /** * @brief Get collection options * @param collection Collection handle @@ -2216,37 +3080,6 @@ const char *zvec_metric_type_to_string(ZVecMetricType metric_type); .data = data_ptr, .length = len \ } -/** - * @brief Simplified collection options initialization macro (using default - * values) - * - * Usage example: - * ZVecCollectionOptions opts = ZVEC_DEFAULT_OPTIONS(); - */ -#define ZVEC_DEFAULT_OPTIONS() \ - (ZVecCollectionOptions) { \ - .enable_mmap = true, .max_buffer_size = 1048576, .read_only = false, \ - .max_doc_count_per_segment = 1000000 \ - } - -/** - * @brief Simplified vector query initialization macro - * @param field_name_str Query field name - * @param query_vec Query vector array - * @param top_k Number of results to return - * @param filter_str Filter condition string - * - * Usage example: - * ZVecVectorQuery query = ZVEC_VECTOR_QUERY("embedding", query_vectors, 10, - * ""); - */ -#define ZVEC_VECTOR_QUERY(field_name_str, query_vec, top_k, filter_str) \ - (ZVecVectorQuery) { \ - .field_name = ZVEC_STRING(field_name_str), .query_vector = query_vec, \ - .topk = top_k, .filter = ZVEC_STRING(filter_str), .include_vector = 1, \ - .include_doc_id = 1 \ - } - /** * @brief Simplified document field initialization macro * @param name_str Field name diff --git a/tests/c/c_api_test.c b/tests/c/c_api_test.c index 0088cfef..f3946c61 100644 --- a/tests/c/c_api_test.c +++ b/tests/c/c_api_test.c @@ -132,7 +132,8 @@ void test_zvec_config() { 
zvec_config_console_log_create(ZVEC_LOG_LEVEL_INFO); TEST_ASSERT(console_config != NULL); if (console_config) { - TEST_ASSERT(console_config->level == ZVEC_LOG_LEVEL_INFO); + TEST_ASSERT(zvec_config_console_log_get_level(console_config) == + ZVEC_LOG_LEVEL_INFO); zvec_config_console_log_destroy(console_config); } @@ -141,11 +142,14 @@ void test_zvec_config() { ZVEC_LOG_LEVEL_WARN, "./logs", "test_log", 100, 7); TEST_ASSERT(file_config != NULL); if (file_config) { - TEST_ASSERT(file_config->level == ZVEC_LOG_LEVEL_WARN); - TEST_ASSERT(strcmp(file_config->dir.data, "./logs") == 0); - TEST_ASSERT(strcmp(file_config->basename.data, "test_log") == 0); - TEST_ASSERT(file_config->file_size == 100); - TEST_ASSERT(file_config->overdue_days == 7); + TEST_ASSERT(zvec_config_file_log_get_level(file_config) == + ZVEC_LOG_LEVEL_WARN); + TEST_ASSERT(strcmp(zvec_config_file_log_get_dir(file_config), "./logs") == + 0); + TEST_ASSERT(strcmp(zvec_config_file_log_get_basename(file_config), + "test_log") == 0); + TEST_ASSERT(zvec_config_file_log_get_file_size(file_config) == 100); + TEST_ASSERT(zvec_config_file_log_get_overdue_days(file_config) == 7); zvec_config_file_log_destroy(file_config); } @@ -154,11 +158,14 @@ void test_zvec_config() { zvec_config_file_log_create(ZVEC_LOG_LEVEL_INFO, "", "", 0, 0); TEST_ASSERT(empty_file_config != NULL); if (empty_file_config) { - TEST_ASSERT(empty_file_config->level == ZVEC_LOG_LEVEL_INFO); - TEST_ASSERT(strcmp(empty_file_config->dir.data, "") == 0); - TEST_ASSERT(strcmp(empty_file_config->basename.data, "") == 0); - TEST_ASSERT(empty_file_config->file_size == 0); - TEST_ASSERT(empty_file_config->overdue_days == 0); + TEST_ASSERT(zvec_config_file_log_get_level(empty_file_config) == + ZVEC_LOG_LEVEL_INFO); + TEST_ASSERT(strcmp(zvec_config_file_log_get_dir(empty_file_config), "") == + 0); + TEST_ASSERT( + strcmp(zvec_config_file_log_get_basename(empty_file_config), "") == 0); + TEST_ASSERT(zvec_config_file_log_get_file_size(empty_file_config) == 
0); + TEST_ASSERT(zvec_config_file_log_get_overdue_days(empty_file_config) == 0); zvec_config_file_log_destroy(empty_file_config); } @@ -174,11 +181,12 @@ void test_zvec_config() { ZVecFileLogConfig *temp_file = zvec_config_file_log_create( ZVEC_LOG_LEVEL_DEBUG, "./logs", "app", 50, 30); TEST_ASSERT(temp_file != NULL); - TEST_ASSERT(temp_file->level == ZVEC_LOG_LEVEL_DEBUG); - TEST_ASSERT(strcmp(temp_file->dir.data, "./logs") == 0); - TEST_ASSERT(strcmp(temp_file->basename.data, "app") == 0); - TEST_ASSERT(temp_file->file_size == 50); - TEST_ASSERT(temp_file->overdue_days == 30); + TEST_ASSERT(zvec_config_file_log_get_level(temp_file) == + ZVEC_LOG_LEVEL_DEBUG); + TEST_ASSERT(strcmp(zvec_config_file_log_get_dir(temp_file), "./logs") == 0); + TEST_ASSERT(strcmp(zvec_config_file_log_get_basename(temp_file), "app") == 0); + TEST_ASSERT(zvec_config_file_log_get_file_size(temp_file) == 50); + TEST_ASSERT(zvec_config_file_log_get_overdue_days(temp_file) == 30); zvec_config_file_log_destroy(temp_file); @@ -187,34 +195,38 @@ void test_zvec_config() { TEST_ASSERT(config_data != NULL); if (config_data) { // Test initial values - TEST_ASSERT(config_data->log_config != NULL); - TEST_ASSERT(config_data->log_type == ZVEC_LOG_TYPE_CONSOLE); + TEST_ASSERT(zvec_config_data_get_console_log_config(config_data) != NULL); + TEST_ASSERT(zvec_config_data_get_log_type(config_data) == + ZVEC_LOG_TYPE_CONSOLE); // Test memory limit setting ZVecErrorCode err = zvec_config_data_set_memory_limit(config_data, 1024 * 1024 * 1024); TEST_ASSERT(err == ZVEC_OK); - TEST_ASSERT(config_data->memory_limit_bytes == 1024 * 1024 * 1024); + TEST_ASSERT(zvec_config_data_get_memory_limit(config_data) == + 1024 * 1024 * 1024); // Test thread count settings err = zvec_config_data_set_query_thread_count(config_data, 8); TEST_ASSERT(err == ZVEC_OK); - TEST_ASSERT(config_data->query_thread_count == 8); + TEST_ASSERT(zvec_config_data_get_query_thread_count(config_data) == 8); err = 
zvec_config_data_set_optimize_thread_count(config_data, 4); TEST_ASSERT(err == ZVEC_OK); - TEST_ASSERT(config_data->optimize_thread_count == 4); + TEST_ASSERT(zvec_config_data_get_optimize_thread_count(config_data) == 4); // Test log config replacement - TEST_ASSERT(config_data->log_type == ZVEC_LOG_TYPE_CONSOLE); - TEST_ASSERT(config_data->log_config != NULL); + TEST_ASSERT(zvec_config_data_get_log_type(config_data) == + ZVEC_LOG_TYPE_CONSOLE); + TEST_ASSERT(zvec_config_data_get_console_log_config(config_data) != NULL); ZVecFileLogConfig *new_file = zvec_config_file_log_create( ZVEC_LOG_LEVEL_DEBUG, "./logs", "app", 50, 30); TEST_ASSERT(new_file != NULL); zvec_config_data_set_log_config(config_data, ZVEC_LOG_TYPE_FILE, new_file); - TEST_ASSERT(config_data->log_type == ZVEC_LOG_TYPE_FILE); - TEST_ASSERT(config_data->log_config != NULL); + TEST_ASSERT(zvec_config_data_get_log_type(config_data) == + ZVEC_LOG_TYPE_FILE); + TEST_ASSERT(zvec_config_data_get_file_log_config(config_data) != NULL); zvec_config_data_destroy(config_data); } @@ -239,21 +251,23 @@ void test_zvec_config() { // Test zero values err = zvec_config_data_set_memory_limit(boundary_config, 0); TEST_ASSERT(err == ZVEC_OK); - TEST_ASSERT(boundary_config->memory_limit_bytes == 0); + TEST_ASSERT(zvec_config_data_get_memory_limit(boundary_config) == 0); // Test maximum values err = zvec_config_data_set_memory_limit(boundary_config, UINT64_MAX); TEST_ASSERT(err == ZVEC_OK); - TEST_ASSERT(boundary_config->memory_limit_bytes == UINT64_MAX); + TEST_ASSERT(zvec_config_data_get_memory_limit(boundary_config) == + UINT64_MAX); // Test zero thread counts err = zvec_config_data_set_query_thread_count(boundary_config, 0); TEST_ASSERT(err == ZVEC_OK); - TEST_ASSERT(boundary_config->query_thread_count == 0); + TEST_ASSERT(zvec_config_data_get_query_thread_count(boundary_config) == 0); err = zvec_config_data_set_optimize_thread_count(boundary_config, 0); TEST_ASSERT(err == ZVEC_OK); - 
TEST_ASSERT(boundary_config->optimize_thread_count == 0); + TEST_ASSERT(zvec_config_data_get_optimize_thread_count(boundary_config) == + 0); zvec_config_data_destroy(boundary_config); } @@ -273,8 +287,8 @@ void test_zvec_initialize() { ZVecConfigData *config = zvec_config_data_create(); TEST_ASSERT(config != NULL); if (config) { - TEST_ASSERT(config->log_config != NULL); - TEST_ASSERT(config->log_type == ZVEC_LOG_TYPE_CONSOLE); + TEST_ASSERT(zvec_config_data_get_console_log_config(config) != NULL); + TEST_ASSERT(zvec_config_data_get_log_type(config) == ZVEC_LOG_TYPE_CONSOLE); } ZVecErrorCode err = zvec_initialize(config); TEST_ASSERT(err == ZVEC_OK); @@ -293,11 +307,10 @@ void test_schema_basic_operations(void) { // Test 1: Basic Schema creation and destruction ZVecCollectionSchema *schema = zvec_collection_schema_create("demo"); TEST_ASSERT(schema != NULL); - TEST_ASSERT(schema->name != NULL); - TEST_ASSERT(strcmp(schema->name->data, "demo") == 0); - TEST_ASSERT(schema->field_count == 0); - TEST_ASSERT(schema->fields == NULL); - TEST_ASSERT(schema->max_doc_count_per_segment > 0); + TEST_ASSERT(zvec_collection_schema_get_name(schema) != NULL); + TEST_ASSERT(strcmp(zvec_collection_schema_get_name(schema), "demo") == 0); + TEST_ASSERT(zvec_collection_schema_get_field_count(schema) == 0); + TEST_ASSERT(zvec_collection_schema_get_max_doc_count_per_segment(schema) > 0); // Test 2: Schema field count operations size_t initial_count = zvec_collection_schema_get_field_count(schema); @@ -316,23 +329,24 @@ void test_schema_basic_operations(void) { const ZVecFieldSchema *found_field = zvec_collection_schema_find_field(schema, "id"); TEST_ASSERT(found_field != NULL); - TEST_ASSERT(strcmp(found_field->name->data, "id") == 0); - TEST_ASSERT(found_field->data_type == ZVEC_DATA_TYPE_INT64); + TEST_ASSERT(strcmp(zvec_field_schema_get_name(found_field), "id") == 0); + TEST_ASSERT(zvec_field_schema_get_data_type(found_field) == + ZVEC_DATA_TYPE_INT64); // Test 5: Getting field by 
index ZVecFieldSchema *indexed_field = zvec_collection_schema_get_field(schema, 0); TEST_ASSERT(indexed_field != NULL); - TEST_ASSERT(strcmp(indexed_field->name->data, "id") == 0); + TEST_ASSERT(strcmp(zvec_field_schema_get_name(indexed_field), "id") == 0); // Test 6: Adding multiple fields - ZVecFieldSchema fields_to_add[2]; + const ZVecFieldSchema *fields_to_add[2]; ZVecFieldSchema *name_field = zvec_field_schema_create("name", ZVEC_DATA_TYPE_STRING, false, 0); ZVecFieldSchema *age_field = zvec_field_schema_create("age", ZVEC_DATA_TYPE_INT32, true, 0); - fields_to_add[0] = *name_field; - fields_to_add[1] = *age_field; + fields_to_add[0] = name_field; + fields_to_add[1] = age_field; err = zvec_collection_schema_add_fields(schema, fields_to_add, 2); TEST_ASSERT(err == ZVEC_OK); @@ -344,12 +358,16 @@ void test_schema_basic_operations(void) { const ZVecFieldSchema *name_found = zvec_collection_schema_find_field(schema, "name"); TEST_ASSERT(name_found != NULL); - TEST_ASSERT(strcmp(name_found->name->data, "name") == 0); + TEST_ASSERT(strcmp(zvec_field_schema_get_name(name_found), "name") == 0); const ZVecFieldSchema *age_found = zvec_collection_schema_find_field(schema, "age"); TEST_ASSERT(age_found != NULL); - TEST_ASSERT(strcmp(age_found->name->data, "age") == 0); + TEST_ASSERT(strcmp(zvec_field_schema_get_name(age_found), "age") == 0); + + // Clean up fields we created + zvec_field_schema_destroy(name_field); + zvec_field_schema_destroy(age_field); // Test 8: Setting and getting max doc count err = zvec_collection_schema_set_max_doc_count_per_segment(schema, 10000); @@ -400,8 +418,8 @@ void test_schema_edge_cases(void) { // Test 2: Empty string schema name ZVecCollectionSchema *empty_schema = zvec_collection_schema_create(""); TEST_ASSERT(empty_schema != NULL); - TEST_ASSERT(empty_schema->name != NULL); - TEST_ASSERT(strcmp(empty_schema->name->data, "") == 0); + TEST_ASSERT(zvec_collection_schema_get_name(empty_schema) != NULL); + 
TEST_ASSERT(strcmp(zvec_collection_schema_get_name(empty_schema), "") == 0); zvec_collection_schema_destroy(empty_schema); // Test 3: Very long schema name @@ -410,8 +428,8 @@ void test_schema_edge_cases(void) { long_name[1023] = '\0'; ZVecCollectionSchema *long_schema = zvec_collection_schema_create(long_name); TEST_ASSERT(long_schema != NULL); - TEST_ASSERT(long_schema->name != NULL); - TEST_ASSERT(strlen(long_schema->name->data) == 1023); + TEST_ASSERT(zvec_collection_schema_get_name(long_schema) != NULL); + TEST_ASSERT(strlen(zvec_collection_schema_get_name(long_schema)) == 1023); zvec_collection_schema_destroy(long_schema); // Test 4: NULL schema parameter handling for all functions @@ -568,8 +586,9 @@ void test_schema_field_operations(void) { zvec_collection_schema_find_field(schema, "id"); TEST_ASSERT(id_field != NULL); if (id_field) { - TEST_ASSERT(strcmp(id_field->name->data, "id") == 0); - TEST_ASSERT(id_field->data_type == ZVEC_DATA_TYPE_INT64); + TEST_ASSERT(strcmp(zvec_field_schema_get_name(id_field), "id") == 0); + TEST_ASSERT(zvec_field_schema_get_data_type(id_field) == + ZVEC_DATA_TYPE_INT64); } zvec_collection_schema_destroy(schema); @@ -586,7 +605,8 @@ void test_normal_schema_creation(void) { TEST_ASSERT(schema != NULL); if (schema) { - TEST_ASSERT(strcmp(schema->name->data, "test_normal") == 0); + TEST_ASSERT( + strcmp(zvec_collection_schema_get_name(schema), "test_normal") == 0); // Verify field count size_t field_count = zvec_collection_schema_get_field_count(schema); @@ -667,6 +687,156 @@ void test_schema_max_doc_count(void) { TEST_END(); } +void test_collection_schema_helpers(void) { + TEST_START(); + + // Create schema with various field types + ZVecCollectionSchema *schema = zvec_collection_schema_create("helper_test"); + TEST_ASSERT(schema != NULL); + + if (schema) { + // Add scalar fields + ZVecFieldSchema *int_field = + zvec_field_schema_create("int_field", ZVEC_DATA_TYPE_INT32, false, 0); + ZVecFieldSchema *str_field = + 
zvec_field_schema_create("str_field", ZVEC_DATA_TYPE_STRING, true, 0); + + // Add vector field + ZVecFieldSchema *vec_field = zvec_field_schema_create( + "vec_field", ZVEC_DATA_TYPE_VECTOR_FP32, false, 128); + + zvec_collection_schema_add_field(schema, int_field); + zvec_collection_schema_add_field(schema, str_field); + zvec_collection_schema_add_field(schema, vec_field); + + // Test has_field + TEST_ASSERT(zvec_collection_schema_has_field(schema, "int_field") == true); + TEST_ASSERT(zvec_collection_schema_has_field(schema, "str_field") == true); + TEST_ASSERT(zvec_collection_schema_has_field(schema, "vec_field") == true); + TEST_ASSERT(zvec_collection_schema_has_field(schema, "nonexistent") == + false); + + // Test get_forward_field (scalar field) + ZVecFieldSchema *found_int = + zvec_collection_schema_get_forward_field(schema, "int_field"); + TEST_ASSERT(found_int != NULL); + TEST_ASSERT(zvec_field_schema_get_data_type(found_int) == + ZVEC_DATA_TYPE_INT32); + + // get_forward_field should return NULL for vector field + ZVecFieldSchema *vec_as_forward = + zvec_collection_schema_get_forward_field(schema, "vec_field"); + TEST_ASSERT(vec_as_forward == NULL); + + // Test get_vector_field + ZVecFieldSchema *found_vec = + zvec_collection_schema_get_vector_field(schema, "vec_field"); + TEST_ASSERT(found_vec != NULL); + TEST_ASSERT(zvec_field_schema_is_vector_field(found_vec) == true); + + // get_vector_field should return NULL for scalar field + ZVecFieldSchema *int_as_vec = + zvec_collection_schema_get_vector_field(schema, "int_field"); + TEST_ASSERT(int_as_vec == NULL); + + // Test get_all_field_names + const char **names = NULL; + size_t name_count = 0; + ZVecErrorCode err = + zvec_collection_schema_get_all_field_names(schema, &names, &name_count); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(name_count == 3); + // Note: We don't free names as they are owned by the schema + + // Test get_forward_fields + ZVecFieldSchema **forward_fields = NULL; + size_t forward_count 
= 0; + err = zvec_collection_schema_get_forward_fields(schema, &forward_fields, + &forward_count); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(forward_count == 2); // int_field and str_field + free(forward_fields); + + // Test get_vector_fields + ZVecFieldSchema **vector_fields = NULL; + size_t vector_count = 0; + err = zvec_collection_schema_get_vector_fields(schema, &vector_fields, + &vector_count); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(vector_count == 1); // vec_field + free(vector_fields); + + // Test has_index (initially no fields have index) + TEST_ASSERT(zvec_collection_schema_has_index(schema, "int_field") == false); + TEST_ASSERT(zvec_collection_schema_has_index(schema, "str_field") == false); + TEST_ASSERT(zvec_collection_schema_has_index(schema, "vec_field") == false); + + // Test add_index + ZVecIndexParams *invert_params = + zvec_index_params_create(ZVEC_INDEX_TYPE_INVERT); + TEST_ASSERT(invert_params != NULL); + + err = zvec_collection_schema_add_index(schema, "int_field", invert_params); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(zvec_collection_schema_has_index(schema, "int_field") == true); + + // Test drop_index + err = zvec_collection_schema_drop_index(schema, "int_field"); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(zvec_collection_schema_has_index(schema, "int_field") == false); + + zvec_index_params_destroy(invert_params); + zvec_collection_schema_destroy(schema); + } + + TEST_END(); +} + +void test_collection_schema_alter_field(void) { + TEST_START(); + + ZVecCollectionSchema *schema = zvec_collection_schema_create("alter_test"); + TEST_ASSERT(schema != NULL); + + if (schema) { + // Create initial field + ZVecFieldSchema *field = + zvec_field_schema_create("test_field", ZVEC_DATA_TYPE_INT32, false, 0); + TEST_ASSERT(field != NULL); + + ZVecErrorCode err = zvec_collection_schema_add_field(schema, field); + TEST_ASSERT(err == ZVEC_OK); + + // Verify initial state + ZVecFieldSchema *found = + zvec_collection_schema_find_field(schema, 
"test_field"); + TEST_ASSERT(found != NULL); + TEST_ASSERT(zvec_field_schema_is_nullable(found) == false); + + // Alter the field to make it nullable + ZVecFieldSchema *new_field = + zvec_field_schema_create("test_field", ZVEC_DATA_TYPE_INT32, true, 0); + TEST_ASSERT(new_field != NULL); + + err = zvec_collection_schema_alter_field(schema, "test_field", new_field); + TEST_ASSERT(err == ZVEC_OK); + + // Verify the change + found = zvec_collection_schema_find_field(schema, "test_field"); + TEST_ASSERT(found != NULL); + TEST_ASSERT(zvec_field_schema_is_nullable(found) == true); + + // Test alter non-existent field + err = zvec_collection_schema_alter_field(schema, "nonexistent", new_field); + TEST_ASSERT(err != ZVEC_OK); + + zvec_field_schema_destroy(new_field); + zvec_collection_schema_destroy(schema); + } + + TEST_END(); +} + // ============================================================================= // Collection-related tests // ============================================================================= @@ -879,7 +1049,7 @@ void test_collection_stats(void) { if (stats) { // Basic validation of statistics - TEST_ASSERT(stats->doc_count == + TEST_ASSERT(zvec_collection_stats_get_doc_count(stats) == 0); // New collection should have no documents zvec_collection_stats_destroy(stats); } @@ -905,46 +1075,123 @@ void test_collection_stats(void) { void test_field_schema_functions(void) { TEST_START(); - // Test scalar field creation - ZVecFieldSchema scalar_field = {0}; - ZVecString name1 = {0}; - name1.data = "test_field"; - name1.length = 10; - scalar_field.name = &name1; - scalar_field.data_type = ZVEC_DATA_TYPE_STRING; - scalar_field.nullable = true; - scalar_field.dimension = 0; - - TEST_ASSERT(strcmp(scalar_field.name->data, "test_field") == 0); - TEST_ASSERT(scalar_field.data_type == ZVEC_DATA_TYPE_STRING); - TEST_ASSERT(scalar_field.nullable == true); - - // Test vector field creation - ZVecFieldSchema vector_field = {0}; - ZVecString name2 = {0}; - 
name2.data = "vec_field"; - name2.length = 9; - vector_field.name = &name2; - vector_field.data_type = ZVEC_DATA_TYPE_VECTOR_FP32; - vector_field.nullable = false; - vector_field.dimension = 128; - - TEST_ASSERT(strcmp(vector_field.name->data, "vec_field") == 0); - TEST_ASSERT(vector_field.data_type == ZVEC_DATA_TYPE_VECTOR_FP32); - TEST_ASSERT(vector_field.dimension == 128); - - // Test sparse vector field creation - ZVecFieldSchema sparse_field = {0}; - ZVecString name3 = {0}; - name3.data = "sparse_field"; - name3.length = 12; - sparse_field.name = &name3; - sparse_field.data_type = ZVEC_DATA_TYPE_SPARSE_VECTOR_FP32; - sparse_field.nullable = false; - sparse_field.dimension = 0; - - TEST_ASSERT(strcmp(sparse_field.name->data, "sparse_field") == 0); - TEST_ASSERT(sparse_field.data_type == ZVEC_DATA_TYPE_SPARSE_VECTOR_FP32); + // Test scalar field creation using API + ZVecFieldSchema *scalar_field = + zvec_field_schema_create("test_field", ZVEC_DATA_TYPE_STRING, true, 0); + TEST_ASSERT(scalar_field != NULL); + if (scalar_field) { + TEST_ASSERT( + strcmp(zvec_field_schema_get_name(scalar_field), "test_field") == 0); + TEST_ASSERT(zvec_field_schema_get_data_type(scalar_field) == + ZVEC_DATA_TYPE_STRING); + TEST_ASSERT(zvec_field_schema_is_nullable(scalar_field) == true); + TEST_ASSERT(zvec_field_schema_get_dimension(scalar_field) == 0); + + // Test new functions for scalar field + TEST_ASSERT(zvec_field_schema_is_vector_field(scalar_field) == false); + TEST_ASSERT(zvec_field_schema_is_dense_vector(scalar_field) == false); + TEST_ASSERT(zvec_field_schema_is_sparse_vector(scalar_field) == false); + TEST_ASSERT(zvec_field_schema_is_array_type(scalar_field) == false); + TEST_ASSERT(zvec_field_schema_get_element_data_type(scalar_field) == + ZVEC_DATA_TYPE_STRING); + TEST_ASSERT(zvec_field_schema_has_invert_index(scalar_field) == false); + TEST_ASSERT(zvec_field_schema_get_index_type(scalar_field) == + ZVEC_INDEX_TYPE_UNDEFINED); + + 
zvec_field_schema_destroy(scalar_field); + } + + // Test vector field creation using API + ZVecFieldSchema *vector_field = zvec_field_schema_create( + "vec_field", ZVEC_DATA_TYPE_VECTOR_FP32, false, 128); + TEST_ASSERT(vector_field != NULL); + if (vector_field) { + TEST_ASSERT(strcmp(zvec_field_schema_get_name(vector_field), "vec_field") == + 0); + TEST_ASSERT(zvec_field_schema_get_data_type(vector_field) == + ZVEC_DATA_TYPE_VECTOR_FP32); + TEST_ASSERT(zvec_field_schema_is_nullable(vector_field) == false); + TEST_ASSERT(zvec_field_schema_get_dimension(vector_field) == 128); + + // Test new functions for dense vector field + TEST_ASSERT(zvec_field_schema_is_vector_field(vector_field) == true); + TEST_ASSERT(zvec_field_schema_is_dense_vector(vector_field) == true); + TEST_ASSERT(zvec_field_schema_is_sparse_vector(vector_field) == false); + TEST_ASSERT(zvec_field_schema_is_array_type(vector_field) == false); + + zvec_field_schema_destroy(vector_field); + } + + // Test sparse vector field creation using API + ZVecFieldSchema *sparse_field = zvec_field_schema_create( + "sparse_field", ZVEC_DATA_TYPE_SPARSE_VECTOR_FP32, false, 0); + TEST_ASSERT(sparse_field != NULL); + if (sparse_field) { + TEST_ASSERT( + strcmp(zvec_field_schema_get_name(sparse_field), "sparse_field") == 0); + TEST_ASSERT(zvec_field_schema_get_data_type(sparse_field) == + ZVEC_DATA_TYPE_SPARSE_VECTOR_FP32); + + // Test new functions for sparse vector field + TEST_ASSERT(zvec_field_schema_is_vector_field(sparse_field) == true); + TEST_ASSERT(zvec_field_schema_is_dense_vector(sparse_field) == false); + TEST_ASSERT(zvec_field_schema_is_sparse_vector(sparse_field) == true); + + zvec_field_schema_destroy(sparse_field); + } + + // Test array field + ZVecFieldSchema *array_field = zvec_field_schema_create( + "array_field", ZVEC_DATA_TYPE_ARRAY_INT32, false, 0); + TEST_ASSERT(array_field != NULL); + if (array_field) { + TEST_ASSERT(zvec_field_schema_is_array_type(array_field) == true); + 
TEST_ASSERT(zvec_field_schema_is_vector_field(array_field) == false); + TEST_ASSERT(zvec_field_schema_get_element_data_type(array_field) == + ZVEC_DATA_TYPE_INT32); + + zvec_field_schema_destroy(array_field); + } + + // Test field with invert index + ZVecIndexParams *invert_params = + zvec_index_params_create(ZVEC_INDEX_TYPE_INVERT); + zvec_index_params_set_metric_type(invert_params, ZVEC_METRIC_TYPE_L2); + zvec_index_params_set_invert_params(invert_params, true, false); + + ZVecFieldSchema *indexed_field = + zvec_field_schema_create("indexed_field", ZVEC_DATA_TYPE_INT64, false, 0); + TEST_ASSERT(indexed_field != NULL); + if (indexed_field) { + zvec_field_schema_set_index_params(indexed_field, invert_params); + TEST_ASSERT(zvec_field_schema_has_index(indexed_field) == true); + TEST_ASSERT(zvec_field_schema_get_index_type(indexed_field) == + ZVEC_INDEX_TYPE_INVERT); + TEST_ASSERT(zvec_field_schema_has_invert_index(indexed_field) == true); + + zvec_field_schema_destroy(indexed_field); + } + zvec_index_params_destroy(invert_params); + + // Test field with HNSW index + ZVecIndexParams *hnsw_params = zvec_index_params_create(ZVEC_INDEX_TYPE_HNSW); + zvec_index_params_set_metric_type(hnsw_params, ZVEC_METRIC_TYPE_L2); + zvec_index_params_set_hnsw_params(hnsw_params, 16, 200); + + ZVecFieldSchema *hnsw_field = zvec_field_schema_create( + "hnsw_field", ZVEC_DATA_TYPE_VECTOR_FP32, false, 128); + TEST_ASSERT(hnsw_field != NULL); + if (hnsw_field) { + zvec_field_schema_set_hnsw_index(hnsw_field, hnsw_params); + TEST_ASSERT(zvec_field_schema_has_index(hnsw_field) == true); + TEST_ASSERT(zvec_field_schema_get_index_type(hnsw_field) == + ZVEC_INDEX_TYPE_HNSW); + TEST_ASSERT(zvec_field_schema_has_invert_index(hnsw_field) == + false); // Vector field, no invert index + + zvec_field_schema_destroy(hnsw_field); + } + zvec_index_params_destroy(hnsw_params); TEST_END(); } @@ -958,13 +1205,13 @@ void test_field_helper_functions(void) { "test_scalar", ZVEC_DATA_TYPE_INT32, true, 
invert_params); TEST_ASSERT(scalar_field != NULL); if (scalar_field) { - TEST_ASSERT(strcmp(scalar_field->name->data, "test_scalar") == 0); - TEST_ASSERT(scalar_field->data_type == ZVEC_DATA_TYPE_INT32); - free(scalar_field); - } - if (invert_params) { - free(invert_params); + TEST_ASSERT( + strcmp(zvec_field_schema_get_name(scalar_field), "test_scalar") == 0); + TEST_ASSERT(zvec_field_schema_get_data_type(scalar_field) == + ZVEC_DATA_TYPE_INT32); + zvec_field_schema_destroy(scalar_field); } + zvec_index_params_destroy(invert_params); // Test vector field helper functions ZVecIndexParams *hnsw_params = zvec_test_create_default_hnsw_params(); @@ -972,14 +1219,14 @@ void test_field_helper_functions(void) { "test_vector", ZVEC_DATA_TYPE_VECTOR_FP32, 128, false, hnsw_params); TEST_ASSERT(vector_field != NULL); if (vector_field) { - TEST_ASSERT(strcmp(vector_field->name->data, "test_vector") == 0); - TEST_ASSERT(vector_field->data_type == ZVEC_DATA_TYPE_VECTOR_FP32); - TEST_ASSERT(vector_field->dimension == 128); - free(vector_field); - } - if (hnsw_params) { - free(hnsw_params); + TEST_ASSERT( + strcmp(zvec_field_schema_get_name(vector_field), "test_vector") == 0); + TEST_ASSERT(zvec_field_schema_get_data_type(vector_field) == + ZVEC_DATA_TYPE_VECTOR_FP32); + TEST_ASSERT(zvec_field_schema_get_dimension(vector_field) == 128); + zvec_field_schema_destroy(vector_field); } + zvec_index_params_destroy(hnsw_params); TEST_END(); } @@ -3125,42 +3372,63 @@ void test_zvec_string_functions(void) { void test_index_params_functions(void) { TEST_START(); - // Test index params with new flat structure - // clang-format off - ZVecIndexParams hnsw_params = ZVEC_HNSW_PARAMS(ZVEC_METRIC_TYPE_COSINE, 16, 200, 50, ZVEC_QUANTIZE_TYPE_UNDEFINED); - // clang-format on - TEST_ASSERT(hnsw_params.index_type == ZVEC_INDEX_TYPE_HNSW); - TEST_ASSERT(hnsw_params.metric_type == ZVEC_METRIC_TYPE_COSINE); - TEST_ASSERT(hnsw_params.hnsw.m == 16); - TEST_ASSERT(hnsw_params.hnsw.ef_construction == 200); - 
TEST_ASSERT(hnsw_params.hnsw.ef_search == 50); + // Test index params with new opaque pointer API + // Test HNSW params + ZVecIndexParams *hnsw_params = zvec_index_params_create(ZVEC_INDEX_TYPE_HNSW); + TEST_ASSERT(hnsw_params != NULL); + TEST_ASSERT(zvec_index_params_get_type(hnsw_params) == ZVEC_INDEX_TYPE_HNSW); + // Default metric type is L2, need to set it explicitly + zvec_index_params_set_metric_type(hnsw_params, ZVEC_METRIC_TYPE_COSINE); + TEST_ASSERT(zvec_index_params_get_metric_type(hnsw_params) == + ZVEC_METRIC_TYPE_COSINE); + + int m, ef_construction; + zvec_index_params_get_hnsw_params(hnsw_params, &m, &ef_construction); + TEST_ASSERT(m == 16); + TEST_ASSERT(ef_construction == 200); // Test invert index params - // clang-format off - ZVecIndexParams invert_params = ZVEC_INVERT_PARAMS(true, false); - // clang-format on - TEST_ASSERT(invert_params.index_type == ZVEC_INDEX_TYPE_INVERT); - TEST_ASSERT(invert_params.invert.enable_range_optimization == true); - TEST_ASSERT(invert_params.invert.enable_extended_wildcard == false); + ZVecIndexParams *invert_params = + zvec_index_params_create(ZVEC_INDEX_TYPE_INVERT); + TEST_ASSERT(invert_params != NULL); + TEST_ASSERT(zvec_index_params_get_type(invert_params) == + ZVEC_INDEX_TYPE_INVERT); + + bool enable_range_opt, enable_wildcard; + zvec_index_params_get_invert_params(invert_params, &enable_range_opt, + &enable_wildcard); + TEST_ASSERT(enable_range_opt == true); // Default is true + TEST_ASSERT(enable_wildcard == false); // Default is false // Test flat index params - // clang-format off - ZVecIndexParams flat_params = - ZVEC_FLAT_PARAMS(ZVEC_METRIC_TYPE_IP, ZVEC_QUANTIZE_TYPE_UNDEFINED); - // clang-format on - TEST_ASSERT(flat_params.index_type == ZVEC_INDEX_TYPE_FLAT); - TEST_ASSERT(flat_params.metric_type == ZVEC_METRIC_TYPE_IP); + ZVecIndexParams *flat_params = zvec_index_params_create(ZVEC_INDEX_TYPE_FLAT); + TEST_ASSERT(flat_params != NULL); + TEST_ASSERT(zvec_index_params_get_type(flat_params) == 
ZVEC_INDEX_TYPE_FLAT); + // Default metric type is L2, need to set it explicitly + zvec_index_params_set_metric_type(flat_params, ZVEC_METRIC_TYPE_IP); + TEST_ASSERT(zvec_index_params_get_metric_type(flat_params) == + ZVEC_METRIC_TYPE_IP); // Test IVF index params - // clang-format off - ZVecIndexParams ivf_params = ZVEC_IVF_PARAMS(ZVEC_METRIC_TYPE_L2, 100, 10, true, 5, ZVEC_QUANTIZE_TYPE_UNDEFINED); - // clang-format on - TEST_ASSERT(ivf_params.index_type == ZVEC_INDEX_TYPE_IVF); - TEST_ASSERT(ivf_params.metric_type == ZVEC_METRIC_TYPE_L2); - TEST_ASSERT(ivf_params.ivf.n_list == 100); - TEST_ASSERT(ivf_params.ivf.n_iters == 10); - TEST_ASSERT(ivf_params.ivf.use_soar == true); - TEST_ASSERT(ivf_params.ivf.n_probe == 5); + ZVecIndexParams *ivf_params = zvec_index_params_create(ZVEC_INDEX_TYPE_IVF); + TEST_ASSERT(ivf_params != NULL); + TEST_ASSERT(zvec_index_params_get_type(ivf_params) == ZVEC_INDEX_TYPE_IVF); + // Default metric type is L2 + TEST_ASSERT(zvec_index_params_get_metric_type(ivf_params) == + ZVEC_METRIC_TYPE_L2); + + int n_list, n_iters; + bool use_soar; + zvec_index_params_get_ivf_params(ivf_params, &n_list, &n_iters, &use_soar); + TEST_ASSERT(n_list == 100); + TEST_ASSERT(n_iters == 10); + TEST_ASSERT(use_soar == false); // Default is false + + // Cleanup + zvec_index_params_destroy(hnsw_params); + zvec_index_params_destroy(invert_params); + zvec_index_params_destroy(flat_params); + zvec_index_params_destroy(ivf_params); TEST_END(); } @@ -3168,46 +3436,69 @@ void test_index_params_functions(void) { void test_index_params_api_functions(void) { TEST_START(); - ZVecIndexParams params; - - // Test zvec_index_params_init for HNSW - zvec_index_params_init(¶ms, ZVEC_INDEX_TYPE_HNSW, - ZVEC_METRIC_TYPE_COSINE); - TEST_ASSERT(params.index_type == ZVEC_INDEX_TYPE_HNSW); - TEST_ASSERT(params.metric_type == ZVEC_METRIC_TYPE_COSINE); - - // Test zvec_index_params_set_hnsw - zvec_index_params_set_hnsw(¶ms, 32, 300, 150); - TEST_ASSERT(params.hnsw.m == 32); - 
TEST_ASSERT(params.hnsw.ef_construction == 300); - TEST_ASSERT(params.hnsw.ef_search == 150); - - // Test zvec_index_params_init for IVF - zvec_index_params_init(¶ms, ZVEC_INDEX_TYPE_IVF, ZVEC_METRIC_TYPE_L2); - TEST_ASSERT(params.index_type == ZVEC_INDEX_TYPE_IVF); - TEST_ASSERT(params.metric_type == ZVEC_METRIC_TYPE_L2); - - // Test zvec_index_params_set_ivf - zvec_index_params_set_ivf(¶ms, 200, 20, true, 10); - TEST_ASSERT(params.ivf.n_list == 200); - TEST_ASSERT(params.ivf.n_iters == 20); - TEST_ASSERT(params.ivf.use_soar == true); - TEST_ASSERT(params.ivf.n_probe == 10); - - // Test zvec_index_params_init for INVERT - zvec_index_params_init(¶ms, ZVEC_INDEX_TYPE_INVERT, - ZVEC_METRIC_TYPE_UNDEFINED); - TEST_ASSERT(params.index_type == ZVEC_INDEX_TYPE_INVERT); - - // Test zvec_index_params_set_invert - zvec_index_params_set_invert(¶ms, true, true); - TEST_ASSERT(params.invert.enable_range_optimization == true); - TEST_ASSERT(params.invert.enable_extended_wildcard == true); - - // Test zvec_index_params_init for FLAT - zvec_index_params_init(¶ms, ZVEC_INDEX_TYPE_FLAT, ZVEC_METRIC_TYPE_IP); - TEST_ASSERT(params.index_type == ZVEC_INDEX_TYPE_FLAT); - TEST_ASSERT(params.metric_type == ZVEC_METRIC_TYPE_IP); + // Test zvec_index_params_create for HNSW + ZVecIndexParams *hnsw_params = zvec_index_params_create(ZVEC_INDEX_TYPE_HNSW); + TEST_ASSERT(hnsw_params != NULL); + TEST_ASSERT(zvec_index_params_get_type(hnsw_params) == ZVEC_INDEX_TYPE_HNSW); + TEST_ASSERT(zvec_index_params_get_metric_type(hnsw_params) == + ZVEC_METRIC_TYPE_L2); + + // Test zvec_index_params_set_metric_type + zvec_index_params_set_metric_type(hnsw_params, ZVEC_METRIC_TYPE_COSINE); + TEST_ASSERT(zvec_index_params_get_metric_type(hnsw_params) == + ZVEC_METRIC_TYPE_COSINE); + + // Test zvec_index_params_set_hnsw_params + zvec_index_params_set_hnsw_params(hnsw_params, 32, 300); + int m, ef_construction; + zvec_index_params_get_hnsw_params(hnsw_params, &m, &ef_construction); + TEST_ASSERT(m == 32); + 
TEST_ASSERT(ef_construction == 300); + + // Test zvec_index_params_create for IVF + ZVecIndexParams *ivf_params = zvec_index_params_create(ZVEC_INDEX_TYPE_IVF); + TEST_ASSERT(ivf_params != NULL); + TEST_ASSERT(zvec_index_params_get_type(ivf_params) == ZVEC_INDEX_TYPE_IVF); + TEST_ASSERT(zvec_index_params_get_metric_type(ivf_params) == + ZVEC_METRIC_TYPE_L2); + + // Test zvec_index_params_set_ivf_params + zvec_index_params_set_ivf_params(ivf_params, 200, 20, true); + int n_list, n_iters; + bool use_soar; + zvec_index_params_get_ivf_params(ivf_params, &n_list, &n_iters, &use_soar); + TEST_ASSERT(n_list == 200); + TEST_ASSERT(n_iters == 20); + TEST_ASSERT(use_soar == true); + + // Test zvec_index_params_create for INVERT + ZVecIndexParams *invert_params = + zvec_index_params_create(ZVEC_INDEX_TYPE_INVERT); + TEST_ASSERT(invert_params != NULL); + TEST_ASSERT(zvec_index_params_get_type(invert_params) == + ZVEC_INDEX_TYPE_INVERT); + + // Test zvec_index_params_set_invert_params + zvec_index_params_set_invert_params(invert_params, true, true); + bool enable_range_opt, enable_wildcard; + zvec_index_params_get_invert_params(invert_params, &enable_range_opt, + &enable_wildcard); + TEST_ASSERT(enable_range_opt == true); + TEST_ASSERT(enable_wildcard == true); + + // Test zvec_index_params_create for FLAT + ZVecIndexParams *flat_params = zvec_index_params_create(ZVEC_INDEX_TYPE_FLAT); + TEST_ASSERT(flat_params != NULL); + TEST_ASSERT(zvec_index_params_get_type(flat_params) == ZVEC_INDEX_TYPE_FLAT); + zvec_index_params_set_metric_type(flat_params, ZVEC_METRIC_TYPE_IP); + TEST_ASSERT(zvec_index_params_get_metric_type(flat_params) == + ZVEC_METRIC_TYPE_IP); + + // Cleanup + zvec_index_params_destroy(hnsw_params); + zvec_index_params_destroy(ivf_params); + zvec_index_params_destroy(invert_params); + zvec_index_params_destroy(flat_params); TEST_END(); } @@ -3264,24 +3555,17 @@ void test_query_params_functions(void) { ZVecQueryParams *base_params = 
zvec_query_params_create(ZVEC_INDEX_TYPE_HNSW); TEST_ASSERT(base_params != NULL); - // Test union query parameters - ZVecQueryParamsUnion *union_params = - zvec_query_params_union_create(ZVEC_INDEX_TYPE_HNSW); - TEST_ASSERT(union_params != NULL); - // Test HNSW query parameters - ZVecHnswQueryParams *hnsw_params = zvec_query_params_hnsw_create( - ZVEC_INDEX_TYPE_HNSW, 50, 0.5f, false, true); + ZVecHnswQueryParams *hnsw_params = + zvec_query_params_hnsw_create(50, 0.5f, false, true); TEST_ASSERT(hnsw_params != NULL); // Test IVF query parameters - ZVecIVFQueryParams *ivf_params = - zvec_query_params_ivf_create(ZVEC_INDEX_TYPE_IVF, 10, true, 1.5f); + ZVecIVFQueryParams *ivf_params = zvec_query_params_ivf_create(10, true, 1.5f); TEST_ASSERT(ivf_params != NULL); // Test Flat query parameters - ZVecFlatQueryParams *flat_params = - zvec_query_params_flat_create(ZVEC_INDEX_TYPE_FLAT, false, 2.0f); + ZVecFlatQueryParams *flat_params = zvec_query_params_flat_create(false, 2.0f); TEST_ASSERT(flat_params != NULL); // Test setting various parameters on base query params @@ -3320,14 +3604,12 @@ void test_query_params_functions(void) { zvec_query_params_hnsw_destroy(hnsw_params); zvec_query_params_ivf_destroy(ivf_params); zvec_query_params_flat_destroy(flat_params); - zvec_query_params_union_destroy(union_params); // Test boundary cases - null pointer handling zvec_query_params_hnsw_destroy(NULL); zvec_query_params_ivf_destroy(NULL); zvec_query_params_flat_destroy(NULL); - zvec_query_params_union_destroy(NULL); TEST_END(); @@ -3355,7 +3637,7 @@ void test_collection_stats_functions(void) { TEST_ASSERT(err == ZVEC_OK); if (stats) { - TEST_ASSERT(stats->doc_count == 0); + TEST_ASSERT(zvec_collection_stats_get_doc_count(stats) == 0); zvec_collection_stats_destroy(stats); } @@ -3625,13 +3907,16 @@ void test_actual_vector_queries(void) { zvec_collection_schema_add_field(schema, id_field); // Add vector field with HNSW index - // clang-format off - ZVecIndexParams hnsw_params = 
ZVEC_HNSW_PARAMS(ZVEC_METRIC_TYPE_L2, 16, 100, 50, ZVEC_QUANTIZE_TYPE_UNDEFINED); - // clang-format on + ZVecIndexParams *hnsw_params = + zvec_index_params_create(ZVEC_INDEX_TYPE_HNSW); + TEST_ASSERT(hnsw_params != NULL); + zvec_index_params_set_metric_type(hnsw_params, ZVEC_METRIC_TYPE_L2); + zvec_index_params_set_hnsw_params(hnsw_params, 16, 100); ZVecFieldSchema *vec_field = zvec_field_schema_create( "embedding", ZVEC_DATA_TYPE_VECTOR_FP32, false, 4); - zvec_field_schema_set_hnsw_index(vec_field, &hnsw_params); + zvec_field_schema_set_hnsw_index(vec_field, hnsw_params); zvec_collection_schema_add_field(schema, vec_field); + zvec_index_params_destroy(hnsw_params); ZVecCollection *collection = NULL; ZVecErrorCode err = @@ -3674,17 +3959,17 @@ void test_actual_vector_queries(void) { zvec_collection_flush(collection); // Test 1: Basic vector search - ZVecVectorQuery query1 = {0}; - query1.field_name = (ZVecString){.data = "embedding", .length = 9}; - query1.query_vector = - (ZVecByteArray){.data = (uint8_t *)vec1, .length = sizeof(vec1)}; - query1.topk = 3; - query1.include_vector = true; - query1.include_doc_id = true; + ZVecVectorQuery *query1 = zvec_vector_query_create(); + TEST_ASSERT(query1 != NULL); + zvec_vector_query_set_field_name(query1, "embedding"); + zvec_vector_query_set_query_vector(query1, vec1, sizeof(vec1)); + zvec_vector_query_set_topk(query1, 3); + zvec_vector_query_set_include_vector(query1, true); + zvec_vector_query_set_include_doc_id(query1, true); ZVecDoc **results = NULL; size_t result_count = 0; - err = zvec_collection_query(collection, &query1, &results, &result_count); + err = zvec_collection_query(collection, query1, &results, &result_count); TEST_ASSERT(err == ZVEC_OK); TEST_ASSERT(result_count > 0); TEST_ASSERT(results != NULL); @@ -3698,10 +3983,9 @@ void test_actual_vector_queries(void) { zvec_docs_free(results, result_count); // Test 2: Search with filter - ZVecVectorQuery query2 = query1; - query2.filter = (ZVecString){.data = "id 
> 2", .length = 6}; + zvec_vector_query_set_filter(query1, "id > 2"); - err = zvec_collection_query(collection, &query2, &results, &result_count); + err = zvec_collection_query(collection, query1, &results, &result_count); TEST_ASSERT(err == ZVEC_OK); // Should only return documents with id > 2 @@ -3714,11 +3998,12 @@ void test_actual_vector_queries(void) { zvec_docs_free(results, result_count); - // Cleanup documents + // Cleanup documents and query for (int i = 0; i < 4; i++) { zvec_doc_destroy(docs[i]); } + zvec_vector_query_destroy(query1); zvec_collection_destroy(collection); } @@ -3750,21 +4035,23 @@ void test_index_creation_and_management(void) { if (collection) { // Test 1: Create HNSW index - // clang-format off - ZVecIndexParams hnsw_params = ZVEC_HNSW_PARAMS(ZVEC_METRIC_TYPE_COSINE, 16, 100, 50, ZVEC_QUANTIZE_TYPE_UNDEFINED); - // clang-format on + ZVecIndexParams *hnsw_params = + zvec_index_params_create(ZVEC_INDEX_TYPE_HNSW); + TEST_ASSERT(hnsw_params != NULL); + zvec_index_params_set_metric_type(hnsw_params, ZVEC_METRIC_TYPE_COSINE); + zvec_index_params_set_hnsw_params(hnsw_params, 16, 100); - err = - zvec_collection_create_hnsw_index(collection, "dense", &hnsw_params); + err = zvec_collection_create_hnsw_index(collection, "dense", hnsw_params); TEST_ASSERT(err == ZVEC_OK); // Test 2: Create scalar index - // clang-format off - ZVecIndexParams invert_params = ZVEC_INVERT_PARAMS(true, false); - // clang-format on + ZVecIndexParams *invert_params = + zvec_index_params_create(ZVEC_INDEX_TYPE_INVERT); + TEST_ASSERT(invert_params != NULL); + zvec_index_params_set_invert_params(invert_params, true, false); err = zvec_collection_create_invert_index(collection, "name", - &invert_params); + invert_params); TEST_ASSERT(err == ZVEC_OK); err = zvec_collection_drop_index(collection, "name"); @@ -3775,6 +4062,8 @@ void test_index_creation_and_management(void) { TEST_ASSERT(err == ZVEC_OK); zvec_collection_destroy(collection); + 
zvec_index_params_destroy(hnsw_params); + zvec_index_params_destroy(invert_params); } zvec_collection_schema_destroy(schema); @@ -3869,29 +4158,32 @@ void test_field_ddl_operations(void) { ZVecFieldSchema *field1 = zvec_field_schema_create("test_field1", ZVEC_DATA_TYPE_STRING, false, 0); TEST_ASSERT(field1 != NULL); - TEST_ASSERT(strcmp(field1->name->data, "test_field1") == 0); - TEST_ASSERT(field1->data_type == ZVEC_DATA_TYPE_STRING); - TEST_ASSERT(field1->nullable == false); - TEST_ASSERT(field1->dimension == 0); + TEST_ASSERT(strcmp(zvec_field_schema_get_name(field1), "test_field1") == 0); + TEST_ASSERT(zvec_field_schema_get_data_type(field1) == ZVEC_DATA_TYPE_STRING); + TEST_ASSERT(zvec_field_schema_is_nullable(field1) == false); + TEST_ASSERT(zvec_field_schema_get_dimension(field1) == 0); ZVecFieldSchema *field2 = zvec_field_schema_create( "test_field2", ZVEC_DATA_TYPE_VECTOR_FP32, true, 128); TEST_ASSERT(field2 != NULL); - TEST_ASSERT(field2->data_type == ZVEC_DATA_TYPE_VECTOR_FP32); - TEST_ASSERT(field2->nullable == true); - TEST_ASSERT(field2->dimension == 128); + TEST_ASSERT(zvec_field_schema_get_data_type(field2) == + ZVEC_DATA_TYPE_VECTOR_FP32); + TEST_ASSERT(zvec_field_schema_is_nullable(field2) == true); + TEST_ASSERT(zvec_field_schema_get_dimension(field2) == 128); // Test index parameter setting - // clang-format off - ZVecIndexParams hnsw_params = ZVEC_HNSW_PARAMS(ZVEC_METRIC_TYPE_L2, 16, 100, 50, ZVEC_QUANTIZE_TYPE_UNDEFINED); - // clang-format on + ZVecIndexParams *hnsw_params = zvec_index_params_create(ZVEC_INDEX_TYPE_HNSW); + TEST_ASSERT(hnsw_params != NULL); + zvec_index_params_set_metric_type(hnsw_params, ZVEC_METRIC_TYPE_L2); + zvec_index_params_set_hnsw_params(hnsw_params, 16, 100); - ZVecErrorCode err = zvec_field_schema_set_index_params(field2, &hnsw_params); + ZVecErrorCode err = zvec_field_schema_set_index_params(field2, hnsw_params); TEST_ASSERT(err == ZVEC_OK); // Cleanup zvec_field_schema_destroy(field1); 
zvec_field_schema_destroy(field2); + zvec_index_params_destroy(hnsw_params); TEST_END(); } @@ -3912,10 +4204,11 @@ void test_performance_benchmarks(void) { ZVecFieldSchema *vec_field = zvec_field_schema_create("vec", ZVEC_DATA_TYPE_VECTOR_FP32, false, 128); - // clang-format off - ZVecIndexParams hnsw_params = ZVEC_HNSW_PARAMS(ZVEC_METRIC_TYPE_L2, 16, 100, 50, ZVEC_QUANTIZE_TYPE_UNDEFINED); /* NOLINT */ - // clang-format on - zvec_field_schema_set_hnsw_index(vec_field, &hnsw_params); + ZVecIndexParams *hnsw_params = + zvec_index_params_create(ZVEC_INDEX_TYPE_HNSW); + zvec_index_params_set_metric_type(hnsw_params, ZVEC_METRIC_TYPE_L2); + zvec_index_params_set_hnsw_params(hnsw_params, 16, 100); + zvec_field_schema_set_hnsw_index(vec_field, hnsw_params); zvec_collection_schema_add_field(schema, vec_field); ZVecCollection *collection = NULL; @@ -3999,13 +4292,13 @@ void test_performance_benchmarks(void) { query_vec[i] = (float)rand() / RAND_MAX; } - ZVecVectorQuery query = {0}; - query.field_name = (ZVecString){.data = "vec", .length = 3}; - query.query_vector = (ZVecByteArray){.data = (uint8_t *)query_vec, - .length = sizeof(query_vec)}; - query.topk = 10; - query.include_vector = false; - query.include_doc_id = true; + ZVecVectorQuery *query = zvec_vector_query_create(); + TEST_ASSERT(query != NULL); + zvec_vector_query_set_field_name(query, "vec"); + zvec_vector_query_set_query_vector(query, query_vec, sizeof(query_vec)); + zvec_vector_query_set_topk(query, 10); + zvec_vector_query_set_include_vector(query, false); + zvec_vector_query_set_include_doc_id(query, true); const int QUERY_COUNT = 100; #ifdef _POSIX_C_SOURCE @@ -4019,8 +4312,7 @@ void test_performance_benchmarks(void) { ZVecDoc **results = NULL; size_t result_count = 0; - err = - zvec_collection_query(collection, &query, &results, &result_count); + err = zvec_collection_query(collection, query, &results, &result_count); TEST_ASSERT(err == ZVEC_OK); TEST_ASSERT(result_count <= 10); @@ -4041,6 +4333,7 @@ 
void test_performance_benchmarks(void) { (query_time * 1000) / QUERY_COUNT; // ms per query printf(" Average query time: %.2f ms\n", avg_query_time); + zvec_vector_query_destroy(query); zvec_collection_destroy(collection); } @@ -4079,44 +4372,62 @@ void test_zvec_shutdown(void) { void test_index_params_creation_functions(void) { TEST_START(); - // Test HNSW parameters using macro - // clang-format off - ZVecIndexParams hnsw_params = ZVEC_HNSW_PARAMS(ZVEC_METRIC_TYPE_COSINE, 16, 100, 50, ZVEC_QUANTIZE_TYPE_UNDEFINED); - // clang-format on - TEST_ASSERT(hnsw_params.index_type == ZVEC_INDEX_TYPE_HNSW); - TEST_ASSERT(hnsw_params.metric_type == ZVEC_METRIC_TYPE_COSINE); - TEST_ASSERT(hnsw_params.hnsw.m == 16); - TEST_ASSERT(hnsw_params.hnsw.ef_construction == 100); - TEST_ASSERT(hnsw_params.hnsw.ef_search == 50); - - // Test IVF parameters using macro - // clang-format off - ZVecIndexParams ivf_params = ZVEC_IVF_PARAMS(ZVEC_METRIC_TYPE_L2, 100, 10, true, 5, ZVEC_QUANTIZE_TYPE_INT8); - // clang-format on - TEST_ASSERT(ivf_params.index_type == ZVEC_INDEX_TYPE_IVF); - TEST_ASSERT(ivf_params.metric_type == ZVEC_METRIC_TYPE_L2); - TEST_ASSERT(ivf_params.ivf.n_list == 100); - TEST_ASSERT(ivf_params.ivf.n_iters == 10); - TEST_ASSERT(ivf_params.ivf.use_soar == true); - TEST_ASSERT(ivf_params.ivf.n_probe == 5); - - // Test Flat parameters using macro - // clang-format off - // clang-format off - ZVecIndexParams flat_params = - ZVEC_FLAT_PARAMS(ZVEC_METRIC_TYPE_IP, ZVEC_QUANTIZE_TYPE_UNDEFINED); - // clang-format on - // clang-format on - TEST_ASSERT(flat_params.index_type == ZVEC_INDEX_TYPE_FLAT); - TEST_ASSERT(flat_params.metric_type == ZVEC_METRIC_TYPE_IP); - - // Test Invert parameters using macro - // clang-format off - ZVecIndexParams invert_params = ZVEC_INVERT_PARAMS(true, false); - // clang-format on - TEST_ASSERT(invert_params.index_type == ZVEC_INDEX_TYPE_INVERT); - TEST_ASSERT(invert_params.invert.enable_range_optimization == true); - 
TEST_ASSERT(invert_params.invert.enable_extended_wildcard == false); + // Test HNSW parameters using new API + ZVecIndexParams *hnsw_params = zvec_index_params_create(ZVEC_INDEX_TYPE_HNSW); + TEST_ASSERT(hnsw_params != NULL); + TEST_ASSERT(zvec_index_params_get_type(hnsw_params) == ZVEC_INDEX_TYPE_HNSW); + // Default metric type is L2 + TEST_ASSERT(zvec_index_params_get_metric_type(hnsw_params) == + ZVEC_METRIC_TYPE_L2); + + int m, ef_construction; + zvec_index_params_set_metric_type(hnsw_params, ZVEC_METRIC_TYPE_COSINE); + zvec_index_params_set_hnsw_params(hnsw_params, 16, 100); + zvec_index_params_get_hnsw_params(hnsw_params, &m, &ef_construction); + TEST_ASSERT(m == 16); + TEST_ASSERT(ef_construction == 100); + + // Test IVF parameters using new API + ZVecIndexParams *ivf_params = zvec_index_params_create(ZVEC_INDEX_TYPE_IVF); + TEST_ASSERT(ivf_params != NULL); + TEST_ASSERT(zvec_index_params_get_type(ivf_params) == ZVEC_INDEX_TYPE_IVF); + TEST_ASSERT(zvec_index_params_get_metric_type(ivf_params) == + ZVEC_METRIC_TYPE_L2); + + int n_list, n_iters; + bool use_soar; + zvec_index_params_set_ivf_params(ivf_params, 100, 10, true); + zvec_index_params_get_ivf_params(ivf_params, &n_list, &n_iters, &use_soar); + TEST_ASSERT(n_list == 100); + TEST_ASSERT(n_iters == 10); + TEST_ASSERT(use_soar == true); + + // Test Flat parameters using new API + ZVecIndexParams *flat_params = zvec_index_params_create(ZVEC_INDEX_TYPE_FLAT); + TEST_ASSERT(flat_params != NULL); + TEST_ASSERT(zvec_index_params_get_type(flat_params) == ZVEC_INDEX_TYPE_FLAT); + zvec_index_params_set_metric_type(flat_params, ZVEC_METRIC_TYPE_IP); + TEST_ASSERT(zvec_index_params_get_metric_type(flat_params) == + ZVEC_METRIC_TYPE_IP); + + // Test Invert parameters using new API + ZVecIndexParams *invert_params = + zvec_index_params_create(ZVEC_INDEX_TYPE_INVERT); + TEST_ASSERT(invert_params != NULL); + TEST_ASSERT(zvec_index_params_get_type(invert_params) == + ZVEC_INDEX_TYPE_INVERT); + bool enable_range_opt, 
enable_wildcard; + zvec_index_params_set_invert_params(invert_params, true, false); + zvec_index_params_get_invert_params(invert_params, &enable_range_opt, + &enable_wildcard); + TEST_ASSERT(enable_range_opt == true); + TEST_ASSERT(enable_wildcard == false); + + // Cleanup + zvec_index_params_destroy(hnsw_params); + zvec_index_params_destroy(ivf_params); + zvec_index_params_destroy(flat_params); + zvec_index_params_destroy(invert_params); TEST_END(); } @@ -4141,54 +4452,66 @@ void test_collection_advanced_index_functions(void) { zvec_collection_schema_add_field(schema, id_field); zvec_collection_schema_add_field(schema, vec_field); - ZVecCollectionOptions options = ZVEC_DEFAULT_OPTIONS(); - options.max_doc_count_per_segment = 1000; + ZVecCollectionOptions *options = zvec_collection_options_create(); + TEST_ASSERT(options != NULL); + zvec_collection_options_set_max_doc_count_per_segment(options, 1000); ZVecCollection *collection = NULL; - ZVecErrorCode err = zvec_collection_create_and_open(temp_dir, schema, - &options, &collection); + ZVecErrorCode err = + zvec_collection_create_and_open(temp_dir, schema, options, &collection); TEST_ASSERT(err == ZVEC_OK); if (collection) { // Test zvec_collection_create_flat_index - // clang-format off - ZVecIndexParams flat_params = - ZVEC_FLAT_PARAMS(ZVEC_METRIC_TYPE_L2, ZVEC_QUANTIZE_TYPE_UNDEFINED); - // clang-format on - err = zvec_collection_create_flat_index(collection, "vec", &flat_params); + ZVecIndexParams *flat_params = + zvec_index_params_create(ZVEC_INDEX_TYPE_FLAT); + TEST_ASSERT(flat_params != NULL); + zvec_index_params_set_metric_type(flat_params, ZVEC_METRIC_TYPE_L2); + err = zvec_collection_create_flat_index(collection, "vec", flat_params); TEST_ASSERT(err == ZVEC_OK); // Test zvec_collection_create_ivf_index - // clang-format off - ZVecIndexParams ivf_params = ZVEC_IVF_PARAMS(ZVEC_METRIC_TYPE_L2, 100, 10, true, 5, ZVEC_QUANTIZE_TYPE_INT8); - // clang-format on + ZVecIndexParams *ivf_params = + 
zvec_index_params_create(ZVEC_INDEX_TYPE_IVF); + TEST_ASSERT(ivf_params != NULL); + zvec_index_params_set_metric_type(ivf_params, ZVEC_METRIC_TYPE_L2); + zvec_index_params_set_ivf_params(ivf_params, 100, 10, true); err = zvec_collection_drop_index(collection, "vec"); // Drop previous index first TEST_ASSERT(err == ZVEC_OK); - err = zvec_collection_create_ivf_index(collection, "vec", &ivf_params); + err = zvec_collection_create_ivf_index(collection, "vec", ivf_params); TEST_ASSERT(err == ZVEC_OK); // Test zvec_collection_create_hnsw_index - // clang-format off - ZVecIndexParams hnsw_params = ZVEC_HNSW_PARAMS(ZVEC_METRIC_TYPE_COSINE, 16, 100, 50, ZVEC_QUANTIZE_TYPE_FP16); - // clang-format on + ZVecIndexParams *hnsw_params = + zvec_index_params_create(ZVEC_INDEX_TYPE_HNSW); + TEST_ASSERT(hnsw_params != NULL); + zvec_index_params_set_metric_type(hnsw_params, ZVEC_METRIC_TYPE_COSINE); + zvec_index_params_set_hnsw_params(hnsw_params, 16, 100); err = zvec_collection_drop_index(collection, "vec"); // Drop previous index first TEST_ASSERT(err == ZVEC_OK); - err = zvec_collection_create_hnsw_index(collection, "vec", &hnsw_params); + err = zvec_collection_create_hnsw_index(collection, "vec", hnsw_params); TEST_ASSERT(err == ZVEC_OK); // Test zvec_field_schema_set_ivf_index ZVecFieldSchema *new_vec_field = zvec_field_schema_create( "vec2", ZVEC_DATA_TYPE_VECTOR_FP32, false, 128); TEST_ASSERT(new_vec_field != NULL); - // clang-format off - ZVecIndexParams ivf_params2 = ZVEC_IVF_PARAMS(ZVEC_METRIC_TYPE_IP, 50, 5, false, 3, ZVEC_QUANTIZE_TYPE_UNDEFINED); - // clang-format on - zvec_field_schema_set_ivf_index(new_vec_field, &ivf_params2); - TEST_ASSERT(new_vec_field->has_index == true); + ZVecIndexParams *ivf_params2 = + zvec_index_params_create(ZVEC_INDEX_TYPE_IVF); + TEST_ASSERT(ivf_params2 != NULL); + zvec_index_params_set_metric_type(ivf_params2, ZVEC_METRIC_TYPE_IP); + zvec_index_params_set_ivf_params(ivf_params2, 50, 5, false); + 
zvec_field_schema_set_ivf_index(new_vec_field, ivf_params2); + TEST_ASSERT(zvec_field_schema_has_index(new_vec_field) == true); zvec_field_schema_destroy(new_vec_field); + zvec_index_params_destroy(flat_params); + zvec_index_params_destroy(ivf_params); + zvec_index_params_destroy(hnsw_params); + zvec_index_params_destroy(ivf_params2); + zvec_collection_options_destroy(options); zvec_collection_destroy(collection); } zvec_collection_schema_destroy(schema); @@ -4206,15 +4529,17 @@ void test_collection_query_functions(void) { // Create schema and collection ZVecCollectionSchema *schema = zvec_collection_schema_create("query_test"); - // clang-format off - ZVecIndexParams hnsw_params = ZVEC_HNSW_PARAMS(ZVEC_METRIC_TYPE_L2, 16, 100, 50, ZVEC_QUANTIZE_TYPE_UNDEFINED); - // clang-format on + ZVecIndexParams *hnsw_params = zvec_index_params_create(ZVEC_INDEX_TYPE_HNSW); + TEST_ASSERT(hnsw_params != NULL); + zvec_index_params_set_metric_type(hnsw_params, ZVEC_METRIC_TYPE_L2); + zvec_index_params_set_hnsw_params(hnsw_params, 16, 100); ZVecFieldSchema *name_field = zvec_field_schema_create("name", ZVEC_DATA_TYPE_STRING, false, 0); ZVecFieldSchema *vec_field = zvec_field_schema_create("vec", ZVEC_DATA_TYPE_VECTOR_FP32, false, 4); - zvec_field_schema_set_hnsw_index(vec_field, &hnsw_params); + zvec_field_schema_set_hnsw_index(vec_field, hnsw_params); + zvec_index_params_destroy(hnsw_params); zvec_collection_schema_add_field(schema, name_field); zvec_collection_schema_add_field(schema, vec_field); @@ -4261,28 +4586,25 @@ void test_collection_query_functions(void) { zvec_docs_free(results, found_count); // Test zvec_collection_query_by_group - ZVecGroupByVectorQuery group_query = {0}; - group_query.field_name = ZVEC_STRING("vec"); + ZVecGroupByVectorQuery *group_query = zvec_group_by_vector_query_create(); + TEST_ASSERT(group_query != NULL); + zvec_group_by_vector_query_set_field_name(group_query, "vec"); float query_vec[4] = {0.5f, 0.5f, 0.0f, 0.0f}; - 
group_query.query_vector.data = (uint8_t *)query_vec; - group_query.query_vector.length = sizeof(query_vec); - group_query.group_by_field_name = ZVEC_STRING("name"); - group_query.group_count = 2; - group_query.group_topk = 1; - group_query.include_vector = false; - - ZVecStringArray output_fields = {0}; - output_fields.count = 1; - output_fields.strings = - (ZVecString *)malloc(sizeof(ZVecString) * output_fields.count); - output_fields.strings[0] = ZVEC_STRING("name"); - group_query.output_fields = output_fields; + zvec_group_by_vector_query_set_query_vector(group_query, query_vec, + sizeof(query_vec)); + zvec_group_by_vector_query_set_group_by_field_name(group_query, "name"); + zvec_group_by_vector_query_set_group_count(group_query, 2); + zvec_group_by_vector_query_set_group_topk(group_query, 1); + zvec_group_by_vector_query_set_include_vector(group_query, false); + + const char *output_fields[] = {"name"}; + zvec_group_by_vector_query_set_output_fields(group_query, output_fields, 1); ZVecDoc **group_results = NULL; ZVecString **group_values = NULL; size_t group_result_count = 0; err = - zvec_collection_query_by_group(collection, &group_query, &group_results, + zvec_collection_query_by_group(collection, group_query, &group_results, &group_values, &group_result_count); TEST_ASSERT(err == ZVEC_OK); if (group_results) { @@ -4295,14 +4617,14 @@ void test_collection_query_functions(void) { free(group_values); } - free(output_fields.strings); + zvec_group_by_vector_query_destroy(group_query); // Test zvec_collection_get_options ZVecCollectionOptions *options = NULL; err = zvec_collection_get_options(collection, &options); TEST_ASSERT(err == ZVEC_OK); TEST_ASSERT(options != NULL); - free(options); + zvec_collection_options_destroy(options); zvec_collection_destroy(collection); zvec_doc_destroy(doc1); @@ -4522,6 +4844,8 @@ int main(void) { test_normal_schema_creation(); test_schema_with_indexes(); test_schema_max_doc_count(); + test_collection_schema_helpers(); + 
test_collection_schema_alter_field(); // Field-related tests test_field_schema_functions(); diff --git a/tests/c/utils.c b/tests/c/utils.c index 7d287761..f570f9fd 100644 --- a/tests/c/utils.c +++ b/tests/c/utils.c @@ -39,27 +39,37 @@ static char *strdup_safe(const char *str) { ZVecCollectionSchema *zvec_test_create_temp_schema(void) { // Create collection schema using C API ZVecCollectionSchema *schema = zvec_collection_schema_create("demo"); - schema->max_doc_count_per_segment = 1000; + zvec_collection_schema_set_max_doc_count_per_segment(schema, 1000); - // Create index parameters using C API (using new flat structure with macros) - ZVecIndexParams invert_params = ZVEC_INVERT_PARAMS(true, true); - ZVecIndexParams dense_hnsw_params = ZVEC_HNSW_PARAMS( - ZVEC_METRIC_TYPE_L2, 16, 100, 50, ZVEC_QUANTIZE_TYPE_UNDEFINED); - ZVecIndexParams sparse_hnsw_params = ZVEC_HNSW_PARAMS( - ZVEC_METRIC_TYPE_IP, 16, 100, 50, ZVEC_QUANTIZE_TYPE_UNDEFINED); + // Create index parameters using new opaque pointer API + ZVecIndexParams *invert_params = + zvec_index_params_create(ZVEC_INDEX_TYPE_INVERT); + zvec_index_params_set_invert_params(invert_params, true, true); + + ZVecIndexParams *dense_hnsw_params = + zvec_index_params_create(ZVEC_INDEX_TYPE_HNSW); + zvec_index_params_set_metric_type(dense_hnsw_params, ZVEC_METRIC_TYPE_L2); + zvec_index_params_set_hnsw_params(dense_hnsw_params, 16, 100); + + ZVecIndexParams *sparse_hnsw_params = + zvec_index_params_create(ZVEC_INDEX_TYPE_HNSW); + zvec_index_params_set_metric_type(sparse_hnsw_params, ZVEC_METRIC_TYPE_IP); + zvec_index_params_set_hnsw_params(sparse_hnsw_params, 16, 100); + ZVecIndexParams *name_invert_params = + zvec_index_params_create(ZVEC_INDEX_TYPE_INVERT); + zvec_index_params_set_invert_params(name_invert_params, false, false); // Create and add fields ZVecFieldSchema *id_field = zvec_field_schema_create("id", ZVEC_DATA_TYPE_INT64, false, 0); - zvec_field_schema_set_invert_index(id_field, &invert_params); + 
zvec_field_schema_set_invert_index(id_field, invert_params); zvec_collection_schema_add_field(schema, id_field); // Create name field (inverted index without optimization) - ZVecIndexParams name_invert_params = ZVEC_INVERT_PARAMS(false, false); ZVecFieldSchema *name_field = zvec_field_schema_create("name", ZVEC_DATA_TYPE_STRING, false, 0); - zvec_field_schema_set_invert_index(name_field, &name_invert_params); + zvec_field_schema_set_invert_index(name_field, name_invert_params); zvec_collection_schema_add_field(schema, name_field); // Create weight field (no index) @@ -70,15 +80,21 @@ ZVecCollectionSchema *zvec_test_create_temp_schema(void) { // Create dense field (HNSW index) ZVecFieldSchema *dense_field = zvec_field_schema_create("dense", ZVEC_DATA_TYPE_VECTOR_FP32, false, 128); - zvec_field_schema_set_hnsw_index(dense_field, &dense_hnsw_params); + zvec_field_schema_set_hnsw_index(dense_field, dense_hnsw_params); zvec_collection_schema_add_field(schema, dense_field); // Create sparse field (HNSW index) ZVecFieldSchema *sparse_field = zvec_field_schema_create( "sparse", ZVEC_DATA_TYPE_SPARSE_VECTOR_FP32, false, 0); - zvec_field_schema_set_hnsw_index(sparse_field, &sparse_hnsw_params); + zvec_field_schema_set_hnsw_index(sparse_field, sparse_hnsw_params); zvec_collection_schema_add_field(schema, sparse_field); + // Cleanup index parameters + zvec_index_params_destroy(invert_params); + zvec_index_params_destroy(dense_hnsw_params); + zvec_index_params_destroy(sparse_hnsw_params); + zvec_index_params_destroy(name_invert_params); + return schema; } @@ -104,7 +120,7 @@ ZVecCollectionSchema *zvec_test_create_normal_schema( // Create collection schema using C API ZVecCollectionSchema *schema = zvec_collection_schema_create(name ? 
name : "demo"); - schema->max_doc_count_per_segment = max_doc_count; + zvec_collection_schema_set_max_doc_count_per_segment(schema, max_doc_count); // Create scalar fields (8) const char *scalar_names[] = {"int32", "string", "uint32", "bool", @@ -153,16 +169,16 @@ ZVecCollectionSchema *zvec_test_create_normal_schema( ZVecFieldSchema *dense_fp16 = zvec_field_schema_create( "dense_fp16", ZVEC_DATA_TYPE_VECTOR_FP16, false, 128); - ZVecIndexParams flat_params1 = - ZVEC_FLAT_PARAMS(ZVEC_METRIC_TYPE_L2, ZVEC_QUANTIZE_TYPE_UNDEFINED); - zvec_field_schema_set_flat_index(dense_fp16, &flat_params1); + ZVecIndexParams *flat_params1 = zvec_test_create_default_flat_params(); + zvec_field_schema_set_flat_index(dense_fp16, flat_params1); + zvec_index_params_destroy(flat_params1); zvec_collection_schema_add_field(schema, dense_fp16); ZVecFieldSchema *dense_int8 = zvec_field_schema_create( "dense_int8", ZVEC_DATA_TYPE_VECTOR_INT8, false, 128); - ZVecIndexParams flat_params2 = - ZVEC_FLAT_PARAMS(ZVEC_METRIC_TYPE_L2, ZVEC_QUANTIZE_TYPE_UNDEFINED); - zvec_field_schema_set_flat_index(dense_int8, &flat_params2); + ZVecIndexParams *flat_params2 = zvec_test_create_default_flat_params(); + zvec_field_schema_set_flat_index(dense_int8, flat_params2); + zvec_index_params_destroy(flat_params2); zvec_collection_schema_add_field(schema, dense_int8); // sparse vectors @@ -175,9 +191,9 @@ ZVecCollectionSchema *zvec_test_create_normal_schema( ZVecFieldSchema *sparse_fp16 = zvec_field_schema_create( "sparse_fp16", ZVEC_DATA_TYPE_SPARSE_VECTOR_FP16, false, 0); - ZVecIndexParams flat_params3 = - ZVEC_FLAT_PARAMS(ZVEC_METRIC_TYPE_L2, ZVEC_QUANTIZE_TYPE_UNDEFINED); - zvec_field_schema_set_flat_index(sparse_fp16, &flat_params3); + ZVecIndexParams *flat_params3 = zvec_test_create_default_flat_params(); + zvec_field_schema_set_flat_index(sparse_fp16, flat_params3); + zvec_index_params_destroy(flat_params3); zvec_collection_schema_add_field(schema, sparse_fp16); return schema; @@ -249,57 +265,58 @@ ZVecDoc 
*zvec_test_create_doc(uint64_t doc_id, } // Create test data for each field - for (size_t i = 0; i < schema->field_count; i++) { - // Fix type mismatch issue - remove address operator - const ZVecFieldSchema *field = schema->fields[i]; - // Remove unused variable - // ZVecErrorCode err = ZVEC_OK; + size_t field_count = zvec_collection_schema_get_field_count(schema); + for (size_t i = 0; i < field_count; i++) { + const ZVecFieldSchema *field = zvec_collection_schema_get_field(schema, i); + const char *field_name = zvec_field_schema_get_name(field); + ZVecDataType field_type = zvec_field_schema_get_data_type(field); + uint32_t field_dimension = zvec_field_schema_get_dimension(field); - switch (field->data_type) { + switch (field_type) { case ZVEC_DATA_TYPE_BINARY: { char binary_str[32]; snprintf(binary_str, sizeof(binary_str), "binary_%llu", (unsigned long long)doc_id); - zvec_doc_add_field_by_value(doc, field->name->data, field->data_type, - binary_str, strlen(binary_str)); + zvec_doc_add_field_by_value(doc, field_name, field_type, binary_str, + strlen(binary_str)); break; } case ZVEC_DATA_TYPE_BOOL: { - zvec_doc_add_field_by_value(doc, field->name->data, field->data_type, + zvec_doc_add_field_by_value(doc, field_name, field_type, &(bool){doc_id % 10 == 0}, sizeof(bool)); break; } case ZVEC_DATA_TYPE_INT32: { - zvec_doc_add_field_by_value(doc, field->name->data, field->data_type, + zvec_doc_add_field_by_value(doc, field_name, field_type, &(int32_t){(int32_t)doc_id}, sizeof(int32_t)); break; } case ZVEC_DATA_TYPE_INT64: { - zvec_doc_add_field_by_value(doc, field->name->data, field->data_type, + zvec_doc_add_field_by_value(doc, field_name, field_type, &(int64_t){(int64_t)doc_id}, sizeof(int64_t)); break; } case ZVEC_DATA_TYPE_UINT32: { - zvec_doc_add_field_by_value(doc, field->name->data, field->data_type, + zvec_doc_add_field_by_value(doc, field_name, field_type, &(uint32_t){(uint32_t)doc_id}, sizeof(uint32_t)); break; } case ZVEC_DATA_TYPE_UINT64: { - 
zvec_doc_add_field_by_value(doc, field->name->data, field->data_type, + zvec_doc_add_field_by_value(doc, field_name, field_type, &(uint64_t){(uint64_t)doc_id}, sizeof(uint64_t)); break; } case ZVEC_DATA_TYPE_FLOAT: { - zvec_doc_add_field_by_value(doc, field->name->data, field->data_type, + zvec_doc_add_field_by_value(doc, field_name, field_type, &(float){(float)doc_id}, sizeof(float)); break; } case ZVEC_DATA_TYPE_DOUBLE: { - zvec_doc_add_field_by_value(doc, field->name->data, field->data_type, + zvec_doc_add_field_by_value(doc, field_name, field_type, &(double){(double)doc_id}, sizeof(double)); break; } @@ -307,8 +324,8 @@ ZVecDoc *zvec_test_create_doc(uint64_t doc_id, char string_val[64]; snprintf(string_val, sizeof(string_val), "value_%llu", (unsigned long long)doc_id); - zvec_doc_add_field_by_value(doc, field->name->data, field->data_type, - string_val, strlen(string_val)); + zvec_doc_add_field_by_value(doc, field_name, field_type, string_val, + strlen(string_val)); break; } case ZVEC_DATA_TYPE_ARRAY_BOOL: { @@ -316,8 +333,8 @@ ZVecDoc *zvec_test_create_doc(uint64_t doc_id, for (int j = 0; j < 10; j++) { bool_array[j] = (doc_id + j) % 2 == 0; } - zvec_doc_add_field_by_value(doc, field->name->data, field->data_type, - bool_array, sizeof(bool_array)); + zvec_doc_add_field_by_value(doc, field_name, field_type, bool_array, + sizeof(bool_array)); break; } case ZVEC_DATA_TYPE_ARRAY_INT32: { @@ -325,8 +342,8 @@ ZVecDoc *zvec_test_create_doc(uint64_t doc_id, for (int j = 0; j < 10; j++) { int32_array[j] = (int32_t)doc_id; } - zvec_doc_add_field_by_value(doc, field->name->data, field->data_type, - int32_array, sizeof(int32_array)); + zvec_doc_add_field_by_value(doc, field_name, field_type, int32_array, + sizeof(int32_array)); break; } case ZVEC_DATA_TYPE_ARRAY_INT64: { @@ -334,8 +351,8 @@ ZVecDoc *zvec_test_create_doc(uint64_t doc_id, for (int j = 0; j < 10; j++) { int64_array[j] = (int64_t)doc_id; } - zvec_doc_add_field_by_value(doc, field->name->data, 
field->data_type, - int64_array, sizeof(int64_array)); + zvec_doc_add_field_by_value(doc, field_name, field_type, int64_array, + sizeof(int64_array)); break; } case ZVEC_DATA_TYPE_ARRAY_UINT32: { @@ -343,8 +360,8 @@ ZVecDoc *zvec_test_create_doc(uint64_t doc_id, for (int j = 0; j < 10; j++) { uint32_array[j] = (uint32_t)doc_id; } - zvec_doc_add_field_by_value(doc, field->name->data, field->data_type, - uint32_array, sizeof(uint32_array)); + zvec_doc_add_field_by_value(doc, field_name, field_type, uint32_array, + sizeof(uint32_array)); break; } case ZVEC_DATA_TYPE_ARRAY_UINT64: { @@ -352,8 +369,8 @@ ZVecDoc *zvec_test_create_doc(uint64_t doc_id, for (int j = 0; j < 10; j++) { uint64_array[j] = (uint64_t)doc_id; } - zvec_doc_add_field_by_value(doc, field->name->data, field->data_type, - uint64_array, sizeof(uint64_array)); + zvec_doc_add_field_by_value(doc, field_name, field_type, uint64_array, + sizeof(uint64_array)); break; } case ZVEC_DATA_TYPE_ARRAY_FLOAT: { @@ -361,8 +378,8 @@ ZVecDoc *zvec_test_create_doc(uint64_t doc_id, for (int j = 0; j < 10; j++) { float_array[j] = (float)doc_id; } - zvec_doc_add_field_by_value(doc, field->name->data, field->data_type, - float_array, sizeof(float_array)); + zvec_doc_add_field_by_value(doc, field_name, field_type, float_array, + sizeof(float_array)); break; } case ZVEC_DATA_TYPE_ARRAY_DOUBLE: { @@ -370,8 +387,8 @@ ZVecDoc *zvec_test_create_doc(uint64_t doc_id, for (int j = 0; j < 10; j++) { double_array[j] = (double)doc_id; } - zvec_doc_add_field_by_value(doc, field->name->data, field->data_type, - double_array, sizeof(double_array)); + zvec_doc_add_field_by_value(doc, field_name, field_type, double_array, + sizeof(double_array)); break; } case ZVEC_DATA_TYPE_ARRAY_STRING: { @@ -388,112 +405,104 @@ ZVecDoc *zvec_test_create_doc(uint64_t doc_id, offset += len + 1; } } - zvec_doc_add_field_by_value(doc, field->name->data, field->data_type, - string_data, offset); + zvec_doc_add_field_by_value(doc, field_name, field_type, 
string_data, + offset); break; } case ZVEC_DATA_TYPE_VECTOR_BINARY32: { uint32_t *vector_data = - (uint32_t *)malloc(field->dimension * sizeof(uint32_t)); + (uint32_t *)malloc(field_dimension * sizeof(uint32_t)); if (vector_data) { - for (uint32_t j = 0; j < field->dimension; j++) { + for (uint32_t j = 0; j < field_dimension; j++) { vector_data[j] = (uint32_t)(doc_id + j); } - zvec_doc_add_field_by_value(doc, field->name->data, field->data_type, - vector_data, - field->dimension * sizeof(uint32_t)); + zvec_doc_add_field_by_value(doc, field_name, field_type, vector_data, + field_dimension * sizeof(uint32_t)); free(vector_data); } break; } case ZVEC_DATA_TYPE_VECTOR_BINARY64: { uint64_t *vector_data = - (uint64_t *)malloc(field->dimension * sizeof(uint64_t)); + (uint64_t *)malloc(field_dimension * sizeof(uint64_t)); if (vector_data) { - for (uint32_t j = 0; j < field->dimension; j++) { + for (uint32_t j = 0; j < field_dimension; j++) { vector_data[j] = (uint64_t)(doc_id + j); } - zvec_doc_add_field_by_value(doc, field->name->data, field->data_type, - vector_data, - field->dimension * sizeof(uint64_t)); + zvec_doc_add_field_by_value(doc, field_name, field_type, vector_data, + field_dimension * sizeof(uint64_t)); free(vector_data); } break; } case ZVEC_DATA_TYPE_VECTOR_FP32: { - float *vector_data = (float *)malloc(field->dimension * sizeof(float)); + float *vector_data = (float *)malloc(field_dimension * sizeof(float)); if (vector_data) { - for (uint32_t j = 0; j < field->dimension; j++) { + for (uint32_t j = 0; j < field_dimension; j++) { vector_data[j] = (float)(doc_id + j * 0.1); } - zvec_doc_add_field_by_value(doc, field->name->data, field->data_type, - vector_data, - field->dimension * sizeof(float)); + zvec_doc_add_field_by_value(doc, field_name, field_type, vector_data, + field_dimension * sizeof(float)); free(vector_data); } break; } case ZVEC_DATA_TYPE_VECTOR_FP64: { double *vector_data = - (double *)malloc(field->dimension * sizeof(double)); + (double 
*)malloc(field_dimension * sizeof(double)); if (vector_data) { - for (uint32_t j = 0; j < field->dimension; j++) { + for (uint32_t j = 0; j < field_dimension; j++) { vector_data[j] = (double)(doc_id + j * 0.1); } - zvec_doc_add_field_by_value(doc, field->name->data, field->data_type, - vector_data, - field->dimension * sizeof(double)); + zvec_doc_add_field_by_value(doc, field_name, field_type, vector_data, + field_dimension * sizeof(double)); free(vector_data); } break; } case ZVEC_DATA_TYPE_VECTOR_FP16: { // FP16 needs special handling, simplified to FP32 here - float *vector_data = (float *)malloc(field->dimension * sizeof(float)); + float *vector_data = (float *)malloc(field_dimension * sizeof(float)); if (vector_data) { - for (uint32_t j = 0; j < field->dimension; j++) { + for (uint32_t j = 0; j < field_dimension; j++) { vector_data[j] = (float)(doc_id + j * 0.1); } - zvec_doc_add_field_by_value(doc, field->name->data, field->data_type, - vector_data, - field->dimension * sizeof(float)); + zvec_doc_add_field_by_value(doc, field_name, field_type, vector_data, + field_dimension * sizeof(float)); free(vector_data); } break; } case ZVEC_DATA_TYPE_VECTOR_INT8: { int8_t *vector_data = - (int8_t *)malloc(field->dimension * sizeof(int8_t)); + (int8_t *)malloc(field_dimension * sizeof(int8_t)); if (vector_data) { - for (uint32_t j = 0; j < field->dimension; j++) { + for (uint32_t j = 0; j < field_dimension; j++) { vector_data[j] = (int8_t)((doc_id + j) % 256); } - zvec_doc_add_field_by_value(doc, field->name->data, field->data_type, - vector_data, - field->dimension * sizeof(int8_t)); + zvec_doc_add_field_by_value(doc, field_name, field_type, vector_data, + field_dimension * sizeof(int8_t)); free(vector_data); } break; } case ZVEC_DATA_TYPE_VECTOR_INT16: { int16_t *vector_data = - (int16_t *)malloc(field->dimension * sizeof(int16_t)); + (int16_t *)malloc(field_dimension * sizeof(int16_t)); if (vector_data) { - for (uint32_t j = 0; j < field->dimension; j++) { + for 
(uint32_t j = 0; j < field_dimension; j++) { vector_data[j] = (int16_t)((doc_id + j) % 65536); } - zvec_doc_add_field_by_value(doc, field->name->data, field->data_type, - vector_data, - field->dimension * sizeof(int16_t)); + zvec_doc_add_field_by_value(doc, field_name, field_type, vector_data, + field_dimension * sizeof(int16_t)); free(vector_data); } break; } case ZVEC_DATA_TYPE_SPARSE_VECTOR_FP32: { // Sparse vectors need special handling - uint32_t nnz = field->dimension > 0 - ? field->dimension / 10 - : 10; // Number of non-zero elements + uint32_t nnz = field_dimension > 0 ? field_dimension / 10 + : 10; // Number of non-zero elements size_t sparse_size = sizeof(uint32_t) + nnz * (sizeof(uint32_t) + sizeof(float)); void *sparse_data = malloc(sparse_size); @@ -506,15 +515,15 @@ ZVecDoc *zvec_test_create_doc(uint64_t doc_id, indices[j] = j * 10; // Index values[j] = (float)(doc_id + j * 0.1); // Value } - zvec_doc_add_field_by_value(doc, field->name->data, field->data_type, - sparse_data, sparse_size); + zvec_doc_add_field_by_value(doc, field_name, field_type, sparse_data, + sparse_size); free(sparse_data); } break; } case ZVEC_DATA_TYPE_SPARSE_VECTOR_FP16: { // Sparse FP16 vectors, simplified handling - uint32_t nnz = field->dimension > 0 ? field->dimension / 10 : 10; + uint32_t nnz = field_dimension > 0 ? 
field_dimension / 10 : 10; size_t sparse_size = sizeof(uint32_t) + nnz * (sizeof(uint32_t) + @@ -529,8 +538,8 @@ ZVecDoc *zvec_test_create_doc(uint64_t doc_id, indices[j] = j * 10; values[j] = (float)(doc_id + j * 0.1); } - zvec_doc_add_field_by_value(doc, field->name->data, field->data_type, - sparse_data, sparse_size); + zvec_doc_add_field_by_value(doc, field_name, field_type, sparse_data, + sparse_size); free(sparse_data); } break; @@ -567,85 +576,89 @@ ZVecDoc *zvec_test_create_doc_null(uint64_t doc_id, } // Only create data for vector fields - for (size_t i = 0; i < schema->field_count; i++) { - const ZVecFieldSchema *field = schema->fields[i]; + size_t field_count = zvec_collection_schema_get_field_count(schema); + for (size_t i = 0; i < field_count; i++) { + const ZVecFieldSchema *field = zvec_collection_schema_get_field(schema, i); + const char *field_name = zvec_field_schema_get_name(field); + ZVecDataType field_type = zvec_field_schema_get_data_type(field); + uint32_t field_dimension = zvec_field_schema_get_dimension(field); // Only process specific vector type fields - if (field->data_type != ZVEC_DATA_TYPE_VECTOR_FP32 && - field->data_type != ZVEC_DATA_TYPE_VECTOR_FP16 && - field->data_type != ZVEC_DATA_TYPE_VECTOR_INT8 && - field->data_type != ZVEC_DATA_TYPE_SPARSE_VECTOR_FP32 && - field->data_type != ZVEC_DATA_TYPE_SPARSE_VECTOR_FP16) { + if (field_type != ZVEC_DATA_TYPE_VECTOR_FP32 && + field_type != ZVEC_DATA_TYPE_VECTOR_FP16 && + field_type != ZVEC_DATA_TYPE_VECTOR_INT8 && + field_type != ZVEC_DATA_TYPE_SPARSE_VECTOR_FP32 && + field_type != ZVEC_DATA_TYPE_SPARSE_VECTOR_FP16) { continue; } ZVecErrorCode err = ZVEC_OK; - switch (field->data_type) { + switch (field_type) { case ZVEC_DATA_TYPE_VECTOR_FP32: { - float *vector_data = (float *)malloc(field->dimension * sizeof(float)); + float *vector_data = (float *)malloc(field_dimension * sizeof(float)); if (vector_data) { - for (uint32_t j = 0; j < field->dimension; j++) { + for (uint32_t j = 0; j < 
field_dimension; j++) { vector_data[j] = (float)(doc_id + j * 0.1); } - err = zvec_doc_add_field_by_value(doc, field->name->data, - field->data_type, vector_data, - field->dimension * sizeof(float)); + err = zvec_doc_add_field_by_value(doc, field_name, field_type, + vector_data, + field_dimension * sizeof(float)); free(vector_data); } break; } case ZVEC_DATA_TYPE_VECTOR_FP64: { double *vector_data = - (double *)malloc(field->dimension * sizeof(double)); + (double *)malloc(field_dimension * sizeof(double)); if (vector_data) { - for (uint32_t j = 0; j < field->dimension; j++) { + for (uint32_t j = 0; j < field_dimension; j++) { vector_data[j] = (double)(doc_id + j * 0.1); } - err = zvec_doc_add_field_by_value(doc, field->name->data, - field->data_type, vector_data, - field->dimension * sizeof(double)); + err = zvec_doc_add_field_by_value(doc, field_name, field_type, + vector_data, + field_dimension * sizeof(double)); free(vector_data); } break; } case ZVEC_DATA_TYPE_VECTOR_FP16: { - float *vector_data = (float *)malloc(field->dimension * sizeof(float)); + float *vector_data = (float *)malloc(field_dimension * sizeof(float)); if (vector_data) { - for (uint32_t j = 0; j < field->dimension; j++) { + for (uint32_t j = 0; j < field_dimension; j++) { vector_data[j] = (float)(doc_id + j * 0.1); } - err = zvec_doc_add_field_by_value(doc, field->name->data, - field->data_type, vector_data, - field->dimension * sizeof(float)); + err = zvec_doc_add_field_by_value(doc, field_name, field_type, + vector_data, + field_dimension * sizeof(float)); free(vector_data); } break; } case ZVEC_DATA_TYPE_VECTOR_INT8: { int8_t *vector_data = - (int8_t *)malloc(field->dimension * sizeof(int8_t)); + (int8_t *)malloc(field_dimension * sizeof(int8_t)); if (vector_data) { - for (uint32_t j = 0; j < field->dimension; j++) { + for (uint32_t j = 0; j < field_dimension; j++) { vector_data[j] = (int8_t)(doc_id % 128); } - err = zvec_doc_add_field_by_value(doc, field->name->data, - field->data_type, 
vector_data, - field->dimension * sizeof(int8_t)); + err = zvec_doc_add_field_by_value(doc, field_name, field_type, + vector_data, + field_dimension * sizeof(int8_t)); free(vector_data); } break; } case ZVEC_DATA_TYPE_VECTOR_INT16: { int16_t *vector_data = - (int16_t *)malloc(field->dimension * sizeof(int16_t)); + (int16_t *)malloc(field_dimension * sizeof(int16_t)); if (vector_data) { - for (uint32_t j = 0; j < field->dimension; j++) { + for (uint32_t j = 0; j < field_dimension; j++) { vector_data[j] = (int16_t)(doc_id % 32768); } - err = zvec_doc_add_field_by_value(doc, field->name->data, - field->data_type, vector_data, - field->dimension * sizeof(int16_t)); + err = zvec_doc_add_field_by_value(doc, field_name, field_type, + vector_data, + field_dimension * sizeof(int16_t)); free(vector_data); } break; @@ -667,9 +680,8 @@ ZVecDoc *zvec_test_create_doc_null(uint64_t doc_id, *((float *)ptr) = (float)(doc_id + j * 0.1); ptr += sizeof(float); } - err = zvec_doc_add_field_by_value(doc, field->name->data, - field->data_type, sparse_data, - sparse_size); + err = zvec_doc_add_field_by_value(doc, field_name, field_type, + sparse_data, sparse_size); free(sparse_data); } break; @@ -753,29 +765,29 @@ ZVecDoc *zvec_test_create_doc_with_fields(uint64_t doc_id, // ============================================================================= ZVecIndexParams *zvec_test_create_default_hnsw_params(void) { - ZVecIndexParams *params = (ZVecIndexParams *)malloc(sizeof(ZVecIndexParams)); + ZVecIndexParams *params = zvec_index_params_create(ZVEC_INDEX_TYPE_HNSW); if (!params) return NULL; - *params = ZVEC_HNSW_PARAMS(ZVEC_METRIC_TYPE_IP, 16, 100, 50, - ZVEC_QUANTIZE_TYPE_UNDEFINED); + zvec_index_params_set_metric_type(params, ZVEC_METRIC_TYPE_IP); + zvec_index_params_set_hnsw_params(params, 16, 100); return params; } ZVecIndexParams *zvec_test_create_default_flat_params(void) { - ZVecIndexParams *params = (ZVecIndexParams *)malloc(sizeof(ZVecIndexParams)); + ZVecIndexParams *params = 
zvec_index_params_create(ZVEC_INDEX_TYPE_FLAT); if (!params) return NULL; - *params = ZVEC_FLAT_PARAMS(ZVEC_METRIC_TYPE_IP, ZVEC_QUANTIZE_TYPE_UNDEFINED); + zvec_index_params_set_metric_type(params, ZVEC_METRIC_TYPE_IP); return params; } ZVecIndexParams *zvec_test_create_default_invert_params(bool enable_optimize) { - ZVecIndexParams *params = (ZVecIndexParams *)malloc(sizeof(ZVecIndexParams)); + ZVecIndexParams *params = zvec_index_params_create(ZVEC_INDEX_TYPE_INVERT); if (!params) return NULL; - *params = ZVEC_INVERT_PARAMS(enable_optimize, enable_optimize); + zvec_index_params_set_invert_params(params, enable_optimize, enable_optimize); return params; } @@ -787,23 +799,24 @@ ZVecIndexParams *zvec_test_create_default_invert_params(bool enable_optimize) { ZVecFieldSchema *zvec_test_create_scalar_field( const char *name, ZVecDataType data_type, bool nullable, const ZVecIndexParams *invert_params) { - ZVecFieldSchema *field = (ZVecFieldSchema *)malloc(sizeof(ZVecFieldSchema)); + // Use the public API to create the field + ZVecFieldSchema *field = + zvec_field_schema_create(name, data_type, nullable, 0); if (!field) return NULL; - field->name = (ZVecString *)malloc(sizeof(ZVecString)); - if (!field->name) { - free(field); - return NULL; - } - field->name->data = name ? strdup(name) : NULL; - field->name->length = name ? strlen(name) : 0; - field->name->capacity = name ? 
strlen(name) + 1 : 0; - field->data_type = data_type; - field->nullable = nullable; - field->dimension = 0; - field->has_index = (invert_params != NULL); if (invert_params) { - field->index_params = *invert_params; + // Clone the index params using setter API + ZVecIndexType type = zvec_index_params_get_type(invert_params); + ZVecIndexParams *cloned_params = zvec_index_params_create(type); + if (cloned_params) { + bool enable_range_opt, enable_wildcard; + zvec_index_params_get_invert_params(invert_params, &enable_range_opt, + &enable_wildcard); + zvec_index_params_set_invert_params(cloned_params, enable_range_opt, + enable_wildcard); + zvec_field_schema_set_index_params(field, cloned_params); + zvec_index_params_destroy(cloned_params); + } } return field; @@ -812,23 +825,23 @@ ZVecFieldSchema *zvec_test_create_scalar_field( ZVecFieldSchema *zvec_test_create_vector_field( const char *name, ZVecDataType data_type, uint32_t dimension, bool nullable, const ZVecIndexParams *vector_index_params) { - ZVecFieldSchema *field = (ZVecFieldSchema *)malloc(sizeof(ZVecFieldSchema)); + // Use the public API to create the field + ZVecFieldSchema *field = + zvec_field_schema_create(name, data_type, nullable, dimension); if (!field) return NULL; - field->name = (ZVecString *)malloc(sizeof(ZVecString)); - if (!field->name) { - free(field); - return NULL; - } - field->name->data = name ? strdup(name) : NULL; - field->name->length = name ? strlen(name) : 0; - field->name->capacity = name ? 
strlen(name) + 1 : 0; - field->data_type = data_type; - field->nullable = nullable; - field->dimension = dimension; - field->has_index = (vector_index_params != NULL); if (vector_index_params) { - field->index_params = *vector_index_params; + // Clone the index params using setter API + ZVecIndexType type = zvec_index_params_get_type(vector_index_params); + ZVecIndexParams *cloned_params = zvec_index_params_create(type); + if (cloned_params) { + int m, ef_construction; + zvec_index_params_get_hnsw_params(vector_index_params, &m, + &ef_construction); + zvec_index_params_set_hnsw_params(cloned_params, m, ef_construction); + zvec_field_schema_set_index_params(field, cloned_params); + zvec_index_params_destroy(cloned_params); + } } return field; @@ -837,23 +850,23 @@ ZVecFieldSchema *zvec_test_create_vector_field( ZVecFieldSchema *zvec_test_create_sparse_vector_field( const char *name, ZVecDataType data_type, bool nullable, const ZVecIndexParams *vector_index_params) { - ZVecFieldSchema *field = (ZVecFieldSchema *)malloc(sizeof(ZVecFieldSchema)); + // Use the public API to create the field + ZVecFieldSchema *field = + zvec_field_schema_create(name, data_type, nullable, 0); if (!field) return NULL; - field->name = (ZVecString *)malloc(sizeof(ZVecString)); - if (!field->name) { - free(field); - return NULL; - } - field->name->data = name ? strdup(name) : NULL; - field->name->length = name ? strlen(name) : 0; - field->name->capacity = name ? 
strlen(name) + 1 : 0; - field->data_type = data_type; - field->nullable = nullable; - field->dimension = 0; - field->has_index = (vector_index_params != NULL); if (vector_index_params) { - field->index_params = *vector_index_params; + // Clone the index params using setter API + ZVecIndexType type = zvec_index_params_get_type(vector_index_params); + ZVecIndexParams *cloned_params = zvec_index_params_create(type); + if (cloned_params) { + int m, ef_construction; + zvec_index_params_get_hnsw_params(vector_index_params, &m, + &ef_construction); + zvec_index_params_set_hnsw_params(cloned_params, m, ef_construction); + zvec_field_schema_set_index_params(field, cloned_params); + zvec_index_params_destroy(cloned_params); + } } return field; @@ -863,21 +876,8 @@ ZVecFieldSchema *zvec_test_create_sparse_vector_field( // Memory Management Helper Functions Implementation // ============================================================================= -void zvec_test_free_field_schemas(ZVecFieldSchema *fields, size_t count) { - if (!fields) return; - - for (size_t i = 0; i < count; i++) { - if (fields[i].name) { - if (fields[i].name->data) { - free(fields[i].name->data); - } - free(fields[i].name); - } - // Note: index_params is now an embedded value, not a pointer - // It will be freed automatically when the struct is freed - } - free(fields); -} +// Note: zvec_test_free_field_schemas is deprecated. +// Use zvec_field_schema_destroy() to free individual field schemas. 
void zvec_test_free_strings(char **strings, size_t count) { if (!strings) return; diff --git a/tests/c/utils.h b/tests/c/utils.h index 0e9b42b7..202bc95b 100644 --- a/tests/c/utils.h +++ b/tests/c/utils.h @@ -224,14 +224,6 @@ ZVecFieldSchema *zvec_test_create_sparse_vector_field( // Memory Management Helper Functions // ============================================================================= -/** - * @brief Free field schema array - * - * @param fields Field array pointer - * @param count Number of fields - */ -void zvec_test_free_field_schemas(ZVecFieldSchema *fields, size_t count); - /** * @brief Free string array *