diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
new file mode 100644
index 00000000..20a57f88
--- /dev/null
+++ b/.github/workflows/release.yml
@@ -0,0 +1,196 @@
+name: Release
+
+permissions:
+  contents: read
+
+on:
+  push:
+    tags:
+      - 'v*'  # Match v0.3.0, v1.0.0, etc.
+  workflow_dispatch:  # Allow manual trigger
+
+jobs:
+  # ============================================================================
+  # Linux x64 Build
+  # ============================================================================
+  linux-x64:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          submodules: recursive
+          fetch-depth: 0
+
+      - name: Install dependencies
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y cmake ninja-build build-essential
+
+      - name: Build libzvec_c_api.so
+        run: |
+          cmake -S . -B build -G Ninja \
+            -DCMAKE_BUILD_TYPE=Release \
+            -DBUILD_PYTHON_BINDINGS=OFF \
+            -DBUILD_TOOLS=OFF \
+            -DBUILD_EXAMPLES=OFF
+          cmake --build build --parallel --target zvec_c_api
+
+      - name: Verify library
+        run: |
+          echo "=== Library file ==="
+          ls -lh build/src/c_api/libzvec_c_api.so
+          echo "=== Check dependencies ==="
+          ldd build/src/c_api/libzvec_c_api.so || true
+
+      - name: Create tarball
+        run: |
+          cp src/include/zvec/c_api.h .
+          cp build/src/c_api/libzvec_c_api.so .
+          tar -czvf libzvec-capi-linux-x64.tar.gz \
+            c_api.h \
+            libzvec_c_api.so
+
+      - name: Upload artifact
+        uses: actions/upload-artifact@v4
+        with:
+          name: zvec-capi-linux-x64
+          path: libzvec-capi-linux-x64.tar.gz
+
+  # ============================================================================
+  # Linux ARM64 Build
+  # ============================================================================
+  linux-arm64:
+    runs-on: ubuntu-24.04-arm
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          submodules: recursive
+          fetch-depth: 0
+
+      - name: Install dependencies
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y cmake ninja-build build-essential
+
+      - name: Build libzvec_c_api.so (ARM64)
+        run: |
+          cmake -S . -B build -G Ninja \
+            -DCMAKE_BUILD_TYPE=Release \
+            -DBUILD_PYTHON_BINDINGS=OFF \
+            -DBUILD_TOOLS=OFF \
+            -DBUILD_EXAMPLES=OFF
+          cmake --build build --parallel --target zvec_c_api
+
+      - name: Verify library
+        run: |
+          echo "=== Library file ==="
+          ls -lh build/src/c_api/libzvec_c_api.so
+          echo "=== Check dependencies ==="
+          ldd build/src/c_api/libzvec_c_api.so || true
+
+      - name: Create tarball
+        run: |
+          cp src/include/zvec/c_api.h .
+          cp build/src/c_api/libzvec_c_api.so .
+          tar -czvf libzvec-capi-linux-arm64.tar.gz \
+            c_api.h \
+            libzvec_c_api.so
+
+      - name: Upload artifact
+        uses: actions/upload-artifact@v4
+        with:
+          name: zvec-capi-linux-arm64
+          path: libzvec-capi-linux-arm64.tar.gz
+
+  # ============================================================================
+  # macOS Universal Build (arm64 + x86_64)
+  # ============================================================================
+  macos-universal:
+    runs-on: macos-latest
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          submodules: recursive
+          fetch-depth: 0
+
+      - name: Install dependencies
+        run: |
+          brew install cmake ninja
+
+      - name: Build libzvec_c_api.dylib (Universal Binary)
+        env:
+          CMAKE_OSX_ARCHITECTURES: "arm64;x86_64"
+          MACOSX_DEPLOYMENT_TARGET: "11.0"
+        run: |
+          cmake -S . -B build -G Ninja \
+            -DCMAKE_BUILD_TYPE=Release \
+            -DCMAKE_OSX_ARCHITECTURES="arm64;x86_64" \
+            -DCMAKE_OSX_DEPLOYMENT_TARGET="11.0" \
+            -DBUILD_PYTHON_BINDINGS=OFF \
+            -DBUILD_TOOLS=OFF \
+            -DBUILD_EXAMPLES=OFF
+          cmake --build build --parallel --target zvec_c_api
+
+      - name: Verify library
+        run: |
+          echo "=== Library file ==="
+          ls -lh build/src/c_api/libzvec_c_api.dylib
+          echo "=== Check architectures ==="
+          lipo -archs build/src/c_api/libzvec_c_api.dylib
+
+      - name: Create tarball
+        run: |
+          cp src/include/zvec/c_api.h .
+          cp build/src/c_api/libzvec_c_api.dylib .
+          tar -czvf libzvec-capi-macos-universal.tar.gz \
+            c_api.h \
+            libzvec_c_api.dylib
+
+      - name: Upload artifact
+        uses: actions/upload-artifact@v4
+        with:
+          name: zvec-capi-macos-universal
+          path: libzvec-capi-macos-universal.tar.gz
+
+  # ============================================================================
+  # Upload to GitHub Releases
+  # ============================================================================
+  upload-release:
+    needs: [linux-x64, linux-arm64, macos-universal]
+    # Publishing needs a tag to attach the release to, so skip this job when the
+    # workflow is started manually from a branch; the build jobs still run.
+    if: startsWith(github.ref, 'refs/tags/')
+    runs-on: ubuntu-latest
+    permissions:
+      contents: write
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      # Download build artifacts for each platform
+      - uses: actions/download-artifact@v4
+        with:
+          name: zvec-capi-linux-x64
+          path: dist/
+
+      - uses: actions/download-artifact@v4
+        with:
+          name: zvec-capi-linux-arm64
+          path: dist/
+
+      - uses: actions/download-artifact@v4
+        with:
+          name: zvec-capi-macos-universal
+          path: dist/
+
+      - name: List artifacts
+        run: ls -la dist/
+
+      # Upload to GitHub Releases
+      - uses: softprops/action-gh-release@v2
+        with:
+          files: dist/*.tar.gz
+          generate_release_notes: true
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 52a59754..7730c84b 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -21,10 +21,15 @@ include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake)
 
 include_directories(${PROJECT_ROOT_DIR}/src/include)
include_directories(${PROJECT_ROOT_DIR}/src)
+# Add generated headers to global include path
+include_directories(${PROJECT_BINARY_DIR}/src/generated)
 
 option(BUILD_PYTHON_BINDINGS "Build Python bindings using pybind11" OFF)
 message(STATUS "BUILD_PYTHON_BINDINGS:${BUILD_PYTHON_BINDINGS}")
 
+option(BUILD_C_BINDINGS "Build C bindings" ON)
+message(STATUS "BUILD_C_BINDINGS:${BUILD_C_BINDINGS}")
+
 option(BUILD_TOOLS "Build tools" ON)
 message(STATUS "BUILD_TOOLS:${BUILD_TOOLS}")
 
diff --git a/examples/c/CMakeLists.txt b/examples/c/CMakeLists.txt
new file mode 100644
index 00000000..476b42c2
--- /dev/null
+++ b/examples/c/CMakeLists.txt
@@ -0,0 +1,54 @@
+# Copyright 2025-present the zvec project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Directory that receives every C API example binary. Fall back to
+# <build>/bin when CMAKE_RUNTIME_OUTPUT_DIRECTORY is not set, otherwise the
+# path would collapse to "/examples/c" at the filesystem root.
+if(CMAKE_RUNTIME_OUTPUT_DIRECTORY)
+  set(ZVEC_C_EXAMPLES_OUTPUT_DIR "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/examples/c")
+else()
+  set(ZVEC_C_EXAMPLES_OUTPUT_DIR "${CMAKE_BINARY_DIR}/bin/examples/c")
+endif()
+
+# zvec_add_c_example(<target> <source>)
+# Builds one example executable from <source>, links it against zvec_c_api and
+# places the binary in ZVEC_C_EXAMPLES_OUTPUT_DIR.
+function(zvec_add_c_example target source)
+  add_executable(${target} ${source})
+  target_link_libraries(${target} PRIVATE zvec_c_api)
+  target_include_directories(${target} PRIVATE
+    ${PROJECT_SOURCE_DIR}/src/include
+  )
+  set_target_properties(${target} PROPERTIES
+    RUNTIME_OUTPUT_DIRECTORY "${ZVEC_C_EXAMPLES_OUTPUT_DIR}"
+  )
+endfunction()
+
+# Basic example
+zvec_add_c_example(c_api_basic_example basic_example.c)
+
+# Schema example
+zvec_add_c_example(c_api_collection_schema_example collection_schema_example.c)
+
+# Struct document example
+zvec_add_c_example(c_api_doc_example doc_example.c)
+
+# Index example
+zvec_add_c_example(c_api_index_example index_example.c)
+
+# Field schema example
+zvec_add_c_example(c_api_field_schema_example field_schema_example.c)
+
+# Optimized example
+zvec_add_c_example(c_api_optimized_example optimized_example.c)
diff --git
a/examples/c/basic_example.c b/examples/c/basic_example.c new file mode 100644 index 00000000..43767e8f --- /dev/null +++ b/examples/c/basic_example.c @@ -0,0 +1,265 @@ +// Copyright 2025-present the zvec project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include "zvec/c_api.h" + +/** + * @brief Print error message and return error code + */ +static ZVecErrorCode handle_error(ZVecErrorCode error, const char *context) { + if (error != ZVEC_OK) { + char *error_msg = NULL; + zvec_get_last_error(&error_msg); + fprintf(stderr, "Error in %s: %d - %s\n", context, error, + error_msg ? 
error_msg : "Unknown error"); + free(error_msg); + } + return error; +} + +/** + * @brief Create a simple test collection using CollectionSchema + */ +static ZVecErrorCode create_simple_test_collection( + ZVecCollection **collection) { + // Create collection schema using C API + ZVecCollectionSchema *schema = + zvec_collection_schema_create("test_collection"); + if (!schema) { + return ZVEC_ERROR_INTERNAL_ERROR; + } + + ZVecErrorCode error = ZVEC_OK; + + // Create index parameters using new API + ZVecIndexParams *invert_params = + zvec_index_params_create(ZVEC_INDEX_TYPE_INVERT); + if (!invert_params) { + zvec_collection_schema_destroy(schema); + return ZVEC_ERROR_RESOURCE_EXHAUSTED; + } + zvec_index_params_set_invert_params(invert_params, true, false); + + ZVecIndexParams *hnsw_params = zvec_index_params_create(ZVEC_INDEX_TYPE_HNSW); + if (!hnsw_params) { + zvec_index_params_destroy(invert_params); + zvec_collection_schema_destroy(schema); + return ZVEC_ERROR_RESOURCE_EXHAUSTED; + } + zvec_index_params_set_metric_type(hnsw_params, ZVEC_METRIC_TYPE_COSINE); + zvec_index_params_set_hnsw_params(hnsw_params, 16, 200); + + // Create and add ID field (primary key) + ZVecFieldSchema *id_field = + zvec_field_schema_create("id", ZVEC_DATA_TYPE_STRING, false, 0); + zvec_field_schema_set_invert_index(id_field, invert_params); + error = zvec_collection_schema_add_field(schema, id_field); + if (error != ZVEC_OK) { + zvec_index_params_destroy(invert_params); + zvec_index_params_destroy(hnsw_params); + zvec_collection_schema_destroy(schema); + return error; + } + + // Create text field (inverted index) + ZVecFieldSchema *text_field = + zvec_field_schema_create("text", ZVEC_DATA_TYPE_STRING, true, 0); + zvec_field_schema_set_invert_index(text_field, invert_params); + error = zvec_collection_schema_add_field(schema, text_field); + if (error != ZVEC_OK) { + zvec_index_params_destroy(invert_params); + zvec_index_params_destroy(hnsw_params); + zvec_collection_schema_destroy(schema); 
+ return error; + } + + // Create embedding field (HNSW index) + ZVecFieldSchema *embedding_field = zvec_field_schema_create( + "embedding", ZVEC_DATA_TYPE_VECTOR_FP32, false, 3); + zvec_field_schema_set_hnsw_index(embedding_field, hnsw_params); + error = zvec_collection_schema_add_field(schema, embedding_field); + if (error != ZVEC_OK) { + zvec_index_params_destroy(invert_params); + zvec_index_params_destroy(hnsw_params); + zvec_collection_schema_destroy(schema); + return error; + } + + // Cleanup index parameters (they have been copied to the field schemas) + zvec_index_params_destroy(invert_params); + zvec_index_params_destroy(hnsw_params); + + // Use default options + ZVecCollectionOptions *options = zvec_collection_options_create(); + if (!options) { + zvec_collection_schema_destroy(schema); + return ZVEC_ERROR_RESOURCE_EXHAUSTED; + } + + // Create collection using the new API + error = zvec_collection_create_and_open("./test_collection", schema, options, + collection); + + // Cleanup resources + zvec_collection_options_destroy(options); + zvec_collection_schema_destroy(schema); + + return error; +} + +/** + * @brief Basic C API usage example + */ +int main() { + printf("=== ZVec C API Basic Example ===\n\n"); + + ZVecErrorCode error; + + // Create collection using simplified function + ZVecCollection *collection = NULL; + error = create_simple_test_collection(&collection); + if (handle_error(error, "creating collection") != ZVEC_OK) { + return 1; + } + printf("✓ Collection created successfully\n"); + + // Prepare test data + float vector1[] = {0.1f, 0.2f, 0.3f}; + float vector2[] = {0.4f, 0.5f, 0.6f}; + + ZVecDoc *docs[2]; + for (int i = 0; i < 2; ++i) { + docs[i] = zvec_doc_create(); + if (!docs[i]) { + fprintf(stderr, "Failed to create document %d\n", i); + // Cleanup allocated resources + for (int j = 0; j < i; ++j) { + zvec_doc_destroy(docs[j]); + } + return ZVEC_ERROR_INTERNAL_ERROR; + } + } + + // Manually add fields to document 1 + 
zvec_doc_set_pk(docs[0], "doc1"); + zvec_doc_add_field_by_value(docs[0], "id", ZVEC_DATA_TYPE_STRING, "doc1", + strlen("doc1")); + zvec_doc_add_field_by_value(docs[0], "text", ZVEC_DATA_TYPE_STRING, + "First document", strlen("First document")); + zvec_doc_add_field_by_value(docs[0], "embedding", ZVEC_DATA_TYPE_VECTOR_FP32, + vector1, 3 * sizeof(float)); + + // Manually add fields to document 2 + zvec_doc_set_pk(docs[1], "doc2"); + zvec_doc_add_field_by_value(docs[1], "id", ZVEC_DATA_TYPE_STRING, "doc2", + strlen("doc2")); + zvec_doc_add_field_by_value(docs[1], "text", ZVEC_DATA_TYPE_STRING, + "Second document", strlen("Second document")); + zvec_doc_add_field_by_value(docs[1], "embedding", ZVEC_DATA_TYPE_VECTOR_FP32, + vector2, 3 * sizeof(float)); + + // Insert documents + size_t success_count = 0; + size_t error_count = 0; + error = zvec_collection_insert(collection, (const ZVecDoc **)docs, 2, + &success_count, &error_count); + if (handle_error(error, "inserting documents") != ZVEC_OK) { + zvec_collection_destroy(collection); + return 1; + } + printf("✓ Documents inserted - Success: %zu, Failed: %zu\n", success_count, + error_count); + for (int i = 0; i < 2; ++i) { + zvec_doc_destroy(docs[i]); + } + + // Flush collection + error = zvec_collection_flush(collection); + if (handle_error(error, "flushing collection") != ZVEC_OK) { + printf("Collection flush failed\n"); + } else { + printf("✓ Collection flushed successfully\n"); + } + + // Get collection statistics + ZVecCollectionStats *stats = NULL; + error = zvec_collection_get_stats(collection, &stats); + if (handle_error(error, "getting collection stats") == ZVEC_OK) { + printf("✓ Collection stats - Document count: %llu\n", + (unsigned long long)zvec_collection_stats_get_doc_count(stats)); + // Free statistics memory + zvec_collection_stats_destroy(stats); + } + + printf("Testing vector query...\n"); + // Query documents + ZVecVectorQuery *query = zvec_vector_query_create(); + if (!query) { + fprintf(stderr, 
"Failed to create vector query\n"); + zvec_collection_destroy(collection); + return 1; + } + + zvec_vector_query_set_field_name(query, "embedding"); + zvec_vector_query_set_query_vector(query, vector1, 3 * sizeof(float)); + zvec_vector_query_set_topk(query, 10); + zvec_vector_query_set_filter(query, ""); + zvec_vector_query_set_include_vector(query, true); + zvec_vector_query_set_include_doc_id(query, true); + + ZVecDoc **results = NULL; + size_t result_count = 0; + error = zvec_collection_query(collection, (const ZVecVectorQuery *)query, + &results, &result_count); + + if (error != ZVEC_OK) { + char *error_msg = NULL; + zvec_get_last_error(&error_msg); + printf("[ERROR] Query failed: %s\n", + error_msg ? error_msg : "Unknown error"); + free(error_msg); + zvec_vector_query_destroy(query); + goto cleanup; + } + + zvec_vector_query_destroy(query); + + printf("✓ Query successful - Returned %zu results\n", result_count); + + // Process query results + for (size_t i = 0; i < result_count && i < 5; ++i) { + const ZVecDoc *doc = results[i]; + const char *pk = zvec_doc_get_pk_copy(doc); + + printf(" Result %zu: PK=%s, DocID=%llu, Score=%.4f\n", i + 1, + pk ? pk : "NULL", (unsigned long long)zvec_doc_get_doc_id(doc), + zvec_doc_get_score(doc)); + + if (pk) { + free((void *)pk); + } + } + + // Free query results memory + zvec_docs_free(results, result_count); + +cleanup: + // Cleanup resources + zvec_collection_destroy(collection); + printf("✓ Example completed\n"); + return 0; +} \ No newline at end of file diff --git a/examples/c/collection_schema_example.c b/examples/c/collection_schema_example.c new file mode 100644 index 00000000..2d49b6d0 --- /dev/null +++ b/examples/c/collection_schema_example.c @@ -0,0 +1,253 @@ +// Copyright 2025-present the zvec project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include "zvec/c_api.h" + +/** + * @brief Print error message and return error code + */ +static ZVecErrorCode handle_error(ZVecErrorCode error, const char *context) { + if (error != ZVEC_OK) { + char *error_msg = NULL; + zvec_get_last_error(&error_msg); + fprintf(stderr, "Error in %s: %d - %s\n", context, error, + error_msg ? error_msg : "Unknown error"); + free(error_msg); + } + return error; +} + +/** + * @brief Collection schema creation and management example + */ +int main() { + printf("=== ZVec Collection Schema Example ===\n\n"); + + ZVecErrorCode error; + + // 1. Create collection schema + ZVecCollectionSchema *schema = + zvec_collection_schema_create("schema_example_collection"); + if (!schema) { + fprintf(stderr, "Failed to create collection schema\n"); + return 1; + } + printf("✓ Collection schema created successfully\n"); + + // 2. Set schema properties + zvec_collection_schema_set_max_doc_count_per_segment(schema, 1000000); + printf("✓ Set max documents per segment: %llu\n", + (unsigned long long) + zvec_collection_schema_get_max_doc_count_per_segment(schema)); + + // 3. 
Create index parameters + ZVecIndexParams *invert_params = + zvec_index_params_create(ZVEC_INDEX_TYPE_INVERT); + if (!invert_params) { + fprintf(stderr, "Failed to create invert index parameters\n"); + zvec_collection_schema_destroy(schema); + return 1; + } + zvec_index_params_set_invert_params(invert_params, true, false); + + ZVecIndexParams *hnsw_params = zvec_index_params_create(ZVEC_INDEX_TYPE_HNSW); + if (!hnsw_params) { + fprintf(stderr, "Failed to create HNSW index parameters\n"); + zvec_index_params_destroy(invert_params); + zvec_collection_schema_destroy(schema); + return 1; + } + zvec_index_params_set_metric_type(hnsw_params, ZVEC_METRIC_TYPE_L2); + zvec_index_params_set_hnsw_params(hnsw_params, 16, 200); + + // 4. Create and add ID field (primary key) + ZVecFieldSchema *id_field = + zvec_field_schema_create("id", ZVEC_DATA_TYPE_STRING, false, 0); + if (!id_field) { + fprintf(stderr, "Failed to create ID field\n"); + zvec_collection_schema_destroy(schema); + return 1; + } + + error = zvec_collection_schema_add_field(schema, id_field); + if (handle_error(error, "adding ID field") != ZVEC_OK) { + zvec_collection_schema_destroy(schema); + return 1; + } + printf("✓ ID field added successfully\n"); + + // 5. Create and add text field with inverted index + ZVecFieldSchema *text_field = + zvec_field_schema_create("content", ZVEC_DATA_TYPE_STRING, true, 0); + if (!text_field) { + fprintf(stderr, "Failed to create text field\n"); + zvec_collection_schema_destroy(schema); + return 1; + } + + zvec_field_schema_set_invert_index(text_field, invert_params); + error = zvec_collection_schema_add_field(schema, text_field); + if (handle_error(error, "adding text field") != ZVEC_OK) { + zvec_collection_schema_destroy(schema); + return 1; + } + printf("✓ Text field with inverted index added successfully\n"); + + // 6. 
Create and add vector field with HNSW index + ZVecFieldSchema *vector_field = zvec_field_schema_create( + "embedding", ZVEC_DATA_TYPE_VECTOR_FP32, false, 128); + if (!vector_field) { + fprintf(stderr, "Failed to create vector field\n"); + zvec_collection_schema_destroy(schema); + return 1; + } + + zvec_field_schema_set_hnsw_index(vector_field, hnsw_params); + error = zvec_collection_schema_add_field(schema, vector_field); + if (handle_error(error, "adding vector field") != ZVEC_OK) { + zvec_collection_schema_destroy(schema); + return 1; + } + printf("✓ Vector field with HNSW index added successfully\n"); + + // 7. Check field count + // Note: This function may not exist in current API, commenting out for now + // size_t field_count = zvec_collection_schema_get_field_count(schema); + // printf("✓ Total field count: %zu\n", field_count); + + // 8. Create collection with schema + ZVecCollectionOptions *options = zvec_collection_options_create(); + if (!options) { + fprintf(stderr, "Failed to create collection options\n"); + zvec_collection_schema_destroy(schema); + return 1; + } + ZVecCollection *collection = NULL; + + error = zvec_collection_create_and_open("./schema_example_collection", schema, + options, &collection); + if (handle_error(error, "creating collection with schema") != ZVEC_OK) { + zvec_collection_options_destroy(options); + zvec_collection_schema_destroy(schema); + return 1; + } + zvec_collection_options_destroy(options); + printf("✓ Collection created successfully with schema\n"); + + // 9. Prepare test data + float vector1[128]; + float vector2[128]; + for (int i = 0; i < 128; i++) { + vector1[i] = (float)(i + 1) / 128.0f; + vector2[i] = (float)(i + 2) / 128.0f; + } + + // 10. 
Create documents + ZVecDoc *docs[2]; + for (int i = 0; i < 2; i++) { + docs[i] = zvec_doc_create(); + if (!docs[i]) { + fprintf(stderr, "Failed to create document %d\n", i); + // Cleanup + for (int j = 0; j < i; j++) { + zvec_doc_destroy(docs[j]); + } + zvec_collection_destroy(collection); + zvec_collection_schema_destroy(schema); + return 1; + } + } + + // Add fields to document 1 + zvec_doc_set_pk(docs[0], "doc1"); + zvec_doc_add_field_by_value(docs[0], "id", ZVEC_DATA_TYPE_STRING, "doc1", + strlen("doc1")); + zvec_doc_add_field_by_value(docs[0], "content", ZVEC_DATA_TYPE_STRING, + "First test document", + strlen("First test document")); + zvec_doc_add_field_by_value(docs[0], "embedding", ZVEC_DATA_TYPE_VECTOR_FP32, + vector1, 128 * sizeof(float)); + + // Add fields to document 2 + zvec_doc_set_pk(docs[1], "doc2"); + zvec_doc_add_field_by_value(docs[1], "id", ZVEC_DATA_TYPE_STRING, "doc2", + strlen("doc2")); + zvec_doc_add_field_by_value(docs[1], "content", ZVEC_DATA_TYPE_STRING, + "Second test document", + strlen("Second test document")); + zvec_doc_add_field_by_value(docs[1], "embedding", ZVEC_DATA_TYPE_VECTOR_FP32, + vector2, 128 * sizeof(float)); + + // 11. Insert documents + size_t success_count = 0, error_count = 0; + error = zvec_collection_insert(collection, (const ZVecDoc **)docs, 2, + &success_count, &error_count); + if (handle_error(error, "inserting documents") != ZVEC_OK) { + // Cleanup + for (int i = 0; i < 2; i++) { + zvec_doc_destroy(docs[i]); + } + zvec_collection_destroy(collection); + zvec_collection_schema_destroy(schema); + return 1; + } + printf("✓ Documents inserted - Success: %zu, Failed: %zu\n", success_count, + error_count); + + // Cleanup documents + for (int i = 0; i < 2; i++) { + zvec_doc_destroy(docs[i]); + } + + // 12. Flush collection + error = zvec_collection_flush(collection); + if (handle_error(error, "flushing collection") == ZVEC_OK) { + printf("✓ Collection flushed successfully\n"); + } + + // 13. 
Query test + ZVecVectorQuery *query = zvec_vector_query_create(); + if (!query) { + fprintf(stderr, "Failed to create vector query\n"); + zvec_collection_destroy(collection); + zvec_collection_schema_destroy(schema); + return 1; + } + zvec_vector_query_set_field_name(query, "embedding"); + zvec_vector_query_set_query_vector(query, vector1, 128 * sizeof(float)); + zvec_vector_query_set_topk(query, 5); + zvec_vector_query_set_filter(query, ""); + zvec_vector_query_set_include_vector(query, true); + zvec_vector_query_set_include_doc_id(query, true); + + ZVecDoc **results = NULL; + size_t result_count = 0; + error = zvec_collection_query(collection, (const ZVecVectorQuery *)query, + &results, &result_count); + if (error == ZVEC_OK) { + printf("✓ Vector query successful - Returned %zu results\n", result_count); + zvec_docs_free(results, result_count); + } + zvec_vector_query_destroy(query); + + // 14. Cleanup resources + zvec_collection_destroy(collection); + zvec_collection_schema_destroy(schema); + printf("✓ Schema example completed\n"); + + return 0; +} \ No newline at end of file diff --git a/examples/c/doc_example.c b/examples/c/doc_example.c new file mode 100644 index 00000000..a3cc05ed --- /dev/null +++ b/examples/c/doc_example.c @@ -0,0 +1,530 @@ +// Copyright 2025-present the zvec project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include +#include +#include +#include +#include +#include "zvec/c_api.h" + +/** + * @brief Print error message and return error code + */ +static ZVecErrorCode handle_error(ZVecErrorCode error, const char *context) { + if (error != ZVEC_OK) { + char *error_msg = NULL; + zvec_get_last_error(&error_msg); + fprintf(stderr, "Error in %s: %d - %s\n", context, error, + error_msg ? error_msg : "Unknown error"); + free(error_msg); + } + return error; +} + +/** + * @brief Create a test document with all data types + * @param doc_index Document index for generating unique data + * @return ZVecDoc* Created document pointer + */ +static ZVecDoc *create_full_type_test_doc(int doc_index) { + ZVecDoc *doc = zvec_doc_create(); + if (!doc) { + fprintf(stderr, "Failed to create document\n"); + return NULL; + } + + // Set primary key + char pk_buffer[32]; + snprintf(pk_buffer, sizeof(pk_buffer), "doc_%d", doc_index); + zvec_doc_set_pk(doc, pk_buffer); + + // Add Id field with inverted index + char id_buffer[32]; + snprintf(id_buffer, sizeof(id_buffer), "id_%d", doc_index); + zvec_doc_add_field_by_value(doc, "id", ZVEC_DATA_TYPE_STRING, id_buffer, + strlen(id_buffer)); + + // Add scalar fields with different data types + // String field + char string_value[64]; + snprintf(string_value, sizeof(string_value), "test_string_%d", doc_index); + zvec_doc_add_field_by_value(doc, "string_field", ZVEC_DATA_TYPE_STRING, + string_value, strlen(string_value)); + + // Boolean field + bool bool_value = (doc_index % 2 == 0); + zvec_doc_add_field_by_value(doc, "bool_field", ZVEC_DATA_TYPE_BOOL, + &bool_value, sizeof(bool_value)); + + // Integer fields + int32_t int32_value = doc_index * 1000; + zvec_doc_add_field_by_value(doc, "int32_field", ZVEC_DATA_TYPE_INT32, + &int32_value, sizeof(int32_value)); + + int64_t int64_value = (int64_t)doc_index * 1000000LL; + zvec_doc_add_field_by_value(doc, "int64_field", ZVEC_DATA_TYPE_INT64, + &int64_value, sizeof(int64_value)); + + // Floating point fields + 
float float_value = (float)doc_index * 1.5f; + zvec_doc_add_field_by_value(doc, "float_field", ZVEC_DATA_TYPE_FLOAT, + &float_value, sizeof(float_value)); + + double double_value = (double)doc_index * 2.718281828; + zvec_doc_add_field_by_value(doc, "double_field", ZVEC_DATA_TYPE_DOUBLE, + &double_value, sizeof(double_value)); + + // Vector fields with different dimensions + // FP32 vector (3D) + float fp32_vector[3] = {(float)doc_index, (float)doc_index * 2.0f, + (float)doc_index * 3.0f}; + zvec_doc_add_field_by_value(doc, "vector_fp32", ZVEC_DATA_TYPE_VECTOR_FP32, + fp32_vector, 3 * sizeof(float)); + + // Larger FP32 vector (16D) + float large_vector[16]; + for (int i = 0; i < 16; i++) { + large_vector[i] = (float)(doc_index * 16 + i) / 256.0f; + } + zvec_doc_add_field_by_value(doc, "large_vector", ZVEC_DATA_TYPE_VECTOR_FP32, + large_vector, 16 * sizeof(float)); + + return doc; +} + +/** + * @brief Compare two documents for equality + */ +static bool compare_documents(const ZVecDoc *doc1, const ZVecDoc *doc2) { + if (!doc1 || !doc2) return false; + + // Compare primary keys + const char *pk1 = zvec_doc_get_pk_pointer(doc1); + const char *pk2 = zvec_doc_get_pk_pointer(doc2); + + if (!pk1 || !pk2 || strcmp(pk1, pk2) != 0) { + return false; + } + + // TODO: Compare other fields and values + + return true; +} + +/** + * @brief Print document fields and their values + * @param doc The document to print + * @param doc_index Document index for identification + */ +static void print_doc(const ZVecDoc *doc, int doc_index) { + if (!doc) { + printf("Document %d: NULL document\n", doc_index); + return; + } + + printf("\n=== Document %d ===\n", doc_index); + + // Print primary key + const char *pk = zvec_doc_get_pk_pointer(doc); + printf("Primary Key: %s\n", pk ? 
pk : "NULL"); + + // Print document ID + uint64_t doc_id = zvec_doc_get_doc_id(doc); + printf("Document ID: %llu\n", (unsigned long long)doc_id); + + // Print score + float score = zvec_doc_get_score(doc); + printf("Score: %.6f\n", score); + + // Print scalar fields + printf("\nScalar Fields:\n"); + + // ID field (using pointer function for strings) + const void *id_value = NULL; + size_t id_size = 0; + ZVecErrorCode error = zvec_doc_get_field_value_pointer( + doc, "id", ZVEC_DATA_TYPE_STRING, &id_value, &id_size); + if (error == ZVEC_OK && id_value) { + printf(" id: %.*s\n", (int)id_size, (const char *)id_value); + } + + // String field (using pointer function for strings) + const void *string_value = NULL; + size_t string_size = 0; + error = zvec_doc_get_field_value_pointer( + doc, "string_field", ZVEC_DATA_TYPE_STRING, &string_value, &string_size); + if (error == ZVEC_OK && string_value) { + printf(" string_field: %.*s\n", (int)string_size, + (const char *)string_value); + } + + // Boolean field + bool bool_value; + error = zvec_doc_get_field_value_basic(doc, "bool_field", ZVEC_DATA_TYPE_BOOL, + &bool_value, sizeof(bool_value)); + if (error == ZVEC_OK) { + printf(" bool_field: %s\n", bool_value ? 
"true" : "false"); + } + + // Int32 field + int32_t int32_value; + error = + zvec_doc_get_field_value_basic(doc, "int32_field", ZVEC_DATA_TYPE_INT32, + &int32_value, sizeof(int32_value)); + if (error == ZVEC_OK) { + printf(" int32_field: %d\n", int32_value); + } + + // Int64 field + int64_t int64_value; + error = + zvec_doc_get_field_value_basic(doc, "int64_field", ZVEC_DATA_TYPE_INT64, + &int64_value, sizeof(int64_value)); + if (error == ZVEC_OK) { + printf(" int64_field: %lld\n", (long long)int64_value); + } + + // Float field + float float_value; + error = + zvec_doc_get_field_value_basic(doc, "float_field", ZVEC_DATA_TYPE_FLOAT, + &float_value, sizeof(float_value)); + if (error == ZVEC_OK) { + printf(" float_field: %.6f\n", float_value); + } + + // Double field + double double_value; + error = + zvec_doc_get_field_value_basic(doc, "double_field", ZVEC_DATA_TYPE_DOUBLE, + &double_value, sizeof(double_value)); + if (error == ZVEC_OK) { + printf(" double_field: %.6f\n", double_value); + } + + // Print vector fields (using copy function for complex types) + printf("\nVector Fields:\n"); + + // FP32 vector (3D) + void *fp32_vector = NULL; + size_t fp32_size = 0; + error = zvec_doc_get_field_value_copy( + doc, "vector_fp32", ZVEC_DATA_TYPE_VECTOR_FP32, &fp32_vector, &fp32_size); + if (error == ZVEC_OK && fp32_vector) { + const float *vec = (const float *)fp32_vector; + size_t dim = fp32_size / sizeof(float); + printf(" vector_fp32 (%zuD): [", dim); + for (size_t i = 0; i < dim && i < 10; i++) { // Limit to first 10 elements + printf("%.3f", vec[i]); + if (i < dim - 1 && i < 9) printf(", "); + } + if (dim > 10) printf(", ..."); + printf("]\n"); + free(fp32_vector); // Free the allocated memory + } + + // Large vector (16D) + void *large_vector = NULL; + size_t large_size = 0; + error = zvec_doc_get_field_value_copy(doc, "large_vector", + ZVEC_DATA_TYPE_VECTOR_FP32, + &large_vector, &large_size); + if (error == ZVEC_OK && large_vector) { + const float *vec = (const 
float *)large_vector; + size_t dim = large_size / sizeof(float); + printf(" large_vector (%zuD): [", dim); + for (size_t i = 0; i < dim && i < 10; i++) { // Limit to first 10 elements + printf("%.3f", vec[i]); + if (i < dim - 1 && i < 9) printf(", "); + } + if (dim > 10) printf(", ..."); + printf("]\n"); + free(large_vector); // Free the allocated memory + } + + printf("==================\n\n"); +} + +/** + * @brief Document creation, manipulation, and query example + */ +int main() { + printf("=== ZVec Document Example ===\n\n"); + + ZVecErrorCode error; + + // 1. Create collection schema for document testing + ZVecCollectionSchema *schema = + zvec_collection_schema_create("doc_example_collection"); + if (!schema) { + fprintf(stderr, "Failed to create collection schema\n"); + return -1; + } + printf("✓ Collection schema created\n"); + + // 2. Create index parameters + ZVecIndexParams *invert_params = + zvec_index_params_create(ZVEC_INDEX_TYPE_INVERT); + if (!invert_params) { + fprintf(stderr, "Failed to create invert index parameters\n"); + zvec_collection_schema_destroy(schema); + return -1; + } + zvec_index_params_set_invert_params(invert_params, true, false); + + ZVecIndexParams *hnsw_params = zvec_index_params_create(ZVEC_INDEX_TYPE_HNSW); + if (!hnsw_params) { + fprintf(stderr, "Failed to create HNSW index parameters\n"); + zvec_index_params_destroy(invert_params); + zvec_collection_schema_destroy(schema); + return -1; + } + zvec_index_params_set_metric_type(hnsw_params, ZVEC_METRIC_TYPE_L2); + zvec_index_params_set_hnsw_params(hnsw_params, 16, 200); + + // 3. 
Create fields for all data types + printf("Creating fields for all data types...\n"); + + // Id field with inverted index + ZVecFieldSchema *id_field = + zvec_field_schema_create("id", ZVEC_DATA_TYPE_STRING, false, 0); + if (id_field) { + zvec_field_schema_set_invert_index(id_field, invert_params); + error = zvec_collection_schema_add_field(schema, id_field); + if (handle_error(error, "adding ID field") == ZVEC_OK) { + printf("✓ ID field with inverted index added\n"); + } + } + + // Scalar fields + ZVecFieldSchema *string_field = + zvec_field_schema_create("string_field", ZVEC_DATA_TYPE_STRING, true, 0); + ZVecFieldSchema *bool_field = + zvec_field_schema_create("bool_field", ZVEC_DATA_TYPE_BOOL, true, 0); + ZVecFieldSchema *int32_field = + zvec_field_schema_create("int32_field", ZVEC_DATA_TYPE_INT32, true, 0); + ZVecFieldSchema *int64_field = + zvec_field_schema_create("int64_field", ZVEC_DATA_TYPE_INT64, true, 0); + ZVecFieldSchema *float_field = + zvec_field_schema_create("float_field", ZVEC_DATA_TYPE_FLOAT, true, 0); + ZVecFieldSchema *double_field = + zvec_field_schema_create("double_field", ZVEC_DATA_TYPE_DOUBLE, true, 0); + + if (string_field) zvec_collection_schema_add_field(schema, string_field); + if (bool_field) zvec_collection_schema_add_field(schema, bool_field); + if (int32_field) zvec_collection_schema_add_field(schema, int32_field); + if (int64_field) zvec_collection_schema_add_field(schema, int64_field); + if (float_field) zvec_collection_schema_add_field(schema, float_field); + if (double_field) zvec_collection_schema_add_field(schema, double_field); + + // Vector fields + ZVecFieldSchema *vector_fp32_field = zvec_field_schema_create( + "vector_fp32", ZVEC_DATA_TYPE_VECTOR_FP32, false, 3); + ZVecFieldSchema *large_vector_field = zvec_field_schema_create( + "large_vector", ZVEC_DATA_TYPE_VECTOR_FP32, false, 16); + + if (vector_fp32_field) { + zvec_field_schema_set_hnsw_index(vector_fp32_field, hnsw_params); + error = 
zvec_collection_schema_add_field(schema, vector_fp32_field); + if (handle_error(error, "adding vector FP32 field") == ZVEC_OK) { + printf("✓ Vector FP32 field with HNSW index added\n"); + } + } + + if (large_vector_field) { + zvec_field_schema_set_hnsw_index(large_vector_field, hnsw_params); + error = zvec_collection_schema_add_field(schema, large_vector_field); + if (handle_error(error, "adding large vector field") == ZVEC_OK) { + printf("✓ Large vector field with HNSW index added\n"); + } + } + + // 4. Create collection + ZVecCollectionOptions *options = zvec_collection_options_create(); + if (!options) { + fprintf(stderr, "Failed to create collection options\n"); + zvec_collection_schema_destroy(schema); + return -1; + } + ZVecCollection *collection = NULL; + + error = zvec_collection_create_and_open("./doc_example_collection", schema, + options, &collection); + zvec_collection_options_destroy(options); + if (handle_error(error, "creating collection") != ZVEC_OK) { + zvec_collection_schema_destroy(schema); + return -1; + } + printf("✓ Collection created successfully\n"); + + // 5. 
Create and insert multiple test documents + printf("Creating and inserting test documents...\n"); + + const int doc_count = 5; + ZVecDoc *test_docs[doc_count]; + + for (int i = 0; i < doc_count; i++) { + test_docs[i] = create_full_type_test_doc(i); + if (!test_docs[i]) { + fprintf(stderr, "Failed to create document %d\n", i); + // Cleanup + for (int j = 0; j < i; j++) { + zvec_doc_destroy(test_docs[j]); + } + goto cleanup; + } + printf("✓ Created document %d with PK: %s\n", i, + zvec_doc_get_pk_pointer(test_docs[i])); + } + + // Print all documents before insertion + printf("\nDocuments before insertion:\n"); + for (int i = 0; i < doc_count; i++) { + print_doc(test_docs[i], i); + } + + // Insert documents + size_t success_count = 0, error_count = 0; + error = zvec_collection_insert(collection, (const ZVecDoc **)test_docs, + doc_count, &success_count, &error_count); + if (handle_error(error, "inserting documents") == ZVEC_OK) { + printf("✓ Documents inserted - Success: %zu, Failed: %zu\n", success_count, + error_count); + } + + // 6. 
Flush collection + error = zvec_collection_flush(collection); + if (handle_error(error, "flushing collection") != ZVEC_OK) { + printf("Warning: Collection flush failed\n"); + } else { + printf("✓ Collection flushed successfully\n"); + } + + // Use the first document's vector for querying + float query_vector[] = {0.0f, 0.0f, 0.0f}; + ZVecVectorQuery *query = zvec_vector_query_create(); + if (!query) { + fprintf(stderr, "Failed to create vector query\n"); + zvec_collection_destroy(collection); + zvec_collection_schema_destroy(schema); + return -1; + } + zvec_vector_query_set_field_name(query, "vector_fp32"); + zvec_vector_query_set_query_vector(query, query_vector, 3 * sizeof(float)); + zvec_vector_query_set_topk(query, 5); + zvec_vector_query_set_filter(query, ""); + zvec_vector_query_set_include_vector(query, true); + zvec_vector_query_set_include_doc_id(query, true); + + ZVecDoc **query_results = NULL; + size_t result_count = 0; + + error = zvec_collection_query(collection, (const ZVecVectorQuery *)query, + &query_results, &result_count); + if (handle_error(error, "querying documents") != ZVEC_OK) { + query_results = NULL; + result_count = 0; + } + + printf("Query returned %zu results\n", result_count); + + // Print query results + printf("\nQuery Results:\n"); + for (size_t i = 0; i < result_count; i++) { + print_doc(query_results[i], i); + } + + // Compare query results + for (size_t i = 0; i < result_count && i < doc_count; i++) { + const char *result_pk = zvec_doc_get_pk_pointer(query_results[i]); + printf("Comparing query result[%zu]: %s\n", i, result_pk); + + // Find matching original document + bool found = false; + for (int j = 0; j < doc_count; j++) { + const char *original_pk = zvec_doc_get_pk_pointer(test_docs[j]); + if (strcmp(result_pk, original_pk) == 0) { + if (compare_documents(test_docs[j], query_results[i])) { + printf("✓ Query result %s matches original document\n", result_pk); + } else { + printf("✗ Query result %s does not match original 
document\n", + result_pk); + } + found = true; + break; + } + } + + if (!found) { + printf("⚠ Original document not found for: %s\n", result_pk); + } + } + + // 7. Filter query test + printf("\n=== Filter Query Test ===\n"); + + // Create filtered query + zvec_vector_query_set_filter(query, "string_field = 'string_field_0'"); + + ZVecDoc **filtered_results = NULL; + size_t filtered_count = 0; + + error = zvec_collection_query(collection, (const ZVecVectorQuery *)query, + &filtered_results, &filtered_count); + if (handle_error(error, "filtered querying") == ZVEC_OK) { + printf("Filtered query returned %zu results\n", filtered_count); + + // Verify filter results + bool filter_correct = true; + for (size_t i = 0; i < filtered_count; i++) { + // Note: Field value access may require different API + // For now, we'll just check that we got results + const char *pk = zvec_doc_get_pk_pointer(filtered_results[i]); + if (strstr(pk, "doc_") == NULL) { + filter_correct = false; + break; + } + } + + if (filter_correct) { + printf("✓ Filter query results are correct\n"); + } else { + printf("✗ Filter query results are incorrect\n"); + } + + if (filtered_results) { + zvec_docs_free(filtered_results, filtered_count); + } + } + + // 8. Cleanup query results + if (query_results) { + zvec_docs_free(query_results, result_count); + } + + // 9. Cleanup documents + for (int i = 0; i < doc_count; i++) { + zvec_doc_destroy(test_docs[i]); + } + + // 10. 
Final cleanup +cleanup: + zvec_collection_destroy(collection); + zvec_collection_schema_destroy(schema); + + printf("✓ Document example completed\n"); + + return 0; +} \ No newline at end of file diff --git a/examples/c/field_schema_example.c b/examples/c/field_schema_example.c new file mode 100644 index 00000000..ea73f42a --- /dev/null +++ b/examples/c/field_schema_example.c @@ -0,0 +1,309 @@ +// Copyright 2025-present the zvec project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include "zvec/c_api.h" + +/** + * @brief Print error message and return error code + */ +static ZVecErrorCode handle_error(ZVecErrorCode error, const char *context) { + if (error != ZVEC_OK) { + char *error_msg = NULL; + zvec_get_last_error(&error_msg); + fprintf(stderr, "Error in %s: %d - %s\n", context, error, + error_msg ? error_msg : "Unknown error"); + free(error_msg); + } + return error; +} + +/** + * @brief Field schema creation and management example + */ +int main() { + printf("=== ZVec Field Schema Example ===\n\n"); + + ZVecErrorCode error; + + // 1. Create collection schema + ZVecCollectionSchema *schema = + zvec_collection_schema_create("field_example_collection"); + if (!schema) { + fprintf(stderr, "Failed to create collection schema\n"); + return -1; + } + printf("✓ Collection schema created successfully\n"); + + // 2. 
Create different types of index parameters + ZVecIndexParams *invert_params = + zvec_index_params_create(ZVEC_INDEX_TYPE_INVERT); + if (!invert_params) { + fprintf(stderr, "Failed to create invert index parameters\n"); + zvec_collection_schema_destroy(schema); + return -1; + } + zvec_index_params_set_invert_params(invert_params, true, false); + + ZVecIndexParams *hnsw_params = zvec_index_params_create(ZVEC_INDEX_TYPE_HNSW); + if (!hnsw_params) { + fprintf(stderr, "Failed to create HNSW index parameters\n"); + zvec_index_params_destroy(invert_params); + zvec_collection_schema_destroy(schema); + return -1; + } + zvec_index_params_set_metric_type(hnsw_params, ZVEC_METRIC_TYPE_COSINE); + zvec_index_params_set_hnsw_params(hnsw_params, 16, 200); + + ZVecIndexParams *flat_params = zvec_index_params_create(ZVEC_INDEX_TYPE_FLAT); + if (!flat_params) { + fprintf(stderr, "Failed to create Flat index parameters\n"); + zvec_index_params_destroy(invert_params); + zvec_index_params_destroy(hnsw_params); + zvec_collection_schema_destroy(schema); + return -1; + } + zvec_index_params_set_metric_type(flat_params, ZVEC_METRIC_TYPE_L2); + + if (!invert_params || !hnsw_params || !flat_params) { + fprintf(stderr, "Failed to create index parameters\n"); + zvec_index_params_destroy(invert_params); + zvec_index_params_destroy(hnsw_params); + zvec_index_params_destroy(flat_params); + zvec_collection_schema_destroy(schema); + return -1; + } + + // 3. 
Create scalar fields with different data types + printf("Creating scalar fields...\n"); + + // String field with inverted index + ZVecFieldSchema *name_field = + zvec_field_schema_create("name", ZVEC_DATA_TYPE_STRING, false, 0); + if (name_field) { + zvec_field_schema_set_invert_index(name_field, invert_params); + error = zvec_collection_schema_add_field(schema, name_field); + if (handle_error(error, "adding name field") == ZVEC_OK) { + printf("✓ String field 'name' with inverted index added\n"); + } + } + + // Integer field + ZVecFieldSchema *age_field = + zvec_field_schema_create("age", ZVEC_DATA_TYPE_INT32, true, 0); + if (age_field) { + error = zvec_collection_schema_add_field(schema, age_field); + if (handle_error(error, "adding age field") == ZVEC_OK) { + printf("✓ Integer field 'age' added\n"); + } + } + + // Float field + ZVecFieldSchema *score_field = + zvec_field_schema_create("score", ZVEC_DATA_TYPE_FLOAT, true, 0); + if (score_field) { + error = zvec_collection_schema_add_field(schema, score_field); + if (handle_error(error, "adding score field") == ZVEC_OK) { + printf("✓ Float field 'score' added\n"); + } + } + + // Boolean field + ZVecFieldSchema *active_field = + zvec_field_schema_create("active", ZVEC_DATA_TYPE_BOOL, false, 0); + if (active_field) { + error = zvec_collection_schema_add_field(schema, active_field); + if (handle_error(error, "adding active field") == ZVEC_OK) { + printf("✓ Boolean field 'active' added\n"); + } + } + + // 4. 
Create vector fields with different dimensions and indexes + printf("Creating vector fields...\n"); + + // Small dimension vector with HNSW index + ZVecFieldSchema *small_vector_field = zvec_field_schema_create( + "small_vector", ZVEC_DATA_TYPE_VECTOR_FP32, false, 32); + if (small_vector_field) { + zvec_field_schema_set_hnsw_index(small_vector_field, hnsw_params); + error = zvec_collection_schema_add_field(schema, small_vector_field); + if (handle_error(error, "adding small vector field") == ZVEC_OK) { + printf( + "✓ Small vector field 'small_vector' (32D) with HNSW index added\n"); + } + } + + // Medium dimension vector with Flat index + ZVecFieldSchema *medium_vector_field = zvec_field_schema_create( + "medium_vector", ZVEC_DATA_TYPE_VECTOR_FP32, false, 128); + if (medium_vector_field) { + zvec_field_schema_set_flat_index(medium_vector_field, flat_params); + error = zvec_collection_schema_add_field(schema, medium_vector_field); + if (handle_error(error, "adding medium vector field") == ZVEC_OK) { + printf( + "✓ Medium vector field 'medium_vector' (128D) with Flat index " + "added\n"); + } + } + + // Large dimension vector with HNSW index + ZVecFieldSchema *large_vector_field = zvec_field_schema_create( + "large_vector", ZVEC_DATA_TYPE_VECTOR_FP32, false, 512); + if (large_vector_field) { + zvec_field_schema_set_hnsw_index(large_vector_field, hnsw_params); + error = zvec_collection_schema_add_field(schema, large_vector_field); + if (handle_error(error, "adding large vector field") == ZVEC_OK) { + printf( + "✓ Large vector field 'large_vector' (512D) with HNSW index added\n"); + } + } + + // 5. 
Create collection with the schema + ZVecCollectionOptions *options = zvec_collection_options_create(); + if (!options) { + fprintf(stderr, "Failed to create collection options\n"); + zvec_collection_schema_destroy(schema); + return -1; + } + ZVecCollection *collection = NULL; + + error = zvec_collection_create_and_open("./field_example_collection", schema, + options, &collection); + zvec_collection_options_destroy(options); + if (handle_error(error, "creating collection") != ZVEC_OK) { + zvec_collection_schema_destroy(schema); + return -1; + } + printf("✓ Collection created successfully\n"); + + // 6. Create test documents with various field types + printf("Creating test documents...\n"); + + ZVecDoc *doc1 = zvec_doc_create(); + ZVecDoc *doc2 = zvec_doc_create(); + + if (!doc1 || !doc2) { + fprintf(stderr, "Failed to create documents\n"); + goto cleanup; + } + + // Document 1 + zvec_doc_set_pk(doc1, "user1"); + zvec_doc_add_field_by_value(doc1, "name", ZVEC_DATA_TYPE_STRING, + "Alice Johnson", strlen("Alice Johnson")); + int32_t age1 = 28; + zvec_doc_add_field_by_value(doc1, "age", ZVEC_DATA_TYPE_INT32, &age1, + sizeof(age1)); + float score1 = 87.5f; + zvec_doc_add_field_by_value(doc1, "score", ZVEC_DATA_TYPE_FLOAT, &score1, + sizeof(score1)); + bool active1 = true; + zvec_doc_add_field_by_value(doc1, "active", ZVEC_DATA_TYPE_BOOL, &active1, + sizeof(active1)); + + // Add vector data + float small_vec1[32]; + float medium_vec1[128]; + float large_vec1[512]; + + for (int i = 0; i < 32; i++) small_vec1[i] = (float)i / 32.0f; + for (int i = 0; i < 128; i++) medium_vec1[i] = (float)i / 128.0f; + for (int i = 0; i < 512; i++) large_vec1[i] = (float)i / 512.0f; + + zvec_doc_add_field_by_value(doc1, "small_vector", ZVEC_DATA_TYPE_VECTOR_FP32, + small_vec1, 32 * sizeof(float)); + zvec_doc_add_field_by_value(doc1, "medium_vector", ZVEC_DATA_TYPE_VECTOR_FP32, + medium_vec1, 128 * sizeof(float)); + zvec_doc_add_field_by_value(doc1, "large_vector", ZVEC_DATA_TYPE_VECTOR_FP32, 
+ large_vec1, 512 * sizeof(float)); + + // Document 2 + zvec_doc_set_pk(doc2, "user2"); + zvec_doc_add_field_by_value(doc2, "name", ZVEC_DATA_TYPE_STRING, "Bob Smith", + strlen("Bob Smith")); + int32_t age2 = 35; + zvec_doc_add_field_by_value(doc2, "age", ZVEC_DATA_TYPE_INT32, &age2, + sizeof(age2)); + float score2 = 92.0f; + zvec_doc_add_field_by_value(doc2, "score", ZVEC_DATA_TYPE_FLOAT, &score2, + sizeof(score2)); + bool active2 = false; + zvec_doc_add_field_by_value(doc2, "active", ZVEC_DATA_TYPE_BOOL, &active2, + sizeof(active2)); + + // Add vector data + float small_vec2[32]; + float medium_vec2[128]; + float large_vec2[512]; + + for (int i = 0; i < 32; i++) small_vec2[i] = (float)(32 - i) / 32.0f; + for (int i = 0; i < 128; i++) medium_vec2[i] = (float)(128 - i) / 128.0f; + for (int i = 0; i < 512; i++) large_vec2[i] = (float)(512 - i) / 512.0f; + + zvec_doc_add_field_by_value(doc2, "small_vector", ZVEC_DATA_TYPE_VECTOR_FP32, + small_vec2, 32 * sizeof(float)); + zvec_doc_add_field_by_value(doc2, "medium_vector", ZVEC_DATA_TYPE_VECTOR_FP32, + medium_vec2, 128 * sizeof(float)); + zvec_doc_add_field_by_value(doc2, "large_vector", ZVEC_DATA_TYPE_VECTOR_FP32, + large_vec2, 512 * sizeof(float)); + + // 7. Insert documents + ZVecDoc *docs[] = {doc1, doc2}; + size_t success_count = 0, error_count = 0; + error = zvec_collection_insert(collection, (const ZVecDoc **)docs, 2, + &success_count, &error_count); + if (handle_error(error, "inserting documents") == ZVEC_OK) { + printf("✓ Documents inserted - Success: %zu, Failed: %zu\n", success_count, + error_count); + } + + // 8. 
Flush and test queries + zvec_collection_flush(collection); + printf("✓ Collection flushed\n"); + + // Test vector query on medium vector field + ZVecVectorQuery *query = zvec_vector_query_create(); + if (!query) { + fprintf(stderr, "Failed to create vector query\n"); + goto cleanup; + } + zvec_vector_query_set_field_name(query, "medium_vector"); + zvec_vector_query_set_query_vector(query, medium_vec1, 128 * sizeof(float)); + zvec_vector_query_set_topk(query, 2); + zvec_vector_query_set_filter(query, ""); + zvec_vector_query_set_include_vector(query, false); + zvec_vector_query_set_include_doc_id(query, true); + + ZVecDoc **results = NULL; + size_t result_count = 0; + error = zvec_collection_query(collection, (const ZVecVectorQuery *)query, + &results, &result_count); + if (error == ZVEC_OK) { + printf("✓ Vector query successful - Found %zu results\n", result_count); + zvec_docs_free(results, result_count); + } + zvec_vector_query_destroy(query); + + // 9. Cleanup +cleanup: + if (doc1) zvec_doc_destroy(doc1); + if (doc2) zvec_doc_destroy(doc2); + zvec_collection_destroy(collection); + zvec_collection_schema_destroy(schema); + + printf("✓ Field schema example completed\n"); + return 0; +} \ No newline at end of file diff --git a/examples/c/index_example.c b/examples/c/index_example.c new file mode 100644 index 00000000..7187901c --- /dev/null +++ b/examples/c/index_example.c @@ -0,0 +1,389 @@ +// Copyright 2025-present the zvec project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include "zvec/c_api.h" + +/** + * @brief Print error message and return error code + */ +static ZVecErrorCode handle_error(ZVecErrorCode error, const char *context) { + if (error != ZVEC_OK) { + char *error_msg = NULL; + zvec_get_last_error(&error_msg); + fprintf(stderr, "Error in %s: %d - %s\n", context, error, + error_msg ? error_msg : "Unknown error"); + free(error_msg); + } + return error; +} + +/** + * @brief Index creation and management example + */ +int main() { + printf("=== ZVec Index Example ===\n\n"); + + ZVecErrorCode error; + + // 1. Create collection schema + ZVecCollectionSchema *schema = + zvec_collection_schema_create("index_example_collection"); + if (!schema) { + fprintf(stderr, "Failed to create collection schema\n"); + return -1; + } + printf("✓ Collection schema created successfully\n"); + + // 2. Create different index parameter configurations + printf("Creating index parameters...\n"); + + // Inverted index parameters + ZVecIndexParams *invert_params_standard = + zvec_index_params_create(ZVEC_INDEX_TYPE_INVERT); + if (!invert_params_standard) { + fprintf(stderr, "Failed to create invert index parameters (standard)\n"); + zvec_collection_schema_destroy(schema); + return -1; + } + zvec_index_params_set_invert_params(invert_params_standard, true, false); + + ZVecIndexParams *invert_params_extended = + zvec_index_params_create(ZVEC_INDEX_TYPE_INVERT); + if (!invert_params_extended) { + fprintf(stderr, "Failed to create invert index parameters (extended)\n"); + zvec_index_params_destroy(invert_params_standard); + zvec_collection_schema_destroy(schema); + return -1; + } + zvec_index_params_set_invert_params(invert_params_extended, true, true); + + // HNSW index parameters with different configurations + ZVecIndexParams *hnsw_params_fast = + zvec_index_params_create(ZVEC_INDEX_TYPE_HNSW); + if 
(!hnsw_params_fast) { + fprintf(stderr, "Failed to create HNSW index parameters (fast)\n"); + zvec_index_params_destroy(invert_params_standard); + zvec_index_params_destroy(invert_params_extended); + zvec_collection_schema_destroy(schema); + return -1; + } + zvec_index_params_set_metric_type(hnsw_params_fast, ZVEC_METRIC_TYPE_L2); + zvec_index_params_set_hnsw_params(hnsw_params_fast, 16, 100); + + ZVecIndexParams *hnsw_params_balanced = + zvec_index_params_create(ZVEC_INDEX_TYPE_HNSW); + if (!hnsw_params_balanced) { + fprintf(stderr, "Failed to create HNSW index parameters (balanced)\n"); + zvec_index_params_destroy(invert_params_standard); + zvec_index_params_destroy(invert_params_extended); + zvec_index_params_destroy(hnsw_params_fast); + zvec_collection_schema_destroy(schema); + return -1; + } + zvec_index_params_set_metric_type(hnsw_params_balanced, + ZVEC_METRIC_TYPE_COSINE); + zvec_index_params_set_hnsw_params(hnsw_params_balanced, 32, 200); + + ZVecIndexParams *hnsw_params_accurate = + zvec_index_params_create(ZVEC_INDEX_TYPE_HNSW); + if (!hnsw_params_accurate) { + fprintf(stderr, "Failed to create HNSW index parameters (accurate)\n"); + zvec_index_params_destroy(invert_params_standard); + zvec_index_params_destroy(invert_params_extended); + zvec_index_params_destroy(hnsw_params_fast); + zvec_index_params_destroy(hnsw_params_balanced); + zvec_collection_schema_destroy(schema); + return -1; + } + zvec_index_params_set_metric_type(hnsw_params_accurate, ZVEC_METRIC_TYPE_IP); + zvec_index_params_set_hnsw_params(hnsw_params_accurate, 64, 400); + + // Flat index parameters + ZVecIndexParams *flat_params_l2 = + zvec_index_params_create(ZVEC_INDEX_TYPE_FLAT); + if (!flat_params_l2) { + fprintf(stderr, "Failed to create Flat index parameters (L2)\n"); + zvec_index_params_destroy(invert_params_standard); + zvec_index_params_destroy(invert_params_extended); + zvec_index_params_destroy(hnsw_params_fast); + zvec_index_params_destroy(hnsw_params_balanced); + 
zvec_index_params_destroy(hnsw_params_accurate); + zvec_collection_schema_destroy(schema); + return -1; + } + zvec_index_params_set_metric_type(flat_params_l2, ZVEC_METRIC_TYPE_L2); + + ZVecIndexParams *flat_params_cosine = + zvec_index_params_create(ZVEC_INDEX_TYPE_FLAT); + if (!flat_params_cosine) { + fprintf(stderr, "Failed to create Flat index parameters (cosine)\n"); + zvec_index_params_destroy(invert_params_standard); + zvec_index_params_destroy(invert_params_extended); + zvec_index_params_destroy(hnsw_params_fast); + zvec_index_params_destroy(hnsw_params_balanced); + zvec_index_params_destroy(hnsw_params_accurate); + zvec_index_params_destroy(flat_params_l2); + zvec_collection_schema_destroy(schema); + return -1; + } + zvec_index_params_set_metric_type(flat_params_cosine, + ZVEC_METRIC_TYPE_COSINE); + + // 3. Create fields with different index types + printf("Creating fields with various index types...\n"); + + // Fields with inverted indexes + ZVecFieldSchema *id_field = + zvec_field_schema_create("id", ZVEC_DATA_TYPE_STRING, false, 0); + if (id_field) { + zvec_field_schema_set_invert_index(id_field, invert_params_standard); + error = zvec_collection_schema_add_field(schema, id_field); + if (handle_error(error, "adding ID field") == ZVEC_OK) { + printf("✓ ID field with standard inverted index added\n"); + } + } + + ZVecFieldSchema *category_field = + zvec_field_schema_create("category", ZVEC_DATA_TYPE_STRING, true, 0); + if (category_field) { + zvec_field_schema_set_invert_index(category_field, invert_params_extended); + error = zvec_collection_schema_add_field(schema, category_field); + if (handle_error(error, "adding category field") == ZVEC_OK) { + printf("✓ Category field with extended inverted index added\n"); + } + } + + // Vector fields with HNSW indexes (different configurations) + ZVecFieldSchema *fast_search_field = zvec_field_schema_create( + "fast_vector", ZVEC_DATA_TYPE_VECTOR_FP32, false, 64); + if (fast_search_field) { + 
zvec_field_schema_set_hnsw_index(fast_search_field, hnsw_params_fast); + error = zvec_collection_schema_add_field(schema, fast_search_field); + if (handle_error(error, "adding fast search field") == ZVEC_OK) { + printf("✓ Fast search vector field (64D) with HNSW index added\n"); + } + } + + ZVecFieldSchema *balanced_field = zvec_field_schema_create( + "balanced_vector", ZVEC_DATA_TYPE_VECTOR_FP32, false, 128); + if (balanced_field) { + zvec_field_schema_set_hnsw_index(balanced_field, hnsw_params_balanced); + error = zvec_collection_schema_add_field(schema, balanced_field); + if (handle_error(error, "adding balanced field") == ZVEC_OK) { + printf("✓ Balanced vector field (128D) with HNSW index added\n"); + } + } + + ZVecFieldSchema *accurate_field = zvec_field_schema_create( + "accurate_vector", ZVEC_DATA_TYPE_VECTOR_FP32, false, 256); + if (accurate_field) { + zvec_field_schema_set_hnsw_index(accurate_field, hnsw_params_accurate); + error = zvec_collection_schema_add_field(schema, accurate_field); + if (handle_error(error, "adding accurate field") == ZVEC_OK) { + printf("✓ Accurate vector field (256D) with HNSW index added\n"); + } + } + + // Vector field with Flat index + ZVecFieldSchema *exact_field = zvec_field_schema_create( + "exact_vector", ZVEC_DATA_TYPE_VECTOR_FP32, false, 32); + if (exact_field) { + zvec_field_schema_set_flat_index(exact_field, flat_params_l2); + error = zvec_collection_schema_add_field(schema, exact_field); + if (handle_error(error, "adding exact field") == ZVEC_OK) { + printf("✓ Exact search vector field (32D) with Flat index added\n"); + } + } + + // 4. 
Create collection + ZVecCollectionOptions *options = zvec_collection_options_create(); + if (!options) { + fprintf(stderr, "Failed to create collection options\n"); + zvec_collection_schema_destroy(schema); + return -1; + } + ZVecCollection *collection = NULL; + + error = zvec_collection_create_and_open("./index_example_collection", schema, + options, &collection); + zvec_collection_options_destroy(options); + if (handle_error(error, "creating collection") != ZVEC_OK) { + zvec_collection_schema_destroy(schema); + return -1; + } + printf("✓ Collection created successfully\n"); + + // 5. Create test data + printf("Creating test documents...\n"); + + ZVecDoc *docs[3]; + for (int i = 0; i < 3; i++) { + docs[i] = zvec_doc_create(); + if (!docs[i]) { + fprintf(stderr, "Failed to create document %d\n", i); + // Cleanup + for (int j = 0; j < i; j++) { + zvec_doc_destroy(docs[j]); + } + goto cleanup; + } + } + + // Prepare vector data + float fast_vec[3][64]; + float balanced_vec[3][128]; + float accurate_vec[3][256]; + float exact_vec[3][32]; + + // Generate different vector patterns for testing + for (int doc_idx = 0; doc_idx < 3; doc_idx++) { + for (int i = 0; i < 64; i++) { + fast_vec[doc_idx][i] = (float)(doc_idx * 64 + i) / (64.0f * 3.0f); + } + for (int i = 0; i < 128; i++) { + balanced_vec[doc_idx][i] = (float)(doc_idx * 128 + i) / (128.0f * 3.0f); + } + for (int i = 0; i < 256; i++) { + accurate_vec[doc_idx][i] = (float)(doc_idx * 256 + i) / (256.0f * 3.0f); + } + for (int i = 0; i < 32; i++) { + exact_vec[doc_idx][i] = (float)(doc_idx * 32 + i) / (32.0f * 3.0f); + } + } + + // Populate documents + for (int i = 0; i < 3; i++) { + char pk[16]; + snprintf(pk, sizeof(pk), "doc%d", i + 1); + zvec_doc_set_pk(docs[i], pk); + + char id_val[16]; + snprintf(id_val, sizeof(id_val), "ID_%d", i + 1); + zvec_doc_add_field_by_value(docs[i], "id", ZVEC_DATA_TYPE_STRING, id_val, + strlen(id_val)); + + char category_val[16]; + snprintf(category_val, sizeof(category_val), "cat_%d", 
(i % 2) + 1); + zvec_doc_add_field_by_value(docs[i], "category", ZVEC_DATA_TYPE_STRING, + category_val, strlen(category_val)); + + zvec_doc_add_field_by_value(docs[i], "fast_vector", + ZVEC_DATA_TYPE_VECTOR_FP32, fast_vec[i], + 64 * sizeof(float)); + zvec_doc_add_field_by_value(docs[i], "balanced_vector", + ZVEC_DATA_TYPE_VECTOR_FP32, balanced_vec[i], + 128 * sizeof(float)); + zvec_doc_add_field_by_value(docs[i], "accurate_vector", + ZVEC_DATA_TYPE_VECTOR_FP32, accurate_vec[i], + 256 * sizeof(float)); + zvec_doc_add_field_by_value(docs[i], "exact_vector", + ZVEC_DATA_TYPE_VECTOR_FP32, exact_vec[i], + 32 * sizeof(float)); + } + + // 6. Insert documents + size_t success_count = 0, error_count = 0; + error = zvec_collection_insert(collection, (const ZVecDoc **)docs, 3, + &success_count, &error_count); + if (handle_error(error, "inserting documents") == ZVEC_OK) { + printf("✓ Documents inserted - Success: %zu, Failed: %zu\n", success_count, + error_count); + } + + // Cleanup documents + for (int i = 0; i < 3; i++) { + zvec_doc_destroy(docs[i]); + } + + // 7. Flush collection to build indexes + error = zvec_collection_flush(collection); + if (handle_error(error, "flushing collection") == ZVEC_OK) { + printf("✓ Collection flushed - indexes built\n"); + } + + // 8. 
Test different query types + printf("Testing various index queries...\n"); + + // Test HNSW query (balanced) + ZVecVectorQuery *hnsw_query = zvec_vector_query_create(); + if (!hnsw_query) { + fprintf(stderr, "Failed to create HNSW query\n"); + goto cleanup; + } + zvec_vector_query_set_field_name(hnsw_query, "balanced_vector"); + zvec_vector_query_set_query_vector(hnsw_query, balanced_vec[0], + 128 * sizeof(float)); + zvec_vector_query_set_topk(hnsw_query, 2); + zvec_vector_query_set_filter(hnsw_query, ""); + zvec_vector_query_set_include_vector(hnsw_query, false); + zvec_vector_query_set_include_doc_id(hnsw_query, true); + + ZVecDoc **hnsw_results = NULL; + size_t hnsw_result_count = 0; + error = zvec_collection_query(collection, (const ZVecVectorQuery *)hnsw_query, + &hnsw_results, &hnsw_result_count); + if (error == ZVEC_OK) { + printf("✓ HNSW query successful - Found %zu results\n", hnsw_result_count); + zvec_docs_free(hnsw_results, hnsw_result_count); + } + zvec_vector_query_destroy(hnsw_query); + + // Test Flat query (exact) + ZVecVectorQuery *flat_query = zvec_vector_query_create(); + if (!flat_query) { + fprintf(stderr, "Failed to create Flat query\n"); + goto cleanup; + } + zvec_vector_query_set_field_name(flat_query, "exact_vector"); + zvec_vector_query_set_query_vector(flat_query, exact_vec[0], + 32 * sizeof(float)); + zvec_vector_query_set_topk(flat_query, 2); + zvec_vector_query_set_filter(flat_query, ""); + zvec_vector_query_set_include_vector(flat_query, false); + zvec_vector_query_set_include_doc_id(flat_query, true); + + ZVecDoc **flat_results = NULL; + size_t flat_result_count = 0; + error = zvec_collection_query(collection, (const ZVecVectorQuery *)flat_query, + &flat_results, &flat_result_count); + if (error == ZVEC_OK) { + printf("✓ Flat (exact) query successful - Found %zu results\n", + flat_result_count); + zvec_docs_free(flat_results, flat_result_count); + } + zvec_vector_query_destroy(flat_query); + + // 9. 
Performance comparison information + printf("\nIndex Performance Characteristics:\n"); + printf("- Inverted Index: Fast text search, supports filtering\n"); + printf( + "- HNSW Index: Approximate nearest neighbor search, good balance of " + "speed/accuracy\n"); + printf("- Flat Index: Exact search, slower but 100%% accurate\n"); + printf( + "- Trade-off: Speed vs Accuracy - choose based on your requirements\n"); + + // 10. Cleanup +cleanup: + zvec_collection_destroy(collection); + zvec_collection_schema_destroy(schema); + + // Cleanup index parameters + + printf("✓ Index example completed\n"); + return 0; +} \ No newline at end of file diff --git a/examples/c/optimized_example.c b/examples/c/optimized_example.c new file mode 100644 index 00000000..86513797 --- /dev/null +++ b/examples/c/optimized_example.c @@ -0,0 +1,307 @@ +// Copyright 2025-present the zvec project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include +#include "zvec/c_api.h" + +/** + * @brief Print error message and return error code + */ +static ZVecErrorCode handle_error(ZVecErrorCode error, const char *context) { + if (error != ZVEC_OK) { + char *error_msg = NULL; + zvec_get_last_error(&error_msg); + fprintf(stderr, "Error in %s: %d - %s\n", context, error, + error_msg ? 
error_msg : "Unknown error"); + free(error_msg); + } + return error; +} + +/** + * @brief Create test vector data + */ +static float *create_test_vector(size_t dimension) { + float *vector = malloc(dimension * sizeof(float)); + if (!vector) { + return NULL; + } + + for (size_t i = 0; i < dimension; i++) { + vector[i] = (float)rand() / RAND_MAX; + } + + return vector; +} + +/** + * @brief Optimized C API usage example with performance considerations + */ +int main() { + printf("=== ZVec Optimized C API Example ===\n\n"); + + // Get version information + const char *version = zvec_get_version(); + printf("ZVec Version: %s\n\n", version ? version : "Unknown"); + + ZVecErrorCode error; + + // 1. Create optimized collection schema + ZVecCollectionSchema *schema = + zvec_collection_schema_create("optimized_example_collection"); + if (!schema) { + fprintf(stderr, "Failed to create collection schema\n"); + return -1; + } + printf("✓ Collection schema created\n"); + + // 2. Create optimized index parameters + ZVecIndexParams *hnsw_params = zvec_index_params_create(ZVEC_INDEX_TYPE_HNSW); + if (!hnsw_params) { + fprintf(stderr, "Failed to create HNSW index parameters\n"); + zvec_collection_schema_destroy(schema); + return -1; + } + zvec_index_params_set_metric_type(hnsw_params, ZVEC_METRIC_TYPE_L2); + zvec_index_params_set_hnsw_params(hnsw_params, 32, 200); + + // 3. 
Create fields with optimized configuration + ZVecFieldSchema *id_field = + zvec_field_schema_create("id", ZVEC_DATA_TYPE_STRING, false, 0); + ZVecFieldSchema *text_field = + zvec_field_schema_create("text", ZVEC_DATA_TYPE_STRING, true, 0); + ZVecFieldSchema *embedding_field = zvec_field_schema_create( + "embedding", ZVEC_DATA_TYPE_VECTOR_FP32, false, 128); + + if (!id_field || !text_field || !embedding_field) { + fprintf(stderr, "Failed to create field schemas\n"); + goto cleanup_params; + } + + // Set indexes + zvec_field_schema_set_hnsw_index(embedding_field, hnsw_params); + + // Add fields to schema + error = zvec_collection_schema_add_field(schema, id_field); + if (handle_error(error, "adding ID field") != ZVEC_OK) goto cleanup_fields; + + error = zvec_collection_schema_add_field(schema, text_field); + if (handle_error(error, "adding text field") != ZVEC_OK) goto cleanup_fields; + + error = zvec_collection_schema_add_field(schema, embedding_field); + if (handle_error(error, "adding embedding field") != ZVEC_OK) + goto cleanup_fields; + + printf("✓ Fields configured with indexes\n"); + + // 4. Create collection with optimized options + ZVecCollectionOptions *options = zvec_collection_options_create(); + if (!options) { + fprintf(stderr, "Failed to create collection options\n"); + goto cleanup_fields; + } + zvec_collection_options_set_enable_mmap( + options, true); // Enable memory mapping for better performance + + ZVecCollection *collection = NULL; + error = zvec_collection_create_and_open("./optimized_example_collection", + schema, options, &collection); + zvec_collection_options_destroy(options); + if (handle_error(error, "creating collection") != ZVEC_OK) { + goto cleanup_fields; + } + printf("✓ Collection created with optimized settings\n"); + + // 5. 
Bulk insert test data + const size_t DOC_COUNT = 1000; + const size_t BATCH_SIZE = 100; + + printf("Inserting %zu documents in batches of %zu...\n", DOC_COUNT, + BATCH_SIZE); + + clock_t start_time = clock(); + + for (size_t batch_start = 0; batch_start < DOC_COUNT; + batch_start += BATCH_SIZE) { + size_t current_batch_size = (batch_start + BATCH_SIZE > DOC_COUNT) + ? DOC_COUNT - batch_start + : BATCH_SIZE; + + ZVecDoc **batch_docs = malloc(current_batch_size * sizeof(ZVecDoc *)); + if (!batch_docs) { + fprintf(stderr, "Failed to allocate batch documents\n"); + break; + } + + // Create batch documents + for (size_t i = 0; i < current_batch_size; i++) { + batch_docs[i] = zvec_doc_create(); + if (!batch_docs[i]) { + fprintf(stderr, "Failed to create document\n"); + // Cleanup previous documents in batch + for (size_t j = 0; j < i; j++) { + zvec_doc_destroy(batch_docs[j]); + } + free(batch_docs); + goto cleanup_collection; + } + + size_t doc_id = batch_start + i; + char pk[32]; + snprintf(pk, sizeof(pk), "doc_%zu", doc_id); + zvec_doc_set_pk(batch_docs[i], pk); + + // Add ID field + char id_str[32]; + snprintf(id_str, sizeof(id_str), "ID_%zu", doc_id); + zvec_doc_add_field_by_value(batch_docs[i], "id", ZVEC_DATA_TYPE_STRING, + id_str, strlen(id_str)); + + // Add text field + char text_str[64]; + snprintf(text_str, sizeof(text_str), + "Document number %zu with sample text", doc_id); + zvec_doc_add_field_by_value(batch_docs[i], "text", ZVEC_DATA_TYPE_STRING, + text_str, strlen(text_str)); + + // Add vector field + float *vector = create_test_vector(128); + if (vector) { + zvec_doc_add_field_by_value(batch_docs[i], "embedding", + ZVEC_DATA_TYPE_VECTOR_FP32, vector, + 128 * sizeof(float)); + free(vector); + } + } + + // Insert batch + size_t success_count, error_count; + error = zvec_collection_insert(collection, (const ZVecDoc **)batch_docs, + current_batch_size, &success_count, + &error_count); + if (handle_error(error, "inserting batch") != ZVEC_OK) { + // Cleanup 
batch documents + for (size_t i = 0; i < current_batch_size; i++) { + zvec_doc_destroy(batch_docs[i]); + } + free(batch_docs); + goto cleanup_collection; + } + + printf(" Batch %zu-%zu: %zu successful, %zu failed\n", batch_start, + batch_start + current_batch_size - 1, success_count, error_count); + + // Cleanup batch documents + for (size_t i = 0; i < current_batch_size; i++) { + zvec_doc_destroy(batch_docs[i]); + } + free(batch_docs); + } + + clock_t insert_end_time = clock(); + double insert_time = + ((double)(insert_end_time - start_time)) / CLOCKS_PER_SEC; + printf("✓ Bulk insertion completed in %.3f seconds (%.0f docs/sec)\n", + insert_time, DOC_COUNT / insert_time); + + // 6. Flush and optimize collection + printf("Flushing and optimizing collection...\n"); + zvec_collection_flush(collection); + zvec_collection_optimize(collection); + printf("✓ Collection optimized\n"); + + // 7. Performance query test + printf("Testing query performance...\n"); + + float *query_vector = create_test_vector(128); + if (!query_vector) { + fprintf(stderr, "Failed to create query vector\n"); + goto cleanup_collection; + } + + ZVecVectorQuery *query = zvec_vector_query_create(); + if (!query) { + fprintf(stderr, "Failed to create vector query\n"); + free(query_vector); + goto cleanup_collection; + } + zvec_vector_query_set_field_name(query, "embedding"); + zvec_vector_query_set_query_vector(query, query_vector, 128 * sizeof(float)); + zvec_vector_query_set_topk(query, 10); + zvec_vector_query_set_filter(query, ""); + zvec_vector_query_set_include_vector(query, false); + zvec_vector_query_set_include_doc_id(query, true); + + const int QUERY_COUNT = 100; + start_time = clock(); + + for (int q = 0; q < QUERY_COUNT; q++) { + ZVecDoc **results = NULL; + size_t result_count = 0; + + error = zvec_collection_query(collection, (const ZVecVectorQuery *)query, + &results, &result_count); + if (error != ZVEC_OK) { + char *error_msg = NULL; + zvec_get_last_error(&error_msg); + printf("Query 
%d failed: %s\n", q, + error_msg ? error_msg : "Unknown error"); + free(error_msg); + continue; + } + + if (results) { + zvec_docs_free(results, result_count); + } + } + + clock_t query_end_time = clock(); + double query_time = ((double)(query_end_time - start_time)) / CLOCKS_PER_SEC; + double avg_query_time = (query_time * 1000) / QUERY_COUNT; + + printf("✓ Performance test completed\n"); + printf(" Average query time: %.2f ms\n", avg_query_time); + printf(" Queries per second: %.0f\n", 1000.0 / avg_query_time); + + free(query_vector); + zvec_vector_query_destroy(query); + + // 8. Memory usage information + ZVecCollectionStats *stats = NULL; + error = zvec_collection_get_stats(collection, &stats); + if (error == ZVEC_OK && stats) { + printf("Collection Statistics:\n"); + printf(" Document count: %llu\n", + (unsigned long long)zvec_collection_stats_get_doc_count(stats)); + zvec_collection_stats_destroy(stats); + } + + // 9. Cleanup +cleanup_collection: + zvec_collection_destroy(collection); + +cleanup_fields: + // Field schemas are managed by the collection schema, no need to destroy + // individually + +cleanup_params: + zvec_collection_schema_destroy(schema); + + printf("✓ Optimized example completed\n"); + + return 0; +} \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 486b0b36..fe9c090f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -109,6 +109,14 @@ build-dir = "build" wheel.expand-macos-universal-tags = true wheel.packages = ["python/zvec"] +# Exclude unnecessary files from wheel +wheel.exclude = [ + "**/*.dylib", + "**/*.a", + "lib/cmake/**", + "lib/pkgconfig/**", +] + # Source distribution sdist.include = [ "README.md", diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 00383c99..c9747a00 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -9,6 +9,4 @@ cc_directory(ailego) cc_directory(turbo) cc_directory(core) cc_directory(db) -if(BUILD_PYTHON_BINDINGS) - cc_directory(binding) -endif() 
+cc_directory(binding) diff --git a/src/binding/CMakeLists.txt b/src/binding/CMakeLists.txt index 7dab04ad..700d0811 100644 --- a/src/binding/CMakeLists.txt +++ b/src/binding/CMakeLists.txt @@ -4,5 +4,11 @@ include(${PROJECT_ROOT_DIR}/cmake/option.cmake) # Retrieve version from git repository git_version(ZVEC_VERSION ${CMAKE_CURRENT_SOURCE_DIR}) -# Add repository -cc_directory(python) \ No newline at end of file +# Add repositories +if(BUILD_C_BINDINGS) + cc_directory(c) +endif() + +if(BUILD_PYTHON_BINDINGS) + cc_directory(python) +endif() \ No newline at end of file diff --git a/src/binding/c/CMakeLists.txt b/src/binding/c/CMakeLists.txt new file mode 100644 index 00000000..0a714584 --- /dev/null +++ b/src/binding/c/CMakeLists.txt @@ -0,0 +1,196 @@ +# Copyright 2025-present the zvec project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake) +include(${PROJECT_ROOT_DIR}/cmake/option.cmake) +include(GNUInstallDirs) + +# Retrieve version from git repository and generate version header +git_version(ZVEC_VERSION ${CMAKE_CURRENT_SOURCE_DIR}) + +# Debug: print version variables +message(STATUS "ZVEC_VERSION: ${ZVEC_VERSION}") + +# Parse version string to extract major.minor.patch +# Format: vX.Y.Z-commit-hash or vX.Y.Z +if(ZVEC_VERSION MATCHES "^v([0-9]+)\\.([0-9]+)\\.([0-9]+)") + set(ZVEC_VERSION_MAJOR "${CMAKE_MATCH_1}") + set(ZVEC_VERSION_MINOR "${CMAKE_MATCH_2}") + set(ZVEC_VERSION_PATCH "${CMAKE_MATCH_3}") + set(ZVEC_VERSION_STRING "${ZVEC_VERSION}") +else() + # Default version if parsing fails + set(ZVEC_VERSION_MAJOR 0) + set(ZVEC_VERSION_MINOR 2) + set(ZVEC_VERSION_PATCH 1) + set(ZVEC_VERSION_STRING "${ZVEC_VERSION_MAJOR}.${ZVEC_VERSION_MINOR}.${ZVEC_VERSION_PATCH}") +endif() + +message(STATUS "Parsed version: ${ZVEC_VERSION_MAJOR}.${ZVEC_VERSION_MINOR}.${ZVEC_VERSION_PATCH} (${ZVEC_VERSION_STRING})") + +# Configure version header file +configure_file( + ${PROJECT_SOURCE_DIR}/src/include/zvec/version.h.in + ${PROJECT_BINARY_DIR}/src/generated/zvec_version.h +) + +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD_REQUIRED ON) +set(CMAKE_CXX_EXTENSIONS OFF) + +# C API library source files +set(ZVEC_C_API_SOURCES + c_api.cc +) + +# C API library header files +set(ZVEC_C_API_HEADERS + ${PROJECT_SOURCE_DIR}/src/include/zvec/c_api.h +) + +# ============================================================================= +# Build FAT Shared Library (zvec_c_api.so) +# ============================================================================= +# BUILD_RELEASE_FAT_LIBS=ON: Fully self-contained, zero external dependencies +# Users only need -lzvec_c_api +# BUILD_RELEASE_FAT_LIBS=OFF: Development mode, third-party libs linked normally +# Allows parallel test execution without symbol conflicts +# +# Implementation: +# - Always embeds zvec_db, zvec_core, 
zvec_ailego via --whole-archive +# - For release: also embeds all third-party libs (rocksdb, glog, protobuf, etc.) +# - Uses --exclude-libs,ALL to hide third-party symbols from export +# ============================================================================= +add_library(zvec_c_api SHARED + ${ZVEC_C_API_SOURCES} + ${ZVEC_C_API_HEADERS} +) + +# Set library properties +set_target_properties(zvec_c_api PROPERTIES + OUTPUT_NAME "zvec_c_api" + POSITION_INDEPENDENT_CODE ON + # Hide all symbols by default, only export C API + CXX_VISIBILITY_PRESET hidden + VISIBILITY_INLINES_HIDDEN ON +) + +find_package(Threads REQUIRED) + +# FAT mode: embed ALL libraries (including third-party) statically +# This creates a truly self-contained library with zero external dependencies +# Users only need to link libzvec_c_api.so without installing any dependencies +if(APPLE) + # First, link all libraries normally + target_link_libraries(zvec_c_api + PRIVATE + # zvec static libraries + zvec_db + zvec_core + zvec_ailego + # Third-party libraries + roaring + Arrow::arrow_static + Arrow::parquet_static + Arrow::arrow_compute + Arrow::arrow_dataset + Arrow::arrow_acero + rocksdb + glog + libprotobuf + antlr4 + sparsehash + magic_enum + Threads::Threads + ${CMAKE_DL_LIBS} + ) + + # Then, apply -force_load to ensure all symbols are included + # Note: This may cause duplicate library warnings, but they are harmless + target_link_options(zvec_c_api PRIVATE + -Wl,-force_load,$ + -Wl,-force_load,$ + -Wl,-force_load,$ + -Wl,-force_load,$ + -Wl,-force_load,$ + -Wl,-force_load,$ + -Wl,-force_load,$ + -Wl,-force_load,$ + -Wl,-force_load,$ + -Wl,-force_load,$ + -Wl,-force_load,$ + -Wl,-force_load,$ + -Wl,-force_load,$ + ) + +else() + target_link_libraries(zvec_c_api + PRIVATE + # Force load all zvec static libraries (extract all objects) + "-Wl,--whole-archive" + zvec_db + zvec_core + zvec_ailego + "-Wl,--no-whole-archive" + # Force load ALL third-party libraries for zero-dependency deployment + 
"-Wl,--whole-archive" + roaring + Arrow::arrow_static + Arrow::parquet_static + Arrow::arrow_compute + Arrow::arrow_dataset + Arrow::arrow_acero + rocksdb + glog + libprotobuf + antlr4 + sparsehash + magic_enum + "-Wl,--no-whole-archive" + Threads::Threads + ${CMAKE_DL_LIBS} + ) +endif() + +# Include directories +target_include_directories(zvec_c_api + PUBLIC + $ + $ + PRIVATE + ${PROJECT_SOURCE_DIR}/src +) + +# Compile options +target_compile_options(zvec_c_api PRIVATE + $<$:-Wall -Wextra -Wpedantic> + $<$:-Wall -Wextra -Wpedantic> +) + +# ============================================================================= +# Installation Rules +# ============================================================================= + +# Install shared library +install(TARGETS zvec_c_api + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} + RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} + INCLUDES DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} +) + +# Install headers +install(FILES ${PROJECT_SOURCE_DIR}/src/include/zvec/c_api.h + DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/zvec +) diff --git a/src/binding/c/c_api.cc b/src/binding/c/c_api.cc new file mode 100644 index 00000000..427f6323 --- /dev/null +++ b/src/binding/c/c_api.cc @@ -0,0 +1,7339 @@ +// Copyright 2025-present the zvec project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "zvec/c_api.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +// Error checking macros - these preserve __LINE__ accuracy +// Simplified macro for setting error with automatic file/line/function info +#define SET_LAST_ERROR(code, msg) \ + set_last_error_details(code, msg, __FILE__, __LINE__, __FUNCTION__) + +#define ZVEC_CHECK_NOTNULL(ptr, error_code, msg) \ + if (!(ptr)) { \ + SET_LAST_ERROR(error_code, msg); \ + return nullptr; \ + } + +#define ZVEC_CHECK_NOTNULL_ERRCODE(ptr, error_code, msg) \ + if (!(ptr)) { \ + SET_LAST_ERROR(error_code, msg); \ + return (error_code); \ + } + +#define ZVEC_CHECK_COND(cond, error_code, msg) \ + if (cond) { \ + SET_LAST_ERROR(error_code, msg); \ + return nullptr; \ + } + +#define ZVEC_CHECK_COND_ERRCODE(cond, error_code, msg) \ + if (cond) { \ + SET_LAST_ERROR(error_code, msg); \ + return (error_code); \ + } + +// For void functions (no return value): +#define ZVEC_TRY_BEGIN_VOID try { +#define ZVEC_CATCH_END_VOID \ + } \ + catch (const std::exception &e) { \ + SET_LAST_ERROR(ZVEC_ERROR_UNKNOWN, std::string("Exception: ") + e.what()); \ + } + +// For functions returning ZVecErrorCode - complete try-catch wrapper +#define ZVEC_TRY_BEGIN_CODE ZVEC_TRY_BEGIN_VOID +#define ZVEC_CATCH_END_CODE(code_on_error) \ + } \ + catch (const std::exception &e) { \ + SET_LAST_ERROR(ZVEC_ERROR_UNKNOWN, std::string("Exception: ") + e.what()); \ + return code_on_error; \ + } \ + return ZVEC_OK; + +// For functions returning pointer - complete try-catch wrapper +// Usage: ZVEC_TRY_RETURN_NULL("error msg", code...) +// Note: Use variadic macro to handle commas in template arguments +#define ZVEC_TRY_RETURN_NULL(msg, ...) 
\ + try { \ + { __VA_ARGS__ } \ + } catch (const std::bad_alloc &e) { \ + SET_LAST_ERROR(ZVEC_ERROR_RESOURCE_EXHAUSTED, \ + std::string(msg) + ": " + e.what()); \ + return nullptr; \ + } catch (const std::exception &e) { \ + SET_LAST_ERROR(ZVEC_ERROR_INTERNAL_ERROR, \ + std::string(msg) + ": " + e.what()); \ + return nullptr; \ + } + +// For functions returning ErrorCode +// Usage: ZVEC_TRY_RETURN_ERROR("error msg", code...) +// Note: Use variadic macro to handle commas in template arguments +#define ZVEC_TRY_RETURN_ERROR(msg, ...) \ + try { \ + { __VA_ARGS__ } \ + } catch (const std::bad_alloc &e) { \ + SET_LAST_ERROR(ZVEC_ERROR_RESOURCE_EXHAUSTED, \ + std::string(msg) + ": " + e.what()); \ + return ZVEC_ERROR_RESOURCE_EXHAUSTED; \ + } catch (const std::exception &e) { \ + SET_LAST_ERROR(ZVEC_ERROR_INTERNAL_ERROR, \ + std::string(msg) + ": " + e.what()); \ + return ZVEC_ERROR_INTERNAL_ERROR; \ + } + +// For functions returning scalar values (int, float, size_t, etc.) +// Usage: ZVEC_TRY_RETURN_SCALAR("error msg", error_value, code...) +// Note: Use variadic macro to handle commas in template arguments +#define ZVEC_TRY_RETURN_SCALAR(msg, error_val, ...) 
\ + try { \ + { __VA_ARGS__ } \ + } catch (const std::bad_alloc &e) { \ + SET_LAST_ERROR(ZVEC_ERROR_RESOURCE_EXHAUSTED, \ + std::string(msg) + ": " + e.what()); \ + return (error_val); \ + } catch (const std::exception &e) { \ + SET_LAST_ERROR(ZVEC_ERROR_INTERNAL_ERROR, \ + std::string(msg) + ": " + e.what()); \ + return (error_val); \ + } + +// Global status flags +static std::atomic g_initialized{false}; +static std::mutex g_init_mutex; + +// Thread-local storage for error information +static thread_local std::string last_error_message; +static thread_local ZVecErrorDetails last_error_details; + +// Helper function: set error information +static void set_last_error(const std::string &msg) { + last_error_message = msg; + + last_error_details.code = ZVEC_ERROR_UNKNOWN; + last_error_details.message = last_error_message.c_str(); + last_error_details.file = nullptr; + last_error_details.line = 0; + last_error_details.function = nullptr; +} + +// Error setting function with detailed information +static void set_last_error_details(ZVecErrorCode code, const std::string &msg, + const char *file = nullptr, int line = 0, + const char *function = nullptr) { + last_error_message = msg; + last_error_details.code = code; + last_error_details.message = last_error_message.c_str(); + last_error_details.file = file; + last_error_details.line = line; + last_error_details.function = function; +} + +// ============================================================================= +// Version information interface implementation +// ============================================================================= + +// Store dynamically generated version information +static std::string g_version_info; +static std::mutex g_version_mutex; + +const char *zvec_get_version(void) { + std::lock_guard lock(g_version_mutex); + + if (g_version_info.empty()) { + ZVEC_TRY_BEGIN_VOID + std::string version = ZVEC_VERSION_STRING; + + // Try to get Git information + std::string git_info; +#ifdef 
ZVEC_GIT_DESCRIBE + git_info = ZVEC_GIT_DESCRIBE; +#elif defined(ZVEC_GIT_COMMIT_HASH) + git_info = std::string("g") + ZVEC_GIT_COMMIT_HASH; +#endif + + if (!git_info.empty()) { + version += "-" + git_info; + } + + version += + " (built " + std::string(__DATE__) + " " + std::string(__TIME__) + ")"; + + g_version_info = version; + ZVEC_CATCH_END_VOID + } + + return g_version_info.c_str(); +} + +bool zvec_check_version(int major, int minor, int patch) { + if (major < 0 || minor < 0 || patch < 0) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Version numbers must be non-negative"); + return false; + } + + if (ZVEC_VERSION_MAJOR > major) return true; + if (ZVEC_VERSION_MAJOR < major) return false; + + if (ZVEC_VERSION_MINOR > minor) return true; + if (ZVEC_VERSION_MINOR < minor) return false; + + return ZVEC_VERSION_PATCH >= patch; +} + +int zvec_get_version_major(void) { + return ZVEC_VERSION_MAJOR; +} + +int zvec_get_version_minor(void) { + return ZVEC_VERSION_MINOR; +} + +int zvec_get_version_patch(void) { + return ZVEC_VERSION_PATCH; +} + +// ============================================================================= +// String management functions implementation +// ============================================================================= + +ZVecString *zvec_string_create(const char *str) { + if (!str) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "String pointer cannot be null"); + return nullptr; + } + + size_t len = strlen(str); + ZVecString *zstr = static_cast(malloc(sizeof(ZVecString))); + if (!zstr) { + SET_LAST_ERROR(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to allocate memory for ZVecString"); + return nullptr; + } + + char *data_buffer = static_cast(malloc(len + 1)); + if (!data_buffer) { + free(zstr); + SET_LAST_ERROR(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to allocate memory for string data"); + return nullptr; + } + + memcpy(data_buffer, str, len + 1); + zstr->data = data_buffer; + zstr->length = len; + zstr->capacity = len + 1; + return 
zstr; +} + +ZVecString *zvec_string_create_from_view(const ZVecStringView *view) { + if (!view || !view->data) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "String view or data cannot be null"); + return nullptr; + } + + ZVecString *zstr = static_cast(malloc(sizeof(ZVecString))); + if (!zstr) { + SET_LAST_ERROR(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to allocate memory for ZVecString"); + return nullptr; + } + + char *data_buffer = static_cast(malloc(view->length + 1)); + if (!data_buffer) { + free(zstr); + SET_LAST_ERROR(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to allocate memory for string data"); + return nullptr; + } + + memcpy(data_buffer, view->data, view->length); + data_buffer[view->length] = '\0'; + zstr->data = data_buffer; + zstr->length = view->length; + zstr->capacity = view->length + 1; + + return zstr; +} + +ZVecString *zvec_bin_create(const uint8_t *data, size_t length) { + if (!data) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Binary data pointer cannot be null"); + return nullptr; + } + + ZVecString *zstr = static_cast(malloc(sizeof(ZVecString))); + if (!zstr) { + SET_LAST_ERROR(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to allocate memory for ZVecString"); + return nullptr; + } + + char *data_buffer = static_cast(malloc(length + 1)); + if (!data_buffer) { + free(zstr); + SET_LAST_ERROR(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to allocate memory for binary data"); + return nullptr; + } + + memcpy(data_buffer, data, length); + data_buffer[length] = '\0'; + zstr->data = data_buffer; + zstr->length = length; + zstr->capacity = length + 1; + + return zstr; +} + +ZVecString *zvec_string_copy(const ZVecString *str) { + if (!str || !str->data) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Source string or data cannot be null"); + return nullptr; + } + + return zvec_string_create(str->data); +} + +const char *zvec_string_c_str(const ZVecString *str) { + if (!str) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "String pointer cannot be 
null"); + return nullptr; + } + + return str->data; +} + +size_t zvec_string_length(const ZVecString *str) { + if (!str) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "String pointer cannot be null"); + return 0; + } + + return str->length; +} + +int zvec_string_compare(const ZVecString *str1, const ZVecString *str2) { + if (!str1 || !str2) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "String pointers cannot be null"); + return -1; + } + + if (!str1->data || !str2->data) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, "String data cannot be null"); + return -1; + } + + return strcmp(str1->data, str2->data); +} + +// ============================================================================= +// Configuration-related functions implementation +// ============================================================================= + +// Internal structure - Console log configuration +struct ZVecConsoleLogConfig { + ZVecLogLevel level; +}; + +// Internal structure - File log configuration +struct ZVecFileLogConfig { + ZVecLogLevel level; + ZVecString *dir; + ZVecString *basename; + uint32_t file_size; + uint32_t overdue_days; +}; + +// Internal structure - Configuration data +struct ZVecConfigData { + uint64_t memory_limit_bytes; + + // log + ZVecLogType log_type; + void *log_config; // ZVecConsoleLogConfig* or ZVecFileLogConfig* + + // query + uint32_t query_thread_count; + float invert_to_forward_scan_ratio; + float brute_force_by_keys_ratio; + + // optimize + uint32_t optimize_thread_count; +}; + +ZVecConsoleLogConfig *zvec_config_console_log_create(ZVecLogLevel level) { + ZVecConsoleLogConfig *config = + static_cast(malloc(sizeof(ZVecConsoleLogConfig))); + if (!config) { + SET_LAST_ERROR(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to allocate memory for ZVecConsoleLogConfig"); + return nullptr; + } + config->level = level; + return config; +} + +ZVecFileLogConfig *zvec_config_file_log_create(ZVecLogLevel level, + const char *dir, + const char *basename, + uint32_t 
file_size, + uint32_t overdue_days) { + if (!dir || !basename) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Directory or basename cannot be null"); + return nullptr; + } + + ZVecFileLogConfig *config = + static_cast(malloc(sizeof(ZVecFileLogConfig))); + if (!config) { + SET_LAST_ERROR(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to allocate memory for ZVecFileLogConfig"); + return nullptr; + } + + config->level = level; + config->dir = zvec_string_create(dir); + config->basename = zvec_string_create(basename); + + if (!config->dir || !config->basename) { + if (config->dir) zvec_free_string(config->dir); + if (config->basename) zvec_free_string(config->basename); + free(config); + SET_LAST_ERROR(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to create strings for file log config"); + return nullptr; + } + + config->file_size = file_size; + config->overdue_days = overdue_days; + + return config; +} + +void zvec_config_console_log_destroy(ZVecConsoleLogConfig *config) { + free(const_cast(config)); +} + +void zvec_config_file_log_destroy(ZVecFileLogConfig *config) { + if (config) { + if (config->dir) zvec_free_string(config->dir); + if (config->basename) zvec_free_string(config->basename); + free(const_cast(config)); + } +} + +ZVecLogLevel zvec_config_console_log_get_level( + const ZVecConsoleLogConfig *config) { + if (!config) { + return ZVEC_LOG_LEVEL_WARN; + } + return config->level; +} + +ZVecErrorCode zvec_config_console_log_set_level(ZVecConsoleLogConfig *config, + ZVecLogLevel level) { + if (!config) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, "Config pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + config->level = level; + return ZVEC_OK; +} + +ZVecLogLevel zvec_config_file_log_get_level(const ZVecFileLogConfig *config) { + if (!config) { + return ZVEC_LOG_LEVEL_WARN; + } + return config->level; +} + +ZVecErrorCode zvec_config_file_log_set_level(ZVecFileLogConfig *config, + ZVecLogLevel level) { + if (!config) { + 
SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, "Config pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + config->level = level; + return ZVEC_OK; +} + +const char *zvec_config_file_log_get_dir(const ZVecFileLogConfig *config) { + if (!config || !config->dir) { + return nullptr; + } + return config->dir->data; +} + +ZVecErrorCode zvec_config_file_log_set_dir(ZVecFileLogConfig *config, + const char *dir) { + if (!config || !dir) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Config or dir pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + if (config->dir) { + zvec_free_string(config->dir); + } + config->dir = zvec_string_create(dir); + if (!config->dir) { + SET_LAST_ERROR(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to create dir string"); + return ZVEC_ERROR_RESOURCE_EXHAUSTED; + } + return ZVEC_OK; +} + +const char *zvec_config_file_log_get_basename(const ZVecFileLogConfig *config) { + if (!config || !config->basename) { + return nullptr; + } + return config->basename->data; +} + +ZVecErrorCode zvec_config_file_log_set_basename(ZVecFileLogConfig *config, + const char *basename) { + if (!config || !basename) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Config or basename pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + if (config->basename) { + zvec_free_string(config->basename); + } + config->basename = zvec_string_create(basename); + if (!config->basename) { + SET_LAST_ERROR(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to create basename string"); + return ZVEC_ERROR_RESOURCE_EXHAUSTED; + } + return ZVEC_OK; +} + +uint32_t zvec_config_file_log_get_file_size(const ZVecFileLogConfig *config) { + if (!config) { + return 0; + } + return config->file_size; +} + +ZVecErrorCode zvec_config_file_log_set_file_size(ZVecFileLogConfig *config, + uint32_t file_size) { + if (!config) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, "Config pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + config->file_size = file_size; + return 
ZVEC_OK; +} + +uint32_t zvec_config_file_log_get_overdue_days( + const ZVecFileLogConfig *config) { + if (!config) { + return 0; + } + return config->overdue_days; +} + +ZVecErrorCode zvec_config_file_log_set_overdue_days(ZVecFileLogConfig *config, + uint32_t days) { + if (!config) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, "Config pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + config->overdue_days = days; + return ZVEC_OK; +} + +ZVecConfigData *zvec_config_data_create(void) { + ZVecConfigData *config = + static_cast(malloc(sizeof(ZVecConfigData))); + if (!config) { + SET_LAST_ERROR(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to allocate memory for ZVecConfigData"); + return nullptr; + } + + // Create default console log config + ZVecConsoleLogConfig *log_config = + zvec_config_console_log_create(ZVEC_LOG_LEVEL_WARN); + if (!log_config) { + free(config); + SET_LAST_ERROR(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to create console log config"); + return nullptr; + } + config->log_config = log_config; + config->log_type = ZVEC_LOG_TYPE_CONSOLE; + + // Set default values from C++ ConfigData + zvec::GlobalConfig::ConfigData config_data; + config->memory_limit_bytes = config_data.memory_limit_bytes; + config->query_thread_count = config_data.query_thread_count; + config->invert_to_forward_scan_ratio = + config_data.invert_to_forward_scan_ratio; + config->brute_force_by_keys_ratio = config_data.brute_force_by_keys_ratio; + config->optimize_thread_count = config_data.optimize_thread_count; + + return config; +} + +void zvec_config_data_destroy(ZVecConfigData *config) { + if (config) { + if (config->log_config) { + if (config->log_type == ZVEC_LOG_TYPE_CONSOLE) { + zvec_config_console_log_destroy( + static_cast(config->log_config)); + } else { + zvec_config_file_log_destroy( + static_cast(config->log_config)); + } + } + free(config); + } +} + +ZVecErrorCode zvec_config_data_set_memory_limit(ZVecConfigData *config, + uint64_t memory_limit_bytes) { + if 
(!config) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, "Config pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + config->memory_limit_bytes = memory_limit_bytes; + return ZVEC_OK; +} + +uint64_t zvec_config_data_get_memory_limit(const ZVecConfigData *config) { + if (!config) { + return 0; + } + return config->memory_limit_bytes; +} + +ZVecErrorCode zvec_config_data_set_log_config(ZVecConfigData *config, + ZVecLogType log_type, + void *log_config) { + if (!config || !log_config) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Config or log_config pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + if (config->log_config) { + if (config->log_type == ZVEC_LOG_TYPE_CONSOLE) { + zvec_config_console_log_destroy( + static_cast(config->log_config)); + } else { + zvec_config_file_log_destroy( + static_cast(config->log_config)); + } + } + + config->log_type = log_type; + config->log_config = log_config; + return ZVEC_OK; +} + +ZVecLogType zvec_config_data_get_log_type(const ZVecConfigData *config) { + if (!config) { + return ZVEC_LOG_TYPE_CONSOLE; + } + return config->log_type; +} + +ZVecConsoleLogConfig *zvec_config_data_get_console_log_config( + const ZVecConfigData *config) { + if (!config || config->log_type != ZVEC_LOG_TYPE_CONSOLE) { + return nullptr; + } + return static_cast(config->log_config); +} + +ZVecFileLogConfig *zvec_config_data_get_file_log_config( + const ZVecConfigData *config) { + if (!config || config->log_type != ZVEC_LOG_TYPE_FILE) { + return nullptr; + } + return static_cast(config->log_config); +} + +ZVecErrorCode zvec_config_data_set_query_thread_count(ZVecConfigData *config, + uint32_t thread_count) { + if (!config) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, "Config pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + config->query_thread_count = thread_count; + return ZVEC_OK; +} + +uint32_t zvec_config_data_get_query_thread_count(const ZVecConfigData *config) { + if (!config) { + return 1; + } + 
return config->query_thread_count; +} + +ZVecErrorCode zvec_config_data_set_invert_to_forward_scan_ratio( + ZVecConfigData *config, float ratio) { + if (!config) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, "Config pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + config->invert_to_forward_scan_ratio = ratio; + return ZVEC_OK; +} + +float zvec_config_data_get_invert_to_forward_scan_ratio( + const ZVecConfigData *config) { + if (!config) { + return 0.0f; + } + return config->invert_to_forward_scan_ratio; +} + +ZVecErrorCode zvec_config_data_set_brute_force_by_keys_ratio( + ZVecConfigData *config, float ratio) { + if (!config) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, "Config pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + config->brute_force_by_keys_ratio = ratio; + return ZVEC_OK; +} + +float zvec_config_data_get_brute_force_by_keys_ratio( + const ZVecConfigData *config) { + if (!config) { + return 0.0f; + } + return config->brute_force_by_keys_ratio; +} + +ZVecErrorCode zvec_config_data_set_optimize_thread_count( + ZVecConfigData *config, uint32_t thread_count) { + if (!config) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, "Config pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + config->optimize_thread_count = thread_count; + return ZVEC_OK; +} + +uint32_t zvec_config_data_get_optimize_thread_count( + const ZVecConfigData *config) { + if (!config) { + return 1; + } + return config->optimize_thread_count; +} + + +// ============================================================================= +// Initialization and cleanup interface implementation +// ============================================================================= + +ZVecErrorCode zvec_initialize(const ZVecConfigData *config) { + std::lock_guard lock(g_init_mutex); + + if (g_initialized.load()) { + SET_LAST_ERROR(ZVEC_ERROR_ALREADY_EXISTS, "Library already initialized"); + return ZVEC_ERROR_ALREADY_EXISTS; + } + + ZVEC_TRY_RETURN_ERROR( + 
"Initialization failed", + // Convert to C++ configuration object + zvec::GlobalConfig::ConfigData cpp_config{}; + + if (config) { + cpp_config.memory_limit_bytes = + zvec_config_data_get_memory_limit(config); + cpp_config.query_thread_count = + zvec_config_data_get_query_thread_count(config); + cpp_config.invert_to_forward_scan_ratio = + zvec_config_data_get_invert_to_forward_scan_ratio(config); + cpp_config.brute_force_by_keys_ratio = + zvec_config_data_get_brute_force_by_keys_ratio(config); + cpp_config.optimize_thread_count = + zvec_config_data_get_optimize_thread_count(config); + + // Set log configuration + void *log_config = zvec_config_data_get_console_log_config(config); + if (!log_config) { + log_config = zvec_config_data_get_file_log_config(config); + } + + if (log_config) { + std::shared_ptr cpp_log_config; + + switch (zvec_config_data_get_log_type(config)) { + case ZVEC_LOG_TYPE_CONSOLE: { + ZVecConsoleLogConfig *console_config = + static_cast(log_config); + auto console_level = static_cast( + zvec_config_console_log_get_level(console_config)); + cpp_log_config = + std::make_shared( + console_level); + break; + } + case ZVEC_LOG_TYPE_FILE: { + ZVecFileLogConfig *file_config = + static_cast(log_config); + auto file_level = static_cast( + zvec_config_file_log_get_level(file_config)); + std::string dir(zvec_config_file_log_get_dir(file_config)); + std::string basename( + zvec_config_file_log_get_basename(file_config)); + cpp_log_config = + std::make_shared( + file_level, dir, basename, + zvec_config_file_log_get_file_size(file_config), + zvec_config_file_log_get_overdue_days(file_config)); + break; + } + default: + throw std::runtime_error("Unknown log type"); + } + cpp_config.log_config = cpp_log_config; + } + } else { + // Initialize with default configuration + cpp_config = zvec::GlobalConfig::ConfigData{}; + } + + // Initialize global configuration + auto status = zvec::GlobalConfig::Instance().Initialize(cpp_config); + if (!status.ok()) { + 
set_last_error(status.message()); + return ZVEC_ERROR_INTERNAL_ERROR; + } + + g_initialized.store(true); + return ZVEC_OK;) +} + +ZVecErrorCode zvec_shutdown(void) { + std::lock_guard lock(g_init_mutex); + + if (!g_initialized.load()) { + SET_LAST_ERROR(ZVEC_ERROR_FAILED_PRECONDITION, "Library not initialized"); + return ZVEC_ERROR_FAILED_PRECONDITION; + } + + ZVEC_TRY_RETURN_ERROR("Shutdown failed", g_initialized.store(false); + return ZVEC_OK;) +} + +bool zvec_is_initialized(void) { + return g_initialized.load(); +} + +// ============================================================================= +// Error handling interface implementation +// ============================================================================= + +ZVecErrorCode zvec_get_last_error_details(ZVecErrorDetails *error_details) { + if (!error_details) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Error details pointer cannot be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + *error_details = last_error_details; + return ZVEC_OK; +} + +void zvec_clear_error(void) { + last_error_message.clear(); + last_error_details = {}; +} + +// Helper functions: convert internal status to error code +static ZVecErrorCode status_to_error_code(const zvec::Status &status) { + if (status.code() < zvec::StatusCode::OK || + status.code() > zvec::StatusCode::UNKNOWN) { + set_last_error("Unexpected status code: " + + std::to_string(static_cast(status.code()))); + return ZVEC_ERROR_UNKNOWN; + } + + return static_cast(status.code()); +} + +// Helper function: handle Expected results +template +static ZVecErrorCode handle_expected_result( + const tl::expected &result, T *out_value = nullptr) { + if (result.has_value()) { + if (out_value) { + *out_value = result.value(); + } + return ZVEC_OK; + } else { + set_last_error(result.error().message()); + return status_to_error_code(result.error()); + } +} + +// Helper function: copy strings +static char *copy_string(const std::string &str) { + if (str.empty()) 
return nullptr; + size_t len = str.length(); + char *copy = static_cast(malloc(len + 1)); + if (!copy) return nullptr; + strncpy(copy, str.c_str(), len); + copy[len] = '\0'; // Ensure null-termination + return copy; +} + +// Helper function: free write results returned by detailed DML APIs. +static void free_write_results_internal(ZVecWriteResult *results, + size_t result_count) { + if (!results) { + return; + } + for (size_t i = 0; i < result_count; ++i) { + // pk is not stored (ordered style), only free message + if (results[i].message) { + free((void *)results[i].message); + results[i].message = nullptr; + } + } + free(results); +} + +// Helper function: convert per-doc statuses to C API write result array. +static ZVecErrorCode build_write_results( + const std::vector &statuses, + const std::vector &pks, ZVecWriteResult **results, + size_t *result_count) { + if (!results || !result_count) { + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + *result_count = statuses.size(); + if (*result_count == 0) { + *results = nullptr; + return ZVEC_OK; + } + + *results = static_cast( + calloc(*result_count, sizeof(ZVecWriteResult))); + if (!*results) { + set_last_error("Failed to allocate memory for write results"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + + // Use ordered style: result index corresponds to input index. + // No need to store pk in result, caller can access by index. 
+ for (size_t i = 0; i < *result_count; ++i) { + const std::string message = statuses[i].message(); + (*results)[i].message = copy_string(message); + (*results)[i].code = status_to_error_code(statuses[i]); + } + + return ZVEC_OK; +} + +static std::vector collect_doc_pks(const ZVecDoc **docs, + size_t doc_count) { + std::vector pks; + pks.reserve(doc_count); + for (size_t i = 0; i < doc_count; ++i) { + if (!docs[i]) { + pks.emplace_back(""); + continue; + } + auto doc_ptr = + reinterpret_cast *>(docs[i]); + pks.emplace_back((*doc_ptr)->pk_ref()); + } + return pks; +} + +static zvec::DataType convert_data_type(ZVecDataType zvec_type) { + if (zvec_type < ZVEC_DATA_TYPE_UNDEFINED || + zvec_type > ZVEC_DATA_TYPE_ARRAY_DOUBLE) { + return zvec::DataType::UNDEFINED; + } + + return static_cast(zvec_type); +} + +static ZVecDataType convert_zvec_data_type(zvec::DataType cpp_type) { + if (cpp_type < zvec::DataType::UNDEFINED || + cpp_type > zvec::DataType::ARRAY_DOUBLE) { + return ZVEC_DATA_TYPE_UNDEFINED; + } + + return static_cast(cpp_type); +} + +// Helper function: convert metric type +static zvec::MetricType convert_metric_type(ZVecMetricType metric_type) { + if (metric_type < ZVEC_METRIC_TYPE_UNDEFINED || + metric_type > ZVEC_METRIC_TYPE_MIPSL2) { + return zvec::MetricType::UNDEFINED; + } + + return static_cast(metric_type); +} + +// Helper function: convert ZVecIndexType to internal IndexType +static zvec::IndexType convert_index_type(ZVecIndexType zvec_type) { + if (zvec_type < ZVEC_INDEX_TYPE_UNDEFINED || + zvec_type > ZVEC_INDEX_TYPE_INVERT) { + return zvec::IndexType::UNDEFINED; + } + + return static_cast(zvec_type); +} + +// Helper function: convert ZVecQuantizeType to internal QuantizeType +static zvec::QuantizeType convert_quantize_type(ZVecQuantizeType zvec_type) { + if (zvec_type < ZVEC_QUANTIZE_TYPE_UNDEFINED || + zvec_type > ZVEC_QUANTIZE_TYPE_INT4) { + return zvec::QuantizeType::UNDEFINED; + } + + return static_cast(zvec_type); +} + +// Forward declaration: 
convert C index params to C++ +static std::shared_ptr convert_c_index_params_to_cpp( + const ZVecIndexParams *params); + +// Helper function: set field index params +static zvec::Status set_field_index_params(zvec::FieldSchema::Ptr &field_schema, + const ZVecFieldSchema *zvec_field) { + if (!zvec_field_schema_has_index(zvec_field)) { + return zvec::Status::OK(); + } + + // Get the index params using getter - we need to access internal struct + // For this internal function, we can access the struct members since it's in + // the implementation We'll add a friend-like internal getter + ZVecIndexParams *index_params = nullptr; + // Use a hack to get the index_params - cast to access internal member + // This is safe because we're in the implementation file + struct InternalFieldSchema { + ZVecString *name; + ZVecDataType data_type; + bool nullable; + uint32_t dimension; + ZVecIndexParams *index_params; + bool has_index; + }; + index_params = + reinterpret_cast(zvec_field)->index_params; + + if (!index_params) { + return zvec::Status::OK(); + } + + // Use the conversion helper function + auto cpp_params = convert_c_index_params_to_cpp(index_params); + if (cpp_params) { + field_schema->set_index_params(cpp_params); + } + + return zvec::Status::OK(); +} + +// ============================================================================= +// Memory Management interface implementation +// ============================================================================= + +void zvec_free_string(ZVecString *str) { + if (str) { + if (str->data) { + free((void *)str->data); + } + free(str); + } +} + +ZVecStringArray *zvec_string_array_create(size_t count) { + ZVecStringArray *array = (ZVecStringArray *)malloc(sizeof(ZVecStringArray)); + array->count = count; + array->strings = (ZVecString *)malloc(sizeof(ZVecString) * count); + memset(array->strings, 0, sizeof(ZVecString) * count); + return array; +} + +ZVecStringArray *zvec_string_array_create_from_strings(const char **strings, + 
size_t count) { + if (!strings || count == 0) { + return nullptr; + } + ZVecStringArray *array = zvec_string_array_create(count); + for (size_t i = 0; i < count; ++i) { + zvec_string_array_add(array, i, strings[i]); + } + return array; +} + +void zvec_string_array_add(ZVecStringArray *array, size_t idx, + const char *str) { + if (idx >= array->count) return; + size_t len = strlen(str); + array->strings[idx].data = (char *)malloc(len + 1); + memcpy(array->strings[idx].data, str, len + 1); + array->strings[idx].length = len; + array->strings[idx].capacity = len + 1; +} + +void zvec_string_array_destroy(ZVecStringArray *array) { + if (!array) return; + for (size_t i = 0; i < array->count; i++) { + free((void *)array->strings[i].data); + } + free(array->strings); + free(array); +} + + +// Byte array helper functions +ZVecMutableByteArray *zvec_byte_array_create(size_t capacity) { + ZVecMutableByteArray *array = + (ZVecMutableByteArray *)malloc(sizeof(ZVecMutableByteArray)); + if (!array) return nullptr; + + array->data = (uint8_t *)malloc(capacity); + if (!array->data) { + free(array); + return nullptr; + } + + array->length = 0; + array->capacity = capacity; + memset(array->data, 0, capacity); + return array; +} + +void zvec_byte_array_destroy(ZVecMutableByteArray *array) { + if (!array) return; + if (array->data) { + free(array->data); + } + free(array); +} + +// Float array helper functions +ZVecFloatArray *zvec_float_array_create(size_t count) { + ZVecFloatArray *array = (ZVecFloatArray *)malloc(sizeof(ZVecFloatArray)); + if (!array) return nullptr; + + array->data = (const float *)malloc(sizeof(float) * count); + if (!array->data) { + free(array); + return nullptr; + } + + array->length = count; + memset((void *)array->data, 0, sizeof(float) * count); + return array; +} + +void zvec_float_array_destroy(ZVecFloatArray *array) { + if (!array) return; + if (array->data) { + free((void *)array->data); + } + free(array); +} + +// Int64 array helper functions 
+ZVecInt64Array *zvec_int64_array_create(size_t count) { + ZVecInt64Array *array = (ZVecInt64Array *)malloc(sizeof(ZVecInt64Array)); + if (!array) return nullptr; + + array->data = (const int64_t *)malloc(sizeof(int64_t) * count); + if (!array->data) { + free(array); + return nullptr; + } + + array->length = count; + memset((void *)array->data, 0, sizeof(int64_t) * count); + return array; +} + +void zvec_int64_array_destroy(ZVecInt64Array *array) { + if (!array) return; + if (array->data) { + free((void *)array->data); + } + free(array); +} + +void zvec_free_float_array(float *array) { + if (array) { + free(array); + } +} + +void zvec_free_str_array(char **array, size_t count) { + if (!array) return; + + // If count is 0, only free the string array itself, don't process internal + // strings + if (count == 0) { + free(array); + return; + } + + for (size_t i = 0; i < count; ++i) { + if (array[i]) { // Only free when string pointer is not null + free(array[i]); + } + } + free(array); +} + +ZVecErrorCode zvec_get_last_error(char **error_msg) { + if (!error_msg) { + set_last_error("Invalid argument: error_msg cannot be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + *error_msg = copy_string(last_error_message); + return ZVEC_OK; +} + +void zvec_free_uint8_array(uint8_t *array) { + if (array) { + free(array); + } +} + +void zvec_free_ptr(void *ptr) { + if (ptr) { + free(ptr); + } +} + +void zvec_free_field_schema(ZVecFieldSchema *field_schema) { + if (field_schema) { + // index_params is embedded, no need to free + free(field_schema); + } +} + +// ============================================================================= +// Index parameters management interface implementation (deprecated) +// These are deprecated in favor of the opaque pointer API +// ============================================================================= + +// Deprecated: Use zvec_index_params_create() instead +void zvec_index_params_init(ZVecIndexParams *params, ZVecIndexType 
index_type, + ZVecMetricType metric_type) { + // This function is deprecated and should not be used + // Use zvec_index_params_create() instead + SET_LAST_ERROR( + ZVEC_ERROR_NOT_SUPPORTED, + "zvec_index_params_init is deprecated. Use zvec_index_params_create()"); +} + +// Deprecated: Use zvec_index_params_set_hnsw_params() instead +void zvec_index_params_set_hnsw(ZVecIndexParams *params, int m, + int ef_construction, int ef_search) { + SET_LAST_ERROR(ZVEC_ERROR_NOT_SUPPORTED, + "zvec_index_params_set_hnsw is deprecated. Use " + "zvec_index_params_set_hnsw_params()"); +} + +// Deprecated: Use zvec_index_params_set_ivf_params() instead +void zvec_index_params_set_ivf(ZVecIndexParams *params, int n_list, int n_iters, + bool use_soar, int n_probe) { + SET_LAST_ERROR(ZVEC_ERROR_NOT_SUPPORTED, + "zvec_index_params_set_ivf is deprecated. Use " + "zvec_index_params_set_ivf_params()"); +} + +// Deprecated: Use zvec_index_params_set_invert_params() instead +void zvec_index_params_set_invert(ZVecIndexParams *params, + bool enable_range_opt, bool enable_wildcard) { + SET_LAST_ERROR(ZVEC_ERROR_NOT_SUPPORTED, + "zvec_index_params_set_invert is deprecated. 
Use " + "zvec_index_params_set_invert_params()"); +} + +// ============================================================================= +// ZVecIndexParams opaque pointer implementation +// ============================================================================= + +// Internal structure - holds C++ shared_ptr +struct ZVecIndexParams { + std::shared_ptr cpp_params; + ZVecIndexType index_type; + ZVecMetricType metric_type; + ZVecQuantizeType quantize_type; + + // Type-specific storage (only one is active based on index_type) + struct { + bool enable_range_optimization; + bool enable_extended_wildcard; + } invert; + + struct { + int m; + int ef_construction; + } hnsw; + + struct { + int n_list; + int n_iters; + bool use_soar; + } ivf; +}; + +// ============================================================================= +// ZVecFieldSchema opaque pointer implementation +// ============================================================================= + +// Internal structure - field schema with private members +struct ZVecFieldSchema { + ZVecString *name; + ZVecDataType data_type; + bool nullable; + uint32_t dimension; + ZVecIndexParams *index_params; // Owned by field schema + bool has_index; +}; + +// Internal structure - collection schema with private members +struct ZVecCollectionSchema { + ZVecString *name; + ZVecFieldSchema **fields; + size_t field_count; + size_t field_capacity; + uint64_t max_doc_count_per_segment; +}; + +// ============================================================================= +// Configuration structures opaque pointer implementation +// ============================================================================= + +// Internal structure - QueryParams (base) +struct ZVecQueryParams { + ZVecIndexType index_type; + float radius; + bool is_linear; + bool is_using_refiner; +}; + +// Internal structure - HnswQueryParams +struct ZVecHnswQueryParams { + ZVecQueryParams base; + int ef; +}; + +// Internal structure - IVFQueryParams 
+struct ZVecIVFQueryParams { + ZVecQueryParams base; + int nprobe; + float scale_factor; +}; + +// Internal structure - FlatQueryParams +struct ZVecFlatQueryParams { + ZVecQueryParams base; + float scale_factor; +}; + +// Internal structure - VectorQuery +struct ZVecVectorQuery { + int topk; + ZVecString *field_name; + ZVecByteArray query_vector; + ZVecByteArray query_sparse_indices; + ZVecByteArray query_sparse_values; + ZVecString *filter; + bool include_vector; + bool include_doc_id; + ZVecStringArray *output_fields; + void *query_params; // Type-specific params (HnswQueryParams*, + // IVFQueryParams*, etc.) + ZVecIndexType params_type; // To track the type of query_params +}; + +// Internal structure - GroupByVectorQuery +struct ZVecGroupByVectorQuery { + ZVecString *field_name; + ZVecByteArray query_vector; + ZVecByteArray query_sparse_indices; + ZVecByteArray query_sparse_values; + ZVecString *filter; + bool include_vector; + ZVecStringArray *output_fields; + ZVecString *group_by_field_name; + uint32_t group_count; + uint32_t group_topk; + void *query_params; // Type-specific params + ZVecIndexType params_type; // To track the type of query_params +}; + +// Internal structure - CollectionOptions +struct ZVecCollectionOptions { + bool enable_mmap; + size_t max_buffer_size; + bool read_only; + uint64_t max_doc_count_per_segment; +}; + +// Internal structure - CollectionStats +struct ZVecCollectionStats { + uint64_t doc_count; + ZVecString **index_names; + float *index_completeness; + size_t index_count; +}; + +ZVecIndexParams *zvec_index_params_create(ZVecIndexType index_type) { + ZVEC_TRY_RETURN_NULL( + "Failed to create ZVecIndexParams", + ZVecIndexParams *params = new ZVecIndexParams(); + params->index_type = index_type; + params->metric_type = ZVEC_METRIC_TYPE_L2; // Default + params->quantize_type = ZVEC_QUANTIZE_TYPE_UNDEFINED; + + // Initialize type-specific params with defaults + memset(¶ms->invert, 0, sizeof(params->invert)); + memset(¶ms->hnsw, 0, 
sizeof(params->hnsw)); + memset(¶ms->ivf, 0, sizeof(params->ivf)); + + // Set defaults based on index type + switch (index_type) { + case ZVEC_INDEX_TYPE_INVERT: + params->invert.enable_range_optimization = true; + params->invert.enable_extended_wildcard = false; + break; + case ZVEC_INDEX_TYPE_HNSW: + params->hnsw.m = 16; + params->hnsw.ef_construction = 200; + break; + case ZVEC_INDEX_TYPE_IVF: + params->ivf.n_list = 100; + params->ivf.n_iters = 10; + params->ivf.use_soar = false; + break; + case ZVEC_INDEX_TYPE_FLAT: + default: + break; + } + + return params;) + + return nullptr; +} + +void zvec_index_params_destroy(ZVecIndexParams *params) { + if (params) { + delete params; + } +} + +ZVecErrorCode zvec_index_params_set_metric_type(ZVecIndexParams *params, + ZVecMetricType metric_type) { + if (!params) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Index params pointer cannot be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + params->metric_type = metric_type; + return ZVEC_OK; +} + +ZVecMetricType zvec_index_params_get_metric_type( + const ZVecIndexParams *params) { + if (!params) { + return ZVEC_METRIC_TYPE_L2; // Default + } + return params->metric_type; +} + +ZVecErrorCode zvec_index_params_set_quantize_type( + ZVecIndexParams *params, ZVecQuantizeType quantize_type) { + if (!params) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Index params pointer cannot be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + params->quantize_type = quantize_type; + return ZVEC_OK; +} + +ZVecQuantizeType zvec_index_params_get_quantize_type( + const ZVecIndexParams *params) { + if (!params) { + return ZVEC_QUANTIZE_TYPE_UNDEFINED; + } + return params->quantize_type; +} + +ZVecIndexType zvec_index_params_get_type(const ZVecIndexParams *params) { + if (!params) { + return ZVEC_INDEX_TYPE_FLAT; // Default + } + return params->index_type; +} + +ZVecErrorCode zvec_index_params_set_hnsw_params(ZVecIndexParams *params, int m, + int ef_construction) { + if (!params || 
params->index_type != ZVEC_INDEX_TYPE_HNSW) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Invalid params or not HNSW index type"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + params->hnsw.m = m; + params->hnsw.ef_construction = ef_construction; + return ZVEC_OK; +} + +ZVecErrorCode zvec_index_params_get_hnsw_params(const ZVecIndexParams *params, + int *out_m, + int *out_ef_construction) { + if (!params || params->index_type != ZVEC_INDEX_TYPE_HNSW) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Invalid params or not HNSW index type"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + if (out_m) *out_m = params->hnsw.m; + if (out_ef_construction) *out_ef_construction = params->hnsw.ef_construction; + return ZVEC_OK; +} + +ZVecErrorCode zvec_index_params_set_ivf_params(ZVecIndexParams *params, + int n_list, int n_iters, + bool use_soar) { + if (!params || params->index_type != ZVEC_INDEX_TYPE_IVF) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Invalid params or not IVF index type"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + params->ivf.n_list = n_list; + params->ivf.n_iters = n_iters; + params->ivf.use_soar = use_soar; + return ZVEC_OK; +} + +ZVecErrorCode zvec_index_params_get_ivf_params(const ZVecIndexParams *params, + int *out_n_list, + int *out_n_iters, + bool *out_use_soar) { + if (!params || params->index_type != ZVEC_INDEX_TYPE_IVF) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Invalid params or not IVF index type"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + if (out_n_list) *out_n_list = params->ivf.n_list; + if (out_n_iters) *out_n_iters = params->ivf.n_iters; + if (out_use_soar) *out_use_soar = params->ivf.use_soar; + return ZVEC_OK; +} + +ZVecErrorCode zvec_index_params_set_invert_params(ZVecIndexParams *params, + bool enable_range_opt, + bool enable_wildcard) { + if (!params || params->index_type != ZVEC_INDEX_TYPE_INVERT) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Invalid params or not INVERT index type"); + return 
ZVEC_ERROR_INVALID_ARGUMENT; + } + params->invert.enable_range_optimization = enable_range_opt; + params->invert.enable_extended_wildcard = enable_wildcard; + return ZVEC_OK; +} + +ZVecErrorCode zvec_index_params_get_invert_params(const ZVecIndexParams *params, + bool *out_enable_range_opt, + bool *out_enable_wildcard) { + if (!params || params->index_type != ZVEC_INDEX_TYPE_INVERT) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Invalid params or not INVERT index type"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + if (out_enable_range_opt) + *out_enable_range_opt = params->invert.enable_range_optimization; + if (out_enable_wildcard) + *out_enable_wildcard = params->invert.enable_extended_wildcard; + return ZVEC_OK; +} + +// Helper function to convert C++ IndexParams to C ZVecIndexParams +static ZVecIndexParams *convert_cpp_index_params_to_c( + const std::shared_ptr &cpp_params) { + if (!cpp_params) { + return nullptr; + } + + ZVecIndexType c_type; + switch (cpp_params->type()) { + case zvec::IndexType::HNSW: + c_type = ZVEC_INDEX_TYPE_HNSW; + break; + case zvec::IndexType::IVF: + c_type = ZVEC_INDEX_TYPE_IVF; + break; + case zvec::IndexType::FLAT: + c_type = ZVEC_INDEX_TYPE_FLAT; + break; + case zvec::IndexType::INVERT: + c_type = ZVEC_INDEX_TYPE_INVERT; + break; + default: + c_type = ZVEC_INDEX_TYPE_FLAT; + break; + } + + ZVecIndexParams *params = zvec_index_params_create(c_type); + if (!params) return nullptr; + + params->cpp_params = cpp_params; + + // Extract metric and quantize types from VectorIndexParams if applicable + if (cpp_params->is_vector_index_type()) { + auto *vec_params = + dynamic_cast(cpp_params.get()); + if (vec_params) { + switch (vec_params->metric_type()) { + case zvec::MetricType::L2: + params->metric_type = ZVEC_METRIC_TYPE_L2; + break; + case zvec::MetricType::IP: + params->metric_type = ZVEC_METRIC_TYPE_IP; + break; + case zvec::MetricType::COSINE: + params->metric_type = ZVEC_METRIC_TYPE_COSINE; + break; + default: + 
params->metric_type = ZVEC_METRIC_TYPE_L2; + break; + } + // Note: quantize_type would need similar mapping if used + } + } + + // Extract type-specific parameters + switch (c_type) { + case ZVEC_INDEX_TYPE_HNSW: { + auto *hnsw = + dynamic_cast(cpp_params.get()); + if (hnsw) { + params->hnsw.m = hnsw->m(); + params->hnsw.ef_construction = hnsw->ef_construction(); + } + break; + } + case ZVEC_INDEX_TYPE_IVF: { + auto *ivf = dynamic_cast(cpp_params.get()); + if (ivf) { + params->ivf.n_list = ivf->n_list(); + params->ivf.n_iters = ivf->n_iters(); + params->ivf.use_soar = ivf->use_soar(); + } + break; + } + case ZVEC_INDEX_TYPE_INVERT: { + auto *invert = + dynamic_cast(cpp_params.get()); + if (invert) { + params->invert.enable_range_optimization = + invert->enable_range_optimization(); + params->invert.enable_extended_wildcard = + invert->enable_extended_wildcard(); + } + break; + } + default: + break; + } + + return params; +} + +// Helper function to convert C ZVecIndexParams to C++ IndexParams +static std::shared_ptr convert_c_index_params_to_cpp( + const ZVecIndexParams *params) { + if (!params) { + return nullptr; + } + + zvec::MetricType metric = zvec::MetricType::L2; + switch (params->metric_type) { + case ZVEC_METRIC_TYPE_L2: + metric = zvec::MetricType::L2; + break; + case ZVEC_METRIC_TYPE_IP: + metric = zvec::MetricType::IP; + break; + case ZVEC_METRIC_TYPE_COSINE: + metric = zvec::MetricType::COSINE; + break; + default: + metric = zvec::MetricType::L2; + break; + } + + zvec::QuantizeType quantize = zvec::QuantizeType::UNDEFINED; + // Add quantize type mapping if needed + + switch (params->index_type) { + case ZVEC_INDEX_TYPE_HNSW: + return std::make_shared( + metric, params->hnsw.m, params->hnsw.ef_construction, quantize); + case ZVEC_INDEX_TYPE_IVF: + return std::make_shared( + metric, params->ivf.n_list, params->ivf.n_iters, params->ivf.use_soar, + quantize); + case ZVEC_INDEX_TYPE_FLAT: + return std::make_shared(metric, quantize); + case 
ZVEC_INDEX_TYPE_INVERT: + return std::make_shared( + params->invert.enable_range_optimization, + params->invert.enable_extended_wildcard); + default: + return std::make_shared(zvec::MetricType::L2); + } +} + +// ============================================================================= +// FieldSchema management interface implementation +// ============================================================================= + +ZVecFieldSchema *zvec_field_schema_create(const char *name, + ZVecDataType data_type, bool nullable, + uint32_t dimension) { + if (!name) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, "Field name cannot be null"); + return nullptr; + } + + ZVecFieldSchema *schema = new ZVecFieldSchema(); + if (!schema) { + SET_LAST_ERROR(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to allocate memory for ZVecFieldSchema"); + return nullptr; + } + + schema->name = zvec_string_create(name); + if (!schema->name) { + delete schema; + SET_LAST_ERROR(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to create string for field name"); + return nullptr; + } + + schema->data_type = data_type; + schema->nullable = nullable; + schema->dimension = dimension; + schema->index_params = nullptr; + schema->has_index = false; + + return schema; +} + +void zvec_field_schema_destroy(ZVecFieldSchema *schema) { + if (schema) { + zvec_free_string(schema->name); + if (schema->index_params) { + zvec_index_params_destroy(schema->index_params); + } + delete schema; + } +} + +// Getter functions +const char *zvec_field_schema_get_name(const ZVecFieldSchema *schema) { + if (!schema) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Field schema pointer cannot be null"); + return nullptr; + } + return zvec_string_c_str(schema->name); +} + +ZVecDataType zvec_field_schema_get_data_type(const ZVecFieldSchema *schema) { + if (!schema) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Field schema pointer cannot be null"); + return ZVEC_DATA_TYPE_UNDEFINED; + } + return schema->data_type; +} + +bool 
zvec_field_schema_is_nullable(const ZVecFieldSchema *schema) { + if (!schema) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Field schema pointer cannot be null"); + return false; + } + return schema->nullable; +} + +ZVecErrorCode zvec_field_schema_set_nullable(ZVecFieldSchema *schema, + bool nullable) { + if (!schema) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Field schema pointer cannot be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + schema->nullable = nullable; + return ZVEC_OK; +} + +uint32_t zvec_field_schema_get_dimension(const ZVecFieldSchema *schema) { + if (!schema) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Field schema pointer cannot be null"); + return 0; + } + return schema->dimension; +} + +ZVecErrorCode zvec_field_schema_set_dimension(ZVecFieldSchema *schema, + uint32_t dimension) { + if (!schema) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Field schema pointer cannot be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + schema->dimension = dimension; + return ZVEC_OK; +} + +bool zvec_field_schema_has_index(const ZVecFieldSchema *schema) { + if (!schema) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Field schema pointer cannot be null"); + return false; + } + return schema->has_index; +} + +ZVecIndexType zvec_field_schema_get_index_type(const ZVecFieldSchema *schema) { + if (!schema) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Field schema pointer cannot be null"); + return ZVEC_INDEX_TYPE_UNDEFINED; + } + if (!schema->index_params) { + return ZVEC_INDEX_TYPE_UNDEFINED; + } + return schema->index_params->index_type; +} + +const ZVecIndexParams *zvec_field_schema_get_index_params( + const ZVecFieldSchema *schema) { + if (!schema) { + return nullptr; + } + return schema->index_params; +} + +bool zvec_field_schema_is_vector_field(const ZVecFieldSchema *schema) { + if (!schema) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Field schema pointer cannot be null"); + return false; + } + ZVecDataType 
data_type = schema->data_type; + return (data_type == ZVEC_DATA_TYPE_VECTOR_FP32 || + data_type == ZVEC_DATA_TYPE_VECTOR_FP64 || + data_type == ZVEC_DATA_TYPE_VECTOR_FP16 || + data_type == ZVEC_DATA_TYPE_VECTOR_BINARY32 || + data_type == ZVEC_DATA_TYPE_VECTOR_BINARY64 || + data_type == ZVEC_DATA_TYPE_VECTOR_INT4 || + data_type == ZVEC_DATA_TYPE_VECTOR_INT8 || + data_type == ZVEC_DATA_TYPE_VECTOR_INT16 || + data_type == ZVEC_DATA_TYPE_SPARSE_VECTOR_FP32 || + data_type == ZVEC_DATA_TYPE_SPARSE_VECTOR_FP16); +} + +bool zvec_field_schema_is_dense_vector(const ZVecFieldSchema *schema) { + if (!schema) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Field schema pointer cannot be null"); + return false; + } + ZVecDataType data_type = schema->data_type; + return (data_type == ZVEC_DATA_TYPE_VECTOR_FP32 || + data_type == ZVEC_DATA_TYPE_VECTOR_FP64 || + data_type == ZVEC_DATA_TYPE_VECTOR_FP16 || + data_type == ZVEC_DATA_TYPE_VECTOR_BINARY32 || + data_type == ZVEC_DATA_TYPE_VECTOR_BINARY64 || + data_type == ZVEC_DATA_TYPE_VECTOR_INT4 || + data_type == ZVEC_DATA_TYPE_VECTOR_INT8 || + data_type == ZVEC_DATA_TYPE_VECTOR_INT16); +} + +bool zvec_field_schema_is_sparse_vector(const ZVecFieldSchema *schema) { + if (!schema) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Field schema pointer cannot be null"); + return false; + } + ZVecDataType data_type = schema->data_type; + return (data_type == ZVEC_DATA_TYPE_SPARSE_VECTOR_FP32 || + data_type == ZVEC_DATA_TYPE_SPARSE_VECTOR_FP16); +} + +bool zvec_field_schema_is_array_type(const ZVecFieldSchema *schema) { + if (!schema) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Field schema pointer cannot be null"); + return false; + } + ZVecDataType data_type = schema->data_type; + return (data_type == ZVEC_DATA_TYPE_ARRAY_BINARY || + data_type == ZVEC_DATA_TYPE_ARRAY_STRING || + data_type == ZVEC_DATA_TYPE_ARRAY_BOOL || + data_type == ZVEC_DATA_TYPE_ARRAY_INT32 || + data_type == ZVEC_DATA_TYPE_ARRAY_INT64 || + data_type == 
ZVEC_DATA_TYPE_ARRAY_UINT32 || + data_type == ZVEC_DATA_TYPE_ARRAY_UINT64 || + data_type == ZVEC_DATA_TYPE_ARRAY_FLOAT || + data_type == ZVEC_DATA_TYPE_ARRAY_DOUBLE); +} + +ZVecDataType zvec_field_schema_get_element_data_type( + const ZVecFieldSchema *schema) { + if (!schema) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Field schema pointer cannot be null"); + return ZVEC_DATA_TYPE_UNDEFINED; + } + ZVecDataType data_type = schema->data_type; + switch (data_type) { + case ZVEC_DATA_TYPE_ARRAY_BINARY: + return ZVEC_DATA_TYPE_BINARY; + case ZVEC_DATA_TYPE_ARRAY_STRING: + return ZVEC_DATA_TYPE_STRING; + case ZVEC_DATA_TYPE_ARRAY_BOOL: + return ZVEC_DATA_TYPE_BOOL; + case ZVEC_DATA_TYPE_ARRAY_INT32: + return ZVEC_DATA_TYPE_INT32; + case ZVEC_DATA_TYPE_ARRAY_INT64: + return ZVEC_DATA_TYPE_INT64; + case ZVEC_DATA_TYPE_ARRAY_UINT32: + return ZVEC_DATA_TYPE_UINT32; + case ZVEC_DATA_TYPE_ARRAY_UINT64: + return ZVEC_DATA_TYPE_UINT64; + case ZVEC_DATA_TYPE_ARRAY_FLOAT: + return ZVEC_DATA_TYPE_FLOAT; + case ZVEC_DATA_TYPE_ARRAY_DOUBLE: + return ZVEC_DATA_TYPE_DOUBLE; + default: + return data_type; + } +} + +bool zvec_field_schema_has_invert_index(const ZVecFieldSchema *schema) { + if (!schema) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Field schema pointer cannot be null"); + return false; + } + // Invert index is for non-vector fields with index + if (zvec_field_schema_is_vector_field(schema)) { + return false; + } + return schema->has_index && schema->index_params && + schema->index_params->index_type == ZVEC_INDEX_TYPE_INVERT; +} + +// Helper function to check if a data type is a vector type +bool zvec_is_vector_data_type(ZVecDataType data_type) { + return (data_type == ZVEC_DATA_TYPE_VECTOR_FP32 || + data_type == ZVEC_DATA_TYPE_VECTOR_FP64 || + data_type == ZVEC_DATA_TYPE_VECTOR_FP16 || + data_type == ZVEC_DATA_TYPE_VECTOR_BINARY32 || + data_type == ZVEC_DATA_TYPE_VECTOR_BINARY64 || + data_type == ZVEC_DATA_TYPE_VECTOR_INT4 || + data_type == 
ZVEC_DATA_TYPE_VECTOR_INT8 || + data_type == ZVEC_DATA_TYPE_VECTOR_INT16 || + data_type == ZVEC_DATA_TYPE_SPARSE_VECTOR_FP32 || + data_type == ZVEC_DATA_TYPE_SPARSE_VECTOR_FP16); +} + +ZVecErrorCode zvec_field_schema_clear_index(ZVecFieldSchema *schema) { + if (!schema) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Field schema pointer cannot be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + if (schema->index_params) { + zvec_index_params_destroy(schema->index_params); + schema->index_params = nullptr; + } + schema->has_index = false; + return ZVEC_OK; +} + +ZVecErrorCode zvec_field_schema_set_index_params( + ZVecFieldSchema *schema, const ZVecIndexParams *index_params) { + if (!schema) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Field schema pointer cannot be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + if (!index_params) { + if (schema->index_params) { + zvec_index_params_destroy(schema->index_params); + schema->index_params = nullptr; + } + schema->has_index = false; + return ZVEC_OK; + } + + // Clone the index_params (create a new copy) + if (schema->index_params) { + zvec_index_params_destroy(schema->index_params); + } + schema->index_params = zvec_index_params_create(index_params->index_type); + if (!schema->index_params) { + SET_LAST_ERROR(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to clone index params"); + return ZVEC_ERROR_RESOURCE_EXHAUSTED; + } + + // Copy all fields using getter/setter API + ZVecErrorCode err = ZVEC_OK; + err = zvec_index_params_set_metric_type(schema->index_params, + index_params->metric_type); + if (err != ZVEC_OK) return err; + + err = zvec_index_params_set_quantize_type(schema->index_params, + index_params->quantize_type); + if (err != ZVEC_OK) return err; + + // Copy type-specific params + switch (index_params->index_type) { + case ZVEC_INDEX_TYPE_INVERT: + err = zvec_index_params_set_invert_params( + schema->index_params, index_params->invert.enable_range_optimization, + 
index_params->invert.enable_extended_wildcard); + break; + case ZVEC_INDEX_TYPE_HNSW: + err = zvec_index_params_set_hnsw_params( + schema->index_params, index_params->hnsw.m, + index_params->hnsw.ef_construction); + break; + case ZVEC_INDEX_TYPE_IVF: + err = zvec_index_params_set_ivf_params( + schema->index_params, index_params->ivf.n_list, + index_params->ivf.n_iters, index_params->ivf.use_soar); + break; + case ZVEC_INDEX_TYPE_FLAT: + default: + break; + } + + if (err != ZVEC_OK) return err; + + schema->has_index = true; + return ZVEC_OK; +} + +void zvec_field_schema_set_invert_index(ZVecFieldSchema *field_schema, + const ZVecIndexParams *invert_params) { + if (field_schema && invert_params) { + if (field_schema->index_params) { + zvec_index_params_destroy(field_schema->index_params); + } + field_schema->index_params = + zvec_index_params_create(ZVEC_INDEX_TYPE_INVERT); + if (field_schema->index_params) { + field_schema->index_params->index_type = ZVEC_INDEX_TYPE_INVERT; + field_schema->index_params->metric_type = invert_params->metric_type; + field_schema->index_params->quantize_type = invert_params->quantize_type; + field_schema->index_params->invert.enable_range_optimization = + invert_params->invert.enable_range_optimization; + field_schema->index_params->invert.enable_extended_wildcard = + invert_params->invert.enable_extended_wildcard; + field_schema->has_index = true; + } + } +} + +void zvec_field_schema_set_hnsw_index(ZVecFieldSchema *field_schema, + const ZVecIndexParams *hnsw_params) { + if (field_schema && hnsw_params) { + if (field_schema->index_params) { + zvec_index_params_destroy(field_schema->index_params); + } + field_schema->index_params = zvec_index_params_create(ZVEC_INDEX_TYPE_HNSW); + if (field_schema->index_params) { + field_schema->index_params->index_type = ZVEC_INDEX_TYPE_HNSW; + field_schema->index_params->metric_type = hnsw_params->metric_type; + field_schema->index_params->quantize_type = hnsw_params->quantize_type; + 
field_schema->index_params->hnsw.m = hnsw_params->hnsw.m; + field_schema->index_params->hnsw.ef_construction = + hnsw_params->hnsw.ef_construction; + field_schema->has_index = true; + } + } +} + +void zvec_field_schema_set_flat_index(ZVecFieldSchema *field_schema, + const ZVecIndexParams *flat_params) { + if (field_schema && flat_params) { + if (field_schema->index_params) { + zvec_index_params_destroy(field_schema->index_params); + } + field_schema->index_params = zvec_index_params_create(ZVEC_INDEX_TYPE_FLAT); + if (field_schema->index_params) { + field_schema->index_params->index_type = ZVEC_INDEX_TYPE_FLAT; + field_schema->index_params->metric_type = flat_params->metric_type; + field_schema->index_params->quantize_type = flat_params->quantize_type; + field_schema->has_index = true; + } + } +} + +void zvec_field_schema_set_ivf_index(ZVecFieldSchema *field_schema, + const ZVecIndexParams *ivf_params) { + if (field_schema && ivf_params) { + if (field_schema->index_params) { + zvec_index_params_destroy(field_schema->index_params); + } + field_schema->index_params = zvec_index_params_create(ZVEC_INDEX_TYPE_IVF); + if (field_schema->index_params) { + field_schema->index_params->index_type = ZVEC_INDEX_TYPE_IVF; + field_schema->index_params->metric_type = ivf_params->metric_type; + field_schema->index_params->quantize_type = ivf_params->quantize_type; + field_schema->index_params->ivf.n_list = ivf_params->ivf.n_list; + field_schema->index_params->ivf.n_iters = ivf_params->ivf.n_iters; + field_schema->index_params->ivf.use_soar = ivf_params->ivf.use_soar; + field_schema->has_index = true; + } + } +} + +static void zvec_field_schema_cleanup(ZVecFieldSchema *field_schema) { + if (!field_schema) return; + + zvec_free_string(field_schema->name); + field_schema->name = nullptr; + if (field_schema->index_params) { + zvec_index_params_destroy(field_schema->index_params); + field_schema->index_params = nullptr; + } +} + +// 
============================================================================= +// CollectionOptions management interface implementation +// ============================================================================= + +// ============================================================================= +// CollectionOptions functions implementation +// ============================================================================= + +ZVecCollectionOptions *zvec_collection_options_create(void) { + ZVEC_TRY_RETURN_NULL( + "Failed to create ZVecCollectionOptions", + ZVecCollectionOptions *options = new ZVecCollectionOptions(); + options->enable_mmap = true; + options->max_buffer_size = zvec::DEFAULT_MAX_BUFFER_SIZE; + options->read_only = false; + options->max_doc_count_per_segment = zvec::MAX_DOC_COUNT_PER_SEGMENT; + return options;) + return nullptr; +} + +void zvec_collection_options_destroy(ZVecCollectionOptions *options) { + if (options) { + delete options; + } +} + +ZVecErrorCode zvec_collection_options_set_enable_mmap( + ZVecCollectionOptions *options, bool enable) { + if (!options) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Collection options pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + options->enable_mmap = enable; + return ZVEC_OK; +} + +bool zvec_collection_options_get_enable_mmap( + const ZVecCollectionOptions *options) { + if (!options) { + return true; // Default + } + return options->enable_mmap; +} + +ZVecErrorCode zvec_collection_options_set_max_buffer_size( + ZVecCollectionOptions *options, size_t size) { + if (!options) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Collection options pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + options->max_buffer_size = size; + return ZVEC_OK; +} + +size_t zvec_collection_options_get_max_buffer_size( + const ZVecCollectionOptions *options) { + if (!options) { + return zvec::DEFAULT_MAX_BUFFER_SIZE; // Default + } + return options->max_buffer_size; +} + +ZVecErrorCode 
zvec_collection_options_set_read_only( + ZVecCollectionOptions *options, bool read_only) { + if (!options) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Collection options pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + options->read_only = read_only; + return ZVEC_OK; +} + +bool zvec_collection_options_get_read_only( + const ZVecCollectionOptions *options) { + if (!options) { + return false; // Default + } + return options->read_only; +} + +ZVecErrorCode zvec_collection_options_set_max_doc_count_per_segment( + ZVecCollectionOptions *options, uint64_t count) { + if (!options) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Collection options pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + options->max_doc_count_per_segment = count; + return ZVEC_OK; +} + +uint64_t zvec_collection_options_get_max_doc_count_per_segment( + const ZVecCollectionOptions *options) { + if (!options) { + return zvec::MAX_DOC_COUNT_PER_SEGMENT; // Default + } + return options->max_doc_count_per_segment; +} + +// ============================================================================= +// CollectionStats functions implementation +// ============================================================================= + +uint64_t zvec_collection_stats_get_doc_count(const ZVecCollectionStats *stats) { + if (!stats) { + return 0; + } + return stats->doc_count; +} + +size_t zvec_collection_stats_get_index_count(const ZVecCollectionStats *stats) { + if (!stats) { + return 0; + } + return stats->index_count; +} + +const char *zvec_collection_stats_get_index_name( + const ZVecCollectionStats *stats, size_t index) { + if (!stats || !stats->index_names || index >= stats->index_count) { + return nullptr; + } + return stats->index_names[index]->data; +} + +float zvec_collection_stats_get_index_completeness( + const ZVecCollectionStats *stats, size_t index) { + if (!stats || !stats->index_completeness || index >= stats->index_count) { + return 0.0f; + } + return 
stats->index_completeness[index]; +} + +// ============================================================================= +// CollectionSchema management interface implementation +// ============================================================================= + +ZVecCollectionSchema *zvec_collection_schema_create(const char *name) { + if (!name) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Collection name cannot be null"); + return nullptr; + } + + ZVecCollectionSchema *schema = new ZVecCollectionSchema(); + if (!schema) { + SET_LAST_ERROR(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to allocate memory for ZVecCollectionSchema"); + return nullptr; + } + + schema->name = zvec_string_create(name); + if (!schema->name) { + delete schema; + SET_LAST_ERROR(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to create string for collection name"); + return nullptr; + } + + schema->fields = nullptr; + schema->field_count = 0; + schema->field_capacity = 0; + schema->max_doc_count_per_segment = zvec::MAX_DOC_COUNT_PER_SEGMENT; + + return schema; +} + +void zvec_collection_schema_destroy(ZVecCollectionSchema *schema) { + if (schema) { + zvec_free_string(schema->name); + + if (schema->fields) { + for (size_t i = 0; i < schema->field_count; ++i) { + zvec_field_schema_destroy(schema->fields[i]); + } + free(schema->fields); + } + + delete schema; + } +} + +const char *zvec_collection_schema_get_name( + const ZVecCollectionSchema *schema) { + if (!schema) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Collection schema pointer cannot be null"); + return nullptr; + } + return zvec_string_c_str(schema->name); +} + +ZVecErrorCode zvec_collection_schema_add_field(ZVecCollectionSchema *schema, + ZVecFieldSchema *field) { + if (!schema) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Collection schema pointer cannot be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + if (!field) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, "Field pointer cannot be null"); + return 
ZVEC_ERROR_INVALID_ARGUMENT; + } + + const char *field_name = zvec_field_schema_get_name(field); + if (!field_name) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, "Field name cannot be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + for (size_t i = 0; i < schema->field_count; ++i) { + const char *existing_name = zvec_field_schema_get_name(schema->fields[i]); + if (existing_name && strcmp(existing_name, field_name) == 0) { + SET_LAST_ERROR(ZVEC_ERROR_ALREADY_EXISTS, + std::string("Field '") + field_name + "' already exists"); + return ZVEC_ERROR_ALREADY_EXISTS; + } + } + + if (schema->field_count >= schema->field_capacity) { + size_t new_capacity = + schema->field_capacity == 0 ? 8 : schema->field_capacity * 2; + ZVecFieldSchema **new_fields = static_cast( + malloc(new_capacity * sizeof(ZVecFieldSchema *))); + if (!new_fields) { + SET_LAST_ERROR(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to allocate memory for fields"); + return ZVEC_ERROR_RESOURCE_EXHAUSTED; + } + + for (size_t i = 0; i < schema->field_count; ++i) { + new_fields[i] = schema->fields[i]; + } + + free(schema->fields); + schema->fields = new_fields; + schema->field_capacity = new_capacity; + } + + schema->fields[schema->field_count] = field; + schema->field_count++; + + return ZVEC_OK; +} + +ZVecErrorCode zvec_collection_schema_add_fields( + ZVecCollectionSchema *schema, const ZVecFieldSchema *const *fields, + size_t field_count) { + if (!schema) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Collection schema pointer cannot be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + if (!fields && field_count > 0) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Fields array cannot be null when field_count > 0"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + if (field_count == 0) { + return ZVEC_OK; + } + + // Validate all fields first + for (size_t i = 0; i < field_count; ++i) { + if (!fields[i]) { + SET_LAST_ERROR( + ZVEC_ERROR_INVALID_ARGUMENT, + std::string("Field at index ") + 
std::to_string(i) + " is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + const char *field_name = zvec_field_schema_get_name(fields[i]); + if (!field_name || strlen(field_name) == 0) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + std::string("Field at index ") + std::to_string(i) + + " has invalid name"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + } + + size_t total_needed = schema->field_count + field_count; + if (total_needed > schema->field_capacity) { + size_t new_capacity = schema->field_capacity; + while (new_capacity < total_needed) { + new_capacity = new_capacity == 0 ? 8 : new_capacity * 2; + } + + ZVecFieldSchema **new_fields = static_cast( + malloc(new_capacity * sizeof(ZVecFieldSchema *))); + if (!new_fields) { + SET_LAST_ERROR(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to allocate memory for fields"); + return ZVEC_ERROR_RESOURCE_EXHAUSTED; + } + + for (size_t i = 0; i < schema->field_count; ++i) { + new_fields[i] = schema->fields[i]; + } + + free(schema->fields); + schema->fields = new_fields; + schema->field_capacity = new_capacity; + } + + // Clone each field and add to schema + for (size_t i = 0; i < field_count; ++i) { + const ZVecFieldSchema *src_field = fields[i]; + const char *field_name = zvec_field_schema_get_name(src_field); + ZVecDataType data_type = zvec_field_schema_get_data_type(src_field); + bool nullable = zvec_field_schema_is_nullable(src_field); + uint32_t dimension = zvec_field_schema_get_dimension(src_field); + + // Create a new field with the same properties + ZVecFieldSchema *new_field = + zvec_field_schema_create(field_name, data_type, nullable, dimension); + if (!new_field) { + // Clean up previously created fields + for (size_t j = 0; j < i; ++j) { + zvec_field_schema_destroy( + schema->fields[schema->field_count - (i - j)]); + } + SET_LAST_ERROR(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to create new field"); + return ZVEC_ERROR_RESOURCE_EXHAUSTED; + } + + // Copy index params if present + if 
(zvec_field_schema_has_index(src_field)) { + // Internal access: we need to get the index_params pointer + // Use the same hack as in set_field_index_params + struct InternalFieldSchema { + ZVecString *name; + ZVecDataType data_type; + bool nullable; + uint32_t dimension; + ZVecIndexParams *index_params; + bool has_index; + }; + const ZVecIndexParams *src_index_params = + reinterpret_cast(src_field) + ->index_params; + if (src_index_params) { + zvec_field_schema_set_index_params(new_field, src_index_params); + } + } + + schema->fields[schema->field_count] = new_field; + schema->field_count++; + } + + return ZVEC_OK; +} + +ZVecErrorCode zvec_collection_schema_remove_field(ZVecCollectionSchema *schema, + const char *field_name) { + if (!schema) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Collection schema pointer cannot be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + if (!field_name) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, "Field name cannot be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + for (size_t i = 0; i < schema->field_count; ++i) { + if (schema->fields[i]->name && + strcmp(schema->fields[i]->name->data, field_name) == 0) { + zvec_field_schema_destroy(schema->fields[i]); + + for (size_t j = i; j < schema->field_count - 1; ++j) { + schema->fields[j] = schema->fields[j + 1]; + } + + schema->field_count--; + return ZVEC_OK; + } + } + + SET_LAST_ERROR(ZVEC_ERROR_NOT_FOUND, + std::string("Field '") + field_name + "' not found"); + return ZVEC_ERROR_NOT_FOUND; +} + +ZVecErrorCode zvec_collection_schema_remove_fields( + ZVecCollectionSchema *schema, const char *const *field_names, + size_t field_count) { + if (!schema) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Collection schema pointer cannot be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + if (!field_names && field_count > 0) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Field names array cannot be null when field_count > 0"); + return 
ZVEC_ERROR_INVALID_ARGUMENT; + } + + if (field_count == 0) { + return ZVEC_OK; + } + + for (size_t i = 0; i < field_count; ++i) { + if (!field_names[i]) { + SET_LAST_ERROR( + ZVEC_ERROR_INVALID_ARGUMENT, + std::string("Field name at index ") + std::to_string(i) + " is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + } + + std::vector remove_indices; + std::vector not_found_fields; + + for (size_t field_idx = 0; field_idx < field_count; ++field_idx) { + std::string target_name(field_names[field_idx]); + bool found = false; + + for (size_t i = 0; i < schema->field_count; ++i) { + const char *current_name = zvec_field_schema_get_name(schema->fields[i]); + if (current_name && strcmp(current_name, target_name.c_str()) == 0) { + remove_indices.push_back(i); + found = true; + break; + } + } + + if (!found) { + not_found_fields.push_back(target_name); + } + } + + + if (!not_found_fields.empty()) { + std::string error_msg = "Fields not found: "; + for (size_t i = 0; i < not_found_fields.size(); ++i) { + error_msg += "'" + not_found_fields[i] + "'"; + if (i < not_found_fields.size() - 1) { + error_msg += ", "; + } + } + SET_LAST_ERROR(ZVEC_ERROR_NOT_FOUND, error_msg); + return ZVEC_ERROR_NOT_FOUND; + } + + std::sort(remove_indices.begin(), remove_indices.end(), + std::greater()); + + for (size_t remove_index : remove_indices) { + zvec_field_schema_destroy(schema->fields[remove_index]); + + for (size_t j = remove_index; j < schema->field_count - 1; ++j) { + schema->fields[j] = schema->fields[j + 1]; + } + + schema->field_count--; + } + + return ZVEC_OK; +} + +ZVecFieldSchema *zvec_collection_schema_find_field( + const ZVecCollectionSchema *schema, const char *field_name) { + if (!schema || !field_name) { + return nullptr; + } + + for (size_t i = 0; i < schema->field_count; ++i) { + const char *current_name = zvec_field_schema_get_name(schema->fields[i]); + if (current_name && strcmp(current_name, field_name) == 0) { + return schema->fields[i]; + } + } + + return nullptr; 
+} + +size_t zvec_collection_schema_get_field_count( + const ZVecCollectionSchema *schema) { + if (!schema) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Collection schema pointer cannot be null"); + return 0; + } + + return schema->field_count; +} + +ZVecFieldSchema *zvec_collection_schema_get_field( + const ZVecCollectionSchema *schema, size_t index) { + if (!schema) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Collection schema pointer cannot be null"); + return nullptr; + } + + if (index >= schema->field_count) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, "Field index out of bounds"); + return nullptr; + } + + return schema->fields[index]; +} + +ZVecErrorCode zvec_collection_schema_set_max_doc_count_per_segment( + ZVecCollectionSchema *schema, uint64_t max_doc_count) { + if (!schema) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Collection schema pointer cannot be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + schema->max_doc_count_per_segment = max_doc_count; + return ZVEC_OK; +} + +uint64_t zvec_collection_schema_get_max_doc_count_per_segment( + const ZVecCollectionSchema *schema) { + if (!schema) return 0; + return schema->max_doc_count_per_segment; +} + +ZVecErrorCode zvec_collection_schema_validate( + const ZVecCollectionSchema *schema, ZVecString **error_msg) { + if (!schema) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Collection schema pointer cannot be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + if (error_msg) { + *error_msg = nullptr; + } + + if (!schema->name) { + if (error_msg) { + *error_msg = zvec_string_create("Collection name is required"); + } + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, "Collection name is required"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + if (schema->field_count == 0) { + if (error_msg) { + *error_msg = zvec_string_create("At least one field is required"); + } + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "At least one field is required"); + return 
ZVEC_ERROR_INVALID_ARGUMENT; + } + + for (size_t i = 0; i < schema->field_count; ++i) { + auto field = schema->fields[i]; + if (!field) { + if (error_msg) { + *error_msg = zvec_string_create("Null field found"); + } + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, "Null field found"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + const char *field_name = zvec_field_schema_get_name(field); + if (!field_name || strlen(field_name) == 0) { + if (error_msg) { + *error_msg = zvec_string_create("Field name is required"); + } + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, "Field name is required"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + } + + return ZVEC_OK; +} + +ZVecErrorCode zvec_collection_schema_set_name(ZVecCollectionSchema *schema, + const char *name) { + if (!schema) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Collection schema pointer cannot be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + if (!name) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, "Name cannot be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + ZVEC_TRY_BEGIN_VOID + if (schema->name) { + zvec_free_string(schema->name); + } + schema->name = zvec_string_create(name); + ZVEC_CATCH_END_VOID + + return ZVEC_OK; +} + +bool zvec_collection_schema_has_field(const ZVecCollectionSchema *schema, + const char *field_name) { + if (!schema || !field_name) { + return false; + } + + for (size_t i = 0; i < schema->field_count; ++i) { + const char *name = zvec_field_schema_get_name(schema->fields[i]); + if (name && strcmp(name, field_name) == 0) { + return true; + } + } + return false; +} + +ZVecErrorCode zvec_collection_schema_alter_field( + ZVecCollectionSchema *schema, const char *field_name, + const ZVecFieldSchema *new_field) { + if (!schema) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Collection schema pointer cannot be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + if (!field_name) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, "Field name cannot be null"); + return 
ZVEC_ERROR_INVALID_ARGUMENT; + } + if (!new_field) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, "New field cannot be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + ZVEC_TRY_BEGIN_CODE + // Find the field + for (size_t i = 0; i < schema->field_count; ++i) { + const char *name = zvec_field_schema_get_name(schema->fields[i]); + if (name && strcmp(name, field_name) == 0) { + // Clone the new field + ZVecFieldSchema *cloned = + zvec_field_schema_create(zvec_field_schema_get_name(new_field), + zvec_field_schema_get_data_type(new_field), + zvec_field_schema_is_nullable(new_field), + zvec_field_schema_get_dimension(new_field)); + + if (zvec_field_schema_has_index(new_field)) { + ZVecIndexType idx_type = zvec_field_schema_get_index_type(new_field); + ZVecIndexParams *cloned_params = zvec_index_params_create(idx_type); + const ZVecIndexParams *src_params = + zvec_field_schema_get_index_params(new_field); + + // Copy index parameters + switch (idx_type) { + case ZVEC_INDEX_TYPE_INVERT: { + bool enable_opt; + bool enable_wildcard; + zvec_index_params_get_invert_params(src_params, &enable_opt, + &enable_wildcard); + zvec_index_params_set_invert_params(cloned_params, enable_opt, + enable_wildcard); + break; + } + case ZVEC_INDEX_TYPE_HNSW: { + int m, ef_const; + zvec_index_params_get_hnsw_params(src_params, &m, &ef_const); + zvec_index_params_set_hnsw_params(cloned_params, m, ef_const); + break; + } + case ZVEC_INDEX_TYPE_IVF: { + int n_list, n_iters; + bool use_soar; + zvec_index_params_get_ivf_params(src_params, &n_list, &n_iters, + &use_soar); + zvec_index_params_set_ivf_params(cloned_params, n_list, n_iters, + use_soar); + break; + } + default: + break; + } + + zvec_field_schema_set_index_params(cloned, cloned_params); + zvec_index_params_destroy(cloned_params); + } + + // Destroy old field and replace with new one + zvec_field_schema_destroy(schema->fields[i]); + schema->fields[i] = cloned; + return ZVEC_OK; + } + } + + SET_LAST_ERROR(ZVEC_ERROR_NOT_FOUND, "Field 
not found"); + return ZVEC_ERROR_NOT_FOUND; + ZVEC_CATCH_END_CODE(ZVEC_ERROR_UNKNOWN) +} + +ZVecFieldSchema *zvec_collection_schema_get_forward_field( + const ZVecCollectionSchema *schema, const char *field_name) { + if (!schema || !field_name) { + return nullptr; + } + + for (size_t i = 0; i < schema->field_count; ++i) { + ZVecFieldSchema *field = schema->fields[i]; + const char *name = zvec_field_schema_get_name(field); + if (name && strcmp(name, field_name) == 0) { + // Check if it's a scalar field (not vector) + ZVecDataType data_type = zvec_field_schema_get_data_type(field); + if (!zvec_is_vector_data_type(data_type)) { + return field; + } + } + } + return nullptr; +} + +ZVecFieldSchema *zvec_collection_schema_get_vector_field( + const ZVecCollectionSchema *schema, const char *field_name) { + if (!schema || !field_name) { + return nullptr; + } + + for (size_t i = 0; i < schema->field_count; ++i) { + ZVecFieldSchema *field = schema->fields[i]; + const char *name = zvec_field_schema_get_name(field); + if (name && strcmp(name, field_name) == 0) { + // Check if it's a vector field + ZVecDataType data_type = zvec_field_schema_get_data_type(field); + if (zvec_is_vector_data_type(data_type)) { + return field; + } + } + } + return nullptr; +} + +ZVecErrorCode zvec_collection_schema_get_forward_fields( + const ZVecCollectionSchema *schema, ZVecFieldSchema ***fields, + size_t *count) { + if (!schema || !fields || !count) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Schema, fields, and count cannot be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + ZVEC_TRY_BEGIN_VOID + // Count scalar fields + size_t scalar_count = 0; + for (size_t i = 0; i < schema->field_count; ++i) { + ZVecDataType data_type = zvec_field_schema_get_data_type(schema->fields[i]); + if (!zvec_is_vector_data_type(data_type)) { + scalar_count++; + } + } + + *fields = + (ZVecFieldSchema **)malloc(scalar_count * sizeof(ZVecFieldSchema *)); + if (!*fields) { + 
SET_LAST_ERROR(ZVEC_ERROR_RESOURCE_EXHAUSTED, "Failed to allocate memory"); + return ZVEC_ERROR_RESOURCE_EXHAUSTED; + } + + // Fill the array + size_t idx = 0; + for (size_t i = 0; i < schema->field_count; ++i) { + ZVecDataType data_type = zvec_field_schema_get_data_type(schema->fields[i]); + if (!zvec_is_vector_data_type(data_type)) { + (*fields)[idx++] = schema->fields[i]; + } + } + + *count = scalar_count; + ZVEC_CATCH_END_VOID + + return ZVEC_OK; +} + +ZVecErrorCode zvec_collection_schema_get_forward_fields_with_index( + const ZVecCollectionSchema *schema, ZVecFieldSchema ***fields, + size_t *count) { + if (!schema || !fields || !count) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Schema, fields, and count cannot be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + ZVEC_TRY_BEGIN_VOID + // Count scalar fields with index + size_t indexed_count = 0; + for (size_t i = 0; i < schema->field_count; ++i) { + ZVecFieldSchema *field = schema->fields[i]; + ZVecDataType data_type = zvec_field_schema_get_data_type(field); + if (!zvec_is_vector_data_type(data_type) && + zvec_field_schema_has_index(field)) { + indexed_count++; + } + } + + *fields = + (ZVecFieldSchema **)malloc(indexed_count * sizeof(ZVecFieldSchema *)); + if (!*fields) { + SET_LAST_ERROR(ZVEC_ERROR_RESOURCE_EXHAUSTED, "Failed to allocate memory"); + return ZVEC_ERROR_RESOURCE_EXHAUSTED; + } + + // Fill the array + size_t idx = 0; + for (size_t i = 0; i < schema->field_count; ++i) { + ZVecFieldSchema *field = schema->fields[i]; + ZVecDataType data_type = zvec_field_schema_get_data_type(field); + if (!zvec_is_vector_data_type(data_type) && + zvec_field_schema_has_index(field)) { + (*fields)[idx++] = field; + } + } + + *count = indexed_count; + ZVEC_CATCH_END_VOID + + return ZVEC_OK; +} + +ZVecErrorCode zvec_collection_schema_get_all_field_names( + const ZVecCollectionSchema *schema, const char ***names, size_t *count) { + if (!schema || !names || !count) { + 
SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Schema, names, and count cannot be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + ZVEC_TRY_BEGIN_VOID + *count = schema->field_count; + *names = (const char **)malloc(schema->field_count * sizeof(const char *)); + if (!*names) { + SET_LAST_ERROR(ZVEC_ERROR_RESOURCE_EXHAUSTED, "Failed to allocate memory"); + return ZVEC_ERROR_RESOURCE_EXHAUSTED; + } + + for (size_t i = 0; i < schema->field_count; ++i) { + (*names)[i] = zvec_field_schema_get_name(schema->fields[i]); + } + + ZVEC_CATCH_END_VOID + + return ZVEC_OK; +} + +ZVecErrorCode zvec_collection_schema_get_vector_fields( + const ZVecCollectionSchema *schema, ZVecFieldSchema ***fields, + size_t *count) { + if (!schema || !fields || !count) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Schema, fields, and count cannot be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + ZVEC_TRY_BEGIN_VOID + // Count vector fields + size_t vector_count = 0; + for (size_t i = 0; i < schema->field_count; ++i) { + ZVecDataType data_type = zvec_field_schema_get_data_type(schema->fields[i]); + if (zvec_is_vector_data_type(data_type)) { + vector_count++; + } + } + + *fields = + (ZVecFieldSchema **)malloc(vector_count * sizeof(ZVecFieldSchema *)); + if (!*fields) { + SET_LAST_ERROR(ZVEC_ERROR_RESOURCE_EXHAUSTED, "Failed to allocate memory"); + return ZVEC_ERROR_RESOURCE_EXHAUSTED; + } + + // Fill the array + size_t idx = 0; + for (size_t i = 0; i < schema->field_count; ++i) { + ZVecDataType data_type = zvec_field_schema_get_data_type(schema->fields[i]); + if (zvec_is_vector_data_type(data_type)) { + (*fields)[idx++] = schema->fields[i]; + } + } + + *count = vector_count; + ZVEC_CATCH_END_VOID + + return ZVEC_OK; +} + +bool zvec_collection_schema_has_index(const ZVecCollectionSchema *schema, + const char *field_name) { + if (!schema || !field_name) { + return false; + } + + for (size_t i = 0; i < schema->field_count; ++i) { + ZVecFieldSchema *field = schema->fields[i]; + const 
char *name = zvec_field_schema_get_name(field); + if (name && strcmp(name, field_name) == 0) { + return zvec_field_schema_has_index(field); + } + } + return false; +} + +ZVecErrorCode zvec_collection_schema_add_index( + ZVecCollectionSchema *schema, const char *field_name, + const ZVecIndexParams *index_params) { + if (!schema) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Collection schema pointer cannot be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + if (!field_name) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, "Field name cannot be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + if (!index_params) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, "Index params cannot be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + ZVEC_TRY_BEGIN_CODE + for (size_t i = 0; i < schema->field_count; ++i) { + ZVecFieldSchema *field = schema->fields[i]; + const char *name = zvec_field_schema_get_name(field); + if (name && strcmp(name, field_name) == 0) { + // Clone the index params + ZVecIndexType idx_type = zvec_index_params_get_type(index_params); + ZVecIndexParams *cloned_params = zvec_index_params_create(idx_type); + + // Copy parameters based on type + switch (idx_type) { + case ZVEC_INDEX_TYPE_INVERT: { + bool enable_opt, enable_wildcard; + zvec_index_params_get_invert_params(index_params, &enable_opt, + &enable_wildcard); + zvec_index_params_set_invert_params(cloned_params, enable_opt, + enable_wildcard); + break; + } + case ZVEC_INDEX_TYPE_HNSW: { + int m, ef_const; + zvec_index_params_get_hnsw_params(index_params, &m, &ef_const); + zvec_index_params_set_hnsw_params(cloned_params, m, ef_const); + break; + } + case ZVEC_INDEX_TYPE_IVF: { + int n_list, n_iters; + bool use_soar; + zvec_index_params_get_ivf_params(index_params, &n_list, &n_iters, + &use_soar); + zvec_index_params_set_ivf_params(cloned_params, n_list, n_iters, + use_soar); + break; + } + default: + break; + } + + zvec_field_schema_set_index_params(field, cloned_params); + 
zvec_index_params_destroy(cloned_params); + return ZVEC_OK; + } + } + + SET_LAST_ERROR(ZVEC_ERROR_NOT_FOUND, "Field not found"); + return ZVEC_ERROR_NOT_FOUND; + ZVEC_CATCH_END_CODE(ZVEC_ERROR_UNKNOWN) +} + +ZVecErrorCode zvec_collection_schema_drop_index(ZVecCollectionSchema *schema, + const char *field_name) { + if (!schema) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Collection schema pointer cannot be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + if (!field_name) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, "Field name cannot be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + ZVEC_TRY_BEGIN_CODE + for (size_t i = 0; i < schema->field_count; ++i) { + ZVecFieldSchema *field = schema->fields[i]; + const char *name = zvec_field_schema_get_name(field); + if (name && strcmp(name, field_name) == 0) { + // Clear the index + zvec_field_schema_clear_index(field); + return ZVEC_OK; + } + } + + SET_LAST_ERROR(ZVEC_ERROR_NOT_FOUND, "Field not found"); + return ZVEC_ERROR_NOT_FOUND; + ZVEC_CATCH_END_CODE(ZVEC_ERROR_UNKNOWN) +} + +void zvec_collection_schema_cleanup(ZVecCollectionSchema *schema) { + if (!schema) return; + + ZVEC_TRY_BEGIN_VOID + if (schema->name) { + zvec_free_string(schema->name); + } + + if (schema->fields) { + for (size_t i = 0; i < schema->field_count; ++i) { + zvec_field_schema_destroy(schema->fields[i]); + } + free(schema->fields); + schema->fields = nullptr; + schema->field_count = 0; + } + + schema->max_doc_count_per_segment = 0; + ZVEC_CATCH_END_VOID +} + +// ============================================================================= +// Helper functions +// ============================================================================= + +const char *zvec_error_code_to_string(ZVecErrorCode error_code) { + switch (error_code) { + case ZVEC_OK: + return "OK"; + case ZVEC_ERROR_NOT_FOUND: + return "NOT_FOUND"; + case ZVEC_ERROR_ALREADY_EXISTS: + return "ALREADY_EXISTS"; + case ZVEC_ERROR_INVALID_ARGUMENT: + return 
"INVALID_ARGUMENT"; + case ZVEC_ERROR_PERMISSION_DENIED: + return "PERMISSION_DENIED"; + case ZVEC_ERROR_FAILED_PRECONDITION: + return "FAILED_PRECONDITION"; + case ZVEC_ERROR_RESOURCE_EXHAUSTED: + return "RESOURCE_EXHAUSTED"; + case ZVEC_ERROR_UNAVAILABLE: + return "UNAVAILABLE"; + case ZVEC_ERROR_INTERNAL_ERROR: + return "INTERNAL_ERROR"; + case ZVEC_ERROR_NOT_SUPPORTED: + return "NOT_SUPPORTED"; + case ZVEC_ERROR_UNKNOWN: + return "UNKNOWN"; + default: + return "UNKNOWN_ERROR_CODE"; + } +} + +const char *zvec_data_type_to_string(ZVecDataType data_type) { + switch (data_type) { + case ZVEC_DATA_TYPE_UNDEFINED: + return "UNDEFINED"; + case ZVEC_DATA_TYPE_BINARY: + return "BINARY"; + case ZVEC_DATA_TYPE_STRING: + return "STRING"; + case ZVEC_DATA_TYPE_BOOL: + return "BOOL"; + case ZVEC_DATA_TYPE_INT32: + return "INT32"; + case ZVEC_DATA_TYPE_INT64: + return "INT64"; + case ZVEC_DATA_TYPE_UINT32: + return "UINT32"; + case ZVEC_DATA_TYPE_UINT64: + return "UINT64"; + case ZVEC_DATA_TYPE_FLOAT: + return "FLOAT"; + case ZVEC_DATA_TYPE_DOUBLE: + return "DOUBLE"; + case ZVEC_DATA_TYPE_VECTOR_BINARY32: + return "VECTOR_BINARY32"; + case ZVEC_DATA_TYPE_VECTOR_BINARY64: + return "VECTOR_BINARY64"; + case ZVEC_DATA_TYPE_VECTOR_FP16: + return "VECTOR_FP16"; + case ZVEC_DATA_TYPE_VECTOR_FP32: + return "VECTOR_FP32"; + case ZVEC_DATA_TYPE_VECTOR_FP64: + return "VECTOR_FP64"; + case ZVEC_DATA_TYPE_VECTOR_INT4: + return "VECTOR_INT4"; + case ZVEC_DATA_TYPE_VECTOR_INT8: + return "VECTOR_INT8"; + case ZVEC_DATA_TYPE_VECTOR_INT16: + return "VECTOR_INT16"; + case ZVEC_DATA_TYPE_SPARSE_VECTOR_FP16: + return "SPARSE_VECTOR_FP16"; + case ZVEC_DATA_TYPE_SPARSE_VECTOR_FP32: + return "SPARSE_VECTOR_FP32"; + case ZVEC_DATA_TYPE_ARRAY_BINARY: + return "ARRAY_BINARY"; + case ZVEC_DATA_TYPE_ARRAY_STRING: + return "ARRAY_STRING"; + case ZVEC_DATA_TYPE_ARRAY_BOOL: + return "ARRAY_BOOL"; + case ZVEC_DATA_TYPE_ARRAY_INT32: + return "ARRAY_INT32"; + case ZVEC_DATA_TYPE_ARRAY_INT64: + return 
"ARRAY_INT64"; + case ZVEC_DATA_TYPE_ARRAY_UINT32: + return "ARRAY_UINT32"; + case ZVEC_DATA_TYPE_ARRAY_UINT64: + return "ARRAY_UINT64"; + case ZVEC_DATA_TYPE_ARRAY_FLOAT: + return "ARRAY_FLOAT"; + case ZVEC_DATA_TYPE_ARRAY_DOUBLE: + return "ARRAY_DOUBLE"; + default: + return "UNKNOWN_DATA_TYPE"; + } +} + +const char *zvec_index_type_to_string(ZVecIndexType index_type) { + switch (index_type) { + case ZVEC_INDEX_TYPE_UNDEFINED: + return "UNDEFINED"; + case ZVEC_INDEX_TYPE_HNSW: + return "HNSW"; + case ZVEC_INDEX_TYPE_IVF: + return "IVF"; + case ZVEC_INDEX_TYPE_FLAT: + return "FLAT"; + case ZVEC_INDEX_TYPE_INVERT: + return "INVERT"; + default: + return "UNKNOWN_INDEX_TYPE"; + } +} + +const char *zvec_metric_type_to_string(ZVecMetricType metric_type) { + switch (metric_type) { + case ZVEC_METRIC_TYPE_UNDEFINED: + return "UNDEFINED"; + case ZVEC_METRIC_TYPE_L2: + return "L2"; + case ZVEC_METRIC_TYPE_IP: + return "IP"; + case ZVEC_METRIC_TYPE_COSINE: + return "COSINE"; + case ZVEC_METRIC_TYPE_MIPSL2: + return "MIPSL2"; + default: + return "UNKNOWN_METRIC_TYPE"; + } +} + +bool check_is_vector_field(const ZVecFieldSchema &zvec_field) { + ZVecDataType data_type = zvec_field_schema_get_data_type(&zvec_field); + bool is_vector_field = (data_type == ZVEC_DATA_TYPE_VECTOR_FP32 || + data_type == ZVEC_DATA_TYPE_VECTOR_FP64 || + data_type == ZVEC_DATA_TYPE_VECTOR_FP16 || + data_type == ZVEC_DATA_TYPE_VECTOR_BINARY32 || + data_type == ZVEC_DATA_TYPE_VECTOR_BINARY64 || + data_type == ZVEC_DATA_TYPE_VECTOR_INT4 || + data_type == ZVEC_DATA_TYPE_VECTOR_INT8 || + data_type == ZVEC_DATA_TYPE_VECTOR_INT16 || + data_type == ZVEC_DATA_TYPE_SPARSE_VECTOR_FP32 || + data_type == ZVEC_DATA_TYPE_SPARSE_VECTOR_FP16); + return is_vector_field; +} + +// ============================================================================= +// Doc functions implementation +// ============================================================================= + +ZVecDoc *zvec_doc_create(void) { + 
ZVEC_TRY_RETURN_NULL("Failed to create document", {
+    auto doc_ptr =
+        new std::shared_ptr(std::make_shared());
+    return reinterpret_cast(doc_ptr);
+  })
+}
+
+// NOTE(review): the casts in this section read `reinterpret_cast *>(doc)` and
+// `static_cast(op)`; their template arguments appear to have been lost in this
+// copy of the patch. Restore them from the upstream source before building.
+
+// Deletes the heap-allocated shared-pointer wrapper behind `doc`.
+// A null `doc` is ignored.
+void zvec_doc_destroy(ZVecDoc *doc) {
+  if (doc) {
+    delete reinterpret_cast *>(doc);
+  }
+}
+
+// Calls clear() on the wrapped document. A null `doc` is ignored.
+void zvec_doc_clear(ZVecDoc *doc) {
+  if (!doc) return;
+
+  ZVEC_TRY_BEGIN_VOID
+  auto doc_ptr = reinterpret_cast *>(doc);
+  (*doc_ptr)->clear();
+  ZVEC_CATCH_END_VOID
+}
+
+// Destroys each of the `count` documents with zvec_doc_destroy() and then
+// releases the array itself with free(). A null `docs` is ignored.
+void zvec_docs_free(ZVecDoc **docs, size_t count) {
+  if (!docs) return;
+
+  for (size_t i = 0; i < count; ++i) {
+    zvec_doc_destroy(docs[i]);
+  }
+
+  free(docs);
+}
+
+// Public wrapper that forwards to free_write_results_internal().
+void zvec_write_results_free(ZVecWriteResult *results, size_t result_count) {
+  free_write_results_internal(results, result_count);
+}
+
+// Copies the NUL-terminated `pk` into a std::string and passes it to the
+// document's set_pk(). Does nothing when `doc` or `pk` is null.
+void zvec_doc_set_pk(ZVecDoc *doc, const char *pk) {
+  if (!doc || !pk) return;
+
+  ZVEC_TRY_BEGIN_VOID
+  auto doc_ptr = reinterpret_cast *>(doc);
+  (*doc_ptr)->set_pk(std::string(pk));
+  ZVEC_CATCH_END_VOID
+}
+
+// Forwards `doc_id` to the document's set_doc_id(). A null `doc` is ignored.
+void zvec_doc_set_doc_id(ZVecDoc *doc, uint64_t doc_id) {
+  if (!doc) return;
+
+  ZVEC_TRY_BEGIN_VOID
+  auto doc_ptr = reinterpret_cast *>(doc);
+  (*doc_ptr)->set_doc_id(doc_id);
+  ZVEC_CATCH_END_VOID
+}
+
+// Forwards `score` to the document's set_score(). A null `doc` is ignored.
+void zvec_doc_set_score(ZVecDoc *doc, float score) {
+  if (!doc) return;
+
+  ZVEC_TRY_BEGIN_VOID
+  auto doc_ptr = reinterpret_cast *>(doc);
+  (*doc_ptr)->set_score(score);
+  ZVEC_CATCH_END_VOID
+}
+
+// Casts `op` to the internal operator type and passes it to the document's
+// set_operator(). A null `doc` is ignored.
+void zvec_doc_set_operator(ZVecDoc *doc, ZVecDocOperator op) {
+  if (!doc) return;
+
+  ZVEC_TRY_BEGIN_VOID
+  auto doc_ptr = reinterpret_cast *>(doc);
+  (*doc_ptr)->set_operator(static_cast(op));
+  ZVEC_CATCH_END_VOID
+}
+
+// Marks the field called `field_name` as null through the document's
+// set_null(). Returns ZVEC_ERROR_INVALID_ARGUMENT when `doc` or `field_name`
+// is null, ZVEC_OK otherwise.
+ZVecErrorCode zvec_doc_set_field_null(ZVecDoc *doc, const char *field_name) {
+  if (!doc || !field_name) {
+    set_last_error("Invalid arguments: null pointer");
+    return ZVEC_ERROR_INVALID_ARGUMENT;
+  }
+
+  ZVEC_TRY_RETURN_ERROR(
+      "Failed to set null field",
+      auto doc_ptr = reinterpret_cast *>(doc);
+      (*doc_ptr)->set_null(std::string(field_name)); return ZVEC_OK;)
+}
+
+// 
=============================================================================
+// Document interface implementation
+// =============================================================================
+
+// Reads one scalar of type T from `value`.
+// `value_size` must equal sizeof(T); otherwise *error_code (when non-null) is
+// set to ZVEC_ERROR_INVALID_ARGUMENT and a value-initialised T is returned.
+// *error_code is left untouched on success.
+template <typename T>
+T extract_scalar_value(const void *value, size_t value_size,
+                       ZVecErrorCode *error_code) {
+  if (value_size != sizeof(T)) {
+    if (error_code) {
+      *error_code = ZVEC_ERROR_INVALID_ARGUMENT;
+    }
+    return T{};
+  }
+  return *static_cast<const T *>(value);
+}
+
+// Copies `value_size / sizeof(T)` elements of type T out of `value`.
+// `value_size` must be a multiple of sizeof(T); otherwise *error_code (when
+// non-null) is set to ZVEC_ERROR_INVALID_ARGUMENT and an empty vector is
+// returned. A size of 0 yields an empty vector without an error.
+template <typename T>
+std::vector<T> extract_vector_values(const void *value, size_t value_size,
+                                     ZVecErrorCode *error_code) {
+  if (value_size % sizeof(T) != 0) {
+    if (error_code) {
+      *error_code = ZVEC_ERROR_INVALID_ARGUMENT;
+    }
+    return std::vector<T>();
+  }
+  const T *first = static_cast<const T *>(value);
+  return std::vector<T>(first, first + value_size / sizeof(T));
+}
+
+// Array fields use the same flat element layout as dense vectors, so this
+// simply forwards to extract_vector_values().
+template <typename T>
+std::vector<T> extract_array_values(const void *value, size_t value_size,
+                                    ZVecErrorCode *error_code) {
+  return extract_vector_values<T>(value, value_size, error_code);
+}
+
+// Decodes a sparse vector laid out as
+//   [uint32 nnz][nnz x uint32 index][nnz x T value].
+// Returns the (indices, values) pair. When the buffer is too small for its
+// header or for the `nnz` entries it announces, *error_code (when non-null) is
+// set to ZVEC_ERROR_INVALID_ARGUMENT and two empty vectors are returned.
+// Bytes after the last value are ignored.
+template <typename T>
+std::pair<std::vector<uint32_t>, std::vector<T>> extract_sparse_vector(
+    const void *value, size_t value_size, ZVecErrorCode *error_code) {
+  const auto reject = [error_code]() {
+    if (error_code) {
+      *error_code = ZVEC_ERROR_INVALID_ARGUMENT;
+    }
+    return std::make_pair(std::vector<uint32_t>(), std::vector<T>());
+  };
+
+  if (value_size < sizeof(uint32_t)) {
+    return reject();
+  }
+
+  const uint32_t *data = static_cast<const uint32_t *>(value);
+  const uint32_t nnz = data[0];
+
+  // Compare by division instead of multiplying nnz by the entry size, so the
+  // check cannot wrap around when size_t is 32 bits wide.
+  const size_t entry_size = sizeof(uint32_t) + sizeof(T);
+  if (nnz > (value_size - sizeof(uint32_t)) / entry_size) {
+    return reject();
+  }
+
+  const uint32_t *indices = data + 1;
+  const T *values = reinterpret_cast<const T *>(indices + nnz);
+
+  std::vector<uint32_t> index_vec(indices, indices + nnz);
+  std::vector<T> value_vec(values, values + nnz);
+
+  return std::make_pair(std::move(index_vec), std::move(value_vec));
+}
+
+// Splits a buffer of back-to-back NUL-terminated strings into a vector.
+// Only strings whose terminator lies inside the first `value_size` bytes are
+// returned; an unterminated tail is dropped. The search for the terminator is
+// bounded by the buffer, so no byte past `value_size` is read.
+std::vector<std::string> extract_string_array(const void *value,
+                                              size_t value_size) {
+  std::vector<std::string> string_array;
+  const char *data = static_cast<const char *>(value);
+  size_t pos = 0;
+
+  while (pos < value_size) {
+    const void *terminator = memchr(data + pos, '\0', value_size - pos);
+    if (!terminator) {
+      break;
+    }
+    const size_t str_len =
+        static_cast<size_t>(static_cast<const char *>(terminator) - (data + pos));
+    string_array.emplace_back(data + pos, str_len);
+    pos += str_len + 1;
+  }
+  return string_array;
+}
+
+// Copies `count` ZVecString entries into std::string objects. A null entry or
+// an entry with null data becomes an empty string; a null array yields an
+// empty result.
+std::vector<std::string> extract_string_array_from_zvec(
+    ZVecString **zvec_strings, size_t count) {
+  std::vector<std::string> string_array;
+  if (!zvec_strings) {
+    return string_array;
+  }
+  string_array.reserve(count);
+
+  for (size_t i = 0; i < count; ++i) {
+    const ZVecString *entry = zvec_strings[i];
+    if (entry && entry->data) {
+      string_array.emplace_back(entry->data, entry->length);
+    } else {
+      string_array.emplace_back();
+    }
+  }
+
+  return string_array;
+}
+
+// Decodes a buffer of [uint32 length][length bytes] records into a vector of
+// byte strings. Decoding stops at the first record whose length prefix or
+// payload does not fit in the remaining bytes; records decoded so far are
+// returned.
+std::vector<std::string> extract_binary_array(const void *value,
+                                              size_t value_size) {
+  std::vector<std::string> binary_array;
+  const char *data = static_cast<const char *>(value);
+  size_t pos = 0;
+
+  while (pos < value_size) {
+    if (value_size - pos < sizeof(uint32_t)) {
+      break;
+    }
+    // Length prefixes follow variable-length payloads and can sit at any byte
+    // offset, so read them with memcpy rather than through a uint32_t pointer.
+    uint32_t bin_len = 0;
+    memcpy(&bin_len, data + pos, sizeof(bin_len));
+    pos += sizeof(uint32_t);
+
+    // Written as a subtraction so that `pos + bin_len` cannot overflow.
+    if (bin_len > value_size - pos) {
+      break;
+    }
+    binary_array.emplace_back(data + pos, bin_len);
+    pos += bin_len;
+  }
+  return binary_array;
+}
+
+// NOTE(review): the template arguments of std::vector are missing from the
+// function below in this copy of the patch; it is left as found because the
+// element type is not visible here.
+static std::vector convert_zvec_docs_to_internal(
+    const ZVecDoc **zvec_docs, size_t doc_count) {
+  std::vector docs;
+  docs.reserve(doc_count);
+
+  for (size_t i = 0; i < doc_count; ++i) {
+
docs.push_back( + *(*reinterpret_cast *>(zvec_docs[i]))); + } + + return docs; +} + + +static zvec::Status convert_zvec_collection_schema_to_internal( + const ZVecCollectionSchema *schema, + zvec::CollectionSchema::Ptr &collection_schema) { + std::string coll_name(zvec_string_c_str(schema->name), + zvec_string_length(schema->name)); + collection_schema = std::make_shared(coll_name); + collection_schema->set_max_doc_count_per_segment( + schema->max_doc_count_per_segment); + + for (size_t i = 0; i < schema->field_count; ++i) { + const ZVecFieldSchema *zvec_field = schema->fields[i]; + ZVecDataType field_data_type = zvec_field_schema_get_data_type(zvec_field); + zvec::DataType data_type = convert_data_type(field_data_type); + std::string field_name = zvec_field_schema_get_name(zvec_field); + bool nullable = zvec_field_schema_is_nullable(zvec_field); + uint32_t dimension = zvec_field_schema_get_dimension(zvec_field); + zvec::FieldSchema::Ptr field_schema; + + bool is_vector_field = check_is_vector_field(*zvec_field); + + if (is_vector_field) { + field_schema = std::make_shared(field_name, data_type, + dimension, nullable); + } else { + field_schema = + std::make_shared(field_name, data_type, nullable); + } + + if (zvec_field_schema_has_index(zvec_field)) { + zvec::Status status = set_field_index_params(field_schema, zvec_field); + if (!status.ok()) { + return status; + } + } + + zvec::Status status = collection_schema->add_field(field_schema); + if (!status.ok()) { + return status; + } + } + + return zvec::Status::OK(); +} + +static zvec::Status convert_zvec_field_schema_to_internal( + const ZVecFieldSchema *zvec_field, zvec::FieldSchema::Ptr &field_schema) { + // Validate input + if (!zvec_field) { + return zvec::Status::InvalidArgument("Field schema cannot be null"); + } + + const char *field_name_cstr = zvec_field_schema_get_name(zvec_field); + if (!field_name_cstr) { + return zvec::Status::InvalidArgument("Field name cannot be null"); + } + + ZVecDataType data_type 
= zvec_field_schema_get_data_type(zvec_field); + zvec::DataType data_type_internal = convert_data_type(data_type); + if (data_type_internal == zvec::DataType::UNDEFINED) { + return zvec::Status::InvalidArgument("Invalid data type"); + } + + std::string field_name(field_name_cstr); + bool nullable = zvec_field_schema_is_nullable(zvec_field); + uint32_t dimension = zvec_field_schema_get_dimension(zvec_field); + bool is_vector_field = check_is_vector_field(*zvec_field); + + if (is_vector_field) { + field_schema = std::make_shared( + field_name, data_type_internal, dimension, nullable); + + if (zvec_field_schema_has_index(zvec_field)) { + // Internal access to index_params + struct InternalFieldSchema { + ZVecString *name; + ZVecDataType data_type; + bool nullable; + uint32_t dimension; + ZVecIndexParams *index_params; + bool has_index; + }; + const ZVecIndexParams *index_params = + reinterpret_cast(zvec_field) + ->index_params; + + if (index_params) { + ZVecIndexType index_type = zvec_index_params_get_type(index_params); + ZVecMetricType metric_type = + zvec_index_params_get_metric_type(index_params); + ZVecQuantizeType quantize_type = + zvec_index_params_get_quantize_type(index_params); + + auto metric = convert_metric_type(metric_type); + auto quantize = convert_quantize_type(quantize_type); + + switch (index_type) { + case ZVEC_INDEX_TYPE_HNSW: { + int m, ef_construction; + zvec_index_params_get_hnsw_params(index_params, &m, + &ef_construction); + auto hnsw_params = std::make_shared( + metric, m, ef_construction, quantize); + field_schema->set_index_params(hnsw_params); + break; + } + case ZVEC_INDEX_TYPE_FLAT: { + auto flat_params = + std::make_shared(metric, quantize); + field_schema->set_index_params(flat_params); + break; + } + case ZVEC_INDEX_TYPE_IVF: { + int n_list, n_iters; + bool use_soar; + zvec_index_params_get_ivf_params(index_params, &n_list, &n_iters, + &use_soar); + auto ivf_params = std::make_shared( + metric, n_list, n_iters, use_soar, quantize); + 
field_schema->set_index_params(ivf_params); + break; + } + default: + field_schema->set_index_params( + std::make_shared(zvec::MetricType::L2)); + break; + } + } else { + field_schema->set_index_params( + std::make_shared(zvec::MetricType::L2)); + } + } else { + field_schema->set_index_params( + std::make_shared(zvec::MetricType::L2)); + } + } else { + field_schema = std::make_shared( + field_name, data_type_internal, nullable); + + if (zvec_field_schema_has_index(zvec_field)) { + struct InternalFieldSchema { + ZVecString *name; + ZVecDataType data_type; + bool nullable; + uint32_t dimension; + ZVecIndexParams *index_params; + bool has_index; + }; + const ZVecIndexParams *index_params = + reinterpret_cast(zvec_field) + ->index_params; + + if (index_params && + zvec_index_params_get_type(index_params) == ZVEC_INDEX_TYPE_INVERT) { + bool enable_range_opt, enable_wildcard; + zvec_index_params_get_invert_params(index_params, &enable_range_opt, + &enable_wildcard); + auto invert_params = std::make_shared( + enable_range_opt, enable_wildcard); + field_schema->set_index_params(invert_params); + } + } + } + + return zvec::Status::OK(); +} + +ZVecErrorCode zvec_doc_add_field_by_value(ZVecDoc *doc, const char *field_name, + ZVecDataType data_type, + const void *value, + size_t value_size) { + if (!doc || !field_name || !value) { + set_last_error("Invalid arguments: null pointer"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + ZVEC_TRY_RETURN_ERROR( + "Failed to add field", + auto doc_ptr = reinterpret_cast *>(doc); + std::string name(field_name); ZVecErrorCode error_code = ZVEC_OK; + + switch (data_type) { + // Scalar types + case ZVEC_DATA_TYPE_BINARY: + case ZVEC_DATA_TYPE_STRING: { + std::string val(static_cast(value), value_size); + (*doc_ptr)->set(name, val); + break; + } + case ZVEC_DATA_TYPE_BOOL: { + bool val = extract_scalar_value(value, value_size, &error_code); + if (error_code != ZVEC_OK) { + set_last_error("Invalid value size for bool type"); + return 
error_code; + } + (*doc_ptr)->set(name, val); + break; + } + case ZVEC_DATA_TYPE_INT32: { + int32_t val = + extract_scalar_value(value, value_size, &error_code); + if (error_code != ZVEC_OK) { + set_last_error("Invalid value size for int32 type"); + return error_code; + } + (*doc_ptr)->set(name, val); + break; + } + case ZVEC_DATA_TYPE_INT64: { + int64_t val = + extract_scalar_value(value, value_size, &error_code); + if (error_code != ZVEC_OK) { + set_last_error("Invalid value size for int64 type"); + return error_code; + } + (*doc_ptr)->set(name, val); + break; + } + case ZVEC_DATA_TYPE_UINT32: { + uint32_t val = + extract_scalar_value(value, value_size, &error_code); + if (error_code != ZVEC_OK) { + set_last_error("Invalid value size for uint32 type"); + return error_code; + } + (*doc_ptr)->set(name, val); + break; + } + case ZVEC_DATA_TYPE_UINT64: { + uint64_t val = + extract_scalar_value(value, value_size, &error_code); + if (error_code != ZVEC_OK) { + set_last_error("Invalid value size for uint64 type"); + return error_code; + } + (*doc_ptr)->set(name, val); + break; + } + case ZVEC_DATA_TYPE_FLOAT: { + float val = + extract_scalar_value(value, value_size, &error_code); + if (error_code != ZVEC_OK) { + set_last_error("Invalid value size for float type"); + return error_code; + } + (*doc_ptr)->set(name, val); + break; + } + case ZVEC_DATA_TYPE_DOUBLE: { + double val = + extract_scalar_value(value, value_size, &error_code); + if (error_code != ZVEC_OK) { + set_last_error("Invalid value size for double type"); + return error_code; + } + (*doc_ptr)->set(name, val); + break; + } + + // Vector types + case ZVEC_DATA_TYPE_VECTOR_BINARY32: { + auto vec = + extract_vector_values(value, value_size, &error_code); + if (error_code != ZVEC_OK) { + set_last_error("Invalid value size for vector_binary32 type"); + return error_code; + } + (*doc_ptr)->set(name, std::move(vec)); + break; + } + case ZVEC_DATA_TYPE_VECTOR_BINARY64: { + auto vec = + extract_vector_values(value, 
value_size, &error_code); + if (error_code != ZVEC_OK) { + set_last_error("Invalid value size for vector_binary64 type"); + return error_code; + } + (*doc_ptr)->set(name, std::move(vec)); + break; + } + case ZVEC_DATA_TYPE_VECTOR_FP32: { + auto vec = + extract_vector_values(value, value_size, &error_code); + if (error_code != ZVEC_OK) { + set_last_error("Invalid value size for vector_fp32 type"); + return error_code; + } + (*doc_ptr)->set(name, std::move(vec)); + break; + } + case ZVEC_DATA_TYPE_VECTOR_FP16: { + auto vec = extract_vector_values(value, value_size, + &error_code); + if (error_code != ZVEC_OK) { + set_last_error("Invalid value size for vector_fp16 type"); + return error_code; + } + (*doc_ptr)->set(name, std::move(vec)); + break; + } + case ZVEC_DATA_TYPE_VECTOR_FP64: { + auto vec = + extract_vector_values(value, value_size, &error_code); + if (error_code != ZVEC_OK) { + set_last_error("Invalid value size for vector_fp64 type"); + return error_code; + } + (*doc_ptr)->set(name, std::move(vec)); + break; + } + case ZVEC_DATA_TYPE_VECTOR_INT8: { + auto vec = + extract_vector_values(value, value_size, &error_code); + if (error_code != ZVEC_OK) { + set_last_error("Invalid value size for vector_int8 type"); + return error_code; + } + (*doc_ptr)->set(name, std::move(vec)); + break; + } + case ZVEC_DATA_TYPE_VECTOR_INT16: { + auto vec = + extract_vector_values(value, value_size, &error_code); + if (error_code != ZVEC_OK) { + set_last_error("Invalid value size for vector_int16 type"); + return error_code; + } + (*doc_ptr)->set(name, std::move(vec)); + break; + } + case ZVEC_DATA_TYPE_VECTOR_INT4: { + // INT4 vectors are packed - each byte contains 2 int4 values + size_t count = value_size * 2; + const int8_t *packed_vals = static_cast(value); + std::vector vec; + vec.reserve(count); + + // Unpack int4 values + for (size_t i = 0; i < value_size; ++i) { + int8_t byte_val = packed_vals[i]; + // Extract lower 4 bits + vec.push_back(byte_val & 0x0F); + // Extract 
upper 4 bits + vec.push_back((byte_val >> 4) & 0x0F); + } + (*doc_ptr)->set(name, std::move(vec)); + break; + } + + // Sparse vector types + case ZVEC_DATA_TYPE_SPARSE_VECTOR_FP16: { + auto sparse_vec = extract_sparse_vector( + value, value_size, &error_code); + if (error_code != ZVEC_OK) { + set_last_error("Invalid sparse vector data size"); + return error_code; + } + (*doc_ptr)->set(name, std::move(sparse_vec)); + break; + } + case ZVEC_DATA_TYPE_SPARSE_VECTOR_FP32: { + auto sparse_vec = + extract_sparse_vector(value, value_size, &error_code); + if (error_code != ZVEC_OK) { + set_last_error("Invalid sparse vector data size"); + return error_code; + } + (*doc_ptr)->set(name, std::move(sparse_vec)); + break; + } + + // Array types + case ZVEC_DATA_TYPE_ARRAY_BINARY: { + auto binary_array = extract_binary_array(value, value_size); + (*doc_ptr)->set(name, std::move(binary_array)); + break; + } + case ZVEC_DATA_TYPE_ARRAY_STRING: { + // Check if this is a ZVecString** array or a C-string array + // ZVecString** array has pointer-sized elements + constexpr size_t ptr_size = sizeof(void *); + if (value_size % ptr_size == 0) { + // Likely a ZVecString** array + size_t count = value_size / ptr_size; + ZVecString **zvec_str_array = + reinterpret_cast(const_cast(value)); + auto string_array = + extract_string_array_from_zvec(zvec_str_array, count); + (*doc_ptr)->set(name, std::move(string_array)); + } else { + // C-string array (null-terminated strings) + auto string_array = extract_string_array(value, value_size); + (*doc_ptr)->set(name, std::move(string_array)); + } + break; + } + case ZVEC_DATA_TYPE_ARRAY_BOOL: { + auto vec = extract_array_values(value, value_size, &error_code); + if (error_code != ZVEC_OK) { + set_last_error("Invalid value size for array_bool type"); + return error_code; + } + (*doc_ptr)->set(name, std::move(vec)); + break; + } + case ZVEC_DATA_TYPE_ARRAY_INT32: { + auto vec = + extract_array_values(value, value_size, &error_code); + if (error_code != 
ZVEC_OK) { + set_last_error("Invalid value size for array_int32 type"); + return error_code; + } + (*doc_ptr)->set(name, std::move(vec)); + break; + } + case ZVEC_DATA_TYPE_ARRAY_INT64: { + auto vec = + extract_array_values(value, value_size, &error_code); + if (error_code != ZVEC_OK) { + set_last_error("Invalid value size for array_int64 type"); + return error_code; + } + (*doc_ptr)->set(name, std::move(vec)); + break; + } + case ZVEC_DATA_TYPE_ARRAY_UINT32: { + auto vec = + extract_array_values(value, value_size, &error_code); + if (error_code != ZVEC_OK) { + set_last_error("Invalid value size for array_uint32 type"); + return error_code; + } + (*doc_ptr)->set(name, std::move(vec)); + break; + } + case ZVEC_DATA_TYPE_ARRAY_UINT64: { + auto vec = + extract_array_values(value, value_size, &error_code); + if (error_code != ZVEC_OK) { + set_last_error("Invalid value size for array_uint64 type"); + return error_code; + } + (*doc_ptr)->set(name, std::move(vec)); + break; + } + case ZVEC_DATA_TYPE_ARRAY_FLOAT: { + auto vec = + extract_array_values(value, value_size, &error_code); + if (error_code != ZVEC_OK) { + set_last_error("Invalid value size for array_float type"); + return error_code; + } + (*doc_ptr)->set(name, std::move(vec)); + break; + } + case ZVEC_DATA_TYPE_ARRAY_DOUBLE: { + auto vec = + extract_array_values(value, value_size, &error_code); + if (error_code != ZVEC_OK) { + set_last_error("Invalid value size for array_double type"); + return error_code; + } + (*doc_ptr)->set(name, std::move(vec)); + break; + } + + default: + set_last_error("Unsupported data type: " + std::to_string(data_type)); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + return ZVEC_OK;) +} + +ZVecErrorCode zvec_doc_add_field_by_struct(ZVecDoc *doc, + const ZVecDocField *field) { + if (!doc || !field) { + set_last_error("Invalid arguments: null pointer"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + ZVEC_TRY_RETURN_ERROR( + "Failed to add field", + auto doc_ptr = reinterpret_cast *>(doc); 
+ + std::string name(field->name.data, field->name.length); + + switch (field->data_type) { + // Scalar types (in ZVecDataType order: BINARY, STRING, BOOL, INT32, + // INT64, UINT32, UINT64, FLOAT, DOUBLE) + case ZVEC_DATA_TYPE_BINARY: { + std::string val( + reinterpret_cast(field->value.binary_value.data), + field->value.binary_value.length); + (*doc_ptr)->set(name, val); + break; + } + case ZVEC_DATA_TYPE_STRING: { + std::string val(field->value.string_value.data, + field->value.string_value.length); + (*doc_ptr)->set(name, val); + break; + } + case ZVEC_DATA_TYPE_BOOL: { + (*doc_ptr)->set(name, field->value.bool_value); + break; + } + case ZVEC_DATA_TYPE_INT32: { + (*doc_ptr)->set(name, field->value.int32_value); + break; + } + case ZVEC_DATA_TYPE_INT64: { + (*doc_ptr)->set(name, field->value.int64_value); + break; + } + case ZVEC_DATA_TYPE_UINT32: { + (*doc_ptr)->set(name, field->value.uint32_value); + break; + } + case ZVEC_DATA_TYPE_UINT64: { + (*doc_ptr)->set(name, field->value.uint64_value); + break; + } + case ZVEC_DATA_TYPE_FLOAT: { + (*doc_ptr)->set(name, field->value.float_value); + break; + } + case ZVEC_DATA_TYPE_DOUBLE: { + (*doc_ptr)->set(name, field->value.double_value); + break; + } + + // Vector types (in ZVecDataType order: BINARY32, BINARY64, FP16, FP32, + // FP64, INT4, INT8, INT16) + case ZVEC_DATA_TYPE_VECTOR_BINARY32: { + std::vector vec(reinterpret_cast( + field->value.vector_value.data), + reinterpret_cast( + field->value.vector_value.data) + + field->value.vector_value.length); + (*doc_ptr)->set(name, std::move(vec)); + break; + } + case ZVEC_DATA_TYPE_VECTOR_BINARY64: { + std::vector vec(reinterpret_cast( + field->value.vector_value.data), + reinterpret_cast( + field->value.vector_value.data) + + field->value.vector_value.length); + (*doc_ptr)->set(name, std::move(vec)); + break; + } + case ZVEC_DATA_TYPE_VECTOR_FP16: { + std::vector vec( + reinterpret_cast( + field->value.vector_value.data), + reinterpret_cast( + 
field->value.vector_value.data) + + field->value.vector_value.length); + (*doc_ptr)->set(name, std::move(vec)); + break; + } + case ZVEC_DATA_TYPE_VECTOR_FP32: { + std::vector vec(field->value.vector_value.data, + field->value.vector_value.data + + field->value.vector_value.length); + (*doc_ptr)->set(name, std::move(vec)); + break; + } + case ZVEC_DATA_TYPE_VECTOR_FP64: { + std::vector vec( + reinterpret_cast(field->value.vector_value.data), + reinterpret_cast(field->value.vector_value.data) + + field->value.vector_value.length); + (*doc_ptr)->set(name, std::move(vec)); + break; + } + case ZVEC_DATA_TYPE_VECTOR_INT4: { + size_t byte_count = (field->value.vector_value.length + 1) / 2; + const int8_t *packed_data = + reinterpret_cast(field->value.vector_value.data); + std::vector vec; + vec.reserve(field->value.vector_value.length); + + for (size_t i = 0; + i < byte_count && vec.size() < field->value.vector_value.length; + ++i) { + int8_t byte_val = packed_data[i]; + // Extract lower 4 bits + vec.push_back(byte_val & 0x0F); + // Extract upper 4 bits + if (vec.size() < field->value.vector_value.length) { + vec.push_back((byte_val >> 4) & 0x0F); + } + } + (*doc_ptr)->set(name, std::move(vec)); + break; + } + case ZVEC_DATA_TYPE_VECTOR_INT8: { + std::vector vec( + reinterpret_cast(field->value.vector_value.data), + reinterpret_cast(field->value.vector_value.data) + + field->value.vector_value.length); + (*doc_ptr)->set(name, std::move(vec)); + break; + } + case ZVEC_DATA_TYPE_VECTOR_INT16: { + std::vector vec( + reinterpret_cast(field->value.vector_value.data), + reinterpret_cast( + field->value.vector_value.data) + + field->value.vector_value.length); + (*doc_ptr)->set(name, std::move(vec)); + break; + } + + // Sparse vector types (in ZVecDataType order: FP16, FP32) + case ZVEC_DATA_TYPE_SPARSE_VECTOR_FP16: { + std::vector vec( + reinterpret_cast( + field->value.vector_value.data), + reinterpret_cast( + field->value.vector_value.data) + + 
field->value.vector_value.length); + (*doc_ptr)->set(name, std::move(vec)); + break; + } + case ZVEC_DATA_TYPE_SPARSE_VECTOR_FP32: { + std::vector vec(field->value.vector_value.data, + field->value.vector_value.data + + field->value.vector_value.length); + (*doc_ptr)->set(name, std::move(vec)); + break; + } + + // Array types (in ZVecDataType order: BINARY, STRING, BOOL, INT32, + // INT64, UINT32, UINT64, FLOAT, DOUBLE) + case ZVEC_DATA_TYPE_ARRAY_BINARY: { + std::vector array_values; + const uint8_t *data_ptr = field->value.binary_value.data; + size_t total_length = field->value.binary_value.length; + size_t offset = 0; + + while (offset + sizeof(uint32_t) <= total_length) { + uint32_t elem_length = + *reinterpret_cast(data_ptr + offset); + offset += sizeof(uint32_t); + + if (offset + elem_length <= total_length) { + std::string elem( + reinterpret_cast(data_ptr + offset), + elem_length); + array_values.push_back(elem); + offset += elem_length; + } else { + break; + } + } + (*doc_ptr)->set(name, std::move(array_values)); + break; + } + case ZVEC_DATA_TYPE_ARRAY_STRING: { + std::vector array_values; + const char *data_ptr = field->value.string_value.data; + size_t total_length = field->value.string_value.length; + size_t offset = 0; + + while (offset < total_length) { + size_t str_len = strlen(data_ptr + offset); + if (str_len > 0 && offset + str_len <= total_length) { + array_values.emplace_back(data_ptr + offset, str_len); + offset += str_len + 1; + } else { + break; + } + } + (*doc_ptr)->set(name, std::move(array_values)); + break; + } + case ZVEC_DATA_TYPE_ARRAY_BOOL: { + std::vector array_values( + reinterpret_cast(field->value.binary_value.data), + reinterpret_cast(field->value.binary_value.data) + + field->value.binary_value.length); + (*doc_ptr)->set(name, std::move(array_values)); + break; + } + case ZVEC_DATA_TYPE_ARRAY_INT32: { + std::vector array_values( + reinterpret_cast(field->value.vector_value.data), + reinterpret_cast( + 
field->value.vector_value.data) + + field->value.vector_value.length); + (*doc_ptr)->set(name, std::move(array_values)); + break; + } + case ZVEC_DATA_TYPE_ARRAY_INT64: { + std::vector array_values( + reinterpret_cast(field->value.vector_value.data), + reinterpret_cast( + field->value.vector_value.data) + + field->value.vector_value.length); + (*doc_ptr)->set(name, std::move(array_values)); + break; + } + case ZVEC_DATA_TYPE_ARRAY_UINT32: { + std::vector array_values( + reinterpret_cast( + field->value.vector_value.data), + reinterpret_cast( + field->value.vector_value.data) + + field->value.vector_value.length); + (*doc_ptr)->set(name, std::move(array_values)); + break; + } + case ZVEC_DATA_TYPE_ARRAY_UINT64: { + std::vector array_values( + reinterpret_cast( + field->value.vector_value.data), + reinterpret_cast( + field->value.vector_value.data) + + field->value.vector_value.length); + (*doc_ptr)->set(name, std::move(array_values)); + break; + } + case ZVEC_DATA_TYPE_ARRAY_FLOAT: { + std::vector array_values(field->value.vector_value.data, + field->value.vector_value.data + + field->value.vector_value.length); + (*doc_ptr)->set(name, std::move(array_values)); + break; + } + case ZVEC_DATA_TYPE_ARRAY_DOUBLE: { + std::vector array_values( + reinterpret_cast(field->value.vector_value.data), + reinterpret_cast(field->value.vector_value.data) + + field->value.vector_value.length); + (*doc_ptr)->set(name, std::move(array_values)); + break; + } + + default: + set_last_error("Unsupported data type: " + + std::to_string(field->data_type)); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + return ZVEC_OK;) +} + +const char *zvec_doc_get_pk_pointer(const ZVecDoc *doc) { + if (!doc) return nullptr; + auto doc_ptr = reinterpret_cast *>(doc); + return (*doc_ptr)->pk_ref().data(); +} + +const char *zvec_doc_get_pk_copy(const ZVecDoc *doc) { + if (!doc) return nullptr; + auto doc_ptr = reinterpret_cast *>(doc); + const std::string &pk = (*doc_ptr)->pk_ref(); + if (pk.empty()) return 
nullptr; + + char *result = static_cast(malloc(pk.length() + 1)); + strcpy(result, pk.c_str()); + return result; +} + +uint64_t zvec_doc_get_doc_id(const ZVecDoc *doc) { + if (!doc) return 0; + + ZVEC_TRY_RETURN_SCALAR( + "Failed to get document ID", 0, + auto doc_ptr = reinterpret_cast *>(doc); + return (*doc_ptr)->doc_id();) +} + +float zvec_doc_get_score(const ZVecDoc *doc) { + if (!doc) return 0.0f; + + ZVEC_TRY_RETURN_SCALAR( + "Failed to get document score", 0.0f, + auto doc_ptr = reinterpret_cast *>(doc); + return (*doc_ptr)->score();) +} + +ZVecDocOperator zvec_doc_get_operator(const ZVecDoc *doc) { + if (!doc) return ZVEC_DOC_OP_INSERT; // default + ZVEC_TRY_RETURN_SCALAR( + "Failed to get document operator", ZVEC_DOC_OP_INSERT, + auto doc_ptr = reinterpret_cast *>(doc); + zvec::Operator op = (*doc_ptr)->get_operator(); + return static_cast(op);) +} + +size_t zvec_doc_get_field_count(const ZVecDoc *doc) { + if (!doc) return 0; + + ZVEC_TRY_RETURN_SCALAR( + "Failed to get field count", 0, + auto doc_ptr = reinterpret_cast *>(doc); + return (*doc_ptr)->field_names().size();) +} + +ZVecErrorCode zvec_doc_get_field_value_basic(const ZVecDoc *doc, + const char *field_name, + ZVecDataType field_type, + void *value_buffer, + size_t buffer_size) { + if (!doc || !field_name || !value_buffer) { + set_last_error("Invalid arguments: null pointer"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + ZVEC_TRY_RETURN_ERROR( + "Failed to get field value", + auto doc_ptr = reinterpret_cast *>(doc); + + // Check if field exists + if (!(*doc_ptr)->has(field_name)) { + set_last_error("Field not found in document"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + // Handle basic data types that return values directly + switch (field_type) { + case ZVEC_DATA_TYPE_BOOL: { + if (buffer_size < sizeof(bool)) { + set_last_error("Buffer too small for bool value"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + const bool val = (*doc_ptr)->get_ref(field_name); + *static_cast(value_buffer) = 
val; + break; + } + case ZVEC_DATA_TYPE_INT32: { + if (buffer_size < sizeof(int32_t)) { + set_last_error("Buffer too small for int32 value"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + const int32_t val = (*doc_ptr)->get_ref(field_name); + *static_cast(value_buffer) = val; + break; + } + case ZVEC_DATA_TYPE_INT64: { + if (buffer_size < sizeof(int64_t)) { + set_last_error("Buffer too small for int64 value"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + const int64_t val = (*doc_ptr)->get_ref(field_name); + *static_cast(value_buffer) = val; + break; + } + case ZVEC_DATA_TYPE_UINT32: { + if (buffer_size < sizeof(uint32_t)) { + set_last_error("Buffer too small for uint32 value"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + const uint32_t val = (*doc_ptr)->get_ref(field_name); + *static_cast(value_buffer) = val; + break; + } + case ZVEC_DATA_TYPE_UINT64: { + if (buffer_size < sizeof(uint64_t)) { + set_last_error("Buffer too small for uint64 value"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + const uint64_t val = (*doc_ptr)->get_ref(field_name); + *static_cast(value_buffer) = val; + break; + } + case ZVEC_DATA_TYPE_FLOAT: { + if (buffer_size < sizeof(float)) { + set_last_error("Buffer too small for float value"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + const float val = (*doc_ptr)->get_ref(field_name); + *static_cast(value_buffer) = val; + break; + } + case ZVEC_DATA_TYPE_DOUBLE: { + if (buffer_size < sizeof(double)) { + set_last_error("Buffer too small for double value"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + const double val = (*doc_ptr)->get_ref(field_name); + *static_cast(value_buffer) = val; + break; + } + default: { + set_last_error("Data type not supported for basic value return"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + } + + return ZVEC_OK;) +} + +ZVecErrorCode zvec_doc_get_field_value_copy(const ZVecDoc *doc, + const char *field_name, + ZVecDataType field_type, + void **value, size_t *value_size) { + if (!doc || !field_name || !value || 
!value_size) { + set_last_error("Invalid arguments: null pointer"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + ZVEC_TRY_RETURN_ERROR( + "Failed to get field value copy", + auto doc_ptr = reinterpret_cast *>(doc); + + // Check if field exists + if (!(*doc_ptr)->has(field_name)) { + set_last_error("Field not found in document"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + // Handle copy-returning data types (allocate new memory) + switch (field_type) { + // Basic types - copy the actual values + case ZVEC_DATA_TYPE_BOOL: { + const bool val = (*doc_ptr)->get_ref(field_name); + void *buffer = malloc(sizeof(bool)); + if (!buffer) { + set_last_error("Memory allocation failed for bool"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + *static_cast(buffer) = val; + *value = buffer; + *value_size = sizeof(bool); + break; + } + case ZVEC_DATA_TYPE_INT32: { + const int32_t val = (*doc_ptr)->get_ref(field_name); + void *buffer = malloc(sizeof(int32_t)); + if (!buffer) { + set_last_error("Memory allocation failed for int32"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + *static_cast(buffer) = val; + *value = buffer; + *value_size = sizeof(int32_t); + break; + } + case ZVEC_DATA_TYPE_INT64: { + const int64_t val = (*doc_ptr)->get_ref(field_name); + void *buffer = malloc(sizeof(int64_t)); + if (!buffer) { + set_last_error("Memory allocation failed for int64"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + *static_cast(buffer) = val; + *value = buffer; + *value_size = sizeof(int64_t); + break; + } + case ZVEC_DATA_TYPE_UINT32: { + const uint32_t val = (*doc_ptr)->get_ref(field_name); + void *buffer = malloc(sizeof(uint32_t)); + if (!buffer) { + set_last_error("Memory allocation failed for uint32"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + *static_cast(buffer) = val; + *value = buffer; + *value_size = sizeof(uint32_t); + break; + } + case ZVEC_DATA_TYPE_UINT64: { + const uint64_t val = (*doc_ptr)->get_ref(field_name); + void *buffer = malloc(sizeof(uint64_t)); + if (!buffer) { + 
set_last_error("Memory allocation failed for uint64"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + *static_cast(buffer) = val; + *value = buffer; + *value_size = sizeof(uint64_t); + break; + } + case ZVEC_DATA_TYPE_FLOAT: { + const float val = (*doc_ptr)->get_ref(field_name); + void *buffer = malloc(sizeof(float)); + if (!buffer) { + set_last_error("Memory allocation failed for float"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + *static_cast(buffer) = val; + *value = buffer; + *value_size = sizeof(float); + break; + } + case ZVEC_DATA_TYPE_DOUBLE: { + const double val = (*doc_ptr)->get_ref(field_name); + void *buffer = malloc(sizeof(double)); + if (!buffer) { + set_last_error("Memory allocation failed for double"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + *static_cast(buffer) = val; + *value = buffer; + *value_size = sizeof(double); + break; + } + + // String and binary types - copy the data + case ZVEC_DATA_TYPE_BINARY: + case ZVEC_DATA_TYPE_STRING: { + const std::string &val = (*doc_ptr)->get_ref(field_name); + void *buffer = malloc(val.length()); + if (!buffer) { + set_last_error("Memory allocation failed for string/binary"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + memcpy(buffer, val.data(), val.length()); + *value = buffer; + *value_size = val.length(); + break; + } + + // Vector types - copy the data + case ZVEC_DATA_TYPE_VECTOR_BINARY32: { + const std::vector &val = + (*doc_ptr)->get_ref>(field_name); + size_t total_size = val.size() * sizeof(uint32_t); + void *buffer = malloc(total_size); + if (!buffer) { + set_last_error("Memory allocation failed for uint32 vector"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + memcpy(buffer, val.data(), total_size); + *value = buffer; + *value_size = total_size; + break; + } + case ZVEC_DATA_TYPE_VECTOR_BINARY64: { + const std::vector &val = + (*doc_ptr)->get_ref>(field_name); + size_t total_size = val.size() * sizeof(uint64_t); + void *buffer = malloc(total_size); + if (!buffer) { + set_last_error("Memory allocation failed 
for uint64 vector"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + memcpy(buffer, val.data(), total_size); + *value = buffer; + *value_size = total_size; + break; + } + case ZVEC_DATA_TYPE_VECTOR_FP16: { + const std::vector &val = + (*doc_ptr)->get_ref>(field_name); + size_t total_size = val.size() * sizeof(zvec::float16_t); + void *buffer = malloc(total_size); + if (!buffer) { + set_last_error("Memory allocation failed for fp16 vector"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + memcpy(buffer, val.data(), total_size); + *value = buffer; + *value_size = total_size; + break; + } + case ZVEC_DATA_TYPE_VECTOR_FP32: { + const std::vector &val = + (*doc_ptr)->get_ref>(field_name); + size_t total_size = val.size() * sizeof(float); + void *buffer = malloc(total_size); + if (!buffer) { + set_last_error("Memory allocation failed for fp32 vector"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + memcpy(buffer, val.data(), total_size); + *value = buffer; + *value_size = total_size; + break; + } + case ZVEC_DATA_TYPE_VECTOR_FP64: { + const std::vector &val = + (*doc_ptr)->get_ref>(field_name); + size_t total_size = val.size() * sizeof(double); + void *buffer = malloc(total_size); + if (!buffer) { + set_last_error("Memory allocation failed for fp64 vector"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + memcpy(buffer, val.data(), total_size); + *value = buffer; + *value_size = total_size; + break; + } + case ZVEC_DATA_TYPE_VECTOR_INT4: + case ZVEC_DATA_TYPE_VECTOR_INT8: { + const std::vector &val = + (*doc_ptr)->get_ref>(field_name); + size_t total_size = val.size() * sizeof(int8_t); + void *buffer = malloc(total_size); + if (!buffer) { + set_last_error("Memory allocation failed for int8 vector"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + memcpy(buffer, val.data(), total_size); + *value = buffer; + *value_size = total_size; + break; + } + case ZVEC_DATA_TYPE_VECTOR_INT16: { + const std::vector &val = + (*doc_ptr)->get_ref>(field_name); + size_t total_size = val.size() * sizeof(int16_t); + 
void *buffer = malloc(total_size); + if (!buffer) { + set_last_error("Memory allocation failed for int16 vector"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + memcpy(buffer, val.data(), total_size); + *value = buffer; + *value_size = total_size; + break; + } + + // Sparse vector types - create flattened representation + case ZVEC_DATA_TYPE_SPARSE_VECTOR_FP16: { + using SparseVecFP16 = + std::pair, std::vector>; + const SparseVecFP16 &sparse_vec = + (*doc_ptr)->get_ref(field_name); + size_t nnz = sparse_vec.first.size(); + size_t total_size = sizeof(size_t) + nnz * (sizeof(uint32_t) + + sizeof(zvec::float16_t)); + void *buffer = malloc(total_size); + if (!buffer) { + set_last_error("Memory allocation failed for sparse vector FP16"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + + char *ptr = static_cast(buffer); + *reinterpret_cast(ptr) = nnz; + ptr += sizeof(size_t); + + for (size_t i = 0; i < nnz; ++i) { + *reinterpret_cast(ptr) = sparse_vec.first[i]; + ptr += sizeof(uint32_t); + } + for (size_t i = 0; i < nnz; ++i) { + *reinterpret_cast(ptr) = sparse_vec.second[i]; + ptr += sizeof(zvec::float16_t); + } + + *value = buffer; + *value_size = total_size; + break; + } + case ZVEC_DATA_TYPE_SPARSE_VECTOR_FP32: { + using SparseVecFP32 = + std::pair, std::vector>; + const SparseVecFP32 &sparse_vec = + (*doc_ptr)->get_ref(field_name); + size_t nnz = sparse_vec.first.size(); + size_t total_size = + sizeof(size_t) + nnz * (sizeof(uint32_t) + sizeof(float)); + void *buffer = malloc(total_size); + if (!buffer) { + set_last_error("Memory allocation failed for sparse vector FP32"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + + char *ptr = static_cast(buffer); + *reinterpret_cast(ptr) = nnz; + ptr += sizeof(size_t); + + for (size_t i = 0; i < nnz; ++i) { + *reinterpret_cast(ptr) = sparse_vec.first[i]; + ptr += sizeof(uint32_t); + } + for (size_t i = 0; i < nnz; ++i) { + *reinterpret_cast(ptr) = sparse_vec.second[i]; + ptr += sizeof(float); + } + + *value = buffer; + *value_size = 
total_size; + break; + } + + // Array types - create serialized representations + case ZVEC_DATA_TYPE_ARRAY_BINARY: { + using BinaryArray = std::vector; + const BinaryArray &array_vals = + (*doc_ptr)->get_ref(field_name); + size_t total_size = 0; + for (const auto &bin_val : array_vals) { + total_size += bin_val.length(); + } + + void *buffer = malloc(total_size); + if (!buffer) { + set_last_error("Memory allocation failed for binary array"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + + char *ptr = static_cast(buffer); + for (const auto &bin_val : array_vals) { + memcpy(ptr, bin_val.data(), bin_val.length()); + ptr += bin_val.length(); + } + + *value = buffer; + *value_size = total_size; + break; + } + case ZVEC_DATA_TYPE_ARRAY_STRING: { + using StringArray = std::vector; + const StringArray &array_vals = + (*doc_ptr)->get_ref(field_name); + size_t total_size = 0; + for (const auto &str_val : array_vals) { + total_size += str_val.length() + 1; // +1 for null terminator + } + + void *buffer = malloc(total_size); + if (!buffer) { + set_last_error("Memory allocation failed for string array"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + + char *ptr = static_cast(buffer); + for (const auto &str_val : array_vals) { + memcpy(ptr, str_val.c_str(), str_val.length()); + ptr += str_val.length(); + *ptr = '\0'; + ptr++; + } + + *value = buffer; + *value_size = total_size; + break; + } + case ZVEC_DATA_TYPE_ARRAY_BOOL: { + using BoolArray = std::vector; + const BoolArray &array_vals = + (*doc_ptr)->get_ref(field_name); + size_t byte_count = (array_vals.size() + 7) / 8; + void *buffer = malloc(byte_count); + if (!buffer) { + set_last_error("Memory allocation failed for bool array"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + + uint8_t *bytes = static_cast(buffer); + memset(bytes, 0, byte_count); + + for (size_t i = 0; i < array_vals.size(); ++i) { + if (array_vals[i]) { + bytes[i / 8] |= (1 << (i % 8)); + } + } + + *value = buffer; + *value_size = byte_count; + break; + } + case 
ZVEC_DATA_TYPE_ARRAY_INT32: { + using Int32Array = std::vector; + const Int32Array &array_vals = + (*doc_ptr)->get_ref(field_name); + size_t total_size = array_vals.size() * sizeof(int32_t); + void *buffer = malloc(total_size); + if (!buffer) { + set_last_error("Memory allocation failed for int32 array"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + + memcpy(buffer, array_vals.data(), total_size); + *value = buffer; + *value_size = total_size; + break; + } + case ZVEC_DATA_TYPE_ARRAY_INT64: { + using Int64Array = std::vector; + const Int64Array &array_vals = + (*doc_ptr)->get_ref(field_name); + size_t total_size = array_vals.size() * sizeof(int64_t); + void *buffer = malloc(total_size); + if (!buffer) { + set_last_error("Memory allocation failed for int64 array"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + + memcpy(buffer, array_vals.data(), total_size); + *value = buffer; + *value_size = total_size; + break; + } + case ZVEC_DATA_TYPE_ARRAY_UINT32: { + using UInt32Array = std::vector; + const UInt32Array &array_vals = + (*doc_ptr)->get_ref(field_name); + size_t total_size = array_vals.size() * sizeof(uint32_t); + void *buffer = malloc(total_size); + if (!buffer) { + set_last_error("Memory allocation failed for uint32 array"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + + memcpy(buffer, array_vals.data(), total_size); + *value = buffer; + *value_size = total_size; + break; + } + case ZVEC_DATA_TYPE_ARRAY_UINT64: { + using UInt64Array = std::vector; + const UInt64Array &array_vals = + (*doc_ptr)->get_ref(field_name); + size_t total_size = array_vals.size() * sizeof(uint64_t); + void *buffer = malloc(total_size); + if (!buffer) { + set_last_error("Memory allocation failed for uint64 array"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + + memcpy(buffer, array_vals.data(), total_size); + *value = buffer; + *value_size = total_size; + break; + } + case ZVEC_DATA_TYPE_ARRAY_FLOAT: { + using FloatArray = std::vector; + const FloatArray &array_vals = + 
(*doc_ptr)->get_ref(field_name); + size_t total_size = array_vals.size() * sizeof(float); + void *buffer = malloc(total_size); + if (!buffer) { + set_last_error("Memory allocation failed for float array"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + + memcpy(buffer, array_vals.data(), total_size); + *value = buffer; + *value_size = total_size; + break; + } + case ZVEC_DATA_TYPE_ARRAY_DOUBLE: { + using DoubleArray = std::vector; + const DoubleArray &array_vals = + (*doc_ptr)->get_ref(field_name); + size_t total_size = array_vals.size() * sizeof(double); + void *buffer = malloc(total_size); + if (!buffer) { + set_last_error("Memory allocation failed for double array"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + + memcpy(buffer, array_vals.data(), total_size); + *value = buffer; + *value_size = total_size; + break; + } + default: { + set_last_error("Unknown data type"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + } + + return ZVEC_OK;) +} + +ZVecErrorCode zvec_doc_get_field_value_pointer(const ZVecDoc *doc, + const char *field_name, + ZVecDataType field_type, + const void **value, + size_t *value_size) { + if (!doc || !field_name || !value || !value_size) { + set_last_error("Invalid arguments: null pointer"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + ZVEC_TRY_RETURN_ERROR( + "Failed to get field value pointer", + auto doc_ptr = reinterpret_cast *>(doc); + + // Check if field exists + if (!(*doc_ptr)->has(field_name)) { + set_last_error("Field not found in document"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + // Get field value based on data type + switch (field_type) { + case ZVEC_DATA_TYPE_BINARY: { + const std::string &val = (*doc_ptr)->get_ref(field_name); + *value = val.data(); + *value_size = val.length(); + break; + } + case ZVEC_DATA_TYPE_STRING: { + const std::string &val = (*doc_ptr)->get_ref(field_name); + *value = val.c_str(); + *value_size = val.length(); + break; + } + case ZVEC_DATA_TYPE_BOOL: { + const bool &val = (*doc_ptr)->get_ref(field_name); + 
*value = &val; + *value_size = sizeof(bool); + break; + } + case ZVEC_DATA_TYPE_INT32: { + const int32_t &val = (*doc_ptr)->get_ref(field_name); + *value = &val; + *value_size = sizeof(int32_t); + break; + } + case ZVEC_DATA_TYPE_INT64: { + const int64_t &val = (*doc_ptr)->get_ref(field_name); + *value = &val; + *value_size = sizeof(int64_t); + break; + } + case ZVEC_DATA_TYPE_UINT32: { + const uint32_t &val = (*doc_ptr)->get_ref(field_name); + *value = &val; + *value_size = sizeof(uint32_t); + break; + } + case ZVEC_DATA_TYPE_UINT64: { + const uint64_t &val = (*doc_ptr)->get_ref(field_name); + *value = &val; + *value_size = sizeof(uint64_t); + break; + } + case ZVEC_DATA_TYPE_FLOAT: { + const float &val = (*doc_ptr)->get_ref(field_name); + *value = &val; + *value_size = sizeof(float); + break; + } + case ZVEC_DATA_TYPE_DOUBLE: { + const double &val = (*doc_ptr)->get_ref(field_name); + *value = &val; + *value_size = sizeof(double); + break; + } + case ZVEC_DATA_TYPE_VECTOR_BINARY32: { + const std::vector &val = + (*doc_ptr)->get_ref>(field_name); + *value = val.data(); + *value_size = val.size() * sizeof(uint32_t); + break; + } + case ZVEC_DATA_TYPE_VECTOR_BINARY64: { + const std::vector &val = + (*doc_ptr)->get_ref>(field_name); + *value = val.data(); + *value_size = val.size() * sizeof(uint64_t); + break; + } + case ZVEC_DATA_TYPE_VECTOR_FP16: { + // FP16 vectors typically stored as uint16_t + const std::vector &val = + (*doc_ptr)->get_ref>(field_name); + *value = val.data(); + *value_size = val.size() * sizeof(zvec::float16_t); + break; + } + case ZVEC_DATA_TYPE_VECTOR_FP32: { + const std::vector &val = + (*doc_ptr)->get_ref>(field_name); + *value = val.data(); + *value_size = val.size() * sizeof(float); + break; + } + case ZVEC_DATA_TYPE_VECTOR_FP64: { + const std::vector &val = + (*doc_ptr)->get_ref>(field_name); + *value = val.data(); + *value_size = val.size() * sizeof(double); + break; + } + case ZVEC_DATA_TYPE_VECTOR_INT4: { + // INT4 vectors typically 
stored as int8_t with 2 values per byte + const std::vector &val = + (*doc_ptr)->get_ref>(field_name); + *value = val.data(); + *value_size = val.size() * sizeof(int8_t); + break; + } + case ZVEC_DATA_TYPE_VECTOR_INT8: { + const std::vector &val = + (*doc_ptr)->get_ref>(field_name); + *value = val.data(); + *value_size = val.size() * sizeof(int8_t); + break; + } + case ZVEC_DATA_TYPE_VECTOR_INT16: { + const std::vector &val = + (*doc_ptr)->get_ref>(field_name); + *value = val.data(); + *value_size = val.size() * sizeof(int16_t); + break; + } + case ZVEC_DATA_TYPE_ARRAY_INT32: { + auto &array_vals = + (*doc_ptr)->get_ref>(field_name); + *value = array_vals.data(); + *value_size = array_vals.size() * sizeof(int32_t); + break; + } + case ZVEC_DATA_TYPE_ARRAY_INT64: { + auto &array_vals = + (*doc_ptr)->get_ref>(field_name); + *value = array_vals.data(); + *value_size = array_vals.size() * sizeof(int64_t); + break; + } + case ZVEC_DATA_TYPE_ARRAY_UINT32: { + auto &array_vals = + (*doc_ptr)->get_ref>(field_name); + *value = array_vals.data(); + *value_size = array_vals.size() * sizeof(uint32_t); + break; + } + case ZVEC_DATA_TYPE_ARRAY_UINT64: { + auto &array_vals = + (*doc_ptr)->get_ref>(field_name); + *value = array_vals.data(); + *value_size = array_vals.size() * sizeof(uint64_t); + break; + } + case ZVEC_DATA_TYPE_ARRAY_FLOAT: { + auto &array_vals = + (*doc_ptr)->get_ref>(field_name); + *value = array_vals.data(); + *value_size = array_vals.size() * sizeof(float); + break; + } + case ZVEC_DATA_TYPE_ARRAY_DOUBLE: { + auto &array_vals = + (*doc_ptr)->get_ref>(field_name); + *value = array_vals.data(); + *value_size = array_vals.size() * sizeof(double); + break; + } + default: { + set_last_error("Unknown data type"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + } + + return ZVEC_OK;) +} + +bool zvec_doc_is_empty(const ZVecDoc *doc) { + if (!doc) { + set_last_error("Document pointer is null"); + return true; + } + + ZVEC_TRY_RETURN_SCALAR( + "Failed to check if document 
is empty", true, + auto doc_ptr = reinterpret_cast *>(doc); + return (*doc_ptr)->is_empty();) +} + +ZVecErrorCode zvec_doc_remove_field(ZVecDoc *doc, const char *field_name) { + if (!doc || !field_name) { + set_last_error("Document pointer or field name is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + ZVEC_TRY_RETURN_ERROR( + "Failed to remove field", + auto doc_ptr = reinterpret_cast *>(doc); + (*doc_ptr)->remove(std::string(field_name)); return ZVEC_OK;) +} + + +bool zvec_doc_has_field(const ZVecDoc *doc, const char *field_name) { + if (!doc || !field_name) { + set_last_error("Document pointer or field name is null"); + return false; + } + + ZVEC_TRY_RETURN_SCALAR( + "Failed to check field existence", false, + auto doc_ptr = reinterpret_cast *>(doc); + return (*doc_ptr)->has(std::string(field_name));) +} + +bool zvec_doc_has_field_value(const ZVecDoc *doc, const char *field_name) { + if (!doc || !field_name) { + set_last_error("Document pointer or field name is null"); + return false; + } + + ZVEC_TRY_RETURN_SCALAR( + "Failed to check field value existence", false, + auto doc_ptr = reinterpret_cast *>(doc); + return (*doc_ptr)->has_value(std::string(field_name));) +} + +bool zvec_doc_is_field_null(const ZVecDoc *doc, const char *field_name) { + if (!doc || !field_name) { + set_last_error("Document pointer or field name is null"); + return false; + } + + ZVEC_TRY_RETURN_SCALAR( + "Failed to check if field is null", false, + auto doc_ptr = reinterpret_cast *>(doc); + return (*doc_ptr)->is_null(std::string(field_name));) +} + +ZVecErrorCode zvec_doc_get_field_names(const ZVecDoc *doc, char ***field_names, + size_t *count) { + if (!doc || !field_names || !count) { + set_last_error("Invalid arguments"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + ZVEC_TRY_RETURN_ERROR( + "Failed to get field names", + auto doc_ptr = reinterpret_cast *>(doc); + std::vector names = (*doc_ptr)->field_names(); + + *count = names.size(); + if (*count == 0) { + *field_names = 
nullptr; + return ZVEC_OK; + } + + *field_names = static_cast(malloc(*count * sizeof(char *))); + if (!*field_names) { + set_last_error("Failed to allocate memory for field names"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + + for (size_t i = 0; i < *count; ++i) { + (*field_names)[i] = copy_string(names[i]); + if (!(*field_names)[i]) { + for (size_t j = 0; j < i; ++j) { + free((*field_names)[j]); + } + free(*field_names); + *field_names = nullptr; + set_last_error("Failed to copy field name"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + } + + return ZVEC_OK;) +} + +ZVecErrorCode zvec_doc_serialize(const ZVecDoc *doc, uint8_t **data, + size_t *size) { + if (!doc || !data || !size) { + set_last_error("Invalid arguments"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + ZVEC_TRY_RETURN_ERROR( + "Failed to serialize document", + auto doc_ptr = reinterpret_cast *>(doc); + std::vector serialized_data = (*doc_ptr)->serialize(); + + *size = serialized_data.size(); + if (*size == 0) { + *data = nullptr; + return ZVEC_OK; + } + + *data = static_cast(malloc(*size)); + if (!*data) { + set_last_error("Failed to allocate memory for serialized data"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + + memcpy(*data, serialized_data.data(), *size); + return ZVEC_OK;) +} + +ZVecErrorCode zvec_doc_deserialize(const uint8_t *data, size_t size, + ZVecDoc **doc) { + if (!data || !doc || size == 0) { + set_last_error("Invalid arguments"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + ZVEC_TRY_RETURN_ERROR( + "Failed to deserialize document", + auto deserialized_doc = zvec::Doc::deserialize(data, size); + if (!deserialized_doc) { + set_last_error("Failed to deserialize document"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + + auto doc_ptr = new std::shared_ptr(deserialized_doc); + *doc = reinterpret_cast(doc_ptr); return ZVEC_OK;) +} + +void zvec_doc_merge(ZVecDoc *doc, const ZVecDoc *other) { + if (!doc || !other) { + set_last_error("Document pointers are null"); + return; + } + + ZVEC_TRY_BEGIN_VOID 
+ auto doc_ptr = reinterpret_cast *>(doc); + auto other_ptr = reinterpret_cast *>(other); + (*doc_ptr)->merge(**other_ptr); + ZVEC_CATCH_END_VOID +} + +size_t zvec_doc_memory_usage(const ZVecDoc *doc) { + if (!doc) { + set_last_error("Document pointer is null"); + return 0; + } + + ZVEC_TRY_RETURN_SCALAR( + "Failed to get document memory usage", 0, + auto doc_ptr = reinterpret_cast *>(doc); + return (*doc_ptr)->memory_usage();) +} + +ZVecErrorCode zvec_doc_validate(const ZVecDoc *doc, + const ZVecCollectionSchema *schema, + bool is_update, char **error_msg) { + if (!doc || !schema) { + set_last_error("Document or schema pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + ZVEC_TRY_RETURN_ERROR( + "Failed to validate document", + std::shared_ptr schema_ptr = nullptr; + auto status = + convert_zvec_collection_schema_to_internal(schema, schema_ptr); + if (!status.ok()) { + if (error_msg) { + *error_msg = copy_string(status.message()); + } + return status_to_error_code(status); + } + + auto doc_ptr = reinterpret_cast *>(doc); + status = (*doc_ptr)->validate(schema_ptr, is_update); if (!status.ok()) { + if (error_msg) { + *error_msg = copy_string(status.message()); + } + return status_to_error_code(status); + } + + if (error_msg) { *error_msg = nullptr; } return ZVEC_OK;) +} + +ZVecErrorCode zvec_doc_to_detail_string(const ZVecDoc *doc, char **detail_str) { + if (!doc || !detail_str) { + set_last_error("Invalid arguments"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + ZVEC_TRY_RETURN_ERROR( + "Failed to get document detail string", + auto doc_ptr = reinterpret_cast *>(doc); + std::string detail = (*doc_ptr)->to_detail_string(); + *detail_str = copy_string(detail); + + if (!*detail_str && !detail.empty()) { + set_last_error("Failed to copy detail string"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + + return ZVEC_OK;) +} + +// ============================================================================= +// Collection functions implementation +// 
============================================================================= + +ZVecErrorCode zvec_collection_create_and_open( + const char *path, const ZVecCollectionSchema *schema, + const ZVecCollectionOptions *options, ZVecCollection **collection) { + ZVEC_TRY_RETURN_ERROR( + "Exception in zvec_collection_create_and_open_with_schema", + if (!path || !schema || !collection) { + set_last_error("Path, schema, or collection cannot be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + std::shared_ptr + schema_ptr = nullptr; + auto status = + convert_zvec_collection_schema_to_internal(schema, schema_ptr); + if (!status.ok()) { + set_last_error(status.message()); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + zvec::CollectionOptions collection_options; + if (options) { + collection_options.enable_mmap_ = options->enable_mmap; + collection_options.max_buffer_size_ = options->max_buffer_size; + collection_options.read_only_ = options->read_only; + } + + auto result = zvec::Collection::CreateAndOpen(path, *schema_ptr, + collection_options); + ZVecErrorCode error_code = handle_expected_result(result); + + if (error_code == ZVEC_OK) { + *collection = reinterpret_cast( + new std::shared_ptr(std::move(result.value()))); + } + + return error_code;) +} + +ZVecErrorCode zvec_collection_open(const char *path, + const ZVecCollectionOptions *options, + ZVecCollection **collection) { + if (!path || !collection) { + set_last_error("Invalid arguments: path and collection cannot be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + ZVEC_TRY_RETURN_ERROR( + "Exception occurred", zvec::CollectionOptions collection_options; + if (options) { + collection_options.enable_mmap_ = options->enable_mmap; + collection_options.max_buffer_size_ = options->max_buffer_size; + collection_options.read_only_ = options->read_only; + } + + auto result = zvec::Collection::Open(path, collection_options); + ZVecErrorCode error_code = handle_expected_result(result); + + if (error_code == ZVEC_OK) { + 
*collection = reinterpret_cast( + new std::shared_ptr(std::move(result.value()))); + } + + return error_code;) +} + +ZVecErrorCode zvec_collection_close(ZVecCollection *collection) { + if (!collection) { + set_last_error("Invalid argument: collection cannot be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + ZVEC_TRY_RETURN_ERROR( + "Exception occurred", + delete reinterpret_cast *>(collection); + return ZVEC_OK;) +} + +ZVecErrorCode zvec_collection_destroy(ZVecCollection *collection) { + if (!collection) { + set_last_error("Invalid argument: collection cannot be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + ZVEC_TRY_RETURN_ERROR( + "Exception occurred", + auto &coll = + *reinterpret_cast *>(collection); + zvec::Status status = coll->Destroy(); + if (!status.ok()) { set_last_error(status.message()); } + + return status_to_error_code(status);) +} + +ZVecErrorCode zvec_collection_flush(ZVecCollection *collection) { + if (!collection) { + set_last_error("Invalid argument: collection cannot be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + ZVEC_TRY_RETURN_ERROR( + "Exception occurred", + auto &coll = + *reinterpret_cast *>(collection); + zvec::Status status = coll->Flush(); + + if (!status.ok()) { set_last_error(status.message()); } + + return status_to_error_code(status);) +} + +ZVecErrorCode zvec_collection_get_schema(const ZVecCollection *collection, + ZVecCollectionSchema **schema) { + if (!collection || !schema) { + set_last_error("Invalid arguments: collection and schema cannot be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + ZVEC_TRY_RETURN_ERROR( + "Exception occurred", + auto &coll = *reinterpret_cast *>( + collection); + auto result = coll->Schema(); + + ZVecErrorCode error_code = handle_expected_result(result); + if (error_code == ZVEC_OK) { + const auto &cpp_schema = result.value(); + + // Create new schema structure + ZVecCollectionSchema *c_schema = static_cast( + malloc(sizeof(ZVecCollectionSchema))); + if (!c_schema) { + 
set_last_error("Failed to allocate memory for schema"); + return ZVEC_ERROR_RESOURCE_EXHAUSTED; + } + + // Initialize the schema structure + c_schema->name = nullptr; + c_schema->fields = nullptr; + c_schema->field_count = 0; + c_schema->field_capacity = 0; + c_schema->max_doc_count_per_segment = + cpp_schema.max_doc_count_per_segment(); + + // Set collection name + c_schema->name = zvec_string_create(cpp_schema.name().c_str()); + if (!c_schema->name) { + free(c_schema); + set_last_error("Failed to allocate memory for collection name"); + return ZVEC_ERROR_RESOURCE_EXHAUSTED; + } + + // Convert and copy fields + const auto &cpp_fields = cpp_schema.fields(); + c_schema->field_count = cpp_fields.size(); + c_schema->field_capacity = cpp_fields.size(); + + if (c_schema->field_count > 0) { + // Allocate array of field pointers + c_schema->fields = static_cast( + malloc(c_schema->field_count * sizeof(ZVecFieldSchema *))); + if (!c_schema->fields) { + zvec_collection_schema_destroy(c_schema); + set_last_error("Failed to allocate memory for fields"); + return ZVEC_ERROR_RESOURCE_EXHAUSTED; + } + + // Initialize all field pointers to nullptr + for (size_t i = 0; i < c_schema->field_count; ++i) { + c_schema->fields[i] = nullptr; + } + + size_t i = 0; + for (const auto &cpp_field : cpp_fields) { + try { + // Create new field schema + c_schema->fields[i] = static_cast( + malloc(sizeof(ZVecFieldSchema))); + if (!c_schema->fields[i]) { + throw std::bad_alloc(); + } + + // Copy field name using zvec_string_create + c_schema->fields[i]->name = + zvec_string_create(cpp_field->name().c_str()); + if (!c_schema->fields[i]->name) { + throw std::bad_alloc(); + } + + // Convert data type + c_schema->fields[i]->data_type = + convert_zvec_data_type(cpp_field->data_type()); + + // Copy dimension for vector fields + c_schema->fields[i]->dimension = cpp_field->dimension(); + + // Copy nullable flag + c_schema->fields[i]->nullable = cpp_field->nullable(); + + // Initialize index parameters to 
nullptr + c_schema->fields[i]->index_params = nullptr; + c_schema->fields[i]->has_index = false; + + // Convert index parameters based on the actual type + auto index_params = cpp_field->index_params(); + if (index_params) { + // Use helper function to convert C++ index params to C + c_schema->fields[i]->index_params = + convert_cpp_index_params_to_c(index_params); + if (c_schema->fields[i]->index_params) { + c_schema->fields[i]->has_index = true; + } + } + } catch (const std::bad_alloc &) { + // Clean up already allocated fields + for (size_t j = 0; j <= i; ++j) { + if (c_schema->fields[j]) { + zvec_field_schema_destroy(c_schema->fields[j]); + } + } + free(c_schema->fields); + zvec_free_string(c_schema->name); + free(c_schema); + set_last_error("Failed to allocate memory for field"); + return ZVEC_ERROR_RESOURCE_EXHAUSTED; + } + + ++i; + } + } + + *schema = c_schema; + } + + return error_code;) +} + +ZVecErrorCode zvec_collection_get_options(const ZVecCollection *collection, + ZVecCollectionOptions **options) { + if (!collection || !options) { + set_last_error("Invalid arguments"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + ZVEC_TRY_RETURN_ERROR( + "Failed to get collection options", + auto collection_ptr = + reinterpret_cast *>( + collection); + auto result = (*collection_ptr)->Options(); + + if (!result.has_value()) { + set_last_error("Failed to get collection option: " + + result.error().message()); + return ZVEC_ERROR_INTERNAL_ERROR; + } + + // Create and initialize options using new + *options = new ZVecCollectionOptions(); + if (!*options) { + set_last_error("Failed to allocate memory for options"); + return ZVEC_ERROR_RESOURCE_EXHAUSTED; + } + + (*options) + ->enable_mmap = result.value().enable_mmap_; + (*options)->max_buffer_size = result.value().max_buffer_size_; + (*options)->read_only = result.value().read_only_; + (*options)->max_doc_count_per_segment = zvec::MAX_DOC_COUNT_PER_SEGMENT; + + return ZVEC_OK;) +} + +ZVecErrorCode 
zvec_collection_get_stats(const ZVecCollection *collection, + ZVecCollectionStats **stats) { + if (!collection || !stats) { + set_last_error("Invalid arguments"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + ZVEC_TRY_RETURN_ERROR( + "Failed to get detailed collection stats", + auto collection_ptr = + reinterpret_cast *>( + collection); + auto result = (*collection_ptr)->Stats(); + + if (!result.has_value()) { + set_last_error("Failed to get collection stats: " + + result.error().message()); + return ZVEC_ERROR_INTERNAL_ERROR; + } + + *stats = new ZVecCollectionStats(); + if (!*stats) { + set_last_error("Failed to allocate memory for stats"); + return ZVEC_ERROR_RESOURCE_EXHAUSTED; + } + + ZVecErrorCode error_code = handle_expected_result(result); + if (error_code == ZVEC_OK) { + (*stats)->doc_count = result.value().doc_count; + (*stats)->index_count = result.value().index_completeness.size(); + if ((*stats)->index_count > 0) { + (*stats)->index_completeness = static_cast( + malloc((*stats)->index_count * sizeof(float))); + (*stats)->index_names = static_cast( + malloc((*stats)->index_count * sizeof(ZVecString *))); + int i = 0; + for (auto &[name, completeness] : result.value().index_completeness) { + (*stats)->index_completeness[i] = completeness; + (*stats)->index_names[i] = zvec_string_create(name.c_str()); + i++; + } + } + } else { + (*stats)->index_completeness = nullptr; + (*stats)->index_names = nullptr; + } + + return error_code;) +} + +void zvec_collection_stats_destroy(ZVecCollectionStats *stats) { + if (stats) { + if (stats->index_names) { + for (size_t i = 0; i < stats->index_count; ++i) { + zvec_free_string(stats->index_names[i]); + } + free(stats->index_names); + } + + if (stats->index_completeness) { + free(stats->index_completeness); + } + + free(stats); + } +} + +// ============================================================================= +// QueryParams functions implementation +// 
============================================================================= + +ZVecQueryParams *zvec_query_params_create(ZVecIndexType index_type) { + ZVEC_TRY_RETURN_NULL("Failed to create ZVecQueryParams", + ZVecQueryParams *params = new ZVecQueryParams(); + params->index_type = index_type; params->radius = 0.0f; + params->is_linear = false; + params->is_using_refiner = false; return params;) + return nullptr; +} + +void zvec_query_params_destroy(ZVecQueryParams *params) { + if (params) { + delete params; + } +} + +ZVecErrorCode zvec_query_params_set_index_type(ZVecQueryParams *params, + ZVecIndexType index_type) { + if (!params) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, "Query params pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + params->index_type = index_type; + return ZVEC_OK; +} + +ZVecIndexType zvec_query_params_get_index_type(const ZVecQueryParams *params) { + if (!params) { + return ZVEC_INDEX_TYPE_UNDEFINED; + } + return params->index_type; +} + +ZVecErrorCode zvec_query_params_set_radius(ZVecQueryParams *params, + float radius) { + if (!params) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, "Query params pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + params->radius = radius; + return ZVEC_OK; +} + +float zvec_query_params_get_radius(const ZVecQueryParams *params) { + if (!params) { + return 0.0f; + } + return params->radius; +} + +ZVecErrorCode zvec_query_params_set_is_linear(ZVecQueryParams *params, + bool is_linear) { + if (!params) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, "Query params pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + params->is_linear = is_linear; + return ZVEC_OK; +} + +bool zvec_query_params_get_is_linear(const ZVecQueryParams *params) { + if (!params) { + return false; + } + return params->is_linear; +} + +ZVecErrorCode zvec_query_params_set_is_using_refiner(ZVecQueryParams *params, + bool is_using_refiner) { + if (!params) { + 
SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, "Query params pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + params->is_using_refiner = is_using_refiner; + return ZVEC_OK; +} + +bool zvec_query_params_get_is_using_refiner(const ZVecQueryParams *params) { + if (!params) { + return false; + } + return params->is_using_refiner; +} + +// ============================================================================= +// HnswQueryParams functions implementation +// ============================================================================= + +ZVecHnswQueryParams *zvec_query_params_hnsw_create(int ef, float radius, + bool is_linear, + bool is_using_refiner) { + ZVEC_TRY_RETURN_NULL("Failed to create ZVecHnswQueryParams", + ZVecHnswQueryParams *params = new ZVecHnswQueryParams(); + params->base.index_type = ZVEC_INDEX_TYPE_HNSW; + params->base.radius = radius; + params->base.is_linear = is_linear; + params->base.is_using_refiner = is_using_refiner; + params->ef = ef; return params;) + return nullptr; +} + +void zvec_query_params_hnsw_destroy(ZVecHnswQueryParams *params) { + if (params) { + delete params; + } +} + +ZVecErrorCode zvec_query_params_hnsw_set_ef(ZVecHnswQueryParams *params, + int ef) { + if (!params) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "HNSW query params pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + params->ef = ef; + return ZVEC_OK; +} + +int zvec_query_params_hnsw_get_ef(const ZVecHnswQueryParams *params) { + if (!params) { + return zvec::core_interface::kDefaultHnswEfSearch; + } + return params->ef; +} + +// ============================================================================= +// IVFQueryParams functions implementation +// ============================================================================= + +ZVecIVFQueryParams *zvec_query_params_ivf_create(int nprobe, + bool is_using_refiner, + float scale_factor) { + ZVEC_TRY_RETURN_NULL("Failed to create ZVecIVFQueryParams", + ZVecIVFQueryParams *params = new 
ZVecIVFQueryParams(); + params->base.index_type = ZVEC_INDEX_TYPE_IVF; + params->base.is_using_refiner = is_using_refiner; + params->nprobe = nprobe; + params->scale_factor = scale_factor; return params;) + return nullptr; +} + +void zvec_query_params_ivf_destroy(ZVecIVFQueryParams *params) { + if (params) { + delete params; + } +} + +ZVecErrorCode zvec_query_params_ivf_set_nprobe(ZVecIVFQueryParams *params, + int nprobe) { + if (!params) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "IVF query params pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + params->nprobe = nprobe; + return ZVEC_OK; +} + +int zvec_query_params_ivf_get_nprobe(const ZVecIVFQueryParams *params) { + if (!params) { + return 10; + } + return params->nprobe; +} + +ZVecErrorCode zvec_query_params_ivf_set_scale_factor(ZVecIVFQueryParams *params, + float scale_factor) { + if (!params) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "IVF query params pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + params->scale_factor = scale_factor; + return ZVEC_OK; +} + +float zvec_query_params_ivf_get_scale_factor(const ZVecIVFQueryParams *params) { + if (!params) { + return 10.0f; + } + return params->scale_factor; +} + +// ============================================================================= +// FlatQueryParams functions implementation +// ============================================================================= + +ZVecFlatQueryParams *zvec_query_params_flat_create(bool is_using_refiner, + float scale_factor) { + ZVEC_TRY_RETURN_NULL("Failed to create ZVecFlatQueryParams", + ZVecFlatQueryParams *params = new ZVecFlatQueryParams(); + params->base.index_type = ZVEC_INDEX_TYPE_FLAT; + params->base.is_using_refiner = is_using_refiner; + params->scale_factor = scale_factor; return params;) + return nullptr; +} + +void zvec_query_params_flat_destroy(ZVecFlatQueryParams *params) { + if (params) { + delete params; + } +} + +ZVecErrorCode 
zvec_query_params_flat_set_scale_factor( + ZVecFlatQueryParams *params, float scale_factor) { + if (!params) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Flat query params pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + params->scale_factor = scale_factor; + return ZVEC_OK; +} + +float zvec_query_params_flat_get_scale_factor( + const ZVecFlatQueryParams *params) { + if (!params) { + return 10.0f; + } + return params->scale_factor; +} + +// ============================================================================= +// VectorQuery and GroupByVectorQuery functions implementation +// ============================================================================= + +ZVecVectorQuery *zvec_vector_query_create(void) { + ZVEC_TRY_RETURN_NULL( + "Failed to create ZVecVectorQuery", + ZVecVectorQuery *query = new ZVecVectorQuery(); + query->topk = 10; query->field_name = nullptr; + query->query_vector.data = nullptr; query->query_vector.length = 0; + query->query_sparse_indices.data = nullptr; + query->query_sparse_indices.length = 0; + query->query_sparse_values.data = nullptr; + query->query_sparse_values.length = 0; query->filter = nullptr; + query->include_vector = false; query->include_doc_id = true; + query->output_fields = nullptr; query->query_params = nullptr; + query->params_type = ZVEC_INDEX_TYPE_UNDEFINED; return query;) + return nullptr; +} + +void zvec_vector_query_destroy(ZVecVectorQuery *query) { + if (query) { + if (query->field_name) { + zvec_free_string(query->field_name); + } + if (query->filter) { + zvec_free_string(query->filter); + } + if (query->output_fields) { + zvec_string_array_destroy(query->output_fields); + } + if (query->query_params) { + // Delete type-specific params based on params_type + switch (query->params_type) { + case ZVEC_INDEX_TYPE_HNSW: + delete static_cast(query->query_params); + break; + case ZVEC_INDEX_TYPE_IVF: + delete static_cast(query->query_params); + break; + case ZVEC_INDEX_TYPE_FLAT: + delete 
static_cast(query->query_params); + break; + default: + delete static_cast(query->query_params); + break; + } + } + delete query; + } +} + +ZVecErrorCode zvec_vector_query_set_topk(ZVecVectorQuery *query, int topk) { + if (!query) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, "Vector query pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + query->topk = topk; + return ZVEC_OK; +} + +int zvec_vector_query_get_topk(const ZVecVectorQuery *query) { + if (!query) { + return 10; + } + return query->topk; +} + +ZVecErrorCode zvec_vector_query_set_field_name(ZVecVectorQuery *query, + const char *field_name) { + if (!query) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, "Vector query pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + if (query->field_name) { + zvec_free_string(query->field_name); + } + query->field_name = zvec_string_create(field_name); + return ZVEC_OK; +} + +const char *zvec_vector_query_get_field_name(const ZVecVectorQuery *query) { + if (!query || !query->field_name) { + return nullptr; + } + return query->field_name->data; +} + +ZVecErrorCode zvec_vector_query_set_query_vector(ZVecVectorQuery *query, + const void *data, + size_t size) { + if (!query) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, "Vector query pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + query->query_vector.data = (const uint8_t *)data; + query->query_vector.length = size; + return ZVEC_OK; +} + +ZVecErrorCode zvec_vector_query_set_filter(ZVecVectorQuery *query, + const char *filter) { + if (!query) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, "Vector query pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + if (query->filter) { + zvec_free_string(query->filter); + } + if (filter && strlen(filter) > 0) { + query->filter = zvec_string_create(filter); + } else { + query->filter = nullptr; + } + return ZVEC_OK; +} + +const char *zvec_vector_query_get_filter(const ZVecVectorQuery *query) { + if (!query || !query->filter) { + 
return nullptr; + } + return query->filter->data; +} + +ZVecErrorCode zvec_vector_query_set_include_vector(ZVecVectorQuery *query, + bool include) { + if (!query) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, "Vector query pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + query->include_vector = include; + return ZVEC_OK; +} + +ZVecErrorCode zvec_vector_query_set_include_doc_id(ZVecVectorQuery *query, + bool include) { + if (!query) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, "Vector query pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + query->include_doc_id = include; + return ZVEC_OK; +} + +ZVecErrorCode zvec_vector_query_set_output_fields(ZVecVectorQuery *query, + const char **fields, + size_t count) { + if (!query) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, "Vector query pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + if (query->output_fields) { + zvec_string_array_destroy(query->output_fields); + } + if (fields && count > 0) { + query->output_fields = zvec_string_array_create_from_strings(fields, count); + } else { + query->output_fields = nullptr; + } + return ZVEC_OK; +} + +ZVecErrorCode zvec_vector_query_set_query_params(ZVecVectorQuery *query, + void *params) { + if (!query) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, "Vector query pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + // Note: We don't delete old params here, caller should manage lifetime + query->query_params = params; + // Set params_type based on the type of params (caller should ensure + // consistency) For now, we assume params is one of the known types + if (params) { + // We can't automatically determine the type, so we'll need to trust the + // caller to set the correct type via a separate call if needed + query->params_type = ZVEC_INDEX_TYPE_UNDEFINED; + } + return ZVEC_OK; +} + +// GroupByVectorQuery functions + +ZVecGroupByVectorQuery *zvec_group_by_vector_query_create(void) { + ZVEC_TRY_RETURN_NULL( + "Failed to 
create ZVecGroupByVectorQuery", + ZVecGroupByVectorQuery *query = new ZVecGroupByVectorQuery(); + query->field_name = nullptr; query->query_vector.data = nullptr; + query->query_vector.length = 0; + query->query_sparse_indices.data = nullptr; + query->query_sparse_indices.length = 0; + query->query_sparse_values.data = nullptr; + query->query_sparse_values.length = 0; query->filter = nullptr; + query->include_vector = false; query->output_fields = nullptr; + query->group_by_field_name = nullptr; query->group_count = 0; + query->group_topk = 0; query->query_params = nullptr; + query->params_type = ZVEC_INDEX_TYPE_UNDEFINED; return query;) + return nullptr; +} + +void zvec_group_by_vector_query_destroy(ZVecGroupByVectorQuery *query) { + if (query) { + if (query->field_name) { + zvec_free_string(query->field_name); + } + if (query->filter) { + zvec_free_string(query->filter); + } + if (query->output_fields) { + zvec_string_array_destroy(query->output_fields); + } + if (query->group_by_field_name) { + zvec_free_string(query->group_by_field_name); + } + if (query->query_params) { + switch (query->params_type) { + case ZVEC_INDEX_TYPE_HNSW: + delete static_cast(query->query_params); + break; + case ZVEC_INDEX_TYPE_IVF: + delete static_cast(query->query_params); + break; + case ZVEC_INDEX_TYPE_FLAT: + delete static_cast(query->query_params); + break; + default: + delete static_cast(query->query_params); + break; + } + } + delete query; + } +} + +ZVecErrorCode zvec_group_by_vector_query_set_field_name( + ZVecGroupByVectorQuery *query, const char *field_name) { + if (!query) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Group by vector query pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + if (query->field_name) { + zvec_free_string(query->field_name); + } + query->field_name = zvec_string_create(field_name); + return ZVEC_OK; +} + +ZVecErrorCode zvec_group_by_vector_query_set_group_by_field_name( + ZVecGroupByVectorQuery *query, const char *field_name) { 
+ if (!query) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Group by vector query pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + if (query->group_by_field_name) { + zvec_free_string(query->group_by_field_name); + } + query->group_by_field_name = zvec_string_create(field_name); + return ZVEC_OK; +} + +ZVecErrorCode zvec_group_by_vector_query_set_group_count( + ZVecGroupByVectorQuery *query, uint32_t count) { + if (!query) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Group by vector query pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + query->group_count = count; + return ZVEC_OK; +} + +ZVecErrorCode zvec_group_by_vector_query_set_group_topk( + ZVecGroupByVectorQuery *query, uint32_t topk) { + if (!query) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Group by vector query pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + query->group_topk = topk; + return ZVEC_OK; +} + +ZVecErrorCode zvec_group_by_vector_query_set_query_vector( + ZVecGroupByVectorQuery *query, const void *data, size_t size) { + if (!query) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Group by vector query pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + query->query_vector.data = (const uint8_t *)data; + query->query_vector.length = size; + return ZVEC_OK; +} + +ZVecErrorCode zvec_group_by_vector_query_set_filter( + ZVecGroupByVectorQuery *query, const char *filter) { + if (!query) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Group by vector query pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + if (query->filter) { + zvec_free_string(query->filter); + } + if (filter && strlen(filter) > 0) { + query->filter = zvec_string_create(filter); + } else { + query->filter = nullptr; + } + return ZVEC_OK; +} + +ZVecErrorCode zvec_group_by_vector_query_set_include_vector( + ZVecGroupByVectorQuery *query, bool include) { + if (!query) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Group by vector query pointer 
is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + query->include_vector = include; + return ZVEC_OK; +} + +ZVecErrorCode zvec_group_by_vector_query_set_output_fields( + ZVecGroupByVectorQuery *query, const char **fields, size_t count) { + if (!query) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Group by vector query pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + if (query->output_fields) { + zvec_string_array_destroy(query->output_fields); + } + if (fields && count > 0) { + query->output_fields = zvec_string_array_create_from_strings(fields, count); + } else { + query->output_fields = nullptr; + } + return ZVEC_OK; +} + +ZVecErrorCode zvec_group_by_vector_query_set_query_params( + ZVecGroupByVectorQuery *query, void *params) { + if (!query) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Group by vector query pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + query->query_params = params; + query->params_type = ZVEC_INDEX_TYPE_UNDEFINED; + return ZVEC_OK; +} + +// ============================================================================= +// Index Interface Implementation +// ============================================================================= + +ZVecErrorCode zvec_collection_create_index( + ZVecCollection *collection, const char *column_name, + const ZVecIndexParams *index_params) { + if (!collection || !column_name || !index_params) { + set_last_error( + "Invalid arguments: collection, column_name, and index_params cannot " + "be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + ZVEC_TRY_RETURN_ERROR("Exception in zvec_collection_create_index", + auto coll_ptr = + reinterpret_cast *>(collection); + std::string field_name_str(column_name); + + switch (index_params->index_type) { + case ZVEC_INDEX_TYPE_INVERT: { + auto cpp_params = std::make_shared( + index_params->invert.enable_range_optimization, + index_params->invert.enable_extended_wildcard); + auto status = (*coll_ptr)->CreateIndex(field_name_str, 
cpp_params); + return status_to_error_code(status); +} + +case ZVEC_INDEX_TYPE_HNSW: { + auto metric = convert_metric_type(index_params->metric_type); + auto quantize = convert_quantize_type(index_params->quantize_type); + auto cpp_params = std::make_shared( + metric, index_params->hnsw.m, index_params->hnsw.ef_construction, + quantize); + auto status = (*coll_ptr)->CreateIndex(field_name_str, cpp_params); + return status_to_error_code(status); +} + +case ZVEC_INDEX_TYPE_FLAT: { + auto metric = convert_metric_type(index_params->metric_type); + auto quantize = convert_quantize_type(index_params->quantize_type); + auto cpp_params = std::make_shared(metric, quantize); + auto status = (*coll_ptr)->CreateIndex(field_name_str, cpp_params); + return status_to_error_code(status); +} + +case ZVEC_INDEX_TYPE_IVF: { + auto metric = convert_metric_type(index_params->metric_type); + auto quantize = convert_quantize_type(index_params->quantize_type); + auto cpp_params = std::make_shared( + metric, index_params->ivf.n_list, index_params->ivf.n_iters, + index_params->ivf.use_soar, quantize); + auto status = (*coll_ptr)->CreateIndex(field_name_str, cpp_params); + return status_to_error_code(status); +} + +default: { + set_last_error("Unsupported index type"); + return ZVEC_ERROR_INVALID_ARGUMENT; +} + } + ) + } + + // Legacy function - kept for backward compatibility, just calls + // zvec_collection_create_index + ZVecErrorCode zvec_collection_create_hnsw_index( + ZVecCollection *collection, const char *field_name, + const ZVecIndexParams *hnsw_params) { + if (!hnsw_params) { + set_last_error("Invalid HNSW parameters"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + return zvec_collection_create_index(collection, field_name, hnsw_params); + } + + ZVecErrorCode zvec_collection_create_flat_index( + ZVecCollection *collection, const char *field_name, + const ZVecIndexParams *flat_params) { + if (!flat_params) { + set_last_error("Invalid Flat parameters"); + return 
ZVEC_ERROR_INVALID_ARGUMENT; + } + return zvec_collection_create_index(collection, field_name, flat_params); + } + + ZVecErrorCode zvec_collection_create_ivf_index( + ZVecCollection *collection, const char *field_name, + const ZVecIndexParams *ivf_params) { + if (!ivf_params) { + set_last_error("Invalid IVF parameters"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + return zvec_collection_create_index(collection, field_name, ivf_params); + } + + ZVecErrorCode zvec_collection_create_invert_index( + ZVecCollection *collection, const char *field_name, + const ZVecIndexParams *invert_params) { + if (!invert_params) { + set_last_error("Invalid Invert parameters"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + return zvec_collection_create_index(collection, field_name, invert_params); + } + + ZVecErrorCode zvec_collection_drop_index(ZVecCollection *collection, + const char *column_name) { + if (!collection || !column_name) { + set_last_error( + "Invalid arguments: collection and column_name cannot be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + ZVEC_TRY_RETURN_ERROR( + "Exception occurred", + auto coll_ptr = + reinterpret_cast *>(collection); + zvec::Status status = (*coll_ptr)->DropIndex(column_name); + if (!status.ok()) { set_last_error(status.message()); } + + return status_to_error_code(status);) + } + + ZVecErrorCode zvec_collection_optimize(ZVecCollection *collection) { + if (!collection) { + set_last_error("Invalid argument: collection cannot be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + ZVEC_TRY_RETURN_ERROR( + "Exception occurred", + auto coll_ptr = + reinterpret_cast *>(collection); + zvec::Status status = (*coll_ptr)->Optimize(); + if (!status.ok()) { set_last_error(status.message()); } + + return status_to_error_code(status);) + } + + + // ============================================================================= + // Column Interface Implementation + // ============================================================================= + + 
ZVecErrorCode zvec_collection_add_column(ZVecCollection *collection, + const ZVecFieldSchema *field_schema, + const char *expression) { + if (!collection || !field_schema) { + set_last_error( + "Invalid arguments: collection and field_schema cannot be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + ZVEC_TRY_RETURN_ERROR( + "Exception occurred", + auto coll_ptr = + reinterpret_cast *>(collection); + + zvec::DataType data_type = + convert_data_type(zvec_field_schema_get_data_type(field_schema)); + if (data_type == zvec::DataType::UNDEFINED) { + set_last_error("Invalid data type"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + std::string field_name(zvec_field_schema_get_name(field_schema)); + bool is_vector_field = check_is_vector_field(*field_schema); + zvec::FieldSchema::Ptr schema; + if (is_vector_field) { + schema = std::make_shared( + field_name, data_type, + zvec_field_schema_get_dimension(field_schema), + zvec_field_schema_is_nullable(field_schema)); + } else { + schema = std::make_shared( + field_name, data_type, + zvec_field_schema_is_nullable(field_schema)); + } + + std::string expr = expression ? 
expression : ""; + zvec::Status status = (*coll_ptr)->AddColumn(schema, expr); + + if (!status.ok()) { set_last_error(status.message()); } + + return status_to_error_code(status);) + } + + ZVecErrorCode zvec_collection_drop_column(ZVecCollection *collection, + const char *column_name) { + if (!collection || !column_name) { + set_last_error( + "Invalid arguments: collection and column_name cannot be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + ZVEC_TRY_RETURN_ERROR( + "Exception occurred", + auto coll_ptr = + reinterpret_cast *>(collection); + zvec::Status status = (*coll_ptr)->DropColumn(column_name); + + if (!status.ok()) { set_last_error(status.message()); } + + return status_to_error_code(status);) + } + + ZVecErrorCode zvec_collection_alter_column( + ZVecCollection *collection, const char *column_name, const char *new_name, + const ZVecFieldSchema *new_schema) { + if (!collection || !column_name) { + set_last_error( + "Invalid arguments: collection and column_name cannot be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + ZVEC_TRY_RETURN_ERROR( + "Exception occurred", + auto coll_ptr = + reinterpret_cast *>(collection); + std::string rename = new_name ? 
new_name : ""; + + zvec::FieldSchema::Ptr schema = nullptr; + if (new_schema) { + auto status = + convert_zvec_field_schema_to_internal(new_schema, schema); + if (!status.ok()) { + set_last_error(status.message()); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + } + + zvec::Status status = + (*coll_ptr)->AlterColumn(column_name, rename, schema); + if (!status.ok()) { set_last_error(status.message()); } + + return status_to_error_code(status);) + } + + // ============================================================================= + // DML Interface Implementation + // ============================================================================= + + ZVecErrorCode zvec_collection_insert(ZVecCollection *collection, + const ZVecDoc **docs, size_t doc_count, + size_t *success_count, + size_t *error_count) { + if (!collection || !docs || doc_count == 0 || !success_count || + !error_count) { + set_last_error( + "Invalid arguments: collection, docs, doc_count, success_count and " + "error_count cannot be null/zero"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + ZVEC_TRY_RETURN_ERROR( + "Exception in zvec_collection_insert_docs", + auto coll_ptr = + reinterpret_cast *>(collection); + + std::vector internal_docs = + convert_zvec_docs_to_internal(docs, doc_count); + + auto result = (*coll_ptr)->Insert(internal_docs); + ZVecErrorCode error_code = handle_expected_result(result); + + if (error_code == ZVEC_OK) { + *success_count = 0; + *error_count = 0; + for (const auto &status : result.value()) { + if (status.ok()) { + (*success_count)++; + } else { + (*error_count)++; + } + } + } else { + *success_count = 0; + *error_count = doc_count; + } + + return error_code;) + } + + ZVecErrorCode zvec_collection_insert_with_results(ZVecCollection *collection, + const ZVecDoc **docs, + size_t doc_count, + ZVecWriteResult **results, + size_t *result_count) { + if (!collection || !docs || doc_count == 0 || !results || !result_count) { + set_last_error( + "Invalid arguments: collection, docs, 
doc_count, results and " + "result_count cannot be null/zero"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + *results = nullptr; + *result_count = 0; + + ZVEC_TRY_RETURN_ERROR( + "Exception in zvec_collection_insert_with_results", + auto coll_ptr = + reinterpret_cast *>(collection); + + std::vector internal_docs = + convert_zvec_docs_to_internal(docs, doc_count); + std::vector pks = collect_doc_pks(docs, doc_count); + + auto result = (*coll_ptr)->Insert(internal_docs); + ZVecErrorCode error_code = handle_expected_result(result); + + if (error_code != ZVEC_OK) { return error_code; } + + return build_write_results(result.value(), pks, results, result_count);) + } + + ZVecErrorCode zvec_collection_update(ZVecCollection *collection, + const ZVecDoc **docs, size_t doc_count, + size_t *success_count, + size_t *error_count) { + if (!collection || !docs || doc_count == 0 || !success_count || + !error_count) { + set_last_error( + "Invalid arguments: collection, docs, doc_count, success_count and " + "error_count cannot be null/zero"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + ZVEC_TRY_RETURN_ERROR( + "Exception occurred", + auto coll_ptr = + reinterpret_cast *>(collection); + + std::vector internal_docs = + convert_zvec_docs_to_internal(docs, doc_count); + + auto result = (*coll_ptr)->Update(internal_docs); + ZVecErrorCode error_code = handle_expected_result(result); + + if (error_code == ZVEC_OK) { + *success_count = 0; + *error_count = 0; + for (const auto &status : result.value()) { + if (status.ok()) { + (*success_count)++; + } else { + (*error_count)++; + } + } + } + + return error_code;) + } + + ZVecErrorCode zvec_collection_update_with_results(ZVecCollection *collection, + const ZVecDoc **docs, + size_t doc_count, + ZVecWriteResult **results, + size_t *result_count) { + if (!collection || !docs || doc_count == 0 || !results || !result_count) { + set_last_error( + "Invalid arguments: collection, docs, doc_count, results and " + "result_count cannot be 
null/zero"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + *results = nullptr; + *result_count = 0; + + ZVEC_TRY_RETURN_ERROR( + "Exception in zvec_collection_update_with_results", + auto coll_ptr = + reinterpret_cast *>(collection); + + std::vector internal_docs = + convert_zvec_docs_to_internal(docs, doc_count); + std::vector pks = collect_doc_pks(docs, doc_count); + + auto result = (*coll_ptr)->Update(internal_docs); + ZVecErrorCode error_code = handle_expected_result(result); + + if (error_code != ZVEC_OK) { return error_code; } + + return build_write_results(result.value(), pks, results, result_count);) + } + + ZVecErrorCode zvec_collection_upsert(ZVecCollection *collection, + const ZVecDoc **docs, size_t doc_count, + size_t *success_count, + size_t *error_count) { + if (!collection || !docs || doc_count == 0 || !success_count || + !error_count) { + set_last_error( + "Invalid arguments: collection, docs, doc_count, success_count and " + "error_count cannot be null/zero"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + ZVEC_TRY_RETURN_ERROR( + "Exception occurred", + auto coll_ptr = + reinterpret_cast *>(collection); + + std::vector internal_docs = + convert_zvec_docs_to_internal(docs, doc_count); + + auto result = (*coll_ptr)->Upsert(internal_docs); + ZVecErrorCode error_code = handle_expected_result(result); + + if (error_code == ZVEC_OK) { + *success_count = 0; + *error_count = 0; + for (const auto &status : result.value()) { + if (status.ok()) { + (*success_count)++; + } else { + (*error_count)++; + } + } + } + + return error_code;) + } + + ZVecErrorCode zvec_collection_upsert_with_results(ZVecCollection *collection, + const ZVecDoc **docs, + size_t doc_count, + ZVecWriteResult **results, + size_t *result_count) { + if (!collection || !docs || doc_count == 0 || !results || !result_count) { + set_last_error( + "Invalid arguments: collection, docs, doc_count, results and " + "result_count cannot be null/zero"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + 
*results = nullptr; + *result_count = 0; + + ZVEC_TRY_RETURN_ERROR( + "Exception in zvec_collection_upsert_with_results", + auto coll_ptr = + reinterpret_cast *>(collection); + + std::vector internal_docs = + convert_zvec_docs_to_internal(docs, doc_count); + std::vector pks = collect_doc_pks(docs, doc_count); + + auto result = (*coll_ptr)->Upsert(internal_docs); + ZVecErrorCode error_code = handle_expected_result(result); + + if (error_code != ZVEC_OK) { return error_code; } + + return build_write_results(result.value(), pks, results, result_count);) + } + + ZVecErrorCode zvec_collection_delete(ZVecCollection *collection, + const char *const *pks, size_t pk_count, + size_t *success_count, + size_t *error_count) { + if (!collection || !pks || pk_count == 0 || !success_count || + !error_count) { + set_last_error( + "Invalid arguments: collection, pks, pk_count, success_count and " + "error_count cannot be null/zero"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + ZVEC_TRY_RETURN_ERROR( + "Exception occurred", + auto coll_ptr = + reinterpret_cast *>(collection); + + std::vector primary_keys; primary_keys.reserve(pk_count); + for (size_t i = 0; i < pk_count; ++i) { + if (pks[i]) { + primary_keys.emplace_back(pks[i]); + } + } + + auto result = (*coll_ptr)->Delete(primary_keys); + ZVecErrorCode error_code = handle_expected_result(result); + + if (error_code == ZVEC_OK) { + *success_count = 0; + *error_count = 0; + for (const auto &status : result.value()) { + if (status.ok()) { + (*success_count)++; + } else { + (*error_count)++; + } + } + } + + return error_code;) + } + + ZVecErrorCode zvec_collection_delete_with_results(ZVecCollection *collection, + const char *const *pks, + size_t pk_count, + ZVecWriteResult **results, + size_t *result_count) { + if (!collection || !pks || pk_count == 0 || !results || !result_count) { + set_last_error( + "Invalid arguments: collection, pks, pk_count, results and " + "result_count cannot be null/zero"); + return 
ZVEC_ERROR_INVALID_ARGUMENT; + } + + *results = nullptr; + *result_count = 0; + + ZVEC_TRY_RETURN_ERROR( + "Exception in zvec_collection_delete_with_results", + auto coll_ptr = + reinterpret_cast *>(collection); + + std::vector primary_keys; primary_keys.reserve(pk_count); + for (size_t i = 0; i < pk_count; ++i) { + if (pks[i]) { + primary_keys.emplace_back(pks[i]); + } else { + primary_keys.emplace_back(""); + } + } + + auto result = (*coll_ptr)->Delete(primary_keys); + ZVecErrorCode error_code = handle_expected_result(result); + + if (error_code != ZVEC_OK) { return error_code; } + + return build_write_results(result.value(), primary_keys, results, + result_count);) + } + + ZVecErrorCode zvec_collection_delete_by_filter(ZVecCollection *collection, + const char *filter) { + if (!collection || !filter) { + set_last_error("Invalid arguments: collection,filter cannot be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + ZVEC_TRY_RETURN_ERROR( + "Exception occurred", + auto coll_ptr = + reinterpret_cast *>(collection); + + auto status = (*coll_ptr)->DeleteByFilter(filter); if (!status.ok()) { + set_last_error(status.message()); + return status_to_error_code(status); + } return ZVEC_OK;) + } + + // ============================================================================= + // Data query interface implementation + // ============================================================================= + + // Helper function to convert common query parameters + void convert_common_query_params(zvec::VectorQuery &internal_query, + const ZVecVectorQuery *query) { + internal_query.topk_ = query->topk; + internal_query.field_name_ = + query->field_name + ? std::string(query->field_name->data, query->field_name->length) + : ""; + internal_query.filter_ = + query->filter ? 
std::string(query->filter->data, query->filter->length) + : ""; + internal_query.include_vector_ = query->include_vector; + internal_query.include_doc_id_ = query->include_doc_id; + + // Binary data conversion (query_vector) + if (query->query_vector.data && query->query_vector.length > 0) { + internal_query.query_vector_.assign( + reinterpret_cast(query->query_vector.data), + query->query_vector.length); + } + + // Sparse vector data conversion + if (query->query_sparse_indices.data && + query->query_sparse_indices.length > 0) { + internal_query.query_sparse_indices_.assign( + reinterpret_cast(query->query_sparse_indices.data), + query->query_sparse_indices.length); + } + + if (query->query_sparse_values.data && + query->query_sparse_values.length > 0) { + internal_query.query_sparse_values_.assign( + reinterpret_cast(query->query_sparse_values.data), + query->query_sparse_values.length); + } + + // Output fields conversion + if (query->output_fields && query->output_fields->count > 0) { + internal_query.output_fields_ = std::vector(); + for (size_t i = 0; i < query->output_fields->count; ++i) { + internal_query.output_fields_->emplace_back( + query->output_fields->strings[i].data, + query->output_fields->strings[i].length); + } + } + } + + // Helper function to convert query parameters + void convert_query_params(zvec::VectorQuery &internal_query, + const ZVecVectorQuery *query) { + convert_common_query_params(internal_query, query); + + // QueryParams conversion + if (query->query_params) { + switch (query->params_type) { + case ZVEC_INDEX_TYPE_HNSW: { + auto hnsw_params = + static_cast(query->query_params); + auto internal_params = std::make_shared( + hnsw_params->ef, hnsw_params->base.radius, + hnsw_params->base.is_linear, hnsw_params->base.is_using_refiner); + internal_query.query_params_ = internal_params; + break; + } + case ZVEC_INDEX_TYPE_IVF: { + auto ivf_params = + static_cast(query->query_params); + auto internal_params = std::make_shared( + 
ivf_params->nprobe, ivf_params->base.is_using_refiner, + ivf_params->scale_factor); + internal_query.query_params_ = internal_params; + break; + } + case ZVEC_INDEX_TYPE_FLAT: { + auto flat_params = + static_cast(query->query_params); + auto internal_params = std::make_shared( + flat_params->base.is_using_refiner, flat_params->scale_factor); + internal_query.query_params_ = internal_params; + break; + } + default: { + auto base_params = + static_cast(query->query_params); + auto internal_params = std::make_shared( + static_cast(base_params->index_type)); + internal_params->set_radius(base_params->radius); + internal_params->set_is_linear(base_params->is_linear); + internal_params->set_is_using_refiner(base_params->is_using_refiner); + internal_query.query_params_ = internal_params; + break; + } + } + } + } + + // Helper function to convert group by query parameters + void convert_groupby_query_params(zvec::GroupByVectorQuery &internal_query, + const ZVecGroupByVectorQuery *query) { + internal_query.field_name_ = + query->field_name + ? std::string(query->field_name->data, query->field_name->length) + : ""; + internal_query.filter_ = + query->filter ? std::string(query->filter->data, query->filter->length) + : ""; + internal_query.include_vector_ = query->include_vector; + internal_query.group_by_field_name_ = + query->group_by_field_name + ? 
std::string(query->group_by_field_name->data, + query->group_by_field_name->length) + : ""; + internal_query.group_count_ = query->group_count; + internal_query.group_topk_ = query->group_topk; + + if (query->query_vector.data && query->query_vector.length > 0) { + internal_query.query_vector_.assign( + reinterpret_cast(query->query_vector.data), + query->query_vector.length); + } + + if (query->query_sparse_indices.data && + query->query_sparse_indices.length > 0) { + internal_query.query_sparse_indices_.assign( + reinterpret_cast(query->query_sparse_indices.data), + query->query_sparse_indices.length); + } + + if (query->query_sparse_values.data && + query->query_sparse_values.length > 0) { + internal_query.query_sparse_values_.assign( + reinterpret_cast(query->query_sparse_values.data), + query->query_sparse_values.length); + } + + if (query->output_fields && query->output_fields->count > 0) { + if (!internal_query.output_fields_.has_value()) { + internal_query.output_fields_ = std::vector(); + } + for (size_t i = 0; i < query->output_fields->count; ++i) { + internal_query.output_fields_->push_back( + std::string(query->output_fields->strings[i].data, + query->output_fields->strings[i].length)); + } + } + + if (query->query_params) { + switch (query->params_type) { + case ZVEC_INDEX_TYPE_HNSW: { + auto hnsw_params = + static_cast(query->query_params); + auto internal_params = std::make_shared( + hnsw_params->ef, hnsw_params->base.radius, + hnsw_params->base.is_linear, hnsw_params->base.is_using_refiner); + internal_query.query_params_ = internal_params; + break; + } + case ZVEC_INDEX_TYPE_IVF: { + auto ivf_params = + static_cast(query->query_params); + auto internal_params = std::make_shared( + ivf_params->nprobe, ivf_params->base.is_using_refiner, + ivf_params->scale_factor); + internal_query.query_params_ = internal_params; + break; + } + case ZVEC_INDEX_TYPE_FLAT: { + auto flat_params = + static_cast(query->query_params); + auto internal_params = 
std::make_shared( + flat_params->base.is_using_refiner, flat_params->scale_factor); + internal_query.query_params_ = internal_params; + break; + } + default: { + auto base_params = + static_cast(query->query_params); + auto internal_params = std::make_shared( + static_cast(base_params->index_type)); + internal_params->set_radius(base_params->radius); + internal_params->set_is_linear(base_params->is_linear); + internal_params->set_is_using_refiner(base_params->is_using_refiner); + internal_query.query_params_ = internal_params; + break; + } + } + } + } + + // Helper function to convert document results to C API format + ZVecErrorCode convert_document_results( + const std::vector> &query_results, + ZVecDoc ***results, size_t *result_count) { + *result_count = query_results.size(); + *results = + static_cast(malloc(*result_count * sizeof(ZVecDoc *))); + + if (!*results) { + set_last_error("Failed to allocate memory for query results"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + + for (size_t i = 0; i < *result_count; ++i) { + const auto &internal_doc = query_results[i]; + // Create new document wrapper + ZVecDoc *c_doc = zvec_doc_create(); + if (!c_doc) { + // Clean up previously allocated documents + for (size_t j = 0; j < i; ++j) { + zvec_doc_destroy((*results)[j]); + } + free(*results); + *results = nullptr; + *result_count = 0; + set_last_error("Failed to create document wrapper"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + + // Copy the C++ document to our wrapper + auto doc_ptr = + reinterpret_cast *>(c_doc); + *(*doc_ptr) = *internal_doc; // Copy assignment + (*results)[i] = c_doc; // Store the pointer, not dereference + } + + return ZVEC_OK; + } + + // Helper function to convert grouped document results to C API format + ZVecErrorCode convert_grouped_document_results( + const std::vector &group_results, ZVecDoc ***results, + ZVecString ***group_by_values, size_t *result_count) { + // Calculate total document count across all groups + size_t total_docs = 0; + for 
(const auto &group_result : group_results) { + total_docs += group_result.docs_.size(); + } + + // Allocate memory for document pointers and group by values + *result_count = total_docs; + *results = + static_cast(malloc(*result_count * sizeof(ZVecDoc *))); + *group_by_values = static_cast( + malloc(group_results.size() * sizeof(ZVecString *))); + + if (!*results) { + set_last_error("Failed to allocate memory for query results"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + + // Convert C++ grouped results to C API format + size_t doc_index = 0; + for (const auto &group_result : group_results) { + for (const auto &internal_doc : group_result.docs_) { + if (doc_index >= *result_count) { + break; + } + + // Create new document wrapper + ZVecDoc *c_doc = zvec_doc_create(); + if (!c_doc) { + // Clean up previously allocated documents + for (size_t j = 0; j < doc_index; ++j) { + zvec_doc_destroy((*results)[j]); + } + free(*results); + *results = nullptr; + *result_count = 0; + set_last_error("Failed to create document wrapper"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + + // Copy the C++ document to our wrapper + auto doc_ptr = + reinterpret_cast *>(c_doc); + *(*doc_ptr) = internal_doc; // Copy assignment + + ZVecString *c_group_value = + zvec_string_create(group_result.group_by_value_.c_str()); + if (!c_group_value) { + for (size_t j = 0; j < doc_index; ++j) { + zvec_doc_destroy((*results)[j]); + zvec_free_string((*group_by_values)[doc_index]); + } + free(*results); + *results = nullptr; + *result_count = 0; + set_last_error("Failed to create string wrapper"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + + (*group_by_values)[doc_index] = c_group_value; + (*results)[doc_index] = c_doc; + ++doc_index; + } + } + + return ZVEC_OK; + } + + // Helper function to convert fetched document results to C API format + static void normalize_nullable_fields_for_fetch( + const zvec::CollectionSchema &schema, zvec::DocPtrMap &doc_map) { + std::vector nullable_fields; + 
nullable_fields.reserve(schema.fields().size()); + + for (const auto &field : schema.fields()) { + if (field && field->nullable()) { + nullable_fields.push_back(field->name()); + } + } + + if (nullable_fields.empty()) { + return; + } + + for (auto &[_, doc_ptr] : doc_map) { + if (!doc_ptr) { + continue; + } + + for (const auto &field_name : nullable_fields) { + if (!doc_ptr->has(field_name)) { + doc_ptr->set_null(field_name); + } + } + } + } + + ZVecErrorCode convert_fetched_document_results(const zvec::DocPtrMap &doc_map, + ZVecDoc ***results, + size_t *doc_count) { + // Calculate actual document count (some PKs might not exist) + size_t actual_count = 0; + for (const auto &[pk, doc_ptr] : doc_map) { + if (doc_ptr) { + actual_count++; + } + } + + // Allocate memory for document pointers + *doc_count = actual_count; + if (*doc_count == 0) { + *results = nullptr; + return ZVEC_OK; + } + + *results = static_cast(malloc(*doc_count * sizeof(ZVecDoc *))); + if (!*results) { + set_last_error("Failed to allocate memory for document pointers"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + + // Convert C++ DocPtrMap to C ZVecDoc pointer array + size_t index = 0; + for (const auto &[pk, doc_ptr] : doc_map) { + if (doc_ptr && index < *doc_count) { + // Create new document wrapper + ZVecDoc *c_doc = zvec_doc_create(); + if (!c_doc) { + // Clean up previously allocated documents + for (size_t j = 0; j < index; ++j) { + zvec_doc_destroy((*results)[j]); + } + free(*results); + *results = nullptr; + *doc_count = 0; + set_last_error("Failed to create document wrapper"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + + // Copy the C++ document to our wrapper + auto cpp_doc_ptr = + reinterpret_cast *>(c_doc); + *(*cpp_doc_ptr) = *doc_ptr; // Copy assignment + + // Set the primary key explicitly + zvec_doc_set_pk(c_doc, pk.c_str()); + + (*results)[index] = c_doc; + ++index; + } + } + + return ZVEC_OK; + } + + ZVecErrorCode zvec_collection_query(const ZVecCollection *collection, + const 
ZVecVectorQuery *query, + ZVecDoc ***results, + size_t *result_count) { + if (!collection || !query || !results || !result_count) { + set_last_error( + "Invalid arguments: collection, query, results and result_count " + "cannot " + "be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + ZVEC_TRY_RETURN_ERROR( + "Exception occurred", + auto coll_ptr = + reinterpret_cast *>( + collection); + + // Convert query parameters using helper function + zvec::VectorQuery internal_query; + convert_query_params(internal_query, query); + + auto result = (*coll_ptr)->Query(internal_query); + ZVecErrorCode error_code = handle_expected_result(result); + + if (error_code == ZVEC_OK) { + const auto &query_results = result.value(); + error_code = + convert_document_results(query_results, results, result_count); + } else { + *results = nullptr; + *result_count = 0; + } + + return error_code;) + } + + ZVecErrorCode zvec_collection_query_by_group( + const ZVecCollection *collection, const ZVecGroupByVectorQuery *query, + ZVecDoc ***results, ZVecString ***group_by_values, size_t *result_count) { + if (!collection || !query || !results || !group_by_values || + !result_count) { + set_last_error( + "Invalid arguments: collection, query, results, group_by_values and " + "result_count cannot " + "be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + ZVEC_TRY_RETURN_ERROR( + "Exception occurred", + auto coll_ptr = + reinterpret_cast *>( + collection); + + zvec::GroupByVectorQuery internal_query; + convert_groupby_query_params(internal_query, query); + + auto result = (*coll_ptr)->GroupByQuery(internal_query); + ZVecErrorCode error_code = handle_expected_result(result); + + if (error_code == ZVEC_OK) { + const auto &group_results = result.value(); + error_code = convert_grouped_document_results( + group_results, results, group_by_values, result_count); + } else { + *results = nullptr; + *group_by_values = nullptr; + *result_count = 0; + } + + return error_code;) + } + + ZVecErrorCode 
zvec_collection_fetch(ZVecCollection *collection, + const char *const *pks, size_t pk_count, + ZVecDoc ***results, size_t *doc_count) { + if (!collection || !pks || !results || !doc_count) { + set_last_error( + "Invalid arguments: collection, pks, results and doc_count cannot " + "be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + // Handle empty case + if (pk_count == 0) { + *results = nullptr; + *doc_count = 0; + return ZVEC_OK; + } + + ZVEC_TRY_RETURN_ERROR( + "Exception in zvec_collection_fetch", + auto coll_ptr = + reinterpret_cast *>( + collection); + + // Convert C array to C++ vector + std::vector pk_vector; pk_vector.reserve(pk_count); + for (size_t i = 0; i < pk_count; ++i) { + if (pks[i]) { + pk_vector.emplace_back(pks[i]); + } else { + set_last_error("Null primary key at index " + std::to_string(i)); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + } + + // Call C++ fetch method + auto result = (*coll_ptr)->Fetch(pk_vector); + if (!result.has_value()) { + set_last_error("Failed to fetch documents: " + + result.error().message()); + return ZVEC_ERROR_INTERNAL_ERROR; + } + + auto doc_map = result.value(); + auto schema_result = (*coll_ptr)->Schema(); + if (schema_result.has_value()) { + normalize_nullable_fields_for_fetch(schema_result.value(), doc_map); + } return convert_fetched_document_results(doc_map, results, doc_count);) + } diff --git a/src/binding/python/CMakeLists.txt b/src/binding/python/CMakeLists.txt index 160b25ea..c78aa033 100644 --- a/src/binding/python/CMakeLists.txt +++ b/src/binding/python/CMakeLists.txt @@ -56,4 +56,4 @@ elseif (APPLE) ) endif () -target_include_directories(_zvec PRIVATE ${PYBIND11_INCLUDE_DIR} ${PROJECT_ROOT_DIR}/src ${PROJECT_ROOT_DIR}/src/binding/python/include) +target_include_directories(_zvec PRIVATE ${PYBIND11_INCLUDE_DIR} ${PROJECT_ROOT_DIR}/src ${PROJECT_ROOT_DIR}/src/binding/python/include) \ No newline at end of file diff --git a/src/db/CMakeLists.txt b/src/db/CMakeLists.txt index 765a1b4a..b2689278 100644 
--- a/src/db/CMakeLists.txt +++ b/src/db/CMakeLists.txt @@ -14,11 +14,11 @@ cc_directory(sqlengine) file(GLOB_RECURSE ALL_DB_SRCS *.cc *.c *.h) cc_library( - NAME zvec_db STATIC STRICT SRCS_NO_GLOB + NAME zvec_db STATIC STRICT SRCS_NO_GLOB PACKED SRCS ${ALL_DB_SRCS} ${CMAKE_CURRENT_BINARY_DIR}/proto/zvec.pb.cc INCS . ${CMAKE_CURRENT_BINARY_DIR} PUBINCS ${PROJECT_ROOT_DIR}/src/include - LIBS + LIBS zvec_ailego zvec_core glog diff --git a/src/include/zvec/c_api.h b/src/include/zvec/c_api.h new file mode 100644 index 00000000..775012bd --- /dev/null +++ b/src/include/zvec/c_api.h @@ -0,0 +1,3102 @@ +// Copyright 2025-present the zvec project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef ZVEC_C_API_H +#define ZVEC_C_API_H + +#include +#include +#include +#include + +// Include generated version header +#if defined(__has_include) && __has_include() +#include +#else +#include "zvec_version.h" +#endif + +// ============================================================================= +// API Export Control +// ============================================================================= + +#if defined(_WIN32) || defined(__CYGWIN__) +#ifdef ZVEC_BUILD_SHARED +#define ZVEC_EXPORT __declspec(dllexport) +#elif defined(ZVEC_USE_SHARED) +#define ZVEC_EXPORT __declspec(dllimport) +#else +#define ZVEC_EXPORT +#endif +#define ZVEC_CALL __cdecl +#else +#if __GNUC__ >= 4 +#define ZVEC_EXPORT __attribute__((visibility("default"))) +#else +#define ZVEC_EXPORT +#endif +#define ZVEC_CALL +#endif + +#ifdef __cplusplus +extern "C" { +#endif + + +// ============================================================================= +// Version Information +// ============================================================================= + +/** + * @brief Get library version information + * + * Return format: "{base_version}[-{git_info}] (built {build_time})" + * Example: "0.3.0-g3f8a2b1 (built 2025-05-13 10:30:45)" + * + * @return const char* Version string, managed internally by the library, caller + * should not free + */ +ZVEC_EXPORT const char *ZVEC_CALL zvec_get_version(void); + +/** + * @brief Check API version compatibility + * + * Check if the current library version meets the specified minimum version + * requirements Following semantic versioning specification: MAJOR.MINOR.PATCH + * + * @param major Required major version number + * @param minor Required minor version number + * @param patch Required patch version number + * @return bool Returns true if compatible, false otherwise + */ +ZVEC_EXPORT bool ZVEC_CALL zvec_check_version(int major, int minor, int patch); + +/** + * @brief Get major version number + * + * @return int Major version number + */ 
+ZVEC_EXPORT int ZVEC_CALL zvec_get_version_major(void); + +/** + * @brief Get minor version number + * + * @return int Minor version number + */ +ZVEC_EXPORT int ZVEC_CALL zvec_get_version_minor(void); + + +/** + * @brief Get patch version number + * + * @return int Patch version number + */ +ZVEC_EXPORT int ZVEC_CALL zvec_get_version_patch(void); + + +// ============================================================================= +// Error Code Definitions +// ============================================================================= + +/** + * @brief ZVec C API error code enumeration + */ +typedef enum { + ZVEC_OK = 0, /**< Success */ + ZVEC_ERROR_NOT_FOUND = 1, /**< Resource not found */ + ZVEC_ERROR_ALREADY_EXISTS = 2, /**< Resource already exists */ + ZVEC_ERROR_INVALID_ARGUMENT = 3, /**< Invalid argument */ + ZVEC_ERROR_PERMISSION_DENIED = 4, /**< Permission denied */ + ZVEC_ERROR_FAILED_PRECONDITION = 5, /**< Failed precondition */ + ZVEC_ERROR_RESOURCE_EXHAUSTED = 6, /**< Resource exhausted */ + ZVEC_ERROR_UNAVAILABLE = 7, /**< Unavailable */ + ZVEC_ERROR_INTERNAL_ERROR = 8, /**< Internal error */ + ZVEC_ERROR_NOT_SUPPORTED = 9, /**< Unsupported operation */ + ZVEC_ERROR_UNKNOWN = 10 /**< Unknown error */ +} ZVecErrorCode; + +/** + * @brief Error details structure + */ +typedef struct { + ZVecErrorCode code; /**< Error code */ + const char *message; /**< Error message */ + const char *file; /**< File where error occurred */ + int line; /**< Line number where error occurred */ + const char *function; /**< Function where error occurred */ +} ZVecErrorDetails; + +/** + * @brief Get detailed information of the last error + * @param[out] error_details Pointer to error details structure + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_get_last_error_details(ZVecErrorDetails *error_details); + +/** + * @brief Get last error message + * @param[out] error_msg Returned error message string (needs to be freed by + * calling 
free) + * @return ZVecErrorCode Error code + * @note zvec_free_ptr() is declared in this header for malloc-allocated buffers + * returned by the API (it avoids allocator mismatch across DLL + * boundaries); presumably it is the safer way to release error_msg - + * confirm. + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_get_last_error(char **error_msg); + +/** + * @brief Clear error status + */ +ZVEC_EXPORT void ZVEC_CALL zvec_clear_error(void); + + +// ============================================================================= +// Basic Data Structures +// ============================================================================= + +/** + * @brief String view structure (does not own memory) + */ +typedef struct { + const char *data; /**< String data pointer */ + size_t length; /**< String length */ +} ZVecStringView; + +/** + * @brief Mutable string structure (owns memory) + */ +typedef struct { + char *data; /**< String data pointer */ + size_t length; /**< String length */ + size_t capacity; /**< Allocated capacity */ +} ZVecString; + +/** + * @brief String array structure + */ +typedef struct { + ZVecString *strings; /**< String array */ + size_t count; /**< String count */ +} ZVecStringArray; + +/** + * @brief Float array structure + */ +typedef struct { + const float *data; /**< Float data pointer */ + size_t length; /**< Array length */ +} ZVecFloatArray; + +/** + * @brief 64-bit integer array structure + */ +typedef struct { + const int64_t *data; /**< Int64 data pointer */ + size_t length; /**< Array length */ +} ZVecInt64Array; + +/** + * @brief Byte array structure + */ +typedef struct { + const uint8_t *data; /**< Byte data pointer */ + size_t length; /**< Array length */ +} ZVecByteArray; + +/** + * @brief Mutable byte array structure + */ +typedef struct { + uint8_t *data; /**< Byte data pointer */ + size_t length; /**< Current length */ + size_t capacity; /**< Allocated capacity */ +} ZVecMutableByteArray; + +// ============================================================================= +// String management functions +// ============================================================================= + +/** + * @brief Create string from C string + * @param str C string + * @return ZVecString* Pointer to the newly created string + * @note The other string creators in this header require the result to be + * freed with zvec_free_string(); presumably the same applies here - + * confirm. + */ +ZVEC_EXPORT ZVecString *ZVEC_CALL
zvec_string_create(const char *str); + +/** + * @brief Create string from string view + * + * Creates a new ZVecString by copying data from a ZVecStringView. + * The created string owns its memory and must be freed with zvec_free_string(). + * + * @param view Pointer to source string view (must not be NULL) + * @return ZVecString* New string instance on success, NULL on error + * @note Caller is responsible for freeing the returned string + */ +ZVEC_EXPORT ZVecString *ZVEC_CALL +zvec_string_create_from_view(const ZVecStringView *view); + +/** + * @brief Create binary-safe string from raw data + * + * Creates a new ZVecString from raw binary data that may contain null bytes. + * Unlike zvec_string_create(), this function takes explicit length parameter + * and doesn't rely on null-termination. + * The created string owns its memory and must be freed with zvec_free_string(). + * + * @param data Raw binary data pointer (must not be NULL) + * @param length Length of data in bytes + * @return ZVecString* New string instance on success, NULL on error + * @note Caller is responsible for freeing the returned string + * @note This function is suitable for binary data containing null bytes + */ +ZVEC_EXPORT ZVecString *ZVEC_CALL zvec_bin_create(const uint8_t *data, + size_t length); + +/** + * @brief Copy string + * + * Creates a new ZVecString by copying an existing string. + * The created string owns its memory and must be freed with zvec_free_string(). 
+ * + * @param str Pointer to source string (must not be NULL) + * @return ZVecString* New string instance on success, NULL on error + * @note Caller is responsible for freeing the returned string + */ +ZVEC_EXPORT ZVecString *ZVEC_CALL zvec_string_copy(const ZVecString *str); + +/** + * @brief Get C string from ZVecString + * @param str ZVecString pointer + * @return const char* C string + */ +ZVEC_EXPORT const char *ZVEC_CALL zvec_string_c_str(const ZVecString *str); + +/** + * @brief Get string length + * @param str ZVecString pointer + * @return size_t String length + */ +ZVEC_EXPORT size_t ZVEC_CALL zvec_string_length(const ZVecString *str); + +/** + * @brief Compare two strings + * @param str1 First string + * @param str2 Second string + * @return int Comparison result (-1, 0, or 1) + */ +ZVEC_EXPORT int ZVEC_CALL zvec_string_compare(const ZVecString *str1, + const ZVecString *str2); + +/** + * @brief Free string memory + * @param str String pointer to free + */ +ZVEC_EXPORT void ZVEC_CALL zvec_free_string(ZVecString *str); + + +// ============================================================================= +// Array Memory management functions +// ============================================================================= + +/** + * @brief Create a new string array + * @param count Initial number of strings to allocate space for + * @return Pointer to the newly created string array, or NULL on failure + */ +ZVEC_EXPORT ZVecStringArray *ZVEC_CALL zvec_string_array_create(size_t count); + +/** + * @brief Add a string to the string array at specified index + * @param array String array pointer + * @param idx Index position where the string should be added + * @param str Null-terminated C string to add + */ +ZVEC_EXPORT void ZVEC_CALL zvec_string_array_add(ZVecStringArray *array, + size_t idx, const char *str); + +/** + * @brief Destroy string array and free all associated memory + * @param array String array pointer to destroy + */ +ZVEC_EXPORT void 
ZVEC_CALL zvec_string_array_destroy(ZVecStringArray *array); + +/** + * @brief Create a new mutable byte array + * @param capacity Initial capacity in bytes + * @return Pointer to the newly created byte array, or NULL on failure + */ +ZVEC_EXPORT ZVecMutableByteArray *ZVEC_CALL +zvec_byte_array_create(size_t capacity); + + +/** + * @brief Destroy byte array and free all associated memory + * @param array Byte array pointer to destroy + */ +ZVEC_EXPORT void ZVEC_CALL zvec_byte_array_destroy(ZVecMutableByteArray *array); + +/** + * @brief Create a new float array + * @param count Number of floats to allocate space for + * @return Pointer to the newly created float array, or NULL on failure + * + * NOTE(review): ZVecFloatArray declares its data member as `const float *`, so + * a caller cannot fill the allocated elements through the struct without a + * cast - confirm the intended way to populate the array. + */ +ZVEC_EXPORT ZVecFloatArray *ZVEC_CALL zvec_float_array_create(size_t count); + +/** + * @brief Destroy float array and free all associated memory + * @param array Float array pointer to destroy + */ +ZVEC_EXPORT void ZVEC_CALL zvec_float_array_destroy(ZVecFloatArray *array); + +/** + * @brief Create a new int64 array + * @param count Number of int64 values to allocate space for + * @return Pointer to the newly created int64 array, or NULL on failure + * + * NOTE(review): ZVecInt64Array declares its data member as `const int64_t *`, + * so the same question about populating the array applies here - confirm. + */ +ZVEC_EXPORT ZVecInt64Array *ZVEC_CALL zvec_int64_array_create(size_t count); + +/** + * @brief Destroy int64 array and free all associated memory + * @param array Int64 array pointer to destroy + */ +ZVEC_EXPORT void ZVEC_CALL zvec_int64_array_destroy(ZVecInt64Array *array); + +/** + * @brief Release uint8_t array memory + * + * @param array uint8_t array pointer + */ +ZVEC_EXPORT void ZVEC_CALL zvec_free_uint8_array(uint8_t *array); + +/** + * @brief Free heap memory allocated by zvec C API. + * + * Use this helper for pointer-returning APIs that document malloc-allocated + * buffers. This avoids allocator mismatch across DLL boundaries.
+ * + * @param ptr Memory pointer returned by zvec C API + */ +ZVEC_EXPORT void ZVEC_CALL zvec_free_ptr(void *ptr); + + +// ============================================================================= +// Configuration and Options Structures +// ============================================================================= + +/** + * @brief Log level enumeration + */ +typedef enum { + ZVEC_LOG_LEVEL_DEBUG = 0, + ZVEC_LOG_LEVEL_INFO = 1, + ZVEC_LOG_LEVEL_WARN = 2, + ZVEC_LOG_LEVEL_ERROR = 3, + ZVEC_LOG_LEVEL_FATAL = 4 +} ZVecLogLevel; + +/** + * @brief Log type enumeration + */ +typedef enum { ZVEC_LOG_TYPE_CONSOLE = 0, ZVEC_LOG_TYPE_FILE = 1 } ZVecLogType; + +// ============================================================================= +// Configuration Structures (Opaque Pointer Pattern) +// ============================================================================= + +/** + * @brief Console log configuration (opaque pointer) + * Corresponds to zvec::GlobalConfig::ConsoleLogConfig + * Use zvec_config_console_log_create() to create and + * zvec_config_console_log_destroy() to destroy + */ +typedef struct ZVecConsoleLogConfig ZVecConsoleLogConfig; + +/** + * @brief File log configuration (opaque pointer) + * Corresponds to zvec::GlobalConfig::FileLogConfig + * Use zvec_config_file_log_create() to create and + * zvec_config_file_log_destroy() to destroy + */ +typedef struct ZVecFileLogConfig ZVecFileLogConfig; + +/** + * @brief Configuration data (opaque pointer) + * Corresponds to zvec::GlobalConfig::ConfigData + * Use zvec_config_data_create() to create and + * zvec_config_data_destroy() to destroy + */ +typedef struct ZVecConfigData ZVecConfigData; + +// ============================================================================= +// Log Configuration Management Functions +// ============================================================================= + +/** + * @brief Create console log configuration + * @param level Log level + * @return 
ZVecConsoleLogConfig* Pointer to the newly created console log + * configuration + */ +ZVEC_EXPORT ZVecConsoleLogConfig *ZVEC_CALL +zvec_config_console_log_create(ZVecLogLevel level); + +/** + * @brief Create file log configuration + * @param level Log level + * @param dir Log directory + * @param basename Log file base name + * @param file_size Log file size (MB) + * @param overdue_days Log expiration days + * @return ZVecFileLogConfig* Pointer to the newly created file log + * configuration + */ +ZVEC_EXPORT ZVecFileLogConfig *ZVEC_CALL zvec_config_file_log_create( + ZVecLogLevel level, const char *dir, const char *basename, + uint32_t file_size, uint32_t overdue_days); + +/** + * @brief Destroy console log configuration + * @param config Console log configuration pointer + */ +ZVEC_EXPORT void ZVEC_CALL +zvec_config_console_log_destroy(ZVecConsoleLogConfig *config); + +/** + * @brief Destroy file log configuration + * @param config File log configuration pointer + */ +ZVEC_EXPORT void ZVEC_CALL +zvec_config_file_log_destroy(ZVecFileLogConfig *config); + +/** + * @brief Get log level from console log config + * @param config Console log configuration pointer + * @return ZVecLogLevel Log level + */ +ZVEC_EXPORT ZVecLogLevel ZVEC_CALL +zvec_config_console_log_get_level(const ZVecConsoleLogConfig *config); + +/** + * @brief Set log level in console log config + * @param config Console log configuration pointer + * @param level Log level + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_config_console_log_set_level( + ZVecConsoleLogConfig *config, ZVecLogLevel level); + +/** + * @brief Get log level from file log config + * @param config File log configuration pointer + * @return ZVecLogLevel Log level + */ +ZVEC_EXPORT ZVecLogLevel ZVEC_CALL +zvec_config_file_log_get_level(const ZVecFileLogConfig *config); + +/** + * @brief Set log level in file log config + * @param config File log configuration pointer + * @param level Log level + 
* @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_config_file_log_set_level(ZVecFileLogConfig *config, ZVecLogLevel level); + +/** + * @brief Get log directory from file log config + * @param config File log configuration pointer + * @return const char* Log directory (owned by config, do not free) + */ +ZVEC_EXPORT const char *ZVEC_CALL +zvec_config_file_log_get_dir(const ZVecFileLogConfig *config); + +/** + * @brief Set log directory in file log config + * @param config File log configuration pointer + * @param dir Log directory + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_config_file_log_set_dir(ZVecFileLogConfig *config, const char *dir); + +/** + * @brief Get log file basename from file log config + * @param config File log configuration pointer + * @return const char* Log file basename (owned by config, do not free) + */ +ZVEC_EXPORT const char *ZVEC_CALL +zvec_config_file_log_get_basename(const ZVecFileLogConfig *config); + +/** + * @brief Set log file basename in file log config + * @param config File log configuration pointer + * @param basename Log file basename + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_config_file_log_set_basename( + ZVecFileLogConfig *config, const char *basename); + +/** + * @brief Get log file size from file log config + * @param config File log configuration pointer + * @return uint32_t Log file size in MB + */ +ZVEC_EXPORT uint32_t ZVEC_CALL +zvec_config_file_log_get_file_size(const ZVecFileLogConfig *config); + +/** + * @brief Set log file size in file log config + * @param config File log configuration pointer + * @param file_size Log file size in MB + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_config_file_log_set_file_size( + ZVecFileLogConfig *config, uint32_t file_size); + +/** + * @brief Get log overdue days from file log config + * @param config File log configuration pointer 
+ * @return uint32_t Log overdue days + */ +ZVEC_EXPORT uint32_t ZVEC_CALL +zvec_config_file_log_get_overdue_days(const ZVecFileLogConfig *config); + +/** + * @brief Set log overdue days in file log config + * @param config File log configuration pointer + * @param days Log overdue days + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_config_file_log_set_overdue_days(ZVecFileLogConfig *config, uint32_t days); + +// ============================================================================= +// Configuration Data Management Functions +// ============================================================================= + +/** + * @brief Create configuration data + * @return ZVecConfigData* Pointer to the newly created configuration data + */ +ZVEC_EXPORT ZVecConfigData *ZVEC_CALL zvec_config_data_create(void); + +/** + * @brief Destroy configuration data + * @param config Configuration data pointer + */ +ZVEC_EXPORT void ZVEC_CALL zvec_config_data_destroy(ZVecConfigData *config); + +/** + * @brief Set memory limit in configuration data + * @param config Configuration data pointer + * @param memory_limit_bytes Memory limit in bytes + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_config_data_set_memory_limit( + ZVecConfigData *config, uint64_t memory_limit_bytes); + +/** + * @brief Get memory limit from configuration data + * @param config Configuration data pointer + * @return uint64_t Memory limit in bytes + */ +ZVEC_EXPORT uint64_t ZVEC_CALL +zvec_config_data_get_memory_limit(const ZVecConfigData *config); + +/** + * @brief Set log configuration in configuration data + * @param config Configuration data pointer + * @param log_type Log type (console or file) + * @param log_config Log configuration pointer (ownership is transferred to + * config, do not free separately) + * @return ZVecErrorCode Error code + * + * NOTE(review): log_config is an untyped void *, so the compiler cannot check + * it against log_type. Presumably it must be a ZVecConsoleLogConfig * for + * ZVEC_LOG_TYPE_CONSOLE and a ZVecFileLogConfig * for ZVEC_LOG_TYPE_FILE - + * confirm. + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_config_data_set_log_config( + ZVecConfigData
*config, ZVecLogType log_type, void *log_config); + +/** + * @brief Get log type from configuration data + * @param config Configuration data pointer + * @return ZVecLogType Log type + */ +ZVEC_EXPORT ZVecLogType ZVEC_CALL +zvec_config_data_get_log_type(const ZVecConfigData *config); + +/** + * @brief Get console log config from configuration data + * @param config Configuration data pointer + * @return ZVecConsoleLogConfig* Console log configuration (owned by config, do + * not destroy) + */ +ZVEC_EXPORT ZVecConsoleLogConfig *ZVEC_CALL +zvec_config_data_get_console_log_config(const ZVecConfigData *config); + +/** + * @brief Get file log config from configuration data + * @param config Configuration data pointer + * @return ZVecFileLogConfig* File log configuration (owned by config, do not + * destroy) + */ +ZVEC_EXPORT ZVecFileLogConfig *ZVEC_CALL +zvec_config_data_get_file_log_config(const ZVecConfigData *config); + +/** + * @brief Set query thread count in configuration data + * @param config Configuration data pointer + * @param thread_count Query thread count + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_config_data_set_query_thread_count( + ZVecConfigData *config, uint32_t thread_count); + +/** + * @brief Get query thread count from configuration data + * @param config Configuration data pointer + * @return uint32_t Query thread count + */ +ZVEC_EXPORT uint32_t ZVEC_CALL +zvec_config_data_get_query_thread_count(const ZVecConfigData *config); + +/** + * @brief Set invert to forward scan ratio in configuration data + * @param config Configuration data pointer + * @param ratio Invert to forward scan ratio + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_config_data_set_invert_to_forward_scan_ratio(ZVecConfigData *config, + float ratio); + +/** + * @brief Get invert to forward scan ratio from configuration data + * @param config Configuration data pointer + * @return float Invert to forward 
scan ratio + */ +ZVEC_EXPORT float ZVEC_CALL +zvec_config_data_get_invert_to_forward_scan_ratio(const ZVecConfigData *config); + +/** + * @brief Set brute force by keys ratio in configuration data + * @param config Configuration data pointer + * @param ratio Brute force by keys ratio + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_config_data_set_brute_force_by_keys_ratio(ZVecConfigData *config, + float ratio); + +/** + * @brief Get brute force by keys ratio from configuration data + * @param config Configuration data pointer + * @return float Brute force by keys ratio + */ +ZVEC_EXPORT float ZVEC_CALL +zvec_config_data_get_brute_force_by_keys_ratio(const ZVecConfigData *config); + +/** + * @brief Set optimize thread count in configuration data + * @param config Configuration data pointer + * @param thread_count Optimize thread count + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_config_data_set_optimize_thread_count( + ZVecConfigData *config, uint32_t thread_count); + +/** + * @brief Get optimize thread count from configuration data + * @param config Configuration data pointer + * @return uint32_t Optimize thread count + */ +ZVEC_EXPORT uint32_t ZVEC_CALL +zvec_config_data_get_optimize_thread_count(const ZVecConfigData *config); + +// ============================================================================= +// Initialization and Cleanup Interface +// ============================================================================= + +/** + * @brief Initialize ZVec library + * @param config Configuration data (optional, NULL means using default + * configuration) + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_initialize(const ZVecConfigData *config); + +/** + * @brief Clean up ZVec library resources + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_shutdown(void); + +/** + * @brief Check if library is initialized + * 
@return true if initialized, false otherwise + */ +ZVEC_EXPORT bool ZVEC_CALL zvec_is_initialized(void); + +// ============================================================================= +// Data Type Enumerations +// ============================================================================= + +/** + * @brief Data type enumeration + * + * Values are grouped by numeric range: scalar types 1-9, dense vector types + * 20-27, sparse vector types 30-31 and array types 40-48. 0 means undefined. + */ +typedef enum { + ZVEC_DATA_TYPE_UNDEFINED = 0, + + ZVEC_DATA_TYPE_BINARY = 1, + ZVEC_DATA_TYPE_STRING = 2, + ZVEC_DATA_TYPE_BOOL = 3, + ZVEC_DATA_TYPE_INT32 = 4, + ZVEC_DATA_TYPE_INT64 = 5, + ZVEC_DATA_TYPE_UINT32 = 6, + ZVEC_DATA_TYPE_UINT64 = 7, + ZVEC_DATA_TYPE_FLOAT = 8, + ZVEC_DATA_TYPE_DOUBLE = 9, + + ZVEC_DATA_TYPE_VECTOR_BINARY32 = 20, + ZVEC_DATA_TYPE_VECTOR_BINARY64 = 21, + ZVEC_DATA_TYPE_VECTOR_FP16 = 22, + ZVEC_DATA_TYPE_VECTOR_FP32 = 23, + ZVEC_DATA_TYPE_VECTOR_FP64 = 24, + ZVEC_DATA_TYPE_VECTOR_INT4 = 25, + ZVEC_DATA_TYPE_VECTOR_INT8 = 26, + ZVEC_DATA_TYPE_VECTOR_INT16 = 27, + + ZVEC_DATA_TYPE_SPARSE_VECTOR_FP16 = 30, + ZVEC_DATA_TYPE_SPARSE_VECTOR_FP32 = 31, + + ZVEC_DATA_TYPE_ARRAY_BINARY = 40, + ZVEC_DATA_TYPE_ARRAY_STRING = 41, + ZVEC_DATA_TYPE_ARRAY_BOOL = 42, + ZVEC_DATA_TYPE_ARRAY_INT32 = 43, + ZVEC_DATA_TYPE_ARRAY_INT64 = 44, + ZVEC_DATA_TYPE_ARRAY_UINT32 = 45, + ZVEC_DATA_TYPE_ARRAY_UINT64 = 46, + ZVEC_DATA_TYPE_ARRAY_FLOAT = 47, + ZVEC_DATA_TYPE_ARRAY_DOUBLE = 48 +} ZVecDataType; + +/** + * @brief Index type enumeration + * + * The numeric values are not contiguous (2 and 5-9 are unassigned here). + * NOTE(review): presumably they mirror an enum in the core library - confirm + * before adding or renumbering entries. + */ +typedef enum { + ZVEC_INDEX_TYPE_UNDEFINED = 0, + ZVEC_INDEX_TYPE_HNSW = 1, + ZVEC_INDEX_TYPE_IVF = 3, + ZVEC_INDEX_TYPE_FLAT = 4, + ZVEC_INDEX_TYPE_INVERT = 10 +} ZVecIndexType; + +/** + * @brief Distance metric type enumeration + */ +typedef enum { + ZVEC_METRIC_TYPE_UNDEFINED = 0, + ZVEC_METRIC_TYPE_L2 = 1, + ZVEC_METRIC_TYPE_IP = 2, + ZVEC_METRIC_TYPE_COSINE = 3, + ZVEC_METRIC_TYPE_MIPSL2 = 4 +} ZVecMetricType; + +/** + * @brief Quantization type enumeration + */ +typedef enum { + ZVEC_QUANTIZE_TYPE_UNDEFINED = 0, + ZVEC_QUANTIZE_TYPE_FP16 = 1, + ZVEC_QUANTIZE_TYPE_INT8 = 2, +
ZVEC_QUANTIZE_TYPE_INT4 = 3 +} ZVecQuantizeType; + +// ============================================================================= +// Forward Declarations +// ============================================================================= + +typedef struct ZVecCollection ZVecCollection; + +// ============================================================================= +// Index Parameters Structures (Opaque Pointer Pattern) +// ============================================================================= + +/** + * @brief Index parameters (opaque pointer) + * + * Use zvec_index_params_create() to create and zvec_index_params_destroy() to + * destroy. Specific parameters are set via type-specific setter functions. + */ +typedef struct ZVecIndexParams ZVecIndexParams; + +// ============================================================================= +// Field Schema Structures (Opaque Pointer Pattern) +// ============================================================================= + +/** + * @brief Field schema (opaque pointer) + * + * Use zvec_field_schema_create() to create and zvec_field_schema_destroy() to + * destroy. Fields are accessed via getter/setter functions. 
+ */ +typedef struct ZVecFieldSchema ZVecFieldSchema; + + +// ============================================================================= +// Index Parameters Interface +// ============================================================================= + +/** + * @brief Create index parameters + * @param index_type Index type + * @return Pointer to newly created ZVecIndexParams, or NULL on error + */ +ZVEC_EXPORT ZVecIndexParams *ZVEC_CALL +zvec_index_params_create(ZVecIndexType index_type); + +/** + * @brief Destroy index parameters + * @param params Index parameters to destroy + */ +ZVEC_EXPORT void ZVEC_CALL zvec_index_params_destroy(ZVecIndexParams *params); + +/** + * @brief Get index type + * @param params Index parameters (must not be NULL) + * @return Index type + */ +ZVEC_EXPORT ZVecIndexType ZVEC_CALL +zvec_index_params_get_type(const ZVecIndexParams *params); + +/** + * @brief Set metric type (for vector indexes) + * @param params Index parameters (must be vector index type) + * @param metric_type Metric type + * @return ZVEC_OK on success, error code on failure + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_index_params_set_metric_type( + ZVecIndexParams *params, ZVecMetricType metric_type); + +/** + * @brief Get metric type + * @param params Index parameters (must not be NULL) + * @return Metric type + */ +ZVEC_EXPORT ZVecMetricType ZVEC_CALL +zvec_index_params_get_metric_type(const ZVecIndexParams *params); + +/** + * @brief Set quantize type (for vector indexes) + * @param params Index parameters (must be vector index type) + * @param quantize_type Quantize type + * @return ZVEC_OK on success, error code on failure + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_index_params_set_quantize_type( + ZVecIndexParams *params, ZVecQuantizeType quantize_type); + +/** + * @brief Get quantize type + * @param params Index parameters (must not be NULL) + * @return Quantize type + */ +ZVEC_EXPORT ZVecQuantizeType ZVEC_CALL 
+zvec_index_params_get_quantize_type(const ZVecIndexParams *params); + +/** + * @brief Set HNSW specific parameters + * @param params Index parameters (must be HNSW type) + * @param m Graph connectivity parameter + * @param ef_construction Construction exploration factor + * @return ZVEC_OK on success, error code on failure + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_index_params_set_hnsw_params( + ZVecIndexParams *params, int m, int ef_construction); + +/** + * @brief Get HNSW m parameter + * @param params Index parameters (must not be NULL) + * @return m parameter + */ +ZVEC_EXPORT int ZVEC_CALL +zvec_index_params_get_hnsw_m(const ZVecIndexParams *params); + +/** + * @brief Get HNSW ef_construction parameter + * @param params Index parameters (must not be NULL) + * @return ef_construction parameter + */ +ZVEC_EXPORT int ZVEC_CALL +zvec_index_params_get_hnsw_ef_construction(const ZVecIndexParams *params); + +/** + * @brief Get HNSW parameters (all at once) + * @param params Index parameters (must not be NULL) + * @param out_m Output parameter for m + * @param out_ef_construction Output parameter for ef_construction + * @return ZVEC_OK on success, error code on failure + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_index_params_get_hnsw_params( + const ZVecIndexParams *params, int *out_m, int *out_ef_construction); + +/** + * @brief Set IVF specific parameters + * @param params Index parameters (must be IVF type) + * @param n_list Number of cluster centers + * @param n_iters Number of iterations + * @param use_soar Whether to use SOAR algorithm + * @return ZVEC_OK on success, error code on failure + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_index_params_set_ivf_params( + ZVecIndexParams *params, int n_list, int n_iters, bool use_soar); + +/** + * @brief Get IVF parameters (all at once) + * @param params Index parameters (must not be NULL) + * @param out_n_list Output parameter for n_list + * @param out_n_iters Output parameter for n_iters + * @param 
out_use_soar Output parameter for use_soar + * @return ZVEC_OK on success, error code on failure + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_index_params_get_ivf_params(const ZVecIndexParams *params, int *out_n_list, + int *out_n_iters, bool *out_use_soar); + +/** + * @brief Get invert index parameters (all at once) + * @param params Index parameters (must not be NULL) + * @param out_enable_range_opt Output parameter for enable_range_optimization + * @param out_enable_wildcard Output parameter for enable_extended_wildcard + * @return ZVEC_OK on success, error code on failure + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_index_params_get_invert_params( + const ZVecIndexParams *params, bool *out_enable_range_opt, + bool *out_enable_wildcard); + +/** + * @brief Set invert index specific parameters + * @param params Index parameters (must be INVERT type) + * @param enable_range_opt Whether to enable range optimization + * @param enable_wildcard Whether to enable extended wildcard + * @return ZVEC_OK on success, error code on failure + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_index_params_set_invert_params( + ZVecIndexParams *params, bool enable_range_opt, bool enable_wildcard); + +// ============================================================================= +// Query Parameters Structures (Opaque Pointer Pattern) +// ============================================================================= + +/** + * @brief Base query parameters (opaque pointer) + * Corresponds to zvec::QueryParams + * Use zvec_query_params_create() to create and zvec_query_params_destroy() to + * destroy + */ +typedef struct ZVecQueryParams ZVecQueryParams; + +/** + * @brief HNSW query parameters (opaque pointer) + * Corresponds to zvec::HnswQueryParams + * Use zvec_query_params_hnsw_create() to create + */ +typedef struct ZVecHnswQueryParams ZVecHnswQueryParams; + +/** + * @brief IVF query parameters (opaque pointer) + * Corresponds to zvec::IVFQueryParams + * Use 
zvec_query_params_ivf_create() to create + */ +typedef struct ZVecIVFQueryParams ZVecIVFQueryParams; + +/** + * @brief Flat query parameters (opaque pointer) + * Corresponds to zvec::FlatQueryParams + * Use zvec_query_params_flat_create() to create + */ +typedef struct ZVecFlatQueryParams ZVecFlatQueryParams; + +// Deprecated: ZVecQueryParamsUnion is no longer needed +// Use specific query params types directly instead + +// ============================================================================= +// Query Structures (Updated to use opaque pointer QueryParams) +// ============================================================================= + +/** + * @brief Vector query structure (opaque pointer) + * Aligned with zvec::VectorQuery + * Use zvec_vector_query_create() to create and zvec_vector_query_destroy() to + * destroy + */ +typedef struct ZVecVectorQuery ZVecVectorQuery; + +/** + * @brief Grouped vector query structure (opaque pointer) + * Aligned with zvec::GroupByVectorQuery + * Use zvec_group_by_vector_query_create() to create and + * zvec_group_by_vector_query_destroy() to destroy + */ +typedef struct ZVecGroupByVectorQuery ZVecGroupByVectorQuery; + + +// ============================================================================= +// Query Parameters Management Functions +// ============================================================================= + +// ----------------------------------------------------------------------------- +// ZVecQueryParams (Base Query Parameters) +// ----------------------------------------------------------------------------- + +/** + * @brief Create base query parameters + * @param index_type Index type + * @return ZVecQueryParams* Pointer to the newly created query parameters + */ +ZVEC_EXPORT ZVecQueryParams *ZVEC_CALL +zvec_query_params_create(ZVecIndexType index_type); + +/** + * @brief Destroy base query parameters + * @param params Query parameters pointer + */ +ZVEC_EXPORT void ZVEC_CALL 
zvec_query_params_destroy(ZVecQueryParams *params); + +/** + * @brief Set index type + * @param params Query parameters pointer + * @param index_type Index type + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_query_params_set_index_type( + ZVecQueryParams *params, ZVecIndexType index_type); + +/** + * @brief Get index type + * @param params Query parameters pointer + * @return ZVecIndexType Index type + */ +ZVEC_EXPORT ZVecIndexType ZVEC_CALL +zvec_query_params_get_index_type(const ZVecQueryParams *params); + +/** + * @brief Set search radius + * @param params Query parameters pointer + * @param radius Search radius + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_query_params_set_radius(ZVecQueryParams *params, float radius); + +/** + * @brief Get search radius + * @param params Query parameters pointer + * @return float Search radius + */ +ZVEC_EXPORT float ZVEC_CALL +zvec_query_params_get_radius(const ZVecQueryParams *params); + +/** + * @brief Set linear search mode + * @param params Query parameters pointer + * @param is_linear Whether linear search + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_query_params_set_is_linear(ZVecQueryParams *params, bool is_linear); + +/** + * @brief Get linear search mode + * @param params Query parameters pointer + * @return bool Whether linear search + */ +ZVEC_EXPORT bool ZVEC_CALL +zvec_query_params_get_is_linear(const ZVecQueryParams *params); + +/** + * @brief Set whether to use refiner + * @param params Query parameters pointer + * @param is_using_refiner Whether to use refiner + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_query_params_set_is_using_refiner( + ZVecQueryParams *params, bool is_using_refiner); + +/** + * @brief Get whether to use refiner + * @param params Query parameters pointer + * @return bool Whether to use refiner + */ +ZVEC_EXPORT bool ZVEC_CALL 
+zvec_query_params_get_is_using_refiner(const ZVecQueryParams *params); + +// ----------------------------------------------------------------------------- +// ZVecHnswQueryParams (HNSW Query Parameters) +// ----------------------------------------------------------------------------- + +/** + * @brief Create HNSW query parameters + * @param ef Exploration factor during search (default: 40) + * @param radius Search radius (default: 0.0) + * @param is_linear Whether linear search (default: false) + * @param is_using_refiner Whether using refiner (default: false) + * @return ZVecHnswQueryParams* Pointer to the newly created HNSW query + * parameters + */ +ZVEC_EXPORT ZVecHnswQueryParams *ZVEC_CALL zvec_query_params_hnsw_create( + int ef, float radius, bool is_linear, bool is_using_refiner); + +/** + * @brief Destroy HNSW query parameters + * @param params HNSW query parameters pointer + */ +ZVEC_EXPORT void ZVEC_CALL +zvec_query_params_hnsw_destroy(ZVecHnswQueryParams *params); + +/** + * @brief Set exploration factor + * @param params HNSW query parameters pointer + * @param ef Exploration factor + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_query_params_hnsw_set_ef(ZVecHnswQueryParams *params, int ef); + +/** + * @brief Get exploration factor + * @param params HNSW query parameters pointer + * @return int Exploration factor + */ +ZVEC_EXPORT int ZVEC_CALL +zvec_query_params_hnsw_get_ef(const ZVecHnswQueryParams *params); + +// ----------------------------------------------------------------------------- +// ZVecIVFQueryParams (IVF Query Parameters) +// ----------------------------------------------------------------------------- + +/** + * @brief Create IVF query parameters + * @param nprobe Number of clusters to probe (default: 10) + * @param is_using_refiner Whether using refiner (default: false) + * @param scale_factor Scale factor (default: 10.0) + * @return ZVecIVFQueryParams* Pointer to the newly created IVF query 
parameters + */ +ZVEC_EXPORT ZVecIVFQueryParams *ZVEC_CALL zvec_query_params_ivf_create( + int nprobe, bool is_using_refiner, float scale_factor); + +/** + * @brief Destroy IVF query parameters + * @param params IVF query parameters pointer + */ +ZVEC_EXPORT void ZVEC_CALL +zvec_query_params_ivf_destroy(ZVecIVFQueryParams *params); + +/** + * @brief Set number of probe clusters + * @param params IVF query parameters pointer + * @param nprobe Number of probe clusters + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_query_params_ivf_set_nprobe(ZVecIVFQueryParams *params, int nprobe); + +/** + * @brief Get number of probe clusters + * @param params IVF query parameters pointer + * @return int Number of probe clusters + */ +ZVEC_EXPORT int ZVEC_CALL +zvec_query_params_ivf_get_nprobe(const ZVecIVFQueryParams *params); + +/** + * @brief Set scale factor + * @param params IVF query parameters pointer + * @param scale_factor Scale factor + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_query_params_ivf_set_scale_factor( + ZVecIVFQueryParams *params, float scale_factor); + +/** + * @brief Get scale factor + * @param params IVF query parameters pointer + * @return float Scale factor + */ +ZVEC_EXPORT float ZVEC_CALL +zvec_query_params_ivf_get_scale_factor(const ZVecIVFQueryParams *params); + +// ----------------------------------------------------------------------------- +// ZVecFlatQueryParams (Flat Query Parameters) +// ----------------------------------------------------------------------------- + +/** + * @brief Create Flat query parameters + * @param is_using_refiner Whether using refiner (default: false) + * @param scale_factor Scale factor (default: 10.0) + * @return ZVecFlatQueryParams* Pointer to the newly created Flat query + * parameters + */ +ZVEC_EXPORT ZVecFlatQueryParams *ZVEC_CALL +zvec_query_params_flat_create(bool is_using_refiner, float scale_factor); + +/** + * @brief Destroy Flat 
query parameters + * @param params Flat query parameters pointer + */ +ZVEC_EXPORT void ZVEC_CALL +zvec_query_params_flat_destroy(ZVecFlatQueryParams *params); + +/** + * @brief Set scale factor + * @param params Flat query parameters pointer + * @param scale_factor Scale factor + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_query_params_flat_set_scale_factor( + ZVecFlatQueryParams *params, float scale_factor); + +/** + * @brief Get scale factor + * @param params Flat query parameters pointer + * @return float Scale factor + */ +ZVEC_EXPORT float ZVEC_CALL +zvec_query_params_flat_get_scale_factor(const ZVecFlatQueryParams *params); + +// ----------------------------------------------------------------------------- +// ZVecVectorQuery (Vector Query) +// ----------------------------------------------------------------------------- + +/** + * @brief Create vector query + * @return ZVecVectorQuery* Pointer to the newly created vector query + */ +ZVEC_EXPORT ZVecVectorQuery *ZVEC_CALL zvec_vector_query_create(void); + +/** + * @brief Destroy vector query + * @param query Vector query pointer + */ +ZVEC_EXPORT void ZVEC_CALL zvec_vector_query_destroy(ZVecVectorQuery *query); + +/** + * @brief Set topk (number of results to return) + * @param query Vector query pointer + * @param topk Number of results + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_vector_query_set_topk(ZVecVectorQuery *query, int topk); + +/** + * @brief Get topk + * @param query Vector query pointer + * @return int Number of results + */ +ZVEC_EXPORT int ZVEC_CALL +zvec_vector_query_get_topk(const ZVecVectorQuery *query); + +/** + * @brief Set field name + * @param query Vector query pointer + * @param field_name Field name + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_vector_query_set_field_name( + ZVecVectorQuery *query, const char *field_name); + +/** + * @brief Get field name + * @param 
query Vector query pointer + * @return const char* Field name (owned by query, do not free) + */ +ZVEC_EXPORT const char *ZVEC_CALL +zvec_vector_query_get_field_name(const ZVecVectorQuery *query); + +/** + * @brief Set query vector data + * @param query Vector query pointer + * @param data Vector data pointer + * @param size Data size in bytes + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_vector_query_set_query_vector( + ZVecVectorQuery *query, const void *data, size_t size); + +/** + * @brief Set filter expression + * @param query Vector query pointer + * @param filter Filter expression string + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_vector_query_set_filter(ZVecVectorQuery *query, const char *filter); + +/** + * @brief Get filter expression + * @param query Vector query pointer + * @return const char* Filter expression (owned by query, do not free) + */ +ZVEC_EXPORT const char *ZVEC_CALL +zvec_vector_query_get_filter(const ZVecVectorQuery *query); + +/** + * @brief Set whether to include vector data in results + * @param query Vector query pointer + * @param include Whether to include vector + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_vector_query_set_include_vector(ZVecVectorQuery *query, bool include); + +/** + * @brief Set whether to include doc ID in results + * @param query Vector query pointer + * @param include Whether to include doc ID + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_vector_query_set_include_doc_id(ZVecVectorQuery *query, bool include); + +/** + * @brief Set output fields + * @param query Vector query pointer + * @param fields Array of field names + * @param count Number of fields + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_vector_query_set_output_fields( + ZVecVectorQuery *query, const char **fields, size_t count); + +/** + * @brief Set query 
parameters (takes ownership) + * @param query Vector query pointer + * @param params Query parameters pointer (type-specific: ZVecHnswQueryParams*, + * etc.) + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_vector_query_set_query_params(ZVecVectorQuery *query, void *params); + +// ----------------------------------------------------------------------------- +// ZVecGroupByVectorQuery (Group By Vector Query) +// ----------------------------------------------------------------------------- + +/** + * @brief Create group by vector query + * @return ZVecGroupByVectorQuery* Pointer to the newly created group by vector + * query + */ +ZVEC_EXPORT ZVecGroupByVectorQuery *ZVEC_CALL +zvec_group_by_vector_query_create(void); + +/** + * @brief Destroy group by vector query + * @param query Group by vector query pointer + */ +ZVEC_EXPORT void ZVEC_CALL +zvec_group_by_vector_query_destroy(ZVecGroupByVectorQuery *query); + +/** + * @brief Set field name + * @param query Group by vector query pointer + * @param field_name Field name + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_group_by_vector_query_set_field_name( + ZVecGroupByVectorQuery *query, const char *field_name); + +/** + * @brief Set group by field name + * @param query Group by vector query pointer + * @param field_name Group by field name + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_group_by_vector_query_set_group_by_field_name( + ZVecGroupByVectorQuery *query, const char *field_name); + +/** + * @brief Set group count + * @param query Group by vector query pointer + * @param count Number of groups + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_group_by_vector_query_set_group_count( + ZVecGroupByVectorQuery *query, uint32_t count); + +/** + * @brief Set group topk + * @param query Group by vector query pointer + * @param topk Number of results per group + * @return 
ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_group_by_vector_query_set_group_topk( + ZVecGroupByVectorQuery *query, uint32_t topk); + +/** + * @brief Set query vector data + * @param query Group by vector query pointer + * @param data Vector data pointer + * @param size Data size in bytes + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_group_by_vector_query_set_query_vector( + ZVecGroupByVectorQuery *query, const void *data, size_t size); + +/** + * @brief Set filter expression + * @param query Group by vector query pointer + * @param filter Filter expression string + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_group_by_vector_query_set_filter( + ZVecGroupByVectorQuery *query, const char *filter); + +/** + * @brief Set whether to include vector data in results + * @param query Group by vector query pointer + * @param include Whether to include vectors + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_group_by_vector_query_set_include_vector(ZVecGroupByVectorQuery *query, + bool include); + +/** + * @brief Set output fields + * @param query Group by vector query pointer + * @param fields Array of field names + * @param count Number of fields + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_group_by_vector_query_set_output_fields(ZVecGroupByVectorQuery *query, + const char **fields, size_t count); + +/** + * @brief Set query parameters (takes ownership) + * @param query Group by vector query pointer + * @param params Query parameters pointer + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_group_by_vector_query_set_query_params( + ZVecGroupByVectorQuery *query, void *params); + +// Deprecated macros (use create/destroy functions instead) +// #define ZVEC_VECTOR_QUERY(...) 
- Deprecated, use zvec_vector_query_create() +// and setters. #define ZVEC_DEFAULT_OPTIONS() is likewise deprecated; use +// zvec_collection_options_create() and setters instead. + +// ============================================================================= +// Collection Options and Statistics (Opaque Pointer Pattern) +// ============================================================================= + +/** + * @brief Collection options (opaque pointer) + * Use zvec_collection_options_create() to create and + * zvec_collection_options_destroy() to destroy + */ +typedef struct ZVecCollectionOptions ZVecCollectionOptions; + +/** + * @brief Collection statistics (opaque pointer) + * Use the zvec_collection_stats_get_ accessor functions to read fields + */ +typedef struct ZVecCollectionStats ZVecCollectionStats; + +// ============================================================================= +// Collection Options Management Functions +// ============================================================================= + +/** + * @brief Create collection options + * @return ZVecCollectionOptions* Pointer to the newly created collection + * options + */ +ZVEC_EXPORT ZVecCollectionOptions *ZVEC_CALL +zvec_collection_options_create(void); + +/** + * @brief Destroy collection options + * @param options Collection options pointer + */ +ZVEC_EXPORT void ZVEC_CALL +zvec_collection_options_destroy(ZVecCollectionOptions *options); + +/** + * @brief Set whether to enable memory mapping + * @param options Collection options pointer + * @param enable Whether to enable mmap + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_options_set_enable_mmap( + ZVecCollectionOptions *options, bool enable); + +/** + * @brief Get whether memory mapping is enabled + * @param options Collection options pointer + * @return bool Whether mmap is enabled + */ +ZVEC_EXPORT bool ZVEC_CALL +zvec_collection_options_get_enable_mmap(const ZVecCollectionOptions *options); + +/** + * @brief Set
maximum buffer size + * @param options Collection options pointer + * @param size Maximum buffer size in bytes + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_options_set_max_buffer_size( + ZVecCollectionOptions *options, size_t size); + +/** + * @brief Get maximum buffer size + * @param options Collection options pointer + * @return size_t Maximum buffer size in bytes + */ +ZVEC_EXPORT size_t ZVEC_CALL zvec_collection_options_get_max_buffer_size( + const ZVecCollectionOptions *options); + +/** + * @brief Set whether read-only mode + * @param options Collection options pointer + * @param read_only Whether read-only + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_options_set_read_only( + ZVecCollectionOptions *options, bool read_only); + +/** + * @brief Get whether read-only mode + * @param options Collection options pointer + * @return bool Whether read-only mode + */ +ZVEC_EXPORT bool ZVEC_CALL +zvec_collection_options_get_read_only(const ZVecCollectionOptions *options); + +/** + * @brief Set maximum document count per segment + * @param options Collection options pointer + * @param count Maximum document count + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_collection_options_set_max_doc_count_per_segment( + ZVecCollectionOptions *options, uint64_t count); + +/** + * @brief Get maximum document count per segment + * @param options Collection options pointer + * @return uint64_t Maximum document count per segment + */ +ZVEC_EXPORT uint64_t ZVEC_CALL +zvec_collection_options_get_max_doc_count_per_segment( + const ZVecCollectionOptions *options); + +// ============================================================================= +// Collection Statistics Management Functions +// ============================================================================= + +/** + * @brief Get document count from collection stats + * @param stats 
Collection statistics pointer + * @return uint64_t Document count + */ +ZVEC_EXPORT uint64_t ZVEC_CALL +zvec_collection_stats_get_doc_count(const ZVecCollectionStats *stats); + +/** + * @brief Get index count from collection stats + * @param stats Collection statistics pointer + * @return size_t Number of indexes + */ +ZVEC_EXPORT size_t ZVEC_CALL +zvec_collection_stats_get_index_count(const ZVecCollectionStats *stats); + +/** + * @brief Get index name at specified index + * @param stats Collection statistics pointer + * @param index Index of the index name + * @return const char* Index name (owned by stats, do not free) + */ +ZVEC_EXPORT const char *ZVEC_CALL zvec_collection_stats_get_index_name( + const ZVecCollectionStats *stats, size_t index); + +/** + * @brief Get index completeness at specified index + * @param stats Collection statistics pointer + * @param index Index of the completeness value + * @return float Index completeness + */ +ZVEC_EXPORT float ZVEC_CALL zvec_collection_stats_get_index_completeness( + const ZVecCollectionStats *stats, size_t index); + + +/** + * @brief Create field schema + * @param name Field name + * @param data_type Data type + * @param nullable Whether nullable + * @param dimension Vector dimension + * @return ZVecFieldSchema* Pointer to the newly created field schema + */ +ZVEC_EXPORT ZVecFieldSchema *ZVEC_CALL +zvec_field_schema_create(const char *name, ZVecDataType data_type, + bool nullable, uint32_t dimension); + +/** + * @brief Destroy field schema + * @param schema Field schema pointer + */ +ZVEC_EXPORT void ZVEC_CALL zvec_field_schema_destroy(ZVecFieldSchema *schema); + +/** + * @brief Get field name + * @param schema Field schema pointer (must not be NULL) + * @return const char* Field name (owned by schema, do not free) + */ +ZVEC_EXPORT const char *ZVEC_CALL +zvec_field_schema_get_name(const ZVecFieldSchema *schema); + +/** + * @brief Get field data type + * @param schema Field schema pointer (must not be NULL) + * 
@return ZVecDataType Data type + */ +ZVEC_EXPORT ZVecDataType ZVEC_CALL +zvec_field_schema_get_data_type(const ZVecFieldSchema *schema); + +/** + * @brief Check if field is nullable + * @param schema Field schema pointer (must not be NULL) + * @return bool true if nullable, false otherwise + */ +ZVEC_EXPORT bool ZVEC_CALL +zvec_field_schema_is_nullable(const ZVecFieldSchema *schema); + +/** + * @brief Set field nullable + * @param schema Field schema pointer + * @param nullable Whether nullable + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_field_schema_set_nullable(ZVecFieldSchema *schema, bool nullable); + +/** + * @brief Get field dimension (for vector fields) + * @param schema Field schema pointer (must not be NULL) + * @return uint32_t Dimension value + */ +ZVEC_EXPORT uint32_t ZVEC_CALL +zvec_field_schema_get_dimension(const ZVecFieldSchema *schema); + +/** + * @brief Set field dimension (for vector fields) + * @param schema Field schema pointer + * @param dimension Dimension value + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_field_schema_set_dimension(ZVecFieldSchema *schema, uint32_t dimension); + +/** + * @brief Check if field has index + * @param schema Field schema pointer (must not be NULL) + * @return bool true if has index, false otherwise + */ +ZVEC_EXPORT bool ZVEC_CALL +zvec_field_schema_has_index(const ZVecFieldSchema *schema); + +/** + * @brief Get index type of the field + * @param schema Field schema pointer (must not be NULL) + * @return ZVecIndexType Index type, ZVEC_INDEX_TYPE_UNDEFINED if no index + */ +ZVEC_EXPORT ZVecIndexType ZVEC_CALL +zvec_field_schema_get_index_type(const ZVecFieldSchema *schema); + +/** + * @brief Get index params of the field (returns pointer owned by the field + * schema, do not destroy) + * @param schema Field schema pointer (must not be NULL) + * @return ZVecIndexParams* Index params pointer, NULL if no index + */ +ZVEC_EXPORT const 
ZVecIndexParams *ZVEC_CALL +zvec_field_schema_get_index_params(const ZVecFieldSchema *schema); + +/** + * @brief Check if field is a vector field (dense or sparse) + * @param schema Field schema pointer (must not be NULL) + * @return bool true if vector field, false otherwise + */ +ZVEC_EXPORT bool ZVEC_CALL +zvec_field_schema_is_vector_field(const ZVecFieldSchema *schema); + +/** + * @brief Check if field is a dense vector field + * @param schema Field schema pointer (must not be NULL) + * @return bool true if dense vector field, false otherwise + */ +ZVEC_EXPORT bool ZVEC_CALL +zvec_field_schema_is_dense_vector(const ZVecFieldSchema *schema); + +/** + * @brief Check if field is a sparse vector field + * @param schema Field schema pointer (must not be NULL) + * @return bool true if sparse vector field, false otherwise + */ +ZVEC_EXPORT bool ZVEC_CALL +zvec_field_schema_is_sparse_vector(const ZVecFieldSchema *schema); + +/** + * @brief Check if field is an array type + * @param schema Field schema pointer (must not be NULL) + * @return bool true if array type, false otherwise + */ +ZVEC_EXPORT bool ZVEC_CALL +zvec_field_schema_is_array_type(const ZVecFieldSchema *schema); + +/** + * @brief Get element data type for array fields + * @param schema Field schema pointer (must not be NULL) + * @return ZVecDataType Element data type, or original type if not array + */ +ZVEC_EXPORT ZVecDataType ZVEC_CALL +zvec_field_schema_get_element_data_type(const ZVecFieldSchema *schema); + +/** + * @brief Check if field has inverted index (for scalar fields) + * @param schema Field schema pointer (must not be NULL) + * @return bool true if has inverted index, false otherwise + */ +ZVEC_EXPORT bool ZVEC_CALL +zvec_field_schema_has_invert_index(const ZVecFieldSchema *schema); + +/** + * @brief Set index parameters for field + * @param schema Field schema pointer + * @param index_params Index parameters pointer + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode 
ZVEC_CALL zvec_field_schema_set_index_params( + ZVecFieldSchema *schema, const ZVecIndexParams *index_params); + +/** + * @brief Set inverted index parameters for field schema (convenience function) + * @param field_schema Field schema pointer + * @param invert_params Inverted index parameters pointer + */ +ZVEC_EXPORT void ZVEC_CALL zvec_field_schema_set_invert_index( + ZVecFieldSchema *field_schema, const ZVecIndexParams *invert_params); + +/** + * @brief Set HNSW index parameters for field schema (convenience function) + * @param field_schema Field schema pointer + * @param hnsw_params HNSW index parameters pointer + */ +ZVEC_EXPORT void ZVEC_CALL zvec_field_schema_set_hnsw_index( + ZVecFieldSchema *field_schema, const ZVecIndexParams *hnsw_params); + +/** + * @brief Set Flat index parameters for field schema (convenience function) + * @param field_schema Field schema pointer + * @param flat_params Flat index parameters pointer + */ +ZVEC_EXPORT void ZVEC_CALL zvec_field_schema_set_flat_index( + ZVecFieldSchema *field_schema, const ZVecIndexParams *flat_params); + +/** + * @brief Clear index from field schema + * @param schema Field schema pointer + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_field_schema_clear_index(ZVecFieldSchema *schema); + +/** + * @brief Set IVF index parameters for field schema (convenience function) + * @param field_schema Field schema pointer + * @param ivf_params IVF index parameters pointer + */ +ZVEC_EXPORT void ZVEC_CALL zvec_field_schema_set_ivf_index( + ZVecFieldSchema *field_schema, const ZVecIndexParams *ivf_params); + + +// ============================================================================= +// Collection Schema Structures +// ============================================================================= + +/** + * @brief Collection schema (opaque pointer) + * + * Use zvec_collection_schema_create() to create and + * zvec_collection_schema_destroy() to destroy. 
Fields are accessed via + * getter/setter functions. + */ +typedef struct ZVecCollectionSchema ZVecCollectionSchema; + +/** + * @brief Create collection schema + * @param name Collection name + * @return ZVecCollectionSchema* Pointer to the newly created collection schema + */ +ZVEC_EXPORT ZVecCollectionSchema *ZVEC_CALL +zvec_collection_schema_create(const char *name); + +/** + * @brief Destroy collection schema + * @param schema Collection schema pointer + */ +ZVEC_EXPORT void ZVEC_CALL +zvec_collection_schema_destroy(ZVecCollectionSchema *schema); + +/** + * @brief Get collection schema name + * @param schema Collection schema pointer (must not be NULL) + * @return const char* Collection name (owned by schema, do not free) + */ +ZVEC_EXPORT const char *ZVEC_CALL +zvec_collection_schema_get_name(const ZVecCollectionSchema *schema); + +/** + * @brief Add field to collection schema + * @param schema Collection schema pointer + * @param field Field schema pointer (function takes ownership) + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_schema_add_field( + ZVecCollectionSchema *schema, ZVecFieldSchema *field); + +/** + * @brief Add multiple fields to collection schema at once + * + * @param schema Collection schema pointer + * @param fields Array of field pointers to add + * @param field_count Number of fields to add + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_schema_add_fields( + ZVecCollectionSchema *schema, const ZVecFieldSchema *const *fields, + size_t field_count); + +/** + * @brief Remove field + * @param schema Collection schema pointer + * @param field_name Field name + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_schema_remove_field( + ZVecCollectionSchema *schema, const char *field_name); + +/** + * @brief Remove multiple fields from collection schema at once + * + * @param schema Collection schema pointer + * 
@param field_names Array of field names to remove + * @param field_count Number of fields to remove + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_schema_remove_fields( + ZVecCollectionSchema *schema, const char *const *field_names, + size_t field_count); + +/** + * @brief Get field count + * + * @param schema Collection schema pointer + * @return size_t Field count + */ +ZVEC_EXPORT size_t ZVEC_CALL +zvec_collection_schema_get_field_count(const ZVecCollectionSchema *schema); + +/** + * @brief Find field + * @param schema Collection schema pointer + * @param field_name Field name + * @return ZVecFieldSchema* Field schema pointer, returns NULL if not found + */ +ZVEC_EXPORT ZVecFieldSchema *ZVEC_CALL zvec_collection_schema_find_field( + const ZVecCollectionSchema *schema, const char *field_name); + +/** + * @brief Validate collection schema + * @param schema Collection schema pointer + * @param[out] error_msg Error message (needs to be freed by calling + * zvec_free_string) + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_schema_validate( + const ZVecCollectionSchema *schema, ZVecString **error_msg); + + +/** + * @brief Get field by index + * @param schema Collection schema pointer + * @param index Field index + * @return ZVecFieldSchema* Field schema pointer + */ +ZVEC_EXPORT ZVecFieldSchema *ZVEC_CALL zvec_collection_schema_get_field( + const ZVecCollectionSchema *schema, size_t index); + +/** + * @brief Set maximum document count per segment + * @param schema Collection schema pointer + * @param max_doc_count Maximum document count + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_collection_schema_set_max_doc_count_per_segment( + ZVecCollectionSchema *schema, uint64_t max_doc_count); + +/** + * @brief Get maximum document count per segment of collection schema + * + * @param schema Collection schema pointer + * @return uint64_t Maximum 
document count per segment + */ +ZVEC_EXPORT uint64_t ZVEC_CALL +zvec_collection_schema_get_max_doc_count_per_segment( + const ZVecCollectionSchema *schema); + +/** + * @brief Set collection schema name + * @param schema Collection schema pointer + * @param name New collection name + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_collection_schema_set_name(ZVecCollectionSchema *schema, const char *name); + +/** + * @brief Check if field exists in schema + * @param schema Collection schema pointer + * @param field_name Field name to check + * @return true if field exists, false otherwise + */ +ZVEC_EXPORT bool ZVEC_CALL zvec_collection_schema_has_field( + const ZVecCollectionSchema *schema, const char *field_name); + +/** + * @brief Alter field schema + * @param schema Collection schema pointer + * @param field_name Name of field to alter + * @param new_field New field schema with updated properties + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_schema_alter_field( + ZVecCollectionSchema *schema, const char *field_name, + const ZVecFieldSchema *new_field); + +/** + * @brief Get forward (scalar) field by name + * @param schema Collection schema pointer + * @param field_name Field name + * @return ZVecFieldSchema* Field schema pointer, NULL if not found or not + * scalar + */ +ZVEC_EXPORT ZVecFieldSchema *ZVEC_CALL zvec_collection_schema_get_forward_field( + const ZVecCollectionSchema *schema, const char *field_name); + +/** + * @brief Get vector field by name + * @param schema Collection schema pointer + * @param field_name Field name + * @return ZVecFieldSchema* Field schema pointer, NULL if not found or not + * vector + */ +ZVEC_EXPORT ZVecFieldSchema *ZVEC_CALL zvec_collection_schema_get_vector_field( + const ZVecCollectionSchema *schema, const char *field_name); + +/** + * @brief Get all forward (scalar) fields + * @param schema Collection schema pointer + * @param[out] fields 
Output array of field pointers (owned by caller, do not + * destroy) + * @param[out] count Number of fields + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_schema_get_forward_fields( + const ZVecCollectionSchema *schema, ZVecFieldSchema ***fields, + size_t *count); + +/** + * @brief Get all forward (scalar) fields that have an index + * @param schema Collection schema pointer + * @param[out] fields Output array of field pointers + * @param[out] count Number of fields + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_collection_schema_get_forward_fields_with_index( + const ZVecCollectionSchema *schema, ZVecFieldSchema ***fields, + size_t *count); + +/** + * @brief Get all field names + * @param schema Collection schema pointer + * @param[out] names Output array of field names (NOTE(review): stated as owned by caller yet "do not free" - confirm who releases it) + * @param[out] count Number of field names + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_schema_get_all_field_names( + const ZVecCollectionSchema *schema, const char ***names, size_t *count); + +/** + * @brief Get all vector fields + * @param schema Collection schema pointer + * @param[out] fields Output array of field pointers + * @param[out] count Number of fields + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_schema_get_vector_fields( + const ZVecCollectionSchema *schema, ZVecFieldSchema ***fields, + size_t *count); + +/** + * @brief Check if field has index + * @param schema Collection schema pointer + * @param field_name Field name + * @return bool true if field has index, false otherwise + */ +ZVEC_EXPORT bool ZVEC_CALL zvec_collection_schema_has_index( + const ZVecCollectionSchema *schema, const char *field_name); + +/** + * @brief Add index to field + * @param schema Collection schema pointer + * @param field_name Field name to add index to + * @param index_params Index parameters + * @return
ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_schema_add_index( + ZVecCollectionSchema *schema, const char *field_name, + const ZVecIndexParams *index_params); + +/** + * @brief Drop index from field + * @param schema Collection schema pointer + * @param field_name Field name to drop index from + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_schema_drop_index( + ZVecCollectionSchema *schema, const char *field_name); + + +// ============================================================================= +// Collection Management Functions +// ============================================================================= + +/** + * @brief Create and open collection + * @param path Collection path + * @param schema Collection schema pointer + * @param options Collection options pointer (NULL uses default options) + * @param[out] collection Returned collection handle + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_create_and_open( + const char *path, const ZVecCollectionSchema *schema, + const ZVecCollectionOptions *options, ZVecCollection **collection); + + +/** + * @brief Open existing collection + * @param path Collection path + * @param options Collection options pointer (NULL uses default options) + * @param[out] collection Returned collection handle + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_collection_open(const char *path, const ZVecCollectionOptions *options, + ZVecCollection **collection); + + +/** + * @brief Close collection + * @param collection Collection handle + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_collection_close(ZVecCollection *collection); + + +/** + * @brief Destroy collection + * + * @param collection Collection handle + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_collection_destroy(ZVecCollection 
*collection); + +/** + * @brief Flush collection data to disk + * @param collection Collection handle + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_collection_flush(ZVecCollection *collection); + +/** + * @brief Get collection schema + * @param collection Collection handle + * @param[out] schema + * Returned collection schema pointer (needs to be freed by calling + * zvec_collection_schema_destroy) + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_get_schema( + const ZVecCollection *collection, ZVecCollectionSchema **schema); + + +/** + * @brief Get collection options + * @param collection Collection handle + * @param[out] options + * Returned collection options pointer (needs to be freed by calling + * zvec_collection_options_destroy) + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_get_options( + const ZVecCollection *collection, ZVecCollectionOptions **options); + +/** + * @brief Get collection statistics + * @param collection Collection handle + * @param[out] stats + * Returned statistics pointer (needs to be freed by calling + * zvec_collection_stats_destroy) + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_get_stats( + const ZVecCollection *collection, ZVecCollectionStats **stats); + +/** + * @brief Destroy collection statistics + * @param stats Statistics pointer + */ +ZVEC_EXPORT void ZVEC_CALL +zvec_collection_stats_destroy(ZVecCollectionStats *stats); + +/** + * @brief Free field schema memory + * + * @param field_schema Field schema pointer to be freed + */ +ZVEC_EXPORT void ZVEC_CALL +zvec_free_field_schema(ZVecFieldSchema *field_schema); + + +// ============================================================================= +// Index Management Interface +// ============================================================================= + +/** + * @brief Create index + * + * @param 
collection Collection handle + * @param field_name Field name + * @param index_params Index parameters + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_collection_create_index(ZVecCollection *collection, const char *field_name, + const ZVecIndexParams *index_params); + +/** + * @brief Create HNSW index for collection field + * @param collection Collection handle + * @param field_name Field name + * @param hnsw_params HNSW index parameters + * @return Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_create_hnsw_index( + ZVecCollection *collection, const char *field_name, + const ZVecIndexParams *hnsw_params); + +/** + * @brief Create Flat index for collection field + * @param collection Collection handle + * @param field_name Field name + * @param flat_params Flat index parameters + * @return Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_create_flat_index( + ZVecCollection *collection, const char *field_name, + const ZVecIndexParams *flat_params); + +/** + * @brief Create IVF index for collection field + * @param collection Collection handle + * @param field_name Field name + * @param ivf_params IVF index parameters + * @return Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_create_ivf_index( + ZVecCollection *collection, const char *field_name, + const ZVecIndexParams *ivf_params); + +/** + * @brief Create scalar index for collection field + * @param collection Collection handle + * @param field_name Field name + * @param invert_params Scalar index parameters + * @return Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_create_invert_index( + ZVecCollection *collection, const char *field_name, + const ZVecIndexParams *invert_params); + +/** + * @brief Drop index + * @param collection Collection handle + * @param field_name Field name + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL 
+zvec_collection_drop_index(ZVecCollection *collection, const char *field_name); + +/** + * @brief Optimize collection (rebuild indexes, merge segments, etc.) + * @param collection Collection handle + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_collection_optimize(ZVecCollection *collection); + +// ============================================================================= +// Column Management Interface (DDL) +// ============================================================================= + +/** + * @brief Add column + * @param collection Collection handle + * @param field_schema Field schema pointer + * @param default_expression Default value expression (can be NULL) + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_add_column( + ZVecCollection *collection, const ZVecFieldSchema *field_schema, + const char *default_expression); + +/** + * @brief Drop column + * @param collection Collection handle + * @param field_name Field name + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_collection_drop_column(ZVecCollection *collection, const char *field_name); + +/** + * @brief Alter column + * @param collection Collection handle + * @param old_name Original field name + * @param new_name New field name (can be NULL to indicate no renaming) + * @param new_schema New field schema (can be NULL to indicate no schema + * modification) + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_alter_column( + ZVecCollection *collection, const char *old_name, const char *new_name, + const ZVecFieldSchema *new_schema); + + +/** + * @brief Document structure (opaque pointer mode) + * Internal implementation details are not visible to the outside, and + * operations are performed through API functions + */ +typedef struct ZVecDoc ZVecDoc; + +/** + * @brief Per-document status returned by detailed DML APIs. 
+ * @note Uses ordered style: result index corresponds to input document index. + * Caller should access pk by index from the original input array. + */ +typedef struct { + ZVecErrorCode code; /**< Per-document status code */ + const char *message; /**< Per-document status message (allocated by API) */ +} ZVecWriteResult; + +// ============================================================================= +// Data Manipulation Interface (DML) +// ============================================================================= + +/** + * @brief Insert documents into collection + * @param collection Collection handle + * @param docs Document array + * @param doc_count Document count + * @param[out] success_count Number of successfully inserted documents + * @param[out] error_count Number of failed insertions + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_insert( + ZVecCollection *collection, const ZVecDoc **docs, size_t doc_count, + size_t *success_count, size_t *error_count); + +/** + * @brief Insert documents and return per-document statuses. 
+ * + * @param collection Collection handle + * @param docs Document array + * @param doc_count Document count + * @param[out] results Per-document result array (free with + * zvec_write_results_free) + * @param[out] result_count Number of result entries + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_insert_with_results( + ZVecCollection *collection, const ZVecDoc **docs, size_t doc_count, + ZVecWriteResult **results, size_t *result_count); + +/** + * @brief Update documents in collection + * @param collection Collection handle + * @param docs Document array + * @param doc_count Document count + * @param[out] success_count Number of successfully updated documents + * @param[out] error_count Number of failed updates + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_update( + ZVecCollection *collection, const ZVecDoc **docs, size_t doc_count, + size_t *success_count, size_t *error_count); + +/** + * @brief Update documents and return per-document statuses. 
+ * + * @param collection Collection handle + * @param docs Document array + * @param doc_count Document count + * @param[out] results Per-document result array (free with + * zvec_write_results_free) + * @param[out] result_count Number of result entries + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_update_with_results( + ZVecCollection *collection, const ZVecDoc **docs, size_t doc_count, + ZVecWriteResult **results, size_t *result_count); + +/** + * @brief Insert or update documents in collection (upsert operation) + * @param collection Collection handle + * @param docs Document array + * @param doc_count Document count + * @param[out] success_count Number of successful operations + * @param[out] error_count Number of failed operations + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_upsert( + ZVecCollection *collection, const ZVecDoc **docs, size_t doc_count, + size_t *success_count, size_t *error_count); + +/** + * @brief Upsert documents and return per-document statuses. 
+ * + * @param collection Collection handle + * @param docs Document array + * @param doc_count Document count + * @param[out] results Per-document result array (free with + * zvec_write_results_free) + * @param[out] result_count Number of result entries + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_upsert_with_results( + ZVecCollection *collection, const ZVecDoc **docs, size_t doc_count, + ZVecWriteResult **results, size_t *result_count); + +/** + * @brief Delete documents from collection + * @param collection Collection handle + * @param pks Primary key array + * @param pk_count Primary key count + * @param[out] success_count Number of successfully deleted documents + * @param[out] error_count Number of failed deletions + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_delete( + ZVecCollection *collection, const char *const *pks, size_t pk_count, + size_t *success_count, size_t *error_count); + +/** + * @brief Delete documents by PK and return per-document statuses. + * + * @param collection Collection handle + * @param pks Primary key array + * @param pk_count Primary key count + * @param[out] results Per-document result array (free with + * zvec_write_results_free) + * @param[out] result_count Number of result entries + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_delete_with_results( + ZVecCollection *collection, const char *const *pks, size_t pk_count, + ZVecWriteResult **results, size_t *result_count); + +/** + * @brief Free result arrays returned by detailed DML APIs. 
+ * + * @param results Result array pointer + * @param result_count Number of entries in result array + */ +ZVEC_EXPORT void ZVEC_CALL zvec_write_results_free(ZVecWriteResult *results, + size_t result_count); + +/** + * @brief Delete documents by filter condition + * @param collection Collection handle + * @param filter Filter expression + * @note The number of deleted documents is not returned to the caller + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_delete_by_filter( + ZVecCollection *collection, const char *filter); + +// ============================================================================= +// Data Query Interface (DQL) +// ============================================================================= + +/** + * @brief Vector similarity search + * @param collection Collection handle + * @param query Query parameters pointer + * @param[out] results Returned document array (needs to be freed by calling + * zvec_docs_free) + * @param[out] result_count Number of returned results + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_query( + const ZVecCollection *collection, const ZVecVectorQuery *query, + ZVecDoc ***results, size_t *result_count); + +/** + * @brief Grouped vector similarity search + * @param collection Collection handle + * @param query Grouped query parameters pointer + * @param[out] results Returned document array (needs to be freed by calling + * zvec_docs_free) + * @param[out] group_by_values Returned group by field values array (needs to be + * freed by calling zvec_string_array_destroy) + * @param[out] result_count Number of returned results + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_query_by_group( + const ZVecCollection *collection, const ZVecGroupByVectorQuery *query, + ZVecDoc ***results, ZVecString ***group_by_values, size_t *result_count); + +/** + * @brief Fetch documents by primary keys + * @param 
collection Collection handle + * @param primary_keys Primary key array + * @param count Number of primary keys + * @param[out] documents Returned document array (needs to be freed by calling + * zvec_docs_free) + * @param[out] found_count Number of found documents + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_fetch( + ZVecCollection *collection, const char *const *primary_keys, size_t count, + ZVecDoc ***documents, size_t *found_count); + +// ============================================================================= +// Document Related Structures +// ============================================================================= + +/** + * @brief Document field value union + */ +typedef union { + bool bool_value; + int32_t int32_value; + int64_t int64_value; + uint32_t uint32_value; + uint64_t uint64_value; + float float_value; + double double_value; + ZVecString string_value; + ZVecFloatArray vector_value; + ZVecByteArray binary_value; /**< Binary data value */ +} ZVecFieldValue; + +/** + * @brief Document field structure + */ +typedef struct { + ZVecString name; ///< Field name + ZVecDataType data_type; ///< Data type + ZVecFieldValue value; ///< Field value +} ZVecDocField; + +/** + * @brief Document operator enumeration + */ +typedef enum { + ZVEC_DOC_OP_INSERT = 0, ///< Insert operation + ZVEC_DOC_OP_UPDATE = 1, ///< Update operation + ZVEC_DOC_OP_UPSERT = 2, ///< Insert or update operation + ZVEC_DOC_OP_DELETE = 3 ///< Delete operation +} ZVecDocOperator; + + +// ============================================================================= +// Document Object Interface +// ============================================================================= + +/** + * @brief Create a new document object + * + * @return ZVecDoc* Pointer to the newly created document object, returns NULL + * on failure + */ +ZVEC_EXPORT ZVecDoc *ZVEC_CALL zvec_doc_create(void); + +/** + * @brief Destroy the document object and 
release all resources + * + * @param doc Pointer to the document object + */ +ZVEC_EXPORT void ZVEC_CALL zvec_doc_destroy(ZVecDoc *doc); + +/** + * @brief Clear the document object + * + * @param doc Pointer to the document object + */ +ZVEC_EXPORT void ZVEC_CALL zvec_doc_clear(ZVecDoc *doc); + +/** + * @brief Add field to document by value + * + * @param doc Document object pointer + * @param field_name Field name + * @param data_type Data type + * @param value Value pointer + * @param value_size Value size + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_doc_add_field_by_value( + ZVecDoc *doc, const char *field_name, ZVecDataType data_type, + const void *value, size_t value_size); + +/** + * @brief Add field to document by structure + * + * @param doc Document object pointer + * @param field Field structure pointer + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_doc_add_field_by_struct(ZVecDoc *doc, const ZVecDocField *field); + +/** + * @brief Remove field from document + * + * @param doc Document structure pointer + * @param field_name Field name + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_doc_remove_field(ZVecDoc *doc, const char *field_name); + + +/** + * @brief Batch release document array + * + * @param documents Document pointer array + * @param count Document count + */ +ZVEC_EXPORT void ZVEC_CALL zvec_docs_free(ZVecDoc **documents, size_t count); + +/** + * @brief Set document primary key + * + * @param doc Pointer to the document structure + * @param pk Primary key string + */ +ZVEC_EXPORT void ZVEC_CALL zvec_doc_set_pk(ZVecDoc *doc, const char *pk); + +/** + * @brief Set document ID + * + * @param doc Document structure pointer + * @param doc_id Document ID + */ +ZVEC_EXPORT void ZVEC_CALL zvec_doc_set_doc_id(ZVecDoc *doc, uint64_t doc_id); + +/** + * @brief Set document score + * + * @param doc Document structure pointer + * @param score 
Score value + */ +ZVEC_EXPORT void ZVEC_CALL zvec_doc_set_score(ZVecDoc *doc, float score); + +/** + * @brief Set document operator + * + * @param doc Document structure pointer + * @param op Operator + */ +ZVEC_EXPORT void ZVEC_CALL zvec_doc_set_operator(ZVecDoc *doc, + ZVecDocOperator op); + +/** + * @brief Explicitly mark a document field as null. + * + * @param doc Document structure pointer + * @param field_name Field name + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_doc_set_field_null(ZVecDoc *doc, const char *field_name); + +/** + * @brief Get document ID + * + * @param doc Document structure pointer + * @return uint64_t Document ID + */ +ZVEC_EXPORT uint64_t ZVEC_CALL zvec_doc_get_doc_id(const ZVecDoc *doc); + +/** + * @brief Get document score + * + * @param doc Document structure pointer + * @return float Score value + */ +ZVEC_EXPORT float ZVEC_CALL zvec_doc_get_score(const ZVecDoc *doc); + +/** + * @brief Get document operator + * + * @param doc Document structure pointer + * @return ZVecDocOperator Operator + */ +ZVEC_EXPORT ZVecDocOperator ZVEC_CALL zvec_doc_get_operator(const ZVecDoc *doc); + +/** + * @brief Get document field count + * + * @param doc Document structure pointer + * @return size_t Field count + */ +ZVEC_EXPORT size_t ZVEC_CALL zvec_doc_get_field_count(const ZVecDoc *doc); + + +/** + * @brief Get document primary key pointer (no copy) + * + * @param doc Document object pointer + * @return const char* Primary key string pointer, returns NULL if not set + */ +ZVEC_EXPORT const char *ZVEC_CALL zvec_doc_get_pk_pointer(const ZVecDoc *doc); + +/** + * @brief Get document primary key copy (needs manual release) + * + * @param doc Document object pointer + * @return const char* Primary key string copy, needs to call free() to release, + * returns NULL if not set + */ +ZVEC_EXPORT const char *ZVEC_CALL zvec_doc_get_pk_copy(const ZVecDoc *doc); + +/** + * @brief Get field value (basic type returned 
directly) + * + * Supports basic numeric data types: BOOL, INT32, INT64, UINT32, UINT64, + * FLOAT, DOUBLE. The value is copied directly into the provided buffer. + * For STRING, BINARY, and VECTOR types, use zvec_doc_get_field_value_copy + * or zvec_doc_get_field_value_pointer instead. + * + * @param doc Document object pointer + * @param field_name Field name + * @param field_type Field type (must be a basic numeric type) + * @param value_buffer Output buffer to receive the value + * @param buffer_size Size of the output buffer + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_doc_get_field_value_basic( + const ZVecDoc *doc, const char *field_name, ZVecDataType field_type, + void *value_buffer, size_t buffer_size); + +/** + * @brief Get field value copy (allocate new memory) + * + * Supports all data types including: + * - Basic types: BOOL, INT32, INT64, UINT32, UINT64, FLOAT, DOUBLE + * - String types: STRING, BINARY + * - Vector types: VECTOR_FP32, VECTOR_FP64, VECTOR_FP16, VECTOR_INT4, + * VECTOR_INT8, VECTOR_INT16, VECTOR_BINARY32, VECTOR_BINARY64 + * - Sparse vector types: SPARSE_VECTOR_FP32, SPARSE_VECTOR_FP16 + * - Array types: ARRAY_STRING, ARRAY_BINARY, ARRAY_BOOL, ARRAY_INT32, + * ARRAY_INT64, ARRAY_UINT32, ARRAY_UINT64, ARRAY_FLOAT, ARRAY_DOUBLE + * + * The returned value pointer must be manually freed using appropriate + * deallocation functions (free() for basic types and strings, + * zvec_free_uint8_array() for binary data). 
+ * + * @param doc Document object pointer + * @param field_name Field name + * @param field_type Field type + * @param[out] value Returned value pointer (needs manual release) + * @param[out] value_size Returned value size + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_doc_get_field_value_copy( + const ZVecDoc *doc, const char *field_name, ZVecDataType field_type, + void **value, size_t *value_size); + +/** + * @brief Get field value pointer (data remains in document) + * + * Supports data types where direct pointer access is safe: + * - Basic types: BOOL, INT32, INT64, UINT32, UINT64, FLOAT, DOUBLE + * - String types: STRING (returns null-terminated C string), BINARY + * - Vector types: VECTOR_FP32, VECTOR_FP64, VECTOR_FP16, VECTOR_INT4, + * VECTOR_INT8, VECTOR_INT16, VECTOR_BINARY32, VECTOR_BINARY64 + * - Array types: ARRAY_INT32, ARRAY_INT64, ARRAY_UINT32, ARRAY_UINT64, + * ARRAY_FLOAT, ARRAY_DOUBLE + * + * The returned pointer points to data within the document object and + * does not require manual memory management. The pointer remains valid + * as long as the document exists. 
+ * + * @param doc Document object pointer + * @param field_name Field name + * @param field_type Field type + * @param[out] value Returned value pointer (points to document-internal data) + * @param[out] value_size Returned value size + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_doc_get_field_value_pointer( + const ZVecDoc *doc, const char *field_name, ZVecDataType field_type, + const void **value, size_t *value_size); + +/** + * @brief Check if document is empty + * + * @param doc Document object pointer + * @return bool Returns true if document is empty, otherwise returns false + */ +ZVEC_EXPORT bool ZVEC_CALL zvec_doc_is_empty(const ZVecDoc *doc); + +/** + * @brief Check if document contains specified field + * + * @param doc Document object pointer + * @param field_name Field name + * @return bool Returns true if field exists, otherwise returns false + */ +ZVEC_EXPORT bool ZVEC_CALL zvec_doc_has_field(const ZVecDoc *doc, + const char *field_name); + +/** + * @brief Check if document field has value + * + * @param doc Document object pointer + * @param field_name Field name + * @return bool Returns true if field has value, otherwise returns false + */ +ZVEC_EXPORT bool ZVEC_CALL zvec_doc_has_field_value(const ZVecDoc *doc, + const char *field_name); + +/** + * @brief Check if document field is null + * + * @param doc Document object pointer + * @param field_name Field name + * @return bool Returns true if field is null, otherwise returns false + */ +ZVEC_EXPORT bool ZVEC_CALL zvec_doc_is_field_null(const ZVecDoc *doc, + const char *field_name); + +/** + * @brief Get all field names of document + * + * @param doc Document object pointer + * @param[out] field_names + * Returned field name array (needs to call zvec_free_str_array to release) + * @param[out] count Returned field count + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_doc_get_field_names( + const ZVecDoc *doc, char 
***field_names, size_t *count); + +/** + * @brief Release string array memory + * + * @param array String array pointer + * @param count Array element count + */ +ZVEC_EXPORT void ZVEC_CALL zvec_free_str_array(char **array, size_t count); + +/** + * @brief Serialize document + * + * @param doc Document object pointer + * @param[out] data Returned serialized data (needs to call + * zvec_free_uint8_array to release) + * @param[out] size Returned data size + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_doc_serialize(const ZVecDoc *doc, + uint8_t **data, + size_t *size); + +/** + * @brief Deserialize document + * + * @param data Serialized data + * @param size Data size + * @param[out] doc Returned document object pointer (needs to call + * zvec_doc_destroy to release) + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_doc_deserialize(const uint8_t *data, + size_t size, + ZVecDoc **doc); + +/** + * @brief Merge two documents + * + * @param doc Target document object pointer + * @param other Source document object pointer + */ +ZVEC_EXPORT void ZVEC_CALL zvec_doc_merge(ZVecDoc *doc, const ZVecDoc *other); + +/** + * @brief Get document memory usage + * + * @param doc Document object pointer + * @return size_t Memory usage (bytes) + */ +ZVEC_EXPORT size_t ZVEC_CALL zvec_doc_memory_usage(const ZVecDoc *doc); + +/** + * @brief Validate document against Schema + * + * @param doc Document object pointer + * @param schema Schema object pointer + * @param is_update Whether it's an update operation + * @param[out] error_msg Error message (needs manual release) + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_doc_validate(const ZVecDoc *doc, const ZVecCollectionSchema *schema, + bool is_update, char **error_msg); + +/** + * @brief Get detailed string representation of document + * + * @param doc Document object pointer + * @param[out] detail_str Returned detailed string 
(needs manual release) + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_doc_to_detail_string(const ZVecDoc *doc, char **detail_str); + +/** + * @brief Free docs array memory + * @param docs Document array pointer + * @param count Document count + */ +ZVEC_EXPORT void ZVEC_CALL zvec_docs_free(ZVecDoc **docs, size_t count); + + +// ============================================================================= +// Utility Functions +// ============================================================================= + +/** + * @brief Convert error code to description string + * @param error_code Error code + * @return const char* Error description string + */ +ZVEC_EXPORT const char *ZVEC_CALL +zvec_error_code_to_string(ZVecErrorCode error_code); + +/** + * @brief Convert data type to string + * @param data_type Data type + * @return const char* Data type string + */ +ZVEC_EXPORT const char *ZVEC_CALL +zvec_data_type_to_string(ZVecDataType data_type); + +/** + * @brief Convert index type to string + * @param index_type Index type + * @return const char* Index type string + */ +ZVEC_EXPORT const char *ZVEC_CALL +zvec_index_type_to_string(ZVecIndexType index_type); + +/** + * @brief Convert metric type to string + * @param metric_type Metric type + * @return const char* Metric type string + */ +const char *zvec_metric_type_to_string(ZVecMetricType metric_type); + + +// ============================================================================= +// Helper Functions +// ============================================================================= + +/** + * @brief Simplified HNSW index parameters initialization macro + * @param _metric Distance metric type + * @param _m Connectivity parameter + * @param _ef_construction Exploration factor during construction + * @param _ef_search Exploration factor during search + * @param _quant Quantization type + * + * Usage example: + * @code + * ZVecIndexParams params = ZVEC_HNSW_PARAMS( + * 
ZVEC_METRIC_TYPE_COSINE, 16, 200, 50, ZVEC_QUANTIZE_TYPE_UNDEFINED); + * @endcode + */ +// clang-format off +#define ZVEC_HNSW_PARAMS(_metric, _m, _ef_construction, _ef_search, _quant) \ + ((ZVecIndexParams){ \ + .index_type = ZVEC_INDEX_TYPE_HNSW, \ + .metric_type = (_metric), \ + .quantize_type = (_quant), \ + .hnsw.m = (_m), \ + .hnsw.ef_construction = (_ef_construction), \ + .hnsw.ef_search = (_ef_search) }) +// clang-format on + +/** + * @brief Simplified inverted index parameters initialization macro + * @param _range_opt Whether to enable range optimization + * @param _wildcard Whether to enable wildcard expansion + * + * Usage example: + * ZVecIndexParams params = ZVEC_INVERT_PARAMS(true, false); + */ +// clang-format off +#define ZVEC_INVERT_PARAMS(_range_opt, _wildcard) \ + ((ZVecIndexParams){ \ + .index_type = ZVEC_INDEX_TYPE_INVERT, \ + .invert.enable_range_optimization = (_range_opt), \ + .invert.enable_extended_wildcard = (_wildcard) }) +// clang-format on + +/** + * @brief Simplified Flat index parameters initialization macro + * @param _metric Distance metric type + * @param _quant Quantization type + */ +// clang-format off +#define ZVEC_FLAT_PARAMS(_metric, _quant) \ + ((ZVecIndexParams){ \ + .index_type = ZVEC_INDEX_TYPE_FLAT, \ + .metric_type = (_metric), \ + .quantize_type = (_quant) }) +// clang-format on + +/** + * @brief Simplified IVF index parameters initialization macro + * @param _metric Distance metric type + * @param _nlist Number of cluster centers + * @param _niters Number of iterations + * @param _soar Whether to use SOAR algorithm + * @param _nprobe Number of clusters to probe during search + * @param _quant Quantization type + */ +// clang-format off +#define ZVEC_IVF_PARAMS(_metric, _nlist, _niters, _soar, _nprobe, _quant) \ + ((ZVecIndexParams){ \ + .index_type = ZVEC_INDEX_TYPE_IVF, \ + .metric_type = (_metric), \ + .quantize_type = (_quant), \ + .ivf.n_list = (_nlist), \ + .ivf.n_iters = (_niters), \ + .ivf.use_soar = (_soar), \ + 
.ivf.n_probe = (_nprobe) }) +// clang-format on + +/** + * @brief Simplified string initialization macro + * @param str String content + * + * Usage example: + * ZVecString name = ZVEC_STRING("my_collection"); + */ +#define ZVEC_STRING(str) \ + (ZVecString) { \ + .data = str, .length = strlen(str) \ + } + +/** + * @brief Simplified string view initialization macro + * @param str String content + * + * Usage example: + * ZVecStringView name = ZVEC_STRING_VIEW("my_collection"); + */ +#define ZVEC_STRING_VIEW(str) \ + (ZVecStringView) { \ + .data = str, .length = strlen(str) \ + } + +// Has been replaced by the new ZVEC_STRING_VIEW macro + +/** + * @brief Simplified float array initialization macro + * @param data_ptr Float array pointer + * @param len Array length + * + * Usage example: + * float vectors[] = {0.1f, 0.2f, 0.3f}; + * ZVecFloatArray vec_array = ZVEC_FLOAT_ARRAY(vectors, 3); + */ +#define ZVEC_FLOAT_ARRAY(data_ptr, len) \ + (ZVecFloatArray) { \ + .data = data_ptr, .length = len \ + } + +/** + * @brief Simplified integer array initialization macro + * @param data_ptr Integer array pointer + * @param len Array length + */ +#define ZVEC_INT64_ARRAY(data_ptr, len) \ + (ZVecInt64Array) { \ + .data = data_ptr, .length = len \ + } + +/** + * @brief Simplified document field initialization macro + * @param name_str Field name + * @param type Data type + * @param value_union Field value union + * + * Usage example: + * ZVecDocField field = ZVEC_DOC_FIELD("id", ZVEC_DATA_TYPE_STRING, + * {.string_value = ZVEC_STRING("doc1")}); + */ +#define ZVEC_DOC_FIELD(name_str, type, value_union) \ + (ZVecDocField) { \ + .name = ZVEC_STRING(name_str), .data_type = type, .value = value_union \ + } + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // ZVEC_C_API_H diff --git a/src/include/zvec/db/doc.h b/src/include/zvec/db/doc.h index fa056053..e6d13c86 100644 --- a/src/include/zvec/db/doc.h +++ b/src/include/zvec/db/doc.h @@ -68,6 +68,10 @@ class Doc { return pk_; } + 
const std::string &pk_ref() const { + return pk_; + } + void set_score(float score) { score_ = score; } @@ -103,6 +107,10 @@ class Doc { return op_; } + Operator get_operator() const { + return op_; + } + // Set field value template bool set(const std::string &field_name, T value) { @@ -232,6 +240,26 @@ class Doc { return std::nullopt; } + // Get field value as const reference, throws exception if field doesn't exist + // or type mismatches + template + const T &get_ref(const std::string &field_name) const { + auto it = fields_.find(field_name); + if (it == fields_.end()) { + throw std::runtime_error("Field '" + field_name + "' not found"); + } + + if (std::holds_alternative(it->second)) { + throw std::runtime_error("Field '" + field_name + "' is null"); + } + + try { + return std::get(it->second); + } catch (const std::bad_variant_access &) { + throw std::runtime_error("Field '" + field_name + "' type mismatch"); + } + } + void remove(const std::string &field_name) { fields_.erase(field_name); } diff --git a/src/include/zvec/version.h.in b/src/include/zvec/version.h.in new file mode 100644 index 00000000..2d92b728 --- /dev/null +++ b/src/include/zvec/version.h.in @@ -0,0 +1,16 @@ +#ifndef ZVEC_VERSION_H +#define ZVEC_VERSION_H + +/** @brief Major version number */ +#define ZVEC_VERSION_MAJOR @ZVEC_VERSION_MAJOR@ + +/** @brief Minor version number */ +#define ZVEC_VERSION_MINOR @ZVEC_VERSION_MINOR@ + +/** @brief Patch version number */ +#define ZVEC_VERSION_PATCH @ZVEC_VERSION_PATCH@ + +/** @brief Full version string */ +#define ZVEC_VERSION_STRING "@ZVEC_VERSION_STRING@" + +#endif // ZVEC_VERSION_H diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 03250f1c..7308514b 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -4,3 +4,6 @@ include(${PROJECT_ROOT_DIR}/cmake/option.cmake) cc_directories(ailego) cc_directories(db) cc_directories(core) +if(BUILD_C_BINDINGS) + cc_directories(c) +endif() diff --git a/tests/c/CMakeLists.txt 
b/tests/c/CMakeLists.txt new file mode 100644 index 00000000..b5e461a2 --- /dev/null +++ b/tests/c/CMakeLists.txt @@ -0,0 +1,28 @@ +# Copyright 2025-present the zvec project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake) + +file(GLOB_RECURSE ALL_TEST_SRCS *_test.c) + +foreach(CC_SRCS ${ALL_TEST_SRCS}) + get_filename_component(CC_TARGET ${CC_SRCS} NAME_WE) + cc_gtest( + NAME ${CC_TARGET} + STRICT + LIBS zvec_c_api + SRCS ${CC_SRCS} utils.c + INCS . .. ../../src + ) +endforeach() diff --git a/tests/c/c_api_test.c b/tests/c/c_api_test.c new file mode 100644 index 00000000..f3946c61 --- /dev/null +++ b/tests/c/c_api_test.c @@ -0,0 +1,4912 @@ +// Copyright 2025-present the zvec project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "zvec/c_api.h" +#include +#include +#include +#include +#include +#include +#ifdef _POSIX_C_SOURCE +#include +#endif +#include +#include "utils.h" + +// ============================================================================= +// Test helper macro definitions +// ============================================================================= + +static int test_count = 0; +static int passed_count = 0; +static int current_test_passed = 1; // Track if current test function passes + +#define TEST_START() \ + do { \ + printf("Running test: %s\n", __func__); \ + test_count++; \ + current_test_passed = 1; \ + } while (0) + +#define TEST_ASSERT(condition) \ + do { \ + if (condition) { \ + printf(" ✓ PASS\n"); \ + } else { \ + printf(" ✗ FAIL at line %d\n", __LINE__); \ + current_test_passed = 0; \ + } \ + } while (0) + +#define TEST_END() \ + do { \ + if (current_test_passed) { \ + passed_count++; \ + } \ + } while (0) + +// ============================================================================= +// Helper functions tests +// ============================================================================= + +void test_version_functions(void) { + TEST_START(); + + // Test version retrieval functions + const char *version = zvec_get_version(); + TEST_ASSERT(version != NULL); + printf(" Version string: %s\n", version); + + // Test version component retrieval + int major = zvec_get_version_major(); + int minor = zvec_get_version_minor(); + int patch = zvec_get_version_patch(); + + printf(" Version components: %d.%d.%d\n", major, minor, patch); + TEST_ASSERT(major >= 0); + TEST_ASSERT(minor >= 0); + TEST_ASSERT(patch >= 0); + + // Test version compatibility check with current version (should pass) + TEST_ASSERT(zvec_check_version(major, minor, patch)); + + // Test with older version (should pass - current is newer) + if (minor > 0) { + TEST_ASSERT(zvec_check_version(major, minor - 1, patch)); + } + if (major > 0) { + TEST_ASSERT(zvec_check_version(major - 1, 
minor, patch)); + } + + // Test with much newer version (should fail - current is older) + bool not_compatible = zvec_check_version(99, 99, 99); + TEST_ASSERT(not_compatible == false); + + // Test with invalid negative versions (should fail and set error) + TEST_ASSERT(zvec_check_version(-1, 0, 0) == false); + TEST_ASSERT(zvec_check_version(0, -1, 0) == false); + TEST_ASSERT(zvec_check_version(0, 0, -1) == false); + + TEST_END(); +} + +void test_error_handling_functions(void) { + TEST_START(); + + char *error_msg = NULL; + ZVecErrorCode err = zvec_get_last_error(&error_msg); + TEST_ASSERT(err == ZVEC_OK); + + if (error_msg) { + free(error_msg); + } + + // Test error clearing + zvec_clear_error(); + + // Test error details retrieval + ZVecErrorDetails error_details = {0}; + err = zvec_get_last_error_details(&error_details); + TEST_ASSERT(err == ZVEC_OK); + + TEST_END(); +} + +void test_zvec_config() { + TEST_START(); + + // Test 1: Console log config creation and destruction + ZVecConsoleLogConfig *console_config = + zvec_config_console_log_create(ZVEC_LOG_LEVEL_INFO); + TEST_ASSERT(console_config != NULL); + if (console_config) { + TEST_ASSERT(zvec_config_console_log_get_level(console_config) == + ZVEC_LOG_LEVEL_INFO); + zvec_config_console_log_destroy(console_config); + } + + // Test 2: File log config creation and destruction + ZVecFileLogConfig *file_config = zvec_config_file_log_create( + ZVEC_LOG_LEVEL_WARN, "./logs", "test_log", 100, 7); + TEST_ASSERT(file_config != NULL); + if (file_config) { + TEST_ASSERT(zvec_config_file_log_get_level(file_config) == + ZVEC_LOG_LEVEL_WARN); + TEST_ASSERT(strcmp(zvec_config_file_log_get_dir(file_config), "./logs") == + 0); + TEST_ASSERT(strcmp(zvec_config_file_log_get_basename(file_config), + "test_log") == 0); + TEST_ASSERT(zvec_config_file_log_get_file_size(file_config) == 100); + TEST_ASSERT(zvec_config_file_log_get_overdue_days(file_config) == 7); + zvec_config_file_log_destroy(file_config); + } + + // Test 3: File log 
config edge cases + ZVecFileLogConfig *empty_file_config = + zvec_config_file_log_create(ZVEC_LOG_LEVEL_INFO, "", "", 0, 0); + TEST_ASSERT(empty_file_config != NULL); + if (empty_file_config) { + TEST_ASSERT(zvec_config_file_log_get_level(empty_file_config) == + ZVEC_LOG_LEVEL_INFO); + TEST_ASSERT(strcmp(zvec_config_file_log_get_dir(empty_file_config), "") == + 0); + TEST_ASSERT( + strcmp(zvec_config_file_log_get_basename(empty_file_config), "") == 0); + TEST_ASSERT(zvec_config_file_log_get_file_size(empty_file_config) == 0); + TEST_ASSERT(zvec_config_file_log_get_overdue_days(empty_file_config) == 0); + zvec_config_file_log_destroy(empty_file_config); + } + + // Test 4: Log config creation with console type + ZVecConsoleLogConfig *temp_console = + zvec_config_console_log_create(ZVEC_LOG_LEVEL_ERROR); + TEST_ASSERT(temp_console != NULL); + if (temp_console) { + zvec_config_console_log_destroy(temp_console); + } + + // Test 5: Log config creation with file type + ZVecFileLogConfig *temp_file = zvec_config_file_log_create( + ZVEC_LOG_LEVEL_DEBUG, "./logs", "app", 50, 30); + TEST_ASSERT(temp_file != NULL); + TEST_ASSERT(zvec_config_file_log_get_level(temp_file) == + ZVEC_LOG_LEVEL_DEBUG); + TEST_ASSERT(strcmp(zvec_config_file_log_get_dir(temp_file), "./logs") == 0); + TEST_ASSERT(strcmp(zvec_config_file_log_get_basename(temp_file), "app") == 0); + TEST_ASSERT(zvec_config_file_log_get_file_size(temp_file) == 50); + TEST_ASSERT(zvec_config_file_log_get_overdue_days(temp_file) == 30); + + zvec_config_file_log_destroy(temp_file); + + // Test 6: Config data creation and basic operations + ZVecConfigData *config_data = zvec_config_data_create(); + TEST_ASSERT(config_data != NULL); + if (config_data) { + // Test initial values + TEST_ASSERT(zvec_config_data_get_console_log_config(config_data) != NULL); + TEST_ASSERT(zvec_config_data_get_log_type(config_data) == + ZVEC_LOG_TYPE_CONSOLE); + + // Test memory limit setting + ZVecErrorCode err = + 
zvec_config_data_set_memory_limit(config_data, 1024 * 1024 * 1024); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(zvec_config_data_get_memory_limit(config_data) == + 1024 * 1024 * 1024); + + // Test thread count settings + err = zvec_config_data_set_query_thread_count(config_data, 8); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(zvec_config_data_get_query_thread_count(config_data) == 8); + + err = zvec_config_data_set_optimize_thread_count(config_data, 4); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(zvec_config_data_get_optimize_thread_count(config_data) == 4); + + // Test log config replacement + TEST_ASSERT(zvec_config_data_get_log_type(config_data) == + ZVEC_LOG_TYPE_CONSOLE); + TEST_ASSERT(zvec_config_data_get_console_log_config(config_data) != NULL); + + ZVecFileLogConfig *new_file = zvec_config_file_log_create( + ZVEC_LOG_LEVEL_DEBUG, "./logs", "app", 50, 30); + TEST_ASSERT(new_file != NULL); + zvec_config_data_set_log_config(config_data, ZVEC_LOG_TYPE_FILE, new_file); + TEST_ASSERT(zvec_config_data_get_log_type(config_data) == + ZVEC_LOG_TYPE_FILE); + TEST_ASSERT(zvec_config_data_get_file_log_config(config_data) != NULL); + + zvec_config_data_destroy(config_data); + } + + // Test 7: Edge cases and error conditions + // Test NULL pointer handling + ZVecErrorCode err = zvec_config_data_set_memory_limit(NULL, 1024); + TEST_ASSERT(err == ZVEC_ERROR_INVALID_ARGUMENT); + + err = zvec_config_data_set_log_config(NULL, ZVEC_LOG_TYPE_CONSOLE, NULL); + TEST_ASSERT(err == ZVEC_ERROR_INVALID_ARGUMENT); + + err = zvec_config_data_set_query_thread_count(NULL, 1); + TEST_ASSERT(err == ZVEC_ERROR_INVALID_ARGUMENT); + + err = zvec_config_data_set_optimize_thread_count(NULL, 1); + TEST_ASSERT(err == ZVEC_ERROR_INVALID_ARGUMENT); + + // Test boundary values + ZVecConfigData *boundary_config = zvec_config_data_create(); + if (boundary_config) { + // Test zero values + err = zvec_config_data_set_memory_limit(boundary_config, 0); + TEST_ASSERT(err == ZVEC_OK); + 
TEST_ASSERT(zvec_config_data_get_memory_limit(boundary_config) == 0); + + // Test maximum values + err = zvec_config_data_set_memory_limit(boundary_config, UINT64_MAX); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(zvec_config_data_get_memory_limit(boundary_config) == + UINT64_MAX); + + // Test zero thread counts + err = zvec_config_data_set_query_thread_count(boundary_config, 0); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(zvec_config_data_get_query_thread_count(boundary_config) == 0); + + err = zvec_config_data_set_optimize_thread_count(boundary_config, 0); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(zvec_config_data_get_optimize_thread_count(boundary_config) == + 0); + + zvec_config_data_destroy(boundary_config); + } + + // Test 8: Memory leak prevention - double destroy safety + ZVecConfigData *double_destroy_test = zvec_config_data_create(); + if (double_destroy_test) { + zvec_config_data_destroy(double_destroy_test); + } + + TEST_END(); +} + +void test_zvec_initialize() { + TEST_START(); + + ZVecConfigData *config = zvec_config_data_create(); + TEST_ASSERT(config != NULL); + if (config) { + TEST_ASSERT(zvec_config_data_get_console_log_config(config) != NULL); + TEST_ASSERT(zvec_config_data_get_log_type(config) == ZVEC_LOG_TYPE_CONSOLE); + } + ZVecErrorCode err = zvec_initialize(config); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(zvec_is_initialized()); + + TEST_END(); +} + +// ============================================================================= +// Schema-related tests +// ============================================================================= + +void test_schema_basic_operations(void) { + TEST_START(); + + // Test 1: Basic Schema creation and destruction + ZVecCollectionSchema *schema = zvec_collection_schema_create("demo"); + TEST_ASSERT(schema != NULL); + TEST_ASSERT(zvec_collection_schema_get_name(schema) != NULL); + TEST_ASSERT(strcmp(zvec_collection_schema_get_name(schema), "demo") == 0); + 
TEST_ASSERT(zvec_collection_schema_get_field_count(schema) == 0); + TEST_ASSERT(zvec_collection_schema_get_max_doc_count_per_segment(schema) > 0); + + // Test 2: Schema field count operations + size_t initial_count = zvec_collection_schema_get_field_count(schema); + TEST_ASSERT(initial_count == 0); + + // Test 3: Adding fields to schema + ZVecFieldSchema *id_field = + zvec_field_schema_create("id", ZVEC_DATA_TYPE_INT64, false, 0); + ZVecErrorCode err = zvec_collection_schema_add_field(schema, id_field); + TEST_ASSERT(err == ZVEC_OK); + + size_t count_after_add = zvec_collection_schema_get_field_count(schema); + TEST_ASSERT(count_after_add == 1); + + // Test 4: Finding fields in schema + const ZVecFieldSchema *found_field = + zvec_collection_schema_find_field(schema, "id"); + TEST_ASSERT(found_field != NULL); + TEST_ASSERT(strcmp(zvec_field_schema_get_name(found_field), "id") == 0); + TEST_ASSERT(zvec_field_schema_get_data_type(found_field) == + ZVEC_DATA_TYPE_INT64); + + // Test 5: Getting field by index + ZVecFieldSchema *indexed_field = zvec_collection_schema_get_field(schema, 0); + TEST_ASSERT(indexed_field != NULL); + TEST_ASSERT(strcmp(zvec_field_schema_get_name(indexed_field), "id") == 0); + + // Test 6: Adding multiple fields + const ZVecFieldSchema *fields_to_add[2]; + ZVecFieldSchema *name_field = + zvec_field_schema_create("name", ZVEC_DATA_TYPE_STRING, false, 0); + ZVecFieldSchema *age_field = + zvec_field_schema_create("age", ZVEC_DATA_TYPE_INT32, true, 0); + + fields_to_add[0] = name_field; + fields_to_add[1] = age_field; + + err = zvec_collection_schema_add_fields(schema, fields_to_add, 2); + TEST_ASSERT(err == ZVEC_OK); + + size_t count_after_multi_add = zvec_collection_schema_get_field_count(schema); + TEST_ASSERT(count_after_multi_add == 3); + + // Test 7: Finding newly added fields + const ZVecFieldSchema *name_found = + zvec_collection_schema_find_field(schema, "name"); + TEST_ASSERT(name_found != NULL); + 
TEST_ASSERT(strcmp(zvec_field_schema_get_name(name_found), "name") == 0); + + const ZVecFieldSchema *age_found = + zvec_collection_schema_find_field(schema, "age"); + TEST_ASSERT(age_found != NULL); + TEST_ASSERT(strcmp(zvec_field_schema_get_name(age_found), "age") == 0); + + // Clean up fields we created + zvec_field_schema_destroy(name_field); + zvec_field_schema_destroy(age_field); + + // Test 8: Setting and getting max doc count + err = zvec_collection_schema_set_max_doc_count_per_segment(schema, 10000); + TEST_ASSERT(err == ZVEC_OK); + + uint64_t max_doc_count = + zvec_collection_schema_get_max_doc_count_per_segment(schema); + TEST_ASSERT(max_doc_count == 10000); + + // Test 9: Schema validation + ZVecString *validation_error = NULL; + err = zvec_collection_schema_validate(schema, &validation_error); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(validation_error == NULL); + + // Test 10: Removing single field + err = zvec_collection_schema_remove_field(schema, "age"); + TEST_ASSERT(err == ZVEC_OK); + + size_t count_after_remove = zvec_collection_schema_get_field_count(schema); + TEST_ASSERT(count_after_remove == 2); + + const ZVecFieldSchema *removed_field = + zvec_collection_schema_find_field(schema, "age"); + TEST_ASSERT(removed_field == NULL); + + // Test 11: Removing multiple fields + const char *fields_to_remove[] = {"name", "id"}; + err = zvec_collection_schema_remove_fields(schema, fields_to_remove, 2); + TEST_ASSERT(err == ZVEC_OK); + + size_t final_count = zvec_collection_schema_get_field_count(schema); + TEST_ASSERT(final_count == 0); + + // Test 12: Schema cleanup + zvec_collection_schema_destroy(schema); + + TEST_END(); +} + +void test_schema_edge_cases(void) { + TEST_START(); + + // Test 1: NULL parameter handling for schema creation + ZVecCollectionSchema *null_schema = zvec_collection_schema_create(NULL); + TEST_ASSERT(null_schema == NULL); + + // Test 2: Empty string schema name + ZVecCollectionSchema *empty_schema = 
zvec_collection_schema_create(""); + TEST_ASSERT(empty_schema != NULL); + TEST_ASSERT(zvec_collection_schema_get_name(empty_schema) != NULL); + TEST_ASSERT(strcmp(zvec_collection_schema_get_name(empty_schema), "") == 0); + zvec_collection_schema_destroy(empty_schema); + + // Test 3: Very long schema name + char long_name[1024]; + memset(long_name, 'a', 1023); + long_name[1023] = '\0'; + ZVecCollectionSchema *long_schema = zvec_collection_schema_create(long_name); + TEST_ASSERT(long_schema != NULL); + TEST_ASSERT(zvec_collection_schema_get_name(long_schema) != NULL); + TEST_ASSERT(strlen(zvec_collection_schema_get_name(long_schema)) == 1023); + zvec_collection_schema_destroy(long_schema); + + // Test 4: NULL schema parameter handling for all functions + ZVecErrorCode err; + size_t count = zvec_collection_schema_get_field_count(NULL); + TEST_ASSERT(count == 0); + + const ZVecFieldSchema *null_field = + zvec_collection_schema_find_field(NULL, "test"); + TEST_ASSERT(null_field == NULL); + + ZVecFieldSchema *null_indexed_field = + zvec_collection_schema_get_field(NULL, 0); + TEST_ASSERT(null_indexed_field == NULL); + + uint64_t null_max_doc_count = + zvec_collection_schema_get_max_doc_count_per_segment(NULL); + TEST_ASSERT(null_max_doc_count == 0); + + err = zvec_collection_schema_set_max_doc_count_per_segment(NULL, 1000); + TEST_ASSERT(err == ZVEC_ERROR_INVALID_ARGUMENT); + + ZVecString *null_validation_error = NULL; + err = zvec_collection_schema_validate(NULL, &null_validation_error); + TEST_ASSERT(err == ZVEC_ERROR_INVALID_ARGUMENT); + TEST_ASSERT(null_validation_error == NULL); + + err = zvec_collection_schema_add_field(NULL, NULL); + TEST_ASSERT(err == ZVEC_ERROR_INVALID_ARGUMENT); + + err = zvec_collection_schema_add_fields(NULL, NULL, 0); + TEST_ASSERT(err == ZVEC_ERROR_INVALID_ARGUMENT); + + err = zvec_collection_schema_remove_field(NULL, "test"); + TEST_ASSERT(err == ZVEC_ERROR_INVALID_ARGUMENT); + + const char *null_field_names[] = {NULL}; + err = 
zvec_collection_schema_remove_fields(NULL, null_field_names, 1); + TEST_ASSERT(err == ZVEC_ERROR_INVALID_ARGUMENT); + + // Test 5: Working with valid schema for edge cases + ZVecCollectionSchema *schema = zvec_collection_schema_create("edge_test"); + TEST_ASSERT(schema != NULL); + + // Test 6: Adding NULL field to schema + err = zvec_collection_schema_add_field(schema, NULL); + TEST_ASSERT(err == ZVEC_ERROR_INVALID_ARGUMENT); + + // Test 7: Adding fields with NULL array + err = zvec_collection_schema_add_fields(schema, NULL, 5); + TEST_ASSERT(err == ZVEC_ERROR_INVALID_ARGUMENT); + + // Test 8: Adding zero fields + err = zvec_collection_schema_add_fields(schema, NULL, 0); + TEST_ASSERT(err == ZVEC_OK); + + // Test 9: Finding field with NULL name + const ZVecFieldSchema *null_name_field = + zvec_collection_schema_find_field(schema, NULL); + TEST_ASSERT(null_name_field == NULL); + + // Test 10: Finding non-existent field + const ZVecFieldSchema *nonexistent_field = + zvec_collection_schema_find_field(schema, "nonexistent"); + TEST_ASSERT(nonexistent_field == NULL); + + // Test 11: Getting field with invalid index + ZVecFieldSchema *invalid_index_field = + zvec_collection_schema_get_field(schema, 1000); + TEST_ASSERT(invalid_index_field == NULL); + + // Test 12: Getting field from empty schema with index 0 + ZVecFieldSchema *zero_index_field = + zvec_collection_schema_get_field(schema, 0); + TEST_ASSERT(zero_index_field == NULL); + + // Test 13: Removing field with NULL name + err = zvec_collection_schema_remove_field(schema, NULL); + TEST_ASSERT(err == ZVEC_ERROR_INVALID_ARGUMENT); + + // Test 14: Removing non-existent field + err = zvec_collection_schema_remove_field(schema, "nonexistent"); + TEST_ASSERT(err == ZVEC_ERROR_NOT_FOUND); + + // Test 15: Removing fields with NULL array + err = zvec_collection_schema_remove_fields(schema, NULL, 5); + TEST_ASSERT(err == ZVEC_ERROR_INVALID_ARGUMENT); + + // Test 16: Removing zero fields + err = 
zvec_collection_schema_remove_fields(schema, NULL, 0); + TEST_ASSERT(err == ZVEC_OK); + + // Test 17: Setting extremely large max doc count + err = + zvec_collection_schema_set_max_doc_count_per_segment(schema, UINT64_MAX); + TEST_ASSERT(err == ZVEC_OK); + uint64_t retrieved_max_count = + zvec_collection_schema_get_max_doc_count_per_segment(schema); + TEST_ASSERT(retrieved_max_count == UINT64_MAX); + + // Test 18: Setting zero max doc count + err = zvec_collection_schema_set_max_doc_count_per_segment(schema, 0); + TEST_ASSERT(err == ZVEC_OK); + uint64_t zero_max_count = + zvec_collection_schema_get_max_doc_count_per_segment(schema); + TEST_ASSERT(zero_max_count == 0); + + // Test 19: Schema validation with empty schema + ZVecString *empty_validation_error = NULL; + err = zvec_collection_schema_validate(schema, &empty_validation_error); + TEST_ASSERT(err == ZVEC_ERROR_INVALID_ARGUMENT); + + // Test 20: Add duplicate field names + ZVecFieldSchema *first_id = + zvec_field_schema_create("duplicate_id", ZVEC_DATA_TYPE_INT64, false, 0); + ZVecFieldSchema *second_id = + zvec_field_schema_create("duplicate_id", ZVEC_DATA_TYPE_STRING, false, 0); + + err = zvec_collection_schema_add_field(schema, first_id); + TEST_ASSERT(err == ZVEC_OK); + + err = zvec_collection_schema_add_field(schema, second_id); + TEST_ASSERT(err == ZVEC_ERROR_ALREADY_EXISTS); + zvec_field_schema_destroy(second_id); + + // Verify fields + size_t field_count = zvec_collection_schema_get_field_count(schema); + TEST_ASSERT(field_count == 1); + + // Test 21: Cleanup + zvec_collection_schema_destroy(schema); + + TEST_END(); +} + +void test_schema_field_operations(void) { + TEST_START(); + + ZVecCollectionSchema *schema = zvec_test_create_temp_schema(); + TEST_ASSERT(schema != NULL); + + if (schema) { + // Test field count + size_t initial_count = zvec_collection_schema_get_field_count(schema); + TEST_ASSERT(initial_count == 5); + + // Test finding non-existent field + const ZVecFieldSchema *nonexistent = + 
zvec_collection_schema_find_field(schema, "nonexistent"); + TEST_ASSERT(nonexistent == NULL); + + // Test finding existing field + const ZVecFieldSchema *id_field = + zvec_collection_schema_find_field(schema, "id"); + TEST_ASSERT(id_field != NULL); + if (id_field) { + TEST_ASSERT(strcmp(zvec_field_schema_get_name(id_field), "id") == 0); + TEST_ASSERT(zvec_field_schema_get_data_type(id_field) == + ZVEC_DATA_TYPE_INT64); + } + + zvec_collection_schema_destroy(schema); + } + + TEST_END(); +} + +void test_normal_schema_creation(void) { + TEST_START(); + + ZVecCollectionSchema *schema = + zvec_test_create_normal_schema(false, "test_normal", NULL, NULL, 1000); + TEST_ASSERT(schema != NULL); + + if (schema) { + TEST_ASSERT( + strcmp(zvec_collection_schema_get_name(schema), "test_normal") == 0); + + // Verify field count + size_t field_count = zvec_collection_schema_get_field_count(schema); + TEST_ASSERT(field_count > 0); + + zvec_collection_schema_destroy(schema); + } + + TEST_END(); +} + +void test_schema_with_indexes(void) { + TEST_START(); + + // Test Schema with scalar index + ZVecCollectionSchema *scalar_index_schema = + zvec_test_create_schema_with_scalar_index(true, true, + "scalar_index_test"); + TEST_ASSERT(scalar_index_schema != NULL); + if (scalar_index_schema) { + zvec_collection_schema_destroy(scalar_index_schema); + } + + // Test Schema with vector index + ZVecCollectionSchema *vector_index_schema = + zvec_test_create_schema_with_vector_index(false, "vector_index_test", + NULL); + TEST_ASSERT(vector_index_schema != NULL); + if (vector_index_schema) { + zvec_collection_schema_destroy(vector_index_schema); + } + + TEST_END(); +} + +void test_schema_max_doc_count(void) { + TEST_START(); + + // Test 1: Setting max doc count to a valid value + ZVecCollectionSchema *schema = zvec_collection_schema_create("max_doc_test"); + TEST_ASSERT(schema != NULL); + + ZVecErrorCode err = + zvec_collection_schema_set_max_doc_count_per_segment(schema, 1000); + TEST_ASSERT(err == 
ZVEC_OK); + + uint64_t max_doc_count = + zvec_collection_schema_get_max_doc_count_per_segment(schema); + TEST_ASSERT(max_doc_count == 1000); + + zvec_collection_schema_destroy(schema); + + // Test 2: Setting max doc count to zero + schema = zvec_collection_schema_create("max_doc_test"); + TEST_ASSERT(schema != NULL); + + err = zvec_collection_schema_set_max_doc_count_per_segment(schema, 0); + TEST_ASSERT(err == ZVEC_OK); + + max_doc_count = zvec_collection_schema_get_max_doc_count_per_segment(schema); + TEST_ASSERT(max_doc_count == 0); + + zvec_collection_schema_destroy(schema); + + // Test 3: Setting max doc count to maximum value + schema = zvec_collection_schema_create("max_doc_test"); + TEST_ASSERT(schema != NULL); + + err = + zvec_collection_schema_set_max_doc_count_per_segment(schema, UINT64_MAX); + TEST_ASSERT(err == ZVEC_OK); + + max_doc_count = zvec_collection_schema_get_max_doc_count_per_segment(schema); + TEST_ASSERT(max_doc_count == UINT64_MAX); + + zvec_collection_schema_destroy(schema); + + TEST_END(); +} + +void test_collection_schema_helpers(void) { + TEST_START(); + + // Create schema with various field types + ZVecCollectionSchema *schema = zvec_collection_schema_create("helper_test"); + TEST_ASSERT(schema != NULL); + + if (schema) { + // Add scalar fields + ZVecFieldSchema *int_field = + zvec_field_schema_create("int_field", ZVEC_DATA_TYPE_INT32, false, 0); + ZVecFieldSchema *str_field = + zvec_field_schema_create("str_field", ZVEC_DATA_TYPE_STRING, true, 0); + + // Add vector field + ZVecFieldSchema *vec_field = zvec_field_schema_create( + "vec_field", ZVEC_DATA_TYPE_VECTOR_FP32, false, 128); + + zvec_collection_schema_add_field(schema, int_field); + zvec_collection_schema_add_field(schema, str_field); + zvec_collection_schema_add_field(schema, vec_field); + + // Test has_field + TEST_ASSERT(zvec_collection_schema_has_field(schema, "int_field") == true); + TEST_ASSERT(zvec_collection_schema_has_field(schema, "str_field") == true); + 
TEST_ASSERT(zvec_collection_schema_has_field(schema, "vec_field") == true); + TEST_ASSERT(zvec_collection_schema_has_field(schema, "nonexistent") == + false); + + // Test get_forward_field (scalar field) + ZVecFieldSchema *found_int = + zvec_collection_schema_get_forward_field(schema, "int_field"); + TEST_ASSERT(found_int != NULL); + TEST_ASSERT(zvec_field_schema_get_data_type(found_int) == + ZVEC_DATA_TYPE_INT32); + + // get_forward_field should return NULL for vector field + ZVecFieldSchema *vec_as_forward = + zvec_collection_schema_get_forward_field(schema, "vec_field"); + TEST_ASSERT(vec_as_forward == NULL); + + // Test get_vector_field + ZVecFieldSchema *found_vec = + zvec_collection_schema_get_vector_field(schema, "vec_field"); + TEST_ASSERT(found_vec != NULL); + TEST_ASSERT(zvec_field_schema_is_vector_field(found_vec) == true); + + // get_vector_field should return NULL for scalar field + ZVecFieldSchema *int_as_vec = + zvec_collection_schema_get_vector_field(schema, "int_field"); + TEST_ASSERT(int_as_vec == NULL); + + // Test get_all_field_names + const char **names = NULL; + size_t name_count = 0; + ZVecErrorCode err = + zvec_collection_schema_get_all_field_names(schema, &names, &name_count); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(name_count == 3); + // Note: We don't free names as they are owned by the schema + + // Test get_forward_fields + ZVecFieldSchema **forward_fields = NULL; + size_t forward_count = 0; + err = zvec_collection_schema_get_forward_fields(schema, &forward_fields, + &forward_count); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(forward_count == 2); // int_field and str_field + free(forward_fields); + + // Test get_vector_fields + ZVecFieldSchema **vector_fields = NULL; + size_t vector_count = 0; + err = zvec_collection_schema_get_vector_fields(schema, &vector_fields, + &vector_count); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(vector_count == 1); // vec_field + free(vector_fields); + + // Test has_index (initially no fields have 
index) + TEST_ASSERT(zvec_collection_schema_has_index(schema, "int_field") == false); + TEST_ASSERT(zvec_collection_schema_has_index(schema, "str_field") == false); + TEST_ASSERT(zvec_collection_schema_has_index(schema, "vec_field") == false); + + // Test add_index + ZVecIndexParams *invert_params = + zvec_index_params_create(ZVEC_INDEX_TYPE_INVERT); + TEST_ASSERT(invert_params != NULL); + + err = zvec_collection_schema_add_index(schema, "int_field", invert_params); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(zvec_collection_schema_has_index(schema, "int_field") == true); + + // Test drop_index + err = zvec_collection_schema_drop_index(schema, "int_field"); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(zvec_collection_schema_has_index(schema, "int_field") == false); + + zvec_index_params_destroy(invert_params); + zvec_collection_schema_destroy(schema); + } + + TEST_END(); +} + +void test_collection_schema_alter_field(void) { + TEST_START(); + + ZVecCollectionSchema *schema = zvec_collection_schema_create("alter_test"); + TEST_ASSERT(schema != NULL); + + if (schema) { + // Create initial field + ZVecFieldSchema *field = + zvec_field_schema_create("test_field", ZVEC_DATA_TYPE_INT32, false, 0); + TEST_ASSERT(field != NULL); + + ZVecErrorCode err = zvec_collection_schema_add_field(schema, field); + TEST_ASSERT(err == ZVEC_OK); + + // Verify initial state + ZVecFieldSchema *found = + zvec_collection_schema_find_field(schema, "test_field"); + TEST_ASSERT(found != NULL); + TEST_ASSERT(zvec_field_schema_is_nullable(found) == false); + + // Alter the field to make it nullable + ZVecFieldSchema *new_field = + zvec_field_schema_create("test_field", ZVEC_DATA_TYPE_INT32, true, 0); + TEST_ASSERT(new_field != NULL); + + err = zvec_collection_schema_alter_field(schema, "test_field", new_field); + TEST_ASSERT(err == ZVEC_OK); + + // Verify the change + found = zvec_collection_schema_find_field(schema, "test_field"); + TEST_ASSERT(found != NULL); + 
TEST_ASSERT(zvec_field_schema_is_nullable(found) == true); + + // Test alter non-existent field + err = zvec_collection_schema_alter_field(schema, "nonexistent", new_field); + TEST_ASSERT(err != ZVEC_OK); + + zvec_field_schema_destroy(new_field); + zvec_collection_schema_destroy(schema); + } + + TEST_END(); +} + +// ============================================================================= +// Collection-related tests +// ============================================================================= + +void test_collection_basic_operations(void) { + TEST_START(); + + // Create temporary directory + char temp_dir[] = "/tmp/zvec_test_collection_basic_operations"; + + ZVecCollectionSchema *schema = zvec_test_create_temp_schema(); + TEST_ASSERT(schema != NULL); + + if (schema) { + ZVecCollection *collection = NULL; + ZVecErrorCode err = + zvec_collection_create_and_open(temp_dir, schema, NULL, &collection); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(collection != NULL); + + if (collection) { + // Test collection operations + ZVecDoc *doc1 = zvec_test_create_doc(1, schema, NULL); + ZVecDoc *doc2 = zvec_test_create_doc(2, schema, NULL); + ZVecDoc *doc3 = zvec_test_create_doc(3, schema, NULL); + + TEST_ASSERT(doc1 != NULL); + TEST_ASSERT(doc2 != NULL); + TEST_ASSERT(doc3 != NULL); + + if (doc1 && doc2 && doc3) { + ZVecDoc *docs[] = {doc1, doc2, doc3}; + size_t success_count, error_count; + + // Test insert operation + err = zvec_collection_insert(collection, (const ZVecDoc **)docs, 3, + &success_count, &error_count); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(success_count == 3); + TEST_ASSERT(error_count == 0); + + // Test update operation + zvec_doc_set_score(doc1, 0.95f); + ZVecDoc *update_docs[] = {doc1}; + err = zvec_collection_update(collection, (const ZVecDoc **)update_docs, + 1, &success_count, &error_count); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(success_count == 1); + TEST_ASSERT(error_count == 0); + + // Test upsert operation + zvec_doc_set_pk(doc3, 
"pk_3_modified"); + ZVecDoc *upsert_docs[] = {doc3}; + err = zvec_collection_upsert(collection, (const ZVecDoc **)upsert_docs, + 1, &success_count, &error_count); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(success_count == 1); + TEST_ASSERT(error_count == 0); + + // Test delete operation by primary keys + const char *pks[] = {"pk_1", "pk_2"}; + err = zvec_collection_delete(collection, pks, 2, &success_count, + &error_count); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(success_count == 2); + TEST_ASSERT(error_count == 0); + + // Test delete by filter + err = zvec_collection_delete_by_filter(collection, "id > 0"); + TEST_ASSERT(err == ZVEC_OK); + + // Clean up documents + zvec_doc_destroy(doc1); + zvec_doc_destroy(doc2); + zvec_doc_destroy(doc3); + } + + // Test collection flush + err = zvec_collection_flush(collection); + TEST_ASSERT(err == ZVEC_OK); + + // Test collection optimization + err = zvec_collection_optimize(collection); + TEST_ASSERT(err == ZVEC_OK); + + zvec_collection_destroy(collection); + } + + zvec_collection_schema_destroy(schema); + } + + // Clean up temporary directory + char cmd[256]; + snprintf(cmd, sizeof(cmd), "rm -rf %s", temp_dir); + system(cmd); + + TEST_END(); +} + +void test_collection_edge_cases(void) { + TEST_START(); + + char temp_dir[] = "/tmp/zvec_test_collection_edge_cases"; + + ZVecCollectionSchema *schema = zvec_test_create_temp_schema(); + TEST_ASSERT(schema != NULL); + + if (schema) { + ZVecCollection *collection = NULL; + + // Test empty name collection + ZVecErrorCode err = + zvec_collection_create_and_open(temp_dir, schema, NULL, &collection); + TEST_ASSERT(err == ZVEC_OK); + if (collection) { + zvec_collection_destroy(collection); + collection = NULL; + } + + // Test long name collection + char long_name[256]; + memset(long_name, 'a', 255); + long_name[255] = '\0'; + + char long_path[512]; + snprintf(long_path, sizeof(long_path), "%s/%s", temp_dir, + "very_long_collection_name_that_tests_path_limits"); + + err = 
zvec_collection_create_and_open(long_path, schema, NULL, &collection); + TEST_ASSERT(err == ZVEC_OK); + if (collection) { + zvec_collection_destroy(collection); + collection = NULL; + } + + // Test NULL name集合 + err = zvec_collection_create_and_open(temp_dir, schema, NULL, &collection); + TEST_ASSERT(err != ZVEC_OK); + + zvec_collection_schema_destroy(schema); + } + + // Clean up temporary directory + char cmd[256]; + snprintf(cmd, sizeof(cmd), "rm -rf %s", temp_dir); + system(cmd); + + TEST_END(); +} + +void test_collection_delete_by_filter(void) { + TEST_START(); + + char temp_dir[] = "/tmp/zvec_test_collection_delete_by_filter"; + + ZVecCollectionSchema *schema = zvec_test_create_temp_schema(); + TEST_ASSERT(schema != NULL); + + if (schema) { + ZVecCollection *collection = NULL; + ZVecErrorCode err = + zvec_collection_create_and_open(temp_dir, schema, NULL, &collection); + TEST_ASSERT(err == ZVEC_OK); + + if (collection) { + // Test normal deletion filtering + err = zvec_collection_delete_by_filter(collection, "id > 1"); + TEST_ASSERT(err == ZVEC_OK); + + // Test NULL filter + err = zvec_collection_delete_by_filter(collection, NULL); + TEST_ASSERT(err != ZVEC_OK); + + // Test empty string filter + err = zvec_collection_delete_by_filter(collection, ""); + TEST_ASSERT(err == ZVEC_OK); + + zvec_collection_destroy(collection); + } + + zvec_collection_schema_destroy(schema); + } + + // Clean up temporary directory + char cmd[256]; + snprintf(cmd, sizeof(cmd), "rm -rf %s", temp_dir); + system(cmd); + + TEST_END(); +} + +void test_collection_stats(void) { + TEST_START(); + + char temp_dir[] = "/tmp/zvec_test_collection_stats"; + + ZVecCollectionSchema *schema = zvec_test_create_temp_schema(); + TEST_ASSERT(schema != NULL); + + if (schema) { + ZVecCollection *collection = NULL; + ZVecErrorCode err = + zvec_collection_create_and_open(temp_dir, schema, NULL, &collection); + TEST_ASSERT(err == ZVEC_OK); + + if (collection) { + ZVecCollectionStats *stats = NULL; + err = 
zvec_collection_get_stats(collection, &stats); + TEST_ASSERT(err == ZVEC_OK); + + if (stats) { + // Basic validation of statistics + TEST_ASSERT(zvec_collection_stats_get_doc_count(stats) == + 0); // New collection should have no documents + zvec_collection_stats_destroy(stats); + } + + zvec_collection_destroy(collection); + } + + zvec_collection_schema_destroy(schema); + } + + // Clean up temporary directory + char cmd[256]; + snprintf(cmd, sizeof(cmd), "rm -rf %s", temp_dir); + system(cmd); + + TEST_END(); +} + +// ============================================================================= +// Field-related tests +// ============================================================================= + +void test_field_schema_functions(void) { + TEST_START(); + + // Test scalar field creation using API + ZVecFieldSchema *scalar_field = + zvec_field_schema_create("test_field", ZVEC_DATA_TYPE_STRING, true, 0); + TEST_ASSERT(scalar_field != NULL); + if (scalar_field) { + TEST_ASSERT( + strcmp(zvec_field_schema_get_name(scalar_field), "test_field") == 0); + TEST_ASSERT(zvec_field_schema_get_data_type(scalar_field) == + ZVEC_DATA_TYPE_STRING); + TEST_ASSERT(zvec_field_schema_is_nullable(scalar_field) == true); + TEST_ASSERT(zvec_field_schema_get_dimension(scalar_field) == 0); + + // Test new functions for scalar field + TEST_ASSERT(zvec_field_schema_is_vector_field(scalar_field) == false); + TEST_ASSERT(zvec_field_schema_is_dense_vector(scalar_field) == false); + TEST_ASSERT(zvec_field_schema_is_sparse_vector(scalar_field) == false); + TEST_ASSERT(zvec_field_schema_is_array_type(scalar_field) == false); + TEST_ASSERT(zvec_field_schema_get_element_data_type(scalar_field) == + ZVEC_DATA_TYPE_STRING); + TEST_ASSERT(zvec_field_schema_has_invert_index(scalar_field) == false); + TEST_ASSERT(zvec_field_schema_get_index_type(scalar_field) == + ZVEC_INDEX_TYPE_UNDEFINED); + + zvec_field_schema_destroy(scalar_field); + } + + // Test vector field creation using API + ZVecFieldSchema 
*vector_field = zvec_field_schema_create( + "vec_field", ZVEC_DATA_TYPE_VECTOR_FP32, false, 128); + TEST_ASSERT(vector_field != NULL); + if (vector_field) { + TEST_ASSERT(strcmp(zvec_field_schema_get_name(vector_field), "vec_field") == + 0); + TEST_ASSERT(zvec_field_schema_get_data_type(vector_field) == + ZVEC_DATA_TYPE_VECTOR_FP32); + TEST_ASSERT(zvec_field_schema_is_nullable(vector_field) == false); + TEST_ASSERT(zvec_field_schema_get_dimension(vector_field) == 128); + + // Test new functions for dense vector field + TEST_ASSERT(zvec_field_schema_is_vector_field(vector_field) == true); + TEST_ASSERT(zvec_field_schema_is_dense_vector(vector_field) == true); + TEST_ASSERT(zvec_field_schema_is_sparse_vector(vector_field) == false); + TEST_ASSERT(zvec_field_schema_is_array_type(vector_field) == false); + + zvec_field_schema_destroy(vector_field); + } + + // Test sparse vector field creation using API + ZVecFieldSchema *sparse_field = zvec_field_schema_create( + "sparse_field", ZVEC_DATA_TYPE_SPARSE_VECTOR_FP32, false, 0); + TEST_ASSERT(sparse_field != NULL); + if (sparse_field) { + TEST_ASSERT( + strcmp(zvec_field_schema_get_name(sparse_field), "sparse_field") == 0); + TEST_ASSERT(zvec_field_schema_get_data_type(sparse_field) == + ZVEC_DATA_TYPE_SPARSE_VECTOR_FP32); + + // Test new functions for sparse vector field + TEST_ASSERT(zvec_field_schema_is_vector_field(sparse_field) == true); + TEST_ASSERT(zvec_field_schema_is_dense_vector(sparse_field) == false); + TEST_ASSERT(zvec_field_schema_is_sparse_vector(sparse_field) == true); + + zvec_field_schema_destroy(sparse_field); + } + + // Test array field + ZVecFieldSchema *array_field = zvec_field_schema_create( + "array_field", ZVEC_DATA_TYPE_ARRAY_INT32, false, 0); + TEST_ASSERT(array_field != NULL); + if (array_field) { + TEST_ASSERT(zvec_field_schema_is_array_type(array_field) == true); + TEST_ASSERT(zvec_field_schema_is_vector_field(array_field) == false); + 
TEST_ASSERT(zvec_field_schema_get_element_data_type(array_field) == + ZVEC_DATA_TYPE_INT32); + + zvec_field_schema_destroy(array_field); + } + + // Test field with invert index + ZVecIndexParams *invert_params = + zvec_index_params_create(ZVEC_INDEX_TYPE_INVERT); + zvec_index_params_set_metric_type(invert_params, ZVEC_METRIC_TYPE_L2); + zvec_index_params_set_invert_params(invert_params, true, false); + + ZVecFieldSchema *indexed_field = + zvec_field_schema_create("indexed_field", ZVEC_DATA_TYPE_INT64, false, 0); + TEST_ASSERT(indexed_field != NULL); + if (indexed_field) { + zvec_field_schema_set_index_params(indexed_field, invert_params); + TEST_ASSERT(zvec_field_schema_has_index(indexed_field) == true); + TEST_ASSERT(zvec_field_schema_get_index_type(indexed_field) == + ZVEC_INDEX_TYPE_INVERT); + TEST_ASSERT(zvec_field_schema_has_invert_index(indexed_field) == true); + + zvec_field_schema_destroy(indexed_field); + } + zvec_index_params_destroy(invert_params); + + // Test field with HNSW index + ZVecIndexParams *hnsw_params = zvec_index_params_create(ZVEC_INDEX_TYPE_HNSW); + zvec_index_params_set_metric_type(hnsw_params, ZVEC_METRIC_TYPE_L2); + zvec_index_params_set_hnsw_params(hnsw_params, 16, 200); + + ZVecFieldSchema *hnsw_field = zvec_field_schema_create( + "hnsw_field", ZVEC_DATA_TYPE_VECTOR_FP32, false, 128); + TEST_ASSERT(hnsw_field != NULL); + if (hnsw_field) { + zvec_field_schema_set_hnsw_index(hnsw_field, hnsw_params); + TEST_ASSERT(zvec_field_schema_has_index(hnsw_field) == true); + TEST_ASSERT(zvec_field_schema_get_index_type(hnsw_field) == + ZVEC_INDEX_TYPE_HNSW); + TEST_ASSERT(zvec_field_schema_has_invert_index(hnsw_field) == + false); // Vector field, no invert index + + zvec_field_schema_destroy(hnsw_field); + } + zvec_index_params_destroy(hnsw_params); + + TEST_END(); +} + +void test_field_helper_functions(void) { + TEST_START(); + + // Test scalar field helper functions + ZVecIndexParams *invert_params = 
zvec_test_create_default_invert_params(true); + ZVecFieldSchema *scalar_field = zvec_test_create_scalar_field( + "test_scalar", ZVEC_DATA_TYPE_INT32, true, invert_params); + TEST_ASSERT(scalar_field != NULL); + if (scalar_field) { + TEST_ASSERT( + strcmp(zvec_field_schema_get_name(scalar_field), "test_scalar") == 0); + TEST_ASSERT(zvec_field_schema_get_data_type(scalar_field) == + ZVEC_DATA_TYPE_INT32); + zvec_field_schema_destroy(scalar_field); + } + zvec_index_params_destroy(invert_params); + + // Test vector field helper functions + ZVecIndexParams *hnsw_params = zvec_test_create_default_hnsw_params(); + ZVecFieldSchema *vector_field = zvec_test_create_vector_field( + "test_vector", ZVEC_DATA_TYPE_VECTOR_FP32, 128, false, hnsw_params); + TEST_ASSERT(vector_field != NULL); + if (vector_field) { + TEST_ASSERT( + strcmp(zvec_field_schema_get_name(vector_field), "test_vector") == 0); + TEST_ASSERT(zvec_field_schema_get_data_type(vector_field) == + ZVEC_DATA_TYPE_VECTOR_FP32); + TEST_ASSERT(zvec_field_schema_get_dimension(vector_field) == 128); + zvec_field_schema_destroy(vector_field); + } + zvec_index_params_destroy(hnsw_params); + + TEST_END(); +} + +// ============================================================================= +// Document-related tests +// ============================================================================= + +void test_doc_creation(void) { + TEST_START(); + + ZVecCollectionSchema *schema = zvec_test_create_temp_schema(); + TEST_ASSERT(schema != NULL); + + if (schema) { + // Test complete document creation + ZVecDoc *doc = zvec_test_create_doc(1, schema, NULL); + TEST_ASSERT(doc != NULL); + if (doc) { + zvec_doc_destroy(doc); + } + + // Test null value document creation + ZVecDoc *null_doc = zvec_test_create_doc_null(2, schema, NULL); + TEST_ASSERT(null_doc != NULL); + if (null_doc) { + zvec_doc_destroy(null_doc); + } + + zvec_collection_schema_destroy(schema); + } + + TEST_END(); +} + +void test_doc_primary_key(void) { + 
TEST_START(); + + // Test primary key generation + char *pk = zvec_test_make_pk(12345); + TEST_ASSERT(pk != NULL); + if (pk) { + TEST_ASSERT(strcmp(pk, "pk_12345") == 0); + free(pk); + } + + TEST_END(); +} + +// Test for zvec_doc_add_field_by_value - covers all data types +void test_doc_add_field_by_value(void) { + TEST_START(); + + ZVecDoc *doc = zvec_doc_create(); + TEST_ASSERT(doc != NULL); + + if (!doc) { + TEST_END(); + return; + } + + // Scalar types + // BINARY + const char *binary_data = "binary"; + ZVecErrorCode err = + zvec_doc_add_field_by_value(doc, "binary_field", ZVEC_DATA_TYPE_BINARY, + binary_data, strlen(binary_data)); + TEST_ASSERT(err == ZVEC_OK); + + // STRING + const char *string_data = "hello"; + err = zvec_doc_add_field_by_value(doc, "string_field", ZVEC_DATA_TYPE_STRING, + string_data, strlen(string_data)); + TEST_ASSERT(err == ZVEC_OK); + + // BOOL + bool bool_val = true; + err = zvec_doc_add_field_by_value(doc, "bool_field", ZVEC_DATA_TYPE_BOOL, + &bool_val, sizeof(bool_val)); + TEST_ASSERT(err == ZVEC_OK); + + // INT32 + int32_t int32_val = -12345; + err = zvec_doc_add_field_by_value(doc, "int32_field", ZVEC_DATA_TYPE_INT32, + &int32_val, sizeof(int32_val)); + TEST_ASSERT(err == ZVEC_OK); + + // INT64 + int64_t int64_val = -9876543210LL; + err = zvec_doc_add_field_by_value(doc, "int64_field", ZVEC_DATA_TYPE_INT64, + &int64_val, sizeof(int64_val)); + TEST_ASSERT(err == ZVEC_OK); + + // UINT32 + uint32_t uint32_val = 4294967295U; + err = zvec_doc_add_field_by_value(doc, "uint32_field", ZVEC_DATA_TYPE_UINT32, + &uint32_val, sizeof(uint32_val)); + TEST_ASSERT(err == ZVEC_OK); + + // UINT64 + uint64_t uint64_val = 18446744073709551615ULL; + err = zvec_doc_add_field_by_value(doc, "uint64_field", ZVEC_DATA_TYPE_UINT64, + &uint64_val, sizeof(uint64_val)); + TEST_ASSERT(err == ZVEC_OK); + + // FLOAT + float float_val = 3.14159f; + err = zvec_doc_add_field_by_value(doc, "float_field", ZVEC_DATA_TYPE_FLOAT, + &float_val, sizeof(float_val)); + 
TEST_ASSERT(err == ZVEC_OK); + + // DOUBLE + double double_val = 3.14159265358979; + err = zvec_doc_add_field_by_value(doc, "double_field", ZVEC_DATA_TYPE_DOUBLE, + &double_val, sizeof(double_val)); + TEST_ASSERT(err == ZVEC_OK); + + // Vector types + // VECTOR_BINARY32 + uint32_t binary32_vec[] = {0xFFFFFFFF, 0x00000000, 0xAAAAAAAA, 0x55555555}; + err = zvec_doc_add_field_by_value(doc, "binary32_vec_field", + ZVEC_DATA_TYPE_VECTOR_BINARY32, + binary32_vec, sizeof(binary32_vec)); + TEST_ASSERT(err == ZVEC_OK); + + // VECTOR_BINARY64 + uint64_t binary64_vec[] = {0xFFFFFFFFFFFFFFFFULL, 0x0000000000000000ULL}; + err = zvec_doc_add_field_by_value(doc, "binary64_vec_field", + ZVEC_DATA_TYPE_VECTOR_BINARY64, + binary64_vec, sizeof(binary64_vec)); + TEST_ASSERT(err == ZVEC_OK); + + // VECTOR_FP16 + uint16_t fp16_vec[] = {0x3C00, 0x4000, 0xC000, 0x8000}; + err = zvec_doc_add_field_by_value(doc, "fp16_vec_field", + ZVEC_DATA_TYPE_VECTOR_FP16, fp16_vec, + sizeof(fp16_vec)); + TEST_ASSERT(err == ZVEC_OK); + + // VECTOR_FP32 + float fp32_vec[] = {1.0f, -2.0f, 3.5f, -4.5f}; + err = zvec_doc_add_field_by_value(doc, "fp32_vec_field", + ZVEC_DATA_TYPE_VECTOR_FP32, fp32_vec, + sizeof(fp32_vec)); + TEST_ASSERT(err == ZVEC_OK); + + // VECTOR_FP64 + double fp64_vec[] = {1.1, -2.2, 3.3, -4.4}; + err = zvec_doc_add_field_by_value(doc, "fp64_vec_field", + ZVEC_DATA_TYPE_VECTOR_FP64, fp64_vec, + sizeof(fp64_vec)); + TEST_ASSERT(err == ZVEC_OK); + + // VECTOR_INT4 (packed - each byte contains 2 values) + int8_t int4_vec[] = {0x12, 0x34, 0x56, 0x78, 0x9A, 0xBC, 0xDE, 0xF0}; + err = zvec_doc_add_field_by_value(doc, "int4_vec_field", + ZVEC_DATA_TYPE_VECTOR_INT4, int4_vec, + sizeof(int4_vec)); + TEST_ASSERT(err == ZVEC_OK); + + // VECTOR_INT8 + int8_t int8_vec[] = {-128, -1, 0, 1, 127}; + err = zvec_doc_add_field_by_value(doc, "int8_vec_field", + ZVEC_DATA_TYPE_VECTOR_INT8, int8_vec, + sizeof(int8_vec)); + TEST_ASSERT(err == ZVEC_OK); + + // VECTOR_INT16 + int16_t int16_vec[] = {-32768, -1, 
0, 1, 32767}; + err = zvec_doc_add_field_by_value(doc, "int16_vec_field", + ZVEC_DATA_TYPE_VECTOR_INT16, int16_vec, + sizeof(int16_vec)); + TEST_ASSERT(err == ZVEC_OK); + + // Sparse vector types + // SPARSE_VECTOR_FP16 - format: [nnz(size_t)][indices...][values...] + size_t sparse_fp16_nnz = 3; + uint32_t sparse_fp16_indices[] = {0, 5, 10}; + uint16_t sparse_fp16_values[] = {0x3C00, 0x4000, 0xC000}; + size_t sparse_fp16_size = sizeof(sparse_fp16_nnz) + + sizeof(sparse_fp16_indices) + + sizeof(sparse_fp16_values); + char *sparse_fp16_buffer = (char *)malloc(sparse_fp16_size); + memcpy(sparse_fp16_buffer, &sparse_fp16_nnz, sizeof(sparse_fp16_nnz)); + memcpy(sparse_fp16_buffer + sizeof(sparse_fp16_nnz), sparse_fp16_indices, + sizeof(sparse_fp16_indices)); + memcpy(sparse_fp16_buffer + sizeof(sparse_fp16_nnz) + + sizeof(sparse_fp16_indices), + sparse_fp16_values, sizeof(sparse_fp16_values)); + err = zvec_doc_add_field_by_value(doc, "sparse_fp16_field", + ZVEC_DATA_TYPE_SPARSE_VECTOR_FP16, + sparse_fp16_buffer, sparse_fp16_size); + TEST_ASSERT(err == ZVEC_OK); + free(sparse_fp16_buffer); + + // SPARSE_VECTOR_FP32 + size_t sparse_fp32_nnz = 3; + uint32_t sparse_fp32_indices[] = {2, 7, 15}; + float sparse_fp32_values[] = {1.5f, -2.5f, 3.5f}; + size_t sparse_fp32_size = sizeof(sparse_fp32_nnz) + + sizeof(sparse_fp32_indices) + + sizeof(sparse_fp32_values); + char *sparse_fp32_buffer = (char *)malloc(sparse_fp32_size); + memcpy(sparse_fp32_buffer, &sparse_fp32_nnz, sizeof(sparse_fp32_nnz)); + memcpy(sparse_fp32_buffer + sizeof(sparse_fp32_nnz), sparse_fp32_indices, + sizeof(sparse_fp32_indices)); + memcpy(sparse_fp32_buffer + sizeof(sparse_fp32_nnz) + + sizeof(sparse_fp32_indices), + sparse_fp32_values, sizeof(sparse_fp32_values)); + err = zvec_doc_add_field_by_value(doc, "sparse_fp32_field", + ZVEC_DATA_TYPE_SPARSE_VECTOR_FP32, + sparse_fp32_buffer, sparse_fp32_size); + TEST_ASSERT(err == ZVEC_OK); + free(sparse_fp32_buffer); + + // Array types + // ARRAY_BINARY - format: 
[length(uint32_t)][data][length][data]... + uint8_t array_bin_data[] = { + 1, 0, 0, 0, 0x01, // length=1, data=0x01 + 2, 0, 0, 0, 0x02, 0x03, // length=2, data=0x02,0x03 + 2, 0, 0, 0, 0x04, 0x05 // length=2, data=0x04,0x05 + }; + err = zvec_doc_add_field_by_value(doc, "array_binary_field", + ZVEC_DATA_TYPE_ARRAY_BINARY, array_bin_data, + sizeof(array_bin_data)); + TEST_ASSERT(err == ZVEC_OK); + + // ARRAY_STRING - null-terminated strings + const char *array_str_data[] = {"str1", "str2", "str3"}; + ZVecString *array_zvec_str[3]; + for (int i = 0; i < 3; i++) { + array_zvec_str[i] = zvec_string_create(array_str_data[i]); + } + err = zvec_doc_add_field_by_value(doc, "array_string_field", + ZVEC_DATA_TYPE_ARRAY_STRING, array_zvec_str, + sizeof(array_zvec_str)); + TEST_ASSERT(err == ZVEC_OK); + + // ARRAY_BOOL + bool array_bool_data[] = {true, false, true, false}; + err = zvec_doc_add_field_by_value(doc, "array_bool_field", + ZVEC_DATA_TYPE_ARRAY_BOOL, array_bool_data, + sizeof(array_bool_data)); + TEST_ASSERT(err == ZVEC_OK); + + // ARRAY_INT32 + int32_t array_int32_data[] = {-100, -50, 0, 50, 100}; + err = zvec_doc_add_field_by_value(doc, "array_int32_field", + ZVEC_DATA_TYPE_ARRAY_INT32, + array_int32_data, sizeof(array_int32_data)); + TEST_ASSERT(err == ZVEC_OK); + + // ARRAY_INT64 + int64_t array_int64_data[] = {-1000000, -500000, 0, 500000, 1000000}; + err = zvec_doc_add_field_by_value(doc, "array_int64_field", + ZVEC_DATA_TYPE_ARRAY_INT64, + array_int64_data, sizeof(array_int64_data)); + TEST_ASSERT(err == ZVEC_OK); + + // ARRAY_UINT32 + uint32_t array_uint32_data[] = {0, 100, 1000, 10000, 4294967295U}; + err = zvec_doc_add_field_by_value( + doc, "array_uint32_field", ZVEC_DATA_TYPE_ARRAY_UINT32, array_uint32_data, + sizeof(array_uint32_data)); + TEST_ASSERT(err == ZVEC_OK); + + // ARRAY_UINT64 + uint64_t array_uint64_data[] = {0, 100, 1000, 10000, 18446744073709551615ULL}; + err = zvec_doc_add_field_by_value( + doc, "array_uint64_field", 
ZVEC_DATA_TYPE_ARRAY_UINT64, array_uint64_data, + sizeof(array_uint64_data)); + TEST_ASSERT(err == ZVEC_OK); + + // ARRAY_FLOAT + float array_float_data[] = {-1.5f, -0.5f, 0.0f, 0.5f, 1.5f}; + err = zvec_doc_add_field_by_value(doc, "array_float_field", + ZVEC_DATA_TYPE_ARRAY_FLOAT, + array_float_data, sizeof(array_float_data)); + TEST_ASSERT(err == ZVEC_OK); + + // ARRAY_DOUBLE + double array_double_data[] = {-1.1, -0.1, 0.0, 0.1, 1.1}; + err = zvec_doc_add_field_by_value( + doc, "array_double_field", ZVEC_DATA_TYPE_ARRAY_DOUBLE, array_double_data, + sizeof(array_double_data)); + TEST_ASSERT(err == ZVEC_OK); + + // Verify we can retrieve some of the values + void *result = NULL; + size_t result_size = 0; + err = zvec_doc_get_field_value_copy(doc, "int32_field", ZVEC_DATA_TYPE_INT32, + &result, &result_size); + TEST_ASSERT(err == ZVEC_OK && result_size == sizeof(int32_t)); + if (result) { + TEST_ASSERT(*(int32_t *)result == -12345); + free(result); + } + + err = zvec_doc_get_field_value_copy(doc, "float_field", ZVEC_DATA_TYPE_FLOAT, + &result, &result_size); + TEST_ASSERT(err == ZVEC_OK && result_size == sizeof(float)); + if (result) { + TEST_ASSERT(fabs(*(float *)result - 3.14159f) < 0.0001f); + free(result); + } + + zvec_doc_destroy(doc); + TEST_END(); +} + +// Test for zvec_doc_add_field_by_struct - covers all data types +void test_doc_add_field_by_struct(void) { + TEST_START(); + + ZVecDoc *doc = zvec_doc_create(); + TEST_ASSERT(doc != NULL); + + if (!doc) { + TEST_END(); + return; + } + + ZVecErrorCode err; + ZVecDocField field; + + // Scalar types + // BINARY + memset(&field, 0, sizeof(field)); + field.name.data = "binary_field"; + field.name.length = strlen("binary_field"); + field.data_type = ZVEC_DATA_TYPE_BINARY; + uint8_t binary_data[] = {0x01, 0x02, 0x03, 0x04}; + field.value.binary_value.data = binary_data; + field.value.binary_value.length = sizeof(binary_data); + err = zvec_doc_add_field_by_struct(doc, &field); + TEST_ASSERT(err == ZVEC_OK); + + // 
STRING + memset(&field, 0, sizeof(field)); + field.name.data = "string_field"; + field.name.length = strlen("string_field"); + field.data_type = ZVEC_DATA_TYPE_STRING; + const char *string_data = "hello world"; + field.value.string_value.data = (char *)string_data; + field.value.string_value.length = strlen(string_data); + err = zvec_doc_add_field_by_struct(doc, &field); + TEST_ASSERT(err == ZVEC_OK); + + // BOOL + memset(&field, 0, sizeof(field)); + field.name.data = "bool_field"; + field.name.length = strlen("bool_field"); + field.data_type = ZVEC_DATA_TYPE_BOOL; + field.value.bool_value = true; + err = zvec_doc_add_field_by_struct(doc, &field); + TEST_ASSERT(err == ZVEC_OK); + + // INT32 + memset(&field, 0, sizeof(field)); + field.name.data = "int32_field"; + field.name.length = strlen("int32_field"); + field.data_type = ZVEC_DATA_TYPE_INT32; + field.value.int32_value = -12345; + err = zvec_doc_add_field_by_struct(doc, &field); + TEST_ASSERT(err == ZVEC_OK); + + // INT64 + memset(&field, 0, sizeof(field)); + field.name.data = "int64_field"; + field.name.length = strlen("int64_field"); + field.data_type = ZVEC_DATA_TYPE_INT64; + field.value.int64_value = -9876543210LL; + err = zvec_doc_add_field_by_struct(doc, &field); + TEST_ASSERT(err == ZVEC_OK); + + // UINT32 + memset(&field, 0, sizeof(field)); + field.name.data = "uint32_field"; + field.name.length = strlen("uint32_field"); + field.data_type = ZVEC_DATA_TYPE_UINT32; + field.value.uint32_value = 4294967295U; + err = zvec_doc_add_field_by_struct(doc, &field); + TEST_ASSERT(err == ZVEC_OK); + + // UINT64 + memset(&field, 0, sizeof(field)); + field.name.data = "uint64_field"; + field.name.length = strlen("uint64_field"); + field.data_type = ZVEC_DATA_TYPE_UINT64; + field.value.uint64_value = 18446744073709551615ULL; + err = zvec_doc_add_field_by_struct(doc, &field); + TEST_ASSERT(err == ZVEC_OK); + + // FLOAT + memset(&field, 0, sizeof(field)); + field.name.data = "float_field"; + field.name.length = 
strlen("float_field"); + field.data_type = ZVEC_DATA_TYPE_FLOAT; + field.value.float_value = 3.14159f; + err = zvec_doc_add_field_by_struct(doc, &field); + TEST_ASSERT(err == ZVEC_OK); + + // DOUBLE + memset(&field, 0, sizeof(field)); + field.name.data = "double_field"; + field.name.length = strlen("double_field"); + field.data_type = ZVEC_DATA_TYPE_DOUBLE; + field.value.double_value = 3.14159265358979; + err = zvec_doc_add_field_by_struct(doc, &field); + TEST_ASSERT(err == ZVEC_OK); + + // VECTOR_BINARY32 + memset(&field, 0, sizeof(field)); + field.name.data = "binary32_vec_field"; + field.name.length = strlen("binary32_vec_field"); + field.data_type = ZVEC_DATA_TYPE_VECTOR_BINARY32; + uint32_t binary32_vec[] = {0xFFFFFFFF, 0x00000000, 0xAAAAAAAA, 0x55555555}; + field.value.vector_value.data = (const float *)binary32_vec; + field.value.vector_value.length = sizeof(binary32_vec) / sizeof(uint32_t); + err = zvec_doc_add_field_by_struct(doc, &field); + TEST_ASSERT(err == ZVEC_OK); + + // VECTOR_BINARY64 + memset(&field, 0, sizeof(field)); + field.name.data = "binary64_vec_field"; + field.name.length = strlen("binary64_vec_field"); + field.data_type = ZVEC_DATA_TYPE_VECTOR_BINARY64; + uint64_t binary64_vec[] = {0xFFFFFFFFFFFFFFFFULL, 0x0000000000000000ULL}; + field.value.vector_value.data = (const float *)binary64_vec; + field.value.vector_value.length = sizeof(binary64_vec) / sizeof(uint64_t); + err = zvec_doc_add_field_by_struct(doc, &field); + TEST_ASSERT(err == ZVEC_OK); + + // VECTOR_FP16 + memset(&field, 0, sizeof(field)); + field.name.data = "fp16_vec_field"; + field.name.length = strlen("fp16_vec_field"); + field.data_type = ZVEC_DATA_TYPE_VECTOR_FP16; + uint16_t fp16_vec[] = {0x3C00, 0x4000, 0xC000, 0x8000}; + field.value.vector_value.data = (const float *)fp16_vec; + field.value.vector_value.length = sizeof(fp16_vec) / sizeof(uint16_t); + err = zvec_doc_add_field_by_struct(doc, &field); + TEST_ASSERT(err == ZVEC_OK); + + // VECTOR_FP32 + memset(&field, 0, 
sizeof(field)); + field.name.data = "fp32_vec_field"; + field.name.length = strlen("fp32_vec_field"); + field.data_type = ZVEC_DATA_TYPE_VECTOR_FP32; + float fp32_vec[] = {1.0f, -2.0f, 3.5f, -4.5f}; + field.value.vector_value.data = fp32_vec; + field.value.vector_value.length = sizeof(fp32_vec) / sizeof(float); + err = zvec_doc_add_field_by_struct(doc, &field); + TEST_ASSERT(err == ZVEC_OK); + + // VECTOR_FP64 + memset(&field, 0, sizeof(field)); + field.name.data = "fp64_vec_field"; + field.name.length = strlen("fp64_vec_field"); + field.data_type = ZVEC_DATA_TYPE_VECTOR_FP64; + double fp64_vec[] = {1.1, -2.2, 3.3, -4.4}; + field.value.vector_value.data = (const float *)fp64_vec; + field.value.vector_value.length = sizeof(fp64_vec) / sizeof(double); + err = zvec_doc_add_field_by_struct(doc, &field); + TEST_ASSERT(err == ZVEC_OK); + + // VECTOR_INT4 + memset(&field, 0, sizeof(field)); + field.name.data = "int4_vec_field"; + field.name.length = strlen("int4_vec_field"); + field.data_type = ZVEC_DATA_TYPE_VECTOR_INT4; + int8_t int4_vec[] = {0x12, 0x34, 0x56, 0x78}; + field.value.vector_value.data = (const float *)int4_vec; + field.value.vector_value.length = + sizeof(int4_vec) * 2; // Each byte contains 2 values + err = zvec_doc_add_field_by_struct(doc, &field); + TEST_ASSERT(err == ZVEC_OK); + + // VECTOR_INT8 + memset(&field, 0, sizeof(field)); + field.name.data = "int8_vec_field"; + field.name.length = strlen("int8_vec_field"); + field.data_type = ZVEC_DATA_TYPE_VECTOR_INT8; + int8_t int8_vec[] = {-128, -1, 0, 1, 127}; + field.value.vector_value.data = (const float *)int8_vec; + field.value.vector_value.length = sizeof(int8_vec) / sizeof(int8_t); + err = zvec_doc_add_field_by_struct(doc, &field); + TEST_ASSERT(err == ZVEC_OK); + + // VECTOR_INT16 + memset(&field, 0, sizeof(field)); + field.name.data = "int16_vec_field"; + field.name.length = strlen("int16_vec_field"); + field.data_type = ZVEC_DATA_TYPE_VECTOR_INT16; + int16_t int16_vec[] = {-32768, -1, 0, 1, 
32767}; + field.value.vector_value.data = (const float *)int16_vec; + field.value.vector_value.length = sizeof(int16_vec) / sizeof(int16_t); + err = zvec_doc_add_field_by_struct(doc, &field); + TEST_ASSERT(err == ZVEC_OK); + + // Sparse vector types + // SPARSE_VECTOR_FP16 + memset(&field, 0, sizeof(field)); + field.name.data = "sparse_fp16_field"; + field.name.length = strlen("sparse_fp16_field"); + field.data_type = ZVEC_DATA_TYPE_SPARSE_VECTOR_FP16; + uint16_t sparse_fp16_values[] = {0x3C00, 0x4000, 0xC000}; + field.value.vector_value.data = (const float *)sparse_fp16_values; + field.value.vector_value.length = + sizeof(sparse_fp16_values) / sizeof(uint16_t); + err = zvec_doc_add_field_by_struct(doc, &field); + TEST_ASSERT(err == ZVEC_OK); + + // SPARSE_VECTOR_FP32 + memset(&field, 0, sizeof(field)); + field.name.data = "sparse_fp32_field"; + field.name.length = strlen("sparse_fp32_field"); + field.data_type = ZVEC_DATA_TYPE_SPARSE_VECTOR_FP32; + float sparse_fp32_values[] = {1.5f, -2.5f, 3.5f}; + field.value.vector_value.data = sparse_fp32_values; + field.value.vector_value.length = sizeof(sparse_fp32_values) / sizeof(float); + err = zvec_doc_add_field_by_struct(doc, &field); + TEST_ASSERT(err == ZVEC_OK); + + // Array types + // ARRAY_BINARY + memset(&field, 0, sizeof(field)); + field.name.data = "array_binary_field"; + field.name.length = strlen("array_binary_field"); + field.data_type = ZVEC_DATA_TYPE_ARRAY_BINARY; + uint8_t array_bin_data[] = { + 1, 0, 0, 0, 0x01, // length=1, data=0x01 + 2, 0, 0, 0, 0x02, 0x03, // length=2, data=0x02,0x03 + 2, 0, 0, 0, 0x04, 0x05 // length=2, data=0x04,0x05 + }; + field.value.binary_value.data = array_bin_data; + field.value.binary_value.length = sizeof(array_bin_data); + err = zvec_doc_add_field_by_struct(doc, &field); + TEST_ASSERT(err == ZVEC_OK); + + // ARRAY_STRING + memset(&field, 0, sizeof(field)); + field.name.data = "array_string_field"; + field.name.length = strlen("array_string_field"); + field.data_type = 
ZVEC_DATA_TYPE_ARRAY_STRING; + const char array_string_data[] = "str1\0str2\0str3\0"; + field.value.string_value.data = (char *)array_string_data; + field.value.string_value.length = sizeof(array_string_data); + err = zvec_doc_add_field_by_struct(doc, &field); + TEST_ASSERT(err == ZVEC_OK); + + // ARRAY_BOOL + memset(&field, 0, sizeof(field)); + field.name.data = "array_bool_field"; + field.name.length = strlen("array_bool_field"); + field.data_type = ZVEC_DATA_TYPE_ARRAY_BOOL; + bool array_bool_data[] = {true, false, true, false}; + field.value.binary_value.data = (const uint8_t *)array_bool_data; + field.value.binary_value.length = sizeof(array_bool_data); + err = zvec_doc_add_field_by_struct(doc, &field); + TEST_ASSERT(err == ZVEC_OK); + + // ARRAY_INT32 + memset(&field, 0, sizeof(field)); + field.name.data = "array_int32_field"; + field.name.length = strlen("array_int32_field"); + field.data_type = ZVEC_DATA_TYPE_ARRAY_INT32; + int32_t array_int32_data[] = {-100, -50, 0, 50, 100}; + field.value.vector_value.data = (const float *)array_int32_data; + field.value.vector_value.length = sizeof(array_int32_data) / sizeof(int32_t); + err = zvec_doc_add_field_by_struct(doc, &field); + TEST_ASSERT(err == ZVEC_OK); + + // ARRAY_INT64 + memset(&field, 0, sizeof(field)); + field.name.data = "array_int64_field"; + field.name.length = strlen("array_int64_field"); + field.data_type = ZVEC_DATA_TYPE_ARRAY_INT64; + int64_t array_int64_data[] = {-1000000, -500000, 0, 500000, 1000000}; + field.value.vector_value.data = (const float *)array_int64_data; + field.value.vector_value.length = sizeof(array_int64_data) / sizeof(int64_t); + err = zvec_doc_add_field_by_struct(doc, &field); + TEST_ASSERT(err == ZVEC_OK); + + // ARRAY_UINT32 + memset(&field, 0, sizeof(field)); + field.name.data = "array_uint32_field"; + field.name.length = strlen("array_uint32_field"); + field.data_type = ZVEC_DATA_TYPE_ARRAY_UINT32; + uint32_t array_uint32_data[] = {0, 100, 1000, 10000, 4294967295U}; + 
field.value.vector_value.data = (const float *)array_uint32_data; + field.value.vector_value.length = + sizeof(array_uint32_data) / sizeof(uint32_t); + err = zvec_doc_add_field_by_struct(doc, &field); + TEST_ASSERT(err == ZVEC_OK); + + // ARRAY_UINT64 + memset(&field, 0, sizeof(field)); + field.name.data = "array_uint64_field"; + field.name.length = strlen("array_uint64_field"); + field.data_type = ZVEC_DATA_TYPE_ARRAY_UINT64; + uint64_t array_uint64_data[] = {0, 100, 1000, 10000, 18446744073709551615ULL}; + field.value.vector_value.data = (const float *)array_uint64_data; + field.value.vector_value.length = + sizeof(array_uint64_data) / sizeof(uint64_t); + err = zvec_doc_add_field_by_struct(doc, &field); + TEST_ASSERT(err == ZVEC_OK); + + // ARRAY_FLOAT + memset(&field, 0, sizeof(field)); + field.name.data = "array_float_field"; + field.name.length = strlen("array_float_field"); + field.data_type = ZVEC_DATA_TYPE_ARRAY_FLOAT; + float array_float_data[] = {-1.5f, -0.5f, 0.0f, 0.5f, 1.5f}; + field.value.vector_value.data = array_float_data; + field.value.vector_value.length = sizeof(array_float_data) / sizeof(float); + err = zvec_doc_add_field_by_struct(doc, &field); + TEST_ASSERT(err == ZVEC_OK); + + // ARRAY_DOUBLE + memset(&field, 0, sizeof(field)); + field.name.data = "array_double_field"; + field.name.length = strlen("array_double_field"); + field.data_type = ZVEC_DATA_TYPE_ARRAY_DOUBLE; + double array_double_data[] = {-1.1, -0.1, 0.0, 0.1, 1.1}; + field.value.vector_value.data = (const float *)array_double_data; + field.value.vector_value.length = sizeof(array_double_data) / sizeof(double); + err = zvec_doc_add_field_by_struct(doc, &field); + TEST_ASSERT(err == ZVEC_OK); + + // Verify we can retrieve some of the values + void *result = NULL; + size_t result_size = 0; + + err = zvec_doc_get_field_value_copy(doc, "int32_field", ZVEC_DATA_TYPE_INT32, + &result, &result_size); + TEST_ASSERT(err == ZVEC_OK && result_size == sizeof(int32_t)); + if (result) { + 
TEST_ASSERT(*(int32_t *)result == -12345);
+    free(result);
+  }
+
+  err = zvec_doc_get_field_value_copy(doc, "float_field", ZVEC_DATA_TYPE_FLOAT,
+                                      &result, &result_size);
+  TEST_ASSERT(err == ZVEC_OK && result_size == sizeof(float));
+  if (result) {
+    TEST_ASSERT(fabs(*(float *)result - 3.14159f) < 0.0001f);
+    free(result);
+  }
+
+  zvec_doc_destroy(doc);
+  TEST_END();
+}
+
+// Forward declarations of the document test cases defined in this file.
+void test_doc_basic_operations(void);
+void test_doc_null_field_api(void);
+void test_doc_get_field_value_basic(void);
+void test_doc_get_field_value_copy(void);
+void test_doc_get_field_value_pointer(void);
+void test_doc_field_operations(void);
+void test_doc_error_conditions(void);
+void test_doc_serialization(void);
+void test_doc_add_field_by_value(void);
+void test_doc_add_field_by_struct(void);
+
+// Runs the document test cases in a fixed order.
+// NOTE(review): test_doc_add_field_by_value() and
+// test_doc_add_field_by_struct() are declared above but are not invoked
+// here - confirm that another entry point runs them, otherwise they never
+// execute.
+void test_doc_functions(void) {
+  test_doc_basic_operations();
+  test_doc_null_field_api();
+  test_doc_get_field_value_basic();
+  test_doc_get_field_value_copy();
+  test_doc_get_field_value_pointer();
+  test_doc_field_operations();
+  test_doc_error_conditions();
+  test_doc_serialization();
+}
+
+// Exercises the set/get round trip of the document metadata accessors:
+// primary key, document id, score and operator.
+void test_doc_basic_operations(void) {
+  TEST_START();
+
+  // Create test document
+  ZVecDoc *doc = zvec_doc_create();
+  TEST_ASSERT(doc != NULL);
+  if (!doc) {
+    // TEST_ASSERT records the failure but does not leave the function, so
+    // bail out explicitly instead of handing a NULL document to the API
+    // (same guard as test_doc_null_field_api).
+    TEST_END();
+    return;
+  }
+
+  // Test primary key operations
+  zvec_doc_set_pk(doc, "test_doc_complete");
+  const char *pk = zvec_doc_get_pk_pointer(doc);
+  TEST_ASSERT(pk != NULL);
+  if (pk) {
+    // Only compare when a key came back; strcmp(NULL, ...) is undefined.
+    TEST_ASSERT(strcmp(pk, "test_doc_complete") == 0);
+  }
+
+  // Test document ID and score operations
+  zvec_doc_set_doc_id(doc, 99999);
+  uint64_t doc_id = zvec_doc_get_doc_id(doc);
+  TEST_ASSERT(doc_id == 99999);
+
+  // The score is expected to round-trip bit-exactly, hence the == on floats.
+  zvec_doc_set_score(doc, 0.95f);
+  float score = zvec_doc_get_score(doc);
+  TEST_ASSERT(score == 0.95f);
+
+  // Test operator operations
+  zvec_doc_set_operator(doc, ZVEC_DOC_OP_INSERT);
+  ZVecDocOperator op = zvec_doc_get_operator(doc);
+  TEST_ASSERT(op == ZVEC_DOC_OP_INSERT);
+
+  zvec_doc_destroy(doc);
+
+  TEST_END();
+}
+
+void test_doc_null_field_api(void) {
+  TEST_START();
+
+  ZVecDoc *doc =
zvec_doc_create();
+  TEST_ASSERT(doc != NULL);
+  if (!doc) {
+    TEST_END();
+    return;
+  }
+
+  ZVecErrorCode err = zvec_doc_set_field_null(doc, "nullable_field");
+  TEST_ASSERT(err == ZVEC_OK);
+  TEST_ASSERT(zvec_doc_has_field(doc, "nullable_field") == true);
+  TEST_ASSERT(zvec_doc_has_field_value(doc, "nullable_field") == false);
+  TEST_ASSERT(zvec_doc_is_field_null(doc, "nullable_field") == true);
+
+  err = zvec_doc_set_field_null(NULL, "nullable_field");
+  TEST_ASSERT(err == ZVEC_ERROR_INVALID_ARGUMENT);
+  err = zvec_doc_set_field_null(doc, NULL);
+  TEST_ASSERT(err == ZVEC_ERROR_INVALID_ARGUMENT);
+
+  zvec_doc_destroy(doc);
+  TEST_END();
+}
+
+// Adds one field per scalar type (bool, int32/64, uint32/64, float, double)
+// through zvec_doc_add_field_by_struct() and reads each one back through
+// zvec_doc_get_field_value_basic() into a caller-provided buffer.
+void test_doc_get_field_value_basic(void) {
+  TEST_START();
+
+  ZVecDoc *doc = zvec_doc_create();
+  TEST_ASSERT(doc != NULL);
+  if (!doc) {
+    // TEST_ASSERT does not abort; avoid driving the API with a NULL document.
+    TEST_END();
+    return;
+  }
+
+  ZVecErrorCode err;
+  // A single descriptor is reused for every type. It is zeroed before each
+  // use so that no indeterminate bytes of the struct/union reach the API
+  // (same convention as test_doc_add_field_by_struct).
+  ZVecDocField field;
+
+  printf(
+      "=== Testing zvec_doc_get_field_value_basic with all supported types "
+      "===\n");
+
+  // Every *_result below starts from a value that differs from the expected
+  // one, so a getter that fails without writing its output is detected
+  // deterministically instead of comparing indeterminate memory.
+
+  // BOOL type
+  memset(&field, 0, sizeof(field));
+  field.name.data = "bool_field";
+  field.name.length = strlen("bool_field");
+  field.data_type = ZVEC_DATA_TYPE_BOOL;
+  field.value.bool_value = true;
+  err = zvec_doc_add_field_by_struct(doc, &field);
+  TEST_ASSERT(err == ZVEC_OK);
+
+  bool bool_result = false;
+  err = zvec_doc_get_field_value_basic(doc, "bool_field", ZVEC_DATA_TYPE_BOOL,
+                                       &bool_result, sizeof(bool_result));
+  TEST_ASSERT(err == ZVEC_OK);
+  TEST_ASSERT(bool_result == true);
+
+  // INT32 type
+  // INT32_MIN is used instead of the literal -2147483648: the literal is
+  // parsed as the negation of 2147483648, which does not fit in int.
+  memset(&field, 0, sizeof(field));
+  field.name.data = "int32_field";
+  field.name.length = strlen("int32_field");
+  field.data_type = ZVEC_DATA_TYPE_INT32;
+  field.value.int32_value = INT32_MIN;  // Min int32
+  err = zvec_doc_add_field_by_struct(doc, &field);
+  TEST_ASSERT(err == ZVEC_OK);
+
+  int32_t int32_result = 0;
+  err = zvec_doc_get_field_value_basic(doc, "int32_field", ZVEC_DATA_TYPE_INT32,
+                                       &int32_result, sizeof(int32_result));
+  TEST_ASSERT(err == ZVEC_OK);
+  TEST_ASSERT(int32_result == INT32_MIN);
+
+  // INT64 type
+  memset(&field, 0, sizeof(field));
+  field.name.data = "int64_field";
+  field.name.length = strlen("int64_field");
+  field.data_type = ZVEC_DATA_TYPE_INT64;
+  field.value.int64_value = 9223372036854775807LL;  // Max int64
+  err = zvec_doc_add_field_by_struct(doc, &field);
+  TEST_ASSERT(err == ZVEC_OK);
+
+  int64_t int64_result = 0;
+  err = zvec_doc_get_field_value_basic(doc, "int64_field", ZVEC_DATA_TYPE_INT64,
+                                       &int64_result, sizeof(int64_result));
+  TEST_ASSERT(err == ZVEC_OK);
+  TEST_ASSERT(int64_result == 9223372036854775807LL);
+
+  // UINT32 type
+  memset(&field, 0, sizeof(field));
+  field.name.data = "uint32_field";
+  field.name.length = strlen("uint32_field");
+  field.data_type = ZVEC_DATA_TYPE_UINT32;
+  field.value.uint32_value = 4294967295U;  // Max uint32
+  err = zvec_doc_add_field_by_struct(doc, &field);
+  TEST_ASSERT(err == ZVEC_OK);
+
+  uint32_t uint32_result = 0;
+  err =
+      zvec_doc_get_field_value_basic(doc, "uint32_field", ZVEC_DATA_TYPE_UINT32,
+                                     &uint32_result, sizeof(uint32_result));
+  TEST_ASSERT(err == ZVEC_OK);
+  TEST_ASSERT(uint32_result == 4294967295U);
+
+  // UINT64 type
+  memset(&field, 0, sizeof(field));
+  field.name.data = "uint64_field";
+  field.name.length = strlen("uint64_field");
+  field.data_type = ZVEC_DATA_TYPE_UINT64;
+  field.value.uint64_value = 18446744073709551615ULL;  // Max uint64
+  err = zvec_doc_add_field_by_struct(doc, &field);
+  TEST_ASSERT(err == ZVEC_OK);
+
+  uint64_t uint64_result = 0;
+  err =
+      zvec_doc_get_field_value_basic(doc, "uint64_field", ZVEC_DATA_TYPE_UINT64,
+                                     &uint64_result, sizeof(uint64_result));
+  TEST_ASSERT(err == ZVEC_OK);
+  TEST_ASSERT(uint64_result == 18446744073709551615ULL);
+
+  // FLOAT type
+  memset(&field, 0, sizeof(field));
+  field.name.data = "float_field";
+  field.name.length = strlen("float_field");
+  field.data_type = ZVEC_DATA_TYPE_FLOAT;
+  field.value.float_value = 3.14159265359f;
+  err = zvec_doc_add_field_by_struct(doc, &field);
+  TEST_ASSERT(err == ZVEC_OK);
+
+  float float_result = 0.0f;
+  err = zvec_doc_get_field_value_basic(doc, "float_field", ZVEC_DATA_TYPE_FLOAT,
+                                       &float_result, sizeof(float_result));
+  TEST_ASSERT(err == ZVEC_OK);
+  TEST_ASSERT(fabsf(float_result - 3.14159265359f) < 1e-6f);
+
+  // DOUBLE type
+  memset(&field, 0, sizeof(field));
+  field.name.data = "double_field";
+  field.name.length = strlen("double_field");
+  field.data_type = ZVEC_DATA_TYPE_DOUBLE;
+  field.value.double_value = 2.71828182845904523536;
+  err = zvec_doc_add_field_by_struct(doc, &field);
+  TEST_ASSERT(err == ZVEC_OK);
+
+  double double_result = 0.0;
+  err =
+      zvec_doc_get_field_value_basic(doc, "double_field", ZVEC_DATA_TYPE_DOUBLE,
+                                     &double_result, sizeof(double_result));
+  TEST_ASSERT(err == ZVEC_OK);
+  TEST_ASSERT(fabs(double_result - 2.71828182845904523536) < 1e-15);
+
+  zvec_doc_destroy(doc);
+
+  TEST_END();
+}
+
+void test_doc_get_field_value_copy(void) {
+  TEST_START();
+
+  ZVecDoc *doc = zvec_doc_create();
+  TEST_ASSERT(doc != NULL);
+
+  ZVecErrorCode err;
+
+  printf(
+      "=== Testing zvec_doc_get_field_value_copy with all supported types "
+      "===\n");
+
+  // Basic scalar types first
+  bool bool_val = true;
+  err = zvec_doc_add_field_by_value(doc, "bool_field2", ZVEC_DATA_TYPE_BOOL,
+                                    &bool_val, sizeof(bool_val));
+  TEST_ASSERT(err == ZVEC_OK);
+
+  void *bool_copy_result;
+  size_t bool_copy_size;
+  err = zvec_doc_get_field_value_copy(doc, "bool_field2", ZVEC_DATA_TYPE_BOOL,
+                                      &bool_copy_result, &bool_copy_size);
+  TEST_ASSERT(err == ZVEC_OK);
+  TEST_ASSERT(bool_copy_result != NULL);
+  TEST_ASSERT(bool_copy_size == sizeof(bool));
+  TEST_ASSERT(*(bool *)bool_copy_result == true);
+  free(bool_copy_result);
+
+  int32_t int32_val = -12345;
+  err = zvec_doc_add_field_by_value(doc, "int32_field2", ZVEC_DATA_TYPE_INT32,
+                                    &int32_val, sizeof(int32_val));
+  TEST_ASSERT(err == ZVEC_OK);
+
+  void *int32_copy_result;
+  size_t int32_copy_size;
+  err =
zvec_doc_get_field_value_copy(doc, "int32_field2", ZVEC_DATA_TYPE_INT32, + &int32_copy_result, &int32_copy_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(int32_copy_result != NULL); + TEST_ASSERT(int32_copy_size == sizeof(int32_t)); + TEST_ASSERT(*(int32_t *)int32_copy_result == -12345); + free(int32_copy_result); + + int64_t int64_val = -9223372036854775807LL; + err = zvec_doc_add_field_by_value(doc, "int64_field2", ZVEC_DATA_TYPE_INT64, + &int64_val, sizeof(int64_val)); + TEST_ASSERT(err == ZVEC_OK); + + void *int64_copy_result; + size_t int64_copy_size; + err = zvec_doc_get_field_value_copy(doc, "int64_field2", ZVEC_DATA_TYPE_INT64, + &int64_copy_result, &int64_copy_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(int64_copy_result != NULL); + TEST_ASSERT(int64_copy_size == sizeof(int64_t)); + TEST_ASSERT(*(int64_t *)int64_copy_result == -9223372036854775807LL); + free(int64_copy_result); + + uint32_t uint32_val = 4000000000U; + err = zvec_doc_add_field_by_value(doc, "uint32_field2", ZVEC_DATA_TYPE_UINT32, + &uint32_val, sizeof(uint32_val)); + TEST_ASSERT(err == ZVEC_OK); + + void *uint32_copy_result; + size_t uint32_copy_size; + err = + zvec_doc_get_field_value_copy(doc, "uint32_field2", ZVEC_DATA_TYPE_UINT32, + &uint32_copy_result, &uint32_copy_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(uint32_copy_result != NULL); + TEST_ASSERT(uint32_copy_size == sizeof(uint32_t)); + TEST_ASSERT(*(uint32_t *)uint32_copy_result == 4000000000U); + free(uint32_copy_result); + + uint64_t uint64_val = 18000000000000000000ULL; + err = zvec_doc_add_field_by_value(doc, "uint64_field2", ZVEC_DATA_TYPE_UINT64, + &uint64_val, sizeof(uint64_val)); + TEST_ASSERT(err == ZVEC_OK); + + void *uint64_copy_result; + size_t uint64_copy_size; + err = + zvec_doc_get_field_value_copy(doc, "uint64_field2", ZVEC_DATA_TYPE_UINT64, + &uint64_copy_result, &uint64_copy_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(uint64_copy_result != NULL); + TEST_ASSERT(uint64_copy_size == 
sizeof(uint64_t)); + TEST_ASSERT(*(uint64_t *)uint64_copy_result == 18000000000000000000ULL); + free(uint64_copy_result); + + float float_val = 3.14159265f; + err = zvec_doc_add_field_by_value(doc, "float_field2", ZVEC_DATA_TYPE_FLOAT, + &float_val, sizeof(float_val)); + TEST_ASSERT(err == ZVEC_OK); + + void *float_copy_result; + size_t float_copy_size; + err = zvec_doc_get_field_value_copy(doc, "float_field2", ZVEC_DATA_TYPE_FLOAT, + &float_copy_result, &float_copy_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(float_copy_result != NULL); + TEST_ASSERT(float_copy_size == sizeof(float)); + TEST_ASSERT(fabs(*(float *)float_copy_result - 3.14159265f) < 1e-6f); + free(float_copy_result); + + double double_val = 2.718281828459045; + err = zvec_doc_add_field_by_value(doc, "double_field2", ZVEC_DATA_TYPE_DOUBLE, + &double_val, sizeof(double_val)); + TEST_ASSERT(err == ZVEC_OK); + + void *double_copy_result; + size_t double_copy_size; + err = + zvec_doc_get_field_value_copy(doc, "double_field2", ZVEC_DATA_TYPE_DOUBLE, + &double_copy_result, &double_copy_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(double_copy_result != NULL); + TEST_ASSERT(double_copy_size == sizeof(double)); + TEST_ASSERT(fabs(*(double *)double_copy_result - 2.718281828459045) < 1e-15); + free(double_copy_result); + + // String and binary types + ZVecDocField string_field; + string_field.name.data = "string_field"; + string_field.name.length = strlen("string_field"); + string_field.data_type = ZVEC_DATA_TYPE_STRING; + string_field.value.string_value = *zvec_string_create("Hello, 世界!"); + err = zvec_doc_add_field_by_struct(doc, &string_field); + TEST_ASSERT(err == ZVEC_OK); + + void *string_result; + size_t string_size; + err = zvec_doc_get_field_value_copy( + doc, "string_field", ZVEC_DATA_TYPE_STRING, &string_result, &string_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(string_result != NULL); + TEST_ASSERT(string_size == strlen("Hello, 世界!")); + TEST_ASSERT(memcmp(string_result, 
"Hello, 世界!", string_size) == 0); + free(string_result); + + ZVecDocField binary_field; + binary_field.name.data = "binary_field"; + binary_field.name.length = strlen("binary_field"); + binary_field.data_type = ZVEC_DATA_TYPE_BINARY; + uint8_t binary_data[] = {0x00, 0x01, 0x02, 0xFF, 0xFE, 0xFD}; + binary_field.value.string_value = + *zvec_bin_create(binary_data, sizeof(binary_data)); + err = zvec_doc_add_field_by_struct(doc, &binary_field); + TEST_ASSERT(err == ZVEC_OK); + + void *binary_result; + size_t binary_size; + err = zvec_doc_get_field_value_copy( + doc, "binary_field", ZVEC_DATA_TYPE_BINARY, &binary_result, &binary_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(binary_result != NULL); + TEST_ASSERT(binary_size == 6); + TEST_ASSERT(memcmp(binary_result, "\x00\x01\x02\xFF\xFE\xFD", binary_size) == + 0); + free(binary_result); + + // VECTOR_FP32 type + float test_vector[] = {1.1f, 2.2f, 3.3f, 4.4f, 5.5f}; + ZVecDocField fp32_vec_field; + fp32_vec_field.name.data = "fp32_vec_field"; + fp32_vec_field.name.length = strlen("fp32_vec_field"); + fp32_vec_field.data_type = ZVEC_DATA_TYPE_VECTOR_FP32; + fp32_vec_field.value.vector_value.data = test_vector; + fp32_vec_field.value.vector_value.length = 5; + err = zvec_doc_add_field_by_struct(doc, &fp32_vec_field); + TEST_ASSERT(err == ZVEC_OK); + + void *fp32_vec_result; + size_t fp32_vec_size; + err = zvec_doc_get_field_value_copy(doc, "fp32_vec_field", + ZVEC_DATA_TYPE_VECTOR_FP32, + &fp32_vec_result, &fp32_vec_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(fp32_vec_result != NULL); + TEST_ASSERT(fp32_vec_size == 5 * sizeof(float)); + TEST_ASSERT(memcmp(fp32_vec_result, test_vector, fp32_vec_size) == 0); + free(fp32_vec_result); + + // VECTOR_FP16 type (16-bit float vector) + uint16_t fp16_data[] = {0x3C00, 0x4000, 0x4200, + 0x4400}; // FP16: 1.0, 2.0, 3.0, 4.0 + err = zvec_doc_add_field_by_value(doc, "fp16_vec_field", + ZVEC_DATA_TYPE_VECTOR_FP16, fp16_data, + sizeof(fp16_data)); + TEST_ASSERT(err == 
ZVEC_OK); + + void *fp16_result; + size_t fp16_size; + err = zvec_doc_get_field_value_copy(doc, "fp16_vec_field", + ZVEC_DATA_TYPE_VECTOR_FP16, &fp16_result, + &fp16_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(fp16_result != NULL); + TEST_ASSERT(fp16_size == sizeof(fp16_data)); + TEST_ASSERT(memcmp(fp16_result, fp16_data, fp16_size) == 0); + free(fp16_result); + + // VECTOR_INT8 type + int8_t int8_data[] = {-128, -1, 0, 1, 127}; + err = zvec_doc_add_field_by_value(doc, "int8_vec_field", + ZVEC_DATA_TYPE_VECTOR_INT8, int8_data, + sizeof(int8_data)); + TEST_ASSERT(err == ZVEC_OK); + + void *int8_result; + size_t int8_size; + err = zvec_doc_get_field_value_copy(doc, "int8_vec_field", + ZVEC_DATA_TYPE_VECTOR_INT8, &int8_result, + &int8_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(int8_result != NULL); + TEST_ASSERT(int8_size == sizeof(int8_data)); + TEST_ASSERT(memcmp(int8_result, int8_data, int8_size) == 0); + free(int8_result); + + // VECTOR_BINARY32 type (32-bit aligned binary vector) + uint8_t bin32_data[] = {0xAA, 0x55, 0xAA, 0x55}; + err = zvec_doc_add_field_by_value(doc, "bin32_vec_field", + ZVEC_DATA_TYPE_VECTOR_BINARY32, bin32_data, + sizeof(bin32_data)); + TEST_ASSERT(err == ZVEC_OK); + + void *bin32_result; + size_t bin32_size; + err = zvec_doc_get_field_value_copy(doc, "bin32_vec_field", + ZVEC_DATA_TYPE_VECTOR_BINARY32, + &bin32_result, &bin32_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(bin32_result != NULL); + TEST_ASSERT(bin32_size == sizeof(bin32_data)); + TEST_ASSERT(memcmp(bin32_result, bin32_data, bin32_size) == 0); + free(bin32_result); + + // VECTOR_BINARY64 type (64-bit aligned binary vector) + uint64_t bin64_data[] = {0xAA55AA55AA55AA55ULL, 0x55AA55AA55AA55AAULL}; + err = zvec_doc_add_field_by_value(doc, "bin64_vec_field", + ZVEC_DATA_TYPE_VECTOR_BINARY64, bin64_data, + sizeof(bin64_data)); + TEST_ASSERT(err == ZVEC_OK); + + void *bin64_result; + size_t bin64_size; + err = zvec_doc_get_field_value_copy(doc, 
"bin64_vec_field", + ZVEC_DATA_TYPE_VECTOR_BINARY64, + &bin64_result, &bin64_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(bin64_result != NULL); + TEST_ASSERT(bin64_size == sizeof(bin64_data)); + TEST_ASSERT(memcmp(bin64_result, bin64_data, bin64_size) == 0); + free(bin64_result); + + // VECTOR_FP64 type (double precision vector) + double fp64_data[] = {1.1, 2.2, 3.3, 4.4}; + err = zvec_doc_add_field_by_value(doc, "fp64_vec_field", + ZVEC_DATA_TYPE_VECTOR_FP64, fp64_data, + sizeof(fp64_data)); + TEST_ASSERT(err == ZVEC_OK); + + void *fp64_result; + size_t fp64_size; + err = zvec_doc_get_field_value_copy(doc, "fp64_vec_field", + ZVEC_DATA_TYPE_VECTOR_FP64, &fp64_result, + &fp64_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(fp64_result != NULL); + TEST_ASSERT(fp64_size == sizeof(fp64_data)); + TEST_ASSERT(memcmp(fp64_result, fp64_data, fp64_size) == 0); + free(fp64_result); + + // VECTOR_INT16 type + int16_t int16_data[] = {-32768, -1, 0, 1, 32767}; + err = zvec_doc_add_field_by_value(doc, "int16_vec_field", + ZVEC_DATA_TYPE_VECTOR_INT16, int16_data, + sizeof(int16_data)); + TEST_ASSERT(err == ZVEC_OK); + + void *int16_result; + size_t int16_size; + err = zvec_doc_get_field_value_copy(doc, "int16_vec_field", + ZVEC_DATA_TYPE_VECTOR_INT16, + &int16_result, &int16_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(int16_result != NULL); + TEST_ASSERT(int16_size == sizeof(int16_data)); + TEST_ASSERT(memcmp(int16_result, int16_data, int16_size) == 0); + free(int16_result); + + // SPARSE_VECTOR_FP16 type - format: [nnz(uint32_t)][indices...][values...] 
+ uint32_t sparse_fp16_nnz = 3; + size_t sparse_fp16_size_input = + sizeof(uint32_t) + + sparse_fp16_nnz * (sizeof(uint32_t) + sizeof(uint16_t)); + void *sparse_fp16_input = malloc(sparse_fp16_size_input); + uint32_t *fp16_nnz_ptr = (uint32_t *)sparse_fp16_input; + *fp16_nnz_ptr = sparse_fp16_nnz; + uint32_t *fp16_indices = + (uint32_t *)((char *)sparse_fp16_input + sizeof(uint32_t)); + uint16_t *fp16_values = + (uint16_t *)((char *)sparse_fp16_input + sizeof(uint32_t) + + sparse_fp16_nnz * sizeof(uint32_t)); + fp16_indices[0] = 0; + fp16_indices[1] = 5; + fp16_indices[2] = 10; + fp16_values[0] = 0x3C00; + fp16_values[1] = 0x4000; + fp16_values[2] = 0x4200; // FP16: 1.0, 2.0, 3.0 + err = zvec_doc_add_field_by_value(doc, "sparse_fp16_field", + ZVEC_DATA_TYPE_SPARSE_VECTOR_FP16, + sparse_fp16_input, sparse_fp16_size_input); + TEST_ASSERT(err == ZVEC_OK); + free(sparse_fp16_input); + + void *sparse_fp16_result; + size_t sparse_fp16_result_size; + err = zvec_doc_get_field_value_copy( + doc, "sparse_fp16_field", ZVEC_DATA_TYPE_SPARSE_VECTOR_FP16, + &sparse_fp16_result, &sparse_fp16_result_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(sparse_fp16_result != NULL); + // Sparse vector format: [nnz(size_t)][indices...][values...] + size_t retrieved_nnz = *(size_t *)sparse_fp16_result; + TEST_ASSERT(retrieved_nnz == 3); + uint32_t *retrieved_fp16_indices = + (uint32_t *)((char *)sparse_fp16_result + sizeof(size_t)); + uint16_t *retrieved_fp16_vals = + (uint16_t *)((char *)sparse_fp16_result + sizeof(size_t) + + retrieved_nnz * sizeof(uint32_t)); + TEST_ASSERT(retrieved_fp16_indices[0] == 0); + TEST_ASSERT(retrieved_fp16_indices[1] == 5); + TEST_ASSERT(retrieved_fp16_indices[2] == 10); + TEST_ASSERT(retrieved_fp16_vals[0] == 0x3C00); + TEST_ASSERT(retrieved_fp16_vals[1] == 0x4000); + TEST_ASSERT(retrieved_fp16_vals[2] == 0x4200); + free(sparse_fp16_result); + + // SPARSE_VECTOR_FP32 type - format: [nnz(uint32_t)][indices...][values...] 
+ uint32_t sparse_fp32_nnz = 4; + size_t sparse_fp32_size_input = + sizeof(uint32_t) + sparse_fp32_nnz * (sizeof(uint32_t) + sizeof(float)); + void *sparse_fp32_input = malloc(sparse_fp32_size_input); + uint32_t *fp32_nnz_ptr = (uint32_t *)sparse_fp32_input; + *fp32_nnz_ptr = sparse_fp32_nnz; + uint32_t *fp32_indices = + (uint32_t *)((char *)sparse_fp32_input + sizeof(uint32_t)); + float *fp32_values = (float *)((char *)sparse_fp32_input + sizeof(uint32_t) + + sparse_fp32_nnz * sizeof(uint32_t)); + fp32_indices[0] = 2; + fp32_indices[1] = 7; + fp32_indices[2] = 15; + fp32_indices[3] = 20; + fp32_values[0] = 1.5f; + fp32_values[1] = 2.5f; + fp32_values[2] = 3.5f; + fp32_values[3] = 4.5f; + err = zvec_doc_add_field_by_value(doc, "sparse_fp32_field", + ZVEC_DATA_TYPE_SPARSE_VECTOR_FP32, + sparse_fp32_input, sparse_fp32_size_input); + TEST_ASSERT(err == ZVEC_OK); + free(sparse_fp32_input); + + void *sparse_fp32_result; + size_t sparse_fp32_result_size; + err = zvec_doc_get_field_value_copy( + doc, "sparse_fp32_field", ZVEC_DATA_TYPE_SPARSE_VECTOR_FP32, + &sparse_fp32_result, &sparse_fp32_result_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(sparse_fp32_result != NULL); + retrieved_nnz = *(size_t *)sparse_fp32_result; + TEST_ASSERT(retrieved_nnz == 4); + uint32_t *retrieved_fp32_indices = + (uint32_t *)((char *)sparse_fp32_result + sizeof(size_t)); + float *retrieved_fp32_vals = + (float *)((char *)sparse_fp32_result + sizeof(size_t) + + retrieved_nnz * sizeof(uint32_t)); + TEST_ASSERT(retrieved_fp32_indices[0] == 2); + TEST_ASSERT(retrieved_fp32_indices[1] == 7); + TEST_ASSERT(retrieved_fp32_indices[2] == 15); + TEST_ASSERT(retrieved_fp32_indices[3] == 20); + TEST_ASSERT(fabs(retrieved_fp32_vals[0] - 1.5f) < 1e-5f); + TEST_ASSERT(fabs(retrieved_fp32_vals[1] - 2.5f) < 1e-5f); + TEST_ASSERT(fabs(retrieved_fp32_vals[2] - 3.5f) < 1e-5f); + TEST_ASSERT(fabs(retrieved_fp32_vals[3] - 4.5f) < 1e-5f); + free(sparse_fp32_result); + + // ARRAY_BINARY type + // Format: 
[length(uint32_t)][data][length][data]... + uint8_t array_bin_data[] = { + 1, 0, 0, 0, 0x01, // length=1, data=0x01 + 2, 0, 0, 0, 0x02, 0x03, // length=2, data=0x02,0x03 + 2, 0, 0, 0, 0x04, 0x05 // length=2, data=0x04,0x05 + }; + err = zvec_doc_add_field_by_value(doc, "array_binary_field", + ZVEC_DATA_TYPE_ARRAY_BINARY, array_bin_data, + sizeof(array_bin_data)); + TEST_ASSERT(err == ZVEC_OK); + void *array_binary_result; + size_t array_binary_size; + err = zvec_doc_get_field_value_copy(doc, "array_binary_field", + ZVEC_DATA_TYPE_ARRAY_BINARY, + &array_binary_result, &array_binary_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(array_binary_result != NULL); + // The result is a contiguous buffer of binary data without length prefixes + TEST_ASSERT(array_binary_size == 5); // 1 + 2 + 2 bytes + const uint8_t *result_bytes = (const uint8_t *)array_binary_result; + TEST_ASSERT(result_bytes[0] == 0x01); + TEST_ASSERT(result_bytes[1] == 0x02); + TEST_ASSERT(result_bytes[2] == 0x03); + TEST_ASSERT(result_bytes[3] == 0x04); + TEST_ASSERT(result_bytes[4] == 0x05); + free(array_binary_result); + + + // ARRAY_STRING type + const char *array_str_data[] = {"str1", "str2", "str3"}; + ZVecString *array_zvec_str[3]; + for (int i = 0; i < 3; i++) { + array_zvec_str[i] = zvec_string_create(array_str_data[i]); + } + err = zvec_doc_add_field_by_value(doc, "array_string_field", + ZVEC_DATA_TYPE_ARRAY_STRING, array_zvec_str, + sizeof(array_zvec_str)); + TEST_ASSERT(err == ZVEC_OK); + + void *array_string_result; + size_t array_string_size; + err = zvec_doc_get_field_value_copy(doc, "array_string_field", + ZVEC_DATA_TYPE_ARRAY_STRING, + &array_string_result, &array_string_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(array_string_result != NULL); + free(array_string_result); + for (int i = 0; i < 3; i++) { + zvec_free_string(array_zvec_str[i]); + } + + free(string_field.value.string_value.data); + + // ARRAY_BOOL type + bool array_bool_data[] = {true, false, true, false, true}; 
+ err = zvec_doc_add_field_by_value(doc, "array_bool_field", + ZVEC_DATA_TYPE_ARRAY_BOOL, array_bool_data, + sizeof(array_bool_data)); + TEST_ASSERT(err == ZVEC_OK); + + void *array_bool_result; + size_t array_bool_size; + err = zvec_doc_get_field_value_copy(doc, "array_bool_field", + ZVEC_DATA_TYPE_ARRAY_BOOL, + &array_bool_result, &array_bool_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(array_bool_result != NULL); + // Verify the bit-packed bool array + uint8_t *bool_bytes = (uint8_t *)array_bool_result; + TEST_ASSERT((bool_bytes[0] & 0x01) != 0); // index 0: true + TEST_ASSERT((bool_bytes[0] & 0x02) == 0); // index 1: false + TEST_ASSERT((bool_bytes[0] & 0x04) != 0); // index 2: true + TEST_ASSERT((bool_bytes[0] & 0x08) == 0); // index 3: false + TEST_ASSERT((bool_bytes[0] & 0x10) != 0); // index 4: true + free(array_bool_result); + + // ARRAY_INT32 type + int32_t array_int32_data[] = {100, 200, 300}; + err = zvec_doc_add_field_by_value(doc, "array_int32_field", + ZVEC_DATA_TYPE_ARRAY_INT32, + array_int32_data, sizeof(array_int32_data)); + TEST_ASSERT(err == ZVEC_OK); + + void *array_int32_result; + size_t array_int32_size; + err = zvec_doc_get_field_value_copy(doc, "array_int32_field", + ZVEC_DATA_TYPE_ARRAY_INT32, + &array_int32_result, &array_int32_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(array_int32_result != NULL); + TEST_ASSERT(array_int32_size == sizeof(array_int32_data)); + TEST_ASSERT(((int32_t *)array_int32_result)[0] == 100); + TEST_ASSERT(((int32_t *)array_int32_result)[1] == 200); + TEST_ASSERT(((int32_t *)array_int32_result)[2] == 300); + free(array_int32_result); + + // ARRAY_INT64 type + int64_t array_int64_data[] = {-9223372036854775807LL, 0, + 9223372036854775807LL}; + err = zvec_doc_add_field_by_value(doc, "array_int64_field", + ZVEC_DATA_TYPE_ARRAY_INT64, + array_int64_data, sizeof(array_int64_data)); + TEST_ASSERT(err == ZVEC_OK); + + void *array_int64_result; + size_t array_int64_size; + err = 
zvec_doc_get_field_value_copy(doc, "array_int64_field", + ZVEC_DATA_TYPE_ARRAY_INT64, + &array_int64_result, &array_int64_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(array_int64_result != NULL); + TEST_ASSERT(array_int64_size == sizeof(array_int64_data)); + TEST_ASSERT(((int64_t *)array_int64_result)[0] == -9223372036854775807LL); + TEST_ASSERT(((int64_t *)array_int64_result)[1] == 0); + TEST_ASSERT(((int64_t *)array_int64_result)[2] == 9223372036854775807LL); + free(array_int64_result); + + // ARRAY_UINT32 type + uint32_t array_uint32_data[] = {0U, 1000000U, 4000000000U}; + err = zvec_doc_add_field_by_value( + doc, "array_uint32_field", ZVEC_DATA_TYPE_ARRAY_UINT32, array_uint32_data, + sizeof(array_uint32_data)); + TEST_ASSERT(err == ZVEC_OK); + + void *array_uint32_result; + size_t array_uint32_size; + err = zvec_doc_get_field_value_copy(doc, "array_uint32_field", + ZVEC_DATA_TYPE_ARRAY_UINT32, + &array_uint32_result, &array_uint32_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(array_uint32_result != NULL); + TEST_ASSERT(array_uint32_size == sizeof(array_uint32_data)); + TEST_ASSERT(((uint32_t *)array_uint32_result)[0] == 0U); + TEST_ASSERT(((uint32_t *)array_uint32_result)[1] == 1000000U); + TEST_ASSERT(((uint32_t *)array_uint32_result)[2] == 4000000000U); + free(array_uint32_result); + + // ARRAY_UINT64 type + uint64_t array_uint64_data[] = {0ULL, 1000000000000ULL, + 18000000000000000000ULL}; + err = zvec_doc_add_field_by_value( + doc, "array_uint64_field", ZVEC_DATA_TYPE_ARRAY_UINT64, array_uint64_data, + sizeof(array_uint64_data)); + TEST_ASSERT(err == ZVEC_OK); + + void *array_uint64_result; + size_t array_uint64_size; + err = zvec_doc_get_field_value_copy(doc, "array_uint64_field", + ZVEC_DATA_TYPE_ARRAY_UINT64, + &array_uint64_result, &array_uint64_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(array_uint64_result != NULL); + TEST_ASSERT(array_uint64_size == sizeof(array_uint64_data)); + TEST_ASSERT(((uint64_t *)array_uint64_result)[0] == 
0ULL); + TEST_ASSERT(((uint64_t *)array_uint64_result)[1] == 1000000000000ULL); + TEST_ASSERT(((uint64_t *)array_uint64_result)[2] == 18000000000000000000ULL); + free(array_uint64_result); + + // ARRAY_FLOAT type + float array_float_data[] = {1.5f, 2.5f, 3.5f}; + err = zvec_doc_add_field_by_value(doc, "array_float_field", + ZVEC_DATA_TYPE_ARRAY_FLOAT, + array_float_data, sizeof(array_float_data)); + TEST_ASSERT(err == ZVEC_OK); + + void *array_float_result; + size_t array_float_size; + err = zvec_doc_get_field_value_copy(doc, "array_float_field", + ZVEC_DATA_TYPE_ARRAY_FLOAT, + &array_float_result, &array_float_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(array_float_result != NULL); + TEST_ASSERT(array_float_size == sizeof(array_float_data)); + TEST_ASSERT(((float *)array_float_result)[0] == 1.5f); + TEST_ASSERT(((float *)array_float_result)[1] == 2.5f); + TEST_ASSERT(((float *)array_float_result)[2] == 3.5f); + free(array_float_result); + + // ARRAY_DOUBLE type + double array_double_data[] = {1.111111, 2.222222, 3.333333}; + err = zvec_doc_add_field_by_value( + doc, "array_double_field", ZVEC_DATA_TYPE_ARRAY_DOUBLE, array_double_data, + sizeof(array_double_data)); + TEST_ASSERT(err == ZVEC_OK); + + void *array_double_result; + size_t array_double_size; + err = zvec_doc_get_field_value_copy(doc, "array_double_field", + ZVEC_DATA_TYPE_ARRAY_DOUBLE, + &array_double_result, &array_double_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(array_double_result != NULL); + TEST_ASSERT(array_double_size == sizeof(array_double_data)); + TEST_ASSERT(fabs(((double *)array_double_result)[0] - 1.111111) < 1e-10); + TEST_ASSERT(fabs(((double *)array_double_result)[1] - 2.222222) < 1e-10); + TEST_ASSERT(fabs(((double *)array_double_result)[2] - 3.333333) < 1e-10); + free(array_double_result); + + + free(binary_field.value.string_value.data); + zvec_doc_destroy(doc); + + TEST_END(); +} + +void test_doc_get_field_value_pointer(void) { + TEST_START(); + + ZVecDoc *doc = 
zvec_doc_create(); + TEST_ASSERT(doc != NULL); + + ZVecErrorCode err; + + // Add fields for pointer testing + ZVecDocField bool_field; + bool_field.name.data = "bool_field"; + bool_field.name.length = strlen("bool_field"); + bool_field.data_type = ZVEC_DATA_TYPE_BOOL; + bool_field.value.bool_value = true; + err = zvec_doc_add_field_by_struct(doc, &bool_field); + TEST_ASSERT(err == ZVEC_OK); + + ZVecDocField int32_field; + int32_field.name.data = "int32_field"; + int32_field.name.length = strlen("int32_field"); + int32_field.data_type = ZVEC_DATA_TYPE_INT32; + int32_field.value.int32_value = -2147483648; + err = zvec_doc_add_field_by_struct(doc, &int32_field); + TEST_ASSERT(err == ZVEC_OK); + + ZVecDocField string_field; + string_field.name.data = "string_field"; + string_field.name.length = strlen("string_field"); + string_field.data_type = ZVEC_DATA_TYPE_STRING; + string_field.value.string_value = *zvec_string_create("Hello, 世界!"); + err = zvec_doc_add_field_by_struct(doc, &string_field); + TEST_ASSERT(err == ZVEC_OK); + + ZVecDocField binary_field; + binary_field.name.data = "binary_field"; + binary_field.name.length = strlen("binary_field"); + binary_field.data_type = ZVEC_DATA_TYPE_BINARY; + uint8_t binary_data[] = {0x00, 0x01, 0x02, 0xFF, 0xFE, 0xFD}; + binary_field.value.string_value = + *zvec_bin_create(binary_data, sizeof(binary_data)); + err = zvec_doc_add_field_by_struct(doc, &binary_field); + TEST_ASSERT(err == ZVEC_OK); + + float test_vector[] = {1.1f, 2.2f, 3.3f, 4.4f, 5.5f}; + ZVecDocField fp32_vec_field; + fp32_vec_field.name.data = "fp32_vec_field"; + fp32_vec_field.name.length = strlen("fp32_vec_field"); + fp32_vec_field.data_type = ZVEC_DATA_TYPE_VECTOR_FP32; + fp32_vec_field.value.vector_value.data = test_vector; + fp32_vec_field.value.vector_value.length = 5; + err = zvec_doc_add_field_by_struct(doc, &fp32_vec_field); + TEST_ASSERT(err == ZVEC_OK); + + // Add more fields for comprehensive pointer testing + int64_t int64_val = 
-9223372036854775807LL; + err = + zvec_doc_add_field_by_value(doc, "int64_field_ptr", ZVEC_DATA_TYPE_INT64, + &int64_val, sizeof(int64_val)); + TEST_ASSERT(err == ZVEC_OK); + + uint32_t uint32_val = 4000000000U; + err = zvec_doc_add_field_by_value(doc, "uint32_field_ptr", + ZVEC_DATA_TYPE_UINT32, &uint32_val, + sizeof(uint32_val)); + TEST_ASSERT(err == ZVEC_OK); + + uint64_t uint64_val = 18000000000000000000ULL; + err = zvec_doc_add_field_by_value(doc, "uint64_field_ptr", + ZVEC_DATA_TYPE_UINT64, &uint64_val, + sizeof(uint64_val)); + TEST_ASSERT(err == ZVEC_OK); + + float float_val = 3.14159265f; + err = + zvec_doc_add_field_by_value(doc, "float_field_ptr", ZVEC_DATA_TYPE_FLOAT, + &float_val, sizeof(float_val)); + TEST_ASSERT(err == ZVEC_OK); + + double double_val = 2.718281828459045; + err = zvec_doc_add_field_by_value(doc, "double_field_ptr", + ZVEC_DATA_TYPE_DOUBLE, &double_val, + sizeof(double_val)); + TEST_ASSERT(err == ZVEC_OK); + + // VECTOR_BINARY64 + uint64_t bin64_vec_data[] = {0xAA55AA55AA55AA55ULL, 0x55AA55AA55AA55AAULL}; + err = zvec_doc_add_field_by_value(doc, "bin64_vec_field_ptr", + ZVEC_DATA_TYPE_VECTOR_BINARY64, + bin64_vec_data, sizeof(bin64_vec_data)); + TEST_ASSERT(err == ZVEC_OK); + + // VECTOR_FP16 + uint16_t fp16_vec_data[] = {0x3C00, 0x4000, 0x4200, 0x4400}; + err = zvec_doc_add_field_by_value(doc, "fp16_vec_field_ptr", + ZVEC_DATA_TYPE_VECTOR_FP16, fp16_vec_data, + sizeof(fp16_vec_data)); + TEST_ASSERT(err == ZVEC_OK); + + // VECTOR_FP64 + double fp64_vec_data[] = {1.1, 2.2, 3.3, 4.4}; + err = zvec_doc_add_field_by_value(doc, "fp64_vec_field_ptr", + ZVEC_DATA_TYPE_VECTOR_FP64, fp64_vec_data, + sizeof(fp64_vec_data)); + TEST_ASSERT(err == ZVEC_OK); + + // VECTOR_INT8 + int8_t int8_vec_data[] = {-128, -1, 0, 1, 127}; + err = zvec_doc_add_field_by_value(doc, "int8_vec_field_ptr", + ZVEC_DATA_TYPE_VECTOR_INT8, int8_vec_data, + sizeof(int8_vec_data)); + TEST_ASSERT(err == ZVEC_OK); + + // VECTOR_INT16 + int16_t int16_vec_data[] = {-32768, -1, 
0, 1, 32767}; + err = zvec_doc_add_field_by_value(doc, "int16_vec_field_ptr", + ZVEC_DATA_TYPE_VECTOR_INT16, int16_vec_data, + sizeof(int16_vec_data)); + TEST_ASSERT(err == ZVEC_OK); + + // ARRAY_INT32 + int32_t array_int32_data[] = {100, 200, 300}; + err = zvec_doc_add_field_by_value(doc, "array_int32_field_ptr", + ZVEC_DATA_TYPE_ARRAY_INT32, + array_int32_data, sizeof(array_int32_data)); + TEST_ASSERT(err == ZVEC_OK); + + // ARRAY_INT64 + int64_t array_int64_data[] = {-9223372036854775807LL, 0, + 9223372036854775807LL}; + err = zvec_doc_add_field_by_value(doc, "array_int64_field_ptr", + ZVEC_DATA_TYPE_ARRAY_INT64, + array_int64_data, sizeof(array_int64_data)); + TEST_ASSERT(err == ZVEC_OK); + + // ARRAY_UINT32 + uint32_t array_uint32_data[] = {0U, 1000000U, 4000000000U}; + err = zvec_doc_add_field_by_value( + doc, "array_uint32_field_ptr", ZVEC_DATA_TYPE_ARRAY_UINT32, + array_uint32_data, sizeof(array_uint32_data)); + TEST_ASSERT(err == ZVEC_OK); + + // ARRAY_UINT64 + uint64_t array_uint64_data[] = {0ULL, 1000000000000ULL, + 18000000000000000000ULL}; + err = zvec_doc_add_field_by_value( + doc, "array_uint64_field_ptr", ZVEC_DATA_TYPE_ARRAY_UINT64, + array_uint64_data, sizeof(array_uint64_data)); + TEST_ASSERT(err == ZVEC_OK); + + // ARRAY_FLOAT + float array_float_data[] = {1.5f, 2.5f, 3.5f}; + err = zvec_doc_add_field_by_value(doc, "array_float_field_ptr", + ZVEC_DATA_TYPE_ARRAY_FLOAT, + array_float_data, sizeof(array_float_data)); + TEST_ASSERT(err == ZVEC_OK); + + // ARRAY_DOUBLE + double array_double_data[] = {1.111111, 2.222222, 3.333333}; + err = zvec_doc_add_field_by_value( + doc, "array_double_field_ptr", ZVEC_DATA_TYPE_ARRAY_DOUBLE, + array_double_data, sizeof(array_double_data)); + TEST_ASSERT(err == ZVEC_OK); + + printf( + "=== Testing zvec_doc_get_field_value_pointer with all supported types " + "===\n"); + + // Test pointer access to BOOL + const void *bool_ptr; + size_t bool_ptr_size; + err = zvec_doc_get_field_value_pointer(doc, "bool_field", 
ZVEC_DATA_TYPE_BOOL, + &bool_ptr, &bool_ptr_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(bool_ptr != NULL); + TEST_ASSERT(bool_ptr_size == sizeof(bool)); + TEST_ASSERT(*(const bool *)bool_ptr == true); + + // Test pointer access to INT32 + const void *int32_ptr; + size_t int32_ptr_size; + err = zvec_doc_get_field_value_pointer( + doc, "int32_field", ZVEC_DATA_TYPE_INT32, &int32_ptr, &int32_ptr_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(int32_ptr != NULL); + TEST_ASSERT(int32_ptr_size == sizeof(int32_t)); + TEST_ASSERT(*(const int32_t *)int32_ptr == -2147483648); + + // Test pointer access to STRING + const void *string_ptr; + size_t string_ptr_size; + err = zvec_doc_get_field_value_pointer(doc, "string_field", + ZVEC_DATA_TYPE_STRING, &string_ptr, + &string_ptr_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(string_ptr != NULL); + TEST_ASSERT(string_ptr_size == strlen("Hello, 世界!")); + TEST_ASSERT(memcmp(string_ptr, "Hello, 世界!", string_ptr_size) == 0); + + // Test pointer access to BINARY + const void *binary_ptr; + size_t binary_ptr_size; + err = zvec_doc_get_field_value_pointer(doc, "binary_field", + ZVEC_DATA_TYPE_BINARY, &binary_ptr, + &binary_ptr_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(binary_ptr != NULL); + TEST_ASSERT(binary_ptr_size == 6); + TEST_ASSERT(memcmp(binary_ptr, "\x00\x01\x02\xFF\xFE\xFD", binary_ptr_size) == + 0); + + // Test pointer access to VECTOR_FP32 + const void *fp32_vec_ptr; + size_t fp32_vec_ptr_size; + err = zvec_doc_get_field_value_pointer(doc, "fp32_vec_field", + ZVEC_DATA_TYPE_VECTOR_FP32, + &fp32_vec_ptr, &fp32_vec_ptr_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(fp32_vec_ptr != NULL); + TEST_ASSERT(fp32_vec_ptr_size == 5 * sizeof(float)); + TEST_ASSERT(memcmp(fp32_vec_ptr, test_vector, fp32_vec_ptr_size) == 0); + + // Test pointer access to INT64 + const void *int64_ptr; + size_t int64_ptr_size; + err = zvec_doc_get_field_value_pointer(doc, "int64_field_ptr", + ZVEC_DATA_TYPE_INT64, 
&int64_ptr, + &int64_ptr_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(int64_ptr != NULL); + TEST_ASSERT(int64_ptr_size == sizeof(int64_t)); + TEST_ASSERT(*(const int64_t *)int64_ptr == -9223372036854775807LL); + + // Test pointer access to UINT32 + const void *uint32_ptr; + size_t uint32_ptr_size; + err = zvec_doc_get_field_value_pointer(doc, "uint32_field_ptr", + ZVEC_DATA_TYPE_UINT32, &uint32_ptr, + &uint32_ptr_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(uint32_ptr != NULL); + TEST_ASSERT(uint32_ptr_size == sizeof(uint32_t)); + TEST_ASSERT(*(const uint32_t *)uint32_ptr == 4000000000U); + + // Test pointer access to UINT64 + const void *uint64_ptr; + size_t uint64_ptr_size; + err = zvec_doc_get_field_value_pointer(doc, "uint64_field_ptr", + ZVEC_DATA_TYPE_UINT64, &uint64_ptr, + &uint64_ptr_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(uint64_ptr != NULL); + TEST_ASSERT(uint64_ptr_size == sizeof(uint64_t)); + TEST_ASSERT(*(const uint64_t *)uint64_ptr == 18000000000000000000ULL); + + // Test pointer access to FLOAT + const void *float_ptr; + size_t float_ptr_size; + err = zvec_doc_get_field_value_pointer(doc, "float_field_ptr", + ZVEC_DATA_TYPE_FLOAT, &float_ptr, + &float_ptr_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(float_ptr != NULL); + TEST_ASSERT(float_ptr_size == sizeof(float)); + TEST_ASSERT(fabs(*(const float *)float_ptr - 3.14159265f) < 1e-6f); + + // Test pointer access to DOUBLE + const void *double_ptr; + size_t double_ptr_size; + err = zvec_doc_get_field_value_pointer(doc, "double_field_ptr", + ZVEC_DATA_TYPE_DOUBLE, &double_ptr, + &double_ptr_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(double_ptr != NULL); + TEST_ASSERT(double_ptr_size == sizeof(double)); + TEST_ASSERT(fabs(*(const double *)double_ptr - 2.718281828459045) < 1e-15); + + // Test pointer access to VECTOR_BINARY64 + const void *bin64_vec_ptr; + size_t bin64_vec_ptr_size; + err = zvec_doc_get_field_value_pointer(doc, "bin64_vec_field_ptr", + 
ZVEC_DATA_TYPE_VECTOR_BINARY64, + &bin64_vec_ptr, &bin64_vec_ptr_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(bin64_vec_ptr != NULL); + TEST_ASSERT(bin64_vec_ptr_size == sizeof(bin64_vec_data)); + TEST_ASSERT(memcmp(bin64_vec_ptr, bin64_vec_data, bin64_vec_ptr_size) == 0); + + // Test pointer access to VECTOR_FP16 + const void *fp16_vec_ptr; + size_t fp16_vec_ptr_size; + err = zvec_doc_get_field_value_pointer(doc, "fp16_vec_field_ptr", + ZVEC_DATA_TYPE_VECTOR_FP16, + &fp16_vec_ptr, &fp16_vec_ptr_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(fp16_vec_ptr != NULL); + TEST_ASSERT(fp16_vec_ptr_size == sizeof(fp16_vec_data)); + TEST_ASSERT(memcmp(fp16_vec_ptr, fp16_vec_data, fp16_vec_ptr_size) == 0); + + // Test pointer access to VECTOR_FP64 + const void *fp64_vec_ptr; + size_t fp64_vec_ptr_size; + err = zvec_doc_get_field_value_pointer(doc, "fp64_vec_field_ptr", + ZVEC_DATA_TYPE_VECTOR_FP64, + &fp64_vec_ptr, &fp64_vec_ptr_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(fp64_vec_ptr != NULL); + TEST_ASSERT(fp64_vec_ptr_size == sizeof(fp64_vec_data)); + TEST_ASSERT(memcmp(fp64_vec_ptr, fp64_vec_data, fp64_vec_ptr_size) == 0); + + // Test pointer access to VECTOR_INT8 + const void *int8_vec_ptr; + size_t int8_vec_ptr_size; + err = zvec_doc_get_field_value_pointer(doc, "int8_vec_field_ptr", + ZVEC_DATA_TYPE_VECTOR_INT8, + &int8_vec_ptr, &int8_vec_ptr_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(int8_vec_ptr != NULL); + TEST_ASSERT(int8_vec_ptr_size == sizeof(int8_vec_data)); + TEST_ASSERT(memcmp(int8_vec_ptr, int8_vec_data, int8_vec_ptr_size) == 0); + + // Test pointer access to VECTOR_INT16 + const void *int16_vec_ptr; + size_t int16_vec_ptr_size; + err = zvec_doc_get_field_value_pointer(doc, "int16_vec_field_ptr", + ZVEC_DATA_TYPE_VECTOR_INT16, + &int16_vec_ptr, &int16_vec_ptr_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(int16_vec_ptr != NULL); + TEST_ASSERT(int16_vec_ptr_size == sizeof(int16_vec_data)); + TEST_ASSERT(memcmp(int16_vec_ptr, 
int16_vec_data, int16_vec_ptr_size) == 0); + + // Test pointer access to ARRAY_INT32 + const void *array_int32_ptr; + size_t array_int32_ptr_size; + err = zvec_doc_get_field_value_pointer( + doc, "array_int32_field_ptr", ZVEC_DATA_TYPE_ARRAY_INT32, + &array_int32_ptr, &array_int32_ptr_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(array_int32_ptr != NULL); + TEST_ASSERT(array_int32_ptr_size == sizeof(array_int32_data)); + TEST_ASSERT(((const int32_t *)array_int32_ptr)[0] == 100); + TEST_ASSERT(((const int32_t *)array_int32_ptr)[1] == 200); + TEST_ASSERT(((const int32_t *)array_int32_ptr)[2] == 300); + + // Test pointer access to ARRAY_INT64 + const void *array_int64_ptr; + size_t array_int64_ptr_size; + err = zvec_doc_get_field_value_pointer( + doc, "array_int64_field_ptr", ZVEC_DATA_TYPE_ARRAY_INT64, + &array_int64_ptr, &array_int64_ptr_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(array_int64_ptr != NULL); + TEST_ASSERT(array_int64_ptr_size == sizeof(array_int64_data)); + TEST_ASSERT(((const int64_t *)array_int64_ptr)[0] == -9223372036854775807LL); + TEST_ASSERT(((const int64_t *)array_int64_ptr)[1] == 0); + TEST_ASSERT(((const int64_t *)array_int64_ptr)[2] == 9223372036854775807LL); + + // Test pointer access to ARRAY_UINT32 + const void *array_uint32_ptr; + size_t array_uint32_ptr_size; + err = zvec_doc_get_field_value_pointer( + doc, "array_uint32_field_ptr", ZVEC_DATA_TYPE_ARRAY_UINT32, + &array_uint32_ptr, &array_uint32_ptr_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(array_uint32_ptr != NULL); + TEST_ASSERT(array_uint32_ptr_size == sizeof(array_uint32_data)); + TEST_ASSERT(((const uint32_t *)array_uint32_ptr)[0] == 0U); + TEST_ASSERT(((const uint32_t *)array_uint32_ptr)[1] == 1000000U); + TEST_ASSERT(((const uint32_t *)array_uint32_ptr)[2] == 4000000000U); + + // Test pointer access to ARRAY_UINT64 + const void *array_uint64_ptr; + size_t array_uint64_ptr_size; + err = zvec_doc_get_field_value_pointer( + doc, "array_uint64_field_ptr", 
ZVEC_DATA_TYPE_ARRAY_UINT64, + &array_uint64_ptr, &array_uint64_ptr_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(array_uint64_ptr != NULL); + TEST_ASSERT(array_uint64_ptr_size == sizeof(array_uint64_data)); + TEST_ASSERT(((const uint64_t *)array_uint64_ptr)[0] == 0ULL); + TEST_ASSERT(((const uint64_t *)array_uint64_ptr)[1] == 1000000000000ULL); + TEST_ASSERT(((const uint64_t *)array_uint64_ptr)[2] == + 18000000000000000000ULL); + + // Test pointer access to ARRAY_FLOAT + const void *array_float_ptr; + size_t array_float_ptr_size; + err = zvec_doc_get_field_value_pointer( + doc, "array_float_field_ptr", ZVEC_DATA_TYPE_ARRAY_FLOAT, + &array_float_ptr, &array_float_ptr_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(array_float_ptr != NULL); + TEST_ASSERT(array_float_ptr_size == sizeof(array_float_data)); + TEST_ASSERT(((const float *)array_float_ptr)[0] == 1.5f); + TEST_ASSERT(((const float *)array_float_ptr)[1] == 2.5f); + TEST_ASSERT(((const float *)array_float_ptr)[2] == 3.5f); + + // Test pointer access to ARRAY_DOUBLE + const void *array_double_ptr; + size_t array_double_ptr_size; + err = zvec_doc_get_field_value_pointer( + doc, "array_double_field_ptr", ZVEC_DATA_TYPE_ARRAY_DOUBLE, + &array_double_ptr, &array_double_ptr_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(array_double_ptr != NULL); + TEST_ASSERT(array_double_ptr_size == sizeof(array_double_data)); + TEST_ASSERT(fabs(((const double *)array_double_ptr)[0] - 1.111111) < 1e-10); + TEST_ASSERT(fabs(((const double *)array_double_ptr)[1] - 2.222222) < 1e-10); + TEST_ASSERT(fabs(((const double *)array_double_ptr)[2] - 3.333333) < 1e-10); + + free(string_field.value.string_value.data); + free(binary_field.value.string_value.data); + zvec_doc_destroy(doc); + + TEST_END(); +} + +void test_doc_field_operations(void) { + TEST_START(); + + ZVecDoc *doc = zvec_doc_create(); + TEST_ASSERT(doc != NULL); + + ZVecErrorCode err; + + // Add some fields + ZVecDocField bool_field; + bool_field.name.data = 
"bool_field"; + bool_field.name.length = strlen("bool_field"); + bool_field.data_type = ZVEC_DATA_TYPE_BOOL; + bool_field.value.bool_value = true; + err = zvec_doc_add_field_by_struct(doc, &bool_field); + TEST_ASSERT(err == ZVEC_OK); + + ZVecDocField int32_field; + int32_field.name.data = "int32_field"; + int32_field.name.length = strlen("int32_field"); + int32_field.data_type = ZVEC_DATA_TYPE_INT32; + int32_field.value.int32_value = -2147483648; + err = zvec_doc_add_field_by_struct(doc, &int32_field); + TEST_ASSERT(err == ZVEC_OK); + + ZVecDocField string_field; + string_field.name.data = "string_field"; + string_field.name.length = strlen("string_field"); + string_field.data_type = ZVEC_DATA_TYPE_STRING; + string_field.value.string_value = *zvec_string_create("Hello"); + err = zvec_doc_add_field_by_struct(doc, &string_field); + TEST_ASSERT(err == ZVEC_OK); + + // Test field count + size_t field_count = zvec_doc_get_field_count(doc); + TEST_ASSERT(field_count >= 3); + + // Test field existence checks + TEST_ASSERT(zvec_doc_has_field(doc, "bool_field") == true); + TEST_ASSERT(zvec_doc_has_field(doc, "int32_field") == true); + TEST_ASSERT(zvec_doc_has_field(doc, "string_field") == true); + TEST_ASSERT(zvec_doc_has_field(doc, "nonexistent") == false); + + TEST_ASSERT(zvec_doc_has_field_value(doc, "bool_field") == true); + TEST_ASSERT(zvec_doc_is_field_null(doc, "bool_field") == false); + TEST_ASSERT(zvec_doc_is_field_null(doc, "nonexistent") == false); + + // Test field names retrieval + char **field_names; + size_t name_count; + err = zvec_doc_get_field_names(doc, &field_names, &name_count); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(name_count >= 3); + TEST_ASSERT(field_names != NULL); + + // Verify some expected fields are present + bool found_key_fields = false; + for (size_t i = 0; i < name_count; i++) { + if (strcmp(field_names[i], "bool_field") == 0 || + strcmp(field_names[i], "int32_field") == 0 || + strcmp(field_names[i], "string_field") == 0) { + 
found_key_fields = true; + break; + } + } + TEST_ASSERT(found_key_fields == true); + + zvec_free_str_array(field_names, name_count); + free(string_field.value.string_value.data); + zvec_doc_destroy(doc); + + TEST_END(); +} + +void test_doc_error_conditions(void) { + TEST_START(); + + ZVecDoc *doc = zvec_doc_create(); + TEST_ASSERT(doc != NULL); + + // Add a field for error testing + ZVecDocField bool_field; + bool_field.name.data = "bool_field"; + bool_field.name.length = strlen("bool_field"); + bool_field.data_type = ZVEC_DATA_TYPE_BOOL; + bool_field.value.bool_value = true; + zvec_doc_add_field_by_struct(doc, &bool_field); + + ZVecErrorCode err; + const void *dummy_ptr; + size_t dummy_ptr_size; + int32_t int32_result; + void *string_result; + size_t string_size; + + printf("=== Testing error conditions ===\n"); + + // Test non-existent field + err = + zvec_doc_get_field_value_basic(doc, "missing_field", ZVEC_DATA_TYPE_INT32, + &int32_result, sizeof(int32_result)); + TEST_ASSERT(err != ZVEC_OK); + + err = + zvec_doc_get_field_value_copy(doc, "missing_field", ZVEC_DATA_TYPE_STRING, + &string_result, &string_size); + TEST_ASSERT(err != ZVEC_OK); + + err = zvec_doc_get_field_value_pointer( + doc, "missing_field", ZVEC_DATA_TYPE_FLOAT, &dummy_ptr, &dummy_ptr_size); + TEST_ASSERT(err != ZVEC_OK); + + // Test wrong data type access + err = zvec_doc_get_field_value_basic(doc, "bool_field", ZVEC_DATA_TYPE_INT32, + &int32_result, sizeof(int32_result)); + TEST_ASSERT(err != ZVEC_OK); + + err = zvec_doc_get_field_value_copy(doc, "bool_field", ZVEC_DATA_TYPE_STRING, + &string_result, &string_size); + TEST_ASSERT(err != ZVEC_OK); + + err = zvec_doc_get_field_value_pointer( + doc, "bool_field", ZVEC_DATA_TYPE_FLOAT, &dummy_ptr, &dummy_ptr_size); + TEST_ASSERT(err != ZVEC_OK); + + zvec_doc_destroy(doc); + + TEST_END(); +} + +void test_doc_serialization(void) { + TEST_START(); + + ZVecDoc *doc = zvec_doc_create(); + TEST_ASSERT(doc != NULL); + + ZVecErrorCode err; + + // Add 
fields for serialization testing + ZVecDocField int32_field; + int32_field.name.data = "int32_field"; + int32_field.name.length = strlen("int32_field"); + int32_field.data_type = ZVEC_DATA_TYPE_INT32; + int32_field.value.int32_value = -2147483648; + err = zvec_doc_add_field_by_struct(doc, &int32_field); + TEST_ASSERT(err == ZVEC_OK); + + ZVecDocField string_field; + string_field.name.data = "string_field"; + string_field.name.length = strlen("string_field"); + string_field.data_type = ZVEC_DATA_TYPE_STRING; + string_field.value.string_value = *zvec_string_create("Serialization Test"); + err = zvec_doc_add_field_by_struct(doc, &string_field); + TEST_ASSERT(err == ZVEC_OK); + + printf("=== Testing document serialization ===\n"); + + uint8_t *serialized_data; + size_t data_size; + err = zvec_doc_serialize(doc, &serialized_data, &data_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(serialized_data != NULL); + TEST_ASSERT(data_size > 0); + + ZVecDoc *deserialized_doc; + err = zvec_doc_deserialize(serialized_data, data_size, &deserialized_doc); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(deserialized_doc != NULL); + + // Verify deserialized document has same field count + size_t field_count = zvec_doc_get_field_count(doc); + size_t deserialized_field_count = zvec_doc_get_field_count(deserialized_doc); + TEST_ASSERT(deserialized_field_count == field_count); + + // Test a field from deserialized document + int32_t deserialized_int32; + err = zvec_doc_get_field_value_basic( + deserialized_doc, "int32_field", ZVEC_DATA_TYPE_INT32, + &deserialized_int32, sizeof(deserialized_int32)); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(deserialized_int32 == -2147483648); + + zvec_free_uint8_array(serialized_data); + free(string_field.value.string_value.data); + zvec_doc_destroy(deserialized_doc); + zvec_doc_destroy(doc); + + TEST_END(); +} + +// ============================================================================= +// Index parameter tests +// 
============================================================================= + +void test_index_params(void) { + TEST_START(); + + // Test HNSW parameter creation + ZVecIndexParams *hnsw_params = zvec_test_create_default_hnsw_params(); + TEST_ASSERT(hnsw_params != NULL); + if (hnsw_params) { + free(hnsw_params); + } + + // Test Flat parameter creation + ZVecIndexParams *flat_params = zvec_test_create_default_flat_params(); + TEST_ASSERT(flat_params != NULL); + if (flat_params) { + free(flat_params); + } + + // Test scalar index parameter creation + ZVecIndexParams *invert_params = zvec_test_create_default_invert_params(true); + TEST_ASSERT(invert_params != NULL); + if (invert_params) { + free(invert_params); + } + + TEST_END(); +} + +// ============================================================================= +// Memory management tests +// ============================================================================= +void test_zvec_string_functions(void) { + TEST_START(); + + // Test string creation and basic operations + ZVecString *str1 = zvec_string_create("Hello World"); + TEST_ASSERT(str1 != NULL); + TEST_ASSERT(zvec_string_length(str1) == 11); + TEST_ASSERT(strcmp(zvec_string_c_str(str1), "Hello World") == 0); + + // Test string copy + ZVecString *str2 = zvec_string_copy(str1); + TEST_ASSERT(str2 != NULL); + TEST_ASSERT(zvec_string_length(str2) == 11); + TEST_ASSERT(strcmp(zvec_string_c_str(str2), "Hello World") == 0); + + // Test string comparison + int cmp_result = zvec_string_compare(str1, str2); + TEST_ASSERT(cmp_result == 0); + + ZVecString *str3 = zvec_string_create("Hello"); + TEST_ASSERT(zvec_string_compare(str1, str3) > 0); + + // Test string creation from view + ZVecStringView view = {"Hello View", 10}; + ZVecString *str4 = zvec_string_create_from_view(&view); + TEST_ASSERT(str4 != NULL); + TEST_ASSERT(zvec_string_length(str4) == 10); + TEST_ASSERT(strcmp(zvec_string_c_str(str4), "Hello View") == 0); + + // Test string view with embedded null 
bytes + char binary_data[] = {'H', 'e', 'l', 'l', 'o', '\0', 'W', 'o', 'r', 'l', 'd'}; + ZVecStringView binary_view = {binary_data, 11}; + ZVecString *str5 = zvec_string_create_from_view(&binary_view); + TEST_ASSERT(str5 != NULL); + TEST_ASSERT(zvec_string_length(str5) == 11); + // Note: strcmp will stop at first null byte, so we need to compare manually + TEST_ASSERT(memcmp(zvec_string_c_str(str5), binary_data, 11) == 0); + + // Cleanup + zvec_free_string(str1); + zvec_free_string(str2); + zvec_free_string(str3); + zvec_free_string(str4); + zvec_free_string(str5); + + TEST_END(); +} + +void test_index_params_functions(void) { + TEST_START(); + + // Test index params with new opaque pointer API + // Test HNSW params + ZVecIndexParams *hnsw_params = zvec_index_params_create(ZVEC_INDEX_TYPE_HNSW); + TEST_ASSERT(hnsw_params != NULL); + TEST_ASSERT(zvec_index_params_get_type(hnsw_params) == ZVEC_INDEX_TYPE_HNSW); + // Default metric type is L2, need to set it explicitly + zvec_index_params_set_metric_type(hnsw_params, ZVEC_METRIC_TYPE_COSINE); + TEST_ASSERT(zvec_index_params_get_metric_type(hnsw_params) == + ZVEC_METRIC_TYPE_COSINE); + + int m, ef_construction; + zvec_index_params_get_hnsw_params(hnsw_params, &m, &ef_construction); + TEST_ASSERT(m == 16); + TEST_ASSERT(ef_construction == 200); + + // Test invert index params + ZVecIndexParams *invert_params = + zvec_index_params_create(ZVEC_INDEX_TYPE_INVERT); + TEST_ASSERT(invert_params != NULL); + TEST_ASSERT(zvec_index_params_get_type(invert_params) == + ZVEC_INDEX_TYPE_INVERT); + + bool enable_range_opt, enable_wildcard; + zvec_index_params_get_invert_params(invert_params, &enable_range_opt, + &enable_wildcard); + TEST_ASSERT(enable_range_opt == true); // Default is true + TEST_ASSERT(enable_wildcard == false); // Default is false + + // Test flat index params + ZVecIndexParams *flat_params = zvec_index_params_create(ZVEC_INDEX_TYPE_FLAT); + TEST_ASSERT(flat_params != NULL); + 
TEST_ASSERT(zvec_index_params_get_type(flat_params) == ZVEC_INDEX_TYPE_FLAT); + // Default metric type is L2, need to set it explicitly + zvec_index_params_set_metric_type(flat_params, ZVEC_METRIC_TYPE_IP); + TEST_ASSERT(zvec_index_params_get_metric_type(flat_params) == + ZVEC_METRIC_TYPE_IP); + + // Test IVF index params + ZVecIndexParams *ivf_params = zvec_index_params_create(ZVEC_INDEX_TYPE_IVF); + TEST_ASSERT(ivf_params != NULL); + TEST_ASSERT(zvec_index_params_get_type(ivf_params) == ZVEC_INDEX_TYPE_IVF); + // Default metric type is L2 + TEST_ASSERT(zvec_index_params_get_metric_type(ivf_params) == + ZVEC_METRIC_TYPE_L2); + + int n_list, n_iters; + bool use_soar; + zvec_index_params_get_ivf_params(ivf_params, &n_list, &n_iters, &use_soar); + TEST_ASSERT(n_list == 100); + TEST_ASSERT(n_iters == 10); + TEST_ASSERT(use_soar == false); // Default is false + + // Cleanup + zvec_index_params_destroy(hnsw_params); + zvec_index_params_destroy(invert_params); + zvec_index_params_destroy(flat_params); + zvec_index_params_destroy(ivf_params); + + TEST_END(); +} + +void test_index_params_api_functions(void) { + TEST_START(); + + // Test zvec_index_params_create for HNSW + ZVecIndexParams *hnsw_params = zvec_index_params_create(ZVEC_INDEX_TYPE_HNSW); + TEST_ASSERT(hnsw_params != NULL); + TEST_ASSERT(zvec_index_params_get_type(hnsw_params) == ZVEC_INDEX_TYPE_HNSW); + TEST_ASSERT(zvec_index_params_get_metric_type(hnsw_params) == + ZVEC_METRIC_TYPE_L2); + + // Test zvec_index_params_set_metric_type + zvec_index_params_set_metric_type(hnsw_params, ZVEC_METRIC_TYPE_COSINE); + TEST_ASSERT(zvec_index_params_get_metric_type(hnsw_params) == + ZVEC_METRIC_TYPE_COSINE); + + // Test zvec_index_params_set_hnsw_params + zvec_index_params_set_hnsw_params(hnsw_params, 32, 300); + int m, ef_construction; + zvec_index_params_get_hnsw_params(hnsw_params, &m, &ef_construction); + TEST_ASSERT(m == 32); + TEST_ASSERT(ef_construction == 300); + + // Test zvec_index_params_create for IVF + 
ZVecIndexParams *ivf_params = zvec_index_params_create(ZVEC_INDEX_TYPE_IVF); + TEST_ASSERT(ivf_params != NULL); + TEST_ASSERT(zvec_index_params_get_type(ivf_params) == ZVEC_INDEX_TYPE_IVF); + TEST_ASSERT(zvec_index_params_get_metric_type(ivf_params) == + ZVEC_METRIC_TYPE_L2); + + // Test zvec_index_params_set_ivf_params + zvec_index_params_set_ivf_params(ivf_params, 200, 20, true); + int n_list, n_iters; + bool use_soar; + zvec_index_params_get_ivf_params(ivf_params, &n_list, &n_iters, &use_soar); + TEST_ASSERT(n_list == 200); + TEST_ASSERT(n_iters == 20); + TEST_ASSERT(use_soar == true); + + // Test zvec_index_params_create for INVERT + ZVecIndexParams *invert_params = + zvec_index_params_create(ZVEC_INDEX_TYPE_INVERT); + TEST_ASSERT(invert_params != NULL); + TEST_ASSERT(zvec_index_params_get_type(invert_params) == + ZVEC_INDEX_TYPE_INVERT); + + // Test zvec_index_params_set_invert_params + zvec_index_params_set_invert_params(invert_params, true, true); + bool enable_range_opt, enable_wildcard; + zvec_index_params_get_invert_params(invert_params, &enable_range_opt, + &enable_wildcard); + TEST_ASSERT(enable_range_opt == true); + TEST_ASSERT(enable_wildcard == true); + + // Test zvec_index_params_create for FLAT + ZVecIndexParams *flat_params = zvec_index_params_create(ZVEC_INDEX_TYPE_FLAT); + TEST_ASSERT(flat_params != NULL); + TEST_ASSERT(zvec_index_params_get_type(flat_params) == ZVEC_INDEX_TYPE_FLAT); + zvec_index_params_set_metric_type(flat_params, ZVEC_METRIC_TYPE_IP); + TEST_ASSERT(zvec_index_params_get_metric_type(flat_params) == + ZVEC_METRIC_TYPE_IP); + + // Cleanup + zvec_index_params_destroy(hnsw_params); + zvec_index_params_destroy(ivf_params); + zvec_index_params_destroy(invert_params); + zvec_index_params_destroy(flat_params); + + TEST_END(); +} + +void test_utility_functions(void) { + TEST_START(); + + // Test error code to string conversion + const char *error_str = zvec_error_code_to_string(ZVEC_OK); + TEST_ASSERT(error_str != NULL); + 
TEST_ASSERT(strlen(error_str) > 0); + + error_str = zvec_error_code_to_string(ZVEC_ERROR_INVALID_ARGUMENT); + TEST_ASSERT(error_str != NULL); + + // Test data type to string conversion + const char *data_type_str = zvec_data_type_to_string(ZVEC_DATA_TYPE_INT32); + TEST_ASSERT(data_type_str != NULL); + TEST_ASSERT(strlen(data_type_str) > 0); + + data_type_str = zvec_data_type_to_string(ZVEC_DATA_TYPE_STRING); + TEST_ASSERT(data_type_str != NULL); + + // Test index type to string conversion + const char *index_type_str = zvec_index_type_to_string(ZVEC_INDEX_TYPE_HNSW); + TEST_ASSERT(index_type_str != NULL); + TEST_ASSERT(strlen(index_type_str) > 0); + + index_type_str = zvec_index_type_to_string(ZVEC_INDEX_TYPE_INVERT); + TEST_ASSERT(index_type_str != NULL); + + TEST_END(); +} + +void test_memory_management_functions(void) { + TEST_START(); + + // Test string allocation and deallocation + ZVecString *str = zvec_string_create("Test String"); + TEST_ASSERT(str != NULL); + zvec_free_string(str); + + void *buffer = malloc(64); + TEST_ASSERT(buffer != NULL); + zvec_free_ptr(buffer); + + TEST_END(); +} + +void test_query_params_functions(void) { + TEST_START(); + + // Test basic query parameters creation and destruction + ZVecQueryParams *base_params = zvec_query_params_create(ZVEC_INDEX_TYPE_HNSW); + TEST_ASSERT(base_params != NULL); + + // Test HNSW query parameters + ZVecHnswQueryParams *hnsw_params = + zvec_query_params_hnsw_create(50, 0.5f, false, true); + TEST_ASSERT(hnsw_params != NULL); + + // Test IVF query parameters + ZVecIVFQueryParams *ivf_params = zvec_query_params_ivf_create(10, true, 1.5f); + TEST_ASSERT(ivf_params != NULL); + + // Test Flat query parameters + ZVecFlatQueryParams *flat_params = zvec_query_params_flat_create(false, 2.0f); + TEST_ASSERT(flat_params != NULL); + + // Test setting various parameters on base query params + ZVecErrorCode err; + + // Test index type setting + err = zvec_query_params_set_index_type(base_params, ZVEC_INDEX_TYPE_IVF); 
+ TEST_ASSERT(err == ZVEC_OK); + + // Test radius setting + err = zvec_query_params_set_radius(base_params, 0.8f); + TEST_ASSERT(err == ZVEC_OK); + + // Test linear search setting + err = zvec_query_params_set_is_linear(base_params, false); + TEST_ASSERT(err == ZVEC_OK); + + // Test refiner setting + err = zvec_query_params_set_is_using_refiner(base_params, true); + TEST_ASSERT(err == ZVEC_OK); + + // Test HNSW-specific parameters + err = zvec_query_params_hnsw_set_ef(hnsw_params, 75); + TEST_ASSERT(err == ZVEC_OK); + + // Test IVF-specific parameters + err = zvec_query_params_ivf_set_nprobe(ivf_params, 15); + TEST_ASSERT(err == ZVEC_OK); + + // Test IVF scale factor setting + err = zvec_query_params_ivf_set_scale_factor(ivf_params, 2.5f); + TEST_ASSERT(err == ZVEC_OK); + + // Test destruction of valid parameters + zvec_query_params_destroy(base_params); + zvec_query_params_hnsw_destroy(hnsw_params); + zvec_query_params_ivf_destroy(ivf_params); + zvec_query_params_flat_destroy(flat_params); + + + // Test boundary cases - null pointer handling + zvec_query_params_hnsw_destroy(NULL); + zvec_query_params_ivf_destroy(NULL); + zvec_query_params_flat_destroy(NULL); + + + TEST_END(); +} + +void test_collection_stats_functions(void) { + TEST_START(); + + char temp_dir[] = "/tmp/zvec_test_collection_stats_functions"; + + ZVecCollectionSchema *schema = zvec_test_create_temp_schema(); + TEST_ASSERT(schema != NULL); + + if (schema) { + ZVecCollection *collection = NULL; + ZVecErrorCode err = + zvec_collection_create_and_open(temp_dir, schema, NULL, &collection); + TEST_ASSERT(err == ZVEC_OK); + + if (collection) { + ZVecCollectionStats *stats = NULL; + + // Test normal statistics retrieval + err = zvec_collection_get_stats(collection, &stats); + TEST_ASSERT(err == ZVEC_OK); + + if (stats) { + TEST_ASSERT(zvec_collection_stats_get_doc_count(stats) == 0); + zvec_collection_stats_destroy(stats); + } + + // Test NULL parameters + err = zvec_collection_get_stats(NULL, &stats); + 
TEST_ASSERT(err != ZVEC_OK); + + err = zvec_collection_get_stats(collection, NULL); + TEST_ASSERT(err != ZVEC_OK); + + // Test statistics destruction boundary cases + zvec_collection_stats_destroy(NULL); + zvec_collection_destroy(collection); + } + + zvec_collection_schema_destroy(schema); + } + + // Clean up temporary directory + char cmd[256]; + snprintf(cmd, sizeof(cmd), "rm -rf %s", temp_dir); + system(cmd); + + TEST_END(); +} + +void test_collection_dml_functions(void) { + TEST_START(); + + char temp_dir[] = "/tmp/zvec_test_collection_dml"; + + ZVecCollectionSchema *schema = zvec_test_create_temp_schema(); + TEST_ASSERT(schema != NULL); + + if (schema) { + ZVecCollection *collection = NULL; + ZVecErrorCode err = + zvec_collection_create_and_open(temp_dir, schema, NULL, &collection); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(collection != NULL); + + if (collection) { + // Test insertion function boundary cases + size_t success_count, error_count; + + // Test NULL collection + err = zvec_collection_insert(NULL, NULL, 0, &success_count, &error_count); + TEST_ASSERT(err != ZVEC_OK); + + // Test NULL document array + err = zvec_collection_insert(collection, NULL, 0, &success_count, + &error_count); + TEST_ASSERT(err != ZVEC_OK); + + // Test zero document count + ZVecDoc *empty_docs[1]; + err = zvec_collection_insert(collection, (const ZVecDoc **)empty_docs, 0, + &success_count, &error_count); + TEST_ASSERT(err != ZVEC_OK); + + // Test NULL count pointer + err = zvec_collection_insert(collection, (const ZVecDoc **)empty_docs, 1, + NULL, &error_count); + TEST_ASSERT(err != ZVEC_OK); + + // Test update function boundary cases + err = zvec_collection_update(NULL, NULL, 0, &success_count, &error_count); + TEST_ASSERT(err != ZVEC_OK); + + err = zvec_collection_update(collection, NULL, 0, &success_count, + &error_count); + TEST_ASSERT(err != ZVEC_OK); + + err = zvec_collection_update(collection, (const ZVecDoc **)empty_docs, 0, + NULL, &error_count); + TEST_ASSERT(err 
!= ZVEC_OK); + + // Test upsert function boundary cases + err = zvec_collection_upsert(NULL, NULL, 0, &success_count, &error_count); + TEST_ASSERT(err != ZVEC_OK); + + err = zvec_collection_upsert(collection, NULL, 0, &success_count, + &error_count); + TEST_ASSERT(err != ZVEC_OK); + + err = zvec_collection_upsert(collection, (const ZVecDoc **)empty_docs, 0, + NULL, &error_count); + TEST_ASSERT(err != ZVEC_OK); + + // Test deletion function boundary cases + const char *pks[1]; + err = zvec_collection_delete(NULL, NULL, 0, &success_count, &error_count); + TEST_ASSERT(err != ZVEC_OK); + + err = zvec_collection_delete(collection, NULL, 0, &success_count, + &error_count); + TEST_ASSERT(err != ZVEC_OK); + + err = zvec_collection_delete(collection, pks, 0, NULL, &error_count); + TEST_ASSERT(err != ZVEC_OK); + + // Test deletion by filter boundary cases + err = zvec_collection_delete_by_filter(NULL, NULL); + TEST_ASSERT(err != ZVEC_OK); + + err = zvec_collection_delete_by_filter(collection, NULL); + TEST_ASSERT(err != ZVEC_OK); + + // Test detailed DML result APIs + ZVecDoc *result_doc = zvec_test_create_doc(101, schema, NULL); + TEST_ASSERT(result_doc != NULL); + if (result_doc) { + ZVecDoc *result_docs[] = {result_doc}; + ZVecWriteResult *results = NULL; + size_t result_count = 0; + + err = zvec_collection_upsert_with_results(collection, + (const ZVecDoc **)result_docs, + 1, &results, &result_count); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(result_count == 1); + if (results && result_count == 1) { + TEST_ASSERT(results[0].code == ZVEC_OK); + zvec_write_results_free(results, result_count); + } + + const char *delete_pks[] = {"pk_101"}; + results = NULL; + result_count = 0; + err = zvec_collection_delete_with_results(collection, delete_pks, 1, + &results, &result_count); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(result_count == 1); + if (results && result_count == 1) { + zvec_write_results_free(results, result_count); + } + + zvec_doc_destroy(result_doc); + } + + 
zvec_collection_destroy(collection); + } + + zvec_collection_schema_destroy(schema); + } + + // Clean up temporary directory + char cmd[256]; + snprintf(cmd, sizeof(cmd), "rm -rf %s", temp_dir); + system(cmd); + + TEST_END(); +} + +void test_collection_nullable_roundtrip(void) { + TEST_START(); + + char temp_dir[] = "/tmp/zvec_test_collection_nullable_roundtrip"; + zvec_test_delete_dir(temp_dir); + + ZVecCollectionSchema *schema = zvec_test_create_temp_schema(); + TEST_ASSERT(schema != NULL); + if (!schema) { + TEST_END(); + return; + } + + ZVecCollection *collection = NULL; + ZVecErrorCode err = + zvec_collection_create_and_open(temp_dir, schema, NULL, &collection); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(collection != NULL); + + if (collection) { + ZVecDoc *doc = zvec_doc_create(); + TEST_ASSERT(doc != NULL); + if (doc) { + zvec_doc_set_pk(doc, "pk_nullable"); + + int64_t id = 77; + err = zvec_doc_add_field_by_value(doc, "id", ZVEC_DATA_TYPE_INT64, &id, + sizeof(id)); + TEST_ASSERT(err == ZVEC_OK); + + const char *name = "nullable"; + err = zvec_doc_add_field_by_value(doc, "name", ZVEC_DATA_TYPE_STRING, + name, strlen(name)); + TEST_ASSERT(err == ZVEC_OK); + + // "weight" in temp schema is nullable. 
+ err = zvec_doc_set_field_null(doc, "weight"); + TEST_ASSERT(err == ZVEC_OK); + + float dense[128]; + for (size_t i = 0; i < 128; ++i) { + dense[i] = (float)i / 128.0f; + } + err = zvec_doc_add_field_by_value( + doc, "dense", ZVEC_DATA_TYPE_VECTOR_FP32, dense, sizeof(dense)); + TEST_ASSERT(err == ZVEC_OK); + + uint32_t nnz = 3; + uint32_t sparse_indices[] = {1, 5, 9}; + float sparse_values[] = {0.2f, 0.5f, 0.9f}; + char sparse_buffer[sizeof(nnz) + sizeof(sparse_indices) + + sizeof(sparse_values)]; + memcpy(sparse_buffer, &nnz, sizeof(nnz)); + memcpy(sparse_buffer + sizeof(nnz), sparse_indices, + sizeof(sparse_indices)); + memcpy(sparse_buffer + sizeof(nnz) + sizeof(sparse_indices), + sparse_values, sizeof(sparse_values)); + err = zvec_doc_add_field_by_value(doc, "sparse", + ZVEC_DATA_TYPE_SPARSE_VECTOR_FP32, + sparse_buffer, sizeof(sparse_buffer)); + TEST_ASSERT(err == ZVEC_OK); + + ZVecDoc *docs[] = {doc}; + size_t success_count = 0; + size_t error_count = 0; + err = zvec_collection_upsert(collection, (const ZVecDoc **)docs, 1, + &success_count, &error_count); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(success_count == 1); + TEST_ASSERT(error_count == 0); + + const char *pks[] = {"pk_nullable"}; + ZVecDoc **fetched = NULL; + size_t fetched_count = 0; + err = zvec_collection_fetch(collection, pks, 1, &fetched, &fetched_count); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(fetched_count == 1); + if (fetched && fetched_count == 1) { + TEST_ASSERT(zvec_doc_has_field(fetched[0], "weight") == true); + TEST_ASSERT(zvec_doc_has_field_value(fetched[0], "weight") == false); + TEST_ASSERT(zvec_doc_is_field_null(fetched[0], "weight") == true); + } + zvec_docs_free(fetched, fetched_count); + zvec_doc_destroy(doc); + } + + zvec_collection_destroy(collection); + } + + zvec_collection_schema_destroy(schema); + zvec_test_delete_dir(temp_dir); + + TEST_END(); +} + +// ============================================================================= +// Actual Query Execution Tests 
+// ============================================================================= + +void test_actual_vector_queries(void) { + TEST_START(); + + char temp_dir[] = "/tmp/zvec_test_actual_queries"; + + // Create schema with vector field + ZVecCollectionSchema *schema = zvec_collection_schema_create("query_test"); + TEST_ASSERT(schema != NULL); + + if (schema) { + // Add ID field + ZVecFieldSchema *id_field = + zvec_field_schema_create("id", ZVEC_DATA_TYPE_INT64, false, 0); + zvec_collection_schema_add_field(schema, id_field); + + // Add vector field with HNSW index + ZVecIndexParams *hnsw_params = + zvec_index_params_create(ZVEC_INDEX_TYPE_HNSW); + TEST_ASSERT(hnsw_params != NULL); + zvec_index_params_set_metric_type(hnsw_params, ZVEC_METRIC_TYPE_L2); + zvec_index_params_set_hnsw_params(hnsw_params, 16, 100); + ZVecFieldSchema *vec_field = zvec_field_schema_create( + "embedding", ZVEC_DATA_TYPE_VECTOR_FP32, false, 4); + zvec_field_schema_set_hnsw_index(vec_field, hnsw_params); + zvec_collection_schema_add_field(schema, vec_field); + zvec_index_params_destroy(hnsw_params); + + ZVecCollection *collection = NULL; + ZVecErrorCode err = + zvec_collection_create_and_open(temp_dir, schema, NULL, &collection); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(collection != NULL); + + if (collection) { + // Insert test documents + float vec1[] = {1.0f, 0.0f, 0.0f, 0.0f}; + float vec2[] = {0.0f, 1.0f, 0.0f, 0.0f}; + float vec3[] = {0.0f, 0.0f, 1.0f, 0.0f}; + float vec4[] = {0.7f, 0.7f, 0.0f, 0.0f}; // Similar to vec1 and vec2 + + ZVecDoc *docs[4]; + for (int i = 0; i < 4; i++) { + docs[i] = zvec_doc_create(); + zvec_doc_set_pk(docs[i], zvec_test_make_pk(i + 1)); + zvec_doc_add_field_by_value(docs[i], "id", ZVEC_DATA_TYPE_INT64, + &(int64_t){i + 1}, sizeof(int64_t)); + } + + zvec_doc_add_field_by_value( + docs[0], "embedding", ZVEC_DATA_TYPE_VECTOR_FP32, vec1, sizeof(vec1)); + zvec_doc_add_field_by_value( + docs[1], "embedding", ZVEC_DATA_TYPE_VECTOR_FP32, vec2, sizeof(vec2)); + 
zvec_doc_add_field_by_value( + docs[2], "embedding", ZVEC_DATA_TYPE_VECTOR_FP32, vec3, sizeof(vec3)); + zvec_doc_add_field_by_value( + docs[3], "embedding", ZVEC_DATA_TYPE_VECTOR_FP32, vec4, sizeof(vec4)); + + size_t success_count, error_count; + err = zvec_collection_insert(collection, (const ZVecDoc **)docs, 4, + &success_count, &error_count); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(success_count == 4); + TEST_ASSERT(error_count == 0); + + // Flush collection to build index + zvec_collection_flush(collection); + + // Test 1: Basic vector search + ZVecVectorQuery *query1 = zvec_vector_query_create(); + TEST_ASSERT(query1 != NULL); + zvec_vector_query_set_field_name(query1, "embedding"); + zvec_vector_query_set_query_vector(query1, vec1, sizeof(vec1)); + zvec_vector_query_set_topk(query1, 3); + zvec_vector_query_set_include_vector(query1, true); + zvec_vector_query_set_include_doc_id(query1, true); + + ZVecDoc **results = NULL; + size_t result_count = 0; + err = zvec_collection_query(collection, query1, &results, &result_count); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(result_count > 0); + TEST_ASSERT(results != NULL); + + // First result should be vec1 itself (distance ~0) + if (result_count > 0) { + float score = zvec_doc_get_score(results[0]); + TEST_ASSERT(score < 0.001f); // Very small distance + } + + zvec_docs_free(results, result_count); + + // Test 2: Search with filter + zvec_vector_query_set_filter(query1, "id > 2"); + + err = zvec_collection_query(collection, query1, &results, &result_count); + TEST_ASSERT(err == ZVEC_OK); + + // Should only return documents with id > 2 + for (size_t i = 0; i < result_count; i++) { + int64_t id; + zvec_doc_get_field_value_basic(results[i], "id", ZVEC_DATA_TYPE_INT64, + &id, sizeof(id)); + TEST_ASSERT(id > 2); + } + + zvec_docs_free(results, result_count); + + // Cleanup documents and query + for (int i = 0; i < 4; i++) { + zvec_doc_destroy(docs[i]); + } + + zvec_vector_query_destroy(query1); + 
zvec_collection_destroy(collection); + } + + zvec_collection_schema_destroy(schema); + } + + // Clean up + char cmd[256]; + snprintf(cmd, sizeof(cmd), "rm -rf %s", temp_dir); + system(cmd); + + TEST_END(); +} + +void test_index_creation_and_management(void) { + TEST_START(); + + char temp_dir[] = "/tmp/zvec_test_index_management"; + + ZVecCollectionSchema *schema = zvec_test_create_temp_schema(); + TEST_ASSERT(schema != NULL); + + if (schema) { + ZVecCollection *collection = NULL; + ZVecErrorCode err = + zvec_collection_create_and_open(temp_dir, schema, NULL, &collection); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(collection != NULL); + + if (collection) { + // Test 1: Create HNSW index + ZVecIndexParams *hnsw_params = + zvec_index_params_create(ZVEC_INDEX_TYPE_HNSW); + TEST_ASSERT(hnsw_params != NULL); + zvec_index_params_set_metric_type(hnsw_params, ZVEC_METRIC_TYPE_COSINE); + zvec_index_params_set_hnsw_params(hnsw_params, 16, 100); + + err = zvec_collection_create_hnsw_index(collection, "dense", hnsw_params); + TEST_ASSERT(err == ZVEC_OK); + + // Test 2: Create scalar index + ZVecIndexParams *invert_params = + zvec_index_params_create(ZVEC_INDEX_TYPE_INVERT); + TEST_ASSERT(invert_params != NULL); + zvec_index_params_set_invert_params(invert_params, true, false); + + err = zvec_collection_create_invert_index(collection, "name", + invert_params); + TEST_ASSERT(err == ZVEC_OK); + + err = zvec_collection_drop_index(collection, "name"); + TEST_ASSERT(err == ZVEC_OK); + + // Test 3: Optimize collection + err = zvec_collection_optimize(collection); + TEST_ASSERT(err == ZVEC_OK); + + zvec_collection_destroy(collection); + zvec_index_params_destroy(hnsw_params); + zvec_index_params_destroy(invert_params); + } + + zvec_collection_schema_destroy(schema); + } + + // Clean up + char cmd[256]; + snprintf(cmd, sizeof(cmd), "rm -rf %s", temp_dir); + system(cmd); + + TEST_END(); +} + +void test_collection_ddl_operations(void) { + TEST_START(); + + char temp_dir[] = 
"/tmp/zvec_test_collection_ddl"; + + ZVecCollectionSchema *schema = zvec_test_create_temp_schema(); + TEST_ASSERT(schema != NULL); + + size_t field_count = zvec_collection_schema_get_field_count(schema); + + if (schema) { + ZVecCollection *collection = NULL; + ZVecErrorCode err = + zvec_collection_create_and_open(temp_dir, schema, NULL, &collection); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(collection != NULL); + + if (collection) { + // Test 1: Add new column + ZVecFieldSchema *new_field = + zvec_field_schema_create("new_int32", ZVEC_DATA_TYPE_INT32, true, 0); + TEST_ASSERT(new_field != NULL); + + err = zvec_collection_add_column(collection, new_field, NULL); + TEST_ASSERT(err == ZVEC_OK); + + // Test 2: Get collection schema and verify field count + ZVecCollectionSchema *retrieved_schema = NULL; + err = zvec_collection_get_schema(collection, &retrieved_schema); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(retrieved_schema != NULL); + + size_t new_field_count = + zvec_collection_schema_get_field_count(retrieved_schema); + TEST_ASSERT((field_count + 1) == new_field_count); + + // Test 3: Alter column + ZVecFieldSchema *alter_field = + zvec_field_schema_create("new_float", ZVEC_DATA_TYPE_FLOAT, true, 0); + TEST_ASSERT(alter_field != NULL); + + err = zvec_collection_alter_column(collection, "new_int32", "", + alter_field); + TEST_ASSERT(err == ZVEC_OK); + + // Test 4: Drop column + err = zvec_collection_drop_column(collection, "new_float"); + TEST_ASSERT(err == ZVEC_OK); + + // Test 5: Verify field count after drop + err = zvec_collection_get_schema(collection, &retrieved_schema); + TEST_ASSERT(err == ZVEC_OK); + new_field_count = + zvec_collection_schema_get_field_count(retrieved_schema); + TEST_ASSERT(new_field_count == field_count); + + zvec_collection_schema_destroy(retrieved_schema); + zvec_field_schema_destroy(new_field); + zvec_field_schema_destroy(alter_field); + + zvec_collection_destroy(collection); + } + + zvec_collection_schema_destroy(schema); + } + 
+ // Clean up + char cmd[256]; + snprintf(cmd, sizeof(cmd), "rm -rf %s", temp_dir); + system(cmd); + + TEST_END(); +} + +void test_field_ddl_operations(void) { + TEST_START(); + + // Test field schema creation with various configurations + ZVecFieldSchema *field1 = + zvec_field_schema_create("test_field1", ZVEC_DATA_TYPE_STRING, false, 0); + TEST_ASSERT(field1 != NULL); + TEST_ASSERT(strcmp(zvec_field_schema_get_name(field1), "test_field1") == 0); + TEST_ASSERT(zvec_field_schema_get_data_type(field1) == ZVEC_DATA_TYPE_STRING); + TEST_ASSERT(zvec_field_schema_is_nullable(field1) == false); + TEST_ASSERT(zvec_field_schema_get_dimension(field1) == 0); + + ZVecFieldSchema *field2 = zvec_field_schema_create( + "test_field2", ZVEC_DATA_TYPE_VECTOR_FP32, true, 128); + TEST_ASSERT(field2 != NULL); + TEST_ASSERT(zvec_field_schema_get_data_type(field2) == + ZVEC_DATA_TYPE_VECTOR_FP32); + TEST_ASSERT(zvec_field_schema_is_nullable(field2) == true); + TEST_ASSERT(zvec_field_schema_get_dimension(field2) == 128); + + // Test index parameter setting + ZVecIndexParams *hnsw_params = zvec_index_params_create(ZVEC_INDEX_TYPE_HNSW); + TEST_ASSERT(hnsw_params != NULL); + zvec_index_params_set_metric_type(hnsw_params, ZVEC_METRIC_TYPE_L2); + zvec_index_params_set_hnsw_params(hnsw_params, 16, 100); + + ZVecErrorCode err = zvec_field_schema_set_index_params(field2, hnsw_params); + TEST_ASSERT(err == ZVEC_OK); + + // Cleanup + zvec_field_schema_destroy(field1); + zvec_field_schema_destroy(field2); + zvec_index_params_destroy(hnsw_params); + + TEST_END(); +} + +void test_performance_benchmarks(void) { + TEST_START(); + + char temp_dir[] = "/tmp/zvec_test_performance"; + + ZVecCollectionSchema *schema = zvec_collection_schema_create("perf_test"); + TEST_ASSERT(schema != NULL); + + if (schema) { + // Create simple schema for performance testing + ZVecFieldSchema *id_field = + zvec_field_schema_create("id", ZVEC_DATA_TYPE_INT64, false, 0); + zvec_collection_schema_add_field(schema, id_field); 
+ + ZVecFieldSchema *vec_field = + zvec_field_schema_create("vec", ZVEC_DATA_TYPE_VECTOR_FP32, false, 128); + ZVecIndexParams *hnsw_params = + zvec_index_params_create(ZVEC_INDEX_TYPE_HNSW); + zvec_index_params_set_metric_type(hnsw_params, ZVEC_METRIC_TYPE_L2); + zvec_index_params_set_hnsw_params(hnsw_params, 16, 100); + zvec_field_schema_set_hnsw_index(vec_field, hnsw_params); + zvec_collection_schema_add_field(schema, vec_field); + + ZVecCollection *collection = NULL; + ZVecErrorCode err = + zvec_collection_create_and_open(temp_dir, schema, NULL, &collection); + TEST_ASSERT(err == ZVEC_OK); + + TEST_ASSERT(collection != NULL); + + if (collection) { + const size_t BATCH_SIZE = 1000; + const size_t TOTAL_DOCS = 10000; + + // Test bulk insertion performance +#ifdef _POSIX_C_SOURCE + struct timeval start_time, end_time; + gettimeofday(&start_time, NULL); +#else + clock_t start_clock = clock(); +#endif + + for (size_t batch_start = 0; batch_start < TOTAL_DOCS; + batch_start += BATCH_SIZE) { + ZVecDoc *batch_docs[BATCH_SIZE]; + size_t current_batch_size = (batch_start + BATCH_SIZE > TOTAL_DOCS) + ? 
TOTAL_DOCS - batch_start + : BATCH_SIZE; + + // Create batch of documents + for (size_t i = 0; i < current_batch_size; i++) { + batch_docs[i] = zvec_doc_create(); + zvec_doc_set_pk(batch_docs[i], zvec_test_make_pk(batch_start + i)); + + int64_t id = batch_start + i; + zvec_doc_add_field_by_value(batch_docs[i], "id", ZVEC_DATA_TYPE_INT64, + &id, sizeof(id)); + + // Create random vector + float vec[128]; + for (int j = 0; j < 128; j++) { + vec[j] = (float)rand() / RAND_MAX; + } + zvec_doc_add_field_by_value(batch_docs[i], "vec", + ZVEC_DATA_TYPE_VECTOR_FP32, vec, + sizeof(vec)); + } + + // Insert batch + size_t success_count, error_count; + err = zvec_collection_insert(collection, (const ZVecDoc **)batch_docs, + current_batch_size, &success_count, + &error_count); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(success_count == current_batch_size); + TEST_ASSERT(error_count == 0); + + // Cleanup batch documents + for (size_t i = 0; i < current_batch_size; i++) { + zvec_doc_destroy(batch_docs[i]); + } + } + +#ifdef _POSIX_C_SOURCE + gettimeofday(&end_time, NULL); + double insert_time = (end_time.tv_sec - start_time.tv_sec) + + (end_time.tv_usec - start_time.tv_usec) / 1000000.0; +#else + clock_t end_clock = clock(); + double insert_time = ((double)(end_clock - start_clock)) / CLOCKS_PER_SEC; +#endif + printf(" Inserted %zu documents in %.3f seconds (%.0f docs/sec)\n", + TOTAL_DOCS, insert_time, TOTAL_DOCS / insert_time); + + // Flush and optimize + zvec_collection_flush(collection); + zvec_collection_optimize(collection); + + // Test query performance + float query_vec[128]; + for (int i = 0; i < 128; i++) { + query_vec[i] = (float)rand() / RAND_MAX; + } + + ZVecVectorQuery *query = zvec_vector_query_create(); + TEST_ASSERT(query != NULL); + zvec_vector_query_set_field_name(query, "vec"); + zvec_vector_query_set_query_vector(query, query_vec, sizeof(query_vec)); + zvec_vector_query_set_topk(query, 10); + zvec_vector_query_set_include_vector(query, false); + 
zvec_vector_query_set_include_doc_id(query, true); + + const int QUERY_COUNT = 100; +#ifdef _POSIX_C_SOURCE + struct timeval query_start_time, query_end_time; + gettimeofday(&query_start_time, NULL); +#else + clock_t query_start_clock = clock(); +#endif + + for (int q = 0; q < QUERY_COUNT; q++) { + ZVecDoc **results = NULL; + size_t result_count = 0; + + err = zvec_collection_query(collection, query, &results, &result_count); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(result_count <= 10); + + zvec_docs_free(results, result_count); + } + +#ifdef _POSIX_C_SOURCE + gettimeofday(&query_end_time, NULL); + double query_time = + (query_end_time.tv_sec - query_start_time.tv_sec) + + (query_end_time.tv_usec - query_start_time.tv_usec) / 1000000.0; +#else + clock_t query_end_clock = clock(); + double query_time = + ((double)(query_end_clock - query_start_clock)) / CLOCKS_PER_SEC; +#endif + double avg_query_time = + (query_time * 1000) / QUERY_COUNT; // ms per query + printf(" Average query time: %.2f ms\n", avg_query_time); + + zvec_vector_query_destroy(query); + zvec_collection_destroy(collection); + } + + zvec_collection_schema_destroy(schema); + } + + // Clean up + char cmd[256]; + snprintf(cmd, sizeof(cmd), "rm -rf %s", temp_dir); + system(cmd); + + TEST_END(); +} + +// ============================================================================= +// Additional tests for uncovered API functions +// ============================================================================= + +void test_zvec_shutdown(void) { + TEST_START(); + + // Test shutdown + ZVecErrorCode err = zvec_shutdown(); + TEST_ASSERT(err == ZVEC_OK); + + // Re-initialize for other tests + ZVecConfigData *config = zvec_config_data_create(); + TEST_ASSERT(config != NULL); + err = zvec_initialize(config); + TEST_ASSERT(err == ZVEC_OK); + zvec_config_data_destroy(config); + + TEST_END(); +} + +void test_index_params_creation_functions(void) { + TEST_START(); + + // Test HNSW parameters using new API + 
ZVecIndexParams *hnsw_params = zvec_index_params_create(ZVEC_INDEX_TYPE_HNSW); + TEST_ASSERT(hnsw_params != NULL); + TEST_ASSERT(zvec_index_params_get_type(hnsw_params) == ZVEC_INDEX_TYPE_HNSW); + // Default metric type is L2 + TEST_ASSERT(zvec_index_params_get_metric_type(hnsw_params) == + ZVEC_METRIC_TYPE_L2); + + int m, ef_construction; + zvec_index_params_set_metric_type(hnsw_params, ZVEC_METRIC_TYPE_COSINE); + zvec_index_params_set_hnsw_params(hnsw_params, 16, 100); + zvec_index_params_get_hnsw_params(hnsw_params, &m, &ef_construction); + TEST_ASSERT(m == 16); + TEST_ASSERT(ef_construction == 100); + + // Test IVF parameters using new API + ZVecIndexParams *ivf_params = zvec_index_params_create(ZVEC_INDEX_TYPE_IVF); + TEST_ASSERT(ivf_params != NULL); + TEST_ASSERT(zvec_index_params_get_type(ivf_params) == ZVEC_INDEX_TYPE_IVF); + TEST_ASSERT(zvec_index_params_get_metric_type(ivf_params) == + ZVEC_METRIC_TYPE_L2); + + int n_list, n_iters; + bool use_soar; + zvec_index_params_set_ivf_params(ivf_params, 100, 10, true); + zvec_index_params_get_ivf_params(ivf_params, &n_list, &n_iters, &use_soar); + TEST_ASSERT(n_list == 100); + TEST_ASSERT(n_iters == 10); + TEST_ASSERT(use_soar == true); + + // Test Flat parameters using new API + ZVecIndexParams *flat_params = zvec_index_params_create(ZVEC_INDEX_TYPE_FLAT); + TEST_ASSERT(flat_params != NULL); + TEST_ASSERT(zvec_index_params_get_type(flat_params) == ZVEC_INDEX_TYPE_FLAT); + zvec_index_params_set_metric_type(flat_params, ZVEC_METRIC_TYPE_IP); + TEST_ASSERT(zvec_index_params_get_metric_type(flat_params) == + ZVEC_METRIC_TYPE_IP); + + // Test Invert parameters using new API + ZVecIndexParams *invert_params = + zvec_index_params_create(ZVEC_INDEX_TYPE_INVERT); + TEST_ASSERT(invert_params != NULL); + TEST_ASSERT(zvec_index_params_get_type(invert_params) == + ZVEC_INDEX_TYPE_INVERT); + bool enable_range_opt, enable_wildcard; + zvec_index_params_set_invert_params(invert_params, true, false); + 
zvec_index_params_get_invert_params(invert_params, &enable_range_opt, + &enable_wildcard); + TEST_ASSERT(enable_range_opt == true); + TEST_ASSERT(enable_wildcard == false); + + // Cleanup + zvec_index_params_destroy(hnsw_params); + zvec_index_params_destroy(ivf_params); + zvec_index_params_destroy(flat_params); + zvec_index_params_destroy(invert_params); + + TEST_END(); +} + +void test_collection_advanced_index_functions(void) { + TEST_START(); + + const char *temp_dir = "/tmp/zvec_test_advanced_index"; + zvec_test_delete_dir(temp_dir); + + // Create schema + ZVecCollectionSchema *schema = + zvec_collection_schema_create("test_collection"); + TEST_ASSERT(schema != NULL); + + if (schema) { + // Add fields + ZVecFieldSchema *id_field = + zvec_field_schema_create("id", ZVEC_DATA_TYPE_INT64, false, 0); + ZVecFieldSchema *vec_field = + zvec_field_schema_create("vec", ZVEC_DATA_TYPE_VECTOR_FP32, false, 128); + zvec_collection_schema_add_field(schema, id_field); + zvec_collection_schema_add_field(schema, vec_field); + + ZVecCollectionOptions *options = zvec_collection_options_create(); + TEST_ASSERT(options != NULL); + zvec_collection_options_set_max_doc_count_per_segment(options, 1000); + ZVecCollection *collection = NULL; + + ZVecErrorCode err = + zvec_collection_create_and_open(temp_dir, schema, options, &collection); + TEST_ASSERT(err == ZVEC_OK); + + if (collection) { + // Test zvec_collection_create_flat_index + ZVecIndexParams *flat_params = + zvec_index_params_create(ZVEC_INDEX_TYPE_FLAT); + TEST_ASSERT(flat_params != NULL); + zvec_index_params_set_metric_type(flat_params, ZVEC_METRIC_TYPE_L2); + err = zvec_collection_create_flat_index(collection, "vec", flat_params); + TEST_ASSERT(err == ZVEC_OK); + + // Test zvec_collection_create_ivf_index + ZVecIndexParams *ivf_params = + zvec_index_params_create(ZVEC_INDEX_TYPE_IVF); + TEST_ASSERT(ivf_params != NULL); + zvec_index_params_set_metric_type(ivf_params, ZVEC_METRIC_TYPE_L2); + 
zvec_index_params_set_ivf_params(ivf_params, 100, 10, true); + err = zvec_collection_drop_index(collection, + "vec"); // Drop previous index first + TEST_ASSERT(err == ZVEC_OK); + err = zvec_collection_create_ivf_index(collection, "vec", ivf_params); + TEST_ASSERT(err == ZVEC_OK); + + // Test zvec_collection_create_hnsw_index + ZVecIndexParams *hnsw_params = + zvec_index_params_create(ZVEC_INDEX_TYPE_HNSW); + TEST_ASSERT(hnsw_params != NULL); + zvec_index_params_set_metric_type(hnsw_params, ZVEC_METRIC_TYPE_COSINE); + zvec_index_params_set_hnsw_params(hnsw_params, 16, 100); + err = zvec_collection_drop_index(collection, + "vec"); // Drop previous index first + TEST_ASSERT(err == ZVEC_OK); + err = zvec_collection_create_hnsw_index(collection, "vec", hnsw_params); + TEST_ASSERT(err == ZVEC_OK); + + // Test zvec_field_schema_set_ivf_index + ZVecFieldSchema *new_vec_field = zvec_field_schema_create( + "vec2", ZVEC_DATA_TYPE_VECTOR_FP32, false, 128); + TEST_ASSERT(new_vec_field != NULL); + ZVecIndexParams *ivf_params2 = + zvec_index_params_create(ZVEC_INDEX_TYPE_IVF); + TEST_ASSERT(ivf_params2 != NULL); + zvec_index_params_set_metric_type(ivf_params2, ZVEC_METRIC_TYPE_IP); + zvec_index_params_set_ivf_params(ivf_params2, 50, 5, false); + zvec_field_schema_set_ivf_index(new_vec_field, ivf_params2); + TEST_ASSERT(zvec_field_schema_has_index(new_vec_field) == true); + zvec_field_schema_destroy(new_vec_field); + zvec_index_params_destroy(flat_params); + zvec_index_params_destroy(ivf_params); + zvec_index_params_destroy(hnsw_params); + zvec_index_params_destroy(ivf_params2); + + zvec_collection_options_destroy(options); + zvec_collection_destroy(collection); + } + zvec_collection_schema_destroy(schema); + } + + zvec_test_delete_dir(temp_dir); + TEST_END(); +} + +void test_collection_query_functions(void) { + TEST_START(); + + const char *temp_dir = "/tmp/zvec_test_query_funcs"; + zvec_test_delete_dir(temp_dir); + + // Create schema and collection + ZVecCollectionSchema 
*schema = zvec_collection_schema_create("query_test"); + ZVecIndexParams *hnsw_params = zvec_index_params_create(ZVEC_INDEX_TYPE_HNSW); + TEST_ASSERT(hnsw_params != NULL); + zvec_index_params_set_metric_type(hnsw_params, ZVEC_METRIC_TYPE_L2); + zvec_index_params_set_hnsw_params(hnsw_params, 16, 100); + + ZVecFieldSchema *name_field = + zvec_field_schema_create("name", ZVEC_DATA_TYPE_STRING, false, 0); + ZVecFieldSchema *vec_field = + zvec_field_schema_create("vec", ZVEC_DATA_TYPE_VECTOR_FP32, false, 4); + zvec_field_schema_set_hnsw_index(vec_field, hnsw_params); + zvec_index_params_destroy(hnsw_params); + + zvec_collection_schema_add_field(schema, name_field); + zvec_collection_schema_add_field(schema, vec_field); + + ZVecCollection *collection = NULL; + ZVecErrorCode err = + zvec_collection_create_and_open(temp_dir, schema, NULL, &collection); + TEST_ASSERT(err == ZVEC_OK); + + if (collection) { + // Insert test documents + ZVecDoc *doc1 = zvec_doc_create(); + zvec_doc_set_pk(doc1, "doc1"); + float vec1[4] = {1.0f, 0.0f, 0.0f, 0.0f}; + zvec_doc_add_field_by_value(doc1, "vec", ZVEC_DATA_TYPE_VECTOR_FP32, vec1, + sizeof(vec1)); + zvec_doc_add_field_by_value(doc1, "name", ZVEC_DATA_TYPE_STRING, + "document1", 9); + + ZVecDoc *doc2 = zvec_doc_create(); + zvec_doc_set_pk(doc2, "doc2"); + float vec2[4] = {0.0f, 1.0f, 0.0f, 0.0f}; + zvec_doc_add_field_by_value(doc2, "vec", ZVEC_DATA_TYPE_VECTOR_FP32, vec2, + sizeof(vec2)); + zvec_doc_add_field_by_value(doc2, "name", ZVEC_DATA_TYPE_STRING, + "document2", 9); + + ZVecDoc *docs[] = {doc1, doc2}; + size_t success_count, error_count; + err = zvec_collection_insert(collection, (const ZVecDoc **)docs, 2, + &success_count, &error_count); + TEST_ASSERT(err == ZVEC_OK); + + zvec_collection_flush(collection); + zvec_collection_optimize(collection); + + // Test zvec_collection_fetch + const char *pks[] = {"doc1", "doc2"}; + ZVecDoc **results = NULL; + size_t found_count = 0; + err = zvec_collection_fetch(collection, pks, 2, 
&results, &found_count); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(found_count == 2); + zvec_docs_free(results, found_count); + + // Test zvec_collection_query_by_group + ZVecGroupByVectorQuery *group_query = zvec_group_by_vector_query_create(); + TEST_ASSERT(group_query != NULL); + zvec_group_by_vector_query_set_field_name(group_query, "vec"); + float query_vec[4] = {0.5f, 0.5f, 0.0f, 0.0f}; + zvec_group_by_vector_query_set_query_vector(group_query, query_vec, + sizeof(query_vec)); + zvec_group_by_vector_query_set_group_by_field_name(group_query, "name"); + zvec_group_by_vector_query_set_group_count(group_query, 2); + zvec_group_by_vector_query_set_group_topk(group_query, 1); + zvec_group_by_vector_query_set_include_vector(group_query, false); + + const char *output_fields[] = {"name"}; + zvec_group_by_vector_query_set_output_fields(group_query, output_fields, 1); + + ZVecDoc **group_results = NULL; + ZVecString **group_values = NULL; + size_t group_result_count = 0; + err = + zvec_collection_query_by_group(collection, group_query, &group_results, + &group_values, &group_result_count); + TEST_ASSERT(err == ZVEC_OK); + if (group_results) { + zvec_docs_free(group_results, group_result_count); + } + if (group_values) { + for (size_t i = 0; i < group_result_count; i++) { + zvec_free_string(group_values[i]); + } + free(group_values); + } + + zvec_group_by_vector_query_destroy(group_query); + + // Test zvec_collection_get_options + ZVecCollectionOptions *options = NULL; + err = zvec_collection_get_options(collection, &options); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(options != NULL); + zvec_collection_options_destroy(options); + + zvec_collection_destroy(collection); + zvec_doc_destroy(doc1); + zvec_doc_destroy(doc2); + } + + zvec_collection_schema_destroy(schema); + zvec_test_delete_dir(temp_dir); + + TEST_END(); +} + +void test_doc_advanced_functions(void) { + TEST_START(); + + // Test zvec_doc_clear + ZVecDoc *doc = zvec_doc_create(); + zvec_doc_set_pk(doc, 
"test_pk"); + zvec_doc_add_field_by_value(doc, "field1", ZVEC_DATA_TYPE_INT32, + &(int32_t){100}, sizeof(int32_t)); + TEST_ASSERT(zvec_doc_get_field_count(doc) > 0); + zvec_doc_clear(doc); + TEST_ASSERT(zvec_doc_get_field_count(doc) == 0); + + // Test zvec_doc_get_pk_copy + zvec_doc_set_pk(doc, "test_pk_copy"); + const char *pk_copy = zvec_doc_get_pk_copy(doc); + TEST_ASSERT(pk_copy != NULL); + TEST_ASSERT(strcmp(pk_copy, "test_pk_copy") == 0); + free((void *)pk_copy); + + // Test zvec_doc_is_empty + ZVecDoc *empty_doc = zvec_doc_create(); + TEST_ASSERT(zvec_doc_is_empty(empty_doc) == true); + zvec_doc_add_field_by_value(empty_doc, "test", ZVEC_DATA_TYPE_INT32, + &(int32_t){1}, sizeof(int32_t)); + TEST_ASSERT(zvec_doc_is_empty(empty_doc) == false); + zvec_doc_destroy(empty_doc); + + // Test zvec_doc_memory_usage + ZVecDoc *mem_doc = zvec_doc_create(); + zvec_doc_set_pk(mem_doc, "memory_test"); + char large_data[1024]; + memset(large_data, 'A', sizeof(large_data)); + zvec_doc_add_field_by_value(mem_doc, "large_field", ZVEC_DATA_TYPE_STRING, + large_data, sizeof(large_data)); + size_t mem_usage = zvec_doc_memory_usage(mem_doc); + TEST_ASSERT(mem_usage > 0); + zvec_doc_destroy(mem_doc); + + // Test zvec_doc_merge + ZVecDoc *doc1 = zvec_doc_create(); + zvec_doc_set_pk(doc1, "merge_test"); + zvec_doc_add_field_by_value(doc1, "field1", ZVEC_DATA_TYPE_INT32, + &(int32_t){100}, sizeof(int32_t)); + + ZVecDoc *doc2 = zvec_doc_create(); + zvec_doc_add_field_by_value(doc2, "field2", ZVEC_DATA_TYPE_STRING, "merged", + 6); + + zvec_doc_merge(doc1, doc2); + TEST_ASSERT(zvec_doc_has_field(doc1, "field1") == true); + TEST_ASSERT(zvec_doc_has_field(doc1, "field2") == true); + + zvec_doc_destroy(doc1); + zvec_doc_destroy(doc2); + + // Test zvec_doc_validate + ZVecCollectionSchema *schema = zvec_collection_schema_create("validate_test"); + ZVecFieldSchema *val_field = + zvec_field_schema_create("test_field", ZVEC_DATA_TYPE_INT32, false, 0); + zvec_collection_schema_add_field(schema, 
/*
 * Exercises the create/destroy helpers for the C API array wrapper types:
 * ZVecStringArray, ZVecMutableByteArray, ZVecFloatArray and ZVecInt64Array.
 * Each section creates an array, checks the fields the struct exposes, and
 * destroys it again. Results are recorded through the TEST_* macros.
 */
void test_array_memory_functions(void) {
  TEST_START();

  // Test ZVecStringArray: a 3-slot array must report count == 3 and expose a
  // non-NULL `strings` buffer.
  ZVecStringArray *str_array = zvec_string_array_create(3);
  TEST_ASSERT(str_array != NULL);
  if (str_array) {
    TEST_ASSERT(str_array->count == 3);
    TEST_ASSERT(str_array->strings != NULL);

    // Add strings at specific indices
    zvec_string_array_add(str_array, 0, "string1");
    zvec_string_array_add(str_array, 1, "string2");
    zvec_string_array_add(str_array, 2, "string3");

    // Verify strings were added: each slot's `data` must compare equal to the
    // text stored above.
    TEST_ASSERT(strcmp(str_array->strings[0].data, "string1") == 0);
    TEST_ASSERT(strcmp(str_array->strings[1].data, "string2") == 0);
    TEST_ASSERT(strcmp(str_array->strings[2].data, "string3") == 0);
    zvec_string_array_destroy(str_array);
  }

  // Test ZVecMutableByteArray: the argument is checked against `capacity`,
  // and a fresh array is expected to start with length 0.
  ZVecMutableByteArray *byte_array = zvec_byte_array_create(1024);
  TEST_ASSERT(byte_array != NULL);
  if (byte_array) {
    TEST_ASSERT(byte_array->capacity == 1024);
    TEST_ASSERT(byte_array->length == 0);
    TEST_ASSERT(byte_array->data != NULL);

    // Write some data directly into the buffer and update `length` by hand;
    // the test only verifies that the writes read back unchanged.
    byte_array->data[0] = 0x01;
    byte_array->data[1] = 0x02;
    byte_array->data[2] = 0x03;
    byte_array->length = 3;

    TEST_ASSERT(byte_array->length == 3);
    TEST_ASSERT(byte_array->data[0] == 0x01);
    TEST_ASSERT(byte_array->data[1] == 0x02);
    TEST_ASSERT(byte_array->data[2] == 0x03);

    zvec_byte_array_destroy(byte_array);
  }

  // Test ZVecFloatArray: here the argument is checked against `length`.
  ZVecFloatArray *float_array = zvec_float_array_create(10);
  TEST_ASSERT(float_array != NULL);
  if (float_array) {
    TEST_ASSERT(float_array->length == 10);
    TEST_ASSERT(float_array->data != NULL);

    // The test expects zvec_float_array_create to zero-initialize the data.
    // The array is only read here (first and last element), never written.
    TEST_ASSERT(float_array->data[0] == 0.0f);
    TEST_ASSERT(float_array->data[9] == 0.0f);

    zvec_float_array_destroy(float_array);
  }

  // Test ZVecInt64Array: same checks as the float array, with 5 elements.
  ZVecInt64Array *int64_array = zvec_int64_array_create(5);
  TEST_ASSERT(int64_array != NULL);
  if (int64_array) {
    TEST_ASSERT(int64_array->length == 5);
    TEST_ASSERT(int64_array->data != NULL);

    // The test expects zvec_int64_array_create to zero-initialize the data;
    // only the first and last element are read.
    TEST_ASSERT(int64_array->data[0] == 0);
    TEST_ASSERT(int64_array->data[4] == 0);

    zvec_int64_array_destroy(int64_array);
  }

  // Test edge case: create with zero size. The test requires a non-NULL
  // handle even for a zero-sized byte array, and that it can be destroyed.
  ZVecMutableByteArray *zero_array = zvec_byte_array_create(0);
  TEST_ASSERT(zero_array != NULL);
  if (zero_array) {
    zvec_byte_array_destroy(zero_array);
  }

  TEST_END();
}
+ printf("Starting comprehensive C API tests...\n\n"); + + // Clean up previous test directories + printf("Cleaning up previous test directories...\n"); + system("rm -rf /tmp/zvec_test_*"); + printf("Cleanup completed.\n\n"); + + test_version_functions(); + test_error_handling_functions(); + test_zvec_config(); + test_zvec_initialize(); + test_zvec_string_functions(); + + // Schema-related tests + test_schema_basic_operations(); + test_schema_edge_cases(); + test_schema_field_operations(); + test_normal_schema_creation(); + test_schema_with_indexes(); + test_schema_max_doc_count(); + test_collection_schema_helpers(); + test_collection_schema_alter_field(); + + // Field-related tests + test_field_schema_functions(); + test_field_helper_functions(); + test_field_ddl_operations(); + + // Collection-related tests + test_collection_basic_operations(); + test_collection_edge_cases(); + test_collection_delete_by_filter(); + test_collection_stats(); + test_collection_stats_functions(); + test_collection_dml_functions(); + test_collection_nullable_roundtrip(); + test_collection_ddl_operations(); + + // Doc-related tests + test_doc_creation(); + test_doc_primary_key(); + test_doc_basic_operations(); + test_doc_null_field_api(); + test_doc_get_field_value_basic(); + test_doc_get_field_value_copy(); + test_doc_get_field_value_pointer(); + test_doc_field_operations(); + test_doc_error_conditions(); + test_doc_serialization(); + test_doc_add_field_by_value(); + test_doc_add_field_by_struct(); + + // Index tests + test_index_params(); + test_index_params_functions(); + test_index_params_api_functions(); + test_index_creation_and_management(); + + // Query tests + test_query_params_functions(); + test_actual_vector_queries(); + + // Performance tests + // test_performance_benchmarks(); + + // Utility function tests + test_utility_functions(); + + // Memory management tests + test_memory_management_functions(); + + // Additional API coverage tests + test_zvec_shutdown(); + 
test_index_params_creation_functions(); + test_collection_advanced_index_functions(); + test_collection_query_functions(); + test_doc_advanced_functions(); + test_array_memory_functions(); + + printf("\n=== Comprehensive Test Summary ===\n"); + printf("Total tests: %d\n", test_count); + printf("Passed: %d\n", passed_count); + printf("Failed: %d\n", test_count - passed_count); + + return test_count == passed_count ? 0 : 1; +} diff --git a/tests/c/utils.c b/tests/c/utils.c new file mode 100644 index 00000000..f570f9fd --- /dev/null +++ b/tests/c/utils.c @@ -0,0 +1,922 @@ +// Copyright 2025-present the zvec project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
/*
 * NULL-tolerant string duplication.
 *
 * Returns a malloc'd copy of `str` (terminator included), or NULL when `str`
 * is NULL or the allocation fails. The caller releases the copy with free().
 */
static char *strdup_safe(const char *str) {
  if (str == NULL) {
    return NULL;
  }

  const size_t bytes = strlen(str) + 1;  // +1 keeps the trailing '\0'
  char *duplicate = (char *)malloc(bytes);
  if (duplicate == NULL) {
    return NULL;
  }

  memcpy(duplicate, str, bytes);
  return duplicate;
}
zvec_collection_schema_add_field(schema, id_field); + + // Create name field (inverted index without optimization) + ZVecFieldSchema *name_field = + zvec_field_schema_create("name", ZVEC_DATA_TYPE_STRING, false, 0); + zvec_field_schema_set_invert_index(name_field, name_invert_params); + zvec_collection_schema_add_field(schema, name_field); + + // Create weight field (no index) + ZVecFieldSchema *weight_field = + zvec_field_schema_create("weight", ZVEC_DATA_TYPE_FLOAT, true, 0); + zvec_collection_schema_add_field(schema, weight_field); + + // Create dense field (HNSW index) + ZVecFieldSchema *dense_field = + zvec_field_schema_create("dense", ZVEC_DATA_TYPE_VECTOR_FP32, false, 128); + zvec_field_schema_set_hnsw_index(dense_field, dense_hnsw_params); + zvec_collection_schema_add_field(schema, dense_field); + + // Create sparse field (HNSW index) + ZVecFieldSchema *sparse_field = zvec_field_schema_create( + "sparse", ZVEC_DATA_TYPE_SPARSE_VECTOR_FP32, false, 0); + zvec_field_schema_set_hnsw_index(sparse_field, sparse_hnsw_params); + zvec_collection_schema_add_field(schema, sparse_field); + + // Cleanup index parameters + zvec_index_params_destroy(invert_params); + zvec_index_params_destroy(dense_hnsw_params); + zvec_index_params_destroy(sparse_hnsw_params); + zvec_index_params_destroy(name_invert_params); + + return schema; +} + +ZVecCollectionSchema *zvec_test_create_scalar_schema(void) { + // Create collection schema using C API + ZVecCollectionSchema *schema = zvec_collection_schema_create("demo"); + + // Create fields + ZVecFieldSchema *int32_field = + zvec_field_schema_create("int32", ZVEC_DATA_TYPE_INT32, false, 0); + zvec_collection_schema_add_field(schema, int32_field); + + ZVecFieldSchema *string_field = + zvec_field_schema_create("string", ZVEC_DATA_TYPE_STRING, false, 0); + zvec_collection_schema_add_field(schema, string_field); + + return schema; +} + +ZVecCollectionSchema *zvec_test_create_normal_schema( + bool nullable, const char *name, const 
ZVecIndexParams *scalar_index_params, + const ZVecIndexParams *vector_index_params, uint64_t max_doc_count) { + // Create collection schema using C API + ZVecCollectionSchema *schema = + zvec_collection_schema_create(name ? name : "demo"); + zvec_collection_schema_set_max_doc_count_per_segment(schema, max_doc_count); + + // Create scalar fields (8) + const char *scalar_names[] = {"int32", "string", "uint32", "bool", + "float", "double", "int64", "uint64"}; + ZVecDataType scalar_types[] = {ZVEC_DATA_TYPE_INT32, ZVEC_DATA_TYPE_STRING, + ZVEC_DATA_TYPE_UINT32, ZVEC_DATA_TYPE_BOOL, + ZVEC_DATA_TYPE_FLOAT, ZVEC_DATA_TYPE_DOUBLE, + ZVEC_DATA_TYPE_INT64, ZVEC_DATA_TYPE_UINT64}; + + for (int i = 0; i < 8; i++) { + ZVecFieldSchema *field = + zvec_field_schema_create(scalar_names[i], scalar_types[i], nullable, 0); + if (scalar_index_params) { + zvec_field_schema_set_invert_index(field, scalar_index_params); + } + zvec_collection_schema_add_field(schema, field); + } + + // Create array fields (8) + const char *array_names[] = {"array_int32", "array_string", "array_uint32", + "array_bool", "array_float", "array_double", + "array_int64", "array_uint64"}; + ZVecDataType array_types[] = { + ZVEC_DATA_TYPE_ARRAY_INT32, ZVEC_DATA_TYPE_ARRAY_STRING, + ZVEC_DATA_TYPE_ARRAY_UINT32, ZVEC_DATA_TYPE_ARRAY_BOOL, + ZVEC_DATA_TYPE_ARRAY_FLOAT, ZVEC_DATA_TYPE_ARRAY_DOUBLE, + ZVEC_DATA_TYPE_ARRAY_INT64, ZVEC_DATA_TYPE_ARRAY_UINT64}; + + for (int i = 0; i < 8; i++) { + ZVecFieldSchema *field = + zvec_field_schema_create(array_names[i], array_types[i], nullable, 0); + if (scalar_index_params) { + zvec_field_schema_set_invert_index(field, scalar_index_params); + } + zvec_collection_schema_add_field(schema, field); + } + + // Create vector fields (5) + // dense vectors + ZVecFieldSchema *dense_fp32 = zvec_field_schema_create( + "dense_fp32", ZVEC_DATA_TYPE_VECTOR_FP32, false, 128); + if (vector_index_params) { + zvec_field_schema_set_hnsw_index(dense_fp32, vector_index_params); + } + 
zvec_collection_schema_add_field(schema, dense_fp32); + + ZVecFieldSchema *dense_fp16 = zvec_field_schema_create( + "dense_fp16", ZVEC_DATA_TYPE_VECTOR_FP16, false, 128); + ZVecIndexParams *flat_params1 = zvec_test_create_default_flat_params(); + zvec_field_schema_set_flat_index(dense_fp16, flat_params1); + zvec_index_params_destroy(flat_params1); + zvec_collection_schema_add_field(schema, dense_fp16); + + ZVecFieldSchema *dense_int8 = zvec_field_schema_create( + "dense_int8", ZVEC_DATA_TYPE_VECTOR_INT8, false, 128); + ZVecIndexParams *flat_params2 = zvec_test_create_default_flat_params(); + zvec_field_schema_set_flat_index(dense_int8, flat_params2); + zvec_index_params_destroy(flat_params2); + zvec_collection_schema_add_field(schema, dense_int8); + + // sparse vectors + ZVecFieldSchema *sparse_fp32 = zvec_field_schema_create( + "sparse_fp32", ZVEC_DATA_TYPE_SPARSE_VECTOR_FP32, false, 0); + if (vector_index_params) { + zvec_field_schema_set_hnsw_index(sparse_fp32, vector_index_params); + } + zvec_collection_schema_add_field(schema, sparse_fp32); + + ZVecFieldSchema *sparse_fp16 = zvec_field_schema_create( + "sparse_fp16", ZVEC_DATA_TYPE_SPARSE_VECTOR_FP16, false, 0); + ZVecIndexParams *flat_params3 = zvec_test_create_default_flat_params(); + zvec_field_schema_set_flat_index(sparse_fp16, flat_params3); + zvec_index_params_destroy(flat_params3); + zvec_collection_schema_add_field(schema, sparse_fp16); + + return schema; +} + +ZVecCollectionSchema *zvec_test_create_schema_with_scalar_index( + bool nullable, bool enable_optimize, const char *name) { + ZVecIndexParams *invert_params = + zvec_test_create_default_invert_params(enable_optimize); + ZVecCollectionSchema *schema = + zvec_test_create_normal_schema(nullable, name, invert_params, NULL, 1000); + free(invert_params); + return schema; +} + +ZVecCollectionSchema *zvec_test_create_schema_with_vector_index( + bool nullable, const char *name, + const ZVecIndexParams *vector_index_params) { + ZVecIndexParams 
*default_params = NULL; + if (!vector_index_params) { + default_params = zvec_test_create_default_hnsw_params(); + } + + ZVecCollectionSchema *schema = zvec_test_create_normal_schema( + nullable, name, NULL, + vector_index_params ? vector_index_params : default_params, 1000); + + if (default_params) { + free(default_params); + } + + return schema; +} + +ZVecCollectionSchema *zvec_test_create_schema_with_max_doc_count( + uint64_t doc_count) { + return zvec_test_create_normal_schema(false, "demo", NULL, NULL, doc_count); +} + +// ============================================================================= +// Document Creation Helper Functions Implementation +// ============================================================================= + +char *zvec_test_make_pk(uint64_t doc_id) { + char *pk = (char *)malloc(32); // Sufficiently large buffer + if (pk) { + snprintf(pk, 32, "pk_%llu", (unsigned long long)doc_id); + } + return pk; +} + +uint64_t zvec_test_extract_doc_id(const char *pk) { + if (!pk || strlen(pk) < 4) return 0; + return strtoull(pk + 3, NULL, 10); +} + +ZVecDoc *zvec_test_create_doc(uint64_t doc_id, + const ZVecCollectionSchema *schema, + const char *pk) { + if (!schema) return NULL; + ZVecDoc *doc = zvec_doc_create(); + if (!doc) return NULL; + + // Set primary key + char *primary_key = pk ? 
strdup_safe(pk) : zvec_test_make_pk(doc_id); + if (primary_key) { + zvec_doc_set_pk(doc, primary_key); + free(primary_key); + } + + // Create test data for each field + size_t field_count = zvec_collection_schema_get_field_count(schema); + for (size_t i = 0; i < field_count; i++) { + const ZVecFieldSchema *field = zvec_collection_schema_get_field(schema, i); + const char *field_name = zvec_field_schema_get_name(field); + ZVecDataType field_type = zvec_field_schema_get_data_type(field); + uint32_t field_dimension = zvec_field_schema_get_dimension(field); + + switch (field_type) { + case ZVEC_DATA_TYPE_BINARY: { + char binary_str[32]; + snprintf(binary_str, sizeof(binary_str), "binary_%llu", + (unsigned long long)doc_id); + zvec_doc_add_field_by_value(doc, field_name, field_type, binary_str, + strlen(binary_str)); + break; + } + case ZVEC_DATA_TYPE_BOOL: { + zvec_doc_add_field_by_value(doc, field_name, field_type, + &(bool){doc_id % 10 == 0}, sizeof(bool)); + break; + } + case ZVEC_DATA_TYPE_INT32: { + zvec_doc_add_field_by_value(doc, field_name, field_type, + &(int32_t){(int32_t)doc_id}, + sizeof(int32_t)); + break; + } + case ZVEC_DATA_TYPE_INT64: { + zvec_doc_add_field_by_value(doc, field_name, field_type, + &(int64_t){(int64_t)doc_id}, + sizeof(int64_t)); + break; + } + case ZVEC_DATA_TYPE_UINT32: { + zvec_doc_add_field_by_value(doc, field_name, field_type, + &(uint32_t){(uint32_t)doc_id}, + sizeof(uint32_t)); + break; + } + case ZVEC_DATA_TYPE_UINT64: { + zvec_doc_add_field_by_value(doc, field_name, field_type, + &(uint64_t){(uint64_t)doc_id}, + sizeof(uint64_t)); + break; + } + case ZVEC_DATA_TYPE_FLOAT: { + zvec_doc_add_field_by_value(doc, field_name, field_type, + &(float){(float)doc_id}, sizeof(float)); + break; + } + case ZVEC_DATA_TYPE_DOUBLE: { + zvec_doc_add_field_by_value(doc, field_name, field_type, + &(double){(double)doc_id}, sizeof(double)); + break; + } + case ZVEC_DATA_TYPE_STRING: { + char string_val[64]; + snprintf(string_val, 
sizeof(string_val), "value_%llu", + (unsigned long long)doc_id); + zvec_doc_add_field_by_value(doc, field_name, field_type, string_val, + strlen(string_val)); + break; + } + case ZVEC_DATA_TYPE_ARRAY_BOOL: { + bool bool_array[10]; + for (int j = 0; j < 10; j++) { + bool_array[j] = (doc_id + j) % 2 == 0; + } + zvec_doc_add_field_by_value(doc, field_name, field_type, bool_array, + sizeof(bool_array)); + break; + } + case ZVEC_DATA_TYPE_ARRAY_INT32: { + int32_t int32_array[10]; + for (int j = 0; j < 10; j++) { + int32_array[j] = (int32_t)doc_id; + } + zvec_doc_add_field_by_value(doc, field_name, field_type, int32_array, + sizeof(int32_array)); + break; + } + case ZVEC_DATA_TYPE_ARRAY_INT64: { + int64_t int64_array[10]; + for (int j = 0; j < 10; j++) { + int64_array[j] = (int64_t)doc_id; + } + zvec_doc_add_field_by_value(doc, field_name, field_type, int64_array, + sizeof(int64_array)); + break; + } + case ZVEC_DATA_TYPE_ARRAY_UINT32: { + uint32_t uint32_array[10]; + for (int j = 0; j < 10; j++) { + uint32_array[j] = (uint32_t)doc_id; + } + zvec_doc_add_field_by_value(doc, field_name, field_type, uint32_array, + sizeof(uint32_array)); + break; + } + case ZVEC_DATA_TYPE_ARRAY_UINT64: { + uint64_t uint64_array[10]; + for (int j = 0; j < 10; j++) { + uint64_array[j] = (uint64_t)doc_id; + } + zvec_doc_add_field_by_value(doc, field_name, field_type, uint64_array, + sizeof(uint64_array)); + break; + } + case ZVEC_DATA_TYPE_ARRAY_FLOAT: { + float float_array[10]; + for (int j = 0; j < 10; j++) { + float_array[j] = (float)doc_id; + } + zvec_doc_add_field_by_value(doc, field_name, field_type, float_array, + sizeof(float_array)); + break; + } + case ZVEC_DATA_TYPE_ARRAY_DOUBLE: { + double double_array[10]; + for (int j = 0; j < 10; j++) { + double_array[j] = (double)doc_id; + } + zvec_doc_add_field_by_value(doc, field_name, field_type, double_array, + sizeof(double_array)); + break; + } + case ZVEC_DATA_TYPE_ARRAY_STRING: { + // String arrays need special handling + char 
string_data[256]; + size_t offset = 0; + for (int j = 0; j < 10; j++) { + char temp_str[32]; + snprintf(temp_str, sizeof(temp_str), "value_%llu_%d", + (unsigned long long)doc_id, j); + size_t len = strlen(temp_str); + if (offset + len + 1 < sizeof(string_data)) { + strcpy(string_data + offset, temp_str); + offset += len + 1; + } + } + zvec_doc_add_field_by_value(doc, field_name, field_type, string_data, + offset); + break; + } + case ZVEC_DATA_TYPE_VECTOR_BINARY32: { + uint32_t *vector_data = + (uint32_t *)malloc(field_dimension * sizeof(uint32_t)); + if (vector_data) { + for (uint32_t j = 0; j < field_dimension; j++) { + vector_data[j] = (uint32_t)(doc_id + j); + } + zvec_doc_add_field_by_value(doc, field_name, field_type, vector_data, + field_dimension * sizeof(uint32_t)); + free(vector_data); + } + break; + } + case ZVEC_DATA_TYPE_VECTOR_BINARY64: { + uint64_t *vector_data = + (uint64_t *)malloc(field_dimension * sizeof(uint64_t)); + if (vector_data) { + for (uint32_t j = 0; j < field_dimension; j++) { + vector_data[j] = (uint64_t)(doc_id + j); + } + zvec_doc_add_field_by_value(doc, field_name, field_type, vector_data, + field_dimension * sizeof(uint64_t)); + free(vector_data); + } + break; + } + case ZVEC_DATA_TYPE_VECTOR_FP32: { + float *vector_data = (float *)malloc(field_dimension * sizeof(float)); + if (vector_data) { + for (uint32_t j = 0; j < field_dimension; j++) { + vector_data[j] = (float)(doc_id + j * 0.1); + } + zvec_doc_add_field_by_value(doc, field_name, field_type, vector_data, + field_dimension * sizeof(float)); + free(vector_data); + } + break; + } + case ZVEC_DATA_TYPE_VECTOR_FP64: { + double *vector_data = + (double *)malloc(field_dimension * sizeof(double)); + if (vector_data) { + for (uint32_t j = 0; j < field_dimension; j++) { + vector_data[j] = (double)(doc_id + j * 0.1); + } + zvec_doc_add_field_by_value(doc, field_name, field_type, vector_data, + field_dimension * sizeof(double)); + free(vector_data); + } + break; + } + case 
ZVEC_DATA_TYPE_VECTOR_FP16: { + // FP16 needs special handling, simplified to FP32 here + float *vector_data = (float *)malloc(field_dimension * sizeof(float)); + if (vector_data) { + for (uint32_t j = 0; j < field_dimension; j++) { + vector_data[j] = (float)(doc_id + j * 0.1); + } + zvec_doc_add_field_by_value(doc, field_name, field_type, vector_data, + field_dimension * sizeof(float)); + free(vector_data); + } + break; + } + case ZVEC_DATA_TYPE_VECTOR_INT8: { + int8_t *vector_data = + (int8_t *)malloc(field_dimension * sizeof(int8_t)); + if (vector_data) { + for (uint32_t j = 0; j < field_dimension; j++) { + vector_data[j] = (int8_t)((doc_id + j) % 256); + } + zvec_doc_add_field_by_value(doc, field_name, field_type, vector_data, + field_dimension * sizeof(int8_t)); + free(vector_data); + } + break; + } + case ZVEC_DATA_TYPE_VECTOR_INT16: { + int16_t *vector_data = + (int16_t *)malloc(field_dimension * sizeof(int16_t)); + if (vector_data) { + for (uint32_t j = 0; j < field_dimension; j++) { + vector_data[j] = (int16_t)((doc_id + j) % 65536); + } + zvec_doc_add_field_by_value(doc, field_name, field_type, vector_data, + field_dimension * sizeof(int16_t)); + free(vector_data); + } + break; + } + case ZVEC_DATA_TYPE_SPARSE_VECTOR_FP32: { + // Sparse vectors need special handling + uint32_t nnz = field_dimension > 0 ? 
field_dimension / 10 + : 10; // Number of non-zero elements + size_t sparse_size = + sizeof(uint32_t) + nnz * (sizeof(uint32_t) + sizeof(float)); + void *sparse_data = malloc(sparse_size); + if (sparse_data) { + uint32_t *data_ptr = (uint32_t *)sparse_data; + *data_ptr = nnz; // Set number of non-zero elements + uint32_t *indices = data_ptr + 1; + float *values = (float *)(indices + nnz); + for (uint32_t j = 0; j < nnz; j++) { + indices[j] = j * 10; // Index + values[j] = (float)(doc_id + j * 0.1); // Value + } + zvec_doc_add_field_by_value(doc, field_name, field_type, sparse_data, + sparse_size); + free(sparse_data); + } + break; + } + case ZVEC_DATA_TYPE_SPARSE_VECTOR_FP16: { + // Sparse FP16 vectors, simplified handling + uint32_t nnz = field_dimension > 0 ? field_dimension / 10 : 10; + size_t sparse_size = + sizeof(uint32_t) + + nnz * (sizeof(uint32_t) + + sizeof(float)); // Still use float for storage + void *sparse_data = malloc(sparse_size); + if (sparse_data) { + uint32_t *data_ptr = (uint32_t *)sparse_data; + *data_ptr = nnz; + uint32_t *indices = data_ptr + 1; + float *values = (float *)(indices + nnz); + for (uint32_t j = 0; j < nnz; j++) { + indices[j] = j * 10; + values[j] = (float)(doc_id + j * 0.1); + } + zvec_doc_add_field_by_value(doc, field_name, field_type, sparse_data, + sparse_size); + free(sparse_data); + } + break; + } + + default: + // Unsupported data type + break; + } + + // Remove reference to removed variable err + /* + if (err != ZVEC_OK) { + // Error handling: continue processing other fields + } + */ + } + + return doc; +} + +ZVecDoc *zvec_test_create_doc_null(uint64_t doc_id, + const ZVecCollectionSchema *schema, + const char *pk) { + // Reuse create_doc function, but only process vector fields + ZVecDoc *doc = zvec_doc_create(); + if (!doc) return NULL; + + // Set primary key + char *primary_key = pk ? 
strdup_safe(pk) : zvec_test_make_pk(doc_id); + if (primary_key) { + zvec_doc_set_pk(doc, primary_key); + free(primary_key); + } + + // Only create data for vector fields + size_t field_count = zvec_collection_schema_get_field_count(schema); + for (size_t i = 0; i < field_count; i++) { + const ZVecFieldSchema *field = zvec_collection_schema_get_field(schema, i); + const char *field_name = zvec_field_schema_get_name(field); + ZVecDataType field_type = zvec_field_schema_get_data_type(field); + uint32_t field_dimension = zvec_field_schema_get_dimension(field); + + // Only process specific vector type fields + if (field_type != ZVEC_DATA_TYPE_VECTOR_FP32 && + field_type != ZVEC_DATA_TYPE_VECTOR_FP16 && + field_type != ZVEC_DATA_TYPE_VECTOR_INT8 && + field_type != ZVEC_DATA_TYPE_SPARSE_VECTOR_FP32 && + field_type != ZVEC_DATA_TYPE_SPARSE_VECTOR_FP16) { + continue; + } + + ZVecErrorCode err = ZVEC_OK; + + switch (field_type) { + case ZVEC_DATA_TYPE_VECTOR_FP32: { + float *vector_data = (float *)malloc(field_dimension * sizeof(float)); + if (vector_data) { + for (uint32_t j = 0; j < field_dimension; j++) { + vector_data[j] = (float)(doc_id + j * 0.1); + } + err = zvec_doc_add_field_by_value(doc, field_name, field_type, + vector_data, + field_dimension * sizeof(float)); + free(vector_data); + } + break; + } + case ZVEC_DATA_TYPE_VECTOR_FP64: { + double *vector_data = + (double *)malloc(field_dimension * sizeof(double)); + if (vector_data) { + for (uint32_t j = 0; j < field_dimension; j++) { + vector_data[j] = (double)(doc_id + j * 0.1); + } + err = zvec_doc_add_field_by_value(doc, field_name, field_type, + vector_data, + field_dimension * sizeof(double)); + free(vector_data); + } + break; + } + case ZVEC_DATA_TYPE_VECTOR_FP16: { + float *vector_data = (float *)malloc(field_dimension * sizeof(float)); + if (vector_data) { + for (uint32_t j = 0; j < field_dimension; j++) { + vector_data[j] = (float)(doc_id + j * 0.1); + } + err = zvec_doc_add_field_by_value(doc, field_name, 
field_type, + vector_data, + field_dimension * sizeof(float)); + free(vector_data); + } + break; + } + case ZVEC_DATA_TYPE_VECTOR_INT8: { + int8_t *vector_data = + (int8_t *)malloc(field_dimension * sizeof(int8_t)); + if (vector_data) { + for (uint32_t j = 0; j < field_dimension; j++) { + vector_data[j] = (int8_t)(doc_id % 128); + } + err = zvec_doc_add_field_by_value(doc, field_name, field_type, + vector_data, + field_dimension * sizeof(int8_t)); + free(vector_data); + } + break; + } + case ZVEC_DATA_TYPE_VECTOR_INT16: { + int16_t *vector_data = + (int16_t *)malloc(field_dimension * sizeof(int16_t)); + if (vector_data) { + for (uint32_t j = 0; j < field_dimension; j++) { + vector_data[j] = (int16_t)(doc_id % 32768); + } + err = zvec_doc_add_field_by_value(doc, field_name, field_type, + vector_data, + field_dimension * sizeof(int16_t)); + free(vector_data); + } + break; + } + case ZVEC_DATA_TYPE_SPARSE_VECTOR_FP16: + case ZVEC_DATA_TYPE_SPARSE_VECTOR_FP32: { + const size_t nnz = 100; + size_t sparse_size = + sizeof(size_t) + nnz * (sizeof(uint32_t) + sizeof(float)); + char *sparse_data = (char *)malloc(sparse_size); + if (sparse_data) { + char *ptr = sparse_data; + *((size_t *)ptr) = nnz; + ptr += sizeof(size_t); + + for (size_t j = 0; j < nnz; j++) { + *((uint32_t *)ptr) = (uint32_t)j; + ptr += sizeof(uint32_t); + *((float *)ptr) = (float)(doc_id + j * 0.1); + ptr += sizeof(float); + } + err = zvec_doc_add_field_by_value(doc, field_name, field_type, + sparse_data, sparse_size); + free(sparse_data); + } + break; + } + default: + break; + } + + + if (err != ZVEC_OK) { + zvec_doc_destroy(doc); + return NULL; + } + } + + return doc; +} + +ZVecDoc *zvec_test_create_doc_with_fields(uint64_t doc_id, + const char **field_names, + const ZVecDataType *field_types, + size_t field_count, const char *pk) { + ZVecDoc *doc = zvec_doc_create(); + if (!doc) return NULL; + + // Set primary key + char *primary_key = pk ? 
strdup_safe(pk) : zvec_test_make_pk(doc_id); + if (primary_key) { + zvec_doc_set_pk(doc, primary_key); + free(primary_key); + } + + // Create data for specified fields + for (size_t i = 0; i < field_count; i++) { + ZVecErrorCode err = ZVEC_OK; + + switch (field_types[i]) { + case ZVEC_DATA_TYPE_INT32: + err = zvec_doc_add_field_by_value(doc, field_names[i], field_types[i], + &(int32_t){(int32_t)doc_id}, + sizeof(int32_t)); + break; + case ZVEC_DATA_TYPE_STRING: { + char string_val[64]; + snprintf(string_val, sizeof(string_val), "value_%llu", + (unsigned long long)doc_id); + err = zvec_doc_add_field_by_value(doc, field_names[i], field_types[i], + string_val, strlen(string_val)); + break; + } + case ZVEC_DATA_TYPE_FLOAT: + err = + zvec_doc_add_field_by_value(doc, field_names[i], field_types[i], + &(float){(float)doc_id}, sizeof(float)); + break; + case ZVEC_DATA_TYPE_VECTOR_FP32: { + float vector_data[128]; + for (int j = 0; j < 128; j++) { + vector_data[j] = (float)(doc_id + j * 0.1); + } + err = zvec_doc_add_field_by_value(doc, field_names[i], field_types[i], + vector_data, sizeof(vector_data)); + break; + } + default: + // Other types can be added here + break; + } + + if (err != ZVEC_OK) { + zvec_doc_destroy(doc); + return NULL; + } + } + + return doc; +} + +// ============================================================================= +// Index Parameter Creation Helper Functions Implementation +// ============================================================================= + +ZVecIndexParams *zvec_test_create_default_hnsw_params(void) { + ZVecIndexParams *params = zvec_index_params_create(ZVEC_INDEX_TYPE_HNSW); + if (!params) return NULL; + + zvec_index_params_set_metric_type(params, ZVEC_METRIC_TYPE_IP); + zvec_index_params_set_hnsw_params(params, 16, 100); + + return params; +} + +ZVecIndexParams *zvec_test_create_default_flat_params(void) { + ZVecIndexParams *params = zvec_index_params_create(ZVEC_INDEX_TYPE_FLAT); + if (!params) return NULL; + + 
zvec_index_params_set_metric_type(params, ZVEC_METRIC_TYPE_IP); + + return params; +} + +ZVecIndexParams *zvec_test_create_default_invert_params(bool enable_optimize) { + ZVecIndexParams *params = zvec_index_params_create(ZVEC_INDEX_TYPE_INVERT); + if (!params) return NULL; + + zvec_index_params_set_invert_params(params, enable_optimize, enable_optimize); + + return params; +} + +// ============================================================================= +// Field Schema Creation Helper Functions Implementation +// ============================================================================= + +ZVecFieldSchema *zvec_test_create_scalar_field( + const char *name, ZVecDataType data_type, bool nullable, + const ZVecIndexParams *invert_params) { + // Use the public API to create the field + ZVecFieldSchema *field = + zvec_field_schema_create(name, data_type, nullable, 0); + if (!field) return NULL; + + if (invert_params) { + // Clone the index params using setter API + ZVecIndexType type = zvec_index_params_get_type(invert_params); + ZVecIndexParams *cloned_params = zvec_index_params_create(type); + if (cloned_params) { + bool enable_range_opt, enable_wildcard; + zvec_index_params_get_invert_params(invert_params, &enable_range_opt, + &enable_wildcard); + zvec_index_params_set_invert_params(cloned_params, enable_range_opt, + enable_wildcard); + zvec_field_schema_set_index_params(field, cloned_params); + zvec_index_params_destroy(cloned_params); + } + } + + return field; +} + +ZVecFieldSchema *zvec_test_create_vector_field( + const char *name, ZVecDataType data_type, uint32_t dimension, bool nullable, + const ZVecIndexParams *vector_index_params) { + // Use the public API to create the field + ZVecFieldSchema *field = + zvec_field_schema_create(name, data_type, nullable, dimension); + if (!field) return NULL; + + if (vector_index_params) { + // Clone the index params using setter API + ZVecIndexType type = zvec_index_params_get_type(vector_index_params); + 
ZVecIndexParams *cloned_params = zvec_index_params_create(type); + if (cloned_params) { + int m, ef_construction; + zvec_index_params_get_hnsw_params(vector_index_params, &m, + &ef_construction); + zvec_index_params_set_hnsw_params(cloned_params, m, ef_construction); + zvec_field_schema_set_index_params(field, cloned_params); + zvec_index_params_destroy(cloned_params); + } + } + + return field; +} + +ZVecFieldSchema *zvec_test_create_sparse_vector_field( + const char *name, ZVecDataType data_type, bool nullable, + const ZVecIndexParams *vector_index_params) { + // Use the public API to create the field + ZVecFieldSchema *field = + zvec_field_schema_create(name, data_type, nullable, 0); + if (!field) return NULL; + + if (vector_index_params) { + // Clone the index params using setter API + ZVecIndexType type = zvec_index_params_get_type(vector_index_params); + ZVecIndexParams *cloned_params = zvec_index_params_create(type); + if (cloned_params) { + int m, ef_construction; + zvec_index_params_get_hnsw_params(vector_index_params, &m, + &ef_construction); + zvec_index_params_set_hnsw_params(cloned_params, m, ef_construction); + zvec_field_schema_set_index_params(field, cloned_params); + zvec_index_params_destroy(cloned_params); + } + } + + return field; +} + +// ============================================================================= +// Memory Management Helper Functions Implementation +// ============================================================================= + +// Note: zvec_test_free_field_schemas is deprecated. +// Use zvec_field_schema_destroy() to free individual field schemas. 
+
+/**
+ * @brief Free string array
+ *
+ * Frees the first @p count entries and then the array itself. A NULL array is
+ * ignored; NULL entries are fine because free(NULL) is a no-op.
+ *
+ * @param strings String array pointer
+ * @param count Number of strings
+ */
+void zvec_test_free_strings(char **strings, size_t count) {
+  if (!strings) return;
+
+  for (size_t i = 0; i < count; i++) {
+    free(strings[i]);
+  }
+
+  free(strings);
+}
+
+// =============================================================================
+// File System Helper Functions Implementation
+// =============================================================================
+
+/**
+ * @brief Delete directory and all its contents (wrapper function)
+ *
+ * Formats a shell command ("rd /s /q" on Windows, "rm -rf" elsewhere) and runs
+ * it through system(). The path is wrapped in double quotes but is not escaped
+ * in any other way, so it must only ever come from trusted test code.
+ *
+ * @param dir_path Directory path
+ * @return int 0 for success, -1 for failure (NULL path, path that does not fit
+ * the command buffer, or non-zero status from system())
+ */
+int zvec_test_delete_dir(const char *dir_path) {
+  if (!dir_path) {
+    return -1;
+  }
+
+  char cmd[1024];
+#ifdef _WIN32
+  // Windows platform implementation
+  int len = snprintf(cmd, sizeof(cmd), "rd /s /q \"%s\" >nul 2>&1", dir_path);
+#else
+  // Unix/Linux/macOS platform implementation
+  int len = snprintf(cmd, sizeof(cmd), "rm -rf \"%s\" 2>/dev/null", dir_path);
+#endif
+
+  // Never hand a truncated command to the shell: a cut-off path would make a
+  // recursive delete act on something other than what the caller asked for.
+  if (len < 0 || (size_t)len >= sizeof(cmd)) {
+    return -1;
+  }
+
+  return (system(cmd) == 0) ? 0 : -1;
+}
diff --git a/tests/c/utils.h b/tests/c/utils.h
new file mode 100644
index 00000000..202bc95b
--- /dev/null
+++ b/tests/c/utils.h
@@ -0,0 +1,247 @@
+// Copyright 2025-present the zvec project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ZVEC_TESTS_C_API_UTILS_H
+#define ZVEC_TESTS_C_API_UTILS_H
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include "zvec/c_api.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// =============================================================================
+// Schema Creation Helper Functions
+// =============================================================================
+
+/**
+ * @brief Create temporary test schema
+ * Contains basic scalar fields and vector fields
+ *
+ * @return ZVecCollectionSchema* Created schema pointer, needs to be released by
+ * calling zvec_collection_schema_cleanup
+ */
+ZVecCollectionSchema *zvec_test_create_temp_schema(void);
+
+/**
+ * @brief Create pure scalar schema
+ * Contains only scalar fields (int32, string)
+ *
+ * @return ZVecCollectionSchema* Created schema pointer
+ */
+ZVecCollectionSchema *zvec_test_create_scalar_schema(void);
+
+/**
+ * @brief Create full-featured schema
+ * Contains all supported data type fields
+ *
+ * @param nullable Whether to allow null values
+ * @param name Schema name
+ * @param scalar_index_params Scalar index parameters (can be NULL)
+ * @param vector_index_params Vector index parameters (can be NULL)
+ * @param max_doc_count Maximum documents per segment
+ * @return ZVecCollectionSchema* Created schema pointer
+ */
+ZVecCollectionSchema *zvec_test_create_normal_schema(
+    bool nullable, const char *name, const ZVecIndexParams *scalar_index_params,
+    const ZVecIndexParams *vector_index_params, uint64_t max_doc_count);
+
+/**
+ * @brief Create schema with scalar index
+ *
+ * @param nullable Whether to allow null values
+ * @param enable_optimize Whether to enable optimization
+ * @param name Schema name
+ * @return ZVecCollectionSchema* Created schema pointer
+ */
+ZVecCollectionSchema *zvec_test_create_schema_with_scalar_index(
+    bool nullable, bool enable_optimize, const char *name);
+
+/**
+ * @brief Create schema with vector index
+ *
+ * @param nullable Whether to allow null values
+ * @param name Schema name
+ * @param vector_index_params Vector index parameters (can be NULL, uses default
+ * HNSW parameters)
+ * @return ZVecCollectionSchema* Created schema pointer
+ */
+ZVecCollectionSchema *zvec_test_create_schema_with_vector_index(
+    bool nullable, const char *name,
+    const ZVecIndexParams *vector_index_params);
+
+/**
+ * @brief Create schema with specified maximum document count
+ *
+ * @param doc_count Maximum documents per segment
+ * @return ZVecCollectionSchema* Created schema pointer
+ */
+ZVecCollectionSchema *zvec_test_create_schema_with_max_doc_count(
+    uint64_t doc_count);
+
+// =============================================================================
+// Document Creation Helper Functions
+// =============================================================================
+
+/**
+ * @brief Generate primary key based on document ID
+ *
+ * @param doc_id Document ID
+ * @return char* Generated primary key string, needs to be released by calling
+ * free()
+ */
+char *zvec_test_make_pk(uint64_t doc_id);
+
+/**
+ * @brief Create complete document
+ * Create corresponding test data for each field according to schema
+ *
+ * @param doc_id Document ID
+ * @param schema Schema pointer
+ * @param pk Primary key (can be NULL, auto-generated)
+ * @return ZVecDoc* Created document pointer, needs to be released by calling
+ * zvec_doc_destroy
+ */
+ZVecDoc *zvec_test_create_doc(uint64_t doc_id,
+                              const ZVecCollectionSchema *schema,
+                              const char *pk);
+
+/**
+ * @brief Create partial null document
+ * Only set values for vector fields, keep scalar fields as null
+ *
+ * @param doc_id Document ID
+ * @param schema Schema pointer
+ * @param pk Primary key (can be NULL, auto-generated)
+ * @return ZVecDoc* Created document pointer
+ */
+ZVecDoc *zvec_test_create_doc_null(uint64_t doc_id,
+                                   const ZVecCollectionSchema *schema,
+                                   const char *pk);
+
+/**
+ * @brief Create document with specified fields
+ * Only create data for specified fields
+ *
+ * @param doc_id Document ID
+ * @param field_names Field name array
+ * @param field_types Field type array
+ * @param field_count Number of fields
+ * @param pk Primary key (can be NULL, auto-generated)
+ * @return ZVecDoc* Created document pointer
+ */
+ZVecDoc *zvec_test_create_doc_with_fields(uint64_t doc_id,
+                                          const char **field_names,
+                                          const ZVecDataType *field_types,
+                                          size_t field_count, const char *pk);
+
+// =============================================================================
+// Index Parameter Creation Helper Functions
+// =============================================================================
+
+/**
+ * @brief Create default HNSW index parameters
+ *
+ * @return ZVecIndexParams* Created parameter pointer
+ */
+ZVecIndexParams *zvec_test_create_default_hnsw_params(void);
+
+/**
+ * @brief Create default Flat index parameters
+ *
+ * @return ZVecIndexParams* Created parameter pointer
+ */
+ZVecIndexParams *zvec_test_create_default_flat_params(void);
+
+/**
+ * @brief Create default scalar index parameters
+ *
+ * @param enable_optimize Whether to enable optimization
+ * @return ZVecIndexParams* Created parameter pointer
+ */
+ZVecIndexParams *zvec_test_create_default_invert_params(bool enable_optimize);
+
+// =============================================================================
+// Field Schema Creation Helper Functions
+// =============================================================================
+
+/**
+ * @brief Create scalar field schema
+ *
+ * @param name Field name
+ * @param data_type Data type
+ * @param nullable Whether to allow null values
+ * @param invert_params Scalar index parameters (can be NULL)
+ * @return ZVecFieldSchema* Created field schema pointer, needs to be released
+ * by calling zvec_field_schema_destroy()
+ */
+ZVecFieldSchema *zvec_test_create_scalar_field(
+    const char *name, ZVecDataType data_type, bool nullable,
+    const ZVecIndexParams *invert_params);
+
+/**
+ * @brief Create vector field schema
+ *
+ * @param name Field name
+ * @param data_type Data type
+ * @param dimension Vector dimension
+ * @param nullable Whether to allow null values
+ * @param vector_index_params Vector index parameters (can be NULL)
+ * @return ZVecFieldSchema* Created field schema pointer
+ */
+ZVecFieldSchema *zvec_test_create_vector_field(
+    const char *name, ZVecDataType data_type, uint32_t dimension, bool nullable,
+    const ZVecIndexParams *vector_index_params);
+
+/**
+ * @brief Create sparse vector field schema
+ *
+ * @param name Field name
+ * @param data_type Data type
+ * @param nullable Whether to allow null values
+ * @param vector_index_params Vector index parameters (can be NULL)
+ * @return ZVecFieldSchema* Created field schema pointer
+ */
+ZVecFieldSchema *zvec_test_create_sparse_vector_field(
+    const char *name, ZVecDataType data_type, bool nullable,
+    const ZVecIndexParams *vector_index_params);
+
+// =============================================================================
+// Memory Management Helper Functions
+// =============================================================================
+
+/**
+ * @brief Free string array
+ *
+ * @param strings String array pointer
+ * @param count Number of strings
+ */
+void zvec_test_free_strings(char **strings, size_t count);
+
+/**
+ * @brief Delete directory and all its contents
+ *
+ * @param dir_path Directory path
+ * @return int 0 for success, -1 for failure
+ */
+int zvec_test_delete_dir(const char *dir_path);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  // ZVEC_TESTS_C_API_UTILS_H
\ No newline at end of file
diff --git a/tests/core/algorithm/flat_sparse/flat_sparse_builder_test.cc b/tests/core/algorithm/flat_sparse/flat_sparse_builder_test.cc
index c89d086b..59dcb574 100644
--- a/tests/core/algorithm/flat_sparse/flat_sparse_builder_test.cc
+++ b/tests/core/algorithm/flat_sparse/flat_sparse_builder_test.cc
@@ -257,7 +257,7 @@ TEST_F(FlatSparseBuilderTest, TestHalfFloatConverter) {
   ASSERT_EQ(0UL, stats.discarded_count());
ASSERT_EQ(0UL, stats.trained_costtime()); ASSERT_EQ(stats.built_costtime(), 0UL); - //ASSERT_GT(stats.dumped_costtime(), 0UL); + // ASSERT_GT(stats.dumped_costtime(), 0UL); // cleanup and rebuild ASSERT_EQ(0, builder->cleanup()); diff --git a/tests/core/algorithm/ivf/ivf_searcher_test.cc b/tests/core/algorithm/ivf/ivf_searcher_test.cc index 9911e0e2..75d5df1c 100644 --- a/tests/core/algorithm/ivf/ivf_searcher_test.cc +++ b/tests/core/algorithm/ivf/ivf_searcher_test.cc @@ -392,7 +392,7 @@ TEST_F(IVFSearcherTest, TestSimpleCosine) { { size_t topk = 33; context->set_topk(topk); - + std::string new_vec; IndexQueryMeta new_meta; ASSERT_EQ(0, reformer->convert(query.data(), qmeta, &new_vec, &new_meta)); diff --git a/tests/core/metric/quantized_integer_metric_test.cc b/tests/core/metric/quantized_integer_metric_test.cc index 501d8c7b..835a07fb 100644 --- a/tests/core/metric/quantized_integer_metric_test.cc +++ b/tests/core/metric/quantized_integer_metric_test.cc @@ -251,7 +251,7 @@ void TestDistanceMatrixInt8(const std::string &metric_name) { const size_t batch_size = M; const size_t query_size = N; - size_t dimension = (std::uniform_int_distribution(1, 65))(gen)*4; + size_t dimension = (std::uniform_int_distribution(1, 65))(gen) * 4; auto holder = GetHolder(dimension, batch_size, dist); IndexMeta meta(IndexMeta::DT_FP32, dimension); meta.set_metric(metric_name, 0, Params()); @@ -453,7 +453,7 @@ void TestDistanceMatrixInt4(const std::string &metric_name) { const size_t batch_size = M; const size_t query_size = N; - size_t dimension = (std::uniform_int_distribution(1, 65))(gen)*8; + size_t dimension = (std::uniform_int_distribution(1, 65))(gen) * 8; auto holder = GetHolder(dimension, batch_size, dist); IndexMeta meta(IndexMeta::DT_FP32, dimension); meta.set_metric(metric_name, 0, Params());