diff --git a/CMakeLists.txt b/CMakeLists.txt index f9ad6bc00..fc4f5315c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -171,18 +171,22 @@ if(BUILD_PYTHON_BINDINGS) endif() message(STATUS "Zvec install path: ${ZVEC_PY_INSTALL_DIR}") - install(TARGETS _zvec LIBRARY DESTINATION ${ZVEC_PY_INSTALL_DIR}) + # Install the extension inside the zvec package (zvec/_zvec*.so) rather than + # at the site-packages root, so it does not pollute the top-level namespace. + # The Python code imports it as `zvec._zvec` accordingly. + install(TARGETS _zvec LIBRARY DESTINATION ${ZVEC_PY_INSTALL_DIR}/zvec) # DiskAnn ships as a runtime-loaded shared module # (libzvec_diskann_plugin.so) that is brought online implicitly the # first time a DiskAnn index is created — users never call any load # function. The Python extension resolves the module next to _zvec.so # (see the $ORIGIN rpath in src/binding/python/CMakeLists.txt); the - # module must therefore be installed alongside _zvec.so in the wheel. + # module must therefore be installed alongside _zvec.so, i.e. inside the + # zvec package directory as well. # The target exists only on platforms where DiskAnn is buildable # (currently Linux x86_64 with libaio). if(TARGET core_knn_diskann) - install(TARGETS core_knn_diskann LIBRARY DESTINATION ${ZVEC_PY_INSTALL_DIR}) + install(TARGETS core_knn_diskann LIBRARY DESTINATION ${ZVEC_PY_INSTALL_DIR}/zvec) endif() # Bundle cppjieba's dictionary files so the `jieba` FTS tokenizer works # out of the box. 
python/zvec/__init__.py resolves this directory via diff --git a/python/tests/test_convert.py b/python/tests/test_convert.py index f30eada93..2fac2f5b9 100644 --- a/python/tests/test_convert.py +++ b/python/tests/test_convert.py @@ -3,7 +3,7 @@ import math import pytest -from _zvec import _Doc +from zvec._zvec import _Doc from zvec.model.convert import convert_to_py_doc, convert_to_cpp_doc from zvec import Doc, CollectionSchema, DataType, FieldSchema, VectorSchema diff --git a/python/tests/test_doc.py b/python/tests/test_doc.py index d4a60ff77..ecfd31e4d 100644 --- a/python/tests/test_doc.py +++ b/python/tests/test_doc.py @@ -17,7 +17,7 @@ import pytest -from _zvec import _Doc +from zvec._zvec import _Doc from zvec import FieldSchema, VectorSchema, Doc, DataType diff --git a/python/tests/test_fts_query.py b/python/tests/test_fts_query.py index 16db8b4aa..e87c7c137 100644 --- a/python/tests/test_fts_query.py +++ b/python/tests/test_fts_query.py @@ -79,7 +79,7 @@ class TestFtsQueryBinding: def test_import_fts_query(self): """_Fts should be importable from _zvec.param.""" - from _zvec.param import _Fts + from zvec._zvec.param import _Fts fts = _Fts() assert fts.query_string == "" @@ -87,7 +87,7 @@ def test_import_fts_query(self): def test_fts_query_set_fields(self): """Setting fields on _Fts should work.""" - from _zvec.param import _Fts + from zvec._zvec.param import _Fts fts = _Fts() fts.query_string = "+hello -world" @@ -99,7 +99,7 @@ def test_fts_query_set_fields(self): def test_fts_query_pickle(self): """_Fts should support pickling.""" - from _zvec.param import _Fts + from zvec._zvec.param import _Fts fts = _Fts() fts.query_string = "+vector search" @@ -112,7 +112,7 @@ def test_fts_query_pickle(self): def test_search_query_fts_field(self): """_SearchQuery should have fts field.""" - from _zvec.param import _Fts, _SearchQuery + from zvec._zvec.param import _Fts, _SearchQuery vq = _SearchQuery() # fts should be None by default (optional) @@ -127,7 +127,7 @@ def 
test_search_query_fts_field(self): def test_search_query_pickle_with_fts(self): """_SearchQuery with fts should survive pickling.""" - from _zvec.param import _Fts, _SearchQuery + from zvec._zvec.param import _Fts, _SearchQuery vq = _SearchQuery() vq.topk = 10 @@ -145,7 +145,7 @@ def test_search_query_pickle_with_fts(self): def test_search_query_pickle_without_fts(self): """_SearchQuery without fts should survive pickling.""" - from _zvec.param import _SearchQuery + from zvec._zvec.param import _SearchQuery vq = _SearchQuery() vq.topk = 5 diff --git a/python/tests/test_params.py b/python/tests/test_params.py index 2d2ba2795..728b305fa 100644 --- a/python/tests/test_params.py +++ b/python/tests/test_params.py @@ -40,7 +40,7 @@ VectorSchema, ) -from _zvec.param import _SearchQuery +from zvec._zvec.param import _SearchQuery # ---------------------------- # Invert Index Param Test Case diff --git a/python/tests/test_pickle_compat.py b/python/tests/test_pickle_compat.py new file mode 100644 index 000000000..3349af74e --- /dev/null +++ b/python/tests/test_pickle_compat.py @@ -0,0 +1,48 @@ +# Copyright 2025-present the zvec project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Legacy-pickle compatibility after the compiled extension moved from the +top-level ``_zvec`` module to ``zvec._zvec``. + +pybind11 records a class's defining module inside the pickle stream, so objects +pickled by zvec <= 0.5.x reference ``_zvec.param`` / ``_zvec.schema`` / +``_zvec.typing``. 
``zvec/__init__.py`` registers ``sys.modules`` aliases so +those pickles still load after upgrading. +""" + +from __future__ import annotations + +import pickle +import sys + +from zvec.model.param import HnswIndexParam + + +class TestLegacyPickleCompat: + def test_legacy_module_aliases_registered(self): + ext = sys.modules["zvec._zvec"] + assert sys.modules.get("_zvec") is ext + for sub in ("param", "schema", "typing"): + assert sys.modules.get(f"_zvec.{sub}") is getattr(ext, sub) + + def test_old_layout_pickle_still_loads(self): + param = HnswIndexParam(m=16, ef_construction=200) + # protocol 2 encodes globals as ``c<module>\n<name>\n`` (no length + # prefix / framing), so we can rewrite the module path to the legacy + # top-level form to forge what an old wheel would have produced. + blob = pickle.dumps(param, protocol=2) + assert b"czvec._zvec.param\n" in blob + legacy = blob.replace(b"czvec._zvec.param\n", b"c_zvec.param\n") + + restored = pickle.loads(legacy) + assert restored.m == 16 diff --git a/python/tests/test_query_executor.py b/python/tests/test_query_executor.py index 823e6efc1..7237c143e 100644 --- a/python/tests/test_query_executor.py +++ b/python/tests/test_query_executor.py @@ -18,7 +18,7 @@ import numpy as np import math -from _zvec.param import _SearchQuery +from zvec._zvec.param import _SearchQuery import pytest from zvec.executor.query_executor import ( diff --git a/python/zvec/__init__.py b/python/zvec/__init__.py index 5fdf9732c..396a87935 100644 --- a/python/zvec/__init__.py +++ b/python/zvec/__init__.py @@ -28,7 +28,7 @@ try: from importlib.resources import files as _resource_files - from _zvec import ( + from zvec._zvec import ( get_default_jieba_dict_dir, set_default_jieba_dict_dir, ) @@ -48,7 +48,7 @@ # DiskAnn normally auto-loads on first use; these APIs let tests and # diagnostic tools preload the plugin and get a clear error if libaio is # missing or the plugin shared object cannot be located. 
-from _zvec import ( +from zvec._zvec import ( DISKANN_PLUGIN_DLOPEN_FAILED, DISKANN_PLUGIN_LIBAIO_MISSING, DISKANN_PLUGIN_OK, @@ -58,6 +58,21 @@ load_diskann_plugin, ) +# —— Backwards compatibility for legacy pickles —— +# zvec <= 0.5.x shipped the compiled extension at the top level as ``_zvec`` +# (with submodules ``_zvec.param`` / ``_zvec.schema`` / ``_zvec.typing``); it +# now lives at ``zvec._zvec``. pybind11 records a class's defining module inside +# the pickle stream, so objects pickled by an old wheel reference the legacy +# paths. Alias the old names in ``sys.modules`` so those pickles still load +# after upgrading. (``zvec._zvec`` is already imported by the re-exports above.) +_legacy_ext = sys.modules["zvec._zvec"] +sys.modules.setdefault("_zvec", _legacy_ext) # leaves an already-imported _zvec alone +for _legacy_sub in ("param", "schema", "typing"): + _legacy_mod = getattr(_legacy_ext, _legacy_sub, None) + if _legacy_mod is not None: # attribute missing on the extension: skip + sys.modules.setdefault(f"_zvec.{_legacy_sub}", _legacy_mod) +del _legacy_ext, _legacy_sub, _legacy_mod # don't leak helpers into zvec.* + from . 
import model as model # —— Extensions —— diff --git a/python/zvec/executor/query_executor.py b/python/zvec/executor/query_executor.py index 62bc9e332..8de8b0500 100644 --- a/python/zvec/executor/query_executor.py +++ b/python/zvec/executor/query_executor.py @@ -16,8 +16,9 @@ from typing import Optional, Union import numpy as np -from _zvec import _Collection, _MultiQuery -from _zvec.param import _Fts, _SearchQuery, _SubQuery + +from zvec._zvec import _Collection, _MultiQuery +from zvec._zvec.param import _Fts, _SearchQuery, _SubQuery from ..extension import CallbackReRanker, ReRanker, RrfReRanker, WeightedReRanker from ..model.convert import convert_to_py_doc diff --git a/python/zvec/extension/multi_vector_reranker.py b/python/zvec/extension/multi_vector_reranker.py index acee984b1..a04c9f840 100644 --- a/python/zvec/extension/multi_vector_reranker.py +++ b/python/zvec/extension/multi_vector_reranker.py @@ -16,7 +16,13 @@ from collections.abc import Callable from typing import TYPE_CHECKING -from _zvec import _CallbackParams, _Doc, _reranker_rerank, _RrfParams, _WeightedParams +from zvec._zvec import ( + _CallbackParams, + _Doc, + _reranker_rerank, + _RrfParams, + _WeightedParams, +) from ..model.doc import Doc, DocList from .rerank_function import RerankFunction diff --git a/python/zvec/model/collection.py b/python/zvec/model/collection.py index de16753a6..f7072aa64 100644 --- a/python/zvec/model/collection.py +++ b/python/zvec/model/collection.py @@ -16,7 +16,7 @@ import warnings from typing import Optional, Union, overload -from _zvec import _Collection +from zvec._zvec import _Collection from ..executor import QueryContext, QueryExecutor from ..extension import ReRanker diff --git a/python/zvec/model/convert.py b/python/zvec/model/convert.py index 2eac08c40..421bd1741 100644 --- a/python/zvec/model/convert.py +++ b/python/zvec/model/convert.py @@ -11,7 +11,7 @@ # limitations under the License. 
from __future__ import annotations -from _zvec import _Doc +from zvec._zvec import _Doc from .doc import Doc from .schema import CollectionSchema diff --git a/python/zvec/model/param/__init__.py b/python/zvec/model/param/__init__.py index 43fc1ddce..782b0377f 100644 --- a/python/zvec/model/param/__init__.py +++ b/python/zvec/model/param/__init__.py @@ -13,7 +13,7 @@ # limitations under the License. from __future__ import annotations -from _zvec.param import ( +from zvec._zvec.param import ( AddColumnOption, AlterColumnOption, CollectionOption, diff --git a/python/zvec/model/param/__init__.pyi b/python/zvec/model/param/__init__.pyi index 759b41348..ed184d339 100644 --- a/python/zvec/model/param/__init__.pyi +++ b/python/zvec/model/param/__init__.pyi @@ -7,7 +7,7 @@ from __future__ import annotations import collections import typing -import _zvec.typing +import zvec._zvec.typing __all__: list[str] = [ "AddColumnOption", @@ -159,8 +159,8 @@ class FlatIndexParam(VectorIndexParam): def __getstate__(self) -> tuple: ... def __init__( self, - metric_type: _zvec.typing.MetricType = ..., - quantize_type: _zvec.typing.QuantizeType = ..., + metric_type: zvec._zvec.typing.MetricType = ..., + quantize_type: zvec._zvec.typing.QuantizeType = ..., ) -> None: """ Constructs a FlatIndexParam instance. @@ -219,10 +219,10 @@ class HnswIndexParam(VectorIndexParam): def __getstate__(self) -> tuple: ... def __init__( self, - metric_type: _zvec.typing.MetricType = ..., + metric_type: zvec._zvec.typing.MetricType = ..., m: typing.SupportsInt = 50, ef_construction: typing.SupportsInt = 500, - quantize_type: _zvec.typing.QuantizeType = ..., + quantize_type: zvec._zvec.typing.QuantizeType = ..., use_contiguous_memory: bool = False, ) -> None: ... def __repr__(self) -> str: ... @@ -363,7 +363,7 @@ class HnswRabitqIndexParam(VectorIndexParam): def __getstate__(self) -> tuple: ... 
def __init__( self, - metric_type: _zvec.typing.MetricType = ..., + metric_type: zvec._zvec.typing.MetricType = ..., total_bits: typing.SupportsInt = 7, num_clusters: typing.SupportsInt = 16, m: typing.SupportsInt = 50, @@ -491,11 +491,11 @@ class IVFIndexParam(VectorIndexParam): def __getstate__(self) -> tuple: ... def __init__( self, - metric_type: _zvec.typing.MetricType = ..., + metric_type: zvec._zvec.typing.MetricType = ..., n_list: typing.SupportsInt = 10, n_iters: typing.SupportsInt = 10, use_soar: bool = False, - quantize_type: _zvec.typing.QuantizeType = ..., + quantize_type: zvec._zvec.typing.QuantizeType = ..., ) -> None: """ Constructs an IVFIndexParam instance. @@ -593,14 +593,14 @@ class VamanaIndexParam(VectorIndexParam): def __getstate__(self) -> tuple: ... def __init__( self, - metric_type: _zvec.typing.MetricType = ..., + metric_type: zvec._zvec.typing.MetricType = ..., max_degree: typing.SupportsInt = 64, search_list_size: typing.SupportsInt = 100, alpha: typing.SupportsFloat = 1.2, saturate_graph: bool = False, use_contiguous_memory: bool = False, use_id_map: bool = False, - quantize_type: _zvec.typing.QuantizeType = ..., + quantize_type: zvec._zvec.typing.QuantizeType = ..., ) -> None: ... def __repr__(self) -> str: ... def __setstate__(self, arg0: tuple) -> None: ... @@ -734,7 +734,7 @@ class IndexParam: """ @property - def type(self) -> _zvec.typing.IndexType: + def type(self) -> zvec._zvec.typing.IndexType: """ IndexType: The type of the index. """ @@ -863,7 +863,7 @@ class QueryParam: IndexType: The type of index this query targets. """ @property - def type(self) -> _zvec.typing.IndexType: + def type(self) -> zvec._zvec.typing.IndexType: """ IndexType: The type of index this query targets. """ @@ -933,13 +933,13 @@ class VectorIndexParam(IndexParam): """ @property - def metric_type(self) -> _zvec.typing.MetricType: + def metric_type(self) -> zvec._zvec.typing.MetricType: """ MetricType: Distance metric (e.g., IP, COSINE, L2). 
""" @property - def quantize_type(self) -> _zvec.typing.QuantizeType: + def quantize_type(self) -> zvec._zvec.typing.QuantizeType: """ QuantizeType: Vector quantization type (e.g., FP16, INT8). """ diff --git a/python/zvec/model/schema/__init__.py b/python/zvec/model/schema/__init__.py index 5ff532ff2..b9524549a 100644 --- a/python/zvec/model/schema/__init__.py +++ b/python/zvec/model/schema/__init__.py @@ -13,7 +13,7 @@ # limitations under the License. from __future__ import annotations -from _zvec.schema import CollectionStats +from zvec._zvec.schema import CollectionStats from .collection_schema import CollectionSchema from .field_schema import FieldSchema, VectorSchema diff --git a/python/zvec/model/schema/__init__.pyi b/python/zvec/model/schema/__init__.pyi index 30ba20f5e..0dfa80d28 100644 --- a/python/zvec/model/schema/__init__.pyi +++ b/python/zvec/model/schema/__init__.pyi @@ -7,8 +7,8 @@ from __future__ import annotations import collections.abc import typing -import _zvec.param -import _zvec.typing +import zvec._zvec.param +import zvec._zvec.typing from .collection_schema import CollectionSchema from .field_schema import FieldSchema, VectorSchema @@ -85,20 +85,20 @@ class _FieldSchema: def __init__( self, name: str, - data_type: _zvec.typing.DataType, + data_type: zvec._zvec.typing.DataType, nullable: bool = False, dimension: typing.SupportsInt = 0, - index_param: _zvec.param.IndexParam = None, + index_param: zvec._zvec.param.IndexParam = None, ) -> None: ... def __ne__(self, arg0: _FieldSchema) -> bool: ... @property - def data_type(self) -> _zvec.typing.DataType: ... + def data_type(self) -> zvec._zvec.typing.DataType: ... @property def dimension(self) -> int: ... @property def index_param(self) -> typing.Any: ... @property - def index_type(self) -> _zvec.typing.IndexType: ... + def index_type(self) -> zvec._zvec.typing.IndexType: ... @property def is_dense_vector(self) -> bool: ... 
@property diff --git a/python/zvec/model/schema/collection_schema.py b/python/zvec/model/schema/collection_schema.py index e07095b1d..3e8971040 100644 --- a/python/zvec/model/schema/collection_schema.py +++ b/python/zvec/model/schema/collection_schema.py @@ -16,7 +16,7 @@ import json from typing import Optional, Union -from _zvec.schema import _CollectionSchema, _FieldSchema +from zvec._zvec.schema import _CollectionSchema, _FieldSchema from .field_schema import FieldSchema, VectorSchema diff --git a/python/zvec/model/schema/field_schema.py b/python/zvec/model/schema/field_schema.py index 1af00b5da..36ad84227 100644 --- a/python/zvec/model/schema/field_schema.py +++ b/python/zvec/model/schema/field_schema.py @@ -16,8 +16,7 @@ import json from typing import Any, Optional, Union -from _zvec.schema import _FieldSchema - +from zvec._zvec.schema import _FieldSchema from zvec.model.param import ( FlatIndexParam, HnswIndexParam, diff --git a/python/zvec/typing/__init__.py b/python/zvec/typing/__init__.py index da83c44df..0faa19d72 100644 --- a/python/zvec/typing/__init__.py +++ b/python/zvec/typing/__init__.py @@ -13,7 +13,7 @@ # limitations under the License. from __future__ import annotations -from _zvec.typing import ( +from zvec._zvec.typing import ( DataType, IndexType, MetricType, diff --git a/python/zvec/zvec.py b/python/zvec/zvec.py index 9f3e815bb..7bc6bc695 100644 --- a/python/zvec/zvec.py +++ b/python/zvec/zvec.py @@ -15,7 +15,7 @@ from typing import Optional -from _zvec import Initialize, _Collection +from zvec._zvec import Initialize, _Collection from .model import Collection from .model.param import CollectionOption