diff --git a/.release-please-manifest.json b/.release-please-manifest.json index 4ad3fef3..e7562934 100644 --- a/.release-please-manifest.json +++ b/.release-please-manifest.json @@ -1,3 +1,3 @@ { - ".": "0.18.0" + ".": "0.19.0" } \ No newline at end of file diff --git a/.stats.yml b/.stats.yml index 5ca021e4..c6bec7fc 100644 --- a/.stats.yml +++ b/.stats.yml @@ -1,4 +1,4 @@ configured_endpoints: 49 -openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/mixedbread%2Fmixedbread-1cb5e131887b451004a08be20efac2fa11c58a80dcee0176c38182b335499f05.yml -openapi_spec_hash: 1476cba193e17ebfb4bbf20c74753b05 +openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/mixedbread%2Fmixedbread-d45a3a3119fa9a3db2a6cae3c6d376f99fb874ed359c369d9b9531fdea55dcec.yml +openapi_spec_hash: aedb38c67ac4c4b9ee79d130ddeb583a config_hash: 810d9712d3d0d6a1f50d71a25511d8a7 diff --git a/CHANGELOG.md b/CHANGELOG.md index e0bcf4f5..e705664b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,13 @@ # Changelog +## 0.19.0 (2025-07-17) + +Full Changelog: [v0.18.0...v0.19.0](https://github.com/mixedbread-ai/mixedbread-python/compare/v0.18.0...v0.19.0) + +### Features + +* **api:** api update ([3f1db0c](https://github.com/mixedbread-ai/mixedbread-python/commit/3f1db0c5653fa0f8efbe22d7489fe433677db68f)) + ## 0.18.0 (2025-07-17) Full Changelog: [v0.17.0...v0.18.0](https://github.com/mixedbread-ai/mixedbread-python/compare/v0.17.0...v0.18.0) diff --git a/api.md b/api.md index a9f5376f..257f5d28 100644 --- a/api.md +++ b/api.md @@ -71,7 +71,7 @@ from mixedbread.types.vector_stores import ( Methods: - client.vector_stores.files.create(vector_store_identifier, \*\*params) -> VectorStoreFile -- client.vector_stores.files.retrieve(file_id, \*, vector_store_identifier) -> VectorStoreFile +- client.vector_stores.files.retrieve(file_id, \*, vector_store_identifier, \*\*params) -> VectorStoreFile - client.vector_stores.files.list(vector_store_identifier, \*\*params) -> SyncCursor[VectorStoreFile] - client.vector_stores.files.delete(file_id, \*, vector_store_identifier) -> FileDeleteResponse - client.vector_stores.files.search(\*\*params) -> FileSearchResponse diff --git a/pyproject.toml b/pyproject.toml index 0e3a091e..5a7c87c0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "mixedbread" -version = "0.18.0" +version = "0.19.0" description = "The official Python library for the Mixedbread API" dynamic = ["readme"] license = "Apache-2.0" diff --git a/src/mixedbread/_version.py b/src/mixedbread/_version.py index f16b39b9..54e58eec 100644 --- a/src/mixedbread/_version.py +++ b/src/mixedbread/_version.py @@ -1,4 +1,4 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. __title__ = "mixedbread" -__version__ = "0.18.0" # x-release-please-version +__version__ = "0.19.0" # x-release-please-version diff --git a/src/mixedbread/resources/vector_stores/files.py b/src/mixedbread/resources/vector_stores/files.py index f73400a2..44d28868 100644 --- a/src/mixedbread/resources/vector_stores/files.py +++ b/src/mixedbread/resources/vector_stores/files.py @@ -20,7 +20,7 @@ ) from ...pagination import SyncCursor, AsyncCursor from ..._base_client import AsyncPaginator, make_request_options -from ...types.vector_stores import file_list_params, file_create_params, file_search_params +from ...types.vector_stores import file_list_params, file_create_params, file_search_params, file_retrieve_params from ...types.vector_stores.vector_store_file import VectorStoreFile from ...types.vector_stores.file_delete_response import FileDeleteResponse from ...types.vector_stores.file_search_response import FileSearchResponse @@ -113,6 +113,7 @@ def retrieve( file_id: str, *, vector_store_identifier: str, + return_chunks: bool | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, @@ -133,6 +134,8 @@ def retrieve( file_id: The ID of the file + return_chunks: Whether to return the chunks for the file + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -150,7 +153,11 @@ def retrieve( return self._get( f"/v1/vector_stores/{vector_store_identifier}/files/{file_id}", options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform({"return_chunks": return_chunks}, file_retrieve_params.FileRetrieveParams), ), cast_to=VectorStoreFile, ) @@ -542,6 +549,7 @@ async def retrieve( file_id: str, *, vector_store_identifier: str, + return_chunks: bool | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, @@ -562,6 +570,8 @@ async def retrieve( file_id: The ID of the file + return_chunks: Whether to return the chunks for the file + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -579,7 +589,13 @@ async def retrieve( return await self._get( f"/v1/vector_stores/{vector_store_identifier}/files/{file_id}", options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=await async_maybe_transform( + {"return_chunks": return_chunks}, file_retrieve_params.FileRetrieveParams + ), ), cast_to=VectorStoreFile, ) diff --git a/src/mixedbread/types/object_type.py b/src/mixedbread/types/object_type.py index aecff986..fe19bc12 100644 --- a/src/mixedbread/types/object_type.py +++ b/src/mixedbread/types/object_type.py @@ -17,4 +17,5 @@ "api_key", "data_source", "data_source.connector", + "vector_store.histogram", ] diff --git a/src/mixedbread/types/scored_audio_url_input_chunk.py b/src/mixedbread/types/scored_audio_url_input_chunk.py index 8f11be23..8a9d43e8 100644 --- a/src/mixedbread/types/scored_audio_url_input_chunk.py +++ b/src/mixedbread/types/scored_audio_url_input_chunk.py @@ -44,11 +44,11 @@ class ScoredAudioURLInputChunk(BaseModel): type: Optional[Literal["audio_url"]] = None """Input type identifier""" - audio_url: AudioURL - """The audio input specification.""" - transcription: Optional[str] = None """speech recognition (sr) text of the audio""" summary: Optional[str] = None """summary of the audio""" + + audio_url: AudioURL + """The audio input specification.""" diff --git a/src/mixedbread/types/scored_image_url_input_chunk.py b/src/mixedbread/types/scored_image_url_input_chunk.py index 8be62ae0..9bd0deb2 100644 --- a/src/mixedbread/types/scored_image_url_input_chunk.py +++ b/src/mixedbread/types/scored_image_url_input_chunk.py @@ -47,11 +47,11 @@ class ScoredImageURLInputChunk(BaseModel): type: Optional[Literal["image_url"]] = None """Input type identifier""" - image_url: ImageURL - """The image input specification.""" - ocr_text: Optional[str] = None """ocr text of the image""" summary: Optional[str] = None """summary of the image""" + + image_url: ImageURL + """The image input specification.""" diff --git a/src/mixedbread/types/scored_text_input_chunk.py b/src/mixedbread/types/scored_text_input_chunk.py index 398a8a7c..35f20c06 100644 --- a/src/mixedbread/types/scored_text_input_chunk.py +++ b/src/mixedbread/types/scored_text_input_chunk.py @@ -39,5 +39,8 @@ class ScoredTextInputChunk(BaseModel): type: Optional[Literal["text"]] = None """Input type identifier""" + offset: Optional[int] = None + """The offset of the text in the file relative to the start of the file.""" + text: str """Text content to process""" diff --git a/src/mixedbread/types/scored_video_url_input_chunk.py b/src/mixedbread/types/scored_video_url_input_chunk.py index bd0bea16..78d77359 100644 --- a/src/mixedbread/types/scored_video_url_input_chunk.py +++ b/src/mixedbread/types/scored_video_url_input_chunk.py @@ -44,11 +44,11 @@ class ScoredVideoURLInputChunk(BaseModel): type: Optional[Literal["video_url"]] = None """Input type identifier""" - video_url: VideoURL - """The video input specification.""" - transcription: Optional[str] = None """speech recognition (sr) text of the video""" summary: Optional[str] = None """summary of the video""" + + video_url: VideoURL + """The video input specification.""" diff --git a/src/mixedbread/types/vector_stores/__init__.py b/src/mixedbread/types/vector_stores/__init__.py index 6ce1aafe..6919f613 100644 --- a/src/mixedbread/types/vector_stores/__init__.py +++ b/src/mixedbread/types/vector_stores/__init__.py @@ -8,6 +8,7 @@ from .file_search_params import FileSearchParams as FileSearchParams from .rerank_config_param import RerankConfigParam as RerankConfigParam from .file_delete_response import FileDeleteResponse as FileDeleteResponse +from .file_retrieve_params import FileRetrieveParams as FileRetrieveParams from .file_search_response import FileSearchResponse as FileSearchResponse from .scored_vector_store_file import ScoredVectorStoreFile as ScoredVectorStoreFile from .vector_store_file_status import VectorStoreFileStatus as VectorStoreFileStatus diff --git a/src/mixedbread/types/vector_stores/file_retrieve_params.py b/src/mixedbread/types/vector_stores/file_retrieve_params.py new file mode 100644 index 00000000..0bbdedfc --- /dev/null +++ b/src/mixedbread/types/vector_stores/file_retrieve_params.py @@ -0,0 +1,15 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Required, TypedDict + +__all__ = ["FileRetrieveParams"] + + +class FileRetrieveParams(TypedDict, total=False): + vector_store_identifier: Required[str] + """The ID or name of the vector store""" + + return_chunks: bool + """Whether to return the chunks for the file""" diff --git a/src/mixedbread/types/vector_stores/scored_vector_store_file.py b/src/mixedbread/types/vector_stores/scored_vector_store_file.py index 225806b2..ca72a30f 100644 --- a/src/mixedbread/types/vector_stores/scored_vector_store_file.py +++ b/src/mixedbread/types/vector_stores/scored_vector_store_file.py @@ -51,8 +51,8 @@ class ScoredVectorStoreFile(BaseModel): object: Optional[Literal["vector_store.file"]] = None """Type of the object""" - score: float - """score of the file""" - chunks: Optional[List[Chunk]] = None """chunks""" + + score: float + """score of the file""" diff --git a/src/mixedbread/types/vector_stores/vector_store_file.py b/src/mixedbread/types/vector_stores/vector_store_file.py index 642c066f..96171171 100644 --- a/src/mixedbread/types/vector_stores/vector_store_file.py +++ b/src/mixedbread/types/vector_stores/vector_store_file.py @@ -1,13 +1,119 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. -from typing import Optional +from typing import Dict, List, Union, Optional from datetime import datetime -from typing_extensions import Literal +from typing_extensions import Literal, Annotated, TypeAlias +from ..._utils import PropertyInfo from ..._models import BaseModel from .vector_store_file_status import VectorStoreFileStatus -__all__ = ["VectorStoreFile"] +__all__ = [ + "VectorStoreFile", + "Chunk", + "ChunkTextInputChunk", + "ChunkImageURLInputChunkBase", + "ChunkAudioURLInputChunkBase", + "ChunkVideoURLInputChunkBase", +] + + +class ChunkTextInputChunk(BaseModel): + chunk_index: int + """position of the chunk in a file""" + + mime_type: Optional[str] = None + """mime type of the chunk""" + + generated_metadata: Optional[Dict[str, object]] = None + """metadata of the chunk""" + + model: Optional[str] = None + """model used for this chunk""" + + type: Optional[Literal["text"]] = None + """Input type identifier""" + + offset: Optional[int] = None + """The offset of the text in the file relative to the start of the file.""" + + text: str + """Text content to process""" + + +class ChunkImageURLInputChunkBase(BaseModel): + chunk_index: int + """position of the chunk in a file""" + + mime_type: Optional[str] = None + """mime type of the chunk""" + + generated_metadata: Optional[Dict[str, object]] = None + """metadata of the chunk""" + + model: Optional[str] = None + """model used for this chunk""" + + type: Optional[Literal["image_url"]] = None + """Input type identifier""" + + ocr_text: Optional[str] = None + """ocr text of the image""" + + summary: Optional[str] = None + """summary of the image""" + + +class ChunkAudioURLInputChunkBase(BaseModel): + chunk_index: int + """position of the chunk in a file""" + + mime_type: Optional[str] = None + """mime type of the chunk""" + + generated_metadata: Optional[Dict[str, object]] = None + """metadata of the chunk""" + + model: Optional[str] = None + """model used for this chunk""" + + type: Optional[Literal["audio_url"]] = None + """Input type identifier""" + + transcription: Optional[str] = None + """speech recognition (sr) text of the audio""" + + summary: Optional[str] = None + """summary of the audio""" + + +class ChunkVideoURLInputChunkBase(BaseModel): + chunk_index: int + """position of the chunk in a file""" + + mime_type: Optional[str] = None + """mime type of the chunk""" + + generated_metadata: Optional[Dict[str, object]] = None + """metadata of the chunk""" + + model: Optional[str] = None + """model used for this chunk""" + + type: Optional[Literal["video_url"]] = None + """Input type identifier""" + + transcription: Optional[str] = None + """speech recognition (sr) text of the video""" + + summary: Optional[str] = None + """summary of the video""" + + +Chunk: TypeAlias = Annotated[ + Union[ChunkTextInputChunk, ChunkImageURLInputChunkBase, ChunkAudioURLInputChunkBase, ChunkVideoURLInputChunkBase], + PropertyInfo(discriminator="type"), +] class VectorStoreFile(BaseModel): @@ -40,3 +146,6 @@ class VectorStoreFile(BaseModel): object: Optional[Literal["vector_store.file"]] = None """Type of the object""" + + chunks: Optional[List[Chunk]] = None + """chunks""" diff --git a/tests/api_resources/vector_stores/test_files.py b/tests/api_resources/vector_stores/test_files.py index 1208d103..e0abab0e 100644 --- a/tests/api_resources/vector_stores/test_files.py +++ b/tests/api_resources/vector_stores/test_files.py @@ -87,6 +87,15 @@ def test_method_retrieve(self, client: Mixedbread) -> None: ) assert_matches_type(VectorStoreFile, file, path=["response"]) + @parametrize + def test_method_retrieve_with_all_params(self, client: Mixedbread) -> None: + file = client.vector_stores.files.retrieve( + file_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", + vector_store_identifier="vector_store_identifier", + return_chunks=True, + ) + assert_matches_type(VectorStoreFile, file, path=["response"]) + @parametrize def test_raw_response_retrieve(self, client: Mixedbread) -> None: response = client.vector_stores.files.with_raw_response.retrieve( @@ -390,6 +399,15 @@ async def test_method_retrieve(self, async_client: AsyncMixedbread) -> None: ) assert_matches_type(VectorStoreFile, file, path=["response"]) + @parametrize + async def test_method_retrieve_with_all_params(self, async_client: AsyncMixedbread) -> None: + file = await async_client.vector_stores.files.retrieve( + file_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", + vector_store_identifier="vector_store_identifier", + return_chunks=True, + ) + assert_matches_type(VectorStoreFile, file, path=["response"]) + @parametrize async def test_raw_response_retrieve(self, async_client: AsyncMixedbread) -> None: response = await async_client.vector_stores.files.with_raw_response.retrieve(