diff --git a/.release-please-manifest.json b/.release-please-manifest.json
index 4ad3fef3..e7562934 100644
--- a/.release-please-manifest.json
+++ b/.release-please-manifest.json
@@ -1,3 +1,3 @@
{
- ".": "0.18.0"
+ ".": "0.19.0"
}
\ No newline at end of file
diff --git a/.stats.yml b/.stats.yml
index 5ca021e4..c6bec7fc 100644
--- a/.stats.yml
+++ b/.stats.yml
@@ -1,4 +1,4 @@
configured_endpoints: 49
-openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/mixedbread%2Fmixedbread-1cb5e131887b451004a08be20efac2fa11c58a80dcee0176c38182b335499f05.yml
-openapi_spec_hash: 1476cba193e17ebfb4bbf20c74753b05
+openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/mixedbread%2Fmixedbread-d45a3a3119fa9a3db2a6cae3c6d376f99fb874ed359c369d9b9531fdea55dcec.yml
+openapi_spec_hash: aedb38c67ac4c4b9ee79d130ddeb583a
config_hash: 810d9712d3d0d6a1f50d71a25511d8a7
diff --git a/CHANGELOG.md b/CHANGELOG.md
index e0bcf4f5..e705664b 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,13 @@
# Changelog
+## 0.19.0 (2025-07-17)
+
+Full Changelog: [v0.18.0...v0.19.0](https://github.com/mixedbread-ai/mixedbread-python/compare/v0.18.0...v0.19.0)
+
+### Features
+
+* **api:** api update ([3f1db0c](https://github.com/mixedbread-ai/mixedbread-python/commit/3f1db0c5653fa0f8efbe22d7489fe433677db68f))
+
## 0.18.0 (2025-07-17)
Full Changelog: [v0.17.0...v0.18.0](https://github.com/mixedbread-ai/mixedbread-python/compare/v0.17.0...v0.18.0)
diff --git a/api.md b/api.md
index a9f5376f..257f5d28 100644
--- a/api.md
+++ b/api.md
@@ -71,7 +71,7 @@ from mixedbread.types.vector_stores import (
Methods:
- client.vector_stores.files.create(vector_store_identifier, \*\*params) -> VectorStoreFile
-- client.vector_stores.files.retrieve(file_id, \*, vector_store_identifier) -> VectorStoreFile
+- client.vector_stores.files.retrieve(file_id, \*, vector_store_identifier, \*\*params) -> VectorStoreFile
- client.vector_stores.files.list(vector_store_identifier, \*\*params) -> SyncCursor[VectorStoreFile]
- client.vector_stores.files.delete(file_id, \*, vector_store_identifier) -> FileDeleteResponse
- client.vector_stores.files.search(\*\*params) -> FileSearchResponse
diff --git a/pyproject.toml b/pyproject.toml
index 0e3a091e..5a7c87c0 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
[project]
name = "mixedbread"
-version = "0.18.0"
+version = "0.19.0"
description = "The official Python library for the Mixedbread API"
dynamic = ["readme"]
license = "Apache-2.0"
diff --git a/src/mixedbread/_version.py b/src/mixedbread/_version.py
index f16b39b9..54e58eec 100644
--- a/src/mixedbread/_version.py
+++ b/src/mixedbread/_version.py
@@ -1,4 +1,4 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
__title__ = "mixedbread"
-__version__ = "0.18.0" # x-release-please-version
+__version__ = "0.19.0" # x-release-please-version
diff --git a/src/mixedbread/resources/vector_stores/files.py b/src/mixedbread/resources/vector_stores/files.py
index f73400a2..44d28868 100644
--- a/src/mixedbread/resources/vector_stores/files.py
+++ b/src/mixedbread/resources/vector_stores/files.py
@@ -20,7 +20,7 @@
)
from ...pagination import SyncCursor, AsyncCursor
from ..._base_client import AsyncPaginator, make_request_options
-from ...types.vector_stores import file_list_params, file_create_params, file_search_params
+from ...types.vector_stores import file_list_params, file_create_params, file_search_params, file_retrieve_params
from ...types.vector_stores.vector_store_file import VectorStoreFile
from ...types.vector_stores.file_delete_response import FileDeleteResponse
from ...types.vector_stores.file_search_response import FileSearchResponse
@@ -113,6 +113,7 @@ def retrieve(
file_id: str,
*,
vector_store_identifier: str,
+ return_chunks: bool | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -133,6 +134,8 @@ def retrieve(
file_id: The ID of the file
+ return_chunks: Whether to return the chunks for the file
+
extra_headers: Send extra headers
extra_query: Add additional query parameters to the request
@@ -150,7 +153,11 @@ def retrieve(
return self._get(
f"/v1/vector_stores/{vector_store_identifier}/files/{file_id}",
options=make_request_options(
- extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ query=maybe_transform({"return_chunks": return_chunks}, file_retrieve_params.FileRetrieveParams),
),
cast_to=VectorStoreFile,
)
@@ -542,6 +549,7 @@ async def retrieve(
file_id: str,
*,
vector_store_identifier: str,
+ return_chunks: bool | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -562,6 +570,8 @@ async def retrieve(
file_id: The ID of the file
+ return_chunks: Whether to return the chunks for the file
+
extra_headers: Send extra headers
extra_query: Add additional query parameters to the request
@@ -579,7 +589,13 @@ async def retrieve(
return await self._get(
f"/v1/vector_stores/{vector_store_identifier}/files/{file_id}",
options=make_request_options(
- extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ query=await async_maybe_transform(
+ {"return_chunks": return_chunks}, file_retrieve_params.FileRetrieveParams
+ ),
),
cast_to=VectorStoreFile,
)
diff --git a/src/mixedbread/types/object_type.py b/src/mixedbread/types/object_type.py
index aecff986..fe19bc12 100644
--- a/src/mixedbread/types/object_type.py
+++ b/src/mixedbread/types/object_type.py
@@ -17,4 +17,5 @@
"api_key",
"data_source",
"data_source.connector",
+ "vector_store.histogram",
]
diff --git a/src/mixedbread/types/scored_audio_url_input_chunk.py b/src/mixedbread/types/scored_audio_url_input_chunk.py
index 8f11be23..8a9d43e8 100644
--- a/src/mixedbread/types/scored_audio_url_input_chunk.py
+++ b/src/mixedbread/types/scored_audio_url_input_chunk.py
@@ -44,11 +44,11 @@ class ScoredAudioURLInputChunk(BaseModel):
type: Optional[Literal["audio_url"]] = None
"""Input type identifier"""
- audio_url: AudioURL
- """The audio input specification."""
-
transcription: Optional[str] = None
"""speech recognition (sr) text of the audio"""
summary: Optional[str] = None
"""summary of the audio"""
+
+ audio_url: AudioURL
+ """The audio input specification."""
diff --git a/src/mixedbread/types/scored_image_url_input_chunk.py b/src/mixedbread/types/scored_image_url_input_chunk.py
index 8be62ae0..9bd0deb2 100644
--- a/src/mixedbread/types/scored_image_url_input_chunk.py
+++ b/src/mixedbread/types/scored_image_url_input_chunk.py
@@ -47,11 +47,11 @@ class ScoredImageURLInputChunk(BaseModel):
type: Optional[Literal["image_url"]] = None
"""Input type identifier"""
- image_url: ImageURL
- """The image input specification."""
-
ocr_text: Optional[str] = None
"""ocr text of the image"""
summary: Optional[str] = None
"""summary of the image"""
+
+ image_url: ImageURL
+ """The image input specification."""
diff --git a/src/mixedbread/types/scored_text_input_chunk.py b/src/mixedbread/types/scored_text_input_chunk.py
index 398a8a7c..35f20c06 100644
--- a/src/mixedbread/types/scored_text_input_chunk.py
+++ b/src/mixedbread/types/scored_text_input_chunk.py
@@ -39,5 +39,8 @@ class ScoredTextInputChunk(BaseModel):
type: Optional[Literal["text"]] = None
"""Input type identifier"""
+ offset: Optional[int] = None
+ """The offset of the text in the file relative to the start of the file."""
+
text: str
"""Text content to process"""
diff --git a/src/mixedbread/types/scored_video_url_input_chunk.py b/src/mixedbread/types/scored_video_url_input_chunk.py
index bd0bea16..78d77359 100644
--- a/src/mixedbread/types/scored_video_url_input_chunk.py
+++ b/src/mixedbread/types/scored_video_url_input_chunk.py
@@ -44,11 +44,11 @@ class ScoredVideoURLInputChunk(BaseModel):
type: Optional[Literal["video_url"]] = None
"""Input type identifier"""
- video_url: VideoURL
- """The video input specification."""
-
transcription: Optional[str] = None
"""speech recognition (sr) text of the video"""
summary: Optional[str] = None
"""summary of the video"""
+
+ video_url: VideoURL
+ """The video input specification."""
diff --git a/src/mixedbread/types/vector_stores/__init__.py b/src/mixedbread/types/vector_stores/__init__.py
index 6ce1aafe..6919f613 100644
--- a/src/mixedbread/types/vector_stores/__init__.py
+++ b/src/mixedbread/types/vector_stores/__init__.py
@@ -8,6 +8,7 @@
from .file_search_params import FileSearchParams as FileSearchParams
from .rerank_config_param import RerankConfigParam as RerankConfigParam
from .file_delete_response import FileDeleteResponse as FileDeleteResponse
+from .file_retrieve_params import FileRetrieveParams as FileRetrieveParams
from .file_search_response import FileSearchResponse as FileSearchResponse
from .scored_vector_store_file import ScoredVectorStoreFile as ScoredVectorStoreFile
from .vector_store_file_status import VectorStoreFileStatus as VectorStoreFileStatus
diff --git a/src/mixedbread/types/vector_stores/file_retrieve_params.py b/src/mixedbread/types/vector_stores/file_retrieve_params.py
new file mode 100644
index 00000000..0bbdedfc
--- /dev/null
+++ b/src/mixedbread/types/vector_stores/file_retrieve_params.py
@@ -0,0 +1,15 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Required, TypedDict
+
+__all__ = ["FileRetrieveParams"]
+
+
+class FileRetrieveParams(TypedDict, total=False):
+ vector_store_identifier: Required[str]
+ """The ID or name of the vector store"""
+
+ return_chunks: bool
+ """Whether to return the chunks for the file"""
diff --git a/src/mixedbread/types/vector_stores/scored_vector_store_file.py b/src/mixedbread/types/vector_stores/scored_vector_store_file.py
index 225806b2..ca72a30f 100644
--- a/src/mixedbread/types/vector_stores/scored_vector_store_file.py
+++ b/src/mixedbread/types/vector_stores/scored_vector_store_file.py
@@ -51,8 +51,8 @@ class ScoredVectorStoreFile(BaseModel):
object: Optional[Literal["vector_store.file"]] = None
"""Type of the object"""
- score: float
- """score of the file"""
-
chunks: Optional[List[Chunk]] = None
"""chunks"""
+
+ score: float
+ """score of the file"""
diff --git a/src/mixedbread/types/vector_stores/vector_store_file.py b/src/mixedbread/types/vector_stores/vector_store_file.py
index 642c066f..96171171 100644
--- a/src/mixedbread/types/vector_stores/vector_store_file.py
+++ b/src/mixedbread/types/vector_stores/vector_store_file.py
@@ -1,13 +1,119 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-from typing import Optional
+from typing import Dict, List, Union, Optional
from datetime import datetime
-from typing_extensions import Literal
+from typing_extensions import Literal, Annotated, TypeAlias
+from ..._utils import PropertyInfo
from ..._models import BaseModel
from .vector_store_file_status import VectorStoreFileStatus
-__all__ = ["VectorStoreFile"]
+__all__ = [
+ "VectorStoreFile",
+ "Chunk",
+ "ChunkTextInputChunk",
+ "ChunkImageURLInputChunkBase",
+ "ChunkAudioURLInputChunkBase",
+ "ChunkVideoURLInputChunkBase",
+]
+
+
+class ChunkTextInputChunk(BaseModel):
+ chunk_index: int
+ """position of the chunk in a file"""
+
+ mime_type: Optional[str] = None
+ """mime type of the chunk"""
+
+ generated_metadata: Optional[Dict[str, object]] = None
+ """metadata of the chunk"""
+
+ model: Optional[str] = None
+ """model used for this chunk"""
+
+ type: Optional[Literal["text"]] = None
+ """Input type identifier"""
+
+ offset: Optional[int] = None
+ """The offset of the text in the file relative to the start of the file."""
+
+ text: str
+ """Text content to process"""
+
+
+class ChunkImageURLInputChunkBase(BaseModel):
+ chunk_index: int
+ """position of the chunk in a file"""
+
+ mime_type: Optional[str] = None
+ """mime type of the chunk"""
+
+ generated_metadata: Optional[Dict[str, object]] = None
+ """metadata of the chunk"""
+
+ model: Optional[str] = None
+ """model used for this chunk"""
+
+ type: Optional[Literal["image_url"]] = None
+ """Input type identifier"""
+
+ ocr_text: Optional[str] = None
+ """OCR text of the image"""
+
+ summary: Optional[str] = None
+ """summary of the image"""
+
+
+class ChunkAudioURLInputChunkBase(BaseModel):
+ chunk_index: int
+ """position of the chunk in a file"""
+
+ mime_type: Optional[str] = None
+ """mime type of the chunk"""
+
+ generated_metadata: Optional[Dict[str, object]] = None
+ """metadata of the chunk"""
+
+ model: Optional[str] = None
+ """model used for this chunk"""
+
+ type: Optional[Literal["audio_url"]] = None
+ """Input type identifier"""
+
+ transcription: Optional[str] = None
+ """Speech recognition (SR) text of the audio"""
+
+ summary: Optional[str] = None
+ """summary of the audio"""
+
+
+class ChunkVideoURLInputChunkBase(BaseModel):
+ chunk_index: int
+ """position of the chunk in a file"""
+
+ mime_type: Optional[str] = None
+ """mime type of the chunk"""
+
+ generated_metadata: Optional[Dict[str, object]] = None
+ """metadata of the chunk"""
+
+ model: Optional[str] = None
+ """model used for this chunk"""
+
+ type: Optional[Literal["video_url"]] = None
+ """Input type identifier"""
+
+ transcription: Optional[str] = None
+ """Speech recognition (SR) text of the video"""
+
+ summary: Optional[str] = None
+ """summary of the video"""
+
+
+Chunk: TypeAlias = Annotated[
+ Union[ChunkTextInputChunk, ChunkImageURLInputChunkBase, ChunkAudioURLInputChunkBase, ChunkVideoURLInputChunkBase],
+ PropertyInfo(discriminator="type"),
+]
class VectorStoreFile(BaseModel):
@@ -40,3 +146,6 @@ class VectorStoreFile(BaseModel):
object: Optional[Literal["vector_store.file"]] = None
"""Type of the object"""
+
+ chunks: Optional[List[Chunk]] = None
+ """Chunks of the file; may be absent (see the `return_chunks` option of `files.retrieve`)"""
diff --git a/tests/api_resources/vector_stores/test_files.py b/tests/api_resources/vector_stores/test_files.py
index 1208d103..e0abab0e 100644
--- a/tests/api_resources/vector_stores/test_files.py
+++ b/tests/api_resources/vector_stores/test_files.py
@@ -87,6 +87,15 @@ def test_method_retrieve(self, client: Mixedbread) -> None:
)
assert_matches_type(VectorStoreFile, file, path=["response"])
+ @parametrize
+ def test_method_retrieve_with_all_params(self, client: Mixedbread) -> None:
+ file = client.vector_stores.files.retrieve(
+ file_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e",
+ vector_store_identifier="vector_store_identifier",
+ return_chunks=True,
+ )
+ assert_matches_type(VectorStoreFile, file, path=["response"])
+
@parametrize
def test_raw_response_retrieve(self, client: Mixedbread) -> None:
response = client.vector_stores.files.with_raw_response.retrieve(
@@ -390,6 +399,15 @@ async def test_method_retrieve(self, async_client: AsyncMixedbread) -> None:
)
assert_matches_type(VectorStoreFile, file, path=["response"])
+ @parametrize
+ async def test_method_retrieve_with_all_params(self, async_client: AsyncMixedbread) -> None:
+ file = await async_client.vector_stores.files.retrieve(
+ file_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e",
+ vector_store_identifier="vector_store_identifier",
+ return_chunks=True,
+ )
+ assert_matches_type(VectorStoreFile, file, path=["response"])
+
@parametrize
async def test_raw_response_retrieve(self, async_client: AsyncMixedbread) -> None:
response = await async_client.vector_stores.files.with_raw_response.retrieve(