diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 81f6dc20..bd8f9862 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -1,17 +1,18 @@
name: CI
on:
push:
- branches:
- - main
- pull_request:
- branches:
- - main
- - next
+ branches-ignore:
+ - 'generated'
+ - 'codegen/**'
+ - 'integrated/**'
+ - 'stl-preview-head/**'
+ - 'stl-preview-base/**'
jobs:
lint:
+ timeout-minutes: 10
name: lint
- runs-on: ubuntu-latest
+ runs-on: ${{ github.repository == 'stainless-sdks/mixedbread-python' && 'depot-ubuntu-24.04' || 'ubuntu-latest' }}
steps:
- uses: actions/checkout@v4
@@ -30,8 +31,9 @@ jobs:
run: ./scripts/lint
test:
+ timeout-minutes: 10
name: test
- runs-on: ubuntu-latest
+ runs-on: ${{ github.repository == 'stainless-sdks/mixedbread-python' && 'depot-ubuntu-24.04' || 'ubuntu-latest' }}
steps:
- uses: actions/checkout@v4
diff --git a/.release-please-manifest.json b/.release-please-manifest.json
index b06ba919..6b7b74c5 100644
--- a/.release-please-manifest.json
+++ b/.release-please-manifest.json
@@ -1,3 +1,3 @@
{
- ".": "0.2.1"
+ ".": "0.3.0"
}
\ No newline at end of file
diff --git a/.stats.yml b/.stats.yml
index 79ca4185..dc5b9a3b 100644
--- a/.stats.yml
+++ b/.stats.yml
@@ -1,4 +1,4 @@
configured_endpoints: 32
openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/mixedbread%2Fmixedbread-82c2c1c322149cd73b2e8e45f475919b941752a89e74464ccecd1aee9352e9be.yml
-openapi_spec_hash: a47fe4cb39ee0cb74ee5888de2f0a5e1
+openapi_spec_hash: f6661e9fafda26e7e9f3fc06739a33ad
config_hash: 6a7c1faa96b022a6959d720d7957eade
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 8d4abbb1..bbf66791 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,36 @@
# Changelog
+## 0.3.0 (2025-05-03)
+
+Full Changelog: [v0.2.1...v0.3.0](https://github.com/mixedbread-ai/mixedbread-python/compare/v0.2.1...v0.3.0)
+
+### Features
+
+* **api:** update via SDK Studio ([b952b6a](https://github.com/mixedbread-ai/mixedbread-python/commit/b952b6ad1ee7324c6b327d943c5386d678728a54))
+
+
+### Bug Fixes
+
+* **perf:** optimize some hot paths ([bd06b56](https://github.com/mixedbread-ai/mixedbread-python/commit/bd06b5637169c1509e8fe985464648923059be1f))
+* **pydantic v1:** more robust ModelField.annotation check ([c2189be](https://github.com/mixedbread-ai/mixedbread-python/commit/c2189bec21bb2aa3939fa6fbf2101af83ac0d5e2))
+
+
+### Chores
+
+* broadly detect json family of content-type headers ([5c95291](https://github.com/mixedbread-ai/mixedbread-python/commit/5c952910272c0da69b007e99f0df942c7b9ccccd))
+* **ci:** add timeout thresholds for CI jobs ([01d43f8](https://github.com/mixedbread-ai/mixedbread-python/commit/01d43f8e077b0c44aa5c899a8929451f5043ed74))
+* **ci:** only use depot for staging repos ([3dc4f6c](https://github.com/mixedbread-ai/mixedbread-python/commit/3dc4f6c070134034b2a237e496a3faf8aab74119))
+* **client:** minor internal fixes ([b662363](https://github.com/mixedbread-ai/mixedbread-python/commit/b66236373a0d5f6c11b38c00c0b6ee553fb91137))
+* **internal:** base client updates ([0f1103d](https://github.com/mixedbread-ai/mixedbread-python/commit/0f1103d58e3c031470a580cac145aad06f6c9316))
+* **internal:** bump pyright version ([6850203](https://github.com/mixedbread-ai/mixedbread-python/commit/68502037ad57b8b20cfbd482151b5c7f3930e2d6))
+* **internal:** codegen related update ([121e724](https://github.com/mixedbread-ai/mixedbread-python/commit/121e7247c1943d414ea9d3bd72a6e4b0c8ec8e56))
+* **internal:** fix list file params ([9892446](https://github.com/mixedbread-ai/mixedbread-python/commit/9892446c896e49243fc52c060589fc54ad4ece45))
+* **internal:** import reformatting ([ecbe1ef](https://github.com/mixedbread-ai/mixedbread-python/commit/ecbe1ef4b2f62723ba1bcb5683b78ae48163e452))
+* **internal:** minor formatting changes ([c0fad7c](https://github.com/mixedbread-ai/mixedbread-python/commit/c0fad7c5489d0dc6f8daae839f0f85620d079d8e))
+* **internal:** refactor retries to not use recursion ([0b184e2](https://github.com/mixedbread-ai/mixedbread-python/commit/0b184e21de7946ba4ad826831a8adbe25fd126f6))
+* **internal:** update models test ([f4cffd9](https://github.com/mixedbread-ai/mixedbread-python/commit/f4cffd9b164310452f345b4d3508a6764671d750))
+* **internal:** update pyright settings ([0d8905c](https://github.com/mixedbread-ai/mixedbread-python/commit/0d8905c2e4865d9ac385146ad49d691c9cb49a14))
+
## 0.2.1 (2025-04-12)
Full Changelog: [v0.2.0...v0.2.1](https://github.com/mixedbread-ai/mixedbread-python/compare/v0.2.0...v0.2.1)
diff --git a/api.md b/api.md
index b1cf6f5a..2844a12f 100644
--- a/api.md
+++ b/api.md
@@ -49,7 +49,7 @@ Methods:
- client.vector_stores.update(vector_store_id, \*\*params) -> VectorStore
- client.vector_stores.list(\*\*params) -> SyncLimitOffset[VectorStore]
- client.vector_stores.delete(vector_store_id) -> VectorStoreDeleteResponse
-- client.vector_stores.question_answering(\*\*params) -> object
+- client.vector_stores.question_answering(\*\*params) -> VectorStoreQuestionAnsweringResponse
- client.vector_stores.search(\*\*params) -> VectorStoreSearchResponse
## Files
diff --git a/pyproject.toml b/pyproject.toml
index 61c45f44..1d52ff5f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
[project]
name = "mixedbread"
-version = "0.2.1"
+version = "0.3.0"
description = "The official Python library for the Mixedbread API"
dynamic = ["readme"]
license = "Apache-2.0"
@@ -42,7 +42,7 @@ Repository = "https://github.com/mixedbread-ai/mixedbread-python"
managed = true
# version pins are in requirements-dev.lock
dev-dependencies = [
- "pyright>=1.1.359",
+ "pyright==1.1.399",
"mypy",
"respx",
"pytest",
@@ -147,6 +147,7 @@ exclude = [
]
reportImplicitOverride = true
+reportOverlappingOverload = false
reportImportCycles = false
reportPrivateUsage = false
diff --git a/requirements-dev.lock b/requirements-dev.lock
index 6dbb147d..4d2d4266 100644
--- a/requirements-dev.lock
+++ b/requirements-dev.lock
@@ -69,7 +69,7 @@ pydantic-core==2.27.1
# via pydantic
pygments==2.18.0
# via rich
-pyright==1.1.392.post0
+pyright==1.1.399
pytest==8.3.3
# via pytest-asyncio
pytest-asyncio==0.24.0
diff --git a/src/mixedbread/_base_client.py b/src/mixedbread/_base_client.py
index b206ef2e..72f949dc 100644
--- a/src/mixedbread/_base_client.py
+++ b/src/mixedbread/_base_client.py
@@ -98,7 +98,11 @@
_AsyncStreamT = TypeVar("_AsyncStreamT", bound=AsyncStream[Any])
if TYPE_CHECKING:
- from httpx._config import DEFAULT_TIMEOUT_CONFIG as HTTPX_DEFAULT_TIMEOUT
+ from httpx._config import (
+ DEFAULT_TIMEOUT_CONFIG, # pyright: ignore[reportPrivateImportUsage]
+ )
+
+ HTTPX_DEFAULT_TIMEOUT = DEFAULT_TIMEOUT_CONFIG
else:
try:
from httpx._config import DEFAULT_TIMEOUT_CONFIG as HTTPX_DEFAULT_TIMEOUT
@@ -115,6 +119,7 @@ class PageInfo:
url: URL | NotGiven
params: Query | NotGiven
+ json: Body | NotGiven
@overload
def __init__(
@@ -130,19 +135,30 @@ def __init__(
params: Query,
) -> None: ...
+ @overload
+ def __init__(
+ self,
+ *,
+ json: Body,
+ ) -> None: ...
+
def __init__(
self,
*,
url: URL | NotGiven = NOT_GIVEN,
+ json: Body | NotGiven = NOT_GIVEN,
params: Query | NotGiven = NOT_GIVEN,
) -> None:
self.url = url
+ self.json = json
self.params = params
@override
def __repr__(self) -> str:
if self.url:
return f"{self.__class__.__name__}(url={self.url})"
+ if self.json:
+ return f"{self.__class__.__name__}(json={self.json})"
return f"{self.__class__.__name__}(params={self.params})"
@@ -191,6 +207,19 @@ def _info_to_options(self, info: PageInfo) -> FinalRequestOptions:
options.url = str(url)
return options
+ if not isinstance(info.json, NotGiven):
+ if not is_mapping(info.json):
+ raise TypeError("Pagination is only supported with mappings")
+
+ if not options.json_data:
+ options.json_data = {**info.json}
+ else:
+ if not is_mapping(options.json_data):
+ raise TypeError("Pagination is only supported with mappings")
+
+ options.json_data = {**options.json_data, **info.json}
+ return options
+
raise ValueError("Unexpected PageInfo state")
@@ -408,8 +437,8 @@ def _build_headers(self, options: FinalRequestOptions, *, retries_taken: int = 0
headers = httpx.Headers(headers_dict)
idempotency_header = self._idempotency_header
- if idempotency_header and options.method.lower() != "get" and idempotency_header not in headers:
- headers[idempotency_header] = options.idempotency_key or self._idempotency_key()
+ if idempotency_header and options.idempotency_key and idempotency_header not in headers:
+ headers[idempotency_header] = options.idempotency_key
# Don't set these headers if they were already set or removed by the caller. We check
# `custom_headers`, which can contain `Omit()`, instead of `headers` to account for the removal case.
@@ -873,7 +902,6 @@ def request(
self,
cast_to: Type[ResponseT],
options: FinalRequestOptions,
- remaining_retries: Optional[int] = None,
*,
stream: Literal[True],
stream_cls: Type[_StreamT],
@@ -884,7 +912,6 @@ def request(
self,
cast_to: Type[ResponseT],
options: FinalRequestOptions,
- remaining_retries: Optional[int] = None,
*,
stream: Literal[False] = False,
) -> ResponseT: ...
@@ -894,7 +921,6 @@ def request(
self,
cast_to: Type[ResponseT],
options: FinalRequestOptions,
- remaining_retries: Optional[int] = None,
*,
stream: bool = False,
stream_cls: Type[_StreamT] | None = None,
@@ -904,121 +930,109 @@ def request(
self,
cast_to: Type[ResponseT],
options: FinalRequestOptions,
- remaining_retries: Optional[int] = None,
*,
stream: bool = False,
stream_cls: type[_StreamT] | None = None,
) -> ResponseT | _StreamT:
- if remaining_retries is not None:
- retries_taken = options.get_max_retries(self.max_retries) - remaining_retries
- else:
- retries_taken = 0
-
- return self._request(
- cast_to=cast_to,
- options=options,
- stream=stream,
- stream_cls=stream_cls,
- retries_taken=retries_taken,
- )
+ cast_to = self._maybe_override_cast_to(cast_to, options)
- def _request(
- self,
- *,
- cast_to: Type[ResponseT],
- options: FinalRequestOptions,
- retries_taken: int,
- stream: bool,
- stream_cls: type[_StreamT] | None,
- ) -> ResponseT | _StreamT:
# create a copy of the options we were given so that if the
# options are mutated later & we then retry, the retries are
# given the original options
input_options = model_copy(options)
+ if input_options.idempotency_key is None and input_options.method.lower() != "get":
+ # ensure the idempotency key is reused between requests
+ input_options.idempotency_key = self._idempotency_key()
- cast_to = self._maybe_override_cast_to(cast_to, options)
- options = self._prepare_options(options)
-
- remaining_retries = options.get_max_retries(self.max_retries) - retries_taken
- request = self._build_request(options, retries_taken=retries_taken)
- self._prepare_request(request)
-
- kwargs: HttpxSendArgs = {}
- if self.custom_auth is not None:
- kwargs["auth"] = self.custom_auth
+ response: httpx.Response | None = None
+ max_retries = input_options.get_max_retries(self.max_retries)
- log.debug("Sending HTTP Request: %s %s", request.method, request.url)
+ retries_taken = 0
+ for retries_taken in range(max_retries + 1):
+ options = model_copy(input_options)
+ options = self._prepare_options(options)
- try:
- response = self._client.send(
- request,
- stream=stream or self._should_stream_response_body(request=request),
- **kwargs,
- )
- except httpx.TimeoutException as err:
- log.debug("Encountered httpx.TimeoutException", exc_info=True)
+ remaining_retries = max_retries - retries_taken
+ request = self._build_request(options, retries_taken=retries_taken)
+ self._prepare_request(request)
- if remaining_retries > 0:
- return self._retry_request(
- input_options,
- cast_to,
- retries_taken=retries_taken,
- stream=stream,
- stream_cls=stream_cls,
- response_headers=None,
- )
+ kwargs: HttpxSendArgs = {}
+ if self.custom_auth is not None:
+ kwargs["auth"] = self.custom_auth
- log.debug("Raising timeout error")
- raise APITimeoutError(request=request) from err
- except Exception as err:
- log.debug("Encountered Exception", exc_info=True)
+ log.debug("Sending HTTP Request: %s %s", request.method, request.url)
- if remaining_retries > 0:
- return self._retry_request(
- input_options,
- cast_to,
- retries_taken=retries_taken,
- stream=stream,
- stream_cls=stream_cls,
- response_headers=None,
+ response = None
+ try:
+ response = self._client.send(
+ request,
+ stream=stream or self._should_stream_response_body(request=request),
+ **kwargs,
)
+ except httpx.TimeoutException as err:
+ log.debug("Encountered httpx.TimeoutException", exc_info=True)
+
+ if remaining_retries > 0:
+ self._sleep_for_retry(
+ retries_taken=retries_taken,
+ max_retries=max_retries,
+ options=input_options,
+ response=None,
+ )
+ continue
+
+ log.debug("Raising timeout error")
+ raise APITimeoutError(request=request) from err
+ except Exception as err:
+ log.debug("Encountered Exception", exc_info=True)
+
+ if remaining_retries > 0:
+ self._sleep_for_retry(
+ retries_taken=retries_taken,
+ max_retries=max_retries,
+ options=input_options,
+ response=None,
+ )
+ continue
+
+ log.debug("Raising connection error")
+ raise APIConnectionError(request=request) from err
+
+ log.debug(
+ 'HTTP Response: %s %s "%i %s" %s',
+ request.method,
+ request.url,
+ response.status_code,
+ response.reason_phrase,
+ response.headers,
+ )
- log.debug("Raising connection error")
- raise APIConnectionError(request=request) from err
-
- log.debug(
- 'HTTP Response: %s %s "%i %s" %s',
- request.method,
- request.url,
- response.status_code,
- response.reason_phrase,
- response.headers,
- )
+ try:
+ response.raise_for_status()
+ except httpx.HTTPStatusError as err: # thrown on 4xx and 5xx status code
+ log.debug("Encountered httpx.HTTPStatusError", exc_info=True)
+
+ if remaining_retries > 0 and self._should_retry(err.response):
+ err.response.close()
+ self._sleep_for_retry(
+ retries_taken=retries_taken,
+ max_retries=max_retries,
+ options=input_options,
+ response=response,
+ )
+ continue
- try:
- response.raise_for_status()
- except httpx.HTTPStatusError as err: # thrown on 4xx and 5xx status code
- log.debug("Encountered httpx.HTTPStatusError", exc_info=True)
-
- if remaining_retries > 0 and self._should_retry(err.response):
- err.response.close()
- return self._retry_request(
- input_options,
- cast_to,
- retries_taken=retries_taken,
- response_headers=err.response.headers,
- stream=stream,
- stream_cls=stream_cls,
- )
+ # If the response is streamed then we need to explicitly read the response
+ # to completion before attempting to access the response text.
+ if not err.response.is_closed:
+ err.response.read()
- # If the response is streamed then we need to explicitly read the response
- # to completion before attempting to access the response text.
- if not err.response.is_closed:
- err.response.read()
+ log.debug("Re-raising status error")
+ raise self._make_status_error_from_response(err.response) from None
- log.debug("Re-raising status error")
- raise self._make_status_error_from_response(err.response) from None
+ break
+ assert response is not None, "could not resolve response (should never happen)"
return self._process_response(
cast_to=cast_to,
options=options,
@@ -1028,37 +1042,20 @@ def _request(
retries_taken=retries_taken,
)
- def _retry_request(
- self,
- options: FinalRequestOptions,
- cast_to: Type[ResponseT],
- *,
- retries_taken: int,
- response_headers: httpx.Headers | None,
- stream: bool,
- stream_cls: type[_StreamT] | None,
- ) -> ResponseT | _StreamT:
- remaining_retries = options.get_max_retries(self.max_retries) - retries_taken
+ def _sleep_for_retry(
+ self, *, retries_taken: int, max_retries: int, options: FinalRequestOptions, response: httpx.Response | None
+ ) -> None:
+ remaining_retries = max_retries - retries_taken
if remaining_retries == 1:
log.debug("1 retry left")
else:
log.debug("%i retries left", remaining_retries)
- timeout = self._calculate_retry_timeout(remaining_retries, options, response_headers)
+ timeout = self._calculate_retry_timeout(remaining_retries, options, response.headers if response else None)
log.info("Retrying request to %s in %f seconds", options.url, timeout)
- # In a synchronous context we are blocking the entire thread. Up to the library user to run the client in a
- # different thread if necessary.
time.sleep(timeout)
- return self._request(
- options=options,
- cast_to=cast_to,
- retries_taken=retries_taken + 1,
- stream=stream,
- stream_cls=stream_cls,
- )
-
def _process_response(
self,
*,
@@ -1402,7 +1399,6 @@ async def request(
options: FinalRequestOptions,
*,
stream: Literal[False] = False,
- remaining_retries: Optional[int] = None,
) -> ResponseT: ...
@overload
@@ -1413,7 +1409,6 @@ async def request(
*,
stream: Literal[True],
stream_cls: type[_AsyncStreamT],
- remaining_retries: Optional[int] = None,
) -> _AsyncStreamT: ...
@overload
@@ -1424,7 +1419,6 @@ async def request(
*,
stream: bool,
stream_cls: type[_AsyncStreamT] | None = None,
- remaining_retries: Optional[int] = None,
) -> ResponseT | _AsyncStreamT: ...
async def request(
@@ -1434,116 +1428,111 @@ async def request(
*,
stream: bool = False,
stream_cls: type[_AsyncStreamT] | None = None,
- remaining_retries: Optional[int] = None,
- ) -> ResponseT | _AsyncStreamT:
- if remaining_retries is not None:
- retries_taken = options.get_max_retries(self.max_retries) - remaining_retries
- else:
- retries_taken = 0
-
- return await self._request(
- cast_to=cast_to,
- options=options,
- stream=stream,
- stream_cls=stream_cls,
- retries_taken=retries_taken,
- )
-
- async def _request(
- self,
- cast_to: Type[ResponseT],
- options: FinalRequestOptions,
- *,
- stream: bool,
- stream_cls: type[_AsyncStreamT] | None,
- retries_taken: int,
) -> ResponseT | _AsyncStreamT:
if self._platform is None:
# `get_platform` can make blocking IO calls so we
# execute it earlier while we are in an async context
self._platform = await asyncify(get_platform)()
+ cast_to = self._maybe_override_cast_to(cast_to, options)
+
# create a copy of the options we were given so that if the
# options are mutated later & we then retry, the retries are
# given the original options
input_options = model_copy(options)
+ if input_options.idempotency_key is None and input_options.method.lower() != "get":
+ # ensure the idempotency key is reused between requests
+ input_options.idempotency_key = self._idempotency_key()
- cast_to = self._maybe_override_cast_to(cast_to, options)
- options = await self._prepare_options(options)
+ response: httpx.Response | None = None
+ max_retries = input_options.get_max_retries(self.max_retries)
- remaining_retries = options.get_max_retries(self.max_retries) - retries_taken
- request = self._build_request(options, retries_taken=retries_taken)
- await self._prepare_request(request)
+ retries_taken = 0
+ for retries_taken in range(max_retries + 1):
+ options = model_copy(input_options)
+ options = await self._prepare_options(options)
- kwargs: HttpxSendArgs = {}
- if self.custom_auth is not None:
- kwargs["auth"] = self.custom_auth
+ remaining_retries = max_retries - retries_taken
+ request = self._build_request(options, retries_taken=retries_taken)
+ await self._prepare_request(request)
- try:
- response = await self._client.send(
- request,
- stream=stream or self._should_stream_response_body(request=request),
- **kwargs,
- )
- except httpx.TimeoutException as err:
- log.debug("Encountered httpx.TimeoutException", exc_info=True)
-
- if remaining_retries > 0:
- return await self._retry_request(
- input_options,
- cast_to,
- retries_taken=retries_taken,
- stream=stream,
- stream_cls=stream_cls,
- response_headers=None,
- )
+ kwargs: HttpxSendArgs = {}
+ if self.custom_auth is not None:
+ kwargs["auth"] = self.custom_auth
- log.debug("Raising timeout error")
- raise APITimeoutError(request=request) from err
- except Exception as err:
- log.debug("Encountered Exception", exc_info=True)
+ log.debug("Sending HTTP Request: %s %s", request.method, request.url)
- if remaining_retries > 0:
- return await self._retry_request(
- input_options,
- cast_to,
- retries_taken=retries_taken,
- stream=stream,
- stream_cls=stream_cls,
- response_headers=None,
+ response = None
+ try:
+ response = await self._client.send(
+ request,
+ stream=stream or self._should_stream_response_body(request=request),
+ **kwargs,
)
+ except httpx.TimeoutException as err:
+ log.debug("Encountered httpx.TimeoutException", exc_info=True)
+
+ if remaining_retries > 0:
+ await self._sleep_for_retry(
+ retries_taken=retries_taken,
+ max_retries=max_retries,
+ options=input_options,
+ response=None,
+ )
+ continue
+
+ log.debug("Raising timeout error")
+ raise APITimeoutError(request=request) from err
+ except Exception as err:
+ log.debug("Encountered Exception", exc_info=True)
+
+ if remaining_retries > 0:
+ await self._sleep_for_retry(
+ retries_taken=retries_taken,
+ max_retries=max_retries,
+ options=input_options,
+ response=None,
+ )
+ continue
+
+ log.debug("Raising connection error")
+ raise APIConnectionError(request=request) from err
+
+ log.debug(
+ 'HTTP Response: %s %s "%i %s" %s',
+ request.method,
+ request.url,
+ response.status_code,
+ response.reason_phrase,
+ response.headers,
+ )
- log.debug("Raising connection error")
- raise APIConnectionError(request=request) from err
+ try:
+ response.raise_for_status()
+ except httpx.HTTPStatusError as err: # thrown on 4xx and 5xx status code
+ log.debug("Encountered httpx.HTTPStatusError", exc_info=True)
+
+ if remaining_retries > 0 and self._should_retry(err.response):
+ await err.response.aclose()
+ await self._sleep_for_retry(
+ retries_taken=retries_taken,
+ max_retries=max_retries,
+ options=input_options,
+ response=response,
+ )
+ continue
- log.debug(
- 'HTTP Request: %s %s "%i %s"', request.method, request.url, response.status_code, response.reason_phrase
- )
+ # If the response is streamed then we need to explicitly read the response
+ # to completion before attempting to access the response text.
+ if not err.response.is_closed:
+ await err.response.aread()
- try:
- response.raise_for_status()
- except httpx.HTTPStatusError as err: # thrown on 4xx and 5xx status code
- log.debug("Encountered httpx.HTTPStatusError", exc_info=True)
-
- if remaining_retries > 0 and self._should_retry(err.response):
- await err.response.aclose()
- return await self._retry_request(
- input_options,
- cast_to,
- retries_taken=retries_taken,
- response_headers=err.response.headers,
- stream=stream,
- stream_cls=stream_cls,
- )
+ log.debug("Re-raising status error")
+ raise self._make_status_error_from_response(err.response) from None
- # If the response is streamed then we need to explicitly read the response
- # to completion before attempting to access the response text.
- if not err.response.is_closed:
- await err.response.aread()
-
- log.debug("Re-raising status error")
- raise self._make_status_error_from_response(err.response) from None
+ break
+ assert response is not None, "could not resolve response (should never happen)"
return await self._process_response(
cast_to=cast_to,
options=options,
@@ -1553,35 +1542,20 @@ async def _request(
retries_taken=retries_taken,
)
- async def _retry_request(
- self,
- options: FinalRequestOptions,
- cast_to: Type[ResponseT],
- *,
- retries_taken: int,
- response_headers: httpx.Headers | None,
- stream: bool,
- stream_cls: type[_AsyncStreamT] | None,
- ) -> ResponseT | _AsyncStreamT:
- remaining_retries = options.get_max_retries(self.max_retries) - retries_taken
+ async def _sleep_for_retry(
+ self, *, retries_taken: int, max_retries: int, options: FinalRequestOptions, response: httpx.Response | None
+ ) -> None:
+ remaining_retries = max_retries - retries_taken
if remaining_retries == 1:
log.debug("1 retry left")
else:
log.debug("%i retries left", remaining_retries)
- timeout = self._calculate_retry_timeout(remaining_retries, options, response_headers)
+ timeout = self._calculate_retry_timeout(remaining_retries, options, response.headers if response else None)
log.info("Retrying request to %s in %f seconds", options.url, timeout)
await anyio.sleep(timeout)
- return await self._request(
- options=options,
- cast_to=cast_to,
- retries_taken=retries_taken + 1,
- stream=stream,
- stream_cls=stream_cls,
- )
-
async def _process_response(
self,
*,
diff --git a/src/mixedbread/_client.py b/src/mixedbread/_client.py
index ece8c7f8..efe87763 100644
--- a/src/mixedbread/_client.py
+++ b/src/mixedbread/_client.py
@@ -339,6 +339,7 @@ def rerank(
rank_fields: Optional[List[str]] | NotGiven = NOT_GIVEN,
top_k: int | NotGiven = NOT_GIVEN,
return_input: bool | NotGiven = NOT_GIVEN,
+ rewrite_query: bool | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -366,6 +367,8 @@ def rerank(
return_input: Whether to return the documents.
+ rewrite_query: Whether or not to rewrite the query before passing it to the reranking model
+
extra_headers: Send extra headers
extra_query: Add additional query parameters to the request
@@ -384,6 +387,7 @@ def rerank(
"rank_fields": rank_fields,
"top_k": top_k,
"return_input": return_input,
+ "rewrite_query": rewrite_query,
},
client_rerank_params.ClientRerankParams,
),
@@ -696,6 +700,7 @@ async def rerank(
rank_fields: Optional[List[str]] | NotGiven = NOT_GIVEN,
top_k: int | NotGiven = NOT_GIVEN,
return_input: bool | NotGiven = NOT_GIVEN,
+ rewrite_query: bool | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -723,6 +728,8 @@ async def rerank(
return_input: Whether to return the documents.
+ rewrite_query: Whether or not to rewrite the query before passing it to the reranking model
+
extra_headers: Send extra headers
extra_query: Add additional query parameters to the request
@@ -741,6 +748,7 @@ async def rerank(
"rank_fields": rank_fields,
"top_k": top_k,
"return_input": return_input,
+ "rewrite_query": rewrite_query,
},
client_rerank_params.ClientRerankParams,
),
diff --git a/src/mixedbread/_models.py b/src/mixedbread/_models.py
index 34935716..798956f1 100644
--- a/src/mixedbread/_models.py
+++ b/src/mixedbread/_models.py
@@ -19,7 +19,6 @@
)
import pydantic
-import pydantic.generics
from pydantic.fields import FieldInfo
from ._types import (
@@ -627,8 +626,8 @@ def _build_discriminated_union_meta(*, union: type, meta_annotations: tuple[Any,
# Note: if one variant defines an alias then they all should
discriminator_alias = field_info.alias
- if field_info.annotation and is_literal_type(field_info.annotation):
- for entry in get_args(field_info.annotation):
+ if (annotation := getattr(field_info, "annotation", None)) and is_literal_type(annotation):
+ for entry in get_args(annotation):
if isinstance(entry, str):
mapping[entry] = variant
diff --git a/src/mixedbread/_response.py b/src/mixedbread/_response.py
index 82f0f3ce..543946b9 100644
--- a/src/mixedbread/_response.py
+++ b/src/mixedbread/_response.py
@@ -235,7 +235,7 @@ def _parse(self, *, to: type[_T] | None = None) -> R | _T:
# split is required to handle cases where additional information is included
# in the response, e.g. application/json; charset=utf-8
content_type, *_ = response.headers.get("content-type", "*").split(";")
- if content_type != "application/json":
+ if not content_type.endswith("json"):
if is_basemodel(cast_to):
try:
data = response.json()
diff --git a/src/mixedbread/_utils/_transform.py b/src/mixedbread/_utils/_transform.py
index 3b2b8e00..b0cc20a7 100644
--- a/src/mixedbread/_utils/_transform.py
+++ b/src/mixedbread/_utils/_transform.py
@@ -5,7 +5,7 @@
import pathlib
from typing import Any, Mapping, TypeVar, cast
from datetime import date, datetime
-from typing_extensions import Literal, get_args, override, get_type_hints
+from typing_extensions import Literal, get_args, override, get_type_hints as _get_type_hints
import anyio
import pydantic
@@ -13,6 +13,7 @@
from ._utils import (
is_list,
is_given,
+ lru_cache,
is_mapping,
is_iterable,
)
@@ -109,6 +110,7 @@ class Params(TypedDict, total=False):
return cast(_T, transformed)
+@lru_cache(maxsize=8096)
def _get_annotated_type(type_: type) -> type | None:
"""If the given type is an `Annotated` type then it is returned, if not `None` is returned.
@@ -433,3 +435,13 @@ async def _async_transform_typeddict(
else:
result[_maybe_transform_key(key, type_)] = await _async_transform_recursive(value, annotation=type_)
return result
+
+
+@lru_cache(maxsize=8096)
+def get_type_hints(
+ obj: Any,
+ globalns: dict[str, Any] | None = None,
+ localns: Mapping[str, Any] | None = None,
+ include_extras: bool = False,
+) -> dict[str, Any]:
+ return _get_type_hints(obj, globalns=globalns, localns=localns, include_extras=include_extras)
diff --git a/src/mixedbread/_utils/_typing.py b/src/mixedbread/_utils/_typing.py
index 278749b1..1bac9542 100644
--- a/src/mixedbread/_utils/_typing.py
+++ b/src/mixedbread/_utils/_typing.py
@@ -13,6 +13,7 @@
get_origin,
)
+from ._utils import lru_cache
from .._types import InheritsGeneric
from .._compat import is_union as _is_union
@@ -66,6 +67,7 @@ def is_type_alias_type(tp: Any, /) -> TypeIs[typing_extensions.TypeAliasType]:
# Extracts T from Annotated[T, ...] or from Required[Annotated[T, ...]]
+@lru_cache(maxsize=8096)
def strip_annotated_type(typ: type) -> type:
if is_required_type(typ) or is_annotated_type(typ):
return strip_annotated_type(cast(type, get_args(typ)[0]))
@@ -108,7 +110,7 @@ class MyResponse(Foo[_T]):
```
"""
cls = cast(object, get_origin(typ) or typ)
- if cls in generic_bases:
+ if cls in generic_bases: # pyright: ignore[reportUnnecessaryContains]
# we're given the class directly
return extract_type_arg(typ, index)
diff --git a/src/mixedbread/_utils/_utils.py b/src/mixedbread/_utils/_utils.py
index e5811bba..ea3cf3f2 100644
--- a/src/mixedbread/_utils/_utils.py
+++ b/src/mixedbread/_utils/_utils.py
@@ -72,8 +72,16 @@ def _extract_items(
from .._files import assert_is_file_content
# We have exhausted the path, return the entry we found.
- assert_is_file_content(obj, key=flattened_key)
assert flattened_key is not None
+
+ if is_list(obj):
+ files: list[tuple[str, FileTypes]] = []
+ for entry in obj:
+ assert_is_file_content(entry, key=flattened_key + "[]" if flattened_key else "")
+ files.append((flattened_key + "[]", cast(FileTypes, entry)))
+ return files
+
+ assert_is_file_content(obj, key=flattened_key)
return [(flattened_key, cast(FileTypes, obj))]
index += 1
diff --git a/src/mixedbread/_version.py b/src/mixedbread/_version.py
index 2c97ccc1..0007f447 100644
--- a/src/mixedbread/_version.py
+++ b/src/mixedbread/_version.py
@@ -1,4 +1,4 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
__title__ = "mixedbread"
-__version__ = "0.2.1" # x-release-please-version
+__version__ = "0.3.0" # x-release-please-version
diff --git a/src/mixedbread/resources/embeddings.py b/src/mixedbread/resources/embeddings.py
index 3800954d..72e10c5d 100644
--- a/src/mixedbread/resources/embeddings.py
+++ b/src/mixedbread/resources/embeddings.py
@@ -9,10 +9,7 @@
from ..types import embedding_create_params
from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven
-from .._utils import (
- maybe_transform,
- async_maybe_transform,
-)
+from .._utils import maybe_transform, async_maybe_transform
from .._compat import cached_property
from .._resource import SyncAPIResource, AsyncAPIResource
from .._response import (
diff --git a/src/mixedbread/resources/extractions/content.py b/src/mixedbread/resources/extractions/content.py
index 2d1b896f..9dd3c663 100644
--- a/src/mixedbread/resources/extractions/content.py
+++ b/src/mixedbread/resources/extractions/content.py
@@ -2,15 +2,12 @@
from __future__ import annotations
-from typing import Dict
+from typing import Dict, List, Union, Iterable, Optional
import httpx
from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven
-from ..._utils import (
- maybe_transform,
- async_maybe_transform,
-)
+from ..._utils import maybe_transform, async_maybe_transform
from ..._compat import cached_property
from ..._resource import SyncAPIResource, AsyncAPIResource
from ..._response import (
@@ -49,8 +46,9 @@ def with_streaming_response(self) -> ContentResourceWithStreamingResponse:
def create(
self,
*,
- content: str,
+ content: Union[str, List[str], Iterable[content_create_params.ContentUnionMember2]],
json_schema: Dict[str, object],
+ instructions: Optional[str] | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -70,6 +68,8 @@ def create(
json_schema: The JSON schema to use for extraction
+ instructions: Additional instructions for the extraction
+
extra_headers: Send extra headers
extra_query: Add additional query parameters to the request
@@ -84,6 +84,7 @@ def create(
{
"content": content,
"json_schema": json_schema,
+ "instructions": instructions,
},
content_create_params.ContentCreateParams,
),
@@ -117,8 +118,9 @@ def with_streaming_response(self) -> AsyncContentResourceWithStreamingResponse:
async def create(
self,
*,
- content: str,
+ content: Union[str, List[str], Iterable[content_create_params.ContentUnionMember2]],
json_schema: Dict[str, object],
+ instructions: Optional[str] | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -138,6 +140,8 @@ async def create(
json_schema: The JSON schema to use for extraction
+ instructions: Additional instructions for the extraction
+
extra_headers: Send extra headers
extra_query: Add additional query parameters to the request
@@ -152,6 +156,7 @@ async def create(
{
"content": content,
"json_schema": json_schema,
+ "instructions": instructions,
},
content_create_params.ContentCreateParams,
),
diff --git a/src/mixedbread/resources/extractions/jobs.py b/src/mixedbread/resources/extractions/jobs.py
index d127824c..29b77911 100644
--- a/src/mixedbread/resources/extractions/jobs.py
+++ b/src/mixedbread/resources/extractions/jobs.py
@@ -7,10 +7,7 @@
import httpx
from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven
-from ..._utils import (
- maybe_transform,
- async_maybe_transform,
-)
+from ..._utils import maybe_transform, async_maybe_transform
from ..._compat import cached_property
from ..._resource import SyncAPIResource, AsyncAPIResource
from ..._response import (
diff --git a/src/mixedbread/resources/extractions/schema.py b/src/mixedbread/resources/extractions/schema.py
index d92032fa..651723f6 100644
--- a/src/mixedbread/resources/extractions/schema.py
+++ b/src/mixedbread/resources/extractions/schema.py
@@ -7,10 +7,7 @@
import httpx
from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven
-from ..._utils import (
- maybe_transform,
- async_maybe_transform,
-)
+from ..._utils import maybe_transform, async_maybe_transform
from ..._compat import cached_property
from ..._resource import SyncAPIResource, AsyncAPIResource
from ..._response import (
diff --git a/src/mixedbread/resources/files.py b/src/mixedbread/resources/files.py
index 49d4e39b..43cafa7d 100644
--- a/src/mixedbread/resources/files.py
+++ b/src/mixedbread/resources/files.py
@@ -8,12 +8,7 @@
from ..types import file_list_params, file_create_params, file_update_params
from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven, FileTypes
-from .._utils import (
- extract_files,
- maybe_transform,
- deepcopy_minimal,
- async_maybe_transform,
-)
+from .._utils import extract_files, maybe_transform, deepcopy_minimal, async_maybe_transform
from .._compat import cached_property
from .._resource import SyncAPIResource, AsyncAPIResource
from .._response import (
diff --git a/src/mixedbread/resources/parsing/jobs.py b/src/mixedbread/resources/parsing/jobs.py
index 5e6440ee..61b050ff 100644
--- a/src/mixedbread/resources/parsing/jobs.py
+++ b/src/mixedbread/resources/parsing/jobs.py
@@ -10,10 +10,7 @@
from ...lib import polling
from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven, FileTypes
-from ..._utils import (
- maybe_transform,
- async_maybe_transform,
-)
+from ..._utils import maybe_transform, async_maybe_transform
from ..._compat import cached_property
from ..._resource import SyncAPIResource, AsyncAPIResource
from ..._response import (
@@ -76,6 +73,7 @@ def create(
| NotGiven = NOT_GIVEN,
chunking_strategy: Literal["page"] | NotGiven = NOT_GIVEN,
return_format: Literal["html", "markdown", "plain"] | NotGiven = NOT_GIVEN,
+ mode: Literal["fast", "high_quality"] | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -99,6 +97,8 @@ def create(
return_format: The format of the returned content
+ mode: The strategy to use for OCR
+
extra_headers: Send extra headers
extra_query: Add additional query parameters to the request
@@ -115,6 +115,7 @@ def create(
"element_types": element_types,
"chunking_strategy": chunking_strategy,
"return_format": return_format,
+ "mode": mode,
},
job_create_params.JobCreateParams,
),
@@ -495,6 +496,7 @@ async def create(
| NotGiven = NOT_GIVEN,
chunking_strategy: Literal["page"] | NotGiven = NOT_GIVEN,
return_format: Literal["html", "markdown", "plain"] | NotGiven = NOT_GIVEN,
+ mode: Literal["fast", "high_quality"] | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -518,6 +520,8 @@ async def create(
return_format: The format of the returned content
+ mode: The strategy to use for OCR
+
extra_headers: Send extra headers
extra_query: Add additional query parameters to the request
@@ -534,6 +538,7 @@ async def create(
"element_types": element_types,
"chunking_strategy": chunking_strategy,
"return_format": return_format,
+ "mode": mode,
},
job_create_params.JobCreateParams,
),
diff --git a/src/mixedbread/resources/vector_stores/files.py b/src/mixedbread/resources/vector_stores/files.py
index 4ca4dfa5..51e7dd3b 100644
--- a/src/mixedbread/resources/vector_stores/files.py
+++ b/src/mixedbread/resources/vector_stores/files.py
@@ -9,10 +9,7 @@
from ...lib import polling
from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven, FileTypes
-from ..._utils import (
- maybe_transform,
- async_maybe_transform,
-)
+from ..._utils import maybe_transform, async_maybe_transform
from ..._compat import cached_property
from ..._resource import SyncAPIResource, AsyncAPIResource
from ..._response import (
diff --git a/src/mixedbread/resources/vector_stores/vector_stores.py b/src/mixedbread/resources/vector_stores/vector_stores.py
index 1efaec37..18b25941 100644
--- a/src/mixedbread/resources/vector_stores/vector_stores.py
+++ b/src/mixedbread/resources/vector_stores/vector_stores.py
@@ -22,10 +22,7 @@
vector_store_question_answering_params,
)
from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven
-from ..._utils import (
- maybe_transform,
- async_maybe_transform,
-)
+from ..._utils import maybe_transform, async_maybe_transform
from ..._compat import cached_property
from ..._resource import SyncAPIResource, AsyncAPIResource
from ..._response import (
@@ -40,8 +37,8 @@
from ...types.expires_after_param import ExpiresAfterParam
from ...types.vector_store_delete_response import VectorStoreDeleteResponse
from ...types.vector_store_search_response import VectorStoreSearchResponse
-from ...types.vector_store_file_search_options_param import VectorStoreFileSearchOptionsParam
from ...types.vector_store_chunk_search_options_param import VectorStoreChunkSearchOptionsParam
+from ...types.vector_store_question_answering_response import VectorStoreQuestionAnsweringResponse
__all__ = ["VectorStoresResource", "AsyncVectorStoresResource"]
@@ -328,7 +325,7 @@ def question_answering(
vector_store_ids: List[str],
top_k: int | NotGiven = NOT_GIVEN,
filters: Optional[vector_store_question_answering_params.Filters] | NotGiven = NOT_GIVEN,
- search_options: VectorStoreFileSearchOptionsParam | NotGiven = NOT_GIVEN,
+ search_options: VectorStoreChunkSearchOptionsParam | NotGiven = NOT_GIVEN,
stream: bool | NotGiven = NOT_GIVEN,
qa_options: vector_store_question_answering_params.QaOptions | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
@@ -337,7 +334,7 @@ def question_answering(
extra_query: Query | None = None,
extra_body: Body | None = None,
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
- ) -> object:
+ ) -> VectorStoreQuestionAnsweringResponse:
"""Question answering
Args:
@@ -383,7 +380,7 @@ def question_answering(
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
),
- cast_to=object,
+ cast_to=VectorStoreQuestionAnsweringResponse,
)
def search(
@@ -738,7 +735,7 @@ async def question_answering(
vector_store_ids: List[str],
top_k: int | NotGiven = NOT_GIVEN,
filters: Optional[vector_store_question_answering_params.Filters] | NotGiven = NOT_GIVEN,
- search_options: VectorStoreFileSearchOptionsParam | NotGiven = NOT_GIVEN,
+ search_options: VectorStoreChunkSearchOptionsParam | NotGiven = NOT_GIVEN,
stream: bool | NotGiven = NOT_GIVEN,
qa_options: vector_store_question_answering_params.QaOptions | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
@@ -747,7 +744,7 @@ async def question_answering(
extra_query: Query | None = None,
extra_body: Body | None = None,
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
- ) -> object:
+ ) -> VectorStoreQuestionAnsweringResponse:
"""Question answering
Args:
@@ -793,7 +790,7 @@ async def question_answering(
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
),
- cast_to=object,
+ cast_to=VectorStoreQuestionAnsweringResponse,
)
async def search(
diff --git a/src/mixedbread/types/__init__.py b/src/mixedbread/types/__init__.py
index 5d068c8e..a8ab8119 100644
--- a/src/mixedbread/types/__init__.py
+++ b/src/mixedbread/types/__init__.py
@@ -36,3 +36,6 @@
from .vector_store_chunk_search_options_param import (
VectorStoreChunkSearchOptionsParam as VectorStoreChunkSearchOptionsParam,
)
+from .vector_store_question_answering_response import (
+ VectorStoreQuestionAnsweringResponse as VectorStoreQuestionAnsweringResponse,
+)
diff --git a/src/mixedbread/types/client_rerank_params.py b/src/mixedbread/types/client_rerank_params.py
index ee17f942..89548084 100644
--- a/src/mixedbread/types/client_rerank_params.py
+++ b/src/mixedbread/types/client_rerank_params.py
@@ -26,3 +26,6 @@ class ClientRerankParams(TypedDict, total=False):
return_input: bool
"""Whether to return the documents."""
+
+ rewrite_query: bool
+ """Whether or not to rewrite the query before passing it to the reranking model"""
diff --git a/src/mixedbread/types/embedding_create_response.py b/src/mixedbread/types/embedding_create_response.py
index c7374f21..24f996e9 100644
--- a/src/mixedbread/types/embedding_create_response.py
+++ b/src/mixedbread/types/embedding_create_response.py
@@ -25,7 +25,7 @@ class EmbeddingCreateResponse(BaseModel):
Literal[
"list",
"parsing_job",
- "job",
+ "extraction_job",
"embedding",
"embedding_dict",
"rank_result",
@@ -33,6 +33,8 @@ class EmbeddingCreateResponse(BaseModel):
"vector_store",
"vector_store.file",
"api_key",
+ "data_source",
+ "data_source.connector",
]
] = None
"""The object type of the response"""
diff --git a/src/mixedbread/types/extractions/content_create_params.py b/src/mixedbread/types/extractions/content_create_params.py
index 372051b0..4e3d3ef9 100644
--- a/src/mixedbread/types/extractions/content_create_params.py
+++ b/src/mixedbread/types/extractions/content_create_params.py
@@ -2,15 +2,48 @@
from __future__ import annotations
-from typing import Dict
-from typing_extensions import Required, TypedDict
+from typing import Dict, List, Union, Iterable, Optional
+from typing_extensions import Literal, Required, TypeAlias, TypedDict
-__all__ = ["ContentCreateParams"]
+__all__ = [
+ "ContentCreateParams",
+ "ContentUnionMember2",
+ "ContentUnionMember2TextInput",
+ "ContentUnionMember2ImageURLInput",
+ "ContentUnionMember2ImageURLInputImageURL",
+]
class ContentCreateParams(TypedDict, total=False):
- content: Required[str]
+ content: Required[Union[str, List[str], Iterable[ContentUnionMember2]]]
"""The content to extract from"""
json_schema: Required[Dict[str, object]]
"""The JSON schema to use for extraction"""
+
+ instructions: Optional[str]
+ """Additional instructions for the extraction"""
+
+
+class ContentUnionMember2TextInput(TypedDict, total=False):
+ type: Literal["text"]
+ """Input type identifier"""
+
+ text: Required[str]
+ """Text content to process"""
+
+
+class ContentUnionMember2ImageURLInputImageURL(TypedDict, total=False):
+ url: Required[str]
+ """The image URL. Can be either a URL or a Data URI."""
+
+
+class ContentUnionMember2ImageURLInput(TypedDict, total=False):
+ type: Literal["image_url"]
+ """Input type identifier"""
+
+ image_url: Required[ContentUnionMember2ImageURLInputImageURL]
+ """The image input specification."""
+
+
+ContentUnionMember2: TypeAlias = Union[ContentUnionMember2TextInput, ContentUnionMember2ImageURLInput]
diff --git a/src/mixedbread/types/info_response.py b/src/mixedbread/types/info_response.py
index fee93225..a7184bcf 100644
--- a/src/mixedbread/types/info_response.py
+++ b/src/mixedbread/types/info_response.py
@@ -1,6 +1,5 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
from .._models import BaseModel
__all__ = ["InfoResponse"]
diff --git a/src/mixedbread/types/parsing/job_create_params.py b/src/mixedbread/types/parsing/job_create_params.py
index 13ca1743..a8e25307 100644
--- a/src/mixedbread/types/parsing/job_create_params.py
+++ b/src/mixedbread/types/parsing/job_create_params.py
@@ -36,3 +36,6 @@ class JobCreateParams(TypedDict, total=False):
return_format: Literal["html", "markdown", "plain"]
"""The format of the returned content"""
+
+ mode: Literal["fast", "high_quality"]
+ """The strategy to use for OCR"""
diff --git a/src/mixedbread/types/parsing/parsing_job.py b/src/mixedbread/types/parsing/parsing_job.py
index 9e9bb92c..959a7ef8 100644
--- a/src/mixedbread/types/parsing/parsing_job.py
+++ b/src/mixedbread/types/parsing/parsing_job.py
@@ -46,7 +46,7 @@ class ResultChunk(BaseModel):
"""The full content of the chunk"""
content_to_embed: str
- """The content to be used for embedding"""
+ """The content of the chunk to embed"""
elements: List[ResultChunkElement]
"""List of elements contained in this chunk"""
diff --git a/src/mixedbread/types/rerank_response.py b/src/mixedbread/types/rerank_response.py
index 1b4dd0a9..ef0d117d 100644
--- a/src/mixedbread/types/rerank_response.py
+++ b/src/mixedbread/types/rerank_response.py
@@ -37,7 +37,7 @@ class RerankResponse(BaseModel):
Literal[
"list",
"parsing_job",
- "job",
+ "extraction_job",
"embedding",
"embedding_dict",
"rank_result",
@@ -45,6 +45,8 @@ class RerankResponse(BaseModel):
"vector_store",
"vector_store.file",
"api_key",
+ "data_source",
+ "data_source.connector",
]
] = None
"""The object type of the response"""
diff --git a/src/mixedbread/types/scored_vector_store_chunk.py b/src/mixedbread/types/scored_vector_store_chunk.py
index 33a37463..87276ad7 100644
--- a/src/mixedbread/types/scored_vector_store_chunk.py
+++ b/src/mixedbread/types/scored_vector_store_chunk.py
@@ -5,10 +5,10 @@
from .._models import BaseModel
-__all__ = ["ScoredVectorStoreChunk", "Value", "ValueImageURLInput", "ValueImageURLInputImage", "ValueTextInput"]
+__all__ = ["ScoredVectorStoreChunk", "Value", "ValueImageURLInput", "ValueImageURLInputImageURL", "ValueTextInput"]
-class ValueImageURLInputImage(BaseModel):
+class ValueImageURLInputImageURL(BaseModel):
url: str
"""The image URL. Can be either a URL or a Data URI."""
@@ -17,7 +17,7 @@ class ValueImageURLInput(BaseModel):
type: Optional[Literal["image_url"]] = None
"""Input type identifier"""
- image: ValueImageURLInputImage
+ image_url: ValueImageURLInputImageURL
"""The image input specification."""
diff --git a/src/mixedbread/types/vector_store_question_answering_params.py b/src/mixedbread/types/vector_store_question_answering_params.py
index d91524df..7d388f89 100644
--- a/src/mixedbread/types/vector_store_question_answering_params.py
+++ b/src/mixedbread/types/vector_store_question_answering_params.py
@@ -6,7 +6,7 @@
from typing_extensions import Required, TypeAlias, TypedDict
from .shared_params.search_filter_condition import SearchFilterCondition
-from .vector_store_file_search_options_param import VectorStoreFileSearchOptionsParam
+from .vector_store_chunk_search_options_param import VectorStoreChunkSearchOptionsParam
__all__ = ["VectorStoreQuestionAnsweringParams", "Filters", "FiltersUnionMember2", "QaOptions"]
@@ -27,7 +27,7 @@ class VectorStoreQuestionAnsweringParams(TypedDict, total=False):
filters: Optional[Filters]
"""Optional filter conditions"""
- search_options: VectorStoreFileSearchOptionsParam
+ search_options: VectorStoreChunkSearchOptionsParam
"""Search configuration options"""
stream: bool
diff --git a/src/mixedbread/types/vector_store_question_answering_response.py b/src/mixedbread/types/vector_store_question_answering_response.py
new file mode 100644
index 00000000..311c2cf5
--- /dev/null
+++ b/src/mixedbread/types/vector_store_question_answering_response.py
@@ -0,0 +1,16 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import List, Optional
+
+from .._models import BaseModel
+from .scored_vector_store_chunk import ScoredVectorStoreChunk
+
+__all__ = ["VectorStoreQuestionAnsweringResponse"]
+
+
+class VectorStoreQuestionAnsweringResponse(BaseModel):
+ answer: str
+ """The answer generated by the LLM"""
+
+ sources: Optional[List[ScoredVectorStoreChunk]] = None
+ """Source documents used to generate the answer"""
diff --git a/tests/api_resources/extractions/test_content.py b/tests/api_resources/extractions/test_content.py
index 923d1cf5..daf0b307 100644
--- a/tests/api_resources/extractions/test_content.py
+++ b/tests/api_resources/extractions/test_content.py
@@ -20,15 +20,24 @@ class TestContent:
@parametrize
def test_method_create(self, client: Mixedbread) -> None:
content = client.extractions.content.create(
- content="content",
+ content="string",
json_schema={"foo": "bar"},
)
assert_matches_type(ExtractionResult, content, path=["response"])
+ @parametrize
+ def test_method_create_with_all_params(self, client: Mixedbread) -> None:
+ content = client.extractions.content.create(
+ content="string",
+ json_schema={"foo": "bar"},
+ instructions="instructions",
+ )
+ assert_matches_type(ExtractionResult, content, path=["response"])
+
@parametrize
def test_raw_response_create(self, client: Mixedbread) -> None:
response = client.extractions.content.with_raw_response.create(
- content="content",
+ content="string",
json_schema={"foo": "bar"},
)
@@ -40,7 +49,7 @@ def test_raw_response_create(self, client: Mixedbread) -> None:
@parametrize
def test_streaming_response_create(self, client: Mixedbread) -> None:
with client.extractions.content.with_streaming_response.create(
- content="content",
+ content="string",
json_schema={"foo": "bar"},
) as response:
assert not response.is_closed
@@ -58,15 +67,24 @@ class TestAsyncContent:
@parametrize
async def test_method_create(self, async_client: AsyncMixedbread) -> None:
content = await async_client.extractions.content.create(
- content="content",
+ content="string",
+ json_schema={"foo": "bar"},
+ )
+ assert_matches_type(ExtractionResult, content, path=["response"])
+
+ @parametrize
+ async def test_method_create_with_all_params(self, async_client: AsyncMixedbread) -> None:
+ content = await async_client.extractions.content.create(
+ content="string",
json_schema={"foo": "bar"},
+ instructions="instructions",
)
assert_matches_type(ExtractionResult, content, path=["response"])
@parametrize
async def test_raw_response_create(self, async_client: AsyncMixedbread) -> None:
response = await async_client.extractions.content.with_raw_response.create(
- content="content",
+ content="string",
json_schema={"foo": "bar"},
)
@@ -78,7 +96,7 @@ async def test_raw_response_create(self, async_client: AsyncMixedbread) -> None:
@parametrize
async def test_streaming_response_create(self, async_client: AsyncMixedbread) -> None:
async with async_client.extractions.content.with_streaming_response.create(
- content="content",
+ content="string",
json_schema={"foo": "bar"},
) as response:
assert not response.is_closed
diff --git a/tests/api_resources/parsing/test_jobs.py b/tests/api_resources/parsing/test_jobs.py
index 55cd95bc..a82ef6a0 100644
--- a/tests/api_resources/parsing/test_jobs.py
+++ b/tests/api_resources/parsing/test_jobs.py
@@ -32,6 +32,7 @@ def test_method_create_with_all_params(self, client: Mixedbread) -> None:
element_types=["caption"],
chunking_strategy="page",
return_format="html",
+ mode="fast",
)
assert_matches_type(ParsingJob, job, path=["response"])
@@ -224,6 +225,7 @@ async def test_method_create_with_all_params(self, async_client: AsyncMixedbread
element_types=["caption"],
chunking_strategy="page",
return_format="html",
+ mode="fast",
)
assert_matches_type(ParsingJob, job, path=["response"])
diff --git a/tests/api_resources/test_client.py b/tests/api_resources/test_client.py
index 2122a9f1..e05d9df6 100644
--- a/tests/api_resources/test_client.py
+++ b/tests/api_resources/test_client.py
@@ -109,6 +109,7 @@ def test_method_rerank_with_all_params(self, client: Mixedbread) -> None:
rank_fields=["content", "title"],
top_k=10,
return_input=False,
+ rewrite_query=False,
)
assert_matches_type(RerankResponse, client_, path=["response"])
@@ -230,6 +231,7 @@ async def test_method_rerank_with_all_params(self, async_client: AsyncMixedbread
rank_fields=["content", "title"],
top_k=10,
return_input=False,
+ rewrite_query=False,
)
assert_matches_type(RerankResponse, client, path=["response"])
diff --git a/tests/api_resources/test_vector_stores.py b/tests/api_resources/test_vector_stores.py
index 5a367783..4ecec3a0 100644
--- a/tests/api_resources/test_vector_stores.py
+++ b/tests/api_resources/test_vector_stores.py
@@ -13,6 +13,7 @@
VectorStore,
VectorStoreDeleteResponse,
VectorStoreSearchResponse,
+ VectorStoreQuestionAnsweringResponse,
)
from mixedbread.pagination import SyncLimitOffset, AsyncLimitOffset
@@ -227,7 +228,7 @@ def test_method_question_answering(self, client: Mixedbread) -> None:
vector_store = client.vector_stores.question_answering(
vector_store_ids=["182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e"],
)
- assert_matches_type(object, vector_store, path=["response"])
+ assert_matches_type(VectorStoreQuestionAnsweringResponse, vector_store, path=["response"])
@parametrize
def test_method_question_answering_with_all_params(self, client: Mixedbread) -> None:
@@ -277,13 +278,11 @@ def test_method_question_answering_with_all_params(self, client: Mixedbread) ->
"score_threshold": 0,
"rewrite_query": True,
"return_metadata": True,
- "return_chunks": True,
- "chunks_per_file": 0,
},
stream=True,
qa_options={"cite": True},
)
- assert_matches_type(object, vector_store, path=["response"])
+ assert_matches_type(VectorStoreQuestionAnsweringResponse, vector_store, path=["response"])
@parametrize
def test_raw_response_question_answering(self, client: Mixedbread) -> None:
@@ -294,7 +293,7 @@ def test_raw_response_question_answering(self, client: Mixedbread) -> None:
assert response.is_closed is True
assert response.http_request.headers.get("X-Stainless-Lang") == "python"
vector_store = response.parse()
- assert_matches_type(object, vector_store, path=["response"])
+ assert_matches_type(VectorStoreQuestionAnsweringResponse, vector_store, path=["response"])
@parametrize
def test_streaming_response_question_answering(self, client: Mixedbread) -> None:
@@ -305,7 +304,7 @@ def test_streaming_response_question_answering(self, client: Mixedbread) -> None
assert response.http_request.headers.get("X-Stainless-Lang") == "python"
vector_store = response.parse()
- assert_matches_type(object, vector_store, path=["response"])
+ assert_matches_type(VectorStoreQuestionAnsweringResponse, vector_store, path=["response"])
assert cast(Any, response.is_closed) is True
@@ -604,7 +603,7 @@ async def test_method_question_answering(self, async_client: AsyncMixedbread) ->
vector_store = await async_client.vector_stores.question_answering(
vector_store_ids=["182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e"],
)
- assert_matches_type(object, vector_store, path=["response"])
+ assert_matches_type(VectorStoreQuestionAnsweringResponse, vector_store, path=["response"])
@parametrize
async def test_method_question_answering_with_all_params(self, async_client: AsyncMixedbread) -> None:
@@ -654,13 +653,11 @@ async def test_method_question_answering_with_all_params(self, async_client: Asy
"score_threshold": 0,
"rewrite_query": True,
"return_metadata": True,
- "return_chunks": True,
- "chunks_per_file": 0,
},
stream=True,
qa_options={"cite": True},
)
- assert_matches_type(object, vector_store, path=["response"])
+ assert_matches_type(VectorStoreQuestionAnsweringResponse, vector_store, path=["response"])
@parametrize
async def test_raw_response_question_answering(self, async_client: AsyncMixedbread) -> None:
@@ -671,7 +668,7 @@ async def test_raw_response_question_answering(self, async_client: AsyncMixedbre
assert response.is_closed is True
assert response.http_request.headers.get("X-Stainless-Lang") == "python"
vector_store = await response.parse()
- assert_matches_type(object, vector_store, path=["response"])
+ assert_matches_type(VectorStoreQuestionAnsweringResponse, vector_store, path=["response"])
@parametrize
async def test_streaming_response_question_answering(self, async_client: AsyncMixedbread) -> None:
@@ -682,7 +679,7 @@ async def test_streaming_response_question_answering(self, async_client: AsyncMi
assert response.http_request.headers.get("X-Stainless-Lang") == "python"
vector_store = await response.parse()
- assert_matches_type(object, vector_store, path=["response"])
+ assert_matches_type(VectorStoreQuestionAnsweringResponse, vector_store, path=["response"])
assert cast(Any, response.is_closed) is True
diff --git a/tests/conftest.py b/tests/conftest.py
index f546bdd5..95be7245 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -10,7 +10,7 @@
from mixedbread import Mixedbread, AsyncMixedbread
if TYPE_CHECKING:
- from _pytest.fixtures import FixtureRequest
+ from _pytest.fixtures import FixtureRequest # pyright: ignore[reportPrivateImportUsage]
pytest.register_assert_rewrite("tests.utils")
diff --git a/tests/test_models.py b/tests/test_models.py
index fa8f6491..7f19dc8b 100644
--- a/tests/test_models.py
+++ b/tests/test_models.py
@@ -492,12 +492,15 @@ class Model(BaseModel):
resource_id: Optional[str] = None
m = Model.construct()
+ assert m.resource_id is None
assert "resource_id" not in m.model_fields_set
m = Model.construct(resource_id=None)
+ assert m.resource_id is None
assert "resource_id" in m.model_fields_set
m = Model.construct(resource_id="foo")
+ assert m.resource_id == "foo"
assert "resource_id" in m.model_fields_set
@@ -832,7 +835,7 @@ class B(BaseModel):
@pytest.mark.skipif(not PYDANTIC_V2, reason="TypeAliasType is not supported in Pydantic v1")
def test_type_alias_type() -> None:
- Alias = TypeAliasType("Alias", str)
+ Alias = TypeAliasType("Alias", str) # pyright: ignore
class Model(BaseModel):
alias: Alias