diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index c6075a38..da1c1d13 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -19,7 +19,7 @@ jobs:
timeout-minutes: 10
name: lint
runs-on: ${{ github.repository == 'stainless-sdks/mixedbread-python' && 'depot-ubuntu-24.04' || 'ubuntu-latest' }}
- if: github.event_name == 'push' || github.event.pull_request.head.repo.fork
+ if: (github.event_name == 'push' || github.event.pull_request.head.repo.fork) && (github.event_name != 'push' || github.event.head_commit.message != 'codegen metadata')
steps:
- uses: actions/checkout@v6
@@ -38,7 +38,7 @@ jobs:
run: ./scripts/lint
build:
- if: github.event_name == 'push' || github.event.pull_request.head.repo.fork
+ if: (github.event_name == 'push' || github.event.pull_request.head.repo.fork) && (github.event_name != 'push' || github.event.head_commit.message != 'codegen metadata')
timeout-minutes: 10
name: build
permissions:
diff --git a/.gitignore b/.gitignore
index 95ceb189..3824f4c4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,5 @@
.prism.log
+.stdy.log
_dev
__pycache__
diff --git a/.release-please-manifest.json b/.release-please-manifest.json
index dd7ced1c..26b1ce24 100644
--- a/.release-please-manifest.json
+++ b/.release-please-manifest.json
@@ -1,3 +1,3 @@
{
- ".": "0.49.0"
+ ".": "0.50.0"
}
\ No newline at end of file
diff --git a/.stats.yml b/.stats.yml
index 4bb5a628..a5e2a677 100644
--- a/.stats.yml
+++ b/.stats.yml
@@ -1,4 +1,4 @@
-configured_endpoints: 56
-openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/mixedbread%2Fmixedbread-3daf4d41b24950791a70688527c10dea9e201d304b8d6432b3acfa50e33e0805.yml
-openapi_spec_hash: 1ecaa0f38266f1c5d1da8fb2e9ef651a
-config_hash: c32ffa6858a02d7f23f6f3dda0b461ed
+configured_endpoints: 55
+openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/mixedbread%2Fmixedbread-ebd391dad1252eb00dd69ac50455b93bcdcd2cf0177d678e160e47f1d017287f.yml
+openapi_spec_hash: 3bfd5f9eb34711238caef851aa81f5c0
+config_hash: 594a43c9cb8089f079bb9c5442646791
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 449aae02..219fa545 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,50 @@
# Changelog
+## 0.50.0 (2026-04-23)
+
+Full Changelog: [v0.49.0...v0.50.0](https://github.com/mixedbread-ai/mixedbread-python/compare/v0.49.0...v0.50.0)
+
+### Features
+
+* **api:** api update ([9cb94bc](https://github.com/mixedbread-ai/mixedbread-python/commit/9cb94bcfc4cf74adfd82b1ff6e8166d5bf597bf7))
+* **api:** api update ([c7f1a92](https://github.com/mixedbread-ai/mixedbread-python/commit/c7f1a92a897408fe54d22ae773e617c0eb9be584))
+* **api:** api update ([34d8ca8](https://github.com/mixedbread-ai/mixedbread-python/commit/34d8ca84c2ed2fd3a7bf77a84047741b537a178a))
+* **api:** api update ([4ad956c](https://github.com/mixedbread-ai/mixedbread-python/commit/4ad956c1b52e6a08dc44958cb331451f5bec75af))
+* **api:** api update ([a24ebe9](https://github.com/mixedbread-ai/mixedbread-python/commit/a24ebe9e241e075c2958cefecfe9e972a5bcd55f))
+* include results in store search events response ([8bb1069](https://github.com/mixedbread-ai/mixedbread-python/commit/8bb1069f016d97cf24bf4f7031c180ca3752e07d))
+* **internal:** implement indices array format for query and form serialization ([e52e262](https://github.com/mixedbread-ai/mixedbread-python/commit/e52e262bd9acd3caa49cf9ba08a29b260b722bf0))
+
+
+### Bug Fixes
+
+* **client:** preserve hardcoded query params when merging with user params ([83d3f20](https://github.com/mixedbread-ai/mixedbread-python/commit/83d3f201d14edd8afa08a6046c2315097d6509d4))
+* ensure file data are only sent as 1 parameter ([0957f16](https://github.com/mixedbread-ai/mixedbread-python/commit/0957f16bb9a20c72499ae6ee86b9494c1e1ee5c8))
+* sanitize endpoint path params ([7d519be](https://github.com/mixedbread-ai/mixedbread-python/commit/7d519be7f4ea6b2ce11c9915ee1ac903ebb97cea))
+
+
+### Performance Improvements
+
+* **client:** optimize file structure copying in multipart requests ([63a2728](https://github.com/mixedbread-ai/mixedbread-python/commit/63a272878dc28300f5169d38cb523cc641498fed))
+
+
+### Chores
+
+* **ci:** skip lint on metadata-only changes ([a3ec133](https://github.com/mixedbread-ai/mixedbread-python/commit/a3ec133b88d1a70a066c17e29dd2ebd5decffb25))
+* **internal:** more robust bootstrap script ([5b52286](https://github.com/mixedbread-ai/mixedbread-python/commit/5b5228640759a7bf277d9478f10eeca3d358e10f))
+* **internal:** update gitignore ([cf3aa78](https://github.com/mixedbread-ai/mixedbread-python/commit/cf3aa78ec44036bbedce95735ee7ab43cc4d566a))
+* **tests:** bump steady to v0.19.4 ([d6e32d5](https://github.com/mixedbread-ai/mixedbread-python/commit/d6e32d5abe0e137375ea79373033c07057b22c83))
+* **tests:** bump steady to v0.19.5 ([59351b9](https://github.com/mixedbread-ai/mixedbread-python/commit/59351b9dcb0eaf2578c985c80a0501d610ff357d))
+* **tests:** bump steady to v0.19.6 ([ac0e7ac](https://github.com/mixedbread-ai/mixedbread-python/commit/ac0e7acc35384a0ae9f78040b60bce03f8258d81))
+* **tests:** bump steady to v0.19.7 ([839ee9c](https://github.com/mixedbread-ai/mixedbread-python/commit/839ee9c7c60749ae03a125f84e87a7be9365daac))
+* **tests:** bump steady to v0.20.1 ([31e2274](https://github.com/mixedbread-ai/mixedbread-python/commit/31e2274bfe265fbebae02c86beae2f575f9b7b18))
+* **tests:** bump steady to v0.20.2 ([6879b15](https://github.com/mixedbread-ai/mixedbread-python/commit/6879b159d830df5df58696899c7551c709595ed4))
+* **tests:** bump steady to v0.22.1 ([3f6450d](https://github.com/mixedbread-ai/mixedbread-python/commit/3f6450dbda2fe9baeee031495fd10f4d87fa815a))
+
+
+### Refactors
+
+* **tests:** switch from prism to steady ([cc454cf](https://github.com/mixedbread-ai/mixedbread-python/commit/cc454cfbe5752c8e439361c52fb31d2d2b14180d))
+
## 0.49.0 (2026-03-19)
Full Changelog: [v0.48.0...v0.49.0](https://github.com/mixedbread-ai/mixedbread-python/compare/v0.48.0...v0.49.0)
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index ed6fa9a1..21b21b70 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -85,7 +85,7 @@ $ pip install ./path-to-wheel-file.whl
## Running tests
-Most tests require you to [set up a mock server](https://github.com/stoplightio/prism) against the OpenAPI spec to run the tests.
+Most tests require you to [set up a mock server](https://github.com/dgellow/steady) against the OpenAPI spec to run the tests.
```sh
$ ./scripts/mock
diff --git a/api.md b/api.md
index 53e03831..e3f1e559 100644
--- a/api.md
+++ b/api.md
@@ -30,13 +30,29 @@ Types:
```python
from mixedbread.types import (
+ AgenticSearchConfig,
+ AudioChunkGeneratedMetadata,
+ AudioURL,
+ CodeChunkGeneratedMetadata,
+ ContextualizationConfig,
ExpiresAfter,
+ FileCounts,
+ ImageChunkGeneratedMetadata,
+ ImageURLOutput,
+ MarkdownChunkGeneratedMetadata,
+ MarkdownHeading,
+ PdfChunkGeneratedMetadata,
+ RerankConfig,
ScoredAudioURLInputChunk,
ScoredImageURLInputChunk,
ScoredTextInputChunk,
ScoredVideoURLInputChunk,
Store,
StoreChunkSearchOptions,
+ StoreConfig,
+ TextChunkGeneratedMetadata,
+ VideoChunkGeneratedMetadata,
+ VideoURL,
StoreDeleteResponse,
StoreMetadataFacetsResponse,
StoreQuestionAnsweringResponse,
@@ -61,12 +77,15 @@ Types:
```python
from mixedbread.types.stores import (
- ScoredStoreFile,
- StoreFileStatus,
+ AudioURLInputChunk,
+ ImageURLInputChunk,
StoreFile,
+ StoreFileConfig,
+ StoreFileStatus,
+ TextInputChunk,
+ VideoURLInputChunk,
FileListResponse,
FileDeleteResponse,
- FileSearchResponse,
)
```
@@ -77,7 +96,6 @@ Methods:
- client.stores.files.update(file_identifier, \*, store_identifier, \*\*params) -> StoreFile
- client.stores.files.list(store_identifier, \*\*params) -> FileListResponse
- client.stores.files.delete(file_identifier, \*, store_identifier) -> FileDeleteResponse
-- client.stores.files.search(\*\*params) -> FileSearchResponse
# Parsing
@@ -87,10 +105,13 @@ Types:
```python
from mixedbread.types.parsing import (
+ Chunk,
+ ChunkElement,
ChunkingStrategy,
+ DocumentParserResult,
ElementType,
- ParsingJobStatus,
ParsingJob,
+ ParsingJobStatus,
ReturnFormat,
JobListResponse,
JobDeleteResponse,
@@ -204,7 +225,9 @@ Types:
```python
from mixedbread.types import (
+ APIKeyCreateOrUpdateParams,
DataSource,
+ DataSourceAPIKeyParams,
DataSourceOauth2Params,
DataSourceType,
LinearDataSource,
@@ -243,7 +266,7 @@ Methods:
Types:
```python
-from mixedbread.types import APIKey, APIKeyCreated, APIKeyDeleteResponse
+from mixedbread.types import APIKey, APIKeyCreated, Scope, APIKeyDeleteResponse
```
Methods:
diff --git a/pyproject.toml b/pyproject.toml
index d39892a9..1dc4f151 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
[project]
name = "mixedbread"
-version = "0.49.0"
+version = "0.50.0"
description = "The official Python library for the Mixedbread API"
dynamic = ["readme"]
license = "Apache-2.0"
diff --git a/scripts/bootstrap b/scripts/bootstrap
index b430fee3..fe8451e4 100755
--- a/scripts/bootstrap
+++ b/scripts/bootstrap
@@ -4,7 +4,7 @@ set -e
cd "$(dirname "$0")/.."
-if [ -f "Brewfile" ] && [ "$(uname -s)" = "Darwin" ] && [ "$SKIP_BREW" != "1" ] && [ -t 0 ]; then
+if [ -f "Brewfile" ] && [ "$(uname -s)" = "Darwin" ] && [ "${SKIP_BREW:-}" != "1" ] && [ -t 0 ]; then
brew bundle check >/dev/null 2>&1 || {
echo -n "==> Install Homebrew dependencies? (y/N): "
read -r response
diff --git a/scripts/mock b/scripts/mock
index bcf3b392..9c7c4399 100755
--- a/scripts/mock
+++ b/scripts/mock
@@ -19,34 +19,34 @@ fi
echo "==> Starting mock server with URL ${URL}"
-# Run prism mock on the given spec
+# Run steady mock on the given spec
if [ "$1" == "--daemon" ]; then
# Pre-install the package so the download doesn't eat into the startup timeout
- npm exec --package=@stainless-api/prism-cli@5.15.0 -- prism --version
+ npm exec --package=@stdy/cli@0.22.1 -- steady --version
- npm exec --package=@stainless-api/prism-cli@5.15.0 -- prism mock "$URL" &> .prism.log &
+ npm exec --package=@stdy/cli@0.22.1 -- steady --host 127.0.0.1 -p 4010 --validator-query-array-format=repeat --validator-form-array-format=repeat --validator-query-object-format=brackets --validator-form-object-format=brackets "$URL" &> .stdy.log &
- # Wait for server to come online (max 30s)
+ # Wait for server to come online via health endpoint (max 30s)
echo -n "Waiting for server"
attempts=0
- while ! grep -q "✖ fatal\|Prism is listening" ".prism.log" ; do
+ while ! curl --silent --fail "http://127.0.0.1:4010/_x-steady/health" >/dev/null 2>&1; do
+ if ! kill -0 $! 2>/dev/null; then
+ echo
+ cat .stdy.log
+ exit 1
+ fi
attempts=$((attempts + 1))
if [ "$attempts" -ge 300 ]; then
echo
- echo "Timed out waiting for Prism server to start"
- cat .prism.log
+ echo "Timed out waiting for Steady server to start"
+ cat .stdy.log
exit 1
fi
echo -n "."
sleep 0.1
done
- if grep -q "✖ fatal" ".prism.log"; then
- cat .prism.log
- exit 1
- fi
-
echo
else
- npm exec --package=@stainless-api/prism-cli@5.15.0 -- prism mock "$URL"
+ npm exec --package=@stdy/cli@0.22.1 -- steady --host 127.0.0.1 -p 4010 --validator-query-array-format=repeat --validator-form-array-format=repeat --validator-query-object-format=brackets --validator-form-object-format=brackets "$URL"
fi
diff --git a/scripts/test b/scripts/test
index dbeda2d2..0159035c 100755
--- a/scripts/test
+++ b/scripts/test
@@ -9,8 +9,8 @@ GREEN='\033[0;32m'
YELLOW='\033[0;33m'
NC='\033[0m' # No Color
-function prism_is_running() {
- curl --silent "http://localhost:4010" >/dev/null 2>&1
+function steady_is_running() {
+ curl --silent "http://127.0.0.1:4010/_x-steady/health" >/dev/null 2>&1
}
kill_server_on_port() {
@@ -25,7 +25,7 @@ function is_overriding_api_base_url() {
[ -n "$TEST_API_BASE_URL" ]
}
-if ! is_overriding_api_base_url && ! prism_is_running ; then
+if ! is_overriding_api_base_url && ! steady_is_running ; then
# When we exit this script, make sure to kill the background mock server process
trap 'kill_server_on_port 4010' EXIT
@@ -36,19 +36,19 @@ fi
if is_overriding_api_base_url ; then
echo -e "${GREEN}✔ Running tests against ${TEST_API_BASE_URL}${NC}"
echo
-elif ! prism_is_running ; then
- echo -e "${RED}ERROR:${NC} The test suite will not run without a mock Prism server"
+elif ! steady_is_running ; then
+ echo -e "${RED}ERROR:${NC} The test suite will not run without a mock Steady server"
echo -e "running against your OpenAPI spec."
echo
echo -e "To run the server, pass in the path or url of your OpenAPI"
- echo -e "spec to the prism command:"
+ echo -e "spec to the steady command:"
echo
- echo -e " \$ ${YELLOW}npm exec --package=@stainless-api/prism-cli@5.15.0 -- prism mock path/to/your.openapi.yml${NC}"
+ echo -e " \$ ${YELLOW}npm exec --package=@stdy/cli@0.22.1 -- steady path/to/your.openapi.yml --host 127.0.0.1 -p 4010 --validator-query-array-format=repeat --validator-form-array-format=repeat --validator-query-object-format=brackets --validator-form-object-format=brackets${NC}"
echo
exit 1
else
- echo -e "${GREEN}✔ Mock prism server is running with your OpenAPI spec${NC}"
+ echo -e "${GREEN}✔ Mock steady server is running with your OpenAPI spec${NC}"
echo
fi
diff --git a/src/mixedbread/_base_client.py b/src/mixedbread/_base_client.py
index dded30e9..1d1daaea 100644
--- a/src/mixedbread/_base_client.py
+++ b/src/mixedbread/_base_client.py
@@ -540,6 +540,10 @@ def _build_request(
files = cast(HttpxRequestFiles, ForceMultipartDict())
prepared_url = self._prepare_url(options.url)
+ # preserve hard-coded query params from the url
+ if params and prepared_url.query:
+ params = {**dict(prepared_url.params.items()), **params}
+ prepared_url = prepared_url.copy_with(raw_path=prepared_url.raw_path.split(b"?", 1)[0])
if "_" in prepared_url.host:
# work around https://github.com/encode/httpx/discussions/2880
kwargs["extensions"] = {"sni_hostname": prepared_url.host.replace("_", "-")}
diff --git a/src/mixedbread/_files.py b/src/mixedbread/_files.py
index 729ea489..4df9de45 100644
--- a/src/mixedbread/_files.py
+++ b/src/mixedbread/_files.py
@@ -3,8 +3,8 @@
import io
import os
import pathlib
-from typing import overload
-from typing_extensions import TypeGuard
+from typing import Sequence, cast, overload
+from typing_extensions import TypeVar, TypeGuard
import anyio
@@ -17,7 +17,9 @@
HttpxFileContent,
HttpxRequestFiles,
)
-from ._utils import is_tuple_t, is_mapping_t, is_sequence_t
+from ._utils import is_list, is_mapping, is_tuple_t, is_mapping_t, is_sequence_t
+
+_T = TypeVar("_T")
def is_base64_file_input(obj: object) -> TypeGuard[Base64FileInput]:
@@ -121,3 +123,51 @@ async def async_read_file_content(file: FileContent) -> HttpxFileContent:
return await anyio.Path(file).read_bytes()
return file
+
+
+def deepcopy_with_paths(item: _T, paths: Sequence[Sequence[str]]) -> _T:
+ """Copy only the containers along the given paths.
+
+ Used to guard against mutation by extract_files without copying the entire structure.
+ Only dicts and lists that lie on a path are copied; everything else
+ is returned by reference.
+
+ For example, given paths=[["foo", "files", "file"]] and the structure:
+ {
+ "foo": {
+ "bar": {"baz": {}},
+    "files": {"file": <file>}
+ }
+ }
+ The root dict, "foo", and "files" are copied (they lie on the path).
+ "bar" and "baz" are returned by reference (off the path).
+ """
+ return _deepcopy_with_paths(item, paths, 0)
+
+
+def _deepcopy_with_paths(item: _T, paths: Sequence[Sequence[str]], index: int) -> _T:
+ if not paths:
+ return item
+ if is_mapping(item):
+ key_to_paths: dict[str, list[Sequence[str]]] = {}
+ for path in paths:
+ if index < len(path):
+ key_to_paths.setdefault(path[index], []).append(path)
+
+ # if no path continues through this mapping, it won't be mutated and copying it is redundant
+ if not key_to_paths:
+ return item
+
+ result = dict(item)
+ for key, subpaths in key_to_paths.items():
+ if key in result:
+ result[key] = _deepcopy_with_paths(result[key], subpaths, index + 1)
+ return cast(_T, result)
+ if is_list(item):
+ array_paths = [path for path in paths if index < len(path) and path[index] == ""]
+
+ # if no path expects a list here, nothing will be mutated inside it - return by reference
+ if not array_paths:
+ return cast(_T, item)
+ return cast(_T, [_deepcopy_with_paths(entry, array_paths, index + 1) for entry in item])
+ return item
diff --git a/src/mixedbread/_qs.py b/src/mixedbread/_qs.py
index ada6fd3f..de8c99bc 100644
--- a/src/mixedbread/_qs.py
+++ b/src/mixedbread/_qs.py
@@ -101,7 +101,10 @@ def _stringify_item(
items.extend(self._stringify_item(key, item, opts))
return items
elif array_format == "indices":
- raise NotImplementedError("The array indices format is not supported yet")
+ items = []
+ for i, item in enumerate(value):
+ items.extend(self._stringify_item(f"{key}[{i}]", item, opts))
+ return items
elif array_format == "brackets":
items = []
key = key + "[]"
diff --git a/src/mixedbread/_utils/__init__.py b/src/mixedbread/_utils/__init__.py
index dc64e29a..1c090e51 100644
--- a/src/mixedbread/_utils/__init__.py
+++ b/src/mixedbread/_utils/__init__.py
@@ -1,3 +1,4 @@
+from ._path import path_template as path_template
from ._sync import asyncify as asyncify
from ._proxy import LazyProxy as LazyProxy
from ._utils import (
@@ -23,7 +24,6 @@
coerce_integer as coerce_integer,
file_from_path as file_from_path,
strip_not_given as strip_not_given,
- deepcopy_minimal as deepcopy_minimal,
get_async_library as get_async_library,
maybe_coerce_float as maybe_coerce_float,
get_required_header as get_required_header,
diff --git a/src/mixedbread/_utils/_path.py b/src/mixedbread/_utils/_path.py
new file mode 100644
index 00000000..4d6e1e4c
--- /dev/null
+++ b/src/mixedbread/_utils/_path.py
@@ -0,0 +1,127 @@
+from __future__ import annotations
+
+import re
+from typing import (
+ Any,
+ Mapping,
+ Callable,
+)
+from urllib.parse import quote
+
+# Matches '.' or '..' where each dot is either literal or percent-encoded (%2e / %2E).
+_DOT_SEGMENT_RE = re.compile(r"^(?:\.|%2[eE]){1,2}$")
+
+_PLACEHOLDER_RE = re.compile(r"\{(\w+)\}")
+
+
+def _quote_path_segment_part(value: str) -> str:
+ """Percent-encode `value` for use in a URI path segment.
+
+ Considers characters not in `pchar` set from RFC 3986 §3.3 to be unsafe.
+ https://datatracker.ietf.org/doc/html/rfc3986#section-3.3
+ """
+ # quote() already treats unreserved characters (letters, digits, and -._~)
+ # as safe, so we only need to add sub-delims, ':', and '@'.
+ # Notably, unlike the default `safe` for quote(), / is unsafe and must be quoted.
+ return quote(value, safe="!$&'()*+,;=:@")
+
+
+def _quote_query_part(value: str) -> str:
+ """Percent-encode `value` for use in a URI query string.
+
+ Considers &, = and characters not in `query` set from RFC 3986 §3.4 to be unsafe.
+ https://datatracker.ietf.org/doc/html/rfc3986#section-3.4
+ """
+ return quote(value, safe="!$'()*+,;:@/?")
+
+
+def _quote_fragment_part(value: str) -> str:
+ """Percent-encode `value` for use in a URI fragment.
+
+ Considers characters not in `fragment` set from RFC 3986 §3.5 to be unsafe.
+ https://datatracker.ietf.org/doc/html/rfc3986#section-3.5
+ """
+ return quote(value, safe="!$&'()*+,;=:@/?")
+
+
+def _interpolate(
+ template: str,
+ values: Mapping[str, Any],
+ quoter: Callable[[str], str],
+) -> str:
+ """Replace {name} placeholders in `template`, quoting each value with `quoter`.
+
+ Placeholder names are looked up in `values`.
+
+ Raises:
+ KeyError: If a placeholder is not found in `values`.
+ """
+ # re.split with a capturing group returns alternating
+ # [text, name, text, name, ..., text] elements.
+ parts = _PLACEHOLDER_RE.split(template)
+
+ for i in range(1, len(parts), 2):
+ name = parts[i]
+ if name not in values:
+ raise KeyError(f"a value for placeholder {{{name}}} was not provided")
+ val = values[name]
+ if val is None:
+ parts[i] = "null"
+ elif isinstance(val, bool):
+ parts[i] = "true" if val else "false"
+ else:
+ parts[i] = quoter(str(values[name]))
+
+ return "".join(parts)
+
+
+def path_template(template: str, /, **kwargs: Any) -> str:
+ """Interpolate {name} placeholders in `template` from keyword arguments.
+
+ Args:
+ template: The template string containing {name} placeholders.
+ **kwargs: Keyword arguments to interpolate into the template.
+
+ Returns:
+ The template with placeholders interpolated and percent-encoded.
+
+ Safe characters for percent-encoding are dependent on the URI component.
+ Placeholders in path and fragment portions are percent-encoded where the `segment`
+ and `fragment` sets from RFC 3986 respectively are considered safe.
+ Placeholders in the query portion are percent-encoded where the `query` set from
+    RFC 3986 §3.4 is considered safe except for = and & characters.
+
+ Raises:
+ KeyError: If a placeholder is not found in `kwargs`.
+ ValueError: If resulting path contains /./ or /../ segments (including percent-encoded dot-segments).
+ """
+ # Split the template into path, query, and fragment portions.
+ fragment_template: str | None = None
+ query_template: str | None = None
+
+ rest = template
+ if "#" in rest:
+ rest, fragment_template = rest.split("#", 1)
+ if "?" in rest:
+ rest, query_template = rest.split("?", 1)
+ path_template = rest
+
+ # Interpolate each portion with the appropriate quoting rules.
+ path_result = _interpolate(path_template, kwargs, _quote_path_segment_part)
+
+ # Reject dot-segments (. and ..) in the final assembled path. The check
+ # runs after interpolation so that adjacent placeholders or a mix of static
+ # text and placeholders that together form a dot-segment are caught.
+ # Also reject percent-encoded dot-segments to protect against incorrectly
+ # implemented normalization in servers/proxies.
+ for segment in path_result.split("/"):
+ if _DOT_SEGMENT_RE.match(segment):
+ raise ValueError(f"Constructed path {path_result!r} contains dot-segment {segment!r} which is not allowed")
+
+ result = path_result
+ if query_template is not None:
+ result += "?" + _interpolate(query_template, kwargs, _quote_query_part)
+ if fragment_template is not None:
+ result += "#" + _interpolate(fragment_template, kwargs, _quote_fragment_part)
+
+ return result
diff --git a/src/mixedbread/_utils/_utils.py b/src/mixedbread/_utils/_utils.py
index eec7f4a1..771859f5 100644
--- a/src/mixedbread/_utils/_utils.py
+++ b/src/mixedbread/_utils/_utils.py
@@ -86,8 +86,9 @@ def _extract_items(
index += 1
if is_dict(obj):
try:
- # We are at the last entry in the path so we must remove the field
- if (len(path)) == index:
+ # Remove the field if there are no more dict keys in the path,
+ # only "" traversal markers or end.
+ if all(p == "" for p in path[index:]):
item = obj.pop(key)
else:
item = obj[key]
@@ -176,21 +177,6 @@ def is_iterable(obj: object) -> TypeGuard[Iterable[object]]:
return isinstance(obj, Iterable)
-def deepcopy_minimal(item: _T) -> _T:
- """Minimal reimplementation of copy.deepcopy() that will only copy certain object types:
-
- - mappings, e.g. `dict`
- - list
-
- This is done for performance reasons.
- """
- if is_mapping(item):
- return cast(_T, {k: deepcopy_minimal(v) for k, v in item.items()})
- if is_list(item):
- return cast(_T, [deepcopy_minimal(entry) for entry in item])
- return item
-
-
# copied from https://github.com/Rapptz/RoboDanny
def human_join(seq: Sequence[str], *, delim: str = ", ", final: str = "or") -> str:
size = len(seq)
diff --git a/src/mixedbread/_version.py b/src/mixedbread/_version.py
index 40dca853..84dd5576 100644
--- a/src/mixedbread/_version.py
+++ b/src/mixedbread/_version.py
@@ -1,4 +1,4 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
__title__ = "mixedbread"
-__version__ = "0.49.0" # x-release-please-version
+__version__ = "0.50.0" # x-release-please-version
diff --git a/src/mixedbread/resources/api_keys.py b/src/mixedbread/resources/api_keys.py
index 66408456..df799826 100644
--- a/src/mixedbread/resources/api_keys.py
+++ b/src/mixedbread/resources/api_keys.py
@@ -9,7 +9,7 @@
from ..types import api_key_list_params, api_key_create_params
from .._types import Body, Omit, Query, Headers, NotGiven, omit, not_given
-from .._utils import maybe_transform, async_maybe_transform
+from .._utils import path_template, maybe_transform, async_maybe_transform
from .._compat import cached_property
from .._resource import SyncAPIResource, AsyncAPIResource
from .._response import (
@@ -21,6 +21,7 @@
from ..pagination import SyncLimitOffset, AsyncLimitOffset
from .._base_client import AsyncPaginator, make_request_options
from ..types.api_key import APIKey
+from ..types.scope_param import ScopeParam
from ..types.api_key_created import APIKeyCreated
from ..types.api_key_delete_response import APIKeyDeleteResponse
@@ -51,7 +52,7 @@ def create(
self,
*,
name: str | Omit = omit,
- scope: Optional[Iterable[api_key_create_params.Scope]] | Omit = omit,
+ scope: Optional[Iterable[ScopeParam]] | Omit = omit,
expires_at: Union[str, datetime, None] | Omit = omit,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
@@ -131,7 +132,7 @@ def retrieve(
if not api_key_id:
raise ValueError(f"Expected a non-empty value for `api_key_id` but received {api_key_id!r}")
return self._get(
- f"/v1/api-keys/{api_key_id}",
+ path_template("/v1/api-keys/{api_key_id}", api_key_id=api_key_id),
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
),
@@ -222,7 +223,7 @@ def delete(
if not api_key_id:
raise ValueError(f"Expected a non-empty value for `api_key_id` but received {api_key_id!r}")
return self._delete(
- f"/v1/api-keys/{api_key_id}",
+ path_template("/v1/api-keys/{api_key_id}", api_key_id=api_key_id),
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
),
@@ -264,7 +265,7 @@ def reroll(
if not api_key_id:
raise ValueError(f"Expected a non-empty value for `api_key_id` but received {api_key_id!r}")
return self._post(
- f"/v1/api-keys/{api_key_id}/reroll",
+ path_template("/v1/api-keys/{api_key_id}/reroll", api_key_id=api_key_id),
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
),
@@ -303,7 +304,7 @@ def revoke(
if not api_key_id:
raise ValueError(f"Expected a non-empty value for `api_key_id` but received {api_key_id!r}")
return self._post(
- f"/v1/api-keys/{api_key_id}/revoke",
+ path_template("/v1/api-keys/{api_key_id}/revoke", api_key_id=api_key_id),
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
),
@@ -335,7 +336,7 @@ async def create(
self,
*,
name: str | Omit = omit,
- scope: Optional[Iterable[api_key_create_params.Scope]] | Omit = omit,
+ scope: Optional[Iterable[ScopeParam]] | Omit = omit,
expires_at: Union[str, datetime, None] | Omit = omit,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
@@ -415,7 +416,7 @@ async def retrieve(
if not api_key_id:
raise ValueError(f"Expected a non-empty value for `api_key_id` but received {api_key_id!r}")
return await self._get(
- f"/v1/api-keys/{api_key_id}",
+ path_template("/v1/api-keys/{api_key_id}", api_key_id=api_key_id),
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
),
@@ -506,7 +507,7 @@ async def delete(
if not api_key_id:
raise ValueError(f"Expected a non-empty value for `api_key_id` but received {api_key_id!r}")
return await self._delete(
- f"/v1/api-keys/{api_key_id}",
+ path_template("/v1/api-keys/{api_key_id}", api_key_id=api_key_id),
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
),
@@ -548,7 +549,7 @@ async def reroll(
if not api_key_id:
raise ValueError(f"Expected a non-empty value for `api_key_id` but received {api_key_id!r}")
return await self._post(
- f"/v1/api-keys/{api_key_id}/reroll",
+ path_template("/v1/api-keys/{api_key_id}/reroll", api_key_id=api_key_id),
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
),
@@ -587,7 +588,7 @@ async def revoke(
if not api_key_id:
raise ValueError(f"Expected a non-empty value for `api_key_id` but received {api_key_id!r}")
return await self._post(
- f"/v1/api-keys/{api_key_id}/revoke",
+ path_template("/v1/api-keys/{api_key_id}/revoke", api_key_id=api_key_id),
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
),
diff --git a/src/mixedbread/resources/data_sources/connectors.py b/src/mixedbread/resources/data_sources/connectors.py
index bd913e76..af800c0c 100644
--- a/src/mixedbread/resources/data_sources/connectors.py
+++ b/src/mixedbread/resources/data_sources/connectors.py
@@ -7,7 +7,7 @@
import httpx
from ..._types import Body, Omit, Query, Headers, NotGiven, omit, not_given
-from ..._utils import maybe_transform, async_maybe_transform
+from ..._utils import path_template, maybe_transform, async_maybe_transform
from ..._compat import cached_property
from ..._resource import SyncAPIResource, AsyncAPIResource
from ..._response import (
@@ -98,7 +98,7 @@ def create(
if not data_source_id:
raise ValueError(f"Expected a non-empty value for `data_source_id` but received {data_source_id!r}")
return self._post(
- f"/v1/data_sources/{data_source_id}/connectors",
+ path_template("/v1/data_sources/{data_source_id}/connectors", data_source_id=data_source_id),
body=maybe_transform(
{
"store_id": store_id,
@@ -153,7 +153,11 @@ def retrieve(
if not connector_id:
raise ValueError(f"Expected a non-empty value for `connector_id` but received {connector_id!r}")
return self._get(
- f"/v1/data_sources/{data_source_id}/connectors/{connector_id}",
+ path_template(
+ "/v1/data_sources/{data_source_id}/connectors/{connector_id}",
+ data_source_id=data_source_id,
+ connector_id=connector_id,
+ ),
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
),
@@ -216,7 +220,11 @@ def update(
if not connector_id:
raise ValueError(f"Expected a non-empty value for `connector_id` but received {connector_id!r}")
return self._put(
- f"/v1/data_sources/{data_source_id}/connectors/{connector_id}",
+ path_template(
+ "/v1/data_sources/{data_source_id}/connectors/{connector_id}",
+ data_source_id=data_source_id,
+ connector_id=connector_id,
+ ),
body=maybe_transform(
{
"name": name,
@@ -279,7 +287,7 @@ def list(
if not data_source_id:
raise ValueError(f"Expected a non-empty value for `data_source_id` but received {data_source_id!r}")
return self._get_api_list(
- f"/v1/data_sources/{data_source_id}/connectors",
+ path_template("/v1/data_sources/{data_source_id}/connectors", data_source_id=data_source_id),
page=SyncCursor[DataSourceConnector],
options=make_request_options(
extra_headers=extra_headers,
@@ -337,7 +345,11 @@ def delete(
if not connector_id:
raise ValueError(f"Expected a non-empty value for `connector_id` but received {connector_id!r}")
return self._delete(
- f"/v1/data_sources/{data_source_id}/connectors/{connector_id}",
+ path_template(
+ "/v1/data_sources/{data_source_id}/connectors/{connector_id}",
+ data_source_id=data_source_id,
+ connector_id=connector_id,
+ ),
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
),
@@ -418,7 +430,7 @@ async def create(
if not data_source_id:
raise ValueError(f"Expected a non-empty value for `data_source_id` but received {data_source_id!r}")
return await self._post(
- f"/v1/data_sources/{data_source_id}/connectors",
+ path_template("/v1/data_sources/{data_source_id}/connectors", data_source_id=data_source_id),
body=await async_maybe_transform(
{
"store_id": store_id,
@@ -473,7 +485,11 @@ async def retrieve(
if not connector_id:
raise ValueError(f"Expected a non-empty value for `connector_id` but received {connector_id!r}")
return await self._get(
- f"/v1/data_sources/{data_source_id}/connectors/{connector_id}",
+ path_template(
+ "/v1/data_sources/{data_source_id}/connectors/{connector_id}",
+ data_source_id=data_source_id,
+ connector_id=connector_id,
+ ),
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
),
@@ -536,7 +552,11 @@ async def update(
if not connector_id:
raise ValueError(f"Expected a non-empty value for `connector_id` but received {connector_id!r}")
return await self._put(
- f"/v1/data_sources/{data_source_id}/connectors/{connector_id}",
+ path_template(
+ "/v1/data_sources/{data_source_id}/connectors/{connector_id}",
+ data_source_id=data_source_id,
+ connector_id=connector_id,
+ ),
body=await async_maybe_transform(
{
"name": name,
@@ -599,7 +619,7 @@ def list(
if not data_source_id:
raise ValueError(f"Expected a non-empty value for `data_source_id` but received {data_source_id!r}")
return self._get_api_list(
- f"/v1/data_sources/{data_source_id}/connectors",
+ path_template("/v1/data_sources/{data_source_id}/connectors", data_source_id=data_source_id),
page=AsyncCursor[DataSourceConnector],
options=make_request_options(
extra_headers=extra_headers,
@@ -657,7 +677,11 @@ async def delete(
if not connector_id:
raise ValueError(f"Expected a non-empty value for `connector_id` but received {connector_id!r}")
return await self._delete(
- f"/v1/data_sources/{data_source_id}/connectors/{connector_id}",
+ path_template(
+ "/v1/data_sources/{data_source_id}/connectors/{connector_id}",
+ data_source_id=data_source_id,
+ connector_id=connector_id,
+ ),
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
),
diff --git a/src/mixedbread/resources/data_sources/data_sources.py b/src/mixedbread/resources/data_sources/data_sources.py
index 185eaedb..bb0a9446 100644
--- a/src/mixedbread/resources/data_sources/data_sources.py
+++ b/src/mixedbread/resources/data_sources/data_sources.py
@@ -9,7 +9,7 @@
from ...types import Oauth2Params, data_source_list_params, data_source_create_params, data_source_update_params
from ..._types import Body, Omit, Query, Headers, NotGiven, omit, not_given
-from ..._utils import required_args, maybe_transform, async_maybe_transform
+from ..._utils import path_template, required_args, maybe_transform, async_maybe_transform
from ..._compat import cached_property
from .connectors import (
ConnectorsResource,
@@ -208,7 +208,7 @@ def retrieve(
if not data_source_id:
raise ValueError(f"Expected a non-empty value for `data_source_id` but received {data_source_id!r}")
return self._get(
- f"/v1/data_sources/{data_source_id}",
+ path_template("/v1/data_sources/{data_source_id}", data_source_id=data_source_id),
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
),
@@ -329,7 +329,7 @@ def update(
if not data_source_id:
raise ValueError(f"Expected a non-empty value for `data_source_id` but received {data_source_id!r}")
return self._put(
- f"/v1/data_sources/{data_source_id}",
+ path_template("/v1/data_sources/{data_source_id}", data_source_id=data_source_id),
body=maybe_transform(
{
"type": type,
@@ -434,7 +434,7 @@ def delete(
if not data_source_id:
raise ValueError(f"Expected a non-empty value for `data_source_id` but received {data_source_id!r}")
return self._delete(
- f"/v1/data_sources/{data_source_id}",
+ path_template("/v1/data_sources/{data_source_id}", data_source_id=data_source_id),
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
),
@@ -615,7 +615,7 @@ async def retrieve(
if not data_source_id:
raise ValueError(f"Expected a non-empty value for `data_source_id` but received {data_source_id!r}")
return await self._get(
- f"/v1/data_sources/{data_source_id}",
+ path_template("/v1/data_sources/{data_source_id}", data_source_id=data_source_id),
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
),
@@ -736,7 +736,7 @@ async def update(
if not data_source_id:
raise ValueError(f"Expected a non-empty value for `data_source_id` but received {data_source_id!r}")
return await self._put(
- f"/v1/data_sources/{data_source_id}",
+ path_template("/v1/data_sources/{data_source_id}", data_source_id=data_source_id),
body=await async_maybe_transform(
{
"type": type,
@@ -841,7 +841,7 @@ async def delete(
if not data_source_id:
raise ValueError(f"Expected a non-empty value for `data_source_id` but received {data_source_id!r}")
return await self._delete(
- f"/v1/data_sources/{data_source_id}",
+ path_template("/v1/data_sources/{data_source_id}", data_source_id=data_source_id),
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
),
diff --git a/src/mixedbread/resources/extractions/jobs.py b/src/mixedbread/resources/extractions/jobs.py
index ae48bbf3..3b8a2cf4 100644
--- a/src/mixedbread/resources/extractions/jobs.py
+++ b/src/mixedbread/resources/extractions/jobs.py
@@ -7,7 +7,7 @@
import httpx
from ..._types import Body, Query, Headers, NotGiven, not_given
-from ..._utils import maybe_transform, async_maybe_transform
+from ..._utils import path_template, maybe_transform, async_maybe_transform
from ..._compat import cached_property
from ..._resource import SyncAPIResource, AsyncAPIResource
from ..._response import (
@@ -122,7 +122,7 @@ def retrieve(
if not job_id:
raise ValueError(f"Expected a non-empty value for `job_id` but received {job_id!r}")
return self._get(
- f"/v1/extractions/jobs/{job_id}",
+ path_template("/v1/extractions/jobs/{job_id}", job_id=job_id),
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
),
@@ -229,7 +229,7 @@ async def retrieve(
if not job_id:
raise ValueError(f"Expected a non-empty value for `job_id` but received {job_id!r}")
return await self._get(
- f"/v1/extractions/jobs/{job_id}",
+ path_template("/v1/extractions/jobs/{job_id}", job_id=job_id),
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
),
diff --git a/src/mixedbread/resources/files/files.py b/src/mixedbread/resources/files/files.py
index 8536ba20..c89b028d 100644
--- a/src/mixedbread/resources/files/files.py
+++ b/src/mixedbread/resources/files/files.py
@@ -15,8 +15,9 @@
UploadsResourceWithStreamingResponse,
AsyncUploadsResourceWithStreamingResponse,
)
+from ..._files import deepcopy_with_paths
from ..._types import Body, Omit, Query, Headers, NotGiven, FileTypes, omit, not_given
-from ..._utils import extract_files, maybe_transform, deepcopy_minimal, async_maybe_transform
+from ..._utils import extract_files, path_template, maybe_transform, async_maybe_transform
from ..._compat import cached_property
from ..._resource import SyncAPIResource, AsyncAPIResource
from ..._response import (
@@ -132,7 +133,7 @@ def create(
timeout=timeout,
)
- body = deepcopy_minimal({"file": file})
+ body = deepcopy_with_paths({"file": file}, [["file"]])
files = extract_files(cast(Mapping[str, object], body), paths=[["file"]])
# It should be noted that the actual Content-Type header that will be
# sent to the server will contain a `boundary` parameter, e.g.
@@ -180,7 +181,7 @@ def retrieve(
if not file_id:
raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}")
return self._get(
- f"/v1/files/{file_id}",
+ path_template("/v1/files/{file_id}", file_id=file_id),
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
),
@@ -221,14 +222,14 @@ def update(
"""
if not file_id:
raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}")
- body = deepcopy_minimal({"file": file})
+ body = deepcopy_with_paths({"file": file}, [["file"]])
files = extract_files(cast(Mapping[str, object], body), paths=[["file"]])
# It should be noted that the actual Content-Type header that will be
# sent to the server will contain a `boundary` parameter, e.g.
# multipart/form-data; boundary=---abc--
extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})}
return self._post(
- f"/v1/files/{file_id}",
+ path_template("/v1/files/{file_id}", file_id=file_id),
body=maybe_transform(body, file_update_params.FileUpdateParams),
files=files,
options=make_request_options(
@@ -334,7 +335,7 @@ def delete(
if not file_id:
raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}")
return self._delete(
- f"/v1/files/{file_id}",
+ path_template("/v1/files/{file_id}", file_id=file_id),
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
),
@@ -374,7 +375,7 @@ def content(
raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}")
extra_headers = {"Accept": "application/octet-stream", **(extra_headers or {})}
return self._get(
- f"/v1/files/{file_id}/content",
+ path_template("/v1/files/{file_id}/content", file_id=file_id),
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
),
@@ -467,7 +468,7 @@ async def create(
timeout=timeout,
)
- body = deepcopy_minimal({"file": file})
+ body = deepcopy_with_paths({"file": file}, [["file"]])
files = extract_files(cast(Mapping[str, object], body), paths=[["file"]])
# It should be noted that the actual Content-Type header that will be
# sent to the server will contain a `boundary` parameter, e.g.
@@ -515,7 +516,7 @@ async def retrieve(
if not file_id:
raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}")
return await self._get(
- f"/v1/files/{file_id}",
+ path_template("/v1/files/{file_id}", file_id=file_id),
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
),
@@ -556,14 +557,14 @@ async def update(
"""
if not file_id:
raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}")
- body = deepcopy_minimal({"file": file})
+ body = deepcopy_with_paths({"file": file}, [["file"]])
files = extract_files(cast(Mapping[str, object], body), paths=[["file"]])
# It should be noted that the actual Content-Type header that will be
# sent to the server will contain a `boundary` parameter, e.g.
# multipart/form-data; boundary=---abc--
extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})}
return await self._post(
- f"/v1/files/{file_id}",
+ path_template("/v1/files/{file_id}", file_id=file_id),
body=await async_maybe_transform(body, file_update_params.FileUpdateParams),
files=files,
options=make_request_options(
@@ -669,7 +670,7 @@ async def delete(
if not file_id:
raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}")
return await self._delete(
- f"/v1/files/{file_id}",
+ path_template("/v1/files/{file_id}", file_id=file_id),
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
),
@@ -709,7 +710,7 @@ async def content(
raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}")
extra_headers = {"Accept": "application/octet-stream", **(extra_headers or {})}
return await self._get(
- f"/v1/files/{file_id}/content",
+ path_template("/v1/files/{file_id}/content", file_id=file_id),
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
),
diff --git a/src/mixedbread/resources/files/uploads.py b/src/mixedbread/resources/files/uploads.py
index dc410621..009fbd06 100644
--- a/src/mixedbread/resources/files/uploads.py
+++ b/src/mixedbread/resources/files/uploads.py
@@ -7,7 +7,7 @@
import httpx
from ..._types import Body, Omit, Query, Headers, NotGiven, omit, not_given
-from ..._utils import maybe_transform, async_maybe_transform
+from ..._utils import path_template, maybe_transform, async_maybe_transform
from ..._compat import cached_property
from ..._resource import SyncAPIResource, AsyncAPIResource
from ..._response import (
@@ -129,7 +129,7 @@ def retrieve(
if not upload_id:
raise ValueError(f"Expected a non-empty value for `upload_id` but received {upload_id!r}")
return self._get(
- f"/v1/files/uploads/{upload_id}",
+ path_template("/v1/files/uploads/{upload_id}", upload_id=upload_id),
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
),
@@ -183,7 +183,7 @@ def abort(
if not upload_id:
raise ValueError(f"Expected a non-empty value for `upload_id` but received {upload_id!r}")
return self._post(
- f"/v1/files/uploads/{upload_id}/abort",
+ path_template("/v1/files/uploads/{upload_id}/abort", upload_id=upload_id),
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
),
@@ -223,7 +223,7 @@ def complete(
if not upload_id:
raise ValueError(f"Expected a non-empty value for `upload_id` but received {upload_id!r}")
return self._post(
- f"/v1/files/uploads/{upload_id}/complete",
+ path_template("/v1/files/uploads/{upload_id}/complete", upload_id=upload_id),
body=maybe_transform({"parts": parts}, upload_complete_params.UploadCompleteParams),
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
@@ -333,7 +333,7 @@ async def retrieve(
if not upload_id:
raise ValueError(f"Expected a non-empty value for `upload_id` but received {upload_id!r}")
return await self._get(
- f"/v1/files/uploads/{upload_id}",
+ path_template("/v1/files/uploads/{upload_id}", upload_id=upload_id),
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
),
@@ -387,7 +387,7 @@ async def abort(
if not upload_id:
raise ValueError(f"Expected a non-empty value for `upload_id` but received {upload_id!r}")
return await self._post(
- f"/v1/files/uploads/{upload_id}/abort",
+ path_template("/v1/files/uploads/{upload_id}/abort", upload_id=upload_id),
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
),
@@ -427,7 +427,7 @@ async def complete(
if not upload_id:
raise ValueError(f"Expected a non-empty value for `upload_id` but received {upload_id!r}")
return await self._post(
- f"/v1/files/uploads/{upload_id}/complete",
+ path_template("/v1/files/uploads/{upload_id}/complete", upload_id=upload_id),
body=await async_maybe_transform({"parts": parts}, upload_complete_params.UploadCompleteParams),
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
diff --git a/src/mixedbread/resources/parsing/jobs.py b/src/mixedbread/resources/parsing/jobs.py
index b797da1e..25d83ca8 100644
--- a/src/mixedbread/resources/parsing/jobs.py
+++ b/src/mixedbread/resources/parsing/jobs.py
@@ -11,7 +11,7 @@
from ...lib import polling
from ..._types import Body, Omit, Query, Headers, NotGiven, FileTypes, omit, not_given
from ...lib.multipart_upload import MultipartUploadOptions
-from ..._utils import maybe_transform, async_maybe_transform
+from ..._utils import path_template, maybe_transform, async_maybe_transform
from ..._compat import cached_property
from ..._resource import SyncAPIResource, AsyncAPIResource
from ..._response import (
@@ -145,7 +145,7 @@ def retrieve(
if not job_id:
raise ValueError(f"Expected a non-empty value for `job_id` but received {job_id!r}")
return self._get(
- f"/v1/parsing/jobs/{job_id}",
+ path_template("/v1/parsing/jobs/{job_id}", job_id=job_id),
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
),
@@ -254,7 +254,7 @@ def delete(
if not job_id:
raise ValueError(f"Expected a non-empty value for `job_id` but received {job_id!r}")
return self._delete(
- f"/v1/parsing/jobs/{job_id}",
+ path_template("/v1/parsing/jobs/{job_id}", job_id=job_id),
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
),
@@ -293,7 +293,7 @@ def cancel(
if not job_id:
raise ValueError(f"Expected a non-empty value for `job_id` but received {job_id!r}")
return self._patch(
- f"/v1/parsing/jobs/{job_id}",
+ path_template("/v1/parsing/jobs/{job_id}", job_id=job_id),
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
),
@@ -571,7 +571,7 @@ async def retrieve(
if not job_id:
raise ValueError(f"Expected a non-empty value for `job_id` but received {job_id!r}")
return await self._get(
- f"/v1/parsing/jobs/{job_id}",
+ path_template("/v1/parsing/jobs/{job_id}", job_id=job_id),
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
),
@@ -680,7 +680,7 @@ async def delete(
if not job_id:
raise ValueError(f"Expected a non-empty value for `job_id` but received {job_id!r}")
return await self._delete(
- f"/v1/parsing/jobs/{job_id}",
+ path_template("/v1/parsing/jobs/{job_id}", job_id=job_id),
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
),
@@ -719,7 +719,7 @@ async def cancel(
if not job_id:
raise ValueError(f"Expected a non-empty value for `job_id` but received {job_id!r}")
return await self._patch(
- f"/v1/parsing/jobs/{job_id}",
+ path_template("/v1/parsing/jobs/{job_id}", job_id=job_id),
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
),
diff --git a/src/mixedbread/resources/stores/files.py b/src/mixedbread/resources/stores/files.py
index 5124082c..32a24aa0 100644
--- a/src/mixedbread/resources/stores/files.py
+++ b/src/mixedbread/resources/stores/files.py
@@ -8,9 +8,9 @@
import httpx
from ...lib import polling
-from ..._types import Body, Omit, Query, Headers, NotGiven, FileTypes, SequenceNotStr, omit, not_given
+from ..._types import Body, Omit, Query, Headers, NotGiven, FileTypes, omit, not_given
from ...lib.multipart_upload import MultipartUploadOptions
-from ..._utils import maybe_transform, async_maybe_transform
+from ..._utils import path_template, maybe_transform, async_maybe_transform
from ..._compat import cached_property
from ..._resource import SyncAPIResource, AsyncAPIResource
from ..._response import (
@@ -23,7 +23,6 @@
from ...types.stores import (
file_list_params,
file_create_params,
- file_search_params,
file_update_params,
file_retrieve_params,
)
@@ -31,7 +30,7 @@
from ...types.stores.store_file_status import StoreFileStatus
from ...types.stores.file_list_response import FileListResponse
from ...types.stores.file_delete_response import FileDeleteResponse
-from ...types.stores.file_search_response import FileSearchResponse
+from ...types.stores.store_file_config_param import StoreFileConfigParam
__all__ = ["FilesResource", "AsyncFilesResource"]
@@ -61,11 +60,11 @@ def create(
store_identifier: str,
*,
metadata: object | Omit = omit,
- config: file_create_params.Config | Omit = omit,
+ config: StoreFileConfigParam | Omit = omit,
external_id: Optional[str] | Omit = omit,
overwrite: bool | Omit = omit,
file_id: str,
- experimental: Optional[file_create_params.Experimental] | Omit = omit,
+ experimental: Optional[StoreFileConfigParam] | Omit = omit,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -108,7 +107,7 @@ def create(
if not store_identifier:
raise ValueError(f"Expected a non-empty value for `store_identifier` but received {store_identifier!r}")
return self._post(
- f"/v1/stores/{store_identifier}/files",
+ path_template("/v1/stores/{store_identifier}/files", store_identifier=store_identifier),
body=maybe_transform(
{
"metadata": metadata,
@@ -169,7 +168,11 @@ def retrieve(
if not file_identifier:
raise ValueError(f"Expected a non-empty value for `file_identifier` but received {file_identifier!r}")
return self._get(
- f"/v1/stores/{store_identifier}/files/{file_identifier}",
+ path_template(
+ "/v1/stores/{store_identifier}/files/{file_identifier}",
+ store_identifier=store_identifier,
+ file_identifier=file_identifier,
+ ),
options=make_request_options(
extra_headers=extra_headers,
extra_query=extra_query,
@@ -221,7 +224,11 @@ def update(
if not file_identifier:
raise ValueError(f"Expected a non-empty value for `file_identifier` but received {file_identifier!r}")
return self._patch(
- f"/v1/stores/{store_identifier}/files/{file_identifier}",
+ path_template(
+ "/v1/stores/{store_identifier}/files/{file_identifier}",
+ store_identifier=store_identifier,
+ file_identifier=file_identifier,
+ ),
body=maybe_transform({"metadata": metadata}, file_update_params.FileUpdateParams),
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
@@ -285,7 +292,7 @@ def list(
if not store_identifier:
raise ValueError(f"Expected a non-empty value for `store_identifier` but received {store_identifier!r}")
return self._post(
- f"/v1/stores/{store_identifier}/files/list",
+ path_template("/v1/stores/{store_identifier}/files/list", store_identifier=store_identifier),
body=maybe_transform(
{
"limit": limit,
@@ -343,76 +350,15 @@ def delete(
if not file_identifier:
raise ValueError(f"Expected a non-empty value for `file_identifier` but received {file_identifier!r}")
return self._delete(
- f"/v1/stores/{store_identifier}/files/{file_identifier}",
- options=make_request_options(
- extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
- ),
- cast_to=FileDeleteResponse,
- )
-
- def search(
- self,
- *,
- query: file_search_params.Query,
- store_identifiers: SequenceNotStr[str],
- top_k: int | Omit = omit,
- filters: Optional[file_search_params.Filters] | Omit = omit,
- file_ids: Union[Iterable[object], SequenceNotStr[str], None] | Omit = omit,
- search_options: file_search_params.SearchOptions | Omit = omit,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> FileSearchResponse:
- """
- Search for files within a store based on semantic similarity.
-
- Args: store_identifier: The ID or name of the store to search within
- search_params: Search configuration including query text, pagination, and
- filters
-
- Returns: StoreFileSearchResponse: List of matching files with relevance scores
-
- Args:
- query: Search query text
-
- store_identifiers: IDs or names of stores to search
-
- top_k: Number of results to return
-
- filters: Optional filter conditions
-
- file_ids: Optional list of file IDs to filter chunks by (inclusion filter)
-
- search_options: Search configuration options
-
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- return self._post(
- "/v1/stores/files/search",
- body=maybe_transform(
- {
- "query": query,
- "store_identifiers": store_identifiers,
- "top_k": top_k,
- "filters": filters,
- "file_ids": file_ids,
- "search_options": search_options,
- },
- file_search_params.FileSearchParams,
+ path_template(
+ "/v1/stores/{store_identifier}/files/{file_identifier}",
+ store_identifier=store_identifier,
+ file_identifier=file_identifier,
),
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
),
- cast_to=FileSearchResponse,
+ cast_to=FileDeleteResponse,
)
def poll(
@@ -610,11 +556,11 @@ async def create(
store_identifier: str,
*,
metadata: object | Omit = omit,
- config: file_create_params.Config | Omit = omit,
+ config: StoreFileConfigParam | Omit = omit,
external_id: Optional[str] | Omit = omit,
overwrite: bool | Omit = omit,
file_id: str,
- experimental: Optional[file_create_params.Experimental] | Omit = omit,
+ experimental: Optional[StoreFileConfigParam] | Omit = omit,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -657,7 +603,7 @@ async def create(
if not store_identifier:
raise ValueError(f"Expected a non-empty value for `store_identifier` but received {store_identifier!r}")
return await self._post(
- f"/v1/stores/{store_identifier}/files",
+ path_template("/v1/stores/{store_identifier}/files", store_identifier=store_identifier),
body=await async_maybe_transform(
{
"metadata": metadata,
@@ -718,7 +664,11 @@ async def retrieve(
if not file_identifier:
raise ValueError(f"Expected a non-empty value for `file_identifier` but received {file_identifier!r}")
return await self._get(
- f"/v1/stores/{store_identifier}/files/{file_identifier}",
+ path_template(
+ "/v1/stores/{store_identifier}/files/{file_identifier}",
+ store_identifier=store_identifier,
+ file_identifier=file_identifier,
+ ),
options=make_request_options(
extra_headers=extra_headers,
extra_query=extra_query,
@@ -772,7 +722,11 @@ async def update(
if not file_identifier:
raise ValueError(f"Expected a non-empty value for `file_identifier` but received {file_identifier!r}")
return await self._patch(
- f"/v1/stores/{store_identifier}/files/{file_identifier}",
+ path_template(
+ "/v1/stores/{store_identifier}/files/{file_identifier}",
+ store_identifier=store_identifier,
+ file_identifier=file_identifier,
+ ),
body=await async_maybe_transform({"metadata": metadata}, file_update_params.FileUpdateParams),
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
@@ -836,7 +790,7 @@ async def list(
if not store_identifier:
raise ValueError(f"Expected a non-empty value for `store_identifier` but received {store_identifier!r}")
return await self._post(
- f"/v1/stores/{store_identifier}/files/list",
+ path_template("/v1/stores/{store_identifier}/files/list", store_identifier=store_identifier),
body=await async_maybe_transform(
{
"limit": limit,
@@ -894,76 +848,15 @@ async def delete(
if not file_identifier:
raise ValueError(f"Expected a non-empty value for `file_identifier` but received {file_identifier!r}")
return await self._delete(
- f"/v1/stores/{store_identifier}/files/{file_identifier}",
- options=make_request_options(
- extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
- ),
- cast_to=FileDeleteResponse,
- )
-
- async def search(
- self,
- *,
- query: file_search_params.Query,
- store_identifiers: SequenceNotStr[str],
- top_k: int | Omit = omit,
- filters: Optional[file_search_params.Filters] | Omit = omit,
- file_ids: Union[Iterable[object], SequenceNotStr[str], None] | Omit = omit,
- search_options: file_search_params.SearchOptions | Omit = omit,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> FileSearchResponse:
- """
- Search for files within a store based on semantic similarity.
-
- Args: store_identifier: The ID or name of the store to search within
- search_params: Search configuration including query text, pagination, and
- filters
-
- Returns: StoreFileSearchResponse: List of matching files with relevance scores
-
- Args:
- query: Search query text
-
- store_identifiers: IDs or names of stores to search
-
- top_k: Number of results to return
-
- filters: Optional filter conditions
-
- file_ids: Optional list of file IDs to filter chunks by (inclusion filter)
-
- search_options: Search configuration options
-
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- return await self._post(
- "/v1/stores/files/search",
- body=await async_maybe_transform(
- {
- "query": query,
- "store_identifiers": store_identifiers,
- "top_k": top_k,
- "filters": filters,
- "file_ids": file_ids,
- "search_options": search_options,
- },
- file_search_params.FileSearchParams,
+ path_template(
+ "/v1/stores/{store_identifier}/files/{file_identifier}",
+ store_identifier=store_identifier,
+ file_identifier=file_identifier,
),
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
),
- cast_to=FileSearchResponse,
+ cast_to=FileDeleteResponse,
)
async def poll(
@@ -1155,9 +1048,6 @@ def __init__(self, files: FilesResource) -> None:
self.delete = to_raw_response_wrapper(
files.delete,
)
- self.search = to_raw_response_wrapper(
- files.search,
- )
class AsyncFilesResourceWithRawResponse:
@@ -1179,9 +1069,6 @@ def __init__(self, files: AsyncFilesResource) -> None:
self.delete = async_to_raw_response_wrapper(
files.delete,
)
- self.search = async_to_raw_response_wrapper(
- files.search,
- )
class FilesResourceWithStreamingResponse:
@@ -1203,9 +1090,6 @@ def __init__(self, files: FilesResource) -> None:
self.delete = to_streamed_response_wrapper(
files.delete,
)
- self.search = to_streamed_response_wrapper(
- files.search,
- )
class AsyncFilesResourceWithStreamingResponse:
@@ -1227,6 +1111,3 @@ def __init__(self, files: AsyncFilesResource) -> None:
self.delete = async_to_streamed_response_wrapper(
files.delete,
)
- self.search = async_to_streamed_response_wrapper(
- files.search,
- )
diff --git a/src/mixedbread/resources/stores/stores.py b/src/mixedbread/resources/stores/stores.py
index 1834f5a8..20271dcc 100644
--- a/src/mixedbread/resources/stores/stores.py
+++ b/src/mixedbread/resources/stores/stores.py
@@ -23,7 +23,7 @@
store_question_answering_params,
)
from ..._types import Body, Omit, Query, Headers, NotGiven, SequenceNotStr, omit, not_given
-from ..._utils import maybe_transform, async_maybe_transform
+from ..._utils import path_template, maybe_transform, async_maybe_transform
from ..._compat import cached_property
from ..._resource import SyncAPIResource, AsyncAPIResource
from ..._response import (
@@ -35,6 +35,7 @@
from ...pagination import SyncCursor, AsyncCursor
from ...types.store import Store
from ..._base_client import AsyncPaginator, make_request_options
+from ...types.store_config_param import StoreConfigParam
from ...types.expires_after_param import ExpiresAfterParam
from ...types.store_delete_response import StoreDeleteResponse
from ...types.store_search_response import StoreSearchResponse
@@ -75,9 +76,10 @@ def create(
name: Optional[str] | Omit = omit,
description: Optional[str] | Omit = omit,
is_public: bool | Omit = omit,
+ license: Optional[str] | Omit = omit,
expires_after: Optional[ExpiresAfterParam] | Omit = omit,
metadata: object | Omit = omit,
- config: Optional[store_create_params.Config] | Omit = omit,
+ config: Optional[StoreConfigParam] | Omit = omit,
file_ids: Optional[SequenceNotStr[str]] | Omit = omit,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
@@ -102,6 +104,8 @@ def create(
is_public: Whether the store can be accessed by anyone with valid login credentials
+ license: License for public stores
+
expires_after: Represents an expiration policy for a store.
metadata: Optional metadata key-value pairs
@@ -125,6 +129,7 @@ def create(
"name": name,
"description": description,
"is_public": is_public,
+ "license": license,
"expires_after": expires_after,
"metadata": metadata,
"config": config,
@@ -170,7 +175,7 @@ def retrieve(
if not store_identifier:
raise ValueError(f"Expected a non-empty value for `store_identifier` but received {store_identifier!r}")
return self._get(
- f"/v1/stores/{store_identifier}",
+ path_template("/v1/stores/{store_identifier}", store_identifier=store_identifier),
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
),
@@ -184,6 +189,7 @@ def update(
name: Optional[str] | Omit = omit,
description: Optional[str] | Omit = omit,
is_public: Optional[bool] | Omit = omit,
+ license: Optional[str] | Omit = omit,
expires_after: Optional[ExpiresAfterParam] | Omit = omit,
metadata: object | Omit = omit,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
@@ -211,6 +217,8 @@ def update(
is_public: Whether the store can be accessed by anyone with valid login credentials
+ license: License for public stores
+
expires_after: Represents an expiration policy for a store.
metadata: Optional metadata key-value pairs
@@ -226,12 +234,13 @@ def update(
if not store_identifier:
raise ValueError(f"Expected a non-empty value for `store_identifier` but received {store_identifier!r}")
return self._put(
- f"/v1/stores/{store_identifier}",
+ path_template("/v1/stores/{store_identifier}", store_identifier=store_identifier),
body=maybe_transform(
{
"name": name,
"description": description,
"is_public": is_public,
+ "license": license,
"expires_after": expires_after,
"metadata": metadata,
},
@@ -342,7 +351,7 @@ def delete(
if not store_identifier:
raise ValueError(f"Expected a non-empty value for `store_identifier` but received {store_identifier!r}")
return self._delete(
- f"/v1/stores/{store_identifier}",
+ path_template("/v1/stores/{store_identifier}", store_identifier=store_identifier),
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
),
@@ -596,9 +605,10 @@ async def create(
name: Optional[str] | Omit = omit,
description: Optional[str] | Omit = omit,
is_public: bool | Omit = omit,
+ license: Optional[str] | Omit = omit,
expires_after: Optional[ExpiresAfterParam] | Omit = omit,
metadata: object | Omit = omit,
- config: Optional[store_create_params.Config] | Omit = omit,
+ config: Optional[StoreConfigParam] | Omit = omit,
file_ids: Optional[SequenceNotStr[str]] | Omit = omit,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
@@ -623,6 +633,8 @@ async def create(
is_public: Whether the store can be accessed by anyone with valid login credentials
+ license: License for public stores
+
expires_after: Represents an expiration policy for a store.
metadata: Optional metadata key-value pairs
@@ -646,6 +658,7 @@ async def create(
"name": name,
"description": description,
"is_public": is_public,
+ "license": license,
"expires_after": expires_after,
"metadata": metadata,
"config": config,
@@ -691,7 +704,7 @@ async def retrieve(
if not store_identifier:
raise ValueError(f"Expected a non-empty value for `store_identifier` but received {store_identifier!r}")
return await self._get(
- f"/v1/stores/{store_identifier}",
+ path_template("/v1/stores/{store_identifier}", store_identifier=store_identifier),
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
),
@@ -705,6 +718,7 @@ async def update(
name: Optional[str] | Omit = omit,
description: Optional[str] | Omit = omit,
is_public: Optional[bool] | Omit = omit,
+ license: Optional[str] | Omit = omit,
expires_after: Optional[ExpiresAfterParam] | Omit = omit,
metadata: object | Omit = omit,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
@@ -732,6 +746,8 @@ async def update(
is_public: Whether the store can be accessed by anyone with valid login credentials
+ license: License for public stores
+
expires_after: Represents an expiration policy for a store.
metadata: Optional metadata key-value pairs
@@ -747,12 +763,13 @@ async def update(
if not store_identifier:
raise ValueError(f"Expected a non-empty value for `store_identifier` but received {store_identifier!r}")
return await self._put(
- f"/v1/stores/{store_identifier}",
+ path_template("/v1/stores/{store_identifier}", store_identifier=store_identifier),
body=await async_maybe_transform(
{
"name": name,
"description": description,
"is_public": is_public,
+ "license": license,
"expires_after": expires_after,
"metadata": metadata,
},
@@ -863,7 +880,7 @@ async def delete(
if not store_identifier:
raise ValueError(f"Expected a non-empty value for `store_identifier` but received {store_identifier!r}")
return await self._delete(
- f"/v1/stores/{store_identifier}",
+ path_template("/v1/stores/{store_identifier}", store_identifier=store_identifier),
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
),
diff --git a/src/mixedbread/types/__init__.py b/src/mixedbread/types/__init__.py
index 306c151e..9f1966f9 100644
--- a/src/mixedbread/types/__init__.py
+++ b/src/mixedbread/types/__init__.py
@@ -4,12 +4,18 @@
from . import shared
from .. import _compat
+from .scope import Scope as Scope
from .store import Store as Store
from .shared import Usage as Usage, SearchFilter as SearchFilter, SearchFilterCondition as SearchFilterCondition
from .api_key import APIKey as APIKey
+from .audio_url import AudioURL as AudioURL
from .embedding import Embedding as Embedding
+from .video_url import VideoURL as VideoURL
from .data_source import DataSource as DataSource
+from .file_counts import FileCounts as FileCounts
from .file_object import FileObject as FileObject
+from .scope_param import ScopeParam as ScopeParam
+from .store_config import StoreConfig as StoreConfig
from .expires_after import ExpiresAfter as ExpiresAfter
from .info_response import InfoResponse as InfoResponse
from .oauth2_params import Oauth2Params as Oauth2Params
@@ -18,12 +24,16 @@
from .rerank_response import RerankResponse as RerankResponse
from .data_source_type import DataSourceType as DataSourceType
from .file_list_params import FileListParams as FileListParams
+from .image_url_output import ImageURLOutput as ImageURLOutput
+from .markdown_heading import MarkdownHeading as MarkdownHeading
from .store_list_params import StoreListParams as StoreListParams
from .file_create_params import FileCreateParams as FileCreateParams
from .file_update_params import FileUpdateParams as FileUpdateParams
+from .store_config_param import StoreConfigParam as StoreConfigParam
from .api_key_list_params import APIKeyListParams as APIKeyListParams
from .client_embed_params import ClientEmbedParams as ClientEmbedParams
from .expires_after_param import ExpiresAfterParam as ExpiresAfterParam
+from .rerank_config_param import RerankConfigParam as RerankConfigParam
from .store_create_params import StoreCreateParams as StoreCreateParams
from .store_search_params import StoreSearchParams as StoreSearchParams
from .store_update_params import StoreUpdateParams as StoreUpdateParams
@@ -37,6 +47,7 @@
from .data_source_list_params import DataSourceListParams as DataSourceListParams
from .embedding_create_params import EmbeddingCreateParams as EmbeddingCreateParams
from .scored_text_input_chunk import ScoredTextInputChunk as ScoredTextInputChunk
+from .contextualization_config import ContextualizationConfig as ContextualizationConfig
from .linear_data_source_param import LinearDataSourceParam as LinearDataSourceParam
from .multi_encoding_embedding import MultiEncodingEmbedding as MultiEncodingEmbedding
from .notion_data_source_param import NotionDataSourceParam as NotionDataSourceParam
@@ -44,14 +55,25 @@
from .data_source_oauth2_params import DataSourceOauth2Params as DataSourceOauth2Params
from .data_source_update_params import DataSourceUpdateParams as DataSourceUpdateParams
from .embedding_create_response import EmbeddingCreateResponse as EmbeddingCreateResponse
+from .data_source_api_key_params import DataSourceAPIKeyParams as DataSourceAPIKeyParams
+from .agentic_search_config_param import AgenticSearchConfigParam as AgenticSearchConfigParam
from .data_source_delete_response import DataSourceDeleteResponse as DataSourceDeleteResponse
+from .pdf_chunk_generated_metadata import PdfChunkGeneratedMetadata as PdfChunkGeneratedMetadata
from .scored_audio_url_input_chunk import ScoredAudioURLInputChunk as ScoredAudioURLInputChunk
from .scored_image_url_input_chunk import ScoredImageURLInputChunk as ScoredImageURLInputChunk
from .scored_video_url_input_chunk import ScoredVideoURLInputChunk as ScoredVideoURLInputChunk
from .store_metadata_facets_params import StoreMetadataFacetsParams as StoreMetadataFacetsParams
+from .code_chunk_generated_metadata import CodeChunkGeneratedMetadata as CodeChunkGeneratedMetadata
+from .text_chunk_generated_metadata import TextChunkGeneratedMetadata as TextChunkGeneratedMetadata
+from .audio_chunk_generated_metadata import AudioChunkGeneratedMetadata as AudioChunkGeneratedMetadata
+from .contextualization_config_param import ContextualizationConfigParam as ContextualizationConfigParam
+from .image_chunk_generated_metadata import ImageChunkGeneratedMetadata as ImageChunkGeneratedMetadata
from .store_metadata_facets_response import StoreMetadataFacetsResponse as StoreMetadataFacetsResponse
+from .video_chunk_generated_metadata import VideoChunkGeneratedMetadata as VideoChunkGeneratedMetadata
+from .api_key_create_or_update_params import APIKeyCreateOrUpdateParams as APIKeyCreateOrUpdateParams
from .store_question_answering_params import StoreQuestionAnsweringParams as StoreQuestionAnsweringParams
from .store_chunk_search_options_param import StoreChunkSearchOptionsParam as StoreChunkSearchOptionsParam
+from .markdown_chunk_generated_metadata import MarkdownChunkGeneratedMetadata as MarkdownChunkGeneratedMetadata
from .store_question_answering_response import StoreQuestionAnsweringResponse as StoreQuestionAnsweringResponse
# Rebuild cyclical models only after all modules are imported.
diff --git a/src/mixedbread/types/agentic_search_config_param.py b/src/mixedbread/types/agentic_search_config_param.py
new file mode 100644
index 00000000..8a01d8e4
--- /dev/null
+++ b/src/mixedbread/types/agentic_search_config_param.py
@@ -0,0 +1,24 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Optional
+from typing_extensions import TypedDict
+
+__all__ = ["AgenticSearchConfigParam"]
+
+
+class AgenticSearchConfigParam(TypedDict, total=False):
+ """Configuration for agentic multi-query search."""
+
+ max_rounds: int
+ """Maximum number of search rounds"""
+
+ queries_per_round: int
+ """Maximum queries per round"""
+
+ instructions: Optional[str]
+ """
+ Additional custom instructions (followed only when not in conflict with existing
+ rules)
+ """
diff --git a/src/mixedbread/types/api_key.py b/src/mixedbread/types/api_key.py
index ab7b375d..bfd14270 100644
--- a/src/mixedbread/types/api_key.py
+++ b/src/mixedbread/types/api_key.py
@@ -4,17 +4,10 @@
from datetime import datetime
from typing_extensions import Literal
+from .scope import Scope
from .._models import BaseModel
-__all__ = ["APIKey", "Scope"]
-
-
-class Scope(BaseModel):
- method: Literal["read", "write", "delete", "list", "create", "search"]
-
- resource_type: Optional[Literal["store"]] = None
-
- resource_id: Optional[str] = None
+__all__ = ["APIKey"]
class APIKey(BaseModel):
diff --git a/src/mixedbread/types/api_key_create_or_update_params.py b/src/mixedbread/types/api_key_create_or_update_params.py
new file mode 100644
index 00000000..570d6527
--- /dev/null
+++ b/src/mixedbread/types/api_key_create_or_update_params.py
@@ -0,0 +1,16 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Literal, Required, TypedDict
+
+__all__ = ["APIKeyCreateOrUpdateParams"]
+
+
+class APIKeyCreateOrUpdateParams(TypedDict, total=False):
+ """Base class for API key create or update parameters."""
+
+ type: Literal["api_key"]
+
+ api_key: Required[str]
+ """The API key"""
diff --git a/src/mixedbread/types/api_key_create_params.py b/src/mixedbread/types/api_key_create_params.py
index 829cd517..24728b23 100644
--- a/src/mixedbread/types/api_key_create_params.py
+++ b/src/mixedbread/types/api_key_create_params.py
@@ -4,27 +4,20 @@
from typing import Union, Iterable, Optional
from datetime import datetime
-from typing_extensions import Literal, Required, Annotated, TypedDict
+from typing_extensions import Annotated, TypedDict
from .._utils import PropertyInfo
+from .scope_param import ScopeParam
-__all__ = ["APIKeyCreateParams", "Scope"]
+__all__ = ["APIKeyCreateParams"]
class APIKeyCreateParams(TypedDict, total=False):
name: str
"""A name/description for the API key"""
- scope: Optional[Iterable[Scope]]
+ scope: Optional[Iterable[ScopeParam]]
"""The scope of the API key"""
expires_at: Annotated[Union[str, datetime, None], PropertyInfo(format="iso8601")]
"""Optional expiration datetime"""
-
-
-class Scope(TypedDict, total=False):
- method: Required[Literal["read", "write", "delete", "list", "create", "search"]]
-
- resource_type: Optional[Literal["store"]]
-
- resource_id: Optional[str]
diff --git a/src/mixedbread/types/api_key_created.py b/src/mixedbread/types/api_key_created.py
index 3efdb33a..565af1d6 100644
--- a/src/mixedbread/types/api_key_created.py
+++ b/src/mixedbread/types/api_key_created.py
@@ -4,17 +4,10 @@
from datetime import datetime
from typing_extensions import Literal
+from .scope import Scope
from .._models import BaseModel
-__all__ = ["APIKeyCreated", "Scope"]
-
-
-class Scope(BaseModel):
- method: Literal["read", "write", "delete", "list", "create", "search"]
-
- resource_type: Optional[Literal["store"]] = None
-
- resource_id: Optional[str] = None
+__all__ = ["APIKeyCreated"]
class APIKeyCreated(BaseModel):
diff --git a/src/mixedbread/types/audio_chunk_generated_metadata.py b/src/mixedbread/types/audio_chunk_generated_metadata.py
new file mode 100644
index 00000000..1d20d204
--- /dev/null
+++ b/src/mixedbread/types/audio_chunk_generated_metadata.py
@@ -0,0 +1,42 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import TYPE_CHECKING, Dict, Optional
+from typing_extensions import Literal
+
+from pydantic import Field as FieldInfo
+
+from .._models import BaseModel
+
+__all__ = ["AudioChunkGeneratedMetadata"]
+
+
+class AudioChunkGeneratedMetadata(BaseModel):
+ type: Optional[Literal["audio"]] = None
+
+ file_type: Optional[str] = None
+
+ file_size: Optional[int] = None
+
+ total_duration_seconds: Optional[float] = None
+
+ sample_rate: Optional[int] = None
+
+ channels: Optional[int] = None
+
+ audio_format: Optional[int] = None
+
+ bpm: Optional[int] = None
+
+ file_extension: Optional[str] = None
+
+ if TYPE_CHECKING:
+ # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a
+ # value to this field, so for compatibility we avoid doing it at runtime.
+ __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride]
+
+ # Stub to indicate that arbitrary properties are accepted.
+ # To access properties that are not valid identifiers you can use `getattr`, e.g.
+ # `getattr(obj, '$type')`
+ def __getattr__(self, attr: str) -> object: ...
+ else:
+ __pydantic_extra__: Dict[str, object]
diff --git a/src/mixedbread/types/audio_url.py b/src/mixedbread/types/audio_url.py
new file mode 100644
index 00000000..ce881bde
--- /dev/null
+++ b/src/mixedbread/types/audio_url.py
@@ -0,0 +1,12 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from .._models import BaseModel
+
+__all__ = ["AudioURL"]
+
+
+class AudioURL(BaseModel):
+ """Model for audio URL validation."""
+
+ url: str
+ """The audio URL. Can be either a URL or a Data URI."""
diff --git a/src/mixedbread/types/code_chunk_generated_metadata.py b/src/mixedbread/types/code_chunk_generated_metadata.py
new file mode 100644
index 00000000..ff3aa668
--- /dev/null
+++ b/src/mixedbread/types/code_chunk_generated_metadata.py
@@ -0,0 +1,40 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import TYPE_CHECKING, Dict, Optional
+from typing_extensions import Literal
+
+from pydantic import Field as FieldInfo
+
+from .._models import BaseModel
+
+__all__ = ["CodeChunkGeneratedMetadata"]
+
+
+class CodeChunkGeneratedMetadata(BaseModel):
+ type: Optional[Literal["code"]] = None
+
+ file_type: str
+
+ language: Optional[str] = None
+
+ word_count: Optional[int] = None
+
+ file_size: Optional[int] = None
+
+ start_line: Optional[int] = None
+
+ num_lines: Optional[int] = None
+
+ file_extension: Optional[str] = None
+
+ if TYPE_CHECKING:
+ # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a
+ # value to this field, so for compatibility we avoid doing it at runtime.
+ __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride]
+
+ # Stub to indicate that arbitrary properties are accepted.
+ # To access properties that are not valid identifiers you can use `getattr`, e.g.
+ # `getattr(obj, '$type')`
+ def __getattr__(self, attr: str) -> object: ...
+ else:
+ __pydantic_extra__: Dict[str, object]
diff --git a/src/mixedbread/types/contextualization_config.py b/src/mixedbread/types/contextualization_config.py
new file mode 100644
index 00000000..1b2b74a2
--- /dev/null
+++ b/src/mixedbread/types/contextualization_config.py
@@ -0,0 +1,24 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import List, Union, Optional
+
+from .._models import BaseModel
+
+__all__ = ["ContextualizationConfig"]
+
+
+class ContextualizationConfig(BaseModel):
+ with_metadata: Union[bool, List[str], None] = None
+ """Include all metadata or specific fields in the contextualization.
+
+ Supports dot notation for nested fields (e.g., 'author.name'). When True, all
+ metadata is included (flattened). When a list, only specified fields are
+ included.
+ """
+
+ with_file_context: Optional[bool] = None
+ """
+ Use an LLM to generate a short context for each text chunk that situates it
+ within the full document, improving retrieval accuracy. Only applies to text
+ content during non-sliced ingestion.
+ """
diff --git a/src/mixedbread/types/contextualization_config_param.py b/src/mixedbread/types/contextualization_config_param.py
new file mode 100644
index 00000000..9032c508
--- /dev/null
+++ b/src/mixedbread/types/contextualization_config_param.py
@@ -0,0 +1,27 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Union
+from typing_extensions import TypedDict
+
+from .._types import SequenceNotStr
+
+__all__ = ["ContextualizationConfigParam"]
+
+
+class ContextualizationConfigParam(TypedDict, total=False):
+ with_metadata: Union[bool, SequenceNotStr[str]]
+ """Include all metadata or specific fields in the contextualization.
+
+ Supports dot notation for nested fields (e.g., 'author.name'). When True, all
+ metadata is included (flattened). When a list, only specified fields are
+ included.
+ """
+
+ with_file_context: bool
+ """
+ Use an LLM to generate a short context for each text chunk that situates it
+ within the full document, improving retrieval accuracy. Only applies to text
+ content during non-sliced ingestion.
+ """
diff --git a/src/mixedbread/types/data_source.py b/src/mixedbread/types/data_source.py
index d219ef28..1acd396c 100644
--- a/src/mixedbread/types/data_source.py
+++ b/src/mixedbread/types/data_source.py
@@ -8,21 +8,12 @@
from .._models import BaseModel
from .data_source_type import DataSourceType
from .data_source_oauth2_params import DataSourceOauth2Params
+from .data_source_api_key_params import DataSourceAPIKeyParams
-__all__ = ["DataSource", "AuthParams", "AuthParamsDataSourceAPIKeyParams"]
-
-
-class AuthParamsDataSourceAPIKeyParams(BaseModel):
- """Authentication parameters for a API key data source."""
-
- type: Optional[Literal["api_key"]] = None
-
- api_key: str
- """The API key"""
-
+__all__ = ["DataSource", "AuthParams"]
AuthParams: TypeAlias = Annotated[
- Union[DataSourceOauth2Params, AuthParamsDataSourceAPIKeyParams, None], PropertyInfo(discriminator="type")
+ Union[DataSourceOauth2Params, DataSourceAPIKeyParams, None], PropertyInfo(discriminator="type")
]
diff --git a/src/mixedbread/types/data_source_api_key_params.py b/src/mixedbread/types/data_source_api_key_params.py
new file mode 100644
index 00000000..85c596bf
--- /dev/null
+++ b/src/mixedbread/types/data_source_api_key_params.py
@@ -0,0 +1,17 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from .._models import BaseModel
+
+__all__ = ["DataSourceAPIKeyParams"]
+
+
+class DataSourceAPIKeyParams(BaseModel):
+ """Authentication parameters for a API key data source."""
+
+ type: Optional[Literal["api_key"]] = None
+
+ api_key: str
+ """The API key"""
diff --git a/src/mixedbread/types/data_source_create_params.py b/src/mixedbread/types/data_source_create_params.py
index 9424a1de..19ed3593 100644
--- a/src/mixedbread/types/data_source_create_params.py
+++ b/src/mixedbread/types/data_source_create_params.py
@@ -6,14 +6,9 @@
from typing_extensions import Literal, Required, TypeAlias, TypedDict
from .oauth2_params import Oauth2Params
+from .api_key_create_or_update_params import APIKeyCreateOrUpdateParams
-__all__ = [
- "DataSourceCreateParams",
- "NotionDataSource",
- "NotionDataSourceAuthParams",
- "NotionDataSourceAuthParamsAPIKeyCreateOrUpdateParams",
- "LinearDataSource",
-]
+__all__ = ["DataSourceCreateParams", "NotionDataSource", "NotionDataSourceAuthParams", "LinearDataSource"]
class NotionDataSource(TypedDict, total=False):
@@ -33,16 +28,7 @@ class NotionDataSource(TypedDict, total=False):
"""
-class NotionDataSourceAuthParamsAPIKeyCreateOrUpdateParams(TypedDict, total=False):
- """Base class for API key create or update parameters."""
-
- type: Literal["api_key"]
-
- api_key: Required[str]
- """The API key"""
-
-
-NotionDataSourceAuthParams: TypeAlias = Union[Oauth2Params, NotionDataSourceAuthParamsAPIKeyCreateOrUpdateParams]
+NotionDataSourceAuthParams: TypeAlias = Union[Oauth2Params, APIKeyCreateOrUpdateParams]
class LinearDataSource(TypedDict, total=False):
diff --git a/src/mixedbread/types/data_source_update_params.py b/src/mixedbread/types/data_source_update_params.py
index 96880db4..d32b2504 100644
--- a/src/mixedbread/types/data_source_update_params.py
+++ b/src/mixedbread/types/data_source_update_params.py
@@ -6,14 +6,9 @@
from typing_extensions import Literal, Required, TypeAlias, TypedDict
from .oauth2_params import Oauth2Params
+from .api_key_create_or_update_params import APIKeyCreateOrUpdateParams
-__all__ = [
- "DataSourceUpdateParams",
- "NotionDataSource",
- "NotionDataSourceAuthParams",
- "NotionDataSourceAuthParamsAPIKeyCreateOrUpdateParams",
- "LinearDataSource",
-]
+__all__ = ["DataSourceUpdateParams", "NotionDataSource", "NotionDataSourceAuthParams", "LinearDataSource"]
class NotionDataSource(TypedDict, total=False):
@@ -33,16 +28,7 @@ class NotionDataSource(TypedDict, total=False):
"""
-class NotionDataSourceAuthParamsAPIKeyCreateOrUpdateParams(TypedDict, total=False):
- """Base class for API key create or update parameters."""
-
- type: Literal["api_key"]
-
- api_key: Required[str]
- """The API key"""
-
-
-NotionDataSourceAuthParams: TypeAlias = Union[Oauth2Params, NotionDataSourceAuthParamsAPIKeyCreateOrUpdateParams]
+NotionDataSourceAuthParams: TypeAlias = Union[Oauth2Params, APIKeyCreateOrUpdateParams]
class LinearDataSource(TypedDict, total=False):
diff --git a/src/mixedbread/types/file_counts.py b/src/mixedbread/types/file_counts.py
new file mode 100644
index 00000000..a2daa5f8
--- /dev/null
+++ b/src/mixedbread/types/file_counts.py
@@ -0,0 +1,29 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+
+from .._models import BaseModel
+
+__all__ = ["FileCounts"]
+
+
+class FileCounts(BaseModel):
+ """Tracks counts of files in different states within a store."""
+
+ pending: Optional[int] = None
+ """Number of files waiting to be processed"""
+
+ in_progress: Optional[int] = None
+ """Number of files currently being processed"""
+
+ cancelled: Optional[int] = None
+ """Number of files whose processing was cancelled"""
+
+ completed: Optional[int] = None
+ """Number of successfully processed files"""
+
+ failed: Optional[int] = None
+ """Number of files that failed processing"""
+
+ total: Optional[int] = None
+ """Total number of files"""
diff --git a/src/mixedbread/types/image_chunk_generated_metadata.py b/src/mixedbread/types/image_chunk_generated_metadata.py
new file mode 100644
index 00000000..a46eff9a
--- /dev/null
+++ b/src/mixedbread/types/image_chunk_generated_metadata.py
@@ -0,0 +1,36 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import TYPE_CHECKING, Dict, Optional
+from typing_extensions import Literal
+
+from pydantic import Field as FieldInfo
+
+from .._models import BaseModel
+
+__all__ = ["ImageChunkGeneratedMetadata"]
+
+
+class ImageChunkGeneratedMetadata(BaseModel):
+ type: Optional[Literal["image"]] = None
+
+ file_type: Optional[str] = None
+
+ file_size: Optional[int] = None
+
+ width: Optional[int] = None
+
+ height: Optional[int] = None
+
+ file_extension: Optional[str] = None
+
+ if TYPE_CHECKING:
+ # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a
+ # value to this field, so for compatibility we avoid doing it at runtime.
+ __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride]
+
+ # Stub to indicate that arbitrary properties are accepted.
+ # To access properties that are not valid identifiers you can use `getattr`, e.g.
+ # `getattr(obj, '$type')`
+ def __getattr__(self, attr: str) -> object: ...
+ else:
+ __pydantic_extra__: Dict[str, object]
diff --git a/src/mixedbread/types/image_url_output.py b/src/mixedbread/types/image_url_output.py
new file mode 100644
index 00000000..eab0a529
--- /dev/null
+++ b/src/mixedbread/types/image_url_output.py
@@ -0,0 +1,17 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+
+from .._models import BaseModel
+
+__all__ = ["ImageURLOutput"]
+
+
+class ImageURLOutput(BaseModel):
+ """Model for image URL validation."""
+
+ url: str
+ """The image URL. Can be either a URL or a Data URI."""
+
+ format: Optional[str] = None
+ """The image format/mimetype"""
diff --git a/src/mixedbread/types/markdown_chunk_generated_metadata.py b/src/mixedbread/types/markdown_chunk_generated_metadata.py
new file mode 100644
index 00000000..e2e2f016
--- /dev/null
+++ b/src/mixedbread/types/markdown_chunk_generated_metadata.py
@@ -0,0 +1,47 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import TYPE_CHECKING, Dict, List, Optional
+from typing_extensions import Literal
+
+from pydantic import Field as FieldInfo
+
+from .._models import BaseModel
+from .markdown_heading import MarkdownHeading
+
+__all__ = ["MarkdownChunkGeneratedMetadata"]
+
+
+class MarkdownChunkGeneratedMetadata(BaseModel):
+ type: Optional[Literal["markdown"]] = None
+
+ file_type: Optional[Literal["text/markdown"]] = None
+
+ language: Optional[str] = None
+
+ word_count: Optional[int] = None
+
+ file_size: Optional[int] = None
+
+ chunk_headings: Optional[List[MarkdownHeading]] = None
+
+ heading_context: Optional[List[MarkdownHeading]] = None
+
+ start_line: Optional[int] = None
+
+ num_lines: Optional[int] = None
+
+ file_extension: Optional[str] = None
+
+ frontmatter: Optional[Dict[str, object]] = None
+
+ if TYPE_CHECKING:
+ # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a
+ # value to this field, so for compatibility we avoid doing it at runtime.
+ __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride]
+
+ # Stub to indicate that arbitrary properties are accepted.
+ # To access properties that are not valid identifiers you can use `getattr`, e.g.
+ # `getattr(obj, '$type')`
+ def __getattr__(self, attr: str) -> object: ...
+ else:
+ __pydantic_extra__: Dict[str, object]
diff --git a/src/mixedbread/types/markdown_heading.py b/src/mixedbread/types/markdown_heading.py
new file mode 100644
index 00000000..56a1f24d
--- /dev/null
+++ b/src/mixedbread/types/markdown_heading.py
@@ -0,0 +1,11 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from .._models import BaseModel
+
+__all__ = ["MarkdownHeading"]
+
+
+class MarkdownHeading(BaseModel):
+ level: int
+
+ text: str
diff --git a/src/mixedbread/types/notion_data_source_param.py b/src/mixedbread/types/notion_data_source_param.py
index 12409ef6..a559a747 100644
--- a/src/mixedbread/types/notion_data_source_param.py
+++ b/src/mixedbread/types/notion_data_source_param.py
@@ -6,20 +6,11 @@
from typing_extensions import Literal, Required, TypeAlias, TypedDict
from .oauth2_params import Oauth2Params
+from .api_key_create_or_update_params import APIKeyCreateOrUpdateParams
-__all__ = ["NotionDataSourceParam", "AuthParams", "AuthParamsAPIKeyCreateOrUpdateParams"]
+__all__ = ["NotionDataSourceParam", "AuthParams"]
-
-class AuthParamsAPIKeyCreateOrUpdateParams(TypedDict, total=False):
- """Base class for API key create or update parameters."""
-
- type: Literal["api_key"]
-
- api_key: Required[str]
- """The API key"""
-
-
-AuthParams: TypeAlias = Union[Oauth2Params, AuthParamsAPIKeyCreateOrUpdateParams]
+AuthParams: TypeAlias = Union[Oauth2Params, APIKeyCreateOrUpdateParams]
class NotionDataSourceParam(TypedDict, total=False):
diff --git a/src/mixedbread/types/parsing/__init__.py b/src/mixedbread/types/parsing/__init__.py
index d3e38bbe..213c0245 100644
--- a/src/mixedbread/types/parsing/__init__.py
+++ b/src/mixedbread/types/parsing/__init__.py
@@ -2,8 +2,10 @@
from __future__ import annotations
+from .chunk import Chunk as Chunk
from .parsing_job import ParsingJob as ParsingJob
from .element_type import ElementType as ElementType
+from .chunk_element import ChunkElement as ChunkElement
from .return_format import ReturnFormat as ReturnFormat
from .job_list_params import JobListParams as JobListParams
from .chunking_strategy import ChunkingStrategy as ChunkingStrategy
@@ -11,3 +13,4 @@
from .job_list_response import JobListResponse as JobListResponse
from .parsing_job_status import ParsingJobStatus as ParsingJobStatus
from .job_delete_response import JobDeleteResponse as JobDeleteResponse
+from .document_parser_result import DocumentParserResult as DocumentParserResult
diff --git a/src/mixedbread/types/parsing/chunk.py b/src/mixedbread/types/parsing/chunk.py
new file mode 100644
index 00000000..7f866102
--- /dev/null
+++ b/src/mixedbread/types/parsing/chunk.py
@@ -0,0 +1,21 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import List, Optional
+
+from ..._models import BaseModel
+from .chunk_element import ChunkElement
+
+__all__ = ["Chunk"]
+
+
+class Chunk(BaseModel):
+ """A chunk of text extracted from a document page."""
+
+ content: Optional[str] = None
+ """The full content of the chunk"""
+
+ content_to_embed: str
+ """The content of the chunk to embed"""
+
+ elements: List[ChunkElement]
+ """List of elements contained in this chunk"""
diff --git a/src/mixedbread/types/parsing/chunk_element.py b/src/mixedbread/types/parsing/chunk_element.py
new file mode 100644
index 00000000..d5de068a
--- /dev/null
+++ b/src/mixedbread/types/parsing/chunk_element.py
@@ -0,0 +1,33 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import List, Optional
+
+from ..._models import BaseModel
+from .element_type import ElementType
+
+__all__ = ["ChunkElement"]
+
+
+class ChunkElement(BaseModel):
+ """Represents an extracted element from a document with its content and metadata."""
+
+ type: ElementType
+ """The type of the extracted element"""
+
+ confidence: float
+ """The confidence score of the extraction"""
+
+ bbox: List[object]
+ """The bounding box coordinates [x1, y1, x2, y2]"""
+
+ page: int
+ """The page number where the element was found"""
+
+ content: str
+ """The extracted text content of the element"""
+
+ summary: Optional[str] = None
+ """A brief summary of the element's content"""
+
+ image: Optional[str] = None
+ """The base64-encoded image data for figure elements"""
diff --git a/src/mixedbread/types/parsing/document_parser_result.py b/src/mixedbread/types/parsing/document_parser_result.py
new file mode 100644
index 00000000..09a35ec5
--- /dev/null
+++ b/src/mixedbread/types/parsing/document_parser_result.py
@@ -0,0 +1,30 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import List, Optional
+
+from .chunk import Chunk
+from ..._models import BaseModel
+from .element_type import ElementType
+from .return_format import ReturnFormat
+from .chunking_strategy import ChunkingStrategy
+
+__all__ = ["DocumentParserResult"]
+
+
+class DocumentParserResult(BaseModel):
+ """Result of document parsing operation."""
+
+ chunking_strategy: ChunkingStrategy
+ """The strategy used for chunking the document"""
+
+ return_format: ReturnFormat
+ """The format of the returned content"""
+
+ element_types: List[ElementType]
+ """The types of elements extracted"""
+
+ chunks: List[Chunk]
+ """List of extracted chunks from the document"""
+
+ page_sizes: Optional[List[List[object]]] = None
+ """List of (width, height) tuples for each page"""
diff --git a/src/mixedbread/types/parsing/parsing_job.py b/src/mixedbread/types/parsing/parsing_job.py
index 2f3d250d..48bbc793 100644
--- a/src/mixedbread/types/parsing/parsing_job.py
+++ b/src/mixedbread/types/parsing/parsing_job.py
@@ -1,73 +1,14 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-from typing import Dict, List, Optional
+from typing import Dict, Optional
from datetime import datetime
from typing_extensions import Literal
from ..._models import BaseModel
-from .element_type import ElementType
-from .return_format import ReturnFormat
-from .chunking_strategy import ChunkingStrategy
from .parsing_job_status import ParsingJobStatus
+from .document_parser_result import DocumentParserResult
-__all__ = ["ParsingJob", "Result", "ResultChunk", "ResultChunkElement"]
-
-
-class ResultChunkElement(BaseModel):
- """Represents an extracted element from a document with its content and metadata."""
-
- type: ElementType
- """The type of the extracted element"""
-
- confidence: float
- """The confidence score of the extraction"""
-
- bbox: List[object]
- """The bounding box coordinates [x1, y1, x2, y2]"""
-
- page: int
- """The page number where the element was found"""
-
- content: str
- """The extracted text content of the element"""
-
- summary: Optional[str] = None
- """A brief summary of the element's content"""
-
- image: Optional[str] = None
- """The base64-encoded image data for figure elements"""
-
-
-class ResultChunk(BaseModel):
- """A chunk of text extracted from a document page."""
-
- content: Optional[str] = None
- """The full content of the chunk"""
-
- content_to_embed: str
- """The content of the chunk to embed"""
-
- elements: List[ResultChunkElement]
- """List of elements contained in this chunk"""
-
-
-class Result(BaseModel):
- """Result of document parsing operation."""
-
- chunking_strategy: ChunkingStrategy
- """The strategy used for chunking the document"""
-
- return_format: ReturnFormat
- """The format of the returned content"""
-
- element_types: List[ElementType]
- """The types of elements extracted"""
-
- chunks: List[ResultChunk]
- """List of extracted chunks from the document"""
-
- page_sizes: Optional[List[List[object]]] = None
- """List of (width, height) tuples for each page"""
+__all__ = ["ParsingJob"]
class ParsingJob(BaseModel):
@@ -88,7 +29,7 @@ class ParsingJob(BaseModel):
error: Optional[Dict[str, object]] = None
"""The error of the job"""
- result: Optional[Result] = None
+ result: Optional[DocumentParserResult] = None
"""Result of document parsing operation."""
started_at: Optional[datetime] = None
diff --git a/src/mixedbread/types/pdf_chunk_generated_metadata.py b/src/mixedbread/types/pdf_chunk_generated_metadata.py
new file mode 100644
index 00000000..1dd9e9b6
--- /dev/null
+++ b/src/mixedbread/types/pdf_chunk_generated_metadata.py
@@ -0,0 +1,34 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import TYPE_CHECKING, Dict, Optional
+from typing_extensions import Literal
+
+from pydantic import Field as FieldInfo
+
+from .._models import BaseModel
+
+__all__ = ["PdfChunkGeneratedMetadata"]
+
+
+class PdfChunkGeneratedMetadata(BaseModel):
+ type: Optional[Literal["pdf"]] = None
+
+ file_type: Optional[Literal["application/pdf"]] = None
+
+ total_pages: Optional[int] = None
+
+ total_size: Optional[int] = None
+
+ file_extension: Optional[str] = None
+
+ if TYPE_CHECKING:
+ # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a
+ # value to this field, so for compatibility we avoid doing it at runtime.
+ __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride]
+
+ # Stub to indicate that arbitrary properties are accepted.
+ # To access properties that are not valid identifiers you can use `getattr`, e.g.
+ # `getattr(obj, '$type')`
+ def __getattr__(self, attr: str) -> object: ...
+ else:
+ __pydantic_extra__: Dict[str, object]
diff --git a/src/mixedbread/types/rerank_config_param.py b/src/mixedbread/types/rerank_config_param.py
new file mode 100644
index 00000000..1769a120
--- /dev/null
+++ b/src/mixedbread/types/rerank_config_param.py
@@ -0,0 +1,26 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Union, Optional
+from typing_extensions import TypedDict
+
+from .._types import SequenceNotStr
+
+__all__ = ["RerankConfigParam"]
+
+
+class RerankConfigParam(TypedDict, total=False):
+ """Represents a reranking configuration."""
+
+ model: str
+ """The name of the reranking model"""
+
+ with_metadata: Union[bool, SequenceNotStr[str]]
+ """Whether to include metadata in the reranked results"""
+
+ top_k: Optional[int]
+ """Maximum number of results to return after reranking.
+
+ If None, returns all reranked results.
+ """
diff --git a/src/mixedbread/types/scope.py b/src/mixedbread/types/scope.py
new file mode 100644
index 00000000..269c8183
--- /dev/null
+++ b/src/mixedbread/types/scope.py
@@ -0,0 +1,16 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from .._models import BaseModel
+
+__all__ = ["Scope"]
+
+
+class Scope(BaseModel):
+ method: Literal["read", "write", "delete", "list", "create", "search"]
+
+ resource_type: Optional[Literal["store"]] = None
+
+ resource_id: Optional[str] = None
diff --git a/src/mixedbread/types/scope_param.py b/src/mixedbread/types/scope_param.py
new file mode 100644
index 00000000..641eb0de
--- /dev/null
+++ b/src/mixedbread/types/scope_param.py
@@ -0,0 +1,16 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Optional
+from typing_extensions import Literal, Required, TypedDict
+
+__all__ = ["ScopeParam"]
+
+
+class ScopeParam(TypedDict, total=False):
+ method: Required[Literal["read", "write", "delete", "list", "create", "search"]]
+
+ resource_type: Optional[Literal["store"]]
+
+ resource_id: Optional[str]
diff --git a/src/mixedbread/types/scored_audio_url_input_chunk.py b/src/mixedbread/types/scored_audio_url_input_chunk.py
index c82677c8..e12b2d96 100644
--- a/src/mixedbread/types/scored_audio_url_input_chunk.py
+++ b/src/mixedbread/types/scored_audio_url_input_chunk.py
@@ -1,277 +1,36 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-from typing import TYPE_CHECKING, Dict, List, Union, Optional
+from typing import Union, Optional
from typing_extensions import Literal, Annotated, TypeAlias
-from pydantic import Field as FieldInfo
-
from .._utils import PropertyInfo
from .._models import BaseModel
+from .audio_url import AudioURL
+from .pdf_chunk_generated_metadata import PdfChunkGeneratedMetadata
+from .code_chunk_generated_metadata import CodeChunkGeneratedMetadata
+from .text_chunk_generated_metadata import TextChunkGeneratedMetadata
+from .audio_chunk_generated_metadata import AudioChunkGeneratedMetadata
+from .image_chunk_generated_metadata import ImageChunkGeneratedMetadata
+from .video_chunk_generated_metadata import VideoChunkGeneratedMetadata
+from .markdown_chunk_generated_metadata import MarkdownChunkGeneratedMetadata
-__all__ = [
- "ScoredAudioURLInputChunk",
- "GeneratedMetadata",
- "GeneratedMetadataMarkdownChunkGeneratedMetadata",
- "GeneratedMetadataMarkdownChunkGeneratedMetadataChunkHeading",
- "GeneratedMetadataMarkdownChunkGeneratedMetadataHeadingContext",
- "GeneratedMetadataTextChunkGeneratedMetadata",
- "GeneratedMetadataPdfChunkGeneratedMetadata",
- "GeneratedMetadataCodeChunkGeneratedMetadata",
- "GeneratedMetadataAudioChunkGeneratedMetadata",
- "GeneratedMetadataVideoChunkGeneratedMetadata",
- "GeneratedMetadataImageChunkGeneratedMetadata",
- "AudioURL",
-]
-
-
-class GeneratedMetadataMarkdownChunkGeneratedMetadataChunkHeading(BaseModel):
- level: int
-
- text: str
-
-
-class GeneratedMetadataMarkdownChunkGeneratedMetadataHeadingContext(BaseModel):
- level: int
-
- text: str
-
-
-class GeneratedMetadataMarkdownChunkGeneratedMetadata(BaseModel):
- type: Optional[Literal["markdown"]] = None
-
- file_type: Optional[Literal["text/markdown"]] = None
-
- language: Optional[str] = None
-
- word_count: Optional[int] = None
-
- file_size: Optional[int] = None
-
- chunk_headings: Optional[List[GeneratedMetadataMarkdownChunkGeneratedMetadataChunkHeading]] = None
-
- heading_context: Optional[List[GeneratedMetadataMarkdownChunkGeneratedMetadataHeadingContext]] = None
-
- start_line: Optional[int] = None
-
- num_lines: Optional[int] = None
-
- file_extension: Optional[str] = None
-
- frontmatter: Optional[Dict[str, object]] = None
-
- if TYPE_CHECKING:
- # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a
- # value to this field, so for compatibility we avoid doing it at runtime.
- __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride]
-
- # Stub to indicate that arbitrary properties are accepted.
- # To access properties that are not valid identifiers you can use `getattr`, e.g.
- # `getattr(obj, '$type')`
- def __getattr__(self, attr: str) -> object: ...
- else:
- __pydantic_extra__: Dict[str, object]
-
-
-class GeneratedMetadataTextChunkGeneratedMetadata(BaseModel):
- type: Optional[Literal["text"]] = None
-
- file_type: Optional[Literal["text/plain"]] = None
-
- language: Optional[str] = None
-
- word_count: Optional[int] = None
-
- file_size: Optional[int] = None
-
- start_line: Optional[int] = None
-
- num_lines: Optional[int] = None
-
- file_extension: Optional[str] = None
-
- if TYPE_CHECKING:
- # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a
- # value to this field, so for compatibility we avoid doing it at runtime.
- __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride]
-
- # Stub to indicate that arbitrary properties are accepted.
- # To access properties that are not valid identifiers you can use `getattr`, e.g.
- # `getattr(obj, '$type')`
- def __getattr__(self, attr: str) -> object: ...
- else:
- __pydantic_extra__: Dict[str, object]
-
-
-class GeneratedMetadataPdfChunkGeneratedMetadata(BaseModel):
- type: Optional[Literal["pdf"]] = None
-
- file_type: Optional[Literal["application/pdf"]] = None
-
- total_pages: Optional[int] = None
-
- total_size: Optional[int] = None
-
- file_extension: Optional[str] = None
-
- if TYPE_CHECKING:
- # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a
- # value to this field, so for compatibility we avoid doing it at runtime.
- __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride]
-
- # Stub to indicate that arbitrary properties are accepted.
- # To access properties that are not valid identifiers you can use `getattr`, e.g.
- # `getattr(obj, '$type')`
- def __getattr__(self, attr: str) -> object: ...
- else:
- __pydantic_extra__: Dict[str, object]
-
-
-class GeneratedMetadataCodeChunkGeneratedMetadata(BaseModel):
- type: Optional[Literal["code"]] = None
-
- file_type: str
-
- language: Optional[str] = None
-
- word_count: Optional[int] = None
-
- file_size: Optional[int] = None
-
- start_line: Optional[int] = None
-
- num_lines: Optional[int] = None
-
- file_extension: Optional[str] = None
-
- if TYPE_CHECKING:
- # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a
- # value to this field, so for compatibility we avoid doing it at runtime.
- __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride]
-
- # Stub to indicate that arbitrary properties are accepted.
- # To access properties that are not valid identifiers you can use `getattr`, e.g.
- # `getattr(obj, '$type')`
- def __getattr__(self, attr: str) -> object: ...
- else:
- __pydantic_extra__: Dict[str, object]
-
-
-class GeneratedMetadataAudioChunkGeneratedMetadata(BaseModel):
- type: Optional[Literal["audio"]] = None
-
- file_type: Optional[str] = None
-
- file_size: Optional[int] = None
-
- total_duration_seconds: Optional[float] = None
-
- sample_rate: Optional[int] = None
-
- channels: Optional[int] = None
-
- audio_format: Optional[int] = None
-
- bpm: Optional[int] = None
-
- file_extension: Optional[str] = None
-
- if TYPE_CHECKING:
- # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a
- # value to this field, so for compatibility we avoid doing it at runtime.
- __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride]
-
- # Stub to indicate that arbitrary properties are accepted.
- # To access properties that are not valid identifiers you can use `getattr`, e.g.
- # `getattr(obj, '$type')`
- def __getattr__(self, attr: str) -> object: ...
- else:
- __pydantic_extra__: Dict[str, object]
-
-
-class GeneratedMetadataVideoChunkGeneratedMetadata(BaseModel):
- type: Optional[Literal["video"]] = None
-
- file_type: Optional[str] = None
-
- file_size: Optional[int] = None
-
- total_duration_seconds: Optional[float] = None
-
- fps: Optional[float] = None
-
- width: Optional[int] = None
-
- height: Optional[int] = None
-
- frame_count: Optional[int] = None
-
- has_audio_stream: Optional[bool] = None
-
- bpm: Optional[int] = None
-
- file_extension: Optional[str] = None
-
- if TYPE_CHECKING:
- # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a
- # value to this field, so for compatibility we avoid doing it at runtime.
- __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride]
-
- # Stub to indicate that arbitrary properties are accepted.
- # To access properties that are not valid identifiers you can use `getattr`, e.g.
- # `getattr(obj, '$type')`
- def __getattr__(self, attr: str) -> object: ...
- else:
- __pydantic_extra__: Dict[str, object]
-
-
-class GeneratedMetadataImageChunkGeneratedMetadata(BaseModel):
- type: Optional[Literal["image"]] = None
-
- file_type: Optional[str] = None
-
- file_size: Optional[int] = None
-
- width: Optional[int] = None
-
- height: Optional[int] = None
-
- file_extension: Optional[str] = None
-
- if TYPE_CHECKING:
- # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a
- # value to this field, so for compatibility we avoid doing it at runtime.
- __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride]
-
- # Stub to indicate that arbitrary properties are accepted.
- # To access properties that are not valid identifiers you can use `getattr`, e.g.
- # `getattr(obj, '$type')`
- def __getattr__(self, attr: str) -> object: ...
- else:
- __pydantic_extra__: Dict[str, object]
-
+__all__ = ["ScoredAudioURLInputChunk", "GeneratedMetadata"]
GeneratedMetadata: TypeAlias = Annotated[
Union[
- GeneratedMetadataMarkdownChunkGeneratedMetadata,
- GeneratedMetadataTextChunkGeneratedMetadata,
- GeneratedMetadataPdfChunkGeneratedMetadata,
- GeneratedMetadataCodeChunkGeneratedMetadata,
- GeneratedMetadataAudioChunkGeneratedMetadata,
- GeneratedMetadataVideoChunkGeneratedMetadata,
- GeneratedMetadataImageChunkGeneratedMetadata,
+ MarkdownChunkGeneratedMetadata,
+ TextChunkGeneratedMetadata,
+ PdfChunkGeneratedMetadata,
+ CodeChunkGeneratedMetadata,
+ AudioChunkGeneratedMetadata,
+ VideoChunkGeneratedMetadata,
+ ImageChunkGeneratedMetadata,
None,
],
PropertyInfo(discriminator="type"),
]
-class AudioURL(BaseModel):
- """Model for audio URL validation."""
-
- url: str
- """The audio URL. Can be either a URL or a Data URI."""
-
-
class ScoredAudioURLInputChunk(BaseModel):
chunk_index: int
"""position of the chunk in a file"""
@@ -309,9 +68,6 @@ class ScoredAudioURLInputChunk(BaseModel):
transcription: Optional[str] = None
"""speech recognition (sr) text of the audio"""
- summary: Optional[str] = None
- """summary of the audio"""
-
audio_url: Optional[AudioURL] = None
"""Model for audio URL validation."""
diff --git a/src/mixedbread/types/scored_image_url_input_chunk.py b/src/mixedbread/types/scored_image_url_input_chunk.py
index ae453a4c..9b8c72a6 100644
--- a/src/mixedbread/types/scored_image_url_input_chunk.py
+++ b/src/mixedbread/types/scored_image_url_input_chunk.py
@@ -1,280 +1,36 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-from typing import TYPE_CHECKING, Dict, List, Union, Optional
+from typing import Union, Optional
from typing_extensions import Literal, Annotated, TypeAlias
-from pydantic import Field as FieldInfo
-
from .._utils import PropertyInfo
from .._models import BaseModel
+from .image_url_output import ImageURLOutput
+from .pdf_chunk_generated_metadata import PdfChunkGeneratedMetadata
+from .code_chunk_generated_metadata import CodeChunkGeneratedMetadata
+from .text_chunk_generated_metadata import TextChunkGeneratedMetadata
+from .audio_chunk_generated_metadata import AudioChunkGeneratedMetadata
+from .image_chunk_generated_metadata import ImageChunkGeneratedMetadata
+from .video_chunk_generated_metadata import VideoChunkGeneratedMetadata
+from .markdown_chunk_generated_metadata import MarkdownChunkGeneratedMetadata
-__all__ = [
- "ScoredImageURLInputChunk",
- "GeneratedMetadata",
- "GeneratedMetadataMarkdownChunkGeneratedMetadata",
- "GeneratedMetadataMarkdownChunkGeneratedMetadataChunkHeading",
- "GeneratedMetadataMarkdownChunkGeneratedMetadataHeadingContext",
- "GeneratedMetadataTextChunkGeneratedMetadata",
- "GeneratedMetadataPdfChunkGeneratedMetadata",
- "GeneratedMetadataCodeChunkGeneratedMetadata",
- "GeneratedMetadataAudioChunkGeneratedMetadata",
- "GeneratedMetadataVideoChunkGeneratedMetadata",
- "GeneratedMetadataImageChunkGeneratedMetadata",
- "ImageURL",
-]
-
-
-class GeneratedMetadataMarkdownChunkGeneratedMetadataChunkHeading(BaseModel):
- level: int
-
- text: str
-
-
-class GeneratedMetadataMarkdownChunkGeneratedMetadataHeadingContext(BaseModel):
- level: int
-
- text: str
-
-
-class GeneratedMetadataMarkdownChunkGeneratedMetadata(BaseModel):
- type: Optional[Literal["markdown"]] = None
-
- file_type: Optional[Literal["text/markdown"]] = None
-
- language: Optional[str] = None
-
- word_count: Optional[int] = None
-
- file_size: Optional[int] = None
-
- chunk_headings: Optional[List[GeneratedMetadataMarkdownChunkGeneratedMetadataChunkHeading]] = None
-
- heading_context: Optional[List[GeneratedMetadataMarkdownChunkGeneratedMetadataHeadingContext]] = None
-
- start_line: Optional[int] = None
-
- num_lines: Optional[int] = None
-
- file_extension: Optional[str] = None
-
- frontmatter: Optional[Dict[str, object]] = None
-
- if TYPE_CHECKING:
- # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a
- # value to this field, so for compatibility we avoid doing it at runtime.
- __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride]
-
- # Stub to indicate that arbitrary properties are accepted.
- # To access properties that are not valid identifiers you can use `getattr`, e.g.
- # `getattr(obj, '$type')`
- def __getattr__(self, attr: str) -> object: ...
- else:
- __pydantic_extra__: Dict[str, object]
-
-
-class GeneratedMetadataTextChunkGeneratedMetadata(BaseModel):
- type: Optional[Literal["text"]] = None
-
- file_type: Optional[Literal["text/plain"]] = None
-
- language: Optional[str] = None
-
- word_count: Optional[int] = None
-
- file_size: Optional[int] = None
-
- start_line: Optional[int] = None
-
- num_lines: Optional[int] = None
-
- file_extension: Optional[str] = None
-
- if TYPE_CHECKING:
- # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a
- # value to this field, so for compatibility we avoid doing it at runtime.
- __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride]
-
- # Stub to indicate that arbitrary properties are accepted.
- # To access properties that are not valid identifiers you can use `getattr`, e.g.
- # `getattr(obj, '$type')`
- def __getattr__(self, attr: str) -> object: ...
- else:
- __pydantic_extra__: Dict[str, object]
-
-
-class GeneratedMetadataPdfChunkGeneratedMetadata(BaseModel):
- type: Optional[Literal["pdf"]] = None
-
- file_type: Optional[Literal["application/pdf"]] = None
-
- total_pages: Optional[int] = None
-
- total_size: Optional[int] = None
-
- file_extension: Optional[str] = None
-
- if TYPE_CHECKING:
- # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a
- # value to this field, so for compatibility we avoid doing it at runtime.
- __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride]
-
- # Stub to indicate that arbitrary properties are accepted.
- # To access properties that are not valid identifiers you can use `getattr`, e.g.
- # `getattr(obj, '$type')`
- def __getattr__(self, attr: str) -> object: ...
- else:
- __pydantic_extra__: Dict[str, object]
-
-
-class GeneratedMetadataCodeChunkGeneratedMetadata(BaseModel):
- type: Optional[Literal["code"]] = None
-
- file_type: str
-
- language: Optional[str] = None
-
- word_count: Optional[int] = None
-
- file_size: Optional[int] = None
-
- start_line: Optional[int] = None
-
- num_lines: Optional[int] = None
-
- file_extension: Optional[str] = None
-
- if TYPE_CHECKING:
- # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a
- # value to this field, so for compatibility we avoid doing it at runtime.
- __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride]
-
- # Stub to indicate that arbitrary properties are accepted.
- # To access properties that are not valid identifiers you can use `getattr`, e.g.
- # `getattr(obj, '$type')`
- def __getattr__(self, attr: str) -> object: ...
- else:
- __pydantic_extra__: Dict[str, object]
-
-
-class GeneratedMetadataAudioChunkGeneratedMetadata(BaseModel):
- type: Optional[Literal["audio"]] = None
-
- file_type: Optional[str] = None
-
- file_size: Optional[int] = None
-
- total_duration_seconds: Optional[float] = None
-
- sample_rate: Optional[int] = None
-
- channels: Optional[int] = None
-
- audio_format: Optional[int] = None
-
- bpm: Optional[int] = None
-
- file_extension: Optional[str] = None
-
- if TYPE_CHECKING:
- # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a
- # value to this field, so for compatibility we avoid doing it at runtime.
- __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride]
-
- # Stub to indicate that arbitrary properties are accepted.
- # To access properties that are not valid identifiers you can use `getattr`, e.g.
- # `getattr(obj, '$type')`
- def __getattr__(self, attr: str) -> object: ...
- else:
- __pydantic_extra__: Dict[str, object]
-
-
-class GeneratedMetadataVideoChunkGeneratedMetadata(BaseModel):
- type: Optional[Literal["video"]] = None
-
- file_type: Optional[str] = None
-
- file_size: Optional[int] = None
-
- total_duration_seconds: Optional[float] = None
-
- fps: Optional[float] = None
-
- width: Optional[int] = None
-
- height: Optional[int] = None
-
- frame_count: Optional[int] = None
-
- has_audio_stream: Optional[bool] = None
-
- bpm: Optional[int] = None
-
- file_extension: Optional[str] = None
-
- if TYPE_CHECKING:
- # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a
- # value to this field, so for compatibility we avoid doing it at runtime.
- __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride]
-
- # Stub to indicate that arbitrary properties are accepted.
- # To access properties that are not valid identifiers you can use `getattr`, e.g.
- # `getattr(obj, '$type')`
- def __getattr__(self, attr: str) -> object: ...
- else:
- __pydantic_extra__: Dict[str, object]
-
-
-class GeneratedMetadataImageChunkGeneratedMetadata(BaseModel):
- type: Optional[Literal["image"]] = None
-
- file_type: Optional[str] = None
-
- file_size: Optional[int] = None
-
- width: Optional[int] = None
-
- height: Optional[int] = None
-
- file_extension: Optional[str] = None
-
- if TYPE_CHECKING:
- # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a
- # value to this field, so for compatibility we avoid doing it at runtime.
- __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride]
-
- # Stub to indicate that arbitrary properties are accepted.
- # To access properties that are not valid identifiers you can use `getattr`, e.g.
- # `getattr(obj, '$type')`
- def __getattr__(self, attr: str) -> object: ...
- else:
- __pydantic_extra__: Dict[str, object]
-
+__all__ = ["ScoredImageURLInputChunk", "GeneratedMetadata"]
GeneratedMetadata: TypeAlias = Annotated[
Union[
- GeneratedMetadataMarkdownChunkGeneratedMetadata,
- GeneratedMetadataTextChunkGeneratedMetadata,
- GeneratedMetadataPdfChunkGeneratedMetadata,
- GeneratedMetadataCodeChunkGeneratedMetadata,
- GeneratedMetadataAudioChunkGeneratedMetadata,
- GeneratedMetadataVideoChunkGeneratedMetadata,
- GeneratedMetadataImageChunkGeneratedMetadata,
+ MarkdownChunkGeneratedMetadata,
+ TextChunkGeneratedMetadata,
+ PdfChunkGeneratedMetadata,
+ CodeChunkGeneratedMetadata,
+ AudioChunkGeneratedMetadata,
+ VideoChunkGeneratedMetadata,
+ ImageChunkGeneratedMetadata,
None,
],
PropertyInfo(discriminator="type"),
]
-class ImageURL(BaseModel):
- """Model for image URL validation."""
-
- url: str
- """The image URL. Can be either a URL or a Data URI."""
-
- format: Optional[str] = None
- """The image format/mimetype"""
-
-
class ScoredImageURLInputChunk(BaseModel):
chunk_index: int
"""position of the chunk in a file"""
@@ -315,5 +71,5 @@ class ScoredImageURLInputChunk(BaseModel):
summary: Optional[str] = None
"""summary of the image"""
- image_url: Optional[ImageURL] = None
+ image_url: Optional[ImageURLOutput] = None
"""Model for image URL validation."""
diff --git a/src/mixedbread/types/scored_text_input_chunk.py b/src/mixedbread/types/scored_text_input_chunk.py
index e4523a40..d801910d 100644
--- a/src/mixedbread/types/scored_text_input_chunk.py
+++ b/src/mixedbread/types/scored_text_input_chunk.py
@@ -1,263 +1,29 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-from typing import TYPE_CHECKING, Dict, List, Union, Optional
+from typing import Union, Optional
from typing_extensions import Literal, Annotated, TypeAlias
-from pydantic import Field as FieldInfo
-
from .._utils import PropertyInfo
from .._models import BaseModel
+from .pdf_chunk_generated_metadata import PdfChunkGeneratedMetadata
+from .code_chunk_generated_metadata import CodeChunkGeneratedMetadata
+from .text_chunk_generated_metadata import TextChunkGeneratedMetadata
+from .audio_chunk_generated_metadata import AudioChunkGeneratedMetadata
+from .image_chunk_generated_metadata import ImageChunkGeneratedMetadata
+from .video_chunk_generated_metadata import VideoChunkGeneratedMetadata
+from .markdown_chunk_generated_metadata import MarkdownChunkGeneratedMetadata
-__all__ = [
- "ScoredTextInputChunk",
- "GeneratedMetadata",
- "GeneratedMetadataMarkdownChunkGeneratedMetadata",
- "GeneratedMetadataMarkdownChunkGeneratedMetadataChunkHeading",
- "GeneratedMetadataMarkdownChunkGeneratedMetadataHeadingContext",
- "GeneratedMetadataTextChunkGeneratedMetadata",
- "GeneratedMetadataPdfChunkGeneratedMetadata",
- "GeneratedMetadataCodeChunkGeneratedMetadata",
- "GeneratedMetadataAudioChunkGeneratedMetadata",
- "GeneratedMetadataVideoChunkGeneratedMetadata",
- "GeneratedMetadataImageChunkGeneratedMetadata",
-]
-
-
-class GeneratedMetadataMarkdownChunkGeneratedMetadataChunkHeading(BaseModel):
- level: int
-
- text: str
-
-
-class GeneratedMetadataMarkdownChunkGeneratedMetadataHeadingContext(BaseModel):
- level: int
-
- text: str
-
-
-class GeneratedMetadataMarkdownChunkGeneratedMetadata(BaseModel):
- type: Optional[Literal["markdown"]] = None
-
- file_type: Optional[Literal["text/markdown"]] = None
-
- language: Optional[str] = None
-
- word_count: Optional[int] = None
-
- file_size: Optional[int] = None
-
- chunk_headings: Optional[List[GeneratedMetadataMarkdownChunkGeneratedMetadataChunkHeading]] = None
-
- heading_context: Optional[List[GeneratedMetadataMarkdownChunkGeneratedMetadataHeadingContext]] = None
-
- start_line: Optional[int] = None
-
- num_lines: Optional[int] = None
-
- file_extension: Optional[str] = None
-
- frontmatter: Optional[Dict[str, object]] = None
-
- if TYPE_CHECKING:
- # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a
- # value to this field, so for compatibility we avoid doing it at runtime.
- __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride]
-
- # Stub to indicate that arbitrary properties are accepted.
- # To access properties that are not valid identifiers you can use `getattr`, e.g.
- # `getattr(obj, '$type')`
- def __getattr__(self, attr: str) -> object: ...
- else:
- __pydantic_extra__: Dict[str, object]
-
-
-class GeneratedMetadataTextChunkGeneratedMetadata(BaseModel):
- type: Optional[Literal["text"]] = None
-
- file_type: Optional[Literal["text/plain"]] = None
-
- language: Optional[str] = None
-
- word_count: Optional[int] = None
-
- file_size: Optional[int] = None
-
- start_line: Optional[int] = None
-
- num_lines: Optional[int] = None
-
- file_extension: Optional[str] = None
-
- if TYPE_CHECKING:
- # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a
- # value to this field, so for compatibility we avoid doing it at runtime.
- __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride]
-
- # Stub to indicate that arbitrary properties are accepted.
- # To access properties that are not valid identifiers you can use `getattr`, e.g.
- # `getattr(obj, '$type')`
- def __getattr__(self, attr: str) -> object: ...
- else:
- __pydantic_extra__: Dict[str, object]
-
-
-class GeneratedMetadataPdfChunkGeneratedMetadata(BaseModel):
- type: Optional[Literal["pdf"]] = None
-
- file_type: Optional[Literal["application/pdf"]] = None
-
- total_pages: Optional[int] = None
-
- total_size: Optional[int] = None
-
- file_extension: Optional[str] = None
-
- if TYPE_CHECKING:
- # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a
- # value to this field, so for compatibility we avoid doing it at runtime.
- __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride]
-
- # Stub to indicate that arbitrary properties are accepted.
- # To access properties that are not valid identifiers you can use `getattr`, e.g.
- # `getattr(obj, '$type')`
- def __getattr__(self, attr: str) -> object: ...
- else:
- __pydantic_extra__: Dict[str, object]
-
-
-class GeneratedMetadataCodeChunkGeneratedMetadata(BaseModel):
- type: Optional[Literal["code"]] = None
-
- file_type: str
-
- language: Optional[str] = None
-
- word_count: Optional[int] = None
-
- file_size: Optional[int] = None
-
- start_line: Optional[int] = None
-
- num_lines: Optional[int] = None
-
- file_extension: Optional[str] = None
-
- if TYPE_CHECKING:
- # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a
- # value to this field, so for compatibility we avoid doing it at runtime.
- __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride]
-
- # Stub to indicate that arbitrary properties are accepted.
- # To access properties that are not valid identifiers you can use `getattr`, e.g.
- # `getattr(obj, '$type')`
- def __getattr__(self, attr: str) -> object: ...
- else:
- __pydantic_extra__: Dict[str, object]
-
-
-class GeneratedMetadataAudioChunkGeneratedMetadata(BaseModel):
- type: Optional[Literal["audio"]] = None
-
- file_type: Optional[str] = None
-
- file_size: Optional[int] = None
-
- total_duration_seconds: Optional[float] = None
-
- sample_rate: Optional[int] = None
-
- channels: Optional[int] = None
-
- audio_format: Optional[int] = None
-
- bpm: Optional[int] = None
-
- file_extension: Optional[str] = None
-
- if TYPE_CHECKING:
- # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a
- # value to this field, so for compatibility we avoid doing it at runtime.
- __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride]
-
- # Stub to indicate that arbitrary properties are accepted.
- # To access properties that are not valid identifiers you can use `getattr`, e.g.
- # `getattr(obj, '$type')`
- def __getattr__(self, attr: str) -> object: ...
- else:
- __pydantic_extra__: Dict[str, object]
-
-
-class GeneratedMetadataVideoChunkGeneratedMetadata(BaseModel):
- type: Optional[Literal["video"]] = None
-
- file_type: Optional[str] = None
-
- file_size: Optional[int] = None
-
- total_duration_seconds: Optional[float] = None
-
- fps: Optional[float] = None
-
- width: Optional[int] = None
-
- height: Optional[int] = None
-
- frame_count: Optional[int] = None
-
- has_audio_stream: Optional[bool] = None
-
- bpm: Optional[int] = None
-
- file_extension: Optional[str] = None
-
- if TYPE_CHECKING:
- # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a
- # value to this field, so for compatibility we avoid doing it at runtime.
- __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride]
-
- # Stub to indicate that arbitrary properties are accepted.
- # To access properties that are not valid identifiers you can use `getattr`, e.g.
- # `getattr(obj, '$type')`
- def __getattr__(self, attr: str) -> object: ...
- else:
- __pydantic_extra__: Dict[str, object]
-
-
-class GeneratedMetadataImageChunkGeneratedMetadata(BaseModel):
- type: Optional[Literal["image"]] = None
-
- file_type: Optional[str] = None
-
- file_size: Optional[int] = None
-
- width: Optional[int] = None
-
- height: Optional[int] = None
-
- file_extension: Optional[str] = None
-
- if TYPE_CHECKING:
- # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a
- # value to this field, so for compatibility we avoid doing it at runtime.
- __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride]
-
- # Stub to indicate that arbitrary properties are accepted.
- # To access properties that are not valid identifiers you can use `getattr`, e.g.
- # `getattr(obj, '$type')`
- def __getattr__(self, attr: str) -> object: ...
- else:
- __pydantic_extra__: Dict[str, object]
-
+__all__ = ["ScoredTextInputChunk", "GeneratedMetadata"]
GeneratedMetadata: TypeAlias = Annotated[
Union[
- GeneratedMetadataMarkdownChunkGeneratedMetadata,
- GeneratedMetadataTextChunkGeneratedMetadata,
- GeneratedMetadataPdfChunkGeneratedMetadata,
- GeneratedMetadataCodeChunkGeneratedMetadata,
- GeneratedMetadataAudioChunkGeneratedMetadata,
- GeneratedMetadataVideoChunkGeneratedMetadata,
- GeneratedMetadataImageChunkGeneratedMetadata,
+ MarkdownChunkGeneratedMetadata,
+ TextChunkGeneratedMetadata,
+ PdfChunkGeneratedMetadata,
+ CodeChunkGeneratedMetadata,
+ AudioChunkGeneratedMetadata,
+ VideoChunkGeneratedMetadata,
+ ImageChunkGeneratedMetadata,
None,
],
PropertyInfo(discriminator="type"),
@@ -303,3 +69,6 @@ class ScoredTextInputChunk(BaseModel):
text: Optional[str] = None
"""Text content"""
+
+ context: Optional[str] = None
+ """LLM-generated context that situates this chunk within its source document"""
diff --git a/src/mixedbread/types/scored_video_url_input_chunk.py b/src/mixedbread/types/scored_video_url_input_chunk.py
index 7be647ff..a9a6a64d 100644
--- a/src/mixedbread/types/scored_video_url_input_chunk.py
+++ b/src/mixedbread/types/scored_video_url_input_chunk.py
@@ -1,277 +1,36 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-from typing import TYPE_CHECKING, Dict, List, Union, Optional
+from typing import Union, Optional
from typing_extensions import Literal, Annotated, TypeAlias
-from pydantic import Field as FieldInfo
-
from .._utils import PropertyInfo
from .._models import BaseModel
+from .video_url import VideoURL
+from .pdf_chunk_generated_metadata import PdfChunkGeneratedMetadata
+from .code_chunk_generated_metadata import CodeChunkGeneratedMetadata
+from .text_chunk_generated_metadata import TextChunkGeneratedMetadata
+from .audio_chunk_generated_metadata import AudioChunkGeneratedMetadata
+from .image_chunk_generated_metadata import ImageChunkGeneratedMetadata
+from .video_chunk_generated_metadata import VideoChunkGeneratedMetadata
+from .markdown_chunk_generated_metadata import MarkdownChunkGeneratedMetadata
-__all__ = [
- "ScoredVideoURLInputChunk",
- "GeneratedMetadata",
- "GeneratedMetadataMarkdownChunkGeneratedMetadata",
- "GeneratedMetadataMarkdownChunkGeneratedMetadataChunkHeading",
- "GeneratedMetadataMarkdownChunkGeneratedMetadataHeadingContext",
- "GeneratedMetadataTextChunkGeneratedMetadata",
- "GeneratedMetadataPdfChunkGeneratedMetadata",
- "GeneratedMetadataCodeChunkGeneratedMetadata",
- "GeneratedMetadataAudioChunkGeneratedMetadata",
- "GeneratedMetadataVideoChunkGeneratedMetadata",
- "GeneratedMetadataImageChunkGeneratedMetadata",
- "VideoURL",
-]
-
-
-class GeneratedMetadataMarkdownChunkGeneratedMetadataChunkHeading(BaseModel):
- level: int
-
- text: str
-
-
-class GeneratedMetadataMarkdownChunkGeneratedMetadataHeadingContext(BaseModel):
- level: int
-
- text: str
-
-
-class GeneratedMetadataMarkdownChunkGeneratedMetadata(BaseModel):
- type: Optional[Literal["markdown"]] = None
-
- file_type: Optional[Literal["text/markdown"]] = None
-
- language: Optional[str] = None
-
- word_count: Optional[int] = None
-
- file_size: Optional[int] = None
-
- chunk_headings: Optional[List[GeneratedMetadataMarkdownChunkGeneratedMetadataChunkHeading]] = None
-
- heading_context: Optional[List[GeneratedMetadataMarkdownChunkGeneratedMetadataHeadingContext]] = None
-
- start_line: Optional[int] = None
-
- num_lines: Optional[int] = None
-
- file_extension: Optional[str] = None
-
- frontmatter: Optional[Dict[str, object]] = None
-
- if TYPE_CHECKING:
- # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a
- # value to this field, so for compatibility we avoid doing it at runtime.
- __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride]
-
- # Stub to indicate that arbitrary properties are accepted.
- # To access properties that are not valid identifiers you can use `getattr`, e.g.
- # `getattr(obj, '$type')`
- def __getattr__(self, attr: str) -> object: ...
- else:
- __pydantic_extra__: Dict[str, object]
-
-
-class GeneratedMetadataTextChunkGeneratedMetadata(BaseModel):
- type: Optional[Literal["text"]] = None
-
- file_type: Optional[Literal["text/plain"]] = None
-
- language: Optional[str] = None
-
- word_count: Optional[int] = None
-
- file_size: Optional[int] = None
-
- start_line: Optional[int] = None
-
- num_lines: Optional[int] = None
-
- file_extension: Optional[str] = None
-
- if TYPE_CHECKING:
- # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a
- # value to this field, so for compatibility we avoid doing it at runtime.
- __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride]
-
- # Stub to indicate that arbitrary properties are accepted.
- # To access properties that are not valid identifiers you can use `getattr`, e.g.
- # `getattr(obj, '$type')`
- def __getattr__(self, attr: str) -> object: ...
- else:
- __pydantic_extra__: Dict[str, object]
-
-
-class GeneratedMetadataPdfChunkGeneratedMetadata(BaseModel):
- type: Optional[Literal["pdf"]] = None
-
- file_type: Optional[Literal["application/pdf"]] = None
-
- total_pages: Optional[int] = None
-
- total_size: Optional[int] = None
-
- file_extension: Optional[str] = None
-
- if TYPE_CHECKING:
- # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a
- # value to this field, so for compatibility we avoid doing it at runtime.
- __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride]
-
- # Stub to indicate that arbitrary properties are accepted.
- # To access properties that are not valid identifiers you can use `getattr`, e.g.
- # `getattr(obj, '$type')`
- def __getattr__(self, attr: str) -> object: ...
- else:
- __pydantic_extra__: Dict[str, object]
-
-
-class GeneratedMetadataCodeChunkGeneratedMetadata(BaseModel):
- type: Optional[Literal["code"]] = None
-
- file_type: str
-
- language: Optional[str] = None
-
- word_count: Optional[int] = None
-
- file_size: Optional[int] = None
-
- start_line: Optional[int] = None
-
- num_lines: Optional[int] = None
-
- file_extension: Optional[str] = None
-
- if TYPE_CHECKING:
- # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a
- # value to this field, so for compatibility we avoid doing it at runtime.
- __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride]
-
- # Stub to indicate that arbitrary properties are accepted.
- # To access properties that are not valid identifiers you can use `getattr`, e.g.
- # `getattr(obj, '$type')`
- def __getattr__(self, attr: str) -> object: ...
- else:
- __pydantic_extra__: Dict[str, object]
-
-
-class GeneratedMetadataAudioChunkGeneratedMetadata(BaseModel):
- type: Optional[Literal["audio"]] = None
-
- file_type: Optional[str] = None
-
- file_size: Optional[int] = None
-
- total_duration_seconds: Optional[float] = None
-
- sample_rate: Optional[int] = None
-
- channels: Optional[int] = None
-
- audio_format: Optional[int] = None
-
- bpm: Optional[int] = None
-
- file_extension: Optional[str] = None
-
- if TYPE_CHECKING:
- # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a
- # value to this field, so for compatibility we avoid doing it at runtime.
- __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride]
-
- # Stub to indicate that arbitrary properties are accepted.
- # To access properties that are not valid identifiers you can use `getattr`, e.g.
- # `getattr(obj, '$type')`
- def __getattr__(self, attr: str) -> object: ...
- else:
- __pydantic_extra__: Dict[str, object]
-
-
-class GeneratedMetadataVideoChunkGeneratedMetadata(BaseModel):
- type: Optional[Literal["video"]] = None
-
- file_type: Optional[str] = None
-
- file_size: Optional[int] = None
-
- total_duration_seconds: Optional[float] = None
-
- fps: Optional[float] = None
-
- width: Optional[int] = None
-
- height: Optional[int] = None
-
- frame_count: Optional[int] = None
-
- has_audio_stream: Optional[bool] = None
-
- bpm: Optional[int] = None
-
- file_extension: Optional[str] = None
-
- if TYPE_CHECKING:
- # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a
- # value to this field, so for compatibility we avoid doing it at runtime.
- __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride]
-
- # Stub to indicate that arbitrary properties are accepted.
- # To access properties that are not valid identifiers you can use `getattr`, e.g.
- # `getattr(obj, '$type')`
- def __getattr__(self, attr: str) -> object: ...
- else:
- __pydantic_extra__: Dict[str, object]
-
-
-class GeneratedMetadataImageChunkGeneratedMetadata(BaseModel):
- type: Optional[Literal["image"]] = None
-
- file_type: Optional[str] = None
-
- file_size: Optional[int] = None
-
- width: Optional[int] = None
-
- height: Optional[int] = None
-
- file_extension: Optional[str] = None
-
- if TYPE_CHECKING:
- # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a
- # value to this field, so for compatibility we avoid doing it at runtime.
- __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride]
-
- # Stub to indicate that arbitrary properties are accepted.
- # To access properties that are not valid identifiers you can use `getattr`, e.g.
- # `getattr(obj, '$type')`
- def __getattr__(self, attr: str) -> object: ...
- else:
- __pydantic_extra__: Dict[str, object]
-
+__all__ = ["ScoredVideoURLInputChunk", "GeneratedMetadata"]
GeneratedMetadata: TypeAlias = Annotated[
Union[
- GeneratedMetadataMarkdownChunkGeneratedMetadata,
- GeneratedMetadataTextChunkGeneratedMetadata,
- GeneratedMetadataPdfChunkGeneratedMetadata,
- GeneratedMetadataCodeChunkGeneratedMetadata,
- GeneratedMetadataAudioChunkGeneratedMetadata,
- GeneratedMetadataVideoChunkGeneratedMetadata,
- GeneratedMetadataImageChunkGeneratedMetadata,
+ MarkdownChunkGeneratedMetadata,
+ TextChunkGeneratedMetadata,
+ PdfChunkGeneratedMetadata,
+ CodeChunkGeneratedMetadata,
+ AudioChunkGeneratedMetadata,
+ VideoChunkGeneratedMetadata,
+ ImageChunkGeneratedMetadata,
None,
],
PropertyInfo(discriminator="type"),
]
-class VideoURL(BaseModel):
- """Model for video URL validation."""
-
- url: str
- """The video URL. Can be either a URL or a Data URI."""
-
-
class ScoredVideoURLInputChunk(BaseModel):
chunk_index: int
"""position of the chunk in a file"""
diff --git a/src/mixedbread/types/store.py b/src/mixedbread/types/store.py
index 83de8954..2f1527fe 100644
--- a/src/mixedbread/types/store.py
+++ b/src/mixedbread/types/store.py
@@ -1,63 +1,15 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-from typing import List, Union, Optional
+from typing import Optional
from datetime import datetime
-from typing_extensions import Literal, TypeAlias
+from typing_extensions import Literal
from .._models import BaseModel
+from .file_counts import FileCounts
+from .store_config import StoreConfig
from .expires_after import ExpiresAfter
-__all__ = ["Store", "Config", "ConfigContextualization", "ConfigContextualizationContextualizationConfig", "FileCounts"]
-
-
-class ConfigContextualizationContextualizationConfig(BaseModel):
- with_metadata: Union[bool, List[str], None] = None
- """Include all metadata or specific fields in the contextualization.
-
- Supports dot notation for nested fields (e.g., 'author.name'). When True, all
- metadata is included (flattened). When a list, only specified fields are
- included.
- """
-
-
-ConfigContextualization: TypeAlias = Union[bool, ConfigContextualizationContextualizationConfig]
-
-
-class Config(BaseModel):
- """Configuration for a store."""
-
- contextualization: Optional[ConfigContextualization] = None
- """Contextualize files with metadata"""
-
- save_content: Optional[bool] = None
- """Whether to save original content in the store.
-
- When False, only vectors are indexed without the original content (index-only
- mode). This is useful for data privacy. Note: Reranking is not supported when
- content is not saved.
- """
-
-
-class FileCounts(BaseModel):
- """Counts of files in different states"""
-
- pending: Optional[int] = None
- """Number of files waiting to be processed"""
-
- in_progress: Optional[int] = None
- """Number of files currently being processed"""
-
- cancelled: Optional[int] = None
- """Number of files whose processing was cancelled"""
-
- completed: Optional[int] = None
- """Number of successfully processed files"""
-
- failed: Optional[int] = None
- """Number of files that failed processing"""
-
- total: Optional[int] = None
- """Total number of files"""
+__all__ = ["Store"]
class Store(BaseModel):
@@ -75,10 +27,13 @@ class Store(BaseModel):
is_public: Optional[bool] = None
"""Whether the store can be accessed by anyone with valid login credentials"""
+ license: Optional[str] = None
+ """License for public stores"""
+
metadata: Optional[object] = None
"""Additional metadata associated with the store"""
- config: Optional[Config] = None
+ config: Optional[StoreConfig] = None
"""Configuration for a store."""
file_counts: Optional[FileCounts] = None
diff --git a/src/mixedbread/types/store_chunk_search_options_param.py b/src/mixedbread/types/store_chunk_search_options_param.py
index 95d76cd5..44d79d54 100644
--- a/src/mixedbread/types/store_chunk_search_options_param.py
+++ b/src/mixedbread/types/store_chunk_search_options_param.py
@@ -5,47 +5,14 @@
from typing import Union, Optional
from typing_extensions import TypeAlias, TypedDict
-from .._types import SequenceNotStr
+from .rerank_config_param import RerankConfigParam
+from .agentic_search_config_param import AgenticSearchConfigParam
-__all__ = ["StoreChunkSearchOptionsParam", "Rerank", "RerankRerankConfig", "Agentic", "AgenticAgenticSearchConfig"]
+__all__ = ["StoreChunkSearchOptionsParam", "Rerank", "Agentic"]
+Rerank: TypeAlias = Union[bool, RerankConfigParam]
-class RerankRerankConfig(TypedDict, total=False):
- """Represents a reranking configuration."""
-
- model: str
- """The name of the reranking model"""
-
- with_metadata: Union[bool, SequenceNotStr[str]]
- """Whether to include metadata in the reranked results"""
-
- top_k: Optional[int]
- """Maximum number of results to return after reranking.
-
- If None, returns all reranked results.
- """
-
-
-Rerank: TypeAlias = Union[bool, RerankRerankConfig]
-
-
-class AgenticAgenticSearchConfig(TypedDict, total=False):
- """Configuration for agentic multi-query search."""
-
- max_rounds: int
- """Maximum number of search rounds"""
-
- queries_per_round: int
- """Maximum queries per round"""
-
- instructions: Optional[str]
- """
- Additional custom instructions (followed only when not in conflict with existing
- rules)
- """
-
-
-Agentic: TypeAlias = Union[bool, AgenticAgenticSearchConfig]
+Agentic: TypeAlias = Union[bool, AgenticSearchConfigParam]
class StoreChunkSearchOptionsParam(TypedDict, total=False):
diff --git a/src/mixedbread/types/store_config.py b/src/mixedbread/types/store_config.py
new file mode 100644
index 00000000..b8838ed4
--- /dev/null
+++ b/src/mixedbread/types/store_config.py
@@ -0,0 +1,26 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Union, Optional
+from typing_extensions import TypeAlias
+
+from .._models import BaseModel
+from .contextualization_config import ContextualizationConfig
+
+__all__ = ["StoreConfig", "Contextualization"]
+
+Contextualization: TypeAlias = Union[bool, ContextualizationConfig]
+
+
+class StoreConfig(BaseModel):
+ """Configuration for a store."""
+
+ contextualization: Optional[Contextualization] = None
+ """Contextualize files with metadata"""
+
+ save_content: Optional[bool] = None
+ """Whether to save original content in the store.
+
+ When False, only vectors are indexed without the original content (index-only
+ mode). This is useful for data privacy. Note: Reranking is not supported when
+ content is not saved.
+ """
diff --git a/src/mixedbread/types/store_config_param.py b/src/mixedbread/types/store_config_param.py
new file mode 100644
index 00000000..56f8f0c8
--- /dev/null
+++ b/src/mixedbread/types/store_config_param.py
@@ -0,0 +1,27 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Union
+from typing_extensions import TypeAlias, TypedDict
+
+from .contextualization_config_param import ContextualizationConfigParam
+
+__all__ = ["StoreConfigParam", "Contextualization"]
+
+Contextualization: TypeAlias = Union[bool, ContextualizationConfigParam]
+
+
+class StoreConfigParam(TypedDict, total=False):
+ """Configuration for a store."""
+
+ contextualization: Contextualization
+ """Contextualize files with metadata"""
+
+ save_content: bool
+ """Whether to save original content in the store.
+
+ When False, only vectors are indexed without the original content (index-only
+ mode). This is useful for data privacy. Note: Reranking is not supported when
+ content is not saved.
+ """
diff --git a/src/mixedbread/types/store_create_params.py b/src/mixedbread/types/store_create_params.py
index b7814cdd..eb90d776 100644
--- a/src/mixedbread/types/store_create_params.py
+++ b/src/mixedbread/types/store_create_params.py
@@ -2,13 +2,14 @@
from __future__ import annotations
-from typing import Union, Optional
-from typing_extensions import TypeAlias, TypedDict
+from typing import Optional
+from typing_extensions import TypedDict
from .._types import SequenceNotStr
+from .store_config_param import StoreConfigParam
from .expires_after_param import ExpiresAfterParam
-__all__ = ["StoreCreateParams", "Config", "ConfigContextualization", "ConfigContextualizationContextualizationConfig"]
+__all__ = ["StoreCreateParams"]
class StoreCreateParams(TypedDict, total=False):
@@ -24,42 +25,17 @@ class StoreCreateParams(TypedDict, total=False):
is_public: bool
"""Whether the store can be accessed by anyone with valid login credentials"""
+ license: Optional[str]
+ """License for public stores"""
+
expires_after: Optional[ExpiresAfterParam]
"""Represents an expiration policy for a store."""
metadata: object
"""Optional metadata key-value pairs"""
- config: Optional[Config]
+ config: Optional[StoreConfigParam]
"""Configuration for a store."""
file_ids: Optional[SequenceNotStr[str]]
"""Optional list of file IDs"""
-
-
-class ConfigContextualizationContextualizationConfig(TypedDict, total=False):
- with_metadata: Union[bool, SequenceNotStr[str]]
- """Include all metadata or specific fields in the contextualization.
-
- Supports dot notation for nested fields (e.g., 'author.name'). When True, all
- metadata is included (flattened). When a list, only specified fields are
- included.
- """
-
-
-ConfigContextualization: TypeAlias = Union[bool, ConfigContextualizationContextualizationConfig]
-
-
-class Config(TypedDict, total=False):
- """Configuration for a store."""
-
- contextualization: ConfigContextualization
- """Contextualize files with metadata"""
-
- save_content: bool
- """Whether to save original content in the store.
-
- When False, only vectors are indexed without the original content (index-only
- mode). This is useful for data privacy. Note: Reranking is not supported when
- content is not saved.
- """
diff --git a/src/mixedbread/types/store_update_params.py b/src/mixedbread/types/store_update_params.py
index e06bbf9d..d6285f0c 100644
--- a/src/mixedbread/types/store_update_params.py
+++ b/src/mixedbread/types/store_update_params.py
@@ -23,6 +23,9 @@ class StoreUpdateParams(TypedDict, total=False):
is_public: Optional[bool]
"""Whether the store can be accessed by anyone with valid login credentials"""
+ license: Optional[str]
+ """License for public stores"""
+
expires_after: Optional[ExpiresAfterParam]
"""Represents an expiration policy for a store."""
diff --git a/src/mixedbread/types/stores/__init__.py b/src/mixedbread/types/stores/__init__.py
index 50862586..f52b7b9f 100644
--- a/src/mixedbread/types/stores/__init__.py
+++ b/src/mixedbread/types/stores/__init__.py
@@ -4,12 +4,15 @@
from .store_file import StoreFile as StoreFile
from .file_list_params import FileListParams as FileListParams
-from .scored_store_file import ScoredStoreFile as ScoredStoreFile
+from .text_input_chunk import TextInputChunk as TextInputChunk
+from .store_file_config import StoreFileConfig as StoreFileConfig
from .store_file_status import StoreFileStatus as StoreFileStatus
from .file_create_params import FileCreateParams as FileCreateParams
from .file_list_response import FileListResponse as FileListResponse
-from .file_search_params import FileSearchParams as FileSearchParams
from .file_update_params import FileUpdateParams as FileUpdateParams
from .file_delete_response import FileDeleteResponse as FileDeleteResponse
from .file_retrieve_params import FileRetrieveParams as FileRetrieveParams
-from .file_search_response import FileSearchResponse as FileSearchResponse
+from .audio_url_input_chunk import AudioURLInputChunk as AudioURLInputChunk
+from .image_url_input_chunk import ImageURLInputChunk as ImageURLInputChunk
+from .video_url_input_chunk import VideoURLInputChunk as VideoURLInputChunk
+from .store_file_config_param import StoreFileConfigParam as StoreFileConfigParam
diff --git a/src/mixedbread/types/stores/audio_url_input_chunk.py b/src/mixedbread/types/stores/audio_url_input_chunk.py
new file mode 100644
index 00000000..79cdd3c3
--- /dev/null
+++ b/src/mixedbread/types/stores/audio_url_input_chunk.py
@@ -0,0 +1,57 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Union, Optional
+from typing_extensions import Literal, Annotated, TypeAlias
+
+from ..._utils import PropertyInfo
+from ..._models import BaseModel
+from ..audio_url import AudioURL
+from ..pdf_chunk_generated_metadata import PdfChunkGeneratedMetadata
+from ..code_chunk_generated_metadata import CodeChunkGeneratedMetadata
+from ..text_chunk_generated_metadata import TextChunkGeneratedMetadata
+from ..audio_chunk_generated_metadata import AudioChunkGeneratedMetadata
+from ..image_chunk_generated_metadata import ImageChunkGeneratedMetadata
+from ..video_chunk_generated_metadata import VideoChunkGeneratedMetadata
+from ..markdown_chunk_generated_metadata import MarkdownChunkGeneratedMetadata
+
+__all__ = ["AudioURLInputChunk", "GeneratedMetadata"]
+
+GeneratedMetadata: TypeAlias = Annotated[
+ Union[
+ MarkdownChunkGeneratedMetadata,
+ TextChunkGeneratedMetadata,
+ PdfChunkGeneratedMetadata,
+ CodeChunkGeneratedMetadata,
+ AudioChunkGeneratedMetadata,
+ VideoChunkGeneratedMetadata,
+ ImageChunkGeneratedMetadata,
+ None,
+ ],
+ PropertyInfo(discriminator="type"),
+]
+
+
+class AudioURLInputChunk(BaseModel):
+ chunk_index: int
+ """position of the chunk in a file"""
+
+ mime_type: Optional[str] = None
+ """mime type of the chunk"""
+
+ generated_metadata: Optional[GeneratedMetadata] = None
+ """metadata of the chunk"""
+
+ model: Optional[str] = None
+ """model used for this chunk"""
+
+ type: Optional[Literal["audio_url"]] = None
+ """Input type identifier"""
+
+ transcription: Optional[str] = None
+ """speech recognition (sr) text of the audio"""
+
+ audio_url: Optional[AudioURL] = None
+ """Model for audio URL validation."""
+
+ sampling_rate: int
+ """The sampling rate of the audio."""
diff --git a/src/mixedbread/types/stores/file_create_params.py b/src/mixedbread/types/stores/file_create_params.py
index c776ea33..ac1d5b86 100644
--- a/src/mixedbread/types/stores/file_create_params.py
+++ b/src/mixedbread/types/stores/file_create_params.py
@@ -3,16 +3,18 @@
from __future__ import annotations
from typing import Optional
-from typing_extensions import Literal, Required, TypedDict
+from typing_extensions import Required, TypedDict
-__all__ = ["FileCreateParams", "Config", "Experimental"]
+from .store_file_config_param import StoreFileConfigParam
+
+__all__ = ["FileCreateParams"]
class FileCreateParams(TypedDict, total=False):
metadata: object
"""Optional metadata for the file"""
- config: Config
+ config: StoreFileConfigParam
"""Configuration for adding the file"""
external_id: Optional[str]
@@ -24,19 +26,5 @@ class FileCreateParams(TypedDict, total=False):
file_id: Required[str]
"""ID of the file to add"""
- experimental: Optional[Experimental]
+ experimental: Optional[StoreFileConfigParam]
"""Configuration for a file."""
-
-
-class Config(TypedDict, total=False):
- """Configuration for adding the file"""
-
- parsing_strategy: Literal["fast", "high_quality"]
- """Strategy for adding the file, this overrides the store-level default"""
-
-
-class Experimental(TypedDict, total=False):
- """Configuration for a file."""
-
- parsing_strategy: Literal["fast", "high_quality"]
- """Strategy for adding the file, this overrides the store-level default"""
diff --git a/src/mixedbread/types/stores/file_search_params.py b/src/mixedbread/types/stores/file_search_params.py
deleted file mode 100644
index c1b5dee9..00000000
--- a/src/mixedbread/types/stores/file_search_params.py
+++ /dev/null
@@ -1,128 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing import Union, Iterable, Optional
-from typing_extensions import Required, TypeAlias, TypedDict
-
-from ..._types import SequenceNotStr
-from ..extractions.text_input_param import TextInputParam
-from ..extractions.image_url_input_param import ImageURLInputParam
-from ..shared_params.search_filter_condition import SearchFilterCondition
-
-__all__ = [
- "FileSearchParams",
- "Query",
- "Filters",
- "FiltersUnionMember2",
- "SearchOptions",
- "SearchOptionsRerank",
- "SearchOptionsRerankRerankConfig",
- "SearchOptionsAgentic",
- "SearchOptionsAgenticAgenticSearchConfig",
-]
-
-
-class FileSearchParams(TypedDict, total=False):
- query: Required[Query]
- """Search query text"""
-
- store_identifiers: Required[SequenceNotStr[str]]
- """IDs or names of stores to search"""
-
- top_k: int
- """Number of results to return"""
-
- filters: Optional[Filters]
- """Optional filter conditions"""
-
- file_ids: Union[Iterable[object], SequenceNotStr[str], None]
- """Optional list of file IDs to filter chunks by (inclusion filter)"""
-
- search_options: SearchOptions
- """Search configuration options"""
-
-
-Query: TypeAlias = Union[str, ImageURLInputParam, TextInputParam]
-
-FiltersUnionMember2: TypeAlias = Union["SearchFilter", SearchFilterCondition]
-
-Filters: TypeAlias = Union["SearchFilter", SearchFilterCondition, Iterable[FiltersUnionMember2]]
-
-
-class SearchOptionsRerankRerankConfig(TypedDict, total=False):
- """Represents a reranking configuration."""
-
- model: str
- """The name of the reranking model"""
-
- with_metadata: Union[bool, SequenceNotStr[str]]
- """Whether to include metadata in the reranked results"""
-
- top_k: Optional[int]
- """Maximum number of results to return after reranking.
-
- If None, returns all reranked results.
- """
-
-
-SearchOptionsRerank: TypeAlias = Union[bool, SearchOptionsRerankRerankConfig]
-
-
-class SearchOptionsAgenticAgenticSearchConfig(TypedDict, total=False):
- """Configuration for agentic multi-query search."""
-
- max_rounds: int
- """Maximum number of search rounds"""
-
- queries_per_round: int
- """Maximum queries per round"""
-
- instructions: Optional[str]
- """
- Additional custom instructions (followed only when not in conflict with existing
- rules)
- """
-
-
-SearchOptionsAgentic: TypeAlias = Union[bool, SearchOptionsAgenticAgenticSearchConfig]
-
-
-class SearchOptions(TypedDict, total=False):
- """Search configuration options"""
-
- score_threshold: float
- """Minimum similarity score threshold"""
-
- rewrite_query: bool
- """Whether to rewrite the query.
-
- Ignored when agentic is enabled (the agent handles query decomposition).
- """
-
- rerank: Optional[SearchOptionsRerank]
- """Whether to rerank results and optional reranking configuration.
-
- Ignored when agentic is enabled (the agent handles ranking).
- """
-
- agentic: Optional[SearchOptionsAgentic]
- """
- Whether to use agentic multi-query search with automatic query decomposition and
- ranking. When enabled, rewrite_query and rerank options are ignored.
- """
-
- return_metadata: bool
- """Whether to return file metadata"""
-
- return_chunks: bool
- """Whether to return matching text chunks"""
-
- chunks_per_file: int
- """Number of chunks to return for each file"""
-
- apply_search_rules: bool
- """Whether to apply search rules"""
-
-
-from ..shared_params.search_filter import SearchFilter
diff --git a/src/mixedbread/types/stores/file_search_response.py b/src/mixedbread/types/stores/file_search_response.py
deleted file mode 100644
index 304512e4..00000000
--- a/src/mixedbread/types/stores/file_search_response.py
+++ /dev/null
@@ -1,17 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing import List, Optional
-from typing_extensions import Literal
-
-from ..._models import BaseModel
-from .scored_store_file import ScoredStoreFile
-
-__all__ = ["FileSearchResponse"]
-
-
-class FileSearchResponse(BaseModel):
- object: Optional[Literal["list"]] = None
- """The object type of the response"""
-
- data: List[ScoredStoreFile]
- """The list of scored store files"""
diff --git a/src/mixedbread/types/stores/image_url_input_chunk.py b/src/mixedbread/types/stores/image_url_input_chunk.py
new file mode 100644
index 00000000..927f5d8d
--- /dev/null
+++ b/src/mixedbread/types/stores/image_url_input_chunk.py
@@ -0,0 +1,57 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Union, Optional
+from typing_extensions import Literal, Annotated, TypeAlias
+
+from ..._utils import PropertyInfo
+from ..._models import BaseModel
+from ..image_url_output import ImageURLOutput
+from ..pdf_chunk_generated_metadata import PdfChunkGeneratedMetadata
+from ..code_chunk_generated_metadata import CodeChunkGeneratedMetadata
+from ..text_chunk_generated_metadata import TextChunkGeneratedMetadata
+from ..audio_chunk_generated_metadata import AudioChunkGeneratedMetadata
+from ..image_chunk_generated_metadata import ImageChunkGeneratedMetadata
+from ..video_chunk_generated_metadata import VideoChunkGeneratedMetadata
+from ..markdown_chunk_generated_metadata import MarkdownChunkGeneratedMetadata
+
+__all__ = ["ImageURLInputChunk", "GeneratedMetadata"]
+
+GeneratedMetadata: TypeAlias = Annotated[
+ Union[
+ MarkdownChunkGeneratedMetadata,
+ TextChunkGeneratedMetadata,
+ PdfChunkGeneratedMetadata,
+ CodeChunkGeneratedMetadata,
+ AudioChunkGeneratedMetadata,
+ VideoChunkGeneratedMetadata,
+ ImageChunkGeneratedMetadata,
+ None,
+ ],
+ PropertyInfo(discriminator="type"),
+]
+
+
+class ImageURLInputChunk(BaseModel):
+ chunk_index: int
+ """position of the chunk in a file"""
+
+ mime_type: Optional[str] = None
+ """mime type of the chunk"""
+
+ generated_metadata: Optional[GeneratedMetadata] = None
+ """metadata of the chunk"""
+
+ model: Optional[str] = None
+ """model used for this chunk"""
+
+ type: Optional[Literal["image_url"]] = None
+ """Input type identifier"""
+
+ ocr_text: Optional[str] = None
+ """ocr text of the image"""
+
+ summary: Optional[str] = None
+ """summary of the image"""
+
+ image_url: Optional[ImageURLOutput] = None
+ """Model for image URL validation."""
diff --git a/src/mixedbread/types/stores/scored_store_file.py b/src/mixedbread/types/stores/scored_store_file.py
deleted file mode 100644
index 87483138..00000000
--- a/src/mixedbread/types/stores/scored_store_file.py
+++ /dev/null
@@ -1,77 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing import List, Union, Optional
-from datetime import datetime
-from typing_extensions import Literal, Annotated, TypeAlias
-
-from ..._utils import PropertyInfo
-from ..._models import BaseModel
-from .store_file_status import StoreFileStatus
-from ..scored_text_input_chunk import ScoredTextInputChunk
-from ..scored_audio_url_input_chunk import ScoredAudioURLInputChunk
-from ..scored_image_url_input_chunk import ScoredImageURLInputChunk
-from ..scored_video_url_input_chunk import ScoredVideoURLInputChunk
-
-__all__ = ["ScoredStoreFile", "Config", "Chunk"]
-
-
-class Config(BaseModel):
- """Configuration for a file."""
-
- parsing_strategy: Optional[Literal["fast", "high_quality"]] = None
- """Strategy for adding the file, this overrides the store-level default"""
-
-
-Chunk: TypeAlias = Annotated[
- Union[ScoredTextInputChunk, ScoredImageURLInputChunk, ScoredAudioURLInputChunk, ScoredVideoURLInputChunk],
- PropertyInfo(discriminator="type"),
-]
-
-
-class ScoredStoreFile(BaseModel):
- """Represents a scored store file."""
-
- id: str
- """Unique identifier for the file"""
-
- filename: Optional[str] = None
- """Name of the file"""
-
- metadata: Optional[object] = None
- """Optional file metadata"""
-
- external_id: Optional[str] = None
- """External identifier for this file in the store"""
-
- status: Optional[StoreFileStatus] = None
- """Processing status of the file"""
-
- last_error: Optional[object] = None
- """Last error message if processing failed"""
-
- store_id: str
- """ID of the containing store"""
-
- created_at: datetime
- """Timestamp of store file creation"""
-
- version: Optional[int] = None
- """Version number of the file"""
-
- usage_bytes: Optional[int] = None
- """Storage usage in bytes"""
-
- usage_tokens: Optional[int] = None
- """Storage usage in tokens"""
-
- config: Optional[Config] = None
- """Configuration for a file."""
-
- object: Optional[Literal["store.file"]] = None
- """Type of the object"""
-
- chunks: Optional[List[Chunk]] = None
- """Array of scored file chunks"""
-
- score: float
- """score of the file"""
diff --git a/src/mixedbread/types/stores/store_file.py b/src/mixedbread/types/stores/store_file.py
index eb1baefc..e6154982 100644
--- a/src/mixedbread/types/stores/store_file.py
+++ b/src/mixedbread/types/stores/store_file.py
@@ -1,1186 +1,22 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-from typing import TYPE_CHECKING, Dict, List, Union, Optional
+from typing import List, Union, Optional
from datetime import datetime
from typing_extensions import Literal, Annotated, TypeAlias
-from pydantic import Field as FieldInfo
-
from ..._utils import PropertyInfo
from ..._models import BaseModel
+from .text_input_chunk import TextInputChunk
+from .store_file_config import StoreFileConfig
from .store_file_status import StoreFileStatus
+from .audio_url_input_chunk import AudioURLInputChunk
+from .image_url_input_chunk import ImageURLInputChunk
+from .video_url_input_chunk import VideoURLInputChunk
-__all__ = [
- "StoreFile",
- "Config",
- "Chunk",
- "ChunkTextInputChunk",
- "ChunkTextInputChunkGeneratedMetadata",
- "ChunkTextInputChunkGeneratedMetadataMarkdownChunkGeneratedMetadata",
- "ChunkTextInputChunkGeneratedMetadataMarkdownChunkGeneratedMetadataChunkHeading",
- "ChunkTextInputChunkGeneratedMetadataMarkdownChunkGeneratedMetadataHeadingContext",
- "ChunkTextInputChunkGeneratedMetadataTextChunkGeneratedMetadata",
- "ChunkTextInputChunkGeneratedMetadataPdfChunkGeneratedMetadata",
- "ChunkTextInputChunkGeneratedMetadataCodeChunkGeneratedMetadata",
- "ChunkTextInputChunkGeneratedMetadataAudioChunkGeneratedMetadata",
- "ChunkTextInputChunkGeneratedMetadataVideoChunkGeneratedMetadata",
- "ChunkTextInputChunkGeneratedMetadataImageChunkGeneratedMetadata",
- "ChunkImageURLInputChunk",
- "ChunkImageURLInputChunkGeneratedMetadata",
- "ChunkImageURLInputChunkGeneratedMetadataMarkdownChunkGeneratedMetadata",
- "ChunkImageURLInputChunkGeneratedMetadataMarkdownChunkGeneratedMetadataChunkHeading",
- "ChunkImageURLInputChunkGeneratedMetadataMarkdownChunkGeneratedMetadataHeadingContext",
- "ChunkImageURLInputChunkGeneratedMetadataTextChunkGeneratedMetadata",
- "ChunkImageURLInputChunkGeneratedMetadataPdfChunkGeneratedMetadata",
- "ChunkImageURLInputChunkGeneratedMetadataCodeChunkGeneratedMetadata",
- "ChunkImageURLInputChunkGeneratedMetadataAudioChunkGeneratedMetadata",
- "ChunkImageURLInputChunkGeneratedMetadataVideoChunkGeneratedMetadata",
- "ChunkImageURLInputChunkGeneratedMetadataImageChunkGeneratedMetadata",
- "ChunkImageURLInputChunkImageURL",
- "ChunkAudioURLInputChunk",
- "ChunkAudioURLInputChunkGeneratedMetadata",
- "ChunkAudioURLInputChunkGeneratedMetadataMarkdownChunkGeneratedMetadata",
- "ChunkAudioURLInputChunkGeneratedMetadataMarkdownChunkGeneratedMetadataChunkHeading",
- "ChunkAudioURLInputChunkGeneratedMetadataMarkdownChunkGeneratedMetadataHeadingContext",
- "ChunkAudioURLInputChunkGeneratedMetadataTextChunkGeneratedMetadata",
- "ChunkAudioURLInputChunkGeneratedMetadataPdfChunkGeneratedMetadata",
- "ChunkAudioURLInputChunkGeneratedMetadataCodeChunkGeneratedMetadata",
- "ChunkAudioURLInputChunkGeneratedMetadataAudioChunkGeneratedMetadata",
- "ChunkAudioURLInputChunkGeneratedMetadataVideoChunkGeneratedMetadata",
- "ChunkAudioURLInputChunkGeneratedMetadataImageChunkGeneratedMetadata",
- "ChunkAudioURLInputChunkAudioURL",
- "ChunkVideoURLInputChunk",
- "ChunkVideoURLInputChunkGeneratedMetadata",
- "ChunkVideoURLInputChunkGeneratedMetadataMarkdownChunkGeneratedMetadata",
- "ChunkVideoURLInputChunkGeneratedMetadataMarkdownChunkGeneratedMetadataChunkHeading",
- "ChunkVideoURLInputChunkGeneratedMetadataMarkdownChunkGeneratedMetadataHeadingContext",
- "ChunkVideoURLInputChunkGeneratedMetadataTextChunkGeneratedMetadata",
- "ChunkVideoURLInputChunkGeneratedMetadataPdfChunkGeneratedMetadata",
- "ChunkVideoURLInputChunkGeneratedMetadataCodeChunkGeneratedMetadata",
- "ChunkVideoURLInputChunkGeneratedMetadataAudioChunkGeneratedMetadata",
- "ChunkVideoURLInputChunkGeneratedMetadataVideoChunkGeneratedMetadata",
- "ChunkVideoURLInputChunkGeneratedMetadataImageChunkGeneratedMetadata",
- "ChunkVideoURLInputChunkVideoURL",
-]
-
-
-class Config(BaseModel):
- """Configuration for a file."""
-
- parsing_strategy: Optional[Literal["fast", "high_quality"]] = None
- """Strategy for adding the file, this overrides the store-level default"""
-
-
-class ChunkTextInputChunkGeneratedMetadataMarkdownChunkGeneratedMetadataChunkHeading(BaseModel):
- level: int
-
- text: str
-
-
-class ChunkTextInputChunkGeneratedMetadataMarkdownChunkGeneratedMetadataHeadingContext(BaseModel):
- level: int
-
- text: str
-
-
-class ChunkTextInputChunkGeneratedMetadataMarkdownChunkGeneratedMetadata(BaseModel):
- type: Optional[Literal["markdown"]] = None
-
- file_type: Optional[Literal["text/markdown"]] = None
-
- language: Optional[str] = None
-
- word_count: Optional[int] = None
-
- file_size: Optional[int] = None
-
- chunk_headings: Optional[List[ChunkTextInputChunkGeneratedMetadataMarkdownChunkGeneratedMetadataChunkHeading]] = (
- None
- )
-
- heading_context: Optional[
- List[ChunkTextInputChunkGeneratedMetadataMarkdownChunkGeneratedMetadataHeadingContext]
- ] = None
-
- start_line: Optional[int] = None
-
- num_lines: Optional[int] = None
-
- file_extension: Optional[str] = None
-
- frontmatter: Optional[Dict[str, object]] = None
-
- if TYPE_CHECKING:
- # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a
- # value to this field, so for compatibility we avoid doing it at runtime.
- __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride]
-
- # Stub to indicate that arbitrary properties are accepted.
- # To access properties that are not valid identifiers you can use `getattr`, e.g.
- # `getattr(obj, '$type')`
- def __getattr__(self, attr: str) -> object: ...
- else:
- __pydantic_extra__: Dict[str, object]
-
-
-class ChunkTextInputChunkGeneratedMetadataTextChunkGeneratedMetadata(BaseModel):
- type: Optional[Literal["text"]] = None
-
- file_type: Optional[Literal["text/plain"]] = None
-
- language: Optional[str] = None
-
- word_count: Optional[int] = None
-
- file_size: Optional[int] = None
-
- start_line: Optional[int] = None
-
- num_lines: Optional[int] = None
-
- file_extension: Optional[str] = None
-
- if TYPE_CHECKING:
- # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a
- # value to this field, so for compatibility we avoid doing it at runtime.
- __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride]
-
- # Stub to indicate that arbitrary properties are accepted.
- # To access properties that are not valid identifiers you can use `getattr`, e.g.
- # `getattr(obj, '$type')`
- def __getattr__(self, attr: str) -> object: ...
- else:
- __pydantic_extra__: Dict[str, object]
-
-
-class ChunkTextInputChunkGeneratedMetadataPdfChunkGeneratedMetadata(BaseModel):
- type: Optional[Literal["pdf"]] = None
-
- file_type: Optional[Literal["application/pdf"]] = None
-
- total_pages: Optional[int] = None
-
- total_size: Optional[int] = None
-
- file_extension: Optional[str] = None
-
- if TYPE_CHECKING:
- # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a
- # value to this field, so for compatibility we avoid doing it at runtime.
- __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride]
-
- # Stub to indicate that arbitrary properties are accepted.
- # To access properties that are not valid identifiers you can use `getattr`, e.g.
- # `getattr(obj, '$type')`
- def __getattr__(self, attr: str) -> object: ...
- else:
- __pydantic_extra__: Dict[str, object]
-
-
-class ChunkTextInputChunkGeneratedMetadataCodeChunkGeneratedMetadata(BaseModel):
- type: Optional[Literal["code"]] = None
-
- file_type: str
-
- language: Optional[str] = None
-
- word_count: Optional[int] = None
-
- file_size: Optional[int] = None
-
- start_line: Optional[int] = None
-
- num_lines: Optional[int] = None
-
- file_extension: Optional[str] = None
-
- if TYPE_CHECKING:
- # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a
- # value to this field, so for compatibility we avoid doing it at runtime.
- __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride]
-
- # Stub to indicate that arbitrary properties are accepted.
- # To access properties that are not valid identifiers you can use `getattr`, e.g.
- # `getattr(obj, '$type')`
- def __getattr__(self, attr: str) -> object: ...
- else:
- __pydantic_extra__: Dict[str, object]
-
-
-class ChunkTextInputChunkGeneratedMetadataAudioChunkGeneratedMetadata(BaseModel):
- type: Optional[Literal["audio"]] = None
-
- file_type: Optional[str] = None
-
- file_size: Optional[int] = None
-
- total_duration_seconds: Optional[float] = None
-
- sample_rate: Optional[int] = None
-
- channels: Optional[int] = None
-
- audio_format: Optional[int] = None
-
- bpm: Optional[int] = None
-
- file_extension: Optional[str] = None
-
- if TYPE_CHECKING:
- # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a
- # value to this field, so for compatibility we avoid doing it at runtime.
- __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride]
-
- # Stub to indicate that arbitrary properties are accepted.
- # To access properties that are not valid identifiers you can use `getattr`, e.g.
- # `getattr(obj, '$type')`
- def __getattr__(self, attr: str) -> object: ...
- else:
- __pydantic_extra__: Dict[str, object]
-
-
-class ChunkTextInputChunkGeneratedMetadataVideoChunkGeneratedMetadata(BaseModel):
- type: Optional[Literal["video"]] = None
-
- file_type: Optional[str] = None
-
- file_size: Optional[int] = None
-
- total_duration_seconds: Optional[float] = None
-
- fps: Optional[float] = None
-
- width: Optional[int] = None
-
- height: Optional[int] = None
-
- frame_count: Optional[int] = None
-
- has_audio_stream: Optional[bool] = None
-
- bpm: Optional[int] = None
-
- file_extension: Optional[str] = None
-
- if TYPE_CHECKING:
- # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a
- # value to this field, so for compatibility we avoid doing it at runtime.
- __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride]
-
- # Stub to indicate that arbitrary properties are accepted.
- # To access properties that are not valid identifiers you can use `getattr`, e.g.
- # `getattr(obj, '$type')`
- def __getattr__(self, attr: str) -> object: ...
- else:
- __pydantic_extra__: Dict[str, object]
-
-
-class ChunkTextInputChunkGeneratedMetadataImageChunkGeneratedMetadata(BaseModel):
- type: Optional[Literal["image"]] = None
-
- file_type: Optional[str] = None
-
- file_size: Optional[int] = None
-
- width: Optional[int] = None
-
- height: Optional[int] = None
-
- file_extension: Optional[str] = None
-
- if TYPE_CHECKING:
- # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a
- # value to this field, so for compatibility we avoid doing it at runtime.
- __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride]
-
- # Stub to indicate that arbitrary properties are accepted.
- # To access properties that are not valid identifiers you can use `getattr`, e.g.
- # `getattr(obj, '$type')`
- def __getattr__(self, attr: str) -> object: ...
- else:
- __pydantic_extra__: Dict[str, object]
-
-
-ChunkTextInputChunkGeneratedMetadata: TypeAlias = Annotated[
- Union[
- ChunkTextInputChunkGeneratedMetadataMarkdownChunkGeneratedMetadata,
- ChunkTextInputChunkGeneratedMetadataTextChunkGeneratedMetadata,
- ChunkTextInputChunkGeneratedMetadataPdfChunkGeneratedMetadata,
- ChunkTextInputChunkGeneratedMetadataCodeChunkGeneratedMetadata,
- ChunkTextInputChunkGeneratedMetadataAudioChunkGeneratedMetadata,
- ChunkTextInputChunkGeneratedMetadataVideoChunkGeneratedMetadata,
- ChunkTextInputChunkGeneratedMetadataImageChunkGeneratedMetadata,
- None,
- ],
- PropertyInfo(discriminator="type"),
-]
-
-
-class ChunkTextInputChunk(BaseModel):
- chunk_index: int
- """position of the chunk in a file"""
-
- mime_type: Optional[str] = None
- """mime type of the chunk"""
-
- generated_metadata: Optional[ChunkTextInputChunkGeneratedMetadata] = None
- """metadata of the chunk"""
-
- model: Optional[str] = None
- """model used for this chunk"""
-
- type: Optional[Literal["text"]] = None
- """Input type identifier"""
-
- offset: Optional[int] = None
- """The offset of the text in the file relative to the start of the file."""
-
- text: Optional[str] = None
- """Text content"""
-
-
-class ChunkImageURLInputChunkGeneratedMetadataMarkdownChunkGeneratedMetadataChunkHeading(BaseModel):
- level: int
-
- text: str
-
-
-class ChunkImageURLInputChunkGeneratedMetadataMarkdownChunkGeneratedMetadataHeadingContext(BaseModel):
- level: int
-
- text: str
-
-
-class ChunkImageURLInputChunkGeneratedMetadataMarkdownChunkGeneratedMetadata(BaseModel):
- type: Optional[Literal["markdown"]] = None
-
- file_type: Optional[Literal["text/markdown"]] = None
-
- language: Optional[str] = None
-
- word_count: Optional[int] = None
-
- file_size: Optional[int] = None
-
- chunk_headings: Optional[
- List[ChunkImageURLInputChunkGeneratedMetadataMarkdownChunkGeneratedMetadataChunkHeading]
- ] = None
-
- heading_context: Optional[
- List[ChunkImageURLInputChunkGeneratedMetadataMarkdownChunkGeneratedMetadataHeadingContext]
- ] = None
-
- start_line: Optional[int] = None
-
- num_lines: Optional[int] = None
-
- file_extension: Optional[str] = None
-
- frontmatter: Optional[Dict[str, object]] = None
-
- if TYPE_CHECKING:
- # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a
- # value to this field, so for compatibility we avoid doing it at runtime.
- __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride]
-
- # Stub to indicate that arbitrary properties are accepted.
- # To access properties that are not valid identifiers you can use `getattr`, e.g.
- # `getattr(obj, '$type')`
- def __getattr__(self, attr: str) -> object: ...
- else:
- __pydantic_extra__: Dict[str, object]
-
-
-class ChunkImageURLInputChunkGeneratedMetadataTextChunkGeneratedMetadata(BaseModel):
- type: Optional[Literal["text"]] = None
-
- file_type: Optional[Literal["text/plain"]] = None
-
- language: Optional[str] = None
-
- word_count: Optional[int] = None
-
- file_size: Optional[int] = None
-
- start_line: Optional[int] = None
-
- num_lines: Optional[int] = None
-
- file_extension: Optional[str] = None
-
- if TYPE_CHECKING:
- # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a
- # value to this field, so for compatibility we avoid doing it at runtime.
- __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride]
-
- # Stub to indicate that arbitrary properties are accepted.
- # To access properties that are not valid identifiers you can use `getattr`, e.g.
- # `getattr(obj, '$type')`
- def __getattr__(self, attr: str) -> object: ...
- else:
- __pydantic_extra__: Dict[str, object]
-
-
-class ChunkImageURLInputChunkGeneratedMetadataPdfChunkGeneratedMetadata(BaseModel):
- type: Optional[Literal["pdf"]] = None
-
- file_type: Optional[Literal["application/pdf"]] = None
-
- total_pages: Optional[int] = None
-
- total_size: Optional[int] = None
-
- file_extension: Optional[str] = None
-
- if TYPE_CHECKING:
- # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a
- # value to this field, so for compatibility we avoid doing it at runtime.
- __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride]
-
- # Stub to indicate that arbitrary properties are accepted.
- # To access properties that are not valid identifiers you can use `getattr`, e.g.
- # `getattr(obj, '$type')`
- def __getattr__(self, attr: str) -> object: ...
- else:
- __pydantic_extra__: Dict[str, object]
-
-
-class ChunkImageURLInputChunkGeneratedMetadataCodeChunkGeneratedMetadata(BaseModel):
- type: Optional[Literal["code"]] = None
-
- file_type: str
-
- language: Optional[str] = None
-
- word_count: Optional[int] = None
-
- file_size: Optional[int] = None
-
- start_line: Optional[int] = None
-
- num_lines: Optional[int] = None
-
- file_extension: Optional[str] = None
-
- if TYPE_CHECKING:
- # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a
- # value to this field, so for compatibility we avoid doing it at runtime.
- __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride]
-
- # Stub to indicate that arbitrary properties are accepted.
- # To access properties that are not valid identifiers you can use `getattr`, e.g.
- # `getattr(obj, '$type')`
- def __getattr__(self, attr: str) -> object: ...
- else:
- __pydantic_extra__: Dict[str, object]
-
-
-class ChunkImageURLInputChunkGeneratedMetadataAudioChunkGeneratedMetadata(BaseModel):
- type: Optional[Literal["audio"]] = None
-
- file_type: Optional[str] = None
-
- file_size: Optional[int] = None
-
- total_duration_seconds: Optional[float] = None
-
- sample_rate: Optional[int] = None
-
- channels: Optional[int] = None
-
- audio_format: Optional[int] = None
-
- bpm: Optional[int] = None
-
- file_extension: Optional[str] = None
-
- if TYPE_CHECKING:
- # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a
- # value to this field, so for compatibility we avoid doing it at runtime.
- __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride]
-
- # Stub to indicate that arbitrary properties are accepted.
- # To access properties that are not valid identifiers you can use `getattr`, e.g.
- # `getattr(obj, '$type')`
- def __getattr__(self, attr: str) -> object: ...
- else:
- __pydantic_extra__: Dict[str, object]
-
-
-class ChunkImageURLInputChunkGeneratedMetadataVideoChunkGeneratedMetadata(BaseModel):
- type: Optional[Literal["video"]] = None
-
- file_type: Optional[str] = None
-
- file_size: Optional[int] = None
-
- total_duration_seconds: Optional[float] = None
-
- fps: Optional[float] = None
-
- width: Optional[int] = None
-
- height: Optional[int] = None
-
- frame_count: Optional[int] = None
-
- has_audio_stream: Optional[bool] = None
-
- bpm: Optional[int] = None
-
- file_extension: Optional[str] = None
-
- if TYPE_CHECKING:
- # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a
- # value to this field, so for compatibility we avoid doing it at runtime.
- __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride]
-
- # Stub to indicate that arbitrary properties are accepted.
- # To access properties that are not valid identifiers you can use `getattr`, e.g.
- # `getattr(obj, '$type')`
- def __getattr__(self, attr: str) -> object: ...
- else:
- __pydantic_extra__: Dict[str, object]
-
-
-class ChunkImageURLInputChunkGeneratedMetadataImageChunkGeneratedMetadata(BaseModel):
- type: Optional[Literal["image"]] = None
-
- file_type: Optional[str] = None
-
- file_size: Optional[int] = None
-
- width: Optional[int] = None
-
- height: Optional[int] = None
-
- file_extension: Optional[str] = None
-
- if TYPE_CHECKING:
- # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a
- # value to this field, so for compatibility we avoid doing it at runtime.
- __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride]
-
- # Stub to indicate that arbitrary properties are accepted.
- # To access properties that are not valid identifiers you can use `getattr`, e.g.
- # `getattr(obj, '$type')`
- def __getattr__(self, attr: str) -> object: ...
- else:
- __pydantic_extra__: Dict[str, object]
-
-
-ChunkImageURLInputChunkGeneratedMetadata: TypeAlias = Annotated[
- Union[
- ChunkImageURLInputChunkGeneratedMetadataMarkdownChunkGeneratedMetadata,
- ChunkImageURLInputChunkGeneratedMetadataTextChunkGeneratedMetadata,
- ChunkImageURLInputChunkGeneratedMetadataPdfChunkGeneratedMetadata,
- ChunkImageURLInputChunkGeneratedMetadataCodeChunkGeneratedMetadata,
- ChunkImageURLInputChunkGeneratedMetadataAudioChunkGeneratedMetadata,
- ChunkImageURLInputChunkGeneratedMetadataVideoChunkGeneratedMetadata,
- ChunkImageURLInputChunkGeneratedMetadataImageChunkGeneratedMetadata,
- None,
- ],
- PropertyInfo(discriminator="type"),
-]
-
-
-class ChunkImageURLInputChunkImageURL(BaseModel):
- """Model for image URL validation."""
-
- url: str
- """The image URL. Can be either a URL or a Data URI."""
-
- format: Optional[str] = None
- """The image format/mimetype"""
-
-
-class ChunkImageURLInputChunk(BaseModel):
- chunk_index: int
- """position of the chunk in a file"""
-
- mime_type: Optional[str] = None
- """mime type of the chunk"""
-
- generated_metadata: Optional[ChunkImageURLInputChunkGeneratedMetadata] = None
- """metadata of the chunk"""
-
- model: Optional[str] = None
- """model used for this chunk"""
-
- type: Optional[Literal["image_url"]] = None
- """Input type identifier"""
-
- ocr_text: Optional[str] = None
- """ocr text of the image"""
-
- summary: Optional[str] = None
- """summary of the image"""
-
- image_url: Optional[ChunkImageURLInputChunkImageURL] = None
- """Model for image URL validation."""
-
-
-class ChunkAudioURLInputChunkGeneratedMetadataMarkdownChunkGeneratedMetadataChunkHeading(BaseModel):
- level: int
-
- text: str
-
-
-class ChunkAudioURLInputChunkGeneratedMetadataMarkdownChunkGeneratedMetadataHeadingContext(BaseModel):
- level: int
-
- text: str
-
-
-class ChunkAudioURLInputChunkGeneratedMetadataMarkdownChunkGeneratedMetadata(BaseModel):
- type: Optional[Literal["markdown"]] = None
-
- file_type: Optional[Literal["text/markdown"]] = None
-
- language: Optional[str] = None
-
- word_count: Optional[int] = None
-
- file_size: Optional[int] = None
-
- chunk_headings: Optional[
- List[ChunkAudioURLInputChunkGeneratedMetadataMarkdownChunkGeneratedMetadataChunkHeading]
- ] = None
-
- heading_context: Optional[
- List[ChunkAudioURLInputChunkGeneratedMetadataMarkdownChunkGeneratedMetadataHeadingContext]
- ] = None
-
- start_line: Optional[int] = None
-
- num_lines: Optional[int] = None
-
- file_extension: Optional[str] = None
-
- frontmatter: Optional[Dict[str, object]] = None
-
- if TYPE_CHECKING:
- # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a
- # value to this field, so for compatibility we avoid doing it at runtime.
- __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride]
-
- # Stub to indicate that arbitrary properties are accepted.
- # To access properties that are not valid identifiers you can use `getattr`, e.g.
- # `getattr(obj, '$type')`
- def __getattr__(self, attr: str) -> object: ...
- else:
- __pydantic_extra__: Dict[str, object]
-
-
-class ChunkAudioURLInputChunkGeneratedMetadataTextChunkGeneratedMetadata(BaseModel):
- type: Optional[Literal["text"]] = None
-
- file_type: Optional[Literal["text/plain"]] = None
-
- language: Optional[str] = None
-
- word_count: Optional[int] = None
-
- file_size: Optional[int] = None
-
- start_line: Optional[int] = None
-
- num_lines: Optional[int] = None
-
- file_extension: Optional[str] = None
-
- if TYPE_CHECKING:
- # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a
- # value to this field, so for compatibility we avoid doing it at runtime.
- __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride]
-
- # Stub to indicate that arbitrary properties are accepted.
- # To access properties that are not valid identifiers you can use `getattr`, e.g.
- # `getattr(obj, '$type')`
- def __getattr__(self, attr: str) -> object: ...
- else:
- __pydantic_extra__: Dict[str, object]
-
-
-class ChunkAudioURLInputChunkGeneratedMetadataPdfChunkGeneratedMetadata(BaseModel):
- type: Optional[Literal["pdf"]] = None
-
- file_type: Optional[Literal["application/pdf"]] = None
-
- total_pages: Optional[int] = None
-
- total_size: Optional[int] = None
-
- file_extension: Optional[str] = None
-
- if TYPE_CHECKING:
- # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a
- # value to this field, so for compatibility we avoid doing it at runtime.
- __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride]
-
- # Stub to indicate that arbitrary properties are accepted.
- # To access properties that are not valid identifiers you can use `getattr`, e.g.
- # `getattr(obj, '$type')`
- def __getattr__(self, attr: str) -> object: ...
- else:
- __pydantic_extra__: Dict[str, object]
-
-
-class ChunkAudioURLInputChunkGeneratedMetadataCodeChunkGeneratedMetadata(BaseModel):
- type: Optional[Literal["code"]] = None
-
- file_type: str
-
- language: Optional[str] = None
-
- word_count: Optional[int] = None
-
- file_size: Optional[int] = None
-
- start_line: Optional[int] = None
-
- num_lines: Optional[int] = None
-
- file_extension: Optional[str] = None
-
- if TYPE_CHECKING:
- # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a
- # value to this field, so for compatibility we avoid doing it at runtime.
- __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride]
-
- # Stub to indicate that arbitrary properties are accepted.
- # To access properties that are not valid identifiers you can use `getattr`, e.g.
- # `getattr(obj, '$type')`
- def __getattr__(self, attr: str) -> object: ...
- else:
- __pydantic_extra__: Dict[str, object]
-
-
-class ChunkAudioURLInputChunkGeneratedMetadataAudioChunkGeneratedMetadata(BaseModel):
- type: Optional[Literal["audio"]] = None
-
- file_type: Optional[str] = None
-
- file_size: Optional[int] = None
-
- total_duration_seconds: Optional[float] = None
-
- sample_rate: Optional[int] = None
-
- channels: Optional[int] = None
-
- audio_format: Optional[int] = None
-
- bpm: Optional[int] = None
-
- file_extension: Optional[str] = None
-
- if TYPE_CHECKING:
- # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a
- # value to this field, so for compatibility we avoid doing it at runtime.
- __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride]
-
- # Stub to indicate that arbitrary properties are accepted.
- # To access properties that are not valid identifiers you can use `getattr`, e.g.
- # `getattr(obj, '$type')`
- def __getattr__(self, attr: str) -> object: ...
- else:
- __pydantic_extra__: Dict[str, object]
-
-
-class ChunkAudioURLInputChunkGeneratedMetadataVideoChunkGeneratedMetadata(BaseModel):
- type: Optional[Literal["video"]] = None
-
- file_type: Optional[str] = None
-
- file_size: Optional[int] = None
-
- total_duration_seconds: Optional[float] = None
-
- fps: Optional[float] = None
-
- width: Optional[int] = None
-
- height: Optional[int] = None
-
- frame_count: Optional[int] = None
-
- has_audio_stream: Optional[bool] = None
-
- bpm: Optional[int] = None
-
- file_extension: Optional[str] = None
-
- if TYPE_CHECKING:
- # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a
- # value to this field, so for compatibility we avoid doing it at runtime.
- __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride]
-
- # Stub to indicate that arbitrary properties are accepted.
- # To access properties that are not valid identifiers you can use `getattr`, e.g.
- # `getattr(obj, '$type')`
- def __getattr__(self, attr: str) -> object: ...
- else:
- __pydantic_extra__: Dict[str, object]
-
-
-class ChunkAudioURLInputChunkGeneratedMetadataImageChunkGeneratedMetadata(BaseModel):
- type: Optional[Literal["image"]] = None
-
- file_type: Optional[str] = None
-
- file_size: Optional[int] = None
-
- width: Optional[int] = None
-
- height: Optional[int] = None
-
- file_extension: Optional[str] = None
-
- if TYPE_CHECKING:
- # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a
- # value to this field, so for compatibility we avoid doing it at runtime.
- __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride]
-
- # Stub to indicate that arbitrary properties are accepted.
- # To access properties that are not valid identifiers you can use `getattr`, e.g.
- # `getattr(obj, '$type')`
- def __getattr__(self, attr: str) -> object: ...
- else:
- __pydantic_extra__: Dict[str, object]
-
-
-ChunkAudioURLInputChunkGeneratedMetadata: TypeAlias = Annotated[
- Union[
- ChunkAudioURLInputChunkGeneratedMetadataMarkdownChunkGeneratedMetadata,
- ChunkAudioURLInputChunkGeneratedMetadataTextChunkGeneratedMetadata,
- ChunkAudioURLInputChunkGeneratedMetadataPdfChunkGeneratedMetadata,
- ChunkAudioURLInputChunkGeneratedMetadataCodeChunkGeneratedMetadata,
- ChunkAudioURLInputChunkGeneratedMetadataAudioChunkGeneratedMetadata,
- ChunkAudioURLInputChunkGeneratedMetadataVideoChunkGeneratedMetadata,
- ChunkAudioURLInputChunkGeneratedMetadataImageChunkGeneratedMetadata,
- None,
- ],
- PropertyInfo(discriminator="type"),
-]
-
-
-class ChunkAudioURLInputChunkAudioURL(BaseModel):
- """Model for audio URL validation."""
-
- url: str
- """The audio URL. Can be either a URL or a Data URI."""
-
-
-class ChunkAudioURLInputChunk(BaseModel):
- chunk_index: int
- """position of the chunk in a file"""
-
- mime_type: Optional[str] = None
- """mime type of the chunk"""
-
- generated_metadata: Optional[ChunkAudioURLInputChunkGeneratedMetadata] = None
- """metadata of the chunk"""
-
- model: Optional[str] = None
- """model used for this chunk"""
-
- type: Optional[Literal["audio_url"]] = None
- """Input type identifier"""
-
- transcription: Optional[str] = None
- """speech recognition (sr) text of the audio"""
-
- summary: Optional[str] = None
- """summary of the audio"""
-
- audio_url: Optional[ChunkAudioURLInputChunkAudioURL] = None
- """Model for audio URL validation."""
-
- sampling_rate: int
- """The sampling rate of the audio."""
-
-
-class ChunkVideoURLInputChunkGeneratedMetadataMarkdownChunkGeneratedMetadataChunkHeading(BaseModel):
- level: int
-
- text: str
-
-
-class ChunkVideoURLInputChunkGeneratedMetadataMarkdownChunkGeneratedMetadataHeadingContext(BaseModel):
- level: int
-
- text: str
-
-
-class ChunkVideoURLInputChunkGeneratedMetadataMarkdownChunkGeneratedMetadata(BaseModel):
- type: Optional[Literal["markdown"]] = None
-
- file_type: Optional[Literal["text/markdown"]] = None
-
- language: Optional[str] = None
-
- word_count: Optional[int] = None
-
- file_size: Optional[int] = None
-
- chunk_headings: Optional[
- List[ChunkVideoURLInputChunkGeneratedMetadataMarkdownChunkGeneratedMetadataChunkHeading]
- ] = None
-
- heading_context: Optional[
- List[ChunkVideoURLInputChunkGeneratedMetadataMarkdownChunkGeneratedMetadataHeadingContext]
- ] = None
-
- start_line: Optional[int] = None
-
- num_lines: Optional[int] = None
-
- file_extension: Optional[str] = None
-
- frontmatter: Optional[Dict[str, object]] = None
-
- if TYPE_CHECKING:
- # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a
- # value to this field, so for compatibility we avoid doing it at runtime.
- __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride]
-
- # Stub to indicate that arbitrary properties are accepted.
- # To access properties that are not valid identifiers you can use `getattr`, e.g.
- # `getattr(obj, '$type')`
- def __getattr__(self, attr: str) -> object: ...
- else:
- __pydantic_extra__: Dict[str, object]
-
-
-class ChunkVideoURLInputChunkGeneratedMetadataTextChunkGeneratedMetadata(BaseModel):
- type: Optional[Literal["text"]] = None
-
- file_type: Optional[Literal["text/plain"]] = None
-
- language: Optional[str] = None
-
- word_count: Optional[int] = None
-
- file_size: Optional[int] = None
-
- start_line: Optional[int] = None
-
- num_lines: Optional[int] = None
-
- file_extension: Optional[str] = None
-
- if TYPE_CHECKING:
- # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a
- # value to this field, so for compatibility we avoid doing it at runtime.
- __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride]
-
- # Stub to indicate that arbitrary properties are accepted.
- # To access properties that are not valid identifiers you can use `getattr`, e.g.
- # `getattr(obj, '$type')`
- def __getattr__(self, attr: str) -> object: ...
- else:
- __pydantic_extra__: Dict[str, object]
-
-
-class ChunkVideoURLInputChunkGeneratedMetadataPdfChunkGeneratedMetadata(BaseModel):
- type: Optional[Literal["pdf"]] = None
-
- file_type: Optional[Literal["application/pdf"]] = None
-
- total_pages: Optional[int] = None
-
- total_size: Optional[int] = None
-
- file_extension: Optional[str] = None
-
- if TYPE_CHECKING:
- # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a
- # value to this field, so for compatibility we avoid doing it at runtime.
- __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride]
-
- # Stub to indicate that arbitrary properties are accepted.
- # To access properties that are not valid identifiers you can use `getattr`, e.g.
- # `getattr(obj, '$type')`
- def __getattr__(self, attr: str) -> object: ...
- else:
- __pydantic_extra__: Dict[str, object]
-
-
-class ChunkVideoURLInputChunkGeneratedMetadataCodeChunkGeneratedMetadata(BaseModel):
- type: Optional[Literal["code"]] = None
-
- file_type: str
-
- language: Optional[str] = None
-
- word_count: Optional[int] = None
-
- file_size: Optional[int] = None
-
- start_line: Optional[int] = None
-
- num_lines: Optional[int] = None
-
- file_extension: Optional[str] = None
-
- if TYPE_CHECKING:
- # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a
- # value to this field, so for compatibility we avoid doing it at runtime.
- __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride]
-
- # Stub to indicate that arbitrary properties are accepted.
- # To access properties that are not valid identifiers you can use `getattr`, e.g.
- # `getattr(obj, '$type')`
- def __getattr__(self, attr: str) -> object: ...
- else:
- __pydantic_extra__: Dict[str, object]
-
-
-class ChunkVideoURLInputChunkGeneratedMetadataAudioChunkGeneratedMetadata(BaseModel):
- type: Optional[Literal["audio"]] = None
-
- file_type: Optional[str] = None
-
- file_size: Optional[int] = None
-
- total_duration_seconds: Optional[float] = None
-
- sample_rate: Optional[int] = None
-
- channels: Optional[int] = None
-
- audio_format: Optional[int] = None
-
- bpm: Optional[int] = None
-
- file_extension: Optional[str] = None
-
- if TYPE_CHECKING:
- # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a
- # value to this field, so for compatibility we avoid doing it at runtime.
- __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride]
-
- # Stub to indicate that arbitrary properties are accepted.
- # To access properties that are not valid identifiers you can use `getattr`, e.g.
- # `getattr(obj, '$type')`
- def __getattr__(self, attr: str) -> object: ...
- else:
- __pydantic_extra__: Dict[str, object]
-
-
-class ChunkVideoURLInputChunkGeneratedMetadataVideoChunkGeneratedMetadata(BaseModel):
- type: Optional[Literal["video"]] = None
-
- file_type: Optional[str] = None
-
- file_size: Optional[int] = None
-
- total_duration_seconds: Optional[float] = None
-
- fps: Optional[float] = None
-
- width: Optional[int] = None
-
- height: Optional[int] = None
-
- frame_count: Optional[int] = None
-
- has_audio_stream: Optional[bool] = None
-
- bpm: Optional[int] = None
-
- file_extension: Optional[str] = None
-
- if TYPE_CHECKING:
- # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a
- # value to this field, so for compatibility we avoid doing it at runtime.
- __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride]
-
- # Stub to indicate that arbitrary properties are accepted.
- # To access properties that are not valid identifiers you can use `getattr`, e.g.
- # `getattr(obj, '$type')`
- def __getattr__(self, attr: str) -> object: ...
- else:
- __pydantic_extra__: Dict[str, object]
-
-
-class ChunkVideoURLInputChunkGeneratedMetadataImageChunkGeneratedMetadata(BaseModel):
- type: Optional[Literal["image"]] = None
-
- file_type: Optional[str] = None
-
- file_size: Optional[int] = None
-
- width: Optional[int] = None
-
- height: Optional[int] = None
-
- file_extension: Optional[str] = None
-
- if TYPE_CHECKING:
- # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a
- # value to this field, so for compatibility we avoid doing it at runtime.
- __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride]
-
- # Stub to indicate that arbitrary properties are accepted.
- # To access properties that are not valid identifiers you can use `getattr`, e.g.
- # `getattr(obj, '$type')`
- def __getattr__(self, attr: str) -> object: ...
- else:
- __pydantic_extra__: Dict[str, object]
-
-
-ChunkVideoURLInputChunkGeneratedMetadata: TypeAlias = Annotated[
- Union[
- ChunkVideoURLInputChunkGeneratedMetadataMarkdownChunkGeneratedMetadata,
- ChunkVideoURLInputChunkGeneratedMetadataTextChunkGeneratedMetadata,
- ChunkVideoURLInputChunkGeneratedMetadataPdfChunkGeneratedMetadata,
- ChunkVideoURLInputChunkGeneratedMetadataCodeChunkGeneratedMetadata,
- ChunkVideoURLInputChunkGeneratedMetadataAudioChunkGeneratedMetadata,
- ChunkVideoURLInputChunkGeneratedMetadataVideoChunkGeneratedMetadata,
- ChunkVideoURLInputChunkGeneratedMetadataImageChunkGeneratedMetadata,
- None,
- ],
- PropertyInfo(discriminator="type"),
-]
-
-
-class ChunkVideoURLInputChunkVideoURL(BaseModel):
- """Model for video URL validation."""
-
- url: str
- """The video URL. Can be either a URL or a Data URI."""
-
-
-class ChunkVideoURLInputChunk(BaseModel):
- chunk_index: int
- """position of the chunk in a file"""
-
- mime_type: Optional[str] = None
- """mime type of the chunk"""
-
- generated_metadata: Optional[ChunkVideoURLInputChunkGeneratedMetadata] = None
- """metadata of the chunk"""
-
- model: Optional[str] = None
- """model used for this chunk"""
-
- type: Optional[Literal["video_url"]] = None
- """Input type identifier"""
-
- transcription: Optional[str] = None
- """speech recognition (sr) text of the video"""
-
- summary: Optional[str] = None
- """summary of the video"""
-
- video_url: Optional[ChunkVideoURLInputChunkVideoURL] = None
- """Model for video URL validation."""
-
+__all__ = ["StoreFile", "Chunk"]
Chunk: TypeAlias = Annotated[
- Union[ChunkTextInputChunk, ChunkImageURLInputChunk, ChunkAudioURLInputChunk, ChunkVideoURLInputChunk],
+ Union[TextInputChunk, ImageURLInputChunk, AudioURLInputChunk, VideoURLInputChunk],
PropertyInfo(discriminator="type"),
]
@@ -1221,7 +57,7 @@ class StoreFile(BaseModel):
usage_tokens: Optional[int] = None
"""Storage usage in tokens"""
- config: Optional[Config] = None
+ config: Optional[StoreFileConfig] = None
"""Configuration for a file."""
object: Optional[Literal["store.file"]] = None
@@ -1229,3 +65,6 @@ class StoreFile(BaseModel):
chunks: Optional[List[Chunk]] = None
"""chunks"""
+
+ content_url: str
+ """Presigned URL for file content"""
diff --git a/src/mixedbread/types/stores/store_file_config.py b/src/mixedbread/types/stores/store_file_config.py
new file mode 100644
index 00000000..c30ee8da
--- /dev/null
+++ b/src/mixedbread/types/stores/store_file_config.py
@@ -0,0 +1,15 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["StoreFileConfig"]
+
+
+class StoreFileConfig(BaseModel):
+ """Configuration for a file."""
+
+ parsing_strategy: Optional[Literal["fast", "high_quality"]] = None
+ """Strategy for adding the file, this overrides the store-level default"""
diff --git a/src/mixedbread/types/stores/store_file_config_param.py b/src/mixedbread/types/stores/store_file_config_param.py
new file mode 100644
index 00000000..82380c0e
--- /dev/null
+++ b/src/mixedbread/types/stores/store_file_config_param.py
@@ -0,0 +1,14 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Literal, TypedDict
+
+__all__ = ["StoreFileConfigParam"]
+
+
+class StoreFileConfigParam(TypedDict, total=False):
+ """Configuration for a file."""
+
+ parsing_strategy: Literal["fast", "high_quality"]
+ """Strategy for adding the file, this overrides the store-level default"""
diff --git a/src/mixedbread/types/stores/text_input_chunk.py b/src/mixedbread/types/stores/text_input_chunk.py
new file mode 100644
index 00000000..0080d81f
--- /dev/null
+++ b/src/mixedbread/types/stores/text_input_chunk.py
@@ -0,0 +1,56 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Union, Optional
+from typing_extensions import Literal, Annotated, TypeAlias
+
+from ..._utils import PropertyInfo
+from ..._models import BaseModel
+from ..pdf_chunk_generated_metadata import PdfChunkGeneratedMetadata
+from ..code_chunk_generated_metadata import CodeChunkGeneratedMetadata
+from ..text_chunk_generated_metadata import TextChunkGeneratedMetadata
+from ..audio_chunk_generated_metadata import AudioChunkGeneratedMetadata
+from ..image_chunk_generated_metadata import ImageChunkGeneratedMetadata
+from ..video_chunk_generated_metadata import VideoChunkGeneratedMetadata
+from ..markdown_chunk_generated_metadata import MarkdownChunkGeneratedMetadata
+
+__all__ = ["TextInputChunk", "GeneratedMetadata"]
+
+GeneratedMetadata: TypeAlias = Annotated[
+ Union[
+ MarkdownChunkGeneratedMetadata,
+ TextChunkGeneratedMetadata,
+ PdfChunkGeneratedMetadata,
+ CodeChunkGeneratedMetadata,
+ AudioChunkGeneratedMetadata,
+ VideoChunkGeneratedMetadata,
+ ImageChunkGeneratedMetadata,
+ None,
+ ],
+ PropertyInfo(discriminator="type"),
+]
+
+
+class TextInputChunk(BaseModel):
+ chunk_index: int
+ """position of the chunk in a file"""
+
+ mime_type: Optional[str] = None
+ """mime type of the chunk"""
+
+ generated_metadata: Optional[GeneratedMetadata] = None
+ """metadata of the chunk"""
+
+ model: Optional[str] = None
+ """model used for this chunk"""
+
+ type: Optional[Literal["text"]] = None
+ """Input type identifier"""
+
+ offset: Optional[int] = None
+ """The offset of the text in the file relative to the start of the file."""
+
+ text: Optional[str] = None
+ """Text content"""
+
+ context: Optional[str] = None
+ """LLM-generated context that situates this chunk within its source document"""
diff --git a/src/mixedbread/types/stores/video_url_input_chunk.py b/src/mixedbread/types/stores/video_url_input_chunk.py
new file mode 100644
index 00000000..c1a16e79
--- /dev/null
+++ b/src/mixedbread/types/stores/video_url_input_chunk.py
@@ -0,0 +1,57 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Union, Optional
+from typing_extensions import Literal, Annotated, TypeAlias
+
+from ..._utils import PropertyInfo
+from ..._models import BaseModel
+from ..video_url import VideoURL
+from ..pdf_chunk_generated_metadata import PdfChunkGeneratedMetadata
+from ..code_chunk_generated_metadata import CodeChunkGeneratedMetadata
+from ..text_chunk_generated_metadata import TextChunkGeneratedMetadata
+from ..audio_chunk_generated_metadata import AudioChunkGeneratedMetadata
+from ..image_chunk_generated_metadata import ImageChunkGeneratedMetadata
+from ..video_chunk_generated_metadata import VideoChunkGeneratedMetadata
+from ..markdown_chunk_generated_metadata import MarkdownChunkGeneratedMetadata
+
+__all__ = ["VideoURLInputChunk", "GeneratedMetadata"]
+
+GeneratedMetadata: TypeAlias = Annotated[
+ Union[
+ MarkdownChunkGeneratedMetadata,
+ TextChunkGeneratedMetadata,
+ PdfChunkGeneratedMetadata,
+ CodeChunkGeneratedMetadata,
+ AudioChunkGeneratedMetadata,
+ VideoChunkGeneratedMetadata,
+ ImageChunkGeneratedMetadata,
+ None,
+ ],
+ PropertyInfo(discriminator="type"),
+]
+
+
+class VideoURLInputChunk(BaseModel):
+ chunk_index: int
+ """position of the chunk in a file"""
+
+ mime_type: Optional[str] = None
+ """mime type of the chunk"""
+
+ generated_metadata: Optional[GeneratedMetadata] = None
+ """metadata of the chunk"""
+
+ model: Optional[str] = None
+ """model used for this chunk"""
+
+ type: Optional[Literal["video_url"]] = None
+ """Input type identifier"""
+
+ transcription: Optional[str] = None
+ """speech recognition (sr) text of the video"""
+
+ summary: Optional[str] = None
+ """summary of the video"""
+
+ video_url: Optional[VideoURL] = None
+ """Model for video URL validation."""
diff --git a/src/mixedbread/types/text_chunk_generated_metadata.py b/src/mixedbread/types/text_chunk_generated_metadata.py
new file mode 100644
index 00000000..0d4d55c3
--- /dev/null
+++ b/src/mixedbread/types/text_chunk_generated_metadata.py
@@ -0,0 +1,40 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import TYPE_CHECKING, Dict, Optional
+from typing_extensions import Literal
+
+from pydantic import Field as FieldInfo
+
+from .._models import BaseModel
+
+__all__ = ["TextChunkGeneratedMetadata"]
+
+
+class TextChunkGeneratedMetadata(BaseModel):
+ type: Optional[Literal["text"]] = None
+
+ file_type: Optional[Literal["text/plain"]] = None
+
+ language: Optional[str] = None
+
+ word_count: Optional[int] = None
+
+ file_size: Optional[int] = None
+
+ start_line: Optional[int] = None
+
+ num_lines: Optional[int] = None
+
+ file_extension: Optional[str] = None
+
+ if TYPE_CHECKING:
+ # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a
+ # value to this field, so for compatibility we avoid doing it at runtime.
+ __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride]
+
+ # Stub to indicate that arbitrary properties are accepted.
+ # To access properties that are not valid identifiers you can use `getattr`, e.g.
+ # `getattr(obj, '$type')`
+ def __getattr__(self, attr: str) -> object: ...
+ else:
+ __pydantic_extra__: Dict[str, object]
diff --git a/src/mixedbread/types/video_chunk_generated_metadata.py b/src/mixedbread/types/video_chunk_generated_metadata.py
new file mode 100644
index 00000000..d3ab7c61
--- /dev/null
+++ b/src/mixedbread/types/video_chunk_generated_metadata.py
@@ -0,0 +1,46 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import TYPE_CHECKING, Dict, Optional
+from typing_extensions import Literal
+
+from pydantic import Field as FieldInfo
+
+from .._models import BaseModel
+
+__all__ = ["VideoChunkGeneratedMetadata"]
+
+
+class VideoChunkGeneratedMetadata(BaseModel):
+ type: Optional[Literal["video"]] = None
+
+ file_type: Optional[str] = None
+
+ file_size: Optional[int] = None
+
+ total_duration_seconds: Optional[float] = None
+
+ fps: Optional[float] = None
+
+ width: Optional[int] = None
+
+ height: Optional[int] = None
+
+ frame_count: Optional[int] = None
+
+ has_audio_stream: Optional[bool] = None
+
+ bpm: Optional[int] = None
+
+ file_extension: Optional[str] = None
+
+ if TYPE_CHECKING:
+ # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a
+ # value to this field, so for compatibility we avoid doing it at runtime.
+ __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride]
+
+ # Stub to indicate that arbitrary properties are accepted.
+ # To access properties that are not valid identifiers you can use `getattr`, e.g.
+ # `getattr(obj, '$type')`
+ def __getattr__(self, attr: str) -> object: ...
+ else:
+ __pydantic_extra__: Dict[str, object]
diff --git a/src/mixedbread/types/video_url.py b/src/mixedbread/types/video_url.py
new file mode 100644
index 00000000..885a6bce
--- /dev/null
+++ b/src/mixedbread/types/video_url.py
@@ -0,0 +1,12 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from .._models import BaseModel
+
+__all__ = ["VideoURL"]
+
+
+class VideoURL(BaseModel):
+ """Model for video URL validation."""
+
+ url: str
+ """The video URL. Can be either a URL or a Data URI."""
diff --git a/tests/api_resources/stores/test_files.py b/tests/api_resources/stores/test_files.py
index f59802bf..7baab6c8 100644
--- a/tests/api_resources/stores/test_files.py
+++ b/tests/api_resources/stores/test_files.py
@@ -13,7 +13,6 @@
StoreFile,
FileListResponse,
FileDeleteResponse,
- FileSearchResponse,
)
base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
@@ -295,65 +294,6 @@ def test_path_params_delete(self, client: Mixedbread) -> None:
store_identifier="store_identifier",
)
- @parametrize
- def test_method_search(self, client: Mixedbread) -> None:
- file = client.stores.files.search(
- query="how to configure SSL",
- store_identifiers=["string"],
- )
- assert_matches_type(FileSearchResponse, file, path=["response"])
-
- @parametrize
- def test_method_search_with_all_params(self, client: Mixedbread) -> None:
- file = client.stores.files.search(
- query="how to configure SSL",
- store_identifiers=["string"],
- top_k=1,
- filters={
- "all": [{}, {}],
- "any": [{}, {}],
- "none": [{}, {}],
- },
- file_ids=["123e4567-e89b-12d3-a456-426614174000", "123e4567-e89b-12d3-a456-426614174001"],
- search_options={
- "score_threshold": 0,
- "rewrite_query": True,
- "rerank": True,
- "agentic": True,
- "return_metadata": True,
- "return_chunks": True,
- "chunks_per_file": 0,
- "apply_search_rules": True,
- },
- )
- assert_matches_type(FileSearchResponse, file, path=["response"])
-
- @parametrize
- def test_raw_response_search(self, client: Mixedbread) -> None:
- response = client.stores.files.with_raw_response.search(
- query="how to configure SSL",
- store_identifiers=["string"],
- )
-
- assert response.is_closed is True
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
- file = response.parse()
- assert_matches_type(FileSearchResponse, file, path=["response"])
-
- @parametrize
- def test_streaming_response_search(self, client: Mixedbread) -> None:
- with client.stores.files.with_streaming_response.search(
- query="how to configure SSL",
- store_identifiers=["string"],
- ) as response:
- assert not response.is_closed
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-
- file = response.parse()
- assert_matches_type(FileSearchResponse, file, path=["response"])
-
- assert cast(Any, response.is_closed) is True
-
class TestAsyncFiles:
parametrize = pytest.mark.parametrize(
@@ -632,62 +572,3 @@ async def test_path_params_delete(self, async_client: AsyncMixedbread) -> None:
file_identifier="",
store_identifier="store_identifier",
)
-
- @parametrize
- async def test_method_search(self, async_client: AsyncMixedbread) -> None:
- file = await async_client.stores.files.search(
- query="how to configure SSL",
- store_identifiers=["string"],
- )
- assert_matches_type(FileSearchResponse, file, path=["response"])
-
- @parametrize
- async def test_method_search_with_all_params(self, async_client: AsyncMixedbread) -> None:
- file = await async_client.stores.files.search(
- query="how to configure SSL",
- store_identifiers=["string"],
- top_k=1,
- filters={
- "all": [{}, {}],
- "any": [{}, {}],
- "none": [{}, {}],
- },
- file_ids=["123e4567-e89b-12d3-a456-426614174000", "123e4567-e89b-12d3-a456-426614174001"],
- search_options={
- "score_threshold": 0,
- "rewrite_query": True,
- "rerank": True,
- "agentic": True,
- "return_metadata": True,
- "return_chunks": True,
- "chunks_per_file": 0,
- "apply_search_rules": True,
- },
- )
- assert_matches_type(FileSearchResponse, file, path=["response"])
-
- @parametrize
- async def test_raw_response_search(self, async_client: AsyncMixedbread) -> None:
- response = await async_client.stores.files.with_raw_response.search(
- query="how to configure SSL",
- store_identifiers=["string"],
- )
-
- assert response.is_closed is True
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
- file = await response.parse()
- assert_matches_type(FileSearchResponse, file, path=["response"])
-
- @parametrize
- async def test_streaming_response_search(self, async_client: AsyncMixedbread) -> None:
- async with async_client.stores.files.with_streaming_response.search(
- query="how to configure SSL",
- store_identifiers=["string"],
- ) as response:
- assert not response.is_closed
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-
- file = await response.parse()
- assert_matches_type(FileSearchResponse, file, path=["response"])
-
- assert cast(Any, response.is_closed) is True
diff --git a/tests/api_resources/test_stores.py b/tests/api_resources/test_stores.py
index 5e8ae604..39988178 100644
--- a/tests/api_resources/test_stores.py
+++ b/tests/api_resources/test_stores.py
@@ -35,6 +35,7 @@ def test_method_create_with_all_params(self, client: Mixedbread) -> None:
name="technical-documentation",
description="Contains technical specifications and guides",
is_public=False,
+ license="license",
expires_after={
"anchor": "last_active_at",
"days": 0,
@@ -120,6 +121,7 @@ def test_method_update_with_all_params(self, client: Mixedbread) -> None:
name="x",
description="description",
is_public=True,
+ license="license",
expires_after={
"anchor": "last_active_at",
"days": 0,
@@ -422,6 +424,7 @@ async def test_method_create_with_all_params(self, async_client: AsyncMixedbread
name="technical-documentation",
description="Contains technical specifications and guides",
is_public=False,
+ license="license",
expires_after={
"anchor": "last_active_at",
"days": 0,
@@ -507,6 +510,7 @@ async def test_method_update_with_all_params(self, async_client: AsyncMixedbread
name="x",
description="description",
is_public=True,
+ license="license",
expires_after={
"anchor": "last_active_at",
"days": 0,
diff --git a/tests/test_client.py b/tests/test_client.py
index 8cc00170..e219d2e6 100644
--- a/tests/test_client.py
+++ b/tests/test_client.py
@@ -429,6 +429,30 @@ def test_default_query_option(self) -> None:
client.close()
+ def test_hardcoded_query_params_in_url(self, client: Mixedbread) -> None:
+ request = client._build_request(FinalRequestOptions(method="get", url="/foo?beta=true"))
+ url = httpx.URL(request.url)
+ assert dict(url.params) == {"beta": "true"}
+
+ request = client._build_request(
+ FinalRequestOptions(
+ method="get",
+ url="/foo?beta=true",
+ params={"limit": "10", "page": "abc"},
+ )
+ )
+ url = httpx.URL(request.url)
+ assert dict(url.params) == {"beta": "true", "limit": "10", "page": "abc"}
+
+ request = client._build_request(
+ FinalRequestOptions(
+ method="get",
+ url="/files/a%2Fb?beta=true",
+ params={"limit": "10"},
+ )
+ )
+ assert request.url.raw_path == b"/files/a%2Fb?beta=true&limit=10"
+
def test_request_extra_json(self, client: Mixedbread) -> None:
request = client._build_request(
FinalRequestOptions(
@@ -1340,6 +1364,30 @@ async def test_default_query_option(self) -> None:
await client.close()
+ async def test_hardcoded_query_params_in_url(self, async_client: AsyncMixedbread) -> None:
+ request = async_client._build_request(FinalRequestOptions(method="get", url="/foo?beta=true"))
+ url = httpx.URL(request.url)
+ assert dict(url.params) == {"beta": "true"}
+
+ request = async_client._build_request(
+ FinalRequestOptions(
+ method="get",
+ url="/foo?beta=true",
+ params={"limit": "10", "page": "abc"},
+ )
+ )
+ url = httpx.URL(request.url)
+ assert dict(url.params) == {"beta": "true", "limit": "10", "page": "abc"}
+
+ request = async_client._build_request(
+ FinalRequestOptions(
+ method="get",
+ url="/files/a%2Fb?beta=true",
+ params={"limit": "10"},
+ )
+ )
+ assert request.url.raw_path == b"/files/a%2Fb?beta=true&limit=10"
+
def test_request_extra_json(self, client: Mixedbread) -> None:
request = client._build_request(
FinalRequestOptions(
diff --git a/tests/test_deepcopy.py b/tests/test_deepcopy.py
deleted file mode 100644
index e124b497..00000000
--- a/tests/test_deepcopy.py
+++ /dev/null
@@ -1,58 +0,0 @@
-from mixedbread._utils import deepcopy_minimal
-
-
-def assert_different_identities(obj1: object, obj2: object) -> None:
- assert obj1 == obj2
- assert id(obj1) != id(obj2)
-
-
-def test_simple_dict() -> None:
- obj1 = {"foo": "bar"}
- obj2 = deepcopy_minimal(obj1)
- assert_different_identities(obj1, obj2)
-
-
-def test_nested_dict() -> None:
- obj1 = {"foo": {"bar": True}}
- obj2 = deepcopy_minimal(obj1)
- assert_different_identities(obj1, obj2)
- assert_different_identities(obj1["foo"], obj2["foo"])
-
-
-def test_complex_nested_dict() -> None:
- obj1 = {"foo": {"bar": [{"hello": "world"}]}}
- obj2 = deepcopy_minimal(obj1)
- assert_different_identities(obj1, obj2)
- assert_different_identities(obj1["foo"], obj2["foo"])
- assert_different_identities(obj1["foo"]["bar"], obj2["foo"]["bar"])
- assert_different_identities(obj1["foo"]["bar"][0], obj2["foo"]["bar"][0])
-
-
-def test_simple_list() -> None:
- obj1 = ["a", "b", "c"]
- obj2 = deepcopy_minimal(obj1)
- assert_different_identities(obj1, obj2)
-
-
-def test_nested_list() -> None:
- obj1 = ["a", [1, 2, 3]]
- obj2 = deepcopy_minimal(obj1)
- assert_different_identities(obj1, obj2)
- assert_different_identities(obj1[1], obj2[1])
-
-
-class MyObject: ...
-
-
-def test_ignores_other_types() -> None:
- # custom classes
- my_obj = MyObject()
- obj1 = {"foo": my_obj}
- obj2 = deepcopy_minimal(obj1)
- assert_different_identities(obj1, obj2)
- assert obj1["foo"] is my_obj
-
- # tuples
- obj3 = ("a", "b")
- obj4 = deepcopy_minimal(obj3)
- assert obj3 is obj4
diff --git a/tests/test_extract_files.py b/tests/test_extract_files.py
index 20ec67fe..4a252515 100644
--- a/tests/test_extract_files.py
+++ b/tests/test_extract_files.py
@@ -35,6 +35,15 @@ def test_multiple_files() -> None:
assert query == {"documents": [{}, {}]}
+def test_top_level_file_array() -> None:
+ query = {"files": [b"file one", b"file two"], "title": "hello"}
+ assert extract_files(query, paths=[["files", ""]]) == [
+ ("files[]", b"file one"),
+ ("files[]", b"file two"),
+ ]
+ assert query == {"title": "hello"}
+
+
@pytest.mark.parametrize(
"query,paths,expected",
[
diff --git a/tests/test_files.py b/tests/test_files.py
index 79d9ce62..8e51c40f 100644
--- a/tests/test_files.py
+++ b/tests/test_files.py
@@ -4,7 +4,8 @@
import pytest
from dirty_equals import IsDict, IsList, IsBytes, IsTuple
-from mixedbread._files import to_httpx_files, async_to_httpx_files
+from mixedbread._files import to_httpx_files, deepcopy_with_paths, async_to_httpx_files
+from mixedbread._utils import extract_files
readme_path = Path(__file__).parent.parent.joinpath("README.md")
@@ -49,3 +50,99 @@ def test_string_not_allowed() -> None:
"file": "foo", # type: ignore
}
)
+
+
+def assert_different_identities(obj1: object, obj2: object) -> None:
+ assert obj1 == obj2
+ assert obj1 is not obj2
+
+
+class TestDeepcopyWithPaths:
+ def test_copies_top_level_dict(self) -> None:
+ original = {"file": b"data", "other": "value"}
+ result = deepcopy_with_paths(original, [["file"]])
+ assert_different_identities(result, original)
+
+ def test_file_value_is_same_reference(self) -> None:
+ file_bytes = b"contents"
+ original = {"file": file_bytes}
+ result = deepcopy_with_paths(original, [["file"]])
+ assert_different_identities(result, original)
+ assert result["file"] is file_bytes
+
+ def test_list_popped_wholesale(self) -> None:
+ files = [b"f1", b"f2"]
+ original = {"files": files, "title": "t"}
+ result = deepcopy_with_paths(original, [["files", ""]])
+ assert_different_identities(result, original)
+ result_files = result["files"]
+ assert isinstance(result_files, list)
+ assert_different_identities(result_files, files)
+
+ def test_nested_array_path_copies_list_and_elements(self) -> None:
+ elem1 = {"file": b"f1", "extra": 1}
+ elem2 = {"file": b"f2", "extra": 2}
+ original = {"items": [elem1, elem2]}
+ result = deepcopy_with_paths(original, [["items", "", "file"]])
+ assert_different_identities(result, original)
+ result_items = result["items"]
+ assert isinstance(result_items, list)
+ assert_different_identities(result_items, original["items"])
+ assert_different_identities(result_items[0], elem1)
+ assert_different_identities(result_items[1], elem2)
+
+ def test_empty_paths_returns_same_object(self) -> None:
+ original = {"foo": "bar"}
+ result = deepcopy_with_paths(original, [])
+ assert result is original
+
+ def test_multiple_paths(self) -> None:
+ f1 = b"file1"
+ f2 = b"file2"
+ original = {"a": f1, "b": f2, "c": "unchanged"}
+ result = deepcopy_with_paths(original, [["a"], ["b"]])
+ assert_different_identities(result, original)
+ assert result["a"] is f1
+ assert result["b"] is f2
+ assert result["c"] is original["c"]
+
+ def test_extract_files_does_not_mutate_original_top_level(self) -> None:
+ file_bytes = b"contents"
+ original = {"file": file_bytes, "other": "value"}
+
+ copied = deepcopy_with_paths(original, [["file"]])
+ extracted = extract_files(copied, paths=[["file"]])
+
+ assert extracted == [("file", file_bytes)]
+ assert original == {"file": file_bytes, "other": "value"}
+ assert copied == {"other": "value"}
+
+ def test_extract_files_does_not_mutate_original_nested_array_path(self) -> None:
+ file1 = b"f1"
+ file2 = b"f2"
+ original = {
+ "items": [
+ {"file": file1, "extra": 1},
+ {"file": file2, "extra": 2},
+ ],
+ "title": "example",
+ }
+
+ copied = deepcopy_with_paths(original, [["items", "", "file"]])
+ extracted = extract_files(copied, paths=[["items", "", "file"]])
+
+ assert extracted == [("items[][file]", file1), ("items[][file]", file2)]
+ assert original == {
+ "items": [
+ {"file": file1, "extra": 1},
+ {"file": file2, "extra": 2},
+ ],
+ "title": "example",
+ }
+ assert copied == {
+ "items": [
+ {"extra": 1},
+ {"extra": 2},
+ ],
+ "title": "example",
+ }
diff --git a/tests/test_utils/test_path.py b/tests/test_utils/test_path.py
new file mode 100644
index 00000000..7d148421
--- /dev/null
+++ b/tests/test_utils/test_path.py
@@ -0,0 +1,89 @@
+from __future__ import annotations
+
+from typing import Any
+
+import pytest
+
+from mixedbread._utils._path import path_template
+
+
+@pytest.mark.parametrize(
+ "template, kwargs, expected",
+ [
+ ("/v1/{id}", dict(id="abc"), "/v1/abc"),
+ ("/v1/{a}/{b}", dict(a="x", b="y"), "/v1/x/y"),
+ ("/v1/{a}{b}/path/{c}?val={d}#{e}", dict(a="x", b="y", c="z", d="u", e="v"), "/v1/xy/path/z?val=u#v"),
+ ("/{w}/{w}", dict(w="echo"), "/echo/echo"),
+ ("/v1/static", {}, "/v1/static"),
+ ("", {}, ""),
+ ("/v1/?q={n}&count=10", dict(n=42), "/v1/?q=42&count=10"),
+ ("/v1/{v}", dict(v=None), "/v1/null"),
+ ("/v1/{v}", dict(v=True), "/v1/true"),
+ ("/v1/{v}", dict(v=False), "/v1/false"),
+ ("/v1/{v}", dict(v=".hidden"), "/v1/.hidden"), # dot prefix ok
+ ("/v1/{v}", dict(v="file.txt"), "/v1/file.txt"), # dot in middle ok
+ ("/v1/{v}", dict(v="..."), "/v1/..."), # triple dot ok
+ ("/v1/{a}{b}", dict(a=".", b="txt"), "/v1/.txt"), # dot var combining with adjacent to be ok
+ ("/items?q={v}#{f}", dict(v=".", f=".."), "/items?q=.#.."), # dots in query/fragment are fine
+ (
+ "/v1/{a}?query={b}",
+ dict(a="../../other/endpoint", b="a&bad=true"),
+ "/v1/..%2F..%2Fother%2Fendpoint?query=a%26bad%3Dtrue",
+ ),
+ ("/v1/{val}", dict(val="a/b/c"), "/v1/a%2Fb%2Fc"),
+ ("/v1/{val}", dict(val="a/b/c?query=value"), "/v1/a%2Fb%2Fc%3Fquery=value"),
+ ("/v1/{val}", dict(val="a/b/c?query=value&bad=true"), "/v1/a%2Fb%2Fc%3Fquery=value&bad=true"),
+ ("/v1/{val}", dict(val="%20"), "/v1/%2520"), # escapes escape sequences in input
+ # Query: slash and ? are safe, # is not
+ ("/items?q={v}", dict(v="a/b"), "/items?q=a/b"),
+ ("/items?q={v}", dict(v="a?b"), "/items?q=a?b"),
+ ("/items?q={v}", dict(v="a#b"), "/items?q=a%23b"),
+ ("/items?q={v}", dict(v="a b"), "/items?q=a%20b"),
+ # Fragment: slash and ? are safe
+ ("/docs#{v}", dict(v="a/b"), "/docs#a/b"),
+ ("/docs#{v}", dict(v="a?b"), "/docs#a?b"),
+ # Path: slash, ? and # are all encoded
+ ("/v1/{v}", dict(v="a/b"), "/v1/a%2Fb"),
+ ("/v1/{v}", dict(v="a?b"), "/v1/a%3Fb"),
+ ("/v1/{v}", dict(v="a#b"), "/v1/a%23b"),
+ # same var encoded differently by component
+ (
+ "/v1/{v}?q={v}#{v}",
+ dict(v="a/b?c#d"),
+ "/v1/a%2Fb%3Fc%23d?q=a/b?c%23d#a/b?c%23d",
+ ),
+ ("/v1/{val}", dict(val="x?admin=true"), "/v1/x%3Fadmin=true"), # query injection
+ ("/v1/{val}", dict(val="x#admin"), "/v1/x%23admin"), # fragment injection
+ ],
+)
+def test_interpolation(template: str, kwargs: dict[str, Any], expected: str) -> None:
+ assert path_template(template, **kwargs) == expected
+
+
+def test_missing_kwarg_raises_key_error() -> None:
+ with pytest.raises(KeyError, match="org_id"):
+ path_template("/v1/{org_id}")
+
+
+@pytest.mark.parametrize(
+ "template, kwargs",
+ [
+ ("{a}/path", dict(a=".")),
+ ("{a}/path", dict(a="..")),
+ ("/v1/{a}", dict(a=".")),
+ ("/v1/{a}", dict(a="..")),
+ ("/v1/{a}/path", dict(a=".")),
+ ("/v1/{a}/path", dict(a="..")),
+ ("/v1/{a}{b}", dict(a=".", b=".")), # adjacent vars → ".."
+ ("/v1/{a}.", dict(a=".")), # var + static → ".."
+ ("/v1/{a}{b}", dict(a="", b=".")), # empty + dot → "."
+ ("/v1/%2e/{x}", dict(x="ok")), # encoded dot in static text
+ ("/v1/%2e./{x}", dict(x="ok")), # mixed encoded ".." in static
+ ("/v1/.%2E/{x}", dict(x="ok")), # mixed encoded ".." in static
+ ("/v1/{v}?q=1", dict(v="..")),
+ ("/v1/{v}#frag", dict(v="..")),
+ ],
+)
+def test_dot_segment_rejected(template: str, kwargs: dict[str, Any]) -> None:
+ with pytest.raises(ValueError, match="dot-segment"):
+ path_template(template, **kwargs)