diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index c6075a38..da1c1d13 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -19,7 +19,7 @@ jobs: timeout-minutes: 10 name: lint runs-on: ${{ github.repository == 'stainless-sdks/mixedbread-python' && 'depot-ubuntu-24.04' || 'ubuntu-latest' }} - if: github.event_name == 'push' || github.event.pull_request.head.repo.fork + if: (github.event_name == 'push' || github.event.pull_request.head.repo.fork) && (github.event_name != 'push' || github.event.head_commit.message != 'codegen metadata') steps: - uses: actions/checkout@v6 @@ -38,7 +38,7 @@ jobs: run: ./scripts/lint build: - if: github.event_name == 'push' || github.event.pull_request.head.repo.fork + if: (github.event_name == 'push' || github.event.pull_request.head.repo.fork) && (github.event_name != 'push' || github.event.head_commit.message != 'codegen metadata') timeout-minutes: 10 name: build permissions: diff --git a/.gitignore b/.gitignore index 95ceb189..3824f4c4 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ .prism.log +.stdy.log _dev __pycache__ diff --git a/.release-please-manifest.json b/.release-please-manifest.json index dd7ced1c..26b1ce24 100644 --- a/.release-please-manifest.json +++ b/.release-please-manifest.json @@ -1,3 +1,3 @@ { - ".": "0.49.0" + ".": "0.50.0" } \ No newline at end of file diff --git a/.stats.yml b/.stats.yml index 4bb5a628..a5e2a677 100644 --- a/.stats.yml +++ b/.stats.yml @@ -1,4 +1,4 @@ -configured_endpoints: 56 -openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/mixedbread%2Fmixedbread-3daf4d41b24950791a70688527c10dea9e201d304b8d6432b3acfa50e33e0805.yml -openapi_spec_hash: 1ecaa0f38266f1c5d1da8fb2e9ef651a -config_hash: c32ffa6858a02d7f23f6f3dda0b461ed +configured_endpoints: 55 +openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/mixedbread%2Fmixedbread-ebd391dad1252eb00dd69ac50455b93bcdcd2cf0177d678e160e47f1d017287f.yml 
+openapi_spec_hash: 3bfd5f9eb34711238caef851aa81f5c0 +config_hash: 594a43c9cb8089f079bb9c5442646791 diff --git a/CHANGELOG.md b/CHANGELOG.md index 449aae02..219fa545 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,50 @@ # Changelog +## 0.50.0 (2026-04-23) + +Full Changelog: [v0.49.0...v0.50.0](https://github.com/mixedbread-ai/mixedbread-python/compare/v0.49.0...v0.50.0) + +### Features + +* **api:** api update ([9cb94bc](https://github.com/mixedbread-ai/mixedbread-python/commit/9cb94bcfc4cf74adfd82b1ff6e8166d5bf597bf7)) +* **api:** api update ([c7f1a92](https://github.com/mixedbread-ai/mixedbread-python/commit/c7f1a92a897408fe54d22ae773e617c0eb9be584)) +* **api:** api update ([34d8ca8](https://github.com/mixedbread-ai/mixedbread-python/commit/34d8ca84c2ed2fd3a7bf77a84047741b537a178a)) +* **api:** api update ([4ad956c](https://github.com/mixedbread-ai/mixedbread-python/commit/4ad956c1b52e6a08dc44958cb331451f5bec75af)) +* **api:** api update ([a24ebe9](https://github.com/mixedbread-ai/mixedbread-python/commit/a24ebe9e241e075c2958cefecfe9e972a5bcd55f)) +* include results in store search events response ([8bb1069](https://github.com/mixedbread-ai/mixedbread-python/commit/8bb1069f016d97cf24bf4f7031c180ca3752e07d)) +* **internal:** implement indices array format for query and form serialization ([e52e262](https://github.com/mixedbread-ai/mixedbread-python/commit/e52e262bd9acd3caa49cf9ba08a29b260b722bf0)) + + +### Bug Fixes + +* **client:** preserve hardcoded query params when merging with user params ([83d3f20](https://github.com/mixedbread-ai/mixedbread-python/commit/83d3f201d14edd8afa08a6046c2315097d6509d4)) +* ensure file data are only sent as 1 parameter ([0957f16](https://github.com/mixedbread-ai/mixedbread-python/commit/0957f16bb9a20c72499ae6ee86b9494c1e1ee5c8)) +* sanitize endpoint path params ([7d519be](https://github.com/mixedbread-ai/mixedbread-python/commit/7d519be7f4ea6b2ce11c9915ee1ac903ebb97cea)) + + +### Performance Improvements + +* **client:** 
optimize file structure copying in multipart requests ([63a2728](https://github.com/mixedbread-ai/mixedbread-python/commit/63a272878dc28300f5169d38cb523cc641498fed)) + + +### Chores + +* **ci:** skip lint on metadata-only changes ([a3ec133](https://github.com/mixedbread-ai/mixedbread-python/commit/a3ec133b88d1a70a066c17e29dd2ebd5decffb25)) +* **internal:** more robust bootstrap script ([5b52286](https://github.com/mixedbread-ai/mixedbread-python/commit/5b5228640759a7bf277d9478f10eeca3d358e10f)) +* **internal:** update gitignore ([cf3aa78](https://github.com/mixedbread-ai/mixedbread-python/commit/cf3aa78ec44036bbedce95735ee7ab43cc4d566a)) +* **tests:** bump steady to v0.19.4 ([d6e32d5](https://github.com/mixedbread-ai/mixedbread-python/commit/d6e32d5abe0e137375ea79373033c07057b22c83)) +* **tests:** bump steady to v0.19.5 ([59351b9](https://github.com/mixedbread-ai/mixedbread-python/commit/59351b9dcb0eaf2578c985c80a0501d610ff357d)) +* **tests:** bump steady to v0.19.6 ([ac0e7ac](https://github.com/mixedbread-ai/mixedbread-python/commit/ac0e7acc35384a0ae9f78040b60bce03f8258d81)) +* **tests:** bump steady to v0.19.7 ([839ee9c](https://github.com/mixedbread-ai/mixedbread-python/commit/839ee9c7c60749ae03a125f84e87a7be9365daac)) +* **tests:** bump steady to v0.20.1 ([31e2274](https://github.com/mixedbread-ai/mixedbread-python/commit/31e2274bfe265fbebae02c86beae2f575f9b7b18)) +* **tests:** bump steady to v0.20.2 ([6879b15](https://github.com/mixedbread-ai/mixedbread-python/commit/6879b159d830df5df58696899c7551c709595ed4)) +* **tests:** bump steady to v0.22.1 ([3f6450d](https://github.com/mixedbread-ai/mixedbread-python/commit/3f6450dbda2fe9baeee031495fd10f4d87fa815a)) + + +### Refactors + +* **tests:** switch from prism to steady ([cc454cf](https://github.com/mixedbread-ai/mixedbread-python/commit/cc454cfbe5752c8e439361c52fb31d2d2b14180d)) + ## 0.49.0 (2026-03-19) Full Changelog: 
[v0.48.0...v0.49.0](https://github.com/mixedbread-ai/mixedbread-python/compare/v0.48.0...v0.49.0) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index ed6fa9a1..21b21b70 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -85,7 +85,7 @@ $ pip install ./path-to-wheel-file.whl ## Running tests -Most tests require you to [set up a mock server](https://github.com/stoplightio/prism) against the OpenAPI spec to run the tests. +Most tests require you to [set up a mock server](https://github.com/dgellow/steady) against the OpenAPI spec to run the tests. ```sh $ ./scripts/mock diff --git a/api.md b/api.md index 53e03831..e3f1e559 100644 --- a/api.md +++ b/api.md @@ -30,13 +30,29 @@ Types: ```python from mixedbread.types import ( + AgenticSearchConfig, + AudioChunkGeneratedMetadata, + AudioURL, + CodeChunkGeneratedMetadata, + ContextualizationConfig, ExpiresAfter, + FileCounts, + ImageChunkGeneratedMetadata, + ImageURLOutput, + MarkdownChunkGeneratedMetadata, + MarkdownHeading, + PdfChunkGeneratedMetadata, + RerankConfig, ScoredAudioURLInputChunk, ScoredImageURLInputChunk, ScoredTextInputChunk, ScoredVideoURLInputChunk, Store, StoreChunkSearchOptions, + StoreConfig, + TextChunkGeneratedMetadata, + VideoChunkGeneratedMetadata, + VideoURL, StoreDeleteResponse, StoreMetadataFacetsResponse, StoreQuestionAnsweringResponse, @@ -61,12 +77,15 @@ Types: ```python from mixedbread.types.stores import ( - ScoredStoreFile, - StoreFileStatus, + AudioURLInputChunk, + ImageURLInputChunk, StoreFile, + StoreFileConfig, + StoreFileStatus, + TextInputChunk, + VideoURLInputChunk, FileListResponse, FileDeleteResponse, - FileSearchResponse, ) ``` @@ -77,7 +96,6 @@ Methods: - client.stores.files.update(file_identifier, \*, store_identifier, \*\*params) -> StoreFile - client.stores.files.list(store_identifier, \*\*params) -> FileListResponse - client.stores.files.delete(file_identifier, \*, store_identifier) -> FileDeleteResponse -- client.stores.files.search(\*\*params) -> FileSearchResponse # 
Parsing @@ -87,10 +105,13 @@ Types: ```python from mixedbread.types.parsing import ( + Chunk, + ChunkElement, ChunkingStrategy, + DocumentParserResult, ElementType, - ParsingJobStatus, ParsingJob, + ParsingJobStatus, ReturnFormat, JobListResponse, JobDeleteResponse, @@ -204,7 +225,9 @@ Types: ```python from mixedbread.types import ( + APIKeyCreateOrUpdateParams, DataSource, + DataSourceAPIKeyParams, DataSourceOauth2Params, DataSourceType, LinearDataSource, @@ -243,7 +266,7 @@ Methods: Types: ```python -from mixedbread.types import APIKey, APIKeyCreated, APIKeyDeleteResponse +from mixedbread.types import APIKey, APIKeyCreated, Scope, APIKeyDeleteResponse ``` Methods: diff --git a/pyproject.toml b/pyproject.toml index d39892a9..1dc4f151 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "mixedbread" -version = "0.49.0" +version = "0.50.0" description = "The official Python library for the Mixedbread API" dynamic = ["readme"] license = "Apache-2.0" diff --git a/scripts/bootstrap b/scripts/bootstrap index b430fee3..fe8451e4 100755 --- a/scripts/bootstrap +++ b/scripts/bootstrap @@ -4,7 +4,7 @@ set -e cd "$(dirname "$0")/.." -if [ -f "Brewfile" ] && [ "$(uname -s)" = "Darwin" ] && [ "$SKIP_BREW" != "1" ] && [ -t 0 ]; then +if [ -f "Brewfile" ] && [ "$(uname -s)" = "Darwin" ] && [ "${SKIP_BREW:-}" != "1" ] && [ -t 0 ]; then brew bundle check >/dev/null 2>&1 || { echo -n "==> Install Homebrew dependencies? 
(y/N): " read -r response diff --git a/scripts/mock b/scripts/mock index bcf3b392..9c7c4399 100755 --- a/scripts/mock +++ b/scripts/mock @@ -19,34 +19,34 @@ fi echo "==> Starting mock server with URL ${URL}" -# Run prism mock on the given spec +# Run steady mock on the given spec if [ "$1" == "--daemon" ]; then # Pre-install the package so the download doesn't eat into the startup timeout - npm exec --package=@stainless-api/prism-cli@5.15.0 -- prism --version + npm exec --package=@stdy/cli@0.22.1 -- steady --version - npm exec --package=@stainless-api/prism-cli@5.15.0 -- prism mock "$URL" &> .prism.log & + npm exec --package=@stdy/cli@0.22.1 -- steady --host 127.0.0.1 -p 4010 --validator-query-array-format=repeat --validator-form-array-format=repeat --validator-query-object-format=brackets --validator-form-object-format=brackets "$URL" &> .stdy.log & - # Wait for server to come online (max 30s) + # Wait for server to come online via health endpoint (max 30s) echo -n "Waiting for server" attempts=0 - while ! grep -q "✖ fatal\|Prism is listening" ".prism.log" ; do + while ! curl --silent --fail "http://127.0.0.1:4010/_x-steady/health" >/dev/null 2>&1; do + if ! kill -0 $! 2>/dev/null; then + echo + cat .stdy.log + exit 1 + fi attempts=$((attempts + 1)) if [ "$attempts" -ge 300 ]; then echo - echo "Timed out waiting for Prism server to start" - cat .prism.log + echo "Timed out waiting for Steady server to start" + cat .stdy.log exit 1 fi echo -n "." 
sleep 0.1 done - if grep -q "✖ fatal" ".prism.log"; then - cat .prism.log - exit 1 - fi - echo else - npm exec --package=@stainless-api/prism-cli@5.15.0 -- prism mock "$URL" + npm exec --package=@stdy/cli@0.22.1 -- steady --host 127.0.0.1 -p 4010 --validator-query-array-format=repeat --validator-form-array-format=repeat --validator-query-object-format=brackets --validator-form-object-format=brackets "$URL" fi diff --git a/scripts/test b/scripts/test index dbeda2d2..0159035c 100755 --- a/scripts/test +++ b/scripts/test @@ -9,8 +9,8 @@ GREEN='\033[0;32m' YELLOW='\033[0;33m' NC='\033[0m' # No Color -function prism_is_running() { - curl --silent "http://localhost:4010" >/dev/null 2>&1 +function steady_is_running() { + curl --silent "http://127.0.0.1:4010/_x-steady/health" >/dev/null 2>&1 } kill_server_on_port() { @@ -25,7 +25,7 @@ function is_overriding_api_base_url() { [ -n "$TEST_API_BASE_URL" ] } -if ! is_overriding_api_base_url && ! prism_is_running ; then +if ! is_overriding_api_base_url && ! steady_is_running ; then # When we exit this script, make sure to kill the background mock server process trap 'kill_server_on_port 4010' EXIT @@ -36,19 +36,19 @@ fi if is_overriding_api_base_url ; then echo -e "${GREEN}✔ Running tests against ${TEST_API_BASE_URL}${NC}" echo -elif ! prism_is_running ; then - echo -e "${RED}ERROR:${NC} The test suite will not run without a mock Prism server" +elif ! steady_is_running ; then + echo -e "${RED}ERROR:${NC} The test suite will not run without a mock Steady server" echo -e "running against your OpenAPI spec." 
echo echo -e "To run the server, pass in the path or url of your OpenAPI" - echo -e "spec to the prism command:" + echo -e "spec to the steady command:" echo - echo -e " \$ ${YELLOW}npm exec --package=@stainless-api/prism-cli@5.15.0 -- prism mock path/to/your.openapi.yml${NC}" + echo -e " \$ ${YELLOW}npm exec --package=@stdy/cli@0.22.1 -- steady path/to/your.openapi.yml --host 127.0.0.1 -p 4010 --validator-query-array-format=repeat --validator-form-array-format=repeat --validator-query-object-format=brackets --validator-form-object-format=brackets${NC}" echo exit 1 else - echo -e "${GREEN}✔ Mock prism server is running with your OpenAPI spec${NC}" + echo -e "${GREEN}✔ Mock steady server is running with your OpenAPI spec${NC}" echo fi diff --git a/src/mixedbread/_base_client.py b/src/mixedbread/_base_client.py index dded30e9..1d1daaea 100644 --- a/src/mixedbread/_base_client.py +++ b/src/mixedbread/_base_client.py @@ -540,6 +540,10 @@ def _build_request( files = cast(HttpxRequestFiles, ForceMultipartDict()) prepared_url = self._prepare_url(options.url) + # preserve hard-coded query params from the url + if params and prepared_url.query: + params = {**dict(prepared_url.params.items()), **params} + prepared_url = prepared_url.copy_with(raw_path=prepared_url.raw_path.split(b"?", 1)[0]) if "_" in prepared_url.host: # work around https://github.com/encode/httpx/discussions/2880 kwargs["extensions"] = {"sni_hostname": prepared_url.host.replace("_", "-")} diff --git a/src/mixedbread/_files.py b/src/mixedbread/_files.py index 729ea489..4df9de45 100644 --- a/src/mixedbread/_files.py +++ b/src/mixedbread/_files.py @@ -3,8 +3,8 @@ import io import os import pathlib -from typing import overload -from typing_extensions import TypeGuard +from typing import Sequence, cast, overload +from typing_extensions import TypeVar, TypeGuard import anyio @@ -17,7 +17,9 @@ HttpxFileContent, HttpxRequestFiles, ) -from ._utils import is_tuple_t, is_mapping_t, is_sequence_t +from ._utils import 
is_list, is_mapping, is_tuple_t, is_mapping_t, is_sequence_t + +_T = TypeVar("_T") def is_base64_file_input(obj: object) -> TypeGuard[Base64FileInput]: @@ -121,3 +123,51 @@ async def async_read_file_content(file: FileContent) -> HttpxFileContent: return await anyio.Path(file).read_bytes() return file + + +def deepcopy_with_paths(item: _T, paths: Sequence[Sequence[str]]) -> _T: + """Copy only the containers along the given paths. + + Used to guard against mutation by extract_files without copying the entire structure. + Only dicts and lists that lie on a path are copied; everything else + is returned by reference. + + For example, given paths=[["foo", "files", "file"]] and the structure: + { + "foo": { + "bar": {"baz": {}}, + "files": {"file": <content>} + } + } + The root dict, "foo", and "files" are copied (they lie on the path). + "bar" and "baz" are returned by reference (off the path). + """ + return _deepcopy_with_paths(item, paths, 0) + + +def _deepcopy_with_paths(item: _T, paths: Sequence[Sequence[str]], index: int) -> _T: + if not paths: + return item + if is_mapping(item): + key_to_paths: dict[str, list[Sequence[str]]] = {} + for path in paths: + if index < len(path): + key_to_paths.setdefault(path[index], []).append(path) + + # if no path continues through this mapping, it won't be mutated and copying it is redundant + if not key_to_paths: + return item + + result = dict(item) + for key, subpaths in key_to_paths.items(): + if key in result: + result[key] = _deepcopy_with_paths(result[key], subpaths, index + 1) + return cast(_T, result) + if is_list(item): + array_paths = [path for path in paths if index < len(path) and path[index] == "<array>"] + + # if no path expects a list here, nothing will be mutated inside it - return by reference + if not array_paths: + return cast(_T, item) + return cast(_T, [_deepcopy_with_paths(entry, array_paths, index + 1) for entry in item]) + return item diff --git a/src/mixedbread/_qs.py b/src/mixedbread/_qs.py index ada6fd3f..de8c99bc
100644 --- a/src/mixedbread/_qs.py +++ b/src/mixedbread/_qs.py @@ -101,7 +101,10 @@ def _stringify_item( items.extend(self._stringify_item(key, item, opts)) return items elif array_format == "indices": - raise NotImplementedError("The array indices format is not supported yet") + items = [] + for i, item in enumerate(value): + items.extend(self._stringify_item(f"{key}[{i}]", item, opts)) + return items elif array_format == "brackets": items = [] key = key + "[]" diff --git a/src/mixedbread/_utils/__init__.py b/src/mixedbread/_utils/__init__.py index dc64e29a..1c090e51 100644 --- a/src/mixedbread/_utils/__init__.py +++ b/src/mixedbread/_utils/__init__.py @@ -1,3 +1,4 @@ +from ._path import path_template as path_template from ._sync import asyncify as asyncify from ._proxy import LazyProxy as LazyProxy from ._utils import ( @@ -23,7 +24,6 @@ coerce_integer as coerce_integer, file_from_path as file_from_path, strip_not_given as strip_not_given, - deepcopy_minimal as deepcopy_minimal, get_async_library as get_async_library, maybe_coerce_float as maybe_coerce_float, get_required_header as get_required_header, diff --git a/src/mixedbread/_utils/_path.py b/src/mixedbread/_utils/_path.py new file mode 100644 index 00000000..4d6e1e4c --- /dev/null +++ b/src/mixedbread/_utils/_path.py @@ -0,0 +1,127 @@ +from __future__ import annotations + +import re +from typing import ( + Any, + Mapping, + Callable, +) +from urllib.parse import quote + +# Matches '.' or '..' where each dot is either literal or percent-encoded (%2e / %2E). +_DOT_SEGMENT_RE = re.compile(r"^(?:\.|%2[eE]){1,2}$") + +_PLACEHOLDER_RE = re.compile(r"\{(\w+)\}") + + +def _quote_path_segment_part(value: str) -> str: + """Percent-encode `value` for use in a URI path segment. + + Considers characters not in `pchar` set from RFC 3986 §3.3 to be unsafe. 
+ https://datatracker.ietf.org/doc/html/rfc3986#section-3.3 + """ + # quote() already treats unreserved characters (letters, digits, and -._~) + # as safe, so we only need to add sub-delims, ':', and '@'. + # Notably, unlike the default `safe` for quote(), / is unsafe and must be quoted. + return quote(value, safe="!$&'()*+,;=:@") + + +def _quote_query_part(value: str) -> str: + """Percent-encode `value` for use in a URI query string. + + Considers &, = and characters not in `query` set from RFC 3986 §3.4 to be unsafe. + https://datatracker.ietf.org/doc/html/rfc3986#section-3.4 + """ + return quote(value, safe="!$'()*+,;:@/?") + + +def _quote_fragment_part(value: str) -> str: + """Percent-encode `value` for use in a URI fragment. + + Considers characters not in `fragment` set from RFC 3986 §3.5 to be unsafe. + https://datatracker.ietf.org/doc/html/rfc3986#section-3.5 + """ + return quote(value, safe="!$&'()*+,;=:@/?") + + +def _interpolate( + template: str, + values: Mapping[str, Any], + quoter: Callable[[str], str], +) -> str: + """Replace {name} placeholders in `template`, quoting each value with `quoter`. + + Placeholder names are looked up in `values`. + + Raises: + KeyError: If a placeholder is not found in `values`. + """ + # re.split with a capturing group returns alternating + # [text, name, text, name, ..., text] elements. + parts = _PLACEHOLDER_RE.split(template) + + for i in range(1, len(parts), 2): + name = parts[i] + if name not in values: + raise KeyError(f"a value for placeholder {{{name}}} was not provided") + val = values[name] + if val is None: + parts[i] = "null" + elif isinstance(val, bool): + parts[i] = "true" if val else "false" + else: + parts[i] = quoter(str(values[name])) + + return "".join(parts) + + +def path_template(template: str, /, **kwargs: Any) -> str: + """Interpolate {name} placeholders in `template` from keyword arguments. + + Args: + template: The template string containing {name} placeholders. 
+ **kwargs: Keyword arguments to interpolate into the template. + + Returns: + The template with placeholders interpolated and percent-encoded. + + Safe characters for percent-encoding are dependent on the URI component. + Placeholders in path and fragment portions are percent-encoded where the `segment` + and `fragment` sets from RFC 3986 respectively are considered safe. + Placeholders in the query portion are percent-encoded where the `query` set from + RFC 3986 §3.4 is considered safe except for = and & characters. + + Raises: + KeyError: If a placeholder is not found in `kwargs`. + ValueError: If resulting path contains /./ or /../ segments (including percent-encoded dot-segments). + """ + # Split the template into path, query, and fragment portions. + fragment_template: str | None = None + query_template: str | None = None + + rest = template + if "#" in rest: + rest, fragment_template = rest.split("#", 1) + if "?" in rest: + rest, query_template = rest.split("?", 1) + path_template = rest + + # Interpolate each portion with the appropriate quoting rules. + path_result = _interpolate(path_template, kwargs, _quote_path_segment_part) + + # Reject dot-segments (. and ..) in the final assembled path. The check + # runs after interpolation so that adjacent placeholders or a mix of static + # text and placeholders that together form a dot-segment are caught. + # Also reject percent-encoded dot-segments to protect against incorrectly + # implemented normalization in servers/proxies. + for segment in path_result.split("/"): + if _DOT_SEGMENT_RE.match(segment): + raise ValueError(f"Constructed path {path_result!r} contains dot-segment {segment!r} which is not allowed") + + result = path_result + if query_template is not None: + result += "?"
+ _interpolate(query_template, kwargs, _quote_query_part) + if fragment_template is not None: + result += "#" + _interpolate(fragment_template, kwargs, _quote_fragment_part) + + return result diff --git a/src/mixedbread/_utils/_utils.py b/src/mixedbread/_utils/_utils.py index eec7f4a1..771859f5 100644 --- a/src/mixedbread/_utils/_utils.py +++ b/src/mixedbread/_utils/_utils.py @@ -86,8 +86,9 @@ def _extract_items( index += 1 if is_dict(obj): try: - # We are at the last entry in the path so we must remove the field - if (len(path)) == index: + # Remove the field if there are no more dict keys in the path, + # only "<array>" traversal markers or end. + if all(p == "<array>" for p in path[index:]): item = obj.pop(key) else: item = obj[key] @@ -176,21 +177,6 @@ def is_iterable(obj: object) -> TypeGuard[Iterable[object]]: return isinstance(obj, Iterable) -def deepcopy_minimal(item: _T) -> _T: - """Minimal reimplementation of copy.deepcopy() that will only copy certain object types: - - - mappings, e.g. `dict` - - list - - This is done for performance reasons. - """ - if is_mapping(item): - return cast(_T, {k: deepcopy_minimal(v) for k, v in item.items()}) - if is_list(item): - return cast(_T, [deepcopy_minimal(entry) for entry in item]) - return item - - # copied from https://github.com/Rapptz/RoboDanny def human_join(seq: Sequence[str], *, delim: str = ", ", final: str = "or") -> str: size = len(seq) diff --git a/src/mixedbread/_version.py b/src/mixedbread/_version.py index 40dca853..84dd5576 100644 --- a/src/mixedbread/_version.py +++ b/src/mixedbread/_version.py @@ -1,4 +1,4 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
__title__ = "mixedbread" -__version__ = "0.49.0" # x-release-please-version +__version__ = "0.50.0" # x-release-please-version diff --git a/src/mixedbread/resources/api_keys.py b/src/mixedbread/resources/api_keys.py index 66408456..df799826 100644 --- a/src/mixedbread/resources/api_keys.py +++ b/src/mixedbread/resources/api_keys.py @@ -9,7 +9,7 @@ from ..types import api_key_list_params, api_key_create_params from .._types import Body, Omit, Query, Headers, NotGiven, omit, not_given -from .._utils import maybe_transform, async_maybe_transform +from .._utils import path_template, maybe_transform, async_maybe_transform from .._compat import cached_property from .._resource import SyncAPIResource, AsyncAPIResource from .._response import ( @@ -21,6 +21,7 @@ from ..pagination import SyncLimitOffset, AsyncLimitOffset from .._base_client import AsyncPaginator, make_request_options from ..types.api_key import APIKey +from ..types.scope_param import ScopeParam from ..types.api_key_created import APIKeyCreated from ..types.api_key_delete_response import APIKeyDeleteResponse @@ -51,7 +52,7 @@ def create( self, *, name: str | Omit = omit, - scope: Optional[Iterable[api_key_create_params.Scope]] | Omit = omit, + scope: Optional[Iterable[ScopeParam]] | Omit = omit, expires_at: Union[str, datetime, None] | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. 
@@ -131,7 +132,7 @@ def retrieve( if not api_key_id: raise ValueError(f"Expected a non-empty value for `api_key_id` but received {api_key_id!r}") return self._get( - f"/v1/api-keys/{api_key_id}", + path_template("/v1/api-keys/{api_key_id}", api_key_id=api_key_id), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), @@ -222,7 +223,7 @@ def delete( if not api_key_id: raise ValueError(f"Expected a non-empty value for `api_key_id` but received {api_key_id!r}") return self._delete( - f"/v1/api-keys/{api_key_id}", + path_template("/v1/api-keys/{api_key_id}", api_key_id=api_key_id), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), @@ -264,7 +265,7 @@ def reroll( if not api_key_id: raise ValueError(f"Expected a non-empty value for `api_key_id` but received {api_key_id!r}") return self._post( - f"/v1/api-keys/{api_key_id}/reroll", + path_template("/v1/api-keys/{api_key_id}/reroll", api_key_id=api_key_id), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), @@ -303,7 +304,7 @@ def revoke( if not api_key_id: raise ValueError(f"Expected a non-empty value for `api_key_id` but received {api_key_id!r}") return self._post( - f"/v1/api-keys/{api_key_id}/revoke", + path_template("/v1/api-keys/{api_key_id}/revoke", api_key_id=api_key_id), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), @@ -335,7 +336,7 @@ async def create( self, *, name: str | Omit = omit, - scope: Optional[Iterable[api_key_create_params.Scope]] | Omit = omit, + scope: Optional[Iterable[ScopeParam]] | Omit = omit, expires_at: Union[str, datetime, None] | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. 
# The extra values given here take precedence over values defined on the client or passed to this method. @@ -415,7 +416,7 @@ async def retrieve( if not api_key_id: raise ValueError(f"Expected a non-empty value for `api_key_id` but received {api_key_id!r}") return await self._get( - f"/v1/api-keys/{api_key_id}", + path_template("/v1/api-keys/{api_key_id}", api_key_id=api_key_id), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), @@ -506,7 +507,7 @@ async def delete( if not api_key_id: raise ValueError(f"Expected a non-empty value for `api_key_id` but received {api_key_id!r}") return await self._delete( - f"/v1/api-keys/{api_key_id}", + path_template("/v1/api-keys/{api_key_id}", api_key_id=api_key_id), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), @@ -548,7 +549,7 @@ async def reroll( if not api_key_id: raise ValueError(f"Expected a non-empty value for `api_key_id` but received {api_key_id!r}") return await self._post( - f"/v1/api-keys/{api_key_id}/reroll", + path_template("/v1/api-keys/{api_key_id}/reroll", api_key_id=api_key_id), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), @@ -587,7 +588,7 @@ async def revoke( if not api_key_id: raise ValueError(f"Expected a non-empty value for `api_key_id` but received {api_key_id!r}") return await self._post( - f"/v1/api-keys/{api_key_id}/revoke", + path_template("/v1/api-keys/{api_key_id}/revoke", api_key_id=api_key_id), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), diff --git a/src/mixedbread/resources/data_sources/connectors.py b/src/mixedbread/resources/data_sources/connectors.py index bd913e76..af800c0c 100644 --- a/src/mixedbread/resources/data_sources/connectors.py +++ 
b/src/mixedbread/resources/data_sources/connectors.py @@ -7,7 +7,7 @@ import httpx from ..._types import Body, Omit, Query, Headers, NotGiven, omit, not_given -from ..._utils import maybe_transform, async_maybe_transform +from ..._utils import path_template, maybe_transform, async_maybe_transform from ..._compat import cached_property from ..._resource import SyncAPIResource, AsyncAPIResource from ..._response import ( @@ -98,7 +98,7 @@ def create( if not data_source_id: raise ValueError(f"Expected a non-empty value for `data_source_id` but received {data_source_id!r}") return self._post( - f"/v1/data_sources/{data_source_id}/connectors", + path_template("/v1/data_sources/{data_source_id}/connectors", data_source_id=data_source_id), body=maybe_transform( { "store_id": store_id, @@ -153,7 +153,11 @@ def retrieve( if not connector_id: raise ValueError(f"Expected a non-empty value for `connector_id` but received {connector_id!r}") return self._get( - f"/v1/data_sources/{data_source_id}/connectors/{connector_id}", + path_template( + "/v1/data_sources/{data_source_id}/connectors/{connector_id}", + data_source_id=data_source_id, + connector_id=connector_id, + ), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), @@ -216,7 +220,11 @@ def update( if not connector_id: raise ValueError(f"Expected a non-empty value for `connector_id` but received {connector_id!r}") return self._put( - f"/v1/data_sources/{data_source_id}/connectors/{connector_id}", + path_template( + "/v1/data_sources/{data_source_id}/connectors/{connector_id}", + data_source_id=data_source_id, + connector_id=connector_id, + ), body=maybe_transform( { "name": name, @@ -279,7 +287,7 @@ def list( if not data_source_id: raise ValueError(f"Expected a non-empty value for `data_source_id` but received {data_source_id!r}") return self._get_api_list( - f"/v1/data_sources/{data_source_id}/connectors", + 
path_template("/v1/data_sources/{data_source_id}/connectors", data_source_id=data_source_id), page=SyncCursor[DataSourceConnector], options=make_request_options( extra_headers=extra_headers, @@ -337,7 +345,11 @@ def delete( if not connector_id: raise ValueError(f"Expected a non-empty value for `connector_id` but received {connector_id!r}") return self._delete( - f"/v1/data_sources/{data_source_id}/connectors/{connector_id}", + path_template( + "/v1/data_sources/{data_source_id}/connectors/{connector_id}", + data_source_id=data_source_id, + connector_id=connector_id, + ), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), @@ -418,7 +430,7 @@ async def create( if not data_source_id: raise ValueError(f"Expected a non-empty value for `data_source_id` but received {data_source_id!r}") return await self._post( - f"/v1/data_sources/{data_source_id}/connectors", + path_template("/v1/data_sources/{data_source_id}/connectors", data_source_id=data_source_id), body=await async_maybe_transform( { "store_id": store_id, @@ -473,7 +485,11 @@ async def retrieve( if not connector_id: raise ValueError(f"Expected a non-empty value for `connector_id` but received {connector_id!r}") return await self._get( - f"/v1/data_sources/{data_source_id}/connectors/{connector_id}", + path_template( + "/v1/data_sources/{data_source_id}/connectors/{connector_id}", + data_source_id=data_source_id, + connector_id=connector_id, + ), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), @@ -536,7 +552,11 @@ async def update( if not connector_id: raise ValueError(f"Expected a non-empty value for `connector_id` but received {connector_id!r}") return await self._put( - f"/v1/data_sources/{data_source_id}/connectors/{connector_id}", + path_template( + "/v1/data_sources/{data_source_id}/connectors/{connector_id}", + data_source_id=data_source_id, + 
connector_id=connector_id, + ), body=await async_maybe_transform( { "name": name, @@ -599,7 +619,7 @@ def list( if not data_source_id: raise ValueError(f"Expected a non-empty value for `data_source_id` but received {data_source_id!r}") return self._get_api_list( - f"/v1/data_sources/{data_source_id}/connectors", + path_template("/v1/data_sources/{data_source_id}/connectors", data_source_id=data_source_id), page=AsyncCursor[DataSourceConnector], options=make_request_options( extra_headers=extra_headers, @@ -657,7 +677,11 @@ async def delete( if not connector_id: raise ValueError(f"Expected a non-empty value for `connector_id` but received {connector_id!r}") return await self._delete( - f"/v1/data_sources/{data_source_id}/connectors/{connector_id}", + path_template( + "/v1/data_sources/{data_source_id}/connectors/{connector_id}", + data_source_id=data_source_id, + connector_id=connector_id, + ), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), diff --git a/src/mixedbread/resources/data_sources/data_sources.py b/src/mixedbread/resources/data_sources/data_sources.py index 185eaedb..bb0a9446 100644 --- a/src/mixedbread/resources/data_sources/data_sources.py +++ b/src/mixedbread/resources/data_sources/data_sources.py @@ -9,7 +9,7 @@ from ...types import Oauth2Params, data_source_list_params, data_source_create_params, data_source_update_params from ..._types import Body, Omit, Query, Headers, NotGiven, omit, not_given -from ..._utils import required_args, maybe_transform, async_maybe_transform +from ..._utils import path_template, required_args, maybe_transform, async_maybe_transform from ..._compat import cached_property from .connectors import ( ConnectorsResource, @@ -208,7 +208,7 @@ def retrieve( if not data_source_id: raise ValueError(f"Expected a non-empty value for `data_source_id` but received {data_source_id!r}") return self._get( - f"/v1/data_sources/{data_source_id}", + 
path_template("/v1/data_sources/{data_source_id}", data_source_id=data_source_id), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), @@ -329,7 +329,7 @@ def update( if not data_source_id: raise ValueError(f"Expected a non-empty value for `data_source_id` but received {data_source_id!r}") return self._put( - f"/v1/data_sources/{data_source_id}", + path_template("/v1/data_sources/{data_source_id}", data_source_id=data_source_id), body=maybe_transform( { "type": type, @@ -434,7 +434,7 @@ def delete( if not data_source_id: raise ValueError(f"Expected a non-empty value for `data_source_id` but received {data_source_id!r}") return self._delete( - f"/v1/data_sources/{data_source_id}", + path_template("/v1/data_sources/{data_source_id}", data_source_id=data_source_id), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), @@ -615,7 +615,7 @@ async def retrieve( if not data_source_id: raise ValueError(f"Expected a non-empty value for `data_source_id` but received {data_source_id!r}") return await self._get( - f"/v1/data_sources/{data_source_id}", + path_template("/v1/data_sources/{data_source_id}", data_source_id=data_source_id), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), @@ -736,7 +736,7 @@ async def update( if not data_source_id: raise ValueError(f"Expected a non-empty value for `data_source_id` but received {data_source_id!r}") return await self._put( - f"/v1/data_sources/{data_source_id}", + path_template("/v1/data_sources/{data_source_id}", data_source_id=data_source_id), body=await async_maybe_transform( { "type": type, @@ -841,7 +841,7 @@ async def delete( if not data_source_id: raise ValueError(f"Expected a non-empty value for `data_source_id` but received {data_source_id!r}") return await self._delete( - 
f"/v1/data_sources/{data_source_id}", + path_template("/v1/data_sources/{data_source_id}", data_source_id=data_source_id), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), diff --git a/src/mixedbread/resources/extractions/jobs.py b/src/mixedbread/resources/extractions/jobs.py index ae48bbf3..3b8a2cf4 100644 --- a/src/mixedbread/resources/extractions/jobs.py +++ b/src/mixedbread/resources/extractions/jobs.py @@ -7,7 +7,7 @@ import httpx from ..._types import Body, Query, Headers, NotGiven, not_given -from ..._utils import maybe_transform, async_maybe_transform +from ..._utils import path_template, maybe_transform, async_maybe_transform from ..._compat import cached_property from ..._resource import SyncAPIResource, AsyncAPIResource from ..._response import ( @@ -122,7 +122,7 @@ def retrieve( if not job_id: raise ValueError(f"Expected a non-empty value for `job_id` but received {job_id!r}") return self._get( - f"/v1/extractions/jobs/{job_id}", + path_template("/v1/extractions/jobs/{job_id}", job_id=job_id), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), @@ -229,7 +229,7 @@ async def retrieve( if not job_id: raise ValueError(f"Expected a non-empty value for `job_id` but received {job_id!r}") return await self._get( - f"/v1/extractions/jobs/{job_id}", + path_template("/v1/extractions/jobs/{job_id}", job_id=job_id), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), diff --git a/src/mixedbread/resources/files/files.py b/src/mixedbread/resources/files/files.py index 8536ba20..c89b028d 100644 --- a/src/mixedbread/resources/files/files.py +++ b/src/mixedbread/resources/files/files.py @@ -15,8 +15,9 @@ UploadsResourceWithStreamingResponse, AsyncUploadsResourceWithStreamingResponse, ) +from ..._files import deepcopy_with_paths from ..._types import 
Body, Omit, Query, Headers, NotGiven, FileTypes, omit, not_given -from ..._utils import extract_files, maybe_transform, deepcopy_minimal, async_maybe_transform +from ..._utils import extract_files, path_template, maybe_transform, async_maybe_transform from ..._compat import cached_property from ..._resource import SyncAPIResource, AsyncAPIResource from ..._response import ( @@ -132,7 +133,7 @@ def create( timeout=timeout, ) - body = deepcopy_minimal({"file": file}) + body = deepcopy_with_paths({"file": file}, [["file"]]) files = extract_files(cast(Mapping[str, object], body), paths=[["file"]]) # It should be noted that the actual Content-Type header that will be # sent to the server will contain a `boundary` parameter, e.g. @@ -180,7 +181,7 @@ def retrieve( if not file_id: raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}") return self._get( - f"/v1/files/{file_id}", + path_template("/v1/files/{file_id}", file_id=file_id), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), @@ -221,14 +222,14 @@ def update( """ if not file_id: raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}") - body = deepcopy_minimal({"file": file}) + body = deepcopy_with_paths({"file": file}, [["file"]]) files = extract_files(cast(Mapping[str, object], body), paths=[["file"]]) # It should be noted that the actual Content-Type header that will be # sent to the server will contain a `boundary` parameter, e.g. 
# multipart/form-data; boundary=---abc-- extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})} return self._post( - f"/v1/files/{file_id}", + path_template("/v1/files/{file_id}", file_id=file_id), body=maybe_transform(body, file_update_params.FileUpdateParams), files=files, options=make_request_options( @@ -334,7 +335,7 @@ def delete( if not file_id: raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}") return self._delete( - f"/v1/files/{file_id}", + path_template("/v1/files/{file_id}", file_id=file_id), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), @@ -374,7 +375,7 @@ def content( raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}") extra_headers = {"Accept": "application/octet-stream", **(extra_headers or {})} return self._get( - f"/v1/files/{file_id}/content", + path_template("/v1/files/{file_id}/content", file_id=file_id), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), @@ -467,7 +468,7 @@ async def create( timeout=timeout, ) - body = deepcopy_minimal({"file": file}) + body = deepcopy_with_paths({"file": file}, [["file"]]) files = extract_files(cast(Mapping[str, object], body), paths=[["file"]]) # It should be noted that the actual Content-Type header that will be # sent to the server will contain a `boundary` parameter, e.g. 
@@ -515,7 +516,7 @@ async def retrieve( if not file_id: raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}") return await self._get( - f"/v1/files/{file_id}", + path_template("/v1/files/{file_id}", file_id=file_id), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), @@ -556,14 +557,14 @@ async def update( """ if not file_id: raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}") - body = deepcopy_minimal({"file": file}) + body = deepcopy_with_paths({"file": file}, [["file"]]) files = extract_files(cast(Mapping[str, object], body), paths=[["file"]]) # It should be noted that the actual Content-Type header that will be # sent to the server will contain a `boundary` parameter, e.g. # multipart/form-data; boundary=---abc-- extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})} return await self._post( - f"/v1/files/{file_id}", + path_template("/v1/files/{file_id}", file_id=file_id), body=await async_maybe_transform(body, file_update_params.FileUpdateParams), files=files, options=make_request_options( @@ -669,7 +670,7 @@ async def delete( if not file_id: raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}") return await self._delete( - f"/v1/files/{file_id}", + path_template("/v1/files/{file_id}", file_id=file_id), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), @@ -709,7 +710,7 @@ async def content( raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}") extra_headers = {"Accept": "application/octet-stream", **(extra_headers or {})} return await self._get( - f"/v1/files/{file_id}/content", + path_template("/v1/files/{file_id}/content", file_id=file_id), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, 
timeout=timeout ), diff --git a/src/mixedbread/resources/files/uploads.py b/src/mixedbread/resources/files/uploads.py index dc410621..009fbd06 100644 --- a/src/mixedbread/resources/files/uploads.py +++ b/src/mixedbread/resources/files/uploads.py @@ -7,7 +7,7 @@ import httpx from ..._types import Body, Omit, Query, Headers, NotGiven, omit, not_given -from ..._utils import maybe_transform, async_maybe_transform +from ..._utils import path_template, maybe_transform, async_maybe_transform from ..._compat import cached_property from ..._resource import SyncAPIResource, AsyncAPIResource from ..._response import ( @@ -129,7 +129,7 @@ def retrieve( if not upload_id: raise ValueError(f"Expected a non-empty value for `upload_id` but received {upload_id!r}") return self._get( - f"/v1/files/uploads/{upload_id}", + path_template("/v1/files/uploads/{upload_id}", upload_id=upload_id), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), @@ -183,7 +183,7 @@ def abort( if not upload_id: raise ValueError(f"Expected a non-empty value for `upload_id` but received {upload_id!r}") return self._post( - f"/v1/files/uploads/{upload_id}/abort", + path_template("/v1/files/uploads/{upload_id}/abort", upload_id=upload_id), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), @@ -223,7 +223,7 @@ def complete( if not upload_id: raise ValueError(f"Expected a non-empty value for `upload_id` but received {upload_id!r}") return self._post( - f"/v1/files/uploads/{upload_id}/complete", + path_template("/v1/files/uploads/{upload_id}/complete", upload_id=upload_id), body=maybe_transform({"parts": parts}, upload_complete_params.UploadCompleteParams), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout @@ -333,7 +333,7 @@ async def retrieve( if not upload_id: raise ValueError(f"Expected a 
non-empty value for `upload_id` but received {upload_id!r}") return await self._get( - f"/v1/files/uploads/{upload_id}", + path_template("/v1/files/uploads/{upload_id}", upload_id=upload_id), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), @@ -387,7 +387,7 @@ async def abort( if not upload_id: raise ValueError(f"Expected a non-empty value for `upload_id` but received {upload_id!r}") return await self._post( - f"/v1/files/uploads/{upload_id}/abort", + path_template("/v1/files/uploads/{upload_id}/abort", upload_id=upload_id), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), @@ -427,7 +427,7 @@ async def complete( if not upload_id: raise ValueError(f"Expected a non-empty value for `upload_id` but received {upload_id!r}") return await self._post( - f"/v1/files/uploads/{upload_id}/complete", + path_template("/v1/files/uploads/{upload_id}/complete", upload_id=upload_id), body=await async_maybe_transform({"parts": parts}, upload_complete_params.UploadCompleteParams), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout diff --git a/src/mixedbread/resources/parsing/jobs.py b/src/mixedbread/resources/parsing/jobs.py index b797da1e..25d83ca8 100644 --- a/src/mixedbread/resources/parsing/jobs.py +++ b/src/mixedbread/resources/parsing/jobs.py @@ -11,7 +11,7 @@ from ...lib import polling from ..._types import Body, Omit, Query, Headers, NotGiven, FileTypes, omit, not_given from ...lib.multipart_upload import MultipartUploadOptions -from ..._utils import maybe_transform, async_maybe_transform +from ..._utils import path_template, maybe_transform, async_maybe_transform from ..._compat import cached_property from ..._resource import SyncAPIResource, AsyncAPIResource from ..._response import ( @@ -145,7 +145,7 @@ def retrieve( if not job_id: raise 
ValueError(f"Expected a non-empty value for `job_id` but received {job_id!r}") return self._get( - f"/v1/parsing/jobs/{job_id}", + path_template("/v1/parsing/jobs/{job_id}", job_id=job_id), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), @@ -254,7 +254,7 @@ def delete( if not job_id: raise ValueError(f"Expected a non-empty value for `job_id` but received {job_id!r}") return self._delete( - f"/v1/parsing/jobs/{job_id}", + path_template("/v1/parsing/jobs/{job_id}", job_id=job_id), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), @@ -293,7 +293,7 @@ def cancel( if not job_id: raise ValueError(f"Expected a non-empty value for `job_id` but received {job_id!r}") return self._patch( - f"/v1/parsing/jobs/{job_id}", + path_template("/v1/parsing/jobs/{job_id}", job_id=job_id), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), @@ -571,7 +571,7 @@ async def retrieve( if not job_id: raise ValueError(f"Expected a non-empty value for `job_id` but received {job_id!r}") return await self._get( - f"/v1/parsing/jobs/{job_id}", + path_template("/v1/parsing/jobs/{job_id}", job_id=job_id), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), @@ -680,7 +680,7 @@ async def delete( if not job_id: raise ValueError(f"Expected a non-empty value for `job_id` but received {job_id!r}") return await self._delete( - f"/v1/parsing/jobs/{job_id}", + path_template("/v1/parsing/jobs/{job_id}", job_id=job_id), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), @@ -719,7 +719,7 @@ async def cancel( if not job_id: raise ValueError(f"Expected a non-empty value for `job_id` but received {job_id!r}") return await self._patch( - 
f"/v1/parsing/jobs/{job_id}", + path_template("/v1/parsing/jobs/{job_id}", job_id=job_id), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), diff --git a/src/mixedbread/resources/stores/files.py b/src/mixedbread/resources/stores/files.py index 5124082c..32a24aa0 100644 --- a/src/mixedbread/resources/stores/files.py +++ b/src/mixedbread/resources/stores/files.py @@ -8,9 +8,9 @@ import httpx from ...lib import polling -from ..._types import Body, Omit, Query, Headers, NotGiven, FileTypes, SequenceNotStr, omit, not_given +from ..._types import Body, Omit, Query, Headers, NotGiven, FileTypes, omit, not_given from ...lib.multipart_upload import MultipartUploadOptions -from ..._utils import maybe_transform, async_maybe_transform +from ..._utils import path_template, maybe_transform, async_maybe_transform from ..._compat import cached_property from ..._resource import SyncAPIResource, AsyncAPIResource from ..._response import ( @@ -23,7 +23,6 @@ from ...types.stores import ( file_list_params, file_create_params, - file_search_params, file_update_params, file_retrieve_params, ) @@ -31,7 +30,7 @@ from ...types.stores.store_file_status import StoreFileStatus from ...types.stores.file_list_response import FileListResponse from ...types.stores.file_delete_response import FileDeleteResponse -from ...types.stores.file_search_response import FileSearchResponse +from ...types.stores.store_file_config_param import StoreFileConfigParam __all__ = ["FilesResource", "AsyncFilesResource"] @@ -61,11 +60,11 @@ def create( store_identifier: str, *, metadata: object | Omit = omit, - config: file_create_params.Config | Omit = omit, + config: StoreFileConfigParam | Omit = omit, external_id: Optional[str] | Omit = omit, overwrite: bool | Omit = omit, file_id: str, - experimental: Optional[file_create_params.Experimental] | Omit = omit, + experimental: Optional[StoreFileConfigParam] | Omit = omit, # Use the following 
arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, @@ -108,7 +107,7 @@ def create( if not store_identifier: raise ValueError(f"Expected a non-empty value for `store_identifier` but received {store_identifier!r}") return self._post( - f"/v1/stores/{store_identifier}/files", + path_template("/v1/stores/{store_identifier}/files", store_identifier=store_identifier), body=maybe_transform( { "metadata": metadata, @@ -169,7 +168,11 @@ def retrieve( if not file_identifier: raise ValueError(f"Expected a non-empty value for `file_identifier` but received {file_identifier!r}") return self._get( - f"/v1/stores/{store_identifier}/files/{file_identifier}", + path_template( + "/v1/stores/{store_identifier}/files/{file_identifier}", + store_identifier=store_identifier, + file_identifier=file_identifier, + ), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, @@ -221,7 +224,11 @@ def update( if not file_identifier: raise ValueError(f"Expected a non-empty value for `file_identifier` but received {file_identifier!r}") return self._patch( - f"/v1/stores/{store_identifier}/files/{file_identifier}", + path_template( + "/v1/stores/{store_identifier}/files/{file_identifier}", + store_identifier=store_identifier, + file_identifier=file_identifier, + ), body=maybe_transform({"metadata": metadata}, file_update_params.FileUpdateParams), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout @@ -285,7 +292,7 @@ def list( if not store_identifier: raise ValueError(f"Expected a non-empty value for `store_identifier` but received {store_identifier!r}") return self._post( - f"/v1/stores/{store_identifier}/files/list", + path_template("/v1/stores/{store_identifier}/files/list", store_identifier=store_identifier), 
body=maybe_transform( { "limit": limit, @@ -343,76 +350,15 @@ def delete( if not file_identifier: raise ValueError(f"Expected a non-empty value for `file_identifier` but received {file_identifier!r}") return self._delete( - f"/v1/stores/{store_identifier}/files/{file_identifier}", - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=FileDeleteResponse, - ) - - def search( - self, - *, - query: file_search_params.Query, - store_identifiers: SequenceNotStr[str], - top_k: int | Omit = omit, - filters: Optional[file_search_params.Filters] | Omit = omit, - file_ids: Union[Iterable[object], SequenceNotStr[str], None] | Omit = omit, - search_options: file_search_params.SearchOptions | Omit = omit, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = not_given, - ) -> FileSearchResponse: - """ - Search for files within a store based on semantic similarity. 
- - Args: store_identifier: The ID or name of the store to search within - search_params: Search configuration including query text, pagination, and - filters - - Returns: StoreFileSearchResponse: List of matching files with relevance scores - - Args: - query: Search query text - - store_identifiers: IDs or names of stores to search - - top_k: Number of results to return - - filters: Optional filter conditions - - file_ids: Optional list of file IDs to filter chunks by (inclusion filter) - - search_options: Search configuration options - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - return self._post( - "/v1/stores/files/search", - body=maybe_transform( - { - "query": query, - "store_identifiers": store_identifiers, - "top_k": top_k, - "filters": filters, - "file_ids": file_ids, - "search_options": search_options, - }, - file_search_params.FileSearchParams, + path_template( + "/v1/stores/{store_identifier}/files/{file_identifier}", + store_identifier=store_identifier, + file_identifier=file_identifier, ), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), - cast_to=FileSearchResponse, + cast_to=FileDeleteResponse, ) def poll( @@ -610,11 +556,11 @@ async def create( store_identifier: str, *, metadata: object | Omit = omit, - config: file_create_params.Config | Omit = omit, + config: StoreFileConfigParam | Omit = omit, external_id: Optional[str] | Omit = omit, overwrite: bool | Omit = omit, file_id: str, - experimental: Optional[file_create_params.Experimental] | Omit = omit, + experimental: Optional[StoreFileConfigParam] | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. 
# The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, @@ -657,7 +603,7 @@ async def create( if not store_identifier: raise ValueError(f"Expected a non-empty value for `store_identifier` but received {store_identifier!r}") return await self._post( - f"/v1/stores/{store_identifier}/files", + path_template("/v1/stores/{store_identifier}/files", store_identifier=store_identifier), body=await async_maybe_transform( { "metadata": metadata, @@ -718,7 +664,11 @@ async def retrieve( if not file_identifier: raise ValueError(f"Expected a non-empty value for `file_identifier` but received {file_identifier!r}") return await self._get( - f"/v1/stores/{store_identifier}/files/{file_identifier}", + path_template( + "/v1/stores/{store_identifier}/files/{file_identifier}", + store_identifier=store_identifier, + file_identifier=file_identifier, + ), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, @@ -772,7 +722,11 @@ async def update( if not file_identifier: raise ValueError(f"Expected a non-empty value for `file_identifier` but received {file_identifier!r}") return await self._patch( - f"/v1/stores/{store_identifier}/files/{file_identifier}", + path_template( + "/v1/stores/{store_identifier}/files/{file_identifier}", + store_identifier=store_identifier, + file_identifier=file_identifier, + ), body=await async_maybe_transform({"metadata": metadata}, file_update_params.FileUpdateParams), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout @@ -836,7 +790,7 @@ async def list( if not store_identifier: raise ValueError(f"Expected a non-empty value for `store_identifier` but received {store_identifier!r}") return await self._post( - f"/v1/stores/{store_identifier}/files/list", + path_template("/v1/stores/{store_identifier}/files/list", store_identifier=store_identifier), body=await 
async_maybe_transform( { "limit": limit, @@ -894,76 +848,15 @@ async def delete( if not file_identifier: raise ValueError(f"Expected a non-empty value for `file_identifier` but received {file_identifier!r}") return await self._delete( - f"/v1/stores/{store_identifier}/files/{file_identifier}", - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=FileDeleteResponse, - ) - - async def search( - self, - *, - query: file_search_params.Query, - store_identifiers: SequenceNotStr[str], - top_k: int | Omit = omit, - filters: Optional[file_search_params.Filters] | Omit = omit, - file_ids: Union[Iterable[object], SequenceNotStr[str], None] | Omit = omit, - search_options: file_search_params.SearchOptions | Omit = omit, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = not_given, - ) -> FileSearchResponse: - """ - Search for files within a store based on semantic similarity. 
- - Args: store_identifier: The ID or name of the store to search within - search_params: Search configuration including query text, pagination, and - filters - - Returns: StoreFileSearchResponse: List of matching files with relevance scores - - Args: - query: Search query text - - store_identifiers: IDs or names of stores to search - - top_k: Number of results to return - - filters: Optional filter conditions - - file_ids: Optional list of file IDs to filter chunks by (inclusion filter) - - search_options: Search configuration options - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - return await self._post( - "/v1/stores/files/search", - body=await async_maybe_transform( - { - "query": query, - "store_identifiers": store_identifiers, - "top_k": top_k, - "filters": filters, - "file_ids": file_ids, - "search_options": search_options, - }, - file_search_params.FileSearchParams, + path_template( + "/v1/stores/{store_identifier}/files/{file_identifier}", + store_identifier=store_identifier, + file_identifier=file_identifier, ), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), - cast_to=FileSearchResponse, + cast_to=FileDeleteResponse, ) async def poll( @@ -1155,9 +1048,6 @@ def __init__(self, files: FilesResource) -> None: self.delete = to_raw_response_wrapper( files.delete, ) - self.search = to_raw_response_wrapper( - files.search, - ) class AsyncFilesResourceWithRawResponse: @@ -1179,9 +1069,6 @@ def __init__(self, files: AsyncFilesResource) -> None: self.delete = async_to_raw_response_wrapper( files.delete, ) - self.search = async_to_raw_response_wrapper( - files.search, - ) class FilesResourceWithStreamingResponse: @@ -1203,9 +1090,6 @@ def __init__(self, files: FilesResource) -> None: 
self.delete = to_streamed_response_wrapper( files.delete, ) - self.search = to_streamed_response_wrapper( - files.search, - ) class AsyncFilesResourceWithStreamingResponse: @@ -1227,6 +1111,3 @@ def __init__(self, files: AsyncFilesResource) -> None: self.delete = async_to_streamed_response_wrapper( files.delete, ) - self.search = async_to_streamed_response_wrapper( - files.search, - ) diff --git a/src/mixedbread/resources/stores/stores.py b/src/mixedbread/resources/stores/stores.py index 1834f5a8..20271dcc 100644 --- a/src/mixedbread/resources/stores/stores.py +++ b/src/mixedbread/resources/stores/stores.py @@ -23,7 +23,7 @@ store_question_answering_params, ) from ..._types import Body, Omit, Query, Headers, NotGiven, SequenceNotStr, omit, not_given -from ..._utils import maybe_transform, async_maybe_transform +from ..._utils import path_template, maybe_transform, async_maybe_transform from ..._compat import cached_property from ..._resource import SyncAPIResource, AsyncAPIResource from ..._response import ( @@ -35,6 +35,7 @@ from ...pagination import SyncCursor, AsyncCursor from ...types.store import Store from ..._base_client import AsyncPaginator, make_request_options +from ...types.store_config_param import StoreConfigParam from ...types.expires_after_param import ExpiresAfterParam from ...types.store_delete_response import StoreDeleteResponse from ...types.store_search_response import StoreSearchResponse @@ -75,9 +76,10 @@ def create( name: Optional[str] | Omit = omit, description: Optional[str] | Omit = omit, is_public: bool | Omit = omit, + license: Optional[str] | Omit = omit, expires_after: Optional[ExpiresAfterParam] | Omit = omit, metadata: object | Omit = omit, - config: Optional[store_create_params.Config] | Omit = omit, + config: Optional[StoreConfigParam] | Omit = omit, file_ids: Optional[SequenceNotStr[str]] | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. 
# The extra values given here take precedence over values defined on the client or passed to this method. @@ -102,6 +104,8 @@ def create( is_public: Whether the store can be accessed by anyone with valid login credentials + license: License for public stores + expires_after: Represents an expiration policy for a store. metadata: Optional metadata key-value pairs @@ -125,6 +129,7 @@ def create( "name": name, "description": description, "is_public": is_public, + "license": license, "expires_after": expires_after, "metadata": metadata, "config": config, @@ -170,7 +175,7 @@ def retrieve( if not store_identifier: raise ValueError(f"Expected a non-empty value for `store_identifier` but received {store_identifier!r}") return self._get( - f"/v1/stores/{store_identifier}", + path_template("/v1/stores/{store_identifier}", store_identifier=store_identifier), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), @@ -184,6 +189,7 @@ def update( name: Optional[str] | Omit = omit, description: Optional[str] | Omit = omit, is_public: Optional[bool] | Omit = omit, + license: Optional[str] | Omit = omit, expires_after: Optional[ExpiresAfterParam] | Omit = omit, metadata: object | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. @@ -211,6 +217,8 @@ def update( is_public: Whether the store can be accessed by anyone with valid login credentials + license: License for public stores + expires_after: Represents an expiration policy for a store. 
metadata: Optional metadata key-value pairs @@ -226,12 +234,13 @@ def update( if not store_identifier: raise ValueError(f"Expected a non-empty value for `store_identifier` but received {store_identifier!r}") return self._put( - f"/v1/stores/{store_identifier}", + path_template("/v1/stores/{store_identifier}", store_identifier=store_identifier), body=maybe_transform( { "name": name, "description": description, "is_public": is_public, + "license": license, "expires_after": expires_after, "metadata": metadata, }, @@ -342,7 +351,7 @@ def delete( if not store_identifier: raise ValueError(f"Expected a non-empty value for `store_identifier` but received {store_identifier!r}") return self._delete( - f"/v1/stores/{store_identifier}", + path_template("/v1/stores/{store_identifier}", store_identifier=store_identifier), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), @@ -596,9 +605,10 @@ async def create( name: Optional[str] | Omit = omit, description: Optional[str] | Omit = omit, is_public: bool | Omit = omit, + license: Optional[str] | Omit = omit, expires_after: Optional[ExpiresAfterParam] | Omit = omit, metadata: object | Omit = omit, - config: Optional[store_create_params.Config] | Omit = omit, + config: Optional[StoreConfigParam] | Omit = omit, file_ids: Optional[SequenceNotStr[str]] | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. @@ -623,6 +633,8 @@ async def create( is_public: Whether the store can be accessed by anyone with valid login credentials + license: License for public stores + expires_after: Represents an expiration policy for a store. 
metadata: Optional metadata key-value pairs @@ -646,6 +658,7 @@ async def create( "name": name, "description": description, "is_public": is_public, + "license": license, "expires_after": expires_after, "metadata": metadata, "config": config, @@ -691,7 +704,7 @@ async def retrieve( if not store_identifier: raise ValueError(f"Expected a non-empty value for `store_identifier` but received {store_identifier!r}") return await self._get( - f"/v1/stores/{store_identifier}", + path_template("/v1/stores/{store_identifier}", store_identifier=store_identifier), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), @@ -705,6 +718,7 @@ async def update( name: Optional[str] | Omit = omit, description: Optional[str] | Omit = omit, is_public: Optional[bool] | Omit = omit, + license: Optional[str] | Omit = omit, expires_after: Optional[ExpiresAfterParam] | Omit = omit, metadata: object | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. @@ -732,6 +746,8 @@ async def update( is_public: Whether the store can be accessed by anyone with valid login credentials + license: License for public stores + expires_after: Represents an expiration policy for a store. 
metadata: Optional metadata key-value pairs @@ -747,12 +763,13 @@ async def update( if not store_identifier: raise ValueError(f"Expected a non-empty value for `store_identifier` but received {store_identifier!r}") return await self._put( - f"/v1/stores/{store_identifier}", + path_template("/v1/stores/{store_identifier}", store_identifier=store_identifier), body=await async_maybe_transform( { "name": name, "description": description, "is_public": is_public, + "license": license, "expires_after": expires_after, "metadata": metadata, }, @@ -863,7 +880,7 @@ async def delete( if not store_identifier: raise ValueError(f"Expected a non-empty value for `store_identifier` but received {store_identifier!r}") return await self._delete( - f"/v1/stores/{store_identifier}", + path_template("/v1/stores/{store_identifier}", store_identifier=store_identifier), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), diff --git a/src/mixedbread/types/__init__.py b/src/mixedbread/types/__init__.py index 306c151e..9f1966f9 100644 --- a/src/mixedbread/types/__init__.py +++ b/src/mixedbread/types/__init__.py @@ -4,12 +4,18 @@ from . import shared from .. 
import _compat +from .scope import Scope as Scope from .store import Store as Store from .shared import Usage as Usage, SearchFilter as SearchFilter, SearchFilterCondition as SearchFilterCondition from .api_key import APIKey as APIKey +from .audio_url import AudioURL as AudioURL from .embedding import Embedding as Embedding +from .video_url import VideoURL as VideoURL from .data_source import DataSource as DataSource +from .file_counts import FileCounts as FileCounts from .file_object import FileObject as FileObject +from .scope_param import ScopeParam as ScopeParam +from .store_config import StoreConfig as StoreConfig from .expires_after import ExpiresAfter as ExpiresAfter from .info_response import InfoResponse as InfoResponse from .oauth2_params import Oauth2Params as Oauth2Params @@ -18,12 +24,16 @@ from .rerank_response import RerankResponse as RerankResponse from .data_source_type import DataSourceType as DataSourceType from .file_list_params import FileListParams as FileListParams +from .image_url_output import ImageURLOutput as ImageURLOutput +from .markdown_heading import MarkdownHeading as MarkdownHeading from .store_list_params import StoreListParams as StoreListParams from .file_create_params import FileCreateParams as FileCreateParams from .file_update_params import FileUpdateParams as FileUpdateParams +from .store_config_param import StoreConfigParam as StoreConfigParam from .api_key_list_params import APIKeyListParams as APIKeyListParams from .client_embed_params import ClientEmbedParams as ClientEmbedParams from .expires_after_param import ExpiresAfterParam as ExpiresAfterParam +from .rerank_config_param import RerankConfigParam as RerankConfigParam from .store_create_params import StoreCreateParams as StoreCreateParams from .store_search_params import StoreSearchParams as StoreSearchParams from .store_update_params import StoreUpdateParams as StoreUpdateParams @@ -37,6 +47,7 @@ from .data_source_list_params import DataSourceListParams as 
DataSourceListParams from .embedding_create_params import EmbeddingCreateParams as EmbeddingCreateParams from .scored_text_input_chunk import ScoredTextInputChunk as ScoredTextInputChunk +from .contextualization_config import ContextualizationConfig as ContextualizationConfig from .linear_data_source_param import LinearDataSourceParam as LinearDataSourceParam from .multi_encoding_embedding import MultiEncodingEmbedding as MultiEncodingEmbedding from .notion_data_source_param import NotionDataSourceParam as NotionDataSourceParam @@ -44,14 +55,25 @@ from .data_source_oauth2_params import DataSourceOauth2Params as DataSourceOauth2Params from .data_source_update_params import DataSourceUpdateParams as DataSourceUpdateParams from .embedding_create_response import EmbeddingCreateResponse as EmbeddingCreateResponse +from .data_source_api_key_params import DataSourceAPIKeyParams as DataSourceAPIKeyParams +from .agentic_search_config_param import AgenticSearchConfigParam as AgenticSearchConfigParam from .data_source_delete_response import DataSourceDeleteResponse as DataSourceDeleteResponse +from .pdf_chunk_generated_metadata import PdfChunkGeneratedMetadata as PdfChunkGeneratedMetadata from .scored_audio_url_input_chunk import ScoredAudioURLInputChunk as ScoredAudioURLInputChunk from .scored_image_url_input_chunk import ScoredImageURLInputChunk as ScoredImageURLInputChunk from .scored_video_url_input_chunk import ScoredVideoURLInputChunk as ScoredVideoURLInputChunk from .store_metadata_facets_params import StoreMetadataFacetsParams as StoreMetadataFacetsParams +from .code_chunk_generated_metadata import CodeChunkGeneratedMetadata as CodeChunkGeneratedMetadata +from .text_chunk_generated_metadata import TextChunkGeneratedMetadata as TextChunkGeneratedMetadata +from .audio_chunk_generated_metadata import AudioChunkGeneratedMetadata as AudioChunkGeneratedMetadata +from .contextualization_config_param import ContextualizationConfigParam as ContextualizationConfigParam +from 
.image_chunk_generated_metadata import ImageChunkGeneratedMetadata as ImageChunkGeneratedMetadata from .store_metadata_facets_response import StoreMetadataFacetsResponse as StoreMetadataFacetsResponse +from .video_chunk_generated_metadata import VideoChunkGeneratedMetadata as VideoChunkGeneratedMetadata +from .api_key_create_or_update_params import APIKeyCreateOrUpdateParams as APIKeyCreateOrUpdateParams from .store_question_answering_params import StoreQuestionAnsweringParams as StoreQuestionAnsweringParams from .store_chunk_search_options_param import StoreChunkSearchOptionsParam as StoreChunkSearchOptionsParam +from .markdown_chunk_generated_metadata import MarkdownChunkGeneratedMetadata as MarkdownChunkGeneratedMetadata from .store_question_answering_response import StoreQuestionAnsweringResponse as StoreQuestionAnsweringResponse # Rebuild cyclical models only after all modules are imported. diff --git a/src/mixedbread/types/agentic_search_config_param.py b/src/mixedbread/types/agentic_search_config_param.py new file mode 100644 index 00000000..8a01d8e4 --- /dev/null +++ b/src/mixedbread/types/agentic_search_config_param.py @@ -0,0 +1,24 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from __future__ import annotations + +from typing import Optional +from typing_extensions import TypedDict + +__all__ = ["AgenticSearchConfigParam"] + + +class AgenticSearchConfigParam(TypedDict, total=False): + """Configuration for agentic multi-query search.""" + + max_rounds: int + """Maximum number of search rounds""" + + queries_per_round: int + """Maximum queries per round""" + + instructions: Optional[str] + """ + Additional custom instructions (followed only when not in conflict with existing + rules) + """ diff --git a/src/mixedbread/types/api_key.py b/src/mixedbread/types/api_key.py index ab7b375d..bfd14270 100644 --- a/src/mixedbread/types/api_key.py +++ b/src/mixedbread/types/api_key.py @@ -4,17 +4,10 @@ from datetime import datetime from typing_extensions import Literal +from .scope import Scope from .._models import BaseModel -__all__ = ["APIKey", "Scope"] - - -class Scope(BaseModel): - method: Literal["read", "write", "delete", "list", "create", "search"] - - resource_type: Optional[Literal["store"]] = None - - resource_id: Optional[str] = None +__all__ = ["APIKey"] class APIKey(BaseModel): diff --git a/src/mixedbread/types/api_key_create_or_update_params.py b/src/mixedbread/types/api_key_create_or_update_params.py new file mode 100644 index 00000000..570d6527 --- /dev/null +++ b/src/mixedbread/types/api_key_create_or_update_params.py @@ -0,0 +1,16 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["APIKeyCreateOrUpdateParams"] + + +class APIKeyCreateOrUpdateParams(TypedDict, total=False): + """Base class for API key create or update parameters.""" + + type: Literal["api_key"] + + api_key: Required[str] + """The API key""" diff --git a/src/mixedbread/types/api_key_create_params.py b/src/mixedbread/types/api_key_create_params.py index 829cd517..24728b23 100644 --- a/src/mixedbread/types/api_key_create_params.py +++ b/src/mixedbread/types/api_key_create_params.py @@ -4,27 +4,20 @@ from typing import Union, Iterable, Optional from datetime import datetime -from typing_extensions import Literal, Required, Annotated, TypedDict +from typing_extensions import Annotated, TypedDict from .._utils import PropertyInfo +from .scope_param import ScopeParam -__all__ = ["APIKeyCreateParams", "Scope"] +__all__ = ["APIKeyCreateParams"] class APIKeyCreateParams(TypedDict, total=False): name: str """A name/description for the API key""" - scope: Optional[Iterable[Scope]] + scope: Optional[Iterable[ScopeParam]] """The scope of the API key""" expires_at: Annotated[Union[str, datetime, None], PropertyInfo(format="iso8601")] """Optional expiration datetime""" - - -class Scope(TypedDict, total=False): - method: Required[Literal["read", "write", "delete", "list", "create", "search"]] - - resource_type: Optional[Literal["store"]] - - resource_id: Optional[str] diff --git a/src/mixedbread/types/api_key_created.py b/src/mixedbread/types/api_key_created.py index 3efdb33a..565af1d6 100644 --- a/src/mixedbread/types/api_key_created.py +++ b/src/mixedbread/types/api_key_created.py @@ -4,17 +4,10 @@ from datetime import datetime from typing_extensions import Literal +from .scope import Scope from .._models import BaseModel -__all__ = ["APIKeyCreated", "Scope"] - - -class Scope(BaseModel): - method: Literal["read", "write", "delete", "list", "create", "search"] - - resource_type: 
Optional[Literal["store"]] = None - - resource_id: Optional[str] = None +__all__ = ["APIKeyCreated"] class APIKeyCreated(BaseModel): diff --git a/src/mixedbread/types/audio_chunk_generated_metadata.py b/src/mixedbread/types/audio_chunk_generated_metadata.py new file mode 100644 index 00000000..1d20d204 --- /dev/null +++ b/src/mixedbread/types/audio_chunk_generated_metadata.py @@ -0,0 +1,42 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import TYPE_CHECKING, Dict, Optional +from typing_extensions import Literal + +from pydantic import Field as FieldInfo + +from .._models import BaseModel + +__all__ = ["AudioChunkGeneratedMetadata"] + + +class AudioChunkGeneratedMetadata(BaseModel): + type: Optional[Literal["audio"]] = None + + file_type: Optional[str] = None + + file_size: Optional[int] = None + + total_duration_seconds: Optional[float] = None + + sample_rate: Optional[int] = None + + channels: Optional[int] = None + + audio_format: Optional[int] = None + + bpm: Optional[int] = None + + file_extension: Optional[str] = None + + if TYPE_CHECKING: + # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a + # value to this field, so for compatibility we avoid doing it at runtime. + __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride] + + # Stub to indicate that arbitrary properties are accepted. + # To access properties that are not valid identifiers you can use `getattr`, e.g. + # `getattr(obj, '$type')` + def __getattr__(self, attr: str) -> object: ... + else: + __pydantic_extra__: Dict[str, object] diff --git a/src/mixedbread/types/audio_url.py b/src/mixedbread/types/audio_url.py new file mode 100644 index 00000000..ce881bde --- /dev/null +++ b/src/mixedbread/types/audio_url.py @@ -0,0 +1,12 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from .._models import BaseModel + +__all__ = ["AudioURL"] + + +class AudioURL(BaseModel): + """Model for audio URL validation.""" + + url: str + """The audio URL. Can be either a URL or a Data URI.""" diff --git a/src/mixedbread/types/code_chunk_generated_metadata.py b/src/mixedbread/types/code_chunk_generated_metadata.py new file mode 100644 index 00000000..ff3aa668 --- /dev/null +++ b/src/mixedbread/types/code_chunk_generated_metadata.py @@ -0,0 +1,40 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import TYPE_CHECKING, Dict, Optional +from typing_extensions import Literal + +from pydantic import Field as FieldInfo + +from .._models import BaseModel + +__all__ = ["CodeChunkGeneratedMetadata"] + + +class CodeChunkGeneratedMetadata(BaseModel): + type: Optional[Literal["code"]] = None + + file_type: str + + language: Optional[str] = None + + word_count: Optional[int] = None + + file_size: Optional[int] = None + + start_line: Optional[int] = None + + num_lines: Optional[int] = None + + file_extension: Optional[str] = None + + if TYPE_CHECKING: + # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a + # value to this field, so for compatibility we avoid doing it at runtime. + __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride] + + # Stub to indicate that arbitrary properties are accepted. + # To access properties that are not valid identifiers you can use `getattr`, e.g. + # `getattr(obj, '$type')` + def __getattr__(self, attr: str) -> object: ... + else: + __pydantic_extra__: Dict[str, object] diff --git a/src/mixedbread/types/contextualization_config.py b/src/mixedbread/types/contextualization_config.py new file mode 100644 index 00000000..1b2b74a2 --- /dev/null +++ b/src/mixedbread/types/contextualization_config.py @@ -0,0 +1,24 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from typing import List, Union, Optional + +from .._models import BaseModel + +__all__ = ["ContextualizationConfig"] + + +class ContextualizationConfig(BaseModel): + with_metadata: Union[bool, List[str], None] = None + """Include all metadata or specific fields in the contextualization. + + Supports dot notation for nested fields (e.g., 'author.name'). When True, all + metadata is included (flattened). When a list, only specified fields are + included. + """ + + with_file_context: Optional[bool] = None + """ + Use an LLM to generate a short context for each text chunk that situates it + within the full document, improving retrieval accuracy. Only applies to text + content during non-sliced ingestion. + """ diff --git a/src/mixedbread/types/contextualization_config_param.py b/src/mixedbread/types/contextualization_config_param.py new file mode 100644 index 00000000..9032c508 --- /dev/null +++ b/src/mixedbread/types/contextualization_config_param.py @@ -0,0 +1,27 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Union +from typing_extensions import TypedDict + +from .._types import SequenceNotStr + +__all__ = ["ContextualizationConfigParam"] + + +class ContextualizationConfigParam(TypedDict, total=False): + with_metadata: Union[bool, SequenceNotStr[str]] + """Include all metadata or specific fields in the contextualization. + + Supports dot notation for nested fields (e.g., 'author.name'). When True, all + metadata is included (flattened). When a list, only specified fields are + included. + """ + + with_file_context: bool + """ + Use an LLM to generate a short context for each text chunk that situates it + within the full document, improving retrieval accuracy. Only applies to text + content during non-sliced ingestion. 
+ """ diff --git a/src/mixedbread/types/data_source.py b/src/mixedbread/types/data_source.py index d219ef28..1acd396c 100644 --- a/src/mixedbread/types/data_source.py +++ b/src/mixedbread/types/data_source.py @@ -8,21 +8,12 @@ from .._models import BaseModel from .data_source_type import DataSourceType from .data_source_oauth2_params import DataSourceOauth2Params +from .data_source_api_key_params import DataSourceAPIKeyParams -__all__ = ["DataSource", "AuthParams", "AuthParamsDataSourceAPIKeyParams"] - - -class AuthParamsDataSourceAPIKeyParams(BaseModel): - """Authentication parameters for a API key data source.""" - - type: Optional[Literal["api_key"]] = None - - api_key: str - """The API key""" - +__all__ = ["DataSource", "AuthParams"] AuthParams: TypeAlias = Annotated[ - Union[DataSourceOauth2Params, AuthParamsDataSourceAPIKeyParams, None], PropertyInfo(discriminator="type") + Union[DataSourceOauth2Params, DataSourceAPIKeyParams, None], PropertyInfo(discriminator="type") ] diff --git a/src/mixedbread/types/data_source_api_key_params.py b/src/mixedbread/types/data_source_api_key_params.py new file mode 100644 index 00000000..85c596bf --- /dev/null +++ b/src/mixedbread/types/data_source_api_key_params.py @@ -0,0 +1,17 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from typing import Optional +from typing_extensions import Literal + +from .._models import BaseModel + +__all__ = ["DataSourceAPIKeyParams"] + + +class DataSourceAPIKeyParams(BaseModel): + """Authentication parameters for a API key data source.""" + + type: Optional[Literal["api_key"]] = None + + api_key: str + """The API key""" diff --git a/src/mixedbread/types/data_source_create_params.py b/src/mixedbread/types/data_source_create_params.py index 9424a1de..19ed3593 100644 --- a/src/mixedbread/types/data_source_create_params.py +++ b/src/mixedbread/types/data_source_create_params.py @@ -6,14 +6,9 @@ from typing_extensions import Literal, Required, TypeAlias, TypedDict from .oauth2_params import Oauth2Params +from .api_key_create_or_update_params import APIKeyCreateOrUpdateParams -__all__ = [ - "DataSourceCreateParams", - "NotionDataSource", - "NotionDataSourceAuthParams", - "NotionDataSourceAuthParamsAPIKeyCreateOrUpdateParams", - "LinearDataSource", -] +__all__ = ["DataSourceCreateParams", "NotionDataSource", "NotionDataSourceAuthParams", "LinearDataSource"] class NotionDataSource(TypedDict, total=False): @@ -33,16 +28,7 @@ class NotionDataSource(TypedDict, total=False): """ -class NotionDataSourceAuthParamsAPIKeyCreateOrUpdateParams(TypedDict, total=False): - """Base class for API key create or update parameters.""" - - type: Literal["api_key"] - - api_key: Required[str] - """The API key""" - - -NotionDataSourceAuthParams: TypeAlias = Union[Oauth2Params, NotionDataSourceAuthParamsAPIKeyCreateOrUpdateParams] +NotionDataSourceAuthParams: TypeAlias = Union[Oauth2Params, APIKeyCreateOrUpdateParams] class LinearDataSource(TypedDict, total=False): diff --git a/src/mixedbread/types/data_source_update_params.py b/src/mixedbread/types/data_source_update_params.py index 96880db4..d32b2504 100644 --- a/src/mixedbread/types/data_source_update_params.py +++ b/src/mixedbread/types/data_source_update_params.py @@ -6,14 +6,9 @@ from typing_extensions import Literal, 
Required, TypeAlias, TypedDict from .oauth2_params import Oauth2Params +from .api_key_create_or_update_params import APIKeyCreateOrUpdateParams -__all__ = [ - "DataSourceUpdateParams", - "NotionDataSource", - "NotionDataSourceAuthParams", - "NotionDataSourceAuthParamsAPIKeyCreateOrUpdateParams", - "LinearDataSource", -] +__all__ = ["DataSourceUpdateParams", "NotionDataSource", "NotionDataSourceAuthParams", "LinearDataSource"] class NotionDataSource(TypedDict, total=False): @@ -33,16 +28,7 @@ class NotionDataSource(TypedDict, total=False): """ -class NotionDataSourceAuthParamsAPIKeyCreateOrUpdateParams(TypedDict, total=False): - """Base class for API key create or update parameters.""" - - type: Literal["api_key"] - - api_key: Required[str] - """The API key""" - - -NotionDataSourceAuthParams: TypeAlias = Union[Oauth2Params, NotionDataSourceAuthParamsAPIKeyCreateOrUpdateParams] +NotionDataSourceAuthParams: TypeAlias = Union[Oauth2Params, APIKeyCreateOrUpdateParams] class LinearDataSource(TypedDict, total=False): diff --git a/src/mixedbread/types/file_counts.py b/src/mixedbread/types/file_counts.py new file mode 100644 index 00000000..a2daa5f8 --- /dev/null +++ b/src/mixedbread/types/file_counts.py @@ -0,0 +1,29 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from typing import Optional + +from .._models import BaseModel + +__all__ = ["FileCounts"] + + +class FileCounts(BaseModel): + """Tracks counts of files in different states within a store.""" + + pending: Optional[int] = None + """Number of files waiting to be processed""" + + in_progress: Optional[int] = None + """Number of files currently being processed""" + + cancelled: Optional[int] = None + """Number of files whose processing was cancelled""" + + completed: Optional[int] = None + """Number of successfully processed files""" + + failed: Optional[int] = None + """Number of files that failed processing""" + + total: Optional[int] = None + """Total number of files""" diff --git a/src/mixedbread/types/image_chunk_generated_metadata.py b/src/mixedbread/types/image_chunk_generated_metadata.py new file mode 100644 index 00000000..a46eff9a --- /dev/null +++ b/src/mixedbread/types/image_chunk_generated_metadata.py @@ -0,0 +1,36 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import TYPE_CHECKING, Dict, Optional +from typing_extensions import Literal + +from pydantic import Field as FieldInfo + +from .._models import BaseModel + +__all__ = ["ImageChunkGeneratedMetadata"] + + +class ImageChunkGeneratedMetadata(BaseModel): + type: Optional[Literal["image"]] = None + + file_type: Optional[str] = None + + file_size: Optional[int] = None + + width: Optional[int] = None + + height: Optional[int] = None + + file_extension: Optional[str] = None + + if TYPE_CHECKING: + # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a + # value to this field, so for compatibility we avoid doing it at runtime. + __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride] + + # Stub to indicate that arbitrary properties are accepted. + # To access properties that are not valid identifiers you can use `getattr`, e.g. 
+ # `getattr(obj, '$type')` + def __getattr__(self, attr: str) -> object: ... + else: + __pydantic_extra__: Dict[str, object] diff --git a/src/mixedbread/types/image_url_output.py b/src/mixedbread/types/image_url_output.py new file mode 100644 index 00000000..eab0a529 --- /dev/null +++ b/src/mixedbread/types/image_url_output.py @@ -0,0 +1,17 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional + +from .._models import BaseModel + +__all__ = ["ImageURLOutput"] + + +class ImageURLOutput(BaseModel): + """Model for image URL validation.""" + + url: str + """The image URL. Can be either a URL or a Data URI.""" + + format: Optional[str] = None + """The image format/mimetype""" diff --git a/src/mixedbread/types/markdown_chunk_generated_metadata.py b/src/mixedbread/types/markdown_chunk_generated_metadata.py new file mode 100644 index 00000000..e2e2f016 --- /dev/null +++ b/src/mixedbread/types/markdown_chunk_generated_metadata.py @@ -0,0 +1,47 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from typing import TYPE_CHECKING, Dict, List, Optional +from typing_extensions import Literal + +from pydantic import Field as FieldInfo + +from .._models import BaseModel +from .markdown_heading import MarkdownHeading + +__all__ = ["MarkdownChunkGeneratedMetadata"] + + +class MarkdownChunkGeneratedMetadata(BaseModel): + type: Optional[Literal["markdown"]] = None + + file_type: Optional[Literal["text/markdown"]] = None + + language: Optional[str] = None + + word_count: Optional[int] = None + + file_size: Optional[int] = None + + chunk_headings: Optional[List[MarkdownHeading]] = None + + heading_context: Optional[List[MarkdownHeading]] = None + + start_line: Optional[int] = None + + num_lines: Optional[int] = None + + file_extension: Optional[str] = None + + frontmatter: Optional[Dict[str, object]] = None + + if TYPE_CHECKING: + # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a + # value to this field, so for compatibility we avoid doing it at runtime. + __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride] + + # Stub to indicate that arbitrary properties are accepted. + # To access properties that are not valid identifiers you can use `getattr`, e.g. + # `getattr(obj, '$type')` + def __getattr__(self, attr: str) -> object: ... + else: + __pydantic_extra__: Dict[str, object] diff --git a/src/mixedbread/types/markdown_heading.py b/src/mixedbread/types/markdown_heading.py new file mode 100644 index 00000000..56a1f24d --- /dev/null +++ b/src/mixedbread/types/markdown_heading.py @@ -0,0 +1,11 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from .._models import BaseModel + +__all__ = ["MarkdownHeading"] + + +class MarkdownHeading(BaseModel): + level: int + + text: str diff --git a/src/mixedbread/types/notion_data_source_param.py b/src/mixedbread/types/notion_data_source_param.py index 12409ef6..a559a747 100644 --- a/src/mixedbread/types/notion_data_source_param.py +++ b/src/mixedbread/types/notion_data_source_param.py @@ -6,20 +6,11 @@ from typing_extensions import Literal, Required, TypeAlias, TypedDict from .oauth2_params import Oauth2Params +from .api_key_create_or_update_params import APIKeyCreateOrUpdateParams -__all__ = ["NotionDataSourceParam", "AuthParams", "AuthParamsAPIKeyCreateOrUpdateParams"] +__all__ = ["NotionDataSourceParam", "AuthParams"] - -class AuthParamsAPIKeyCreateOrUpdateParams(TypedDict, total=False): - """Base class for API key create or update parameters.""" - - type: Literal["api_key"] - - api_key: Required[str] - """The API key""" - - -AuthParams: TypeAlias = Union[Oauth2Params, AuthParamsAPIKeyCreateOrUpdateParams] +AuthParams: TypeAlias = Union[Oauth2Params, APIKeyCreateOrUpdateParams] class NotionDataSourceParam(TypedDict, total=False): diff --git a/src/mixedbread/types/parsing/__init__.py b/src/mixedbread/types/parsing/__init__.py index d3e38bbe..213c0245 100644 --- a/src/mixedbread/types/parsing/__init__.py +++ b/src/mixedbread/types/parsing/__init__.py @@ -2,8 +2,10 @@ from __future__ import annotations +from .chunk import Chunk as Chunk from .parsing_job import ParsingJob as ParsingJob from .element_type import ElementType as ElementType +from .chunk_element import ChunkElement as ChunkElement from .return_format import ReturnFormat as ReturnFormat from .job_list_params import JobListParams as JobListParams from .chunking_strategy import ChunkingStrategy as ChunkingStrategy @@ -11,3 +13,4 @@ from .job_list_response import JobListResponse as JobListResponse from .parsing_job_status import ParsingJobStatus as ParsingJobStatus from .job_delete_response import 
JobDeleteResponse as JobDeleteResponse +from .document_parser_result import DocumentParserResult as DocumentParserResult diff --git a/src/mixedbread/types/parsing/chunk.py b/src/mixedbread/types/parsing/chunk.py new file mode 100644 index 00000000..7f866102 --- /dev/null +++ b/src/mixedbread/types/parsing/chunk.py @@ -0,0 +1,21 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Optional + +from ..._models import BaseModel +from .chunk_element import ChunkElement + +__all__ = ["Chunk"] + + +class Chunk(BaseModel): + """A chunk of text extracted from a document page.""" + + content: Optional[str] = None + """The full content of the chunk""" + + content_to_embed: str + """The content of the chunk to embed""" + + elements: List[ChunkElement] + """List of elements contained in this chunk""" diff --git a/src/mixedbread/types/parsing/chunk_element.py b/src/mixedbread/types/parsing/chunk_element.py new file mode 100644 index 00000000..d5de068a --- /dev/null +++ b/src/mixedbread/types/parsing/chunk_element.py @@ -0,0 +1,33 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from typing import List, Optional + +from ..._models import BaseModel +from .element_type import ElementType + +__all__ = ["ChunkElement"] + + +class ChunkElement(BaseModel): + """Represents an extracted element from a document with its content and metadata.""" + + type: ElementType + """The type of the extracted element""" + + confidence: float + """The confidence score of the extraction""" + + bbox: List[object] + """The bounding box coordinates [x1, y1, x2, y2]""" + + page: int + """The page number where the element was found""" + + content: str + """The extracted text content of the element""" + + summary: Optional[str] = None + """A brief summary of the element's content""" + + image: Optional[str] = None + """The base64-encoded image data for figure elements""" diff --git a/src/mixedbread/types/parsing/document_parser_result.py b/src/mixedbread/types/parsing/document_parser_result.py new file mode 100644 index 00000000..09a35ec5 --- /dev/null +++ b/src/mixedbread/types/parsing/document_parser_result.py @@ -0,0 +1,30 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from typing import List, Optional + +from .chunk import Chunk +from ..._models import BaseModel +from .element_type import ElementType +from .return_format import ReturnFormat +from .chunking_strategy import ChunkingStrategy + +__all__ = ["DocumentParserResult"] + + +class DocumentParserResult(BaseModel): + """Result of document parsing operation.""" + + chunking_strategy: ChunkingStrategy + """The strategy used for chunking the document""" + + return_format: ReturnFormat + """The format of the returned content""" + + element_types: List[ElementType] + """The types of elements extracted""" + + chunks: List[Chunk] + """List of extracted chunks from the document""" + + page_sizes: Optional[List[List[object]]] = None + """List of (width, height) tuples for each page""" diff --git a/src/mixedbread/types/parsing/parsing_job.py b/src/mixedbread/types/parsing/parsing_job.py index 2f3d250d..48bbc793 100644 --- a/src/mixedbread/types/parsing/parsing_job.py +++ b/src/mixedbread/types/parsing/parsing_job.py @@ -1,73 +1,14 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
-from typing import Dict, List, Optional +from typing import Dict, Optional from datetime import datetime from typing_extensions import Literal from ..._models import BaseModel -from .element_type import ElementType -from .return_format import ReturnFormat -from .chunking_strategy import ChunkingStrategy from .parsing_job_status import ParsingJobStatus +from .document_parser_result import DocumentParserResult -__all__ = ["ParsingJob", "Result", "ResultChunk", "ResultChunkElement"] - - -class ResultChunkElement(BaseModel): - """Represents an extracted element from a document with its content and metadata.""" - - type: ElementType - """The type of the extracted element""" - - confidence: float - """The confidence score of the extraction""" - - bbox: List[object] - """The bounding box coordinates [x1, y1, x2, y2]""" - - page: int - """The page number where the element was found""" - - content: str - """The extracted text content of the element""" - - summary: Optional[str] = None - """A brief summary of the element's content""" - - image: Optional[str] = None - """The base64-encoded image data for figure elements""" - - -class ResultChunk(BaseModel): - """A chunk of text extracted from a document page.""" - - content: Optional[str] = None - """The full content of the chunk""" - - content_to_embed: str - """The content of the chunk to embed""" - - elements: List[ResultChunkElement] - """List of elements contained in this chunk""" - - -class Result(BaseModel): - """Result of document parsing operation.""" - - chunking_strategy: ChunkingStrategy - """The strategy used for chunking the document""" - - return_format: ReturnFormat - """The format of the returned content""" - - element_types: List[ElementType] - """The types of elements extracted""" - - chunks: List[ResultChunk] - """List of extracted chunks from the document""" - - page_sizes: Optional[List[List[object]]] = None - """List of (width, height) tuples for each page""" +__all__ = ["ParsingJob"] class 
ParsingJob(BaseModel): @@ -88,7 +29,7 @@ class ParsingJob(BaseModel): error: Optional[Dict[str, object]] = None """The error of the job""" - result: Optional[Result] = None + result: Optional[DocumentParserResult] = None """Result of document parsing operation.""" started_at: Optional[datetime] = None diff --git a/src/mixedbread/types/pdf_chunk_generated_metadata.py b/src/mixedbread/types/pdf_chunk_generated_metadata.py new file mode 100644 index 00000000..1dd9e9b6 --- /dev/null +++ b/src/mixedbread/types/pdf_chunk_generated_metadata.py @@ -0,0 +1,34 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import TYPE_CHECKING, Dict, Optional +from typing_extensions import Literal + +from pydantic import Field as FieldInfo + +from .._models import BaseModel + +__all__ = ["PdfChunkGeneratedMetadata"] + + +class PdfChunkGeneratedMetadata(BaseModel): + type: Optional[Literal["pdf"]] = None + + file_type: Optional[Literal["application/pdf"]] = None + + total_pages: Optional[int] = None + + total_size: Optional[int] = None + + file_extension: Optional[str] = None + + if TYPE_CHECKING: + # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a + # value to this field, so for compatibility we avoid doing it at runtime. + __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride] + + # Stub to indicate that arbitrary properties are accepted. + # To access properties that are not valid identifiers you can use `getattr`, e.g. + # `getattr(obj, '$type')` + def __getattr__(self, attr: str) -> object: ... + else: + __pydantic_extra__: Dict[str, object] diff --git a/src/mixedbread/types/rerank_config_param.py b/src/mixedbread/types/rerank_config_param.py new file mode 100644 index 00000000..1769a120 --- /dev/null +++ b/src/mixedbread/types/rerank_config_param.py @@ -0,0 +1,26 @@ +# File generated from our OpenAPI spec by Stainless. 
See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Union, Optional +from typing_extensions import TypedDict + +from .._types import SequenceNotStr + +__all__ = ["RerankConfigParam"] + + +class RerankConfigParam(TypedDict, total=False): + """Represents a reranking configuration.""" + + model: str + """The name of the reranking model""" + + with_metadata: Union[bool, SequenceNotStr[str]] + """Whether to include metadata in the reranked results""" + + top_k: Optional[int] + """Maximum number of results to return after reranking. + + If None, returns all reranked results. + """ diff --git a/src/mixedbread/types/scope.py b/src/mixedbread/types/scope.py new file mode 100644 index 00000000..269c8183 --- /dev/null +++ b/src/mixedbread/types/scope.py @@ -0,0 +1,16 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from .._models import BaseModel + +__all__ = ["Scope"] + + +class Scope(BaseModel): + method: Literal["read", "write", "delete", "list", "create", "search"] + + resource_type: Optional[Literal["store"]] = None + + resource_id: Optional[str] = None diff --git a/src/mixedbread/types/scope_param.py b/src/mixedbread/types/scope_param.py new file mode 100644 index 00000000..641eb0de --- /dev/null +++ b/src/mixedbread/types/scope_param.py @@ -0,0 +1,16 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from __future__ import annotations + +from typing import Optional +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["ScopeParam"] + + +class ScopeParam(TypedDict, total=False): + method: Required[Literal["read", "write", "delete", "list", "create", "search"]] + + resource_type: Optional[Literal["store"]] + + resource_id: Optional[str] diff --git a/src/mixedbread/types/scored_audio_url_input_chunk.py b/src/mixedbread/types/scored_audio_url_input_chunk.py index c82677c8..e12b2d96 100644 --- a/src/mixedbread/types/scored_audio_url_input_chunk.py +++ b/src/mixedbread/types/scored_audio_url_input_chunk.py @@ -1,277 +1,36 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. -from typing import TYPE_CHECKING, Dict, List, Union, Optional +from typing import Union, Optional from typing_extensions import Literal, Annotated, TypeAlias -from pydantic import Field as FieldInfo - from .._utils import PropertyInfo from .._models import BaseModel +from .audio_url import AudioURL +from .pdf_chunk_generated_metadata import PdfChunkGeneratedMetadata +from .code_chunk_generated_metadata import CodeChunkGeneratedMetadata +from .text_chunk_generated_metadata import TextChunkGeneratedMetadata +from .audio_chunk_generated_metadata import AudioChunkGeneratedMetadata +from .image_chunk_generated_metadata import ImageChunkGeneratedMetadata +from .video_chunk_generated_metadata import VideoChunkGeneratedMetadata +from .markdown_chunk_generated_metadata import MarkdownChunkGeneratedMetadata -__all__ = [ - "ScoredAudioURLInputChunk", - "GeneratedMetadata", - "GeneratedMetadataMarkdownChunkGeneratedMetadata", - "GeneratedMetadataMarkdownChunkGeneratedMetadataChunkHeading", - "GeneratedMetadataMarkdownChunkGeneratedMetadataHeadingContext", - "GeneratedMetadataTextChunkGeneratedMetadata", - "GeneratedMetadataPdfChunkGeneratedMetadata", - "GeneratedMetadataCodeChunkGeneratedMetadata", - "GeneratedMetadataAudioChunkGeneratedMetadata", - 
"GeneratedMetadataVideoChunkGeneratedMetadata", - "GeneratedMetadataImageChunkGeneratedMetadata", - "AudioURL", -] - - -class GeneratedMetadataMarkdownChunkGeneratedMetadataChunkHeading(BaseModel): - level: int - - text: str - - -class GeneratedMetadataMarkdownChunkGeneratedMetadataHeadingContext(BaseModel): - level: int - - text: str - - -class GeneratedMetadataMarkdownChunkGeneratedMetadata(BaseModel): - type: Optional[Literal["markdown"]] = None - - file_type: Optional[Literal["text/markdown"]] = None - - language: Optional[str] = None - - word_count: Optional[int] = None - - file_size: Optional[int] = None - - chunk_headings: Optional[List[GeneratedMetadataMarkdownChunkGeneratedMetadataChunkHeading]] = None - - heading_context: Optional[List[GeneratedMetadataMarkdownChunkGeneratedMetadataHeadingContext]] = None - - start_line: Optional[int] = None - - num_lines: Optional[int] = None - - file_extension: Optional[str] = None - - frontmatter: Optional[Dict[str, object]] = None - - if TYPE_CHECKING: - # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a - # value to this field, so for compatibility we avoid doing it at runtime. - __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride] - - # Stub to indicate that arbitrary properties are accepted. - # To access properties that are not valid identifiers you can use `getattr`, e.g. - # `getattr(obj, '$type')` - def __getattr__(self, attr: str) -> object: ... 
- else: - __pydantic_extra__: Dict[str, object] - - -class GeneratedMetadataTextChunkGeneratedMetadata(BaseModel): - type: Optional[Literal["text"]] = None - - file_type: Optional[Literal["text/plain"]] = None - - language: Optional[str] = None - - word_count: Optional[int] = None - - file_size: Optional[int] = None - - start_line: Optional[int] = None - - num_lines: Optional[int] = None - - file_extension: Optional[str] = None - - if TYPE_CHECKING: - # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a - # value to this field, so for compatibility we avoid doing it at runtime. - __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride] - - # Stub to indicate that arbitrary properties are accepted. - # To access properties that are not valid identifiers you can use `getattr`, e.g. - # `getattr(obj, '$type')` - def __getattr__(self, attr: str) -> object: ... - else: - __pydantic_extra__: Dict[str, object] - - -class GeneratedMetadataPdfChunkGeneratedMetadata(BaseModel): - type: Optional[Literal["pdf"]] = None - - file_type: Optional[Literal["application/pdf"]] = None - - total_pages: Optional[int] = None - - total_size: Optional[int] = None - - file_extension: Optional[str] = None - - if TYPE_CHECKING: - # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a - # value to this field, so for compatibility we avoid doing it at runtime. - __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride] - - # Stub to indicate that arbitrary properties are accepted. - # To access properties that are not valid identifiers you can use `getattr`, e.g. - # `getattr(obj, '$type')` - def __getattr__(self, attr: str) -> object: ... 
- else: - __pydantic_extra__: Dict[str, object] - - -class GeneratedMetadataCodeChunkGeneratedMetadata(BaseModel): - type: Optional[Literal["code"]] = None - - file_type: str - - language: Optional[str] = None - - word_count: Optional[int] = None - - file_size: Optional[int] = None - - start_line: Optional[int] = None - - num_lines: Optional[int] = None - - file_extension: Optional[str] = None - - if TYPE_CHECKING: - # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a - # value to this field, so for compatibility we avoid doing it at runtime. - __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride] - - # Stub to indicate that arbitrary properties are accepted. - # To access properties that are not valid identifiers you can use `getattr`, e.g. - # `getattr(obj, '$type')` - def __getattr__(self, attr: str) -> object: ... - else: - __pydantic_extra__: Dict[str, object] - - -class GeneratedMetadataAudioChunkGeneratedMetadata(BaseModel): - type: Optional[Literal["audio"]] = None - - file_type: Optional[str] = None - - file_size: Optional[int] = None - - total_duration_seconds: Optional[float] = None - - sample_rate: Optional[int] = None - - channels: Optional[int] = None - - audio_format: Optional[int] = None - - bpm: Optional[int] = None - - file_extension: Optional[str] = None - - if TYPE_CHECKING: - # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a - # value to this field, so for compatibility we avoid doing it at runtime. - __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride] - - # Stub to indicate that arbitrary properties are accepted. - # To access properties that are not valid identifiers you can use `getattr`, e.g. - # `getattr(obj, '$type')` - def __getattr__(self, attr: str) -> object: ... 
- else: - __pydantic_extra__: Dict[str, object] - - -class GeneratedMetadataVideoChunkGeneratedMetadata(BaseModel): - type: Optional[Literal["video"]] = None - - file_type: Optional[str] = None - - file_size: Optional[int] = None - - total_duration_seconds: Optional[float] = None - - fps: Optional[float] = None - - width: Optional[int] = None - - height: Optional[int] = None - - frame_count: Optional[int] = None - - has_audio_stream: Optional[bool] = None - - bpm: Optional[int] = None - - file_extension: Optional[str] = None - - if TYPE_CHECKING: - # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a - # value to this field, so for compatibility we avoid doing it at runtime. - __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride] - - # Stub to indicate that arbitrary properties are accepted. - # To access properties that are not valid identifiers you can use `getattr`, e.g. - # `getattr(obj, '$type')` - def __getattr__(self, attr: str) -> object: ... - else: - __pydantic_extra__: Dict[str, object] - - -class GeneratedMetadataImageChunkGeneratedMetadata(BaseModel): - type: Optional[Literal["image"]] = None - - file_type: Optional[str] = None - - file_size: Optional[int] = None - - width: Optional[int] = None - - height: Optional[int] = None - - file_extension: Optional[str] = None - - if TYPE_CHECKING: - # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a - # value to this field, so for compatibility we avoid doing it at runtime. - __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride] - - # Stub to indicate that arbitrary properties are accepted. - # To access properties that are not valid identifiers you can use `getattr`, e.g. - # `getattr(obj, '$type')` - def __getattr__(self, attr: str) -> object: ... 
- else: - __pydantic_extra__: Dict[str, object] - +__all__ = ["ScoredAudioURLInputChunk", "GeneratedMetadata"] GeneratedMetadata: TypeAlias = Annotated[ Union[ - GeneratedMetadataMarkdownChunkGeneratedMetadata, - GeneratedMetadataTextChunkGeneratedMetadata, - GeneratedMetadataPdfChunkGeneratedMetadata, - GeneratedMetadataCodeChunkGeneratedMetadata, - GeneratedMetadataAudioChunkGeneratedMetadata, - GeneratedMetadataVideoChunkGeneratedMetadata, - GeneratedMetadataImageChunkGeneratedMetadata, + MarkdownChunkGeneratedMetadata, + TextChunkGeneratedMetadata, + PdfChunkGeneratedMetadata, + CodeChunkGeneratedMetadata, + AudioChunkGeneratedMetadata, + VideoChunkGeneratedMetadata, + ImageChunkGeneratedMetadata, None, ], PropertyInfo(discriminator="type"), ] -class AudioURL(BaseModel): - """Model for audio URL validation.""" - - url: str - """The audio URL. Can be either a URL or a Data URI.""" - - class ScoredAudioURLInputChunk(BaseModel): chunk_index: int """position of the chunk in a file""" @@ -309,9 +68,6 @@ class ScoredAudioURLInputChunk(BaseModel): transcription: Optional[str] = None """speech recognition (sr) text of the audio""" - summary: Optional[str] = None - """summary of the audio""" - audio_url: Optional[AudioURL] = None """Model for audio URL validation.""" diff --git a/src/mixedbread/types/scored_image_url_input_chunk.py b/src/mixedbread/types/scored_image_url_input_chunk.py index ae453a4c..9b8c72a6 100644 --- a/src/mixedbread/types/scored_image_url_input_chunk.py +++ b/src/mixedbread/types/scored_image_url_input_chunk.py @@ -1,280 +1,36 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
-from typing import TYPE_CHECKING, Dict, List, Union, Optional +from typing import Union, Optional from typing_extensions import Literal, Annotated, TypeAlias -from pydantic import Field as FieldInfo - from .._utils import PropertyInfo from .._models import BaseModel +from .image_url_output import ImageURLOutput +from .pdf_chunk_generated_metadata import PdfChunkGeneratedMetadata +from .code_chunk_generated_metadata import CodeChunkGeneratedMetadata +from .text_chunk_generated_metadata import TextChunkGeneratedMetadata +from .audio_chunk_generated_metadata import AudioChunkGeneratedMetadata +from .image_chunk_generated_metadata import ImageChunkGeneratedMetadata +from .video_chunk_generated_metadata import VideoChunkGeneratedMetadata +from .markdown_chunk_generated_metadata import MarkdownChunkGeneratedMetadata -__all__ = [ - "ScoredImageURLInputChunk", - "GeneratedMetadata", - "GeneratedMetadataMarkdownChunkGeneratedMetadata", - "GeneratedMetadataMarkdownChunkGeneratedMetadataChunkHeading", - "GeneratedMetadataMarkdownChunkGeneratedMetadataHeadingContext", - "GeneratedMetadataTextChunkGeneratedMetadata", - "GeneratedMetadataPdfChunkGeneratedMetadata", - "GeneratedMetadataCodeChunkGeneratedMetadata", - "GeneratedMetadataAudioChunkGeneratedMetadata", - "GeneratedMetadataVideoChunkGeneratedMetadata", - "GeneratedMetadataImageChunkGeneratedMetadata", - "ImageURL", -] - - -class GeneratedMetadataMarkdownChunkGeneratedMetadataChunkHeading(BaseModel): - level: int - - text: str - - -class GeneratedMetadataMarkdownChunkGeneratedMetadataHeadingContext(BaseModel): - level: int - - text: str - - -class GeneratedMetadataMarkdownChunkGeneratedMetadata(BaseModel): - type: Optional[Literal["markdown"]] = None - - file_type: Optional[Literal["text/markdown"]] = None - - language: Optional[str] = None - - word_count: Optional[int] = None - - file_size: Optional[int] = None - - chunk_headings: Optional[List[GeneratedMetadataMarkdownChunkGeneratedMetadataChunkHeading]] = None - - 
heading_context: Optional[List[GeneratedMetadataMarkdownChunkGeneratedMetadataHeadingContext]] = None - - start_line: Optional[int] = None - - num_lines: Optional[int] = None - - file_extension: Optional[str] = None - - frontmatter: Optional[Dict[str, object]] = None - - if TYPE_CHECKING: - # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a - # value to this field, so for compatibility we avoid doing it at runtime. - __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride] - - # Stub to indicate that arbitrary properties are accepted. - # To access properties that are not valid identifiers you can use `getattr`, e.g. - # `getattr(obj, '$type')` - def __getattr__(self, attr: str) -> object: ... - else: - __pydantic_extra__: Dict[str, object] - - -class GeneratedMetadataTextChunkGeneratedMetadata(BaseModel): - type: Optional[Literal["text"]] = None - - file_type: Optional[Literal["text/plain"]] = None - - language: Optional[str] = None - - word_count: Optional[int] = None - - file_size: Optional[int] = None - - start_line: Optional[int] = None - - num_lines: Optional[int] = None - - file_extension: Optional[str] = None - - if TYPE_CHECKING: - # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a - # value to this field, so for compatibility we avoid doing it at runtime. - __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride] - - # Stub to indicate that arbitrary properties are accepted. - # To access properties that are not valid identifiers you can use `getattr`, e.g. - # `getattr(obj, '$type')` - def __getattr__(self, attr: str) -> object: ... 
- else: - __pydantic_extra__: Dict[str, object] - - -class GeneratedMetadataPdfChunkGeneratedMetadata(BaseModel): - type: Optional[Literal["pdf"]] = None - - file_type: Optional[Literal["application/pdf"]] = None - - total_pages: Optional[int] = None - - total_size: Optional[int] = None - - file_extension: Optional[str] = None - - if TYPE_CHECKING: - # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a - # value to this field, so for compatibility we avoid doing it at runtime. - __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride] - - # Stub to indicate that arbitrary properties are accepted. - # To access properties that are not valid identifiers you can use `getattr`, e.g. - # `getattr(obj, '$type')` - def __getattr__(self, attr: str) -> object: ... - else: - __pydantic_extra__: Dict[str, object] - - -class GeneratedMetadataCodeChunkGeneratedMetadata(BaseModel): - type: Optional[Literal["code"]] = None - - file_type: str - - language: Optional[str] = None - - word_count: Optional[int] = None - - file_size: Optional[int] = None - - start_line: Optional[int] = None - - num_lines: Optional[int] = None - - file_extension: Optional[str] = None - - if TYPE_CHECKING: - # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a - # value to this field, so for compatibility we avoid doing it at runtime. - __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride] - - # Stub to indicate that arbitrary properties are accepted. - # To access properties that are not valid identifiers you can use `getattr`, e.g. - # `getattr(obj, '$type')` - def __getattr__(self, attr: str) -> object: ... 
- else: - __pydantic_extra__: Dict[str, object] - - -class GeneratedMetadataAudioChunkGeneratedMetadata(BaseModel): - type: Optional[Literal["audio"]] = None - - file_type: Optional[str] = None - - file_size: Optional[int] = None - - total_duration_seconds: Optional[float] = None - - sample_rate: Optional[int] = None - - channels: Optional[int] = None - - audio_format: Optional[int] = None - - bpm: Optional[int] = None - - file_extension: Optional[str] = None - - if TYPE_CHECKING: - # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a - # value to this field, so for compatibility we avoid doing it at runtime. - __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride] - - # Stub to indicate that arbitrary properties are accepted. - # To access properties that are not valid identifiers you can use `getattr`, e.g. - # `getattr(obj, '$type')` - def __getattr__(self, attr: str) -> object: ... - else: - __pydantic_extra__: Dict[str, object] - - -class GeneratedMetadataVideoChunkGeneratedMetadata(BaseModel): - type: Optional[Literal["video"]] = None - - file_type: Optional[str] = None - - file_size: Optional[int] = None - - total_duration_seconds: Optional[float] = None - - fps: Optional[float] = None - - width: Optional[int] = None - - height: Optional[int] = None - - frame_count: Optional[int] = None - - has_audio_stream: Optional[bool] = None - - bpm: Optional[int] = None - - file_extension: Optional[str] = None - - if TYPE_CHECKING: - # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a - # value to this field, so for compatibility we avoid doing it at runtime. - __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride] - - # Stub to indicate that arbitrary properties are accepted. - # To access properties that are not valid identifiers you can use `getattr`, e.g. 
- # `getattr(obj, '$type')` - def __getattr__(self, attr: str) -> object: ... - else: - __pydantic_extra__: Dict[str, object] - - -class GeneratedMetadataImageChunkGeneratedMetadata(BaseModel): - type: Optional[Literal["image"]] = None - - file_type: Optional[str] = None - - file_size: Optional[int] = None - - width: Optional[int] = None - - height: Optional[int] = None - - file_extension: Optional[str] = None - - if TYPE_CHECKING: - # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a - # value to this field, so for compatibility we avoid doing it at runtime. - __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride] - - # Stub to indicate that arbitrary properties are accepted. - # To access properties that are not valid identifiers you can use `getattr`, e.g. - # `getattr(obj, '$type')` - def __getattr__(self, attr: str) -> object: ... - else: - __pydantic_extra__: Dict[str, object] - +__all__ = ["ScoredImageURLInputChunk", "GeneratedMetadata"] GeneratedMetadata: TypeAlias = Annotated[ Union[ - GeneratedMetadataMarkdownChunkGeneratedMetadata, - GeneratedMetadataTextChunkGeneratedMetadata, - GeneratedMetadataPdfChunkGeneratedMetadata, - GeneratedMetadataCodeChunkGeneratedMetadata, - GeneratedMetadataAudioChunkGeneratedMetadata, - GeneratedMetadataVideoChunkGeneratedMetadata, - GeneratedMetadataImageChunkGeneratedMetadata, + MarkdownChunkGeneratedMetadata, + TextChunkGeneratedMetadata, + PdfChunkGeneratedMetadata, + CodeChunkGeneratedMetadata, + AudioChunkGeneratedMetadata, + VideoChunkGeneratedMetadata, + ImageChunkGeneratedMetadata, None, ], PropertyInfo(discriminator="type"), ] -class ImageURL(BaseModel): - """Model for image URL validation.""" - - url: str - """The image URL. 
Can be either a URL or a Data URI.""" - - format: Optional[str] = None - """The image format/mimetype""" - - class ScoredImageURLInputChunk(BaseModel): chunk_index: int """position of the chunk in a file""" @@ -315,5 +71,5 @@ class ScoredImageURLInputChunk(BaseModel): summary: Optional[str] = None """summary of the image""" - image_url: Optional[ImageURL] = None + image_url: Optional[ImageURLOutput] = None """Model for image URL validation.""" diff --git a/src/mixedbread/types/scored_text_input_chunk.py b/src/mixedbread/types/scored_text_input_chunk.py index e4523a40..d801910d 100644 --- a/src/mixedbread/types/scored_text_input_chunk.py +++ b/src/mixedbread/types/scored_text_input_chunk.py @@ -1,263 +1,29 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. -from typing import TYPE_CHECKING, Dict, List, Union, Optional +from typing import Union, Optional from typing_extensions import Literal, Annotated, TypeAlias -from pydantic import Field as FieldInfo - from .._utils import PropertyInfo from .._models import BaseModel +from .pdf_chunk_generated_metadata import PdfChunkGeneratedMetadata +from .code_chunk_generated_metadata import CodeChunkGeneratedMetadata +from .text_chunk_generated_metadata import TextChunkGeneratedMetadata +from .audio_chunk_generated_metadata import AudioChunkGeneratedMetadata +from .image_chunk_generated_metadata import ImageChunkGeneratedMetadata +from .video_chunk_generated_metadata import VideoChunkGeneratedMetadata +from .markdown_chunk_generated_metadata import MarkdownChunkGeneratedMetadata -__all__ = [ - "ScoredTextInputChunk", - "GeneratedMetadata", - "GeneratedMetadataMarkdownChunkGeneratedMetadata", - "GeneratedMetadataMarkdownChunkGeneratedMetadataChunkHeading", - "GeneratedMetadataMarkdownChunkGeneratedMetadataHeadingContext", - "GeneratedMetadataTextChunkGeneratedMetadata", - "GeneratedMetadataPdfChunkGeneratedMetadata", - "GeneratedMetadataCodeChunkGeneratedMetadata", - 
"GeneratedMetadataAudioChunkGeneratedMetadata", - "GeneratedMetadataVideoChunkGeneratedMetadata", - "GeneratedMetadataImageChunkGeneratedMetadata", -] - - -class GeneratedMetadataMarkdownChunkGeneratedMetadataChunkHeading(BaseModel): - level: int - - text: str - - -class GeneratedMetadataMarkdownChunkGeneratedMetadataHeadingContext(BaseModel): - level: int - - text: str - - -class GeneratedMetadataMarkdownChunkGeneratedMetadata(BaseModel): - type: Optional[Literal["markdown"]] = None - - file_type: Optional[Literal["text/markdown"]] = None - - language: Optional[str] = None - - word_count: Optional[int] = None - - file_size: Optional[int] = None - - chunk_headings: Optional[List[GeneratedMetadataMarkdownChunkGeneratedMetadataChunkHeading]] = None - - heading_context: Optional[List[GeneratedMetadataMarkdownChunkGeneratedMetadataHeadingContext]] = None - - start_line: Optional[int] = None - - num_lines: Optional[int] = None - - file_extension: Optional[str] = None - - frontmatter: Optional[Dict[str, object]] = None - - if TYPE_CHECKING: - # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a - # value to this field, so for compatibility we avoid doing it at runtime. - __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride] - - # Stub to indicate that arbitrary properties are accepted. - # To access properties that are not valid identifiers you can use `getattr`, e.g. - # `getattr(obj, '$type')` - def __getattr__(self, attr: str) -> object: ... 
- else: - __pydantic_extra__: Dict[str, object] - - -class GeneratedMetadataTextChunkGeneratedMetadata(BaseModel): - type: Optional[Literal["text"]] = None - - file_type: Optional[Literal["text/plain"]] = None - - language: Optional[str] = None - - word_count: Optional[int] = None - - file_size: Optional[int] = None - - start_line: Optional[int] = None - - num_lines: Optional[int] = None - - file_extension: Optional[str] = None - - if TYPE_CHECKING: - # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a - # value to this field, so for compatibility we avoid doing it at runtime. - __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride] - - # Stub to indicate that arbitrary properties are accepted. - # To access properties that are not valid identifiers you can use `getattr`, e.g. - # `getattr(obj, '$type')` - def __getattr__(self, attr: str) -> object: ... - else: - __pydantic_extra__: Dict[str, object] - - -class GeneratedMetadataPdfChunkGeneratedMetadata(BaseModel): - type: Optional[Literal["pdf"]] = None - - file_type: Optional[Literal["application/pdf"]] = None - - total_pages: Optional[int] = None - - total_size: Optional[int] = None - - file_extension: Optional[str] = None - - if TYPE_CHECKING: - # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a - # value to this field, so for compatibility we avoid doing it at runtime. - __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride] - - # Stub to indicate that arbitrary properties are accepted. - # To access properties that are not valid identifiers you can use `getattr`, e.g. - # `getattr(obj, '$type')` - def __getattr__(self, attr: str) -> object: ... 
- else: - __pydantic_extra__: Dict[str, object] - - -class GeneratedMetadataCodeChunkGeneratedMetadata(BaseModel): - type: Optional[Literal["code"]] = None - - file_type: str - - language: Optional[str] = None - - word_count: Optional[int] = None - - file_size: Optional[int] = None - - start_line: Optional[int] = None - - num_lines: Optional[int] = None - - file_extension: Optional[str] = None - - if TYPE_CHECKING: - # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a - # value to this field, so for compatibility we avoid doing it at runtime. - __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride] - - # Stub to indicate that arbitrary properties are accepted. - # To access properties that are not valid identifiers you can use `getattr`, e.g. - # `getattr(obj, '$type')` - def __getattr__(self, attr: str) -> object: ... - else: - __pydantic_extra__: Dict[str, object] - - -class GeneratedMetadataAudioChunkGeneratedMetadata(BaseModel): - type: Optional[Literal["audio"]] = None - - file_type: Optional[str] = None - - file_size: Optional[int] = None - - total_duration_seconds: Optional[float] = None - - sample_rate: Optional[int] = None - - channels: Optional[int] = None - - audio_format: Optional[int] = None - - bpm: Optional[int] = None - - file_extension: Optional[str] = None - - if TYPE_CHECKING: - # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a - # value to this field, so for compatibility we avoid doing it at runtime. - __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride] - - # Stub to indicate that arbitrary properties are accepted. - # To access properties that are not valid identifiers you can use `getattr`, e.g. - # `getattr(obj, '$type')` - def __getattr__(self, attr: str) -> object: ... 
- else: - __pydantic_extra__: Dict[str, object] - - -class GeneratedMetadataVideoChunkGeneratedMetadata(BaseModel): - type: Optional[Literal["video"]] = None - - file_type: Optional[str] = None - - file_size: Optional[int] = None - - total_duration_seconds: Optional[float] = None - - fps: Optional[float] = None - - width: Optional[int] = None - - height: Optional[int] = None - - frame_count: Optional[int] = None - - has_audio_stream: Optional[bool] = None - - bpm: Optional[int] = None - - file_extension: Optional[str] = None - - if TYPE_CHECKING: - # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a - # value to this field, so for compatibility we avoid doing it at runtime. - __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride] - - # Stub to indicate that arbitrary properties are accepted. - # To access properties that are not valid identifiers you can use `getattr`, e.g. - # `getattr(obj, '$type')` - def __getattr__(self, attr: str) -> object: ... - else: - __pydantic_extra__: Dict[str, object] - - -class GeneratedMetadataImageChunkGeneratedMetadata(BaseModel): - type: Optional[Literal["image"]] = None - - file_type: Optional[str] = None - - file_size: Optional[int] = None - - width: Optional[int] = None - - height: Optional[int] = None - - file_extension: Optional[str] = None - - if TYPE_CHECKING: - # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a - # value to this field, so for compatibility we avoid doing it at runtime. - __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride] - - # Stub to indicate that arbitrary properties are accepted. - # To access properties that are not valid identifiers you can use `getattr`, e.g. - # `getattr(obj, '$type')` - def __getattr__(self, attr: str) -> object: ... 
- else: - __pydantic_extra__: Dict[str, object] - +__all__ = ["ScoredTextInputChunk", "GeneratedMetadata"] GeneratedMetadata: TypeAlias = Annotated[ Union[ - GeneratedMetadataMarkdownChunkGeneratedMetadata, - GeneratedMetadataTextChunkGeneratedMetadata, - GeneratedMetadataPdfChunkGeneratedMetadata, - GeneratedMetadataCodeChunkGeneratedMetadata, - GeneratedMetadataAudioChunkGeneratedMetadata, - GeneratedMetadataVideoChunkGeneratedMetadata, - GeneratedMetadataImageChunkGeneratedMetadata, + MarkdownChunkGeneratedMetadata, + TextChunkGeneratedMetadata, + PdfChunkGeneratedMetadata, + CodeChunkGeneratedMetadata, + AudioChunkGeneratedMetadata, + VideoChunkGeneratedMetadata, + ImageChunkGeneratedMetadata, None, ], PropertyInfo(discriminator="type"), @@ -303,3 +69,6 @@ class ScoredTextInputChunk(BaseModel): text: Optional[str] = None """Text content""" + + context: Optional[str] = None + """LLM-generated context that situates this chunk within its source document""" diff --git a/src/mixedbread/types/scored_video_url_input_chunk.py b/src/mixedbread/types/scored_video_url_input_chunk.py index 7be647ff..a9a6a64d 100644 --- a/src/mixedbread/types/scored_video_url_input_chunk.py +++ b/src/mixedbread/types/scored_video_url_input_chunk.py @@ -1,277 +1,36 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
-from typing import TYPE_CHECKING, Dict, List, Union, Optional +from typing import Union, Optional from typing_extensions import Literal, Annotated, TypeAlias -from pydantic import Field as FieldInfo - from .._utils import PropertyInfo from .._models import BaseModel +from .video_url import VideoURL +from .pdf_chunk_generated_metadata import PdfChunkGeneratedMetadata +from .code_chunk_generated_metadata import CodeChunkGeneratedMetadata +from .text_chunk_generated_metadata import TextChunkGeneratedMetadata +from .audio_chunk_generated_metadata import AudioChunkGeneratedMetadata +from .image_chunk_generated_metadata import ImageChunkGeneratedMetadata +from .video_chunk_generated_metadata import VideoChunkGeneratedMetadata +from .markdown_chunk_generated_metadata import MarkdownChunkGeneratedMetadata -__all__ = [ - "ScoredVideoURLInputChunk", - "GeneratedMetadata", - "GeneratedMetadataMarkdownChunkGeneratedMetadata", - "GeneratedMetadataMarkdownChunkGeneratedMetadataChunkHeading", - "GeneratedMetadataMarkdownChunkGeneratedMetadataHeadingContext", - "GeneratedMetadataTextChunkGeneratedMetadata", - "GeneratedMetadataPdfChunkGeneratedMetadata", - "GeneratedMetadataCodeChunkGeneratedMetadata", - "GeneratedMetadataAudioChunkGeneratedMetadata", - "GeneratedMetadataVideoChunkGeneratedMetadata", - "GeneratedMetadataImageChunkGeneratedMetadata", - "VideoURL", -] - - -class GeneratedMetadataMarkdownChunkGeneratedMetadataChunkHeading(BaseModel): - level: int - - text: str - - -class GeneratedMetadataMarkdownChunkGeneratedMetadataHeadingContext(BaseModel): - level: int - - text: str - - -class GeneratedMetadataMarkdownChunkGeneratedMetadata(BaseModel): - type: Optional[Literal["markdown"]] = None - - file_type: Optional[Literal["text/markdown"]] = None - - language: Optional[str] = None - - word_count: Optional[int] = None - - file_size: Optional[int] = None - - chunk_headings: Optional[List[GeneratedMetadataMarkdownChunkGeneratedMetadataChunkHeading]] = None - - 
heading_context: Optional[List[GeneratedMetadataMarkdownChunkGeneratedMetadataHeadingContext]] = None - - start_line: Optional[int] = None - - num_lines: Optional[int] = None - - file_extension: Optional[str] = None - - frontmatter: Optional[Dict[str, object]] = None - - if TYPE_CHECKING: - # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a - # value to this field, so for compatibility we avoid doing it at runtime. - __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride] - - # Stub to indicate that arbitrary properties are accepted. - # To access properties that are not valid identifiers you can use `getattr`, e.g. - # `getattr(obj, '$type')` - def __getattr__(self, attr: str) -> object: ... - else: - __pydantic_extra__: Dict[str, object] - - -class GeneratedMetadataTextChunkGeneratedMetadata(BaseModel): - type: Optional[Literal["text"]] = None - - file_type: Optional[Literal["text/plain"]] = None - - language: Optional[str] = None - - word_count: Optional[int] = None - - file_size: Optional[int] = None - - start_line: Optional[int] = None - - num_lines: Optional[int] = None - - file_extension: Optional[str] = None - - if TYPE_CHECKING: - # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a - # value to this field, so for compatibility we avoid doing it at runtime. - __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride] - - # Stub to indicate that arbitrary properties are accepted. - # To access properties that are not valid identifiers you can use `getattr`, e.g. - # `getattr(obj, '$type')` - def __getattr__(self, attr: str) -> object: ... 
- else: - __pydantic_extra__: Dict[str, object] - - -class GeneratedMetadataPdfChunkGeneratedMetadata(BaseModel): - type: Optional[Literal["pdf"]] = None - - file_type: Optional[Literal["application/pdf"]] = None - - total_pages: Optional[int] = None - - total_size: Optional[int] = None - - file_extension: Optional[str] = None - - if TYPE_CHECKING: - # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a - # value to this field, so for compatibility we avoid doing it at runtime. - __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride] - - # Stub to indicate that arbitrary properties are accepted. - # To access properties that are not valid identifiers you can use `getattr`, e.g. - # `getattr(obj, '$type')` - def __getattr__(self, attr: str) -> object: ... - else: - __pydantic_extra__: Dict[str, object] - - -class GeneratedMetadataCodeChunkGeneratedMetadata(BaseModel): - type: Optional[Literal["code"]] = None - - file_type: str - - language: Optional[str] = None - - word_count: Optional[int] = None - - file_size: Optional[int] = None - - start_line: Optional[int] = None - - num_lines: Optional[int] = None - - file_extension: Optional[str] = None - - if TYPE_CHECKING: - # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a - # value to this field, so for compatibility we avoid doing it at runtime. - __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride] - - # Stub to indicate that arbitrary properties are accepted. - # To access properties that are not valid identifiers you can use `getattr`, e.g. - # `getattr(obj, '$type')` - def __getattr__(self, attr: str) -> object: ... 
- else: - __pydantic_extra__: Dict[str, object] - - -class GeneratedMetadataAudioChunkGeneratedMetadata(BaseModel): - type: Optional[Literal["audio"]] = None - - file_type: Optional[str] = None - - file_size: Optional[int] = None - - total_duration_seconds: Optional[float] = None - - sample_rate: Optional[int] = None - - channels: Optional[int] = None - - audio_format: Optional[int] = None - - bpm: Optional[int] = None - - file_extension: Optional[str] = None - - if TYPE_CHECKING: - # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a - # value to this field, so for compatibility we avoid doing it at runtime. - __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride] - - # Stub to indicate that arbitrary properties are accepted. - # To access properties that are not valid identifiers you can use `getattr`, e.g. - # `getattr(obj, '$type')` - def __getattr__(self, attr: str) -> object: ... - else: - __pydantic_extra__: Dict[str, object] - - -class GeneratedMetadataVideoChunkGeneratedMetadata(BaseModel): - type: Optional[Literal["video"]] = None - - file_type: Optional[str] = None - - file_size: Optional[int] = None - - total_duration_seconds: Optional[float] = None - - fps: Optional[float] = None - - width: Optional[int] = None - - height: Optional[int] = None - - frame_count: Optional[int] = None - - has_audio_stream: Optional[bool] = None - - bpm: Optional[int] = None - - file_extension: Optional[str] = None - - if TYPE_CHECKING: - # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a - # value to this field, so for compatibility we avoid doing it at runtime. - __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride] - - # Stub to indicate that arbitrary properties are accepted. - # To access properties that are not valid identifiers you can use `getattr`, e.g. 
- # `getattr(obj, '$type')` - def __getattr__(self, attr: str) -> object: ... - else: - __pydantic_extra__: Dict[str, object] - - -class GeneratedMetadataImageChunkGeneratedMetadata(BaseModel): - type: Optional[Literal["image"]] = None - - file_type: Optional[str] = None - - file_size: Optional[int] = None - - width: Optional[int] = None - - height: Optional[int] = None - - file_extension: Optional[str] = None - - if TYPE_CHECKING: - # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a - # value to this field, so for compatibility we avoid doing it at runtime. - __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride] - - # Stub to indicate that arbitrary properties are accepted. - # To access properties that are not valid identifiers you can use `getattr`, e.g. - # `getattr(obj, '$type')` - def __getattr__(self, attr: str) -> object: ... - else: - __pydantic_extra__: Dict[str, object] - +__all__ = ["ScoredVideoURLInputChunk", "GeneratedMetadata"] GeneratedMetadata: TypeAlias = Annotated[ Union[ - GeneratedMetadataMarkdownChunkGeneratedMetadata, - GeneratedMetadataTextChunkGeneratedMetadata, - GeneratedMetadataPdfChunkGeneratedMetadata, - GeneratedMetadataCodeChunkGeneratedMetadata, - GeneratedMetadataAudioChunkGeneratedMetadata, - GeneratedMetadataVideoChunkGeneratedMetadata, - GeneratedMetadataImageChunkGeneratedMetadata, + MarkdownChunkGeneratedMetadata, + TextChunkGeneratedMetadata, + PdfChunkGeneratedMetadata, + CodeChunkGeneratedMetadata, + AudioChunkGeneratedMetadata, + VideoChunkGeneratedMetadata, + ImageChunkGeneratedMetadata, None, ], PropertyInfo(discriminator="type"), ] -class VideoURL(BaseModel): - """Model for video URL validation.""" - - url: str - """The video URL. 
Can be either a URL or a Data URI.""" - - class ScoredVideoURLInputChunk(BaseModel): chunk_index: int """position of the chunk in a file""" diff --git a/src/mixedbread/types/store.py b/src/mixedbread/types/store.py index 83de8954..2f1527fe 100644 --- a/src/mixedbread/types/store.py +++ b/src/mixedbread/types/store.py @@ -1,63 +1,15 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. -from typing import List, Union, Optional +from typing import Optional from datetime import datetime -from typing_extensions import Literal, TypeAlias +from typing_extensions import Literal from .._models import BaseModel +from .file_counts import FileCounts +from .store_config import StoreConfig from .expires_after import ExpiresAfter -__all__ = ["Store", "Config", "ConfigContextualization", "ConfigContextualizationContextualizationConfig", "FileCounts"] - - -class ConfigContextualizationContextualizationConfig(BaseModel): - with_metadata: Union[bool, List[str], None] = None - """Include all metadata or specific fields in the contextualization. - - Supports dot notation for nested fields (e.g., 'author.name'). When True, all - metadata is included (flattened). When a list, only specified fields are - included. - """ - - -ConfigContextualization: TypeAlias = Union[bool, ConfigContextualizationContextualizationConfig] - - -class Config(BaseModel): - """Configuration for a store.""" - - contextualization: Optional[ConfigContextualization] = None - """Contextualize files with metadata""" - - save_content: Optional[bool] = None - """Whether to save original content in the store. - - When False, only vectors are indexed without the original content (index-only - mode). This is useful for data privacy. Note: Reranking is not supported when - content is not saved. 
- """ - - -class FileCounts(BaseModel): - """Counts of files in different states""" - - pending: Optional[int] = None - """Number of files waiting to be processed""" - - in_progress: Optional[int] = None - """Number of files currently being processed""" - - cancelled: Optional[int] = None - """Number of files whose processing was cancelled""" - - completed: Optional[int] = None - """Number of successfully processed files""" - - failed: Optional[int] = None - """Number of files that failed processing""" - - total: Optional[int] = None - """Total number of files""" +__all__ = ["Store"] class Store(BaseModel): @@ -75,10 +27,13 @@ class Store(BaseModel): is_public: Optional[bool] = None """Whether the store can be accessed by anyone with valid login credentials""" + license: Optional[str] = None + """License for public stores""" + metadata: Optional[object] = None """Additional metadata associated with the store""" - config: Optional[Config] = None + config: Optional[StoreConfig] = None """Configuration for a store.""" file_counts: Optional[FileCounts] = None diff --git a/src/mixedbread/types/store_chunk_search_options_param.py b/src/mixedbread/types/store_chunk_search_options_param.py index 95d76cd5..44d79d54 100644 --- a/src/mixedbread/types/store_chunk_search_options_param.py +++ b/src/mixedbread/types/store_chunk_search_options_param.py @@ -5,47 +5,14 @@ from typing import Union, Optional from typing_extensions import TypeAlias, TypedDict -from .._types import SequenceNotStr +from .rerank_config_param import RerankConfigParam +from .agentic_search_config_param import AgenticSearchConfigParam -__all__ = ["StoreChunkSearchOptionsParam", "Rerank", "RerankRerankConfig", "Agentic", "AgenticAgenticSearchConfig"] +__all__ = ["StoreChunkSearchOptionsParam", "Rerank", "Agentic"] +Rerank: TypeAlias = Union[bool, RerankConfigParam] -class RerankRerankConfig(TypedDict, total=False): - """Represents a reranking configuration.""" - - model: str - """The name of the reranking 
model""" - - with_metadata: Union[bool, SequenceNotStr[str]] - """Whether to include metadata in the reranked results""" - - top_k: Optional[int] - """Maximum number of results to return after reranking. - - If None, returns all reranked results. - """ - - -Rerank: TypeAlias = Union[bool, RerankRerankConfig] - - -class AgenticAgenticSearchConfig(TypedDict, total=False): - """Configuration for agentic multi-query search.""" - - max_rounds: int - """Maximum number of search rounds""" - - queries_per_round: int - """Maximum queries per round""" - - instructions: Optional[str] - """ - Additional custom instructions (followed only when not in conflict with existing - rules) - """ - - -Agentic: TypeAlias = Union[bool, AgenticAgenticSearchConfig] +Agentic: TypeAlias = Union[bool, AgenticSearchConfigParam] class StoreChunkSearchOptionsParam(TypedDict, total=False): diff --git a/src/mixedbread/types/store_config.py b/src/mixedbread/types/store_config.py new file mode 100644 index 00000000..b8838ed4 --- /dev/null +++ b/src/mixedbread/types/store_config.py @@ -0,0 +1,26 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Union, Optional +from typing_extensions import TypeAlias + +from .._models import BaseModel +from .contextualization_config import ContextualizationConfig + +__all__ = ["StoreConfig", "Contextualization"] + +Contextualization: TypeAlias = Union[bool, ContextualizationConfig] + + +class StoreConfig(BaseModel): + """Configuration for a store.""" + + contextualization: Optional[Contextualization] = None + """Contextualize files with metadata""" + + save_content: Optional[bool] = None + """Whether to save original content in the store. + + When False, only vectors are indexed without the original content (index-only + mode). This is useful for data privacy. Note: Reranking is not supported when + content is not saved. 
+ """ diff --git a/src/mixedbread/types/store_config_param.py b/src/mixedbread/types/store_config_param.py new file mode 100644 index 00000000..56f8f0c8 --- /dev/null +++ b/src/mixedbread/types/store_config_param.py @@ -0,0 +1,27 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Union +from typing_extensions import TypeAlias, TypedDict + +from .contextualization_config_param import ContextualizationConfigParam + +__all__ = ["StoreConfigParam", "Contextualization"] + +Contextualization: TypeAlias = Union[bool, ContextualizationConfigParam] + + +class StoreConfigParam(TypedDict, total=False): + """Configuration for a store.""" + + contextualization: Contextualization + """Contextualize files with metadata""" + + save_content: bool + """Whether to save original content in the store. + + When False, only vectors are indexed without the original content (index-only + mode). This is useful for data privacy. Note: Reranking is not supported when + content is not saved. 
+ """ diff --git a/src/mixedbread/types/store_create_params.py b/src/mixedbread/types/store_create_params.py index b7814cdd..eb90d776 100644 --- a/src/mixedbread/types/store_create_params.py +++ b/src/mixedbread/types/store_create_params.py @@ -2,13 +2,14 @@ from __future__ import annotations -from typing import Union, Optional -from typing_extensions import TypeAlias, TypedDict +from typing import Optional +from typing_extensions import TypedDict from .._types import SequenceNotStr +from .store_config_param import StoreConfigParam from .expires_after_param import ExpiresAfterParam -__all__ = ["StoreCreateParams", "Config", "ConfigContextualization", "ConfigContextualizationContextualizationConfig"] +__all__ = ["StoreCreateParams"] class StoreCreateParams(TypedDict, total=False): @@ -24,42 +25,17 @@ class StoreCreateParams(TypedDict, total=False): is_public: bool """Whether the store can be accessed by anyone with valid login credentials""" + license: Optional[str] + """License for public stores""" + expires_after: Optional[ExpiresAfterParam] """Represents an expiration policy for a store.""" metadata: object """Optional metadata key-value pairs""" - config: Optional[Config] + config: Optional[StoreConfigParam] """Configuration for a store.""" file_ids: Optional[SequenceNotStr[str]] """Optional list of file IDs""" - - -class ConfigContextualizationContextualizationConfig(TypedDict, total=False): - with_metadata: Union[bool, SequenceNotStr[str]] - """Include all metadata or specific fields in the contextualization. - - Supports dot notation for nested fields (e.g., 'author.name'). When True, all - metadata is included (flattened). When a list, only specified fields are - included. 
- """ - - -ConfigContextualization: TypeAlias = Union[bool, ConfigContextualizationContextualizationConfig] - - -class Config(TypedDict, total=False): - """Configuration for a store.""" - - contextualization: ConfigContextualization - """Contextualize files with metadata""" - - save_content: bool - """Whether to save original content in the store. - - When False, only vectors are indexed without the original content (index-only - mode). This is useful for data privacy. Note: Reranking is not supported when - content is not saved. - """ diff --git a/src/mixedbread/types/store_update_params.py b/src/mixedbread/types/store_update_params.py index e06bbf9d..d6285f0c 100644 --- a/src/mixedbread/types/store_update_params.py +++ b/src/mixedbread/types/store_update_params.py @@ -23,6 +23,9 @@ class StoreUpdateParams(TypedDict, total=False): is_public: Optional[bool] """Whether the store can be accessed by anyone with valid login credentials""" + license: Optional[str] + """License for public stores""" + expires_after: Optional[ExpiresAfterParam] """Represents an expiration policy for a store.""" diff --git a/src/mixedbread/types/stores/__init__.py b/src/mixedbread/types/stores/__init__.py index 50862586..f52b7b9f 100644 --- a/src/mixedbread/types/stores/__init__.py +++ b/src/mixedbread/types/stores/__init__.py @@ -4,12 +4,15 @@ from .store_file import StoreFile as StoreFile from .file_list_params import FileListParams as FileListParams -from .scored_store_file import ScoredStoreFile as ScoredStoreFile +from .text_input_chunk import TextInputChunk as TextInputChunk +from .store_file_config import StoreFileConfig as StoreFileConfig from .store_file_status import StoreFileStatus as StoreFileStatus from .file_create_params import FileCreateParams as FileCreateParams from .file_list_response import FileListResponse as FileListResponse -from .file_search_params import FileSearchParams as FileSearchParams from .file_update_params import FileUpdateParams as FileUpdateParams from 
.file_delete_response import FileDeleteResponse as FileDeleteResponse from .file_retrieve_params import FileRetrieveParams as FileRetrieveParams -from .file_search_response import FileSearchResponse as FileSearchResponse +from .audio_url_input_chunk import AudioURLInputChunk as AudioURLInputChunk +from .image_url_input_chunk import ImageURLInputChunk as ImageURLInputChunk +from .video_url_input_chunk import VideoURLInputChunk as VideoURLInputChunk +from .store_file_config_param import StoreFileConfigParam as StoreFileConfigParam diff --git a/src/mixedbread/types/stores/audio_url_input_chunk.py b/src/mixedbread/types/stores/audio_url_input_chunk.py new file mode 100644 index 00000000..79cdd3c3 --- /dev/null +++ b/src/mixedbread/types/stores/audio_url_input_chunk.py @@ -0,0 +1,57 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Union, Optional +from typing_extensions import Literal, Annotated, TypeAlias + +from ..._utils import PropertyInfo +from ..._models import BaseModel +from ..audio_url import AudioURL +from ..pdf_chunk_generated_metadata import PdfChunkGeneratedMetadata +from ..code_chunk_generated_metadata import CodeChunkGeneratedMetadata +from ..text_chunk_generated_metadata import TextChunkGeneratedMetadata +from ..audio_chunk_generated_metadata import AudioChunkGeneratedMetadata +from ..image_chunk_generated_metadata import ImageChunkGeneratedMetadata +from ..video_chunk_generated_metadata import VideoChunkGeneratedMetadata +from ..markdown_chunk_generated_metadata import MarkdownChunkGeneratedMetadata + +__all__ = ["AudioURLInputChunk", "GeneratedMetadata"] + +GeneratedMetadata: TypeAlias = Annotated[ + Union[ + MarkdownChunkGeneratedMetadata, + TextChunkGeneratedMetadata, + PdfChunkGeneratedMetadata, + CodeChunkGeneratedMetadata, + AudioChunkGeneratedMetadata, + VideoChunkGeneratedMetadata, + ImageChunkGeneratedMetadata, + None, + ], + PropertyInfo(discriminator="type"), +] + + +class 
AudioURLInputChunk(BaseModel): + chunk_index: int + """position of the chunk in a file""" + + mime_type: Optional[str] = None + """mime type of the chunk""" + + generated_metadata: Optional[GeneratedMetadata] = None + """metadata of the chunk""" + + model: Optional[str] = None + """model used for this chunk""" + + type: Optional[Literal["audio_url"]] = None + """Input type identifier""" + + transcription: Optional[str] = None + """speech recognition (sr) text of the audio""" + + audio_url: Optional[AudioURL] = None + """Model for audio URL validation.""" + + sampling_rate: int + """The sampling rate of the audio.""" diff --git a/src/mixedbread/types/stores/file_create_params.py b/src/mixedbread/types/stores/file_create_params.py index c776ea33..ac1d5b86 100644 --- a/src/mixedbread/types/stores/file_create_params.py +++ b/src/mixedbread/types/stores/file_create_params.py @@ -3,16 +3,18 @@ from __future__ import annotations from typing import Optional -from typing_extensions import Literal, Required, TypedDict +from typing_extensions import Required, TypedDict -__all__ = ["FileCreateParams", "Config", "Experimental"] +from .store_file_config_param import StoreFileConfigParam + +__all__ = ["FileCreateParams"] class FileCreateParams(TypedDict, total=False): metadata: object """Optional metadata for the file""" - config: Config + config: StoreFileConfigParam """Configuration for adding the file""" external_id: Optional[str] @@ -24,19 +26,5 @@ class FileCreateParams(TypedDict, total=False): file_id: Required[str] """ID of the file to add""" - experimental: Optional[Experimental] + experimental: Optional[StoreFileConfigParam] """Configuration for a file.""" - - -class Config(TypedDict, total=False): - """Configuration for adding the file""" - - parsing_strategy: Literal["fast", "high_quality"] - """Strategy for adding the file, this overrides the store-level default""" - - -class Experimental(TypedDict, total=False): - """Configuration for a file.""" - - parsing_strategy: 
Literal["fast", "high_quality"] - """Strategy for adding the file, this overrides the store-level default""" diff --git a/src/mixedbread/types/stores/file_search_params.py b/src/mixedbread/types/stores/file_search_params.py deleted file mode 100644 index c1b5dee9..00000000 --- a/src/mixedbread/types/stores/file_search_params.py +++ /dev/null @@ -1,128 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from __future__ import annotations - -from typing import Union, Iterable, Optional -from typing_extensions import Required, TypeAlias, TypedDict - -from ..._types import SequenceNotStr -from ..extractions.text_input_param import TextInputParam -from ..extractions.image_url_input_param import ImageURLInputParam -from ..shared_params.search_filter_condition import SearchFilterCondition - -__all__ = [ - "FileSearchParams", - "Query", - "Filters", - "FiltersUnionMember2", - "SearchOptions", - "SearchOptionsRerank", - "SearchOptionsRerankRerankConfig", - "SearchOptionsAgentic", - "SearchOptionsAgenticAgenticSearchConfig", -] - - -class FileSearchParams(TypedDict, total=False): - query: Required[Query] - """Search query text""" - - store_identifiers: Required[SequenceNotStr[str]] - """IDs or names of stores to search""" - - top_k: int - """Number of results to return""" - - filters: Optional[Filters] - """Optional filter conditions""" - - file_ids: Union[Iterable[object], SequenceNotStr[str], None] - """Optional list of file IDs to filter chunks by (inclusion filter)""" - - search_options: SearchOptions - """Search configuration options""" - - -Query: TypeAlias = Union[str, ImageURLInputParam, TextInputParam] - -FiltersUnionMember2: TypeAlias = Union["SearchFilter", SearchFilterCondition] - -Filters: TypeAlias = Union["SearchFilter", SearchFilterCondition, Iterable[FiltersUnionMember2]] - - -class SearchOptionsRerankRerankConfig(TypedDict, total=False): - """Represents a reranking configuration.""" - - model: str - """The name 
of the reranking model""" - - with_metadata: Union[bool, SequenceNotStr[str]] - """Whether to include metadata in the reranked results""" - - top_k: Optional[int] - """Maximum number of results to return after reranking. - - If None, returns all reranked results. - """ - - -SearchOptionsRerank: TypeAlias = Union[bool, SearchOptionsRerankRerankConfig] - - -class SearchOptionsAgenticAgenticSearchConfig(TypedDict, total=False): - """Configuration for agentic multi-query search.""" - - max_rounds: int - """Maximum number of search rounds""" - - queries_per_round: int - """Maximum queries per round""" - - instructions: Optional[str] - """ - Additional custom instructions (followed only when not in conflict with existing - rules) - """ - - -SearchOptionsAgentic: TypeAlias = Union[bool, SearchOptionsAgenticAgenticSearchConfig] - - -class SearchOptions(TypedDict, total=False): - """Search configuration options""" - - score_threshold: float - """Minimum similarity score threshold""" - - rewrite_query: bool - """Whether to rewrite the query. - - Ignored when agentic is enabled (the agent handles query decomposition). - """ - - rerank: Optional[SearchOptionsRerank] - """Whether to rerank results and optional reranking configuration. - - Ignored when agentic is enabled (the agent handles ranking). - """ - - agentic: Optional[SearchOptionsAgentic] - """ - Whether to use agentic multi-query search with automatic query decomposition and - ranking. When enabled, rewrite_query and rerank options are ignored. 
- """ - - return_metadata: bool - """Whether to return file metadata""" - - return_chunks: bool - """Whether to return matching text chunks""" - - chunks_per_file: int - """Number of chunks to return for each file""" - - apply_search_rules: bool - """Whether to apply search rules""" - - -from ..shared_params.search_filter import SearchFilter diff --git a/src/mixedbread/types/stores/file_search_response.py b/src/mixedbread/types/stores/file_search_response.py deleted file mode 100644 index 304512e4..00000000 --- a/src/mixedbread/types/stores/file_search_response.py +++ /dev/null @@ -1,17 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from typing import List, Optional -from typing_extensions import Literal - -from ..._models import BaseModel -from .scored_store_file import ScoredStoreFile - -__all__ = ["FileSearchResponse"] - - -class FileSearchResponse(BaseModel): - object: Optional[Literal["list"]] = None - """The object type of the response""" - - data: List[ScoredStoreFile] - """The list of scored store files""" diff --git a/src/mixedbread/types/stores/image_url_input_chunk.py b/src/mixedbread/types/stores/image_url_input_chunk.py new file mode 100644 index 00000000..927f5d8d --- /dev/null +++ b/src/mixedbread/types/stores/image_url_input_chunk.py @@ -0,0 +1,57 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from typing import Union, Optional +from typing_extensions import Literal, Annotated, TypeAlias + +from ..._utils import PropertyInfo +from ..._models import BaseModel +from ..image_url_output import ImageURLOutput +from ..pdf_chunk_generated_metadata import PdfChunkGeneratedMetadata +from ..code_chunk_generated_metadata import CodeChunkGeneratedMetadata +from ..text_chunk_generated_metadata import TextChunkGeneratedMetadata +from ..audio_chunk_generated_metadata import AudioChunkGeneratedMetadata +from ..image_chunk_generated_metadata import ImageChunkGeneratedMetadata +from ..video_chunk_generated_metadata import VideoChunkGeneratedMetadata +from ..markdown_chunk_generated_metadata import MarkdownChunkGeneratedMetadata + +__all__ = ["ImageURLInputChunk", "GeneratedMetadata"] + +GeneratedMetadata: TypeAlias = Annotated[ + Union[ + MarkdownChunkGeneratedMetadata, + TextChunkGeneratedMetadata, + PdfChunkGeneratedMetadata, + CodeChunkGeneratedMetadata, + AudioChunkGeneratedMetadata, + VideoChunkGeneratedMetadata, + ImageChunkGeneratedMetadata, + None, + ], + PropertyInfo(discriminator="type"), +] + + +class ImageURLInputChunk(BaseModel): + chunk_index: int + """position of the chunk in a file""" + + mime_type: Optional[str] = None + """mime type of the chunk""" + + generated_metadata: Optional[GeneratedMetadata] = None + """metadata of the chunk""" + + model: Optional[str] = None + """model used for this chunk""" + + type: Optional[Literal["image_url"]] = None + """Input type identifier""" + + ocr_text: Optional[str] = None + """ocr text of the image""" + + summary: Optional[str] = None + """summary of the image""" + + image_url: Optional[ImageURLOutput] = None + """Model for image URL validation.""" diff --git a/src/mixedbread/types/stores/scored_store_file.py b/src/mixedbread/types/stores/scored_store_file.py deleted file mode 100644 index 87483138..00000000 --- a/src/mixedbread/types/stores/scored_store_file.py +++ /dev/null @@ -1,77 +0,0 @@ -# File generated 
from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from typing import List, Union, Optional -from datetime import datetime -from typing_extensions import Literal, Annotated, TypeAlias - -from ..._utils import PropertyInfo -from ..._models import BaseModel -from .store_file_status import StoreFileStatus -from ..scored_text_input_chunk import ScoredTextInputChunk -from ..scored_audio_url_input_chunk import ScoredAudioURLInputChunk -from ..scored_image_url_input_chunk import ScoredImageURLInputChunk -from ..scored_video_url_input_chunk import ScoredVideoURLInputChunk - -__all__ = ["ScoredStoreFile", "Config", "Chunk"] - - -class Config(BaseModel): - """Configuration for a file.""" - - parsing_strategy: Optional[Literal["fast", "high_quality"]] = None - """Strategy for adding the file, this overrides the store-level default""" - - -Chunk: TypeAlias = Annotated[ - Union[ScoredTextInputChunk, ScoredImageURLInputChunk, ScoredAudioURLInputChunk, ScoredVideoURLInputChunk], - PropertyInfo(discriminator="type"), -] - - -class ScoredStoreFile(BaseModel): - """Represents a scored store file.""" - - id: str - """Unique identifier for the file""" - - filename: Optional[str] = None - """Name of the file""" - - metadata: Optional[object] = None - """Optional file metadata""" - - external_id: Optional[str] = None - """External identifier for this file in the store""" - - status: Optional[StoreFileStatus] = None - """Processing status of the file""" - - last_error: Optional[object] = None - """Last error message if processing failed""" - - store_id: str - """ID of the containing store""" - - created_at: datetime - """Timestamp of store file creation""" - - version: Optional[int] = None - """Version number of the file""" - - usage_bytes: Optional[int] = None - """Storage usage in bytes""" - - usage_tokens: Optional[int] = None - """Storage usage in tokens""" - - config: Optional[Config] = None - """Configuration for a file.""" - - object: 
Optional[Literal["store.file"]] = None - """Type of the object""" - - chunks: Optional[List[Chunk]] = None - """Array of scored file chunks""" - - score: float - """score of the file""" diff --git a/src/mixedbread/types/stores/store_file.py b/src/mixedbread/types/stores/store_file.py index eb1baefc..e6154982 100644 --- a/src/mixedbread/types/stores/store_file.py +++ b/src/mixedbread/types/stores/store_file.py @@ -1,1186 +1,22 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. -from typing import TYPE_CHECKING, Dict, List, Union, Optional +from typing import List, Union, Optional from datetime import datetime from typing_extensions import Literal, Annotated, TypeAlias -from pydantic import Field as FieldInfo - from ..._utils import PropertyInfo from ..._models import BaseModel +from .text_input_chunk import TextInputChunk +from .store_file_config import StoreFileConfig from .store_file_status import StoreFileStatus +from .audio_url_input_chunk import AudioURLInputChunk +from .image_url_input_chunk import ImageURLInputChunk +from .video_url_input_chunk import VideoURLInputChunk -__all__ = [ - "StoreFile", - "Config", - "Chunk", - "ChunkTextInputChunk", - "ChunkTextInputChunkGeneratedMetadata", - "ChunkTextInputChunkGeneratedMetadataMarkdownChunkGeneratedMetadata", - "ChunkTextInputChunkGeneratedMetadataMarkdownChunkGeneratedMetadataChunkHeading", - "ChunkTextInputChunkGeneratedMetadataMarkdownChunkGeneratedMetadataHeadingContext", - "ChunkTextInputChunkGeneratedMetadataTextChunkGeneratedMetadata", - "ChunkTextInputChunkGeneratedMetadataPdfChunkGeneratedMetadata", - "ChunkTextInputChunkGeneratedMetadataCodeChunkGeneratedMetadata", - "ChunkTextInputChunkGeneratedMetadataAudioChunkGeneratedMetadata", - "ChunkTextInputChunkGeneratedMetadataVideoChunkGeneratedMetadata", - "ChunkTextInputChunkGeneratedMetadataImageChunkGeneratedMetadata", - "ChunkImageURLInputChunk", - "ChunkImageURLInputChunkGeneratedMetadata", - 
"ChunkImageURLInputChunkGeneratedMetadataMarkdownChunkGeneratedMetadata", - "ChunkImageURLInputChunkGeneratedMetadataMarkdownChunkGeneratedMetadataChunkHeading", - "ChunkImageURLInputChunkGeneratedMetadataMarkdownChunkGeneratedMetadataHeadingContext", - "ChunkImageURLInputChunkGeneratedMetadataTextChunkGeneratedMetadata", - "ChunkImageURLInputChunkGeneratedMetadataPdfChunkGeneratedMetadata", - "ChunkImageURLInputChunkGeneratedMetadataCodeChunkGeneratedMetadata", - "ChunkImageURLInputChunkGeneratedMetadataAudioChunkGeneratedMetadata", - "ChunkImageURLInputChunkGeneratedMetadataVideoChunkGeneratedMetadata", - "ChunkImageURLInputChunkGeneratedMetadataImageChunkGeneratedMetadata", - "ChunkImageURLInputChunkImageURL", - "ChunkAudioURLInputChunk", - "ChunkAudioURLInputChunkGeneratedMetadata", - "ChunkAudioURLInputChunkGeneratedMetadataMarkdownChunkGeneratedMetadata", - "ChunkAudioURLInputChunkGeneratedMetadataMarkdownChunkGeneratedMetadataChunkHeading", - "ChunkAudioURLInputChunkGeneratedMetadataMarkdownChunkGeneratedMetadataHeadingContext", - "ChunkAudioURLInputChunkGeneratedMetadataTextChunkGeneratedMetadata", - "ChunkAudioURLInputChunkGeneratedMetadataPdfChunkGeneratedMetadata", - "ChunkAudioURLInputChunkGeneratedMetadataCodeChunkGeneratedMetadata", - "ChunkAudioURLInputChunkGeneratedMetadataAudioChunkGeneratedMetadata", - "ChunkAudioURLInputChunkGeneratedMetadataVideoChunkGeneratedMetadata", - "ChunkAudioURLInputChunkGeneratedMetadataImageChunkGeneratedMetadata", - "ChunkAudioURLInputChunkAudioURL", - "ChunkVideoURLInputChunk", - "ChunkVideoURLInputChunkGeneratedMetadata", - "ChunkVideoURLInputChunkGeneratedMetadataMarkdownChunkGeneratedMetadata", - "ChunkVideoURLInputChunkGeneratedMetadataMarkdownChunkGeneratedMetadataChunkHeading", - "ChunkVideoURLInputChunkGeneratedMetadataMarkdownChunkGeneratedMetadataHeadingContext", - "ChunkVideoURLInputChunkGeneratedMetadataTextChunkGeneratedMetadata", - "ChunkVideoURLInputChunkGeneratedMetadataPdfChunkGeneratedMetadata", - 
"ChunkVideoURLInputChunkGeneratedMetadataCodeChunkGeneratedMetadata", - "ChunkVideoURLInputChunkGeneratedMetadataAudioChunkGeneratedMetadata", - "ChunkVideoURLInputChunkGeneratedMetadataVideoChunkGeneratedMetadata", - "ChunkVideoURLInputChunkGeneratedMetadataImageChunkGeneratedMetadata", - "ChunkVideoURLInputChunkVideoURL", -] - - -class Config(BaseModel): - """Configuration for a file.""" - - parsing_strategy: Optional[Literal["fast", "high_quality"]] = None - """Strategy for adding the file, this overrides the store-level default""" - - -class ChunkTextInputChunkGeneratedMetadataMarkdownChunkGeneratedMetadataChunkHeading(BaseModel): - level: int - - text: str - - -class ChunkTextInputChunkGeneratedMetadataMarkdownChunkGeneratedMetadataHeadingContext(BaseModel): - level: int - - text: str - - -class ChunkTextInputChunkGeneratedMetadataMarkdownChunkGeneratedMetadata(BaseModel): - type: Optional[Literal["markdown"]] = None - - file_type: Optional[Literal["text/markdown"]] = None - - language: Optional[str] = None - - word_count: Optional[int] = None - - file_size: Optional[int] = None - - chunk_headings: Optional[List[ChunkTextInputChunkGeneratedMetadataMarkdownChunkGeneratedMetadataChunkHeading]] = ( - None - ) - - heading_context: Optional[ - List[ChunkTextInputChunkGeneratedMetadataMarkdownChunkGeneratedMetadataHeadingContext] - ] = None - - start_line: Optional[int] = None - - num_lines: Optional[int] = None - - file_extension: Optional[str] = None - - frontmatter: Optional[Dict[str, object]] = None - - if TYPE_CHECKING: - # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a - # value to this field, so for compatibility we avoid doing it at runtime. - __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride] - - # Stub to indicate that arbitrary properties are accepted. - # To access properties that are not valid identifiers you can use `getattr`, e.g. 
- # `getattr(obj, '$type')` - def __getattr__(self, attr: str) -> object: ... - else: - __pydantic_extra__: Dict[str, object] - - -class ChunkTextInputChunkGeneratedMetadataTextChunkGeneratedMetadata(BaseModel): - type: Optional[Literal["text"]] = None - - file_type: Optional[Literal["text/plain"]] = None - - language: Optional[str] = None - - word_count: Optional[int] = None - - file_size: Optional[int] = None - - start_line: Optional[int] = None - - num_lines: Optional[int] = None - - file_extension: Optional[str] = None - - if TYPE_CHECKING: - # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a - # value to this field, so for compatibility we avoid doing it at runtime. - __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride] - - # Stub to indicate that arbitrary properties are accepted. - # To access properties that are not valid identifiers you can use `getattr`, e.g. - # `getattr(obj, '$type')` - def __getattr__(self, attr: str) -> object: ... - else: - __pydantic_extra__: Dict[str, object] - - -class ChunkTextInputChunkGeneratedMetadataPdfChunkGeneratedMetadata(BaseModel): - type: Optional[Literal["pdf"]] = None - - file_type: Optional[Literal["application/pdf"]] = None - - total_pages: Optional[int] = None - - total_size: Optional[int] = None - - file_extension: Optional[str] = None - - if TYPE_CHECKING: - # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a - # value to this field, so for compatibility we avoid doing it at runtime. - __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride] - - # Stub to indicate that arbitrary properties are accepted. - # To access properties that are not valid identifiers you can use `getattr`, e.g. - # `getattr(obj, '$type')` - def __getattr__(self, attr: str) -> object: ... 
- else: - __pydantic_extra__: Dict[str, object] - - -class ChunkTextInputChunkGeneratedMetadataCodeChunkGeneratedMetadata(BaseModel): - type: Optional[Literal["code"]] = None - - file_type: str - - language: Optional[str] = None - - word_count: Optional[int] = None - - file_size: Optional[int] = None - - start_line: Optional[int] = None - - num_lines: Optional[int] = None - - file_extension: Optional[str] = None - - if TYPE_CHECKING: - # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a - # value to this field, so for compatibility we avoid doing it at runtime. - __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride] - - # Stub to indicate that arbitrary properties are accepted. - # To access properties that are not valid identifiers you can use `getattr`, e.g. - # `getattr(obj, '$type')` - def __getattr__(self, attr: str) -> object: ... - else: - __pydantic_extra__: Dict[str, object] - - -class ChunkTextInputChunkGeneratedMetadataAudioChunkGeneratedMetadata(BaseModel): - type: Optional[Literal["audio"]] = None - - file_type: Optional[str] = None - - file_size: Optional[int] = None - - total_duration_seconds: Optional[float] = None - - sample_rate: Optional[int] = None - - channels: Optional[int] = None - - audio_format: Optional[int] = None - - bpm: Optional[int] = None - - file_extension: Optional[str] = None - - if TYPE_CHECKING: - # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a - # value to this field, so for compatibility we avoid doing it at runtime. - __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride] - - # Stub to indicate that arbitrary properties are accepted. - # To access properties that are not valid identifiers you can use `getattr`, e.g. - # `getattr(obj, '$type')` - def __getattr__(self, attr: str) -> object: ... 
- else: - __pydantic_extra__: Dict[str, object] - - -class ChunkTextInputChunkGeneratedMetadataVideoChunkGeneratedMetadata(BaseModel): - type: Optional[Literal["video"]] = None - - file_type: Optional[str] = None - - file_size: Optional[int] = None - - total_duration_seconds: Optional[float] = None - - fps: Optional[float] = None - - width: Optional[int] = None - - height: Optional[int] = None - - frame_count: Optional[int] = None - - has_audio_stream: Optional[bool] = None - - bpm: Optional[int] = None - - file_extension: Optional[str] = None - - if TYPE_CHECKING: - # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a - # value to this field, so for compatibility we avoid doing it at runtime. - __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride] - - # Stub to indicate that arbitrary properties are accepted. - # To access properties that are not valid identifiers you can use `getattr`, e.g. - # `getattr(obj, '$type')` - def __getattr__(self, attr: str) -> object: ... - else: - __pydantic_extra__: Dict[str, object] - - -class ChunkTextInputChunkGeneratedMetadataImageChunkGeneratedMetadata(BaseModel): - type: Optional[Literal["image"]] = None - - file_type: Optional[str] = None - - file_size: Optional[int] = None - - width: Optional[int] = None - - height: Optional[int] = None - - file_extension: Optional[str] = None - - if TYPE_CHECKING: - # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a - # value to this field, so for compatibility we avoid doing it at runtime. - __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride] - - # Stub to indicate that arbitrary properties are accepted. - # To access properties that are not valid identifiers you can use `getattr`, e.g. - # `getattr(obj, '$type')` - def __getattr__(self, attr: str) -> object: ... 
- else: - __pydantic_extra__: Dict[str, object] - - -ChunkTextInputChunkGeneratedMetadata: TypeAlias = Annotated[ - Union[ - ChunkTextInputChunkGeneratedMetadataMarkdownChunkGeneratedMetadata, - ChunkTextInputChunkGeneratedMetadataTextChunkGeneratedMetadata, - ChunkTextInputChunkGeneratedMetadataPdfChunkGeneratedMetadata, - ChunkTextInputChunkGeneratedMetadataCodeChunkGeneratedMetadata, - ChunkTextInputChunkGeneratedMetadataAudioChunkGeneratedMetadata, - ChunkTextInputChunkGeneratedMetadataVideoChunkGeneratedMetadata, - ChunkTextInputChunkGeneratedMetadataImageChunkGeneratedMetadata, - None, - ], - PropertyInfo(discriminator="type"), -] - - -class ChunkTextInputChunk(BaseModel): - chunk_index: int - """position of the chunk in a file""" - - mime_type: Optional[str] = None - """mime type of the chunk""" - - generated_metadata: Optional[ChunkTextInputChunkGeneratedMetadata] = None - """metadata of the chunk""" - - model: Optional[str] = None - """model used for this chunk""" - - type: Optional[Literal["text"]] = None - """Input type identifier""" - - offset: Optional[int] = None - """The offset of the text in the file relative to the start of the file.""" - - text: Optional[str] = None - """Text content""" - - -class ChunkImageURLInputChunkGeneratedMetadataMarkdownChunkGeneratedMetadataChunkHeading(BaseModel): - level: int - - text: str - - -class ChunkImageURLInputChunkGeneratedMetadataMarkdownChunkGeneratedMetadataHeadingContext(BaseModel): - level: int - - text: str - - -class ChunkImageURLInputChunkGeneratedMetadataMarkdownChunkGeneratedMetadata(BaseModel): - type: Optional[Literal["markdown"]] = None - - file_type: Optional[Literal["text/markdown"]] = None - - language: Optional[str] = None - - word_count: Optional[int] = None - - file_size: Optional[int] = None - - chunk_headings: Optional[ - List[ChunkImageURLInputChunkGeneratedMetadataMarkdownChunkGeneratedMetadataChunkHeading] - ] = None - - heading_context: Optional[ - 
List[ChunkImageURLInputChunkGeneratedMetadataMarkdownChunkGeneratedMetadataHeadingContext] - ] = None - - start_line: Optional[int] = None - - num_lines: Optional[int] = None - - file_extension: Optional[str] = None - - frontmatter: Optional[Dict[str, object]] = None - - if TYPE_CHECKING: - # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a - # value to this field, so for compatibility we avoid doing it at runtime. - __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride] - - # Stub to indicate that arbitrary properties are accepted. - # To access properties that are not valid identifiers you can use `getattr`, e.g. - # `getattr(obj, '$type')` - def __getattr__(self, attr: str) -> object: ... - else: - __pydantic_extra__: Dict[str, object] - - -class ChunkImageURLInputChunkGeneratedMetadataTextChunkGeneratedMetadata(BaseModel): - type: Optional[Literal["text"]] = None - - file_type: Optional[Literal["text/plain"]] = None - - language: Optional[str] = None - - word_count: Optional[int] = None - - file_size: Optional[int] = None - - start_line: Optional[int] = None - - num_lines: Optional[int] = None - - file_extension: Optional[str] = None - - if TYPE_CHECKING: - # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a - # value to this field, so for compatibility we avoid doing it at runtime. - __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride] - - # Stub to indicate that arbitrary properties are accepted. - # To access properties that are not valid identifiers you can use `getattr`, e.g. - # `getattr(obj, '$type')` - def __getattr__(self, attr: str) -> object: ... 
- else: - __pydantic_extra__: Dict[str, object] - - -class ChunkImageURLInputChunkGeneratedMetadataPdfChunkGeneratedMetadata(BaseModel): - type: Optional[Literal["pdf"]] = None - - file_type: Optional[Literal["application/pdf"]] = None - - total_pages: Optional[int] = None - - total_size: Optional[int] = None - - file_extension: Optional[str] = None - - if TYPE_CHECKING: - # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a - # value to this field, so for compatibility we avoid doing it at runtime. - __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride] - - # Stub to indicate that arbitrary properties are accepted. - # To access properties that are not valid identifiers you can use `getattr`, e.g. - # `getattr(obj, '$type')` - def __getattr__(self, attr: str) -> object: ... - else: - __pydantic_extra__: Dict[str, object] - - -class ChunkImageURLInputChunkGeneratedMetadataCodeChunkGeneratedMetadata(BaseModel): - type: Optional[Literal["code"]] = None - - file_type: str - - language: Optional[str] = None - - word_count: Optional[int] = None - - file_size: Optional[int] = None - - start_line: Optional[int] = None - - num_lines: Optional[int] = None - - file_extension: Optional[str] = None - - if TYPE_CHECKING: - # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a - # value to this field, so for compatibility we avoid doing it at runtime. - __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride] - - # Stub to indicate that arbitrary properties are accepted. - # To access properties that are not valid identifiers you can use `getattr`, e.g. - # `getattr(obj, '$type')` - def __getattr__(self, attr: str) -> object: ... 
- else: - __pydantic_extra__: Dict[str, object] - - -class ChunkImageURLInputChunkGeneratedMetadataAudioChunkGeneratedMetadata(BaseModel): - type: Optional[Literal["audio"]] = None - - file_type: Optional[str] = None - - file_size: Optional[int] = None - - total_duration_seconds: Optional[float] = None - - sample_rate: Optional[int] = None - - channels: Optional[int] = None - - audio_format: Optional[int] = None - - bpm: Optional[int] = None - - file_extension: Optional[str] = None - - if TYPE_CHECKING: - # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a - # value to this field, so for compatibility we avoid doing it at runtime. - __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride] - - # Stub to indicate that arbitrary properties are accepted. - # To access properties that are not valid identifiers you can use `getattr`, e.g. - # `getattr(obj, '$type')` - def __getattr__(self, attr: str) -> object: ... - else: - __pydantic_extra__: Dict[str, object] - - -class ChunkImageURLInputChunkGeneratedMetadataVideoChunkGeneratedMetadata(BaseModel): - type: Optional[Literal["video"]] = None - - file_type: Optional[str] = None - - file_size: Optional[int] = None - - total_duration_seconds: Optional[float] = None - - fps: Optional[float] = None - - width: Optional[int] = None - - height: Optional[int] = None - - frame_count: Optional[int] = None - - has_audio_stream: Optional[bool] = None - - bpm: Optional[int] = None - - file_extension: Optional[str] = None - - if TYPE_CHECKING: - # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a - # value to this field, so for compatibility we avoid doing it at runtime. - __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride] - - # Stub to indicate that arbitrary properties are accepted. - # To access properties that are not valid identifiers you can use `getattr`, e.g. 
- # `getattr(obj, '$type')` - def __getattr__(self, attr: str) -> object: ... - else: - __pydantic_extra__: Dict[str, object] - - -class ChunkImageURLInputChunkGeneratedMetadataImageChunkGeneratedMetadata(BaseModel): - type: Optional[Literal["image"]] = None - - file_type: Optional[str] = None - - file_size: Optional[int] = None - - width: Optional[int] = None - - height: Optional[int] = None - - file_extension: Optional[str] = None - - if TYPE_CHECKING: - # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a - # value to this field, so for compatibility we avoid doing it at runtime. - __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride] - - # Stub to indicate that arbitrary properties are accepted. - # To access properties that are not valid identifiers you can use `getattr`, e.g. - # `getattr(obj, '$type')` - def __getattr__(self, attr: str) -> object: ... - else: - __pydantic_extra__: Dict[str, object] - - -ChunkImageURLInputChunkGeneratedMetadata: TypeAlias = Annotated[ - Union[ - ChunkImageURLInputChunkGeneratedMetadataMarkdownChunkGeneratedMetadata, - ChunkImageURLInputChunkGeneratedMetadataTextChunkGeneratedMetadata, - ChunkImageURLInputChunkGeneratedMetadataPdfChunkGeneratedMetadata, - ChunkImageURLInputChunkGeneratedMetadataCodeChunkGeneratedMetadata, - ChunkImageURLInputChunkGeneratedMetadataAudioChunkGeneratedMetadata, - ChunkImageURLInputChunkGeneratedMetadataVideoChunkGeneratedMetadata, - ChunkImageURLInputChunkGeneratedMetadataImageChunkGeneratedMetadata, - None, - ], - PropertyInfo(discriminator="type"), -] - - -class ChunkImageURLInputChunkImageURL(BaseModel): - """Model for image URL validation.""" - - url: str - """The image URL. 
Can be either a URL or a Data URI.""" - - format: Optional[str] = None - """The image format/mimetype""" - - -class ChunkImageURLInputChunk(BaseModel): - chunk_index: int - """position of the chunk in a file""" - - mime_type: Optional[str] = None - """mime type of the chunk""" - - generated_metadata: Optional[ChunkImageURLInputChunkGeneratedMetadata] = None - """metadata of the chunk""" - - model: Optional[str] = None - """model used for this chunk""" - - type: Optional[Literal["image_url"]] = None - """Input type identifier""" - - ocr_text: Optional[str] = None - """ocr text of the image""" - - summary: Optional[str] = None - """summary of the image""" - - image_url: Optional[ChunkImageURLInputChunkImageURL] = None - """Model for image URL validation.""" - - -class ChunkAudioURLInputChunkGeneratedMetadataMarkdownChunkGeneratedMetadataChunkHeading(BaseModel): - level: int - - text: str - - -class ChunkAudioURLInputChunkGeneratedMetadataMarkdownChunkGeneratedMetadataHeadingContext(BaseModel): - level: int - - text: str - - -class ChunkAudioURLInputChunkGeneratedMetadataMarkdownChunkGeneratedMetadata(BaseModel): - type: Optional[Literal["markdown"]] = None - - file_type: Optional[Literal["text/markdown"]] = None - - language: Optional[str] = None - - word_count: Optional[int] = None - - file_size: Optional[int] = None - - chunk_headings: Optional[ - List[ChunkAudioURLInputChunkGeneratedMetadataMarkdownChunkGeneratedMetadataChunkHeading] - ] = None - - heading_context: Optional[ - List[ChunkAudioURLInputChunkGeneratedMetadataMarkdownChunkGeneratedMetadataHeadingContext] - ] = None - - start_line: Optional[int] = None - - num_lines: Optional[int] = None - - file_extension: Optional[str] = None - - frontmatter: Optional[Dict[str, object]] = None - - if TYPE_CHECKING: - # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a - # value to this field, so for compatibility we avoid doing it at runtime. 
- __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride] - - # Stub to indicate that arbitrary properties are accepted. - # To access properties that are not valid identifiers you can use `getattr`, e.g. - # `getattr(obj, '$type')` - def __getattr__(self, attr: str) -> object: ... - else: - __pydantic_extra__: Dict[str, object] - - -class ChunkAudioURLInputChunkGeneratedMetadataTextChunkGeneratedMetadata(BaseModel): - type: Optional[Literal["text"]] = None - - file_type: Optional[Literal["text/plain"]] = None - - language: Optional[str] = None - - word_count: Optional[int] = None - - file_size: Optional[int] = None - - start_line: Optional[int] = None - - num_lines: Optional[int] = None - - file_extension: Optional[str] = None - - if TYPE_CHECKING: - # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a - # value to this field, so for compatibility we avoid doing it at runtime. - __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride] - - # Stub to indicate that arbitrary properties are accepted. - # To access properties that are not valid identifiers you can use `getattr`, e.g. - # `getattr(obj, '$type')` - def __getattr__(self, attr: str) -> object: ... - else: - __pydantic_extra__: Dict[str, object] - - -class ChunkAudioURLInputChunkGeneratedMetadataPdfChunkGeneratedMetadata(BaseModel): - type: Optional[Literal["pdf"]] = None - - file_type: Optional[Literal["application/pdf"]] = None - - total_pages: Optional[int] = None - - total_size: Optional[int] = None - - file_extension: Optional[str] = None - - if TYPE_CHECKING: - # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a - # value to this field, so for compatibility we avoid doing it at runtime. 
- __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride] - - # Stub to indicate that arbitrary properties are accepted. - # To access properties that are not valid identifiers you can use `getattr`, e.g. - # `getattr(obj, '$type')` - def __getattr__(self, attr: str) -> object: ... - else: - __pydantic_extra__: Dict[str, object] - - -class ChunkAudioURLInputChunkGeneratedMetadataCodeChunkGeneratedMetadata(BaseModel): - type: Optional[Literal["code"]] = None - - file_type: str - - language: Optional[str] = None - - word_count: Optional[int] = None - - file_size: Optional[int] = None - - start_line: Optional[int] = None - - num_lines: Optional[int] = None - - file_extension: Optional[str] = None - - if TYPE_CHECKING: - # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a - # value to this field, so for compatibility we avoid doing it at runtime. - __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride] - - # Stub to indicate that arbitrary properties are accepted. - # To access properties that are not valid identifiers you can use `getattr`, e.g. - # `getattr(obj, '$type')` - def __getattr__(self, attr: str) -> object: ... - else: - __pydantic_extra__: Dict[str, object] - - -class ChunkAudioURLInputChunkGeneratedMetadataAudioChunkGeneratedMetadata(BaseModel): - type: Optional[Literal["audio"]] = None - - file_type: Optional[str] = None - - file_size: Optional[int] = None - - total_duration_seconds: Optional[float] = None - - sample_rate: Optional[int] = None - - channels: Optional[int] = None - - audio_format: Optional[int] = None - - bpm: Optional[int] = None - - file_extension: Optional[str] = None - - if TYPE_CHECKING: - # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a - # value to this field, so for compatibility we avoid doing it at runtime. 
- __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride] - - # Stub to indicate that arbitrary properties are accepted. - # To access properties that are not valid identifiers you can use `getattr`, e.g. - # `getattr(obj, '$type')` - def __getattr__(self, attr: str) -> object: ... - else: - __pydantic_extra__: Dict[str, object] - - -class ChunkAudioURLInputChunkGeneratedMetadataVideoChunkGeneratedMetadata(BaseModel): - type: Optional[Literal["video"]] = None - - file_type: Optional[str] = None - - file_size: Optional[int] = None - - total_duration_seconds: Optional[float] = None - - fps: Optional[float] = None - - width: Optional[int] = None - - height: Optional[int] = None - - frame_count: Optional[int] = None - - has_audio_stream: Optional[bool] = None - - bpm: Optional[int] = None - - file_extension: Optional[str] = None - - if TYPE_CHECKING: - # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a - # value to this field, so for compatibility we avoid doing it at runtime. - __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride] - - # Stub to indicate that arbitrary properties are accepted. - # To access properties that are not valid identifiers you can use `getattr`, e.g. - # `getattr(obj, '$type')` - def __getattr__(self, attr: str) -> object: ... - else: - __pydantic_extra__: Dict[str, object] - - -class ChunkAudioURLInputChunkGeneratedMetadataImageChunkGeneratedMetadata(BaseModel): - type: Optional[Literal["image"]] = None - - file_type: Optional[str] = None - - file_size: Optional[int] = None - - width: Optional[int] = None - - height: Optional[int] = None - - file_extension: Optional[str] = None - - if TYPE_CHECKING: - # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a - # value to this field, so for compatibility we avoid doing it at runtime. 
- __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride] - - # Stub to indicate that arbitrary properties are accepted. - # To access properties that are not valid identifiers you can use `getattr`, e.g. - # `getattr(obj, '$type')` - def __getattr__(self, attr: str) -> object: ... - else: - __pydantic_extra__: Dict[str, object] - - -ChunkAudioURLInputChunkGeneratedMetadata: TypeAlias = Annotated[ - Union[ - ChunkAudioURLInputChunkGeneratedMetadataMarkdownChunkGeneratedMetadata, - ChunkAudioURLInputChunkGeneratedMetadataTextChunkGeneratedMetadata, - ChunkAudioURLInputChunkGeneratedMetadataPdfChunkGeneratedMetadata, - ChunkAudioURLInputChunkGeneratedMetadataCodeChunkGeneratedMetadata, - ChunkAudioURLInputChunkGeneratedMetadataAudioChunkGeneratedMetadata, - ChunkAudioURLInputChunkGeneratedMetadataVideoChunkGeneratedMetadata, - ChunkAudioURLInputChunkGeneratedMetadataImageChunkGeneratedMetadata, - None, - ], - PropertyInfo(discriminator="type"), -] - - -class ChunkAudioURLInputChunkAudioURL(BaseModel): - """Model for audio URL validation.""" - - url: str - """The audio URL. 
Can be either a URL or a Data URI.""" - - -class ChunkAudioURLInputChunk(BaseModel): - chunk_index: int - """position of the chunk in a file""" - - mime_type: Optional[str] = None - """mime type of the chunk""" - - generated_metadata: Optional[ChunkAudioURLInputChunkGeneratedMetadata] = None - """metadata of the chunk""" - - model: Optional[str] = None - """model used for this chunk""" - - type: Optional[Literal["audio_url"]] = None - """Input type identifier""" - - transcription: Optional[str] = None - """speech recognition (sr) text of the audio""" - - summary: Optional[str] = None - """summary of the audio""" - - audio_url: Optional[ChunkAudioURLInputChunkAudioURL] = None - """Model for audio URL validation.""" - - sampling_rate: int - """The sampling rate of the audio.""" - - -class ChunkVideoURLInputChunkGeneratedMetadataMarkdownChunkGeneratedMetadataChunkHeading(BaseModel): - level: int - - text: str - - -class ChunkVideoURLInputChunkGeneratedMetadataMarkdownChunkGeneratedMetadataHeadingContext(BaseModel): - level: int - - text: str - - -class ChunkVideoURLInputChunkGeneratedMetadataMarkdownChunkGeneratedMetadata(BaseModel): - type: Optional[Literal["markdown"]] = None - - file_type: Optional[Literal["text/markdown"]] = None - - language: Optional[str] = None - - word_count: Optional[int] = None - - file_size: Optional[int] = None - - chunk_headings: Optional[ - List[ChunkVideoURLInputChunkGeneratedMetadataMarkdownChunkGeneratedMetadataChunkHeading] - ] = None - - heading_context: Optional[ - List[ChunkVideoURLInputChunkGeneratedMetadataMarkdownChunkGeneratedMetadataHeadingContext] - ] = None - - start_line: Optional[int] = None - - num_lines: Optional[int] = None - - file_extension: Optional[str] = None - - frontmatter: Optional[Dict[str, object]] = None - - if TYPE_CHECKING: - # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a - # value to this field, so for compatibility we avoid doing it at runtime. 
- __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride] - - # Stub to indicate that arbitrary properties are accepted. - # To access properties that are not valid identifiers you can use `getattr`, e.g. - # `getattr(obj, '$type')` - def __getattr__(self, attr: str) -> object: ... - else: - __pydantic_extra__: Dict[str, object] - - -class ChunkVideoURLInputChunkGeneratedMetadataTextChunkGeneratedMetadata(BaseModel): - type: Optional[Literal["text"]] = None - - file_type: Optional[Literal["text/plain"]] = None - - language: Optional[str] = None - - word_count: Optional[int] = None - - file_size: Optional[int] = None - - start_line: Optional[int] = None - - num_lines: Optional[int] = None - - file_extension: Optional[str] = None - - if TYPE_CHECKING: - # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a - # value to this field, so for compatibility we avoid doing it at runtime. - __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride] - - # Stub to indicate that arbitrary properties are accepted. - # To access properties that are not valid identifiers you can use `getattr`, e.g. - # `getattr(obj, '$type')` - def __getattr__(self, attr: str) -> object: ... - else: - __pydantic_extra__: Dict[str, object] - - -class ChunkVideoURLInputChunkGeneratedMetadataPdfChunkGeneratedMetadata(BaseModel): - type: Optional[Literal["pdf"]] = None - - file_type: Optional[Literal["application/pdf"]] = None - - total_pages: Optional[int] = None - - total_size: Optional[int] = None - - file_extension: Optional[str] = None - - if TYPE_CHECKING: - # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a - # value to this field, so for compatibility we avoid doing it at runtime. 
- __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride] - - # Stub to indicate that arbitrary properties are accepted. - # To access properties that are not valid identifiers you can use `getattr`, e.g. - # `getattr(obj, '$type')` - def __getattr__(self, attr: str) -> object: ... - else: - __pydantic_extra__: Dict[str, object] - - -class ChunkVideoURLInputChunkGeneratedMetadataCodeChunkGeneratedMetadata(BaseModel): - type: Optional[Literal["code"]] = None - - file_type: str - - language: Optional[str] = None - - word_count: Optional[int] = None - - file_size: Optional[int] = None - - start_line: Optional[int] = None - - num_lines: Optional[int] = None - - file_extension: Optional[str] = None - - if TYPE_CHECKING: - # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a - # value to this field, so for compatibility we avoid doing it at runtime. - __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride] - - # Stub to indicate that arbitrary properties are accepted. - # To access properties that are not valid identifiers you can use `getattr`, e.g. - # `getattr(obj, '$type')` - def __getattr__(self, attr: str) -> object: ... - else: - __pydantic_extra__: Dict[str, object] - - -class ChunkVideoURLInputChunkGeneratedMetadataAudioChunkGeneratedMetadata(BaseModel): - type: Optional[Literal["audio"]] = None - - file_type: Optional[str] = None - - file_size: Optional[int] = None - - total_duration_seconds: Optional[float] = None - - sample_rate: Optional[int] = None - - channels: Optional[int] = None - - audio_format: Optional[int] = None - - bpm: Optional[int] = None - - file_extension: Optional[str] = None - - if TYPE_CHECKING: - # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a - # value to this field, so for compatibility we avoid doing it at runtime. 
- __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride] - - # Stub to indicate that arbitrary properties are accepted. - # To access properties that are not valid identifiers you can use `getattr`, e.g. - # `getattr(obj, '$type')` - def __getattr__(self, attr: str) -> object: ... - else: - __pydantic_extra__: Dict[str, object] - - -class ChunkVideoURLInputChunkGeneratedMetadataVideoChunkGeneratedMetadata(BaseModel): - type: Optional[Literal["video"]] = None - - file_type: Optional[str] = None - - file_size: Optional[int] = None - - total_duration_seconds: Optional[float] = None - - fps: Optional[float] = None - - width: Optional[int] = None - - height: Optional[int] = None - - frame_count: Optional[int] = None - - has_audio_stream: Optional[bool] = None - - bpm: Optional[int] = None - - file_extension: Optional[str] = None - - if TYPE_CHECKING: - # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a - # value to this field, so for compatibility we avoid doing it at runtime. - __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride] - - # Stub to indicate that arbitrary properties are accepted. - # To access properties that are not valid identifiers you can use `getattr`, e.g. - # `getattr(obj, '$type')` - def __getattr__(self, attr: str) -> object: ... - else: - __pydantic_extra__: Dict[str, object] - - -class ChunkVideoURLInputChunkGeneratedMetadataImageChunkGeneratedMetadata(BaseModel): - type: Optional[Literal["image"]] = None - - file_type: Optional[str] = None - - file_size: Optional[int] = None - - width: Optional[int] = None - - height: Optional[int] = None - - file_extension: Optional[str] = None - - if TYPE_CHECKING: - # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a - # value to this field, so for compatibility we avoid doing it at runtime. 
- __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride] - - # Stub to indicate that arbitrary properties are accepted. - # To access properties that are not valid identifiers you can use `getattr`, e.g. - # `getattr(obj, '$type')` - def __getattr__(self, attr: str) -> object: ... - else: - __pydantic_extra__: Dict[str, object] - - -ChunkVideoURLInputChunkGeneratedMetadata: TypeAlias = Annotated[ - Union[ - ChunkVideoURLInputChunkGeneratedMetadataMarkdownChunkGeneratedMetadata, - ChunkVideoURLInputChunkGeneratedMetadataTextChunkGeneratedMetadata, - ChunkVideoURLInputChunkGeneratedMetadataPdfChunkGeneratedMetadata, - ChunkVideoURLInputChunkGeneratedMetadataCodeChunkGeneratedMetadata, - ChunkVideoURLInputChunkGeneratedMetadataAudioChunkGeneratedMetadata, - ChunkVideoURLInputChunkGeneratedMetadataVideoChunkGeneratedMetadata, - ChunkVideoURLInputChunkGeneratedMetadataImageChunkGeneratedMetadata, - None, - ], - PropertyInfo(discriminator="type"), -] - - -class ChunkVideoURLInputChunkVideoURL(BaseModel): - """Model for video URL validation.""" - - url: str - """The video URL. 
Can be either a URL or a Data URI.""" - - -class ChunkVideoURLInputChunk(BaseModel): - chunk_index: int - """position of the chunk in a file""" - - mime_type: Optional[str] = None - """mime type of the chunk""" - - generated_metadata: Optional[ChunkVideoURLInputChunkGeneratedMetadata] = None - """metadata of the chunk""" - - model: Optional[str] = None - """model used for this chunk""" - - type: Optional[Literal["video_url"]] = None - """Input type identifier""" - - transcription: Optional[str] = None - """speech recognition (sr) text of the video""" - - summary: Optional[str] = None - """summary of the video""" - - video_url: Optional[ChunkVideoURLInputChunkVideoURL] = None - """Model for video URL validation.""" - +__all__ = ["StoreFile", "Chunk"] Chunk: TypeAlias = Annotated[ - Union[ChunkTextInputChunk, ChunkImageURLInputChunk, ChunkAudioURLInputChunk, ChunkVideoURLInputChunk], + Union[TextInputChunk, ImageURLInputChunk, AudioURLInputChunk, VideoURLInputChunk], PropertyInfo(discriminator="type"), ] @@ -1221,7 +57,7 @@ class StoreFile(BaseModel): usage_tokens: Optional[int] = None """Storage usage in tokens""" - config: Optional[Config] = None + config: Optional[StoreFileConfig] = None """Configuration for a file.""" object: Optional[Literal["store.file"]] = None @@ -1229,3 +65,6 @@ class StoreFile(BaseModel): chunks: Optional[List[Chunk]] = None """chunks""" + + content_url: str + """Presigned URL for file content""" diff --git a/src/mixedbread/types/stores/store_file_config.py b/src/mixedbread/types/stores/store_file_config.py new file mode 100644 index 00000000..c30ee8da --- /dev/null +++ b/src/mixedbread/types/stores/store_file_config.py @@ -0,0 +1,15 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from typing import Optional +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["StoreFileConfig"] + + +class StoreFileConfig(BaseModel): + """Configuration for a file.""" + + parsing_strategy: Optional[Literal["fast", "high_quality"]] = None + """Strategy for adding the file, this overrides the store-level default""" diff --git a/src/mixedbread/types/stores/store_file_config_param.py b/src/mixedbread/types/stores/store_file_config_param.py new file mode 100644 index 00000000..82380c0e --- /dev/null +++ b/src/mixedbread/types/stores/store_file_config_param.py @@ -0,0 +1,14 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, TypedDict + +__all__ = ["StoreFileConfigParam"] + + +class StoreFileConfigParam(TypedDict, total=False): + """Configuration for a file.""" + + parsing_strategy: Literal["fast", "high_quality"] + """Strategy for adding the file, this overrides the store-level default""" diff --git a/src/mixedbread/types/stores/text_input_chunk.py b/src/mixedbread/types/stores/text_input_chunk.py new file mode 100644 index 00000000..0080d81f --- /dev/null +++ b/src/mixedbread/types/stores/text_input_chunk.py @@ -0,0 +1,56 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from typing import Union, Optional +from typing_extensions import Literal, Annotated, TypeAlias + +from ..._utils import PropertyInfo +from ..._models import BaseModel +from ..pdf_chunk_generated_metadata import PdfChunkGeneratedMetadata +from ..code_chunk_generated_metadata import CodeChunkGeneratedMetadata +from ..text_chunk_generated_metadata import TextChunkGeneratedMetadata +from ..audio_chunk_generated_metadata import AudioChunkGeneratedMetadata +from ..image_chunk_generated_metadata import ImageChunkGeneratedMetadata +from ..video_chunk_generated_metadata import VideoChunkGeneratedMetadata +from ..markdown_chunk_generated_metadata import MarkdownChunkGeneratedMetadata + +__all__ = ["TextInputChunk", "GeneratedMetadata"] + +GeneratedMetadata: TypeAlias = Annotated[ + Union[ + MarkdownChunkGeneratedMetadata, + TextChunkGeneratedMetadata, + PdfChunkGeneratedMetadata, + CodeChunkGeneratedMetadata, + AudioChunkGeneratedMetadata, + VideoChunkGeneratedMetadata, + ImageChunkGeneratedMetadata, + None, + ], + PropertyInfo(discriminator="type"), +] + + +class TextInputChunk(BaseModel): + chunk_index: int + """position of the chunk in a file""" + + mime_type: Optional[str] = None + """mime type of the chunk""" + + generated_metadata: Optional[GeneratedMetadata] = None + """metadata of the chunk""" + + model: Optional[str] = None + """model used for this chunk""" + + type: Optional[Literal["text"]] = None + """Input type identifier""" + + offset: Optional[int] = None + """The offset of the text in the file relative to the start of the file.""" + + text: Optional[str] = None + """Text content""" + + context: Optional[str] = None + """LLM-generated context that situates this chunk within its source document""" diff --git a/src/mixedbread/types/stores/video_url_input_chunk.py b/src/mixedbread/types/stores/video_url_input_chunk.py new file mode 100644 index 00000000..c1a16e79 --- /dev/null +++ b/src/mixedbread/types/stores/video_url_input_chunk.py @@ -0,0 +1,57 @@ +# File 
generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Union, Optional +from typing_extensions import Literal, Annotated, TypeAlias + +from ..._utils import PropertyInfo +from ..._models import BaseModel +from ..video_url import VideoURL +from ..pdf_chunk_generated_metadata import PdfChunkGeneratedMetadata +from ..code_chunk_generated_metadata import CodeChunkGeneratedMetadata +from ..text_chunk_generated_metadata import TextChunkGeneratedMetadata +from ..audio_chunk_generated_metadata import AudioChunkGeneratedMetadata +from ..image_chunk_generated_metadata import ImageChunkGeneratedMetadata +from ..video_chunk_generated_metadata import VideoChunkGeneratedMetadata +from ..markdown_chunk_generated_metadata import MarkdownChunkGeneratedMetadata + +__all__ = ["VideoURLInputChunk", "GeneratedMetadata"] + +GeneratedMetadata: TypeAlias = Annotated[ + Union[ + MarkdownChunkGeneratedMetadata, + TextChunkGeneratedMetadata, + PdfChunkGeneratedMetadata, + CodeChunkGeneratedMetadata, + AudioChunkGeneratedMetadata, + VideoChunkGeneratedMetadata, + ImageChunkGeneratedMetadata, + None, + ], + PropertyInfo(discriminator="type"), +] + + +class VideoURLInputChunk(BaseModel): + chunk_index: int + """position of the chunk in a file""" + + mime_type: Optional[str] = None + """mime type of the chunk""" + + generated_metadata: Optional[GeneratedMetadata] = None + """metadata of the chunk""" + + model: Optional[str] = None + """model used for this chunk""" + + type: Optional[Literal["video_url"]] = None + """Input type identifier""" + + transcription: Optional[str] = None + """speech recognition (sr) text of the video""" + + summary: Optional[str] = None + """summary of the video""" + + video_url: Optional[VideoURL] = None + """Model for video URL validation.""" diff --git a/src/mixedbread/types/text_chunk_generated_metadata.py b/src/mixedbread/types/text_chunk_generated_metadata.py new file mode 100644 index 00000000..0d4d55c3 --- /dev/null 
+++ b/src/mixedbread/types/text_chunk_generated_metadata.py @@ -0,0 +1,40 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import TYPE_CHECKING, Dict, Optional +from typing_extensions import Literal + +from pydantic import Field as FieldInfo + +from .._models import BaseModel + +__all__ = ["TextChunkGeneratedMetadata"] + + +class TextChunkGeneratedMetadata(BaseModel): + type: Optional[Literal["text"]] = None + + file_type: Optional[Literal["text/plain"]] = None + + language: Optional[str] = None + + word_count: Optional[int] = None + + file_size: Optional[int] = None + + start_line: Optional[int] = None + + num_lines: Optional[int] = None + + file_extension: Optional[str] = None + + if TYPE_CHECKING: + # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a + # value to this field, so for compatibility we avoid doing it at runtime. + __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride] + + # Stub to indicate that arbitrary properties are accepted. + # To access properties that are not valid identifiers you can use `getattr`, e.g. + # `getattr(obj, '$type')` + def __getattr__(self, attr: str) -> object: ... + else: + __pydantic_extra__: Dict[str, object] diff --git a/src/mixedbread/types/video_chunk_generated_metadata.py b/src/mixedbread/types/video_chunk_generated_metadata.py new file mode 100644 index 00000000..d3ab7c61 --- /dev/null +++ b/src/mixedbread/types/video_chunk_generated_metadata.py @@ -0,0 +1,46 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from typing import TYPE_CHECKING, Dict, Optional +from typing_extensions import Literal + +from pydantic import Field as FieldInfo + +from .._models import BaseModel + +__all__ = ["VideoChunkGeneratedMetadata"] + + +class VideoChunkGeneratedMetadata(BaseModel): + type: Optional[Literal["video"]] = None + + file_type: Optional[str] = None + + file_size: Optional[int] = None + + total_duration_seconds: Optional[float] = None + + fps: Optional[float] = None + + width: Optional[int] = None + + height: Optional[int] = None + + frame_count: Optional[int] = None + + has_audio_stream: Optional[bool] = None + + bpm: Optional[int] = None + + file_extension: Optional[str] = None + + if TYPE_CHECKING: + # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a + # value to this field, so for compatibility we avoid doing it at runtime. + __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride] + + # Stub to indicate that arbitrary properties are accepted. + # To access properties that are not valid identifiers you can use `getattr`, e.g. + # `getattr(obj, '$type')` + def __getattr__(self, attr: str) -> object: ... + else: + __pydantic_extra__: Dict[str, object] diff --git a/src/mixedbread/types/video_url.py b/src/mixedbread/types/video_url.py new file mode 100644 index 00000000..885a6bce --- /dev/null +++ b/src/mixedbread/types/video_url.py @@ -0,0 +1,12 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from .._models import BaseModel + +__all__ = ["VideoURL"] + + +class VideoURL(BaseModel): + """Model for video URL validation.""" + + url: str + """The video URL. 
Can be either a URL or a Data URI.""" diff --git a/tests/api_resources/stores/test_files.py b/tests/api_resources/stores/test_files.py index f59802bf..7baab6c8 100644 --- a/tests/api_resources/stores/test_files.py +++ b/tests/api_resources/stores/test_files.py @@ -13,7 +13,6 @@ StoreFile, FileListResponse, FileDeleteResponse, - FileSearchResponse, ) base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") @@ -295,65 +294,6 @@ def test_path_params_delete(self, client: Mixedbread) -> None: store_identifier="store_identifier", ) - @parametrize - def test_method_search(self, client: Mixedbread) -> None: - file = client.stores.files.search( - query="how to configure SSL", - store_identifiers=["string"], - ) - assert_matches_type(FileSearchResponse, file, path=["response"]) - - @parametrize - def test_method_search_with_all_params(self, client: Mixedbread) -> None: - file = client.stores.files.search( - query="how to configure SSL", - store_identifiers=["string"], - top_k=1, - filters={ - "all": [{}, {}], - "any": [{}, {}], - "none": [{}, {}], - }, - file_ids=["123e4567-e89b-12d3-a456-426614174000", "123e4567-e89b-12d3-a456-426614174001"], - search_options={ - "score_threshold": 0, - "rewrite_query": True, - "rerank": True, - "agentic": True, - "return_metadata": True, - "return_chunks": True, - "chunks_per_file": 0, - "apply_search_rules": True, - }, - ) - assert_matches_type(FileSearchResponse, file, path=["response"]) - - @parametrize - def test_raw_response_search(self, client: Mixedbread) -> None: - response = client.stores.files.with_raw_response.search( - query="how to configure SSL", - store_identifiers=["string"], - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - file = response.parse() - assert_matches_type(FileSearchResponse, file, path=["response"]) - - @parametrize - def test_streaming_response_search(self, client: Mixedbread) -> None: - with 
client.stores.files.with_streaming_response.search( - query="how to configure SSL", - store_identifiers=["string"], - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - file = response.parse() - assert_matches_type(FileSearchResponse, file, path=["response"]) - - assert cast(Any, response.is_closed) is True - class TestAsyncFiles: parametrize = pytest.mark.parametrize( @@ -632,62 +572,3 @@ async def test_path_params_delete(self, async_client: AsyncMixedbread) -> None: file_identifier="", store_identifier="store_identifier", ) - - @parametrize - async def test_method_search(self, async_client: AsyncMixedbread) -> None: - file = await async_client.stores.files.search( - query="how to configure SSL", - store_identifiers=["string"], - ) - assert_matches_type(FileSearchResponse, file, path=["response"]) - - @parametrize - async def test_method_search_with_all_params(self, async_client: AsyncMixedbread) -> None: - file = await async_client.stores.files.search( - query="how to configure SSL", - store_identifiers=["string"], - top_k=1, - filters={ - "all": [{}, {}], - "any": [{}, {}], - "none": [{}, {}], - }, - file_ids=["123e4567-e89b-12d3-a456-426614174000", "123e4567-e89b-12d3-a456-426614174001"], - search_options={ - "score_threshold": 0, - "rewrite_query": True, - "rerank": True, - "agentic": True, - "return_metadata": True, - "return_chunks": True, - "chunks_per_file": 0, - "apply_search_rules": True, - }, - ) - assert_matches_type(FileSearchResponse, file, path=["response"]) - - @parametrize - async def test_raw_response_search(self, async_client: AsyncMixedbread) -> None: - response = await async_client.stores.files.with_raw_response.search( - query="how to configure SSL", - store_identifiers=["string"], - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - file = await response.parse() - assert_matches_type(FileSearchResponse, 
file, path=["response"]) - - @parametrize - async def test_streaming_response_search(self, async_client: AsyncMixedbread) -> None: - async with async_client.stores.files.with_streaming_response.search( - query="how to configure SSL", - store_identifiers=["string"], - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - file = await response.parse() - assert_matches_type(FileSearchResponse, file, path=["response"]) - - assert cast(Any, response.is_closed) is True diff --git a/tests/api_resources/test_stores.py b/tests/api_resources/test_stores.py index 5e8ae604..39988178 100644 --- a/tests/api_resources/test_stores.py +++ b/tests/api_resources/test_stores.py @@ -35,6 +35,7 @@ def test_method_create_with_all_params(self, client: Mixedbread) -> None: name="technical-documentation", description="Contains technical specifications and guides", is_public=False, + license="license", expires_after={ "anchor": "last_active_at", "days": 0, @@ -120,6 +121,7 @@ def test_method_update_with_all_params(self, client: Mixedbread) -> None: name="x", description="description", is_public=True, + license="license", expires_after={ "anchor": "last_active_at", "days": 0, @@ -422,6 +424,7 @@ async def test_method_create_with_all_params(self, async_client: AsyncMixedbread name="technical-documentation", description="Contains technical specifications and guides", is_public=False, + license="license", expires_after={ "anchor": "last_active_at", "days": 0, @@ -507,6 +510,7 @@ async def test_method_update_with_all_params(self, async_client: AsyncMixedbread name="x", description="description", is_public=True, + license="license", expires_after={ "anchor": "last_active_at", "days": 0, diff --git a/tests/test_client.py b/tests/test_client.py index 8cc00170..e219d2e6 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -429,6 +429,30 @@ def test_default_query_option(self) -> None: client.close() + def 
test_hardcoded_query_params_in_url(self, client: Mixedbread) -> None: + request = client._build_request(FinalRequestOptions(method="get", url="/foo?beta=true")) + url = httpx.URL(request.url) + assert dict(url.params) == {"beta": "true"} + + request = client._build_request( + FinalRequestOptions( + method="get", + url="/foo?beta=true", + params={"limit": "10", "page": "abc"}, + ) + ) + url = httpx.URL(request.url) + assert dict(url.params) == {"beta": "true", "limit": "10", "page": "abc"} + + request = client._build_request( + FinalRequestOptions( + method="get", + url="/files/a%2Fb?beta=true", + params={"limit": "10"}, + ) + ) + assert request.url.raw_path == b"/files/a%2Fb?beta=true&limit=10" + def test_request_extra_json(self, client: Mixedbread) -> None: request = client._build_request( FinalRequestOptions( @@ -1340,6 +1364,30 @@ async def test_default_query_option(self) -> None: await client.close() + async def test_hardcoded_query_params_in_url(self, async_client: AsyncMixedbread) -> None: + request = async_client._build_request(FinalRequestOptions(method="get", url="/foo?beta=true")) + url = httpx.URL(request.url) + assert dict(url.params) == {"beta": "true"} + + request = async_client._build_request( + FinalRequestOptions( + method="get", + url="/foo?beta=true", + params={"limit": "10", "page": "abc"}, + ) + ) + url = httpx.URL(request.url) + assert dict(url.params) == {"beta": "true", "limit": "10", "page": "abc"} + + request = async_client._build_request( + FinalRequestOptions( + method="get", + url="/files/a%2Fb?beta=true", + params={"limit": "10"}, + ) + ) + assert request.url.raw_path == b"/files/a%2Fb?beta=true&limit=10" + def test_request_extra_json(self, client: Mixedbread) -> None: request = client._build_request( FinalRequestOptions( diff --git a/tests/test_deepcopy.py b/tests/test_deepcopy.py deleted file mode 100644 index e124b497..00000000 --- a/tests/test_deepcopy.py +++ /dev/null @@ -1,58 +0,0 @@ -from mixedbread._utils import 
deepcopy_minimal - - -def assert_different_identities(obj1: object, obj2: object) -> None: - assert obj1 == obj2 - assert id(obj1) != id(obj2) - - -def test_simple_dict() -> None: - obj1 = {"foo": "bar"} - obj2 = deepcopy_minimal(obj1) - assert_different_identities(obj1, obj2) - - -def test_nested_dict() -> None: - obj1 = {"foo": {"bar": True}} - obj2 = deepcopy_minimal(obj1) - assert_different_identities(obj1, obj2) - assert_different_identities(obj1["foo"], obj2["foo"]) - - -def test_complex_nested_dict() -> None: - obj1 = {"foo": {"bar": [{"hello": "world"}]}} - obj2 = deepcopy_minimal(obj1) - assert_different_identities(obj1, obj2) - assert_different_identities(obj1["foo"], obj2["foo"]) - assert_different_identities(obj1["foo"]["bar"], obj2["foo"]["bar"]) - assert_different_identities(obj1["foo"]["bar"][0], obj2["foo"]["bar"][0]) - - -def test_simple_list() -> None: - obj1 = ["a", "b", "c"] - obj2 = deepcopy_minimal(obj1) - assert_different_identities(obj1, obj2) - - -def test_nested_list() -> None: - obj1 = ["a", [1, 2, 3]] - obj2 = deepcopy_minimal(obj1) - assert_different_identities(obj1, obj2) - assert_different_identities(obj1[1], obj2[1]) - - -class MyObject: ... 
- - -def test_ignores_other_types() -> None: - # custom classes - my_obj = MyObject() - obj1 = {"foo": my_obj} - obj2 = deepcopy_minimal(obj1) - assert_different_identities(obj1, obj2) - assert obj1["foo"] is my_obj - - # tuples - obj3 = ("a", "b") - obj4 = deepcopy_minimal(obj3) - assert obj3 is obj4 diff --git a/tests/test_extract_files.py b/tests/test_extract_files.py index 20ec67fe..4a252515 100644 --- a/tests/test_extract_files.py +++ b/tests/test_extract_files.py @@ -35,6 +35,15 @@ def test_multiple_files() -> None: assert query == {"documents": [{}, {}]} +def test_top_level_file_array() -> None: + query = {"files": [b"file one", b"file two"], "title": "hello"} + assert extract_files(query, paths=[["files", ""]]) == [ + ("files[]", b"file one"), + ("files[]", b"file two"), + ] + assert query == {"title": "hello"} + + @pytest.mark.parametrize( "query,paths,expected", [ diff --git a/tests/test_files.py b/tests/test_files.py index 79d9ce62..8e51c40f 100644 --- a/tests/test_files.py +++ b/tests/test_files.py @@ -4,7 +4,8 @@ import pytest from dirty_equals import IsDict, IsList, IsBytes, IsTuple -from mixedbread._files import to_httpx_files, async_to_httpx_files +from mixedbread._files import to_httpx_files, deepcopy_with_paths, async_to_httpx_files +from mixedbread._utils import extract_files readme_path = Path(__file__).parent.parent.joinpath("README.md") @@ -49,3 +50,99 @@ def test_string_not_allowed() -> None: "file": "foo", # type: ignore } ) + + +def assert_different_identities(obj1: object, obj2: object) -> None: + assert obj1 == obj2 + assert obj1 is not obj2 + + +class TestDeepcopyWithPaths: + def test_copies_top_level_dict(self) -> None: + original = {"file": b"data", "other": "value"} + result = deepcopy_with_paths(original, [["file"]]) + assert_different_identities(result, original) + + def test_file_value_is_same_reference(self) -> None: + file_bytes = b"contents" + original = {"file": file_bytes} + result = deepcopy_with_paths(original, [["file"]]) + 
assert_different_identities(result, original) + assert result["file"] is file_bytes + + def test_list_popped_wholesale(self) -> None: + files = [b"f1", b"f2"] + original = {"files": files, "title": "t"} + result = deepcopy_with_paths(original, [["files", ""]]) + assert_different_identities(result, original) + result_files = result["files"] + assert isinstance(result_files, list) + assert_different_identities(result_files, files) + + def test_nested_array_path_copies_list_and_elements(self) -> None: + elem1 = {"file": b"f1", "extra": 1} + elem2 = {"file": b"f2", "extra": 2} + original = {"items": [elem1, elem2]} + result = deepcopy_with_paths(original, [["items", "", "file"]]) + assert_different_identities(result, original) + result_items = result["items"] + assert isinstance(result_items, list) + assert_different_identities(result_items, original["items"]) + assert_different_identities(result_items[0], elem1) + assert_different_identities(result_items[1], elem2) + + def test_empty_paths_returns_same_object(self) -> None: + original = {"foo": "bar"} + result = deepcopy_with_paths(original, []) + assert result is original + + def test_multiple_paths(self) -> None: + f1 = b"file1" + f2 = b"file2" + original = {"a": f1, "b": f2, "c": "unchanged"} + result = deepcopy_with_paths(original, [["a"], ["b"]]) + assert_different_identities(result, original) + assert result["a"] is f1 + assert result["b"] is f2 + assert result["c"] is original["c"] + + def test_extract_files_does_not_mutate_original_top_level(self) -> None: + file_bytes = b"contents" + original = {"file": file_bytes, "other": "value"} + + copied = deepcopy_with_paths(original, [["file"]]) + extracted = extract_files(copied, paths=[["file"]]) + + assert extracted == [("file", file_bytes)] + assert original == {"file": file_bytes, "other": "value"} + assert copied == {"other": "value"} + + def test_extract_files_does_not_mutate_original_nested_array_path(self) -> None: + file1 = b"f1" + file2 = b"f2" + original = 
{ + "items": [ + {"file": file1, "extra": 1}, + {"file": file2, "extra": 2}, + ], + "title": "example", + } + + copied = deepcopy_with_paths(original, [["items", "", "file"]]) + extracted = extract_files(copied, paths=[["items", "", "file"]]) + + assert extracted == [("items[][file]", file1), ("items[][file]", file2)] + assert original == { + "items": [ + {"file": file1, "extra": 1}, + {"file": file2, "extra": 2}, + ], + "title": "example", + } + assert copied == { + "items": [ + {"extra": 1}, + {"extra": 2}, + ], + "title": "example", + } diff --git a/tests/test_utils/test_path.py b/tests/test_utils/test_path.py new file mode 100644 index 00000000..7d148421 --- /dev/null +++ b/tests/test_utils/test_path.py @@ -0,0 +1,89 @@ +from __future__ import annotations + +from typing import Any + +import pytest + +from mixedbread._utils._path import path_template + + +@pytest.mark.parametrize( + "template, kwargs, expected", + [ + ("/v1/{id}", dict(id="abc"), "/v1/abc"), + ("/v1/{a}/{b}", dict(a="x", b="y"), "/v1/x/y"), + ("/v1/{a}{b}/path/{c}?val={d}#{e}", dict(a="x", b="y", c="z", d="u", e="v"), "/v1/xy/path/z?val=u#v"), + ("/{w}/{w}", dict(w="echo"), "/echo/echo"), + ("/v1/static", {}, "/v1/static"), + ("", {}, ""), + ("/v1/?q={n}&count=10", dict(n=42), "/v1/?q=42&count=10"), + ("/v1/{v}", dict(v=None), "/v1/null"), + ("/v1/{v}", dict(v=True), "/v1/true"), + ("/v1/{v}", dict(v=False), "/v1/false"), + ("/v1/{v}", dict(v=".hidden"), "/v1/.hidden"), # dot prefix ok + ("/v1/{v}", dict(v="file.txt"), "/v1/file.txt"), # dot in middle ok + ("/v1/{v}", dict(v="..."), "/v1/..."), # triple dot ok + ("/v1/{a}{b}", dict(a=".", b="txt"), "/v1/.txt"), # dot var combining with adjacent to be ok + ("/items?q={v}#{f}", dict(v=".", f=".."), "/items?q=.#.."), # dots in query/fragment are fine + ( + "/v1/{a}?query={b}", + dict(a="../../other/endpoint", b="a&bad=true"), + "/v1/..%2F..%2Fother%2Fendpoint?query=a%26bad%3Dtrue", + ), + ("/v1/{val}", dict(val="a/b/c"), "/v1/a%2Fb%2Fc"), + 
("/v1/{val}", dict(val="a/b/c?query=value"), "/v1/a%2Fb%2Fc%3Fquery=value"), + ("/v1/{val}", dict(val="a/b/c?query=value&bad=true"), "/v1/a%2Fb%2Fc%3Fquery=value&bad=true"), + ("/v1/{val}", dict(val="%20"), "/v1/%2520"), # escapes escape sequences in input + # Query: slash and ? are safe, # is not + ("/items?q={v}", dict(v="a/b"), "/items?q=a/b"), + ("/items?q={v}", dict(v="a?b"), "/items?q=a?b"), + ("/items?q={v}", dict(v="a#b"), "/items?q=a%23b"), + ("/items?q={v}", dict(v="a b"), "/items?q=a%20b"), + # Fragment: slash and ? are safe + ("/docs#{v}", dict(v="a/b"), "/docs#a/b"), + ("/docs#{v}", dict(v="a?b"), "/docs#a?b"), + # Path: slash, ? and # are all encoded + ("/v1/{v}", dict(v="a/b"), "/v1/a%2Fb"), + ("/v1/{v}", dict(v="a?b"), "/v1/a%3Fb"), + ("/v1/{v}", dict(v="a#b"), "/v1/a%23b"), + # same var encoded differently by component + ( + "/v1/{v}?q={v}#{v}", + dict(v="a/b?c#d"), + "/v1/a%2Fb%3Fc%23d?q=a/b?c%23d#a/b?c%23d", + ), + ("/v1/{val}", dict(val="x?admin=true"), "/v1/x%3Fadmin=true"), # query injection + ("/v1/{val}", dict(val="x#admin"), "/v1/x%23admin"), # fragment injection + ], +) +def test_interpolation(template: str, kwargs: dict[str, Any], expected: str) -> None: + assert path_template(template, **kwargs) == expected + + +def test_missing_kwarg_raises_key_error() -> None: + with pytest.raises(KeyError, match="org_id"): + path_template("/v1/{org_id}") + + +@pytest.mark.parametrize( + "template, kwargs", + [ + ("{a}/path", dict(a=".")), + ("{a}/path", dict(a="..")), + ("/v1/{a}", dict(a=".")), + ("/v1/{a}", dict(a="..")), + ("/v1/{a}/path", dict(a=".")), + ("/v1/{a}/path", dict(a="..")), + ("/v1/{a}{b}", dict(a=".", b=".")), # adjacent vars → ".." + ("/v1/{a}.", dict(a=".")), # var + static → ".." + ("/v1/{a}{b}", dict(a="", b=".")), # empty + dot → "." + ("/v1/%2e/{x}", dict(x="ok")), # encoded dot in static text + ("/v1/%2e./{x}", dict(x="ok")), # mixed encoded ".." in static + ("/v1/.%2E/{x}", dict(x="ok")), # mixed encoded ".." 
in static + ("/v1/{v}?q=1", dict(v="..")), + ("/v1/{v}#frag", dict(v="..")), + ], +) +def test_dot_segment_rejected(template: str, kwargs: dict[str, Any]) -> None: + with pytest.raises(ValueError, match="dot-segment"): + path_template(template, **kwargs)