From 9b4373d5e74ed2641d5df7d38ea166ceb054b346 Mon Sep 17 00:00:00 2001 From: "Joseph T. French" Date: Fri, 27 Mar 2026 20:55:21 -0500 Subject: [PATCH] Add bulk document upload functionality to DocumentClient ## Summary This commit introduces a new method, `upload_bulk`, to the `DocumentClient` class, enabling the upload of multiple markdown documents in a single request. It also updates the `DocumentListItem` model to include a new `document_id` attribute for better document identification. ## Key Accomplishments - **New Method**: Added `upload_bulk` method to handle bulk document uploads, supporting up to 50 documents per request. - **Model Update**: Enhanced `DocumentListItem` to include `document_id`, improving the data structure for document management. ## Changes Breakdown - Implemented the `upload_bulk` method in `document_client.py` to facilitate bulk uploads. - Updated `document_list_item.py` to add the `document_id` attribute and adjusted serialization methods accordingly. - Added a `.githooks/pre-commit` hook that runs `ruff check`, `ruff format --check`, `basedpyright`, and `pytest`. ## Testing Notes - Verify that the `upload_bulk` method correctly uploads multiple documents and handles responses as expected. - Ensure that the new `document_id` field is properly serialized and deserialized in document list items. ## Infrastructure Considerations - No existing `DocumentClient` methods are changed; existing client functionality remains intact. Note that `DocumentListItem.document_id` is a required field: the constructor now requires it, and `from_dict` raises `KeyError` if a payload omits it. --- .githooks/pre-commit | 7 ++++ .../extensions/document_client.py | 39 +++++++++++++++++++ .../models/document_list_item.py | 8 ++++ 3 files changed, 54 insertions(+) create mode 100755 .githooks/pre-commit diff --git a/.githooks/pre-commit b/.githooks/pre-commit new file mode 100755 index 0000000..7abce06 --- /dev/null +++ b/.githooks/pre-commit @@ -0,0 +1,7 @@ +#!/usr/bin/env bash +set -euo pipefail + +uv run ruff check . +uv run ruff format --check . 
+uv run basedpyright +uv run pytest diff --git a/robosystems_client/extensions/document_client.py b/robosystems_client/extensions/document_client.py index 48aeb8b..e8a2913 100644 --- a/robosystems_client/extensions/document_client.py +++ b/robosystems_client/extensions/document_client.py @@ -12,11 +12,14 @@ from ..api.documents.delete_document import sync_detailed as delete_document from ..api.documents.list_documents import sync_detailed as list_documents from ..api.documents.upload_document import sync_detailed as upload_document +from ..api.documents.upload_documents_bulk import sync_detailed as upload_documents_bulk from ..api.search.get_document_section import sync_detailed as get_document_section from ..api.search.search_documents import sync_detailed as search_documents from ..client import AuthenticatedClient from ..models.document_list_response import DocumentListResponse from ..models.document_section import DocumentSection +from ..models.bulk_document_upload_request import BulkDocumentUploadRequest +from ..models.bulk_document_upload_response import BulkDocumentUploadResponse from ..models.document_upload_request import DocumentUploadRequest from ..models.document_upload_response import DocumentUploadResponse from ..models.search_request import SearchRequest @@ -154,6 +157,42 @@ def upload_directory( return results + def upload_bulk( + self, + graph_id: str, + documents: List[Dict[str, Any]], + ) -> BulkDocumentUploadResponse: + """Upload multiple markdown documents (max 50 per request). + + Args: + graph_id: Target graph ID. + documents: List of dicts with keys: title, content, and + optionally tags, folder, external_id. + + Returns: + BulkDocumentUploadResponse with per-document results. 
+ """ + items = [] + for doc in documents: + items.append( + DocumentUploadRequest( + title=doc["title"], + content=doc["content"], + tags=doc.get("tags", UNSET), + folder=doc.get("folder", UNSET), + external_id=doc.get("external_id", UNSET), + ) + ) + + body = BulkDocumentUploadRequest(documents=items) + client = self._get_client() + response = upload_documents_bulk(graph_id=graph_id, client=client, body=body) + if response.status_code != HTTPStatus.OK: + raise Exception( + f"Bulk upload failed ({response.status_code}): {response.content.decode()}" + ) + return response.parsed + def search( self, graph_id: str, diff --git a/robosystems_client/models/document_list_item.py b/robosystems_client/models/document_list_item.py index 60409d5..d2570d2 100644 --- a/robosystems_client/models/document_list_item.py +++ b/robosystems_client/models/document_list_item.py @@ -16,6 +16,7 @@ class DocumentListItem: """A document in the document list. Attributes: + document_id (str): document_title (str): section_count (int): source_type (str): @@ -24,6 +25,7 @@ class DocumentListItem: last_indexed (None | str | Unset): """ + document_id: str document_title: str section_count: int source_type: str @@ -33,6 +35,8 @@ class DocumentListItem: additional_properties: dict[str, Any] = _attrs_field(init=False, factory=dict) def to_dict(self) -> dict[str, Any]: + document_id = self.document_id + document_title = self.document_title section_count = self.section_count @@ -64,6 +68,7 @@ def to_dict(self) -> dict[str, Any]: field_dict.update(self.additional_properties) field_dict.update( { + "document_id": document_id, "document_title": document_title, "section_count": section_count, "source_type": source_type, @@ -81,6 +86,8 @@ def to_dict(self) -> dict[str, Any]: @classmethod def from_dict(cls: type[T], src_dict: Mapping[str, Any]) -> T: d = dict(src_dict) + document_id = d.pop("document_id") + document_title = d.pop("document_title") section_count = d.pop("section_count") @@ -123,6 +130,7 @@ 
def _parse_last_indexed(data: object) -> None | str | Unset: last_indexed = _parse_last_indexed(d.pop("last_indexed", UNSET)) document_list_item = cls( + document_id=document_id, document_title=document_title, section_count=section_count, source_type=source_type,