diff --git a/tests/tools/paddleocr/test_file_input.py b/tests/tools/paddleocr/test_file_input.py
index af56d3789..f57580465 100644
--- a/tests/tools/paddleocr/test_file_input.py
+++ b/tests/tools/paddleocr/test_file_input.py
@@ -1,10 +1,38 @@
import base64
import os
import sys
+from unittest.mock import MagicMock
import pytest
import yaml
+# Stub out the `paddleocr` SDK before the plugin modules are imported, so the
+# tests run without the real package being installed.
+mock_paddleocr = MagicMock()
+
+# Mock public API classes
+mock_paddleocr.PaddleOCRClient = MagicMock
+mock_paddleocr.OCROptions = lambda **kw: MagicMock()
+mock_paddleocr.PPStructureV3Options = lambda **kw: MagicMock()
+mock_paddleocr.PaddleOCRVLOptions = lambda **kw: MagicMock()
+# Use distinct exception subclasses. Aliasing both names to the bare
+# `Exception` would make any `except AuthError` in the code under test catch
+# every error (including assertion failures) and hide real bugs.
+mock_paddleocr.AuthError = type("AuthError", (Exception,), {})
+mock_paddleocr.PaddleOCRAPIError = type("PaddleOCRAPIError", (Exception,), {})
+
+# Mock internal modules for backward compatibility
+mock_paddleocr._api_client = MagicMock()
+mock_paddleocr._api_client.PaddleOCRClient = mock_paddleocr.PaddleOCRClient
+mock_paddleocr._api_client.models = MagicMock()
+mock_paddleocr._api_client.models.OCROptions = mock_paddleocr.OCROptions
+mock_paddleocr._api_client.models.PPStructureV3Options = mock_paddleocr.PPStructureV3Options
+mock_paddleocr._api_client.models.PaddleOCRVLOptions = mock_paddleocr.PaddleOCRVLOptions
+mock_paddleocr._api_client.errors = MagicMock()
+mock_paddleocr._api_client.errors.AuthError = mock_paddleocr.AuthError
+mock_paddleocr._api_client.errors.PaddleOCRAPIError = mock_paddleocr.PaddleOCRAPIError
+
+# Register the stubs under every import path the plugin code may use.
+sys.modules["paddleocr"] = mock_paddleocr
+sys.modules["paddleocr._api_client"] = mock_paddleocr._api_client
+sys.modules["paddleocr._api_client.models"] = mock_paddleocr._api_client.models
+sys.modules["paddleocr._api_client.errors"] = mock_paddleocr._api_client.errors
+
REPO_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../.."))
PLUGIN_DIR = os.path.join(REPO_ROOT, "tools", "paddleocr")
if PLUGIN_DIR not in sys.path:
@@ -45,10 +73,14 @@ def test_file_upload_is_base64_encoded():
file_type=FileType.IMAGE,
)
- payload, normalized_file_type = normalize_file_input(file, "auto")
+ input_value, is_temp_file, file_type_code = normalize_file_input(file, "auto")
- assert payload == base64.b64encode(b"image-bytes").decode("utf-8")
- assert normalized_file_type == 1
+ # New implementation saves to temp file for SDK
+ assert os.path.exists(input_value)
+ assert is_temp_file is True
+ assert file_type_code == 1
+ # Clean up
+ os.unlink(input_value)
def test_pdf_file_upload_infers_file_type():
@@ -56,10 +88,14 @@ def test_pdf_file_upload_infers_file_type():
b"%PDF-1.7", filename="invoice.pdf", mime_type="application/pdf", extension=".pdf"
)
- payload, normalized_file_type = normalize_file_input(file, "auto")
+ input_value, is_temp_file, file_type_code = normalize_file_input(file, "auto")
- assert payload == base64.b64encode(b"%PDF-1.7").decode("utf-8")
- assert normalized_file_type == 0
+ # New implementation saves to temp file for SDK
+ assert os.path.exists(input_value)
+ assert is_temp_file is True
+ assert file_type_code == 0
+ # Clean up
+ os.unlink(input_value)
def test_image_file_upload_infers_file_type_from_filename_when_mime_type_missing():
@@ -71,10 +107,14 @@ def test_image_file_upload_infers_file_type_from_filename_when_mime_type_missing
file_type=FileType.IMAGE,
)
- payload, normalized_file_type = normalize_file_input(file, None)
+ input_value, is_temp_file, file_type_code = normalize_file_input(file, None)
- assert payload == base64.b64encode(b"image-bytes").decode("utf-8")
- assert normalized_file_type == 1
+ # New implementation saves to temp file for SDK
+ assert os.path.exists(input_value)
+ assert is_temp_file is True
+ assert file_type_code == 1
+ # Clean up
+ os.unlink(input_value)
def test_explicit_file_type_overrides_inference():
@@ -86,17 +126,22 @@ def test_explicit_file_type_overrides_inference():
file_type=FileType.IMAGE,
)
- payload, normalized_file_type = normalize_file_input(file, "pdf")
+ input_value, is_temp_file, file_type_code = normalize_file_input(file, "pdf")
- assert payload == base64.b64encode(b"image-bytes").decode("utf-8")
- assert normalized_file_type == 0
+ # New implementation saves to temp file for SDK
+ assert os.path.exists(input_value)
+ assert is_temp_file is True
+ assert file_type_code == 0
+ # Clean up
+ os.unlink(input_value)
def test_legacy_file_string_is_passed_through():
- payload, normalized_file_type = normalize_file_input("https://example.com/scan.pdf", "auto")
+ input_value, is_temp_file, file_type_code = normalize_file_input("https://example.com/scan.pdf", "auto")
- assert payload == "https://example.com/scan.pdf"
- assert normalized_file_type is None
+ assert input_value == "https://example.com/scan.pdf"
+ assert is_temp_file is False
+ assert file_type_code is None
def test_missing_file_input_raises_clear_error():
@@ -106,21 +151,44 @@ def test_missing_file_input_raises_clear_error():
def invoke_tool_with_mocked_api(monkeypatch, tool_cls, credentials, parameters):
captured = {}
- module_name = tool_cls.__module__.split(".")[-1]
-
- def fake_api_request(api_url, params, access_token):
- captured["api_url"] = api_url
- captured["params"] = params
- captured["access_token"] = access_token
- return {
- "errorCode": 0,
- "result": {
- "ocrResults": [{"prunedResult": {"rec_texts": ["hello", "world"]}}],
- "layoutParsingResults": [{"markdown": {"text": "# Parsed", "images": {}}}],
- },
- }
-
- monkeypatch.setattr(f"tools.{module_name}.make_paddleocr_api_request", fake_api_request)
+
+ def fake_sdk_call(**kwargs):
+ # Record the keyword arguments this fake was called with so the calling
+ # test can assert on them through `captured`.
+ captured["kwargs"] = kwargs
+ # Return an ad-hoc stand-in built with type() (neither a real SDK class nor
+ # a dict) that exposes `job_id` and `pages` as attributes.
+ if tool_cls == TextRecognitionTool:
+ return type("OCRResult", (), {"job_id": "test-job", "pages": [
+ type("OCRPage", (), {"pruned_result": {"rec_texts": ["hello", "world"]}, "ocr_image_url": None})()
+ ]})()
+ else:
+ # Every other tool class gets a document-parsing shaped result.
+ return type("DocParsingResult", (), {"job_id": "test-job", "pages": [
+ type("DocParsingPage", (), {"markdown_text": "# Parsed", "markdown_images": {}, "output_images": {}})()
+ ]})()
+
+ # Mock the entire SDK module and client
+ fake_client = MagicMock()
+ fake_client.ocr = fake_sdk_call
+ fake_client.parse_document = fake_sdk_call
+
+ # Mock utils module functions
+ import tools.utils as utils_module
+ monkeypatch.setattr(utils_module, "get_sdk_client", lambda *args: fake_client)
+ monkeypatch.setattr(utils_module, "base64_to_temp_file", lambda *args: "temp_file.png")
+ monkeypatch.setattr(utils_module, "cleanup_temp_file", lambda *args: None)
+
+ # Mock in the specific tool module (they import these directly from utils)
+ if tool_cls == TextRecognitionTool:
+ import tools.text_recognition as tr_module
+ monkeypatch.setattr(tr_module, "get_sdk_client", lambda *args: fake_client)
+ monkeypatch.setattr(tr_module, "cleanup_temp_file", lambda *args: None)
+ elif tool_cls == DocumentParsingTool:
+ import tools.document_parsing as dp_module
+ monkeypatch.setattr(dp_module, "get_sdk_client", lambda *args: fake_client)
+ monkeypatch.setattr(dp_module, "cleanup_temp_file", lambda *args: None)
+ else:
+ import tools.document_parsing_vl as dpv_module
+ monkeypatch.setattr(dpv_module, "get_sdk_client", lambda *args: fake_client)
+ monkeypatch.setattr(dpv_module, "cleanup_temp_file", lambda *args: None)
+
tool = tool_cls.from_credentials(credentials)
list(tool._invoke(parameters))
return captured
@@ -145,11 +213,11 @@ def test_text_recognition_sends_normalized_file_to_api(monkeypatch):
{"file": file, "fileType": "auto", "visualize": False},
)
- assert captured["api_url"] == "https://example.com/text-recognition"
- assert captured["access_token"] == "token"
- assert captured["params"]["file"] == base64.b64encode(b"image-bytes").decode("utf-8")
- assert captured["params"]["fileType"] == 1
- assert captured["params"]["visualize"] is False
+ # SDK receives file_path (temp file), not base64 directly
+ assert "file_path" in captured["kwargs"]
+ assert captured["kwargs"]["file_path"] == "temp_file.png"
+ assert captured["kwargs"]["options"] is not None
+ assert hasattr(captured["kwargs"]["options"], "visualize")
def test_document_parsing_sends_normalized_file_to_api(monkeypatch):
@@ -167,10 +235,9 @@ def test_document_parsing_sends_normalized_file_to_api(monkeypatch):
{"file": file, "fileType": "auto", "markdownIgnoreLabels": "header, footer"},
)
- assert captured["api_url"] == "https://example.com/document-parsing"
- assert captured["params"]["file"] == base64.b64encode(b"%PDF-1.7").decode("utf-8")
- assert captured["params"]["fileType"] == 0
- assert captured["params"]["markdownIgnoreLabels"] == ["header", "footer"]
+ assert "file_path" in captured["kwargs"]
+ assert captured["kwargs"]["file_path"] == "temp_file.png"
+ assert captured["kwargs"]["options"] is not None
def test_document_parsing_vl_sends_normalized_file_to_api(monkeypatch):
@@ -192,10 +259,8 @@ def test_document_parsing_vl_sends_normalized_file_to_api(monkeypatch):
{"file": file, "fileType": "auto", "promptLabel": "undefined"},
)
- assert captured["api_url"] == "https://example.com/document-parsing-vl"
- assert captured["params"]["file"] == base64.b64encode(b"image-bytes").decode("utf-8")
- assert captured["params"]["fileType"] == 1
- assert "promptLabel" not in captured["params"]
+ assert "file_path" in captured["kwargs"]
+ assert captured["kwargs"]["file_path"] == "temp_file.png"
def load_tool_yaml(tool_name: str) -> dict:
diff --git a/tools/paddleocr/manifest.yaml b/tools/paddleocr/manifest.yaml
index 1f29e9fac..872e9f7fd 100644
--- a/tools/paddleocr/manifest.yaml
+++ b/tools/paddleocr/manifest.yaml
@@ -1,4 +1,4 @@
-version: 0.2.6
+version: 0.2.7
type: plugin
author: langgenius
name: paddleocr
diff --git a/tools/paddleocr/provider/paddleocr.py b/tools/paddleocr/provider/paddleocr.py
index 98c6ebfd2..eb1d3b4a6 100644
--- a/tools/paddleocr/provider/paddleocr.py
+++ b/tools/paddleocr/provider/paddleocr.py
@@ -6,6 +6,7 @@
from tools.document_parsing import DocumentParsingTool
from tools.document_parsing_vl import DocumentParsingVlTool
from tools.text_recognition import TextRecognitionTool
+from tools.utils import call_paddleocr_api, get_sdk_client
class PaddleocrProvider(ToolProvider):
@@ -15,36 +16,26 @@ def _validate_credentials(self, credentials: dict[str, Any]) -> None:
"AI Studio access token must be provided"
)
- api_url_keys = (
- "text_recognition_api_url",
- "document_parsing_api_url",
- "document_parsing_vl_api_url",
- )
- tool_classes = (
- TextRecognitionTool,
- DocumentParsingTool,
- DocumentParsingVlTool,
- )
+ # Get base_url (optional, uses SDK default if not provided)
+ base_url = credentials.get("base_url")
+
+ # Test with OCR (works for all models)
test_file = "https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/general_ocr_002.png"
- if not any(key in credentials for key in api_url_keys):
- raise ToolProviderCredentialValidationError(
- "You should provide at least one API URL"
+ try:
+ client_config = get_sdk_client(
+ access_token=credentials["aistudio_access_token"],
+ base_url=base_url,
)
-
- for api_url_key, tool_cls in zip(api_url_keys, tool_classes):
- if api_url_key in credentials:
- try:
- self._test_tool_validation(tool_cls, credentials, test_file)
- except Exception as e:
- raise ToolProviderCredentialValidationError(
- f"Invalid credentials for {tool_cls.__name__}"
- ) from e
-
- def _test_tool_validation(
- self, tool_cls, credentials: dict[str, Any], test_file: str
- ) -> None:
- tool = tool_cls.from_credentials(credentials)
-
- for _ in tool.invoke(tool_parameters={"file": test_file}):
- break
+ call_paddleocr_api(
+ model="PP-OCRv5",
+ file_url=test_file,
+ file_path=None,
+ options={},
+ client_config=client_config,
+ is_document_parsing=False,
+ )
+ except Exception as e:
+ raise ToolProviderCredentialValidationError(
+ f"Validation failed: {e}"
+ ) from e
\ No newline at end of file
diff --git a/tools/paddleocr/provider/paddleocr.yaml b/tools/paddleocr/provider/paddleocr.yaml
index 21bd58e6a..04cac9ff5 100644
--- a/tools/paddleocr/provider/paddleocr.yaml
+++ b/tools/paddleocr/provider/paddleocr.yaml
@@ -34,39 +34,15 @@ credentials_for_provider:
en_US: Get your AI Studio access token
zh_Hans: 获取星河社区访问令牌
url: https://aistudio.baidu.com/index/accessToken
- text_recognition_api_url:
+ base_url:
type: text-input
+ required: false
label:
- en_US: Text Recognition API URL
- zh_Hans: 文字识别 API URL
+ en_US: Base URL (Optional)
+ zh_Hans: Base URL(可选)
placeholder:
- en_US: Text Recognition API URL
- zh_Hans: 文字识别 API URL
+ en_US: https://paddleocr.aistudio-app.com
+ zh_Hans: https://paddleocr.aistudio-app.com
help:
- en_US: Click the "API" button in the upper-left corner, select "Text recognition(PP-OCRv5)", and copy the `API_URL`.
- zh_Hans: 点击左上角的“API”,选择“文字识别(PP-OCRv5)”并复制 `API_URL`
- url: https://aistudio.baidu.com/paddleocr/task
- document_parsing_api_url:
- type: text-input
- label:
- en_US: Document Parsing API URL
- zh_Hans: 文档解析 API URL
- placeholder:
- en_US: Document Parsing API URL
- zh_Hans: 文档解析 API URL
- help:
- en_US: Click the "API" button in the upper-left corner, select "Document parsing(PP-StructureV3)", and copy the `API_URL`.
- zh_Hans: 点击左上角的“API”,选择“文档解析(PP-StructureV3)”并复制 `API_URL`
- url: https://aistudio.baidu.com/paddleocr/task
- document_parsing_vl_api_url:
- type: text-input
- label:
- en_US: Large Model Document Parsing API URL
- zh_Hans: 大模型文档解析 API URL
- placeholder:
- en_US: Large Model Document Parsing API URL
- zh_Hans: 大模型文档解析 API URL
- help:
- en_US: Click the "API" button in the upper-left corner, select "Large Model document parsing(PaddleOCR-VL)", and copy the `API_URL`.
- zh_Hans: 点击左上角的“API”,选择“大模型文档解析(PaddleOCR-VL)”并复制 `API_URL`
- url: https://aistudio.baidu.com/paddleocr/task
+ en_US: Leave empty to use the default PaddleOCR service. Only needed for self-hosted deployments.
+ zh_Hans: 留空则使用默认的 PaddleOCR 服务。仅自建服务时需要填写。
\ No newline at end of file
diff --git a/tools/paddleocr/pyproject.toml b/tools/paddleocr/pyproject.toml
index 8d92d7a7a..4213efd5d 100644
--- a/tools/paddleocr/pyproject.toml
+++ b/tools/paddleocr/pyproject.toml
@@ -1,10 +1,11 @@
[project]
-name = "paddleocr"
+name = "paddleocr-dify"
version = "0.1.0"
description = "Add your description here"
readme = "README.md"
requires-python = ">=3.12"
+# Managed with uv; refresh the lockfile with `uv lock`.
dependencies = [
"dify_plugin>=0.9.0",
"requests>=2.34.2",
@@ -12,4 +13,4 @@ dependencies = [
# uv run black . -C -l 100 && uv run ruff check --fix
[dependency-groups]
-dev = []
+dev = []
\ No newline at end of file
diff --git a/tools/paddleocr/tools/document_parsing.py b/tools/paddleocr/tools/document_parsing.py
index 90bc01817..ad6bad353 100644
--- a/tools/paddleocr/tools/document_parsing.py
+++ b/tools/paddleocr/tools/document_parsing.py
@@ -5,10 +5,11 @@
from dify_plugin.entities.tool import ToolInvokeMessage
from tools.utils import (
- get_markdown_from_result,
- make_paddleocr_api_request,
+ build_pp_structure_v3_options,
+ call_paddleocr_api,
+ cleanup_temp_file,
+ get_sdk_client,
normalize_file_input,
- process_images_from_result,
)
@@ -21,79 +22,100 @@ def _invoke(self, tool_parameters: dict[str, Any]) -> Generator[ToolInvokeMessag
)
access_token = self.runtime.credentials["aistudio_access_token"]
- if "document_parsing_api_url" not in self.runtime.credentials:
- raise RuntimeError(
- "The document parsing API URL is not configured or invalid. Please provide it in the plugin settings."
- )
- api_url = self.runtime.credentials["document_parsing_api_url"]
+ # Get base_url (optional, uses SDK default if not provided)
+ base_url = self.runtime.credentials.get("base_url")
- file_payload, file_type = normalize_file_input(
+ # Normalize file input - returns (input_value, is_temp_file, file_type_code)
+ file_input, is_temp_file, file_type_code = normalize_file_input(
tool_parameters.get("file"), tool_parameters.get("fileType")
)
- params: dict[str, Any] = {"file": file_payload}
- if file_type is not None:
- params["fileType"] = file_type
- for optional_param_name in [
- "fileType",
- "useDocOrientationClassify",
- "useDocUnwarping",
- "useTextlineOrientation",
- "useSealRecognition",
- "useTableRecognition",
- "useFormulaRecognition",
- "useChartRecognition",
- "useRegionDetection",
- "formatBlockContent",
- "layoutThreshold",
- "layoutNms",
- "layoutUnclipRatio",
- "layoutMergeBboxesMode",
- "textDetLimitSideLen",
- "textDetLimitType",
- "textDetThresh",
- "textDetBoxThresh",
- "textDetUnclipRatio",
- "textRecScoreThresh",
- "sealDetLimitSideLen",
- "sealDetLimitType",
- "sealDetThresh",
- "sealDetBoxThresh",
- "sealDetUnclipRatio",
- "sealRecScoreThresh",
- "useWiredTableCellsTransToHtml",
- "useWirelessTableCellsTransToHtml",
- "useTableOrientationClassify",
- "useOcrResultsWithTableCells",
- "useE2eWiredTableRecModel",
- "useE2eWirelessTableRecModel",
- "markdownIgnoreLabels",
- "prettifyMarkdown",
- "showFormulaNumber",
- "visualize",
- ]:
- if optional_param_name in tool_parameters and optional_param_name != "fileType":
- params[optional_param_name] = tool_parameters[optional_param_name]
+ try:
+ # Build options from parameters
+ options = build_pp_structure_v3_options(tool_parameters)
- # Convert markdownIgnoreLabels from comma-separated string to list
- if "markdownIgnoreLabels" in params and isinstance(params["markdownIgnoreLabels"], str):
- params["markdownIgnoreLabels"] = [
- label.strip()
- for label in params["markdownIgnoreLabels"].split(",")
- if label.strip()
- ]
+ # Get API client config
+ client_config = get_sdk_client(access_token, base_url)
- result = make_paddleocr_api_request(api_url, params, access_token)
+ # Call API with PP-StructureV3 model
+ if file_input.startswith(("http://", "https://")):
+ result = call_paddleocr_api(
+ model="PP-StructureV3",
+ file_url=file_input,
+ file_path=None,
+ options=options,
+ client_config=client_config,
+ is_document_parsing=True,
+ )
+ else:
+ result = call_paddleocr_api(
+ model="PP-StructureV3",
+ file_url=None,
+ file_path=file_input,
+ options=options,
+ client_config=client_config,
+ is_document_parsing=True,
+ )
- images, image_path_map, failed_images, blob_messages = process_images_from_result(
- result, self
- )
+ # Process images from result
+ images = []
+ image_path_map = {}
+ failed_images = []
+
+            # Download every image referenced by the markdown and re-host it through
+            # the Dify file API. Failures are best effort: they are logged and
+            # recorded in `failed_images` instead of aborting the whole invocation.
+            import logging
+
+            import requests
+
+            # Use the stdlib module logger; the plugin runtime object is not relied
+            # on to provide a `logger` attribute.
+            image_logger = logging.getLogger(__name__)
+
+            for page in result["pages"]:
+                # `markdown_images` maps the path used inside the markdown to the
+                # image URL; falsy values (no images on the page) are skipped.
+                for image_path, image_url in (page["markdown_images"] or {}).items():
+                    if image_path in image_path_map:
+                        continue
+                    try:
+                        response = requests.get(image_url, timeout=(10, 600))
+                        # An HTTP error body must not be uploaded as if it were the image.
+                        response.raise_for_status()
+                        upload_response = self.session.file.upload(
+                            f"paddleocr_image_{len(images)}.jpg", response.content, "image/jpeg"
+                        )
+                    except Exception as exc:
+                        image_logger.warning("Failed to process image %s: %s", image_path, exc)
+                        failed_images.append(image_path)
+                        continue
+                    images.append(upload_response)
+                    image_path_map[image_path] = upload_response
+                    if not upload_response.preview_url:
+                        # Uploaded, but there is no URL the markdown could link to.
+                        failed_images.append(image_path)
- markdown = get_markdown_from_result(result, image_path_map, failed_images)
+            # Build markdown with image replacement
+            markdown_text_list = []
+            for page in result["pages"]:
+                markdown_text = page["markdown_text"]
+                if markdown_text is None:
+                    continue
+                # Point image tags at the re-hosted copies.
+                for image_path, upload_response in image_path_map.items():
+                    if upload_response.preview_url:
+                        markdown_text = markdown_text.replace(
+                            f'src="{image_path}"',
+                            f'src="{upload_response.preview_url}"',
+                        )
+                # Images that could not be downloaded, or were uploaded without a
+                # preview URL, get a placeholder instead of a dangling relative path.
+                for image_path in failed_images:
+                    markdown_text = markdown_text.replace(
+                        f'src="{image_path}"',
+                        'src="[Image unavailable]"',
+                    )
+                markdown_text_list.append(markdown_text)
+
+            # Emit the assembled markdown as the tool's text output; without this
+            # the list built above is never used and callers only get the JSON.
+            # NOTE(review): the previous implementation also yielded an "images"
+            # variable message - confirm whether dropping it was intentional.
+            yield self.create_text_message("\n\n".join(markdown_text_list))
- for blob_data, blob_meta in blob_messages:
- yield self.create_blob_message(blob_data, meta=blob_meta)
+ # Return raw result as JSON
+ yield self.create_json_message({
+ "job_id": result["job_id"],
+ "pages": [
+ {
+ "markdown_text": page["markdown_text"],
+ "markdown_images": page["markdown_images"],
+ "output_images": page["output_images"],
+ }
+ for page in result["pages"]
+ ]
+ })
- yield self.create_variable_message("images", images)
- yield self.create_text_message(markdown)
- yield self.create_json_message(result)
+ finally:
+ # Clean up temporary file if created
+ cleanup_temp_file(file_input, is_temp_file)
\ No newline at end of file
diff --git a/tools/paddleocr/tools/document_parsing_vl.py b/tools/paddleocr/tools/document_parsing_vl.py
index 9406af801..f3cce642b 100644
--- a/tools/paddleocr/tools/document_parsing_vl.py
+++ b/tools/paddleocr/tools/document_parsing_vl.py
@@ -5,10 +5,11 @@
from dify_plugin.entities.tool import ToolInvokeMessage
from tools.utils import (
- get_markdown_from_result,
- make_paddleocr_api_request,
+ build_paddleocr_vl_options,
+ call_paddleocr_api,
+ cleanup_temp_file,
+ get_sdk_client,
normalize_file_input,
- process_images_from_result,
)
@@ -21,76 +22,102 @@ def _invoke(self, tool_parameters: dict[str, Any]) -> Generator[ToolInvokeMessag
)
access_token = self.runtime.credentials["aistudio_access_token"]
- if "document_parsing_vl_api_url" not in self.runtime.credentials:
- raise RuntimeError(
- "The large model document parsing API URL is not configured or invalid. Please provide it in the plugin settings."
- )
- api_url = self.runtime.credentials["document_parsing_vl_api_url"]
+ # Get base_url (optional, uses SDK default if not provided)
+ base_url = self.runtime.credentials.get("base_url")
- file_payload, file_type = normalize_file_input(
+ # Normalize file input - returns (input_value, is_temp_file, file_type_code)
+ file_input, is_temp_file, file_type_code = normalize_file_input(
tool_parameters.get("file"), tool_parameters.get("fileType")
)
- params: dict[str, Any] = {"file": file_payload}
- if file_type is not None:
- params["fileType"] = file_type
- for optional_param_name in [
- "fileType",
- "useDocOrientationClassify",
- "useDocUnwarping",
- "useLayoutDetection",
- "useChartRecognition",
- "useSealRecognition",
- "useOcrForImageBlock",
- "layoutThreshold",
- "layoutNms",
- "layoutUnclipRatio",
- "layoutMergeBboxesMode",
- "layoutShapeMode",
- "promptLabel",
- "formatBlockContent",
- "repetitionPenalty",
- "temperature",
- "topP",
- "minPixels",
- "maxPixels",
- "maxNewTokens",
- "mergeLayoutBlocks",
- "markdownIgnoreLabels",
- "vlmExtraArgs",
- "prettifyMarkdown",
- "showFormulaNumber",
- "restructurePages",
- "mergeTables",
- "relevelTitles",
- "visualize",
- ]:
- if optional_param_name in tool_parameters and optional_param_name != "fileType":
- params[optional_param_name] = tool_parameters[optional_param_name]
+ try:
+ # Build options from parameters
+ options = build_paddleocr_vl_options(tool_parameters)
- # Convert promptLabel parameter
- if "promptLabel" in params and params["promptLabel"] == "undefined":
- params.pop("promptLabel")
+ # Get API client config
+ client_config = get_sdk_client(access_token, base_url)
- # Convert markdownIgnoreLabels from comma-separated string to list
- if "markdownIgnoreLabels" in params and isinstance(params["markdownIgnoreLabels"], str):
- params["markdownIgnoreLabels"] = [
- label.strip()
- for label in params["markdownIgnoreLabels"].split(",")
- if label.strip()
- ]
+ # Call API with PaddleOCR-VL-1.6 model
+ if file_input.startswith(("http://", "https://")):
+ result = call_paddleocr_api(
+ model="PaddleOCR-VL-1.6",
+ file_url=file_input,
+ file_path=None,
+ options=options,
+ client_config=client_config,
+ is_document_parsing=True,
+ )
+ else:
+ result = call_paddleocr_api(
+ model="PaddleOCR-VL-1.6",
+ file_url=None,
+ file_path=file_input,
+ options=options,
+ client_config=client_config,
+ is_document_parsing=True,
+ )
- result = make_paddleocr_api_request(api_url, params, access_token)
+ # Process images from result
+ images = []
+ image_path_map = {}
+ failed_images = []
- images, image_path_map, failed_images, blob_messages = process_images_from_result(
- result, self
- )
+            # Download every image referenced by the markdown and re-host it through
+            # the Dify file API. Failures are best effort: they are logged and
+            # recorded in `failed_images` instead of aborting the whole invocation.
+            import logging
+
+            import requests
+
+            # Use the stdlib module logger; the plugin runtime object is not relied
+            # on to provide a `logger` attribute.
+            image_logger = logging.getLogger(__name__)
+
+            for page in result["pages"]:
+                # `markdown_images` maps the path used inside the markdown to the
+                # image URL; falsy values (no images on the page) are skipped.
+                for image_path, image_url in (page["markdown_images"] or {}).items():
+                    if image_path in image_path_map:
+                        continue
+                    try:
+                        response = requests.get(image_url, timeout=(10, 600))
+                        # An HTTP error body must not be uploaded as if it were the image.
+                        response.raise_for_status()
+                        upload_response = self.session.file.upload(
+                            f"paddleocr_vl_image_{len(images)}.jpg", response.content, "image/jpeg"
+                        )
+                    except Exception as exc:
+                        image_logger.warning("Failed to process image %s: %s", image_path, exc)
+                        failed_images.append(image_path)
+                        continue
+                    images.append(upload_response)
+                    image_path_map[image_path] = upload_response
+                    if not upload_response.preview_url:
+                        # Uploaded, but there is no URL the markdown could link to.
+                        failed_images.append(image_path)
+
+            # Build markdown with image replacement
+            markdown_text_list = []
+            for page in result["pages"]:
+                markdown_text = page["markdown_text"]
+                if markdown_text is None:
+                    continue
+                # Point image tags at the re-hosted copies.
+                for image_path, upload_response in image_path_map.items():
+                    if upload_response.preview_url:
+                        markdown_text = markdown_text.replace(
+                            f'src="{image_path}"',
+                            f'src="{upload_response.preview_url}"',
+                        )
+                # Images that could not be downloaded, or were uploaded without a
+                # preview URL, get a placeholder instead of a dangling relative path.
+                for image_path in failed_images:
+                    markdown_text = markdown_text.replace(
+                        f'src="{image_path}"',
+                        'src="[Image unavailable]"',
+                    )
+                markdown_text_list.append(markdown_text)
- markdown = get_markdown_from_result(result, image_path_map, failed_images)
+ yield self.create_text_message("\n\n".join(markdown_text_list))
- for blob_data, blob_meta in blob_messages:
- yield self.create_blob_message(blob_data, meta=blob_meta)
+ # Return raw result as JSON
+ yield self.create_json_message({
+ "job_id": result["job_id"],
+ "pages": [
+ {
+ "markdown_text": page["markdown_text"],
+ "markdown_images": page["markdown_images"],
+ "output_images": page["output_images"],
+ }
+ for page in result["pages"]
+ ]
+ })
- yield self.create_variable_message("images", images)
- yield self.create_text_message(markdown)
- yield self.create_json_message(result)
+ finally:
+ # Clean up temporary file if created
+ cleanup_temp_file(file_input, is_temp_file)
\ No newline at end of file
diff --git a/tools/paddleocr/tools/text_recognition.py b/tools/paddleocr/tools/text_recognition.py
index 128236c45..a6da52f27 100644
--- a/tools/paddleocr/tools/text_recognition.py
+++ b/tools/paddleocr/tools/text_recognition.py
@@ -4,7 +4,13 @@
from dify_plugin import Tool
from dify_plugin.entities.tool import ToolInvokeMessage
-from tools.utils import make_paddleocr_api_request, normalize_file_input
+from tools.utils import (
+ build_ocr_options,
+ call_paddleocr_api,
+ cleanup_temp_file,
+ get_sdk_client,
+ normalize_file_input,
+)
class TextRecognitionTool(Tool):
@@ -16,42 +22,64 @@ def _invoke(self, tool_parameters: dict[str, Any]) -> Generator[ToolInvokeMessag
)
access_token = self.runtime.credentials["aistudio_access_token"]
- if "text_recognition_api_url" not in self.runtime.credentials:
- raise RuntimeError(
- "The text recognition API URL is not configured or invalid. Please provide it in the plugin settings."
- )
- api_url = self.runtime.credentials["text_recognition_api_url"]
+ # Get base_url (optional, uses SDK default if not provided)
+ base_url = self.runtime.credentials.get("base_url")
- file_payload, file_type = normalize_file_input(
+ # Normalize file input - returns (input_value, is_temp_file, file_type_code)
+ file_input, is_temp_file, file_type_code = normalize_file_input(
tool_parameters.get("file"), tool_parameters.get("fileType")
)
- params: dict[str, Any] = {"file": file_payload}
- if file_type is not None:
- params["fileType"] = file_type
- for optional_param_name in [
- "fileType",
- "useDocOrientationClassify",
- "useDocUnwarping",
- "useTextlineOrientation",
- "textDetLimitSideLen",
- "textDetLimitType",
- "textDetThresh",
- "textDetBoxThresh",
- "textDetUnclipRatio",
- "textRecScoreThresh",
- "returnWordBox",
- "visualize",
- ]:
- if optional_param_name in tool_parameters and optional_param_name != "fileType":
- params[optional_param_name] = tool_parameters[optional_param_name]
-
- result = make_paddleocr_api_request(api_url, params, access_token)
-
- all_text = []
- for item in result.get("result", {}).get("ocrResults", []):
- text_list = item.get("prunedResult", {}).get("rec_texts")
- if text_list is not None:
- all_text.append("\n".join(text_list))
- yield self.create_text_message("\n\n".join(all_text))
- yield self.create_json_message(result)
+ try:
+ # Build OCR options from parameters
+ options = build_ocr_options(tool_parameters)
+
+ # Get API client config
+ client_config = get_sdk_client(access_token, base_url)
+
+ # Call API
+ if file_input.startswith(("http://", "https://")):
+ result = call_paddleocr_api(
+ model="PP-OCRv5",
+ file_url=file_input,
+ file_path=None,
+ options=options,
+ client_config=client_config,
+ is_document_parsing=False,
+ )
+ else:
+ result = call_paddleocr_api(
+ model="PP-OCRv5",
+ file_url=None,
+ file_path=file_input,
+ options=options,
+ client_config=client_config,
+ is_document_parsing=False,
+ )
+
+ # Extract text for output
+ all_text = []
+ for page in result["pages"]:
+ pruned = page["pruned_result"]
+ if pruned and "rec_texts" in pruned:
+ text_list = pruned["rec_texts"]
+ if text_list is not None:
+ all_text.append("\n".join(text_list))
+
+ yield self.create_text_message("\n\n".join(all_text))
+
+ # Return raw result as JSON
+ yield self.create_json_message({
+ "job_id": result["job_id"],
+ "pages": [
+ {
+ "pruned_result": page["pruned_result"],
+ "ocr_image_url": page["ocr_image_url"],
+ }
+ for page in result["pages"]
+ ]
+ })
+
+ finally:
+ # Clean up temporary file if created
+ cleanup_temp_file(file_input, is_temp_file)
\ No newline at end of file
diff --git a/tools/paddleocr/tools/utils.py b/tools/paddleocr/tools/utils.py
index ff93254d7..36367d299 100644
--- a/tools/paddleocr/tools/utils.py
+++ b/tools/paddleocr/tools/utils.py
@@ -1,19 +1,18 @@
import base64
+import json
import logging
import os
import re
+import time
+import tempfile
from typing import Any, List, Optional, Tuple
+from urllib.parse import urlparse
-import requests
from dify_plugin.file.file import File
from dify_plugin.invocations.file import UploadFileResponse
-REQUEST_TIMEOUT = (10, 600)
-
# Pre-compiled regex patterns for performance
HTML_IMG_PATTERN = re.compile(r'(
]*src=")([^"]+)(")')
-
-# Template for failed image replacement pattern
FAILED_IMG_TAG_TEMPLATE = r'
]*src="[^"]*{escaped_path}[^"]*"[^>]*>'
@@ -22,6 +21,42 @@
IMAGE_EXTENSIONS = {".bmp", ".jpeg", ".jpg", ".png", ".tif", ".tiff", ".webp"}
+def camel_to_snake(name: str) -> str:
+    """Return ``name`` rewritten from camelCase/PascalCase to snake_case.
+
+    Two passes are applied: the first puts an underscore in front of a
+    capitalised word (an upper-case letter followed by lower-case letters),
+    the second splits any remaining lower-case-or-digit/upper-case boundary.
+    The result is lower-cased.
+
+    Args:
+        name: Identifier in camelCase or PascalCase.
+
+    Returns:
+        The identifier in snake_case.
+    """
+    capitalised_word = re.compile('(.)([A-Z][a-z]+)')
+    case_boundary = re.compile('([a-z0-9])([A-Z])')
+
+    words_split = capitalised_word.sub(r'\1_\2', name)
+    fully_split = case_boundary.sub(r'\1_\2', words_split)
+    return fully_split.lower()
+
+
+def extract_base_url(api_url: str) -> str:
+    """Extract the service base URL from a full PaddleOCR API URL.
+
+    The SDK requires a base URL (e.g., https://example.com) but users may
+    provide the full API URL (e.g., https://example.com/ocr). A trailing
+    PaddleOCR endpoint segment is removed, including when the service is
+    mounted under a path prefix (https://example.com/api/ocr ->
+    https://example.com/api). Query string and fragment are dropped.
+
+    Args:
+        api_url: Full API URL, including scheme and host.
+
+    Returns:
+        ``scheme://netloc`` followed by whatever path remains once a trailing
+        slash and a trailing known endpoint segment have been removed.
+    """
+    known_endpoints = {"ocr", "layout-parsing", "paddleocr"}
+
+    parsed = urlparse(api_url)
+    path = parsed.path.rstrip("/")
+    # Only the final segment is examined so that path prefixes are preserved.
+    prefix, _, last_segment = path.rpartition("/")
+    if last_segment in known_endpoints:
+        path = prefix
+    return f"{parsed.scheme}://{parsed.netloc}{path}"
+
+
def convert_file_type(file_type: str | None) -> int | None:
"""Convert file type string to API parameter value.
@@ -35,16 +70,18 @@ def convert_file_type(file_type: str | None) -> int | None:
return 0
elif file_type == "image":
return 1
- else: # "auto" or None
+ else:
return None
-def normalize_file_input(file_value: Any, file_type: str | None) -> tuple[str, int | None]:
- """Normalize PaddleOCR file input for API payloads.
+def normalize_file_input(file_value: Any, file_type: str | None) -> Tuple[str, bool, int | None]:
+ """Normalize PaddleOCR file input.
- Uploaded Dify files are converted to base64 content because the PaddleOCR
- API accepts either a URL or base64-encoded file content in the `file` field.
- Legacy string values are kept unchanged for URL/base64 compatibility.
+ Returns:
+ A tuple of (input_value, is_temp_file, file_type_code):
+ - input_value: URL, file path (temp or regular), or base64 string
+ - is_temp_file: True if the value is a temporary file path that should be deleted
+ - file_type_code: 0 for PDF, 1 for image, None for auto
"""
if file_value is None or (isinstance(file_value, str) and file_value == ""):
raise RuntimeError("File is not provided.")
@@ -53,12 +90,23 @@ def normalize_file_input(file_value: Any, file_type: str | None) -> tuple[str, i
if isinstance(file_value, File):
encoded_file = base64.b64encode(file_value.blob).decode("utf-8")
- if explicit_file_type is not None:
- return encoded_file, explicit_file_type
- return encoded_file, infer_file_type(file_value)
+ temp_file = base64_to_temp_file(encoded_file, infer_file_extension(file_value))
+ file_type_code = explicit_file_type if explicit_file_type is not None else infer_file_type(file_value)
+ return temp_file, True, file_type_code
if isinstance(file_value, str):
- return file_value, explicit_file_type
+ # Check if it's a URL
+ if file_value.startswith(("http://", "https://")):
+ return file_value, False, explicit_file_type
+ # Check if it's a file path (AI reviewer suggestion: check file path before base64 validation)
+ if os.path.exists(file_value):
+ return file_value, False, explicit_file_type
+ # Check if it's base64 (data URL or raw)
+ if file_value.startswith("data:") or is_likely_base64(file_value):
+ temp_file = base64_to_temp_file(extract_base64(file_value))
+ return temp_file, True, explicit_file_type
+ # It's a file path (doesn't exist, but could be relative path)
+ return file_value, False, explicit_file_type
raise RuntimeError("File must be a Dify file, URL, or base64-encoded string.")
@@ -82,6 +130,21 @@ def infer_file_type(file_value: File) -> int | None:
return None
+def infer_file_extension(file_value: File) -> str:
+    """Choose a file-name suffix for a Dify file that is written to disk.
+
+    The MIME type is consulted first: "application/pdf" gives ".pdf" and
+    any "image/<subtype>" gives ".<subtype>". Otherwise the file's own
+    extension is tried, then the extension of its filename, and ".png" is
+    used when neither yields a value.
+
+    Args:
+        file_value: Dify file whose mime_type, extension and filename are read
+
+    Returns:
+        Suffix string to use for the temporary file
+    """
+    mime = (file_value.mime_type or "").lower()
+    if mime == "application/pdf":
+        return ".pdf"
+    if mime.startswith("image/"):
+        subtype = mime.rsplit("/", 1)[-1]
+        return f".{subtype}"
+
+    candidate = normalize_extension(file_value.extension)
+    if candidate is None:
+        filename_suffix = os.path.splitext(file_value.filename or "")[1]
+        candidate = normalize_extension(filename_suffix)
+
+    return candidate or ".png"
+
+
def normalize_extension(extension: str | None) -> str | None:
if not extension:
return None
@@ -89,36 +152,73 @@ def normalize_extension(extension: str | None) -> str | None:
return extension if extension.startswith(".") else f".{extension}"
+def extract_base64(data_url: str) -> str:
+    """Return the base64 payload of a data URL, or the input unchanged.
+
+    Args:
+        data_url: Either a "data:...,<payload>" URL or a raw base64 string
+
+    Returns:
+        Everything after the first comma for a data URL; otherwise the
+        input string itself
+
+    Raises:
+        RuntimeError: If the value starts with "data:" but contains no
+            comma separating the header from the payload
+    """
+    if not data_url.startswith("data:"):
+        return data_url
+    _header, separator, payload = data_url.partition(",")
+    if not separator:
+        # Without this guard a comma-less data URL failed with a bare
+        # IndexError that gave the user no hint about the bad input.
+        raise RuntimeError("Malformed data URL: missing ',' before the payload.")
+    return payload
+
+
+def is_likely_base64(s: str) -> bool:
+    """Heuristically decide whether a string is a base64 payload.
+
+    Strings shorter than 32 characters are rejected outright; longer ones
+    qualify only if strict (validate=True) decoding succeeds.
+
+    Args:
+        s: Candidate string
+
+    Returns:
+        True if the string decodes as base64, False otherwise
+    """
+    if len(s) >= 32:
+        try:
+            base64.b64decode(s, validate=True)
+        except Exception:
+            return False
+        return True
+    return False
+
+
+def base64_to_temp_file(base64_str: str, suffix: str = ".png") -> str:
+    """Save base64 string to a temporary file.
+
+    The file is created with delete=False, so the caller is responsible for
+    removing it once it is no longer needed.
+
+    Args:
+        base64_str: Base64 encoded string
+        suffix: File extension suffix
+
+    Returns:
+        Path to the temporary file
+
+    Raises:
+        binascii.Error: If base64_str cannot be decoded (raised by
+            base64.b64decode before any file is created)
+    """
+    # Decode first: with delete=False the file outlives the `with` block, so
+    # decoding inside it would orphan an empty temp file on invalid input.
+    decoded = base64.b64decode(base64_str)
+    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as f:
+        f.write(decoded)
+    return f.name
+
+
+def bytes_to_temp_file(data: bytes, suffix: str = ".png") -> str:
+    """Write raw bytes to a new temporary file and return its path.
+
+    The file is created with delete=False, so it stays on disk after this
+    function returns.
+
+    Args:
+        data: Raw bytes data
+        suffix: File extension suffix
+
+    Returns:
+        Path to the temporary file
+    """
+    handle = tempfile.NamedTemporaryFile(suffix=suffix, delete=False)
+    try:
+        handle.write(data)
+    finally:
+        handle.close()
+    return handle.name
+
+
+def cleanup_temp_file(file_path: str, is_temp: bool) -> None:
+    """Delete a file, but only when it is flagged as temporary.
+
+    Nothing happens when the flag is False, the path is empty, or the file
+    does not exist. A failed deletion is logged as a warning and not raised.
+
+    Args:
+        file_path: Path to the file
+        is_temp: True if the file is a temporary file that should be deleted
+    """
+    if not (is_temp and file_path):
+        return
+    if not os.path.exists(file_path):
+        return
+    try:
+        os.unlink(file_path)
+    except Exception as exc:
+        logger.warning(f"Failed to clean up temporary file {file_path}: {exc}")
+
+
def extract_image_urls_from_markdown(markdown: str) -> List[str]:
"""Extract image URLs from markdown"""
- # Match various image URL formats, including relative and absolute paths
image_pattern = re.compile(r'
]*src="([^"]*)"[^>]*>', re.IGNORECASE)
matches = image_pattern.findall(markdown)
return matches
-def download_image_from_url(image_url: str) -> bytes:
- """Download image from URL and return image data and MIME type"""
- try:
- logger.debug(f"Downloading image from URL: {image_url}")
- resp = requests.get(image_url, timeout=REQUEST_TIMEOUT)
- resp.raise_for_status()
-
- logger.debug(
- f"Successfully downloaded image from {image_url}, size: {len(resp.content)} bytes"
- )
- return resp.content
- except requests.exceptions.Timeout as e:
- logger.error(f"Timeout downloading image from {image_url}: {e}")
- raise RuntimeError(f"Failed to download image from {image_url}: timeout") from e
- except requests.exceptions.RequestException as e:
- logger.error(f"Network error downloading image from {image_url}: {e}")
- raise RuntimeError(f"Failed to download image from {image_url}: network error") from e
- except Exception as e:
- logger.error(f"Unexpected error downloading image from {image_url}: {e}")
- raise RuntimeError(f"Failed to download image from {image_url}: {e}") from e
-
-
def replace_markdown_image_paths(
markdown: str,
image_path_map: dict[str, UploadFileResponse],
@@ -157,7 +257,6 @@ def replace_markdown_image_paths(
placeholder_count = 0
for failed_path in failed_images:
escaped_path = re.escape(failed_path)
- # Remove entire img tags for failed images using template pattern
pattern = FAILED_IMG_TAG_TEMPLATE.format(escaped_path=escaped_path)
original_markdown = markdown
markdown = re.sub(pattern, "[Image unavailable]", markdown)
@@ -183,9 +282,9 @@ def process_images_from_result(
tool_instance: Tool instance for file operations
"""
images = []
- image_path_map = {} # key: image path, value: UploadFileResponse
- failed_images = [] # images that failed to process
- blob_messages = [] # blob messages to yield: [(data, meta), ...]
+ image_path_map = {}
+ failed_images = []
+ blob_messages = []
image_counter = 0
logger.debug("Processing images from API result")
@@ -193,18 +292,14 @@ def process_images_from_result(
for item in result.get("result", {}).get("layoutParsingResults", []):
markdown_data = item.get("markdown", {})
if markdown_data:
- # Get image dictionary {path: url} from markdown
image_dict = markdown_data.get("images", {})
if image_dict:
- logger.debug(
- f"Found {len(image_dict)} images to process: {list(image_dict.keys())}"
- )
+ logger.debug(f"Found {len(image_dict)} images to process: {list(image_dict.keys())}")
else:
logger.debug("No images found in this markdown item")
for image_path, image_url in image_dict.items():
if image_path in image_path_map:
- # Already processed this path
logger.debug(f"Skipping already processed image: {image_path}")
continue
@@ -213,18 +308,15 @@ def process_images_from_result(
image_processed_successfully = False
try:
- # Download image first
try:
image_bytes = download_image_from_url(image_url)
except Exception as download_error:
logger.warning(
f"Failed to download image {image_path} from {image_url}: {download_error}"
)
- # Cannot download - cannot create blob message, mark as failed for markdown
failed_images.append(image_path)
continue
- # Upload image to dify with error handling
file_name = f"paddleocr_image_{image_counter}.jpg"
logger.debug(f"Uploading image {image_path} as {file_name}")
@@ -240,7 +332,6 @@ def process_images_from_result(
f"Successfully uploaded image {image_path}, preview_url: {upload_response.preview_url}"
)
- # Check if upload was successful but no preview URL
if not upload_response.preview_url:
logger.warning(
f"No preview URL for uploaded image {image_path}, creating blob message as fallback"
@@ -254,7 +345,6 @@ def process_images_from_result(
except Exception as upload_error:
logger.error(f"Failed to upload image {image_path} to dify: {upload_error}")
- # Create blob message as fallback when upload fails
logger.info(
f"Creating blob message as fallback for failed upload of {image_path}"
)
@@ -298,68 +388,398 @@ def get_markdown_from_result(
return "\n\n".join(markdown_text_list)
-def make_paddleocr_api_request(api_url: str, params: dict, access_token: str) -> dict:
+def download_image_from_url(image_url: str) -> bytes:
+ """Download image from URL and return image data and MIME type"""
+ import requests
+
try:
- logger.debug(f"Making PaddleOCR API request to {api_url}")
- resp = requests.post(
- api_url,
- headers={"Client-Platform": "dify", "Authorization": f"token {access_token}"},
- json=params,
- timeout=REQUEST_TIMEOUT,
+ logger.debug(f"Downloading image from URL: {image_url}")
+ resp = requests.get(image_url, timeout=(10, 600))
+ resp.raise_for_status()
+
+ logger.debug(
+ f"Successfully downloaded image from {image_url}, size: {len(resp.content)} bytes"
)
- logger.debug(f"PaddleOCR API request completed with status {resp.status_code}")
+ return resp.content
except requests.exceptions.Timeout as e:
- logger.error(f"PaddleOCR API request timed out: {e}")
- raise RuntimeError("PaddleOCR API request timed out") from e
+ logger.error(f"Timeout downloading image from {image_url}: {e}")
+ raise RuntimeError(f"Failed to download image from {image_url}: timeout") from e
except requests.exceptions.RequestException as e:
- logger.error(f"PaddleOCR API request failed (network error): {e}")
- raise RuntimeError("PaddleOCR API request failed (network error)") from e
+ logger.error(f"Network error downloading image from {image_url}: {e}")
+ raise RuntimeError(f"Failed to download image from {image_url}: network error") from e
+ except Exception as e:
+ logger.error(f"Unexpected error downloading image from {image_url}: {e}")
+ raise RuntimeError(f"Failed to download image from {image_url}: {e}") from e
+
+
+# ==================== HTTP Async Job API Implementation ====================
+
+# Service root used when the caller does not supply a base URL.
+DEFAULT_BASE_URL = "https://paddleocr.aistudio-app.com"
+# Path appended to the base URL for job submission; the status URL of a job
+# is this path followed by "/<job_id>".
+API_PATH = "/api/v2/ocr/jobs"
+# Passed as the `timeout` argument of each individual HTTP request.
+DEFAULT_REQUEST_TIMEOUT = 300.0
+# Default overall budget, in seconds, for polling one job until it finishes.
+DEFAULT_POLL_TIMEOUT = 600.0
+# Polling back-off: the first sleep, the factor applied after every poll, and
+# the upper bound the sleep interval is clamped to.
+DEFAULT_INITIAL_INTERVAL = 3.0
+DEFAULT_MULTIPLIER = 1.5
+DEFAULT_MAX_INTERVAL = 15.0
+
+
+def get_sdk_client(access_token: str, base_url: str | None = None) -> dict[str, Any]:
+    """Assemble the connection settings used for PaddleOCR API calls.
+
+    Args:
+        access_token: AI Studio access token
+        base_url: Base URL or full API URL (optional); a falsy value selects
+            DEFAULT_BASE_URL
+
+    Returns:
+        Dict with the keys "token", "base_url" (without trailing slash) and
+        "headers"
+    """
+    # Callers may hand over a full endpoint URL, so reduce it to its base.
+    resolved_url = extract_base_url(base_url) if base_url else DEFAULT_BASE_URL
+    request_headers = {
+        "Authorization": f"Bearer {access_token}",
+        "Client-Platform": "dify",
+    }
+    return {
+        "token": access_token,
+        "base_url": resolved_url.rstrip("/"),
+        "headers": request_headers,
+    }
+
+
+def build_ocr_options(params: dict[str, Any]) -> dict[str, Any]:
+    """Translate tool parameters into an OCR options dict.
+
+    Entries whose value is None are left out; every remaining key is
+    converted from camelCase to snake_case and its value passed through.
+
+    Args:
+        params: Tool parameters
+
+    Returns:
+        Options dict with snake_case keys
+    """
+    return {
+        camel_to_snake(api_name): value
+        for api_name, value in params.items()
+        if value is not None
+    }
+
+
+def build_pp_structure_v3_options(params: dict[str, Any]) -> dict[str, Any]:
+    """Translate tool parameters into a PPStructureV3 options dict.
+
+    Entries whose value is None are left out and keys are converted from
+    camelCase to snake_case. A string given for "markdownIgnoreLabels" is
+    split on commas into a list of stripped, non-empty labels.
+
+    Args:
+        params: Tool parameters
+
+    Returns:
+        Options dict with snake_case keys
+    """
+    options: dict[str, Any] = {}
+    for api_name, raw_value in params.items():
+        if raw_value is None:
+            continue
+        if api_name == "markdownIgnoreLabels" and isinstance(raw_value, str):
+            stripped_labels = (part.strip() for part in raw_value.split(","))
+            raw_value = [label for label in stripped_labels if label]
+        options[camel_to_snake(api_name)] = raw_value
+    return options
+
+
+def build_paddleocr_vl_options(params: dict[str, Any]) -> dict[str, Any]:
+    """Translate tool parameters into a PaddleOCR-VL options dict.
+
+    Entries whose value is None are left out, as is "promptLabel" when its
+    value is the string "undefined". Keys are converted from camelCase to
+    snake_case. A string given for "markdownIgnoreLabels" is split on
+    commas into a list of stripped, non-empty labels.
+
+    Args:
+        params: Tool parameters
+
+    Returns:
+        Options dict with snake_case keys
+    """
+    options: dict[str, Any] = {}
+    for api_name, raw_value in params.items():
+        prompt_unset = api_name == "promptLabel" and raw_value == "undefined"
+        if raw_value is None or prompt_unset:
+            continue
+        if api_name == "markdownIgnoreLabels" and isinstance(raw_value, str):
+            stripped_labels = (part.strip() for part in raw_value.split(","))
+            raw_value = [label for label in stripped_labels if label]
+        options[camel_to_snake(api_name)] = raw_value
+    return options
+
+
+def _submit_job(
+ model: str,
+ file_url: str | None,
+ file_path: str | None,
+ options: dict[str, Any],
+ base_url: str,
+ headers: dict[str, str],
+) -> str:
+ """Submit job and return job_id.
+
+ Args:
+ model: Model name (e.g., "PP-OCRv5", "PP-StructureV3", "PaddleOCR-VL-1.6")
+ file_url: URL of the file (if using URL input)
+ file_path: Path to the file (if using file input)
+ options: Optional payload parameters
+ base_url: Base API URL
+ headers: Request headers
+
+ Returns:
+ job_id string
+
+ Raises:
+ RuntimeError: If submission fails
+ """
+ import requests
+
+ jobs_url = f"{base_url}{API_PATH}"
try:
- resp.raise_for_status()
- except requests.exceptions.HTTPError as e:
- status = resp.status_code
+ if file_url:
+ # Submit with URL
+ body = {
+ "fileUrl": file_url,
+ "model": model,
+ "optionalPayload": options,
+ }
+ resp = requests.post(jobs_url, json=body, headers=headers, timeout=DEFAULT_REQUEST_TIMEOUT)
+ else:
+ # Submit with file
+ data = {
+ "model": model,
+ "optionalPayload": json.dumps(options),
+ }
+ with open(file_path, "rb") as f:
+ resp = requests.post(
+ jobs_url, data=data, files={"file": f}, headers=headers, timeout=DEFAULT_REQUEST_TIMEOUT
+ )
+ except requests.Timeout as e:
+ raise RuntimeError(f"Request timed out: {e}") from e
+ except requests.ConnectionError as e:
+ raise RuntimeError(f"Connection failed: {e}") from e
+
+ if not 200 <= resp.status_code < 300:
+ try:
+ payload = resp.json()
+ msg = payload.get("msg") or payload.get("message") or payload.get("error") or resp.text
+ except ValueError:
+ msg = resp.text
+ raise RuntimeError(f"Job submission failed (HTTP {resp.status_code}): {msg}")
+
+ try:
+ payload = resp.json()
+ job_id = payload.get("data", {}).get("jobId") or payload.get("jobId")
+ if not job_id:
+ raise RuntimeError(f"Job ID not found in response: {payload}")
+ return job_id
+ except (ValueError, KeyError) as e:
+ raise RuntimeError(f"Failed to parse job submission response: {e}") from e
+
+
+def _poll_job(
+ job_id: str,
+ base_url: str,
+ headers: dict[str, str],
+ max_wait_time: float = DEFAULT_POLL_TIMEOUT,
+) -> tuple[list[dict[str, Any]], dict[str, Any]]:
+ """Poll job until done, return (jsonl_data, status_data).
+
+ Args:
+ job_id: Job ID
+ base_url: Base API URL
+ headers: Request headers
+ max_wait_time: Maximum wait time in seconds
+
+ Returns:
+ Tuple of (jsonl_data list, status_data dict)
+
+ Raises:
+ RuntimeError: If polling fails or job fails
+ """
+ import requests
+
+ jobs_url = f"{base_url}{API_PATH}"
+ status_url = f"{jobs_url}/{job_id}"
- if status in (400, 422):
+ interval = DEFAULT_INITIAL_INTERVAL
+ start = time.monotonic()
+ deadline = start + max_wait_time
+
+ while True:
+ now = time.monotonic()
+ if now >= deadline:
+ raise RuntimeError(f"Job {job_id} timed out after {max_wait_time:.1f} seconds")
+
+ try:
+ resp = requests.get(status_url, headers=headers, timeout=DEFAULT_REQUEST_TIMEOUT)
+ except requests.Timeout as e:
+ raise RuntimeError(f"Request timed out: {e}") from e
+ except requests.ConnectionError as e:
+ raise RuntimeError(f"Connection failed: {e}") from e
+
+ if not 200 <= resp.status_code < 300:
try:
- result = resp.json()
- err_code = result.get("errorCode")
- err_msg = result.get("errorMsg")
+ payload = resp.json()
+ msg = payload.get("msg") or payload.get("message") or payload.get("error") or resp.text
except ValueError:
- err_code = None
- err_msg = resp.text or "Bad Request"
+ msg = resp.text
+ raise RuntimeError(f"Poll failed (HTTP {resp.status_code}): {msg}")
+
+ try:
+ data = resp.json()
+ state = data.get("data", {}).get("state") or data.get("state")
+ except (ValueError, KeyError) as e:
+ raise RuntimeError(f"Failed to parse poll response: {e}") from e
+
+ if state == "done":
+ # Get result URL
+ result_json_url = data.get("data", {}).get("resultJsonUrl") or data.get("resultJsonUrl")
+ if not result_json_url:
+ raise RuntimeError(f"Result URL not found in response: {data}")
+
+ # Fetch JSONL result
+ try:
+ resp = requests.get(result_json_url, timeout=DEFAULT_REQUEST_TIMEOUT)
+ resp.raise_for_status()
+ except requests.Timeout as e:
+ raise RuntimeError(f"Result download timed out: {e}") from e
+ except requests.ConnectionError as e:
+ raise RuntimeError(f"Result download failed: {e}") from e
+
+ # Parse JSONL
+ lines = resp.text.strip().split("\n")
+ jsonl_data = []
+ for line in lines:
+ line = line.strip()
+ if line:
+ try:
+ jsonl_data.append(json.loads(line))
+ except json.JSONDecodeError as e:
+ raise RuntimeError(f"Malformed JSONL result: {e}") from e
+
+ return jsonl_data, data
+
+ if state == "failed":
+ error_msg = data.get("data", {}).get("errorMsg") or data.get("errorMsg") or "Unknown error"
+ raise RuntimeError(f"Job {job_id} failed: {error_msg}")
+
+ # Continue polling
+ remaining = deadline - time.monotonic()
+ if remaining <= 0:
+ raise RuntimeError(f"Job {job_id} timed out after {max_wait_time:.1f} seconds")
+
+ sleep_time = min(interval, remaining)
+ time.sleep(sleep_time)
+ interval = min(interval * DEFAULT_MULTIPLIER, DEFAULT_MAX_INTERVAL)
+
- logger.error(f"PaddleOCR API returned {status}: code={err_code}, msg={err_msg}")
- raise RuntimeError(
- f"PaddleOCR API returned {status}: code={err_code}, msg={err_msg}"
- ) from e
+def _parse_ocr_result(job_id: str, jsonl_data: list[dict[str, Any]]) -> dict[str, Any]:
+ """Parse OCR result into compatible format.
- if status in (401, 403):
- logger.error(f"PaddleOCR API authorization failed ({status})")
- raise RuntimeError(f"PaddleOCR API authorization failed ({status})") from e
+ Args:
+ job_id: Job ID
+ jsonl_data: JSONL data list
+
+ Returns:
+ Dict with job_id and pages list
- if status == 429:
- logger.warning("PaddleOCR API rate limit exceeded (429)")
- raise RuntimeError("PaddleOCR API rate limit exceeded (429)") from e
+ Raises:
+ RuntimeError: If parsing fails
+ """
+ try:
+ pages = []
+ for line_obj in jsonl_data:
+ result = line_obj["result"]
+ for item in result["ocrResults"]:
+ pages.append(
+ {
+ "pruned_result": item["prunedResult"],
+ "ocr_image_url": item.get("ocrImage"),
+ }
+ )
+ return {
+ "job_id": job_id,
+ "pages": pages,
+ }
+ except (KeyError, TypeError) as e:
+ raise RuntimeError(f"Malformed OCR result payload: {e}") from e
- if status in (500, 502, 503, 504):
- logger.error(f"PaddleOCR API service unavailable ({status})")
- raise RuntimeError(f"PaddleOCR API service unavailable ({status})") from e
- logger.error(f"PaddleOCR API returned HTTP {status}: {resp.text}")
- raise RuntimeError(f"PaddleOCR API returned HTTP {status}: {resp.text}") from e
+def _parse_doc_parsing_result(job_id: str, jsonl_data: list[dict[str, Any]]) -> dict[str, Any]:
+ """Parse doc parsing result into compatible format.
+ Args:
+ job_id: Job ID
+ jsonl_data: JSONL data list
+
+ Returns:
+ Dict with job_id and pages list
+
+ Raises:
+ RuntimeError: If parsing fails
+ """
try:
- result = resp.json()
- logger.debug("Successfully parsed PaddleOCR API response")
- except ValueError as e:
- logger.error(f"Failed to decode JSON response from PaddleOCR API: {resp.text}")
- raise RuntimeError(f"Failed to decode JSON response from PaddleOCR API: {resp.text}") from e
-
- err_code = result.get("errorCode")
- err_msg = result.get("errorMsg")
- if err_code != 0:
- logger.error(f"PaddleOCR API returned error: code={err_code}, msg={err_msg}")
- raise RuntimeError(f"PaddleOCR API returned error: code={err_code}, msg={err_msg}")
-
- return result
+ pages = []
+ for line_obj in jsonl_data:
+ result = line_obj["result"]
+ for item in result["layoutParsingResults"]:
+ markdown = item["markdown"]
+ pages.append(
+ {
+ "markdown_text": markdown["text"],
+ "markdown_images": markdown.get("images", {}),
+ "output_images": item.get("outputImages", {}),
+ }
+ )
+ return {
+ "job_id": job_id,
+ "pages": pages,
+ }
+ except (KeyError, TypeError) as e:
+ raise RuntimeError(f"Malformed document parsing result payload: {e}") from e
+
+
+def call_paddleocr_api(
+    model: str,
+    file_url: str | None,
+    file_path: str | None,
+    options: dict[str, Any],
+    client_config: dict[str, Any],
+    is_document_parsing: bool = False,
+) -> dict[str, Any]:
+    """Run one PaddleOCR job end to end: submit, poll, then parse the result.
+
+    Args:
+        model: Model name (e.g., "PP-OCRv5", "PP-StructureV3", "PaddleOCR-VL-1.6")
+        file_url: URL of the file (if using URL input)
+        file_path: Path to the file (if using file input)
+        options: Optional payload parameters
+        client_config: Client config from get_sdk_client(); its "base_url"
+            and "headers" entries are used
+        is_document_parsing: True for doc parsing, False for OCR
+
+    Returns:
+        Parsed result dict with job_id and pages
+
+    Raises:
+        RuntimeError: If API call fails
+    """
+    base_url = client_config["base_url"]
+    headers = client_config["headers"]
+
+    job_id = _submit_job(model, file_url, file_path, options, base_url, headers)
+    # The status payload returned next to the JSONL rows is not needed here.
+    jsonl_data, _status_data = _poll_job(job_id, base_url, headers)
+
+    parse_result = _parse_doc_parsing_result if is_document_parsing else _parse_ocr_result
+    return parse_result(job_id, jsonl_data)
\ No newline at end of file
diff --git a/tools/paddleocr/uv.lock b/tools/paddleocr/uv.lock
index dda9a8ba2..8ba50b5bb 100644
--- a/tools/paddleocr/uv.lock
+++ b/tools/paddleocr/uv.lock
@@ -365,11 +365,11 @@ wheels = [
[[package]]
name = "idna"
-version = "3.16"
+version = "3.18"
source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/1a/88/bcf9709822fe69d02c2a6a77956c98ce6ea8ca8767a9aadcedc7eb6a2390/idna-3.16.tar.gz", hash = "sha256:d7a6da03db833450fca25d2358ac9ff06cd624577a4aea3a596d5c0f77b8e03d", size = 203770, upload-time = "2026-05-22T00:16:18.781Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/cd/63/9496c57188a2ee585e0f1db071d75089a11e98aa86eb99d9d7618fc1edce/idna-3.18.tar.gz", hash = "sha256:ffb385a7e039654cef1ab9ef32c6fafe283c0c0467bba1d9029738ce4a14a848", size = 196711, upload-time = "2026-06-02T14:34:07.794Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/94/16/70255075a9859a0e3adb789b68ceb0e210dec03934245fd98d248226572f/idna-3.16-py3-none-any.whl", hash = "sha256:cc246e3a3f89580c3a951b5ad298ca4638078b2cdd4f115654332b5c26daded5", size = 74165, upload-time = "2026-05-22T00:16:16.698Z" },
+ { url = "https://files.pythonhosted.org/packages/1e/5e/d4e9f1a599fb8e573b7b87160658329fbf28d19eac2718f51fc3def3aa5a/idna-3.18-py3-none-any.whl", hash = "sha256:7f952cbe720b688055e3f87de14f5c3e5fdaa8bc3928985c4077ca689de849a2", size = 65455, upload-time = "2026-06-02T14:34:06.319Z" },
]
[[package]]
@@ -565,7 +565,7 @@ wheels = [
]
[[package]]
-name = "paddleocr"
+name = "paddleocr-dify"
version = "0.1.0"
source = { virtual = "." }
dependencies = [
@@ -1138,29 +1138,35 @@ wheels = [
[[package]]
name = "zope-interface"
-version = "8.4"
+version = "8.5"
source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/9f/65/34a6e6e4dfa260c4c55ee02bb2fc53625e126ff0181485286cf0c9d453d6/zope_interface-8.4.tar.gz", hash = "sha256:9dbee7925a23aa6349738892c911019d4095a96cff487b743482073ecbc174a8", size = 257736, upload-time = "2026-04-25T07:22:10.439Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/08/dc/50550cfcbb2ea3cbca5f1d7ed05c8aa840f831a0f2d63aec0a953f7c590e/zope_interface-8.5.tar.gz", hash = "sha256:7a3ba1c5877f0f3e3906b02ddf793abed2becc2948116414ce0e1dd820b68d6d", size = 257957, upload-time = "2026-05-26T06:50:14.574Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/b8/96/0017b980424125cf98a9851d8fd3e24939818b7a82ecdd19ae672bb2413f/zope_interface-8.4-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:84064876ed96ddd0744e3ad5d37134c758d77885e54113567792671405a02bac", size = 211604, upload-time = "2026-04-25T07:28:08.13Z" },
- { url = "https://files.pythonhosted.org/packages/59/4c/2cf5c45477fdd58a2c786d0c0d1817cbaaff8743d98ae72c643c4fe3be7b/zope_interface-8.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:81ed23698bfb588c48b1756129814b890febac971ff6c8a414f82601773145bb", size = 211783, upload-time = "2026-04-25T07:28:10.028Z" },
- { url = "https://files.pythonhosted.org/packages/fa/8c/efabdafc25ed44ef9c1084aad9870bb6c2c9b78e542684efe6865c0f0067/zope_interface-8.4-cp312-cp312-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:e0b9d7e958657fad414f8272afcdf0b8a873fbbb2bb6a6287232d2f11a232bf8", size = 264752, upload-time = "2026-04-25T07:28:11.773Z" },
- { url = "https://files.pythonhosted.org/packages/53/5a/c4d52c58d5fee4ff67cc02f0dec24d0e84428520f67a52f1e4086f0e7779/zope_interface-8.4-cp312-cp312-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:eef0a49e041f4dc4d2a6ab894b4fd0c5354e0e8037e731fb953531e59b0d3d33", size = 269829, upload-time = "2026-04-25T07:28:13.988Z" },
- { url = "https://files.pythonhosted.org/packages/16/d2/df8f339c93bb5adee695546ba90d0daa2917338a4792281f6b8e652a9328/zope_interface-8.4-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:8b302f955c36e924e1f4fe70dd9105ff06235857861c6ae72c3b10b016aeee99", size = 269452, upload-time = "2026-04-25T07:28:16.403Z" },
- { url = "https://files.pythonhosted.org/packages/17/4b/bd97b1a21bb2c16d66a42f6c7a43c0a5afcfaf14c68d3b7d2ee6afb28e52/zope_interface-8.4-cp312-cp312-win_amd64.whl", hash = "sha256:4ae6a1e111642dbf724f635424dcaf5a5c8abbde49eac3f452f5323ffaa10232", size = 214420, upload-time = "2026-04-25T07:28:18.405Z" },
- { url = "https://files.pythonhosted.org/packages/7d/85/1477f23cf3b0476608ca987b4338f91439abb5b96564ac26b26d2cde38fd/zope_interface-8.4-cp312-cp312-win_arm64.whl", hash = "sha256:2e9e4aa33b76877af903d5532545e64d24ade0f6f80d9d1a31e6efcea76a60bc", size = 212992, upload-time = "2026-04-25T07:28:20.48Z" },
- { url = "https://files.pythonhosted.org/packages/8e/6a/a08c62bc1fa0e34fe7b8b401646cba4817427c716bfbef6cc88937cd327f/zope_interface-8.4-cp313-cp313-macosx_10_9_x86_64.whl", hash = "sha256:cd55965d715413038774aead54851bc3dbdd74a69f3ce30252182a94407b9905", size = 211924, upload-time = "2026-04-25T07:28:22.219Z" },
- { url = "https://files.pythonhosted.org/packages/50/30/2011f17e00ff078658bc317e1f7eccd7843fc1ce60695b665b0a52c45c1b/zope_interface-8.4-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:0d88c1f106a4f06e074a3ada2d20f4a602e3f2871c4f55726ed5d91e94ec19b1", size = 211995, upload-time = "2026-04-25T07:28:24.107Z" },
- { url = "https://files.pythonhosted.org/packages/25/f3/a16fe884571cfa89271412dbb40def6d6865824428d1e14785a82795100c/zope_interface-8.4-cp313-cp313-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:36c575356732d59ffd3279ad67e302a6fe517e67db5b061b36b377ee0fa016c4", size = 264443, upload-time = "2026-04-25T07:28:26.401Z" },
- { url = "https://files.pythonhosted.org/packages/83/88/e08923fcd8a8c8704af05a90418b07cd897ac90865925b37d7ad8139adfa/zope_interface-8.4-cp313-cp313-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:29f09ec8bda65f7b30294328070070a2590b90f252f834ee0817cdb0e2c35f6a", size = 269626, upload-time = "2026-04-25T07:28:28.423Z" },
- { url = "https://files.pythonhosted.org/packages/27/67/96c94cd307f9946d0b0f03402a335f7aae7b4f0b129b5734cc56cc78cb65/zope_interface-8.4-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:2bc388cebcb753d21eaf2a0481fd6f0ce6840a47300a40dcec0b56bac27d0f97", size = 269583, upload-time = "2026-04-25T07:28:30.434Z" },
- { url = "https://files.pythonhosted.org/packages/e2/d4/7e9fcc8bb0dba5d023b9fca92035d68c018457cc550e9d51746670b76a6b/zope_interface-8.4-cp313-cp313-win_amd64.whl", hash = "sha256:3e5866917ccb57d929e515a1136d729bd3fa4f367965fb16e38a4bc72cb05521", size = 214422, upload-time = "2026-04-25T07:28:32.201Z" },
- { url = "https://files.pythonhosted.org/packages/16/26/b0bcde302f6a4c155d047a8ab5cba1003363031919d6e8f3bcdc139c28a6/zope_interface-8.4-cp313-cp313-win_arm64.whl", hash = "sha256:f1f854bef8bc137519e4413bcc1322d55faad28b20b3ca39f7bec49d2f1b26df", size = 213029, upload-time = "2026-04-25T07:28:34.677Z" },
- { url = "https://files.pythonhosted.org/packages/f6/d5/ca60c8b404b303d9490e1417430a5198a77557dbeb17c1cb31616e432318/zope_interface-8.4-cp314-cp314-macosx_10_9_x86_64.whl", hash = "sha256:7cbb887fdbfaacb4c362dbb487033551646e28013ad5ffe72e96eb260003a1a1", size = 212012, upload-time = "2026-04-25T07:28:36.88Z" },
- { url = "https://files.pythonhosted.org/packages/83/64/6bb9f54250c817e24b39e986f173b6cd21ff658bec6c6cc0baad05d761e4/zope_interface-8.4-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:a5638c6be715116d3453e6d099c299c6844d54810de7445ce116424e905ede06", size = 212071, upload-time = "2026-04-25T07:28:38.742Z" },
- { url = "https://files.pythonhosted.org/packages/c6/cf/42851262e102723058019dc7d0b48210b85a935f79ae32ce60ddccc2e8fb/zope_interface-8.4-cp314-cp314-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:b8147b40bfcd53803870a9519e0879ff066aeecc2fcff8295663c1b17fc38dc2", size = 266075, upload-time = "2026-04-25T07:28:41.084Z" },
- { url = "https://files.pythonhosted.org/packages/d2/a7/e48c79b836f6f0a2c219288e2ec343517f90e95c93de5435a8a23918bf20/zope_interface-8.4-cp314-cp314-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:049ba3c7b38cc400ae08e011617635706e0f442e1d075db1b015246fcbf6091e", size = 269127, upload-time = "2026-04-25T07:28:42.868Z" },
- { url = "https://files.pythonhosted.org/packages/6a/40/0e26f24d3a2f34f0de2cfeaab6458a865284d9d1fa317ab78913aa1f7322/zope_interface-8.4-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:9c4ac009c2c8e43283842f80387c4d4b41bcbc293391c3b9ab71532ae1ccc301", size = 269446, upload-time = "2026-04-25T07:28:44.97Z" },
- { url = "https://files.pythonhosted.org/packages/91/d5/20310601450367fc35fa28b0544c98d0347b8cc25eaf106a2c4cc36841e1/zope_interface-8.4-cp314-cp314-win_amd64.whl", hash = "sha256:4713bf651ec36e7eea49d2ace4f0e89bec2b33a339674874b1121f2537edc62a", size = 215199, upload-time = "2026-04-25T07:28:47.146Z" },
- { url = "https://files.pythonhosted.org/packages/5b/00/0d22ce75126e31f81baa5889e2a40aad37c8e34d1220cf8b18d744f2b5d9/zope_interface-8.4-cp314-cp314-win_arm64.whl", hash = "sha256:d934497c4b72d5f528d2b5ebe9b8b5a7004b5877948ebd4ea00c2432fb27178f", size = 213178, upload-time = "2026-04-25T07:28:48.868Z" },
+ { url = "https://files.pythonhosted.org/packages/97/cc/b84123a948f3162a34623e188922827cd845244fdd043ed20f8d02228caa/zope_interface-8.5-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:8e6ee90c2e6de7c37058d5fa41f123c8b13a312db8d1e0fb5840d7f4bcdff9c9", size = 212165, upload-time = "2026-05-26T06:49:26.566Z" },
+ { url = "https://files.pythonhosted.org/packages/4e/78/cbceec44f1b27208a76c1a688c131302685852406a23df5aab68324109cc/zope_interface-8.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:c1adc90d3576b3b4c4de4953e6002c37bef28b78d7fa54c1bbfd0c50f022fe7c", size = 212341, upload-time = "2026-05-26T06:49:28.182Z" },
+ { url = "https://files.pythonhosted.org/packages/e1/c3/005032195ff3b210c139b7c560ed5c534e844b0907d8e44d2b3d8919305e/zope_interface-8.5-cp312-cp312-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:e6347b8d8d12c5eca6502450a92be30079b7acfade2c4f693efa0deb8871b06e", size = 265296, upload-time = "2026-05-26T06:49:29.741Z" },
+ { url = "https://files.pythonhosted.org/packages/c5/66/1036543d6a66bc04c19df3cf650f3ad938a002ab0a443c24e23e8de5e8b9/zope_interface-8.5-cp312-cp312-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:5e970dabea777a24b0b0bbf9dae3ab75ce8b2d8e948edf4875627034b21f3560", size = 270689, upload-time = "2026-05-26T06:49:31.767Z" },
+ { url = "https://files.pythonhosted.org/packages/30/4c/8b56259558cace4414e753ca6740396a1f59d4a95ddb55b4658600408670/zope_interface-8.5-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:f0b48ccadaa9839e09ff81e969703cecb3f402c813bfe8b958652e699bea69f5", size = 270280, upload-time = "2026-05-26T06:49:33.489Z" },
+ { url = "https://files.pythonhosted.org/packages/f9/ea/649908c83aa8fdb7faf2ddca4d3cf6fb8f2157121267dc56e8f72681e26c/zope_interface-8.5-cp312-cp312-win_amd64.whl", hash = "sha256:e0e311f1277468c08fd59a2b41f71b43d25dff639789d364747acd1705c0df6e", size = 215019, upload-time = "2026-05-26T06:49:35.607Z" },
+ { url = "https://files.pythonhosted.org/packages/9f/97/da13037b4c563e4df32eedbc819f8c00b754af494f68211e3dffd48d52da/zope_interface-8.5-cp312-cp312-win_arm64.whl", hash = "sha256:652b73107a04159ec6c020db6c1543d4f1e8f4d069bd2aac88a947820923517b", size = 213569, upload-time = "2026-05-26T06:49:37.317Z" },
+ { url = "https://files.pythonhosted.org/packages/f4/8c/4c15755d701f2ec0e80d64a18e1ebaf5be2c584c0ec153fd516f5d13eada/zope_interface-8.5-cp313-cp313-macosx_10_9_x86_64.whl", hash = "sha256:28e80457c134d1fa57a7d758004dece348654e1b1467ac22dcdc20fc1d127c52", size = 212512, upload-time = "2026-05-26T06:49:38.996Z" },
+ { url = "https://files.pythonhosted.org/packages/9a/2e/4360c54c465db042cc8fbeeec92abac28b4cedbf6ba63c1f092fd08a190f/zope_interface-8.5-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:09495ce9d559c06b70f2d4855b3e4f48a822a9ddc8be1d30c5b4e5be14ae1ace", size = 212541, upload-time = "2026-05-26T06:49:41.186Z" },
+ { url = "https://files.pythonhosted.org/packages/aa/a5/692a2b8d70f78e848793231d5fae5fecbf8d0cccd73430fdc34802a6d3c1/zope_interface-8.5-cp313-cp313-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:7849ad8fa90763cc1087f4dda78ca3a233e950b3e08fac7079297c9cafbbd7bb", size = 265191, upload-time = "2026-05-26T06:49:43.449Z" },
+ { url = "https://files.pythonhosted.org/packages/70/8d/454a9cfc7a050c394ab4f11b3371f7897828b7415e096afff724637e65e0/zope_interface-8.5-cp313-cp313-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:5578c9421ca409a1f39f153d6f7803e4cde01da592ec75a9ac5e1b777d18d33b", size = 270626, upload-time = "2026-05-26T06:49:45.425Z" },
+ { url = "https://files.pythonhosted.org/packages/51/8c/db8409cfa3575b8e9b4800babd7d49f8228433cd1f0c56814bd0ada49c33/zope_interface-8.5-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:e1bd7d96b4ca5fa311f54c9eac16dce4886b428c1531dbe06067763ccdf123b4", size = 270444, upload-time = "2026-05-26T06:49:47.025Z" },
+ { url = "https://files.pythonhosted.org/packages/4a/df/a386940e41469ef615e100a216d8b386521e9e598817147f87932ca203c4/zope_interface-8.5-cp313-cp313-win_amd64.whl", hash = "sha256:0c8123d2a4dfde2a613c7cb772605477724782c20bc2e0ad1d9435376a6a44a3", size = 215021, upload-time = "2026-05-26T06:49:48.478Z" },
+ { url = "https://files.pythonhosted.org/packages/89/75/477eb5669b6b2a7a843decd1a075e9b1971a8720017654143a7183abd3d9/zope_interface-8.5-cp313-cp313-win_arm64.whl", hash = "sha256:6d02be14f3173c6c7288bc2fdf530090c01c3cf8764ad46c68024686f364278e", size = 213610, upload-time = "2026-05-26T06:49:50.01Z" },
+ { url = "https://files.pythonhosted.org/packages/d4/19/5032e954827fdf02db2d2f49737ac4378bb9cfc2cd95a8f2e2a5ae2ec01a/zope_interface-8.5-cp314-cp314-macosx_10_9_x86_64.whl", hash = "sha256:ffaecf013251a89d0de6feb49a46eba48ad8cbbf8a40aeb6045e459e7bec6784", size = 212597, upload-time = "2026-05-26T06:49:51.63Z" },
+ { url = "https://files.pythonhosted.org/packages/f1/53/3ef644012cf8a6a234a2d6134aab5a5c65ac5467c86296865501d4fbc406/zope_interface-8.5-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:126fa9d1c52295ae076d4cf968634f0a1826afa408a20808b57ff72877b8f69f", size = 212626, upload-time = "2026-05-26T06:49:53.236Z" },
+ { url = "https://files.pythonhosted.org/packages/32/67/bc8b4f465d388039255003e230c284a175cedf1203c692f23cb7bff64efe/zope_interface-8.5-cp314-cp314-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:3090e3a663d20194756a59a272e0c8508b889341e31d5894223331fe6b4f9b21", size = 266827, upload-time = "2026-05-26T06:49:54.873Z" },
+ { url = "https://files.pythonhosted.org/packages/a7/eb/37d05b935ede53d79690fecc8d201440084418e590bcfc05f384451c7593/zope_interface-8.5-cp314-cp314-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:9342fb74e2afefdb081bf1df727d209ea56995c6e13f5a0540e6d7aff4beafb8", size = 270139, upload-time = "2026-05-26T06:49:57.116Z" },
+ { url = "https://files.pythonhosted.org/packages/8b/0b/fd0c54579e2ce8dc6cf1a757903f3374bc6fbda929a46af9e0f53cb0e5f0/zope_interface-8.5-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:6c54725d818f1b57a7efb8b16528326e1f3c257b602b32393fd255c45af8799d", size = 270338, upload-time = "2026-05-26T06:49:58.698Z" },
+ { url = "https://files.pythonhosted.org/packages/c1/1d/c420dcd777bb761067ea92879ac766694a5ca78608185f1aecea64cbfc11/zope_interface-8.5-cp314-cp314-win_amd64.whl", hash = "sha256:29d74febbae1afeb6834c4ccbf42e242a673c860060f09e53142825270456140", size = 215789, upload-time = "2026-05-26T06:50:00.405Z" },
+ { url = "https://files.pythonhosted.org/packages/62/94/50b5eb8f94e527edceac14f9955e58917424ea79bb572ddc18548561cbc2/zope_interface-8.5-cp314-cp314-win_arm64.whl", hash = "sha256:633c8c49396f38df030340797c533e9fe460d1b5d1e42d88e55e938e525f548c", size = 213757, upload-time = "2026-05-26T06:50:01.973Z" },
+ { url = "https://files.pythonhosted.org/packages/17/6f/5d5f32c4dfcdb16ce2ec5363da686840f13c13e1a1214cb70b49e1cd6d9f/zope_interface-8.5-cp314-cp314t-macosx_10_9_x86_64.whl", hash = "sha256:133999820fdbae513c36c03d6f29ef87317aaa3edef39112222b155083664714", size = 213591, upload-time = "2026-05-26T06:50:03.529Z" },
+ { url = "https://files.pythonhosted.org/packages/f3/55/de0c3459ff717fce3342f9a29464c281fdeb0d36c3171ee88d119d5f0650/zope_interface-8.5-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:8bd75c96966e573232f0599deaff717564828031c7f05563ccc1ac35c5ee0304", size = 213733, upload-time = "2026-05-26T06:50:05.101Z" },
+ { url = "https://files.pythonhosted.org/packages/c2/95/d97430abd5ae9677e8b9295b58720c0064a5b557dbb6b8bf5928484cf0d8/zope_interface-8.5-cp314-cp314t-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:14b0e9799351d4c34fe99afd67f0cdd76e55ba15c66a98699d5fc22ea8241e08", size = 294905, upload-time = "2026-05-26T06:50:07.384Z" },
+ { url = "https://files.pythonhosted.org/packages/41/ec/a0f8f3dad6e74992f4654bdd94802be0929eabca7b871cac3b6fbb5e961b/zope_interface-8.5-cp314-cp314t-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:0cd6a732ac84b94eb1ef9222a117347a27efd294ee16810ffdf7ecd307677ed5", size = 300885, upload-time = "2026-05-26T06:50:08.997Z" },
+ { url = "https://files.pythonhosted.org/packages/0f/da/6881b48803a0ee8d23eb5efa30fce3ed218a2bd9de5758ce489d224fee81/zope_interface-8.5-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:798b7c87d0e59a7d5d086d642208d0d8700ff0d55c4029134b3c479c3bfb110f", size = 304672, upload-time = "2026-05-26T06:50:10.563Z" },
+ { url = "https://files.pythonhosted.org/packages/2e/0e/b4c01320859ff1d585438bc231fd60bd258d096359bccf6654fecdf0cffb/zope_interface-8.5-cp314-cp314t-win_amd64.whl", hash = "sha256:0fc3a9d45f114d27eaa1e53beeb144533689edca8a9f66505b1e8e8b3f075e42", size = 217241, upload-time = "2026-05-26T06:50:12.171Z" },
]