Skip to content

Commit 693362b

Browse files
authored
bugfix: fix mapping of input content for types that use the dataclasses-json mixin (#12)
1 parent fcd8fef commit 693362b

File tree

5 files changed: 78 additions (+78) and 31 deletions (-31)

5 files changed

+78
-31
lines changed

test/test_schema.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import inspect
22
from dataclasses import dataclass
33
from enum import Enum
4+
from pathlib import Path
45
from typing import Any, Optional, Union
56

67
import pytest
@@ -408,6 +409,8 @@ def fn(
408409
e: Optional[dict[str, Any]] = None,
409410
f: list[float] = None,
410411
g: Optional[g_enum] = None,
412+
h: FileData | None = None,
413+
i: Path | None = None,
411414
) -> None:
412415
pass
413416

@@ -419,6 +422,8 @@ class ExpectedInputModel(BaseModel):
419422
e: Optional[dict[str, Any]] = None
420423
f: list[float] = None
421424
g: Optional[g_enum] = None
425+
h: FileData | None = None
426+
i: Path | None = None
422427

423428
input_schema = get_input_schema(fn)
424429
input_model = js.schema_to_base_model(schema=input_schema)

test/test_utils.py

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,9 @@
22
from dataclasses import dataclass, is_dataclass
33
from enum import Enum
44

5+
import pytest
56
from pydantic import BaseModel
7+
from unstructured.ingest.v2.interfaces import FileData
68
from uvicorn.importer import import_from_string
79

810
from unstructured_platform_plugins.etl_uvicorn import utils
@@ -101,14 +103,20 @@ class MyEnum(Enum):
101103

102104

103105
def test_map_inputs():
104-
def fn(a: A, b: B, c: MyEnum, d: list) -> None:
106+
def fn(a: A, b: B, c: MyEnum, d: list, e: FileData) -> None:
105107
pass
106108

109+
file_data = FileData(
110+
identifier="custom_file_data",
111+
connector_type="mock_connector",
112+
additional_metadata={"additional": "metadata"},
113+
)
107114
inputs = {
108115
"a": {"b": 4, "c": 5.6},
109116
"b": {"d": True, "e": {"key": "value"}},
110117
"c": MyEnum.VALUE.value,
111118
"d": [1, 2, 3],
119+
"e": file_data.to_dict(),
112120
}
113121

114122
mapped_inputs = utils.map_inputs(func=fn, raw_inputs=inputs)
@@ -117,5 +125,16 @@ def fn(a: A, b: B, c: MyEnum, d: list) -> None:
117125
"b": B(d=True, e={"key": "value"}),
118126
"c": MyEnum.VALUE.value,
119127
"d": [1, 2, 3],
128+
"e": file_data,
120129
}
121130
assert mapped_inputs == expected
131+
132+
133+
def test_map_inputs_error():
134+
def fn(a: FileData) -> None:
135+
pass
136+
137+
inputs = {"a": {"not": "the", "right": "values"}}
138+
139+
with pytest.raises(KeyError):
140+
utils.map_inputs(func=fn, raw_inputs=inputs)

unstructured_platform_plugins/etl_uvicorn/api_generator.py

Lines changed: 22 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,9 @@
33
import inspect
44
import json
55
import logging
6-
from pathlib import Path
76
from typing import Any, Callable, Optional
87

9-
from fastapi import FastAPI
8+
from fastapi import FastAPI, HTTPException, status
109
from pydantic import BaseModel
1110
from starlette.responses import RedirectResponse
1211
from uvicorn.importer import import_from_string
@@ -64,27 +63,35 @@ def generate_fast_api(
6463

6564
@fastapi_app.post("/invoke", response_model=response_type)
6665
async def run_job(request: input_schema_model) -> response_type:
67-
logger.debug(f"invoking function: {func}")
68-
input_schema = get_input_schema(func)
69-
request_dict = request.model_dump()
70-
for k, v in request_dict.items():
71-
if schema := input_schema.get(k): # noqa: SIM102
72-
if (
73-
schema.get("type") == "string"
74-
and schema.get("is_path", False)
75-
and isinstance(v, str)
76-
):
77-
request_dict[k] = Path(v)
66+
logger.debug(f"invoking function {func} with input: {request.model_dump()}")
67+
# Create dictionary from pydantic model while preserving underlying types
68+
request_dict = {f: getattr(request, f) for f in request.model_fields}
7869
map_inputs(func=func, raw_inputs=request_dict)
7970
logger.debug(f"passing inputs to function: {request_dict}")
80-
return await invoke_func(func=func, kwargs=request_dict)
71+
try:
72+
return await invoke_func(func=func, kwargs=request_dict)
73+
except Exception as e:
74+
logger.error(
75+
f"failed to invoke plugin with inputs {request_dict}: {e}", exc_info=True
76+
)
77+
raise HTTPException(
78+
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
79+
detail=f"failed to invoke plugin: {e}",
80+
)
8181

8282
else:
8383

8484
@fastapi_app.post("/invoke", response_model=response_type)
8585
async def run_job() -> response_type:
8686
logger.debug(f"invoking function without inputs: {func}")
87-
return await invoke_func(func=func)
87+
try:
88+
return await invoke_func(func=func)
89+
except Exception as e:
90+
logger.error(f"failed to invoke plugin: {e}", exc_info=True)
91+
raise HTTPException(
92+
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
93+
detail=f"failed to invoke plugin: {e}",
94+
)
8895

8996
class SchemaOutputResponse(BaseModel):
9097
inputs: dict[str, Any]

unstructured_platform_plugins/etl_uvicorn/utils.py

Lines changed: 26 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from types import GenericAlias, NoneType
55
from typing import Any, Callable, Optional
66

7+
from dataclasses_json import DataClassJsonMixin
78
from pydantic import BaseModel
89

910
from unstructured_platform_plugins.schema.json_schema import (
@@ -83,18 +84,29 @@ def map_inputs(func: Callable, raw_inputs: dict[str, Any]) -> dict[str, Any]:
8384
type_info = get_type_hints(func)
8485
type_info.pop("return")
8586
for field_name, type_data in type_info.items():
86-
if (
87-
is_dataclass(type_data)
88-
and field_name in raw_inputs
89-
and isinstance(raw_inputs[field_name], dict)
90-
):
91-
raw_inputs[field_name] = type_data(**raw_inputs[field_name])
92-
elif isinstance(type_data, EnumMeta):
93-
raw_inputs[field_name] = raw_inputs[field_name]
94-
elif (
95-
inspect.isclass(type_data)
96-
and not isinstance(type_data, GenericAlias)
97-
and issubclass(type_data, BaseModel)
98-
):
99-
raw_inputs[field_name] = type_data.model_validate(raw_inputs[field_name])
87+
if field_name not in raw_inputs:
88+
continue
89+
field_value = raw_inputs[field_name]
90+
try:
91+
if (
92+
inspect.isclass(type_data)
93+
and issubclass(type_data, DataClassJsonMixin)
94+
and isinstance(field_value, dict)
95+
):
96+
raw_inputs[field_name] = type_data.from_dict(raw_inputs[field_name])
97+
elif is_dataclass(type_data) and isinstance(field_value, dict):
98+
raw_inputs[field_name] = type_data(**raw_inputs[field_name])
99+
elif isinstance(type_data, EnumMeta):
100+
raw_inputs[field_name] = raw_inputs[field_name]
101+
elif (
102+
inspect.isclass(type_data)
103+
and not isinstance(type_data, GenericAlias)
104+
and issubclass(type_data, BaseModel)
105+
):
106+
raw_inputs[field_name] = type_data.model_validate(raw_inputs[field_name])
107+
except Exception as e:
108+
exception_type = type(e)
109+
raise exception_type(
110+
f"failed to map input for field {field_name}: {field_value}"
111+
) from e
100112
return raw_inputs

unstructured_platform_plugins/schema/json_schema.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -262,8 +262,12 @@ def response_to_json_schema(return_annotation: Any) -> dict:
262262
return to_json_schema(val=return_annotation)
263263

264264

265-
def schema_to_base_model_type(json_type_name, name: str, type_info: dict) -> Type[BaseModel]:
265+
def schema_to_base_model_type(json_type_name, name: str, type_info: dict) -> Type:
266266
t = typed_map_reverse[json_type_name]
267+
if t is dict and type_info.get("is_file_data", False):
268+
return FileData
269+
if t is str and type_info.get("is_path", False):
270+
return Path
267271
if t is dict and "properties" in type_info:
268272
t = schema_to_base_model(
269273
schema=type_info["properties"],

0 commit comments

Comments (0)