|
2 | 2 |
|
3 | 3 | import pytest |
4 | 4 |
|
| 5 | +import hyperbrowser.tools as tools_module |
5 | 6 | from hyperbrowser.exceptions import HyperbrowserError |
6 | 7 | from hyperbrowser.models.extract import StartExtractJobParams |
7 | 8 | from hyperbrowser.tools import WebsiteExtractTool |
8 | 9 |
|
| 10 | +_UNSET = object() |
| 11 | + |
9 | 12 |
|
10 | 13 | class _Response: |
11 | 14 | def __init__(self, data): |
12 | 15 | self.data = data |
13 | 16 |
|
14 | 17 |
|
class _SyncExtractManager:
    """Synchronous fake extract manager.

    Remembers the params handed to ``start_and_wait`` in ``last_params`` and
    answers with a canned payload wrapped in ``_Response``.
    """

    def __init__(self, response_data=_UNSET):
        self.last_params = None
        # Identity check against the sentinel (not a truthiness or None check)
        # so falsy payloads such as {}, [] and None are kept unchanged.
        self._response_data = (
            {"ok": True} if response_data is _UNSET else response_data
        )

    def start_and_wait(self, params: StartExtractJobParams):
        """Record ``params`` and return the canned payload in a ``_Response``."""
        self.last_params = params
        return _Response(self._response_data)
22 | 26 |
|
23 | 27 |
|
class _AsyncExtractManager:
    """Asynchronous fake extract manager.

    Same contract as the synchronous fake, except ``start_and_wait`` is a
    coroutine: it records the params in ``last_params`` and returns a canned
    payload wrapped in ``_Response``.
    """

    def __init__(self, response_data=_UNSET):
        self.last_params = None
        # Identity check against the sentinel (not a truthiness or None check)
        # so falsy payloads such as {}, [] and None are kept unchanged.
        self._response_data = (
            {"ok": True} if response_data is _UNSET else response_data
        )

    async def start_and_wait(self, params: StartExtractJobParams):
        """Record ``params`` and return the canned payload in a ``_Response``."""
        self.last_params = params
        return _Response(self._response_data)
31 | 36 |
|
32 | 37 |
|
class _SyncClient:
    """Fake synchronous client exposing only an ``extract`` manager."""

    def __init__(self, response_data=_UNSET):
        # Forward the argument untouched so the manager decides whether the
        # default payload applies.
        self.extract = _SyncExtractManager(response_data=response_data)
36 | 41 |
|
37 | 42 |
|
class _AsyncClient:
    """Fake asynchronous client exposing only an ``extract`` manager."""

    def __init__(self, response_data=_UNSET):
        # Forward the argument untouched so the manager decides whether the
        # default payload applies.
        self.extract = _AsyncExtractManager(response_data=response_data)
41 | 46 |
|
42 | 47 |
|
43 | 48 | def test_extract_tool_runnable_does_not_mutate_input_params(): |
@@ -104,3 +109,85 @@ async def run(): |
104 | 109 | HyperbrowserError, match="Invalid JSON string provided for `schema`" |
105 | 110 | ): |
106 | 111 | asyncio.run(run()) |
| 112 | + |
| 113 | + |
def test_extract_tool_runnable_serializes_empty_object_data():
    """An empty-dict payload must come back as the string ``"{}"``."""
    client = _SyncClient(response_data={})

    output = WebsiteExtractTool.runnable(client, {"urls": ["https://example.com"]})

    assert output == "{}"
| 120 | + |
| 121 | + |
def test_extract_tool_async_runnable_serializes_empty_list_data():
    """An empty-list payload from the async path must come back as ``"[]"``."""
    client = _AsyncClient(response_data=[])

    async def run():
        return await WebsiteExtractTool.async_runnable(
            client, {"urls": ["https://example.com"]}
        )

    # Drive the coroutine from this synchronous test function.
    output = asyncio.run(run())

    assert output == "[]"
| 133 | + |
| 134 | + |
def test_extract_tool_runnable_returns_empty_string_for_none_data():
    """A ``None`` payload must produce an empty string."""
    # An explicit None is distinct from "not supplied", so the fake returns
    # None rather than its default payload.
    client = _SyncClient(response_data=None)

    output = WebsiteExtractTool.runnable(client, {"urls": ["https://example.com"]})

    assert output == ""
| 141 | + |
| 142 | + |
def test_extract_tool_runnable_wraps_serialization_failures():
    """A payload that cannot be serialized must raise ``HyperbrowserError``.

    The raised error is expected to carry the underlying failure in
    ``original_error``.
    """
    # A set is rejected by the stdlib JSON encoder.
    # NOTE(review): assumes the tool serializes with json.dumps; confirm.
    client = _SyncClient(response_data={1, 2})

    with pytest.raises(
        HyperbrowserError, match="Failed to serialize extract tool response data"
    ) as exc_info:
        WebsiteExtractTool.runnable(client, {"urls": ["https://example.com"]})

    assert exc_info.value.original_error is not None
| 152 | + |
| 153 | + |
def test_extract_tool_runnable_wraps_unexpected_schema_parse_failures(
    monkeypatch: pytest.MonkeyPatch,
):
    """A non-JSON error while parsing ``schema`` must still be wrapped.

    ``json.loads`` is replaced with a function raising ``RecursionError``; the
    tool is expected to surface it as a ``HyperbrowserError`` with the
    invalid-schema message and a populated ``original_error``.
    """

    def _raise_recursion_error(_: str):
        raise RecursionError("schema parsing recursion overflow")

    # NOTE(review): tools_module.json is presumably the shared stdlib module
    # object, so this patch is process-wide until monkeypatch undoes it.
    monkeypatch.setattr(tools_module.json, "loads", _raise_recursion_error)

    with pytest.raises(
        HyperbrowserError, match="Invalid JSON string provided for `schema`"
    ) as exc_info:
        WebsiteExtractTool.runnable(
            _SyncClient(),
            {
                "urls": ["https://example.com"],
                "schema": '{"type":"object"}',
            },
        )

    assert exc_info.value.original_error is not None
| 174 | + |
| 175 | + |
def test_extract_tool_runnable_preserves_hyperbrowser_schema_parse_errors(
    monkeypatch: pytest.MonkeyPatch,
):
    """A ``HyperbrowserError`` raised during schema parsing must not be re-wrapped.

    ``json.loads`` is replaced with a function raising ``HyperbrowserError``;
    the same message must reach the caller and ``original_error`` must stay
    ``None``.
    """

    def _raise_hyperbrowser_error(_: str):
        raise HyperbrowserError("custom schema parse failure")

    # NOTE(review): tools_module.json is presumably the shared stdlib module
    # object, so this patch is process-wide until monkeypatch undoes it.
    monkeypatch.setattr(tools_module.json, "loads", _raise_hyperbrowser_error)

    with pytest.raises(
        HyperbrowserError, match="custom schema parse failure"
    ) as exc_info:
        WebsiteExtractTool.runnable(
            _SyncClient(),
            {
                "urls": ["https://example.com"],
                "schema": '{"type":"object"}',
            },
        )

    assert exc_info.value.original_error is None
0 commit comments