Skip to content

Commit b719fa8

Browse files
Harden tool text field normalization for string subclasses
Co-authored-by: Shri Sukhani <shrisukhani@users.noreply.github.com>
1 parent ec9e9ee commit b719fa8

File tree

2 files changed

+70
-2
lines changed

2 files changed

+70
-2
lines changed

hyperbrowser/tools/__init__.py

Lines changed: 16 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -223,10 +223,24 @@ def _normalize_optional_text_field_value(
223223
if field_value is None:
224224
return ""
225225
if isinstance(field_value, str):
226-
return field_value
226+
try:
227+
normalized_field_value = "".join(character for character in field_value)
228+
if type(normalized_field_value) is not str:
229+
raise TypeError("normalized text field must be a string")
230+
return normalized_field_value
231+
except HyperbrowserError:
232+
raise
233+
except Exception as exc:
234+
raise HyperbrowserError(
235+
error_message,
236+
original_error=exc,
237+
) from exc
227238
if isinstance(field_value, (bytes, bytearray, memoryview)):
228239
try:
229-
return memoryview(field_value).tobytes().decode("utf-8")
240+
normalized_field_value = memoryview(field_value).tobytes().decode("utf-8")
241+
if type(normalized_field_value) is not str:
242+
raise TypeError("normalized text field must be a string")
243+
return normalized_field_value
230244
except (TypeError, ValueError, UnicodeDecodeError) as exc:
231245
raise HyperbrowserError(
232246
error_message,

tests/test_tools_response_handling.py

Lines changed: 54 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -336,6 +336,24 @@ def test_scrape_tool_rejects_non_string_markdown_field():
336336
WebsiteScrapeTool.runnable(client, {"url": "https://example.com"})
337337

338338

339+
def test_scrape_tool_wraps_broken_string_subclass_markdown_field_values():
340+
class _BrokenMarkdownValue(str):
341+
def __iter__(self):
342+
raise RuntimeError("markdown iteration exploded")
343+
344+
client = _SyncScrapeClient(
345+
_Response(data=SimpleNamespace(markdown=_BrokenMarkdownValue("page")))
346+
)
347+
348+
with pytest.raises(
349+
HyperbrowserError,
350+
match="scrape tool response field 'markdown' must be a UTF-8 string",
351+
) as exc_info:
352+
WebsiteScrapeTool.runnable(client, {"url": "https://example.com"})
353+
354+
assert exc_info.value.original_error is not None
355+
356+
339357
def test_scrape_tool_wraps_attributeerror_from_declared_markdown_property():
340358
class _BrokenMarkdownData:
341359
@property
@@ -758,6 +776,24 @@ def test_crawl_tool_rejects_non_string_page_urls():
758776
WebsiteCrawlTool.runnable(client, {"url": "https://example.com"})
759777

760778

779+
def test_crawl_tool_wraps_broken_string_subclass_page_url_values():
780+
class _BrokenUrlValue(str):
781+
def __iter__(self):
782+
raise RuntimeError("url iteration exploded")
783+
784+
client = _SyncCrawlClient(
785+
_Response(data=[SimpleNamespace(url=_BrokenUrlValue("https://example.com"), markdown="body")])
786+
)
787+
788+
with pytest.raises(
789+
HyperbrowserError,
790+
match="crawl tool page field 'url' must be a UTF-8 string at index 0",
791+
) as exc_info:
792+
WebsiteCrawlTool.runnable(client, {"url": "https://example.com"})
793+
794+
assert exc_info.value.original_error is not None
795+
796+
761797
def test_crawl_tool_decodes_utf8_bytes_page_fields():
762798
client = _SyncCrawlClient(
763799
_Response(data=[SimpleNamespace(url=b"https://example.com", markdown=b"page")])
@@ -819,6 +855,24 @@ def test_browser_use_tool_rejects_non_string_final_result():
819855
BrowserUseTool.runnable(client, {"task": "search docs"})
820856

821857

858+
def test_browser_use_tool_wraps_broken_string_subclass_final_result_values():
859+
class _BrokenFinalResultValue(str):
860+
def __iter__(self):
861+
raise RuntimeError("final_result iteration exploded")
862+
863+
client = _SyncBrowserUseClient(
864+
_Response(data=SimpleNamespace(final_result=_BrokenFinalResultValue("done")))
865+
)
866+
867+
with pytest.raises(
868+
HyperbrowserError,
869+
match="browser-use tool response field 'final_result' must be a UTF-8 string",
870+
) as exc_info:
871+
BrowserUseTool.runnable(client, {"task": "search docs"})
872+
873+
assert exc_info.value.original_error is not None
874+
875+
822876
def test_browser_use_tool_wraps_attributeerror_from_declared_final_result_property():
823877
class _BrokenFinalResultData:
824878
@property

0 commit comments

Comments
 (0)