Skip to content

Commit da101f5

Browse files
Harden tool response field extraction and crawl rendering
Co-authored-by: Shri Sukhani <shrisukhani@users.noreply.github.com>
1 parent b419013 commit da101f5

2 files changed

Lines changed: 347 additions & 24 deletions

File tree

hyperbrowser/tools/__init__.py

Lines changed: 134 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,98 @@ def _serialize_extract_tool_data(data: Any) -> str:
103103
) from exc
104104

105105

106+
def _read_tool_response_data(response: Any, *, tool_name: str) -> Any:
    """Return ``response.data``, wrapping unexpected access failures.

    Args:
        response: Object returned by a tool job; only its ``data`` attribute
            is read here.
        tool_name: Human-readable tool label interpolated into the error
            message (callers pass values such as ``"scrape tool"``).

    Returns:
        Whatever ``response.data`` evaluates to, unmodified.

    Raises:
        HyperbrowserError: Re-raised untouched if the attribute access itself
            raises one; otherwise any other ``Exception`` is wrapped in a new
            ``HyperbrowserError`` that carries it as ``original_error`` and
            as the chained cause.
    """
    try:
        payload = response.data
    except HyperbrowserError:
        # Already the package's own error type: let it through unchanged.
        raise
    except Exception as exc:
        message = f"Failed to read {tool_name} response data"
        raise HyperbrowserError(message, original_error=exc) from exc
    return payload
116+
117+
118+
def _read_optional_tool_response_field(
    response_data: Any,
    *,
    tool_name: str,
    field_name: str,
) -> str:
    """Read an optional string attribute from a tool's response data.

    Args:
        response_data: The response payload, or ``None`` when there is none.
        tool_name: Tool label used in error messages.
        field_name: Name of the attribute to fetch from ``response_data``.

    Returns:
        The attribute value when it is a string; ``""`` when either
        ``response_data`` or the attribute value is ``None``.

    Raises:
        HyperbrowserError: If reading the attribute raises (a
            ``HyperbrowserError`` is propagated as-is, anything else is
            wrapped with ``original_error`` set), or if the value is neither
            ``None`` nor a ``str``.
    """
    if response_data is None:
        return ""

    try:
        raw_value = getattr(response_data, field_name)
    except HyperbrowserError:
        raise
    except Exception as exc:
        raise HyperbrowserError(
            f"Failed to read {tool_name} response field '{field_name}'",
            original_error=exc,
        ) from exc

    # A missing value is reported as the empty string rather than an error.
    if raw_value is None:
        return ""
    if isinstance(raw_value, str):
        return raw_value
    raise HyperbrowserError(
        f"{tool_name} response field '{field_name}' must be a string"
    )
142+
143+
144+
def _read_crawl_page_field(page: Any, *, field_name: str, page_index: int) -> Any:
    """Fetch one attribute from a crawled page, wrapping access failures.

    Args:
        page: A single entry from the crawl response data.
        field_name: Attribute to read from ``page``.
        page_index: Position of ``page`` in the crawl results; used only to
            make the error message point at the offending entry.

    Returns:
        The attribute value exactly as ``getattr`` produced it (no type
        validation happens here).

    Raises:
        HyperbrowserError: Propagated unchanged if raised by the attribute
            access; any other ``Exception`` is wrapped, with the original
            attached as ``original_error`` and as the chained cause.
    """
    try:
        value = getattr(page, field_name)
    except HyperbrowserError:
        raise
    except Exception as exc:
        message = (
            f"Failed to read crawl tool page field '{field_name}' at index {page_index}"
        )
        raise HyperbrowserError(message, original_error=exc) from exc
    return value
154+
155+
156+
def _render_crawl_markdown_output(response_data: Any) -> str:
    """Render crawl results as one concatenated markdown report.

    Each page that has non-empty markdown contributes a section made of a
    50-dash rule, a ``Url:`` line and the page markdown. Pages whose markdown
    is ``None`` or ``""`` are skipped.

    Args:
        response_data: ``None`` or a ``list`` of page objects exposing
            ``markdown`` and ``url`` attributes.

    Returns:
        The joined sections, or ``""`` when ``response_data`` is ``None`` or
        no page produced a section.

    Raises:
        HyperbrowserError: If ``response_data`` is not a list, if copying it
            fails, if a page attribute cannot be read, or if ``markdown`` /
            ``url`` holds a value that is neither ``None`` nor a ``str``.
    """
    if response_data is None:
        return ""
    if not isinstance(response_data, list):
        raise HyperbrowserError("crawl tool response data must be a list")

    # Snapshot the pages up front so iteration failures surface as one
    # well-labelled error instead of an arbitrary exception mid-loop.
    try:
        pages = list(response_data)
    except HyperbrowserError:
        raise
    except Exception as exc:
        raise HyperbrowserError(
            "Failed to iterate crawl tool response data",
            original_error=exc,
        ) from exc

    sections: list[str] = []
    for position, page in enumerate(pages):
        body = _read_crawl_page_field(
            page, field_name="markdown", page_index=position
        )
        if body is None:
            continue
        # Type is validated before the emptiness check so that falsy
        # non-strings (0, [], ...) are rejected rather than silently skipped.
        if not isinstance(body, str):
            raise HyperbrowserError(
                f"crawl tool page field 'markdown' must be a string at index {position}"
            )
        if not body:
            continue

        url = _read_crawl_page_field(page, field_name="url", page_index=position)
        if url is not None and not isinstance(url, str):
            raise HyperbrowserError(
                f"crawl tool page field 'url' must be a string at index {position}"
            )
        # Missing or whitespace-only URLs get a placeholder label.
        shown_url = url if url is not None and url.strip() else "<unknown url>"

        sections.append(
            f"\n{'-' * 50}\nUrl: {shown_url}\nMarkdown:\n{body}\n"
        )
    return "".join(sections)
196+
197+
106198
class WebsiteScrapeTool:
107199
openai_tool_definition = SCRAPE_TOOL_OPENAI
108200
anthropic_tool_definition = SCRAPE_TOOL_ANTHROPIC
@@ -112,14 +204,22 @@ def runnable(hb: Hyperbrowser, params: Mapping[str, Any]) -> str:
112204
resp = hb.scrape.start_and_wait(
113205
params=StartScrapeJobParams(**_to_param_dict(params))
114206
)
115-
return resp.data.markdown if resp.data and resp.data.markdown else ""
207+
return _read_optional_tool_response_field(
208+
_read_tool_response_data(resp, tool_name="scrape tool"),
209+
tool_name="scrape tool",
210+
field_name="markdown",
211+
)
116212

117213
@staticmethod
118214
async def async_runnable(hb: AsyncHyperbrowser, params: Mapping[str, Any]) -> str:
119215
resp = await hb.scrape.start_and_wait(
120216
params=StartScrapeJobParams(**_to_param_dict(params))
121217
)
122-
return resp.data.markdown if resp.data and resp.data.markdown else ""
218+
return _read_optional_tool_response_field(
219+
_read_tool_response_data(resp, tool_name="scrape tool"),
220+
tool_name="scrape tool",
221+
field_name="markdown",
222+
)
123223

124224

125225
class WebsiteScreenshotTool:
@@ -131,14 +231,22 @@ def runnable(hb: Hyperbrowser, params: Mapping[str, Any]) -> str:
131231
resp = hb.scrape.start_and_wait(
132232
params=StartScrapeJobParams(**_to_param_dict(params))
133233
)
134-
return resp.data.screenshot if resp.data and resp.data.screenshot else ""
234+
return _read_optional_tool_response_field(
235+
_read_tool_response_data(resp, tool_name="screenshot tool"),
236+
tool_name="screenshot tool",
237+
field_name="screenshot",
238+
)
135239

136240
@staticmethod
137241
async def async_runnable(hb: AsyncHyperbrowser, params: Mapping[str, Any]) -> str:
138242
resp = await hb.scrape.start_and_wait(
139243
params=StartScrapeJobParams(**_to_param_dict(params))
140244
)
141-
return resp.data.screenshot if resp.data and resp.data.screenshot else ""
245+
return _read_optional_tool_response_field(
246+
_read_tool_response_data(resp, tool_name="screenshot tool"),
247+
tool_name="screenshot tool",
248+
field_name="screenshot",
249+
)
142250

143251

144252
class WebsiteCrawlTool:
@@ -150,28 +258,18 @@ def runnable(hb: Hyperbrowser, params: Mapping[str, Any]) -> str:
150258
resp = hb.crawl.start_and_wait(
151259
params=StartCrawlJobParams(**_to_param_dict(params))
152260
)
153-
markdown = ""
154-
if resp.data:
155-
for page in resp.data:
156-
if page.markdown:
157-
markdown += (
158-
f"\n{'-' * 50}\nUrl: {page.url}\nMarkdown:\n{page.markdown}\n"
159-
)
160-
return markdown
261+
return _render_crawl_markdown_output(
262+
_read_tool_response_data(resp, tool_name="crawl tool")
263+
)
161264

162265
@staticmethod
163266
async def async_runnable(hb: AsyncHyperbrowser, params: Mapping[str, Any]) -> str:
164267
resp = await hb.crawl.start_and_wait(
165268
params=StartCrawlJobParams(**_to_param_dict(params))
166269
)
167-
markdown = ""
168-
if resp.data:
169-
for page in resp.data:
170-
if page.markdown:
171-
markdown += (
172-
f"\n{'-' * 50}\nUrl: {page.url}\nMarkdown:\n{page.markdown}\n"
173-
)
174-
return markdown
270+
return _render_crawl_markdown_output(
271+
_read_tool_response_data(resp, tool_name="crawl tool")
272+
)
175273

176274

177275
class WebsiteExtractTool:
@@ -184,15 +282,19 @@ def runnable(hb: Hyperbrowser, params: Mapping[str, Any]) -> str:
184282
resp = hb.extract.start_and_wait(
185283
params=StartExtractJobParams(**normalized_params)
186284
)
187-
return _serialize_extract_tool_data(resp.data)
285+
return _serialize_extract_tool_data(
286+
_read_tool_response_data(resp, tool_name="extract tool")
287+
)
188288

189289
@staticmethod
190290
async def async_runnable(hb: AsyncHyperbrowser, params: Mapping[str, Any]) -> str:
191291
normalized_params = _prepare_extract_tool_params(params)
192292
resp = await hb.extract.start_and_wait(
193293
params=StartExtractJobParams(**normalized_params)
194294
)
195-
return _serialize_extract_tool_data(resp.data)
295+
return _serialize_extract_tool_data(
296+
_read_tool_response_data(resp, tool_name="extract tool")
297+
)
196298

197299

198300
class BrowserUseTool:
@@ -204,14 +306,22 @@ def runnable(hb: Hyperbrowser, params: Mapping[str, Any]) -> str:
204306
resp = hb.agents.browser_use.start_and_wait(
205307
params=StartBrowserUseTaskParams(**_to_param_dict(params))
206308
)
207-
return resp.data.final_result if resp.data and resp.data.final_result else ""
309+
return _read_optional_tool_response_field(
310+
_read_tool_response_data(resp, tool_name="browser-use tool"),
311+
tool_name="browser-use tool",
312+
field_name="final_result",
313+
)
208314

209315
@staticmethod
210316
async def async_runnable(hb: AsyncHyperbrowser, params: Mapping[str, Any]) -> str:
211317
resp = await hb.agents.browser_use.start_and_wait(
212318
params=StartBrowserUseTaskParams(**_to_param_dict(params))
213319
)
214-
return resp.data.final_result if resp.data and resp.data.final_result else ""
320+
return _read_optional_tool_response_field(
321+
_read_tool_response_data(resp, tool_name="browser-use tool"),
322+
tool_name="browser-use tool",
323+
field_name="final_result",
324+
)
215325

216326

217327
__all__ = [

0 commit comments

Comments
 (0)