Skip to content

Commit da457fc

Browse files
Add URL and range constraints to tool schemas
Co-authored-by: Shri Sukhani <shrisukhani@users.noreply.github.com>
1 parent ce94de9 commit da457fc

File tree

2 files changed

+25
-2
lines changed

2 files changed

+25
-2
lines changed

hyperbrowser/tools/schema.py

Lines changed: 10 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -51,6 +51,7 @@ def get_scrape_options(formats: Optional[List[scrape_types]] = None):
5151
"properties": {
5252
"url": {
5353
"type": "string",
54+
"format": "uri",
5455
"description": "The URL of the website to scrape",
5556
},
5657
"scrape_options": get_scrape_options(),
@@ -64,6 +65,7 @@ def get_scrape_options(formats: Optional[List[scrape_types]] = None):
6465
"properties": {
6566
"url": {
6667
"type": "string",
68+
"format": "uri",
6769
"description": "The URL of the website to scrape",
6870
},
6971
"scrape_options": get_scrape_options(["screenshot"]),
@@ -77,10 +79,12 @@ def get_scrape_options(formats: Optional[List[scrape_types]] = None):
7779
"properties": {
7880
"url": {
7981
"type": "string",
82+
"format": "uri",
8083
"description": "The URL of the website to crawl",
8184
},
8285
"max_pages": {
83-
"type": "number",
86+
"type": "integer",
87+
"minimum": 1,
8488
"description": "The maximum number of pages to crawl",
8589
},
8690
"follow_links": {
@@ -116,8 +120,11 @@ def get_scrape_options(formats: Optional[List[scrape_types]] = None):
116120
"properties": {
117121
"urls": {
118122
"type": "array",
123+
"minItems": 1,
124+
"maxItems": 10,
119125
"items": {
120126
"type": "string",
127+
"format": "uri",
121128
},
122129
"description": "A required list of up to 10 urls you want to process IN A SINGLE EXTRACTION. When answering questions that involve multiple sources or topics, ALWAYS include ALL relevant URLs in this single array rather than making separate function calls. This enables cross-referencing information across multiple sources to provide comprehensive answers. To allow crawling for any of the urls provided in the list, simply add /* to the end of the url (https://hyperbrowser.ai/*). This will crawl other pages on the site with the same origin and find relevant pages to use for the extraction context.",
123130
},
@@ -133,7 +140,8 @@ def get_scrape_options(formats: Optional[List[scrape_types]] = None):
133140
"description": "A strict JSON schema for the response shape. This can be either a JSON object schema or a JSON string that can be parsed into an object schema. For multi-source extraction, design this schema to accommodate information from all URLs in a single structure.",
134141
},
135142
"max_links": {
136-
"type": "number",
143+
"type": "integer",
144+
"minimum": 1,
137145
"description": "The maximum number of links to look for if performing a crawl for any given url in the urls list.",
138146
},
139147
},

tests/test_tool_schema.py

Lines changed: 15 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -27,3 +27,18 @@ def test_scrape_related_tool_schemas_require_only_url():
2727
assert SCRAPE_SCHEMA["required"] == ["url"]
2828
assert SCREENSHOT_SCHEMA["required"] == ["url"]
2929
assert CRAWL_SCHEMA["required"] == ["url"]
30+
31+
32+
def test_tool_schemas_include_url_and_range_constraints():
33+
assert SCRAPE_SCHEMA["properties"]["url"]["format"] == "uri"
34+
assert SCREENSHOT_SCHEMA["properties"]["url"]["format"] == "uri"
35+
assert CRAWL_SCHEMA["properties"]["url"]["format"] == "uri"
36+
37+
assert CRAWL_SCHEMA["properties"]["max_pages"]["type"] == "integer"
38+
assert CRAWL_SCHEMA["properties"]["max_pages"]["minimum"] == 1
39+
40+
assert EXTRACT_SCHEMA["properties"]["urls"]["minItems"] == 1
41+
assert EXTRACT_SCHEMA["properties"]["urls"]["maxItems"] == 10
42+
assert EXTRACT_SCHEMA["properties"]["urls"]["items"]["format"] == "uri"
43+
assert EXTRACT_SCHEMA["properties"]["max_links"]["type"] == "integer"
44+
assert EXTRACT_SCHEMA["properties"]["max_links"]["minimum"] == 1

0 commit comments

Comments
 (0)