Add URL and range constraints to tool schemas

cursoragent · shrisukhani · cursoragent · commit da457fca9308 · 2026-02-13T05:43:15.000Z
Co-authored-by: Shri Sukhani <shrisukhani@users.noreply.github.com>
diff --git a/hyperbrowser/tools/schema.py b/hyperbrowser/tools/schema.py
@@ -51,6 +51,7 @@ def get_scrape_options(formats: Optional[List[scrape_types]] = None):
     "properties": {
         "url": {
             "type": "string",
+            "format": "uri",
             "description": "The URL of the website to scrape",
         },
         "scrape_options": get_scrape_options(),
@@ -64,6 +65,7 @@ def get_scrape_options(formats: Optional[List[scrape_types]] = None):
     "properties": {
         "url": {
             "type": "string",
+            "format": "uri",
             "description": "The URL of the website to scrape",
         },
         "scrape_options": get_scrape_options(["screenshot"]),
@@ -77,10 +79,12 @@ def get_scrape_options(formats: Optional[List[scrape_types]] = None):
     "properties": {
         "url": {
             "type": "string",
+            "format": "uri",
             "description": "The URL of the website to crawl",
         },
         "max_pages": {
-            "type": "number",
+            "type": "integer",
+            "minimum": 1,
             "description": "The maximum number of pages to crawl",
         },
         "follow_links": {
@@ -116,8 +120,11 @@ def get_scrape_options(formats: Optional[List[scrape_types]] = None):
     "properties": {
         "urls": {
             "type": "array",
+            "minItems": 1,
+            "maxItems": 10,
             "items": {
                 "type": "string",
+                "format": "uri",
             },
             "description": "A required list of up to 10 urls you want to process IN A SINGLE EXTRACTION. When answering questions that involve multiple sources or topics, ALWAYS include ALL relevant URLs in this single array rather than making separate function calls. This enables cross-referencing information across multiple sources to provide comprehensive answers. To allow crawling for any of the urls provided in the list, simply add /* to the end of the url (https://hyperbrowser.ai/*). This will crawl other pages on the site with the same origin and find relevant pages to use for the extraction context.",
         },
@@ -133,7 +140,8 @@ def get_scrape_options(formats: Optional[List[scrape_types]] = None):
             "description": "A strict JSON schema for the response shape. This can be either a JSON object schema or a JSON string that can be parsed into an object schema. For multi-source extraction, design this schema to accommodate information from all URLs in a single structure.",
         },
         "max_links": {
-            "type": "number",
+            "type": "integer",
+            "minimum": 1,
             "description": "The maximum number of links to look for if performing a crawl for any given url in the urls list.",
         },
     },
diff --git a/tests/test_tool_schema.py b/tests/test_tool_schema.py
@@ -27,3 +27,18 @@ def test_scrape_related_tool_schemas_require_only_url():
     assert SCRAPE_SCHEMA["required"] == ["url"]
     assert SCREENSHOT_SCHEMA["required"] == ["url"]
     assert CRAWL_SCHEMA["required"] == ["url"]
+
+
+def test_tool_schemas_include_url_and_range_constraints():
+    assert SCRAPE_SCHEMA["properties"]["url"]["format"] == "uri"
+    assert SCREENSHOT_SCHEMA["properties"]["url"]["format"] == "uri"
+    assert CRAWL_SCHEMA["properties"]["url"]["format"] == "uri"
+
+    assert CRAWL_SCHEMA["properties"]["max_pages"]["type"] == "integer"
+    assert CRAWL_SCHEMA["properties"]["max_pages"]["minimum"] == 1
+
+    assert EXTRACT_SCHEMA["properties"]["urls"]["minItems"] == 1
+    assert EXTRACT_SCHEMA["properties"]["urls"]["maxItems"] == 10
+    assert EXTRACT_SCHEMA["properties"]["urls"]["items"]["format"] == "uri"
+    assert EXTRACT_SCHEMA["properties"]["max_links"]["type"] == "integer"
+    assert EXTRACT_SCHEMA["properties"]["max_links"]["minimum"] == 1