Skip to content

Commit 264ca18

Browse files
Handle bounded nested URL encodings without false positives
Co-authored-by: Shri Sukhani <shrisukhani@users.noreply.github.com>
1 parent fcb0b62 commit 264ca18

File tree

3 files changed

+34
-3
lines changed

3 files changed

+34
-3
lines changed

hyperbrowser/config.py

Lines changed: 10 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -42,6 +42,8 @@ def _decode_url_component_with_limit(value: str, *, component_label: str) -> str
4242
if next_decoded_value == decoded_value:
4343
return decoded_value
4444
decoded_value = next_decoded_value
45+
if unquote(decoded_value) == decoded_value:
46+
return decoded_value
4547
raise HyperbrowserError(
4648
f"{component_label} contains excessively nested URL encoding"
4749
)
@@ -123,9 +125,14 @@ def normalize_base_url(base_url: str) -> str:
123125
break
124126
decoded_base_netloc = next_decoded_base_netloc
125127
else:
126-
raise HyperbrowserError(
127-
"base_url host contains excessively nested URL encoding"
128-
)
128+
if _ENCODED_HOST_DELIMITER_PATTERN.search(decoded_base_netloc):
129+
raise HyperbrowserError(
130+
"base_url host must not contain encoded delimiter characters"
131+
)
132+
if unquote(decoded_base_netloc) != decoded_base_netloc:
133+
raise HyperbrowserError(
134+
"base_url host contains excessively nested URL encoding"
135+
)
129136
if "\\" in decoded_base_netloc:
130137
raise HyperbrowserError("base_url host must not contain backslashes")
131138
if any(character.isspace() for character in decoded_base_netloc):

tests/test_config.py

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -472,6 +472,15 @@ def test_client_config_normalize_base_url_validates_and_normalizes():
472472
match="base_url path must not contain encoded query or fragment delimiters",
473473
):
474474
ClientConfig.normalize_base_url("https://example.local/%253Fapi")
475+
bounded_encoded_host_label = "%61"
476+
for _ in range(9):
477+
bounded_encoded_host_label = quote(bounded_encoded_host_label, safe="")
478+
assert (
479+
ClientConfig.normalize_base_url(
480+
f"https://{bounded_encoded_host_label}.example.local"
481+
)
482+
== f"https://{bounded_encoded_host_label}.example.local"
483+
)
475484
deeply_encoded_dot = "%2e"
476485
for _ in range(11):
477486
deeply_encoded_dot = quote(deeply_encoded_dot, safe="")

tests/test_url_building.py

Lines changed: 15 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -367,6 +367,21 @@ def test_client_build_url_allows_query_values_containing_absolute_urls():
367367
client.close()
368368

369369

370+
def test_client_build_url_allows_bounded_nested_safe_encoding():
371+
client = Hyperbrowser(config=ClientConfig(api_key="test-key"))
372+
try:
373+
bounded_encoded_segment = "%61"
374+
for _ in range(9):
375+
bounded_encoded_segment = quote(bounded_encoded_segment, safe="")
376+
377+
assert (
378+
client._build_url(f"/{bounded_encoded_segment}/session")
379+
== f"https://api.hyperbrowser.ai/api/{bounded_encoded_segment}/session"
380+
)
381+
finally:
382+
client.close()
383+
384+
370385
def test_client_build_url_normalizes_runtime_trailing_slashes():
371386
client = Hyperbrowser(config=ClientConfig(api_key="test-key"))
372387
try:

0 commit comments

Comments
 (0)