Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 3 additions & 9 deletions src/macaron/slsa_analyzer/provenance/loader.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2022 - 2025, Oracle and/or its affiliates. All rights reserved.
# Copyright (c) 2022 - 2026, Oracle and/or its affiliates. All rights reserved.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.

"""This module contains the loaders for SLSA provenances."""
Expand All @@ -9,7 +9,6 @@
import json
import logging
import zlib
from urllib.parse import urlparse

from cryptography import x509
from cryptography.x509 import DuplicateExtension, UnsupportedGeneralNameType
Expand All @@ -19,7 +18,7 @@
from macaron.slsa_analyzer.provenance.intoto import InTotoPayload, validate_intoto_payload
from macaron.slsa_analyzer.provenance.intoto.errors import LoadIntotoAttestationError, ValidateInTotoPayloadError
from macaron.slsa_analyzer.specs.pypi_certificate_predicate import PyPICertificatePredicate
from macaron.util import send_get_http_raw
from macaron.util import send_get_http_raw, url_is_safe

logger: logging.Logger = logging.getLogger(__name__)

Expand All @@ -43,13 +42,8 @@ def _try_read_url_link_file(file_content: bytes) -> str | None:


def _download_url_file_content(url: str, url_link_hostname_allowlist: list[str]) -> bytes:
hostname = urlparse(url).hostname
if hostname is None or hostname == "":
if not url_is_safe(url, allow_list=url_link_hostname_allowlist):
raise LoadIntotoAttestationError("Cannot resolve URL link file: invalid URL")
if hostname not in url_link_hostname_allowlist:
raise LoadIntotoAttestationError(
"Cannot resolve URL link file: target hostname '" + hostname + "' is not in allowed hostnames."
)

# TODO download size limit?
timeout = defaults.getint("downloads", "timeout", fallback=120)
Expand Down
62 changes: 61 additions & 1 deletion src/macaron/util.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2022 - 2025, Oracle and/or its affiliates. All rights reserved.
# Copyright (c) 2022 - 2026, Oracle and/or its affiliates. All rights reserved.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.

"""This module includes utilities functions for Macaron."""
Expand All @@ -21,6 +21,66 @@
logger: logging.Logger = logging.getLogger(__name__)


def url_is_safe(url: str, allow_list: list[str] | None = None, allow_login: bool = False) -> bool:
r"""Validate that a URL has an acceptable host and login component.

Parameters
----------
url : str
URL string to validate.
allow_list : list[str] | None
Allowed hostnames. When provided, the parsed hostname must be in this list.
If ``None``, any non-empty hostname is accepted.
allow_login : bool, default=False
Whether username/password URL components are permitted.

Returns
-------
bool
``True`` when the URL passes safety checks, otherwise ``False``.

Examples
--------
>>> url_is_safe("https://example.com")
True
>>> url_is_safe("https://example.com", allow_list=["example.com"])
True
>>> url_is_safe("https://example.com", allow_list=["oracle.com"])
False
>>> url_is_safe("https://user:test@example.com")
False
>>> url_is_safe("https://user:test@example.com", allow_login=True)
True
>>> url_is_safe("not-a-url")
False
>>> url_is_safe("127.0.0.1:6666\\@allowlist.com", ["allowlist.com"])
False
>>> url_is_safe("https://attacker.com:6666\\@allowlist.com", ["allowlist.com"])
False
>>> url_is_safe("https://username:attacker.com\\@allowlist.com", ["allowlist.com"])
False
>>> url_is_safe("https://username:test@allowlist.com", ["allowlist.com"], allow_login = True)
True
"""
try:
parsed_url = urllib.parse.urlparse(url)
except ValueError:
return False
if not allow_login:
if parsed_url.username or parsed_url.password:
logger.debug("Potential attempt to redirect to an invalid URL: hostname %s", parsed_url.hostname)
return False

hostname = parsed_url.hostname
if hostname is None or hostname == "":
return False
if allow_list and (hostname not in allow_list):
logger.debug("URL %s is not in allowed hostnames.", url)
return False

return True


def send_get_http(url: str, headers: dict) -> dict:
"""Send the GET HTTP request with the given url and headers.

Expand Down
Loading