From 9f82448499de600bbe21889b0f53d5e8423eb8f7 Mon Sep 17 00:00:00 2001 From: Integer-Ctrl Date: Fri, 5 Dec 2025 15:27:54 +0100 Subject: [PATCH 1/6] refactor: split client into deploy and download --- databusclient/{client.py => api/deploy.py} | 343 --------------------- databusclient/api/download.py | 331 ++++++++++++++++++++ databusclient/cli.py | 14 +- 3 files changed, 339 insertions(+), 349 deletions(-) rename databusclient/{client.py => api/deploy.py} (53%) create mode 100644 databusclient/api/download.py diff --git a/databusclient/client.py b/databusclient/api/deploy.py similarity index 53% rename from databusclient/client.py rename to databusclient/api/deploy.py index 994e731..ed8b931 100644 --- a/databusclient/client.py +++ b/databusclient/api/deploy.py @@ -3,12 +3,6 @@ import requests import hashlib import json -from tqdm import tqdm -from SPARQLWrapper import SPARQLWrapper, JSON -from hashlib import sha256 -import os - -from databusclient.api.utils import get_databus_id_parts_from_uri, get_json_ld_from_databus __debug = False @@ -491,340 +485,3 @@ def deploy_from_metadata( print(f"Deployed {len(metadata)} file(s):") for entry in metadata: print(f" - {entry['url']}") - - -def __download_file__(url, filename, vault_token_file=None, databus_key=None, auth_url=None, client_id=None) -> None: - """ - Download a file from the internet with a progress bar using tqdm. - - Parameters: - - url: the URL of the file to download - - filename: the local file path where the file should be saved - - vault_token_file: Path to Vault refresh token file - - auth_url: Keycloak token endpoint URL - - client_id: Client ID for token exchange - - Steps: - 1. Try direct GET without Authorization header. - 2. If server responds with WWW-Authenticate: Bearer, 401 Unauthorized) or url starts with "https://data.dbpedia.io/databus.dbpedia.org", - then fetch Vault access token and retry with Authorization header. - """ - - print(f"Download file: {url}") - dirpath = os.path.dirname(filename) - if dirpath: - os.makedirs(dirpath, exist_ok=True) # Create the necessary directories - # --- 1. Get redirect URL by requesting HEAD --- - response = requests.head(url, stream=True) - # Check for redirect and update URL if necessary - if response.headers.get("Location") and response.status_code in [301, 302, 303, 307, 308]: - url = response.headers.get("Location") - print("Redirects url: ", url) - - # --- 2. Try direct GET --- - response = requests.get(url, stream=True, allow_redirects=True, timeout=30) - www = response.headers.get('WWW-Authenticate', '') # get WWW-Authenticate header if present to check for Bearer auth - - # Vault token required if 401 Unauthorized with Bearer challenge - if (response.status_code == 401 and "bearer" in www.lower()): - print(f"Authentication required for {url}") - if not (vault_token_file): - raise ValueError("Vault token file not given for protected download") - - # --- 3. Fetch Vault token --- - vault_token = __get_vault_access__(url, vault_token_file, auth_url, client_id) - headers = {"Authorization": f"Bearer {vault_token}"} - - # --- 4. 
Retry with token --- - response = requests.get(url, headers=headers, stream=True, timeout=30) - - # Databus API key required if only 401 Unauthorized - elif response.status_code == 401: - print(f"API key required for {url}") - if not databus_key: - raise ValueError("Databus API key not given for protected download") - - headers = {"X-API-KEY": databus_key} - response = requests.get(url, headers=headers, stream=True, timeout=30) - - try: - response.raise_for_status() # Raise if still failing - except requests.exceptions.HTTPError as e: - if response.status_code == 404: - print(f"WARNING: Skipping file {url} because it was not found (404).") - return - else: - raise e - - total_size_in_bytes = int(response.headers.get('content-length', 0)) - block_size = 1024 # 1 KiB - - progress_bar = tqdm(total=total_size_in_bytes, unit='iB', unit_scale=True) - with open(filename, 'wb') as file: - for data in response.iter_content(block_size): - progress_bar.update(len(data)) - file.write(data) - progress_bar.close() - - # TODO: could be a problem of github raw / openflaas - if total_size_in_bytes != 0 and progress_bar.n != total_size_in_bytes: - raise IOError("Downloaded size does not match Content-Length header") - - -def __get_vault_access__(download_url: str, - token_file: str, - auth_url: str, - client_id: str) -> str: - """ - Get Vault access token for a protected databus download. - """ - # 1. Load refresh token - refresh_token = os.environ.get("REFRESH_TOKEN") - if not refresh_token: - if not os.path.exists(token_file): - raise FileNotFoundError(f"Vault token file not found: {token_file}") - with open(token_file, "r") as f: - refresh_token = f.read().strip() - if len(refresh_token) < 80: - print(f"Warning: token from {token_file} is short (<80 chars)") - - # 2. Refresh token -> access token - resp = requests.post(auth_url, data={ - "client_id": client_id, - "grant_type": "refresh_token", - "refresh_token": refresh_token - }) - resp.raise_for_status() - access_token = resp.json()["access_token"] - - # 3. Extract host as audience - # Remove protocol prefix - if download_url.startswith("https://"): - host_part = download_url[len("https://"):] - elif download_url.startswith("http://"): - host_part = download_url[len("http://"):] - else: - host_part = download_url - audience = host_part.split("/")[0] # host is before first "/" - - # 4. Access token -> Vault token - resp = requests.post(auth_url, data={ - "client_id": client_id, - "grant_type": "urn:ietf:params:oauth:grant-type:token-exchange", - "subject_token": access_token, - "audience": audience - }) - resp.raise_for_status() - vault_token = resp.json()["access_token"] - - print(f"Using Vault access token for {download_url}") - return vault_token - - -def __query_sparql__(endpoint_url, query, databus_key=None) -> dict: - """ - Query a SPARQL endpoint and return results in JSON format. 
- - Parameters: - - endpoint_url: the URL of the SPARQL endpoint - - query: the SPARQL query string - - databus_key: Optional API key for authentication - - Returns: - - Dictionary containing the query results - """ - sparql = SPARQLWrapper(endpoint_url) - sparql.method = 'POST' - sparql.setQuery(query) - sparql.setReturnFormat(JSON) - if databus_key is not None: - sparql.setCustomHttpHeaders({"X-API-KEY": databus_key}) - results = sparql.query().convert() - return results - - -def __handle_databus_file_query__(endpoint_url, query, databus_key=None) -> List[str]: - result_dict = __query_sparql__(endpoint_url, query, databus_key=databus_key) - for binding in result_dict['results']['bindings']: - if len(binding.keys()) > 1: - print("Error multiple bindings in query response") - break - else: - value = binding[next(iter(binding.keys()))]['value'] - yield value - - -def __handle_databus_artifact_version__(json_str: str) -> List[str]: - """ - Parse the JSON-LD of a databus artifact version to extract download URLs. - Don't get downloadURLs directly from the JSON-LD, but follow the "file" links to count access to databus accurately. - - Returns a list of download URLs. - """ - - databusIdUrl = [] - json_dict = json.loads(json_str) - graph = json_dict.get("@graph", []) - for node in graph: - if node.get("@type") == "Part": - id = node.get("file") - databusIdUrl.append(id) - return databusIdUrl - - -def __get_databus_latest_version_of_artifact__(json_str: str) -> str: - """ - Parse the JSON-LD of a databus artifact to extract URLs of the latest version. - - Returns download URL of latest version of the artifact. - """ - json_dict = json.loads(json_str) - versions = json_dict.get("databus:hasVersion") - - # Single version case {} - if isinstance(versions, dict): - versions = [versions] - # Multiple versions case [{}, {}] - - version_urls = [v["@id"] for v in versions if "@id" in v] - if not version_urls: - raise ValueError("No versions found in artifact JSON-LD") - - version_urls.sort(reverse=True) # Sort versions in descending order - return version_urls[0] # Return the latest version URL - - -def __get_databus_artifacts_of_group__(json_str: str) -> List[str]: - """ - Parse the JSON-LD of a databus group to extract URLs of all artifacts. - - Returns a list of artifact URLs. 
- """ - json_dict = json.loads(json_str) - artifacts = json_dict.get("databus:hasArtifact", []) - - result = [] - for item in artifacts: - uri = item.get("@id") - if not uri: - continue - _, _, _, _, version, _ = get_databus_id_parts_from_uri(uri) - if version is None: - result.append(uri) - return result - - -def wsha256(raw: str): - return sha256(raw.encode('utf-8')).hexdigest() - - -def __handle_databus_collection__(uri: str, databus_key: str | None = None) -> str: - headers = {"Accept": "text/sparql"} - if databus_key is not None: - headers["X-API-KEY"] = databus_key - - return requests.get(uri, headers=headers, timeout=30).text - - -def __download_list__(urls: List[str], - localDir: str, - vault_token_file: str = None, - databus_key: str = None, - auth_url: str = None, - client_id: str = None) -> None: - fileLocalDir = localDir - for url in urls: - if localDir is None: - _host, account, group, artifact, version, file = get_databus_id_parts_from_uri(url) - fileLocalDir = os.path.join(os.getcwd(), account, group, artifact, version if version is not None else "latest") - print(f"Local directory not given, using {fileLocalDir}") - - file = url.split("/")[-1] - filename = os.path.join(fileLocalDir, file) - print("\n") - __download_file__(url=url, filename=filename, vault_token_file=vault_token_file, databus_key=databus_key, auth_url=auth_url, client_id=client_id) - print("\n") - - -def download( - localDir: str, - endpoint: str, - databusURIs: List[str], - token=None, - databus_key=None, - auth_url=None, - client_id=None -) -> None: - """ - Download datasets to local storage from databus registry. If download is on vault, vault token will be used for downloading protected files. - ------ - localDir: the local directory - endpoint: the databus endpoint URL - databusURIs: identifiers to access databus registered datasets - token: Path to Vault refresh token file - databus_key: Databus API key for protected downloads - auth_url: Keycloak token endpoint URL - client_id: Client ID for token exchange - """ - - # TODO: make pretty - for databusURI in databusURIs: - host, account, group, artifact, version, file = get_databus_id_parts_from_uri(databusURI) - - # dataID or databus collection - if databusURI.startswith("http://") or databusURI.startswith("https://"): - # Auto-detect sparql endpoint from databusURI if not given -> no need to specify endpoint (--databus) - if endpoint is None: - endpoint = f"https://{host}/sparql" - print(f"SPARQL endpoint {endpoint}") - - # databus collection - if group == "collections": - query = __handle_databus_collection__(databusURI, databus_key=databus_key) - res = __handle_databus_file_query__(endpoint, query) - __download_list__(res, localDir, vault_token_file=token, databus_key=databus_key, auth_url=auth_url, client_id=client_id) - # databus file - elif file is not None: - __download_list__([databusURI], localDir, vault_token_file=token, databus_key=databus_key, auth_url=auth_url, client_id=client_id) - # databus artifact version - elif version is not None: - json_str = get_json_ld_from_databus(databusURI, databus_key=databus_key) - res = __handle_databus_artifact_version__(json_str) - __download_list__(res, localDir, vault_token_file=token, databus_key=databus_key, auth_url=auth_url, client_id=client_id) - # databus artifact - elif artifact is not None: - json_str = get_json_ld_from_databus(databusURI, databus_key=databus_key) - latest = __get_databus_latest_version_of_artifact__(json_str) - print(f"No version given, using latest version: {latest}") - 
json_str = get_json_ld_from_databus(latest, databus_key=databus_key) - res = __handle_databus_artifact_version__(json_str) - __download_list__(res, localDir, vault_token_file=token, databus_key=databus_key, auth_url=auth_url, client_id=client_id) - - # databus group - elif group is not None: - json_str = get_json_ld_from_databus(databusURI, databus_key=databus_key) - artifacts = __get_databus_artifacts_of_group__(json_str) - for artifact_uri in artifacts: - print(f"Processing artifact {artifact_uri}") - json_str = get_json_ld_from_databus(artifact_uri, databus_key=databus_key) - latest = __get_databus_latest_version_of_artifact__(json_str) - print(f"No version given, using latest version: {latest}") - json_str = get_json_ld_from_databus(latest, databus_key=databus_key) - res = __handle_databus_artifact_version__(json_str) - __download_list__(res, localDir, vault_token_file=token, databus_key=databus_key, auth_url=auth_url, client_id=client_id) - - # databus account - elif account is not None: - print("accountId not supported yet") # TODO - else: - print("dataId not supported yet") # TODO add support for other DatabusIds - # query in local file - elif databusURI.startswith("file://"): - print("query in file not supported yet") - # query as argument - else: - print("QUERY {}", databusURI.replace("\n", " ")) - if endpoint is None: # endpoint is required for queries (--databus) - raise ValueError("No endpoint given for query") - res = __handle_databus_file_query__(endpoint, databusURI, databus_key=databus_key) - __download_list__(res, localDir, vault_token_file=token, databus_key=databus_key, auth_url=auth_url, client_id=client_id) diff --git a/databusclient/api/download.py b/databusclient/api/download.py new file mode 100644 index 0000000..d8dd4b3 --- /dev/null +++ b/databusclient/api/download.py @@ -0,0 +1,331 @@ +from typing import List +import requests +import os +from tqdm import tqdm +import json +from SPARQLWrapper import SPARQLWrapper, JSON + +from databusclient.api.utils import get_databus_id_parts_from_uri, get_json_ld_from_databus + +def __handle_databus_collection__(uri: str, databus_key: str | None = None) -> str: + headers = {"Accept": "text/sparql"} + if databus_key is not None: + headers["X-API-KEY"] = databus_key + + return requests.get(uri, headers=headers, timeout=30).text + +def __get_vault_access__(download_url: str, + token_file: str, + auth_url: str, + client_id: str) -> str: + """ + Get Vault access token for a protected databus download. + """ + # 1. Load refresh token + refresh_token = os.environ.get("REFRESH_TOKEN") + if not refresh_token: + if not os.path.exists(token_file): + raise FileNotFoundError(f"Vault token file not found: {token_file}") + with open(token_file, "r") as f: + refresh_token = f.read().strip() + if len(refresh_token) < 80: + print(f"Warning: token from {token_file} is short (<80 chars)") + + # 2. Refresh token -> access token + resp = requests.post(auth_url, data={ + "client_id": client_id, + "grant_type": "refresh_token", + "refresh_token": refresh_token + }) + resp.raise_for_status() + access_token = resp.json()["access_token"] + + # 3. Extract host as audience + # Remove protocol prefix + if download_url.startswith("https://"): + host_part = download_url[len("https://"):] + elif download_url.startswith("http://"): + host_part = download_url[len("http://"):] + else: + host_part = download_url + audience = host_part.split("/")[0] # host is before first "/" + + # 4. 
Access token -> Vault token + resp = requests.post(auth_url, data={ + "client_id": client_id, + "grant_type": "urn:ietf:params:oauth:grant-type:token-exchange", + "subject_token": access_token, + "audience": audience + }) + resp.raise_for_status() + vault_token = resp.json()["access_token"] + + print(f"Using Vault access token for {download_url}") + return vault_token + +def __download_file__(url, filename, vault_token_file=None, databus_key=None, auth_url=None, client_id=None) -> None: + """ + Download a file from the internet with a progress bar using tqdm. + + Parameters: + - url: the URL of the file to download + - filename: the local file path where the file should be saved + - vault_token_file: Path to Vault refresh token file + - auth_url: Keycloak token endpoint URL + - client_id: Client ID for token exchange + + Steps: + 1. Try direct GET without Authorization header. + 2. If server responds with WWW-Authenticate: Bearer, 401 Unauthorized) or url starts with "https://data.dbpedia.io/databus.dbpedia.org", + then fetch Vault access token and retry with Authorization header. + """ + + print(f"Download file: {url}") + dirpath = os.path.dirname(filename) + if dirpath: + os.makedirs(dirpath, exist_ok=True) # Create the necessary directories + # --- 1. Get redirect URL by requesting HEAD --- + response = requests.head(url, stream=True) + # Check for redirect and update URL if necessary + if response.headers.get("Location") and response.status_code in [301, 302, 303, 307, 308]: + url = response.headers.get("Location") + print("Redirects url: ", url) + + # --- 2. Try direct GET --- + response = requests.get(url, stream=True, allow_redirects=True, timeout=30) + www = response.headers.get('WWW-Authenticate', '') # get WWW-Authenticate header if present to check for Bearer auth + + # Vault token required if 401 Unauthorized with Bearer challenge + if (response.status_code == 401 and "bearer" in www.lower()): + print(f"Authentication required for {url}") + if not (vault_token_file): + raise ValueError("Vault token file not given for protected download") + + # --- 3. Fetch Vault token --- + vault_token = __get_vault_access__(url, vault_token_file, auth_url, client_id) + headers = {"Authorization": f"Bearer {vault_token}"} + + # --- 4. 
Retry with token --- + response = requests.get(url, headers=headers, stream=True, timeout=30) + + # Databus API key required if only 401 Unauthorized + elif response.status_code == 401: + print(f"API key required for {url}") + if not databus_key: + raise ValueError("Databus API key not given for protected download") + + headers = {"X-API-KEY": databus_key} + response = requests.get(url, headers=headers, stream=True, timeout=30) + + try: + response.raise_for_status() # Raise if still failing + except requests.exceptions.HTTPError as e: + if response.status_code == 404: + print(f"WARNING: Skipping file {url} because it was not found (404).") + return + else: + raise e + + total_size_in_bytes = int(response.headers.get('content-length', 0)) + block_size = 1024 # 1 KiB + + progress_bar = tqdm(total=total_size_in_bytes, unit='iB', unit_scale=True) + with open(filename, 'wb') as file: + for data in response.iter_content(block_size): + progress_bar.update(len(data)) + file.write(data) + progress_bar.close() + + # TODO: could be a problem of github raw / openflaas + if total_size_in_bytes != 0 and progress_bar.n != total_size_in_bytes: + raise IOError("Downloaded size does not match Content-Length header") + +def __download_list__(urls: List[str], + localDir: str, + vault_token_file: str = None, + databus_key: str = None, + auth_url: str = None, + client_id: str = None) -> None: + fileLocalDir = localDir + for url in urls: + if localDir is None: + _host, account, group, artifact, version, file = get_databus_id_parts_from_uri(url) + fileLocalDir = os.path.join(os.getcwd(), account, group, artifact, version if version is not None else "latest") + print(f"Local directory not given, using {fileLocalDir}") + + file = url.split("/")[-1] + filename = os.path.join(fileLocalDir, file) + print("\n") + __download_file__(url=url, filename=filename, vault_token_file=vault_token_file, databus_key=databus_key, auth_url=auth_url, client_id=client_id) + print("\n") + +def __query_sparql__(endpoint_url, query, databus_key=None) -> dict: + """ + Query a SPARQL endpoint and return results in JSON format. + + Parameters: + - endpoint_url: the URL of the SPARQL endpoint + - query: the SPARQL query string + - databus_key: Optional API key for authentication + + Returns: + - Dictionary containing the query results + """ + sparql = SPARQLWrapper(endpoint_url) + sparql.method = 'POST' + sparql.setQuery(query) + sparql.setReturnFormat(JSON) + if databus_key is not None: + sparql.setCustomHttpHeaders({"X-API-KEY": databus_key}) + results = sparql.query().convert() + return results + +def __handle_databus_file_query__(endpoint_url, query, databus_key=None) -> List[str]: + result_dict = __query_sparql__(endpoint_url, query, databus_key=databus_key) + for binding in result_dict['results']['bindings']: + if len(binding.keys()) > 1: + print("Error multiple bindings in query response") + break + else: + value = binding[next(iter(binding.keys()))]['value'] + yield value + +def __get_databus_latest_version_of_artifact__(json_str: str) -> str: + """ + Parse the JSON-LD of a databus artifact to extract URLs of the latest version. + + Returns download URL of latest version of the artifact. 
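+
+    Version URIs are compared lexicographically in descending order, so the
+    newest version comes first for date-style version names. Illustrative
+    sketch (the URIs are made up):
+
+        json_str = '{"databus:hasVersion": [{"@id": "https://databus.example.org/acc/grp/art/2023.01.01"}, {"@id": "https://databus.example.org/acc/grp/art/2024.06.01"}]}'
+        __get_databus_latest_version_of_artifact__(json_str)
+        # -> "https://databus.example.org/acc/grp/art/2024.06.01"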
+    """
+    json_dict = json.loads(json_str)
+    versions = json_dict.get("databus:hasVersion")
+
+    # Single version case {}
+    if isinstance(versions, dict):
+        versions = [versions]
+    # Multiple versions case [{}, {}]
+
+    version_urls = [v["@id"] for v in versions if "@id" in v]
+    if not version_urls:
+        raise ValueError("No versions found in artifact JSON-LD")
+
+    version_urls.sort(reverse=True)  # Sort versions in descending order
+    return version_urls[0]  # Return the latest version URL
+
+def __handle_databus_artifact_version__(json_str: str) -> List[str]:
+    """
+    Parse the JSON-LD of a databus artifact version to extract download URLs.
+    Download URLs are not taken directly from the JSON-LD; instead, the "file" links are followed so that accesses to the databus are counted accurately.
+
+    Returns a list of download URLs.
+    """
+
+    databusIdUrl = []
+    json_dict = json.loads(json_str)
+    graph = json_dict.get("@graph", [])
+    for node in graph:
+        if node.get("@type") == "Part":
+            id = node.get("file")
+            databusIdUrl.append(id)
+    return databusIdUrl
+
+def __get_databus_artifacts_of_group__(json_str: str) -> List[str]:
+    """
+    Parse the JSON-LD of a databus group to extract URLs of all artifacts.
+
+    Returns a list of artifact URLs.
+    """
+    json_dict = json.loads(json_str)
+    artifacts = json_dict.get("databus:hasArtifact", [])
+
+    result = []
+    for item in artifacts:
+        uri = item.get("@id")
+        if not uri:
+            continue
+        _, _, _, _, version, _ = get_databus_id_parts_from_uri(uri)
+        if version is None:
+            result.append(uri)
+    return result
+
+def download(
+    localDir: str,
+    endpoint: str,
+    databusURIs: List[str],
+    token=None,
+    databus_key=None,
+    auth_url=None,
+    client_id=None
+) -> None:
+    """
+    Download datasets from the databus registry to local storage. If a file is hosted on the Vault, a Vault token is used to download the protected file.
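+
+    Example (illustrative; it mirrors the repository's download tests, and the
+    collection URI is just a placeholder for any databus identifier):
+
+        download("tmp", "https://databus.dbpedia.org/sparql",
+                 ["https://databus.dbpedia.org/dbpedia/collections/dbpedia-snapshot-2022-12"])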
+ ------ + localDir: the local directory + endpoint: the databus endpoint URL + databusURIs: identifiers to access databus registered datasets + token: Path to Vault refresh token file + databus_key: Databus API key for protected downloads + auth_url: Keycloak token endpoint URL + client_id: Client ID for token exchange + """ + + # TODO: make pretty + for databusURI in databusURIs: + host, account, group, artifact, version, file = get_databus_id_parts_from_uri(databusURI) + + # dataID or databus collection + if databusURI.startswith("http://") or databusURI.startswith("https://"): + # Auto-detect sparql endpoint from databusURI if not given -> no need to specify endpoint (--databus) + if endpoint is None: + endpoint = f"https://{host}/sparql" + print(f"SPARQL endpoint {endpoint}") + + # databus collection + if group == "collections": + query = __handle_databus_collection__(databusURI, databus_key=databus_key) + res = __handle_databus_file_query__(endpoint, query) + __download_list__(res, localDir, vault_token_file=token, databus_key=databus_key, auth_url=auth_url, client_id=client_id) + # databus file + elif file is not None: + __download_list__([databusURI], localDir, vault_token_file=token, databus_key=databus_key, auth_url=auth_url, client_id=client_id) + # databus artifact version + elif version is not None: + json_str = get_json_ld_from_databus(databusURI, databus_key=databus_key) + res = __handle_databus_artifact_version__(json_str) + __download_list__(res, localDir, vault_token_file=token, databus_key=databus_key, auth_url=auth_url, client_id=client_id) + # databus artifact + elif artifact is not None: + json_str = get_json_ld_from_databus(databusURI, databus_key=databus_key) + latest = __get_databus_latest_version_of_artifact__(json_str) + print(f"No version given, using latest version: {latest}") + json_str = get_json_ld_from_databus(latest, databus_key=databus_key) + res = __handle_databus_artifact_version__(json_str) + __download_list__(res, localDir, vault_token_file=token, databus_key=databus_key, auth_url=auth_url, client_id=client_id) + + # databus group + elif group is not None: + json_str = get_json_ld_from_databus(databusURI, databus_key=databus_key) + artifacts = __get_databus_artifacts_of_group__(json_str) + for artifact_uri in artifacts: + print(f"Processing artifact {artifact_uri}") + json_str = get_json_ld_from_databus(artifact_uri, databus_key=databus_key) + latest = __get_databus_latest_version_of_artifact__(json_str) + print(f"No version given, using latest version: {latest}") + json_str = get_json_ld_from_databus(latest, databus_key=databus_key) + res = __handle_databus_artifact_version__(json_str) + __download_list__(res, localDir, vault_token_file=token, databus_key=databus_key, auth_url=auth_url, client_id=client_id) + + # databus account + elif account is not None: + print("accountId not supported yet") # TODO + else: + print("dataId not supported yet") # TODO add support for other DatabusIds + # query in local file + elif databusURI.startswith("file://"): + print("query in file not supported yet") + # query as argument + else: + print("QUERY {}", databusURI.replace("\n", " ")) + if endpoint is None: # endpoint is required for queries (--databus) + raise ValueError("No endpoint given for query") + res = __handle_databus_file_query__(endpoint, databusURI, databus_key=databus_key) + __download_list__(res, localDir, vault_token_file=token, databus_key=databus_key, auth_url=auth_url, client_id=client_id) \ No newline at end of file diff --git 
a/databusclient/cli.py b/databusclient/cli.py index c983544..804a950 100644 --- a/databusclient/cli.py +++ b/databusclient/cli.py @@ -4,10 +4,12 @@ import click from typing import List -from databusclient import client from databusclient.rclone_wrapper import upload + from databusclient.api.delete import delete as api_delete +import databusclient.api.deploy as api_deploy +from databusclient.api.download import download as api_download @click.group() def app(): @@ -55,8 +57,8 @@ def deploy(version_id, title, abstract, description, license_url, apikey, click.echo("[MODE] Classic deploy with distributions") click.echo(f"Deploying dataset version: {version_id}") - dataid = client.create_dataset(version_id, title, abstract, description, license_url, distributions) - client.deploy(dataid=dataid, api_key=apikey) + dataid = api_deploy.create_dataset(version_id, title, abstract, description, license_url, distributions) + api_deploy.deploy(dataid=dataid, api_key=apikey) return # === Mode 2: Metadata File === @@ -64,7 +66,7 @@ def deploy(version_id, title, abstract, description, license_url, apikey, click.echo(f"[MODE] Deploy from metadata file: {metadata_file}") with open(metadata_file, 'r') as f: metadata = json.load(f) - client.deploy_from_metadata(metadata, version_id, title, abstract, description, license_url, apikey) + api_deploy.deploy_from_metadata(metadata, version_id, title, abstract, description, license_url, apikey) return # === Mode 3: Upload & Deploy (Nextcloud) === @@ -80,7 +82,7 @@ def deploy(version_id, title, abstract, description, license_url, apikey, click.echo("[MODE] Upload & Deploy to DBpedia Databus via Nextcloud") click.echo(f"→ Uploading to: {remote}:{path}") metadata = upload.upload_to_nextcloud(distributions, remote, path, webdav_url) - client.deploy_from_metadata(metadata, version_id, title, abstract, description, license_url, apikey) + api_deploy.deploy_from_metadata(metadata, version_id, title, abstract, description, license_url, apikey) return raise click.UsageError( @@ -103,7 +105,7 @@ def download(databusuris: List[str], localdir, databus, vault_token, databus_key """ Download datasets from databus, optionally using vault access if vault options are provided. """ - client.download( + api_download( localDir=localdir, endpoint=databus, databusURIs=databusuris, From 1872648503fef59e646a5fda60000ecd6e5dcd95 Mon Sep 17 00:00:00 2001 From: Integer-Ctrl Date: Fri, 5 Dec 2025 17:08:22 +0100 Subject: [PATCH 2/6] refactor: iteration over download.py --- README.md | 2 + databusclient/__init__.py | 2 +- databusclient/api/delete.py | 6 +- databusclient/api/download.py | 422 +++++++++++++++++++++++----------- databusclient/api/utils.py | 2 +- databusclient/cli.py | 4 +- tests/test_databusclient.py | 2 +- tests/test_download.py | 6 +- 8 files changed, 298 insertions(+), 148 deletions(-) diff --git a/README.md b/README.md index 8add7c5..c652275 100644 --- a/README.md +++ b/README.md @@ -183,6 +183,8 @@ Options: e.g. 
https://databus.dbpedia.org/sparql) --vault-token TEXT Path to Vault refresh token file --databus-key TEXT Databus API key to download from protected databus + --latest-only When downloading artifacts, only download the latest + version --authurl TEXT Keycloak token endpoint URL [default: https://auth.dbpedia.org/realms/dbpedia/protocol/openid- connect/token] diff --git a/databusclient/__init__.py b/databusclient/__init__.py index fbb1463..3e053b5 100644 --- a/databusclient/__init__.py +++ b/databusclient/__init__.py @@ -1,5 +1,5 @@ from databusclient import cli -from databusclient.client import create_dataset, deploy, create_distribution +from databusclient.api.deploy import create_dataset, deploy, create_distribution __all__ = ["create_dataset", "deploy", "create_distribution"] diff --git a/databusclient/api/delete.py b/databusclient/api/delete.py index a3d7625..5db8ab2 100644 --- a/databusclient/api/delete.py +++ b/databusclient/api/delete.py @@ -2,7 +2,7 @@ import requests from typing import List -from databusclient.api.utils import get_databus_id_parts_from_uri, get_json_ld_from_databus +from databusclient.api.utils import get_databus_id_parts_from_uri, fetch_databus_jsonld def _confirm_delete(databusURI: str) -> str: """ @@ -97,7 +97,7 @@ def _delete_artifact(databusURI: str, databus_key: str, dry_run: bool = False, f - databus_key: Databus API key to authenticate the deletion requests - dry_run: If True, do not perform the deletion but only print what would be deleted """ - artifact_body = get_json_ld_from_databus(databusURI, databus_key) + artifact_body = fetch_databus_jsonld(databusURI, databus_key) json_dict = json.loads(artifact_body) versions = json_dict.get("databus:hasVersion") @@ -133,7 +133,7 @@ def _delete_group(databusURI: str, databus_key: str, dry_run: bool = False, forc - databus_key: Databus API key to authenticate the deletion requests - dry_run: If True, do not perform the deletion but only print what would be deleted """ - group_body = get_json_ld_from_databus(databusURI, databus_key) + group_body = fetch_databus_jsonld(databusURI, databus_key) json_dict = json.loads(group_body) artifacts = json_dict.get("databus:hasArtifact", []) diff --git a/databusclient/api/download.py b/databusclient/api/download.py index d8dd4b3..859e35f 100644 --- a/databusclient/api/download.py +++ b/databusclient/api/download.py @@ -1,84 +1,36 @@ -from typing import List +from typing import List, Iterator import requests import os from tqdm import tqdm import json from SPARQLWrapper import SPARQLWrapper, JSON -from databusclient.api.utils import get_databus_id_parts_from_uri, get_json_ld_from_databus +from databusclient.api.delete import _delete_group, _delete_resource +from databusclient.api.utils import get_databus_id_parts_from_uri, fetch_databus_jsonld -def __handle_databus_collection__(uri: str, databus_key: str | None = None) -> str: - headers = {"Accept": "text/sparql"} - if databus_key is not None: - headers["X-API-KEY"] = databus_key - return requests.get(uri, headers=headers, timeout=30).text - -def __get_vault_access__(download_url: str, - token_file: str, - auth_url: str, - client_id: str) -> str: - """ - Get Vault access token for a protected databus download. - """ - # 1. 
Load refresh token - refresh_token = os.environ.get("REFRESH_TOKEN") - if not refresh_token: - if not os.path.exists(token_file): - raise FileNotFoundError(f"Vault token file not found: {token_file}") - with open(token_file, "r") as f: - refresh_token = f.read().strip() - if len(refresh_token) < 80: - print(f"Warning: token from {token_file} is short (<80 chars)") - - # 2. Refresh token -> access token - resp = requests.post(auth_url, data={ - "client_id": client_id, - "grant_type": "refresh_token", - "refresh_token": refresh_token - }) - resp.raise_for_status() - access_token = resp.json()["access_token"] - - # 3. Extract host as audience - # Remove protocol prefix - if download_url.startswith("https://"): - host_part = download_url[len("https://"):] - elif download_url.startswith("http://"): - host_part = download_url[len("http://"):] - else: - host_part = download_url - audience = host_part.split("/")[0] # host is before first "/" - - # 4. Access token -> Vault token - resp = requests.post(auth_url, data={ - "client_id": client_id, - "grant_type": "urn:ietf:params:oauth:grant-type:token-exchange", - "subject_token": access_token, - "audience": audience - }) - resp.raise_for_status() - vault_token = resp.json()["access_token"] - - print(f"Using Vault access token for {download_url}") - return vault_token - -def __download_file__(url, filename, vault_token_file=None, databus_key=None, auth_url=None, client_id=None) -> None: +def _download_file(url, localDir, vault_token_file=None, databus_key=None, auth_url=None, client_id=None) -> None: """ Download a file from the internet with a progress bar using tqdm. Parameters: - url: the URL of the file to download - - filename: the local file path where the file should be saved + - localDir: Local directory to download file to. If None, the databus folder structure is created in the current working directory. - vault_token_file: Path to Vault refresh token file - auth_url: Keycloak token endpoint URL - client_id: Client ID for token exchange Steps: 1. Try direct GET without Authorization header. - 2. If server responds with WWW-Authenticate: Bearer, 401 Unauthorized) or url starts with "https://data.dbpedia.io/databus.dbpedia.org", - then fetch Vault access token and retry with Authorization header. + 2. If server responds with WWW-Authenticate: Bearer, 401 Unauthorized), then fetch Vault access token and retry with Authorization header. """ + if localDir is None: + _host, account, group, artifact, version, file = get_databus_id_parts_from_uri(url) + fileLocalDir = os.path.join(os.getcwd(), account, group, artifact, version if version is not None else "latest") + print(f"Local directory not given, using {fileLocalDir}") + + file = url.split("/")[-1] + filename = os.path.join(fileLocalDir, file) print(f"Download file: {url}") dirpath = os.path.dirname(filename) @@ -102,6 +54,7 @@ def __download_file__(url, filename, vault_token_file=None, databus_key=None, au raise ValueError("Vault token file not given for protected download") # --- 3. 
Fetch Vault token --- + # TODO: cache token vault_token = __get_vault_access__(url, vault_token_file, auth_url, client_id) headers = {"Authorization": f"Bearer {vault_token}"} @@ -140,26 +93,46 @@ def __download_file__(url, filename, vault_token_file=None, databus_key=None, au if total_size_in_bytes != 0 and progress_bar.n != total_size_in_bytes: raise IOError("Downloaded size does not match Content-Length header") -def __download_list__(urls: List[str], + +def _download_files(urls: List[str], localDir: str, vault_token_file: str = None, databus_key: str = None, auth_url: str = None, client_id: str = None) -> None: - fileLocalDir = localDir + """ + Download multiple files from the databus. + + Parameters: + - urls: List of file download URLs + - localDir: Local directory to download files to. If None, the databus folder structure is created in the current working directory. + - vault_token_file: Path to Vault refresh token file + - databus_key: Databus API key for protected downloads + - auth_url: Keycloak token endpoint URL + - client_id: Client ID for token exchange + """ for url in urls: - if localDir is None: - _host, account, group, artifact, version, file = get_databus_id_parts_from_uri(url) - fileLocalDir = os.path.join(os.getcwd(), account, group, artifact, version if version is not None else "latest") - print(f"Local directory not given, using {fileLocalDir}") - - file = url.split("/")[-1] - filename = os.path.join(fileLocalDir, file) - print("\n") - __download_file__(url=url, filename=filename, vault_token_file=vault_token_file, databus_key=databus_key, auth_url=auth_url, client_id=client_id) - print("\n") - -def __query_sparql__(endpoint_url, query, databus_key=None) -> dict: + _download_file(url=url, localDir=localDir, vault_token_file=vault_token_file, databus_key=databus_key, auth_url=auth_url, client_id=client_id) + +def _get_sparql_query_of_collection(uri: str, databus_key: str | None = None) -> str: + """ + Get SPARQL query of collection members from databus collection URI. + + Parameters: + - uri: The full databus collection URI + - databus_key: Optional Databus API key for authentication on protected resources + + Returns: + SPARQL query string to get download URLs of all files in the collection. + """ + headers = {"Accept": "text/sparql"} + if databus_key is not None: + headers["X-API-KEY"] = databus_key + + return requests.get(uri, headers=headers, timeout=30).text + + +def _query_sparql_endpoint(endpoint_url, query, databus_key=None) -> dict: """ Query a SPARQL endpoint and return results in JSON format. @@ -180,21 +153,178 @@ def __query_sparql__(endpoint_url, query, databus_key=None) -> dict: results = sparql.query().convert() return results -def __handle_databus_file_query__(endpoint_url, query, databus_key=None) -> List[str]: - result_dict = __query_sparql__(endpoint_url, query, databus_key=databus_key) - for binding in result_dict['results']['bindings']: - if len(binding.keys()) > 1: - print("Error multiple bindings in query response") - break - else: - value = binding[next(iter(binding.keys()))]['value'] - yield value -def __get_databus_latest_version_of_artifact__(json_str: str) -> str: +def _get_file_download_urls_from_sparql_query(endpoint_url, query, databus_key=None) -> List[str]: + """ + Execute a SPARQL query to get databus file download URLs. 
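+
+    The query must bind exactly one variable per result row; rows with more
+    than one binding raise a ValueError. A minimal sketch of such a query
+    (prefix declarations omitted; the exact property to select is an assumption):
+
+        SELECT ?file WHERE { ?distribution dcat:downloadURL ?file }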
+ + Parameters: + - endpoint_url: the URL of the SPARQL endpoint + - query: the SPARQL query string + - databus_key: Optional API key for authentication + + Returns: + - List of file download URLs """ - Parse the JSON-LD of a databus artifact to extract URLs of the latest version. + result_dict = _query_sparql_endpoint(endpoint_url, query, databus_key=databus_key) + + bindings = result_dict.get("results", {}).get("bindings") + if not isinstance(bindings, list): + raise ValueError("Invalid SPARQL response: 'bindings' missing or not a list") + + urls: List[str] = [] + + for binding in bindings: + if not isinstance(binding, dict) or len(binding) != 1: + raise ValueError(f"Invalid SPARQL binding structure: {binding}") + + value_dict = next(iter(binding.values())) + value = value_dict.get("value") + + if not isinstance(value, str): + raise ValueError(f"Invalid SPARQL value field: {value_dict}") - Returns download URL of latest version of the artifact. + urls.append(value) + + return urls + +def __get_vault_access__(download_url: str, + token_file: str, + auth_url: str, + client_id: str) -> str: + """ + Get Vault access token for a protected databus download. + """ + # 1. Load refresh token + refresh_token = os.environ.get("REFRESH_TOKEN") + if not refresh_token: + if not os.path.exists(token_file): + raise FileNotFoundError(f"Vault token file not found: {token_file}") + with open(token_file, "r") as f: + refresh_token = f.read().strip() + if len(refresh_token) < 80: + print(f"Warning: token from {token_file} is short (<80 chars)") + + # 2. Refresh token -> access token + resp = requests.post(auth_url, data={ + "client_id": client_id, + "grant_type": "refresh_token", + "refresh_token": refresh_token + }) + resp.raise_for_status() + access_token = resp.json()["access_token"] + + # 3. Extract host as audience + # Remove protocol prefix + if download_url.startswith("https://"): + host_part = download_url[len("https://"):] + elif download_url.startswith("http://"): + host_part = download_url[len("http://"):] + else: + host_part = download_url + audience = host_part.split("/")[0] # host is before first "/" + + # 4. Access token -> Vault token + resp = requests.post(auth_url, data={ + "client_id": client_id, + "grant_type": "urn:ietf:params:oauth:grant-type:token-exchange", + "subject_token": access_token, + "audience": audience + }) + resp.raise_for_status() + vault_token = resp.json()["access_token"] + + print(f"Using Vault access token for {download_url}") + return vault_token + + +def _download_collection(uri: str, + endpoint: str, + localDir: str, + vault_token: str = None, + databus_key: str = None, + auth_url: str = None, + client_id: str = None) -> None: + """ + Download all files in a databus collection. + + Parameters: + - uri: The full databus collection URI + - endpoint: the databus SPARQL endpoint URL + - localDir: Local directory to download files to. If None, the databus folder structure is created in the current working directory. 
+ - vault_token: Path to Vault refresh token file for protected downloads + - databus_key: Databus API key for protected downloads + - auth_url: Keycloak token endpoint URL + - client_id: Client ID for token exchange + """ + query = _get_sparql_query_of_collection(uri, databus_key=databus_key) + file_urls = _get_file_download_urls_from_sparql_query(endpoint, query, databus_key=databus_key) + _download_files(list(file_urls), localDir, vault_token_file=vault_token, databus_key=databus_key, auth_url=auth_url, client_id=client_id) + + +def _download_version(uri: str, + localDir: str, + vault_token_file: str = None, + databus_key: str = None, + auth_url: str = None, + client_id: str = None) -> None: + """ + Download all files in a databus artifact version. + + Parameters: + - uri: The full databus artifact version URI + - localDir: Local directory to download files to. If None, the databus folder structure is created in the current working directory. + - vault_token_file: Path to Vault refresh token file for protected downloads + - databus_key: Databus API key for protected downloads + - auth_url: Keycloak token endpoint URL + - client_id: Client ID for token exchange + """ + json_str = fetch_databus_jsonld(uri, databus_key=databus_key) + file_urls = _get_file_download_urls_from_artifact_jsonld(json_str) + _download_files(file_urls, localDir, vault_token_file=vault_token_file, databus_key=databus_key, auth_url=auth_url, client_id=client_id) + + +def _download_artifact(uri: str, + localDir: str, + all_versions: bool = False, + vault_token_file: str = None, + databus_key: str = None, + auth_url: str = None, + client_id: str = None) -> None: + """ + Download files in a databus artifact. + + Parameters: + - uri: The full databus artifact URI + - localDir: Local directory to download files to. If None, the databus folder structure is created in the current working directory. + - all_versions: If True, download all versions of the artifact; otherwise, only download the latest version + - vault_token_file: Path to Vault refresh token file for protected downloads + - databus_key: Databus API key for protected downloads + - auth_url: Keycloak token endpoint URL + - client_id: Client ID for token exchange + """ + json_str = fetch_databus_jsonld(uri, databus_key=databus_key) + versions = _get_databus_versions_of_artifact(json_str, all_versions=all_versions) + if isinstance(versions, str): + versions = [versions] + for version_uri in versions: + print(f"Downloading version: {version_uri}") + json_str = fetch_databus_jsonld(version_uri, databus_key=databus_key) + file_urls = _get_file_download_urls_from_artifact_jsonld(json_str) + _download_files(file_urls, localDir, vault_token_file=vault_token_file, databus_key=databus_key, auth_url=auth_url, client_id=client_id) + + +def _get_databus_versions_of_artifact(json_str: str, all_versions: bool) -> str | List[str]: + """ + Parse the JSON-LD of a databus artifact to extract URLs of its versions. 
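+
+    Version URIs are sorted lexicographically in descending order, so the first
+    entry is treated as the latest. Illustrative sketch (made-up URIs):
+
+        _get_databus_versions_of_artifact(json_str, all_versions=True)
+        # -> ["https://databus.example.org/acc/grp/art/2024.06.01",
+        #     "https://databus.example.org/acc/grp/art/2023.01.01"]
+        _get_databus_versions_of_artifact(json_str, all_versions=False)
+        # -> "https://databus.example.org/acc/grp/art/2024.06.01"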
+ + Parameters: + - json_str: JSON-LD string of the databus artifact + - all_versions: If True, return all version URLs; otherwise, return only the latest version URL + + Returns: + - If all_versions is True: List of all version URLs + - If all_versions is False: URL of the latest version """ json_dict = json.loads(json_str) versions = json_dict.get("databus:hasVersion") @@ -209,14 +339,21 @@ def __get_databus_latest_version_of_artifact__(json_str: str) -> str: raise ValueError("No versions found in artifact JSON-LD") version_urls.sort(reverse=True) # Sort versions in descending order - return version_urls[0] # Return the latest version URL -def __handle_databus_artifact_version__(json_str: str) -> List[str]: + if all_versions: + return version_urls + return version_urls[0] + +def _get_file_download_urls_from_artifact_jsonld(json_str: str) -> List[str]: """ Parse the JSON-LD of a databus artifact version to extract download URLs. Don't get downloadURLs directly from the JSON-LD, but follow the "file" links to count access to databus accurately. - Returns a list of download URLs. + Parameters: + - json_str: JSON-LD string of the databus artifact version + + Returns: + List of all file download URLs in the artifact version. """ databusIdUrl = [] @@ -228,7 +365,35 @@ def __handle_databus_artifact_version__(json_str: str) -> List[str]: databusIdUrl.append(id) return databusIdUrl -def __get_databus_artifacts_of_group__(json_str: str) -> List[str]: + +def _download_group(uri: str, + localDir: str, + all_versions: bool = False, + vault_token_file: str = None, + databus_key: str = None, + auth_url: str = None, + client_id: str = None) -> None: + """ + Download files in a databus group. + + Parameters: + - uri: The full databus group URI + - localDir: Local directory to download files to. If None, the databus folder structure is created in the current working directory. + - all_versions: If True, download all versions of each artifact in the group; otherwise, only download the latest version + - vault_token_file: Path to Vault refresh token file for protected downloads + - databus_key: Databus API key for protected downloads + - auth_url: Keycloak token endpoint URL + - client_id: Client ID for token exchange + """ + json_str = fetch_databus_jsonld(uri, databus_key=databus_key) + artifacts = _get_databus_artifacts_of_group(json_str) + for artifact_uri in artifacts: + print(f"Download artifact: {artifact_uri}") + _download_artifact(artifact_uri, localDir, all_versions=all_versions, vault_token_file=vault_token_file, databus_key=databus_key, auth_url=auth_url, client_id=client_id) + + + +def _get_databus_artifacts_of_group(json_str: str) -> List[str]: """ Parse the JSON-LD of a databus group to extract URLs of all artifacts. @@ -253,68 +418,49 @@ def download( databusURIs: List[str], token=None, databus_key=None, + all_versions=None, auth_url=None, client_id=None ) -> None: """ - Download datasets to local storage from databus registry. If download is on vault, vault token will be used for downloading protected files. - ------ - localDir: the local directory - endpoint: the databus endpoint URL - databusURIs: identifiers to access databus registered datasets - token: Path to Vault refresh token file - databus_key: Databus API key for protected downloads - auth_url: Keycloak token endpoint URL - client_id: Client ID for token exchange + Download datasets from databus. 
+
+    Downloads files, versions, artifacts, groups, or databus collections by their databus URIs, or via user-defined SPARQL queries that return file download URLs.
+
+    Parameters:
+    - localDir: Local directory to download datasets to. If None, the databus folder structure is created in the current working directory.
+    - endpoint: the databus endpoint URL. If None, inferred from databusURI. Required for user-defined SPARQL queries.
+    - databusURIs: databus identifiers to specify datasets to download.
+    - token: Path to Vault refresh token file for protected downloads
+    - databus_key: Databus API key for protected downloads
+    - auth_url: Keycloak token endpoint URL. Default is "https://auth.dbpedia.org/realms/dbpedia/protocol/openid-connect/token".
+    - client_id: Client ID for token exchange. Default is "vault-token-exchange".
     """
-
-    # TODO: make pretty
     for databusURI in databusURIs:
         host, account, group, artifact, version, file = get_databus_id_parts_from_uri(databusURI)
 
         # dataID or databus collection
         if databusURI.startswith("http://") or databusURI.startswith("https://"):
-            # Auto-detect sparql endpoint from databusURI if not given -> no need to specify endpoint (--databus)
+            # Auto-detect SPARQL endpoint from host if not given
             if endpoint is None:
                 endpoint = f"https://{host}/sparql"
                 print(f"SPARQL endpoint {endpoint}")
 
-            # databus collection
-            if group == "collections":
-                query = __handle_databus_collection__(databusURI, databus_key=databus_key)
-                res = __handle_databus_file_query__(endpoint, query)
-                __download_list__(res, localDir, vault_token_file=token, databus_key=databus_key, auth_url=auth_url, client_id=client_id)
-            # databus file
+            if group == "collections" and artifact is not None:
+                print(f"Downloading collection: {databusURI}")
+                _download_collection(databusURI, endpoint, localDir, token, databus_key, auth_url, client_id)
             elif file is not None:
-                __download_list__([databusURI], localDir, vault_token_file=token, databus_key=databus_key, auth_url=auth_url, client_id=client_id)
-            # databus artifact version
+                print(f"Downloading file: {databusURI}")
+                _download_file(databusURI, localDir, vault_token_file=token, databus_key=databus_key, auth_url=auth_url, client_id=client_id)
             elif version is not None:
-                json_str = get_json_ld_from_databus(databusURI, databus_key=databus_key)
-                res = __handle_databus_artifact_version__(json_str)
-                __download_list__(res, localDir, vault_token_file=token, databus_key=databus_key, auth_url=auth_url, client_id=client_id)
-            # databus artifact
+                print(f"Downloading version: {databusURI}")
+                _download_version(databusURI, localDir, vault_token_file=token, databus_key=databus_key, auth_url=auth_url, client_id=client_id)
             elif artifact is not None:
-                json_str = get_json_ld_from_databus(databusURI, databus_key=databus_key)
-                latest = 
__get_databus_latest_version_of_artifact__(json_str) - print(f"No version given, using latest version: {latest}") - json_str = get_json_ld_from_databus(latest, databus_key=databus_key) - res = __handle_databus_artifact_version__(json_str) - __download_list__(res, localDir, vault_token_file=token, databus_key=databus_key, auth_url=auth_url, client_id=client_id) - - # databus account + print(f"Downloading {'all' if all_versions else 'latest'} version(s) of artifact: {databusURI}") + _download_artifact(databusURI, localDir, all_versions=all_versions, vault_token_file=token, databus_key=databus_key, auth_url=auth_url, client_id=client_id) + elif group is not None and group != "collections": + print(f"Downloading group and all its artifacts and versions: {databusURI}") + _download_group(databusURI, localDir, all_versions=all_versions, vault_token_file=token, databus_key=databus_key, auth_url=auth_url, client_id=client_id) elif account is not None: print("accountId not supported yet") # TODO else: @@ -327,5 +473,5 @@ def download( print("QUERY {}", databusURI.replace("\n", " ")) if endpoint is None: # endpoint is required for queries (--databus) raise ValueError("No endpoint given for query") - res = __handle_databus_file_query__(endpoint, databusURI, databus_key=databus_key) - __download_list__(res, localDir, vault_token_file=token, databus_key=databus_key, auth_url=auth_url, client_id=client_id) \ No newline at end of file + res = _get_file_download_urls_from_sparql_query(endpoint, databusURI, databus_key=databus_key) + _download_files(res, localDir, vault_token_file=token, databus_key=databus_key, auth_url=auth_url, client_id=client_id) \ No newline at end of file diff --git a/databusclient/api/utils.py b/databusclient/api/utils.py index 1ffe421..5c0fd3f 100644 --- a/databusclient/api/utils.py +++ b/databusclient/api/utils.py @@ -17,7 +17,7 @@ def get_databus_id_parts_from_uri(uri: str) -> Tuple[Optional[str], Optional[str parts += [None] * (6 - len(parts)) # pad with None if less than 6 parts return tuple(parts[:6]) # return only the first 6 parts -def get_json_ld_from_databus(uri: str, databus_key: str | None = None) -> str: +def fetch_databus_jsonld(uri: str, databus_key: str | None = None) -> str: """ Retrieve JSON-LD representation of a databus resource. diff --git a/databusclient/cli.py b/databusclient/cli.py index 804a950..d8f050f 100644 --- a/databusclient/cli.py +++ b/databusclient/cli.py @@ -99,9 +99,10 @@ def deploy(version_id, title, abstract, description, license_url, apikey, @click.option("--databus", help="Databus URL (if not given, inferred from databusuri, e.g. https://databus.dbpedia.org/sparql)") @click.option("--vault-token", help="Path to Vault refresh token file") @click.option("--databus-key", help="Databus API key to download from protected databus") +@click.option("--all-versions", is_flag=True, help="When downloading artifacts, download all versions instead of only the latest") @click.option("--authurl", default="https://auth.dbpedia.org/realms/dbpedia/protocol/openid-connect/token", show_default=True, help="Keycloak token endpoint URL") @click.option("--clientid", default="vault-token-exchange", show_default=True, help="Client ID for token exchange") -def download(databusuris: List[str], localdir, databus, vault_token, databus_key, authurl, clientid): +def download(databusuris: List[str], localdir, databus, vault_token, databus_key, all_versions, authurl, clientid): """ Download datasets from databus, optionally using vault access if vault options are provided. 
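 
     Example invocation (illustrative; assumes the installed entry point is
     named "databusclient" and reuses the collection URI from the test suite):
 
         databusclient download https://databus.dbpedia.org/dbpedia/collections/dbpedia-snapshot-2022-12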
""" @@ -111,6 +112,7 @@ def download(databusuris: List[str], localdir, databus, vault_token, databus_key databusURIs=databusuris, token=vault_token, databus_key=databus_key, + all_versions=all_versions, auth_url=authurl, client_id=clientid, ) diff --git a/tests/test_databusclient.py b/tests/test_databusclient.py index 202ac16..ef965be 100644 --- a/tests/test_databusclient.py +++ b/tests/test_databusclient.py @@ -1,6 +1,6 @@ """Client tests""" import pytest -from databusclient.client import create_dataset, create_distribution, __get_file_info +from databusclient.api.deploy import create_dataset, create_distribution, __get_file_info from collections import OrderedDict diff --git a/tests/test_download.py b/tests/test_download.py index 6a1a72e..19dd3bc 100644 --- a/tests/test_download.py +++ b/tests/test_download.py @@ -1,6 +1,6 @@ """Download Tests""" import pytest -import databusclient.client as cl +from databusclient.api.download import download as api_download DEFAULT_ENDPOINT="https://databus.dbpedia.org/sparql" TEST_QUERY=""" @@ -17,7 +17,7 @@ TEST_COLLECTION="https://databus.dbpedia.org/dbpedia/collections/dbpedia-snapshot-2022-12" def test_with_query(): - cl.download("tmp",DEFAULT_ENDPOINT,[TEST_QUERY]) + api_download("tmp",DEFAULT_ENDPOINT,[TEST_QUERY]) def test_with_collection(): - cl.download("tmp",DEFAULT_ENDPOINT,[TEST_COLLECTION]) \ No newline at end of file + api_download("tmp",DEFAULT_ENDPOINT,[TEST_COLLECTION]) \ No newline at end of file From 52b5ed3a44014729c3b42fbcb31ae114423e7cea Mon Sep 17 00:00:00 2001 From: Integer-Ctrl Date: Sun, 7 Dec 2025 16:41:56 +0100 Subject: [PATCH 3/6] refactor: iteration over deploy.py --- databusclient/api/deploy.py | 30 +++++++++++++++--------------- databusclient/api/download.py | 7 +++---- tests/test_databusclient.py | 4 ++-- 3 files changed, 20 insertions(+), 21 deletions(-) diff --git a/databusclient/api/deploy.py b/databusclient/api/deploy.py index ed8b931..b8147c0 100644 --- a/databusclient/api/deploy.py +++ b/databusclient/api/deploy.py @@ -23,7 +23,7 @@ class DeployLogLevel(Enum): debug = 2 -def __get_content_variants(distribution_str: str) -> Optional[Dict[str, str]]: +def _get_content_variants(distribution_str: str) -> Optional[Dict[str, str]]: args = distribution_str.split("|") # cv string is ALWAYS at position 1 after the URL @@ -41,7 +41,7 @@ def __get_content_variants(distribution_str: str) -> Optional[Dict[str, str]]: return cvs -def __get_filetype_definition( +def _get_filetype_definition( distribution_str: str, ) -> Tuple[Optional[str], Optional[str]]: file_ext = None @@ -80,9 +80,9 @@ def __get_filetype_definition( return file_ext, compression -def __get_extensions(distribution_str: str) -> Tuple[str, str, str]: +def _get_extensions(distribution_str: str) -> Tuple[str, str, str]: extension_part = "" - format_extension, compression = __get_filetype_definition(distribution_str) + format_extension, compression = _get_filetype_definition(distribution_str) if format_extension is not None: # build the format extension (only append compression if not none) @@ -119,7 +119,7 @@ def __get_extensions(distribution_str: str) -> Tuple[str, str, str]: return extension_part, format_extension, compression -def __get_file_stats(distribution_str: str) -> Tuple[Optional[str], Optional[int]]: +def _get_file_stats(distribution_str: str) -> Tuple[Optional[str], Optional[int]]: metadata_list = distribution_str.split("|")[1:] # check whether there is the shasum:length tuple separated by : if len(metadata_list) == 0 or ":" not in metadata_list[-1]: 
@@ -139,7 +139,7 @@ def __get_file_stats(distribution_str: str) -> Tuple[Optional[str], Optional[int return sha256sum, content_length -def __load_file_stats(url: str) -> Tuple[str, int]: +def _load_file_stats(url: str) -> Tuple[str, int]: resp = requests.get(url) if resp.status_code > 400: raise requests.exceptions.RequestException(response=resp) @@ -149,20 +149,20 @@ def __load_file_stats(url: str) -> Tuple[str, int]: return sha256sum, content_length -def __get_file_info(distribution_str: str) -> Tuple[Dict[str, str], str, str, str, int]: - cvs = __get_content_variants(distribution_str) - extension_part, format_extension, compression = __get_extensions(distribution_str) +def get_file_info(distribution_str: str) -> Tuple[Dict[str, str], str, str, str, int]: + cvs = _get_content_variants(distribution_str) + extension_part, format_extension, compression = _get_extensions(distribution_str) content_variant_part = "_".join([f"{key}={value}" for key, value in cvs.items()]) if __debug: print("DEBUG", distribution_str, extension_part) - sha256sum, content_length = __get_file_stats(distribution_str) + sha256sum, content_length = _get_file_stats(distribution_str) if sha256sum is None or content_length is None: __url = str(distribution_str).split("|")[0] - sha256sum, content_length = __load_file_stats(__url) + sha256sum, content_length = _load_file_stats(__url) return cvs, format_extension, compression, sha256sum, content_length @@ -200,7 +200,7 @@ def create_distribution( return f"{url}|{meta_string}" -def create_distributions_from_metadata(metadata: List[Dict[str, Union[str, int]]]) -> List[str]: +def _create_distributions_from_metadata(metadata: List[Dict[str, Union[str, int]]]) -> List[str]: """ Create distributions from metadata entries. @@ -313,7 +313,7 @@ def create_dataset( compression, sha256sum, content_length, - ) = __get_file_info(dst_string) + ) = get_file_info(dst_string) if not cvs and len(distributions) > 1: raise BadArgumentException( @@ -453,7 +453,7 @@ def deploy_from_metadata( Parameters ---------- metadata : List[Dict[str, Union[str, int]]] - List of file metadata entries (see create_distributions_from_metadata) + List of file metadata entries (see _create_distributions_from_metadata) version_id : str Dataset version ID in the form $DATABUS_BASE/$ACCOUNT/$GROUP/$ARTIFACT/$VERSION title : str @@ -467,7 +467,7 @@ def deploy_from_metadata( apikey : str API key for authentication """ - distributions = create_distributions_from_metadata(metadata) + distributions = _create_distributions_from_metadata(metadata) dataset = create_dataset( version_id=version_id, diff --git a/databusclient/api/download.py b/databusclient/api/download.py index 859e35f..0fa7dce 100644 --- a/databusclient/api/download.py +++ b/databusclient/api/download.py @@ -26,12 +26,11 @@ def _download_file(url, localDir, vault_token_file=None, databus_key=None, auth_ """ if localDir is None: _host, account, group, artifact, version, file = get_databus_id_parts_from_uri(url) - fileLocalDir = os.path.join(os.getcwd(), account, group, artifact, version if version is not None else "latest") - print(f"Local directory not given, using {fileLocalDir}") + localDir = os.path.join(os.getcwd(), account, group, artifact, version if version is not None else "latest") + print(f"Local directory not given, using {localDir}") file = url.split("/")[-1] - filename = os.path.join(fileLocalDir, file) - + filename = os.path.join(localDir, file) print(f"Download file: {url}") dirpath = os.path.dirname(filename) if dirpath: diff --git 
a/tests/test_databusclient.py b/tests/test_databusclient.py index ef965be..4c65e19 100644 --- a/tests/test_databusclient.py +++ b/tests/test_databusclient.py @@ -1,6 +1,6 @@ """Client tests""" import pytest -from databusclient.api.deploy import create_dataset, create_distribution, __get_file_info +from databusclient.api.deploy import create_dataset, create_distribution, get_file_info from collections import OrderedDict @@ -47,7 +47,7 @@ def test_distribution_cases(): compression, sha256sum, content_length, - ) = __get_file_info(artifact_name, dst_string) + ) = get_file_info(artifact_name, dst_string) created_dst_str = create_distribution( uri, cvs, formatExtension, compression, (sha256sum, content_length) From 56863b45d16d76c49dc0191dfb9752cb97c3895a Mon Sep 17 00:00:00 2001 From: Integer-Ctrl Date: Sun, 7 Dec 2025 19:17:50 +0100 Subject: [PATCH 4/6] refactor: webdav --- databusclient/api/__init__.py | 1 + databusclient/api/download.py | 3 +-- databusclient/cli.py | 4 ++-- databusclient/consume/download.py | 4 ---- databusclient/extensions/__init__.py | 1 + .../{rclone_wrapper/upload.py => extensions/webdav.py} | 2 +- 6 files changed, 6 insertions(+), 9 deletions(-) create mode 100644 databusclient/api/__init__.py delete mode 100644 databusclient/consume/download.py create mode 100644 databusclient/extensions/__init__.py rename databusclient/{rclone_wrapper/upload.py => extensions/webdav.py} (96%) diff --git a/databusclient/api/__init__.py b/databusclient/api/__init__.py new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/databusclient/api/__init__.py @@ -0,0 +1 @@ + diff --git a/databusclient/api/download.py b/databusclient/api/download.py index 0fa7dce..eb03d5f 100644 --- a/databusclient/api/download.py +++ b/databusclient/api/download.py @@ -1,11 +1,10 @@ -from typing import List, Iterator +from typing import List import requests import os from tqdm import tqdm import json from SPARQLWrapper import SPARQLWrapper, JSON -from databusclient.api.delete import _delete_group, _delete_resource from databusclient.api.utils import get_databus_id_parts_from_uri, fetch_databus_jsonld diff --git a/databusclient/cli.py b/databusclient/cli.py index d8f050f..a977b71 100644 --- a/databusclient/cli.py +++ b/databusclient/cli.py @@ -5,7 +5,7 @@ import click from typing import List -from databusclient.rclone_wrapper import upload +from databusclient.extensions import webdav from databusclient.api.delete import delete as api_delete import databusclient.api.deploy as api_deploy @@ -81,7 +81,7 @@ def deploy(version_id, title, abstract, description, license_url, apikey, click.echo("[MODE] Upload & Deploy to DBpedia Databus via Nextcloud") click.echo(f"→ Uploading to: {remote}:{path}") - metadata = upload.upload_to_nextcloud(distributions, remote, path, webdav_url) + metadata = webdav.upload_to_webdav(distributions, remote, path, webdav_url) api_deploy.deploy_from_metadata(metadata, version_id, title, abstract, description, license_url, apikey) return diff --git a/databusclient/consume/download.py b/databusclient/consume/download.py deleted file mode 100644 index a1bbd8a..0000000 --- a/databusclient/consume/download.py +++ /dev/null @@ -1,4 +0,0 @@ -### All kind of download functionalities for Databus ### - -class Downloder: - pass diff --git a/databusclient/extensions/__init__.py b/databusclient/extensions/__init__.py new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/databusclient/extensions/__init__.py @@ -0,0 +1 @@ + diff --git a/databusclient/rclone_wrapper/upload.py 
b/databusclient/extensions/webdav.py similarity index 96% rename from databusclient/rclone_wrapper/upload.py rename to databusclient/extensions/webdav.py index f0d3328..cac7027 100644 --- a/databusclient/rclone_wrapper/upload.py +++ b/databusclient/extensions/webdav.py @@ -26,7 +26,7 @@ def get_all_files(path): files.append(os.path.join(root, name)) return files -def upload_to_nextcloud(source_paths: list[str], remote_name: str, remote_path: str, webdav_url: str): +def upload_to_webdav(source_paths: list[str], remote_name: str, remote_path: str, webdav_url: str): result = [] for path in source_paths: if not os.path.exists(path): From 1c4a636845bc2650013ca6ceb89b83165579cddc Mon Sep 17 00:00:00 2001 From: Integer-Ctrl Date: Sun, 7 Dec 2025 19:34:03 +0100 Subject: [PATCH 5/6] feat: ruff linter & formatter --- .github/workflows/python-CI.yml | 25 +- README.md | 27 ++ databusclient/__init__.py | 3 +- databusclient/api/delete.py | 51 +++- databusclient/api/deploy.py | 36 ++- databusclient/api/download.py | 331 +++++++++++++++------- databusclient/api/utils.py | 18 +- databusclient/cli.py | 142 +++++++--- databusclient/extensions/webdav.py | 33 ++- poetry.lock | 430 ++++++++++++++++++----------- pyproject.toml | 9 +- tests/test_databusclient.py | 10 +- tests/test_download.py | 18 +- 13 files changed, 763 insertions(+), 370 deletions(-) diff --git a/.github/workflows/python-CI.yml b/.github/workflows/python-CI.yml index 547f7e8..f0cbee0 100644 --- a/.github/workflows/python-CI.yml +++ b/.github/workflows/python-CI.yml @@ -18,24 +18,17 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 - - name: Set up Python 3.10 - uses: actions/setup-python@v3 + - uses: actions/checkout@v4 + - name: Set up Python 3.11 + uses: actions/setup-python@v5 with: - python-version: "3.10" + python-version: "3.11" - uses: Gr1N/setup-poetry@v8 #install poetry - - name: Install parts of toolchain - run: | - python -m pip install --upgrade pip - pip install flake8 pytest + - name: Upgrade pip + run: python -m pip install --upgrade pip - name: Install requirements with poetry run: poetry install - - name: Lint with flake8 - run: | - # stop the build if there are Python syntax errors or undefined names - flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics - # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide - flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics + - name: Lint with Ruff + run: poetry run ruff check --output-format=github . - name: Test with pytest - run: | - poetry run pytest + run: poetry run pytest diff --git a/README.md b/README.md index c652275..6eba86e 100644 --- a/README.md +++ b/README.md @@ -20,6 +20,8 @@ Command-line and Python client for downloading and deploying datasets on DBpedia - [Delete](#cli-delete) - [Module Usage](#module-usage) - [Deploy](#module-deploy) +- [Contributing](#contributing) + - [Linting](#linting) ## Quickstart @@ -43,6 +45,7 @@ You can then use the client in the command line: ```bash databusclient --help databusclient deploy --help +databusclient delete --help databusclient download --help ``` @@ -553,3 +556,27 @@ from databusclient import deploy # API key can be found (or generated) at https://$$DATABUS_BASE$$/$$USER$$#settings deploy(dataset, "mysterious API key") ``` + +## Development + +Install development dependencies yourself or via [Poetry](https://python-poetry.org/): + +```bash +poetry install --with dev +``` + +### Linting + +The used linter is [Ruff](https://ruff.rs/). 
Ruff is configured in `pyproject.toml` and is enforced in CI (`.github/workflows/python-CI.yml`). + +For development, you can run linting locally with `ruff check .` and optionally auto-format with `ruff format .`. + +To ensure compatibility with the dependencies configured in `pyproject.toml`, run Ruff via Poetry: + +```bash +# To check for linting issues: +poetry run ruff check . + +# To auto-format code: +poetry run ruff format . +``` \ No newline at end of file diff --git a/databusclient/__init__.py b/databusclient/__init__.py index 3e053b5..d15edb6 100644 --- a/databusclient/__init__.py +++ b/databusclient/__init__.py @@ -1,7 +1,8 @@ from databusclient import cli -from databusclient.api.deploy import create_dataset, deploy, create_distribution +from databusclient.api.deploy import create_dataset, create_distribution, deploy __all__ = ["create_dataset", "deploy", "create_distribution"] + def run(): cli.app() diff --git a/databusclient/api/delete.py b/databusclient/api/delete.py index 5db8ab2..828644f 100644 --- a/databusclient/api/delete.py +++ b/databusclient/api/delete.py @@ -1,8 +1,10 @@ import json -import requests from typing import List -from databusclient.api.utils import get_databus_id_parts_from_uri, fetch_databus_jsonld +import requests + +from databusclient.api.utils import fetch_databus_jsonld, get_databus_id_parts_from_uri + def _confirm_delete(databusURI: str) -> str: """ @@ -17,9 +19,17 @@ def _confirm_delete(databusURI: str) -> str: - "cancel" if the user chooses to cancel the entire deletion process """ print(f"Are you sure you want to delete: {databusURI}?") - print("\nThis action is irreversible and will permanently remove the resource and all its data.") + print( + "\nThis action is irreversible and will permanently remove the resource and all its data." + ) while True: - choice = input("Type 'yes'/'y' to confirm, 'skip'/'s' to skip this resource, or 'cancel'/'c' to abort: ").strip().lower() + choice = ( + input( + "Type 'yes'/'y' to confirm, 'skip'/'s' to skip this resource, or 'cancel'/'c' to abort: " + ) + .strip() + .lower() + ) if choice in ("yes", "y"): return "confirm" elif choice in ("skip", "s"): @@ -30,7 +40,9 @@ print("Invalid input. Please type 'yes'/'y', 'skip'/'s', or 'cancel'/'c'.") -def _delete_resource(databusURI: str, databus_key: str, dry_run: bool = False, force: bool = False): +def _delete_resource( + databusURI: str, databus_key: str, dry_run: bool = False, force: bool = False +): """ Delete a single Databus resource (version, artifact, group). 
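For reference, a minimal sketch of the request `_delete_resource` sends; the header and status handling match the hunk below, while the use of `requests.delete` itself is an assumption, since the call is elided from this diff.

```python
# Simplified sketch of the HTTP call behind _delete_resource (assumed to use
# requests.delete); the real function adds dry-run, force, and confirmation.
import requests

def delete_resource_sketch(databus_uri: str, databus_key: str) -> None:
    headers = {"accept": "*/*", "X-API-KEY": databus_key}  # as in the hunk below
    response = requests.delete(databus_uri, headers=headers)
    if response.status_code in (200, 204):
        print(f"Successfully deleted: {databus_uri}")
    else:
        raise Exception(
            f"Failed to delete {databus_uri}: {response.status_code} - {response.text}"
        )
```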
@@ -56,10 +68,7 @@ def _delete_resource(databusURI: str, databus_key: str, dry_run: bool = False, f if databus_key is None: raise ValueError("Databus API key must be provided for deletion") - headers = { - "accept": "*/*", - "X-API-KEY": databus_key - } + headers = {"accept": "*/*", "X-API-KEY": databus_key} if dry_run: print(f"[DRY RUN] Would delete: {databusURI}") @@ -70,10 +79,14 @@ def _delete_resource(databusURI: str, databus_key: str, dry_run: bool = False, f if response.status_code in (200, 204): print(f"Successfully deleted: {databusURI}") else: - raise Exception(f"Failed to delete {databusURI}: {response.status_code} - {response.text}") + raise Exception( + f"Failed to delete {databusURI}: {response.status_code} - {response.text}" + ) -def _delete_list(databusURIs: List[str], databus_key: str, dry_run: bool = False, force: bool = False): +def _delete_list( + databusURIs: List[str], databus_key: str, dry_run: bool = False, force: bool = False +): """ Delete a list of Databus resources. @@ -85,7 +98,9 @@ def _delete_list(databusURIs: List[str], databus_key: str, dry_run: bool = False _delete_resource(databusURI, databus_key, dry_run=dry_run, force=force) -def _delete_artifact(databusURI: str, databus_key: str, dry_run: bool = False, force: bool = False): +def _delete_artifact( + databusURI: str, databus_key: str, dry_run: bool = False, force: bool = False +): """ Delete an artifact and all its versions. @@ -121,7 +136,10 @@ def _delete_artifact(databusURI: str, databus_key: str, dry_run: bool = False, f # Finally, delete the artifact itself _delete_resource(databusURI, databus_key, dry_run=dry_run, force=force) -def _delete_group(databusURI: str, databus_key: str, dry_run: bool = False, force: bool = False): + +def _delete_group( + databusURI: str, databus_key: str, dry_run: bool = False, force: bool = False +): """ Delete a group and all its artifacts and versions. @@ -154,13 +172,14 @@ def _delete_group(databusURI: str, databus_key: str, dry_run: bool = False, forc # Finally, delete the group itself _delete_resource(databusURI, databus_key, dry_run=dry_run, force=force) + def delete(databusURIs: List[str], databus_key: str, dry_run: bool, force: bool): """ Delete a dataset from the databus. Delete a group, artifact, or version identified by the given databus URI. Will recursively delete all data associated with the dataset. 
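The dry-run flag documented here is the safe way to preview that recursion; a short usage sketch with placeholder values:

```python
# Hypothetical dry run: prints what would be deleted without removing anything.
from databusclient.api.delete import delete

delete(
    ["https://databus.dbpedia.org/account/group/artifact"],  # placeholder URI
    databus_key="my-api-key",  # placeholder key
    dry_run=True,   # only report the resources that would be removed
    force=False,    # keep the yes/skip/cancel confirmation prompt
)
```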
- + Parameters: - databusURIs: List of full databus URIs of the resources to delete - databus_key: Databus API key to authenticate the deletion requests @@ -169,7 +188,9 @@ def delete(databusURIs: List[str], databus_key: str, dry_run: bool, force: bool) """ for databusURI in databusURIs: - _host, _account, group, artifact, version, file = get_databus_id_parts_from_uri(databusURI) + _host, _account, group, artifact, version, file = get_databus_id_parts_from_uri( + databusURI + ) if group == "collections" and artifact is not None: print(f"Deleting collection: {databusURI}") diff --git a/databusclient/api/deploy.py b/databusclient/api/deploy.py index b8147c0..4c7eb27 100644 --- a/databusclient/api/deploy.py +++ b/databusclient/api/deploy.py @@ -1,8 +1,9 @@ -from enum import Enum -from typing import List, Dict, Tuple, Optional, Union -import requests import hashlib import json +from enum import Enum +from typing import Dict, List, Optional, Tuple, Union + +import requests __debug = False @@ -153,7 +154,7 @@ def get_file_info(distribution_str: str) -> Tuple[Dict[str, str], str, str, str, cvs = _get_content_variants(distribution_str) extension_part, format_extension, compression = _get_extensions(distribution_str) - content_variant_part = "_".join([f"{key}={value}" for key, value in cvs.items()]) + # content_variant_part = "_".join([f"{key}={value}" for key, value in cvs.items()]) if __debug: print("DEBUG", distribution_str, extension_part) @@ -200,7 +201,10 @@ def create_distribution( return f"{url}|{meta_string}" -def _create_distributions_from_metadata(metadata: List[Dict[str, Union[str, int]]]) -> List[str]: + +def _create_distributions_from_metadata( + metadata: List[Dict[str, Union[str, int]]], +) -> List[str]: """ Create distributions from metadata entries. @@ -233,11 +237,16 @@ def _create_distributions_from_metadata(metadata: List[Dict[str, Union[str, int] size = entry["size"] url = entry["url"] if not isinstance(size, int) or size <= 0: - raise ValueError(f"Invalid size for {url}: expected positive integer, got {size}") + raise ValueError( + f"Invalid size for {url}: expected positive integer, got {size}" + ) # Validate SHA-256 hex digest (64 hex chars) - if not isinstance(checksum, str) or len(checksum) != 64 or not all( - c in '0123456789abcdefABCDEF' for c in checksum): - raise ValueError(f"Invalid checksum for {url}") + if ( + not isinstance(checksum, str) + or len(checksum) != 64 + or not all(c in "0123456789abcdefABCDEF" for c in checksum) + ): + raise ValueError(f"Invalid checksum for {url}") distributions.append( create_distribution( @@ -245,12 +254,13 @@ def _create_distributions_from_metadata(metadata: List[Dict[str, Union[str, int] cvs={"count": f"{counter}"}, file_format=entry.get("file_format"), compression=entry.get("compression"), - sha256_length_tuple=(checksum, size) + sha256_length_tuple=(checksum, size), ) ) counter += 1 return distributions + def create_dataset( version_id: str, title: str, @@ -361,7 +371,7 @@ def create_dataset( "@type": "Artifact", "title": title, "abstract": abstract, - "description": description + "description": description, } graphs.append(artifact_graph) @@ -445,7 +455,7 @@ def deploy_from_metadata( abstract: str, description: str, license_url: str, - apikey: str + apikey: str, ) -> None: """ Deploy a dataset from metadata entries. 
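The entries this helper consumes look like the following sketch (placeholder values); `checksum` must be a 64-character SHA-256 hex digest and `size` a positive byte count, as the validation in the next hunk enforces.

```python
# Example input for _create_distributions_from_metadata (placeholder values).
metadata = [
    {
        "url": "https://example.org/files/data.ttl",
        "checksum": "ab" * 32,  # stand-in for a real 64-char sha256 hex digest
        "size": 12345,          # positive content length in bytes
        "file_format": "ttl",   # optional
        "compression": None,    # optional
    }
]
```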
@@ -475,7 +485,7 @@ def deploy_from_metadata( abstract=abstract, description=description, license_url=license_url, - distributions=distributions + distributions=distributions, ) print(f"Deploying dataset version: {version_id}") diff --git a/databusclient/api/download.py b/databusclient/api/download.py index eb03d5f..5f5877a 100644 --- a/databusclient/api/download.py +++ b/databusclient/api/download.py @@ -1,14 +1,22 @@ +import json +import os from typing import List + import requests -import os +from SPARQLWrapper import JSON, SPARQLWrapper from tqdm import tqdm -import json -from SPARQLWrapper import SPARQLWrapper, JSON -from databusclient.api.utils import get_databus_id_parts_from_uri, fetch_databus_jsonld +from databusclient.api.utils import fetch_databus_jsonld, get_databus_id_parts_from_uri -def _download_file(url, localDir, vault_token_file=None, databus_key=None, auth_url=None, client_id=None) -> None: +def _download_file( + url, + localDir, + vault_token_file=None, + databus_key=None, + auth_url=None, + client_id=None, +) -> None: """ Download a file from the internet with a progress bar using tqdm. @@ -24,8 +32,16 @@ def _download_file(url, localDir, vault_token_file=None, databus_key=None, auth_ 2. If server responds with WWW-Authenticate: Bearer, 401 Unauthorized), then fetch Vault access token and retry with Authorization header. """ if localDir is None: - _host, account, group, artifact, version, file = get_databus_id_parts_from_uri(url) - localDir = os.path.join(os.getcwd(), account, group, artifact, version if version is not None else "latest") + _host, account, group, artifact, version, file = get_databus_id_parts_from_uri( + url + ) + localDir = os.path.join( + os.getcwd(), + account, + group, + artifact, + version if version is not None else "latest", + ) print(f"Local directory not given, using {localDir}") file = url.split("/")[-1] @@ -37,16 +53,24 @@ def _download_file(url, localDir, vault_token_file=None, databus_key=None, auth_ # --- 1. Get redirect URL by requesting HEAD --- response = requests.head(url, stream=True) # Check for redirect and update URL if necessary - if response.headers.get("Location") and response.status_code in [301, 302, 303, 307, 308]: + if response.headers.get("Location") and response.status_code in [ + 301, + 302, + 303, + 307, + 308, + ]: url = response.headers.get("Location") print("Redirects url: ", url) # --- 2. Try direct GET --- response = requests.get(url, stream=True, allow_redirects=True, timeout=30) - www = response.headers.get('WWW-Authenticate', '') # get WWW-Authenticate header if present to check for Bearer auth + www = response.headers.get( + "WWW-Authenticate", "" + ) # get WWW-Authenticate header if present to check for Bearer auth # Vault token required if 401 Unauthorized with Bearer challenge - if (response.status_code == 401 and "bearer" in www.lower()): + if response.status_code == 401 and "bearer" in www.lower(): print(f"Authentication required for {url}") if not (vault_token_file): raise ValueError("Vault token file not given for protected download") @@ -58,7 +82,7 @@ def _download_file(url, localDir, vault_token_file=None, databus_key=None, auth_ # --- 4. 
Retry with token --- response = requests.get(url, headers=headers, stream=True, timeout=30) - + # Databus API key required if only 401 Unauthorized elif response.status_code == 401: print(f"API key required for {url}") @@ -77,27 +101,29 @@ def _download_file(url, localDir, vault_token_file=None, databus_key=None, auth_ else: raise e - total_size_in_bytes = int(response.headers.get('content-length', 0)) + total_size_in_bytes = int(response.headers.get("content-length", 0)) block_size = 1024 # 1 KiB - progress_bar = tqdm(total=total_size_in_bytes, unit='iB', unit_scale=True) - with open(filename, 'wb') as file: + progress_bar = tqdm(total=total_size_in_bytes, unit="iB", unit_scale=True) + with open(filename, "wb") as file: for data in response.iter_content(block_size): progress_bar.update(len(data)) file.write(data) progress_bar.close() # TODO: could be a problem of github raw / openflaas - if total_size_in_bytes != 0 and progress_bar.n != total_size_in_bytes: - raise IOError("Downloaded size does not match Content-Length header") + # if total_size_in_bytes != 0 and progress_bar.n != total_size_in_bytes: + # raise IOError("Downloaded size does not match Content-Length header") -def _download_files(urls: List[str], - localDir: str, - vault_token_file: str = None, - databus_key: str = None, - auth_url: str = None, - client_id: str = None) -> None: +def _download_files( + urls: List[str], + localDir: str, + vault_token_file: str = None, + databus_key: str = None, + auth_url: str = None, + client_id: str = None, +) -> None: """ Download multiple files from the databus. @@ -110,7 +136,15 @@ def _download_files(urls: List[str], - client_id: Client ID for token exchange """ for url in urls: - _download_file(url=url, localDir=localDir, vault_token_file=vault_token_file, databus_key=databus_key, auth_url=auth_url, client_id=client_id) + _download_file( + url=url, + localDir=localDir, + vault_token_file=vault_token_file, + databus_key=databus_key, + auth_url=auth_url, + client_id=client_id, + ) + def _get_sparql_query_of_collection(uri: str, databus_key: str | None = None) -> str: """ @@ -143,7 +177,7 @@ def _query_sparql_endpoint(endpoint_url, query, databus_key=None) -> dict: - Dictionary containing the query results """ sparql = SPARQLWrapper(endpoint_url) - sparql.method = 'POST' + sparql.method = "POST" sparql.setQuery(query) sparql.setReturnFormat(JSON) if databus_key is not None: @@ -152,7 +186,9 @@ def _query_sparql_endpoint(endpoint_url, query, databus_key=None) -> dict: return results -def _get_file_download_urls_from_sparql_query(endpoint_url, query, databus_key=None) -> List[str]: +def _get_file_download_urls_from_sparql_query( + endpoint_url, query, databus_key=None +) -> List[str]: """ Execute a SPARQL query to get databus file download URLs. @@ -186,10 +222,10 @@ def _get_file_download_urls_from_sparql_query(endpoint_url, query, databus_key=N return urls -def __get_vault_access__(download_url: str, - token_file: str, - auth_url: str, - client_id: str) -> str: + +def __get_vault_access__( + download_url: str, token_file: str, auth_url: str, client_id: str +) -> str: """ Get Vault access token for a protected databus download. """ @@ -204,31 +240,37 @@ def __get_vault_access__(download_url: str, print(f"Warning: token from {token_file} is short (<80 chars)") # 2. 
Refresh token -> access token - resp = requests.post(auth_url, data={ - "client_id": client_id, - "grant_type": "refresh_token", - "refresh_token": refresh_token - }) + resp = requests.post( + auth_url, + data={ + "client_id": client_id, + "grant_type": "refresh_token", + "refresh_token": refresh_token, + }, + ) resp.raise_for_status() access_token = resp.json()["access_token"] # 3. Extract host as audience # Remove protocol prefix if download_url.startswith("https://"): - host_part = download_url[len("https://"):] + host_part = download_url[len("https://") :] elif download_url.startswith("http://"): - host_part = download_url[len("http://"):] + host_part = download_url[len("http://") :] else: host_part = download_url audience = host_part.split("/")[0] # host is before first "/" # 4. Access token -> Vault token - resp = requests.post(auth_url, data={ - "client_id": client_id, - "grant_type": "urn:ietf:params:oauth:grant-type:token-exchange", - "subject_token": access_token, - "audience": audience - }) + resp = requests.post( + auth_url, + data={ + "client_id": client_id, + "grant_type": "urn:ietf:params:oauth:grant-type:token-exchange", + "subject_token": access_token, + "audience": audience, + }, + ) resp.raise_for_status() vault_token = resp.json()["access_token"] @@ -236,13 +278,15 @@ def __get_vault_access__(download_url: str, return vault_token -def _download_collection(uri: str, - endpoint: str, - localDir: str, - vault_token: str = None, - databus_key: str = None, - auth_url: str = None, - client_id: str = None) -> None: +def _download_collection( + uri: str, + endpoint: str, + localDir: str, + vault_token: str = None, + databus_key: str = None, + auth_url: str = None, + client_id: str = None, +) -> None: """ Download all files in a databus collection. @@ -256,16 +300,27 @@ def _download_collection(uri: str, - client_id: Client ID for token exchange """ query = _get_sparql_query_of_collection(uri, databus_key=databus_key) - file_urls = _get_file_download_urls_from_sparql_query(endpoint, query, databus_key=databus_key) - _download_files(list(file_urls), localDir, vault_token_file=vault_token, databus_key=databus_key, auth_url=auth_url, client_id=client_id) - - -def _download_version(uri: str, - localDir: str, - vault_token_file: str = None, - databus_key: str = None, - auth_url: str = None, - client_id: str = None) -> None: + file_urls = _get_file_download_urls_from_sparql_query( + endpoint, query, databus_key=databus_key + ) + _download_files( + list(file_urls), + localDir, + vault_token_file=vault_token, + databus_key=databus_key, + auth_url=auth_url, + client_id=client_id, + ) + + +def _download_version( + uri: str, + localDir: str, + vault_token_file: str = None, + databus_key: str = None, + auth_url: str = None, + client_id: str = None, +) -> None: """ Download all files in a databus artifact version. 
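Taken together, a version download reduces to fetching the version's JSON-LD, extracting the file URLs, and downloading them; a condensed sketch using this module's own helpers (private names, placeholder URI, no auth):

```python
# Condensed sketch of _download_version's flow for a public version URI.
from databusclient.api.download import (
    _download_files,
    _get_file_download_urls_from_artifact_jsonld,
)
from databusclient.api.utils import fetch_databus_jsonld

version_uri = "https://databus.dbpedia.org/account/group/artifact/2025.12.05"  # placeholder
json_ld = fetch_databus_jsonld(version_uri)                   # JSON-LD of the version
urls = _get_file_download_urls_from_artifact_jsonld(json_ld)  # extract download URLs
_download_files(urls, "tmp")                                  # defaults: no vault/API key
```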
@@ -279,16 +334,25 @@ def _download_version(uri: str, """ json_str = fetch_databus_jsonld(uri, databus_key=databus_key) file_urls = _get_file_download_urls_from_artifact_jsonld(json_str) - _download_files(file_urls, localDir, vault_token_file=vault_token_file, databus_key=databus_key, auth_url=auth_url, client_id=client_id) - - -def _download_artifact(uri: str, - localDir: str, - all_versions: bool = False, - vault_token_file: str = None, - databus_key: str = None, - auth_url: str = None, - client_id: str = None) -> None: + _download_files( + file_urls, + localDir, + vault_token_file=vault_token_file, + databus_key=databus_key, + auth_url=auth_url, + client_id=client_id, + ) + + +def _download_artifact( + uri: str, + localDir: str, + all_versions: bool = False, + vault_token_file: str = None, + databus_key: str = None, + auth_url: str = None, + client_id: str = None, +) -> None: """ Download files in a databus artifact. @@ -309,10 +373,19 @@ def _download_artifact(uri: str, print(f"Downloading version: {version_uri}") json_str = fetch_databus_jsonld(version_uri, databus_key=databus_key) file_urls = _get_file_download_urls_from_artifact_jsonld(json_str) - _download_files(file_urls, localDir, vault_token_file=vault_token_file, databus_key=databus_key, auth_url=auth_url, client_id=client_id) - - -def _get_databus_versions_of_artifact(json_str: str, all_versions: bool) -> str | List[str]: + _download_files( + file_urls, + localDir, + vault_token_file=vault_token_file, + databus_key=databus_key, + auth_url=auth_url, + client_id=client_id, + ) + + +def _get_databus_versions_of_artifact( + json_str: str, all_versions: bool +) -> str | List[str]: """ Parse the JSON-LD of a databus artifact to extract URLs of its versions. @@ -342,6 +415,7 @@ def _get_databus_versions_of_artifact(json_str: str, all_versions: bool) -> str return version_urls return version_urls[0] + def _get_file_download_urls_from_artifact_jsonld(json_str: str) -> List[str]: """ Parse the JSON-LD of a databus artifact version to extract download URLs. @@ -364,13 +438,15 @@ def _get_file_download_urls_from_artifact_jsonld(json_str: str) -> List[str]: return databusIdUrl -def _download_group(uri: str, - localDir: str, - all_versions: bool = False, - vault_token_file: str = None, - databus_key: str = None, - auth_url: str = None, - client_id: str = None) -> None: +def _download_group( + uri: str, + localDir: str, + all_versions: bool = False, + vault_token_file: str = None, + databus_key: str = None, + auth_url: str = None, + client_id: str = None, +) -> None: """ Download files in a databus group. @@ -387,8 +463,15 @@ def _download_group(uri: str, artifacts = _get_databus_artifacts_of_group(json_str) for artifact_uri in artifacts: print(f"Download artifact: {artifact_uri}") - _download_artifact(artifact_uri, localDir, all_versions=all_versions, vault_token_file=vault_token_file, databus_key=databus_key, auth_url=auth_url, client_id=client_id) - + _download_artifact( + artifact_uri, + localDir, + all_versions=all_versions, + vault_token_file=vault_token_file, + databus_key=databus_key, + auth_url=auth_url, + client_id=client_id, + ) def _get_databus_artifacts_of_group(json_str: str) -> List[str]: @@ -410,6 +493,7 @@ def _get_databus_artifacts_of_group(json_str: str) -> List[str]: result.append(uri) return result + def download( localDir: str, endpoint: str, @@ -418,13 +502,13 @@ def download( databus_key=None, all_versions=None, auth_url=None, - client_id=None + client_id=None, ) -> None: """ Download datasets from databus. 
- + Download of files, versions, artifacts, groups or databus collections by their databus URIs or user-defined SPARQL queries that return file download URLs. - + Parameters: - localDir: Local directory to download datasets to. If None, the databus folder structure is created in the current working directory. - endpoint: the databus endpoint URL. If None, inferred from databusURI. Required for user-defined SPARQL queries. @@ -435,7 +519,9 @@ - client_id: Client ID for token exchange. Default is "vault-token-exchange". """ for databusURI in databusURIs: - host, account, group, artifact, version, file = get_databus_id_parts_from_uri(databusURI) + host, account, group, artifact, version, file = get_databus_id_parts_from_uri( + databusURI + ) # dataID or databus collection if databusURI.startswith("http://") or databusURI.startswith("https://"): @@ -446,23 +532,67 @@ if group == "collections" and artifact is not None: print(f"Downloading collection: {databusURI}") - _download_collection(databusURI, endpoint, localDir, token, databus_key, auth_url, client_id) + _download_collection( + databusURI, + endpoint, + localDir, + token, + databus_key, + auth_url, + client_id, + ) elif file is not None: print(f"Downloading file: {databusURI}") - _download_file(databusURI, localDir, vault_token_file=token, databus_key=databus_key, auth_url=auth_url, client_id=client_id) + _download_file( + databusURI, + localDir, + vault_token_file=token, + databus_key=databus_key, + auth_url=auth_url, + client_id=client_id, + ) elif version is not None: print(f"Downloading version: {databusURI}") - _download_version(databusURI, localDir, vault_token_file=token, databus_key=databus_key, auth_url=auth_url, client_id=client_id) + _download_version( + databusURI, + localDir, + vault_token_file=token, + databus_key=databus_key, + auth_url=auth_url, + client_id=client_id, + ) elif artifact is not None: - print(f"Downloading {'all' if all_versions else 'latest'} version(s) of artifact: {databusURI}") - _download_artifact(databusURI, localDir, all_versions=all_versions, vault_token_file=token, databus_key=databus_key, auth_url=auth_url, client_id=client_id) + print( + f"Downloading {'all' if all_versions else 'latest'} version(s) of artifact: {databusURI}" + ) + _download_artifact( + databusURI, + localDir, + all_versions=all_versions, + vault_token_file=token, + databus_key=databus_key, + auth_url=auth_url, + client_id=client_id, + ) elif group is not None and group != "collections": - print(f"Downloading group and all its artifacts and versions: {databusURI}") - _download_group(databusURI, localDir, all_versions=all_versions, vault_token_file=token, databus_key=databus_key, auth_url=auth_url, client_id=client_id) + print( + f"Downloading group and all its artifacts and versions: {databusURI}" + ) + _download_group( + databusURI, + localDir, + all_versions=all_versions, + vault_token_file=token, + databus_key=databus_key, + auth_url=auth_url, + client_id=client_id, + ) elif account is not None: print("accountId not supported yet") # TODO else: - print("dataId not supported yet") # TODO add support for other DatabusIds + print( + "dataId not supported yet" + ) # TODO add support for other DatabusIds # query in local file elif databusURI.startswith("file://"): print("query in file not supported yet") @@ -471,5 +601,14 @@ print("QUERY {}", databusURI.replace("\n", " ")) if endpoint is None: # endpoint is required for queries (--databus) raise ValueError("No endpoint given for query") - res 
= _get_file_download_urls_from_sparql_query(endpoint, databusURI, databus_key=databus_key) - _download_files(res, localDir, vault_token_file=token, databus_key=databus_key, auth_url=auth_url, client_id=client_id) \ No newline at end of file + res = _get_file_download_urls_from_sparql_query( + endpoint, databusURI, databus_key=databus_key + ) + _download_files( + res, + localDir, + vault_token_file=token, + databus_key=databus_key, + auth_url=auth_url, + client_id=client_id, + ) diff --git a/databusclient/api/utils.py b/databusclient/api/utils.py index 5c0fd3f..0c6f342 100644 --- a/databusclient/api/utils.py +++ b/databusclient/api/utils.py @@ -1,10 +1,21 @@ +from typing import Optional, Tuple + import requests -from typing import Tuple, Optional -def get_databus_id_parts_from_uri(uri: str) -> Tuple[Optional[str], Optional[str], Optional[str], Optional[str], Optional[str], Optional[str]]: + +def get_databus_id_parts_from_uri( + uri: str, +) -> Tuple[ + Optional[str], + Optional[str], + Optional[str], + Optional[str], + Optional[str], + Optional[str], +]: """ Extract databus ID parts from a given databus URI. - + Parameters: - uri: The full databus URI @@ -17,6 +28,7 @@ def get_databus_id_parts_from_uri(uri: str) -> Tuple[Optional[str], Optional[str parts += [None] * (6 - len(parts)) # pad with None if less than 6 parts return tuple(parts[:6]) # return only the first 6 parts + def fetch_databus_jsonld(uri: str, databus_key: str | None = None) -> str: """ Retrieve JSON-LD representation of a databus resource. diff --git a/databusclient/cli.py b/databusclient/cli.py index a977b71..abb0f03 100644 --- a/databusclient/cli.py +++ b/databusclient/cli.py @@ -1,15 +1,15 @@ #!/usr/bin/env python3 import json import os - -import click from typing import List -from databusclient.extensions import webdav +import click -from databusclient.api.delete import delete as api_delete import databusclient.api.deploy as api_deploy +from databusclient.api.delete import delete as api_delete from databusclient.api.download import download as api_download +from databusclient.extensions import webdav + @click.group() def app(): @@ -19,26 +19,46 @@ def app(): @app.command() @click.option( - "--version-id", "version_id", + "--version-id", + "version_id", required=True, help="Target databus version/dataset identifier of the form " - "", + "", ) @click.option("--title", required=True, help="Dataset title") @click.option("--abstract", required=True, help="Dataset abstract max 200 chars") @click.option("--description", required=True, help="Dataset description") -@click.option("--license", "license_url", required=True, help="License (see dalicc.net)") +@click.option( + "--license", "license_url", required=True, help="License (see dalicc.net)" +) @click.option("--apikey", required=True, help="API key") - -@click.option("--metadata", "metadata_file", type=click.Path(exists=True), - help="Path to metadata JSON file (for metadata mode)") -@click.option("--webdav-url", "webdav_url", help="WebDAV URL (e.g., https://cloud.example.com/remote.php/webdav)") +@click.option( + "--metadata", + "metadata_file", + type=click.Path(exists=True), + help="Path to metadata JSON file (for metadata mode)", +) +@click.option( + "--webdav-url", + "webdav_url", + help="WebDAV URL (e.g., https://cloud.example.com/remote.php/webdav)", +) @click.option("--remote", help="rclone remote name (e.g., 'nextcloud')") @click.option("--path", help="Remote path on Nextcloud (e.g., 'datasets/mydataset')") - @click.argument("distributions", nargs=-1) -def 
deploy(version_id, title, abstract, description, license_url, apikey, - metadata_file, webdav_url, remote, path, distributions: List[str]): +def deploy( + version_id, + title, + abstract, + description, + license_url, + apikey, + metadata_file, + webdav_url, + remote, + path, + distributions: List[str], +): """ Flexible deploy to Databus command supporting three modes:\n - Classic deploy (distributions as arguments)\n @@ -48,41 +68,55 @@ def deploy(version_id, title, abstract, description, license_url, apikey, # Sanity checks for conflicting options if metadata_file and any([distributions, webdav_url, remote, path]): - raise click.UsageError("Invalid combination: when using --metadata, do not provide --webdav-url, --remote, --path, or distributions.") + raise click.UsageError( + "Invalid combination: when using --metadata, do not provide --webdav-url, --remote, --path, or distributions." + ) if any([webdav_url, remote, path]) and not all([webdav_url, remote, path]): - raise click.UsageError("Invalid combination: when using WebDAV/Nextcloud mode, please provide --webdav-url, --remote, and --path together.") + raise click.UsageError( + "Invalid combination: when using WebDAV/Nextcloud mode, please provide --webdav-url, --remote, and --path together." + ) # === Mode 1: Classic Deploy === if distributions and not (metadata_file or webdav_url or remote or path): click.echo("[MODE] Classic deploy with distributions") click.echo(f"Deploying dataset version: {version_id}") - dataid = api_deploy.create_dataset(version_id, title, abstract, description, license_url, distributions) + dataid = api_deploy.create_dataset( + version_id, title, abstract, description, license_url, distributions + ) api_deploy.deploy(dataid=dataid, api_key=apikey) return # === Mode 2: Metadata File === if metadata_file: click.echo(f"[MODE] Deploy from metadata file: {metadata_file}") - with open(metadata_file, 'r') as f: + with open(metadata_file, "r") as f: metadata = json.load(f) - api_deploy.deploy_from_metadata(metadata, version_id, title, abstract, description, license_url, apikey) + api_deploy.deploy_from_metadata( + metadata, version_id, title, abstract, description, license_url, apikey + ) return - + # === Mode 3: Upload & Deploy (Nextcloud) === if webdav_url and remote and path: if not distributions: - raise click.UsageError("Please provide files to upload when using WebDAV/Nextcloud mode.") + raise click.UsageError( + "Please provide files to upload when using WebDAV/Nextcloud mode." 
+ ) - #Check that all given paths exist and are files or directories.# + # Check that all given paths exist and are files or directories.# invalid = [f for f in distributions if not os.path.exists(f)] if invalid: - raise click.UsageError(f"The following input files or folders do not exist: {', '.join(invalid)}") + raise click.UsageError( + f"The following input files or folders do not exist: {', '.join(invalid)}" + ) click.echo("[MODE] Upload & Deploy to DBpedia Databus via Nextcloud") click.echo(f"→ Uploading to: {remote}:{path}") metadata = webdav.upload_to_webdav(distributions, remote, path, webdav_url) - api_deploy.deploy_from_metadata(metadata, version_id, title, abstract, description, license_url, apikey) + api_deploy.deploy_from_metadata( + metadata, version_id, title, abstract, description, license_url, apikey + ) return raise click.UsageError( @@ -95,14 +129,45 @@ def deploy(version_id, title, abstract, description, license_url, apikey, @app.command() @click.argument("databusuris", nargs=-1, required=True) -@click.option("--localdir", help="Local databus folder (if not given, databus folder structure is created in current working directory)") -@click.option("--databus", help="Databus URL (if not given, inferred from databusuri, e.g. https://databus.dbpedia.org/sparql)") +@click.option( + "--localdir", + help="Local databus folder (if not given, databus folder structure is created in current working directory)", +) +@click.option( + "--databus", + help="Databus URL (if not given, inferred from databusuri, e.g. https://databus.dbpedia.org/sparql)", +) @click.option("--vault-token", help="Path to Vault refresh token file") -@click.option("--databus-key", help="Databus API key to download from protected databus") -@click.option("--all-versions", is_flag=True, help="When downloading artifacts, download all versions instead of only the latest") -@click.option("--authurl", default="https://auth.dbpedia.org/realms/dbpedia/protocol/openid-connect/token", show_default=True, help="Keycloak token endpoint URL") -@click.option("--clientid", default="vault-token-exchange", show_default=True, help="Client ID for token exchange") -def download(databusuris: List[str], localdir, databus, vault_token, databus_key, all_versions, authurl, clientid): +@click.option( + "--databus-key", help="Databus API key to download from protected databus" +) +@click.option( + "--all-versions", + is_flag=True, + help="When downloading artifacts, download all versions instead of only the latest", +) +@click.option( + "--authurl", + default="https://auth.dbpedia.org/realms/dbpedia/protocol/openid-connect/token", + show_default=True, + help="Keycloak token endpoint URL", +) +@click.option( + "--clientid", + default="vault-token-exchange", + show_default=True, + help="Client ID for token exchange", +) +def download( + databusuris: List[str], + localdir, + databus, + vault_token, + databus_key, + all_versions, + authurl, + clientid, +): """ Download datasets from databus, optionally using vault access if vault options are provided. 
""" @@ -117,11 +182,18 @@ def download(databusuris: List[str], localdir, databus, vault_token, databus_key client_id=clientid, ) + @app.command() @click.argument("databusuris", nargs=-1, required=True) -@click.option("--databus-key", help="Databus API key to access protected databus", required=True) -@click.option("--dry-run", is_flag=True, help="Perform a dry run without actual deletion") -@click.option("--force", is_flag=True, help="Force deletion without confirmation prompt") +@click.option( + "--databus-key", help="Databus API key to access protected databus", required=True +) +@click.option( + "--dry-run", is_flag=True, help="Perform a dry run without actual deletion" +) +@click.option( + "--force", is_flag=True, help="Force deletion without confirmation prompt" +) def delete(databusuris: List[str], databus_key: str, dry_run: bool, force: bool): """ Delete a dataset from the databus. @@ -135,7 +207,7 @@ def delete(databusuris: List[str], databus_key: str, dry_run: bool, force: bool) databus_key=databus_key, dry_run=dry_run, force=force, - ) + ) if __name__ == "__main__": diff --git a/databusclient/extensions/webdav.py b/databusclient/extensions/webdav.py index cac7027..c0747f6 100644 --- a/databusclient/extensions/webdav.py +++ b/databusclient/extensions/webdav.py @@ -1,14 +1,14 @@ import hashlib import os -import subprocess import posixpath -from urllib.parse import urljoin, quote +import subprocess +from urllib.parse import quote, urljoin def compute_sha256_and_length(filepath): sha256 = hashlib.sha256() total_length = 0 - with open(filepath, 'rb') as f: + with open(filepath, "rb") as f: while True: chunk = f.read(4096) if not chunk: @@ -17,6 +17,7 @@ def compute_sha256_and_length(filepath): total_length += len(chunk) return sha256.hexdigest(), total_length + def get_all_files(path): if os.path.isfile(path): return [path] @@ -26,7 +27,10 @@ def get_all_files(path): files.append(os.path.join(root, name)) return files -def upload_to_webdav(source_paths: list[str], remote_name: str, remote_path: str, webdav_url: str): + +def upload_to_webdav( + source_paths: list[str], remote_name: str, remote_path: str, webdav_url: str +): result = [] for path in source_paths: if not os.path.exists(path): @@ -40,7 +44,7 @@ def upload_to_webdav(source_paths: list[str], remote_name: str, remote_path: str tmp_results = [] for file in files: - checksum,size = compute_sha256_and_length(file) + checksum, size = compute_sha256_and_length(file) if os.path.isdir(path): rel_file = os.path.relpath(file, abs_path) @@ -51,15 +55,20 @@ def upload_to_webdav(source_paths: list[str], remote_name: str, remote_path: str remote_webdav_path = posixpath.join(remote_path, os.path.basename(file)) # Preserve scheme/host and percent-encode path segments - url = urljoin(webdav_url.rstrip("/") + "/", quote(remote_webdav_path.lstrip("/"), safe="/")) + url = urljoin( + webdav_url.rstrip("/") + "/", + quote(remote_webdav_path.lstrip("/"), safe="/"), + ) filename = os.path.basename(file) - tmp_results.append({ - "filename": filename, - "checksum": checksum, - "size": size, - "url": url, - }) + tmp_results.append( + { + "filename": filename, + "checksum": checksum, + "size": size, + "url": url, + } + ) dest_subpath = posixpath.join(remote_path.lstrip("/"), basename) if os.path.isdir(path): diff --git a/poetry.lock b/poetry.lock index b4b80af..f772e40 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 2.1.1 and should not be changed by hand. 
+# This file is automatically @generated by Poetry 2.2.1 and should not be changed by hand. [[package]] name = "black" @@ -38,126 +38,149 @@ uvloop = ["uvloop (>=0.15.2)"] [[package]] name = "certifi" -version = "2024.2.2" +version = "2025.11.12" description = "Python package for providing Mozilla's CA Bundle." optional = false -python-versions = ">=3.6" +python-versions = ">=3.7" groups = ["main"] files = [ - {file = "certifi-2024.2.2-py3-none-any.whl", hash = "sha256:dc383c07b76109f368f6106eee2b593b04a011ea4d55f652c6ca24a754d1cdd1"}, - {file = "certifi-2024.2.2.tar.gz", hash = "sha256:0569859f95fc761b18b45ef421b1290a0f65f147e92a1e5eb3e635f9a5e4e66f"}, + {file = "certifi-2025.11.12-py3-none-any.whl", hash = "sha256:97de8790030bbd5c2d96b7ec782fc2f7820ef8dba6db909ccf95449f2d062d4b"}, + {file = "certifi-2025.11.12.tar.gz", hash = "sha256:d8ab5478f2ecd78af242878415affce761ca6bc54a22a27e026d7c25357c3316"}, ] [[package]] name = "charset-normalizer" -version = "3.3.2" +version = "3.4.4" description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." optional = false -python-versions = ">=3.7.0" +python-versions = ">=3.7" groups = ["main"] files = [ - {file = "charset-normalizer-3.3.2.tar.gz", hash = "sha256:f30c3cb33b24454a82faecaf01b19c18562b1e89558fb6c56de4d9118a032fd5"}, - {file = "charset_normalizer-3.3.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:25baf083bf6f6b341f4121c2f3c548875ee6f5339300e08be3f2b2ba1721cdd3"}, - {file = "charset_normalizer-3.3.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:06435b539f889b1f6f4ac1758871aae42dc3a8c0e24ac9e60c2384973ad73027"}, - {file = "charset_normalizer-3.3.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9063e24fdb1e498ab71cb7419e24622516c4a04476b17a2dab57e8baa30d6e03"}, - {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6897af51655e3691ff853668779c7bad41579facacf5fd7253b0133308cf000d"}, - {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1d3193f4a680c64b4b6a9115943538edb896edc190f0b222e73761716519268e"}, - {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cd70574b12bb8a4d2aaa0094515df2463cb429d8536cfb6c7ce983246983e5a6"}, - {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8465322196c8b4d7ab6d1e049e4c5cb460d0394da4a27d23cc242fbf0034b6b5"}, - {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a9a8e9031d613fd2009c182b69c7b2c1ef8239a0efb1df3f7c8da66d5dd3d537"}, - {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:beb58fe5cdb101e3a055192ac291b7a21e3b7ef4f67fa1d74e331a7f2124341c"}, - {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:e06ed3eb3218bc64786f7db41917d4e686cc4856944f53d5bdf83a6884432e12"}, - {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:2e81c7b9c8979ce92ed306c249d46894776a909505d8f5a4ba55b14206e3222f"}, - {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:572c3763a264ba47b3cf708a44ce965d98555f618ca42c926a9c1616d8f34269"}, - {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:fd1abc0d89e30cc4e02e4064dc67fcc51bd941eb395c502aac3ec19fab46b519"}, - {file = 
"charset_normalizer-3.3.2-cp310-cp310-win32.whl", hash = "sha256:3d47fa203a7bd9c5b6cee4736ee84ca03b8ef23193c0d1ca99b5089f72645c73"}, - {file = "charset_normalizer-3.3.2-cp310-cp310-win_amd64.whl", hash = "sha256:10955842570876604d404661fbccbc9c7e684caf432c09c715ec38fbae45ae09"}, - {file = "charset_normalizer-3.3.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:802fe99cca7457642125a8a88a084cef28ff0cf9407060f7b93dca5aa25480db"}, - {file = "charset_normalizer-3.3.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:573f6eac48f4769d667c4442081b1794f52919e7edada77495aaed9236d13a96"}, - {file = "charset_normalizer-3.3.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:549a3a73da901d5bc3ce8d24e0600d1fa85524c10287f6004fbab87672bf3e1e"}, - {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f27273b60488abe721a075bcca6d7f3964f9f6f067c8c4c605743023d7d3944f"}, - {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1ceae2f17a9c33cb48e3263960dc5fc8005351ee19db217e9b1bb15d28c02574"}, - {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:65f6f63034100ead094b8744b3b97965785388f308a64cf8d7c34f2f2e5be0c4"}, - {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:753f10e867343b4511128c6ed8c82f7bec3bd026875576dfd88483c5c73b2fd8"}, - {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4a78b2b446bd7c934f5dcedc588903fb2f5eec172f3d29e52a9096a43722adfc"}, - {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:e537484df0d8f426ce2afb2d0f8e1c3d0b114b83f8850e5f2fbea0e797bd82ae"}, - {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:eb6904c354526e758fda7167b33005998fb68c46fbc10e013ca97f21ca5c8887"}, - {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:deb6be0ac38ece9ba87dea880e438f25ca3eddfac8b002a2ec3d9183a454e8ae"}, - {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:4ab2fe47fae9e0f9dee8c04187ce5d09f48eabe611be8259444906793ab7cbce"}, - {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:80402cd6ee291dcb72644d6eac93785fe2c8b9cb30893c1af5b8fdd753b9d40f"}, - {file = "charset_normalizer-3.3.2-cp311-cp311-win32.whl", hash = "sha256:7cd13a2e3ddeed6913a65e66e94b51d80a041145a026c27e6bb76c31a853c6ab"}, - {file = "charset_normalizer-3.3.2-cp311-cp311-win_amd64.whl", hash = "sha256:663946639d296df6a2bb2aa51b60a2454ca1cb29835324c640dafb5ff2131a77"}, - {file = "charset_normalizer-3.3.2-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:0b2b64d2bb6d3fb9112bafa732def486049e63de9618b5843bcdd081d8144cd8"}, - {file = "charset_normalizer-3.3.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:ddbb2551d7e0102e7252db79ba445cdab71b26640817ab1e3e3648dad515003b"}, - {file = "charset_normalizer-3.3.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:55086ee1064215781fff39a1af09518bc9255b50d6333f2e4c74ca09fac6a8f6"}, - {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8f4a014bc36d3c57402e2977dada34f9c12300af536839dc38c0beab8878f38a"}, - {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:a10af20b82360ab00827f916a6058451b723b4e65030c5a18577c8b2de5b3389"}, - {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8d756e44e94489e49571086ef83b2bb8ce311e730092d2c34ca8f7d925cb20aa"}, - {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:90d558489962fd4918143277a773316e56c72da56ec7aa3dc3dbbe20fdfed15b"}, - {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6ac7ffc7ad6d040517be39eb591cac5ff87416c2537df6ba3cba3bae290c0fed"}, - {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:7ed9e526742851e8d5cc9e6cf41427dfc6068d4f5a3bb03659444b4cabf6bc26"}, - {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:8bdb58ff7ba23002a4c5808d608e4e6c687175724f54a5dade5fa8c67b604e4d"}, - {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:6b3251890fff30ee142c44144871185dbe13b11bab478a88887a639655be1068"}, - {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:b4a23f61ce87adf89be746c8a8974fe1c823c891d8f86eb218bb957c924bb143"}, - {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:efcb3f6676480691518c177e3b465bcddf57cea040302f9f4e6e191af91174d4"}, - {file = "charset_normalizer-3.3.2-cp312-cp312-win32.whl", hash = "sha256:d965bba47ddeec8cd560687584e88cf699fd28f192ceb452d1d7ee807c5597b7"}, - {file = "charset_normalizer-3.3.2-cp312-cp312-win_amd64.whl", hash = "sha256:96b02a3dc4381e5494fad39be677abcb5e6634bf7b4fa83a6dd3112607547001"}, - {file = "charset_normalizer-3.3.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:95f2a5796329323b8f0512e09dbb7a1860c46a39da62ecb2324f116fa8fdc85c"}, - {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c002b4ffc0be611f0d9da932eb0f704fe2602a9a949d1f738e4c34c75b0863d5"}, - {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a981a536974bbc7a512cf44ed14938cf01030a99e9b3a06dd59578882f06f985"}, - {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3287761bc4ee9e33561a7e058c72ac0938c4f57fe49a09eae428fd88aafe7bb6"}, - {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:42cb296636fcc8b0644486d15c12376cb9fa75443e00fb25de0b8602e64c1714"}, - {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0a55554a2fa0d408816b3b5cedf0045f4b8e1a6065aec45849de2d6f3f8e9786"}, - {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:c083af607d2515612056a31f0a8d9e0fcb5876b7bfc0abad3ecd275bc4ebc2d5"}, - {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:87d1351268731db79e0f8e745d92493ee2841c974128ef629dc518b937d9194c"}, - {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:bd8f7df7d12c2db9fab40bdd87a7c09b1530128315d047a086fa3ae3435cb3a8"}, - {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:c180f51afb394e165eafe4ac2936a14bee3eb10debc9d9e4db8958fe36afe711"}, - {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = 
"sha256:8c622a5fe39a48f78944a87d4fb8a53ee07344641b0562c540d840748571b811"}, - {file = "charset_normalizer-3.3.2-cp37-cp37m-win32.whl", hash = "sha256:db364eca23f876da6f9e16c9da0df51aa4f104a972735574842618b8c6d999d4"}, - {file = "charset_normalizer-3.3.2-cp37-cp37m-win_amd64.whl", hash = "sha256:86216b5cee4b06df986d214f664305142d9c76df9b6512be2738aa72a2048f99"}, - {file = "charset_normalizer-3.3.2-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:6463effa3186ea09411d50efc7d85360b38d5f09b870c48e4600f63af490e56a"}, - {file = "charset_normalizer-3.3.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:6c4caeef8fa63d06bd437cd4bdcf3ffefe6738fb1b25951440d80dc7df8c03ac"}, - {file = "charset_normalizer-3.3.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:37e55c8e51c236f95b033f6fb391d7d7970ba5fe7ff453dad675e88cf303377a"}, - {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fb69256e180cb6c8a894fee62b3afebae785babc1ee98b81cdf68bbca1987f33"}, - {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ae5f4161f18c61806f411a13b0310bea87f987c7d2ecdbdaad0e94eb2e404238"}, - {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b2b0a0c0517616b6869869f8c581d4eb2dd83a4d79e0ebcb7d373ef9956aeb0a"}, - {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:45485e01ff4d3630ec0d9617310448a8702f70e9c01906b0d0118bdf9d124cf2"}, - {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:eb00ed941194665c332bf8e078baf037d6c35d7c4f3102ea2d4f16ca94a26dc8"}, - {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:2127566c664442652f024c837091890cb1942c30937add288223dc895793f898"}, - {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:a50aebfa173e157099939b17f18600f72f84eed3049e743b68ad15bd69b6bf99"}, - {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:4d0d1650369165a14e14e1e47b372cfcb31d6ab44e6e33cb2d4e57265290044d"}, - {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:923c0c831b7cfcb071580d3f46c4baf50f174be571576556269530f4bbd79d04"}, - {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:06a81e93cd441c56a9b65d8e1d043daeb97a3d0856d177d5c90ba85acb3db087"}, - {file = "charset_normalizer-3.3.2-cp38-cp38-win32.whl", hash = "sha256:6ef1d82a3af9d3eecdba2321dc1b3c238245d890843e040e41e470ffa64c3e25"}, - {file = "charset_normalizer-3.3.2-cp38-cp38-win_amd64.whl", hash = "sha256:eb8821e09e916165e160797a6c17edda0679379a4be5c716c260e836e122f54b"}, - {file = "charset_normalizer-3.3.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:c235ebd9baae02f1b77bcea61bce332cb4331dc3617d254df3323aa01ab47bd4"}, - {file = "charset_normalizer-3.3.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:5b4c145409bef602a690e7cfad0a15a55c13320ff7a3ad7ca59c13bb8ba4d45d"}, - {file = "charset_normalizer-3.3.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:68d1f8a9e9e37c1223b656399be5d6b448dea850bed7d0f87a8311f1ff3dabb0"}, - {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:22afcb9f253dac0696b5a4be4a1c0f8762f8239e21b99680099abd9b2b1b2269"}, - {file = 
"charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e27ad930a842b4c5eb8ac0016b0a54f5aebbe679340c26101df33424142c143c"}, - {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1f79682fbe303db92bc2b1136016a38a42e835d932bab5b3b1bfcfbf0640e519"}, - {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b261ccdec7821281dade748d088bb6e9b69e6d15b30652b74cbbac25e280b796"}, - {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:122c7fa62b130ed55f8f285bfd56d5f4b4a5b503609d181f9ad85e55c89f4185"}, - {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:d0eccceffcb53201b5bfebb52600a5fb483a20b61da9dbc885f8b103cbe7598c"}, - {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:9f96df6923e21816da7e0ad3fd47dd8f94b2a5ce594e00677c0013018b813458"}, - {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:7f04c839ed0b6b98b1a7501a002144b76c18fb1c1850c8b98d458ac269e26ed2"}, - {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:34d1c8da1e78d2e001f363791c98a272bb734000fcef47a491c1e3b0505657a8"}, - {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:ff8fa367d09b717b2a17a052544193ad76cd49979c805768879cb63d9ca50561"}, - {file = "charset_normalizer-3.3.2-cp39-cp39-win32.whl", hash = "sha256:aed38f6e4fb3f5d6bf81bfa990a07806be9d83cf7bacef998ab1a9bd660a581f"}, - {file = "charset_normalizer-3.3.2-cp39-cp39-win_amd64.whl", hash = "sha256:b01b88d45a6fcb69667cd6d2f7a9aeb4bf53760d7fc536bf679ec94fe9f3ff3d"}, - {file = "charset_normalizer-3.3.2-py3-none-any.whl", hash = "sha256:3e4d1f6587322d2788836a99c69062fbb091331ec940e02d12d179c1d53e25fc"}, + {file = "charset_normalizer-3.4.4-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:e824f1492727fa856dd6eda4f7cee25f8518a12f3c4a56a74e8095695089cf6d"}, + {file = "charset_normalizer-3.4.4-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4bd5d4137d500351a30687c2d3971758aac9a19208fc110ccb9d7188fbe709e8"}, + {file = "charset_normalizer-3.4.4-cp310-cp310-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:027f6de494925c0ab2a55eab46ae5129951638a49a34d87f4c3eda90f696b4ad"}, + {file = "charset_normalizer-3.4.4-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:f820802628d2694cb7e56db99213f930856014862f3fd943d290ea8438d07ca8"}, + {file = "charset_normalizer-3.4.4-cp310-cp310-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:798d75d81754988d2565bff1b97ba5a44411867c0cf32b77a7e8f8d84796b10d"}, + {file = "charset_normalizer-3.4.4-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9d1bb833febdff5c8927f922386db610b49db6e0d4f4ee29601d71e7c2694313"}, + {file = "charset_normalizer-3.4.4-cp310-cp310-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:9cd98cdc06614a2f768d2b7286d66805f94c48cde050acdbbb7db2600ab3197e"}, + {file = "charset_normalizer-3.4.4-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:077fbb858e903c73f6c9db43374fd213b0b6a778106bc7032446a8e8b5b38b93"}, + {file = "charset_normalizer-3.4.4-cp310-cp310-musllinux_1_2_armv7l.whl", hash = 
"sha256:244bfb999c71b35de57821b8ea746b24e863398194a4014e4c76adc2bbdfeff0"}, + {file = "charset_normalizer-3.4.4-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:64b55f9dce520635f018f907ff1b0df1fdc31f2795a922fb49dd14fbcdf48c84"}, + {file = "charset_normalizer-3.4.4-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:faa3a41b2b66b6e50f84ae4a68c64fcd0c44355741c6374813a800cd6695db9e"}, + {file = "charset_normalizer-3.4.4-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:6515f3182dbe4ea06ced2d9e8666d97b46ef4c75e326b79bb624110f122551db"}, + {file = "charset_normalizer-3.4.4-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:cc00f04ed596e9dc0da42ed17ac5e596c6ccba999ba6bd92b0e0aef2f170f2d6"}, + {file = "charset_normalizer-3.4.4-cp310-cp310-win32.whl", hash = "sha256:f34be2938726fc13801220747472850852fe6b1ea75869a048d6f896838c896f"}, + {file = "charset_normalizer-3.4.4-cp310-cp310-win_amd64.whl", hash = "sha256:a61900df84c667873b292c3de315a786dd8dac506704dea57bc957bd31e22c7d"}, + {file = "charset_normalizer-3.4.4-cp310-cp310-win_arm64.whl", hash = "sha256:cead0978fc57397645f12578bfd2d5ea9138ea0fac82b2f63f7f7c6877986a69"}, + {file = "charset_normalizer-3.4.4-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:6e1fcf0720908f200cd21aa4e6750a48ff6ce4afe7ff5a79a90d5ed8a08296f8"}, + {file = "charset_normalizer-3.4.4-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5f819d5fe9234f9f82d75bdfa9aef3a3d72c4d24a6e57aeaebba32a704553aa0"}, + {file = "charset_normalizer-3.4.4-cp311-cp311-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:a59cb51917aa591b1c4e6a43c132f0cdc3c76dbad6155df4e28ee626cc77a0a3"}, + {file = "charset_normalizer-3.4.4-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:8ef3c867360f88ac904fd3f5e1f902f13307af9052646963ee08ff4f131adafc"}, + {file = "charset_normalizer-3.4.4-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:d9e45d7faa48ee908174d8fe84854479ef838fc6a705c9315372eacbc2f02897"}, + {file = "charset_normalizer-3.4.4-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:840c25fb618a231545cbab0564a799f101b63b9901f2569faecd6b222ac72381"}, + {file = "charset_normalizer-3.4.4-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:ca5862d5b3928c4940729dacc329aa9102900382fea192fc5e52eb69d6093815"}, + {file = "charset_normalizer-3.4.4-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:d9c7f57c3d666a53421049053eaacdd14bbd0a528e2186fcb2e672effd053bb0"}, + {file = "charset_normalizer-3.4.4-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:277e970e750505ed74c832b4bf75dac7476262ee2a013f5574dd49075879e161"}, + {file = "charset_normalizer-3.4.4-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:31fd66405eaf47bb62e8cd575dc621c56c668f27d46a61d975a249930dd5e2a4"}, + {file = "charset_normalizer-3.4.4-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:0d3d8f15c07f86e9ff82319b3d9ef6f4bf907608f53fe9d92b28ea9ae3d1fd89"}, + {file = "charset_normalizer-3.4.4-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:9f7fcd74d410a36883701fafa2482a6af2ff5ba96b9a620e9e0721e28ead5569"}, + {file = "charset_normalizer-3.4.4-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:ebf3e58c7ec8a8bed6d66a75d7fb37b55e5015b03ceae72a8e7c74495551e224"}, + {file = "charset_normalizer-3.4.4-cp311-cp311-win32.whl", hash = 
"sha256:eecbc200c7fd5ddb9a7f16c7decb07b566c29fa2161a16cf67b8d068bd21690a"}, + {file = "charset_normalizer-3.4.4-cp311-cp311-win_amd64.whl", hash = "sha256:5ae497466c7901d54b639cf42d5b8c1b6a4fead55215500d2f486d34db48d016"}, + {file = "charset_normalizer-3.4.4-cp311-cp311-win_arm64.whl", hash = "sha256:65e2befcd84bc6f37095f5961e68a6f077bf44946771354a28ad434c2cce0ae1"}, + {file = "charset_normalizer-3.4.4-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:0a98e6759f854bd25a58a73fa88833fba3b7c491169f86ce1180c948ab3fd394"}, + {file = "charset_normalizer-3.4.4-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b5b290ccc2a263e8d185130284f8501e3e36c5e02750fc6b6bdeb2e9e96f1e25"}, + {file = "charset_normalizer-3.4.4-cp312-cp312-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:74bb723680f9f7a6234dcf67aea57e708ec1fbdf5699fb91dfd6f511b0a320ef"}, + {file = "charset_normalizer-3.4.4-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:f1e34719c6ed0b92f418c7c780480b26b5d9c50349e9a9af7d76bf757530350d"}, + {file = "charset_normalizer-3.4.4-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:2437418e20515acec67d86e12bf70056a33abdacb5cb1655042f6538d6b085a8"}, + {file = "charset_normalizer-3.4.4-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:11d694519d7f29d6cd09f6ac70028dba10f92f6cdd059096db198c283794ac86"}, + {file = "charset_normalizer-3.4.4-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:ac1c4a689edcc530fc9d9aa11f5774b9e2f33f9a0c6a57864e90908f5208d30a"}, + {file = "charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:21d142cc6c0ec30d2efee5068ca36c128a30b0f2c53c1c07bd78cb6bc1d3be5f"}, + {file = "charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:5dbe56a36425d26d6cfb40ce79c314a2e4dd6211d51d6d2191c00bed34f354cc"}, + {file = "charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:5bfbb1b9acf3334612667b61bd3002196fe2a1eb4dd74d247e0f2a4d50ec9bbf"}, + {file = "charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:d055ec1e26e441f6187acf818b73564e6e6282709e9bcb5b63f5b23068356a15"}, + {file = "charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:af2d8c67d8e573d6de5bc30cdb27e9b95e49115cd9baad5ddbd1a6207aaa82a9"}, + {file = "charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:780236ac706e66881f3b7f2f32dfe90507a09e67d1d454c762cf642e6e1586e0"}, + {file = "charset_normalizer-3.4.4-cp312-cp312-win32.whl", hash = "sha256:5833d2c39d8896e4e19b689ffc198f08ea58116bee26dea51e362ecc7cd3ed26"}, + {file = "charset_normalizer-3.4.4-cp312-cp312-win_amd64.whl", hash = "sha256:a79cfe37875f822425b89a82333404539ae63dbdddf97f84dcbc3d339aae9525"}, + {file = "charset_normalizer-3.4.4-cp312-cp312-win_arm64.whl", hash = "sha256:376bec83a63b8021bb5c8ea75e21c4ccb86e7e45ca4eb81146091b56599b80c3"}, + {file = "charset_normalizer-3.4.4-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:e1f185f86a6f3403aa2420e815904c67b2f9ebc443f045edd0de921108345794"}, + {file = "charset_normalizer-3.4.4-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6b39f987ae8ccdf0d2642338faf2abb1862340facc796048b604ef14919e55ed"}, + {file = 
"charset_normalizer-3.4.4-cp313-cp313-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:3162d5d8ce1bb98dd51af660f2121c55d0fa541b46dff7bb9b9f86ea1d87de72"}, + {file = "charset_normalizer-3.4.4-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:81d5eb2a312700f4ecaa977a8235b634ce853200e828fbadf3a9c50bab278328"}, + {file = "charset_normalizer-3.4.4-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:5bd2293095d766545ec1a8f612559f6b40abc0eb18bb2f5d1171872d34036ede"}, + {file = "charset_normalizer-3.4.4-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a8a8b89589086a25749f471e6a900d3f662d1d3b6e2e59dcecf787b1cc3a1894"}, + {file = "charset_normalizer-3.4.4-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:bc7637e2f80d8530ee4a78e878bce464f70087ce73cf7c1caf142416923b98f1"}, + {file = "charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f8bf04158c6b607d747e93949aa60618b61312fe647a6369f88ce2ff16043490"}, + {file = "charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:554af85e960429cf30784dd47447d5125aaa3b99a6f0683589dbd27e2f45da44"}, + {file = "charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:74018750915ee7ad843a774364e13a3db91682f26142baddf775342c3f5b1133"}, + {file = "charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:c0463276121fdee9c49b98908b3a89c39be45d86d1dbaa22957e38f6321d4ce3"}, + {file = "charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:362d61fd13843997c1c446760ef36f240cf81d3ebf74ac62652aebaf7838561e"}, + {file = "charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:9a26f18905b8dd5d685d6d07b0cdf98a79f3c7a918906af7cc143ea2e164c8bc"}, + {file = "charset_normalizer-3.4.4-cp313-cp313-win32.whl", hash = "sha256:9b35f4c90079ff2e2edc5b26c0c77925e5d2d255c42c74fdb70fb49b172726ac"}, + {file = "charset_normalizer-3.4.4-cp313-cp313-win_amd64.whl", hash = "sha256:b435cba5f4f750aa6c0a0d92c541fb79f69a387c91e61f1795227e4ed9cece14"}, + {file = "charset_normalizer-3.4.4-cp313-cp313-win_arm64.whl", hash = "sha256:542d2cee80be6f80247095cc36c418f7bddd14f4a6de45af91dfad36d817bba2"}, + {file = "charset_normalizer-3.4.4-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:da3326d9e65ef63a817ecbcc0df6e94463713b754fe293eaa03da99befb9a5bd"}, + {file = "charset_normalizer-3.4.4-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8af65f14dc14a79b924524b1e7fffe304517b2bff5a58bf64f30b98bbc5079eb"}, + {file = "charset_normalizer-3.4.4-cp314-cp314-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:74664978bb272435107de04e36db5a9735e78232b85b77d45cfb38f758efd33e"}, + {file = "charset_normalizer-3.4.4-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:752944c7ffbfdd10c074dc58ec2d5a8a4cd9493b314d367c14d24c17684ddd14"}, + {file = "charset_normalizer-3.4.4-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:d1f13550535ad8cff21b8d757a3257963e951d96e20ec82ab44bc64aeb62a191"}, + {file = "charset_normalizer-3.4.4-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ecaae4149d99b1c9e7b88bb03e3221956f68fd6d50be2ef061b2381b61d20838"}, + {file = 
"charset_normalizer-3.4.4-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:cb6254dc36b47a990e59e1068afacdcd02958bdcce30bb50cc1700a8b9d624a6"}, + {file = "charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:c8ae8a0f02f57a6e61203a31428fa1d677cbe50c93622b4149d5c0f319c1d19e"}, + {file = "charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:47cc91b2f4dd2833fddaedd2893006b0106129d4b94fdb6af1f4ce5a9965577c"}, + {file = "charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:82004af6c302b5d3ab2cfc4cc5f29db16123b1a8417f2e25f9066f91d4411090"}, + {file = "charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:2b7d8f6c26245217bd2ad053761201e9f9680f8ce52f0fcd8d0755aeae5b2152"}, + {file = "charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:799a7a5e4fb2d5898c60b640fd4981d6a25f1c11790935a44ce38c54e985f828"}, + {file = "charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:99ae2cffebb06e6c22bdc25801d7b30f503cc87dbd283479e7b606f70aff57ec"}, + {file = "charset_normalizer-3.4.4-cp314-cp314-win32.whl", hash = "sha256:f9d332f8c2a2fcbffe1378594431458ddbef721c1769d78e2cbc06280d8155f9"}, + {file = "charset_normalizer-3.4.4-cp314-cp314-win_amd64.whl", hash = "sha256:8a6562c3700cce886c5be75ade4a5db4214fda19fede41d9792d100288d8f94c"}, + {file = "charset_normalizer-3.4.4-cp314-cp314-win_arm64.whl", hash = "sha256:de00632ca48df9daf77a2c65a484531649261ec9f25489917f09e455cb09ddb2"}, + {file = "charset_normalizer-3.4.4-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:ce8a0633f41a967713a59c4139d29110c07e826d131a316b50ce11b1d79b4f84"}, + {file = "charset_normalizer-3.4.4-cp38-cp38-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:eaabd426fe94daf8fd157c32e571c85cb12e66692f15516a83a03264b08d06c3"}, + {file = "charset_normalizer-3.4.4-cp38-cp38-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:c4ef880e27901b6cc782f1b95f82da9313c0eb95c3af699103088fa0ac3ce9ac"}, + {file = "charset_normalizer-3.4.4-cp38-cp38-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:2aaba3b0819274cc41757a1da876f810a3e4d7b6eb25699253a4effef9e8e4af"}, + {file = "charset_normalizer-3.4.4-cp38-cp38-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:778d2e08eda00f4256d7f672ca9fef386071c9202f5e4607920b86d7803387f2"}, + {file = "charset_normalizer-3.4.4-cp38-cp38-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f155a433c2ec037d4e8df17d18922c3a0d9b3232a396690f17175d2946f0218d"}, + {file = "charset_normalizer-3.4.4-cp38-cp38-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:a8bf8d0f749c5757af2142fe7903a9df1d2e8aa3841559b2bad34b08d0e2bcf3"}, + {file = "charset_normalizer-3.4.4-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:194f08cbb32dc406d6e1aea671a68be0823673db2832b38405deba2fb0d88f63"}, + {file = "charset_normalizer-3.4.4-cp38-cp38-musllinux_1_2_armv7l.whl", hash = "sha256:6aee717dcfead04c6eb1ce3bd29ac1e22663cdea57f943c87d1eab9a025438d7"}, + {file = "charset_normalizer-3.4.4-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:cd4b7ca9984e5e7985c12bc60a6f173f3c958eae74f3ef6624bb6b26e2abbae4"}, + {file = "charset_normalizer-3.4.4-cp38-cp38-musllinux_1_2_riscv64.whl", hash = "sha256:b7cf1017d601aa35e6bb650b6ad28652c9cd78ee6caff19f3c28d03e1c80acbf"}, + {file = 
"charset_normalizer-3.4.4-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:e912091979546adf63357d7e2ccff9b44f026c075aeaf25a52d0e95ad2281074"}, + {file = "charset_normalizer-3.4.4-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:5cb4d72eea50c8868f5288b7f7f33ed276118325c1dfd3957089f6b519e1382a"}, + {file = "charset_normalizer-3.4.4-cp38-cp38-win32.whl", hash = "sha256:837c2ce8c5a65a2035be9b3569c684358dfbf109fd3b6969630a87535495ceaa"}, + {file = "charset_normalizer-3.4.4-cp38-cp38-win_amd64.whl", hash = "sha256:44c2a8734b333e0578090c4cd6b16f275e07aa6614ca8715e6c038e865e70576"}, + {file = "charset_normalizer-3.4.4-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:a9768c477b9d7bd54bc0c86dbaebdec6f03306675526c9927c0e8a04e8f94af9"}, + {file = "charset_normalizer-3.4.4-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1bee1e43c28aa63cb16e5c14e582580546b08e535299b8b6158a7c9c768a1f3d"}, + {file = "charset_normalizer-3.4.4-cp39-cp39-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:fd44c878ea55ba351104cb93cc85e74916eb8fa440ca7903e57575e97394f608"}, + {file = "charset_normalizer-3.4.4-cp39-cp39-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:0f04b14ffe5fdc8c4933862d8306109a2c51e0704acfa35d51598eb45a1e89fc"}, + {file = "charset_normalizer-3.4.4-cp39-cp39-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:cd09d08005f958f370f539f186d10aec3377d55b9eeb0d796025d4886119d76e"}, + {file = "charset_normalizer-3.4.4-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4fe7859a4e3e8457458e2ff592f15ccb02f3da787fcd31e0183879c3ad4692a1"}, + {file = "charset_normalizer-3.4.4-cp39-cp39-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:fa09f53c465e532f4d3db095e0c55b615f010ad81803d383195b6b5ca6cbf5f3"}, + {file = "charset_normalizer-3.4.4-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:7fa17817dc5625de8a027cb8b26d9fefa3ea28c8253929b8d6649e705d2835b6"}, + {file = "charset_normalizer-3.4.4-cp39-cp39-musllinux_1_2_armv7l.whl", hash = "sha256:5947809c8a2417be3267efc979c47d76a079758166f7d43ef5ae8e9f92751f88"}, + {file = "charset_normalizer-3.4.4-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:4902828217069c3c5c71094537a8e623f5d097858ac6ca8252f7b4d10b7560f1"}, + {file = "charset_normalizer-3.4.4-cp39-cp39-musllinux_1_2_riscv64.whl", hash = "sha256:7c308f7e26e4363d79df40ca5b2be1c6ba9f02bdbccfed5abddb7859a6ce72cf"}, + {file = "charset_normalizer-3.4.4-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:2c9d3c380143a1fedbff95a312aa798578371eb29da42106a29019368a475318"}, + {file = "charset_normalizer-3.4.4-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:cb01158d8b88ee68f15949894ccc6712278243d95f344770fa7593fa2d94410c"}, + {file = "charset_normalizer-3.4.4-cp39-cp39-win32.whl", hash = "sha256:2677acec1a2f8ef614c6888b5b4ae4060cc184174a938ed4e8ef690e15d3e505"}, + {file = "charset_normalizer-3.4.4-cp39-cp39-win_amd64.whl", hash = "sha256:f8e160feb2aed042cd657a72acc0b481212ed28b1b9a95c0cee1621b524e1966"}, + {file = "charset_normalizer-3.4.4-cp39-cp39-win_arm64.whl", hash = "sha256:b5d84d37db046c5ca74ee7bb47dd6cbc13f80665fdde3e8040bdd3fb015ecb50"}, + {file = "charset_normalizer-3.4.4-py3-none-any.whl", hash = "sha256:7a32c560861a02ff789ad905a2fe94e3f840803362c84fecf1851cb4cf3dc37f"}, + {file = "charset_normalizer-3.4.4.tar.gz", hash = 
"sha256:94537985111c35f28720e43603b8e7b43a6ecfb2ce1d3058bbe955b73404e21a"}, ] [[package]] name = "click" -version = "8.1.7" +version = "8.1.8" description = "Composable command line interface toolkit" optional = false python-versions = ">=3.7" groups = ["main", "dev"] files = [ - {file = "click-8.1.7-py3-none-any.whl", hash = "sha256:ae74fb96c20a0277a1d615f1e4d73c8414f5a98db8b799a7931d1582f3390c28"}, - {file = "click-8.1.7.tar.gz", hash = "sha256:ca9853ad459e787e2192211578cc907e7594e294c7ccc834310722b41b9ca6de"}, + {file = "click-8.1.8-py3-none-any.whl", hash = "sha256:63c132bbbed01578a06712a2d1f497bb62d9c1c0d329b7903a866228027263b2"}, + {file = "click-8.1.8.tar.gz", hash = "sha256:ed53c9d8990d83c2a27deae68e4ee337473f6330c040a31d4225c9574d16096a"}, ] [package.dependencies] @@ -178,42 +201,48 @@ markers = {main = "platform_system == \"Windows\"", dev = "platform_system == \" [[package]] name = "exceptiongroup" -version = "1.2.0" +version = "1.3.1" description = "Backport of PEP 654 (exception groups)" optional = false python-versions = ">=3.7" groups = ["dev"] markers = "python_version < \"3.11\"" files = [ - {file = "exceptiongroup-1.2.0-py3-none-any.whl", hash = "sha256:4bfd3996ac73b41e9b9628b04e079f193850720ea5945fc96a08633c66912f14"}, - {file = "exceptiongroup-1.2.0.tar.gz", hash = "sha256:91f5c769735f051a4290d52edd0858999b57e5876e9f85937691bd4c9fa3ed68"}, + {file = "exceptiongroup-1.3.1-py3-none-any.whl", hash = "sha256:a7a39a3bd276781e98394987d3a5701d0c4edffb633bb7a5144577f82c773598"}, + {file = "exceptiongroup-1.3.1.tar.gz", hash = "sha256:8b412432c6055b0b7d14c310000ae93352ed6754f70fa8f7c34141f91c4e3219"}, ] +[package.dependencies] +typing-extensions = {version = ">=4.6.0", markers = "python_version < \"3.13\""} + [package.extras] test = ["pytest (>=6)"] [[package]] name = "idna" -version = "3.6" +version = "3.11" description = "Internationalized Domain Names in Applications (IDNA)" optional = false -python-versions = ">=3.5" +python-versions = ">=3.8" groups = ["main"] files = [ - {file = "idna-3.6-py3-none-any.whl", hash = "sha256:c05567e9c24a6b9faaa835c4821bad0590fbb9d5779e7caa6e1cc4978e7eb24f"}, - {file = "idna-3.6.tar.gz", hash = "sha256:9ecdbbd083b06798ae1e86adcbfe8ab1479cf864e4ee30fe4e46a003d12491ca"}, + {file = "idna-3.11-py3-none-any.whl", hash = "sha256:771a87f49d9defaf64091e6e6fe9c18d4833f140bd19464795bc32d966ca37ea"}, + {file = "idna-3.11.tar.gz", hash = "sha256:795dafcc9c04ed0c1fb032c2aa73654d8e8c5023a7df64a53f39190ada629902"}, ] +[package.extras] +all = ["flake8 (>=7.1.1)", "mypy (>=1.11.2)", "pytest (>=8.3.2)", "ruff (>=0.6.2)"] + [[package]] name = "iniconfig" -version = "2.0.0" +version = "2.1.0" description = "brain-dead simple config-ini parsing" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" groups = ["dev"] files = [ - {file = "iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374"}, - {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, + {file = "iniconfig-2.1.0-py3-none-any.whl", hash = "sha256:9deba5723312380e77435581c6bf4935c94cbfab9b1ed33ef8d238ea168eb760"}, + {file = "iniconfig-2.1.0.tar.gz", hash = "sha256:3abbd2e30b36733fee78f9c7f7308f2d0050e88f0087fd25c2645f63c773e1c7"}, ] [[package]] @@ -231,26 +260,26 @@ files = [ [[package]] name = "mypy-extensions" -version = "1.0.0" +version = "1.1.0" description = "Type system extensions for programs checked with the mypy type checker." 
optional = false -python-versions = ">=3.5" +python-versions = ">=3.8" groups = ["dev"] files = [ - {file = "mypy_extensions-1.0.0-py3-none-any.whl", hash = "sha256:4392f6c0eb8a5668a69e23d168ffa70f0be9ccfd32b5cc2d26a34ae5b844552d"}, - {file = "mypy_extensions-1.0.0.tar.gz", hash = "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"}, + {file = "mypy_extensions-1.1.0-py3-none-any.whl", hash = "sha256:1be4cccdb0f2482337c4743e60421de3a356cd97508abadd57d47403e94f5505"}, + {file = "mypy_extensions-1.1.0.tar.gz", hash = "sha256:52e68efc3284861e772bbcd66823fde5ae21fd2fdb51c62a211403730b916558"}, ] [[package]] name = "packaging" -version = "23.2" +version = "25.0" description = "Core utilities for Python packages" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" groups = ["dev"] files = [ - {file = "packaging-23.2-py3-none-any.whl", hash = "sha256:8c491190033a9af7e1d931d0b5dacc2ef47509b34dd0de67ed209b5203fc88c7"}, - {file = "packaging-23.2.tar.gz", hash = "sha256:048fb0e9405036518eaaf48a55953c750c11e1a1b68e0dd1a9d62ed0c092cfc5"}, + {file = "packaging-25.0-py3-none-any.whl", hash = "sha256:29572ef2b1f17581046b3a2227d5c611fb25ec70ca1ba8554b24b0e69331a484"}, + {file = "packaging-25.0.tar.gz", hash = "sha256:d443872c98d677bf60f6a1f2f8c1cb748e8fe762d2bf9d3148b5599295b0fc4f"}, ] [[package]] @@ -267,46 +296,47 @@ files = [ [[package]] name = "platformdirs" -version = "4.2.0" -description = "A small Python package for determining appropriate platform-specific dirs, e.g. a \"user data dir\"." +version = "4.4.0" +description = "A small Python package for determining appropriate platform-specific dirs, e.g. a `user data dir`." optional = false -python-versions = ">=3.8" +python-versions = ">=3.9" groups = ["dev"] files = [ - {file = "platformdirs-4.2.0-py3-none-any.whl", hash = "sha256:0614df2a2f37e1a662acbd8e2b25b92ccf8632929bc6d43467e17fe89c75e068"}, - {file = "platformdirs-4.2.0.tar.gz", hash = "sha256:ef0cc731df711022c174543cb70a9b5bd22e5a9337c8624ef2c2ceb8ddad8768"}, + {file = "platformdirs-4.4.0-py3-none-any.whl", hash = "sha256:abd01743f24e5287cd7a5db3752faf1a2d65353f38ec26d98e25a6db65958c85"}, + {file = "platformdirs-4.4.0.tar.gz", hash = "sha256:ca753cf4d81dc309bc67b0ea38fd15dc97bc30ce419a7f58d13eb3bf14c4febf"}, ] [package.extras] -docs = ["furo (>=2023.9.10)", "proselint (>=0.13)", "sphinx (>=7.2.6)", "sphinx-autodoc-typehints (>=1.25.2)"] -test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=7.4.3)", "pytest-cov (>=4.1)", "pytest-mock (>=3.12)"] +docs = ["furo (>=2024.8.6)", "proselint (>=0.14)", "sphinx (>=8.1.3)", "sphinx-autodoc-typehints (>=3)"] +test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=8.3.4)", "pytest-cov (>=6)", "pytest-mock (>=3.14)"] +type = ["mypy (>=1.14.1)"] [[package]] name = "pluggy" -version = "1.4.0" +version = "1.6.0" description = "plugin and hook calling mechanisms for python" optional = false -python-versions = ">=3.8" +python-versions = ">=3.9" groups = ["dev"] files = [ - {file = "pluggy-1.4.0-py3-none-any.whl", hash = "sha256:7db9f7b503d67d1c5b95f59773ebb58a8c1c288129a88665838012cfb07b8981"}, - {file = "pluggy-1.4.0.tar.gz", hash = "sha256:8c85c2876142a764e5b7548e7d9a0e0ddb46f5185161049a79b7e974454223be"}, + {file = "pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746"}, + {file = "pluggy-1.6.0.tar.gz", hash = "sha256:7dcc130b76258d33b90f61b658791dede3486c3e6bfb003ee5c9bfb396dd22f3"}, ] [package.extras] dev = ["pre-commit", "tox"] -testing = 
["pytest", "pytest-benchmark"] +testing = ["coverage", "pytest", "pytest-benchmark"] [[package]] name = "pyparsing" -version = "3.1.1" -description = "pyparsing module - Classes and methods to define and execute parsing grammars" +version = "3.2.5" +description = "pyparsing - Classes and methods to define and execute parsing grammars" optional = false -python-versions = ">=3.6.8" +python-versions = ">=3.9" groups = ["main"] files = [ - {file = "pyparsing-3.1.1-py3-none-any.whl", hash = "sha256:32c7c0b711493c72ff18a981d24f28aaf9c1fb7ed5e9667c9e84e3db623bdbfb"}, - {file = "pyparsing-3.1.1.tar.gz", hash = "sha256:ede28a1a32462f5a9705e07aea48001a08f7cf81a021585011deba701581a0db"}, + {file = "pyparsing-3.2.5-py3-none-any.whl", hash = "sha256:e38a4f02064cf41fe6593d328d0512495ad1f3d8a91c4f73fc401b3079a59a5e"}, + {file = "pyparsing-3.2.5.tar.gz", hash = "sha256:2df8d5b7b2802ef88e8d016a2eb9c7aeaa923529cd251ed0fe4608275d4105b6"}, ] [package.extras] @@ -337,14 +367,14 @@ testing = ["argcomplete", "attrs (>=19.2.0)", "hypothesis (>=3.56)", "mock", "no [[package]] name = "rdflib" -version = "7.2.1" +version = "7.5.0" description = "RDFLib is a Python library for working with RDF, a simple yet powerful language for representing information." optional = false python-versions = ">=3.8.1" groups = ["main"] files = [ - {file = "rdflib-7.2.1-py3-none-any.whl", hash = "sha256:1a175bc1386a167a42fbfaba003bfa05c164a2a3ca3cb9c0c97f9c9638ca6ac2"}, - {file = "rdflib-7.2.1.tar.gz", hash = "sha256:cf9b7fa25234e8925da8b1fb09700f8349b5f0f100e785fb4260e737308292ac"}, + {file = "rdflib-7.5.0-py3-none-any.whl", hash = "sha256:b011dfc40d0fc8a44252e906dcd8fc806a7859bc231be190c37e9568a31ac572"}, + {file = "rdflib-7.5.0.tar.gz", hash = "sha256:663083443908b1830e567350d72e74d9948b310f827966358d76eebdc92bf592"}, ] [package.dependencies] @@ -357,22 +387,23 @@ html = ["html5rdf (>=1.2,<2)"] lxml = ["lxml (>=4.3,<6.0)"] networkx = ["networkx (>=2,<4)"] orjson = ["orjson (>=3.9.14,<4)"] +rdf4j = ["httpx (>=0.28.1,<0.29.0)"] [[package]] name = "requests" -version = "2.31.0" +version = "2.32.5" description = "Python HTTP for Humans." optional = false -python-versions = ">=3.7" +python-versions = ">=3.9" groups = ["main"] files = [ - {file = "requests-2.31.0-py3-none-any.whl", hash = "sha256:58cd2187c01e70e6e26505bca751777aa9f2ee0b7f4300988b709f44e013003f"}, - {file = "requests-2.31.0.tar.gz", hash = "sha256:942c5a758f98d790eaed1a29cb6eefc7ffb0d1cf7af05c3d2791656dbd6ad1e1"}, + {file = "requests-2.32.5-py3-none-any.whl", hash = "sha256:2462f94637a34fd532264295e186976db0f5d453d1cdd31473c85a6a161affb6"}, + {file = "requests-2.32.5.tar.gz", hash = "sha256:dbba0bac56e100853db0ea71b82b4dfd5fe2bf6d3754a8893c3af500cec7d7cf"}, ] [package.dependencies] certifi = ">=2017.4.17" -charset-normalizer = ">=2,<4" +charset_normalizer = ">=2,<4" idna = ">=2.5,<4" urllib3 = ">=1.21.1,<3" @@ -380,6 +411,34 @@ urllib3 = ">=1.21.1,<3" socks = ["PySocks (>=1.5.6,!=1.5.7)"] use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] +[[package]] +name = "ruff" +version = "0.5.7" +description = "An extremely fast Python linter and code formatter, written in Rust." 
+optional = false +python-versions = ">=3.7" +groups = ["dev"] +files = [ + {file = "ruff-0.5.7-py3-none-linux_armv6l.whl", hash = "sha256:548992d342fc404ee2e15a242cdbea4f8e39a52f2e7752d0e4cbe88d2d2f416a"}, + {file = "ruff-0.5.7-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:00cc8872331055ee017c4f1071a8a31ca0809ccc0657da1d154a1d2abac5c0be"}, + {file = "ruff-0.5.7-py3-none-macosx_11_0_arm64.whl", hash = "sha256:eaf3d86a1fdac1aec8a3417a63587d93f906c678bb9ed0b796da7b59c1114a1e"}, + {file = "ruff-0.5.7-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a01c34400097b06cf8a6e61b35d6d456d5bd1ae6961542de18ec81eaf33b4cb8"}, + {file = "ruff-0.5.7-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:fcc8054f1a717e2213500edaddcf1dbb0abad40d98e1bd9d0ad364f75c763eea"}, + {file = "ruff-0.5.7-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7f70284e73f36558ef51602254451e50dd6cc479f8b6f8413a95fcb5db4a55fc"}, + {file = "ruff-0.5.7-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:a78ad870ae3c460394fc95437d43deb5c04b5c29297815a2a1de028903f19692"}, + {file = "ruff-0.5.7-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9ccd078c66a8e419475174bfe60a69adb36ce04f8d4e91b006f1329d5cd44bcf"}, + {file = "ruff-0.5.7-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7e31c9bad4ebf8fdb77b59cae75814440731060a09a0e0077d559a556453acbb"}, + {file = "ruff-0.5.7-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8d796327eed8e168164346b769dd9a27a70e0298d667b4ecee6877ce8095ec8e"}, + {file = "ruff-0.5.7-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:4a09ea2c3f7778cc635e7f6edf57d566a8ee8f485f3c4454db7771efb692c499"}, + {file = "ruff-0.5.7-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:a36d8dcf55b3a3bc353270d544fb170d75d2dff41eba5df57b4e0b67a95bb64e"}, + {file = "ruff-0.5.7-py3-none-musllinux_1_2_i686.whl", hash = "sha256:9369c218f789eefbd1b8d82a8cf25017b523ac47d96b2f531eba73770971c9e5"}, + {file = "ruff-0.5.7-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:b88ca3db7eb377eb24fb7c82840546fb7acef75af4a74bd36e9ceb37a890257e"}, + {file = "ruff-0.5.7-py3-none-win32.whl", hash = "sha256:33d61fc0e902198a3e55719f4be6b375b28f860b09c281e4bdbf783c0566576a"}, + {file = "ruff-0.5.7-py3-none-win_amd64.whl", hash = "sha256:083bbcbe6fadb93cd86709037acc510f86eed5a314203079df174c40bbbca6b3"}, + {file = "ruff-0.5.7-py3-none-win_arm64.whl", hash = "sha256:2dca26154ff9571995107221d0aeaad0e75a77b5a682d6236cf89a58c70b76f4"}, + {file = "ruff-0.5.7.tar.gz", hash = "sha256:8dfc0a458797f5d9fb622dd0efc52d796f23f0a1493a9527f4e49a550ae9a7e5"}, +] + [[package]] name = "sparqlwrapper" version = "2.0.0" @@ -403,70 +462,111 @@ pandas = ["pandas (>=1.3.5)"] [[package]] name = "tomli" -version = "2.0.1" +version = "2.3.0" description = "A lil' TOML parser" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" groups = ["dev"] markers = "python_full_version < \"3.11.0a7\"" files = [ - {file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"}, - {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"}, + {file = "tomli-2.3.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:88bd15eb972f3664f5ed4b57c1634a97153b4bac4479dcb6a495f41921eb7f45"}, + {file = "tomli-2.3.0-cp311-cp311-macosx_11_0_arm64.whl", hash = 
"sha256:883b1c0d6398a6a9d29b508c331fa56adbcdff647f6ace4dfca0f50e90dfd0ba"}, + {file = "tomli-2.3.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d1381caf13ab9f300e30dd8feadb3de072aeb86f1d34a8569453ff32a7dea4bf"}, + {file = "tomli-2.3.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a0e285d2649b78c0d9027570d4da3425bdb49830a6156121360b3f8511ea3441"}, + {file = "tomli-2.3.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:0a154a9ae14bfcf5d8917a59b51ffd5a3ac1fd149b71b47a3a104ca4edcfa845"}, + {file = "tomli-2.3.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:74bf8464ff93e413514fefd2be591c3b0b23231a77f901db1eb30d6f712fc42c"}, + {file = "tomli-2.3.0-cp311-cp311-win32.whl", hash = "sha256:00b5f5d95bbfc7d12f91ad8c593a1659b6387b43f054104cda404be6bda62456"}, + {file = "tomli-2.3.0-cp311-cp311-win_amd64.whl", hash = "sha256:4dc4ce8483a5d429ab602f111a93a6ab1ed425eae3122032db7e9acf449451be"}, + {file = "tomli-2.3.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:d7d86942e56ded512a594786a5ba0a5e521d02529b3826e7761a05138341a2ac"}, + {file = "tomli-2.3.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:73ee0b47d4dad1c5e996e3cd33b8a76a50167ae5f96a2607cbe8cc773506ab22"}, + {file = "tomli-2.3.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:792262b94d5d0a466afb5bc63c7daa9d75520110971ee269152083270998316f"}, + {file = "tomli-2.3.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4f195fe57ecceac95a66a75ac24d9d5fbc98ef0962e09b2eddec5d39375aae52"}, + {file = "tomli-2.3.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:e31d432427dcbf4d86958c184b9bfd1e96b5b71f8eb17e6d02531f434fd335b8"}, + {file = "tomli-2.3.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:7b0882799624980785240ab732537fcfc372601015c00f7fc367c55308c186f6"}, + {file = "tomli-2.3.0-cp312-cp312-win32.whl", hash = "sha256:ff72b71b5d10d22ecb084d345fc26f42b5143c5533db5e2eaba7d2d335358876"}, + {file = "tomli-2.3.0-cp312-cp312-win_amd64.whl", hash = "sha256:1cb4ed918939151a03f33d4242ccd0aa5f11b3547d0cf30f7c74a408a5b99878"}, + {file = "tomli-2.3.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:5192f562738228945d7b13d4930baffda67b69425a7f0da96d360b0a3888136b"}, + {file = "tomli-2.3.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:be71c93a63d738597996be9528f4abe628d1adf5e6eb11607bc8fe1a510b5dae"}, + {file = "tomli-2.3.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c4665508bcbac83a31ff8ab08f424b665200c0e1e645d2bd9ab3d3e557b6185b"}, + {file = "tomli-2.3.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4021923f97266babc6ccab9f5068642a0095faa0a51a246a6a02fccbb3514eaf"}, + {file = "tomli-2.3.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:a4ea38c40145a357d513bffad0ed869f13c1773716cf71ccaa83b0fa0cc4e42f"}, + {file = "tomli-2.3.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:ad805ea85eda330dbad64c7ea7a4556259665bdf9d2672f5dccc740eb9d3ca05"}, + {file = "tomli-2.3.0-cp313-cp313-win32.whl", hash = "sha256:97d5eec30149fd3294270e889b4234023f2c69747e555a27bd708828353ab606"}, + {file = "tomli-2.3.0-cp313-cp313-win_amd64.whl", hash = "sha256:0c95ca56fbe89e065c6ead5b593ee64b84a26fca063b5d71a1122bf26e533999"}, + {file = "tomli-2.3.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = 
"sha256:cebc6fe843e0733ee827a282aca4999b596241195f43b4cc371d64fc6639da9e"}, + {file = "tomli-2.3.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:4c2ef0244c75aba9355561272009d934953817c49f47d768070c3c94355c2aa3"}, + {file = "tomli-2.3.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c22a8bf253bacc0cf11f35ad9808b6cb75ada2631c2d97c971122583b129afbc"}, + {file = "tomli-2.3.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0eea8cc5c5e9f89c9b90c4896a8deefc74f518db5927d0e0e8d4a80953d774d0"}, + {file = "tomli-2.3.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:b74a0e59ec5d15127acdabd75ea17726ac4c5178ae51b85bfe39c4f8a278e879"}, + {file = "tomli-2.3.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:b5870b50c9db823c595983571d1296a6ff3e1b88f734a4c8f6fc6188397de005"}, + {file = "tomli-2.3.0-cp314-cp314-win32.whl", hash = "sha256:feb0dacc61170ed7ab602d3d972a58f14ee3ee60494292d384649a3dc38ef463"}, + {file = "tomli-2.3.0-cp314-cp314-win_amd64.whl", hash = "sha256:b273fcbd7fc64dc3600c098e39136522650c49bca95df2d11cf3b626422392c8"}, + {file = "tomli-2.3.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:940d56ee0410fa17ee1f12b817b37a4d4e4dc4d27340863cc67236c74f582e77"}, + {file = "tomli-2.3.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:f85209946d1fe94416debbb88d00eb92ce9cd5266775424ff81bc959e001acaf"}, + {file = "tomli-2.3.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a56212bdcce682e56b0aaf79e869ba5d15a6163f88d5451cbde388d48b13f530"}, + {file = "tomli-2.3.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c5f3ffd1e098dfc032d4d3af5c0ac64f6d286d98bc148698356847b80fa4de1b"}, + {file = "tomli-2.3.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:5e01decd096b1530d97d5d85cb4dff4af2d8347bd35686654a004f8dea20fc67"}, + {file = "tomli-2.3.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:8a35dd0e643bb2610f156cca8db95d213a90015c11fee76c946aa62b7ae7e02f"}, + {file = "tomli-2.3.0-cp314-cp314t-win32.whl", hash = "sha256:a1f7f282fe248311650081faafa5f4732bdbfef5d45fe3f2e702fbc6f2d496e0"}, + {file = "tomli-2.3.0-cp314-cp314t-win_amd64.whl", hash = "sha256:70a251f8d4ba2d9ac2542eecf008b3c8a9fc5c3f9f02c56a9d7952612be2fdba"}, + {file = "tomli-2.3.0-py3-none-any.whl", hash = "sha256:e95b1af3c5b07d9e643909b5abbec77cd9f1217e6d0bca72b0234736b9fb1f1b"}, + {file = "tomli-2.3.0.tar.gz", hash = "sha256:64be704a875d2a59753d80ee8a533c3fe183e3f06807ff7dc2232938ccb01549"}, ] [[package]] name = "tqdm" -version = "4.66.2" +version = "4.67.1" description = "Fast, Extensible Progress Meter" optional = false python-versions = ">=3.7" groups = ["main"] files = [ - {file = "tqdm-4.66.2-py3-none-any.whl", hash = "sha256:1ee4f8a893eb9bef51c6e35730cebf234d5d0b6bd112b0271e10ed7c24a02bd9"}, - {file = "tqdm-4.66.2.tar.gz", hash = "sha256:6cd52cdf0fef0e0f543299cfc96fec90d7b8a7e88745f411ec33eb44d5ed3531"}, + {file = "tqdm-4.67.1-py3-none-any.whl", hash = "sha256:26445eca388f82e72884e0d580d5464cd801a3ea01e63e5601bdff9ba6a48de2"}, + {file = "tqdm-4.67.1.tar.gz", hash = "sha256:f8aef9c52c08c13a65f30ea34f4e5aac3fd1a34959879d7e59e63027286627f2"}, ] [package.dependencies] colorama = {version = "*", markers = "platform_system == \"Windows\""} [package.extras] -dev = ["pytest (>=6)", "pytest-cov", "pytest-timeout", "pytest-xdist"] +dev = ["nbval", "pytest (>=6)", "pytest-asyncio (>=0.24)", "pytest-cov", 
"pytest-timeout"] +discord = ["requests"] notebook = ["ipywidgets (>=6)"] slack = ["slack-sdk"] telegram = ["requests"] [[package]] name = "typing-extensions" -version = "4.9.0" -description = "Backported and Experimental Type Hints for Python 3.8+" +version = "4.15.0" +description = "Backported and Experimental Type Hints for Python 3.9+" optional = false -python-versions = ">=3.8" +python-versions = ">=3.9" groups = ["dev"] -markers = "python_version < \"3.10\"" +markers = "python_version < \"3.11\"" files = [ - {file = "typing_extensions-4.9.0-py3-none-any.whl", hash = "sha256:af72aea155e91adfc61c3ae9e0e342dbc0cba726d6cba4b6c72c1f34e47291cd"}, - {file = "typing_extensions-4.9.0.tar.gz", hash = "sha256:23478f88c37f27d76ac8aee6c905017a143b0b1b886c3c9f66bc2fd94f9f5783"}, + {file = "typing_extensions-4.15.0-py3-none-any.whl", hash = "sha256:f0fa19c6845758ab08074a0cfa8b7aecb71c999ca73d62883bc25cc018c4e548"}, + {file = "typing_extensions-4.15.0.tar.gz", hash = "sha256:0cea48d173cc12fa28ecabc3b837ea3cf6f38c6d1136f85cbaaf598984861466"}, ] [[package]] name = "urllib3" -version = "2.2.0" +version = "2.6.0" description = "HTTP library with thread-safe connection pooling, file post, and more." optional = false -python-versions = ">=3.8" +python-versions = ">=3.9" groups = ["main"] files = [ - {file = "urllib3-2.2.0-py3-none-any.whl", hash = "sha256:ce3711610ddce217e6d113a2732fafad960a03fd0318c91faa79481e35c11224"}, - {file = "urllib3-2.2.0.tar.gz", hash = "sha256:051d961ad0c62a94e50ecf1af379c3aba230c66c710493493560c0c223c49f20"}, + {file = "urllib3-2.6.0-py3-none-any.whl", hash = "sha256:c90f7a39f716c572c4e3e58509581ebd83f9b59cced005b7db7ad2d22b0db99f"}, + {file = "urllib3-2.6.0.tar.gz", hash = "sha256:cb9bcef5a4b345d5da5d145dc3e30834f58e8018828cbc724d30b4cb7d4d49f1"}, ] [package.extras] -brotli = ["brotli (>=1.0.9) ; platform_python_implementation == \"CPython\"", "brotlicffi (>=0.8.0) ; platform_python_implementation != \"CPython\""] +brotli = ["brotli (>=1.2.0) ; platform_python_implementation == \"CPython\"", "brotlicffi (>=1.2.0.0) ; platform_python_implementation != \"CPython\""] h2 = ["h2 (>=4,<5)"] socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"] -zstd = ["zstandard (>=0.18.0)"] +zstd = ["backports-zstd (>=1.0.0) ; python_version < \"3.14\""] [metadata] lock-version = "2.1" python-versions = "^3.9" -content-hash = "6f798ca5bc7629dc0668179934c9889c0d971743c1b162ae1387bd0c5a349d94" +content-hash = "5961c30b6d27c388e50a2a08a598b37160f14e38719937c86faeb7d56ed770ec" diff --git a/pyproject.toml b/pyproject.toml index 0d32ee1..27bfca2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,7 @@ [tool.poetry] name = "databusclient" -version = "0.12" -description = "A simple client for submitting data to the DBpedia Databus" +version = "0.14" +description = "A simple client for submitting, downloading, and deleting data on the DBpedia Databus" authors = ["DBpedia Association"] license = "Apache-2.0 License" readme = "README.md" @@ -17,10 +17,15 @@ rdflib = "^7.2.1" [tool.poetry.group.dev.dependencies] black = "^22.6.0" pytest = "^7.1.3" +ruff = "^0.5.5" [tool.poetry.scripts] databusclient = "databusclient.cli:app" +[tool.ruff] +target-version = "py39" +src = ["databusclient", "tests"] + [build-system] requires = ["poetry-core>=1.0.0"] build-backend = "poetry.core.masonry.api" diff --git a/tests/test_databusclient.py b/tests/test_databusclient.py index 4c65e19..2a559d4 100644 --- a/tests/test_databusclient.py +++ b/tests/test_databusclient.py @@ -1,14 +1,16 @@ """Client tests""" -import pytest -from 
databusclient.api.deploy import create_dataset, create_distribution, get_file_info
+
 from collections import OrderedDict
 
+import pytest
+
+from databusclient.api.deploy import create_dataset, create_distribution, get_file_info
 
 EXAMPLE_URL = "https://raw.githubusercontent.com/dbpedia/databus/608482875276ef5df00f2360a2f81005e62b58bd/server/app/api/swagger.yml"
 
+
 @pytest.mark.skip(reason="temporarily disabled since code needs fixing")
 def test_distribution_cases():
-
     metadata_args_with_filler = OrderedDict()
 
     metadata_args_with_filler["type=config_source=databus"] = ""
@@ -24,7 +26,6 @@ def test_distribution_cases():
     parameters = list(metadata_args_with_filler.keys())
 
     for i in range(0, len(metadata_args_with_filler.keys())):
-
         if i == 1:
             continue
 
@@ -58,7 +59,6 @@ def test_distribution_cases():
 
 @pytest.mark.skip(reason="temporarily disabled since code needs fixing")
 def test_empty_cvs():
-
     dst = [create_distribution(url=EXAMPLE_URL, cvs={})]
 
     dataset = create_dataset(
diff --git a/tests/test_download.py b/tests/test_download.py
index 19dd3bc..56dc6b6 100644
--- a/tests/test_download.py
+++ b/tests/test_download.py
@@ -1,9 +1,9 @@
 """Download Tests"""
-import pytest
+
 from databusclient.api.download import download as api_download
 
-DEFAULT_ENDPOINT="https://databus.dbpedia.org/sparql"
-TEST_QUERY="""
+DEFAULT_ENDPOINT = "https://databus.dbpedia.org/sparql"
+TEST_QUERY = """
 PREFIX dcat: <http://www.w3.org/ns/dcat#>
 SELECT ?file WHERE {
@@ -14,10 +14,14 @@
 }
 LIMIT 10
 """
-TEST_COLLECTION="https://databus.dbpedia.org/dbpedia/collections/dbpedia-snapshot-2022-12"
+TEST_COLLECTION = (
+    "https://databus.dbpedia.org/dbpedia/collections/dbpedia-snapshot-2022-12"
+)
+
 
 def test_with_query():
-    api_download("tmp",DEFAULT_ENDPOINT,[TEST_QUERY])
-
+    api_download("tmp", DEFAULT_ENDPOINT, [TEST_QUERY])
+
+
 def test_with_collection():
-    api_download("tmp",DEFAULT_ENDPOINT,[TEST_COLLECTION])
\ No newline at end of file
+    api_download("tmp", DEFAULT_ENDPOINT, [TEST_COLLECTION])

From 072f37ccc153ef5ddc490fa591d2fda663e5a29f Mon Sep 17 00:00:00 2001
From: Integer-Ctrl
Date: Tue, 9 Dec 2025 09:53:18 +0100
Subject: [PATCH 6/6] feat: python 3.11 migration

---
 Dockerfile                                    |   2 +-
 README.md                                     |  33 ++-
 databusclient/api/delete.py                   |  11 +-
 databusclient/api/deploy.py                   |  39 +++++--
 databusclient/api/download.py                 |  96 +++++++++++-----
 databusclient/api/utils.py                    |   5 +-
 databusclient/cli.py                          |   2 +-
 poetry.lock                                   | 107 +-----------------
 pyproject.toml                                |   4 +-
 .../{test_databusclient.py => test_deploy.py} |  32 +++++-
 tests/test_download.py                        |   2 +
 11 files changed, 171 insertions(+), 162 deletions(-)
 rename tests/{test_databusclient.py => test_deploy.py} (81%)

diff --git a/Dockerfile b/Dockerfile
index b44f7b8..7cc4829 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,4 +1,4 @@
-FROM python:3.10-slim
+FROM python:3.11-slim
 
 WORKDIR /data
 
diff --git a/README.md b/README.md
index 6eba86e..dc9991f 100644
--- a/README.md
+++ b/README.md
@@ -20,8 +20,9 @@ Command-line and Python client for downloading and deploying datasets on DBpedia
   - [Delete](#cli-delete)
 - [Module Usage](#module-usage)
   - [Deploy](#module-deploy)
-- [Contributing](#contributing)
+- [Development & Contributing](#development--contributing)
   - [Linting](#linting)
+  - [Testing](#testing)
 
 ## Quickstart
 
 You can use either **Python** or **Docker**. Both methods support all client features.
 
 ### Python
 
-Requirements: [Python](https://www.python.org/downloads/) and [pip](https://pip.pypa.io/en/stable/installation/)
+Requirements: [Python 3.11+](https://www.python.org/downloads/) and [pip](https://pip.pypa.io/en/stable/installation/)
 
 Before using the client, install it via pip:
 
@@ -186,8 +187,8 @@ Options:
                       e.g. https://databus.dbpedia.org/sparql)
   --vault-token TEXT  Path to Vault refresh token file
   --databus-key TEXT  Databus API key to download from protected databus
-  --latest-only       When downloading artifacts, only download the latest
-                      version
+  --all-versions      When downloading artifacts, download all versions
+                      instead of only the latest
   --authurl TEXT      Keycloak token endpoint URL  [default:
                       https://auth.dbpedia.org/realms/dbpedia/protocol/openid-
                       connect/token]
@@ -557,7 +558,7 @@ from databusclient import deploy
 deploy(dataset, "mysterious API key")
 ```
 
-## Development
+## Development & Contributing
 
 Install development dependencies yourself or via [Poetry](https://python-poetry.org/):
 
@@ -569,9 +570,9 @@ poetry install --with dev
 
 The used linter is [Ruff](https://ruff.rs/). Ruff is configured in `pyproject.toml` and is enforced in CI (`.github/workflows/ruff.yml`).
 
-For development, you can run linting locally with `ruff check . ` and optionally auto-format with `ruff format .`.
+For development, you can run linting locally with `ruff check .` and optionally auto-format with `ruff format .`.
 
-To ensuere compatibility with the `pyproject.toml` configured dependencies, run Ruff via Poetry:
+To ensure compatibility with the `pyproject.toml` configured dependencies, run Ruff via Poetry:
 
 ```bash
 # To check for linting issues:
@@ -579,4 +580,22 @@ poetry run ruff check .
 
 # To auto-format code:
 poetry run ruff format .
+```
+
+### Testing
+
+When developing new features, please make sure to add appropriate tests and ensure that all tests pass. Tests are under `tests/` and use [pytest](https://docs.pytest.org/en/7.4.x/) as the test framework.
+
+When fixing bugs or refactoring existing code, please make sure to add tests that cover the affected functionality. The current test coverage is very low, so any additional tests are highly appreciated.
+
+To run tests locally, use:
+
+```bash
+pytest tests/
+```
+
+Or to ensure compatibility with the `pyproject.toml` configured dependencies, run pytest via Poetry:
+
+```bash
+poetry run pytest tests/
 ```
\ No newline at end of file
diff --git a/databusclient/api/delete.py b/databusclient/api/delete.py
index 828644f..41bb119 100644
--- a/databusclient/api/delete.py
+++ b/databusclient/api/delete.py
@@ -3,7 +3,10 @@
 
 import requests
 
-from databusclient.api.utils import fetch_databus_jsonld, get_databus_id_parts_from_uri
+from databusclient.api.utils import (
+    fetch_databus_jsonld,
+    get_databus_id_parts_from_file_url,
+)
 
 
 def _confirm_delete(databusURI: str) -> str:
@@ -161,7 +164,7 @@ def _delete_group(
             uri = item.get("@id")
             if not uri:
                 continue
-            _, _, _, _, version, _ = get_databus_id_parts_from_uri(uri)
+            _, _, _, _, version, _ = get_databus_id_parts_from_file_url(uri)
 
             if version is None:
                 artifact_uris.append(uri)
@@ -188,8 +191,8 @@ def delete(databusURIs: List[str], databus_key: str, dry_run: bool, force: bool)
     """
 
     for databusURI in databusURIs:
-        _host, _account, group, artifact, version, file = get_databus_id_parts_from_uri(
-            databusURI
+        _host, _account, group, artifact, version, file = (
+            get_databus_id_parts_from_file_url(databusURI)
         )
 
         if group == "collections" and artifact is not None:
diff --git a/databusclient/api/deploy.py b/databusclient/api/deploy.py
index 4c7eb27..ef8ebf5 100644
--- a/databusclient/api/deploy.py
+++ b/databusclient/api/deploy.py
@@ -5,7 +5,7 @@
 
 import requests
 
-__debug = False
+_debug = False
 
 
 class DeployError(Exception):
@@ -36,6 +36,11 @@ def _get_content_variants(distribution_str: str) -> Optional[Dict[str, str]]:
 
     cvs = {}
     for kv in cv_str.split("_"):
+        if "=" not in kv:
+            raise BadArgumentException(
+                f"Invalid content variant format: '{kv}'. Expected 'key=value' format."
+            )
+
         key, value = kv.split("=")
         cvs[key] = value
 
@@ -141,8 +146,8 @@ def _get_file_stats(distribution_str: str) -> Tuple[Optional[str], Optional[int]
 
 
 def _load_file_stats(url: str) -> Tuple[str, int]:
-    resp = requests.get(url)
-    if resp.status_code > 400:
+    resp = requests.get(url, timeout=30)
+    if resp.status_code >= 400:
         raise requests.exceptions.RequestException(response=resp)
 
     sha256sum = hashlib.sha256(bytes(resp.content)).hexdigest()
@@ -156,7 +161,7 @@ def get_file_info(distribution_str: str) -> Tuple[Dict[str, str], str, str, str,
 
     # content_variant_part = "_".join([f"{key}={value}" for key, value in cvs.items()])
 
-    if __debug:
+    if _debug:
         print("DEBUG", distribution_str, extension_part)
 
     sha256sum, content_length = _get_file_stats(distribution_str)
@@ -306,7 +311,13 @@ def create_dataset(
     """
 
     _versionId = str(version_id).strip("/")
-    _, account_name, group_name, artifact_name, version = _versionId.rsplit("/", 4)
+    parts = _versionId.rsplit("/", 4)
+    if len(parts) < 5:
+        raise BadArgumentException(
+            f"Invalid version_id format: '{version_id}'. "
+            f"Expected format: <base>/<account>/<group>/<artifact>/<version>"
+        )
+    _, _account_name, _group_name, _artifact_name, version = parts
 
     # could be build from stuff above,
     # was not sure if there are edge cases BASE=http://databus.example.org/"base"/...
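A minimal standalone sketch of the version-ID parsing added in the hunk above; the URL is an invented example, and a plain `ValueError` stands in for the client's `BadArgumentException` to keep the snippet self-contained:

```python
# Sketch of the rsplit-based version-ID split used in create_dataset.
# The URL below is illustrative, not taken from the patch.
version_id = "https://databus.example.org/myaccount/mygroup/myartifact/2025-01-01"

parts = version_id.strip("/").rsplit("/", 4)
if len(parts) < 5:
    raise ValueError(
        "Invalid version_id format. "
        "Expected: <base>/<account>/<group>/<artifact>/<version>"
    )

base, account, group, artifact, version = parts
print(account, group, artifact, version)  # myaccount mygroup myartifact 2025-01-01
```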
@@ -428,22 +439,30 @@ def deploy( headers = {"X-API-KEY": f"{api_key}", "Content-Type": "application/json"} data = json.dumps(dataid) - base = "/".join(dataid["@graph"][0]["@id"].split("/")[0:3]) + + try: + base = "/".join(dataid["@graph"][0]["@id"].split("/")[0:3]) + except (KeyError, IndexError, TypeError) as e: + raise DeployError(f"Invalid dataid structure: {e}") + api_uri = ( base + f"/api/publish?verify-parts={str(verify_parts).lower()}&log-level={log_level.name}" ) - resp = requests.post(api_uri, data=data, headers=headers) + resp = requests.post(api_uri, data=data, headers=headers, timeout=30) - if debug or __debug: - dataset_uri = dataid["@graph"][0]["@id"] + if debug or _debug: + try: + dataset_uri = dataid["@graph"][0]["@id"] + except (KeyError, IndexError, TypeError) as e: + raise DeployError(f"Invalid dataid structure: {e}") print(f"Trying submitting data to {dataset_uri}:") print(data) if resp.status_code != 200: raise DeployError(f"Could not deploy dataset to databus. Reason: '{resp.text}'") - if debug or __debug: + if debug or _debug: print("---------") print(resp.text) diff --git a/databusclient/api/download.py b/databusclient/api/download.py index 5f5877a..190fada 100644 --- a/databusclient/api/download.py +++ b/databusclient/api/download.py @@ -6,7 +6,10 @@ from SPARQLWrapper import JSON, SPARQLWrapper from tqdm import tqdm -from databusclient.api.utils import fetch_databus_jsonld, get_databus_id_parts_from_uri +from databusclient.api.utils import ( + fetch_databus_jsonld, + get_databus_id_parts_from_file_url, +) def _download_file( @@ -32,8 +35,8 @@ def _download_file( 2. If server responds with WWW-Authenticate: Bearer, 401 Unauthorized), then fetch Vault access token and retry with Authorization header. """ if localDir is None: - _host, account, group, artifact, version, file = get_databus_id_parts_from_uri( - url + _host, account, group, artifact, version, file = ( + get_databus_id_parts_from_file_url(url) ) localDir = os.path.join( os.getcwd(), @@ -51,7 +54,7 @@ def _download_file( if dirpath: os.makedirs(dirpath, exist_ok=True) # Create the necessary directories # --- 1. Get redirect URL by requesting HEAD --- - response = requests.head(url, stream=True) + response = requests.head(url, stream=True, timeout=30) # Check for redirect and update URL if necessary if response.headers.get("Location") and response.status_code in [ 301, @@ -111,9 +114,12 @@ def _download_file( file.write(data) progress_bar.close() - # TODO: could be a problem of github raw / openflaas - # if total_size_in_bytes != 0 and progress_bar.n != total_size_in_bytes: - # raise IOError("Downloaded size does not match Content-Length header") + # TODO: keep check or remove? 
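+    # Editorial note (assumption, not verified against this patch): a size
+    # mismatch here can be benign. When the server compresses the response
+    # (Content-Encoding: gzip), requests decodes the body in iter_content(),
+    # so the bytes counted can exceed the Content-Length of the encoded
+    # payload even though the download completed successfully.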
+ if total_size_in_bytes != 0 and progress_bar.n != total_size_in_bytes: + localsize = os.path.getsize(filename) + print(f"\nHeaders: {response.headers}") + print(f"\n[WARNING]: Downloaded size {progress_bar.n} does not match Content-Length header {total_size_in_bytes} ( local file size: {localsize})") + # raise IOError("Downloaded size does not match Content-Length header") def _download_files( @@ -161,7 +167,9 @@ def _get_sparql_query_of_collection(uri: str, databus_key: str | None = None) -> if databus_key is not None: headers["X-API-KEY"] = databus_key - return requests.get(uri, headers=headers, timeout=30).text + response = requests.get(uri, headers=headers, timeout=30) + response.raise_for_status() + return response.text def _query_sparql_endpoint(endpoint_url, query, databus_key=None) -> dict: @@ -247,6 +255,7 @@ def __get_vault_access__( "grant_type": "refresh_token", "refresh_token": refresh_token, }, + timeout=30, ) resp.raise_for_status() access_token = resp.json()["access_token"] @@ -270,6 +279,7 @@ def __get_vault_access__( "subject_token": access_token, "audience": audience, }, + timeout=30, ) resp.raise_for_status() vault_token = resp.json()["access_token"] @@ -400,12 +410,20 @@ def _get_databus_versions_of_artifact( json_dict = json.loads(json_str) versions = json_dict.get("databus:hasVersion") - # Single version case {} + if versions is None: + raise ValueError("No 'databus:hasVersion' field in artifact JSON-LD") + if isinstance(versions, dict): versions = [versions] - # Multiple versions case [{}, {}] + elif not isinstance(versions, list): + raise ValueError( + f"Unexpected type for 'databus:hasVersion': {type(versions).__name__}" + ) + + version_urls = [ + v["@id"] for v in versions if isinstance(v, dict) and "@id" in v + ] - version_urls = [v["@id"] for v in versions if "@id" in v] if not version_urls: raise ValueError("No versions found in artifact JSON-LD") @@ -428,13 +446,16 @@ def _get_file_download_urls_from_artifact_jsonld(json_str: str) -> List[str]: List of all file download URLs in the artifact version. """ - databusIdUrl = [] + databusIdUrl: List[str] = [] + json_dict = json.loads(json_str) graph = json_dict.get("@graph", []) for node in graph: if node.get("@type") == "Part": - id = node.get("file") - databusIdUrl.append(id) + file_uri = node.get("file") + if not isinstance(file_uri, str): + continue + databusIdUrl.append(file_uri) return databusIdUrl @@ -481,14 +502,28 @@ def _get_databus_artifacts_of_group(json_str: str) -> List[str]: Returns a list of artifact URLs. 
""" json_dict = json.loads(json_str) - artifacts = json_dict.get("databus:hasArtifact", []) + artifacts = json_dict.get("databus:hasArtifact") - result = [] - for item in artifacts: + if artifacts is None: + return [] + + if isinstance(artifacts, dict): + artifacts_iter = [artifacts] + elif isinstance(artifacts, list): + artifacts_iter = artifacts + else: + raise ValueError( + f"Unexpected type for 'databus:hasArtifact': {type(artifacts).__name__}" + ) + + result: List[str] = [] + for item in artifacts_iter: + if not isinstance(item, dict): + continue uri = item.get("@id") if not uri: continue - _, _, _, _, version, _ = get_databus_id_parts_from_uri(uri) + _, _, _, _, version, _ = get_databus_id_parts_from_file_url(uri) if version is None: result.append(uri) return result @@ -501,13 +536,13 @@ def download( token=None, databus_key=None, all_versions=None, - auth_url=None, - client_id=None, + auth_url="https://auth.dbpedia.org/realms/dbpedia/protocol/openid-connect/token", + client_id="vault-token-exchange", ) -> None: """ Download datasets from databus. - Download of files, versions, artifacts, groups or databus collections by ther databus URIs or user-defined SPARQL queries that return file download URLs. + Download of files, versions, artifacts, groups or databus collections via their databus URIs or user-defined SPARQL queries that return file download URLs. Parameters: - localDir: Local directory to download datasets to. If None, the databus folder structure is created in the current working directory. @@ -519,22 +554,25 @@ def download( - client_id: Client ID for token exchange. Default is "vault-token-exchange". """ for databusURI in databusURIs: - host, account, group, artifact, version, file = get_databus_id_parts_from_uri( - databusURI + host, account, group, artifact, version, file = ( + get_databus_id_parts_from_file_url(databusURI) ) + # Determine endpoint per-URI if not explicitly provided + uri_endpoint = endpoint + # dataID or databus collection if databusURI.startswith("http://") or databusURI.startswith("https://"): # Auto-detect sparql endpoint from host if not given - if endpoint is None: - endpoint = f"https://{host}/sparql" - print(f"SPARQL endpoint {endpoint}") + if uri_endpoint is None: + uri_endpoint = f"https://{host}/sparql" + print(f"SPARQL endpoint {uri_endpoint}") if group == "collections" and artifact is not None: print(f"Downloading collection: {databusURI}") _download_collection( databusURI, - endpoint, + uri_endpoint, localDir, token, databus_key, @@ -599,10 +637,10 @@ def download( # query as argument else: print("QUERY {}", databusURI.replace("\n", " ")) - if endpoint is None: # endpoint is required for queries (--databus) + if uri_endpoint is None: # endpoint is required for queries (--databus) raise ValueError("No endpoint given for query") res = _get_file_download_urls_from_sparql_query( - endpoint, databusURI, databus_key=databus_key + uri_endpoint, databusURI, databus_key=databus_key ) _download_files( res, diff --git a/databusclient/api/utils.py b/databusclient/api/utils.py index 0c6f342..7e27ff3 100644 --- a/databusclient/api/utils.py +++ b/databusclient/api/utils.py @@ -3,7 +3,7 @@ import requests -def get_databus_id_parts_from_uri( +def get_databus_id_parts_from_file_url( uri: str, ) -> Tuple[ Optional[str], @@ -17,7 +17,8 @@ def get_databus_id_parts_from_uri( Extract databus ID parts from a given databus URI. 
Parameters: - - uri: The full databus URI + - uri: The full databus URI of the form + "http(s)://host/accountId/groupId/artifactId/versionId/fileId" Returns: A tuple containing (host, accountId, groupId, artifactId, versionId, fileId). diff --git a/databusclient/cli.py b/databusclient/cli.py index abb0f03..97430f5 100644 --- a/databusclient/cli.py +++ b/databusclient/cli.py @@ -104,7 +104,7 @@ def deploy( "Please provide files to upload when using WebDAV/Nextcloud mode." ) - # Check that all given paths exist and are files or directories.# + # Check that all given paths exist and are files or directories. invalid = [f for f in distributions if not os.path.exists(f)] if invalid: raise click.UsageError( diff --git a/poetry.lock b/poetry.lock index f772e40..e3759ff 100644 --- a/poetry.lock +++ b/poetry.lock @@ -27,8 +27,6 @@ click = ">=8.0.0" mypy-extensions = ">=0.4.3" pathspec = ">=0.9.0" platformdirs = ">=2" -tomli = {version = ">=1.1.0", markers = "python_full_version < \"3.11.0a7\""} -typing-extensions = {version = ">=3.10.0.0", markers = "python_version < \"3.10\""} [package.extras] colorama = ["colorama (>=0.4.3)"] @@ -199,25 +197,6 @@ files = [ ] markers = {main = "platform_system == \"Windows\"", dev = "platform_system == \"Windows\" or sys_platform == \"win32\""} -[[package]] -name = "exceptiongroup" -version = "1.3.1" -description = "Backport of PEP 654 (exception groups)" -optional = false -python-versions = ">=3.7" -groups = ["dev"] -markers = "python_version < \"3.11\"" -files = [ - {file = "exceptiongroup-1.3.1-py3-none-any.whl", hash = "sha256:a7a39a3bd276781e98394987d3a5701d0c4edffb633bb7a5144577f82c773598"}, - {file = "exceptiongroup-1.3.1.tar.gz", hash = "sha256:8b412432c6055b0b7d14c310000ae93352ed6754f70fa8f7c34141f91c4e3219"}, -] - -[package.dependencies] -typing-extensions = {version = ">=4.6.0", markers = "python_version < \"3.13\""} - -[package.extras] -test = ["pytest (>=6)"] - [[package]] name = "idna" version = "3.11" @@ -245,19 +224,6 @@ files = [ {file = "iniconfig-2.1.0.tar.gz", hash = "sha256:3abbd2e30b36733fee78f9c7f7308f2d0050e88f0087fd25c2645f63c773e1c7"}, ] -[[package]] -name = "isodate" -version = "0.7.2" -description = "An ISO 8601 date/time/duration parser and formatter" -optional = false -python-versions = ">=3.7" -groups = ["main"] -markers = "python_version < \"3.11\"" -files = [ - {file = "isodate-0.7.2-py3-none-any.whl", hash = "sha256:28009937d8031054830160fce6d409ed342816b543597cece116d966c6d99e15"}, - {file = "isodate-0.7.2.tar.gz", hash = "sha256:4cd1aa0f43ca76f4a6c6c0292a85f40b35ec2e43e315b59f06e6d32171a953e6"}, -] - [[package]] name = "mypy-extensions" version = "1.1.0" @@ -356,11 +322,9 @@ files = [ [package.dependencies] colorama = {version = "*", markers = "sys_platform == \"win32\""} -exceptiongroup = {version = ">=1.0.0rc8", markers = "python_version < \"3.11\""} iniconfig = "*" packaging = "*" pluggy = ">=0.12,<2.0" -tomli = {version = ">=1.0.0", markers = "python_version < \"3.11\""} [package.extras] testing = ["argcomplete", "attrs (>=19.2.0)", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"] @@ -378,7 +342,6 @@ files = [ ] [package.dependencies] -isodate = {version = ">=0.7.2,<1.0.0", markers = "python_version < \"3.11\""} pyparsing = ">=2.1.0,<4" [package.extras] @@ -460,59 +423,6 @@ docs = ["sphinx (<5)", "sphinx-rtd-theme"] keepalive = ["keepalive (>=0.5)"] pandas = ["pandas (>=1.3.5)"] -[[package]] -name = "tomli" -version = "2.3.0" -description = "A lil' TOML parser" -optional = 
false -python-versions = ">=3.8" -groups = ["dev"] -markers = "python_full_version < \"3.11.0a7\"" -files = [ - {file = "tomli-2.3.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:88bd15eb972f3664f5ed4b57c1634a97153b4bac4479dcb6a495f41921eb7f45"}, - {file = "tomli-2.3.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:883b1c0d6398a6a9d29b508c331fa56adbcdff647f6ace4dfca0f50e90dfd0ba"}, - {file = "tomli-2.3.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d1381caf13ab9f300e30dd8feadb3de072aeb86f1d34a8569453ff32a7dea4bf"}, - {file = "tomli-2.3.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a0e285d2649b78c0d9027570d4da3425bdb49830a6156121360b3f8511ea3441"}, - {file = "tomli-2.3.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:0a154a9ae14bfcf5d8917a59b51ffd5a3ac1fd149b71b47a3a104ca4edcfa845"}, - {file = "tomli-2.3.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:74bf8464ff93e413514fefd2be591c3b0b23231a77f901db1eb30d6f712fc42c"}, - {file = "tomli-2.3.0-cp311-cp311-win32.whl", hash = "sha256:00b5f5d95bbfc7d12f91ad8c593a1659b6387b43f054104cda404be6bda62456"}, - {file = "tomli-2.3.0-cp311-cp311-win_amd64.whl", hash = "sha256:4dc4ce8483a5d429ab602f111a93a6ab1ed425eae3122032db7e9acf449451be"}, - {file = "tomli-2.3.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:d7d86942e56ded512a594786a5ba0a5e521d02529b3826e7761a05138341a2ac"}, - {file = "tomli-2.3.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:73ee0b47d4dad1c5e996e3cd33b8a76a50167ae5f96a2607cbe8cc773506ab22"}, - {file = "tomli-2.3.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:792262b94d5d0a466afb5bc63c7daa9d75520110971ee269152083270998316f"}, - {file = "tomli-2.3.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4f195fe57ecceac95a66a75ac24d9d5fbc98ef0962e09b2eddec5d39375aae52"}, - {file = "tomli-2.3.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:e31d432427dcbf4d86958c184b9bfd1e96b5b71f8eb17e6d02531f434fd335b8"}, - {file = "tomli-2.3.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:7b0882799624980785240ab732537fcfc372601015c00f7fc367c55308c186f6"}, - {file = "tomli-2.3.0-cp312-cp312-win32.whl", hash = "sha256:ff72b71b5d10d22ecb084d345fc26f42b5143c5533db5e2eaba7d2d335358876"}, - {file = "tomli-2.3.0-cp312-cp312-win_amd64.whl", hash = "sha256:1cb4ed918939151a03f33d4242ccd0aa5f11b3547d0cf30f7c74a408a5b99878"}, - {file = "tomli-2.3.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:5192f562738228945d7b13d4930baffda67b69425a7f0da96d360b0a3888136b"}, - {file = "tomli-2.3.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:be71c93a63d738597996be9528f4abe628d1adf5e6eb11607bc8fe1a510b5dae"}, - {file = "tomli-2.3.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c4665508bcbac83a31ff8ab08f424b665200c0e1e645d2bd9ab3d3e557b6185b"}, - {file = "tomli-2.3.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4021923f97266babc6ccab9f5068642a0095faa0a51a246a6a02fccbb3514eaf"}, - {file = "tomli-2.3.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:a4ea38c40145a357d513bffad0ed869f13c1773716cf71ccaa83b0fa0cc4e42f"}, - {file = "tomli-2.3.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:ad805ea85eda330dbad64c7ea7a4556259665bdf9d2672f5dccc740eb9d3ca05"}, - {file = 
"tomli-2.3.0-cp313-cp313-win32.whl", hash = "sha256:97d5eec30149fd3294270e889b4234023f2c69747e555a27bd708828353ab606"}, - {file = "tomli-2.3.0-cp313-cp313-win_amd64.whl", hash = "sha256:0c95ca56fbe89e065c6ead5b593ee64b84a26fca063b5d71a1122bf26e533999"}, - {file = "tomli-2.3.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:cebc6fe843e0733ee827a282aca4999b596241195f43b4cc371d64fc6639da9e"}, - {file = "tomli-2.3.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:4c2ef0244c75aba9355561272009d934953817c49f47d768070c3c94355c2aa3"}, - {file = "tomli-2.3.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c22a8bf253bacc0cf11f35ad9808b6cb75ada2631c2d97c971122583b129afbc"}, - {file = "tomli-2.3.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0eea8cc5c5e9f89c9b90c4896a8deefc74f518db5927d0e0e8d4a80953d774d0"}, - {file = "tomli-2.3.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:b74a0e59ec5d15127acdabd75ea17726ac4c5178ae51b85bfe39c4f8a278e879"}, - {file = "tomli-2.3.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:b5870b50c9db823c595983571d1296a6ff3e1b88f734a4c8f6fc6188397de005"}, - {file = "tomli-2.3.0-cp314-cp314-win32.whl", hash = "sha256:feb0dacc61170ed7ab602d3d972a58f14ee3ee60494292d384649a3dc38ef463"}, - {file = "tomli-2.3.0-cp314-cp314-win_amd64.whl", hash = "sha256:b273fcbd7fc64dc3600c098e39136522650c49bca95df2d11cf3b626422392c8"}, - {file = "tomli-2.3.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:940d56ee0410fa17ee1f12b817b37a4d4e4dc4d27340863cc67236c74f582e77"}, - {file = "tomli-2.3.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:f85209946d1fe94416debbb88d00eb92ce9cd5266775424ff81bc959e001acaf"}, - {file = "tomli-2.3.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a56212bdcce682e56b0aaf79e869ba5d15a6163f88d5451cbde388d48b13f530"}, - {file = "tomli-2.3.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c5f3ffd1e098dfc032d4d3af5c0ac64f6d286d98bc148698356847b80fa4de1b"}, - {file = "tomli-2.3.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:5e01decd096b1530d97d5d85cb4dff4af2d8347bd35686654a004f8dea20fc67"}, - {file = "tomli-2.3.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:8a35dd0e643bb2610f156cca8db95d213a90015c11fee76c946aa62b7ae7e02f"}, - {file = "tomli-2.3.0-cp314-cp314t-win32.whl", hash = "sha256:a1f7f282fe248311650081faafa5f4732bdbfef5d45fe3f2e702fbc6f2d496e0"}, - {file = "tomli-2.3.0-cp314-cp314t-win_amd64.whl", hash = "sha256:70a251f8d4ba2d9ac2542eecf008b3c8a9fc5c3f9f02c56a9d7952612be2fdba"}, - {file = "tomli-2.3.0-py3-none-any.whl", hash = "sha256:e95b1af3c5b07d9e643909b5abbec77cd9f1217e6d0bca72b0234736b9fb1f1b"}, - {file = "tomli-2.3.0.tar.gz", hash = "sha256:64be704a875d2a59753d80ee8a533c3fe183e3f06807ff7dc2232938ccb01549"}, -] - [[package]] name = "tqdm" version = "4.67.1" @@ -535,19 +445,6 @@ notebook = ["ipywidgets (>=6)"] slack = ["slack-sdk"] telegram = ["requests"] -[[package]] -name = "typing-extensions" -version = "4.15.0" -description = "Backported and Experimental Type Hints for Python 3.9+" -optional = false -python-versions = ">=3.9" -groups = ["dev"] -markers = "python_version < \"3.11\"" -files = [ - {file = "typing_extensions-4.15.0-py3-none-any.whl", hash = "sha256:f0fa19c6845758ab08074a0cfa8b7aecb71c999ca73d62883bc25cc018c4e548"}, - {file = "typing_extensions-4.15.0.tar.gz", hash = 
"sha256:0cea48d173cc12fa28ecabc3b837ea3cf6f38c6d1136f85cbaaf598984861466"}, -] - [[package]] name = "urllib3" version = "2.6.0" @@ -568,5 +465,5 @@ zstd = ["backports-zstd (>=1.0.0) ; python_version < \"3.14\""] [metadata] lock-version = "2.1" -python-versions = "^3.9" -content-hash = "5961c30b6d27c388e50a2a08a598b37160f14e38719937c86faeb7d56ed770ec" +python-versions = "^3.11" +content-hash = "f625db7ea6714ebf87336efecaef03ec2dc4f6f7838c3239432828cd6649ff96" diff --git a/pyproject.toml b/pyproject.toml index 27bfca2..5593c74 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,7 +7,7 @@ license = "Apache-2.0 License" readme = "README.md" [tool.poetry.dependencies] -python = "^3.9" +python = "^3.11" click = "^8.0.4" requests = "^2.28.1" tqdm = "^4.42.1" @@ -23,7 +23,7 @@ ruff = "^0.5.5" databusclient = "databusclient.cli:app" [tool.ruff] -target-version = "py39" +target-version = "py311" src = ["databusclient", "tests"] [build-system] diff --git a/tests/test_databusclient.py b/tests/test_deploy.py similarity index 81% rename from tests/test_databusclient.py rename to tests/test_deploy.py index 2a559d4..aada04c 100644 --- a/tests/test_databusclient.py +++ b/tests/test_deploy.py @@ -4,11 +4,41 @@ import pytest -from databusclient.api.deploy import create_dataset, create_distribution, get_file_info +from databusclient.api.deploy import ( + create_dataset, + create_distribution, + get_file_info, + _get_content_variants, + BadArgumentException, +) EXAMPLE_URL = "https://raw.githubusercontent.com/dbpedia/databus/608482875276ef5df00f2360a2f81005e62b58bd/server/app/api/swagger.yml" +def test_get_content_variants(): + # With content variants + cvs = _get_content_variants( + "https://example.com/file.ttl|lang=en_type=parsed|ttl|none|sha256hash|12345" + ) + assert cvs == { + "lang": "en", + "type": "parsed", + } + + # Without content variants + cvs = _get_content_variants( + "https://example.com/file.ttl||ttl|none|sha256hash|12345" + ) + assert cvs == {} + + csv = _get_content_variants("https://example.com/file.ttl") + assert csv == {} + + # Wrong format + with pytest.raises(BadArgumentException): + _ = _get_content_variants("https://example.com/file.ttl|invalidformat") + + @pytest.mark.skip(reason="temporarily disabled since code needs fixing") def test_distribution_cases(): metadata_args_with_filler = OrderedDict() diff --git a/tests/test_download.py b/tests/test_download.py index 56dc6b6..76fe19b 100644 --- a/tests/test_download.py +++ b/tests/test_download.py @@ -2,6 +2,8 @@ from databusclient.api.download import download as api_download +# TODO: overall test structure not great, needs refactoring + DEFAULT_ENDPOINT = "https://databus.dbpedia.org/sparql" TEST_QUERY = """ PREFIX dcat: