diff --git a/bases/renku_data_services/data_api/app.py b/bases/renku_data_services/data_api/app.py
index 13d7412a3..0a48c5fdc 100644
--- a/bases/renku_data_services/data_api/app.py
+++ b/bases/renku_data_services/data_api/app.py
@@ -106,6 +106,9 @@ def register_all_handlers(app: Sanic, config: Config) -> Sanic:
         session_repo=config.session_repo,
         data_connector_repo=config.data_connector_repo,
         project_migration_repo=config.project_migration_repo,
+        core_scv_url=config.core_svc_url,
+        gitlab_client=config.gitlab_client,
+        internal_gitlab_authenticator=config.gitlab_authenticator,
     )
     project_session_secrets = ProjectSessionSecretBP(
         name="project_session_secrets",
diff --git a/components/renku_data_services/app_config/config.py b/components/renku_data_services/app_config/config.py
index d2bfe4065..695a2e878 100644
--- a/components/renku_data_services/app_config/config.py
+++ b/components/renku_data_services/app_config/config.py
@@ -277,6 +277,7 @@ class Config:
     server_options_file: Optional[str] = None
     server_defaults_file: Optional[str] = None
     async_oauth2_client_class: type[AsyncOAuth2Client] = AsyncOAuth2Client
+    core_svc_url: str | None = None
     _user_repo: UserRepository | None = field(default=None, repr=False, init=False)
     _rp_repo: ResourcePoolRepository | None = field(default=None, repr=False, init=False)
     _storage_repo: StorageRepository | None = field(default=None, repr=False, init=False)
@@ -611,6 +612,7 @@ def from_env(cls, prefix: str = "") -> "Config":
         kc_api: IKeycloakAPI
         secrets_service_public_key: PublicKeyTypes
         gitlab_url: str | None
+        core_svc_url = os.environ.get("CORE_SERVICE_URL")
 
         if os.environ.get(f"{prefix}DUMMY_STORES", "false").lower() == "true":
             encryption_key = secrets.token_bytes(32)
@@ -710,4 +712,5 @@ def from_env(cls, prefix: str = "") -> "Config":
             gitlab_url=gitlab_url,
             nb_config=nb_config,
             builds_config=builds_config,
+            core_svc_url=core_svc_url,
         )
diff --git a/components/renku_data_services/base_models/core.py b/components/renku_data_services/base_models/core.py
index aed58a66f..10e700be4 100644
--- a/components/renku_data_services/base_models/core.py
+++ b/components/renku_data_services/base_models/core.py
@@ -117,6 +117,10 @@ async def filter_projects_by_access_level(
         """Get a list of projects of which the user is a member with a specific access level."""
         ...
 
+    async def get_project_url_from_path(self, user: APIUser, project_path: str) -> str | None:
+        """Get the HTTP(S) repository URL of a project from its path i.e. from /group1/subgroup2/project3."""
+        ...
+
 
 class UserStore(Protocol):
     """The interface through which Keycloak or a similar application can be accessed."""
diff --git a/components/renku_data_services/git/gitlab.py b/components/renku_data_services/git/gitlab.py
index aa15b6689..20d29f611 100644
--- a/components/renku_data_services/git/gitlab.py
+++ b/components/renku_data_services/git/gitlab.py
@@ -28,6 +28,14 @@ def __post_init__(self) -> None:
 
         self.gitlab_graphql_url = f"{gitlab_url}/api/graphql"
 
+    async def _query_gitlab_graphql(self, body: dict[str, Any], header: dict[str, Any]) -> dict[str, Any]:
+        async with httpx.AsyncClient(verify=get_ssl_context(), timeout=5) as client:
+            resp = await client.post(self.gitlab_graphql_url, json=body, headers=header, timeout=10)
+        if resp.status_code != 200:
+            raise errors.BaseError(message=f"Error querying Gitlab api {self.gitlab_graphql_url}: {resp.text}")
+        result = cast(dict[str, Any], resp.json())
+        return result
+
     async def filter_projects_by_access_level(
         self, user: APIUser, project_ids: list[str], min_access_level: GitlabAccessLevel
     ) -> list[str]:
@@ -65,18 +73,9 @@ async def filter_projects_by_access_level(
             """
         }
 
-        async def _query_gitlab_graphql(body: dict[str, Any], header: dict[str, Any]) -> dict[str, Any]:
-            async with httpx.AsyncClient(verify=get_ssl_context(), timeout=5) as client:
-                resp = await client.post(self.gitlab_graphql_url, json=body, headers=header, timeout=10)
-            if resp.status_code != 200:
-                raise errors.BaseError(message=f"Error querying Gitlab api {self.gitlab_graphql_url}: {resp.text}")
-            result = cast(dict[str, Any], resp.json())
-
-            if "data" not in result or "projects" not in result["data"]:
-                raise errors.BaseError(message=f"Got unexpected response from Gitlab: {result}")
-            return result
-
-        resp_body = await _query_gitlab_graphql(body, header)
+        resp_body = await self._query_gitlab_graphql(body, header)
+        if "data" not in resp_body or "projects" not in resp_body["data"]:
+            raise errors.BaseError(message=f"Got unexpected response from Gitlab: {resp_body}")
         result: list[str] = []
 
         def _process_projects(
@@ -108,12 +107,31 @@ def _process_projects(
                 }}
                 """
             }
-            resp_body = await _query_gitlab_graphql(body, header)
+            resp_body = await self._query_gitlab_graphql(body, header)
+            if "data" not in resp_body or "projects" not in resp_body["data"]:
+                raise errors.BaseError(message=f"Got unexpected response from Gitlab: {resp_body}")
             page_info = resp_body["data"]["projects"]["pageInfo"]
             _process_projects(resp_body, min_access_level, result)
 
         return result
 
+    async def get_project_url_from_path(self, user: APIUser, project_path: str) -> str | None:
+        """Get the HTTP(S) repository URL of a project from its path i.e. from /group1/subgroup2/project3."""
+        header = {"Content-Type": "application/json"}
+        if user.access_token:
+            header["Authorization"] = f"Bearer {user.access_token}"
+        # Send the path as a GraphQL variable so user input is never interpolated into the query text.
+        body = {
+            "query": "query($path: ID!) {project(fullPath: $path) {httpUrlToRepo}}",
+            "variables": {"path": project_path},
+        }
+
+        resp_body = await self._query_gitlab_graphql(body, header)
+        if "data" not in resp_body or "project" not in resp_body["data"]:
+            raise errors.BaseError(message=f"Got unexpected response from Gitlab: {resp_body}")
+        # Gitlab answers with "project": null when the path is unknown or not visible to the user.
+        return cast(str | None, (resp_body["data"]["project"] or {}).get("httpUrlToRepo"))
+
 
 @dataclass(kw_only=True)
 class DummyGitlabAPI:
@@ -139,3 +157,7 @@ async def filter_projects_by_access_level(
             return []
         user_projects = self._store.get(user.full_name, {}).get(min_access_level, [])
         return [p for p in project_ids if p in user_projects]
+
+    async def get_project_url_from_path(self, user: APIUser, project_path: str) -> str | None:
+        """Get the HTTP(S) repository URL of a project from its path i.e. from /group1/subgroup2/project3."""
+        raise NotImplementedError()
diff --git a/components/renku_data_services/project/api.spec.yaml b/components/renku_data_services/project/api.spec.yaml
index 6da2ed5ca..17ce01e77 100644
--- a/components/renku_data_services/project/api.spec.yaml
+++ b/components/renku_data_services/project/api.spec.yaml
@@ -201,6 +201,33 @@ paths:
           $ref: "#/components/responses/Error"
       tags:
         - projects
+  /renku_v1_projects/path/{path}:
+    get:
+      summary: Try to get information about a v1 project from the core service. The path has to be url encoded.
+      parameters:
+        - in: path
+          name: path
+          required: true
+          description: The Gitlab path for the project.
+          schema:
+            type: string
+      responses:
+        "200":
+          description: V1 Project keywords and description
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/V1Project"
+        "404":
+          description: No corresponding project found in Gitlab
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/ErrorResponse"
+        default:
+          $ref: "#/components/responses/Error"
+      tags:
+        - projects
   /namespaces/{namespace}/projects/{slug}:
     get:
       summary: Get a project by namespace and project slug
@@ -1159,6 +1186,26 @@ components:
       maxLength: 5000
       nullable: true
       example: My secret value
+    V1Project:
+      description: V1 Project properties
+      type: object
+      additionalProperties: false
+      properties:
+        id:
+          type: string
+        name:
+          type: string
+        keywords:
+          type: array
+          items:
+            type: string
+        description:
+          type: string
+      example:
+        id: "1234"
+        keywords: ["kw1", "kw2"]
+        description: This is a sample description for a project.
+        name: Some Project
     PaginationRequest:
       type: object
       additionalProperties: false
diff --git a/components/renku_data_services/project/apispec.py b/components/renku_data_services/project/apispec.py
index b0c3c5733..249277b0e 100644
--- a/components/renku_data_services/project/apispec.py
+++ b/components/renku_data_services/project/apispec.py
@@ -1,6 +1,6 @@
 # generated by datamodel-codegen:
 #   filename:  api.spec.yaml
-#   timestamp: 2025-04-04T08:05:01+00:00
+#   timestamp: 2025-04-09T22:30:09+00:00
 
 from __future__ import annotations
 
@@ -120,6 +120,16 @@ class SessionSecretPatchExistingSecret(BaseAPISpec):
     )
 
 
+class V1Project(BaseAPISpec):
+    model_config = ConfigDict(
+        extra="forbid",
+    )
+    id: Optional[str] = None
+    name: Optional[str] = None
+    keywords: Optional[List[str]] = None
+    description: Optional[str] = None
+
+
 class PaginationRequest(BaseAPISpec):
     model_config = ConfigDict(
         extra="forbid",
diff --git a/components/renku_data_services/project/blueprints.py b/components/renku_data_services/project/blueprints.py
index 2da20ee92..5ba5088c2 100644
--- a/components/renku_data_services/project/blueprints.py
+++ b/components/renku_data_services/project/blueprints.py
@@ -2,6 +2,7 @@
 
 from dataclasses import dataclass
 from typing import Any
+from urllib.parse import unquote
 
 from sanic import HTTPResponse, Request
 from sanic.response import JSONResponse
@@ -12,6 +13,7 @@
 from renku_data_services.authz.models import Member, Role, Visibility
 from renku_data_services.base_api.auth import (
     authenticate,
+    authenticate_2,
     only_authenticated,
     validate_path_user_id,
 )
@@ -27,6 +29,7 @@
 from renku_data_services.project import models as project_models
 from renku_data_services.project.core import (
     copy_project,
+    get_v1_project_info,
     validate_project_patch,
     validate_session_secret_slot_patch,
     validate_session_secrets_patch,
@@ -54,6 +57,9 @@ class ProjectsBP(CustomBlueprint):
     session_repo: SessionRepository
     data_connector_repo: DataConnectorRepository
     project_migration_repo: ProjectMigrationRepository
+    internal_gitlab_authenticator: base_models.Authenticator
+    gitlab_client: base_models.GitlabAPIProtocol
+    core_scv_url: str | None = None
 
     def get_all(self) -> BlueprintFactoryResponse:
         """List all projects."""
@@ -114,6 +120,25 @@ async def _post_migration(
 
         return "/renku_v1_projects//migrations", ["POST"], _post_migration
 
+    def get_v1_project_by_path(self) -> BlueprintFactoryResponse:
+        """Get information about a v1 project from the path."""
+
+        @authenticate_2(self.authenticator, self.internal_gitlab_authenticator)
+        async def _get_v1_project_by_path(
+            _: Request, user: base_models.APIUser, internal_gitlab_user: base_models.APIUser, path: str
+        ) -> JSONResponse:
+            if self.core_scv_url is None:
+                raise errors.MissingResourceError(
+                    message="The core service url is not defined so we cannot get project information."
+                )
+            decoded_path = unquote(path)
+            output = await get_v1_project_info(
+                user, internal_gitlab_user, decoded_path, self.gitlab_client, self.core_scv_url
+            )
+            return validated_json(apispec.V1Project, output)
+
+        return "/renku_v1_projects/path/<path:str>", ["GET"], _get_v1_project_by_path
+
     def get_project_migration_info(self) -> BlueprintFactoryResponse:
         """Get project migration by project v2 id."""
 
diff --git a/components/renku_data_services/project/core.py b/components/renku_data_services/project/core.py
index 121b6e45d..be4cb07c3 100644
--- a/components/renku_data_services/project/core.py
+++ b/components/renku_data_services/project/core.py
@@ -1,13 +1,16 @@
 """Business logic for projects."""
 
 from pathlib import PurePosixPath
-from urllib.parse import urlparse
+from typing import cast
+from urllib.parse import urljoin, urlparse
 
+import httpx
 from ulid import ULID
 
 from renku_data_services import errors
 from renku_data_services.authz.models import Visibility
 from renku_data_services.base_models import RESET, APIUser, ResetType, Slug
+from renku_data_services.base_models.core import GitlabAPIProtocol
 from renku_data_services.data_connectors.db import DataConnectorRepository
 from renku_data_services.project import apispec, models
 from renku_data_services.project.db import ProjectRepository
@@ -214,3 +217,51 @@ def _validate_session_launcher_secret_slot_filename(filename: str) -> None:
     filename_candidate = PurePosixPath(filename)
     if filename_candidate.name != filename:
         raise errors.ValidationError(message=f"Filename {filename} is not valid.")
+
+
+async def get_v1_project_info(
+    user: APIUser,
+    internal_gitlab_user: APIUser,
+    project_path: str,
+    gitlab_client: GitlabAPIProtocol,
+    core_svc_url: str,
+) -> dict[str, str | list[str] | int | None]:
+    """Request project information from the core service for a Renku v1 project."""
+    url = await gitlab_client.get_project_url_from_path(internal_gitlab_user, project_path)
+    if not url:
+        raise errors.MissingResourceError(
+            message=f"The Renku v1 project with path {project_path} cannot be found "
+            "in Gitlab or you do not have access to it"
+        )
+
+    body = {"git_url": url, "is_delayed": False, "migrate_project": False}
+    headers = {}
+    if user.access_token:
+        headers["Authorization"] = user.access_token
+    full_url = urljoin(core_svc_url + "/", "project.show")
+    async with httpx.AsyncClient() as clnt:
+        res = await clnt.post(full_url, json=body, headers=headers)
+    if res.status_code != 200:
+        raise errors.MissingResourceError(
+            message=f"The core service responded with an unexpected code {res.status_code} when getting "
+            f"information about project {project_path} and url {url}"
+        )
+    res_json = cast(dict[str, dict[str, str | int | list[str]]], res.json())
+    if res_json.get("error") is not None:
+        raise errors.MissingResourceError(
+            message=f"The core service responded with an error when getting "
+            f"information about project {project_path} and url {url}",
+            detail=cast(str | None, res_json.get("error", {}).get("userMessage")),
+        )
+
+    kws = res_json.get("result", {}).get("keywords")
+    desc = res_json.get("result", {}).get("description")
+    project_id = res_json.get("result", {}).get("id")
+    name = res_json.get("result", {}).get("name")
+    output = {
+        "name": name,
+        "id": project_id,
+        "keywords": kws,
+        "description": desc,
+    }
+    return output