diff --git a/changes/149.bugfix b/changes/149.bugfix new file mode 100644 index 0000000..9e7c212 --- /dev/null +++ b/changes/149.bugfix @@ -0,0 +1 @@ +Fix ``path`` and ``branch`` extraction removing *every* ``/blob/`` and ``/tree/`` occurrence instead of only the leading marker, which corrupted file paths and branch names containing those segments. diff --git a/giturlparse/platforms/github.py b/giturlparse/platforms/github.py index e31943f..d795456 100644 --- a/giturlparse/platforms/github.py +++ b/giturlparse/platforms/github.py @@ -34,7 +34,7 @@ class GitHubPlatform(BasePlatform): def clean_data(data): data = BasePlatform.clean_data(data) if data["path_raw"].startswith("/blob/"): - data["path"] = data["path_raw"].replace("/blob/", "") + data["path"] = data["path_raw"][len("/blob/") :] if data["path_raw"].startswith("/tree/"): - data["branch"] = data["path_raw"].replace("/tree/", "") + data["branch"] = data["path_raw"][len("/tree/") :] return data diff --git a/giturlparse/platforms/gitlab.py b/giturlparse/platforms/gitlab.py index 44d3cd5..7c139c8 100644 --- a/giturlparse/platforms/gitlab.py +++ b/giturlparse/platforms/gitlab.py @@ -38,9 +38,9 @@ class GitLabPlatform(BasePlatform): def clean_data(data): data = BasePlatform.clean_data(data) if data["path_raw"].startswith("/blob/"): - data["path"] = data["path_raw"].replace("/blob/", "") + data["path"] = data["path_raw"][len("/blob/") :] if data["path_raw"].startswith("/-/blob/"): - data["path"] = data["path_raw"].replace("/-/blob/", "") + data["path"] = data["path_raw"][len("/-/blob/") :] if data["path_raw"].startswith("/-/tree/"): - data["branch"] = data["path_raw"].replace("/-/tree/", "") + data["branch"] = data["path_raw"][len("/-/tree/") :] return data diff --git a/giturlparse/tests/test_parse.py b/giturlparse/tests/test_parse.py index bd9e70d..0d18348 100644 --- a/giturlparse/tests/test_parse.py +++ b/giturlparse/tests/test_parse.py @@ -481,6 +481,80 @@ }, ), ), + ( + "HTTPS", + ( + # Regression: a file path that itself contains a "blob" directory must + # not have the inner "/blob/" stripped (only the leading marker). + "https://github.com/nephila/giturlparse/blob/master/giturlparse/blob/data.py", + { + "host": "github.com", + "resource": "github.com", + "port": "", + "user": "git", + "owner": "nephila", + "repo": "giturlparse", + "name": "giturlparse", + "groups": [], + "path": "master/giturlparse/blob/data.py", + "path_raw": "/blob/master/giturlparse/blob/data.py", + "pathname": "/nephila/giturlparse/blob/master/giturlparse/blob/data.py", + "branch": "", + "protocol": "https", + "protocols": ["https"], + "platform": "github", + }, + ), + ), + ( + "HTTPS", + ( + # Regression: a branch name containing "/tree/" must be preserved + # in full rather than having every "/tree/" removed. + "https://github.com/nephila/giturlparse/tree/feature/tree/x", + { + "host": "github.com", + "resource": "github.com", + "port": "", + "user": "git", + "owner": "nephila", + "repo": "giturlparse", + "name": "giturlparse", + "groups": [], + "path": "", + "path_raw": "/tree/feature/tree/x", + "pathname": "/nephila/giturlparse/tree/feature/tree/x", + "branch": "feature/tree/x", + "protocol": "https", + "protocols": ["https"], + "platform": "github", + }, + ), + ), + ( + "HTTPS", + ( + # Regression (GitLab): inner "/blob/" in the file path must survive. + "https://gitlab.com/nephila/giturlparse/-/blob/master/giturlparse/blob/data.py", + { + "host": "gitlab.com", + "resource": "gitlab.com", + "port": "", + "user": "git", + "owner": "nephila", + "repo": "giturlparse", + "name": "giturlparse", + "groups": [], + "path": "master/giturlparse/blob/data.py", + "path_raw": "/-/blob/master/giturlparse/blob/data.py", + "pathname": "/nephila/giturlparse/-/blob/master/giturlparse/blob/data.py", + "branch": "", + "protocol": "https", + "protocols": ["https"], + "platform": "gitlab", + }, + ), + ), ( "HTTPS", (