From 7ed2b9fc70ffa5c4d1f814401dacf6be2eca7699 Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Fri, 21 Mar 2025 22:20:42 -0400 Subject: [PATCH 1/7] ENH: just make regex string raw for escapes --- tributors/main/github.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tributors/main/github.py b/tributors/main/github.py index c0ec47c..e483282 100644 --- a/tributors/main/github.py +++ b/tributors/main/github.py @@ -15,7 +15,7 @@ import requests import sys -repository_regex = "(?P[\w,\-,\_]+)/(?P[\w,\-,\_\.]+)" +repository_regex = r"(?P[\w,\-,\_]+)/(?P[\w,\-,\_\.]+)" bot = logging.getLogger("github") From 513c9eb00945c498c415e6fed9f26228212f903d Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Fri, 21 Mar 2025 22:39:37 -0400 Subject: [PATCH 2/7] Further improve attempts to search through ORCID --- tributors/main/orcid.py | 101 ++++++++++++++++++++++++---------------- 1 file changed, 60 insertions(+), 41 deletions(-) diff --git a/tributors/main/orcid.py b/tributors/main/orcid.py index 5c01037..7397b89 100644 --- a/tributors/main/orcid.py +++ b/tributors/main/orcid.py @@ -130,12 +130,12 @@ def get_orcid_token(): return orcid_token -def record_search(url, email, interactive=False, search_type=""): - """Given a url (with a name or email) do a record search looking for an orcid id. +def record_search(url, terms, interactive=False, search_type=""): + """Given a url (with a name or terms) do a record search looking for an orcid id. Arguments: - url (str) : url to perform request - - email (str) : email, used just for logging + - terms (str) : terms, used just for logging - interactive (bool) : if True, ask user if there is more than a single response - search_type (str) : description on what search is based on, used just for logging """ @@ -152,19 +152,20 @@ def record_search(url, email, interactive=False, search_type=""): if len(results) == 1: return results[0]["orcid-id"] + term_str = terms[0] % terms[1:] # Only stream results to screen in interactive mode if not interactive: bot.info( - f"{email}: found more than 1 ({len(results)}) result for ORCID search {search_type}, " + f"{term_str}: found more than one ({len(results)}) result for ORCID search {search_type}, " "run with --interactive mode to select." ) - return + return Ellipsis # One or more results if len(results) > 10: bot.warning("Found more than 10 results, will only show top 10.") - print("\n\n%s\n======================================================" % email) + print("\n\n%s\n======================================================" % term_str) for idx, r in enumerate(results): # Limit is ten results, count starting at 0 idx = idx + 1 @@ -191,6 +192,9 @@ def record_search(url, email, interactive=False, search_type=""): else: print("[%s]\n%s\n" % (idx, record)) + # TODO: here we should remember for a person on what we already presented as + # options and not to show them again. + # # If interactive, ask for choice prompt if interactive: skip_choices = ["s", "S", "skip"] @@ -226,54 +230,69 @@ def record_search(url, email, interactive=False, search_type=""): # Return the orcid identifier return results[int(choice) - 1]["orcid-id"] +def extended_search_url(q, *args): + """Helper to properly quote args and avoid duplicating URL etc""" + # We will show only up to 10, so requesting 11, no need to get all default 1000 + url = f"https://pub.orcid.org/v3.0/expanded-search?q={q}&args=11" + if args: + url %= tuple(map(urllib.parse.quote, args)) + return url -def get_orcid(email, name=None, interactive=False): - """Get an orcid identifier for a given email or name.""" - # We must have an email OR name - if not email and not name: - return - - def extended_search_url(q, *args): - """Helper to properly quote args and avoid duplicating URL etc""" - url = f"https://pub.orcid.org/v3.0/expanded-search?q={q}" - if args: - url %= tuple(map(urllib.parse.quote, args)) - return url - - # First look for records based on email - orcid_id = None +strict, loose = True, False +def gen_searches(email, name): if email: - url = extended_search_url("email:%s", email) - orcid_id = record_search(url, email, interactive, "by email") + yield (("email:%s", email), "by email", strict) - # Attempt # 2 will use the first and last name - if not orcid_id and name is not None: + # Next attempts will use name + if name is not None: delim = "," if "," in name else " " cleaner = "," if delim == " " else " " - parts = name.split(delim) + parts = [_.strip(cleaner) for _ in name.split(delim)] # No go if only a first or last name if len(parts) == 1: bot.debug(f"Skipping {name}, first and last are required for search.") - return orcid_id + return - last, first = parts[0].strip(cleaner), " ".join(parts[1:]).strip(cleaner) - url = extended_search_url("%s+AND+%s", first, last) - orcid_id = record_search(url, name, interactive, "by name") + # Just as is + yield (('credit-name:"%s"+OR+other-names:"%s"', name, name), + "by full credit or other names", strict) + + if delim == ',': + # Last, First Middle + last, given = parts[0], " ".join(parts[1:]) + else: + # First Middle Last + given, last = " ".join(parts[:-1]), parts[-1] + + yield (('given-names:"%s"+AND+family-name:"%s"', given, last), "by name", strict) # Attempt # 3 will try removing the middle name - if not orcid_id and " " in first: - url = extended_search_url( - "%s+AND+%s", - first.split(" ")[0].strip(), - last, + if " " in given: + yield ( + ('given-names:"%s"+AND+family-name:"%s"', + given.split(" ")[0].strip(), last), + "by name", + loose ) - orcid_id = record_search(url, name, interactive, "by name without middle") - # Last attempt tries full name "as is" - if not orcid_id: - url = extended_search_url("%s", name) - orcid_id = record_search(url, name, interactive, "full name") + # Just a combination of all parts of the name + yield (("+AND+".join(["%s"] * len(parts)),) + tuple(parts), "by name parts", loose) + - return orcid_id +def get_orcid(email: str|None, name: str|None=None, interactive=False): + """Get an orcid identifier for a given email or name.""" + # We must have an email OR name + if not email and not name: + return + + for search_args, search_desc, strictness in gen_searches(email, name): + url = extended_search_url(*search_args) + if (orcid_id := record_search(url, search_args, interactive, search_desc)) is not Ellipsis: + return orcid_id + if orcid_id is Ellipsis: + orcid_id = None + if strict: + break + # if loose, and still got multiple results, continue \ No newline at end of file From c2d9833a550fb22c693d1acf3176a5a5212c8739 Mon Sep 17 00:00:00 2001 From: "Yaroslav O. Halchenko" Date: Fri, 9 May 2025 20:10:59 -0400 Subject: [PATCH 3/7] [DATALAD RUNCMD] Run black on modified file === Do not change lines below === { "chain": [], "cmd": "black tributors/main/orcid.py", "exit": 0, "extra_inputs": [], "inputs": [], "outputs": [], "pwd": "." } ^^^ Do not change lines above ^^^ --- tributors/main/orcid.py | 50 ++++++++++++++++++++++++++++------------- 1 file changed, 35 insertions(+), 15 deletions(-) diff --git a/tributors/main/orcid.py b/tributors/main/orcid.py index 7397b89..ac6dcac 100644 --- a/tributors/main/orcid.py +++ b/tributors/main/orcid.py @@ -230,6 +230,7 @@ def record_search(url, terms, interactive=False, search_type=""): # Return the orcid identifier return results[int(choice) - 1]["orcid-id"] + def extended_search_url(q, *args): """Helper to properly quote args and avoid duplicating URL etc""" # We will show only up to 10, so requesting 11, no need to get all default 1000 @@ -238,7 +239,10 @@ def extended_search_url(q, *args): url %= tuple(map(urllib.parse.quote, args)) return url + strict, loose = True, False + + def gen_searches(email, name): if email: yield (("email:%s", email), "by email", strict) @@ -256,32 +260,46 @@ def gen_searches(email, name): return # Just as is - yield (('credit-name:"%s"+OR+other-names:"%s"', name, name), - "by full credit or other names", strict) + yield ( + ('credit-name:"%s"+OR+other-names:"%s"', name, name), + "by full credit or other names", + strict, + ) - if delim == ',': - # Last, First Middle - last, given = parts[0], " ".join(parts[1:]) + if delim == ",": + # Last, First Middle + last, given = parts[0], " ".join(parts[1:]) else: - # First Middle Last - given, last = " ".join(parts[:-1]), parts[-1] + # First Middle Last + given, last = " ".join(parts[:-1]), parts[-1] - yield (('given-names:"%s"+AND+family-name:"%s"', given, last), "by name", strict) + yield ( + ('given-names:"%s"+AND+family-name:"%s"', given, last), + "by name", + strict, + ) # Attempt # 3 will try removing the middle name if " " in given: yield ( - ('given-names:"%s"+AND+family-name:"%s"', - given.split(" ")[0].strip(), last), + ( + 'given-names:"%s"+AND+family-name:"%s"', + given.split(" ")[0].strip(), + last, + ), "by name", - loose + loose, ) # Just a combination of all parts of the name - yield (("+AND+".join(["%s"] * len(parts)),) + tuple(parts), "by name parts", loose) + yield ( + ("+AND+".join(["%s"] * len(parts)),) + tuple(parts), + "by name parts", + loose, + ) -def get_orcid(email: str|None, name: str|None=None, interactive=False): +def get_orcid(email: str | None, name: str | None = None, interactive=False): """Get an orcid identifier for a given email or name.""" # We must have an email OR name if not email and not name: @@ -289,10 +307,12 @@ def get_orcid(email: str|None, name: str|None=None, interactive=False): for search_args, search_desc, strictness in gen_searches(email, name): url = extended_search_url(*search_args) - if (orcid_id := record_search(url, search_args, interactive, search_desc)) is not Ellipsis: + if ( + orcid_id := record_search(url, search_args, interactive, search_desc) + ) is not Ellipsis: return orcid_id if orcid_id is Ellipsis: orcid_id = None if strict: break - # if loose, and still got multiple results, continue \ No newline at end of file + # if loose, and still got multiple results, continue From 659c2a4581d4261d63e234583dad4ac0cc45c4c1 Mon Sep 17 00:00:00 2001 From: "Yaroslav O. Halchenko" Date: Fri, 9 May 2025 20:12:18 -0400 Subject: [PATCH 4/7] [DATALAD RUNCMD] Boost action/checkout to @v4 === Do not change lines below === { "chain": [], "cmd": "sed -i -e s,checkout@v3,checkout@v4,g .github/workflows/codespell.yml .github/workflows/deploy.yml .github/workflows/release.yml .github/workflows/shellcheck.yml .github/workflows/test-action.yml .github/workflows/test-tributors.yml .github/workflows/update-contributors.yml", "exit": 0, "extra_inputs": [], "inputs": [], "outputs": [], "pwd": "." } ^^^ Do not change lines above ^^^ --- .github/workflows/codespell.yml | 2 +- .github/workflows/deploy.yml | 2 +- .github/workflows/release.yml | 2 +- .github/workflows/shellcheck.yml | 2 +- .github/workflows/test-action.yml | 2 +- .github/workflows/test-tributors.yml | 6 +++--- .github/workflows/update-contributors.yml | 2 +- 7 files changed, 9 insertions(+), 9 deletions(-) diff --git a/.github/workflows/codespell.yml b/.github/workflows/codespell.yml index 7373aff..c5e1604 100644 --- a/.github/workflows/codespell.yml +++ b/.github/workflows/codespell.yml @@ -17,6 +17,6 @@ jobs: steps: - name: Checkout - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Codespell uses: codespell-project/actions-codespell@v2 diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index 6c70882..f01e6a1 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -11,7 +11,7 @@ jobs: env: CONTAINER: quay.io/con/tributors steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Build Docker Image run: docker build -t "${CONTAINER}" . - name: Log In to Quay.io diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 24853d8..f7bc5c7 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -12,7 +12,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout source - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: fetch-depth: 0 diff --git a/.github/workflows/shellcheck.yml b/.github/workflows/shellcheck.yml index 8d9c4f1..633a0e4 100644 --- a/.github/workflows/shellcheck.yml +++ b/.github/workflows/shellcheck.yml @@ -13,7 +13,7 @@ jobs: run: | sudo apt-get update -qq sudo apt-get install shellcheck - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Run shellcheck run: | # I: running only on a subset of scripts which are shellcheck clean ATM diff --git a/.github/workflows/test-action.yml b/.github/workflows/test-action.yml index 39670e7..6eee055 100644 --- a/.github/workflows/test-action.yml +++ b/.github/workflows/test-action.yml @@ -9,7 +9,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout Repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Generate Updated Zenodo and Contributors # Important! Update to release https://github.com/con/tributors diff --git a/.github/workflows/test-tributors.yml b/.github/workflows/test-tributors.yml index 40100c0..a56ce2a 100644 --- a/.github/workflows/test-tributors.yml +++ b/.github/workflows/test-tributors.yml @@ -8,7 +8,7 @@ jobs: formatting: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup black environment run: conda create --quiet --name black pyflakes @@ -29,7 +29,7 @@ jobs: needs: formatting runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup testing environment run: conda create --quiet --name testing pytest @@ -49,7 +49,7 @@ jobs: env: CONTAINER: quay.io/con/tributors steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Build Docker Image run: docker build -t "${CONTAINER}" . - name: Tag and Preview Container diff --git a/.github/workflows/update-contributors.yml b/.github/workflows/update-contributors.yml index e4a7418..3a1c0cc 100644 --- a/.github/workflows/update-contributors.yml +++ b/.github/workflows/update-contributors.yml @@ -11,7 +11,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout Repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Tributors Update # Important! Update to release https://github.com/con/tributors From f16a07251bf1f307eae31f39e5f2c2f0738439ce Mon Sep 17 00:00:00 2001 From: "Yaroslav O. Halchenko" Date: Fri, 9 May 2025 20:16:25 -0400 Subject: [PATCH 5/7] BF: replace no longer present "email" with "term_str" in prompt, thanks flake --- tributors/main/orcid.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tributors/main/orcid.py b/tributors/main/orcid.py index ac6dcac..cd360bd 100644 --- a/tributors/main/orcid.py +++ b/tributors/main/orcid.py @@ -220,7 +220,7 @@ def record_search(url, terms, interactive=False, search_type=""): if choice in enter_choices: return entry_prompt( - f"Please enter the ORCID for {email}.", + f"Please enter the ORCID for {term_str}.", regex="[0-9]{4}-[0-9]{4}-[0-9]{4}-[0-9]{3}[0-9X]$", ) From 147358cd0c624822a0a916934bf774c95c46ac93 Mon Sep 17 00:00:00 2001 From: "Yaroslav O. Halchenko" Date: Fri, 9 May 2025 20:18:13 -0400 Subject: [PATCH 6/7] [DATALAD RUNCMD] Boost action/upload-artifact to @v4 === Do not change lines below === { "chain": [], "cmd": "sed -i -e s,upload-artifact@v3,upload-artifact@v4,g .github/workflows/codespell.yml .github/workflows/deploy.yml .github/workflows/release.yml .github/workflows/shellcheck.yml .github/workflows/test-action.yml .github/workflows/test-tributors.yml .github/workflows/update-contributors.yml", "exit": 0, "extra_inputs": [], "inputs": [], "outputs": [], "pwd": "." } ^^^ Do not change lines above ^^^ --- .github/workflows/test-action.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/test-action.yml b/.github/workflows/test-action.yml index 6eee055..f0c6d3f 100644 --- a/.github/workflows/test-action.yml +++ b/.github/workflows/test-action.yml @@ -55,7 +55,7 @@ jobs: allcontrib_skip_generate: false - name: Upload zenodo data as artifact - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 # Path is relative to GITHUB_WORKSPACE with: @@ -63,13 +63,13 @@ jobs: path: .zenodo.json - name: Upload allcontributors data as artifact - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: allcontrib path: .all-contributorsrc - name: Upload README as artifact - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: readme path: README.md From e04d4be82f0d7dcbe9ec69253546084d77fbc4f0 Mon Sep 17 00:00:00 2001 From: "Yaroslav O. Halchenko" Date: Fri, 9 May 2025 20:44:12 -0400 Subject: [PATCH 7/7] Return orcid_id only if not None... although may be that would us become non-specific? --- tributors/main/orcid.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tributors/main/orcid.py b/tributors/main/orcid.py index cd360bd..c7cce21 100644 --- a/tributors/main/orcid.py +++ b/tributors/main/orcid.py @@ -309,7 +309,7 @@ def get_orcid(email: str | None, name: str | None = None, interactive=False): url = extended_search_url(*search_args) if ( orcid_id := record_search(url, search_args, interactive, search_desc) - ) is not Ellipsis: + ) is not Ellipsis and orcid_id: return orcid_id if orcid_id is Ellipsis: orcid_id = None