diff --git a/.github/workflows/codespell.yml b/.github/workflows/codespell.yml index 7373aff..c5e1604 100644 --- a/.github/workflows/codespell.yml +++ b/.github/workflows/codespell.yml @@ -17,6 +17,6 @@ jobs: steps: - name: Checkout - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Codespell uses: codespell-project/actions-codespell@v2 diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index 6c70882..f01e6a1 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -11,7 +11,7 @@ jobs: env: CONTAINER: quay.io/con/tributors steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Build Docker Image run: docker build -t "${CONTAINER}" . - name: Log In to Quay.io diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 24853d8..f7bc5c7 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -12,7 +12,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout source - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: fetch-depth: 0 diff --git a/.github/workflows/shellcheck.yml b/.github/workflows/shellcheck.yml index 8d9c4f1..633a0e4 100644 --- a/.github/workflows/shellcheck.yml +++ b/.github/workflows/shellcheck.yml @@ -13,7 +13,7 @@ jobs: run: | sudo apt-get update -qq sudo apt-get install shellcheck - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Run shellcheck run: | # I: running only on a subset of scripts which are shellcheck clean ATM diff --git a/.github/workflows/test-action.yml b/.github/workflows/test-action.yml index 39670e7..f0c6d3f 100644 --- a/.github/workflows/test-action.yml +++ b/.github/workflows/test-action.yml @@ -9,7 +9,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout Repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Generate Updated Zenodo and Contributors # Important! 
Update to release https://github.com/con/tributors @@ -55,7 +55,7 @@ jobs: allcontrib_skip_generate: false - name: Upload zenodo data as artifact - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 # Path is relative to GITHUB_WORKSPACE with: @@ -63,13 +63,13 @@ jobs: path: .zenodo.json - name: Upload allcontributors data as artifact - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: allcontrib path: .all-contributorsrc - name: Upload README as artifact - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: readme path: README.md diff --git a/.github/workflows/test-tributors.yml b/.github/workflows/test-tributors.yml index 40100c0..a56ce2a 100644 --- a/.github/workflows/test-tributors.yml +++ b/.github/workflows/test-tributors.yml @@ -8,7 +8,7 @@ jobs: formatting: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup black environment run: conda create --quiet --name black pyflakes @@ -29,7 +29,7 @@ jobs: needs: formatting runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup testing environment run: conda create --quiet --name testing pytest @@ -49,7 +49,7 @@ jobs: env: CONTAINER: quay.io/con/tributors steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Build Docker Image run: docker build -t "${CONTAINER}" . - name: Tag and Preview Container diff --git a/.github/workflows/update-contributors.yml b/.github/workflows/update-contributors.yml index e4a7418..3a1c0cc 100644 --- a/.github/workflows/update-contributors.yml +++ b/.github/workflows/update-contributors.yml @@ -11,7 +11,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout Repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Tributors Update # Important! 
Update to release https://github.com/con/tributors diff --git a/tributors/main/github.py b/tributors/main/github.py index c0ec47c..e483282 100644 --- a/tributors/main/github.py +++ b/tributors/main/github.py @@ -15,7 +15,7 @@ import requests import sys -repository_regex = "(?P[\w,\-,\_]+)/(?P[\w,\-,\_\.]+)" +repository_regex = r"(?P[\w,\-,\_]+)/(?P[\w,\-,\_\.]+)" bot = logging.getLogger("github") diff --git a/tributors/main/orcid.py b/tributors/main/orcid.py index 5c01037..c7cce21 100644 --- a/tributors/main/orcid.py +++ b/tributors/main/orcid.py @@ -130,12 +130,12 @@ def get_orcid_token(): return orcid_token -def record_search(url, email, interactive=False, search_type=""): - """Given a url (with a name or email) do a record search looking for an orcid id. +def record_search(url, terms, interactive=False, search_type=""): + """Given a url (built from the search terms) do a record search looking for an orcid id. Arguments: - url (str) : url to perform request - - email (str) : email, used just for logging + - terms (tuple) : query template followed by its arguments, used just for logging - interactive (bool) : if True, ask user if there is more than a single response - search_type (str) : description on what search is based on, used just for logging """ @@ -152,19 +152,20 @@ def record_search(url, email, interactive=False, search_type=""): if len(results) == 1: return results[0]["orcid-id"] + term_str = terms[0] % terms[1:] # Only stream results to screen in interactive mode if not interactive: bot.info( - f"{email}: found more than 1 ({len(results)}) result for ORCID search {search_type}, " + f"{term_str}: found more than one ({len(results)}) result for ORCID search {search_type}, " "run with --interactive mode to select." 
) - return + return Ellipsis # One or more results if len(results) > 10: bot.warning("Found more than 10 results, will only show top 10.") - print("\n\n%s\n======================================================" % email) + print("\n\n%s\n======================================================" % term_str) for idx, r in enumerate(results): # Limit is ten results, count starting at 0 idx = idx + 1 @@ -191,6 +192,9 @@ def record_search(url, email, interactive=False, search_type=""): else: print("[%s]\n%s\n" % (idx, record)) + # TODO: remember which options were already presented for a person and + # do not show them again. + # # If interactive, ask for choice prompt if interactive: skip_choices = ["s", "S", "skip"] @@ -216,7 +220,7 @@ def record_search(url, email, interactive=False, search_type=""): if choice in enter_choices: return entry_prompt( - f"Please enter the ORCID for {email}.", + f"Please enter the ORCID for {term_str}.", regex="[0-9]{4}-[0-9]{4}-[0-9]{4}-[0-9]{3}[0-9X]$", ) @@ -227,53 +231,88 @@ def record_search(url, email, interactive=False, search_type=""): return results[int(choice) - 1]["orcid-id"] -def get_orcid(email, name=None, interactive=False): - """Get an orcid identifier for a given email or name.""" - # We must have an email OR name - if not email and not name: - return +def extended_search_url(q, *args): + """Helper to properly quote args and avoid duplicating URL etc""" + # We will show only up to 10, so requesting 11, no need to get all default 1000 + url = f"https://pub.orcid.org/v3.0/expanded-search?q={q}&rows=11" + if args: + url %= tuple(map(urllib.parse.quote, args)) + return url + - def extended_search_url(q, *args): - """Helper to properly quote args and avoid duplicating URL etc""" - url = f"https://pub.orcid.org/v3.0/expanded-search?q={q}" - if args: - url %= tuple(map(urllib.parse.quote, args)) - return url +strict, loose = True, False - # First look for records based on email - orcid_id = None + +def 
gen_searches(email, name): if email: - url = extended_search_url("email:%s", email) - orcid_id = record_search(url, email, interactive, "by email") + yield (("email:%s", email), "by email", strict) - # Attempt # 2 will use the first and last name - if not orcid_id and name is not None: + # Next attempts will use name + if name is not None: delim = "," if "," in name else " " cleaner = "," if delim == " " else " " - parts = name.split(delim) + parts = [_.strip(cleaner) for _ in name.split(delim)] # No go if only a first or last name if len(parts) == 1: bot.debug(f"Skipping {name}, first and last are required for search.") - return orcid_id + return + + # Just as is + yield ( + ('credit-name:"%s"+OR+other-names:"%s"', name, name), + "by full credit or other names", + strict, + ) - last, first = parts[0].strip(cleaner), " ".join(parts[1:]).strip(cleaner) - url = extended_search_url("%s+AND+%s", first, last) - orcid_id = record_search(url, name, interactive, "by name") + if delim == ",": + # Last, First Middle + last, given = parts[0], " ".join(parts[1:]) + else: + # First Middle Last + given, last = " ".join(parts[:-1]), parts[-1] + + yield ( + ('given-names:"%s"+AND+family-name:"%s"', given, last), + "by name", + strict, + ) # Attempt # 3 will try removing the middle name - if not orcid_id and " " in first: - url = extended_search_url( - "%s+AND+%s", - first.split(" ")[0].strip(), - last, + if " " in given: + yield ( + ( + 'given-names:"%s"+AND+family-name:"%s"', + given.split(" ")[0].strip(), + last, + ), + "by name", + loose, ) - orcid_id = record_search(url, name, interactive, "by name without middle") - # Last attempt tries full name "as is" - if not orcid_id: - url = extended_search_url("%s", name) - orcid_id = record_search(url, name, interactive, "full name") + # Just a combination of all parts of the name + yield ( + ("+AND+".join(["%s"] * len(parts)),) + tuple(parts), + "by name parts", + loose, + ) + - return orcid_id +def get_orcid(email: str | None, name: 
str | None = None, interactive=False): + """Get an orcid identifier for a given email or name.""" + # We must have an email OR name + if not email and not name: + return + + for search_args, search_desc, strictness in gen_searches(email, name): + url = extended_search_url(*search_args) + if ( + orcid_id := record_search(url, search_args, interactive, search_desc) + ) is not Ellipsis and orcid_id: + return orcid_id + if orcid_id is Ellipsis: + orcid_id = None + if strictness: + break + # if loose, and still got multiple results, continue