From 1c2e13e09828f28c7d552f1469de29c68b653c23 Mon Sep 17 00:00:00 2001 From: zmoon Date: Fri, 26 Sep 2025 14:00:23 -0500 Subject: [PATCH 01/14] Initial API endpoint consumption routine --- pyabc2/sources/the_session.py | 42 +++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/pyabc2/sources/the_session.py b/pyabc2/sources/the_session.py index da9cdd6..8fd25af 100644 --- a/pyabc2/sources/the_session.py +++ b/pyabc2/sources/the_session.py @@ -477,6 +477,48 @@ def load_meta( return df +def _consume( + endpoint: str, + *, + pages: Optional[int] = None, + size: Optional[int] = None, +) -> List[dict]: + """Consume paginated The Session API endpoint, returning a list of entries.""" + import requests + + if not endpoint.startswith("/"): + endpoint = "/" + endpoint + + base_url = "https://thesession.org" + + if size is not None: + per_page = size + else: + per_page = 50 # max + + def get_page(page: int) -> dict: + url = base_url + f"{endpoint}?format=json&perpage={per_page}&page={page}" + r = requests.get(url, timeout=5) + r.raise_for_status() + return r.json() + + # Even for page out of bounds + # https://thesession.org/tunes/popular?format=json&perpage=50&page=1000000 + # we get 'pages' (page count) and 'total' (entry count) + # (though the key that contains the data we want varies by endpoint) + # So start by getting the first page, and then we can multithread the rest + first_page = get_page(1) + if pages is None: + pages = first_page.get("pages", 1) + assert isinstance(pages, int) + + remaining_pages = [] + for page in range(2, pages + 1): + remaining_pages.append(get_page(page)) + + return [first_page] + remaining_pages + + if __name__ == "__main__": # pragma: no cover tune = load_url("https://thesession.org/tunes/10000") print(tune) From 7763e7a753533440783a9bde563af2475f8d83e9 Mon Sep 17 00:00:00 2001 From: zmoon Date: Fri, 26 Sep 2025 14:34:45 -0500 Subject: [PATCH 02/14] Allow passing additional parameters --- pyabc2/sources/the_session.py | 42 ++++++++++++++++++++++++++--------- 1 file changed, 32 insertions(+), 10 deletions(-) diff --git a/pyabc2/sources/the_session.py b/pyabc2/sources/the_session.py index 8fd25af..456bcdc 100644 --- a/pyabc2/sources/the_session.py +++ b/pyabc2/sources/the_session.py @@ -15,7 +15,7 @@ import os import warnings from pathlib import Path -from typing import TYPE_CHECKING, List, Literal, Optional, Union +from typing import TYPE_CHECKING, Dict, List, Literal, Optional, Union from .._util import get_logger as _get_logger from ..parse import Tune @@ -481,9 +481,27 @@ def _consume( endpoint: str, *, pages: Optional[int] = None, - size: Optional[int] = None, -) -> List[dict]: - """Consume paginated The Session API endpoint, returning a list of entries.""" + size: Optional[int] = 50, + **params, +) -> List[Dict]: + """Consume paginated The Session API endpoint, returning a list of entries. + + Parameters + ---------- + endpoint + The API endpoint, e.g. ``'/tunes/popular'``. + pages + Number of pages to retrieve. + Default: all pages. + size + Number of entries per page. + Corresponds to the ``perpage`` API parameter. + Default: 50 (maximum). + **params + Additional parameters to pass to the API. + For example, ``sortby=popular`` works for some endpoints. + Note that these, if provided, will be ignored: ``format``, ``perpage``, ``page``. + """ import requests if not endpoint.startswith("/"): @@ -491,14 +509,18 @@ def _consume( base_url = "https://thesession.org" - if size is not None: - per_page = size - else: - per_page = 50 # max + params.update( + { + "format": "json", + "perpage": size, + } + ) def get_page(page: int) -> dict: - url = base_url + f"{endpoint}?format=json&perpage={per_page}&page={page}" - r = requests.get(url, timeout=5) + page_params = params.copy() + page_params["page"] = page + url = base_url + endpoint + r = requests.get(url, timeout=5, params=page_params) r.raise_for_status() return r.json() From 74584e812e6f97ddf7272690b45748858a70d2d2 Mon Sep 17 00:00:00 2001 From: zmoon Date: Fri, 26 Sep 2025 14:45:59 -0500 Subject: [PATCH 03/14] Add threading --- pyabc2/sources/the_session.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/pyabc2/sources/the_session.py b/pyabc2/sources/the_session.py index 456bcdc..c66a7ea 100644 --- a/pyabc2/sources/the_session.py +++ b/pyabc2/sources/the_session.py @@ -482,6 +482,7 @@ def _consume( *, pages: Optional[int] = None, size: Optional[int] = 50, + max_threads: int = 1, **params, ) -> List[Dict]: """Consume paginated The Session API endpoint, returning a list of entries. @@ -497,6 +498,9 @@ def _consume( Number of entries per page. Corresponds to the ``perpage`` API parameter. Default: 50 (maximum). + max_threads + Maximum number of threads to use. + Default: 1 (no multi-threading). **params Additional parameters to pass to the API. For example, ``sortby=popular`` works for some endpoints. @@ -533,10 +537,16 @@ def get_page(page: int) -> dict: if pages is None: pages = first_page.get("pages", 1) assert isinstance(pages, int) + parallel = pages > 2 and max_threads > 1 - remaining_pages = [] - for page in range(2, pages + 1): - remaining_pages.append(get_page(page)) + page_range = range(2, pages + 1) + if parallel: + from multiprocessing.pool import ThreadPool + + with ThreadPool(min(max_threads, pages - 1)) as pool: + remaining_pages = pool.map(get_page, page_range) + else: + remaining_pages = [get_page(page) for page in page_range] return [first_page] + remaining_pages From b0965cff73a1d3d62e15f2dd95248676b6cfb33e Mon Sep 17 00:00:00 2001 From: zmoon Date: Fri, 26 Sep 2025 15:10:45 -0500 Subject: [PATCH 04/14] Validate --- pyabc2/sources/the_session.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/pyabc2/sources/the_session.py b/pyabc2/sources/the_session.py index c66a7ea..c211fe2 100644 --- a/pyabc2/sources/the_session.py +++ b/pyabc2/sources/the_session.py @@ -481,7 +481,7 @@ def _consume( endpoint: str, *, pages: Optional[int] = None, - size: Optional[int] = 50, + size: int = 50, max_threads: int = 1, **params, ) -> List[Dict]: @@ -511,6 +511,13 @@ def _consume( if not endpoint.startswith("/"): endpoint = "/" + endpoint + if not 1 <= size <= 50: + raise ValueError("`size` must be between 1 and 50 (inclusive).") + if pages is not None and pages < 1: + raise ValueError("`pages` must be >= 1.") + if max_threads < 1: + raise ValueError("`max_threads` must be >= 1.") + base_url = "https://thesession.org" params.update( From da88978265b2e667b04fd3206747b4e0e6c0756e Mon Sep 17 00:00:00 2001 From: zmoon Date: Fri, 26 Sep 2025 15:35:11 -0500 Subject: [PATCH 05/14] Get collections for a single tune --- pyabc2/sources/the_session.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/pyabc2/sources/the_session.py b/pyabc2/sources/the_session.py index c211fe2..6c2866b 100644 --- a/pyabc2/sources/the_session.py +++ b/pyabc2/sources/the_session.py @@ -558,6 +558,26 @@ def get_page(page: int) -> dict: return [first_page] + remaining_pages +def get_tune_collections(tune_id: int) -> "pandas.DataFrame": + """Get data about the other collections a tune is in.""" + # https://thesession.org/tunes/1/collections?format=json + import pandas as pd + + endpoint = f"/tunes/{tune_id}/collections" + (result,) = _consume(endpoint) + + return pd.DataFrame(result["collections"]).rename( + columns={ + "id": "collection_id", + "name": "collection_name", + "url": "collection_page", + # ^ https://thesession.org/tunes/collections/ID + "identifier": "collection_tune_id", + # ^ sometimes string ID (e.g. for print book), sometimes URL (e.g. for Norbeck) + } + ) + + if __name__ == "__main__": # pragma: no cover tune = load_url("https://thesession.org/tunes/10000") print(tune) From e0980343eff7dc52cc2136568139a0b600569c50 Mon Sep 17 00:00:00 2001 From: zmoon Date: Fri, 21 Nov 2025 14:00:50 -0600 Subject: [PATCH 06/14] WIP: get sets --- pyabc2/sources/the_session.py | 42 +++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/pyabc2/sources/the_session.py b/pyabc2/sources/the_session.py index 6c2866b..482f0a3 100644 --- a/pyabc2/sources/the_session.py +++ b/pyabc2/sources/the_session.py @@ -578,6 +578,48 @@ def get_tune_collections(tune_id: int) -> "pandas.DataFrame": ) +def get_member_set(member_id: int, set_id: int) -> List[Result]: + import requests + + url = f"https://thesession.org/members/{member_id}/sets/{set_id}?format=json" + r = requests.get(url) + r.raise_for_status() + data = r.json() + + results = [] + for setting in data["settings"]: + url = setting["url"] + tune_id = int(url.split("/")[-1].split("#")[0]) + d: Result = { + "name": setting["name"], + "tune_id": tune_id, + "setting_id": setting["id"], + "type": setting["type"], + "key": setting["key"], + "starts": starts(setting["abc"].replace("! ", "")), + "name_input": setting["name"], + } + results.append(d) + + return results + + +def get_member_sets(member_id: int) -> list[list[Result]]: + import requests + + url = f"https://thesession.org/members/{member_id}/sets?format=json" + r = requests.get(url) + r.raise_for_status() + data = r.json() + + sets = [] + for set in data["sets"]: + set_id = set["id"] + sets.append(get_member_set(member_id, set_id)) + + return sets + + if __name__ == "__main__": # pragma: no cover tune = load_url("https://thesession.org/tunes/10000") print(tune) From 87e7080c0efd1b70f27db8d830a488f7fe03e347 Mon Sep 17 00:00:00 2001 From: zmoon Date: Mon, 19 Jan 2026 17:10:29 -0600 Subject: [PATCH 07/14] Use consume func --- pyabc2/sources/the_session.py | 20 ++++++-------------- 1 file changed, 6 insertions(+), 14 deletions(-) diff --git a/pyabc2/sources/the_session.py b/pyabc2/sources/the_session.py index 916cffa..c33177a 100644 --- a/pyabc2/sources/the_session.py +++ b/pyabc2/sources/the_session.py @@ -564,9 +564,9 @@ def get_tune_collections(tune_id: int) -> "pandas.DataFrame": import pandas as pd endpoint = f"/tunes/{tune_id}/collections" - (result,) = _consume(endpoint) + (data,) = _consume(endpoint) - return pd.DataFrame(result["collections"]).rename( + return pd.DataFrame(data["collections"]).rename( columns={ "id": "collection_id", "name": "collection_name", @@ -579,12 +579,8 @@ def get_tune_collections(tune_id: int) -> "pandas.DataFrame": def get_member_set(member_id: int, set_id: int) -> list[dict]: - import requests - - url = f"https://thesession.org/members/{member_id}/sets/{set_id}?format=json" - r = requests.get(url) - r.raise_for_status() - data = r.json() + endpoint = f"/members/{member_id}/sets/{set_id}" + (data,) = _consume(endpoint) results = [] for setting in data["settings"]: @@ -604,12 +600,8 @@ def get_member_set(member_id: int, set_id: int) -> list[dict]: def get_member_sets(member_id: int) -> list[list[dict]]: - import requests - - url = f"https://thesession.org/members/{member_id}/sets?format=json" - r = requests.get(url) - r.raise_for_status() - data = r.json() + endpoint = f"/members/{member_id}/sets" + (data,) = _consume(endpoint) sets = [] for set in data["sets"]: From 513e0a543e2f88e7bc173c07a05e416deaa5d453 Mon Sep 17 00:00:00 2001 From: zmoon Date: Mon, 19 Jan 2026 21:57:37 -0600 Subject: [PATCH 08/14] Support member having more than one page of sets --- pyabc2/sources/the_session.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/pyabc2/sources/the_session.py b/pyabc2/sources/the_session.py index c33177a..3a772bf 100644 --- a/pyabc2/sources/the_session.py +++ b/pyabc2/sources/the_session.py @@ -14,6 +14,7 @@ import logging import os import warnings +from itertools import chain from pathlib import Path from typing import TYPE_CHECKING, Literal @@ -564,9 +565,9 @@ def get_tune_collections(tune_id: int) -> "pandas.DataFrame": import pandas as pd endpoint = f"/tunes/{tune_id}/collections" - (data,) = _consume(endpoint) + (res,) = _consume(endpoint) - return pd.DataFrame(data["collections"]).rename( + return pd.DataFrame(res["collections"]).rename( columns={ "id": "collection_id", "name": "collection_name", @@ -580,10 +581,10 @@ def get_tune_collections(tune_id: int) -> "pandas.DataFrame": def get_member_set(member_id: int, set_id: int) -> list[dict]: endpoint = f"/members/{member_id}/sets/{set_id}" - (data,) = _consume(endpoint) + (res,) = _consume(endpoint) - results = [] - for setting in data["settings"]: + tunes = [] + for setting in res["settings"]: url = setting["url"] tune_id = int(url.split("/")[-1].split("#")[0]) d = { @@ -592,19 +593,18 @@ def get_member_set(member_id: int, set_id: int) -> list[dict]: "setting_id": setting["id"], "type": setting["type"], "key": setting["key"], - "name_input": setting["name"], } - results.append(d) + tunes.append(d) - return results + return tunes def get_member_sets(member_id: int) -> list[list[dict]]: endpoint = f"/members/{member_id}/sets" - (data,) = _consume(endpoint) + results = _consume(endpoint, max_threads=4) sets = [] - for set in data["sets"]: + for set in chain.from_iterable(res["sets"] for res in results): set_id = set["id"] sets.append(get_member_set(member_id, set_id)) From d7b391d9612fdadd3b65992cb4b8fdf6d76ad655 Mon Sep 17 00:00:00 2001 From: zmoon Date: Mon, 19 Jan 2026 22:09:05 -0600 Subject: [PATCH 09/14] The set results are all included in the sets response --- pyabc2/sources/the_session.py | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/pyabc2/sources/the_session.py b/pyabc2/sources/the_session.py index 3a772bf..8ca4fa1 100644 --- a/pyabc2/sources/the_session.py +++ b/pyabc2/sources/the_session.py @@ -579,17 +579,22 @@ def get_tune_collections(tune_id: int) -> "pandas.DataFrame": ) +def _tune_id_from_url(url: str) -> int: + from urllib.parse import urlsplit + + res = urlsplit(url) + return int(res.path.split("/")[-1]) + + def get_member_set(member_id: int, set_id: int) -> list[dict]: endpoint = f"/members/{member_id}/sets/{set_id}" (res,) = _consume(endpoint) tunes = [] for setting in res["settings"]: - url = setting["url"] - tune_id = int(url.split("/")[-1].split("#")[0]) d = { "name": setting["name"], - "tune_id": tune_id, + "tune_id": _tune_id_from_url(setting["url"]), "setting_id": setting["id"], "type": setting["type"], "key": setting["key"], @@ -605,8 +610,18 @@ def get_member_sets(member_id: int) -> list[list[dict]]: sets = [] for set in chain.from_iterable(res["sets"] for res in results): - set_id = set["id"] - sets.append(get_member_set(member_id, set_id)) + sets.append( + [ + { + "name": setting["name"], + "tune_id": _tune_id_from_url(setting["url"]), + "setting_id": setting["id"], + "type": setting["type"], + "key": setting["key"], + } + for setting in set["settings"] + ] + ) return sets From f1438155eb1e8020bef1c163bacca13a306c3862 Mon Sep 17 00:00:00 2001 From: zmoon Date: Mon, 19 Jan 2026 22:18:35 -0600 Subject: [PATCH 10/14] Add some tests --- tests/test_sources.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/tests/test_sources.py b/tests/test_sources.py index 84b86d4..f45b253 100644 --- a/tests/test_sources.py +++ b/tests/test_sources.py @@ -474,3 +474,26 @@ def test_bill_black_load(): lst = bill_black.load_meta() assert len(lst) > 0 assert lst[0].startswith("X:") + + +def test_the_session_get_tune_collections(): + df = the_session.get_tune_collections(1) # Cooley's + assert not df.empty + + +def test_the_session_get_member_set(): + tunes = the_session.get_member_set(65013, 106212) + assert len(tunes) == 3 + d = tunes[0] + assert d["name"] == "Garech's Wedding" + assert d["tune_id"] == 2620 + assert d["setting_id"] == 31341 + + +def test_the_session_get_member_sets(): + sets = the_session.get_member_sets(65013) + assert len(sets) >= 1 + d = sets[0][0] + assert d["name"] == "Garech's Wedding" + assert d["tune_id"] == 2620 + assert d["setting_id"] == 31341 From 5ccc65b94a7d063abd79acdb4817ca78175b0228 Mon Sep 17 00:00:00 2001 From: zmoon Date: Tue, 20 Jan 2026 09:53:02 -0600 Subject: [PATCH 11/14] Add more tests --- pyabc2/sources/the_session.py | 7 +++++-- tests/test_sources.py | 21 +++++++++++++++++++++ 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/pyabc2/sources/the_session.py b/pyabc2/sources/the_session.py index 8ca4fa1..fc62314 100644 --- a/pyabc2/sources/the_session.py +++ b/pyabc2/sources/the_session.py @@ -604,9 +604,12 @@ def get_member_set(member_id: int, set_id: int) -> list[dict]: return tunes -def get_member_sets(member_id: int) -> list[list[dict]]: +def get_member_sets(member_id: int, **kwargs) -> list[list[dict]]: endpoint = f"/members/{member_id}/sets" - results = _consume(endpoint, max_threads=4) + + if "max_threads" not in kwargs: + kwargs["max_threads"] = 4 + results = _consume(endpoint, **kwargs) sets = [] for set in chain.from_iterable(res["sets"] for res in results): diff --git a/tests/test_sources.py b/tests/test_sources.py index f45b253..7c4b1f0 100644 --- a/tests/test_sources.py +++ b/tests/test_sources.py @@ -497,3 +497,24 @@ def test_the_session_get_member_sets(): assert d["name"] == "Garech's Wedding" assert d["tune_id"] == 2620 assert d["setting_id"] == 31341 + + +def test_the_session_get_member_sets_multipage(): + sets = the_session.get_member_sets(1, pages=3, size=2, max_threads=2) + assert len(sets) == 6 + d = sets[0][0] + assert d["name"] == "Toss The Feathers" + assert d["tune_id"] == 138 + + +def test_the_session_consume_validation(): + f = the_session.get_member_sets + + with pytest.raises(ValueError): + _ = f(1, size=1000) + + with pytest.raises(ValueError): + _ = f(1, pages=0) + + with pytest.raises(ValueError): + _ = f(1, max_threads=0) From 488d6203086decfea5cf778e5138a4e19e796914 Mon Sep 17 00:00:00 2001 From: zmoon Date: Tue, 20 Jan 2026 10:01:05 -0600 Subject: [PATCH 12/14] Test auto-adding the leading forward slash --- tests/test_sources.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/test_sources.py b/tests/test_sources.py index 7c4b1f0..780d80d 100644 --- a/tests/test_sources.py +++ b/tests/test_sources.py @@ -518,3 +518,8 @@ def test_the_session_consume_validation(): with pytest.raises(ValueError): _ = f(1, max_threads=0) + + +def test_the_session_consume_auto_leading_slash(): + (d,) = the_session._consume("tunes/22878") + assert d["name"] == "Jack Farrell's" From 9b03f7b21a8684d2e3f9c85b679d2bdc8f0d5d35 Mon Sep 17 00:00:00 2001 From: zmoon Date: Tue, 20 Jan 2026 11:12:55 -0600 Subject: [PATCH 13/14] More specific ValueError check --- tests/test_sources.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_sources.py b/tests/test_sources.py index 780d80d..1a23492 100644 --- a/tests/test_sources.py +++ b/tests/test_sources.py @@ -510,13 +510,13 @@ def test_the_session_get_member_sets_multipage(): def test_the_session_consume_validation(): f = the_session.get_member_sets - with pytest.raises(ValueError): + with pytest.raises(ValueError, match="`size`"): _ = f(1, size=1000) - with pytest.raises(ValueError): + with pytest.raises(ValueError, match="`pages`"): _ = f(1, pages=0) - with pytest.raises(ValueError): + with pytest.raises(ValueError, match="`max_threads`"): _ = f(1, max_threads=0) From 6b5c1f3c17eb436767a311453e31ae03fe387272 Mon Sep 17 00:00:00 2001 From: zmoon Date: Tue, 20 Jan 2026 11:20:58 -0600 Subject: [PATCH 14/14] doc --- pyabc2/sources/the_session.py | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/pyabc2/sources/the_session.py b/pyabc2/sources/the_session.py index fc62314..d608be3 100644 --- a/pyabc2/sources/the_session.py +++ b/pyabc2/sources/the_session.py @@ -587,6 +587,17 @@ def _tune_id_from_url(url: str) -> int: def get_member_set(member_id: int, set_id: int) -> list[dict]: + """Get information about the tunes in a specific member's set. + + Parameters + ---------- + member_id + Numeric identifier of the member on The Session. + For example, Jeremy is ``1`` (https://thesession.org/members/1). + set_id + Numeric identifier of the set belonging to ``member_id``. + """ + endpoint = f"/members/{member_id}/sets/{set_id}" (res,) = _consume(endpoint) @@ -605,6 +616,22 @@ def get_member_set(member_id: int, set_id: int) -> list[dict]: def get_member_sets(member_id: int, **kwargs) -> list[list[dict]]: + """Get information about all sets belonging to a specific member. + + Parameters + ---------- + member_id + Numeric identifier of the member on The Session. + For example, Jeremy is ``1`` (https://thesession.org/members/1). + **kwargs + Additional parameters passed to :func:`_consume`, + e.g. ``pages``, ``size``, ``max_threads``. + + See Also + -------- + get_member_set + """ + endpoint = f"/members/{member_id}/sets" if "max_threads" not in kwargs: