From 9f697d0462acf9f457d4a35e7e3cf9b3879b30fb Mon Sep 17 00:00:00 2001 From: zmoon Date: Tue, 15 Apr 2025 13:27:07 -0500 Subject: [PATCH 01/23] Only HTTP currently --- pyabc2/sources/bill_black.py | 5 +++++ tests/test_sources.py | 14 ++++++++++++++ 2 files changed, 19 insertions(+) create mode 100644 pyabc2/sources/bill_black.py diff --git a/pyabc2/sources/bill_black.py b/pyabc2/sources/bill_black.py new file mode 100644 index 0000000..187c1f7 --- /dev/null +++ b/pyabc2/sources/bill_black.py @@ -0,0 +1,5 @@ +""" +Bill Black's Irish Traditional Tune Library + +http://www.capeirish.com/ittl/tunefolders/ +""" diff --git a/tests/test_sources.py b/tests/test_sources.py index 72d03ed..e969b1f 100644 --- a/tests/test_sources.py +++ b/tests/test_sources.py @@ -249,3 +249,17 @@ def test_load_url_norbeck(): def test_load_url_invalid_domain(): with pytest.raises(NotImplementedError): _ = load_url("https://www.google.com") + + +def test_bill_black_no_https(): + import requests + + url = "http://www.capeirish.com/ittl/tunefolders/" + url_https = url.replace("http://", "https://") + + r = requests.head(url, timeout=5) + r.raise_for_status() + + with pytest.raises(requests.exceptions.SSLError): + r = requests.head(url_https, timeout=5) + r.raise_for_status() From d66b68f60fd18a88901fe40a346ba3d0d9b11a28 Mon Sep 17 00:00:00 2001 From: zmoon Date: Tue, 15 Apr 2025 15:07:51 -0500 Subject: [PATCH 02/23] Download some collections --- pyabc2/sources/bill_black.py | 155 +++++++++++++++++++++++++++++++++++ 1 file changed, 155 insertions(+) diff --git a/pyabc2/sources/bill_black.py b/pyabc2/sources/bill_black.py index 187c1f7..4eff797 100644 --- a/pyabc2/sources/bill_black.py +++ b/pyabc2/sources/bill_black.py @@ -3,3 +3,158 @@ http://www.capeirish.com/ittl/tunefolders/ """ + +from dataclasses import dataclass, field +from pathlib import Path +from typing import List, Optional, Union + +HERE = Path(__file__).parent + +ITTL = "http://www.capeirish.com/ittl/" +SAVE_TO = HERE / "_bill-black" 
+ + +@dataclass +class Collection: + key: str + title: str + folder: str + volumes: List[str] = field(default_factory=list) + urls: List[str] = field(default_factory=list) + + @property + def abc_urls(self) -> List[str]: + if self.urls: + return self.urls + else: + num = self.folder + if self.volumes: + return [f"{ITTL}tunefolders/{num}/{vol}/{vol}-ABC.rtf" for vol in self.volumes] + else: + return [f"{ITTL}tunefolders/{num}/{num}-ABC.rtf"] + + @property + def files(self) -> List[Path]: + return [self.url_to_file(url) for url in self.abc_urls] + + def url_to_file(self, url: str) -> Path: + """Convert a URL to a file path.""" + return SAVE_TO / f"{url.split('/')[-1]}.gz" + + +COLLECTIONS: List[Collection] = [ + Collection( + key="aif", + title="Allan's Irish Fiddler", + folder="11", + ), + Collection( + key="bbmg", + title="BB's Mostly Gems", + folder="12", + urls=[ + f"{ITTL}bbmg/{char}-tunes-ABC.rtf" + for char in [ + "A", + "B", + "C", + "D", + "E", + "F", + "G", + "H", + "I-J", + "K", + "L", + "M", + "N", + "O", + "P-Q", + "R", + "S", + "T", + "U-Y", + ] + ], + ), + Collection( + key="bs", + title="Bulmer & Sharpley", + folder="13", + volumes=["131", "132", "133", "134"], + ), + # http://www.capeirish.com/ittl/tunefolders/18/181/181-ABC.rtf + Collection( + key="cre", + title="Ceol Rince na hÉireann", + folder="18", + volumes=["181", "182", "183", "184", "185"], + ), + Collection( + key="dmi", + title="Dance Music of Ireland", + folder="21", + ), + Collection( + key="dmwc", + title="Dance Music of Willie Clancy", + folder="22", + ), + Collection( + key="foinn", + title="Foinn Seisiún", + folder="25", + volumes=["251", "252", "253"], + ), + Collection( + key="ofpc", + title="O'Farrell's Pocket Companion", + folder="48", + volumes=["481", "482", "483", "484"], + ), + Collection( + key="moi", + title="Music of Ireland", + folder="49", + volumes=["491", "492", "493", "494", "495", "496", "497"], + ), + Collection( + key="roche", + title="Roche Collection", + 
folder="53", + volumes=["531", "532", "533", "534"], # TODO: 535 is missing + ), +] + +_KEY_TO_COLLECTION = {c.key: c for c in COLLECTIONS} + + +def download(key: Optional[Union[str, List[str]]] = None, *, verbose: bool = False) -> None: + import gzip + + import requests + + SAVE_TO.mkdir(exist_ok=True) + + if key is None: + collections = COLLECTIONS + elif isinstance(key, str): + collections = [_KEY_TO_COLLECTION[key]] + else: + collections = [_KEY_TO_COLLECTION[k] for k in key] + + for collection in collections: + for url in collection.abc_urls: + p = collection.url_to_file(url) + if verbose: + print(f"Downloading {url} to {p.relative_to(HERE).as_posix()}") + r = requests.get(url, timeout=5) + r.raise_for_status() + + # Extract filename from URL and append .gz + with gzip.open(p, "wb") as f: + f.write(r.content) + + +if __name__ == "__main__": + download(verbose=True) From 9fc094c8b89fe6076589dbf188ff29f97627d6d8 Mon Sep 17 00:00:00 2001 From: zmoon Date: Tue, 15 Apr 2025 15:17:44 -0500 Subject: [PATCH 03/23] Add a few more --- pyabc2/sources/bill_black.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/pyabc2/sources/bill_black.py b/pyabc2/sources/bill_black.py index 4eff797..0e8b900 100644 --- a/pyabc2/sources/bill_black.py +++ b/pyabc2/sources/bill_black.py @@ -83,6 +83,11 @@ def url_to_file(self, url: str) -> Path: folder="13", volumes=["131", "132", "133", "134"], ), + Collection( + key="car", + title="Carolan Tunes", + folder="14", + ), # http://www.capeirish.com/ittl/tunefolders/18/181/181-ABC.rtf Collection( key="cre", @@ -106,6 +111,17 @@ def url_to_file(self, url: str) -> Path: folder="25", volumes=["251", "252", "253"], ), + Collection( + key="jol", + title="Johnny O'Leary of Sliabh Luachra", + folder="31", + ), + Collection( + key="levey", + title="Levey Collection", + folder="33", + volumes=["331", "332"], + ), Collection( key="ofpc", title="O'Farrell's Pocket Companion", From 992538d1bca212db0d44d040e4e83b2422482edd Mon Sep 17 
00:00:00 2001 From: zmoon Date: Tue, 15 Apr 2025 15:44:28 -0500 Subject: [PATCH 04/23] Towards ABC extraction --- pyabc2/sources/bill_black.py | 59 +++++++++++++++++++++++++++++++++--- 1 file changed, 54 insertions(+), 5 deletions(-) diff --git a/pyabc2/sources/bill_black.py b/pyabc2/sources/bill_black.py index 0e8b900..046874b 100644 --- a/pyabc2/sources/bill_black.py +++ b/pyabc2/sources/bill_black.py @@ -6,7 +6,7 @@ from dataclasses import dataclass, field from pathlib import Path -from typing import List, Optional, Union +from typing import Iterable, List, Optional, Union HERE = Path(__file__).parent @@ -145,7 +145,18 @@ def url_to_file(self, url: str) -> Path: _KEY_TO_COLLECTION = {c.key: c for c in COLLECTIONS} -def download(key: Optional[Union[str, List[str]]] = None, *, verbose: bool = False) -> None: +def get_collection(key: str) -> Collection: + """Get a collection by key.""" + try: + return _KEY_TO_COLLECTION[key] + except KeyError as e: + raise ValueError( + f"Unknown collection key: {key!r}. " + f"Valid keys are: {sorted(c.key for c in COLLECTIONS)}." 
+ ) from e + + +def download(key: Optional[Union[str, Iterable[str]]] = None, *, verbose: bool = False) -> None: import gzip import requests @@ -155,9 +166,9 @@ def download(key: Optional[Union[str, List[str]]] = None, *, verbose: bool = Fal if key is None: collections = COLLECTIONS elif isinstance(key, str): - collections = [_KEY_TO_COLLECTION[key]] + collections = [get_collection(key)] else: - collections = [_KEY_TO_COLLECTION[k] for k in key] + collections = [get_collection(k) for k in key] for collection in collections: for url in collection.abc_urls: @@ -172,5 +183,43 @@ def download(key: Optional[Union[str, List[str]]] = None, *, verbose: bool = Fal f.write(r.content) +def load_meta(key: str, *, redownload: bool = False) -> List[str]: + """Load the tunebook data, no parsing.""" + + import gzip + import re + + collection = get_collection(key) + if redownload or any(not p.is_file() for p in collection.files): + print("downloading...", end=" ", flush=True) + download(key=collection.key, verbose=False) + print("done") + + abcs = [] + for p in collection.files: + print(p) + with gzip.open(p, "rt") as f: + text = f.read() + + # A tune block starts with the X: line and ends with a %%% line + # or the end of the file. 
+ + # Find the start of the first tune, in order to skip header info + start = text.find("X:") + if start == -1: + raise RuntimeError(f"Could not find start of tune in {p.name}") + + # Split on 3 or more % + blocks = re.split(r"\s*%{3,}\s*", text[start:]) + if not blocks: + raise RuntimeError(f"Splitting blocks failed for {p.name}") + + abcs.extend(blocks) + + return abcs + + if __name__ == "__main__": - download(verbose=True) + # download(verbose=True) + + abcs = load_meta("cre") From b0d70da3144ab6e7ad8b9ec47aa55db17a3b3a45 Mon Sep 17 00:00:00 2001 From: zmoon Date: Wed, 23 Apr 2025 21:39:27 -0500 Subject: [PATCH 05/23] Clean up ABC blocks from RTF may be more to do, but cre looks pretty good, at least the few I checked striprtf package may be another more robust way --- pyabc2/sources/bill_black.py | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/pyabc2/sources/bill_black.py b/pyabc2/sources/bill_black.py index 046874b..addf99f 100644 --- a/pyabc2/sources/bill_black.py +++ b/pyabc2/sources/bill_black.py @@ -201,6 +201,12 @@ def load_meta(key: str, *, redownload: bool = False) -> List[str]: with gzip.open(p, "rt") as f: text = f.read() + # Replace \\\n with just \n + text = text.replace("\\\n", "\n") + + # Continuation + text = text.replace("\\\\", "\\") + # A tune block starts with the X: line and ends with a %%% line # or the end of the file. 
@@ -214,7 +220,23 @@ def load_meta(key: str, *, redownload: bool = False) -> List[str]: if not blocks: raise RuntimeError(f"Splitting blocks failed for {p.name}") - abcs.extend(blocks) + good_blocks = [] + for i, block in enumerate(blocks): + if not block.strip(): + continue + + if re.fullmatch(r"[0-9]+ deleted", block) is not None: + continue + + if not block.startswith("X:"): + continue + + # Remove anything that may be after the final bar symbol + j = max(block.rfind("]"), block.rfind("|")) + assert j != -1 + good_blocks.append(block[: j + 1]) + + abcs.extend(good_blocks) return abcs From 661a43457bbae21280fe715f85fdb6b19f16eed7 Mon Sep 17 00:00:00 2001 From: zmoon Date: Wed, 23 Apr 2025 22:09:05 -0500 Subject: [PATCH 06/23] Debug --- pyabc2/sources/bill_black.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/pyabc2/sources/bill_black.py b/pyabc2/sources/bill_black.py index addf99f..5b01ce0 100644 --- a/pyabc2/sources/bill_black.py +++ b/pyabc2/sources/bill_black.py @@ -4,10 +4,13 @@ http://www.capeirish.com/ittl/tunefolders/ """ +import logging from dataclasses import dataclass, field from pathlib import Path from typing import Iterable, List, Optional, Union +logger = logging.getLogger(__name__) + HERE = Path(__file__).parent ITTL = "http://www.capeirish.com/ittl/" @@ -223,18 +226,26 @@ def load_meta(key: str, *, redownload: bool = False) -> List[str]: good_blocks = [] for i, block in enumerate(blocks): if not block.strip(): + logger.debug(f"Empty block {i} in {p.name}") continue if re.fullmatch(r"[0-9]+ deleted", block) is not None: + logger.debug(f"Tune in block {i} in {p.name} marked as deleted: {block!r}") continue if not block.startswith("X:"): + logger.debug(f"Block {i} in {p.name} does not start with `X:`: {block!r}") continue # Remove anything that may be after the final bar symbol j = max(block.rfind("]"), block.rfind("|")) assert j != -1 good_blocks.append(block[: j + 1]) + if j < len(block) - 1: + logger.info( + f"Block {i} in 
{p.name} has trailing data after the final bar symbol " + f"that will be ignored: {block[j+1:]!r}" + ) abcs.extend(good_blocks) @@ -244,4 +255,9 @@ def load_meta(key: str, *, redownload: bool = False) -> List[str]: if __name__ == "__main__": # download(verbose=True) + logging.basicConfig( + level=logging.DEBUG, + format="%(levelname)s:%(message)s", + ) + abcs = load_meta("cre") From a43b7339f37a130cde2f9f3b6849180e46fde30d Mon Sep 17 00:00:00 2001 From: zmoon Date: Mon, 19 Jan 2026 10:43:08 -0600 Subject: [PATCH 07/23] Update ITTL link and typing --- pyabc2/sources/bill_black.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/pyabc2/sources/bill_black.py b/pyabc2/sources/bill_black.py index 5b01ce0..89a5912 100644 --- a/pyabc2/sources/bill_black.py +++ b/pyabc2/sources/bill_black.py @@ -1,13 +1,13 @@ """ Bill Black's Irish Traditional Tune Library -http://www.capeirish.com/ittl/tunefolders/ +http://www.capeirish.com/ittl/ """ import logging +from collections.abc import Iterable from dataclasses import dataclass, field from pathlib import Path -from typing import Iterable, List, Optional, Union logger = logging.getLogger(__name__) @@ -22,11 +22,11 @@ class Collection: key: str title: str folder: str - volumes: List[str] = field(default_factory=list) - urls: List[str] = field(default_factory=list) + volumes: list[str] = field(default_factory=list) + urls: list[str] = field(default_factory=list) @property - def abc_urls(self) -> List[str]: + def abc_urls(self) -> list[str]: if self.urls: return self.urls else: @@ -37,7 +37,7 @@ def abc_urls(self) -> List[str]: return [f"{ITTL}tunefolders/{num}/{num}-ABC.rtf"] @property - def files(self) -> List[Path]: + def files(self) -> list[Path]: return [self.url_to_file(url) for url in self.abc_urls] def url_to_file(self, url: str) -> Path: @@ -45,7 +45,7 @@ def url_to_file(self, url: str) -> Path: return SAVE_TO / f"{url.split('/')[-1]}.gz" -COLLECTIONS: List[Collection] = [ +COLLECTIONS: 
list[Collection] = [ Collection( key="aif", title="Allan's Irish Fiddler", @@ -159,7 +159,7 @@ def get_collection(key: str) -> Collection: ) from e -def download(key: Optional[Union[str, Iterable[str]]] = None, *, verbose: bool = False) -> None: +def download(key: str | Iterable[str] | None = None, *, verbose: bool = False) -> None: import gzip import requests @@ -186,7 +186,7 @@ def download(key: Optional[Union[str, Iterable[str]]] = None, *, verbose: bool = f.write(r.content) -def load_meta(key: str, *, redownload: bool = False) -> List[str]: +def load_meta(key: str, *, redownload: bool = False) -> list[str]: """Load the tunebook data, no parsing.""" import gzip From 3317d21162c1afb8cf2aff69adbde622d1b3ec48 Mon Sep 17 00:00:00 2001 From: zmoon Date: Mon, 19 Jan 2026 11:25:03 -0600 Subject: [PATCH 08/23] Update BB tunefolders for the mid 2025 changes some don't work anymore because of lack of RTF file and two have an unexpected dir structure --- ...ill_black.py => bill_black_tunefolders.py} | 79 ++++++++++--------- tests/test_sources.py | 25 +++++- 2 files changed, 64 insertions(+), 40 deletions(-) rename pyabc2/sources/{bill_black.py => bill_black_tunefolders.py} (76%) diff --git a/pyabc2/sources/bill_black.py b/pyabc2/sources/bill_black_tunefolders.py similarity index 76% rename from pyabc2/sources/bill_black.py rename to pyabc2/sources/bill_black_tunefolders.py index 89a5912..681ef0b 100644 --- a/pyabc2/sources/bill_black.py +++ b/pyabc2/sources/bill_black_tunefolders.py @@ -2,6 +2,11 @@ Bill Black's Irish Traditional Tune Library http://www.capeirish.com/ittl/ + +As of the 2025-06-14 update, the "tunefolders" method is deprecated. +Bill Black is now using the Eskin ABC Tools (http://www.capeirish.com/ittl/alltunes/html/), +while also posting ABC text files (http://www.capeirish.com/ittl/alltunes/text/), +both split up alphabetically by tune name. 
""" import logging @@ -14,7 +19,7 @@ HERE = Path(__file__).parent ITTL = "http://www.capeirish.com/ittl/" -SAVE_TO = HERE / "_bill-black" +SAVE_TO = HERE / "_bill-black_tunefolders" @dataclass @@ -22,7 +27,7 @@ class Collection: key: str title: str folder: str - volumes: list[str] = field(default_factory=list) + subfolders: list[str] = field(default_factory=list) urls: list[str] = field(default_factory=list) @property @@ -31,8 +36,11 @@ def abc_urls(self) -> list[str]: return self.urls else: num = self.folder - if self.volumes: - return [f"{ITTL}tunefolders/{num}/{vol}/{vol}-ABC.rtf" for vol in self.volumes] + if self.subfolders: + return [ + f"{ITTL}tunefolders/{num}/{subfolder}/{subfolder}-ABC.rtf" + for subfolder in self.subfolders + ] else: return [f"{ITTL}tunefolders/{num}/{num}-ABC.rtf"] @@ -55,93 +63,88 @@ def url_to_file(self, url: str) -> Path: key="bbmg", title="BB's Mostly Gems", folder="12", - urls=[ - f"{ITTL}bbmg/{char}-tunes-ABC.rtf" - for char in [ - "A", - "B", - "C", - "D", - "E", - "F", - "G", - "H", - "I-J", - "K", - "L", - "M", - "N", - "O", - "P-Q", - "R", - "S", - "T", - "U-Y", - ] - ], + subfolders=["12-AE", "12-FJ", "12-KQ", "12-RST", "12-UY"], ), Collection( key="bs", title="Bulmer & Sharpley", folder="13", - volumes=["131", "132", "133", "134"], + subfolders=["13-hps", "13-jigs", "13-misc", "13-p&s", "13-reels", "13-sjigs"], ), Collection( key="car", title="Carolan Tunes", folder="14", ), - # http://www.capeirish.com/ittl/tunefolders/18/181/181-ABC.rtf Collection( key="cre", title="Ceol Rince na hÉireann", folder="18", - volumes=["181", "182", "183", "184", "185"], + subfolders=["18-hornpipes", "18-jigs", "18-polkas_slides", "18-reels", "18-slipjigs"], ), Collection( key="dmi", title="Dance Music of Ireland", folder="21", + subfolders=["hps", "jigs", "reels", "slipjigs"], ), Collection( key="dmwc", title="Dance Music of Willie Clancy", folder="22", + subfolders=["22-hps", "22-jigs", "22-misc", "22-reels", "22-sjigs"], ), Collection( 
key="foinn", title="Foinn Seisiún", folder="25", - volumes=["251", "252", "253"], + subfolders=["hps", "jigs", "misc", "p&s", "reels"], ), Collection( key="jol", title="Johnny O'Leary of Sliabh Luachra", folder="31", + subfolders=["31-hps", "31-jigs", "31-misc", "31-polkas", "31-reels", "31-slides"], ), Collection( key="levey", title="Levey Collection", folder="33", - volumes=["331", "332"], + subfolders=["33-hps", "33-jigs", "33-marches", "33-reels", "33-sjigs"], ), Collection( key="ofpc", title="O'Farrell's Pocket Companion", folder="48", - volumes=["481", "482", "483", "484"], + subfolders=[ + "48-hps", + "48-jigs", + "48-marches", + "48-misc", + "48-polkas", + "48-reels", + "48-sjigs", + ], ), Collection( key="moi", title="Music of Ireland", folder="49", - volumes=["491", "492", "493", "494", "495", "496", "497"], + subfolders=[ + "491-airs", + "492-hps", + "493-jigs", + "494-misc", + "495-reels", + "496-sjigs", + "497-arr", + ], ), Collection( key="roche", title="Roche Collection", folder="53", - volumes=["531", "532", "533", "534"], # TODO: 535 is missing + subfolders=["53-hps", "53-jigs", "53-misc", "53-polkas", "53-reels", "53-sjigs"], ), ] @@ -178,7 +181,7 @@ def download(key: str | Iterable[str] | None = None, *, verbose: bool = False) - p = collection.url_to_file(url) if verbose: print(f"Downloading {url} to {p.relative_to(HERE).as_posix()}") - r = requests.get(url, timeout=5) + r = requests.get(url, headers={"User-Agent": "pyabc2"}, timeout=5) r.raise_for_status() # Extract filename from URL and append .gz @@ -260,4 +263,4 @@ def load_meta(key: str, *, redownload: bool = False) -> list[str]: format="%(levelname)s:%(message)s", ) - abcs = load_meta("cre") + abcs = load_meta("aif") diff --git a/tests/test_sources.py b/tests/test_sources.py index 63fcb67..a063f43 100644 --- a/tests/test_sources.py +++ b/tests/test_sources.py @@ -6,6 +6,7 @@ from pyabc2 import Key from pyabc2.parse import Tune from pyabc2.sources import ( + bill_black_tunefolders, eskin, 
examples, load_example, @@ -415,14 +416,34 @@ def test_eskin_invalid_tunebook_key(): def test_bill_black_no_https(): + # If the site does get HTTPS, we'd like to know import requests url = "http://www.capeirish.com/ittl/tunefolders/" url_https = url.replace("http://", "https://") - r = requests.head(url, timeout=5) + r = requests.head(url, headers={"User-Agent": "pyabc2"}, timeout=5) r.raise_for_status() with pytest.raises(requests.exceptions.SSLError): - r = requests.head(url_https, timeout=5) + r = requests.head(url_https, headers={"User-Agent": "pyabc2"}, timeout=5) r.raise_for_status() + + +@pytest.mark.parametrize("key", list(bill_black_tunefolders._KEY_TO_COLLECTION)) +def test_bill_black_tunefolders(key): + import requests + + col = bill_black_tunefolders.get_collection(key) + if int(col.folder) in {14, 18, 21, 25, 49}: + # 14, 18, 25 -- These only have .txt now, not .rtf + # 21 -- some subfolder names don't match the file names + # 49 -- has subsubfolders + with pytest.raises(requests.exceptions.HTTPError) as e: + lst = bill_black_tunefolders.load_meta(key) + assert e.response.status_code == 404 + return + else: + lst = bill_black_tunefolders.load_meta(key) + + assert len(lst) > 0 From 1213b62b9208a4493ebe99e6f0e72d8fcb447ecc Mon Sep 17 00:00:00 2001 From: zmoon Date: Mon, 19 Jan 2026 11:35:51 -0600 Subject: [PATCH 09/23] Use our get-logger func --- pyabc2/sources/bill_black_tunefolders.py | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/pyabc2/sources/bill_black_tunefolders.py b/pyabc2/sources/bill_black_tunefolders.py index 681ef0b..d7b9f4d 100644 --- a/pyabc2/sources/bill_black_tunefolders.py +++ b/pyabc2/sources/bill_black_tunefolders.py @@ -9,12 +9,13 @@ both split up alphabetically by tune name. 
""" -import logging from collections.abc import Iterable from dataclasses import dataclass, field from pathlib import Path -logger = logging.getLogger(__name__) +from pyabc2._util import get_logger as _get_logger + +logger = _get_logger(__name__) HERE = Path(__file__).parent @@ -255,12 +256,7 @@ def load_meta(key: str, *, redownload: bool = False) -> list[str]: return abcs -if __name__ == "__main__": - # download(verbose=True) - - logging.basicConfig( - level=logging.DEBUG, - format="%(levelname)s:%(message)s", - ) +if __name__ == "__main__": # pragma: no cover + logger.setLevel("DEBUG") abcs = load_meta("aif") From f46ddfa74db2d3ebd5c816277377a813cfa61e0b Mon Sep 17 00:00:00 2001 From: zmoon Date: Mon, 19 Jan 2026 11:50:05 -0600 Subject: [PATCH 10/23] Implement downloading BB alltunes/text --- pyabc2/sources/bill_black.py | 71 ++++++++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) create mode 100644 pyabc2/sources/bill_black.py diff --git a/pyabc2/sources/bill_black.py b/pyabc2/sources/bill_black.py new file mode 100644 index 0000000..47056a9 --- /dev/null +++ b/pyabc2/sources/bill_black.py @@ -0,0 +1,71 @@ +""" +Bill Black's Irish Traditional Tune Library + +http://www.capeirish.com/ittl/ +""" + +from pathlib import Path + +HERE = Path(__file__).parent + +SAVE_TO = HERE / "_bill-black" +TXT_FNS = [ + "a-tunes-1.txt", + "b-tunes-1.txt", + "c-tunes-1.txt", + "d-tunes-1.txt", + "e-tunes-1.txt", + "f-tunes-1.txt", + "g-tunes-1.txt", + "h-tunes-1.txt", + "i-tunes-1.txt", + "j-tunes-1.txt", + "k-tunes-1.txt", + "l-tunes-1.txt", + "m-tunes-1.txt", + "n-tunes-1.txt", + "o-tunes-1.txt", + "pq-tunes-1.txt", + "r-tunes-1.txt", + "s-tunes-1.txt", + "s-tunes-2.txt", + "t-tunes-1.txt", + "uv-tunes-1.txt", + "wz-tunes-1.txt", +] + + +def download() -> None: + """Download the alphabetical text files from http://www.capeirish.com/ittl/alltunes/text/ + and store them in a compressed archive. 
+ """ + import zipfile + from concurrent.futures import ThreadPoolExecutor + + import requests + + def download_one(url): + r = requests.get(url, headers={"User-Agent": "pyabc2"}, timeout=5) + r.raise_for_status() + return r.text + + with ThreadPoolExecutor(max_workers=4) as executor: + futures = [] + for fn in TXT_FNS: + url = f"http://www.capeirish.com/ittl/alltunes/text/{fn}" + futures.append(executor.submit(download_one, url)) + + SAVE_TO.mkdir(exist_ok=True) + + with zipfile.ZipFile( + SAVE_TO / "bill_black_alltunes_text.zip", + "w", + compression=zipfile.ZIP_DEFLATED, + ) as zf: + for fn, future in zip(TXT_FNS, futures, strict=True): + text = future.result() + zf.writestr(fn, text) + + +if __name__ == "__main__": # pragma: no cover + download() From e2af64cdf8df3260cc0157427917aeef5fb76e9b Mon Sep 17 00:00:00 2001 From: zmoon Date: Mon, 19 Jan 2026 12:27:47 -0600 Subject: [PATCH 11/23] Initial tune block loader 10 or so blocks are currently rejected, because of PGM notes or bumping up against something, or in one case a missing `X` --- pyabc2/sources/bill_black.py | 48 +++++++++++++++++++++++++++++++++++- 1 file changed, 47 insertions(+), 1 deletion(-) diff --git a/pyabc2/sources/bill_black.py b/pyabc2/sources/bill_black.py index 47056a9..8be1afb 100644 --- a/pyabc2/sources/bill_black.py +++ b/pyabc2/sources/bill_black.py @@ -4,6 +4,7 @@ http://www.capeirish.com/ittl/ """ +import re from pathlib import Path HERE = Path(__file__).parent @@ -67,5 +68,50 @@ def download_one(url): zf.writestr(fn, text) +def load_meta(*, redownload: bool = False) -> list[str]: + """Load the tunebook data, splitting into tune blocks and removing ``%`` lines.""" + import zipfile + from textwrap import indent + + zip_path = SAVE_TO / "bill_black_alltunes_text.zip" + if redownload or not zip_path.is_file(): + print("downloading...", end=" ", flush=True) + download() + print("done") + + tunes = [] + with zipfile.ZipFile(zip_path, "r") as zf: + for zi in zf.filelist: + with zf.open(zi, 
"r") as f: + text = f.read().decode("utf-8") + + # A tune block starts with the X: line and ends with a blank line + # or the end of the file. + # Unlike the RTF files, %%% is not _necessarily_ present as a tune separator. + + # Remove all lines that start with % + text = "\n".join( + line.strip() for line in text.splitlines() if not line.lstrip().startswith("%") + ) + + # Find the start of the first tune, in order to skip header info + start = text.find("X:") + if start == -1: + raise RuntimeError(f"Unable to find first tune in Bill Black file {zi.filename!r}") + + text = text[start:] + + blocks = re.split(r"\n{2,}", text.rstrip()) + for block in blocks: + block = block.strip() + if not block: + continue + elif not block.startswith("X:"): + print(f"skipping non-tune block in {zi.filename!r}:\n{indent(block, '| ')}") + continue + tunes.append(block) + return tunes + + if __name__ == "__main__": # pragma: no cover - download() + tunes = load_meta() From cf20ac4e7534fb461268162161ffa03f353e208a Mon Sep 17 00:00:00 2001 From: zmoon Date: Mon, 19 Jan 2026 12:49:41 -0600 Subject: [PATCH 12/23] Address some skipped blocks --- pyabc2/sources/bill_black.py | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/pyabc2/sources/bill_black.py b/pyabc2/sources/bill_black.py index 8be1afb..1def01d 100644 --- a/pyabc2/sources/bill_black.py +++ b/pyabc2/sources/bill_black.py @@ -106,10 +106,22 @@ def load_meta(*, redownload: bool = False) -> list[str]: block = block.strip() if not block: continue - elif not block.startswith("X:"): - print(f"skipping non-tune block in {zi.filename!r}:\n{indent(block, '| ')}") - continue + + if block.startswith(":313\nT:GRAVEL WALK (reel) (1), The"): + block = "X" + block + + if not block.startswith("X:"): + # First look for tune later in the block + # Some blocks start with comment text, sometimes including other settings but without `X:` + start = block.find("X:") + if start != -1: + block = block[start:] + else: + 
print(f"skipping non-tune block in {zi.filename!r}:\n{indent(block, '| ')}") + continue + tunes.append(block) + return tunes From 117713b589e7f24bd067e06d81a17e50640ea033 Mon Sep 17 00:00:00 2001 From: zmoon Date: Mon, 19 Jan 2026 13:07:01 -0600 Subject: [PATCH 13/23] More special cases Summary so far: * "GRAVEL WALK (reel) (1), The" is missing the `X` at the beginning * "HEATHER BREEZE (reel) (a-2), The" is missing an `X` line (and no newline between it and the "a-3" version) * PGM note in s-tunes-1.txt (and 2) mentions 6/8, but the tunes above and below it are reels (it appears twice, with the same tunes surrounding it (duplicates?)) * missing newline between - 252,253 and 658,666 in c-tunes - 222,223 in d-tunes - 33,34 in e-tunes --- pyabc2/sources/bill_black.py | 32 +++++++++++++++++++++++++++++--- 1 file changed, 29 insertions(+), 3 deletions(-) diff --git a/pyabc2/sources/bill_black.py b/pyabc2/sources/bill_black.py index 1def01d..1cdd542 100644 --- a/pyabc2/sources/bill_black.py +++ b/pyabc2/sources/bill_black.py @@ -82,6 +82,7 @@ def load_meta(*, redownload: bool = False) -> list[str]: tunes = [] with zipfile.ZipFile(zip_path, "r") as zf: for zi in zf.filelist: + fn = zi.filename with zf.open(zi, "r") as f: text = f.read().decode("utf-8") @@ -97,11 +98,26 @@ def load_meta(*, redownload: bool = False) -> list[str]: # Find the start of the first tune, in order to skip header info start = text.find("X:") if start == -1: - raise RuntimeError(f"Unable to find first tune in Bill Black file {zi.filename!r}") + raise RuntimeError(f"Unable to find first tune in Bill Black file {fn!r}") text = text[start:] + # Separate some blocks that are missing empty lines + if fn.startswith("c-tunes"): + to_sep = [253, 666] + elif fn.startswith("d-tunes"): + to_sep = [223] + elif fn.startswith("e-tunes"): + to_sep = [34] + else: + to_sep = [] + for n in to_sep: + text = text.replace(f"X:{n}", f"\nX:{n}") + + expected_num = text.count("X:") + blocks = re.split(r"\n{2,}", 
text.rstrip()) + this_tunes = [] for block in blocks: block = block.strip() if not block: @@ -109,6 +125,7 @@ def load_meta(*, redownload: bool = False) -> list[str]: if block.startswith(":313\nT:GRAVEL WALK (reel) (1), The"): block = "X" + block + expected_num += 1 if not block.startswith("X:"): # First look for tune later in the block @@ -117,10 +134,19 @@ def load_meta(*, redownload: bool = False) -> list[str]: if start != -1: block = block[start:] else: - print(f"skipping non-tune block in {zi.filename!r}:\n{indent(block, '| ')}") + print(f"note: skipping non-tune block in {fn!r}:\n{indent(block, '| ')}") continue - tunes.append(block) + if block.count("X:") > 1: + print(f"warning: multiple X: lines in block in {fn!r}:\n{indent(block, '| ')}") + + this_tunes.append(block) + + actual_num = len(this_tunes) + if actual_num != expected_num: + print(f"warning: expected {expected_num} tunes in {fn!r}, but found {actual_num}") + + tunes.extend(this_tunes) return tunes From c030655e9581276a983413ec04d96c6f8af0166d Mon Sep 17 00:00:00 2001 From: zmoon Date: Mon, 19 Jan 2026 13:56:13 -0600 Subject: [PATCH 14/23] Remove duplicate blocks; assess duplicate X vals notes: * in s-tunes 1 and 2, - there is a high number of duplicate X's - "SNOWY OWL, The" which appears 4x - 62134 is used for two tunes: "STRANGER (hornpipe), The" and "SONNY BROGAN'S JIG (a)" * is the intention to use s-tunes-2 _in place of_ s-tunes-1? 
* even after removing duplicate tune blocks, there are a few cases of duplicate X values in most of the files - for example, in a-tunes, 2, 3, and 5 are each used for 2 different tunes --- pyabc2/sources/bill_black.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/pyabc2/sources/bill_black.py b/pyabc2/sources/bill_black.py index 1cdd542..e1425b8 100644 --- a/pyabc2/sources/bill_black.py +++ b/pyabc2/sources/bill_black.py @@ -71,6 +71,7 @@ def download_one(url): def load_meta(*, redownload: bool = False) -> list[str]: """Load the tunebook data, splitting into tune blocks and removing ``%`` lines.""" import zipfile + from collections import Counter from textwrap import indent zip_path = SAVE_TO / "bill_black_alltunes_text.zip" @@ -146,6 +147,26 @@ def load_meta(*, redownload: bool = False) -> list[str]: if actual_num != expected_num: print(f"warning: expected {expected_num} tunes in {fn!r}, but found {actual_num}") + # Drop fully duplicate tune blocks while preserving order + seen = set() + this_tunes_unique = [] + for block in this_tunes: + if block not in seen: + seen.add(block) + this_tunes_unique.append(block) + if len(this_tunes_unique) < len(this_tunes): + print( + f"note: removed {len(this_tunes) - len(this_tunes_unique)}/{len(this_tunes)} fully duplicate " + f"tune blocks in {fn!r}" + ) + this_tunes = this_tunes_unique + + x_counts = Counter(block.splitlines()[0] for block in this_tunes) + x_count_counts = Counter(x_counts.values()) + if set(x_count_counts) != {1}: + s_counts = ", ".join(f"{m} ({n})" for m, n in sorted(x_count_counts.items())) + print(f"note: non-unique X vals in {fn!r}: {s_counts}") + tunes.extend(this_tunes) return tunes From cac905e87b8506be87dc28d23312d4ca31f4d975 Mon Sep 17 00:00:00 2001 From: zmoon Date: Mon, 19 Jan 2026 14:01:38 -0600 Subject: [PATCH 15/23] Just use s-tunes-2 seems to be the same except some changes related to fixing special characters like the copyright symbol --- pyabc2/sources/bill_black.py 
| 1 - 1 file changed, 1 deletion(-) diff --git a/pyabc2/sources/bill_black.py b/pyabc2/sources/bill_black.py index e1425b8..991e303 100644 --- a/pyabc2/sources/bill_black.py +++ b/pyabc2/sources/bill_black.py @@ -28,7 +28,6 @@ "o-tunes-1.txt", "pq-tunes-1.txt", "r-tunes-1.txt", - "s-tunes-1.txt", "s-tunes-2.txt", "t-tunes-1.txt", "uv-tunes-1.txt", From d316daaf001a99f1fdc89ed04fc23902ca0024c7 Mon Sep 17 00:00:00 2001 From: zmoon Date: Mon, 19 Jan 2026 14:12:56 -0600 Subject: [PATCH 16/23] Add a few tests --- tests/test_sources.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/tests/test_sources.py b/tests/test_sources.py index a063f43..e1615b9 100644 --- a/tests/test_sources.py +++ b/tests/test_sources.py @@ -6,6 +6,7 @@ from pyabc2 import Key from pyabc2.parse import Tune from pyabc2.sources import ( + bill_black, bill_black_tunefolders, eskin, examples, @@ -447,3 +448,24 @@ def test_bill_black_tunefolders(key): lst = bill_black_tunefolders.load_meta(key) assert len(lst) > 0 + + +def test_bill_black_text_fns(): + import requests + + url = "http://www.capeirish.com/ittl/alltunes/text/" + r = requests.get(url, headers={"User-Agent": "pyabc2"}, timeout=5) + r.raise_for_status() + + fns_web = sorted(re.findall(r'href=["\']([a-z0-9\-]+\.txt)["\']', r.text)) + if "s-tunes-1.txt" in fns_web: + # We're using s-tunes-2, not both + fns_web.remove("s-tunes-1.txt") + + assert bill_black.TXT_FNS == fns_web + + +def test_bill_black_load(): + lst = bill_black.load_meta() + assert len(lst) > 0 + assert lst[0].startswith("X:") From 89d961ed9015f794e1827ba66dbabd4ecf7bb905 Mon Sep 17 00:00:00 2001 From: zmoon Date: Mon, 19 Jan 2026 14:25:53 -0600 Subject: [PATCH 17/23] Add BB to docs --- docs/api.rst | 14 ++++++++++++++ docs/examples/sources.ipynb | 37 +++++++++++++++++++++++++++++++++--- pyabc2/sources/bill_black.py | 2 +- 3 files changed, 49 insertions(+), 4 deletions(-) diff --git a/docs/api.rst b/docs/api.rst index c40945c..9bd1154 100644 --- 
a/docs/api.rst +++ b/docs/api.rst @@ -151,6 +151,20 @@ Functions: eskin.abctools_url_to_abc eskin.abc_to_abctools_url +Bill Black +---------- + +.. automodule:: pyabc2.sources.bill_black + +Functions: + +.. currentmodule:: pyabc2.sources + +.. autosummary:: + :toctree: api/ + + bill_black.load_meta + abcjs tools =========== diff --git a/docs/examples/sources.ipynb b/docs/examples/sources.ipynb index 259debe..e149874 100644 --- a/docs/examples/sources.ipynb +++ b/docs/examples/sources.ipynb @@ -17,7 +17,8 @@ "metadata": {}, "outputs": [], "source": [ - "from pyabc2.sources import load_example, norbeck, the_session, eskin" + "from pyabc2 import Tune\n", + "from pyabc2.sources import load_example, norbeck, the_session, eskin, bill_black" ] }, { @@ -401,8 +402,6 @@ "metadata": {}, "outputs": [], "source": [ - "from pyabc2 import Tune\n", - "\n", "Tune(df.query(\"group == 'jigs'\").iloc[0].abc)" ] }, @@ -419,6 +418,38 @@ "display(Markdown(f\"<{url}>\"))\n", "eskin.load_url(url)" ] + }, + { + "cell_type": "markdown", + "id": "36", + "metadata": {}, + "source": [ + "## Bill Black\n", + "\n", + "Bill Black has an extensive ABC library, available at <http://www.capeirish.com/ittl/>.\n", + "We can load all of the tune blocks (strings) with {func}`pyabc2.sources.bill_black.load_meta`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "37", + "metadata": {}, + "outputs": [], + "source": [ + "lst = bill_black.load_meta()\n", + "len(lst)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "38", + "metadata": {}, + "outputs": [], + "source": [ + "Tune(lst[0])" + ] } ], "metadata": { diff --git a/pyabc2/sources/bill_black.py b/pyabc2/sources/bill_black.py index 991e303..54fde12 100644 --- a/pyabc2/sources/bill_black.py +++ b/pyabc2/sources/bill_black.py @@ -68,7 +68,7 @@ def download_one(url): def load_meta(*, redownload: bool = False) -> list[str]: - """Load the tunebook data, splitting into tune blocks and removing ``%`` lines.""" + """Load all data, splitting into tune blocks and removing ``%`` lines.""" import zipfile from collections import Counter from textwrap import indent From 8d711c229d9fa2707ca1020e982910a1464b71c2 Mon Sep 17 00:00:00 2001 From: zmoon Date: Mon, 19 Jan 2026 14:42:44 -0600 Subject: [PATCH 18/23] Use logger --- docs/examples/sources.ipynb | 6 +-- pyabc2/sources/bill_black.py | 52 +++++++++++++++--------- pyabc2/sources/bill_black_tunefolders.py | 15 ++++--- 3 files changed, 45 insertions(+), 28 deletions(-) diff --git a/docs/examples/sources.ipynb b/docs/examples/sources.ipynb index e149874..d807252 100644 --- a/docs/examples/sources.ipynb +++ b/docs/examples/sources.ipynb @@ -437,8 +437,8 @@ "metadata": {}, "outputs": [], "source": [ - "lst = bill_black.load_meta()\n", - "len(lst)" + "abcs = bill_black.load_meta()\n", + "len(abcs)" ] }, { @@ -448,7 +448,7 @@ "metadata": {}, "outputs": [], "source": [ - "Tune(lst[0])" + "Tune(abcs[0])" ] } ], diff --git a/pyabc2/sources/bill_black.py b/pyabc2/sources/bill_black.py index 54fde12..236eff9 100644 --- a/pyabc2/sources/bill_black.py +++ b/pyabc2/sources/bill_black.py @@ -4,9 +4,14 @@ http://www.capeirish.com/ittl/ """ +import logging import re from pathlib import Path +from pyabc2._util import get_logger as _get_logger + +logger = 
_get_logger(__name__) + HERE = Path(__file__).parent SAVE_TO = HERE / "_bill-black" @@ -67,19 +72,24 @@ def download_one(url): zf.writestr(fn, text) -def load_meta(*, redownload: bool = False) -> list[str]: +def load_meta(*, redownload: bool = False, debug: bool = False) -> list[str]: """Load all data, splitting into tune blocks and removing ``%`` lines.""" import zipfile from collections import Counter from textwrap import indent + if debug: # pragma: no cover + logger.setLevel(logging.DEBUG) + else: + logger.setLevel(logging.NOTSET) + zip_path = SAVE_TO / "bill_black_alltunes_text.zip" if redownload or not zip_path.is_file(): print("downloading...", end=" ", flush=True) download() print("done") - tunes = [] + abcs = [] with zipfile.ZipFile(zip_path, "r") as zf: for zi in zf.filelist: fn = zi.filename @@ -117,7 +127,7 @@ def load_meta(*, redownload: bool = False) -> list[str]: expected_num = text.count("X:") blocks = re.split(r"\n{2,}", text.rstrip()) - this_tunes = [] + this_abcs = [] for block in blocks: block = block.strip() if not block: @@ -134,42 +144,44 @@ def load_meta(*, redownload: bool = False) -> list[str]: if start != -1: block = block[start:] else: - print(f"note: skipping non-tune block in {fn!r}:\n{indent(block, '| ')}") + logger.info(f"skipping non-tune block in {fn!r}:\n{indent(block, '| ')}") continue if block.count("X:") > 1: - print(f"warning: multiple X: lines in block in {fn!r}:\n{indent(block, '| ')}") + logger.warning(f"multiple X: lines in block in {fn!r}:\n{indent(block, '| ')}") - this_tunes.append(block) + this_abcs.append(block) - actual_num = len(this_tunes) + actual_num = len(this_abcs) if actual_num != expected_num: - print(f"warning: expected {expected_num} tunes in {fn!r}, but found {actual_num}") + logger.warning(f"expected {expected_num} tunes in {fn!r}, but found {actual_num}") # Drop fully duplicate tune blocks while preserving order seen = set() - this_tunes_unique = [] - for block in this_tunes: + this_abcs_unique = [] + 
for block in this_abcs: if block not in seen: seen.add(block) - this_tunes_unique.append(block) - if len(this_tunes_unique) < len(this_tunes): - print( - f"note: removed {len(this_tunes) - len(this_tunes_unique)}/{len(this_tunes)} fully duplicate " + this_abcs_unique.append(block) + if len(this_abcs_unique) < len(this_abcs): + logger.info( + f"removed {len(this_abcs) - len(this_abcs_unique)}/{len(this_abcs)} fully duplicate " f"tune blocks in {fn!r}" ) - this_tunes = this_tunes_unique + this_abcs = this_abcs_unique - x_counts = Counter(block.splitlines()[0] for block in this_tunes) + x_counts = Counter(block.splitlines()[0] for block in this_abcs) x_count_counts = Counter(x_counts.values()) if set(x_count_counts) != {1}: s_counts = ", ".join(f"{m} ({n})" for m, n in sorted(x_count_counts.items())) - print(f"note: non-unique X vals in {fn!r}: {s_counts}") + logger.info(f"non-unique X vals in {fn!r}: {s_counts}") - tunes.extend(this_tunes) + abcs.extend(this_abcs) - return tunes + return abcs if __name__ == "__main__": # pragma: no cover - tunes = load_meta() + tunes = load_meta(debug=True) + print() + print(tunes[0]) diff --git a/pyabc2/sources/bill_black_tunefolders.py b/pyabc2/sources/bill_black_tunefolders.py index d7b9f4d..9b56288 100644 --- a/pyabc2/sources/bill_black_tunefolders.py +++ b/pyabc2/sources/bill_black_tunefolders.py @@ -9,6 +9,7 @@ both split up alphabetically by tune name. 
""" +import logging from collections.abc import Iterable from dataclasses import dataclass, field from pathlib import Path @@ -190,12 +191,16 @@ def download(key: str | Iterable[str] | None = None, *, verbose: bool = False) - f.write(r.content) -def load_meta(key: str, *, redownload: bool = False) -> list[str]: +def load_meta(key: str, *, redownload: bool = False, debug: bool = False) -> list[str]: """Load the tunebook data, no parsing.""" - import gzip import re + if debug: # pragma: no cover + logger.setLevel(logging.DEBUG) + else: + logger.setLevel(logging.NOTSET) + collection = get_collection(key) if redownload or any(not p.is_file() for p in collection.files): print("downloading...", end=" ", flush=True) @@ -257,6 +262,6 @@ def load_meta(key: str, *, redownload: bool = False) -> list[str]: if __name__ == "__main__": # pragma: no cover - logger.setLevel("DEBUG") - - abcs = load_meta("aif") + abcs = load_meta("aif", debug=True) + print() + print(abcs[0]) From 25bf9b388921dcee76a5ac3e59b5f65868287967 Mon Sep 17 00:00:00 2001 From: zmoon Date: Mon, 19 Jan 2026 14:53:54 -0600 Subject: [PATCH 19/23] cov adding no-cover for the unexpected log message cases --- pyabc2/sources/bill_black.py | 8 ++++---- pyabc2/sources/bill_black_tunefolders.py | 15 +++++++-------- tests/test_sources.py | 5 +++++ 3 files changed, 16 insertions(+), 12 deletions(-) diff --git a/pyabc2/sources/bill_black.py b/pyabc2/sources/bill_black.py index 236eff9..bc02d2d 100644 --- a/pyabc2/sources/bill_black.py +++ b/pyabc2/sources/bill_black.py @@ -107,7 +107,7 @@ def load_meta(*, redownload: bool = False, debug: bool = False) -> list[str]: # Find the start of the first tune, in order to skip header info start = text.find("X:") - if start == -1: + if start == -1: # pragma: no cover raise RuntimeError(f"Unable to find first tune in Bill Black file {fn!r}") text = text[start:] @@ -130,7 +130,7 @@ def load_meta(*, redownload: bool = False, debug: bool = False) -> list[str]: this_abcs = [] for block in 
blocks: block = block.strip() - if not block: + if not block: # pragma: no cover continue if block.startswith(":313\nT:GRAVEL WALK (reel) (1), The"): @@ -147,13 +147,13 @@ def load_meta(*, redownload: bool = False, debug: bool = False) -> list[str]: logger.info(f"skipping non-tune block in {fn!r}:\n{indent(block, '| ')}") continue - if block.count("X:") > 1: + if block.count("X:") > 1: # pragma: no cover logger.warning(f"multiple X: lines in block in {fn!r}:\n{indent(block, '| ')}") this_abcs.append(block) actual_num = len(this_abcs) - if actual_num != expected_num: + if actual_num != expected_num: # pragma: no cover logger.warning(f"expected {expected_num} tunes in {fn!r}, but found {actual_num}") # Drop fully duplicate tune blocks while preserving order diff --git a/pyabc2/sources/bill_black_tunefolders.py b/pyabc2/sources/bill_black_tunefolders.py index 9b56288..556fecf 100644 --- a/pyabc2/sources/bill_black_tunefolders.py +++ b/pyabc2/sources/bill_black_tunefolders.py @@ -164,25 +164,24 @@ def get_collection(key: str) -> Collection: ) from e -def download(key: str | Iterable[str] | None = None, *, verbose: bool = False) -> None: +def download(key: str | Iterable[str] | None = None) -> None: import gzip import requests SAVE_TO.mkdir(exist_ok=True) - if key is None: + if key is None: # pragma: no cover collections = COLLECTIONS elif isinstance(key, str): collections = [get_collection(key)] - else: + else: # pragma: no cover collections = [get_collection(k) for k in key] for collection in collections: for url in collection.abc_urls: p = collection.url_to_file(url) - if verbose: - print(f"Downloading {url} to {p.relative_to(HERE).as_posix()}") + logger.info(f"Downloading {url} to {p.relative_to(HERE).as_posix()}") r = requests.get(url, headers={"User-Agent": "pyabc2"}, timeout=5) r.raise_for_status() @@ -204,7 +203,7 @@ def load_meta(key: str, *, redownload: bool = False, debug: bool = False) -> lis collection = get_collection(key) if redownload or any(not 
p.is_file() for p in collection.files): print("downloading...", end=" ", flush=True) - download(key=collection.key, verbose=False) + download(key=collection.key) print("done") abcs = [] @@ -224,12 +223,12 @@ def load_meta(key: str, *, redownload: bool = False, debug: bool = False) -> lis # Find the start of the first tune, in order to skip header info start = text.find("X:") - if start == -1: + if start == -1: # pragma: no cover raise RuntimeError(f"Could not find start of tune in {p.name}") # Split on 3 or more % blocks = re.split(r"\s*%{3,}\s*", text[start:]) - if not blocks: + if not blocks: # pragma: no cover raise RuntimeError(f"Splitting blocks failed for {p.name}") good_blocks = [] diff --git a/tests/test_sources.py b/tests/test_sources.py index e1615b9..8c40bd0 100644 --- a/tests/test_sources.py +++ b/tests/test_sources.py @@ -450,6 +450,11 @@ def test_bill_black_tunefolders(key): assert len(lst) > 0 +def test_bill_black_tunefolders_invalid_key(): + with pytest.raises(ValueError, match="Unknown collection key: 'asdf'"): + _ = bill_black_tunefolders.get_collection("asdf") + + def test_bill_black_text_fns(): import requests From 145b712774105e531124504f5808bcf6657b1618 Mon Sep 17 00:00:00 2001 From: zmoon Date: Mon, 19 Jan 2026 16:11:28 -0600 Subject: [PATCH 20/23] Replace another print with log --- pyabc2/sources/bill_black.py | 1 + pyabc2/sources/bill_black_tunefolders.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/pyabc2/sources/bill_black.py b/pyabc2/sources/bill_black.py index bc02d2d..93b0c82 100644 --- a/pyabc2/sources/bill_black.py +++ b/pyabc2/sources/bill_black.py @@ -93,6 +93,7 @@ def load_meta(*, redownload: bool = False, debug: bool = False) -> list[str]: with zipfile.ZipFile(zip_path, "r") as zf: for zi in zf.filelist: fn = zi.filename + logger.debug(f"Loading {fn!r}") with zf.open(zi, "r") as f: text = f.read().decode("utf-8") diff --git a/pyabc2/sources/bill_black_tunefolders.py 
b/pyabc2/sources/bill_black_tunefolders.py index 556fecf..ae85625 100644 --- a/pyabc2/sources/bill_black_tunefolders.py +++ b/pyabc2/sources/bill_black_tunefolders.py @@ -208,7 +208,7 @@ def load_meta(key: str, *, redownload: bool = False, debug: bool = False) -> lis abcs = [] for p in collection.files: - print(p) + logger.debug(f"Loading {p.name}") with gzip.open(p, "rt") as f: text = f.read() From f9f1f9599f25b624673c0a3c4b7445365a4f8dcf Mon Sep 17 00:00:00 2001 From: zmoon Date: Mon, 19 Jan 2026 16:12:40 -0600 Subject: [PATCH 21/23] Fix 404 check --- tests/test_sources.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_sources.py b/tests/test_sources.py index 8c40bd0..84b86d4 100644 --- a/tests/test_sources.py +++ b/tests/test_sources.py @@ -442,7 +442,7 @@ def test_bill_black_tunefolders(key): # 49 -- has subsubfolders with pytest.raises(requests.exceptions.HTTPError) as e: lst = bill_black_tunefolders.load_meta(key) - assert e.response.status_code == 404 + assert e.value.response.status_code == 404 return else: lst = bill_black_tunefolders.load_meta(key) From d889ac41825e0b819115d2fb06d1053dc23a7da0 Mon Sep 17 00:00:00 2001 From: zmoon Date: Mon, 19 Jan 2026 16:17:09 -0600 Subject: [PATCH 22/23] Clarify the block-splitting special cases --- pyabc2/sources/bill_black.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pyabc2/sources/bill_black.py b/pyabc2/sources/bill_black.py index 93b0c82..380e2bc 100644 --- a/pyabc2/sources/bill_black.py +++ b/pyabc2/sources/bill_black.py @@ -113,7 +113,8 @@ def load_meta(*, redownload: bool = False, debug: bool = False) -> list[str]: text = text[start:] - # Separate some blocks that are missing empty lines + # Separate some two-tune blocks + # These X vals have a tune above them without an empty line in between if fn.startswith("c-tunes"): to_sep = [253, 666] elif fn.startswith("d-tunes"): From 80a0141bdf2f02d4d62a91f85914c72bc2df1d66 Mon Sep 17 00:00:00 2001 From: zmoon Date: 
Mon, 19 Jan 2026 16:23:09 -0600 Subject: [PATCH 23/23] doc --- pyabc2/sources/bill_black.py | 10 +++++++++- pyabc2/sources/bill_black_tunefolders.py | 10 +++++++++- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/pyabc2/sources/bill_black.py b/pyabc2/sources/bill_black.py index 380e2bc..d907062 100644 --- a/pyabc2/sources/bill_black.py +++ b/pyabc2/sources/bill_black.py @@ -73,7 +73,15 @@ def download_one(url): def load_meta(*, redownload: bool = False, debug: bool = False) -> list[str]: - """Load all data, splitting into tune blocks and removing ``%`` lines.""" + """Load all data, splitting into ABC tune blocks and removing lines that start with ``%``. + + Parameters + ---------- + redownload + Re-download the data file. + debug + Show debug messages. + """ import zipfile from collections import Counter from textwrap import indent diff --git a/pyabc2/sources/bill_black_tunefolders.py b/pyabc2/sources/bill_black_tunefolders.py index ae85625..0d45b3c 100644 --- a/pyabc2/sources/bill_black_tunefolders.py +++ b/pyabc2/sources/bill_black_tunefolders.py @@ -191,7 +191,15 @@ def download(key: str | Iterable[str] | None = None) -> None: def load_meta(key: str, *, redownload: bool = False, debug: bool = False) -> list[str]: - """Load the tunebook data, no parsing.""" + """Load the tunebook data, no parsing. + + Parameters + ---------- + redownload + Re-download the data file. + debug + Show debug messages. + """ import gzip import re