-
Notifications
You must be signed in to change notification settings - Fork 1
Load Bill Black #92
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Load Bill Black #92
Changes from all commits
Commits
Show all changes
24 commits
Select commit
Hold shift + click to select a range
9f697d0
Only HTTP currently
zmoon d66b68f
Download some collections
zmoon 9fc094c
Add a few more
zmoon 992538d
Towards ABC extraction
zmoon b0d70da
Clean up ABC blocks from RTF
zmoon 661a434
Debug
zmoon a38e8a3
Merge remote-tracking branch 'origin/main' into black
zmoon a43b733
Update ITTL link and typing
zmoon 3317d21
Update BB tunefolders for the mid 2025 changes
zmoon 1213b62
Use our get-logger func
zmoon f46ddfa
Implement downloading BB alltunes/text
zmoon e2af64c
Initial tune block loader
zmoon cf20ac4
Address some skipped blocks
zmoon 117713b
More special cases
zmoon c030655
Remove duplicate blocks; assess duplicate X vals
zmoon cac905e
Just use s-tunes-2
zmoon d316daa
Add a few tests
zmoon 89d961e
Add BB to docs
zmoon 8d711c2
Use logger
zmoon 25bf9b3
cov
zmoon 145b712
Replace another print with log
zmoon f9f1f95
Fix 404 check
zmoon d889ac4
Clarify the block-splitting special cases
zmoon 80a0141
doc
zmoon File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,197 @@ | ||
| """ | ||
| Bill Black's Irish Traditional Tune Library | ||
|
|
||
| http://www.capeirish.com/ittl/ | ||
| """ | ||
|
|
||
| import logging | ||
| import re | ||
| from pathlib import Path | ||
|
|
||
| from pyabc2._util import get_logger as _get_logger | ||
|
|
||
# Module-level logger; `load_meta` adjusts its level based on the `debug` flag.
logger = _get_logger(__name__)

# Directory containing this module.
HERE = Path(__file__).parent

# Directory where `download` writes the zip archive that `load_meta` reads.
SAVE_TO = HERE / "_bill-black"
# File names of the alphabetical text files, in alphabetical order.
# Some letters share one file ("pq", "uv", "wz"); the "s" file is the only
# one whose name ends in "-2" rather than "-1".
TXT_FNS = [
    f"{letters}-tunes-{2 if letters == 's' else 1}.txt"
    for letters in "a b c d e f g h i j k l m n o pq r s t uv wz".split()
]
|
|
||
|
|
||
def download() -> None:
    """Download the alphabetical text files from http://www.capeirish.com/ittl/alltunes/text/
    and store them in a compressed archive.

    Every file in ``TXT_FNS`` is fetched (up to four at a time) *before* the
    archive is opened for writing, so a failed request raises without creating
    or overwriting the archive with a partial set of files.

    Raises
    ------
    requests.RequestException
        If any request fails or times out, or the server returns an error status
        (via ``raise_for_status()``).
    """
    import zipfile
    from concurrent.futures import ThreadPoolExecutor

    import requests

    def download_one(url):
        r = requests.get(url, headers={"User-Agent": "pyabc2"}, timeout=5)
        r.raise_for_status()
        return r.text

    urls = [f"http://www.capeirish.com/ittl/alltunes/text/{fn}" for fn in TXT_FNS]

    with ThreadPoolExecutor(max_workers=4) as executor:
        # `map` yields results in input order and re-raises the first failure here,
        # i.e. before anything has been written to disk.
        texts = list(executor.map(download_one, urls))

    SAVE_TO.mkdir(exist_ok=True)

    with zipfile.ZipFile(
        SAVE_TO / "bill_black_alltunes_text.zip",
        "w",
        compression=zipfile.ZIP_DEFLATED,
    ) as zf:
        for fn, text in zip(TXT_FNS, texts, strict=True):
            zf.writestr(fn, text)
|
|
||
|
|
||
def load_meta(*, redownload: bool = False, debug: bool = False) -> list[str]:
    """Load all data, splitting into ABC tune blocks and removing lines that start with ``%``.

    Parameters
    ----------
    redownload
        Re-download the data file.
    debug
        Show debug messages.

    Returns
    -------
    list of str
        The tune blocks from all files in the archive, in archive order.
        Each block starts with ``X:``. Within one file, a block that is fully
        identical to an earlier block is dropped.

    Raises
    ------
    RuntimeError
        If a file in the archive contains no ``X:`` at all.
    """
    import zipfile
    from collections import Counter
    from textwrap import indent

    if debug:  # pragma: no cover
        logger.setLevel(logging.DEBUG)
    else:
        logger.setLevel(logging.NOTSET)

    zip_path = SAVE_TO / "bill_black_alltunes_text.zip"
    if redownload or not zip_path.is_file():
        print("downloading...", end=" ", flush=True)
        download()
        print("done")

    abcs = []
    with zipfile.ZipFile(zip_path, "r") as zf:
        for zi in zf.infolist():
            fn = zi.filename
            logger.debug(f"Loading {fn!r}")
            with zf.open(zi, "r") as f:
                text = f.read().decode("utf-8")

            # A tune block starts with the X: line and ends with a blank line
            # or the end of the file.
            # Unlike the RTF files, %%% is not _necessarily_ present as a tune separator.

            # Remove all lines that start with %
            text = "\n".join(
                line.strip() for line in text.splitlines() if not line.lstrip().startswith("%")
            )

            # Find the start of the first tune, in order to skip header info
            start = text.find("X:")
            if start == -1:  # pragma: no cover
                raise RuntimeError(f"Unable to find first tune in Bill Black file {fn!r}")

            text = text[start:]

            # Separate some two-tune blocks
            # These X vals have a tune above them without an empty line in between
            if fn.startswith("c-tunes"):
                to_sep = [253, 666]
            elif fn.startswith("d-tunes"):
                to_sep = [223]
            elif fn.startswith("e-tunes"):
                to_sep = [34]
            else:
                to_sep = []
            for n in to_sep:
                # The lookahead keeps e.g. `X:34` from also matching `X:340`
                text = re.sub(rf"X:{n}(?!\d)", f"\nX:{n}", text)

            # Counted before splitting, to cross-check the number of blocks found below
            expected_num = text.count("X:")

            blocks = re.split(r"\n{2,}", text.rstrip())
            this_abcs = []
            for block in blocks:
                block = block.strip()
                if not block:  # pragma: no cover
                    continue

                # This one tune is missing the leading `X` of its `X:` line,
                # so it was not included in the count above
                if block.startswith(":313\nT:GRAVEL WALK (reel) (1), The"):
                    block = "X" + block
                    expected_num += 1

                if not block.startswith("X:"):
                    # First look for tune later in the block
                    # Some blocks start with comment text, sometimes including other settings but without `X:`
                    start = block.find("X:")
                    if start != -1:
                        block = block[start:]
                    else:
                        logger.info(f"skipping non-tune block in {fn!r}:\n{indent(block, '| ')}")
                        continue

                if block.count("X:") > 1:  # pragma: no cover
                    logger.warning(f"multiple X: lines in block in {fn!r}:\n{indent(block, '| ')}")

                this_abcs.append(block)

            actual_num = len(this_abcs)
            if actual_num != expected_num:  # pragma: no cover
                logger.warning(f"expected {expected_num} tunes in {fn!r}, but found {actual_num}")

            # Drop fully duplicate tune blocks while preserving order
            seen = set()
            this_abcs_unique = []
            for block in this_abcs:
                if block not in seen:
                    seen.add(block)
                    this_abcs_unique.append(block)
            if len(this_abcs_unique) < len(this_abcs):
                logger.info(
                    f"removed {len(this_abcs) - len(this_abcs_unique)}/{len(this_abcs)} fully duplicate "
                    f"tune blocks in {fn!r}"
                )
            this_abcs = this_abcs_unique

            # Report how many first lines (the `X:` lines) occur once, twice, etc.
            x_counts = Counter(block.splitlines()[0] for block in this_abcs)
            x_count_counts = Counter(x_counts.values())
            if set(x_count_counts) != {1}:
                s_counts = ", ".join(f"{m} ({n})" for m, n in sorted(x_count_counts.items()))
                logger.info(f"non-unique X vals in {fn!r}: {s_counts}")

            abcs.extend(this_abcs)

    return abcs
|
|
||
|
|
||
if __name__ == "__main__":  # pragma: no cover
    # Manual check: load everything with debug logging on, then show the
    # first tune block after a blank separator line.
    first_block = load_meta(debug=True)[0]
    print(f"\n{first_block}")
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.