|
| 1 | +#!/usr/bin/env python3 |
| 2 | +""" |
| 3 | +Lists all unique tags and categories found in the YAML front matter of Markdown files (.md, .markdown, .mkd). |
| 4 | +Reads both the 'tags' and 'categories' keys; each value may be a list or a comma-separated string. |
| 5 | +""" |
| 6 | + |
| 7 | +import os |
| 8 | +import yaml |
| 9 | +from pathlib import Path |
| 10 | +from typing import Set |
| 11 | + |
# Root directory to scan: the directory that contains this script.
DOCS_DIR = Path(__file__).parent
# File suffixes treated as Markdown; compared against the lower-cased suffix.
EXTENSIONS = (".md", ".markdown", ".mkd")
| 14 | + |
def extract_frontmatter(file_path: Path) -> dict:
    """Return the YAML front matter of a file as a dict.

    The front matter is the text between an opening ``---`` line at the very
    top of the file and the next line that consists solely of ``---``.

    Args:
        file_path: File to read. It is decoded as UTF-8; a leading BOM is
            ignored.

    Returns:
        The parsed mapping, or ``{}`` when the file cannot be read or decoded,
        has no front matter, the front matter is never closed, the YAML is
        invalid, or the YAML document is not a mapping.
    """
    try:
        # utf-8-sig strips a leading BOM, which would otherwise hide the
        # opening delimiter from the check below.
        content = file_path.read_text(encoding="utf-8-sig")
    except (OSError, UnicodeDecodeError) as exc:
        # One unreadable file must not abort the scan of the whole tree.
        print(f"Warning: Could not read {file_path}: {exc}")
        return {}

    lines = content.split("\n")
    if lines[0].rstrip() != "---":
        return {}

    # Match the closing delimiter as a whole line: a "---" that merely occurs
    # inside a value (e.g. "title: a---b") must not end the front matter.
    for end, line in enumerate(lines[1:], start=1):
        if line.rstrip() == "---":
            break
    else:
        return {}

    try:
        fm = yaml.safe_load("\n".join(lines[1:end])) or {}
    except yaml.YAMLError:
        print(f"Warning: Invalid YAML in {file_path}")
        return {}
    return fm if isinstance(fm, dict) else {}
| 29 | + |
# Names removed from the final result sets regardless of what the files contain.
# NOTE(review): these look like template placeholder values - confirm.
_EXCLUDED_TAGS = frozenset({"TAG_ONE", "TAG_TWO"})
_EXCLUDED_CATEGORIES = frozenset({"CATEGORY_ONE", "CATEGORY_TWO"})


def _normalize_terms(value) -> Set[str]:
    """Convert a front-matter 'tags'/'categories' value into a set of names."""
    if isinstance(value, str):
        # A plain string is treated as a comma-separated list.
        value = value.split(",")
    if not isinstance(value, list):
        return set()
    terms = {str(item).strip() for item in value if item}
    # Whitespace-only entries strip down to "", which is not a real name.
    terms.discard("")
    return terms


def collect_tags(docs_dir=None) -> tuple[Set[str], Set[str]]:
    """Collect the unique tags and categories used under a directory tree.

    Every file below the directory whose lower-cased suffix is in
    ``EXTENSIONS`` is read with ``extract_frontmatter``; its ``tags`` and
    ``categories`` values (a list or a comma-separated string) are merged
    into the result sets.

    Args:
        docs_dir: Directory to scan recursively. Defaults to ``DOCS_DIR``
            when omitted or ``None``.

    Returns:
        A ``(tags, categories)`` tuple of sets of stripped, non-empty names.
        Both sets are empty when the directory does not exist (an error
        message is printed in that case).
    """
    all_tags: Set[str] = set()
    all_categories: Set[str] = set()

    docs_path = Path(DOCS_DIR if docs_dir is None else docs_dir)
    if not docs_path.is_dir():
        print(f"Error: Directory not found: {docs_path}")
        return all_tags, all_categories

    for file_path in docs_path.rglob("*"):
        # Check the suffix first: it needs no filesystem access.
        if file_path.suffix.lower() not in EXTENSIONS or not file_path.is_file():
            continue

        fm = extract_frontmatter(file_path)
        all_tags |= _normalize_terms(fm.get("tags", []))
        # 'categories' is sometimes used instead of, or in addition to, 'tags'.
        all_categories |= _normalize_terms(fm.get("categories", []))

    all_tags -= _EXCLUDED_TAGS
    all_categories -= _EXCLUDED_CATEGORIES

    return all_tags, all_categories
| 65 | + |
| 66 | + |
def main():
    """Print the collected categories and then the tags, each sorted and comma-separated."""
    found_tags, found_categories = collect_tags()

    category_line = ", ".join(sorted(found_categories))
    tag_line = ", ".join(sorted(found_tags))

    print(f"\nExisting categories: {category_line}")
    print(f"\nExisting tags: {tag_line}")
| 72 | + |
| 73 | + |
# Run the report only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
| 76 | + |
| 77 | + |
0 commit comments