longieirl · longieirl · Mar 30, 2026 · Mar 27, 2026 · Mar 27, 2026 · Mar 27, 2026
@@ -2,8 +2,8 @@
 #
 # Jobs (all lint jobs run in parallel):
 #   changes     — detect which paths changed (skips heavy jobs on workflow-only PRs)
-#   lint-core   — black, isort, flake8, mypy on packages/parser-core
-#   lint-free   — black, isort, flake8 on packages/parser-free
+#   lint-core   — black, isort, ruff, mypy on packages/parser-core
+#   lint-free   — black, isort, ruff on packages/parser-free
 #   security    — bandit + safety on both packages
 #   test-core   — pytest with 91% coverage gate (Python matrix), needs lint-core
 #   test-free   — pytest on packages/parser-free, needs lint-free
@@ -115,8 +115,8 @@ jobs:
     - name: isort
       run: isort --check-only --diff src tests
 
-    - name: Flake8
-      run: flake8 src tests --max-line-length=88 --extend-ignore=E203,W503,E501,W504,D,C420
+    - name: Ruff
+      run: ruff check src tests
 
     - name: MyPy
       run: mypy src --ignore-missing-imports
@@ -162,16 +162,16 @@ jobs:
         pip install --upgrade pip
         pip install -e ../parser-core
         pip install -e ".[test]"
-        pip install black isort flake8
+        pip install black isort ruff
 
     - name: Black
       run: black --check --diff src tests
 
     - name: isort
       run: isort --check-only --diff src tests
 
-    - name: Flake8
-      run: flake8 src tests --max-line-length=88 --extend-ignore=E203,W503,E501,W504,D,C420
+    - name: Ruff
+      run: ruff check src tests
 
   security:
     name: Security — bandit + safety

@@ -33,20 +33,13 @@ repos:
       - id: black
         language_version: python3
 
-  # Ruff - fast Python linter and auto-fixer
-  - repo: https://github.com/charliermarsh/ruff-pre-commit
-    rev: v0.0.265
+  # Python linting (ruff)
+  - repo: https://github.com/astral-sh/ruff-pre-commit
+    rev: v0.12.0  # v0.8.0 only defines the legacy "ruff" hook id, not "ruff-check"
     hooks:
-      - id: ruff
+      - id: ruff-check
         args: ["--fix"]
 
-  # Python linting
-  - repo: https://github.com/pycqa/flake8
-    rev: 7.0.0
-    hooks:
-      - id: flake8
-        args: ["--max-line-length=88", "--extend-ignore=E203,W503,E501,W504,D,C420"]
-
   # Type checking
   - repo: https://github.com/pre-commit/mirrors-mypy
     rev: v1.8.0

@@ -32,7 +32,6 @@ dependencies = [
 dev = [
     "black>=23.0.0,<27.0.0",
     "isort>=5.12.0,<9.0.0",
-    "flake8>=6.0.0,<8.0.0",
     "mypy>=1.8.0,<2.0.0",
     "pyright>=1.1.350",
     "types-python-dateutil>=2.8.0.0",
@@ -41,7 +40,7 @@ dev = [
     "ipython>=8.0.0,<10.0.0",
     "ipdb>=0.13.0",
     "pre-commit>=3.0.0,<5.0.0",
-    "ruff>=0.0.265,<1.0.0",
+    "ruff>=0.8.0,<1.0.0",
     "bandit[toml]>=1.7.0,<2.0.0",
     "safety>=2.0.0,<4.0.0",
     "detect-secrets>=1.4.0,<2.0.0",
@@ -145,3 +144,34 @@ profile = "black"
 multi_line_output = 3
 line_length = 88
 known_first_party = ["bankstatements_core"]
+
+[tool.ruff]
+line-length = 88
+target-version = "py311"
+
+[tool.ruff.lint]
+select = [
+    "E",      # pycodestyle errors
+    "W",      # pycodestyle warnings
+    "F",      # pyflakes
+    "B",      # flake8-bugbear
+    "C901",   # mccabe complexity
+    "G",      # flake8-logging-format
+    "PLC",    # pylint convention
+    "PLR",    # pylint refactoring
+    "T201",   # flake8-print
+    "BLE001", # flake8-blind-except
+    "UP",     # pyupgrade
+    "RUF",    # ruff-specific rules
+]
+ignore = [
+    "E501",    # line too long — handled by black
+    "PLR2004", # magic value comparison — disabled project-wide; accepted in tests and config
+    "G004",    # logging f-string — 214 violations, deferred, see GitHub issue #90
+]
+
+[tool.ruff.lint.per-file-ignores]
+"__init__.py" = ["F401"]  # F401: unused import
+"tests/**/*.py" = ["F401", "PLC0415", "PLR0913", "C901", "RUF043", "RUF059", "RUF005", "RUF003"]
+"tests/integration/*.py" = ["T201"]  # T201: print() call
+"src/bankstatements_core/pdf_table_extractor.py" = ["E402"]  # E402: module-level import not at top of file
@@ -7,7 +7,7 @@
 )
 
 __all__ = [
-    "PDFPlumberReaderAdapter",
     "PDFPlumberDocumentAdapter",
     "PDFPlumberPageAdapter",
+    "PDFPlumberReaderAdapter",
 ]
@@ -15,7 +15,7 @@
 class PDFPlumberPageAdapter:
     """Adapter wrapping pdfplumber Page to implement IPDFPage protocol."""
 
-    def __init__(self, page: "Page"):
+    def __init__(self, page: Page):
         """Initialize page adapter.
 
         Args:
@@ -118,7 +118,7 @@ def crop(self, bbox: tuple[float, float, float, float]) -> PDFPlumberPageAdapter
 class PDFPlumberDocumentAdapter:
     """Adapter wrapping pdfplumber PDF to implement IPDFDocument protocol."""
 
-    def __init__(self, pdf_doc: "PDF"):
+    def __init__(self, pdf_doc: PDF):
         """Initialize document adapter.
 
         Args:
@@ -174,12 +174,12 @@ def open(self, pdf_path: Path) -> PDFPlumberDocumentAdapter:
             # pdfplumber.open returns pdfplumber.PDF but type system expects pdfplumber.pdf.PDF
             return PDFPlumberDocumentAdapter(pdf_doc)  # type: ignore[arg-type]
         except FileNotFoundError:
-            raise FileNotFoundError(f"PDF file not found: {pdf_path}")
+            raise FileNotFoundError(f"PDF file not found: {pdf_path}") from None
         except (OSError, ValueError, TypeError, RuntimeError) as e:
             # Expected errors: file I/O errors, invalid PDF structure, type errors, PDF library errors
             # Library-specific exceptions not derived from these types (e.g. PDFSyntaxError) are wrapped by the generic handler below
-            raise IOError(f"Failed to open PDF {pdf_path}: {e}") from e
+            raise OSError(f"Failed to open PDF {pdf_path}: {e}") from e
         except Exception as e:
             # Catch any other PDF library exceptions (PDFSyntaxError, etc.)
             # These are library-specific errors that indicate corrupted/invalid PDFs
-            raise IOError(f"Failed to open PDF {pdf_path}: {e}") from e
+            raise OSError(f"Failed to open PDF {pdf_path}: {e}") from e
@@ -5,7 +5,6 @@
 """
 
 from dataclasses import dataclass
-from typing import List, Tuple
 
 
 @dataclass
@@ -133,7 +132,7 @@ def expand_bbox(bbox: BBox, margin: float) -> BBox:
     )
 
 
-def merge_bboxes(bboxes: List[BBox]) -> BBox:
+def merge_bboxes(bboxes: list[BBox]) -> BBox:
     """Merge multiple bounding boxes into a single container bbox.
 
     Args:
@@ -156,7 +155,7 @@ def merge_bboxes(bboxes: List[BBox]) -> BBox:
     return BBox(x0=min_x0, y0=min_y0, x1=max_x1, y1=max_y1)
 
 
-def bbox_from_words(words: List[dict]) -> BBox:
+def bbox_from_words(words: list[dict]) -> BBox:
     """Create a bounding box that contains all given words.
 
     Args:
@@ -179,7 +178,7 @@ def bbox_from_words(words: List[dict]) -> BBox:
     return BBox(x0=min_x0, y0=min_y0, x1=max_x1, y1=max_y1)
 
 
-def bbox_intersection(bbox1: BBox, bbox2: BBox) -> Tuple[float, float]:
+def bbox_intersection(bbox1: BBox, bbox2: BBox) -> tuple[float, float]:
     """Calculate the intersection dimensions of two bounding boxes.
 
     Args:

@@ -5,7 +5,7 @@
 """
 
 import logging
-from typing import Any, Dict, List, Optional, Tuple
+from typing import Any
 
 from bankstatements_core.analysis.bbox_utils import BBox
 
@@ -34,7 +34,7 @@ def __init__(
 
     def analyze_columns(
         self, page: Any, table_bbox: BBox
-    ) -> Dict[str, Tuple[float, float]]:
+    ) -> dict[str, tuple[float, float]]:
         """Analyze table and detect column boundaries.
 
         Args:
@@ -95,7 +95,7 @@ def analyze_columns(
         logger.info(f"Detected {len(columns)} columns")
         return columns
 
-    def _cluster_x_coordinates(self, words: List[dict]) -> List[float]:
+    def _cluster_x_coordinates(self, words: list[dict]) -> list[float]:
         """Cluster word X coordinates to find column alignment points.
 
         Args:
@@ -135,8 +135,8 @@ def _cluster_x_coordinates(self, words: List[dict]) -> List[float]:
         return sorted(clusters)
 
     def _detect_boundaries_from_clusters(
-        self, clusters: List[float]
-    ) -> List[Tuple[float, float]]:
+        self, clusters: list[float]
+    ) -> list[tuple[float, float]]:
         """Detect column boundaries from cluster centers.
 
         Args:
@@ -165,21 +165,20 @@ def _detect_boundaries_from_clusters(
                 else:
                     # Small gap - columns are close, use midpoint
                     x_max = (clusters[i] + clusters[i + 1]) / 2
+            # Last column - extend to reasonable width
+            elif i > 0:
+                avg_width = (clusters[i] - clusters[0]) / i
+                x_max = clusters[i] + avg_width
             else:
-                # Last column - extend to reasonable width
-                if i > 0:
-                    avg_width = (clusters[i] - clusters[0]) / i
-                    x_max = clusters[i] + avg_width
-                else:
-                    x_max = clusters[i] + 100  # Default width
+                x_max = clusters[i] + 100  # Default width
 
             boundaries.append((x_min, x_max))
 
         return boundaries
 
     def _find_header_words(
-        self, table_words: List[dict], table_bbox: BBox
-    ) -> List[dict]:
+        self, table_words: list[dict], table_bbox: BBox
+    ) -> list[dict]:
         """Find words in the header row of the table.
 
         Args:
@@ -208,8 +207,8 @@ def _find_header_words(
         return header_words
 
     def _assign_column_names(
-        self, boundaries: List[Tuple[float, float]], header_words: List[dict]
-    ) -> List[str]:
+        self, boundaries: list[tuple[float, float]], header_words: list[dict]
+    ) -> list[str]:
         """Assign names to columns based on header words.
 
         Strategy: Each header word should be assigned to its BEST matching column only.
@@ -246,7 +245,7 @@ def _assign_column_names(
         word_groups.append(current_group)
 
         # Assign each word group to the best matching column boundary
-        column_names: List[Optional[str]] = [None] * len(boundaries)
+        column_names: list[str | None] = [None] * len(boundaries)
 
         for group in word_groups:
             # Calculate group center
@@ -278,7 +277,7 @@ def _assign_column_names(
                 )
 
         # Fill in any unassigned columns with generic names
-        result_names: List[str] = []
+        result_names: list[str] = []
         for i in range(len(column_names)):
             name_val = column_names[i]
             if name_val is None:
@@ -294,8 +293,8 @@ def _assign_column_names(
         return result_names
 
     def _resolve_overlapping_boundaries(
-        self, boundaries: List[Tuple[float, float]]
-    ) -> List[Tuple[float, float]]:
+        self, boundaries: list[tuple[float, float]]
+    ) -> list[tuple[float, float]]:
         """Resolve overlapping column boundaries.
 
         When columns overlap, adjust boundaries so column i ends just before
@@ -337,8 +336,8 @@ def _resolve_overlapping_boundaries(
         return resolved
 
     def _create_columns_from_headers(
-        self, header_words: List[dict], table_bbox: BBox
-    ) -> Tuple[List[Tuple[float, float]], List[str]]:
+        self, header_words: list[dict], table_bbox: BBox
+    ) -> tuple[list[tuple[float, float]], list[str]]:
         """Create column boundaries and names directly from header words.
 
         Args:

@@ -9,7 +9,7 @@
 import logging
 import re
 from dataclasses import dataclass
-from typing import Any, List, Optional
+from typing import Any
 
 from bankstatements_core.analysis.bbox_utils import BBox, overlaps
 from bankstatements_core.extraction.iban_extractor import IBANExtractor
@@ -33,7 +33,7 @@ class IBANCandidate:
     masked: str
     bbox: BBox
     confidence_score: float = 0.0
-    rejection_reason: Optional[str] = None
+    rejection_reason: str | None = None
 
 
 class IBANSpatialFilter:
@@ -46,7 +46,7 @@ def __init__(self) -> None:
         """Initialize IBAN spatial filter."""
         self.iban_extractor = IBANExtractor()
 
-    def extract_iban_candidates(self, page: Any) -> List[IBANCandidate]:
+    def extract_iban_candidates(self, page: Any) -> list[IBANCandidate]:  # noqa: C901
         """Extract IBAN candidates with spatial coordinates from page.
 
         Uses two strategies:
@@ -169,10 +169,10 @@ def extract_iban_candidates(self, page: Any) -> List[IBANCandidate]:
 
     def filter_by_table_overlap(
         self,
-        candidates: List[IBANCandidate],
-        table_regions: List[BBox],
+        candidates: list[IBANCandidate],
+        table_regions: list[BBox],
         overlap_threshold: float = 0.0,
-    ) -> List[IBANCandidate]:
+    ) -> list[IBANCandidate]:
         """Filter out IBANs that overlap with table regions.
 
         Args:
@@ -216,8 +216,8 @@ def filter_by_table_overlap(
         return filtered
 
     def score_candidates(
-        self, candidates: List[IBANCandidate], page_height: float
-    ) -> List[IBANCandidate]:
+        self, candidates: list[IBANCandidate], page_height: float
+    ) -> list[IBANCandidate]:
         """Score IBAN candidates based on location and context.
 
         Higher scores are given to:
@@ -265,9 +265,7 @@ def score_candidates(
 
         return candidates_sorted
 
-    def select_best_iban(
-        self, candidates: List[IBANCandidate]
-    ) -> Optional[IBANCandidate]:
+    def select_best_iban(self, candidates: list[IBANCandidate]) -> IBANCandidate | None:
         """Select the best IBAN from scored candidates.
 
         Args: