diff --git a/.flake8 b/.flake8 deleted file mode 100644 index 3d93411..0000000 --- a/.flake8 +++ /dev/null @@ -1,15 +0,0 @@ -[flake8] -max-line-length = 88 -extend-ignore = E203,W503,E501,W504,D,C420 -exclude = - .git, - __pycache__, - .venv, - venv, - .env, - build, - dist, - migrations -per-file-ignores = - __init__.py:F401 - tests/*:F401 \ No newline at end of file diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 281328a..30e6230 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -2,8 +2,8 @@ # # Jobs (all lint jobs run in parallel): # changes — detect which paths changed (skips heavy jobs on workflow-only PRs) -# lint-core — black, isort, flake8, mypy on packages/parser-core -# lint-free — black, isort, flake8 on packages/parser-free +# lint-core — black, isort, ruff, mypy on packages/parser-core +# lint-free — black, isort, ruff on packages/parser-free # security — bandit + safety on both packages # test-core — pytest with 91% coverage gate (Python matrix), needs lint-core # test-free — pytest on packages/parser-free, needs lint-free @@ -115,8 +115,8 @@ jobs: - name: isort run: isort --check-only --diff src tests - - name: Flake8 - run: flake8 src tests --max-line-length=88 --extend-ignore=E203,W503,E501,W504,D,C420 + - name: Ruff + run: ruff check src tests - name: MyPy run: mypy src --ignore-missing-imports @@ -162,7 +162,7 @@ jobs: pip install --upgrade pip pip install -e ../parser-core pip install -e ".[test]" - pip install black isort flake8 + pip install black isort ruff - name: Black run: black --check --diff src tests @@ -170,8 +170,8 @@ jobs: - name: isort run: isort --check-only --diff src tests - - name: Flake8 - run: flake8 src tests --max-line-length=88 --extend-ignore=E203,W503,E501,W504,D,C420 + - name: Ruff + run: ruff check src tests security: name: Security — bandit + safety diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 322de3d..3bf79e6 100644 --- a/.pre-commit-config.yaml +++ 
b/.pre-commit-config.yaml @@ -33,20 +33,13 @@ repos: - id: black language_version: python3 - # Ruff - fast Python linter and auto-fixer - - repo: https://github.com/charliermarsh/ruff-pre-commit - rev: v0.0.265 + # Python linting (ruff) + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.8.0 hooks: - - id: ruff + - id: ruff args: ["--fix"] - # Python linting - - repo: https://github.com/pycqa/flake8 - rev: 7.0.0 - hooks: - - id: flake8 - args: ["--max-line-length=88", "--extend-ignore=E203,W503,E501,W504,D,C420"] - # Type checking - repo: https://github.com/pre-commit/mirrors-mypy rev: v1.8.0 diff --git a/packages/parser-core/pyproject.toml b/packages/parser-core/pyproject.toml index 1052ebc..9418588 100644 --- a/packages/parser-core/pyproject.toml +++ b/packages/parser-core/pyproject.toml @@ -32,7 +32,6 @@ dependencies = [ dev = [ "black>=23.0.0,<27.0.0", "isort>=5.12.0,<9.0.0", - "flake8>=6.0.0,<8.0.0", "mypy>=1.8.0,<2.0.0", "pyright>=1.1.350", "types-python-dateutil>=2.8.0.0", @@ -41,7 +40,7 @@ dev = [ "ipython>=8.0.0,<10.0.0", "ipdb>=0.13.0", "pre-commit>=3.0.0,<5.0.0", - "ruff>=0.0.265,<1.0.0", + "ruff>=0.8.0,<1.0.0", "bandit[toml]>=1.7.0,<2.0.0", "safety>=2.0.0,<4.0.0", "detect-secrets>=1.4.0,<2.0.0", @@ -145,3 +144,34 @@ profile = "black" multi_line_output = 3 line_length = 88 known_first_party = ["bankstatements_core"] + +[tool.ruff] +line-length = 88 +target-version = "py311" + +[tool.ruff.lint] +select = [ + "E", # pycodestyle errors + "W", # pycodestyle warnings + "F", # pyflakes + "B", # flake8-bugbear + "C901", # mccabe complexity + "G", # flake8-logging-format + "PLC", # pylint convention + "PLR", # pylint refactoring + "T201", # flake8-print + "BLE001", # flake8-blind-except + "UP", # pyupgrade + "RUF", # ruff-specific rules +] +ignore = [ + "E501", # line too long — handled by black + "PLR2004", # magic value comparison — acceptable in tests and config + "G004", # logging f-string — 214 violations, deferred, see GitHub issue #90 
+] + +[tool.ruff.lint.per-file-ignores] +"__init__.py" = ["F401"] +"tests/**/*.py" = ["F401", "PLC0415", "PLR0913", "C901", "RUF043", "RUF059", "RUF005", "RUF003"] +"tests/integration/*.py" = ["T201"] +"src/bankstatements_core/pdf_table_extractor.py" = ["E402"] diff --git a/packages/parser-core/src/bankstatements_core/adapters/__init__.py b/packages/parser-core/src/bankstatements_core/adapters/__init__.py index 918dd10..e848cd6 100644 --- a/packages/parser-core/src/bankstatements_core/adapters/__init__.py +++ b/packages/parser-core/src/bankstatements_core/adapters/__init__.py @@ -7,7 +7,7 @@ ) __all__ = [ - "PDFPlumberReaderAdapter", "PDFPlumberDocumentAdapter", "PDFPlumberPageAdapter", + "PDFPlumberReaderAdapter", ] diff --git a/packages/parser-core/src/bankstatements_core/adapters/pdfplumber_adapter.py b/packages/parser-core/src/bankstatements_core/adapters/pdfplumber_adapter.py index 385abad..6d32c3a 100644 --- a/packages/parser-core/src/bankstatements_core/adapters/pdfplumber_adapter.py +++ b/packages/parser-core/src/bankstatements_core/adapters/pdfplumber_adapter.py @@ -15,7 +15,7 @@ class PDFPlumberPageAdapter: """Adapter wrapping pdfplumber Page to implement IPDFPage protocol.""" - def __init__(self, page: "Page"): + def __init__(self, page: Page): """Initialize page adapter. Args: @@ -118,7 +118,7 @@ def crop(self, bbox: tuple[float, float, float, float]) -> PDFPlumberPageAdapter class PDFPlumberDocumentAdapter: """Adapter wrapping pdfplumber PDF to implement IPDFDocument protocol.""" - def __init__(self, pdf_doc: "PDF"): + def __init__(self, pdf_doc: PDF): """Initialize document adapter. 
Args: @@ -174,12 +174,12 @@ def open(self, pdf_path: Path) -> PDFPlumberDocumentAdapter: # pdfplumber.open returns pdfplumber.PDF but type system expects pdfplumber.pdf.PDF return PDFPlumberDocumentAdapter(pdf_doc) # type: ignore[arg-type] except FileNotFoundError: - raise FileNotFoundError(f"PDF file not found: {pdf_path}") + raise FileNotFoundError(f"PDF file not found: {pdf_path}") from None except (OSError, ValueError, TypeError, RuntimeError) as e: # Expected errors: file I/O errors, invalid PDF structure, type errors, PDF library errors # PDFSyntaxError and other pdfminer exceptions inherit from RuntimeError or are library-specific - raise IOError(f"Failed to open PDF {pdf_path}: {e}") from e + raise OSError(f"Failed to open PDF {pdf_path}: {e}") from e except Exception as e: # Catch any other PDF library exceptions (PDFSyntaxError, etc.) # These are library-specific errors that indicate corrupted/invalid PDFs - raise IOError(f"Failed to open PDF {pdf_path}: {e}") from e + raise OSError(f"Failed to open PDF {pdf_path}: {e}") from e diff --git a/packages/parser-core/src/bankstatements_core/analysis/bbox_utils.py b/packages/parser-core/src/bankstatements_core/analysis/bbox_utils.py index 108d45b..8123c26 100644 --- a/packages/parser-core/src/bankstatements_core/analysis/bbox_utils.py +++ b/packages/parser-core/src/bankstatements_core/analysis/bbox_utils.py @@ -5,7 +5,6 @@ """ from dataclasses import dataclass -from typing import List, Tuple @dataclass @@ -133,7 +132,7 @@ def expand_bbox(bbox: BBox, margin: float) -> BBox: ) -def merge_bboxes(bboxes: List[BBox]) -> BBox: +def merge_bboxes(bboxes: list[BBox]) -> BBox: """Merge multiple bounding boxes into a single container bbox. Args: @@ -156,7 +155,7 @@ def merge_bboxes(bboxes: List[BBox]) -> BBox: return BBox(x0=min_x0, y0=min_y0, x1=max_x1, y1=max_y1) -def bbox_from_words(words: List[dict]) -> BBox: +def bbox_from_words(words: list[dict]) -> BBox: """Create a bounding box that contains all given words. 
Args: @@ -179,7 +178,7 @@ def bbox_from_words(words: List[dict]) -> BBox: return BBox(x0=min_x0, y0=min_y0, x1=max_x1, y1=max_y1) -def bbox_intersection(bbox1: BBox, bbox2: BBox) -> Tuple[float, float]: +def bbox_intersection(bbox1: BBox, bbox2: BBox) -> tuple[float, float]: """Calculate the intersection dimensions of two bounding boxes. Args: diff --git a/packages/parser-core/src/bankstatements_core/analysis/column_analyzer.py b/packages/parser-core/src/bankstatements_core/analysis/column_analyzer.py index c896890..5b2ef2f 100644 --- a/packages/parser-core/src/bankstatements_core/analysis/column_analyzer.py +++ b/packages/parser-core/src/bankstatements_core/analysis/column_analyzer.py @@ -5,7 +5,7 @@ """ import logging -from typing import Any, Dict, List, Optional, Tuple +from typing import Any from bankstatements_core.analysis.bbox_utils import BBox @@ -34,7 +34,7 @@ def __init__( def analyze_columns( self, page: Any, table_bbox: BBox - ) -> Dict[str, Tuple[float, float]]: + ) -> dict[str, tuple[float, float]]: """Analyze table and detect column boundaries. Args: @@ -95,7 +95,7 @@ def analyze_columns( logger.info(f"Detected {len(columns)} columns") return columns - def _cluster_x_coordinates(self, words: List[dict]) -> List[float]: + def _cluster_x_coordinates(self, words: list[dict]) -> list[float]: """Cluster word X coordinates to find column alignment points. Args: @@ -135,8 +135,8 @@ def _cluster_x_coordinates(self, words: List[dict]) -> List[float]: return sorted(clusters) def _detect_boundaries_from_clusters( - self, clusters: List[float] - ) -> List[Tuple[float, float]]: + self, clusters: list[float] + ) -> list[tuple[float, float]]: """Detect column boundaries from cluster centers. 
Args: @@ -165,21 +165,20 @@ def _detect_boundaries_from_clusters( else: # Small gap - columns are close, use midpoint x_max = (clusters[i] + clusters[i + 1]) / 2 + # Last column - extend to reasonable width + elif i > 0: + avg_width = (clusters[i] - clusters[0]) / i + x_max = clusters[i] + avg_width else: - # Last column - extend to reasonable width - if i > 0: - avg_width = (clusters[i] - clusters[0]) / i - x_max = clusters[i] + avg_width - else: - x_max = clusters[i] + 100 # Default width + x_max = clusters[i] + 100 # Default width boundaries.append((x_min, x_max)) return boundaries def _find_header_words( - self, table_words: List[dict], table_bbox: BBox - ) -> List[dict]: + self, table_words: list[dict], table_bbox: BBox + ) -> list[dict]: """Find words in the header row of the table. Args: @@ -208,8 +207,8 @@ def _find_header_words( return header_words def _assign_column_names( - self, boundaries: List[Tuple[float, float]], header_words: List[dict] - ) -> List[str]: + self, boundaries: list[tuple[float, float]], header_words: list[dict] + ) -> list[str]: """Assign names to columns based on header words. Strategy: Each header word should be assigned to its BEST matching column only. 
@@ -246,7 +245,7 @@ def _assign_column_names( word_groups.append(current_group) # Assign each word group to the best matching column boundary - column_names: List[Optional[str]] = [None] * len(boundaries) + column_names: list[str | None] = [None] * len(boundaries) for group in word_groups: # Calculate group center @@ -278,7 +277,7 @@ def _assign_column_names( ) # Fill in any unassigned columns with generic names - result_names: List[str] = [] + result_names: list[str] = [] for i in range(len(column_names)): name_val = column_names[i] if name_val is None: @@ -294,8 +293,8 @@ def _assign_column_names( return result_names def _resolve_overlapping_boundaries( - self, boundaries: List[Tuple[float, float]] - ) -> List[Tuple[float, float]]: + self, boundaries: list[tuple[float, float]] + ) -> list[tuple[float, float]]: """Resolve overlapping column boundaries. When columns overlap, adjust boundaries so column i ends just before @@ -337,8 +336,8 @@ def _resolve_overlapping_boundaries( return resolved def _create_columns_from_headers( - self, header_words: List[dict], table_bbox: BBox - ) -> Tuple[List[Tuple[float, float]], List[str]]: + self, header_words: list[dict], table_bbox: BBox + ) -> tuple[list[tuple[float, float]], list[str]]: """Create column boundaries and names directly from header words. 
Args: diff --git a/packages/parser-core/src/bankstatements_core/analysis/iban_spatial_filter.py b/packages/parser-core/src/bankstatements_core/analysis/iban_spatial_filter.py index e2358f7..a14ae62 100644 --- a/packages/parser-core/src/bankstatements_core/analysis/iban_spatial_filter.py +++ b/packages/parser-core/src/bankstatements_core/analysis/iban_spatial_filter.py @@ -9,7 +9,7 @@ import logging import re from dataclasses import dataclass -from typing import Any, List, Optional +from typing import Any from bankstatements_core.analysis.bbox_utils import BBox, overlaps from bankstatements_core.extraction.iban_extractor import IBANExtractor @@ -33,7 +33,7 @@ class IBANCandidate: masked: str bbox: BBox confidence_score: float = 0.0 - rejection_reason: Optional[str] = None + rejection_reason: str | None = None class IBANSpatialFilter: @@ -46,7 +46,7 @@ def __init__(self) -> None: """Initialize IBAN spatial filter.""" self.iban_extractor = IBANExtractor() - def extract_iban_candidates(self, page: Any) -> List[IBANCandidate]: + def extract_iban_candidates(self, page: Any) -> list[IBANCandidate]: # noqa: C901 """Extract IBAN candidates with spatial coordinates from page. Uses two strategies: @@ -169,10 +169,10 @@ def extract_iban_candidates(self, page: Any) -> List[IBANCandidate]: def filter_by_table_overlap( self, - candidates: List[IBANCandidate], - table_regions: List[BBox], + candidates: list[IBANCandidate], + table_regions: list[BBox], overlap_threshold: float = 0.0, - ) -> List[IBANCandidate]: + ) -> list[IBANCandidate]: """Filter out IBANs that overlap with table regions. Args: @@ -216,8 +216,8 @@ def filter_by_table_overlap( return filtered def score_candidates( - self, candidates: List[IBANCandidate], page_height: float - ) -> List[IBANCandidate]: + self, candidates: list[IBANCandidate], page_height: float + ) -> list[IBANCandidate]: """Score IBAN candidates based on location and context. 
Higher scores are given to: @@ -265,9 +265,7 @@ def score_candidates( return candidates_sorted - def select_best_iban( - self, candidates: List[IBANCandidate] - ) -> Optional[IBANCandidate]: + def select_best_iban(self, candidates: list[IBANCandidate]) -> IBANCandidate | None: """Select the best IBAN from scored candidates. Args: diff --git a/packages/parser-core/src/bankstatements_core/analysis/table_detector.py b/packages/parser-core/src/bankstatements_core/analysis/table_detector.py index 9b4da8d..66d2980 100644 --- a/packages/parser-core/src/bankstatements_core/analysis/table_detector.py +++ b/packages/parser-core/src/bankstatements_core/analysis/table_detector.py @@ -6,7 +6,7 @@ import logging from dataclasses import dataclass -from typing import Any, List, Optional +from typing import Any from bankstatements_core.analysis.bbox_utils import BBox, expand_bbox @@ -24,7 +24,7 @@ class TableDetectionResult: page_width: Width of the page in points """ - tables: List[BBox] + tables: list[BBox] page_number: int page_height: float page_width: float @@ -109,7 +109,7 @@ def detect_tables(self, page: Any) -> TableDetectionResult: def get_expanded_table_regions( self, detection: TableDetectionResult, margin: float = 20.0 - ) -> List[BBox]: + ) -> list[BBox]: """Get expanded table regions for overlap detection. Creates a buffer zone around each table to catch IBANs that are @@ -130,7 +130,7 @@ def get_expanded_table_regions( return expanded - def get_largest_table(self, detection: TableDetectionResult) -> Optional[BBox]: + def get_largest_table(self, detection: TableDetectionResult) -> BBox | None: """Get the largest table by area from detection results. 
Args: @@ -146,7 +146,9 @@ def get_largest_table(self, detection: TableDetectionResult) -> Optional[BBox]: logger.debug(f"Largest table: {largest} (area={largest.area:.0f}px²)") return largest - def _detect_text_based_table(self, page: Any) -> Optional[BBox]: + def _detect_text_based_table( # noqa: C901, PLR0912, PLR0915 + self, page: Any + ) -> BBox | None: """Detect table region from text patterns (fallback method). For PDFs without explicit table borders, this method: @@ -160,7 +162,7 @@ def _detect_text_based_table(self, page: Any) -> Optional[BBox]: Returns: BBox of detected table, or None if no table found """ - from collections import defaultdict + from collections import defaultdict # noqa: PLC0415 # Stricter keywords that are more likely to be column headers # Avoid words that commonly appear in transaction descriptions @@ -301,7 +303,7 @@ def _detect_text_based_table(self, page: Any) -> Optional[BBox]: bottom_margin = avg_row_spacing * 1.5 logger.debug( f"Calculated bottom margin: {bottom_margin:.1f}px " - f"(avg row spacing: {avg_row_spacing:.1f}px × 1.5)" + f"(avg row spacing: {avg_row_spacing:.1f}px × 1.5)" # noqa: RUF001 ) else: bottom_margin = 20 diff --git a/packages/parser-core/src/bankstatements_core/analysis/template_generator.py b/packages/parser-core/src/bankstatements_core/analysis/template_generator.py index c23fd9b..abbf998 100644 --- a/packages/parser-core/src/bankstatements_core/analysis/template_generator.py +++ b/packages/parser-core/src/bankstatements_core/analysis/template_generator.py @@ -6,7 +6,7 @@ import json import logging from pathlib import Path -from typing import Any, Dict, Optional, Tuple +from typing import Any logger = logging.getLogger(__name__) @@ -14,7 +14,7 @@ class TemplateGenerator: """Generate template JSON configurations from analysis results.""" - def __init__(self, base_template_path: Optional[Path] = None): + def __init__(self, base_template_path: Path | None = None): """Initialize template generator. 
Args: @@ -30,17 +30,17 @@ def __init__(self, base_template_path: Optional[Path] = None): self.base_template_path = base_template_path logger.debug(f"Using base template: {self.base_template_path}") - def generate_template( + def generate_template( # noqa: PLR0913 self, - columns: Dict[str, Tuple[float, float]], - iban: Optional[str], + columns: dict[str, tuple[float, float]], + iban: str | None, table_top_y: float, table_bottom_y: float, page_height: float, template_id: str = "custom_generated", template_name: str = "Custom Template - Generated", - page: Optional[Any] = None, - ) -> Dict[str, Any]: + page: Any | None = None, + ) -> dict[str, Any]: """Generate template JSON from analysis results. Args: @@ -60,8 +60,8 @@ def generate_template( # Load base template try: - with open(self.base_template_path, "r") as f: - template: Dict[str, Any] = json.load(f) + with open(self.base_template_path) as f: + template: dict[str, Any] = json.load(f) logger.debug(f"Loaded base template from {self.base_template_path}") except (OSError, ValueError, KeyError) as e: # Expected errors: file I/O errors, invalid JSON, missing keys @@ -155,7 +155,7 @@ def generate_template( return template - def save_template(self, template: Dict, output_path: Path) -> None: + def save_template(self, template: dict, output_path: Path) -> None: """Save template JSON to file. Creates or overwrites the file at output_path. @@ -179,10 +179,10 @@ def save_template(self, template: Dict, output_path: Path) -> None: except (OSError, TypeError) as e: # Expected errors: file I/O errors, JSON serialization errors logger.error(f"Failed to save template: {e}") - raise IOError(f"Could not save template to {output_path}: {e}") from e + raise OSError(f"Could not save template to {output_path}: {e}") from e # Let unexpected errors bubble up - def _create_minimal_template(self) -> Dict: + def _create_minimal_template(self) -> dict: """Create a minimal template structure if base template not available. 
Returns: @@ -211,7 +211,7 @@ def _create_minimal_template(self) -> Dict: def _detect_date_grouping( self, page: Any, - date_column: Tuple[float, float], + date_column: tuple[float, float], table_top_y: float, table_bottom_y: float, ) -> bool: @@ -230,7 +230,7 @@ def _detect_date_grouping( Returns: True if date grouping detected, False otherwise """ - from collections import defaultdict + from collections import defaultdict # noqa: PLC0415 # Extract words in the Date column within table region x_min, x_max = date_column @@ -291,7 +291,7 @@ def _detect_date_grouping( return False - def format_template_for_display(self, template: Dict) -> str: + def format_template_for_display(self, template: dict) -> str: """Format template as pretty-printed JSON for logging. Args: diff --git a/packages/parser-core/src/bankstatements_core/builders/processor_builder.py b/packages/parser-core/src/bankstatements_core/builders/processor_builder.py index fe8b5fe..01e16a5 100644 --- a/packages/parser-core/src/bankstatements_core/builders/processor_builder.py +++ b/packages/parser-core/src/bankstatements_core/builders/processor_builder.py @@ -53,7 +53,7 @@ def __init__(self) -> None: self._activity_log: Any | None = None self._entitlements: Any | None = None - def with_input_dir(self, path: Path) -> "BankStatementProcessorBuilder": + def with_input_dir(self, path: Path) -> BankStatementProcessorBuilder: """ Set input directory (required). @@ -66,7 +66,7 @@ def with_input_dir(self, path: Path) -> "BankStatementProcessorBuilder": self._input_dir = path return self - def with_output_dir(self, path: Path) -> "BankStatementProcessorBuilder": + def with_output_dir(self, path: Path) -> BankStatementProcessorBuilder: """ Set output directory (required). 
@@ -81,7 +81,7 @@ def with_output_dir(self, path: Path) -> "BankStatementProcessorBuilder": def with_table_bounds( self, top_y: int, bottom_y: int - ) -> "BankStatementProcessorBuilder": + ) -> BankStatementProcessorBuilder: """ Set table boundary coordinates. @@ -98,7 +98,7 @@ def with_table_bounds( def with_columns( self, columns: dict[str, tuple[int | float, int | float]] - ) -> "BankStatementProcessorBuilder": + ) -> BankStatementProcessorBuilder: """ Set column definitions. @@ -113,7 +113,7 @@ def with_columns( def with_dynamic_boundary( self, enabled: bool = True - ) -> "BankStatementProcessorBuilder": + ) -> BankStatementProcessorBuilder: """ Enable or disable dynamic boundary detection. @@ -126,9 +126,7 @@ def with_dynamic_boundary( self._enable_dynamic_boundary = enabled return self - def with_date_sorting( - self, enabled: bool = True - ) -> "BankStatementProcessorBuilder": + def with_date_sorting(self, enabled: bool = True) -> BankStatementProcessorBuilder: """ Enable or disable chronological date sorting. @@ -143,7 +141,7 @@ def with_date_sorting( def with_recursive_scan( self, enabled: bool = False - ) -> "BankStatementProcessorBuilder": + ) -> BankStatementProcessorBuilder: """ Enable or disable recursive directory scanning for PDFs. @@ -158,9 +156,7 @@ def with_recursive_scan( self._recursive_scan = enabled return self - def with_totals( - self, column_patterns: list[str] - ) -> "BankStatementProcessorBuilder": + def with_totals(self, column_patterns: list[str]) -> BankStatementProcessorBuilder: """ Set column patterns for totals calculation. @@ -175,7 +171,7 @@ def with_totals( def with_monthly_summary( self, enabled: bool = True - ) -> "BankStatementProcessorBuilder": + ) -> BankStatementProcessorBuilder: """ Enable or disable monthly summary generation. 
@@ -190,7 +186,7 @@ def with_monthly_summary( def with_expense_analysis( self, enabled: bool = True - ) -> "BankStatementProcessorBuilder": + ) -> BankStatementProcessorBuilder: """ Enable or disable expense analysis generation. @@ -205,7 +201,7 @@ def with_expense_analysis( def with_output_strategies( self, strategies: dict[str, Any] - ) -> "BankStatementProcessorBuilder": + ) -> BankStatementProcessorBuilder: """ Set output format strategies. @@ -218,7 +214,7 @@ def with_output_strategies( self._output_strategies = strategies return self - def with_duplicate_strategy(self, strategy: Any) -> "BankStatementProcessorBuilder": + def with_duplicate_strategy(self, strategy: Any) -> BankStatementProcessorBuilder: """ Set duplicate detection strategy. @@ -231,7 +227,7 @@ def with_duplicate_strategy(self, strategy: Any) -> "BankStatementProcessorBuild self._duplicate_strategy = strategy return self - def with_repository(self, repository: Any) -> "BankStatementProcessorBuilder": + def with_repository(self, repository: Any) -> BankStatementProcessorBuilder: """ Set transaction repository. @@ -244,7 +240,7 @@ def with_repository(self, repository: Any) -> "BankStatementProcessorBuilder": self._repository = repository return self - def with_activity_log(self, activity_log: Any) -> "BankStatementProcessorBuilder": + def with_activity_log(self, activity_log: Any) -> BankStatementProcessorBuilder: """ Set processing activity log for GDPR audit trail. @@ -257,7 +253,7 @@ def with_activity_log(self, activity_log: Any) -> "BankStatementProcessorBuilder self._activity_log = activity_log return self - def with_entitlements(self, entitlements: Any) -> "BankStatementProcessorBuilder": + def with_entitlements(self, entitlements: Any) -> BankStatementProcessorBuilder: """ Set entitlements for tier-based feature access control. 
@@ -272,7 +268,7 @@ def with_entitlements(self, entitlements: Any) -> "BankStatementProcessorBuilder def with_processor_config( self, config: ProcessorConfig - ) -> "BankStatementProcessorBuilder": + ) -> BankStatementProcessorBuilder: """ Set all config fields at once from a ProcessorConfig. @@ -344,7 +340,7 @@ def build_config(self) -> ProcessorConfig: ), ) - def build(self) -> "BankStatementProcessor": + def build(self) -> BankStatementProcessor: """ Build and return BankStatementProcessor instance. @@ -355,7 +351,9 @@ def build(self) -> "BankStatementProcessor": ValueError: If required parameters are missing """ # Import here to avoid circular dependencies - from bankstatements_core.processor import BankStatementProcessor + from bankstatements_core.processor import ( # noqa: PLC0415 + BankStatementProcessor, + ) # Build configuration object config = self.build_config() @@ -367,12 +365,16 @@ def build(self) -> "BankStatementProcessor": config.extraction.enable_dynamic_boundary, ) - from bankstatements_core.patterns.strategies import AllFieldsDuplicateStrategy - from bankstatements_core.services.duplicate_detector import ( + from bankstatements_core.patterns.strategies import ( # noqa: PLC0415 + AllFieldsDuplicateStrategy, + ) + from bankstatements_core.services.duplicate_detector import ( # noqa: PLC0415 DuplicateDetectionService, ) - from bankstatements_core.services.service_registry import ServiceRegistry - from bankstatements_core.services.sorting_service import ( + from bankstatements_core.services.service_registry import ( # noqa: PLC0415 + ServiceRegistry, + ) + from bankstatements_core.services.sorting_service import ( # noqa: PLC0415 ChronologicalSortingStrategy, NoSortingStrategy, TransactionSortingService, diff --git a/packages/parser-core/src/bankstatements_core/commands/analyze_pdf.py b/packages/parser-core/src/bankstatements_core/commands/analyze_pdf.py index f7dc816..4388fa5 100644 --- 
a/packages/parser-core/src/bankstatements_core/commands/analyze_pdf.py +++ b/packages/parser-core/src/bankstatements_core/commands/analyze_pdf.py @@ -14,7 +14,7 @@ import logging import sys from pathlib import Path -from typing import Any, Optional +from typing import Any import pdfplumber @@ -33,9 +33,9 @@ class PDFAnalyzer: def __init__( self, pdf_path: Path, - output_path: Optional[Path] = None, - template_path: Optional[Path] = None, - base_template_path: Optional[Path] = None, + output_path: Path | None = None, + template_path: Path | None = None, + base_template_path: Path | None = None, ): """Initialize PDF analyzer. @@ -58,7 +58,7 @@ def __init__( base_template_path=base_template_path ) - def analyze(self) -> dict: + def analyze(self) -> dict: # noqa: C901, PLR0912, PLR0915 """Run full PDF analysis workflow. Returns: @@ -143,27 +143,26 @@ def analyze(self) -> dict: ) logger.info(f" Location: {best_iban.bbox}") logger.info(" Reason: Header area bonus, Y-position score") - else: - # Fallback: If spatial filtering removed all IBANs, - # use unfiltered candidates (for template generation, - # any valid IBAN is better than none) - if iban_candidates: - logger.warning( - " ⚠️ All IBANs filtered by spatial overlap, " - "using best unfiltered candidate for template" - ) - scored_candidates = self.iban_filter.score_candidates( - iban_candidates, page_height + # Fallback: If spatial filtering removed all IBANs, + # use unfiltered candidates (for template generation, + # any valid IBAN is better than none) + elif iban_candidates: + logger.warning( + " ⚠️ All IBANs filtered by spatial overlap, " + "using best unfiltered candidate for template" + ) + scored_candidates = self.iban_filter.score_candidates( + iban_candidates, page_height + ) + best_iban = self.iban_filter.select_best_iban(scored_candidates) + if best_iban: + logger.info( + f" ✓ Using unfiltered IBAN: {best_iban.masked} " + f"(score: {best_iban.confidence_score:.1f})" ) - best_iban = 
self.iban_filter.select_best_iban(scored_candidates) - if best_iban: - logger.info( - f" ✓ Using unfiltered IBAN: {best_iban.masked} " - f"(score: {best_iban.confidence_score:.1f})" - ) - else: - best_iban = None - logger.warning(" ⚠️ No valid IBAN found") + else: + best_iban = None + logger.warning(" ⚠️ No valid IBAN found") # Step 7: Analyze columns and generate template logger.info("Step 7: Analyzing column boundaries...") @@ -249,7 +248,7 @@ def analyze(self) -> dict: except (OSError, ValueError, AttributeError, TypeError) as e: # Expected errors: file I/O, invalid PDF structure, missing attributes, type errors - logger.error(f"❌ Analysis failed: {e}", exc_info=True) + logger.exception(f"❌ Analysis failed: {e}") raise ValueError(f"PDF analysis failed: {e}") from e # Let unexpected errors bubble up @@ -262,13 +261,13 @@ def _validate_extraction(self, pdf: Any, template_path: Path) -> None: pdf: Opened pdfplumber PDF object template_path: Path to template JSON file """ - import json + import json # noqa: PLC0415 logger.info(f" Loading template: {template_path.stem}") try: # Load template manually (no TemplateRegistry to avoid entitlement checks) - with open(template_path, "r") as f: + with open(template_path) as f: template_data = json.load(f) extraction_config = template_data.get("extraction", {}) @@ -298,7 +297,9 @@ def _validate_extraction(self, pdf: Any, template_path: Path) -> None: ) # Try to find IBAN using IBANExtractor - from bankstatements_core.extraction.iban_extractor import IBANExtractor + from bankstatements_core.extraction.iban_extractor import ( # noqa: PLC0415 + IBANExtractor, + ) iban_extractor = IBANExtractor() diff --git a/packages/parser-core/src/bankstatements_core/commands/init.py b/packages/parser-core/src/bankstatements_core/commands/init.py index 0c40d54..66b06ea 100644 --- a/packages/parser-core/src/bankstatements_core/commands/init.py +++ b/packages/parser-core/src/bankstatements_core/commands/init.py @@ -9,7 +9,12 @@ logger = 
logging.getLogger(__name__) -def init_directories( +def _echo(msg: str = "") -> None: + """Write a line to stdout (T201-compliant replacement for print).""" + sys.stdout.write(msg + "\n") + + +def init_directories( # noqa: C901, PLR0912, PLR0915 base_dir: Path | None = None, create_samples: bool = False, verbose: bool = True, @@ -41,8 +46,8 @@ def init_directories( base = base_dir if base_dir else Path.cwd() if verbose: - print(f"Initializing directory structure in: {base.resolve()}") - print() + _echo(f"Initializing directory structure in: {base.resolve()}") + _echo() # Define directories to create directories = { @@ -59,29 +64,29 @@ def init_directories( if dir_path.exists(): if verbose: - print(f"✓ Already exists: {dir_path.relative_to(base)}") + _echo(f"✓ Already exists: {dir_path.relative_to(base)}") else: try: dir_path.mkdir(parents=True, exist_ok=True) created_count += 1 if verbose: - print(f"✓ Created: {dir_path.relative_to(base)}") + _echo(f"✓ Created: {dir_path.relative_to(base)}") except OSError as e: # Expected errors: permission issues, disk full logger.error(f"Failed to create directory {dir_path}: {e}") if verbose: - print(f"✗ Failed to create {dir_path.relative_to(base)}: {e}") + _echo(f"✗ Failed to create {dir_path.relative_to(base)}: {e}") return 1 # Let unexpected errors bubble up if verbose and description: - print(f" {description}") + _echo(f" {description}") # Create sample files if requested if create_samples: if verbose: - print() - print("Creating sample files...") + _echo() + _echo("Creating sample files...") # Create .env file if it doesn't exist env_file = base / ".env" @@ -95,14 +100,13 @@ def init_directories( """ env_file.write_text(env_content) if verbose: - print("✓ Created: .env") + _echo("✓ Created: .env") except OSError as e: logger.warning(f"Failed to create .env file: {e}") if verbose: - print(f"✗ Failed to create .env: {e}") - else: - if verbose: - print("✓ Already exists: .env") + _echo(f"✗ Failed to create .env: {e}") + elif 
verbose: + _echo("✓ Already exists: .env") # Create README in input directory input_readme = base / "input" / "README.md" @@ -133,25 +137,25 @@ def init_directories( """ input_readme.write_text(readme_content) if verbose: - print("✓ Created: input/README.md") + _echo("✓ Created: input/README.md") except OSError as e: logger.warning(f"Failed to create input README: {e}") # Success message if verbose: - print() + _echo() if created_count > 0: - print( + _echo( f"✅ Successfully created {created_count} director{'y' if created_count == 1 else 'ies'}" ) else: - print("✅ All directories already exist") - print() - print("Next steps:") - print(" 1. Place PDF bank statements in input/") - print(" 2. Run: bankstatements") - print(" 3. Find processed files in output/") - print() + _echo("✅ All directories already exist") + _echo() + _echo("Next steps:") + _echo(" 1. Place PDF bank statements in input/") + _echo(" 2. Run: bankstatements") + _echo(" 3. Find processed files in output/") + _echo() return 0 @@ -159,7 +163,7 @@ def init_directories( # Catch-all for unexpected errors logger.exception("Unexpected error during initialization") if verbose: - print(f"\n❌ Initialization failed: {e}") + _echo(f"\n❌ Initialization failed: {e}") return 1 @@ -170,7 +174,7 @@ def main() -> int: Returns: Exit code """ - import argparse + import argparse # noqa: PLC0415 parser = argparse.ArgumentParser( description="Initialize directory structure for bank statement processing" diff --git a/packages/parser-core/src/bankstatements_core/config/app_config.py b/packages/parser-core/src/bankstatements_core/config/app_config.py index c83492f..0732025 100644 --- a/packages/parser-core/src/bankstatements_core/config/app_config.py +++ b/packages/parser-core/src/bankstatements_core/config/app_config.py @@ -102,7 +102,7 @@ def _validate_output_formats(self) -> None: ) @classmethod - def from_env(cls) -> "AppConfig": + def from_env(cls) -> AppConfig: """ Load configuration from environment variables with 
validation. @@ -113,7 +113,9 @@ def from_env(cls) -> "AppConfig": ConfigurationError: If configuration is invalid """ # Import here to avoid circular dependency during module load - from bankstatements_core.config.totals_config import parse_totals_columns + from bankstatements_core.config.totals_config import ( # noqa: PLC0415 + parse_totals_columns, + ) try: # Parse integer values with validation @@ -121,7 +123,7 @@ def from_env(cls) -> "AppConfig": table_top_y = EnvironmentParser.parse_int("TABLE_TOP_Y", 300) table_bottom_y = EnvironmentParser.parse_int("TABLE_BOTTOM_Y", 720) except ValueError as e: - raise ConfigurationError(str(e)) + raise ConfigurationError(str(e)) from e # Parse boolean values enable_dynamic_boundary = EnvironmentParser.parse_bool( diff --git a/packages/parser-core/src/bankstatements_core/config/environment_parser.py b/packages/parser-core/src/bankstatements_core/config/environment_parser.py index e2b839f..1768829 100644 --- a/packages/parser-core/src/bankstatements_core/config/environment_parser.py +++ b/packages/parser-core/src/bankstatements_core/config/environment_parser.py @@ -80,7 +80,9 @@ def parse_int(var_name: str, default: int) -> int: try: return int(value_str) except ValueError: - raise ValueError(f"{var_name} must be an integer, got: {value_str}") + raise ValueError( + f"{var_name} must be an integer, got: {value_str}" + ) from None @staticmethod def parse_bool(var_name: str, default: bool = False) -> bool: diff --git a/packages/parser-core/src/bankstatements_core/domain/__init__.py b/packages/parser-core/src/bankstatements_core/domain/__init__.py index 99ca274..f8d2920 100644 --- a/packages/parser-core/src/bankstatements_core/domain/__init__.py +++ b/packages/parser-core/src/bankstatements_core/domain/__init__.py @@ -16,8 +16,8 @@ from bankstatements_core.domain.models.transaction import Transaction __all__ = [ - "Transaction", "ExtractionResult", + "Transaction", "dict_to_transaction", "dicts_to_transactions", 
"transaction_to_dict", diff --git a/packages/parser-core/src/bankstatements_core/domain/models/__init__.py b/packages/parser-core/src/bankstatements_core/domain/models/__init__.py index 6e01a06..156633b 100644 --- a/packages/parser-core/src/bankstatements_core/domain/models/__init__.py +++ b/packages/parser-core/src/bankstatements_core/domain/models/__init__.py @@ -10,8 +10,8 @@ from bankstatements_core.domain.models.transaction import Transaction __all__ = [ - "Transaction", "ExtractionResult", - "ExtractionWarning", "ExtractionScoringConfig", + "ExtractionWarning", + "Transaction", ] diff --git a/packages/parser-core/src/bankstatements_core/domain/models/extraction_scoring_config.py b/packages/parser-core/src/bankstatements_core/domain/models/extraction_scoring_config.py index 71721c6..b8203a3 100644 --- a/packages/parser-core/src/bankstatements_core/domain/models/extraction_scoring_config.py +++ b/packages/parser-core/src/bankstatements_core/domain/models/extraction_scoring_config.py @@ -42,6 +42,6 @@ def __post_init__(self) -> None: raise ValueError(f"{name} must be >= 0.0, got {val}") @classmethod - def default(cls) -> "ExtractionScoringConfig": + def default(cls) -> ExtractionScoringConfig: """Return the default production scoring configuration.""" return cls() diff --git a/packages/parser-core/src/bankstatements_core/domain/models/extraction_warning.py b/packages/parser-core/src/bankstatements_core/domain/models/extraction_warning.py index 8f08bed..5d1a883 100644 --- a/packages/parser-core/src/bankstatements_core/domain/models/extraction_warning.py +++ b/packages/parser-core/src/bankstatements_core/domain/models/extraction_warning.py @@ -35,7 +35,7 @@ def to_dict(self) -> dict: return {"code": self.code, "message": self.message, "page": self.page} @classmethod - def from_dict(cls, data: dict) -> "ExtractionWarning": + def from_dict(cls, data: dict) -> ExtractionWarning: """Deserialise from a plain dict.""" return cls( code=data["code"], diff --git 
a/packages/parser-core/src/bankstatements_core/domain/protocols/__init__.py b/packages/parser-core/src/bankstatements_core/domain/protocols/__init__.py index 79a2bfc..3ccbddc 100644 --- a/packages/parser-core/src/bankstatements_core/domain/protocols/__init__.py +++ b/packages/parser-core/src/bankstatements_core/domain/protocols/__init__.py @@ -22,18 +22,18 @@ ) __all__ = [ - "IJsonWriter", + "IColumnTotals", + "IDuplicateDetector", "IFileDeleter", "IFileReader", - "IPDFReader", + "IIBANGrouping", + "IJsonWriter", + "IMonthlySummary", + "IPDFDiscovery", "IPDFDocument", "IPDFPage", - "IPDFDiscovery", - "ITransactionFilter", - "IIBANGrouping", - "IColumnTotals", + "IPDFReader", "ITemplateDetector", - "IDuplicateDetector", + "ITransactionFilter", "ITransactionSorting", - "IMonthlySummary", ] diff --git a/packages/parser-core/src/bankstatements_core/domain/protocols/services.py b/packages/parser-core/src/bankstatements_core/domain/protocols/services.py index f3f96ed..d860dd8 100644 --- a/packages/parser-core/src/bankstatements_core/domain/protocols/services.py +++ b/packages/parser-core/src/bankstatements_core/domain/protocols/services.py @@ -23,15 +23,15 @@ def discover_pdfs(self, input_dir: Path, recursive: bool = False) -> list[Path]: class ITransactionFilter(Protocol): """Protocol for filtering transaction rows.""" - def apply_all_filters(self, rows: list["Transaction"]) -> list["Transaction"]: + def apply_all_filters(self, rows: list[Transaction]) -> list[Transaction]: """Apply all configured filters to rows.""" ... - def filter_empty_rows(self, rows: list["Transaction"]) -> list["Transaction"]: + def filter_empty_rows(self, rows: list[Transaction]) -> list[Transaction]: """Filter out rows with insufficient data.""" ... - def filter_header_rows(self, rows: list["Transaction"]) -> list["Transaction"]: + def filter_header_rows(self, rows: list[Transaction]) -> list[Transaction]: """Filter out header rows that were incorrectly extracted.""" ... 
@@ -41,9 +41,9 @@ class IIBANGrouping(Protocol): def group_by_iban( self, - transactions: list["Transaction"], + transactions: list[Transaction], pdf_ibans: dict[str, str], - ) -> dict[str, list["Transaction"]]: + ) -> dict[str, list[Transaction]]: """Group transactions by IBAN suffix (last 4 digits).""" ... @@ -65,7 +65,7 @@ def format_totals_row( class ITemplateDetector(Protocol): """Protocol for detecting PDF bank statement templates.""" - def detect_template(self, pdf_path: Path, first_page: Any) -> "BankTemplate": + def detect_template(self, pdf_path: Path, first_page: Any) -> BankTemplate: """Detect template from PDF first page.""" ... @@ -75,8 +75,8 @@ class IDuplicateDetector(Protocol): def detect_and_separate( self, - transactions: list["Transaction"], - ) -> tuple[list["Transaction"], list["Transaction"]]: + transactions: list[Transaction], + ) -> tuple[list[Transaction], list[Transaction]]: """Separate unique transactions from duplicates.""" ... @@ -84,7 +84,7 @@ def detect_and_separate( class ITransactionSorting(Protocol): """Protocol for sorting transactions.""" - def sort(self, transactions: list["Transaction"]) -> list["Transaction"]: + def sort(self, transactions: list[Transaction]) -> list[Transaction]: """Sort transactions using configured strategy.""" ... diff --git a/packages/parser-core/src/bankstatements_core/entitlements.py b/packages/parser-core/src/bankstatements_core/entitlements.py index 29db153..18614af 100644 --- a/packages/parser-core/src/bankstatements_core/entitlements.py +++ b/packages/parser-core/src/bankstatements_core/entitlements.py @@ -51,7 +51,7 @@ class Entitlements: require_iban: bool @classmethod - def free_tier(cls) -> "Entitlements": + def free_tier(cls) -> Entitlements: """ Create FREE tier entitlements. @@ -72,7 +72,7 @@ def free_tier(cls) -> "Entitlements": ) @classmethod - def paid_tier(cls) -> "Entitlements": + def paid_tier(cls) -> Entitlements: """ Create PAID tier entitlements. 
diff --git a/packages/parser-core/src/bankstatements_core/exceptions.py b/packages/parser-core/src/bankstatements_core/exceptions.py index 52a9c26..0674bc2 100644 --- a/packages/parser-core/src/bankstatements_core/exceptions.py +++ b/packages/parser-core/src/bankstatements_core/exceptions.py @@ -359,22 +359,22 @@ class TransactionProcessingError(ProcessingError): "BankStatementError", # Configuration "ConfigurationError", + "DataValidationError", + "DuplicateDetectionError", + # Entitlements + "EntitlementError", + "InputValidationError", # PDF Extraction "PDFExtractionError", "PDFReadError", + # Processing + "ProcessingError", "TableExtractionError", + "TemplateDetectionError", # Templates "TemplateError", - "TemplateDetectionError", "TemplateValidationError", + "TransactionProcessingError", # Validation "ValidationError", - "DataValidationError", - "InputValidationError", - # Entitlements - "EntitlementError", - # Processing - "ProcessingError", - "DuplicateDetectionError", - "TransactionProcessingError", ] diff --git a/packages/parser-core/src/bankstatements_core/extraction/__init__.py b/packages/parser-core/src/bankstatements_core/extraction/__init__.py index 0324b6e..aa29f39 100644 --- a/packages/parser-core/src/bankstatements_core/extraction/__init__.py +++ b/packages/parser-core/src/bankstatements_core/extraction/__init__.py @@ -26,8 +26,8 @@ "BoundaryDetectionResult", "ColumnType", "ColumnTypeIdentifier", - "PageHeaderAnalyser", "PDFTableExtractor", + "PageHeaderAnalyser", "RowBuilder", "RowClassifier", "RowPostProcessor", diff --git a/packages/parser-core/src/bankstatements_core/extraction/boundary_detector.py b/packages/parser-core/src/bankstatements_core/extraction/boundary_detector.py index 7c4a8ef..eda596b 100644 --- a/packages/parser-core/src/bankstatements_core/extraction/boundary_detector.py +++ b/packages/parser-core/src/bankstatements_core/extraction/boundary_detector.py @@ -49,7 +49,7 @@ class TableBoundaryDetector: 5. 
Administrative content density analysis """ - def __init__( + def __init__( # noqa: PLR0913 self, columns: dict[str, tuple[int | float, int | float]], fallback_bottom_y: int = 720, @@ -85,7 +85,7 @@ def __init__( self.structure_breakdown_threshold = structure_breakdown_threshold self.consecutive_threshold = dynamic_boundary_threshold - def detect_boundary(self, words: list[dict]) -> int: + def detect_boundary(self, words: list[dict]) -> int: # noqa: PLR0911 """ Main template method for detecting table end boundary. diff --git a/packages/parser-core/src/bankstatements_core/extraction/column_identifier.py b/packages/parser-core/src/bankstatements_core/extraction/column_identifier.py index cae146c..481ac61 100644 --- a/packages/parser-core/src/bankstatements_core/extraction/column_identifier.py +++ b/packages/parser-core/src/bankstatements_core/extraction/column_identifier.py @@ -35,7 +35,7 @@ class ColumnTypeIdentifier: """ # Pattern mappings for each column type (re-exported from domain) - from bankstatements_core.domain.column_types import ( + from bankstatements_core.domain.column_types import ( # noqa: PLC0415 BALANCE_PATTERNS, CREDIT_PATTERNS, DATE_PATTERNS, diff --git a/packages/parser-core/src/bankstatements_core/extraction/extraction_facade.py b/packages/parser-core/src/bankstatements_core/extraction/extraction_facade.py index 7c4f9c0..462ec8e 100644 --- a/packages/parser-core/src/bankstatements_core/extraction/extraction_facade.py +++ b/packages/parser-core/src/bankstatements_core/extraction/extraction_facade.py @@ -19,7 +19,7 @@ from bankstatements_core.templates.template_model import BankTemplate -def detect_table_end_boundary_smart( +def detect_table_end_boundary_smart( # noqa: PLR0913 words: list[dict], table_top_y: int, columns: dict[str, tuple[int | float, int | float]], @@ -27,7 +27,7 @@ def detect_table_end_boundary_smart( min_section_gap: int = 50, structure_breakdown_threshold: int = 8, dynamic_boundary_threshold: int = 15, - row_classifier: 
"RowClassifier | None" = None, + row_classifier: RowClassifier | None = None, ) -> int: """ Detect table end intelligently (facade). @@ -47,7 +47,9 @@ def detect_table_end_boundary_smart( Returns: Detected bottom Y coordinate """ - from bankstatements_core.extraction.boundary_detector import TableBoundaryDetector + from bankstatements_core.extraction.boundary_detector import ( # noqa: PLC0415 + TableBoundaryDetector, + ) detector = TableBoundaryDetector( columns=columns, @@ -62,7 +64,7 @@ def detect_table_end_boundary_smart( return detector.detect_boundary(words) -def extract_tables_from_pdf( +def extract_tables_from_pdf( # noqa: PLR0913 pdf_path: Path, table_top_y: int = TABLE_TOP_Y, table_bottom_y: int = TABLE_BOTTOM_Y, @@ -70,7 +72,7 @@ def extract_tables_from_pdf( enable_dynamic_boundary: bool = False, enable_page_validation: bool | None = None, enable_header_check: bool | None = None, - template: "BankTemplate" | None = None, + template: BankTemplate | None = None, ) -> ExtractionResult: """ Extract table data from PDF within specified bounds (facade function). 
@@ -91,7 +93,9 @@ def extract_tables_from_pdf( ExtractionResult containing extracted transactions, page count, IBAN, source file path, and any document-level warnings """ - from bankstatements_core.extraction.pdf_extractor import PDFTableExtractor + from bankstatements_core.extraction.pdf_extractor import ( # noqa: PLC0415 + PDFTableExtractor, + ) # If template provided, use template configuration if template is not None: diff --git a/packages/parser-core/src/bankstatements_core/extraction/iban_extractor.py b/packages/parser-core/src/bankstatements_core/extraction/iban_extractor.py index 162ed0b..f92c198 100644 --- a/packages/parser-core/src/bankstatements_core/extraction/iban_extractor.py +++ b/packages/parser-core/src/bankstatements_core/extraction/iban_extractor.py @@ -26,7 +26,7 @@ class IBANExtractor: # IBAN format: 2-letter country code + 2 check digits + up to 30 alphanumeric characters # Common formats by country (code: length) - IBAN_LENGTHS = { + IBAN_LENGTHS = { # noqa: RUF012 "AD": 24, # Andorra "AE": 23, # UAE "AL": 28, # Albania diff --git a/packages/parser-core/src/bankstatements_core/extraction/page_header_analyser.py b/packages/parser-core/src/bankstatements_core/extraction/page_header_analyser.py index f443de3..8d31897 100644 --- a/packages/parser-core/src/bankstatements_core/extraction/page_header_analyser.py +++ b/packages/parser-core/src/bankstatements_core/extraction/page_header_analyser.py @@ -29,7 +29,7 @@ class PageHeaderAnalyser: """Inspects the page header area for credit card indicators and IBAN.""" - def __init__(self, iban_extractor: "IBANExtractor") -> None: + def __init__(self, iban_extractor: IBANExtractor) -> None: self._iban_extractor = iban_extractor def is_credit_card_statement(self, page: Any, table_top_y: int) -> bool: diff --git a/packages/parser-core/src/bankstatements_core/extraction/pdf_extractor.py b/packages/parser-core/src/bankstatements_core/extraction/pdf_extractor.py index 0e83b8b..6aa67ee 100644 --- 
a/packages/parser-core/src/bankstatements_core/extraction/pdf_extractor.py +++ b/packages/parser-core/src/bankstatements_core/extraction/pdf_extractor.py @@ -45,7 +45,7 @@ class PDFTableExtractor: - RowPostProcessor: date propagation and metadata tagging """ - def __init__( + def __init__( # noqa: PLR0913 self, columns: dict[str, tuple[int | float, int | float]], table_top_y: int = 300, @@ -54,9 +54,9 @@ def __init__( enable_page_validation: bool = True, enable_header_check: bool = True, header_check_top_y: int | None = None, - pdf_reader: "IPDFReader | None" = None, - extraction_config: "Any | None" = None, - template: "Any | None" = None, + pdf_reader: IPDFReader | None = None, + extraction_config: Any | None = None, + template: Any | None = None, scoring_config: ExtractionScoringConfig | None = None, ): self.columns = columns @@ -75,7 +75,7 @@ def __init__( self._header_analyser = PageHeaderAnalyser(IBANExtractor()) if pdf_reader is None: - from bankstatements_core.adapters.pdfplumber_adapter import ( + from bankstatements_core.adapters.pdfplumber_adapter import ( # noqa: PLC0415 PDFPlumberReaderAdapter, ) @@ -172,7 +172,7 @@ def _extract_page(self, page: Any, page_num: int) -> list[dict] | None: page_rows = self._row_builder.build_rows(words) if self.page_validation_enabled: - from bankstatements_core.services.page_validation import ( + from bankstatements_core.services.page_validation import ( # noqa: PLC0415 PageValidationService, ) @@ -185,7 +185,9 @@ def _extract_page(self, page: Any, page_num: int) -> list[dict] | None: ) return None - from bankstatements_core.services.row_merger import RowMergerService + from bankstatements_core.services.row_merger import ( # noqa: PLC0415 + RowMergerService, + ) return RowMergerService().merge_continuation_lines(page_rows, self.columns) @@ -216,10 +218,10 @@ def _determine_boundaries_and_extract( all_words = initial_area.extract_words(use_text_flow=True) if self.header_check_enabled: - from 
bankstatements_core.extraction.extraction_params import ( + from bankstatements_core.extraction.extraction_params import ( # noqa: PLC0415 MIN_HEADER_KEYWORDS, ) - from bankstatements_core.services.header_detection import ( + from bankstatements_core.services.header_detection import ( # noqa: PLC0415 HeaderDetectionService, ) @@ -237,7 +239,7 @@ def _determine_boundaries_and_extract( logger.info(f"Page {page_num}: No table headers detected, skipping") return None - from bankstatements_core.extraction.extraction_facade import ( + from bankstatements_core.extraction.extraction_facade import ( # noqa: PLC0415 detect_table_end_boundary_smart, ) @@ -264,10 +266,10 @@ def _determine_boundaries_and_extract( words = table_area.extract_words(use_text_flow=True) if self.header_check_enabled: - from bankstatements_core.extraction.extraction_params import ( + from bankstatements_core.extraction.extraction_params import ( # noqa: PLC0415 MIN_HEADER_KEYWORDS, ) - from bankstatements_core.services.header_detection import ( + from bankstatements_core.services.header_detection import ( # noqa: PLC0415 HeaderDetectionService, ) diff --git a/packages/parser-core/src/bankstatements_core/extraction/row_builder.py b/packages/parser-core/src/bankstatements_core/extraction/row_builder.py index c1f59fd..1319f0f 100644 --- a/packages/parser-core/src/bankstatements_core/extraction/row_builder.py +++ b/packages/parser-core/src/bankstatements_core/extraction/row_builder.py @@ -32,7 +32,7 @@ class RowBuilder: def __init__( self, columns: dict[str, tuple[int | float, int | float]], - row_classifier: "RowClassifier", + row_classifier: RowClassifier, ) -> None: self._columns = columns self._row_classifier = row_classifier diff --git a/packages/parser-core/src/bankstatements_core/extraction/row_classifiers.py b/packages/parser-core/src/bankstatements_core/extraction/row_classifiers.py index c50ca99..2c44df6 100644 --- a/packages/parser-core/src/bankstatements_core/extraction/row_classifiers.py +++ 
b/packages/parser-core/src/bankstatements_core/extraction/row_classifiers.py @@ -9,7 +9,7 @@ import re from abc import ABC, abstractmethod -from typing import Sequence +from collections.abc import Sequence from bankstatements_core.extraction.column_identifier import ColumnTypeIdentifier from bankstatements_core.services.row_analysis import RowAnalysisService @@ -30,7 +30,7 @@ def __init__(self) -> None: """Initialize classifier with no next classifier.""" self._next_classifier: RowClassifier | None = None - def set_next(self, classifier: "RowClassifier") -> "RowClassifier": + def set_next(self, classifier: RowClassifier) -> RowClassifier: """ Set the next classifier in the chain. @@ -154,8 +154,9 @@ def _do_classify( for _col_name, col_value in row_values.items(): col_value_lower = col_value.lower() if ( - col_value_lower.startswith("'") - and "(" in col_value_lower # 'Field'(description) + ( + col_value_lower.startswith("'") and "(" in col_value_lower + ) # 'Field'(description) or re.match( r"^\w+(date|time|amount|balance|detail)", col_value_lower ) # FieldName patterns diff --git a/packages/parser-core/src/bankstatements_core/extraction/row_post_processor.py b/packages/parser-core/src/bankstatements_core/extraction/row_post_processor.py index 3cdf7ae..1dececb 100644 --- a/packages/parser-core/src/bankstatements_core/extraction/row_post_processor.py +++ b/packages/parser-core/src/bankstatements_core/extraction/row_post_processor.py @@ -50,11 +50,11 @@ def extract_filename_date(filename: str) -> str: class RowPostProcessor: """Tags rows with metadata and propagates dates to dateless transaction rows.""" - def __init__( + def __init__( # noqa: PLR0913 self, columns: dict[str, tuple[int | float, int | float]], - row_classifier: "RowClassifier", - template: "BankTemplate | None", + row_classifier: RowClassifier, + template: BankTemplate | None, filename_date: str, filename: str, scoring_config: ExtractionScoringConfig | None = None, diff --git 
a/packages/parser-core/src/bankstatements_core/extraction/word_utils.py b/packages/parser-core/src/bankstatements_core/extraction/word_utils.py index 95cbff0..ca1a969 100644 --- a/packages/parser-core/src/bankstatements_core/extraction/word_utils.py +++ b/packages/parser-core/src/bankstatements_core/extraction/word_utils.py @@ -86,10 +86,9 @@ def assign_words_to_columns( if xmin <= x0 and x1 <= xmax: row[col] += text + " " break - else: - if xmin <= x0 < xmax: - row[col] += text + " " - break + elif xmin <= x0 < xmax: + row[col] += text + " " + break return {k: v.strip() for k, v in row.items()} @@ -115,7 +114,7 @@ def calculate_column_coverage( Note: Canonical source: ``PageValidationService.calculate_column_coverage`` - in ``services/page_validation.py`` (L116–L133). + in ``services/page_validation.py`` (L116-L133). """ if not rows or not columns: return 0.0 diff --git a/packages/parser-core/src/bankstatements_core/facades/processing_facade.py b/packages/parser-core/src/bankstatements_core/facades/processing_facade.py index b2abaf0..c7f94d0 100644 --- a/packages/parser-core/src/bankstatements_core/facades/processing_facade.py +++ b/packages/parser-core/src/bankstatements_core/facades/processing_facade.py @@ -45,12 +45,12 @@ def __init__( """ self.config = config self.entitlements = entitlements or Entitlements.free_tier() - self._processor: "BankStatementProcessor" | None = None + self._processor: BankStatementProcessor | None = None @classmethod def from_environment( cls, entitlements: Entitlements | None = None - ) -> "BankStatementProcessingFacade": + ) -> BankStatementProcessingFacade: """ Create facade from environment variables. 
@@ -65,7 +65,9 @@ def from_environment( """ # Load configuration using singleton # ConfigurationError will propagate to caller - from bankstatements_core.patterns.repositories import get_config_singleton + from bankstatements_core.patterns.repositories import ( # noqa: PLC0415 + get_config_singleton, + ) try: config = get_config_singleton() @@ -146,14 +148,16 @@ def process_all(self) -> dict[str, Any]: # Let unexpected errors bubble up # Create processing activity log for GDPR audit trail - from bankstatements_core.services.processing_activity_log import ( + from bankstatements_core.services.processing_activity_log import ( # noqa: PLC0415 ProcessingActivityLog, ) activity_log = ProcessingActivityLog(self.config.logs_dir) # Create processor using factory - from bankstatements_core.patterns.factories import ProcessorFactory + from bankstatements_core.patterns.factories import ( # noqa: PLC0415 + ProcessorFactory, + ) self._processor = ProcessorFactory.create_from_config( self.config, activity_log=activity_log, entitlements=self.entitlements @@ -167,7 +171,9 @@ def process_all(self) -> dict[str, Any]: # Auto cleanup if enabled if self.config.auto_cleanup_on_exit: logger.info("Auto cleanup enabled, deleting output files...") - from bankstatements_core.services.data_retention import DataRetentionService + from bankstatements_core.services.data_retention import ( # noqa: PLC0415 + DataRetentionService, + ) service = DataRetentionService(0, self.config.output_dir) deleted_count = service.cleanup_all_files(audit_log=activity_log) @@ -175,7 +181,7 @@ def process_all(self) -> dict[str, Any]: return summary - def process_with_error_handling(self) -> int: + def process_with_error_handling(self) -> int: # noqa: PLR0911 """ Process all files with comprehensive error handling. 
@@ -192,7 +198,7 @@ def process_with_error_handling(self) -> int: """ try: summary = self.process_all() - from bankstatements_core.utils import log_summary + from bankstatements_core.utils import log_summary # noqa: PLC0415 log_summary(summary) return 0 diff --git a/packages/parser-core/src/bankstatements_core/patterns/__init__.py b/packages/parser-core/src/bankstatements_core/patterns/__init__.py index f4c28e5..02a694e 100644 --- a/packages/parser-core/src/bankstatements_core/patterns/__init__.py +++ b/packages/parser-core/src/bankstatements_core/patterns/__init__.py @@ -28,19 +28,19 @@ ) __all__ = [ # pragma: no cover + "AllFieldsDuplicateStrategy", + "CSVOutputStrategy", # Repositories "ConfigRepository", - "EnvironmentConfigRepository", - "TransactionRepository", - "FileSystemTransactionRepository", + "DateAmountDuplicateStrategy", # Strategies "DuplicateDetectionStrategy", - "AllFieldsDuplicateStrategy", - "DateAmountDuplicateStrategy", - "OutputFormatStrategy", - "CSVOutputStrategy", - "JSONOutputStrategy", + "EnvironmentConfigRepository", "ExcelOutputStrategy", + "FileSystemTransactionRepository", + "JSONOutputStrategy", + "OutputFormatStrategy", # Factories "ProcessorFactory", + "TransactionRepository", ] diff --git a/packages/parser-core/src/bankstatements_core/patterns/factories.py b/packages/parser-core/src/bankstatements_core/patterns/factories.py index 2e5e23a..0de1736 100644 --- a/packages/parser-core/src/bankstatements_core/patterns/factories.py +++ b/packages/parser-core/src/bankstatements_core/patterns/factories.py @@ -48,7 +48,7 @@ def create_from_config( output_strategies: dict[str, OutputFormatStrategy] | None = None, activity_log: Any | None = None, entitlements: Any | None = None, - ) -> "BankStatementProcessor": + ) -> BankStatementProcessor: """ Create a processor from application configuration using Builder pattern. 
@@ -64,7 +64,9 @@ def create_from_config( Returns: Configured BankStatementProcessor instance """ - from bankstatements_core.builders import BankStatementProcessorBuilder + from bankstatements_core.builders import ( # noqa: PLC0415 + BankStatementProcessorBuilder, + ) # Get column configuration columns = get_columns_config() @@ -127,7 +129,7 @@ def create_from_config( return builder.build() @staticmethod - def create_for_bank(bank_type: str, config: AppConfig) -> "BankStatementProcessor": + def create_for_bank(bank_type: str, config: AppConfig) -> BankStatementProcessor: """ Create a processor optimized for a specific bank. @@ -155,7 +157,7 @@ def create_for_bank(bank_type: str, config: AppConfig) -> "BankStatementProcesso return ProcessorFactory.create_from_config(config, strategy) @staticmethod - def create_custom( + def create_custom( # noqa: PLR0913 input_dir: Path, output_dir: Path, table_top_y: int = 100, @@ -164,7 +166,7 @@ def create_custom( output_strategies: dict[str, OutputFormatStrategy] | None = None, entitlements: Any | None = None, **kwargs: Any, - ) -> "BankStatementProcessor": + ) -> BankStatementProcessor: """ Create a processor with custom parameters. 
@@ -184,7 +186,9 @@ def create_custom( Returns: Configured BankStatementProcessor instance """ - from bankstatements_core.processor import BankStatementProcessor + from bankstatements_core.processor import ( # noqa: PLC0415 + BankStatementProcessor, + ) columns = get_columns_config() @@ -223,7 +227,9 @@ def create_custom( ), ) - from bankstatements_core.services.service_registry import ServiceRegistry + from bankstatements_core.services.service_registry import ( # noqa: PLC0415 + ServiceRegistry, + ) registry = ServiceRegistry.from_config(config, entitlements=entitlements) diff --git a/packages/parser-core/src/bankstatements_core/patterns/strategies.py b/packages/parser-core/src/bankstatements_core/patterns/strategies.py index 58fec98..f572ea7 100644 --- a/packages/parser-core/src/bankstatements_core/patterns/strategies.py +++ b/packages/parser-core/src/bankstatements_core/patterns/strategies.py @@ -20,7 +20,7 @@ if TYPE_CHECKING: from bankstatements_core.domain.models.transaction import Transaction - from bankstatements_core.entitlements import Entitlements # noqa: F401 + from bankstatements_core.entitlements import Entitlements logger = logging.getLogger(__name__) @@ -29,7 +29,7 @@ class DuplicateDetectionStrategy(ABC): """Abstract strategy for detecting duplicate transactions.""" @abstractmethod - def create_key(self, transaction: "Transaction") -> str: + def create_key(self, transaction: Transaction) -> str: """ Create a unique key for a transaction. @@ -44,8 +44,8 @@ def create_key(self, transaction: "Transaction") -> str: pass def detect_duplicates( - self, transactions: list["Transaction"] - ) -> tuple[list["Transaction"], list["Transaction"]]: + self, transactions: list[Transaction] + ) -> tuple[list[Transaction], list[Transaction]]: """ Detect duplicates in a list of transactions. 
@@ -55,8 +55,8 @@ def detect_duplicates( Returns: Tuple of (unique_transactions, duplicate_transactions) """ - unique_rows: list["Transaction"] = [] - duplicate_rows: list["Transaction"] = [] + unique_rows: list[Transaction] = [] + duplicate_rows: list[Transaction] = [] transaction_files: dict[str, str] = {} for tx in transactions: @@ -86,7 +86,7 @@ class AllFieldsDuplicateStrategy(DuplicateDetectionStrategy): details, and all monetary fields to be considered duplicates. """ - def create_key(self, transaction: "Transaction") -> str: + def create_key(self, transaction: Transaction) -> str: """Create key from date, details, and all monetary columns.""" amounts = [] for field_name in ("debit", "credit", "balance"): @@ -114,7 +114,7 @@ class DateAmountDuplicateStrategy(DuplicateDetectionStrategy): between statements, but the date and amount should match. """ - def create_key(self, transaction: "Transaction") -> str: + def create_key(self, transaction: Transaction) -> str: """Create key from date and sum of all monetary amounts. Only monetary fields (debit, credit, balance) and explicit @@ -152,7 +152,7 @@ class CreditCardDuplicateStrategy(DuplicateDetectionStrategy): - Transaction type provides critical disambiguation """ - def create_key(self, transaction: "Transaction") -> str: + def create_key(self, transaction: Transaction) -> str: """Create composite key: date:transaction_type:amount. 
Args: @@ -257,7 +257,7 @@ def _supports_totals(self) -> bool: """ return False - def _write_totals( # noqa: B027 + def _write_totals( # noqa: B027 — intentional hook: not all output strategies support totals self, file_path: Path, totals_row: list[str], @@ -371,7 +371,7 @@ def _write_totals( totals_row: list[str], ) -> None: """Append pre-calculated totals row to Excel file.""" - from openpyxl import load_workbook + from openpyxl import load_workbook # noqa: PLC0415 # Re-open workbook to append totals workbook = load_workbook(file_path) @@ -460,7 +460,7 @@ def _apply_number_formatting( df: DataFrame containing the data column_names: Ordered list of column names """ - from openpyxl.styles import numbers + from openpyxl.styles import numbers # noqa: PLC0415 worksheet = writer.sheets["Transactions"] @@ -474,7 +474,7 @@ def _apply_number_formatting( def create_output_strategy( - format_name: str, entitlements: "Entitlements" + format_name: str, entitlements: Entitlements ) -> OutputFormatStrategy: """ Create output strategy with entitlement enforcement. 
@@ -504,7 +504,6 @@ def create_output_strategy( >>> ent = Entitlements.paid_tier() >>> strategy = create_output_strategy("json", ent) # OK """ - from bankstatements_core.entitlements import Entitlements # noqa: F401 # Normalize format name format_lower = format_name.lower() diff --git a/packages/parser-core/src/bankstatements_core/pdf_table_extractor.py b/packages/parser-core/src/bankstatements_core/pdf_table_extractor.py index 7a69ced..cba9105 100644 --- a/packages/parser-core/src/bankstatements_core/pdf_table_extractor.py +++ b/packages/parser-core/src/bankstatements_core/pdf_table_extractor.py @@ -12,7 +12,7 @@ import logging import warnings -import pdfplumber # noqa: F401 - used by extraction module +import pdfplumber # noqa: F401 — re-exported for backward-compat test patching warnings.warn( "bankstatements_core.pdf_table_extractor is a backward-compatibility shim " @@ -26,7 +26,7 @@ logger = logging.getLogger(__name__) # Re-export column configuration (backward compatibility) -from bankstatements_core.config.column_config import ( # noqa: E402, F401 +from bankstatements_core.config.column_config import ( DEFAULT_COLUMNS, get_column_names, get_columns_config, @@ -34,13 +34,13 @@ ) # Re-export extraction functions (backward compatibility) -from bankstatements_core.extraction.extraction_facade import ( # noqa: E402, F401 +from bankstatements_core.extraction.extraction_facade import ( detect_table_end_boundary_smart, extract_tables_from_pdf, ) # Re-export extraction parameters (backward compatibility) -from bankstatements_core.extraction.extraction_params import ( # noqa: E402, F401 +from bankstatements_core.extraction.extraction_params import ( ADMINISTRATIVE_PATTERNS, CONTENT_DENSITY_THRESHOLD, ENABLE_PAGE_VALIDATION, @@ -55,19 +55,11 @@ TABLE_BOTTOM_Y, TABLE_TOP_Y, ) -from bankstatements_core.extraction.row_classifiers import ( # noqa: E402, F401 - create_row_classifier_chain, -) -from bankstatements_core.services.content_density import ( # noqa: E402, F401 - 
ContentDensityService, -) -from bankstatements_core.services.header_detection import ( # noqa: E402, F401 - HeaderDetectionService, -) -from bankstatements_core.services.page_validation import ( # noqa: E402, F401 - PageValidationService, -) -from bankstatements_core.services.row_merger import RowMergerService # noqa: E402, F401 +from bankstatements_core.extraction.row_classifiers import create_row_classifier_chain +from bankstatements_core.services.content_density import ContentDensityService +from bankstatements_core.services.header_detection import HeaderDetectionService +from bankstatements_core.services.page_validation import PageValidationService +from bankstatements_core.services.row_merger import RowMergerService # Module-level singletons (instantiated once, not per-call) _PAGE_VALIDATION_SERVICE = PageValidationService( @@ -136,7 +128,9 @@ def analyze_content_density( # from the explicit public list. def _looks_like_date(text: str) -> bool: """Check if text looks like a valid date (backward compatibility wrapper).""" - from bankstatements_core.services.row_analysis import RowAnalysisService + from bankstatements_core.services.row_analysis import ( # noqa: PLC0415 + RowAnalysisService, + ) service = RowAnalysisService() return service.looks_like_date(text) @@ -144,7 +138,9 @@ def _looks_like_date(text: str) -> bool: def calculate_row_completeness_score(row: dict, columns: dict) -> float: """Score row completeness (backward compatibility wrapper).""" - from bankstatements_core.services.row_analysis import RowAnalysisService + from bankstatements_core.services.row_analysis import ( # noqa: PLC0415 + RowAnalysisService, + ) service = RowAnalysisService() return service.calculate_row_completeness_score(row, columns) @@ -152,36 +148,36 @@ def calculate_row_completeness_score(row: dict, columns: dict) -> float: # Explicitly list all public exports for backward compatibility __all__ = [ + "ADMINISTRATIVE_PATTERNS", + "CONTENT_DENSITY_THRESHOLD", # Column 
configuration "DEFAULT_COLUMNS", - "get_column_names", - "get_columns_config", - "parse_columns_from_env", - # Extraction parameters - "TABLE_TOP_Y", - "TABLE_BOTTOM_Y", - "CONTENT_DENSITY_THRESHOLD", - "SLIDING_WINDOW_SIZE", - "MIN_TRANSACTION_SCORE", "ENABLE_PAGE_VALIDATION", - "MIN_TABLE_ROWS", "MIN_COLUMN_COVERAGE", + "MIN_HEADER_KEYWORDS", + "MIN_TABLE_ROWS", "MIN_TRANSACTION_RATIO", - "REQUIRE_DATE_COLUMN", + "MIN_TRANSACTION_SCORE", "REQUIRE_AMOUNT_COLUMN", - "MIN_HEADER_KEYWORDS", - "ADMINISTRATIVE_PATTERNS", - # Extraction functions - "extract_tables_from_pdf", - "detect_table_end_boundary_smart", - # Row classification - "classify_row_type", + "REQUIRE_DATE_COLUMN", + "SLIDING_WINDOW_SIZE", + "TABLE_BOTTOM_Y", + # Extraction parameters + "TABLE_TOP_Y", # Content analysis "analyze_content_density", - # Validation - "validate_page_structure", "calculate_column_coverage", - "has_column_type", + # Row classification + "classify_row_type", + "detect_table_end_boundary_smart", "detect_table_headers", + # Extraction functions + "extract_tables_from_pdf", + "get_column_names", + "get_columns_config", + "has_column_type", "merge_continuation_lines", + "parse_columns_from_env", + # Validation + "validate_page_structure", ] diff --git a/packages/parser-core/src/bankstatements_core/processor.py b/packages/parser-core/src/bankstatements_core/processor.py index 74848e5..07fac57 100644 --- a/packages/parser-core/src/bankstatements_core/processor.py +++ b/packages/parser-core/src/bankstatements_core/processor.py @@ -1,8 +1,6 @@ from __future__ import annotations -import json # noqa: F401 - imported for test mocking import logging -from collections import defaultdict # noqa: F401 - imported for test mocking from datetime import datetime from typing import Any @@ -10,7 +8,9 @@ from bankstatements_core.config.column_config import DEFAULT_COLUMNS, get_column_names from bankstatements_core.config.processor_config import ProcessorConfig -from 
bankstatements_core.config.totals_config import parse_totals_columns # noqa: F401 +from bankstatements_core.config.totals_config import ( # noqa: F401 — re-exported for backward compat + parse_totals_columns, +) from bankstatements_core.domain import ExtractionResult from bankstatements_core.domain.converters import transactions_to_dicts from bankstatements_core.domain.models.transaction import Transaction @@ -30,7 +30,10 @@ TransactionSortingService, ) from bankstatements_core.services.transaction_filter import TransactionFilterService -from bankstatements_core.utils import is_date_column, to_float # noqa: F401 +from bankstatements_core.utils import ( # noqa: F401 — re-exported for backward compat + is_date_column, + to_float, +) logger = logging.getLogger(__name__) @@ -112,7 +115,7 @@ def parse_transaction_date(date_str: str) -> datetime: class BankStatementProcessor: - def __init__( + def __init__( # noqa: PLR0913, PLR0915 self, config: ProcessorConfig, output_strategies: dict[str, Any] | None = None, @@ -171,7 +174,7 @@ def __init__( # Strategy pattern: Output format strategies (defaults based on config) if output_strategies is None: - from bankstatements_core.patterns.strategies import ( + from bankstatements_core.patterns.strategies import ( # noqa: PLC0415 CSVOutputStrategy, JSONOutputStrategy, OutputFormatStrategy, @@ -188,7 +191,7 @@ def __init__( # Strategy pattern: Duplicate detection strategy (defaults to AllFieldsStrategy) if duplicate_strategy is None: - from bankstatements_core.patterns.strategies import ( + from bankstatements_core.patterns.strategies import ( # noqa: PLC0415 AllFieldsDuplicateStrategy, ) @@ -222,7 +225,7 @@ def __init__( # Repository pattern: Transaction repository (defaults to FileSystemTransactionRepository) if repository is None: - from bankstatements_core.patterns.repositories import ( + from bankstatements_core.patterns.repositories import ( # noqa: PLC0415 FileSystemTransactionRepository, ) diff --git 
a/packages/parser-core/src/bankstatements_core/services/content_density.py b/packages/parser-core/src/bankstatements_core/services/content_density.py index f0d1afb..7098169 100644 --- a/packages/parser-core/src/bankstatements_core/services/content_density.py +++ b/packages/parser-core/src/bankstatements_core/services/content_density.py @@ -101,7 +101,7 @@ def _classify_row_type( Returns: String classification: 'transaction', etc. """ - from bankstatements_core.extraction.row_classifiers import ( + from bankstatements_core.extraction.row_classifiers import ( # noqa: PLC0415 create_row_classifier_chain, ) diff --git a/packages/parser-core/src/bankstatements_core/services/data_retention.py b/packages/parser-core/src/bankstatements_core/services/data_retention.py index 4757a5f..e37f018 100644 --- a/packages/parser-core/src/bankstatements_core/services/data_retention.py +++ b/packages/parser-core/src/bankstatements_core/services/data_retention.py @@ -29,7 +29,7 @@ def __init__( self, retention_days: int, output_dir: Path, - file_deleter: "IFileDeleter | None" = None, + file_deleter: IFileDeleter | None = None, ): """ Initialize data retention service. @@ -74,7 +74,7 @@ def find_expired_files(self) -> list[Path]: return expired_files def cleanup_expired_files( - self, audit_log: "ProcessingActivityLog" | None = None + self, audit_log: ProcessingActivityLog | None = None ) -> int: """ Delete files older than retention period with secure deletion. @@ -119,9 +119,7 @@ def cleanup_expired_files( logger.info("Cleanup completed: %d files deleted", deleted_count) return deleted_count - def cleanup_all_files( - self, audit_log: "ProcessingActivityLog" | None = None - ) -> int: + def cleanup_all_files(self, audit_log: ProcessingActivityLog | None = None) -> int: """ Delete all output files (GDPR Article 17: Right to Erasure). 
@@ -167,7 +165,7 @@ def cleanup_by_date( self, start_date: datetime, end_date: datetime, - audit_log: "ProcessingActivityLog" | None = None, + audit_log: ProcessingActivityLog | None = None, ) -> int: """ Delete files within a specific date range. diff --git a/packages/parser-core/src/bankstatements_core/services/date_parser.py b/packages/parser-core/src/bankstatements_core/services/date_parser.py index 3a5fa1e..cee53a5 100644 --- a/packages/parser-core/src/bankstatements_core/services/date_parser.py +++ b/packages/parser-core/src/bankstatements_core/services/date_parser.py @@ -28,7 +28,7 @@ class DateParserService: DEFAULT_YEAR = 2023 # Default year for partial dates without year component # Common date formats found in bank statements - DATE_FORMATS = [ + DATE_FORMATS = [ # noqa: RUF012 "%d/%m/%y", # 01/12/23 "%d/%m/%Y", # 01/12/2023 "%d-%m-%y", # 01-12-23 diff --git a/packages/parser-core/src/bankstatements_core/services/duplicate_detector.py b/packages/parser-core/src/bankstatements_core/services/duplicate_detector.py index a4a6e41..93aaef5 100644 --- a/packages/parser-core/src/bankstatements_core/services/duplicate_detector.py +++ b/packages/parser-core/src/bankstatements_core/services/duplicate_detector.py @@ -26,7 +26,7 @@ class DuplicateDetectionService: constitutes a duplicate transaction. """ - def __init__(self, strategy: "DuplicateDetectionStrategy"): + def __init__(self, strategy: DuplicateDetectionStrategy): """ Initialize duplicate detection service. @@ -46,8 +46,8 @@ def __init__(self, strategy: "DuplicateDetectionStrategy"): self.strategy = strategy def detect_and_separate( - self, transactions: list["Transaction"] - ) -> tuple[list["Transaction"], list["Transaction"]]: + self, transactions: list[Transaction] + ) -> tuple[list[Transaction], list[Transaction]]: """ Detect duplicates and separate into unique and duplicate lists. 
@@ -70,8 +70,8 @@ def detect_and_separate( def get_statistics( self, - unique_transactions: list["Transaction"], - duplicate_transactions: list["Transaction"], + unique_transactions: list[Transaction], + duplicate_transactions: list[Transaction], ) -> dict[str, int | float]: """ Get statistics about duplicate detection results. diff --git a/packages/parser-core/src/bankstatements_core/services/expense_analysis.py b/packages/parser-core/src/bankstatements_core/services/expense_analysis.py index 28d0377..a6ab122 100644 --- a/packages/parser-core/src/bankstatements_core/services/expense_analysis.py +++ b/packages/parser-core/src/bankstatements_core/services/expense_analysis.py @@ -47,7 +47,7 @@ class ExpenseAnalysisService: >>> print(insights["insights"]["recurring_charges"]) """ - def __init__(self, entitlements: "Entitlements | None" = None): + def __init__(self, entitlements: Entitlements | None = None): """ Initialize expense analysis service. @@ -123,7 +123,7 @@ def analyze(self, transactions: list[dict]) -> dict[str, Any]: except EntitlementError: # Re-raise tier restriction errors (fail fast) raise - except Exception as e: + except Exception as e: # noqa: BLE001 — service-boundary catch # Unexpected errors: log warning and return empty insights logger.warning( f"Expense analysis failed: {e}. 
Returning empty insights.", @@ -131,7 +131,7 @@ def analyze(self, transactions: list[dict]) -> dict[str, Any]: ) return self._empty_insights(error=str(e)) - def _detect_recurring_charges( + def _detect_recurring_charges( # noqa: C901, PLR0912 self, tx_objects: list[Transaction] ) -> list[dict[str, Any]]: """ @@ -164,7 +164,7 @@ def _detect_recurring_charges( txs, key=lambda t: _date_parser_service.parse_transaction_date(t.date), ) - except Exception as e: + except (ValueError, TypeError) as e: logger.warning(f"Failed to sort transactions for {description}: {e}") continue @@ -199,7 +199,7 @@ def _detect_recurring_charges( delta = (date2 - date1).days if delta > 0: # Only positive intervals intervals.append(delta) - except Exception as e: + except (ValueError, TypeError, AttributeError) as e: logger.warning(f"Failed to calculate interval: {e}") continue @@ -315,7 +315,7 @@ def _detect_repeated_vendors( txs, key=lambda t: _date_parser_service.parse_transaction_date(t.date), ) - except Exception as e: + except (ValueError, TypeError) as e: logger.warning(f"Failed to sort transactions for {description}: {e}") txs_sorted = txs # Use unsorted if date parsing fails diff --git a/packages/parser-core/src/bankstatements_core/services/extraction_orchestrator.py b/packages/parser-core/src/bankstatements_core/services/extraction_orchestrator.py index c3b6dde..f715628 100644 --- a/packages/parser-core/src/bankstatements_core/services/extraction_orchestrator.py +++ b/packages/parser-core/src/bankstatements_core/services/extraction_orchestrator.py @@ -31,10 +31,10 @@ class ExtractionOrchestrator: def __init__( self, extraction_config: ExtractionConfig | None = None, - template_detector: "ITemplateDetector | None" = None, + template_detector: ITemplateDetector | None = None, forced_template: BankTemplate | None = None, entitlements: Entitlements | None = None, - pdf_reader: "IPDFReader | None" = None, + pdf_reader: IPDFReader | None = None, ): """Initialize the extraction 
orchestrator. @@ -52,7 +52,7 @@ def __init__( # Inject PDF reader or use default pdfplumber adapter if pdf_reader is None: - from bankstatements_core.adapters.pdfplumber_adapter import ( + from bankstatements_core.adapters.pdfplumber_adapter import ( # noqa: PLC0415 PDFPlumberReaderAdapter, ) diff --git a/packages/parser-core/src/bankstatements_core/services/header_detection.py b/packages/parser-core/src/bankstatements_core/services/header_detection.py index d47c13b..4f0b77e 100644 --- a/packages/parser-core/src/bankstatements_core/services/header_detection.py +++ b/packages/parser-core/src/bankstatements_core/services/header_detection.py @@ -28,7 +28,7 @@ class HeaderDetectionService: """ # Common header keywords to look for (including variations) - HEADER_KEYWORDS = { + HEADER_KEYWORDS = { # noqa: RUF012 "date", "trans date", "transaction date", @@ -164,7 +164,7 @@ def detect_headers( ) return False - def check_page_for_headers( + def check_page_for_headers( # noqa: PLR0913 self, page: pdfplumber.page.Page, columns: dict[str, tuple[int | float, int | float]], diff --git a/packages/parser-core/src/bankstatements_core/services/iban_grouping.py b/packages/parser-core/src/bankstatements_core/services/iban_grouping.py index 3fdb7e9..ae17e83 100644 --- a/packages/parser-core/src/bankstatements_core/services/iban_grouping.py +++ b/packages/parser-core/src/bankstatements_core/services/iban_grouping.py @@ -46,9 +46,9 @@ def __init__(self, suffix_length: int = 4): def group_by_iban( self, - rows: list["Transaction"], + rows: list[Transaction], pdf_ibans: dict[str, str], - ) -> dict[str, list["Transaction"]]: + ) -> dict[str, list[Transaction]]: """Group transactions by their source IBAN. 
Transactions are grouped by the last N characters of their IBAN, @@ -62,7 +62,7 @@ def group_by_iban( Returns: Dictionary mapping IBAN suffix (or "unknown") to list of transactions """ - grouped: dict[str, list["Transaction"]] = {} + grouped: dict[str, list[Transaction]] = {} for tx in rows: filename = tx.filename @@ -103,7 +103,7 @@ def _extract_suffix(self, iban: str) -> str: else: return iban_clean - def _log_grouping_summary(self, grouped: dict[str, list["Transaction"]]) -> None: + def _log_grouping_summary(self, grouped: dict[str, list[Transaction]]) -> None: """Log summary of grouping results. Args: diff --git a/packages/parser-core/src/bankstatements_core/services/monthly_summary.py b/packages/parser-core/src/bankstatements_core/services/monthly_summary.py index 0aa29c7..eb3a402 100644 --- a/packages/parser-core/src/bankstatements_core/services/monthly_summary.py +++ b/packages/parser-core/src/bankstatements_core/services/monthly_summary.py @@ -130,7 +130,7 @@ def _group_by_month( } ) - for tx, tx_dict in zip(transactions, original_dicts): + for tx, tx_dict in zip(transactions, original_dicts, strict=False): # Use domain object's date field (type-safe) date_obj = _date_parser_service.parse_transaction_date(tx.date) diff --git a/packages/parser-core/src/bankstatements_core/services/output_orchestrator.py b/packages/parser-core/src/bankstatements_core/services/output_orchestrator.py index 97a21c3..b480d22 100644 --- a/packages/parser-core/src/bankstatements_core/services/output_orchestrator.py +++ b/packages/parser-core/src/bankstatements_core/services/output_orchestrator.py @@ -37,17 +37,17 @@ class OutputOrchestrator: - Building summary result dictionary """ - def __init__( + def __init__( # noqa: PLR0913 self, output_dir: Path, output_strategies: dict[str, Any], - monthly_summary_service: "IMonthlySummary", + monthly_summary_service: IMonthlySummary, column_names: list[str], totals_columns: list[str] | None, generate_monthly_summary: bool, - totals_service: 
"IColumnTotals | None" = None, - file_writer: "IJsonWriter | None" = None, - expense_analysis_service: "IExpenseAnalysis | None" = None, + totals_service: IColumnTotals | None = None, + file_writer: IJsonWriter | None = None, + expense_analysis_service: IExpenseAnalysis | None = None, generate_expense_analysis: bool = False, ): """Initialize output orchestrator. @@ -64,7 +64,9 @@ def __init__( expense_analysis_service: Optional service for generating expense analysis generate_expense_analysis: Whether to generate expense analysis (default: False) """ - from bankstatements_core.services.totals_calculator import ColumnTotalsService + from bankstatements_core.services.totals_calculator import ( # noqa: PLC0415 + ColumnTotalsService, + ) self.output_dir = output_dir self.output_strategies = output_strategies @@ -180,7 +182,7 @@ def write_output_files( return output_paths - def build_summary_result( + def build_summary_result( # noqa: PLR0913 self, pdf_count: int, pdfs_extracted: int, diff --git a/packages/parser-core/src/bankstatements_core/services/page_validation.py b/packages/parser-core/src/bankstatements_core/services/page_validation.py index db60457..32d9e41 100644 --- a/packages/parser-core/src/bankstatements_core/services/page_validation.py +++ b/packages/parser-core/src/bankstatements_core/services/page_validation.py @@ -183,7 +183,7 @@ def _classify_row_type( Returns: String classification: 'transaction', etc. 
""" - from bankstatements_core.extraction.row_classifiers import ( + from bankstatements_core.extraction.row_classifiers import ( # noqa: PLC0415 create_row_classifier_chain, ) diff --git a/packages/parser-core/src/bankstatements_core/services/pdf_processing_orchestrator.py b/packages/parser-core/src/bankstatements_core/services/pdf_processing_orchestrator.py index 26f095f..8079a80 100644 --- a/packages/parser-core/src/bankstatements_core/services/pdf_processing_orchestrator.py +++ b/packages/parser-core/src/bankstatements_core/services/pdf_processing_orchestrator.py @@ -45,16 +45,16 @@ class PDFProcessingOrchestrator: - Error handling for failed PDFs """ - def __init__( + def __init__( # noqa: PLR0913 self, extraction_config: ExtractionConfig, column_names: list[str], output_dir: Path, - repository: "FileSystemTransactionRepository", + repository: FileSystemTransactionRepository, entitlements: Entitlements | None = None, - pdf_discovery: "IPDFDiscovery | None" = None, - extraction_orchestrator: "ExtractionOrchestrator | None" = None, - filter_service: "ITransactionFilter | None" = None, + pdf_discovery: IPDFDiscovery | None = None, + extraction_orchestrator: ExtractionOrchestrator | None = None, + filter_service: ITransactionFilter | None = None, ): """Initialize PDF processing orchestrator. 
@@ -68,11 +68,13 @@ def __init__( extraction_orchestrator: Service for extracting data from PDFs (optional, creates default if None) filter_service: Service for filtering transactions (optional, creates default if None) """ - from bankstatements_core.services.extraction_orchestrator import ( + from bankstatements_core.services.extraction_orchestrator import ( # noqa: PLC0415 ExtractionOrchestrator, ) - from bankstatements_core.services.pdf_discovery import PDFDiscoveryService - from bankstatements_core.services.transaction_filter import ( + from bankstatements_core.services.pdf_discovery import ( # noqa: PLC0415 + PDFDiscoveryService, + ) + from bankstatements_core.services.transaction_filter import ( # noqa: PLC0415 TransactionFilterService, ) diff --git a/packages/parser-core/src/bankstatements_core/services/processing_activity_log.py b/packages/parser-core/src/bankstatements_core/services/processing_activity_log.py index cfca1d4..0220a87 100644 --- a/packages/parser-core/src/bankstatements_core/services/processing_activity_log.py +++ b/packages/parser-core/src/bankstatements_core/services/processing_activity_log.py @@ -24,7 +24,7 @@ class ProcessingActivityLog: Log format: JSON Lines (JSONL) - one JSON object per line """ - def __init__(self, logs_dir: Path, file_writer: "IJsonWriter | None" = None): + def __init__(self, logs_dir: Path, file_writer: IJsonWriter | None = None): """ Initialize processing activity log. 
@@ -44,7 +44,7 @@ def __init__(self, logs_dir: Path, file_writer: "IJsonWriter | None" = None): logger.warning("Failed to create logs directory: %s", e) # Let unexpected errors bubble up - def log_processing( + def log_processing( # noqa: PLR0913 self, pdf_count: int, pages_read: int, diff --git a/packages/parser-core/src/bankstatements_core/services/row_merger.py b/packages/parser-core/src/bankstatements_core/services/row_merger.py index fa627a5..ca917e6 100644 --- a/packages/parser-core/src/bankstatements_core/services/row_merger.py +++ b/packages/parser-core/src/bankstatements_core/services/row_merger.py @@ -31,7 +31,7 @@ def __init__(self) -> None: """Initialize the row merger service.""" self._last_transaction_row: dict | None = None - def merge_continuation_lines( + def merge_continuation_lines( # noqa: C901, PLR0912, PLR0915 self, rows: list[dict], columns: dict[str, tuple[int | float, int | float]] ) -> list[dict]: """Merge continuation lines with their parent transactions. @@ -157,7 +157,7 @@ def _classify_row_type( Returns: String classification: 'transaction', 'continuation', etc. 
""" - from bankstatements_core.extraction.row_classifiers import ( + from bankstatements_core.extraction.row_classifiers import ( # noqa: PLC0415 create_row_classifier_chain, ) diff --git a/packages/parser-core/src/bankstatements_core/services/service_registry.py b/packages/parser-core/src/bankstatements_core/services/service_registry.py index 8b6a497..98f59b4 100644 --- a/packages/parser-core/src/bankstatements_core/services/service_registry.py +++ b/packages/parser-core/src/bankstatements_core/services/service_registry.py @@ -58,9 +58,9 @@ class ServiceRegistry: def __init__( self, context: _ServiceContext, - duplicate_detector: "IDuplicateDetector", - sorting_service: "ITransactionSorting", - grouping_service: "IIBANGrouping", + duplicate_detector: IDuplicateDetector, + sorting_service: ITransactionSorting, + grouping_service: IIBANGrouping, ) -> None: self._context = context self._duplicate_detector = duplicate_detector @@ -74,12 +74,12 @@ def __init__( @classmethod def from_config( cls, - config: "ProcessorConfig", - entitlements: "Entitlements | None" = None, - duplicate_detector: "IDuplicateDetector | None" = None, - sorting_service: "ITransactionSorting | None" = None, - grouping_service: "IIBANGrouping | None" = None, - ) -> "ServiceRegistry": + config: ProcessorConfig, + entitlements: Entitlements | None = None, + duplicate_detector: IDuplicateDetector | None = None, + sorting_service: ITransactionSorting | None = None, + grouping_service: IIBANGrouping | None = None, + ) -> ServiceRegistry: """Build a ServiceRegistry from a ProcessorConfig. Args: @@ -94,14 +94,22 @@ def from_config( Returns: Fully wired ServiceRegistry instance. 
""" - from bankstatements_core.config.column_config import get_column_names - from bankstatements_core.patterns.strategies import AllFieldsDuplicateStrategy - from bankstatements_core.services.column_analysis import ColumnAnalysisService - from bankstatements_core.services.duplicate_detector import ( + from bankstatements_core.config.column_config import ( # noqa: PLC0415 + get_column_names, + ) + from bankstatements_core.patterns.strategies import ( # noqa: PLC0415 + AllFieldsDuplicateStrategy, + ) + from bankstatements_core.services.column_analysis import ( # noqa: PLC0415 + ColumnAnalysisService, + ) + from bankstatements_core.services.duplicate_detector import ( # noqa: PLC0415 DuplicateDetectionService, ) - from bankstatements_core.services.iban_grouping import IBANGroupingService - from bankstatements_core.services.sorting_service import ( + from bankstatements_core.services.iban_grouping import ( # noqa: PLC0415 + IBANGroupingService, + ) + from bankstatements_core.services.sorting_service import ( # noqa: PLC0415 ChronologicalSortingStrategy, NoSortingStrategy, TransactionSortingService, @@ -145,9 +153,9 @@ def from_config( def process_transaction_group( self, - transactions: list["Transaction"], - template: "BankTemplate | None" = None, - ) -> tuple[list["Transaction"], list["Transaction"]]: + transactions: list[Transaction], + template: BankTemplate | None = None, + ) -> tuple[list[Transaction], list[Transaction]]: """Enrich → classify → deduplicate → sort a group of transactions. Args: @@ -175,9 +183,9 @@ def process_transaction_group( def group_by_iban( self, - transactions: list["Transaction"], + transactions: list[Transaction], pdf_ibans: dict[str, str], - ) -> dict[str, list["Transaction"]]: + ) -> dict[str, list[Transaction]]: """Group transactions by IBAN suffix. 
Args: @@ -193,13 +201,13 @@ def group_by_iban( # Escape hatches (20 % case) # ------------------------------------------------------------------ - def get_duplicate_detector(self) -> "IDuplicateDetector": + def get_duplicate_detector(self) -> IDuplicateDetector: return self._duplicate_detector - def get_sorting_service(self) -> "ITransactionSorting": + def get_sorting_service(self) -> ITransactionSorting: return self._sorting_service - def get_grouping_service(self) -> "IIBANGrouping": + def get_grouping_service(self) -> IIBANGrouping: return self._grouping_service # ------------------------------------------------------------------ @@ -207,7 +215,7 @@ def get_grouping_service(self) -> "IIBANGrouping": # ------------------------------------------------------------------ @staticmethod - def _enrich_with_filename(transactions: list["Transaction"]) -> None: + def _enrich_with_filename(transactions: list[Transaction]) -> None: """Set filename from source_pdf additional_field if not already present.""" for tx in transactions: if not tx.filename: @@ -215,7 +223,7 @@ def _enrich_with_filename(transactions: list["Transaction"]) -> None: @staticmethod def _enrich_with_document_type( - transactions: list["Transaction"], default_type: str = "bank_statement" + transactions: list[Transaction], default_type: str = "bank_statement" ) -> None: """Set document_type if not already present.""" for tx in transactions: @@ -224,11 +232,11 @@ def _enrich_with_document_type( @staticmethod def _classify_transaction_types( - transactions: list["Transaction"], - template: "BankTemplate | None" = None, + transactions: list[Transaction], + template: BankTemplate | None = None, ) -> None: """Classify each transaction using Chain of Responsibility.""" - from bankstatements_core.services.transaction_type_classifier import ( + from bankstatements_core.services.transaction_type_classifier import ( # noqa: PLC0415 create_transaction_type_classifier_chain, ) diff --git 
a/packages/parser-core/src/bankstatements_core/services/sorting_service.py b/packages/parser-core/src/bankstatements_core/services/sorting_service.py index 58f05cd..9e5d0aa 100644 --- a/packages/parser-core/src/bankstatements_core/services/sorting_service.py +++ b/packages/parser-core/src/bankstatements_core/services/sorting_service.py @@ -24,7 +24,7 @@ class SortingStrategy(ABC): """Abstract base class for transaction sorting strategies.""" @abstractmethod - def sort(self, transactions: list["Transaction"]) -> list["Transaction"]: + def sort(self, transactions: list[Transaction]) -> list[Transaction]: """ Sort transactions according to the strategy. @@ -40,7 +40,7 @@ def sort(self, transactions: list["Transaction"]) -> list["Transaction"]: class ChronologicalSortingStrategy(SortingStrategy): """Strategy that sorts transactions chronologically by date.""" - def sort(self, transactions: list["Transaction"]) -> list["Transaction"]: + def sort(self, transactions: list[Transaction]) -> list[Transaction]: """ Sort transactions chronologically by date. @@ -64,7 +64,7 @@ def sort(self, transactions: list["Transaction"]) -> list["Transaction"]: class NoSortingStrategy(SortingStrategy): """Strategy that keeps original order (no sorting).""" - def sort(self, transactions: list["Transaction"]) -> list["Transaction"]: + def sort(self, transactions: list[Transaction]) -> list[Transaction]: """ Keep transactions in original order. @@ -95,7 +95,7 @@ def __init__(self, strategy: SortingStrategy): """ self.strategy = strategy - def sort(self, transactions: list["Transaction"]) -> list["Transaction"]: + def sort(self, transactions: list[Transaction]) -> list[Transaction]: """ Sort transactions using the configured strategy. 
diff --git a/packages/parser-core/src/bankstatements_core/services/transaction_type_classifier.py b/packages/parser-core/src/bankstatements_core/services/transaction_type_classifier.py index 39c2e41..3e81013 100644 --- a/packages/parser-core/src/bankstatements_core/services/transaction_type_classifier.py +++ b/packages/parser-core/src/bankstatements_core/services/transaction_type_classifier.py @@ -32,8 +32,8 @@ def __init__(self) -> None: self._next_classifier: TransactionTypeClassifier | None = None def set_next( - self, classifier: "TransactionTypeClassifier" - ) -> "TransactionTypeClassifier": + self, classifier: TransactionTypeClassifier + ) -> TransactionTypeClassifier: """Set the next classifier in the chain. Args: @@ -46,7 +46,7 @@ def set_next( return classifier def classify( - self, transaction: "Transaction", template: "BankTemplate | None" = None + self, transaction: Transaction, template: BankTemplate | None = None ) -> str: """Classify transaction type, delegating to next classifier if needed. @@ -69,7 +69,7 @@ def classify( @abstractmethod def _do_classify( - self, transaction: "Transaction", template: "BankTemplate | None" + self, transaction: Transaction, template: BankTemplate | None ) -> str | None: """Attempt to classify the transaction. 
@@ -88,7 +88,7 @@ class TemplateKeywordClassifier(TransactionTypeClassifier): """ def _do_classify( - self, transaction: "Transaction", template: "BankTemplate | None" + self, transaction: Transaction, template: BankTemplate | None ) -> str | None: """Classify using template transaction_types keyword mappings.""" if not template or not template.processing.transaction_types: @@ -115,7 +115,7 @@ class CreditCardPatternClassifier(TransactionTypeClassifier): """ # Credit card transaction patterns - PURCHASE_PATTERNS = [ + PURCHASE_PATTERNS = [ # noqa: RUF012 "PURCHASE", "SALE", "POS", @@ -126,7 +126,7 @@ class CreditCardPatternClassifier(TransactionTypeClassifier): "E-COMMERCE", ] - PAYMENT_PATTERNS = [ + PAYMENT_PATTERNS = [ # noqa: RUF012 "PAYMENT", "PAYMENT RECEIVED", "DIRECT DEBIT", @@ -134,7 +134,7 @@ class CreditCardPatternClassifier(TransactionTypeClassifier): "AUTOPAY", ] - FEE_PATTERNS = [ + FEE_PATTERNS = [ # noqa: RUF012 "FEE", "CHARGE", "INTEREST", @@ -145,15 +145,15 @@ class CreditCardPatternClassifier(TransactionTypeClassifier): "OVERLIMIT FEE", ] - REFUND_PATTERNS = [ + REFUND_PATTERNS = [ # noqa: RUF012 "REFUND", "REVERSAL", "CREDIT", "CHARGEBACK", ] - def _do_classify( - self, transaction: "Transaction", template: "BankTemplate | None" + def _do_classify( # noqa: PLR0911 + self, transaction: Transaction, template: BankTemplate | None ) -> str | None: """Classify credit card transactions.""" if transaction.document_type != "credit_card_statement": @@ -186,7 +186,7 @@ class BankStatementPatternClassifier(TransactionTypeClassifier): """ # Bank statement transaction patterns - TRANSFER_PATTERNS = [ + TRANSFER_PATTERNS = [ # noqa: RUF012 "TRANSFER", "TRF", "SEPA", @@ -197,7 +197,7 @@ class BankStatementPatternClassifier(TransactionTypeClassifier): "MOBILE TRANSFER", ] - PAYMENT_PATTERNS = [ + PAYMENT_PATTERNS = [ # noqa: RUF012 "STANDING ORDER", "DIRECT DEBIT", "DD", @@ -205,14 +205,14 @@ class BankStatementPatternClassifier(TransactionTypeClassifier): 
"BILL PAYMENT", ] - INTEREST_PATTERNS = [ + INTEREST_PATTERNS = [ # noqa: RUF012 "INTEREST", "INT CREDIT", "INTEREST CREDIT", "INTEREST PAID", ] - FEE_PATTERNS = [ + FEE_PATTERNS = [ # noqa: RUF012 "CHARGE", "FEE", "MAINTENANCE FEE", @@ -221,8 +221,8 @@ class BankStatementPatternClassifier(TransactionTypeClassifier): "OVERDRAFT FEE", ] - def _do_classify( - self, transaction: "Transaction", template: "BankTemplate | None" + def _do_classify( # noqa: PLR0911 + self, transaction: Transaction, template: BankTemplate | None ) -> str | None: """Classify bank statement transactions.""" if transaction.document_type != "bank_statement": @@ -256,7 +256,7 @@ class AmountBasedClassifier(TransactionTypeClassifier): """ def _do_classify( - self, transaction: "Transaction", template: "BankTemplate | None" + self, transaction: Transaction, template: BankTemplate | None ) -> str | None: """Classify based on amount patterns.""" debit_amount = to_float(str(transaction.debit)) if transaction.debit else None @@ -292,7 +292,7 @@ class DefaultClassifier(TransactionTypeClassifier): """ def _do_classify( - self, transaction: "Transaction", template: "BankTemplate | None" + self, transaction: Transaction, template: BankTemplate | None ) -> str | None: """Always return 'other' as default classification.""" return "other" diff --git a/packages/parser-core/src/bankstatements_core/templates/__init__.py b/packages/parser-core/src/bankstatements_core/templates/__init__.py index f50f9d0..628f440 100644 --- a/packages/parser-core/src/bankstatements_core/templates/__init__.py +++ b/packages/parser-core/src/bankstatements_core/templates/__init__.py @@ -20,8 +20,8 @@ "DetectionExplanation", "ScoringConfig", "TemplateDetectionConfig", + "TemplateDetector", "TemplateExtractionConfig", "TemplateProcessingConfig", "TemplateRegistry", - "TemplateDetector", ] diff --git a/packages/parser-core/src/bankstatements_core/templates/detectors/__init__.py 
b/packages/parser-core/src/bankstatements_core/templates/detectors/__init__.py index 08eb219..b296f30 100644 --- a/packages/parser-core/src/bankstatements_core/templates/detectors/__init__.py +++ b/packages/parser-core/src/bankstatements_core/templates/detectors/__init__.py @@ -19,9 +19,9 @@ __all__ = [ "BaseDetector", - "DetectionResult", "CardNumberDetector", "ColumnHeaderDetector", + "DetectionResult", "ExclusionDetector", "FilenameDetector", "HeaderDetector", diff --git a/packages/parser-core/src/bankstatements_core/templates/detectors/card_number_detector.py b/packages/parser-core/src/bankstatements_core/templates/detectors/card_number_detector.py index 085de08..f4de50e 100644 --- a/packages/parser-core/src/bankstatements_core/templates/detectors/card_number_detector.py +++ b/packages/parser-core/src/bankstatements_core/templates/detectors/card_number_detector.py @@ -26,7 +26,7 @@ class CardNumberDetector(BaseDetector): def name(self) -> str: return "CardNumber" - def detect( + def detect( # noqa: C901 self, pdf_path: Path, first_page: Page, templates: list[BankTemplate] ) -> list[DetectionResult]: """Detect templates by searching for card number patterns in header area. diff --git a/packages/parser-core/src/bankstatements_core/templates/detectors/column_header_detector.py b/packages/parser-core/src/bankstatements_core/templates/detectors/column_header_detector.py index 653bae0..08446d5 100644 --- a/packages/parser-core/src/bankstatements_core/templates/detectors/column_header_detector.py +++ b/packages/parser-core/src/bankstatements_core/templates/detectors/column_header_detector.py @@ -20,7 +20,7 @@ class ColumnHeaderDetector(BaseDetector): def name(self) -> str: return "ColumnHeader" - def detect( + def detect( # noqa: C901 self, pdf_path: Path, first_page: Page, templates: list[BankTemplate] ) -> list[DetectionResult]: """Detect templates by finding column headers near top of table area. 
diff --git a/packages/parser-core/src/bankstatements_core/templates/detectors/iban_detector.py b/packages/parser-core/src/bankstatements_core/templates/detectors/iban_detector.py index 01d550d..00d288f 100644 --- a/packages/parser-core/src/bankstatements_core/templates/detectors/iban_detector.py +++ b/packages/parser-core/src/bankstatements_core/templates/detectors/iban_detector.py @@ -21,7 +21,7 @@ class IBANDetector(BaseDetector): def name(self) -> str: return "IBAN" - def detect( + def detect( # noqa: C901, PLR0912 self, pdf_path: Path, first_page: Page, templates: list[BankTemplate] ) -> list[DetectionResult]: """Detect templates by searching for IBAN patterns in header area. diff --git a/packages/parser-core/src/bankstatements_core/templates/detectors/loan_reference_detector.py b/packages/parser-core/src/bankstatements_core/templates/detectors/loan_reference_detector.py index b8fe2ef..c2d736c 100644 --- a/packages/parser-core/src/bankstatements_core/templates/detectors/loan_reference_detector.py +++ b/packages/parser-core/src/bankstatements_core/templates/detectors/loan_reference_detector.py @@ -26,7 +26,7 @@ class LoanReferenceDetector(BaseDetector): def name(self) -> str: return "LoanReference" - def detect( + def detect( # noqa: C901 self, pdf_path: Path, first_page: Page, templates: list[BankTemplate] ) -> list[DetectionResult]: """Detect templates by searching for loan reference patterns in header area. 
diff --git a/packages/parser-core/src/bankstatements_core/templates/template_detector.py b/packages/parser-core/src/bankstatements_core/templates/template_detector.py index 7ca1ba2..e00583c 100644 --- a/packages/parser-core/src/bankstatements_core/templates/template_detector.py +++ b/packages/parser-core/src/bankstatements_core/templates/template_detector.py @@ -58,7 +58,7 @@ def __post_init__(self) -> None: ) @classmethod - def default(cls) -> "ScoringConfig": + def default(cls) -> ScoringConfig: """Production scoring — used when no config is injected.""" return cls( weights={ @@ -143,7 +143,7 @@ def __init__( ColumnHeaderDetector(), ] - def _classify_document_type(self, first_page: Page) -> str | None: + def _classify_document_type(self, first_page: Page) -> str | None: # noqa: PLR0911 """Classify document type based on content signals. This pre-classification helps narrow down templates before running @@ -224,7 +224,9 @@ def _classify_document_type(self, first_page: Page) -> str | None: logger.debug("Document type classification uncertain") return None - def detect_template(self, pdf_path: Path, first_page: Page) -> BankTemplate: + def detect_template( # noqa: C901, PLR0911, PLR0912 + self, pdf_path: Path, first_page: Page + ) -> BankTemplate: """Detect template using document type classification and aggregated scoring. Phase 2 enhancements: diff --git a/packages/parser-core/src/bankstatements_core/templates/template_model.py b/packages/parser-core/src/bankstatements_core/templates/template_model.py index 7611528..ea77dfd 100644 --- a/packages/parser-core/src/bankstatements_core/templates/template_model.py +++ b/packages/parser-core/src/bankstatements_core/templates/template_model.py @@ -203,7 +203,7 @@ def _validate_column_boundaries(self) -> None: Issues are logged as warnings (non-fatal) to maintain backward compatibility. 
""" - import logging + import logging # noqa: PLC0415 logger = logging.getLogger(__name__) diff --git a/packages/parser-core/src/bankstatements_core/templates/template_registry.py b/packages/parser-core/src/bankstatements_core/templates/template_registry.py index 1f33af3..2aaf9ef 100644 --- a/packages/parser-core/src/bankstatements_core/templates/template_registry.py +++ b/packages/parser-core/src/bankstatements_core/templates/template_registry.py @@ -37,7 +37,7 @@ def __init__(self, templates: dict[str, BankTemplate], default_template_id: str) ) @classmethod - def from_json(cls, config_path: Path) -> "TemplateRegistry": + def from_json(cls, config_path: Path) -> TemplateRegistry: """Load templates from JSON configuration file. Args: @@ -53,7 +53,7 @@ def from_json(cls, config_path: Path) -> "TemplateRegistry": if not config_path.exists(): raise FileNotFoundError(f"Template config not found: {config_path}") - with open(config_path, "r") as f: + with open(config_path) as f: config = json.load(f) # Validate config structure @@ -86,7 +86,7 @@ def from_json(cls, config_path: Path) -> "TemplateRegistry": return cls(templates, default_template_id) @classmethod - def from_default_config(cls) -> "TemplateRegistry": + def from_default_config(cls) -> TemplateRegistry: """Load templates from default or configured directory. Supports custom template directory for user-added templates. @@ -127,7 +127,7 @@ def from_default_config(cls) -> "TemplateRegistry": return cls.from_directory(default_dir) @classmethod - def from_directory(cls, templates_dir: Path | str) -> "TemplateRegistry": + def from_directory(cls, templates_dir: Path | str) -> TemplateRegistry: """Load all templates from a directory. 
Args: @@ -182,7 +182,7 @@ def from_directory(cls, templates_dir: Path | str) -> "TemplateRegistry": default_id = ( enabled_templates[0].id if enabled_templates - else list(all_templates.keys())[0] + else next(iter(all_templates.keys())) ) logger.info( @@ -194,9 +194,9 @@ def from_directory(cls, templates_dir: Path | str) -> "TemplateRegistry": return cls(templates=all_templates, default_template_id=default_id) @classmethod - def from_multiple_directories( + def from_multiple_directories( # noqa: C901 cls, directories: list[Path | str] - ) -> "TemplateRegistry": + ) -> TemplateRegistry: """Load templates from multiple directories with priority order. Templates from earlier directories have higher priority and can override @@ -284,7 +284,7 @@ def from_multiple_directories( default_id = ( enabled_templates[0].id if enabled_templates - else list(all_templates.keys())[0] + else next(iter(all_templates.keys())) ) template_names = ", ".join(t.name for t in all_templates.values()) @@ -306,7 +306,7 @@ def _load_single_template(cls, template_file: Path) -> BankTemplate | None: Returns: BankTemplate if valid, None if invalid """ - with open(template_file, "r", encoding="utf-8") as f: + with open(template_file, encoding="utf-8") as f: data = json.load(f) # Single template format (new) @@ -315,7 +315,7 @@ def _load_single_template(cls, template_file: Path) -> BankTemplate | None: # Legacy format with templates dict (for backward compatibility) if "templates" in data and len(data["templates"]) == 1: - template_id = list(data["templates"].keys())[0] + template_id = next(iter(data["templates"].keys())) template_data = data["templates"][template_id] return cls._parse_template(template_id, template_data) @@ -372,7 +372,9 @@ def _parse_template(template_id: str, data: dict) -> BankTemplate: columns[col_name] = tuple(coords) # Parse per-page overrides (NEW) - from bankstatements_core.templates.template_model import PerPageBoundaries + from bankstatements_core.templates.template_model 
import ( # noqa: PLC0415 + PerPageBoundaries, + ) per_page_overrides = {} if "per_page_overrides" in extraction_data: @@ -535,7 +537,7 @@ def get_template_ids(self) -> list[str]: """ return list(self._templates.keys()) - def filtered_by_ids(self, ids: set[str]) -> "TemplateRegistry": + def filtered_by_ids(self, ids: set[str]) -> TemplateRegistry: """Return a new registry containing only the templates with the given IDs. The shared registry is never mutated. The default template is preserved if diff --git a/packages/parser-core/src/bankstatements_core/utils.py b/packages/parser-core/src/bankstatements_core/utils.py index 40f257c..621ad90 100644 --- a/packages/parser-core/src/bankstatements_core/utils.py +++ b/packages/parser-core/src/bankstatements_core/utils.py @@ -39,15 +39,15 @@ __all__ = [ "CurrencyParseError", - "to_float", - "format_currency", - "strip_currency_symbols", "calculate_column_sum", - "is_date_column", - "parse_int_env", - "parse_bool_env", "discover_pdfs", + "format_currency", + "is_date_column", "log_summary", + "parse_bool_env", + "parse_int_env", + "strip_currency_symbols", + "to_float", ] @@ -126,7 +126,7 @@ def parse_bool_env(var_name: str, default: bool = False) -> bool: def discover_pdfs( - input_dir: Path, recursive: bool, entitlements: "Entitlements" + input_dir: Path, recursive: bool, entitlements: Entitlements ) -> list[Path]: """ Discover PDF files with entitlement enforcement for recursive scanning. 
diff --git a/packages/parser-core/tests/analysis/test_template_generator.py b/packages/parser-core/tests/analysis/test_template_generator.py index 2a0dfeb..0ab45ee 100644 --- a/packages/parser-core/tests/analysis/test_template_generator.py +++ b/packages/parser-core/tests/analysis/test_template_generator.py @@ -268,7 +268,7 @@ def test_save_template_success(self): assert output_path.exists() # Verify content - with open(output_path, "r") as f: + with open(output_path) as f: loaded = json.load(f) assert loaded["id"] == "test" assert loaded["name"] == "Test Template" @@ -299,13 +299,13 @@ def test_save_template_overwrites_existing(self): # Save first template generator.save_template(template1, output_path) - with open(output_path, "r") as f: + with open(output_path) as f: loaded1 = json.load(f) assert loaded1["name"] == "First" # Save second template (should overwrite) generator.save_template(template2, output_path) - with open(output_path, "r") as f: + with open(output_path) as f: loaded2 = json.load(f) assert loaded2["name"] == "Second" diff --git a/packages/parser-core/tests/domain/protocols/test_file_io.py b/packages/parser-core/tests/domain/protocols/test_file_io.py index 474302c..5d3ac5d 100644 --- a/packages/parser-core/tests/domain/protocols/test_file_io.py +++ b/packages/parser-core/tests/domain/protocols/test_file_io.py @@ -36,7 +36,7 @@ def test_write_json_creates_file(self): repo.write_json(test_file, test_data) assert test_file.exists() - with open(test_file, "r") as f: + with open(test_file) as f: loaded = json.load(f) assert loaded == test_data @@ -54,7 +54,7 @@ def test_write_json_overwrites_existing(self): repo.write_json(test_file, {"version": 2}) # Should have second version - with open(test_file, "r") as f: + with open(test_file) as f: loaded = json.load(f) assert loaded["version"] == 2 diff --git a/packages/parser-core/tests/domain/test_extraction_result.py b/packages/parser-core/tests/domain/test_extraction_result.py index 27d7d55..9ae1d88 
100644 --- a/packages/parser-core/tests/domain/test_extraction_result.py +++ b/packages/parser-core/tests/domain/test_extraction_result.py @@ -15,15 +15,13 @@ class TestExtractionResultImports: def test_importable_from_module(self): - from bankstatements_core.domain.models.extraction_result import ( # noqa: F401 - ExtractionResult, - ) + from bankstatements_core.domain.models.extraction_result import ExtractionResult def test_importable_from_models_package(self): - from bankstatements_core.domain.models import ExtractionResult # noqa: F401 + from bankstatements_core.domain.models import ExtractionResult def test_importable_from_domain_package(self): - from bankstatements_core.domain import ExtractionResult # noqa: F401 + from bankstatements_core.domain import ExtractionResult class TestExtractionResultConstruction: diff --git a/packages/parser-core/tests/services/test_data_retention.py b/packages/parser-core/tests/services/test_data_retention.py index a843055..6bf72cf 100644 --- a/packages/parser-core/tests/services/test_data_retention.py +++ b/packages/parser-core/tests/services/test_data_retention.py @@ -335,7 +335,7 @@ def mock_open(path, *args, **kwargs): if str(path) == str(file_path) and ( "wb" in args or kwargs.get("mode") == "wb" ): - raise IOError("Cannot write") + raise OSError("Cannot write") return original_open(path, *args, **kwargs) monkeypatch.setattr("builtins.open", mock_open) diff --git a/packages/parser-core/tests/services/test_multi_document_output.py b/packages/parser-core/tests/services/test_multi_document_output.py index dc296ec..8e11588 100644 --- a/packages/parser-core/tests/services/test_multi_document_output.py +++ b/packages/parser-core/tests/services/test_multi_document_output.py @@ -64,7 +64,7 @@ def test_csv_includes_document_type_column(self, sample_transactions, tmp_path): strategy.write(sample_transactions, output_file, column_names) # Read CSV and verify structure - with open(output_file, "r", encoding="utf-8") as f: + with 
open(output_file, encoding="utf-8") as f: reader = csv.DictReader(f) headers = reader.fieldnames @@ -91,7 +91,7 @@ def test_csv_preserves_document_type_values(self, sample_transactions, tmp_path) strategy.write(sample_transactions, output_file, column_names) # Read CSV and verify values - with open(output_file, "r", encoding="utf-8") as f: + with open(output_file, encoding="utf-8") as f: reader = csv.DictReader(f) rows = list(reader) @@ -113,7 +113,7 @@ def test_json_includes_document_type_field(self, sample_transactions, tmp_path): strategy.write(sample_transactions, output_file, column_names) # Read JSON and verify structure - with open(output_file, "r", encoding="utf-8") as f: + with open(output_file, encoding="utf-8") as f: data = json.load(f) assert len(data) == 3 @@ -134,7 +134,7 @@ def test_json_preserves_document_type_values(self, sample_transactions, tmp_path strategy.write(sample_transactions, output_file, column_names) # Read JSON and verify values - with open(output_file, "r", encoding="utf-8") as f: + with open(output_file, encoding="utf-8") as f: data = json.load(f) assert data[0]["document_type"] == "bank_statement" diff --git a/packages/parser-core/tests/services/test_pdf_discovery.py b/packages/parser-core/tests/services/test_pdf_discovery.py index b71e2c4..10f5463 100644 --- a/packages/parser-core/tests/services/test_pdf_discovery.py +++ b/packages/parser-core/tests/services/test_pdf_discovery.py @@ -154,7 +154,7 @@ def test_discover_pdfs_sorted(self): def test_discover_pdfs_creation_permission_error(self): """Test that permission errors when creating directory are properly raised.""" - import unittest.mock as mock + from unittest import mock non_existent = Path(self.temp_dir) / "no_permission" diff --git a/packages/parser-core/tests/services/test_processing_activity_log.py b/packages/parser-core/tests/services/test_processing_activity_log.py index 6db55c5..fcc0194 100644 --- a/packages/parser-core/tests/services/test_processing_activity_log.py 
+++ b/packages/parser-core/tests/services/test_processing_activity_log.py @@ -58,7 +58,7 @@ def test_log_processing_creates_entry(self, activity_log): ) # Read log file - with open(activity_log.log_file, "r", encoding="utf-8") as f: + with open(activity_log.log_file, encoding="utf-8") as f: line = f.readline() entry = json.loads(line) @@ -82,7 +82,7 @@ def test_log_processing_timestamp_format(self, activity_log): duration_seconds=1.0, ) - with open(activity_log.log_file, "r", encoding="utf-8") as f: + with open(activity_log.log_file, encoding="utf-8") as f: entry = json.loads(f.readline()) # Verify timestamp is valid ISO format @@ -100,7 +100,7 @@ def test_log_processing_rounds_duration(self, activity_log): duration_seconds=12.3456789, ) - with open(activity_log.log_file, "r", encoding="utf-8") as f: + with open(activity_log.log_file, encoding="utf-8") as f: entry = json.loads(f.readline()) assert entry["duration_seconds"] == 12.35 @@ -117,7 +117,7 @@ def test_log_deletion_with_age(self, activity_log): age_days=95, ) - with open(activity_log.log_file, "r", encoding="utf-8") as f: + with open(activity_log.log_file, encoding="utf-8") as f: entry = json.loads(f.readline()) assert entry["event_type"] == "deletion" @@ -133,7 +133,7 @@ def test_log_deletion_without_age(self, activity_log): reason="User requested deletion", ) - with open(activity_log.log_file, "r", encoding="utf-8") as f: + with open(activity_log.log_file, encoding="utf-8") as f: entry = json.loads(f.readline()) assert entry["event_type"] == "deletion" @@ -149,7 +149,7 @@ def test_log_encryption_encrypt(self, activity_log): """Test log_encryption creates entry for encryption.""" activity_log.log_encryption(file_count=5, operation="encrypt") - with open(activity_log.log_file, "r", encoding="utf-8") as f: + with open(activity_log.log_file, encoding="utf-8") as f: entry = json.loads(f.readline()) assert entry["event_type"] == "encryption" @@ -161,7 +161,7 @@ def test_log_encryption_decrypt(self, 
activity_log): """Test log_encryption creates entry for decryption.""" activity_log.log_encryption(file_count=3, operation="decrypt") - with open(activity_log.log_file, "r", encoding="utf-8") as f: + with open(activity_log.log_file, encoding="utf-8") as f: entry = json.loads(f.readline()) assert entry["event_type"] == "encryption" @@ -191,7 +191,7 @@ def test_multiple_entries_append(self, activity_log): activity_log.log_encryption(file_count=2, operation="encrypt") # Read all entries - with open(activity_log.log_file, "r", encoding="utf-8") as f: + with open(activity_log.log_file, encoding="utf-8") as f: lines = f.readlines() assert len(lines) == 3 @@ -223,7 +223,7 @@ def test_entries_have_unique_timestamps(self, activity_log): duration_seconds=2.0, ) - with open(activity_log.log_file, "r", encoding="utf-8") as f: + with open(activity_log.log_file, encoding="utf-8") as f: lines = f.readlines() entry1 = json.loads(lines[0]) @@ -297,7 +297,7 @@ def test_each_line_is_valid_json(self, activity_log): ) activity_log.log_deletion(file_name="test.csv", reason="Test") - with open(activity_log.log_file, "r", encoding="utf-8") as f: + with open(activity_log.log_file, encoding="utf-8") as f: for line in f: # Each line should be valid JSON entry = json.loads(line.strip()) diff --git a/packages/parser-core/tests/test_exceptions.py b/packages/parser-core/tests/test_exceptions.py index 5ed3123..59a70d5 100644 --- a/packages/parser-core/tests/test_exceptions.py +++ b/packages/parser-core/tests/test_exceptions.py @@ -150,11 +150,11 @@ def test_catch_specific_over_general(self): try: raise PDFReadError("Cannot read PDF") - except PDFReadError as e: # noqa: F841 + except PDFReadError: caught_exception = "PDFReadError" - except PDFExtractionError as e: # noqa: F841 + except PDFExtractionError: caught_exception = "PDFExtractionError" - except BankStatementError as e: # noqa: F841 + except BankStatementError: caught_exception = "BankStatementError" assert caught_exception == 
"PDFReadError" diff --git a/packages/parser-core/tests/test_processor.py b/packages/parser-core/tests/test_processor.py index 021c421..5157066 100644 --- a/packages/parser-core/tests/test_processor.py +++ b/packages/parser-core/tests/test_processor.py @@ -16,6 +16,7 @@ ProcessingConfig, ProcessorConfig, ) +from bankstatements_core.config.totals_config import parse_totals_columns from bankstatements_core.domain import ExtractionResult from bankstatements_core.domain.converters import dicts_to_transactions from bankstatements_core.processor import ( @@ -23,7 +24,6 @@ calculate_column_totals, find_matching_columns, generate_monthly_summary, - parse_totals_columns, parse_transaction_date, ) from bankstatements_core.services.date_parser import DateParserService diff --git a/packages/parser-free/pyproject.toml b/packages/parser-free/pyproject.toml index fefd751..2fdbcb5 100644 --- a/packages/parser-free/pyproject.toml +++ b/packages/parser-free/pyproject.toml @@ -68,3 +68,33 @@ profile = "black" multi_line_output = 3 line_length = 88 known_first_party = ["bankstatements_free", "bankstatements_core"] + +[tool.ruff] +line-length = 88 +target-version = "py311" + +[tool.ruff.lint] +select = [ + "E", # pycodestyle errors + "W", # pycodestyle warnings + "F", # pyflakes + "B", # flake8-bugbear + "C901", # mccabe complexity + "G", # flake8-logging-format + "PLC", # pylint convention + "PLR", # pylint refactoring + "T201", # flake8-print + "BLE001", # flake8-blind-except + "UP", # pyupgrade + "RUF", # ruff-specific rules +] +ignore = [ + "E501", # line too long — handled by black + "PLR2004", # magic value comparison — acceptable in tests and config + "G004", # logging f-string — deferred, see GitHub issue #90 +] + +[tool.ruff.lint.per-file-ignores] +"__init__.py" = ["F401"] +"tests/**/*.py" = ["F401", "PLC0415"] +"src/bankstatements_free/app.py" = ["T201"] # CLI entrypoint: intentional user-facing print output diff --git a/packages/parser-free/src/bankstatements_free/app.py 
b/packages/parser-free/src/bankstatements_free/app.py index e727882..e71ae72 100644 --- a/packages/parser-free/src/bankstatements_free/app.py +++ b/packages/parser-free/src/bankstatements_free/app.py @@ -8,7 +8,9 @@ from pathlib import Path from bankstatements_core.config.app_config import AppConfig, ConfigurationError -from bankstatements_core.config.column_config import get_columns_config # noqa: F401 +from bankstatements_core.config.column_config import ( # noqa: F401 — re-exported for backward compat + get_columns_config, +) from bankstatements_core.entitlements import Entitlements logger = logging.getLogger(__name__) @@ -16,10 +18,10 @@ __all__ = [ "AppConfig", "ConfigurationError", + "log_summary", "main", "resolve_entitlements", "setup_logging", - "log_summary", ] @@ -89,10 +91,12 @@ def main(argv: list[str] | None = None) -> int: Returns: Exit code: 0 for success, non-zero for failure """ - import argparse + import argparse # noqa: PLC0415 - from bankstatements_core.__version__ import __version__ - from bankstatements_core.facades import BankStatementProcessingFacade + from bankstatements_core.__version__ import __version__ # noqa: PLC0415 + from bankstatements_core.facades import ( # noqa: PLC0415 + BankStatementProcessingFacade, + ) parser = argparse.ArgumentParser( prog="bankstatements", description="Bank Statement Processor" @@ -120,7 +124,7 @@ def main(argv: list[str] | None = None) -> int: args = parser.parse_args(argv) if args.init: - from bankstatements_core.commands.init import init_directories + from bankstatements_core.commands.init import init_directories # noqa: PLC0415 return init_directories( base_dir=args.base_dir, diff --git a/requirements/dev.txt b/requirements/dev.txt index fd4b488..d2d05b7 100644 --- a/requirements/dev.txt +++ b/requirements/dev.txt @@ -9,7 +9,7 @@ black>=23.0.0,<27.0.0 isort>=5.12.0,<9.0.0 # Linting and Type Checking -flake8>=6.0.0,<8.0.0 +ruff>=0.8.0,<1.0.0 mypy>=1.8.0,<2.0.0 pyright>=1.1.350