Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ def transactions_to_dicts(transactions: list[Transaction]) -> list[dict]:
>>> dicts[0]["Date"]
'01/01/23'
"""
return [tx.to_dict() for tx in transactions]
return [tx.to_dict(currency_symbol="") for tx in transactions]


def dict_to_transaction(row: dict) -> Transaction:
Expand Down Expand Up @@ -79,7 +79,7 @@ def transaction_to_dict(transaction: Transaction) -> dict:
>>> tx = Transaction(date="01/01/23", details="Test", debit="50.00",
... credit=None, balance="100.00", filename="test.pdf")
>>> row = transaction_to_dict(tx)
>>> row["Debit"]
>>> row["Debit"]
'50.00'
"""
return transaction.to_dict()
return transaction.to_dict(currency_symbol="")
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,8 @@ class Transaction:
source_page: int | None = None
confidence_score: float = 1.0
extraction_warnings: list[ExtractionWarning] = field(default_factory=list)
document_type: str = ""
transaction_type: str = ""

def is_debit(self) -> bool:
"""Check if transaction is a debit (money out).
Expand Down Expand Up @@ -235,6 +237,8 @@ def from_dict(cls, data: dict[str, str | None]) -> Transaction:
"source_page",
"confidence_score",
"extraction_warnings",
"document_type",
"transaction_type",
}
additional_fields = {
k: str(v)
Expand Down Expand Up @@ -263,6 +267,8 @@ def from_dict(cls, data: dict[str, str | None]) -> Transaction:
]
else:
extraction_warnings = []
document_type = str(data.get("document_type") or "")
transaction_type = str(data.get("transaction_type") or "")

return cls(
date=date or "",
Expand All @@ -275,6 +281,8 @@ def from_dict(cls, data: dict[str, str | None]) -> Transaction:
source_page=source_page,
confidence_score=confidence_score,
extraction_warnings=extraction_warnings,
document_type=document_type,
transaction_type=transaction_type,
)

@staticmethod
Expand Down Expand Up @@ -330,6 +338,8 @@ def to_dict(self, currency_symbol: str = "€") -> dict[str, str | None]:
result["extraction_warnings"] = json.dumps(
[w.to_dict() for w in self.extraction_warnings]
)
result["document_type"] = self.document_type
result["transaction_type"] = self.transaction_type

# Add any additional fields
result.update(self.additional_fields)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,182 +8,98 @@
import pandas as pd

if TYPE_CHECKING:
from bankstatements_core.domain.models.transaction import Transaction
from bankstatements_core.templates.template_model import BankTemplate


class IPDFDiscovery(Protocol):
"""Protocol for discovering PDF files in directories."""

def discover_pdfs(self, input_dir: Path, recursive: bool = False) -> list[Path]:
"""Discover PDF files in directory.

Args:
input_dir: Directory to search for PDFs
recursive: Whether to search subdirectories recursively

Returns:
List of paths to discovered PDF files
"""
"""Discover PDF files in directory."""
...


class ITransactionFilter(Protocol):
"""Protocol for filtering transaction rows."""

def apply_all_filters(self, rows: list[dict]) -> list[dict]:
"""Apply all configured filters to rows.

Args:
rows: List of transaction dictionaries

Returns:
Filtered list of transactions
"""
def apply_all_filters(self, rows: list["Transaction"]) -> list["Transaction"]:
"""Apply all configured filters to rows."""
...

def filter_empty_rows(self, rows: list[dict]) -> list[dict]:
"""Filter out rows with insufficient data.

Args:
rows: List of transaction dictionaries

Returns:
List of non-empty transactions
"""
def filter_empty_rows(self, rows: list["Transaction"]) -> list["Transaction"]:
"""Filter out rows with insufficient data."""
...

def filter_header_rows(self, rows: list[dict]) -> list[dict]:
"""Filter out header rows that were incorrectly extracted.

Args:
rows: List of transaction dictionaries

Returns:
List of transactions with header rows removed
"""
def filter_header_rows(self, rows: list["Transaction"]) -> list["Transaction"]:
"""Filter out header rows that were incorrectly extracted."""
...


class IIBANGrouping(Protocol):
"""Protocol for grouping transactions by IBAN."""

def group_by_iban(
self, transactions: list[dict], pdf_ibans: dict[str, str]
) -> dict[str, list[dict]]:
"""Group transactions by IBAN suffix (last 4 digits).

Args:
transactions: List of all transactions
pdf_ibans: Dictionary mapping PDF filenames to IBANs

Returns:
Dictionary mapping IBAN suffix to list of transactions
"""
self,
transactions: list["Transaction"],
pdf_ibans: dict[str, str],
) -> dict[str, list["Transaction"]]:
"""Group transactions by IBAN suffix (last 4 digits)."""
...


class IColumnTotals(Protocol):
"""Protocol for calculating column totals."""

def calculate(self, df: pd.DataFrame) -> dict[str, float]:
"""Calculate totals for configured columns.

Args:
df: DataFrame containing transaction data

Returns:
Dictionary mapping column names to their totals
"""
"""Calculate totals for configured columns."""
...

def format_totals_row(
self, totals: dict[str, float], column_names: list[str]
) -> list[str]:
"""Format totals as a row matching column structure.

Args:
totals: Dictionary of column totals
column_names: List of all column names

Returns:
List of formatted values for CSV row
"""
"""Format totals as a row matching column structure."""
...


class ITemplateDetector(Protocol):
"""Protocol for detecting PDF bank statement templates."""

def detect_template(self, pdf_path: Path, first_page: Any) -> "BankTemplate":
"""Detect template from PDF first page.

Args:
pdf_path: Path to PDF file
first_page: First page of PDF (pdfplumber Page object or adapter)

Returns:
BankTemplate instance
"""
"""Detect template from PDF first page."""
...


class IDuplicateDetector(Protocol):
"""Protocol for detecting duplicate transactions."""

def detect_and_separate(
self, transactions: list[dict]
) -> tuple[list[dict], list[dict]]:
"""Separate unique transactions from duplicates.

Args:
transactions: List of all transactions

Returns:
Tuple of (unique_transactions, duplicate_transactions)
"""
self,
transactions: list["Transaction"],
) -> tuple[list["Transaction"], list["Transaction"]]:
"""Separate unique transactions from duplicates."""
...


class ITransactionSorting(Protocol):
"""Protocol for sorting transactions."""

def sort(self, transactions: list[dict]) -> list[dict]:
"""Sort transactions using configured strategy.

Args:
transactions: List of transactions to sort

Returns:
Sorted list of transactions
"""
def sort(self, transactions: list["Transaction"]) -> list["Transaction"]:
"""Sort transactions using configured strategy."""
...


class IMonthlySummary(Protocol):
"""Protocol for generating monthly transaction summaries."""

def generate(self, transactions: list[dict]) -> dict[str, Any]:
"""Generate monthly summary from transactions.

Args:
transactions: List of transaction dictionaries

Returns:
Dictionary with monthly summaries and statistics
"""
"""Generate monthly summary from transactions."""
...


class IExpenseAnalysis(Protocol):
"""Protocol for expense analysis service."""

def analyze(self, transactions: list[dict]) -> dict[str, Any]:
"""Analyze transactions and generate expense insights.

Args:
transactions: List of transaction dictionaries

Returns:
Dictionary with expense insights and analysis
"""
"""Analyze transactions and generate expense insights."""
...
Loading
Loading