Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -51,11 +51,6 @@ jobs:
pre-commit run --all-files
continue-on-error: true

- name: Type check with mypy
run: |
mypy src --config-file=pyproject.toml
continue-on-error: true

- name: Test with pytest
# Skip:
# - tests/integration/ — manual scripts that hash & copy real FLAC
Expand Down Expand Up @@ -140,6 +135,11 @@ jobs:
run: |
flake8 src tests --count --statistics --show-source

# mypy is a hard gate too — src/ is type-clean (checked against py3.10).
- name: Type check with mypy
run: |
mypy src --config-file=pyproject.toml

- name: Security check with bandit
run: |
bandit -r src -ll
Expand Down
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,8 @@ skip_gitignore = true
[tool.mypy]
# Match the project's actual minimum (3.10+). On "3.9" mypy parsed torch's
# match statements with a 3.9 target and crashed before ever reaching src/,
# so it silently checked nothing. Stays advisory in CI (continue-on-error).
# so it silently checked nothing. src/ is now type-clean and mypy is a hard
# gate in the Code Quality Checks CI job.
python_version = "3.10"
warn_return_any = true
warn_unused_configs = true
Expand Down
18 changes: 9 additions & 9 deletions src/flac_detective/analysis/audio_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import logging
from pathlib import Path
from threading import Lock
from typing import Optional, Tuple
from typing import Dict, Optional, Tuple

import numpy as np
from scipy.fft import rfft, rfftfreq, set_workers
Expand All @@ -33,8 +33,8 @@ def __init__(self, filepath: Path, original_filepath: Optional[Path] = None):
self.filepath = filepath
self.original_filepath = original_filepath or filepath
self._full_audio: Optional[Tuple[np.ndarray, int]] = None
self._segments: dict = {}
self._spectrum: Optional[Tuple[np.ndarray, np.ndarray, np.ndarray]] = None
self._segments: Dict[Tuple[int, int], Tuple[np.ndarray, int]] = {}
self._spectrum: Optional[Tuple[np.ndarray, np.ndarray, int]] = None
self._cutoff: Optional[float] = None
self._lock = Lock()
self._is_partial = False # Track if audio data is partial
Expand All @@ -55,7 +55,7 @@ def get_full_audio(self) -> Tuple[np.ndarray, int]:
original_filepath=str(self.original_filepath),
)

if data is None:
if data is None or sr is None:
# Full load failed - try partial load
logger.warning(
f"CACHE: Full load failed for {self.filepath.name}, attempting partial load"
Expand All @@ -64,7 +64,7 @@ def get_full_audio(self) -> Tuple[np.ndarray, int]:
str(self.filepath), original_filepath=str(self.original_filepath)
)

if data_partial is None:
if data_partial is None or sr_partial is None:
raise RuntimeError(
f"Failed to load any audio data from {self.filepath}"
)
Expand All @@ -86,6 +86,7 @@ def get_full_audio(self) -> Tuple[np.ndarray, int]:
else:
logger.debug(f"CACHE: Using cached full audio for {self.filepath.name}")

assert self._full_audio is not None
return self._full_audio

def is_partial(self) -> bool:
Expand Down Expand Up @@ -121,7 +122,7 @@ def get_segment(self, start_frame: int, frames: int) -> Tuple[np.ndarray, int]:
always_2d=True,
original_filepath=str(self.original_filepath),
)
if data is None:
if data is None or sr is None:
# A failed segment load is less critical than a failed full load (returning an
# empty segment was considered), but we raise for consistency; the caller catches it.
raise RuntimeError(
Expand All @@ -133,9 +134,7 @@ def get_segment(self, start_frame: int, frames: int) -> Tuple[np.ndarray, int]:

return self._segments[key]

def get_spectrum(
self, segment_duration: float = 10.0
) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
def get_spectrum(self, segment_duration: float = 10.0) -> Tuple[np.ndarray, np.ndarray, int]:
"""Get spectrum analysis (cached).

Analyzes first segment_duration seconds of the file.
Expand Down Expand Up @@ -183,6 +182,7 @@ def get_spectrum(
else:
logger.debug(f"CACHE: Using cached spectrum for {self.filepath.name}")

assert self._spectrum is not None
return self._spectrum

def get_cutoff(self) -> float:
Expand Down
4 changes: 2 additions & 2 deletions src/flac_detective/analysis/diagnostic_tracker.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from dataclasses import dataclass
from enum import Enum
from pathlib import Path
from typing import Dict, List, Optional
from typing import Any, Dict, List, Optional

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -129,7 +129,7 @@ def get_statistics(self) -> Dict:
Returns:
Dictionary with diagnostic statistics
"""
stats = {
stats: Dict[str, Any] = {
"total_files": self._files_analyzed,
"files_with_issues": self._files_with_issues,
"clean_files": self._files_analyzed - self._files_with_issues,
Expand Down
9 changes: 6 additions & 3 deletions src/flac_detective/analysis/file_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

import logging
from pathlib import Path
from typing import Dict, Optional, Tuple
from typing import Dict, Optional, Tuple, cast

import numpy as np
import soundfile as sf
Expand Down Expand Up @@ -68,7 +68,7 @@ def read_full(self, filepath: Path, **kwargs) -> Tuple[np.ndarray, int]:
Tuple of (audio_data, sample_rate)
"""
if not self._enabled:
return sf.read(str(filepath), **kwargs)
return cast(Tuple[np.ndarray, int], sf.read(str(filepath), **kwargs))

key = str(filepath)

Expand Down Expand Up @@ -97,7 +97,10 @@ def read_segment(
Tuple of (audio_data, sample_rate)
"""
if not self._enabled:
return sf.read(str(filepath), start=start, frames=frames, **kwargs)
return cast(
Tuple[np.ndarray, int],
sf.read(str(filepath), start=start, frames=frames, **kwargs),
)

key = (str(filepath), start, frames)

Expand Down
6 changes: 3 additions & 3 deletions src/flac_detective/analysis/new_scoring/artifacts.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
"""

import logging
from typing import Optional, Tuple
from typing import Any, Dict, Optional, Tuple

import numpy as np
import soundfile as sf
Expand Down Expand Up @@ -335,8 +335,8 @@ def analyze_compression_artifacts( # noqa: C901
Tuple of (score_delta, list_of_reasons, details_dict)
"""
score = 0
reasons = []
details = {
reasons: list[str] = []
details: Dict[str, Any] = {
"preecho_percentage": 0.0,
"aliasing_correlation": 0.0,
"mp3_noise_pattern": False,
Expand Down
2 changes: 1 addition & 1 deletion src/flac_detective/analysis/new_scoring/audio_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ def load_audio_with_retry(
backoff_multiplier: float = 2.0,
original_filepath: Optional[str] = None,
**kwargs: Any,
) -> Tuple[Optional[NDArray[np.float64]], Optional[int]]:
) -> Tuple[Optional[np.ndarray], Optional[int]]:
"""Load audio file with retry mechanism for temporary decoder errors.

This function attempts to load a FLAC file using soundfile.read() with
Expand Down
11 changes: 8 additions & 3 deletions src/flac_detective/analysis/new_scoring/calculator.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@

import logging
from pathlib import Path
from typing import Dict, List, Tuple
from typing import Dict, List, Optional, Tuple

import numpy as np

from .audio_loader import load_audio_with_retry
from .bitrate import (
Expand Down Expand Up @@ -33,7 +35,7 @@


def _calculate_bitrate_metrics(
filepath: Path, audio_meta: AudioMetadata, source_path: Path = None
filepath: Path, audio_meta: AudioMetadata, source_path: Optional[Path] = None
) -> BitrateMetrics:
"""Calculate all bitrate-related metrics.

Expand Down Expand Up @@ -77,6 +79,9 @@ def _apply_scoring_rules(context: ScoringContext) -> Tuple[int, List[str]]: # n
Returns:
Tuple of (total_score, list_of_reasons)
"""
audio_data: Optional[np.ndarray] = None
sample_rate: Optional[int] = None

# ========== RULE 8: NYQUIST EXCEPTION (ALWAYS FIRST) ==========
# This rule MUST be calculated first and applied before any short-circuit
logger.debug("OPTIMIZATION: Calculating Rule 8 (Nyquist Exception) FIRST...")
Expand Down Expand Up @@ -289,7 +294,7 @@ def new_calculate_score(
cutoff_std: float = 0.0,
energy_ratio: float = 0.0,
cache=None,
source_path: Path = None,
source_path: Optional[Path] = None,
) -> Tuple[int, str, str, str]:
"""Calculate score using the new 8-rule system with file caching.

Expand Down
11 changes: 8 additions & 3 deletions src/flac_detective/analysis/new_scoring/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,12 @@

from dataclasses import dataclass, field
from pathlib import Path
from typing import List, NamedTuple, Optional
from typing import TYPE_CHECKING, List, NamedTuple, Optional

if TYPE_CHECKING:
import numpy as np

from ..audio_cache import AudioCache


class BitrateMetrics(NamedTuple):
Expand Down Expand Up @@ -41,9 +46,9 @@ class ScoringContext:
reasons: List[str] = field(default_factory=list)

# Cache for heavy rules (Rule 9/11) - Avoids reloading file
audio_data: Optional[object] = None # Using object to avoid numpy dependency in models
audio_data: "Optional[np.ndarray]" = None # numpy only imported under TYPE_CHECKING
loaded_sample_rate: Optional[int] = None
cache: Optional[object] = None # AudioCache instance
cache: "Optional[AudioCache]" = None # AudioCache instance

def add_score(self, score: int, new_reasons: List[str]):
"""Update score and reasons."""
Expand Down
4 changes: 3 additions & 1 deletion src/flac_detective/analysis/new_scoring/rules/artifacts.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,16 @@

from typing import List, Optional, Tuple

import numpy as np

from ..artifacts import analyze_compression_artifacts


def apply_rule_9_compression_artifacts(
file_path: str,
cutoff_freq: float,
mp3_bitrate_detected: Optional[int],
audio_data: Optional[object] = None,
audio_data: Optional[np.ndarray] = None,
sample_rate: Optional[int] = None,
) -> Tuple[int, List[str], dict]:
"""Apply Rule 9: Psychoacoustic Compression Artifacts Detection.
Expand Down
8 changes: 4 additions & 4 deletions src/flac_detective/analysis/new_scoring/rules/bitrate.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def apply_rule_3_source_vs_container(
Tuple of (score_delta, list_of_reasons)
"""
score = 0
reasons = []
reasons: list[str] = []

# Container bitrate threshold
# Using the constant defined in constants.py
Expand Down Expand Up @@ -81,7 +81,7 @@ def apply_rule_4_24bit_suspect(
Tuple of (score_delta, list_of_reasons)
"""
score = 0
reasons = []
reasons: list[str] = []

# Minimum expected bitrate for 24-bit files
MIN_24BIT_BITRATE = 500
Expand Down Expand Up @@ -151,7 +151,7 @@ def apply_rule_5_high_variance(
Tuple of (score_delta, list_of_reasons)
"""
score = 0
reasons = []
reasons: list[str] = []

is_high_bitrate = real_bitrate > HIGH_BITRATE_THRESHOLD
is_high_variance = bitrate_variance > VARIANCE_THRESHOLD
Expand Down Expand Up @@ -198,7 +198,7 @@ def apply_rule_6_variable_bitrate_protection(
Tuple of (score_delta, list_of_reasons)
"""
score = 0
reasons = []
reasons: list[str] = []

# Thresholds for high-quality FLAC protection
BITRATE_THRESHOLD = 700 # Raised from 600 kbps
Expand Down
6 changes: 3 additions & 3 deletions src/flac_detective/analysis/new_scoring/rules/cassette.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""Cassette audio source detection (Rule 11)."""

import logging
from typing import List, Optional, Tuple
from typing import List, Optional, Tuple, cast

import numpy as np
import soundfile as sf
Expand All @@ -17,7 +17,7 @@ def bandpass_filter(
) -> np.ndarray:
"""Apply a bandpass filter to the data."""
sos = signal.butter(order, [lowcut, highcut], btype="bandpass", fs=fs, output="sos")
return signal.sosfilt(sos, data)
return cast(np.ndarray, signal.sosfilt(sos, data))


def apply_rule_11_cassette_detection( # noqa: C901
Expand Down Expand Up @@ -46,7 +46,7 @@ def apply_rule_11_cassette_detection( # noqa: C901
cassette_score: 0-85 (Positive score means likely cassette)
"""
cassette_score = 0
reasons = []
reasons: list[str] = []

if cutoff_freq >= 19000:
logger.debug(f"RULE 11: Skipped (cutoff {cutoff_freq:.0f} >= 19000)")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ def apply_rule_10_multi_segment_consistency(
Tuple of (score_delta, list_of_reasons)
"""
score = 0
reasons = []
reasons: list[str] = []

# Activation condition
if current_score <= 30:
Expand Down
2 changes: 1 addition & 1 deletion src/flac_detective/analysis/new_scoring/rules/silence.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ def apply_rule_7_silence_analysis(
Tuple of (score_delta, list_of_reasons, silence_ratio)
"""
score = 0
reasons = []
reasons: list[str] = []
ratio = None

# 1. Check activation condition
Expand Down
4 changes: 2 additions & 2 deletions src/flac_detective/analysis/new_scoring/rules/spectral.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,7 @@ def apply_rule_2_cutoff(cutoff_freq: float, sample_rate: int) -> Tuple[int, List
Tuple of (score_delta, list_of_reasons)
"""
score = 0
reasons = []
reasons: list[str] = []
cutoff_threshold = get_cutoff_threshold(sample_rate)

if cutoff_freq < cutoff_threshold:
Expand Down Expand Up @@ -212,7 +212,7 @@ def apply_rule_8_nyquist_exception(
Tuple of (score_delta, list_of_reasons)
"""
score = 0
reasons = []
reasons: list[str] = []

# Calculate Nyquist frequency
nyquist_freq = sample_rate / 2.0
Expand Down
4 changes: 2 additions & 2 deletions src/flac_detective/analysis/new_scoring/silence.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

import logging
from pathlib import Path
from typing import List, Optional, Tuple
from typing import List, Optional, Tuple, Union

import numpy as np
import soundfile as sf
Expand Down Expand Up @@ -119,7 +119,7 @@ def calculate_spectral_energy(


def analyze_silence_ratio( # noqa: C901
file_path: Path, cache=None
file_path: Union[str, Path], cache=None
) -> Tuple[Optional[float], str, float, float]:
"""Analyze the ratio of HF energy between silence and music.

Expand Down
Loading
Loading