From 68103a19dae1b3ca230bdbe4eaa289c20c22a2d3 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 25 May 2026 21:08:16 +0000 Subject: [PATCH 1/2] [pre-commit.ci] pre-commit autoupdate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/astral-sh/ruff-pre-commit: v0.1.13 → v0.15.14](https://github.com/astral-sh/ruff-pre-commit/compare/v0.1.13...v0.15.14) - https://github.com/psf/black → https://github.com/psf/black-pre-commit-mirror - [github.com/psf/black-pre-commit-mirror: 23.11.0 → 26.5.1](https://github.com/psf/black-pre-commit-mirror/compare/23.11.0...26.5.1) --- .pre-commit-config.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 7d22e02a..5c4e70e9 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,7 +1,7 @@ repos: # Ruff - Fast Python linter and formatter (replaces autoflake, isort, and many other linters) - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.1.13 + rev: v0.15.14 hooks: # Run the linter with autofix - id: ruff @@ -9,8 +9,8 @@ repos: # Ruff will use the configuration from pyproject.toml # Black - Still keep for consistent formatting - - repo: https://github.com/psf/black - rev: 23.11.0 + - repo: https://github.com/psf/black-pre-commit-mirror + rev: 26.5.1 hooks: - id: black From 4803b1758dda356f9f99a0d51ee63cf8a180b774 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 25 May 2026 21:08:30 +0000 Subject: [PATCH 2/2] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- docs/examples/auto_model_usage.py | 1 + examples/performance_comparison_notebook.py | 128 ++--- src/tsbootstrap/backends/batch_processor.py | 47 +- src/tsbootstrap/backends/calibration.py | 37 +- src/tsbootstrap/backends/factory.py | 10 +- src/tsbootstrap/backends/feature_flags.py | 20 +- src/tsbootstrap/backends/performance_utils.py | 37 +- .../backends/statsforecast_backend.py | 4 +- src/tsbootstrap/base_bootstrap.py | 29 +- src/tsbootstrap/block_generator.py | 2 +- src/tsbootstrap/common_fields.py | 1 + .../services/model_scoring_service.py | 1 - src/tsbootstrap/services/service_container.py | 9 +- src/tsbootstrap/sklearn_integration.py | 44 +- src/tsbootstrap/tests/test_all_estimators.py | 1 - src/tsbootstrap/time_series_model.py | 60 +-- src/tsbootstrap/utils/auto_order_selector.py | 16 +- src/tsbootstrap/validators.py | 1 + tests/compatibility/test_dependencies.py | 122 +++-- tests/compatibility/test_estimator_checks.py | 32 +- tests/compatibility/test_skbase_compat.py | 1 + tests/conftest.py | 1 + tests/integration/test_async_bootstrap.py | 1 + .../integration/test_backend_compatibility.py | 1 + tests/integration/test_end_to_end.py | 241 ++++----- tests/integration/test_sklearn_integration.py | 120 ++--- tests/unit/test_async_bootstrap.py | 111 ++--- tests/unit/test_backend_features.py | 80 +-- tests/unit/test_backends.py | 29 +- tests/unit/test_base_bootstrap.py | 89 ++-- tests/unit/test_batch_bootstrap.py | 81 +-- tests/unit/test_batch_bootstrap_service.py | 463 +++++++++--------- tests/unit/test_block_bootstrap.py | 246 +++++----- tests/unit/test_block_bootstrap_services.py | 224 +++++---- tests/unit/test_block_generation.py | 104 ++-- tests/unit/test_bootstrap.py | 163 +++--- tests/unit/test_bootstrap_common.py | 1 + tests/unit/test_bootstrap_ext.py | 3 +- tests/unit/test_bootstrap_factory.py | 2 +- tests/unit/test_bootstrap_services.py | 302 ++++++------ tests/unit/test_model_scoring_service.py | 192 ++++---- tests/unit/test_models.py | 210 ++++---- tests/unit/test_numpy_serialization.py | 385 +++++++-------- tests/unit/test_ranklags.py | 1 + tests/unit/test_rescaling_service.py | 155 +++--- tests/unit/test_service_container.py | 100 ++-- tests/unit/test_services.py | 105 ++-- tests/unit/test_sklearn_compatibility.py | 197 ++++---- tests/unit/test_time_series_model_sklearn.py | 1 + tests/unit/test_time_series_simulator.py | 1 + tests/unit/test_utils.py | 17 +- tests/unit/test_validation.py | 86 ++-- tests/unit/test_validators.py | 1 + 53 files changed, 2015 insertions(+), 2301 deletions(-) diff --git a/docs/examples/auto_model_usage.py b/docs/examples/auto_model_usage.py index a8a2dd6f..3667f73a 100644 --- a/docs/examples/auto_model_usage.py +++ b/docs/examples/auto_model_usage.py @@ -11,6 +11,7 @@ import matplotlib.pyplot as plt import numpy as np + from tsbootstrap.utils.auto_order_selector import AutoOrderSelector diff --git a/examples/performance_comparison_notebook.py b/examples/performance_comparison_notebook.py index b9cae1dd..ddf4bdbd 100644 --- a/examples/performance_comparison_notebook.py +++ b/examples/performance_comparison_notebook.py @@ -35,9 +35,7 @@ def create_performance_notebook(): ) # Setup cell - cells.append( - nbf.v4.new_code_cell( - """# Import required libraries + cells.append(nbf.v4.new_code_cell("""# Import required libraries import os import time import numpy as np @@ -59,14 +57,10 @@ def create_performance_notebook(): # Set random seed for reproducibility np.random.seed(42) -print("Setup complete!")""" - ) - ) +print("Setup complete!")""")) # Performance measurement utilities - cells.append( - nbf.v4.new_code_cell( - """# Utility functions for performance measurement + cells.append(nbf.v4.new_code_cell("""# Utility functions for performance measurement def measure_performance(func, *args, n_runs=5, **kwargs): \"\"\"Measure average performance over multiple runs.\"\"\" @@ -122,22 +116,14 @@ def plot_performance_comparison(results_dict, title="Performance Comparison"): plt.tight_layout() plt.show() -print("Utility functions loaded!")""" - ) - ) +print("Utility functions loaded!")""")) # Example 1: Single Model Fitting - cells.append( - nbf.v4.new_markdown_cell( - """## Example 1: Single Model Fitting + cells.append(nbf.v4.new_markdown_cell("""## Example 1: Single Model Fitting -First, let's compare the performance of fitting a single ARIMA model using both backends.""" - ) - ) +First, let's compare the performance of fitting a single ARIMA model using both backends.""")) - cells.append( - nbf.v4.new_code_cell( - """# Generate sample time series data + cells.append(nbf.v4.new_code_cell("""# Generate sample time series data data = np.cumsum(np.random.randn(1000)) # Random walk with 1000 points print(f"Data shape: {data.shape}") @@ -149,13 +135,9 @@ def plot_performance_comparison(results_dict, title="Performance Comparison"): plt.title("Sample Time Series Data") plt.xlabel("Time") plt.ylabel("Value") -plt.show()""" - ) - ) +plt.show()""")) - cells.append( - nbf.v4.new_code_cell( - """# Compare single ARIMA model fitting + cells.append(nbf.v4.new_code_cell("""# Compare single ARIMA model fitting def fit_arima_statsmodels(data): \"\"\"Fit ARIMA model using statsmodels backend.\"\"\" @@ -186,9 +168,7 @@ def fit_arima_statsforecast(data): print(f"\\nStatsModels: {sm_results['mean']:.3f} ± {sm_results['std']:.3f} seconds") print(f"StatsForecast: {sf_results['mean']:.3f} ± {sf_results['std']:.3f} seconds") -print(f"Speedup: {sm_results['mean'] / sf_results['mean']:.1f}x faster!")""" - ) - ) +print(f"Speedup: {sm_results['mean'] / sf_results['mean']:.1f}x faster!")""")) # Example 2: Batch Processing cells.append( @@ -199,9 +179,7 @@ def fit_arima_statsforecast(data): ) ) - cells.append( - nbf.v4.new_code_cell( - """# Generate multiple time series + cells.append(nbf.v4.new_code_cell("""# Generate multiple time series n_series = 100 series_length = 500 @@ -226,13 +204,9 @@ def fit_arima_statsforecast(data): ax.set_xlabel("Time") ax.set_ylabel("Value") plt.tight_layout() -plt.show()""" - ) - ) +plt.show()""")) - cells.append( - nbf.v4.new_code_cell( - """# Compare batch processing performance + cells.append(nbf.v4.new_code_cell("""# Compare batch processing performance def batch_fit_statsmodels(series_list): \"\"\"Sequential fitting with statsmodels.\"\"\" @@ -275,9 +249,7 @@ def batch_fit_statsforecast(series_list): print(f"Speedup: {sm_batch_results['mean'] / sf_batch_results['mean']:.1f}x faster!") print(f"\\nTime per model:") print(f" StatsModels: {sm_batch_results['mean']/n_series*1000:.1f}ms") -print(f" StatsForecast: {sf_batch_results['mean']/n_series*1000:.1f}ms")""" - ) - ) +print(f" StatsForecast: {sf_batch_results['mean']/n_series*1000:.1f}ms")""")) # Example 3: Bootstrap Performance cells.append( @@ -288,9 +260,7 @@ def batch_fit_statsforecast(series_list): ) ) - cells.append( - nbf.v4.new_code_cell( - """# Compare bootstrap performance + cells.append(nbf.v4.new_code_cell("""# Compare bootstrap performance data = np.cumsum(np.random.randn(365)) # One year of daily data n_bootstraps = 500 @@ -330,22 +300,14 @@ def bootstrap_statsforecast(data, n_bootstraps): print(f"\\nStatsModels: {sm_bootstrap['mean']:.2f} seconds") print(f"StatsForecast: {sf_bootstrap['mean']:.2f} seconds") -print(f"Speedup: {sm_bootstrap['mean'] / sf_bootstrap['mean']:.1f}x faster!")""" - ) - ) +print(f"Speedup: {sm_bootstrap['mean'] / sf_bootstrap['mean']:.1f}x faster!")""")) # Example 4: Scaling Analysis - cells.append( - nbf.v4.new_markdown_cell( - """## Example 4: Scaling Analysis + cells.append(nbf.v4.new_markdown_cell("""## Example 4: Scaling Analysis -Let's analyze how performance scales with the number of models.""" - ) - ) +Let's analyze how performance scales with the number of models.""")) - cells.append( - nbf.v4.new_code_cell( - """# Scaling analysis + cells.append(nbf.v4.new_code_cell("""# Scaling analysis n_series_list = [10, 25, 50, 100, 200] sm_times = [] sf_times = [] @@ -407,22 +369,14 @@ def bootstrap_statsforecast(data, n_bootstraps): print(f"\\nSpeedup increases with scale:") for n, speedup in zip(n_series_list, speedups): - print(f" {n} models: {speedup:.1f}x faster")""" - ) - ) + print(f" {n} models: {speedup:.1f}x faster")""")) # Example 5: Memory Usage - cells.append( - nbf.v4.new_markdown_cell( - """## Example 5: Memory Usage Comparison + cells.append(nbf.v4.new_markdown_cell("""## Example 5: Memory Usage Comparison -Besides speed, statsforecast also uses memory more efficiently.""" - ) - ) +Besides speed, statsforecast also uses memory more efficiently.""")) - cells.append( - nbf.v4.new_code_cell( - """import psutil + cells.append(nbf.v4.new_code_cell("""import psutil import gc def measure_memory_usage(backend_type, n_models=100): @@ -495,9 +449,7 @@ def measure_memory_usage(backend_type, n_models=100): plt.show() print(f"\\nMemory reduction: {reduction:.1f}%") -print(f"StatsForecast uses {sm_memory/sf_memory:.1f}x less memory!")""" - ) - ) +print(f"StatsForecast uses {sm_memory/sf_memory:.1f}x less memory!")""")) # Example 6: Real-world scenario cells.append( @@ -508,9 +460,7 @@ def measure_memory_usage(backend_type, n_models=100): ) ) - cells.append( - nbf.v4.new_code_cell( - """# Simulate production forecasting pipeline + cells.append(nbf.v4.new_code_cell("""# Simulate production forecasting pipeline def production_pipeline(use_backend=False): \"\"\"Simulate a production forecasting pipeline.\"\"\" results = { @@ -624,14 +574,10 @@ def production_pipeline(use_backend=False): ax2.set_title('Processing Throughput', fontsize=14, fontweight='bold') plt.tight_layout() -plt.show()""" - ) - ) +plt.show()""")) # Summary and conclusions - cells.append( - nbf.v4.new_markdown_cell( - """## Summary and Conclusions + cells.append(nbf.v4.new_markdown_cell("""## Summary and Conclusions ### Performance Improvements Achieved: @@ -674,22 +620,14 @@ def production_pipeline(use_backend=False): model = TimeSeriesModel(X=data, model_type="arima", use_backend=True) ``` -The migration is designed to be gradual and safe, with 100% backward compatibility!""" - ) - ) +The migration is designed to be gradual and safe, with 100% backward compatibility!""")) # Add rollout monitoring example - cells.append( - nbf.v4.new_markdown_cell( - """## Bonus: Monitor Your Rollout + cells.append(nbf.v4.new_markdown_cell("""## Bonus: Monitor Your Rollout -Track the success of your migration with built-in monitoring tools.""" - ) - ) +Track the success of your migration with built-in monitoring tools.""")) - cells.append( - nbf.v4.new_code_cell( - """# Check current rollout status + cells.append(nbf.v4.new_code_cell("""# Check current rollout status from tsbootstrap.backends.feature_flags import get_rollout_monitor monitor = get_rollout_monitor() @@ -712,9 +650,7 @@ def production_pipeline(use_backend=False): # Calculate overall speedup from real usage if report['statsmodels']['avg_duration'] > 0 and report['statsforecast']['avg_duration'] > 0: real_speedup = report['statsmodels']['avg_duration'] / report['statsforecast']['avg_duration'] - print(f"\\nReal-world speedup: {real_speedup:.1f}x")""" - ) - ) + print(f"\\nReal-world speedup: {real_speedup:.1f}x")""")) nb.cells = cells return nb diff --git a/src/tsbootstrap/backends/batch_processor.py b/src/tsbootstrap/backends/batch_processor.py index 8d519e3e..4d8bfe60 100644 --- a/src/tsbootstrap/backends/batch_processor.py +++ b/src/tsbootstrap/backends/batch_processor.py @@ -11,20 +11,21 @@ - Batch prediction and evaluation """ -from typing import Any, Callable, List, Optional, Union +from typing import Any, Callable, List, Optional + import numpy as np class BatchProcessor: """Batch processor for parallel model operations. - + Future implementation will provide efficient parallel processing of multiple time series models. """ - + def __init__(self, backend: str = "statsmodels", n_jobs: Optional[int] = None): """Initialize batch processor. - + Parameters ---------- backend : str @@ -39,15 +40,10 @@ def __init__(self, backend: str = "statsmodels", n_jobs: Optional[int] = None): "BatchProcessor is a planned feature that is not yet implemented. " "This stub exists to maintain test structure for future development." ) - - def fit_batch( - self, - series_list: List[np.ndarray], - model_type: str, - **kwargs: Any - ) -> List[Any]: + + def fit_batch(self, series_list: List[np.ndarray], model_type: str, **kwargs: Any) -> List[Any]: """Fit multiple models in batch. - + Parameters ---------- series_list : List[np.ndarray] @@ -56,22 +52,19 @@ def fit_batch( Type of model to fit **kwargs Additional model parameters - + Returns ------- List[Any] List of fitted models """ raise NotImplementedError(self._not_implemented_msg) - + def process_batch( - self, - series_list: List[np.ndarray], - func: Callable, - n_jobs: Optional[int] = None + self, series_list: List[np.ndarray], func: Callable, n_jobs: Optional[int] = None ) -> List[Any]: """Process series in batch with custom function. - + Parameters ---------- series_list : List[np.ndarray] @@ -80,31 +73,27 @@ def process_batch( Function to apply to each series n_jobs : int, optional Number of parallel jobs - + Returns ------- List[Any] Results from applying func to each series """ raise NotImplementedError(self._not_implemented_msg) - - def predict_batch( - self, - models: List[Any], - steps: int - ) -> List[np.ndarray]: + + def predict_batch(self, models: List[Any], steps: int) -> List[np.ndarray]: """Generate predictions from multiple models. - + Parameters ---------- models : List[Any] List of fitted models steps : int Number of steps to predict - + Returns ------- List[np.ndarray] List of predictions """ - raise NotImplementedError(self._not_implemented_msg) \ No newline at end of file + raise NotImplementedError(self._not_implemented_msg) diff --git a/src/tsbootstrap/backends/calibration.py b/src/tsbootstrap/backends/calibration.py index ba07ed16..e7fe55a6 100644 --- a/src/tsbootstrap/backends/calibration.py +++ b/src/tsbootstrap/backends/calibration.py @@ -12,33 +12,34 @@ - Grid and random search capabilities """ -from typing import Any, Dict, List, Optional, Union +from typing import Any, Dict, List, Union + import numpy as np class CalibrationSystem: """Automatic calibration system for time series models. - + Future implementation will provide sophisticated parameter tuning and model selection capabilities. """ - + def __init__(self): """Initialize calibration system.""" self._not_implemented_msg = ( "CalibrationSystem is a planned feature that is not yet implemented. " "This stub exists to maintain test structure for future development." ) - + def calibrate( self, data: np.ndarray, model_type: str, param_grid: Dict[str, List[Any]], - metric: str = "aic" + metric: str = "aic", ) -> Dict[str, Any]: """Calibrate model parameters using grid search. - + Parameters ---------- data : np.ndarray @@ -49,24 +50,24 @@ def calibrate( Parameter grid for search metric : str Metric to optimize ('aic', 'bic', 'mse', etc.) - + Returns ------- Dict[str, Any] Best parameters found """ raise NotImplementedError(self._not_implemented_msg) - + def calibrate_cv( self, data: np.ndarray, model_type: str, param_grid: Dict[str, List[Any]], cv_splits: int = 5, - metric: str = "mse" + metric: str = "mse", ) -> Dict[str, Any]: """Calibrate using cross-validation. - + Parameters ---------- data : np.ndarray @@ -79,23 +80,19 @@ def calibrate_cv( Number of CV splits metric : str Metric to optimize - + Returns ------- Dict[str, Any] Best parameters """ raise NotImplementedError(self._not_implemented_msg) - + def auto_select_order( - self, - data: np.ndarray, - model_type: str, - max_order: int = 10, - criterion: str = "aic" + self, data: np.ndarray, model_type: str, max_order: int = 10, criterion: str = "aic" ) -> Union[int, tuple]: """Automatically select model order. - + Parameters ---------- data : np.ndarray @@ -106,10 +103,10 @@ def auto_select_order( Maximum order to consider criterion : str Information criterion to use - + Returns ------- Union[int, tuple] Selected order """ - raise NotImplementedError(self._not_implemented_msg) \ No newline at end of file + raise NotImplementedError(self._not_implemented_msg) diff --git a/src/tsbootstrap/backends/factory.py b/src/tsbootstrap/backends/factory.py index 790ff669..2ed29409 100644 --- a/src/tsbootstrap/backends/factory.py +++ b/src/tsbootstrap/backends/factory.py @@ -138,11 +138,11 @@ def create_backend( backend = StatsForecastBackend( model_type=backend_model_type, - order=order - if isinstance(order, tuple) - else (order, 0, 0) - if order is not None - else None, + order=( + order + if isinstance(order, tuple) + else (order, 0, 0) if order is not None else None + ), seasonal_order=seasonal_order, **kwargs, ) diff --git a/src/tsbootstrap/backends/feature_flags.py b/src/tsbootstrap/backends/feature_flags.py index f8cbe8f3..67f64125 100644 --- a/src/tsbootstrap/backends/feature_flags.py +++ b/src/tsbootstrap/backends/feature_flags.py @@ -359,11 +359,11 @@ def get_rollout_monitor() -> RolloutMonitor: # Compatibility wrapper for tests class FeatureFlags: """Test-compatible feature flag interface. - + This class provides the interface expected by tests while internally using the FeatureFlagConfig implementation. """ - + def __init__(self): """Initialize feature flags with default settings.""" self._config = FeatureFlagConfig() @@ -374,39 +374,39 @@ def __init__(self): "experimental_var_bootstrap": False, } self._original_flags = {} - + def is_enabled(self, feature: str) -> bool: """Check if a feature is enabled.""" return self._flags.get(feature, False) - + def set_flag(self, feature: str, value: bool) -> None: """Set a feature flag value.""" self._flags[feature] = value - + def enable_experimental_features(self) -> None: """Enable all experimental features.""" for key in self._flags: if key.startswith("experimental_"): self._flags[key] = True - + def temporary_override(self, feature: str, value: bool): """Context manager for temporary feature override.""" return self._TemporaryOverride(self, feature, value) - + class _TemporaryOverride: """Context manager for temporary feature flag override.""" - + def __init__(self, flags: "FeatureFlags", feature: str, value: bool): self.flags = flags self.feature = feature self.new_value = value self.old_value = None - + def __enter__(self): self.old_value = self.flags._flags.get(self.feature) self.flags._flags[self.feature] = self.new_value return self - + def __exit__(self, exc_type, exc_val, exc_tb): if self.old_value is not None: self.flags._flags[self.feature] = self.old_value diff --git a/src/tsbootstrap/backends/performance_utils.py b/src/tsbootstrap/backends/performance_utils.py index 821b74c6..18ccb6e9 100644 --- a/src/tsbootstrap/backends/performance_utils.py +++ b/src/tsbootstrap/backends/performance_utils.py @@ -11,19 +11,14 @@ - Performance regression detection """ -from typing import Any, Dict, List, Optional +from typing import Any, Dict, List + import numpy as np -import time -def benchmark_backend( - backend: str, - model_type: str, - data: np.ndarray, - **kwargs: Any -) -> float: +def benchmark_backend(backend: str, model_type: str, data: np.ndarray, **kwargs: Any) -> float: """Benchmark backend performance. - + Parameters ---------- backend : str @@ -34,7 +29,7 @@ def benchmark_backend( Time series data **kwargs Model parameters - + Returns ------- float @@ -47,14 +42,9 @@ def benchmark_backend( raise NotImplementedError(_not_implemented_msg) -def measure_memory_usage( - backend: str, - model_type: str, - data_size: int, - **kwargs: Any -) -> float: +def measure_memory_usage(backend: str, model_type: str, data_size: int, **kwargs: Any) -> float: """Measure memory usage of backend. - + Parameters ---------- backend : str @@ -65,7 +55,7 @@ def measure_memory_usage( Size of data to test **kwargs Model parameters - + Returns ------- float @@ -79,13 +69,10 @@ def measure_memory_usage( def measure_scaling( - backend: str, - model_type: str, - data_sizes: List[int], - **kwargs: Any + backend: str, model_type: str, data_sizes: List[int], **kwargs: Any ) -> Dict[str, List[float]]: """Measure scaling characteristics. - + Parameters ---------- backend : str @@ -96,7 +83,7 @@ def measure_scaling( Sizes to test **kwargs Model parameters - + Returns ------- Dict[str, List[float]] @@ -106,4 +93,4 @@ def measure_scaling( "measure_scaling is a planned feature that is not yet implemented. " "This stub exists to maintain test structure for future development." ) - raise NotImplementedError(_not_implemented_msg) \ No newline at end of file + raise NotImplementedError(_not_implemented_msg) diff --git a/src/tsbootstrap/backends/statsforecast_backend.py b/src/tsbootstrap/backends/statsforecast_backend.py index 16a2c730..0813b9d6 100644 --- a/src/tsbootstrap/backends/statsforecast_backend.py +++ b/src/tsbootstrap/backends/statsforecast_backend.py @@ -261,8 +261,8 @@ def fit( # Get fitted values using predict_in_sample try: in_sample_pred = fitted_model.predict_in_sample() - if isinstance(in_sample_pred, dict) and 'fitted' in in_sample_pred: - fitted_vals_rescaled = in_sample_pred['fitted'] + if isinstance(in_sample_pred, dict) and "fitted" in in_sample_pred: + fitted_vals_rescaled = in_sample_pred["fitted"] residuals_rescaled = series_data - fitted_vals_rescaled else: # Fallback if predict_in_sample doesn't return expected format diff --git a/src/tsbootstrap/base_bootstrap.py b/src/tsbootstrap/base_bootstrap.py index 9403fe4c..844b576e 100644 --- a/src/tsbootstrap/base_bootstrap.py +++ b/src/tsbootstrap/base_bootstrap.py @@ -523,23 +523,23 @@ def get_test_params(cls): def get_n_bootstraps(self) -> int: """Get the number of bootstrap samples.""" return self.n_bootstraps - + # sklearn Transformer Interface Methods - + def fit(self, X: np.ndarray, y: Optional[np.ndarray] = None) -> BaseTimeSeriesBootstrap: """ Fit the bootstrap method to the data. - + For bootstrap methods, fitting primarily validates and stores the data characteristics. The actual bootstrap sampling happens during transform. - + Parameters ---------- X : array-like of shape (n_samples,) or (n_samples, n_features) Time series data to bootstrap y : array-like of shape (n_samples,) or (n_samples, n_outputs), optional Exogenous variables for model-based methods - + Returns ------- self : BaseTimeSeriesBootstrap @@ -547,27 +547,28 @@ def fit(self, X: np.ndarray, y: Optional[np.ndarray] = None) -> BaseTimeSeriesBo """ # Validate input data from tsbootstrap.utils.validate import validate_X_and_y + X, y = validate_X_and_y(X, y) - + # Store data characteristics (don't store actual data) self._n_samples = X.shape[0] self._n_features = X.shape[1] if X.ndim > 1 else 1 self._is_fitted = True - + return self - + def transform(self, X: np.ndarray) -> list[np.ndarray]: """ Generate bootstrap samples (transformer interface). - + This method provides sklearn transformer compatibility by wrapping the bootstrap() method. It returns bootstrap samples as a list of arrays. - + Parameters ---------- X : array-like of shape (n_samples,) or (n_samples, n_features) Time series data to bootstrap - + Returns ------- samples : list of arrays @@ -576,18 +577,18 @@ def transform(self, X: np.ndarray) -> list[np.ndarray]: # For bootstrap, we don't require fit() to be called first # as each call can work independently return list(self.bootstrap(X)) - + def fit_transform(self, X: np.ndarray, y: Optional[np.ndarray] = None) -> list[np.ndarray]: """ Fit and generate bootstrap samples in one step. - + Parameters ---------- X : array-like of shape (n_samples,) or (n_samples, n_features) Time series data to bootstrap y : array-like of shape (n_samples,) or (n_samples, n_outputs), optional Exogenous variables for model-based methods - + Returns ------- samples : list of arrays diff --git a/src/tsbootstrap/block_generator.py b/src/tsbootstrap/block_generator.py index 46be1229..3f147a13 100644 --- a/src/tsbootstrap/block_generator.py +++ b/src/tsbootstrap/block_generator.py @@ -376,7 +376,7 @@ def generate_overlapping_blocks(self): sampled_block_length = self.block_length_sampler.sample_block_length() logger.debug(f"sampled_block_length: {sampled_block_length}\n") block_length = self._get_next_block_length(sampled_block_length, total_length_covered) - if block_length < self.min_block_length: # type:ignore + if block_length < self.min_block_length: # type: ignore break overlap_length = self._calculate_overlap_length(block_length) diff --git a/src/tsbootstrap/common_fields.py b/src/tsbootstrap/common_fields.py index 4f9e6fc8..b4a41f2c 100644 --- a/src/tsbootstrap/common_fields.py +++ b/src/tsbootstrap/common_fields.py @@ -17,6 +17,7 @@ a better default or need to clarify a description, we update it once here rather than hunting through every bootstrap class. """ + from __future__ import annotations from typing import Optional diff --git a/src/tsbootstrap/services/model_scoring_service.py b/src/tsbootstrap/services/model_scoring_service.py index 1c5202dd..1a6d4ae0 100644 --- a/src/tsbootstrap/services/model_scoring_service.py +++ b/src/tsbootstrap/services/model_scoring_service.py @@ -22,7 +22,6 @@ and that switching backends doesn't mysteriously change your evaluation metrics. """ - import numpy as np diff --git a/src/tsbootstrap/services/service_container.py b/src/tsbootstrap/services/service_container.py index d3bd5f2d..ffa961f2 100644 --- a/src/tsbootstrap/services/service_container.py +++ b/src/tsbootstrap/services/service_container.py @@ -124,7 +124,7 @@ class BootstrapServices: reconstructor: Optional[TimeSeriesReconstructionService] = None order_selector: Optional[SieveOrderSelectionService] = None batch_bootstrap: Optional[BatchBootstrapService] = None - + # Block bootstrap services block_generator: Optional[BlockGenerationService] = None block_resampler: Optional[BlockResamplingService] = None @@ -332,9 +332,4 @@ def create_for_block_bootstrap( BootstrapServices Configured service container for block bootstrap """ - return ( - cls() - .with_block_generation() - .with_block_resampling() - .with_window_functions() - ) + return cls().with_block_generation().with_block_resampling().with_window_functions() diff --git a/src/tsbootstrap/sklearn_integration.py b/src/tsbootstrap/sklearn_integration.py index ed2c70bf..21917c35 100644 --- a/src/tsbootstrap/sklearn_integration.py +++ b/src/tsbootstrap/sklearn_integration.py @@ -183,27 +183,35 @@ def __getattr__(self, name: str) -> Any: "_validate_probability": lambda: self._services.validator.validate_probability, "_validate_array_shape": lambda: self._services.validator.validate_array_shape, # Model fitting methods - "_fit_model": lambda: self._services.model_fitter.fit_model - if self._services.model_fitter - else None, - "_get_fitted_values": lambda: self._services.model_fitter.fitted_values - if self._services.model_fitter - else None, + "_fit_model": lambda: ( + self._services.model_fitter.fit_model if self._services.model_fitter else None + ), + "_get_fitted_values": lambda: ( + self._services.model_fitter.fitted_values if self._services.model_fitter else None + ), # Residual resampling methods - "_resample_residuals_whole": lambda: self._services.residual_resampler.resample_residuals_whole - if self._services.residual_resampler - else None, - "_resample_residuals_block": lambda: self._services.residual_resampler.resample_residuals_block - if self._services.residual_resampler - else None, + "_resample_residuals_whole": lambda: ( + self._services.residual_resampler.resample_residuals_whole + if self._services.residual_resampler + else None + ), + "_resample_residuals_block": lambda: ( + self._services.residual_resampler.resample_residuals_block + if self._services.residual_resampler + else None + ), # Time series reconstruction methods - "_reconstruct_series": lambda: self._services.reconstructor.reconstruct_time_series - if self._services.reconstructor - else None, + "_reconstruct_series": lambda: ( + self._services.reconstructor.reconstruct_time_series + if self._services.reconstructor + else None + ), # Sieve order selection methods - "_select_order": lambda: self._services.order_selector.select_order - if self._services.order_selector - else None, + "_select_order": lambda: ( + self._services.order_selector.select_order + if self._services.order_selector + else None + ), } if name in method_mapping: diff --git a/src/tsbootstrap/tests/test_all_estimators.py b/src/tsbootstrap/tests/test_all_estimators.py index ec48ad79..0fa75d59 100644 --- a/src/tsbootstrap/tests/test_all_estimators.py +++ b/src/tsbootstrap/tests/test_all_estimators.py @@ -1,6 +1,5 @@ """Automated tests based on the skbase test suite template.""" - import numpy as np # Added for type checking (type(np.mean)) import pytest # Added for xfail from skbase.testing import BaseFixtureGenerator as _BaseFixtureGenerator diff --git a/src/tsbootstrap/time_series_model.py b/src/tsbootstrap/time_series_model.py index 6d2a2d8e..84a5925b 100644 --- a/src/tsbootstrap/time_series_model.py +++ b/src/tsbootstrap/time_series_model.py @@ -74,7 +74,7 @@ def __init__( >>> # Old API (backward compatibility) >>> time_series_model = TimeSeriesModel(X=data, model_type="ar") >>> results = time_series_model.fit() - + >>> # New sklearn-compatible API >>> time_series_model = TimeSeriesModel(model_type="ar", order=2) >>> results = time_series_model.fit(X) @@ -83,7 +83,7 @@ def __init__( self.order = order self.verbose = verbose self.use_backend = use_backend - + # Handle both old and new API if X is not None: # Old API - data provided in constructor @@ -92,7 +92,7 @@ def __init__( # New API - data will be provided in fit() self._X = None self._y = None - + self._fitted_model = None @property @@ -628,11 +628,11 @@ def _fit_model(self, order: OrderTypes = None, seasonal_order: Optional[tuple] = def fit(self, X=None, y=None, order: OrderTypes = None, seasonal_order: Optional[tuple] = None, **kwargs): # type: ignore """Fit method supporting both old and new API. - + This method maintains backward compatibility by accepting order parameters like the old API, while also supporting the sklearn pattern when called with X parameter. - + Parameters ---------- X : np.ndarray, optional @@ -646,7 +646,7 @@ def fit(self, X=None, y=None, order: OrderTypes = None, seasonal_order: Optional The seasonal order of the model for SARIMA. **kwargs Additional keyword arguments for the model. - + Returns ------- For backward compatibility: The fitted time series model if called without X. @@ -660,42 +660,46 @@ def fit(self, X=None, y=None, order: OrderTypes = None, seasonal_order: Optional if order is None: order = self.order # Fit the model - self._fitted_model = self._fit_model(order=order, seasonal_order=seasonal_order, **kwargs) + self._fitted_model = self._fit_model( + order=order, seasonal_order=seasonal_order, **kwargs + ) # Return self for sklearn compatibility return self else: # Old API - X should already be set in constructor if self._X is None: raise ValueError("No data provided. Pass X to constructor or use sklearn pattern.") - + # Use provided order or the one from constructor if order is None: order = self.order - + # Fit the model using the existing fit method - self._fitted_model = self._fit_model(order=order, seasonal_order=seasonal_order, **kwargs) - + self._fitted_model = self._fit_model( + order=order, seasonal_order=seasonal_order, **kwargs + ) + # For backward compatibility, return the fitted model object that has forecast() method return self._fitted_model - + def predict(self, n_periods: int = 1) -> np.ndarray: """Generate predictions from the fitted model. - + Parameters ---------- n_periods : int, default 1 Number of periods to forecast. - + Returns ------- np.ndarray Forecasted values. """ check_is_fitted(self, "_fitted_model") - + if self._fitted_model is None: raise NotFittedError("Model must be fitted before making predictions.") - + # Use the predict method of the fitted model if hasattr(self._fitted_model, "predict"): # For statsmodels ARIMA/SARIMAX models @@ -704,37 +708,37 @@ def predict(self, n_periods: int = 1) -> np.ndarray: # For statsmodels AR, VAR, and other models if self.model_type.lower() == "var": # VAR models need the last observations - last_obs = self._X[-self.order:] if isinstance(self.order, int) else self._X[-2:] + last_obs = self._X[-self.order :] if isinstance(self.order, int) else self._X[-2:] return self._fitted_model.forecast(y=last_obs, steps=n_periods) else: return self._fitted_model.forecast(steps=n_periods) else: raise AttributeError("Fitted model does not have a predict or forecast method.") - + def score(self, X: np.ndarray, y: np.ndarray) -> float: """Sklearn-compatible score method. - + Parameters ---------- X : np.ndarray Test data. y : np.ndarray Target values. - + Returns ------- float R² score. """ from sklearn.metrics import r2_score - + # Generate predictions for the length of y predictions = self.predict(n_periods=len(y)) - + # Ensure same shape if predictions.shape != y.shape: - predictions = predictions[:len(y)] - + predictions = predictions[: len(y)] + return r2_score(y, predictions) def __repr__(self) -> str: @@ -747,18 +751,18 @@ def __eq__(self, other: object) -> bool: if isinstance(other, TimeSeriesModel): # Check X equality x_equal = ( - np.array_equal(self._X, other._X) - if (self._X is not None and other._X is not None) + np.array_equal(self._X, other._X) + if (self._X is not None and other._X is not None) else (self._X is None and other._X is None) ) - + # Check y equality y_equal = ( np.array_equal(self._y, other._y) if (self._y is not None and other._y is not None) else (self._y is None and other._y is None) ) - + return ( x_equal and y_equal diff --git a/src/tsbootstrap/utils/auto_order_selector.py b/src/tsbootstrap/utils/auto_order_selector.py index e7851a5a..e0b4e700 100644 --- a/src/tsbootstrap/utils/auto_order_selector.py +++ b/src/tsbootstrap/utils/auto_order_selector.py @@ -131,7 +131,7 @@ def __init__( ): # Store original parameter for sklearn compatibility self.model_type = model_type - + # Normalize model type to handle Auto models internally if isinstance(model_type, str): model_type_lower = model_type.lower() @@ -161,9 +161,9 @@ def __init__( self.auto_model = None self.max_lag = max_lag - self.order: Union[ - OrderTypesWithoutNone, None - ] = order # Allow None initially, will be set in fit + self.order: Union[OrderTypesWithoutNone, None] = ( + order # Allow None initially, will be set in fit + ) self.seasonal_order: Optional[tuple] = seasonal_order self.information_criterion = information_criterion self.save_models = save_models @@ -203,7 +203,9 @@ def _compute_best_order(self, X: np.ndarray) -> Union[OrderTypesWithoutNone, tup endog=endog, exog=None, order=None, # Let AutoARIMA determine order - seasonal_order=self.seasonal_order if self._internal_model_type == "sarima" else None, + seasonal_order=( + self.seasonal_order if self._internal_model_type == "sarima" else None + ), force_backend="statsforecast", # Use efficient statsforecast backend return_backend=False, max_p=self.max_lag, # Use max_lag as upper bound for p @@ -434,7 +436,9 @@ def predict(self, X: np.ndarray, y: Optional[np.ndarray] = None, n_steps: int = ) # Use the fitted adapter's predict method # Note: Most backends expect steps parameter, not X for predict - return self.fitted_adapter.predict(steps=n_steps, X=X if self._internal_model_type == "var" else None) + return self.fitted_adapter.predict( + steps=n_steps, X=X if self._internal_model_type == "var" else None + ) def score( self, diff --git a/src/tsbootstrap/validators.py b/src/tsbootstrap/validators.py index a4ce3006..f03d424a 100644 --- a/src/tsbootstrap/validators.py +++ b/src/tsbootstrap/validators.py @@ -18,6 +18,7 @@ lengths must be positive, why certain model orders have specific structures. Each validation rule represents a lesson learned from real-world usage. """ + from __future__ import annotations from typing import Annotated, Any, List, Optional, Union diff --git a/tests/compatibility/test_dependencies.py b/tests/compatibility/test_dependencies.py index 00760f92..c602fdb0 100644 --- a/tests/compatibility/test_dependencies.py +++ b/tests/compatibility/test_dependencies.py @@ -18,6 +18,7 @@ from unittest.mock import Mock, patch import pytest + from tsbootstrap.utils.dependencies import ( SeverityEnum, _check_estimator_dependencies, @@ -55,9 +56,10 @@ def test_valid_object_with_dependencies_met(self): }.get(tag, default) ) - with patch( - "tsbootstrap.utils.dependencies._check_python_version", return_value=True - ), patch("tsbootstrap.utils.dependencies._check_soft_dependencies", return_value=True): + with ( + patch("tsbootstrap.utils.dependencies._check_python_version", return_value=True), + patch("tsbootstrap.utils.dependencies._check_soft_dependencies", return_value=True), + ): result = _check_estimator_dependencies(mock_obj) assert result is True @@ -81,9 +83,10 @@ def test_python_version_incompatible_error(self): mock_obj = Mock() mock_obj.get_class_tag = Mock(return_value=None) - with patch( - "tsbootstrap.utils.dependencies._check_python_version", return_value=False - ), pytest.raises(ModuleNotFoundError, match="Python version incompatible"): + with ( + patch("tsbootstrap.utils.dependencies._check_python_version", return_value=False), + pytest.raises(ModuleNotFoundError, match="Python version incompatible"), + ): _check_estimator_dependencies(mock_obj, severity="error") def test_python_version_incompatible_warning(self): @@ -91,9 +94,10 @@ def test_python_version_incompatible_warning(self): mock_obj = Mock() mock_obj.get_class_tag = Mock(return_value=None) - with patch( - "tsbootstrap.utils.dependencies._check_python_version", return_value=False - ), patch("tsbootstrap.utils.dependencies.logger") as mock_logger: + with ( + patch("tsbootstrap.utils.dependencies._check_python_version", return_value=False), + patch("tsbootstrap.utils.dependencies.logger") as mock_logger, + ): result = _check_estimator_dependencies(mock_obj, severity="warning") assert result is False @@ -119,12 +123,10 @@ def test_missing_soft_dependencies_error(self): }.get(tag, default) ) - with patch( - "tsbootstrap.utils.dependencies._check_python_version", return_value=True - ), patch( - "tsbootstrap.utils.dependencies._check_soft_dependencies", return_value=False - ), pytest.raises( - ModuleNotFoundError, match="Missing dependencies" + with ( + patch("tsbootstrap.utils.dependencies._check_python_version", return_value=True), + patch("tsbootstrap.utils.dependencies._check_soft_dependencies", return_value=False), + pytest.raises(ModuleNotFoundError, match="Missing dependencies"), ): _check_estimator_dependencies(mock_obj, severity="error") @@ -138,13 +140,11 @@ def test_missing_soft_dependencies_warning(self): }.get(tag, default) ) - with patch( - "tsbootstrap.utils.dependencies._check_python_version", return_value=True - ), patch( - "tsbootstrap.utils.dependencies._check_soft_dependencies", return_value=False - ), patch( - "tsbootstrap.utils.dependencies.logger" - ) as mock_logger: + with ( + patch("tsbootstrap.utils.dependencies._check_python_version", return_value=True), + patch("tsbootstrap.utils.dependencies._check_soft_dependencies", return_value=False), + patch("tsbootstrap.utils.dependencies.logger") as mock_logger, + ): result = _check_estimator_dependencies(mock_obj, severity="warning") assert result is False @@ -156,9 +156,10 @@ def test_custom_error_message(self): mock_obj.get_class_tag = Mock(return_value=None) custom_msg = "Custom error message for testing" - with patch( - "tsbootstrap.utils.dependencies._check_python_version", return_value=False - ), pytest.raises(ModuleNotFoundError, match=custom_msg): + with ( + patch("tsbootstrap.utils.dependencies._check_python_version", return_value=False), + pytest.raises(ModuleNotFoundError, match=custom_msg), + ): _check_estimator_dependencies(mock_obj, severity="error", msg=custom_msg) def test_list_of_objects(self): @@ -169,9 +170,10 @@ def test_list_of_objects(self): mock_obj2 = Mock() mock_obj2.get_class_tag = Mock(return_value=None) - with patch( - "tsbootstrap.utils.dependencies._check_python_version", return_value=True - ), patch("tsbootstrap.utils.dependencies._check_soft_dependencies", return_value=True): + with ( + patch("tsbootstrap.utils.dependencies._check_python_version", return_value=True), + patch("tsbootstrap.utils.dependencies._check_soft_dependencies", return_value=True), + ): result = _check_estimator_dependencies([mock_obj1, mock_obj2]) assert result is True @@ -184,9 +186,10 @@ def test_tuple_of_objects(self): mock_obj2 = Mock() mock_obj2.get_class_tag = Mock(return_value=None) - with patch( - "tsbootstrap.utils.dependencies._check_python_version", return_value=True - ), patch("tsbootstrap.utils.dependencies._check_soft_dependencies", return_value=True): + with ( + patch("tsbootstrap.utils.dependencies._check_python_version", return_value=True), + patch("tsbootstrap.utils.dependencies._check_soft_dependencies", return_value=True), + ): result = _check_estimator_dependencies((mock_obj1, mock_obj2)) assert result is True @@ -199,9 +202,12 @@ def test_list_with_incompatible_object_error(self): mock_obj2 = Mock() mock_obj2.get_class_tag = Mock(return_value=None) - with patch( - "tsbootstrap.utils.dependencies._check_python_version", side_effect=[True, False] - ), pytest.raises(ModuleNotFoundError): + with ( + patch( + "tsbootstrap.utils.dependencies._check_python_version", side_effect=[True, False] + ), + pytest.raises(ModuleNotFoundError), + ): _check_estimator_dependencies([mock_obj1, mock_obj2], severity="error") def test_list_with_incompatible_object_warning(self): @@ -212,9 +218,12 @@ def test_list_with_incompatible_object_warning(self): mock_obj2 = Mock() mock_obj2.get_class_tag = Mock(return_value=None) - with patch( - "tsbootstrap.utils.dependencies._check_python_version", side_effect=[True, False] - ), patch("tsbootstrap.utils.dependencies.logger"): + with ( + patch( + "tsbootstrap.utils.dependencies._check_python_version", side_effect=[True, False] + ), + patch("tsbootstrap.utils.dependencies.logger"), + ): result = _check_estimator_dependencies([mock_obj1, mock_obj2], severity="warning") assert result is False @@ -229,11 +238,12 @@ def test_single_string_dependency(self): }.get(tag, default) ) - with patch( - "tsbootstrap.utils.dependencies._check_python_version", return_value=True - ), patch( - "tsbootstrap.utils.dependencies._check_soft_dependencies", return_value=True - ) as mock_check: + with ( + patch("tsbootstrap.utils.dependencies._check_python_version", return_value=True), + patch( + "tsbootstrap.utils.dependencies._check_soft_dependencies", return_value=True + ) as mock_check, + ): result = _check_estimator_dependencies(mock_obj) assert result is True @@ -276,9 +286,10 @@ def test_list_with_object_missing_method(self): mock_obj2 = Mock(spec=[]) # No methods # With error severity, should raise - with patch( - "tsbootstrap.utils.dependencies._check_python_version", return_value=True - ), pytest.raises(TypeError): + with ( + patch("tsbootstrap.utils.dependencies._check_python_version", return_value=True), + pytest.raises(TypeError), + ): _check_estimator_dependencies([mock_obj1, mock_obj2], severity="error") def test_list_with_object_missing_method_warning(self): @@ -304,11 +315,12 @@ def test_multiple_dependencies_with_alias(self): }.get(tag, default) ) - with patch( - "tsbootstrap.utils.dependencies._check_python_version", return_value=True - ), patch( - "tsbootstrap.utils.dependencies._check_soft_dependencies", return_value=True - ) as mock_check: + with ( + patch("tsbootstrap.utils.dependencies._check_python_version", return_value=True), + patch( + "tsbootstrap.utils.dependencies._check_soft_dependencies", return_value=True + ) as mock_check, + ): result = _check_estimator_dependencies(mock_obj) assert result is True @@ -346,9 +358,13 @@ def test_early_exit_on_error(self): mock_obj3.get_class_tag = Mock(return_value=None) # Second object will fail - with patch( - "tsbootstrap.utils.dependencies._check_python_version", side_effect=[True, False, True] - ) as mock_check, pytest.raises(ModuleNotFoundError): + with ( + patch( + "tsbootstrap.utils.dependencies._check_python_version", + side_effect=[True, False, True], + ) as mock_check, + pytest.raises(ModuleNotFoundError), + ): _check_estimator_dependencies([mock_obj1, mock_obj2, mock_obj3], severity="error") # Verify that the third object was never checked due to early exit diff --git a/tests/compatibility/test_estimator_checks.py b/tests/compatibility/test_estimator_checks.py index e9455114..be903e29 100644 --- a/tests/compatibility/test_estimator_checks.py +++ b/tests/compatibility/test_estimator_checks.py @@ -16,6 +16,7 @@ from unittest.mock import Mock, patch import pytest + from tsbootstrap.utils.estimator_checks import check_estimator @@ -54,10 +55,13 @@ def test_check_estimator_with_raise_exceptions(self): mock_test_cls.return_value = mock_test_instance mock_test_instance.run_tests.side_effect = ValueError("Test failed") - with patch( - "tsbootstrap.tests.test_class_register.get_test_classes_for_obj", - return_value=[mock_test_cls], - ), pytest.raises(ValueError, match="Test failed"): + with ( + patch( + "tsbootstrap.tests.test_class_register.get_test_classes_for_obj", + return_value=[mock_test_cls], + ), + pytest.raises(ValueError, match="Test failed"), + ): check_estimator(mock_estimator, raise_exceptions=True) def test_check_estimator_with_tests_to_run(self): @@ -168,10 +172,13 @@ def test_check_estimator_verbose_output(self): "test_transform": "FAILED: ValueError", } - with patch( - "tsbootstrap.tests.test_class_register.get_test_classes_for_obj", - return_value=[mock_test_cls], - ), patch("builtins.print") as mock_print: + with ( + patch( + "tsbootstrap.tests.test_class_register.get_test_classes_for_obj", + return_value=[mock_test_cls], + ), + patch("builtins.print") as mock_print, + ): check_estimator(mock_estimator, verbose=True) # Check that verbose output was printed @@ -214,10 +221,11 @@ def test_soft_dependency_check(self): """Test that soft dependency check is called.""" mock_estimator = Mock() - with patch( - "tsbootstrap.utils.estimator_checks._check_soft_dependencies" - ) as mock_check_deps, patch( - "tsbootstrap.tests.test_class_register.get_test_classes_for_obj", return_value=[] + with ( + patch("tsbootstrap.utils.estimator_checks._check_soft_dependencies") as mock_check_deps, + patch( + "tsbootstrap.tests.test_class_register.get_test_classes_for_obj", return_value=[] + ), ): check_estimator(mock_estimator) diff --git a/tests/compatibility/test_skbase_compat.py b/tests/compatibility/test_skbase_compat.py index 2e0e919f..0a430014 100644 --- a/tests/compatibility/test_skbase_compat.py +++ b/tests/compatibility/test_skbase_compat.py @@ -5,6 +5,7 @@ from unittest.mock import patch import pytest + import tsbootstrap.utils.skbase_compat diff --git a/tests/conftest.py b/tests/conftest.py index 4e93348b..2eedd225 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -13,6 +13,7 @@ approach reflects our testing philosophy: fast feedback loops for common cases, thorough validation when it matters. """ + # Engineering principle: Clean output is non-negotiable # Suppress pkg_resources warnings at import time import warnings diff --git a/tests/integration/test_async_bootstrap.py b/tests/integration/test_async_bootstrap.py index e8b7a4de..9a43937b 100644 --- a/tests/integration/test_async_bootstrap.py +++ b/tests/integration/test_async_bootstrap.py @@ -20,6 +20,7 @@ import numpy as np import pytest + from tsbootstrap.async_bootstrap import ( AsyncBlockResidualBootstrap, AsyncBootstrap, diff --git a/tests/integration/test_backend_compatibility.py b/tests/integration/test_backend_compatibility.py index dc210790..8900ffed 100644 --- a/tests/integration/test_backend_compatibility.py +++ b/tests/integration/test_backend_compatibility.py @@ -15,6 +15,7 @@ import numpy as np import pytest from numpy.testing import assert_allclose, assert_array_almost_equal + from tsbootstrap.backends.statsforecast_backend import StatsForecastBackend from tsbootstrap.backends.statsmodels_backend import StatsModelsBackend from tsbootstrap.services.rescaling_service import RescalingService diff --git a/tests/integration/test_end_to_end.py b/tests/integration/test_end_to_end.py index 1a1bfbe6..dcfaf83a 100644 --- a/tests/integration/test_end_to_end.py +++ b/tests/integration/test_end_to_end.py @@ -17,19 +17,16 @@ """ import numpy as np -import pandas as pd -import pytest -from scipy import stats -from tsbootstrap.bootstrap import ( - WholeResidualBootstrap, - BlockResidualBootstrap, - WholeSieveBootstrap, -) from tsbootstrap.block_bootstrap import ( + CircularBlockBootstrap, MovingBlockBootstrap, StationaryBlockBootstrap, - CircularBlockBootstrap, +) +from tsbootstrap.bootstrap import ( + BlockResidualBootstrap, + WholeResidualBootstrap, + WholeSieveBootstrap, ) @@ -39,40 +36,37 @@ class TestConfidenceIntervalWorkflow: def test_mean_confidence_interval(self): """Test confidence interval for mean estimation.""" np.random.seed(42) - + # Generate AR(1) data with known mean n = 200 true_mean = 5.0 data = np.zeros(n) data[0] = true_mean + np.random.randn() - + for i in range(1, n): - data[i] = true_mean + 0.5 * (data[i-1] - true_mean) + np.random.randn() + data[i] = true_mean + 0.5 * (data[i - 1] - true_mean) + np.random.randn() # Use residual bootstrap for CI bootstrap = WholeResidualBootstrap( - n_bootstraps=1000, - model_type="ar", - order=1, - random_state=42 + n_bootstraps=1000, model_type="ar", order=1, random_state=42 ) # Generate bootstrap samples samples = list(bootstrap.bootstrap(data)) - + # Calculate means bootstrap_means = [np.mean(sample) for sample in samples] - + # 95% confidence interval ci_lower = np.percentile(bootstrap_means, 2.5) ci_upper = np.percentile(bootstrap_means, 97.5) - + # Check that CI is reasonable # The bootstrap CI might not always contain the true mean due to finite sample effects # and model misspecification, but it should be close sample_mean = np.mean(data) assert ci_lower < sample_mean < ci_upper - + # CI should be reasonable width ci_width = ci_upper - ci_lower assert 0.1 < ci_width < 3.0 # Wider tolerance for AR data @@ -80,46 +74,44 @@ def test_mean_confidence_interval(self): def test_autocorrelation_confidence_interval(self): """Test confidence interval for autocorrelation.""" np.random.seed(42) - + # Generate AR(1) with known autocorrelation n = 300 phi = 0.7 data = np.zeros(n) data[0] = np.random.randn() - + for i in range(1, n): - data[i] = phi * data[i-1] + np.random.randn() + data[i] = phi * data[i - 1] + np.random.randn() # Use block bootstrap to preserve correlation bootstrap = MovingBlockBootstrap( - n_bootstraps=500, - block_length=int(n**0.33), # Optimal block length - random_state=42 + n_bootstraps=500, block_length=int(n**0.33), random_state=42 # Optimal block length ) samples = list(bootstrap.bootstrap(data)) - + # Calculate lag-1 autocorrelation for each sample def lag1_acf(x): if len(x) < 2: return 0 return np.corrcoef(x[:-1], x[1:])[0, 1] - + bootstrap_acf = [lag1_acf(sample) for sample in samples] - + # 95% CI ci_lower = np.percentile(bootstrap_acf, 2.5) ci_upper = np.percentile(bootstrap_acf, 97.5) - + # Check that the CI is reasonable and contains plausible values # The sample ACF might not always be within the bootstrap CI due to # finite sample effects and the way block bootstrap works sample_acf = lag1_acf(data) - + # Check that CI is reasonable assert 0.3 < ci_lower < 0.8 assert 0.5 < ci_upper < 0.95 - + # CI should contain values close to the sample ACF assert abs(sample_acf - np.median(bootstrap_acf)) < 0.2 @@ -130,40 +122,36 @@ class TestHypothesisTestingWorkflow: def test_two_sample_test(self): """Test two-sample hypothesis test using bootstrap.""" np.random.seed(42) - + # Generate two time series with different means n = 150 series1 = np.cumsum(np.random.randn(n)) + 0.1 * np.arange(n) series2 = np.cumsum(np.random.randn(n)) + 0.15 * np.arange(n) # Steeper trend - + # Use block bootstrap for both - bootstrap = MovingBlockBootstrap( - n_bootstraps=500, - block_length=15, - random_state=42 - ) - + bootstrap = MovingBlockBootstrap(n_bootstraps=500, block_length=15, random_state=42) + # Bootstrap samples samples1 = list(bootstrap.bootstrap(series1)) samples2 = list(bootstrap.bootstrap(series2)) - + # Test statistic: difference in trend slopes def estimate_trend(x): t = np.arange(len(x)) return np.polyfit(t, x, 1)[0] - + # Bootstrap distribution of difference diff_slopes = [] for s1, s2 in zip(samples1, samples2): slope1 = estimate_trend(s1) slope2 = estimate_trend(s2) diff_slopes.append(slope2 - slope1) - + # Check that we can detect a difference # The observed difference should be positive (series2 has steeper trend) observed_diff = estimate_trend(series2) - estimate_trend(series1) assert observed_diff > 0 - + # Most bootstrap differences should also be positive proportion_positive = np.mean([d > 0 for d in diff_slopes]) assert proportion_positive > 0.5 # At least 50% should show the same direction @@ -171,47 +159,43 @@ def estimate_trend(x): def test_stationarity_test(self): """Test stationarity using bootstrap.""" np.random.seed(42) - + # Generate non-stationary data (random walk) n = 200 random_walk = np.cumsum(np.random.randn(n)) - + # Generate stationary data (AR(1)) stationary = np.zeros(n) for i in range(1, n): - stationary[i] = 0.5 * stationary[i-1] + np.random.randn() - + stationary[i] = 0.5 * stationary[i - 1] + np.random.randn() + # Use block bootstrap - bootstrap = StationaryBlockBootstrap( - n_bootstraps=300, - block_length=20, - random_state=42 - ) - + bootstrap = StationaryBlockBootstrap(n_bootstraps=300, block_length=20, random_state=42) + def variance_ratio_stat(x): """Variance ratio test statistic.""" n = len(x) var1 = np.var(x[1:] - x[:-1]) # 1-period returns var2 = np.var(x[2:] - x[:-2]) / 2 # 2-period returns return var2 / var1 if var1 > 0 else 1.0 - + # Bootstrap distribution for random walk samples_rw = list(bootstrap.bootstrap(random_walk)) vr_rw = [variance_ratio_stat(s) for s in samples_rw] - + # Bootstrap distribution for stationary samples_st = list(bootstrap.bootstrap(stationary)) vr_st = [variance_ratio_stat(s) for s in samples_st] - + # Check that the two distributions are different # The variance ratio test might not always work perfectly with bootstrap # due to the block structure preserving some dependencies mean_vr_rw = np.mean(vr_rw) mean_vr_st = np.mean(vr_st) - + # Stationary series should have lower VR on average assert mean_vr_st < mean_vr_rw - + # Both should be reasonable values assert 0.5 < mean_vr_rw < 2.0 assert 0.5 < mean_vr_st < 1.5 @@ -223,46 +207,42 @@ class TestForecastingWorkflow: def test_forecast_intervals(self): """Test forecast interval construction.""" np.random.seed(42) - + # Generate ARIMA(1,1,1) data n = 150 data = np.cumsum(np.random.randn(n)) - + # Use sieve bootstrap for automatic order selection bootstrap = WholeSieveBootstrap( - n_bootstraps=200, - min_lag=1, - max_lag=5, - criterion="bic", - random_state=42 + n_bootstraps=200, min_lag=1, max_lag=5, criterion="bic", random_state=42 ) - + # Generate bootstrap samples samples = list(bootstrap.bootstrap(data)) - + # Forecast from each sample forecast_horizon = 10 forecasts = [] - + for sample in samples: # Simple forecast: linear trend + last value trend = np.polyfit(np.arange(len(sample)), sample, 1)[0] last_value = sample[-1] forecast = last_value + trend * np.arange(1, forecast_horizon + 1) forecasts.append(forecast) - + forecasts = np.array(forecasts) - + # Prediction intervals pi_lower = np.percentile(forecasts, 5, axis=0) pi_upper = np.percentile(forecasts, 95, axis=0) - + # Check that intervals exist and are reasonable widths = pi_upper - pi_lower - + # All widths should be positive assert np.all(widths > 0) - + # Widths should be reasonable (not too narrow or too wide) assert np.all(widths > 0.1) assert np.all(widths < 20.0) @@ -270,7 +250,7 @@ def test_forecast_intervals(self): def test_multi_step_forecast_evaluation(self): """Test multi-step forecast evaluation with bootstrap.""" np.random.seed(42) - + # Generate seasonal data n = 144 # 12 years of monthly data t = np.arange(n) @@ -278,22 +258,20 @@ def test_multi_step_forecast_evaluation(self): trend = 0.1 * t noise = np.random.randn(n) data = trend + seasonal + noise - + # Use circular bootstrap for seasonal data bootstrap = CircularBlockBootstrap( - n_bootstraps=100, - block_length=12, # Monthly blocks - random_state=42 + n_bootstraps=100, block_length=12, random_state=42 # Monthly blocks ) - + # Split data train_size = 120 train_data = data[:train_size] test_data = data[train_size:] - + # Bootstrap prediction intervals samples = list(bootstrap.bootstrap(train_data)) - + forecasts = [] for sample in samples: # Simple seasonal forecast @@ -305,16 +283,16 @@ def test_multi_step_forecast_evaluation(self): trend_adj = 0 # Create full forecast for test period forecast = np.tile(last_year + trend_adj, 2) # Repeat for 24 months - forecasts.append(forecast[:len(test_data)]) - + forecasts.append(forecast[: len(test_data)]) + forecasts = np.array(forecasts) - + # Coverage test pi_lower = np.percentile(forecasts, 10, axis=0) pi_upper = np.percentile(forecasts, 90, axis=0) - + coverage = np.mean((test_data >= pi_lower) & (test_data <= pi_upper)) - + # Should have reasonable coverage (bootstrap might not be perfectly calibrated) # CircularBlockBootstrap preserves structure but may not give exact nominal coverage assert 0.2 < coverage < 1.0 # Very relaxed bounds due to simple forecast method @@ -326,51 +304,45 @@ class TestModelComparisonWorkflow: def test_model_selection_workflow(self): """Test selecting between models using bootstrap.""" np.random.seed(42) - + # Generate MA(2) data (to test model selection) n = 200 ma_coefs = [0.5, -0.3] errors = np.random.randn(n + 2) data = errors[2:] + ma_coefs[0] * errors[1:-1] + ma_coefs[1] * errors[:-2] - + # Compare AR vs MA models using bootstrap n_bootstrap = 100 - + # AR model bootstrap ar_bootstrap = WholeResidualBootstrap( - n_bootstraps=n_bootstrap, - model_type="ar", - order=3, - random_state=42 + n_bootstraps=n_bootstrap, model_type="ar", order=3, random_state=42 ) - + # MA model bootstrap (using ARIMA(0,0,q)) ma_bootstrap = WholeResidualBootstrap( - n_bootstraps=n_bootstrap, - model_type="arima", - order=(0, 0, 2), - random_state=42 + n_bootstraps=n_bootstrap, model_type="arima", order=(0, 0, 2), random_state=42 ) - + # Generate samples and compute prediction errors ar_samples = list(ar_bootstrap.bootstrap(data)) ma_samples = list(ma_bootstrap.bootstrap(data)) - + # One-step-ahead prediction errors ar_errors = [] ma_errors = [] - + for ar_s, ma_s in zip(ar_samples, ma_samples): # Simple proxy: variance of first differences ar_errors.append(np.var(np.diff(ar_s))) ma_errors.append(np.var(np.diff(ma_s))) - + # Both models should produce reasonable error distributions # Note: The variance of first differences is not a perfect proxy for model fit # In practice, AR models can sometimes approximate MA processes well assert len(ar_errors) == n_bootstrap assert len(ma_errors) == n_bootstrap - + # Check that errors are reasonable (not extreme) assert 0.5 < np.mean(ar_errors) < 5.0 assert 0.5 < np.mean(ma_errors) < 5.0 @@ -382,85 +354,74 @@ class TestComplexDataWorkflow: def test_multivariate_analysis(self): """Test multivariate time series analysis.""" np.random.seed(42) - + # Generate VAR(1) data n = 200 n_vars = 3 - + # Coefficient matrix with cross-dependencies - A = np.array([ - [0.5, 0.1, 0.0], - [0.2, 0.3, 0.1], - [0.0, 0.2, 0.4] - ]) - + A = np.array([[0.5, 0.1, 0.0], [0.2, 0.3, 0.1], [0.0, 0.2, 0.4]]) + # Generate data data = np.zeros((n, n_vars)) data[0] = np.random.randn(n_vars) - + for t in range(1, n): - data[t] = A @ data[t-1] + np.random.randn(n_vars) - + data[t] = A @ data[t - 1] + np.random.randn(n_vars) + # Use block bootstrap for multivariate data bootstrap = BlockResidualBootstrap( - n_bootstraps=200, - block_length=10, - model_type="var", - order=1, - random_state=42 + n_bootstraps=200, block_length=10, model_type="var", order=1, random_state=42 ) - + # Generate samples samples = list(bootstrap.bootstrap(data)) - + # Test: estimate cross-correlation matrix cross_corrs = [] for sample in samples: corr = np.corrcoef(sample.T) cross_corrs.append(corr[0, 1]) # Correlation between series 1 and 2 - + # Confidence interval for cross-correlation ci_lower = np.percentile(cross_corrs, 2.5) ci_upper = np.percentile(cross_corrs, 97.5) - + # Should detect positive correlation assert ci_lower > 0 def test_missing_data_workflow(self): """Test workflow with missing data.""" np.random.seed(42) - + # Generate data with missing values n = 200 complete_data = np.cumsum(np.random.randn(n)) - + # Randomly remove 10% of values data = complete_data.copy() missing_mask = np.random.random(n) < 0.1 data[missing_mask] = np.nan - + # Simple imputation before bootstrap from scipy.interpolate import interp1d - + valid_idx = ~np.isnan(data) valid_data = data[valid_idx] valid_times = np.arange(n)[valid_idx] - + # Interpolate missing values - f = interp1d(valid_times, valid_data, kind='linear', - bounds_error=False, fill_value='extrapolate') + f = interp1d( + valid_times, valid_data, kind="linear", bounds_error=False, fill_value="extrapolate" + ) imputed_data = f(np.arange(n)) - + # Bootstrap on imputed data - bootstrap = MovingBlockBootstrap( - n_bootstraps=100, - block_length=20, - random_state=42 - ) - + bootstrap = MovingBlockBootstrap(n_bootstraps=100, block_length=20, random_state=42) + samples = list(bootstrap.bootstrap(imputed_data)) - + # Check that bootstrap works with imputed data assert len(samples) == 100 assert all(len(s) == n for s in samples) - assert all(~np.isnan(s).any() for s in samples) \ No newline at end of file + assert all(~np.isnan(s).any() for s in samples) diff --git a/tests/integration/test_sklearn_integration.py b/tests/integration/test_sklearn_integration.py index ecf1e070..3ef2d51c 100644 --- a/tests/integration/test_sklearn_integration.py +++ b/tests/integration/test_sklearn_integration.py @@ -10,8 +10,8 @@ """ import numpy as np -import pytest from sklearn.base import clone +from sklearn.metrics import mean_squared_error from sklearn.model_selection import ( GridSearchCV, TimeSeriesSplit, @@ -19,10 +19,9 @@ ) from sklearn.pipeline import Pipeline from sklearn.preprocessing import StandardScaler -from sklearn.metrics import mean_squared_error -from tsbootstrap.bootstrap import WholeResidualBootstrap from tsbootstrap.block_bootstrap import MovingBlockBootstrap +from tsbootstrap.bootstrap import WholeResidualBootstrap from tsbootstrap.time_series_model import TimeSeriesModel from tsbootstrap.utils.auto_order_selector import AutoOrderSelector @@ -36,10 +35,12 @@ def test_bootstrap_in_pipeline(self): X = np.cumsum(np.random.randn(100, 1), axis=0) # Create pipeline with preprocessing and bootstrap - pipeline = Pipeline([ - ('scaler', StandardScaler()), - ('bootstrap', MovingBlockBootstrap(n_bootstraps=10, block_length=10)) - ]) + pipeline = Pipeline( + [ + ("scaler", StandardScaler()), + ("bootstrap", MovingBlockBootstrap(n_bootstraps=10, block_length=10)), + ] + ) # Should be able to use pipeline samples = list(pipeline.fit_transform(X)) @@ -51,15 +52,13 @@ def test_model_in_pipeline(self): X = np.cumsum(np.random.randn(100)) # Create pipeline with model - pipeline = Pipeline([ - ('model', TimeSeriesModel(model_type="ar", order=2)) - ]) + pipeline = Pipeline([("model", TimeSeriesModel(model_type="ar", order=2))]) # Fit pipeline pipeline.fit(X) # Predict - predictions = pipeline.named_steps['model'].predict(n_periods=5) + predictions = pipeline.named_steps["model"].predict(n_periods=5) assert len(predictions) == 5 @@ -74,16 +73,12 @@ def test_time_series_cv_with_bootstrap(self): # Time series cross-validation tscv = TimeSeriesSplit(n_splits=3) - bootstrap = WholeResidualBootstrap( - n_bootstraps=5, - model_type="ar", - order=2 - ) + bootstrap = WholeResidualBootstrap(n_bootstraps=5, model_type="ar", order=2) # Should work with cross-validation for train_idx, test_idx in tscv.split(X): X_train = X[train_idx] - + # Generate bootstrap samples samples = list(bootstrap.bootstrap(X_train)) assert len(samples) == 5 @@ -93,7 +88,7 @@ def test_cross_val_score_with_model(self): np.random.seed(42) n = 300 X = np.cumsum(np.random.randn(n)) - + # Create simple target (next value) y = np.roll(X, -1) y[-1] = X[-1] # Fill last value @@ -104,17 +99,13 @@ def test_cross_val_score_with_model(self): # Custom scorer that handles time series def ts_scorer(model, X, y): try: - model.fit(X[:len(X)//2]) # Fit on first half - pred = model.predict(n_periods=len(X)//2) - return -mean_squared_error(y[len(X)//2:], pred) + model.fit(X[: len(X) // 2]) # Fit on first half + pred = model.predict(n_periods=len(X) // 2) + return -mean_squared_error(y[len(X) // 2 :], pred) except: return -999 # Bad score for failed fits - scores = cross_val_score( - model, X, y, - cv=tscv, - scoring=ts_scorer - ) + scores = cross_val_score(model, X, y, cv=tscv, scoring=ts_scorer) assert len(scores) == 3 assert all(score < 0 for score in scores) # Negative MSE @@ -130,10 +121,7 @@ def test_grid_search_with_bootstrap(self): bootstrap = MovingBlockBootstrap(n_bootstraps=5) - param_grid = { - 'block_length': [5, 10, 20], - 'n_bootstraps': [5, 10] - } + param_grid = {"block_length": [5, 10, 20], "n_bootstraps": [5, 10]} # Create custom scorer def bootstrap_scorer(estimator, X): @@ -143,16 +131,13 @@ def bootstrap_scorer(estimator, X): return -np.var(means) # Lower variance is better grid_search = GridSearchCV( - bootstrap, - param_grid, - cv=2, # Simple 2-fold CV - scoring=bootstrap_scorer + bootstrap, param_grid, cv=2, scoring=bootstrap_scorer # Simple 2-fold CV ) grid_search.fit(X) - assert hasattr(grid_search, 'best_params_') - assert 'block_length' in grid_search.best_params_ + assert hasattr(grid_search, "best_params_") + assert "block_length" in grid_search.best_params_ def test_grid_search_with_auto_selector(self): """Test GridSearchCV with AutoOrderSelector.""" @@ -161,30 +146,24 @@ def test_grid_search_with_auto_selector(self): selector = AutoOrderSelector(model_type="ar") - param_grid = { - 'max_lag': [5, 10, 15], - 'information_criterion': ['aic', 'bic'] - } + param_grid = {"max_lag": [5, 10, 15], "information_criterion": ["aic", "bic"]} # Custom scorer based on in-sample fit def fit_scorer(estimator, X): estimator.fit(X) - if hasattr(estimator, 'get_residuals'): + if hasattr(estimator, "get_residuals"): residuals = estimator.get_residuals() return -np.mean(residuals**2) return -999 grid_search = GridSearchCV( - selector, - param_grid, - cv=2, # Simple 2-fold CV - scoring=fit_scorer + selector, param_grid, cv=2, scoring=fit_scorer # Simple 2-fold CV ) grid_search.fit(X) - assert hasattr(grid_search, 'best_params_') - assert grid_search.best_params_['max_lag'] in [5, 10, 15] + assert hasattr(grid_search, "best_params_") + assert grid_search.best_params_["max_lag"] in [5, 10, 15] class TestEstimatorMethods: @@ -192,18 +171,13 @@ class TestEstimatorMethods: def test_get_params_set_params(self): """Test get_params and set_params methods.""" - bootstrap = WholeResidualBootstrap( - n_bootstraps=10, - model_type="ar", - order=2, - rng=42 - ) + bootstrap = WholeResidualBootstrap(n_bootstraps=10, model_type="ar", order=2, rng=42) # Get params params = bootstrap.get_params() - assert params['n_bootstraps'] == 10 - assert params['model_type'] == 'ar' - assert params['order'] == 2 + assert params["n_bootstraps"] == 10 + assert params["model_type"] == "ar" + assert params["order"] == 2 # Set params bootstrap.set_params(n_bootstraps=20, order=3) @@ -216,11 +190,7 @@ def test_get_params_set_params(self): def test_clone_estimator(self): """Test cloning estimators.""" - original = MovingBlockBootstrap( - n_bootstraps=15, - block_length=12, - rng=42 - ) + original = MovingBlockBootstrap(n_bootstraps=15, block_length=12, rng=42) # Clone cloned = clone(original) @@ -231,7 +201,7 @@ def test_clone_estimator(self): assert cloned.block_length == 12 # rng is the parameter name, not random_state params = cloned.get_params() - assert params['rng'] == 42 + assert params["rng"] == 42 # Modifying clone shouldn't affect original cloned.set_params(n_bootstraps=30) @@ -240,17 +210,13 @@ def test_clone_estimator(self): def test_repr_html(self): """Test HTML representation for notebooks.""" - bootstrap = WholeResidualBootstrap( - n_bootstraps=10, - model_type="arima", - order=(1, 1, 1) - ) + bootstrap = WholeResidualBootstrap(n_bootstraps=10, model_type="arima", order=(1, 1, 1)) # Should have _repr_html_ for notebook display - if hasattr(bootstrap, '_repr_html_'): + if hasattr(bootstrap, "_repr_html_"): html = bootstrap._repr_html_() assert isinstance(html, str) - assert 'WholeResidualBootstrap' in html + assert "WholeResidualBootstrap" in html class TestCompositeEstimators: @@ -274,7 +240,7 @@ def predict(self, X): # Should be able to use with bootstrap model = CustomARModel(lag=2) params = model.get_params() - assert params['lag'] == 2 + assert params["lag"] == 2 def test_ensemble_bootstrap(self): """Test ensemble of bootstrap methods.""" @@ -305,14 +271,11 @@ def test_fit_transform(self): np.random.seed(42) X = np.cumsum(np.random.randn(80, 1), axis=0) - bootstrap = MovingBlockBootstrap( - n_bootstraps=10, - block_length=8 - ) + bootstrap = MovingBlockBootstrap(n_bootstraps=10, block_length=8) # fit_transform should work samples = bootstrap.fit_transform(X) - + # Should return array of shape (n_bootstraps, *X.shape) assert isinstance(samples, list) or isinstance(samples, np.ndarray) assert len(samples) == 10 @@ -322,14 +285,11 @@ def test_transform_without_fit(self): np.random.seed(42) X = np.cumsum(np.random.randn(100, 1), axis=0) - bootstrap = MovingBlockBootstrap( - n_bootstraps=5, - block_length=10 - ) + bootstrap = MovingBlockBootstrap(n_bootstraps=5, block_length=10) # Fit first bootstrap.fit(X) # Transform should work samples = bootstrap.transform(X) - assert len(samples) == 5 \ No newline at end of file + assert len(samples) == 5 diff --git a/tests/unit/test_async_bootstrap.py b/tests/unit/test_async_bootstrap.py index a8668017..b3dcca75 100644 --- a/tests/unit/test_async_bootstrap.py +++ b/tests/unit/test_async_bootstrap.py @@ -16,16 +16,14 @@ random seeds across async boundaries. """ +from unittest.mock import Mock, patch + import numpy as np import pytest -import asyncio -import logging -from unittest.mock import Mock, patch from tsbootstrap.async_bootstrap import ( - AsyncBootstrap, - AsyncWholeResidualBootstrap, AsyncBlockResidualBootstrap, + AsyncWholeResidualBootstrap, AsyncWholeSieveBootstrap, DynamicAsyncBootstrap, ) @@ -33,124 +31,117 @@ class TestAsyncBootstrap: """Tests for AsyncBootstrap classes.""" - + def test_bootstrap_without_indices(self): """Test bootstrap method without return_indices.""" - bootstrap = AsyncWholeResidualBootstrap( - n_bootstraps=3, - model_type="ar", - order=2 - ) + bootstrap = AsyncWholeResidualBootstrap(n_bootstraps=3, model_type="ar", order=2) X = np.random.randn(50) - + # Test without return_indices (default False) samples = list(bootstrap.bootstrap(X)) - + assert len(samples) == 3 for sample in samples: # Should just be arrays, not tuples assert isinstance(sample, np.ndarray) assert len(sample) == len(X) - + def test_destructor_exception_handling(self): """Test __del__ exception handling.""" bootstrap = AsyncWholeResidualBootstrap(n_bootstraps=1) - + # Mock the async service to raise an exception during cleanup mock_service = Mock() mock_service.cleanup_executor.side_effect = RuntimeError("Cleanup failed") bootstrap._async_service = mock_service - + # Capture logging to verify the debug message - with patch('logging.getLogger') as mock_logger: + with patch("logging.getLogger") as mock_logger: logger_instance = Mock() mock_logger.return_value = logger_instance - + # Call __del__ directly bootstrap.__del__() - + # Verify cleanup was attempted mock_service.cleanup_executor.assert_called_once() - + # Verify logging occurred - mock_logger.assert_called_with('tsbootstrap.async_bootstrap') + mock_logger.assert_called_with("tsbootstrap.async_bootstrap") logger_instance.debug.assert_called_once() call_args = logger_instance.debug.call_args assert "Cleanup error during async bootstrap destruction" in call_args[0][0] - assert call_args[1]['exc_info'] is True - + assert call_args[1]["exc_info"] is True + def test_destructor_during_shutdown(self): """Test __del__ when sys is None during interpreter shutdown.""" bootstrap = AsyncWholeResidualBootstrap(n_bootstraps=1) - + # Mock the async service to raise an exception mock_service = Mock() mock_service.cleanup_executor.side_effect = RuntimeError("Cleanup failed") bootstrap._async_service = mock_service - + # Mock the sys module to be None after import # This simulates the case where sys exists but returns None during shutdown - with patch('builtins.__import__') as mock_import: + with patch("builtins.__import__") as mock_import: + def side_effect(name, *args, **kwargs): - if name == 'sys': - # Return a mock that evaluates to None + if name == "sys": + # Return a mock that evaluates to None return None return __import__(name, *args, **kwargs) - + mock_import.side_effect = side_effect - + # Should not raise any exceptions even when sys is None bootstrap.__del__() - + # Cleanup was still attempted mock_service.cleanup_executor.assert_called_once() - + def test_all_get_test_params(self): """Test get_test_params for all async bootstrap classes.""" # AsyncWholeResidualBootstrap.get_test_params params = AsyncWholeResidualBootstrap.get_test_params() assert len(params) == 1 assert params[0]["n_bootstraps"] == 10 - + # AsyncBlockResidualBootstrap.get_test_params params = AsyncBlockResidualBootstrap.get_test_params() assert len(params) == 1 assert params[0]["n_bootstraps"] == 10 - + # AsyncWholeSieveBootstrap.get_test_params params = AsyncWholeSieveBootstrap.get_test_params() assert len(params) == 1 assert params[0]["n_bootstraps"] == 10 - + # DynamicAsyncBootstrap.get_test_params params = DynamicAsyncBootstrap.get_test_params() assert len(params) == 1 assert params[0]["n_bootstraps"] == 10 - + def test_async_service_initialization_edge_cases(self): """Test edge cases in async service initialization.""" # Test that async service is properly initialized bootstrap = AsyncWholeResidualBootstrap(n_bootstraps=2, model_type="ar", order=2) - + # Check async service initialization through parallel bootstrap X = np.random.randn(30) - + # Use synchronous interface which doesn't require async service samples = list(bootstrap.bootstrap(X)) assert len(samples) == 2 - + def test_bootstrap_with_indices_multivariate(self): """Test bootstrap with return_indices=True for multivariate data.""" - bootstrap = AsyncWholeResidualBootstrap( - n_bootstraps=2, - model_type="var", - order=2 - ) + bootstrap = AsyncWholeResidualBootstrap(n_bootstraps=2, model_type="var", order=2) X = np.random.randn(50, 3) # Multivariate - + # Test with return_indices=True results = list(bootstrap.bootstrap(X, return_indices=True)) - + assert len(results) == 2 for sample, indices in results: assert isinstance(sample, np.ndarray) @@ -160,45 +151,39 @@ def test_bootstrap_with_indices_multivariate(self): # Indices should be in valid range assert np.all(indices >= 0) assert np.all(indices < len(X)) - + def test_parallel_bootstrap_edge_cases(self): """Test edge cases in parallel bootstrap processing.""" bootstrap = AsyncBlockResidualBootstrap( - n_bootstraps=5, - model_type="ar", - order=2, - block_length=10 + n_bootstraps=5, model_type="ar", order=2, block_length=10 ) X = np.random.randn(100) - + # Test that parallel bootstrap works with batch size # We'll test the synchronous interface which covers lines we need samples = list(bootstrap.bootstrap(X)) - + assert len(samples) == 5 for sample in samples: assert len(sample) == len(X) - + def test_dynamic_bootstrap_initialization(self): """Test DynamicAsyncBootstrap initialization scenarios.""" # Test with default settings bootstrap = DynamicAsyncBootstrap(n_bootstraps=3) assert bootstrap.bootstrap_method == "residual" # Default - + # Test with specific method bootstrap2 = DynamicAsyncBootstrap( - n_bootstraps=3, - bootstrap_method="sieve", - min_lag=1, - max_lag=5 + n_bootstraps=3, bootstrap_method="sieve", min_lag=1, max_lag=5 ) assert bootstrap2.bootstrap_method == "sieve" - + # Generate samples to ensure method is set X = np.random.randn(50) samples = list(bootstrap2.bootstrap(X)) assert len(samples) == 3 - + # The bootstrap implementation is created on demand # Test block_residual method bootstrap3 = DynamicAsyncBootstrap( @@ -206,7 +191,7 @@ def test_dynamic_bootstrap_initialization(self): bootstrap_method="block_residual", model_type="ar", order=2, - block_length=10 + block_length=10, ) samples3 = list(bootstrap3.bootstrap(X)) assert len(samples3) == 2 @@ -214,4 +199,4 @@ def test_dynamic_bootstrap_initialization(self): if __name__ == "__main__": # Run tests - pytest.main([__file__, "-v"]) \ No newline at end of file + pytest.main([__file__, "-v"]) diff --git a/tests/unit/test_backend_features.py b/tests/unit/test_backend_features.py index a2cdd7a5..1a583eaa 100644 --- a/tests/unit/test_backend_features.py +++ b/tests/unit/test_backend_features.py @@ -10,14 +10,14 @@ ensuring backends meet the requirements for large-scale time series analysis. """ +from unittest.mock import Mock + import numpy as np import pytest -from unittest.mock import Mock, patch from tsbootstrap.backends.batch_processor import BatchProcessor from tsbootstrap.backends.calibration import CalibrationSystem from tsbootstrap.backends.feature_flags import FeatureFlags -from tsbootstrap.backends.protocol import ModelBackend, FittedModelBackend class TestBatchProcessing: @@ -33,11 +33,7 @@ def test_batch_model_fitting(self): series_list = [np.cumsum(np.random.randn(100)) for _ in range(10)] # Fit models in batch - models = processor.fit_batch( - series_list, - model_type="ARIMA", - order=(1, 1, 1) - ) + models = processor.fit_batch(series_list, model_type="ARIMA", order=(1, 1, 1)) assert len(models) == 10 assert all(hasattr(m, "predict") for m in models) @@ -45,20 +41,13 @@ def test_batch_model_fitting(self): @pytest.mark.skip(reason="BatchProcessor is a planned future feature") def test_parallel_batch_processing(self): """Test parallel batch processing.""" - processor = BatchProcessor( - backend="statsforecast", - n_jobs=2 - ) + processor = BatchProcessor(backend="statsforecast", n_jobs=2) # Generate data series_list = [np.random.randn(50) for _ in range(20)] # Process in parallel - results = processor.process_batch( - series_list, - func=lambda x: np.mean(x), - n_jobs=2 - ) + results = processor.process_batch(series_list, func=lambda x: np.mean(x), n_jobs=2) assert len(results) == 20 assert all(isinstance(r, float) for r in results) @@ -96,14 +85,11 @@ def test_parameter_calibration(self): n = 200 data = np.zeros(n) for i in range(2, n): - data[i] = 0.7 * data[i-1] - 0.3 * data[i-2] + np.random.randn() + data[i] = 0.7 * data[i - 1] - 0.3 * data[i - 2] + np.random.randn() # Calibrate AR model best_params = calibrator.calibrate( - data, - model_type="ar", - param_grid={"order": [1, 2, 3, 4]}, - metric="aic" + data, model_type="ar", param_grid={"order": [1, 2, 3, 4]}, metric="aic" ) assert "order" in best_params @@ -121,11 +107,9 @@ def test_cross_validation_calibration(self): best_params = calibrator.calibrate_cv( data, model_type="arima", - param_grid={ - "order": [(1,0,1), (1,1,1), (2,1,1)] - }, + param_grid={"order": [(1, 0, 1), (1, 1, 1), (2, 1, 1)]}, cv_splits=3, - metric="mse" + metric="mse", ) assert "order" in best_params @@ -141,10 +125,7 @@ def test_calibration_metrics(self): # Test different metrics for metric in ["aic", "bic", "mse", "mae"]: result = calibrator.calibrate( - data, - model_type="ar", - param_grid={"order": [1, 2]}, - metric=metric + data, model_type="ar", param_grid={"order": [1, 2]}, metric=metric ) assert "order" in result @@ -201,8 +182,8 @@ class TestProtocolCompliance: def test_backend_protocol_methods(self): """Test that backends implement required protocol methods.""" - from tsbootstrap.backends.statsmodels_backend import StatsModelsBackend from tsbootstrap.backends.statsforecast_backend import StatsForecastBackend + from tsbootstrap.backends.statsmodels_backend import StatsModelsBackend # Backend classes should have fit method backend_required_methods = ["fit"] @@ -211,17 +192,17 @@ def test_backend_protocol_methods(self): backend = backend_class(model_type="AR", order=1) for method in backend_required_methods: assert hasattr(backend, method) - + # Fitted model should have these methods data = np.random.randn(100) fitted = backend.fit(data) fitted_required_methods = [ "predict", "params", - "residuals", + "residuals", "fitted_values", "get_info_criteria", - "score" + "score", ] for method in fitted_required_methods: assert hasattr(fitted, method), f"Fitted model missing {method}" @@ -255,10 +236,7 @@ def test_performance_benchmarks(self): # Small dataset benchmark small_data = np.random.randn(100) small_time = benchmark_backend( - "statsforecast", - model_type="ARIMA", - order=(1,1,1), - data=small_data + "statsforecast", model_type="ARIMA", order=(1, 1, 1), data=small_data ) # Should fit in reasonable time @@ -266,12 +244,7 @@ def test_performance_benchmarks(self): # Large dataset benchmark large_data = np.random.randn(10000) - large_time = benchmark_backend( - "statsforecast", - model_type="AR", - order=2, - data=large_data - ) + large_time = benchmark_backend("statsforecast", model_type="AR", order=2, data=large_data) # Should still be reasonably fast assert large_time < 5.0 # Less than 5 seconds @@ -283,17 +256,11 @@ def test_memory_efficiency(self): # Measure memory for different data sizes memory_100 = measure_memory_usage( - backend="statsforecast", - model_type="AR", - order=2, - data_size=100 + backend="statsforecast", model_type="AR", order=2, data_size=100 ) memory_1000 = measure_memory_usage( - backend="statsforecast", - model_type="AR", - order=2, - data_size=1000 + backend="statsforecast", model_type="AR", order=2, data_size=1000 ) # Memory should scale sub-linearly @@ -306,18 +273,15 @@ def test_scaling_characteristics(self): from tsbootstrap.backends.performance_utils import measure_scaling scaling_results = measure_scaling( - backend="statsforecast", - model_type="AR", - order=2, - data_sizes=[100, 500, 1000, 5000] + backend="statsforecast", model_type="AR", order=2, data_sizes=[100, 500, 1000, 5000] ) # Check that scaling is reasonable times = scaling_results["times"] - + # Time should not grow quadratically time_ratio = times[-1] / times[0] size_ratio = 5000 / 100 - + # Should be better than O(n²) - assert time_ratio < size_ratio ** 1.5 \ No newline at end of file + assert time_ratio < size_ratio**1.5 diff --git a/tests/unit/test_backends.py b/tests/unit/test_backends.py index 33084ee2..b24f063f 100644 --- a/tests/unit/test_backends.py +++ b/tests/unit/test_backends.py @@ -11,12 +11,11 @@ import numpy as np import pytest -from numpy.testing import assert_allclose -from tsbootstrap.backends.statsmodels_backend import StatsModelsBackend -from tsbootstrap.backends.statsforecast_backend import StatsForecastBackend from tsbootstrap.backends.adapter import BackendToStatsmodelsAdapter, fit_with_backend from tsbootstrap.backends.factory import create_backend +from tsbootstrap.backends.statsforecast_backend import StatsForecastBackend +from tsbootstrap.backends.statsmodels_backend import StatsModelsBackend class TestStatsModelsBackend: @@ -172,9 +171,7 @@ def test_backend_selection(self): assert isinstance(backend, StatsModelsBackend) # Can force statsforecast for ARIMA - backend = create_backend( - model_type="ARIMA", order=(1, 0, 1), force_backend="statsforecast" - ) + backend = create_backend(model_type="ARIMA", order=(1, 0, 1), force_backend="statsforecast") assert isinstance(backend, StatsForecastBackend) def test_fit_with_backend(self): @@ -184,10 +181,7 @@ def test_fit_with_backend(self): # Fit with automatic backend selection fitted = fit_with_backend( - model_type="ARIMA", - endog=data, - order=(1, 0, 1), - return_backend=False # Get adapter + model_type="ARIMA", endog=data, order=(1, 0, 1), return_backend=False # Get adapter ) assert isinstance(fitted, BackendToStatsmodelsAdapter) @@ -197,11 +191,14 @@ def test_fit_with_backend(self): class TestBackendCompatibility: """Test compatibility between backends.""" - @pytest.mark.parametrize("model_type,order", [ - ("AR", 2), - ("ARIMA", (1, 0, 1)), - ("ARIMA", (2, 1, 1)), - ]) + @pytest.mark.parametrize( + "model_type,order", + [ + ("AR", 2), + ("ARIMA", (1, 0, 1)), + ("ARIMA", (2, 1, 1)), + ], + ) def test_consistent_predictions(self, model_type, order): """Test that backends produce similar predictions.""" np.random.seed(42) @@ -241,4 +238,4 @@ def test_parameter_consistency(self): # Both should have AR parameters assert "ar" in sm_fitted.params or "ar_coef" in sm_fitted.params - assert "ar" in sf_fitted.params \ No newline at end of file + assert "ar" in sf_fitted.params diff --git a/tests/unit/test_base_bootstrap.py b/tests/unit/test_base_bootstrap.py index 07245ef3..a39e0c10 100644 --- a/tests/unit/test_base_bootstrap.py +++ b/tests/unit/test_base_bootstrap.py @@ -13,6 +13,7 @@ import numpy as np import pytest + from tsbootstrap.base_bootstrap import ( BaseTimeSeriesBootstrap, BlockBasedBootstrap, @@ -23,7 +24,7 @@ class TestBaseTimeSeriesBootstrap: """Test the composition-based base bootstrap class. - + This test suite validates the core functionality of the base bootstrap architecture, including service injection, parameter management, and the fundamental bootstrap generation mechanisms that all concrete @@ -473,13 +474,13 @@ class MinimalBootstrap(BaseTimeSeriesBootstrap): class ConcreteBootstrap(BaseTimeSeriesBootstrap): """Concrete implementation for testing abstract base class.""" - + _tags = { "object_type": "bootstrap", "bootstrap_type": "test", "capability:multivariate": True, } - + def _generate_samples_single_bootstrap(self, X: np.ndarray, y=None): # Simple implementation that returns X as-is return X @@ -487,175 +488,175 @@ def _generate_samples_single_bootstrap(self, X: np.ndarray, y=None): class ConcreteBlockBootstrap(BlockBasedBootstrap): """Concrete implementation for testing block-based abstract class.""" - + _tags = { "object_type": "bootstrap", "bootstrap_type": "block", "capability:multivariate": True, } - + def _generate_samples_single_bootstrap(self, X: np.ndarray, y=None): return X class TestBaseBootstrap: """Tests targeting specific uncovered lines in base_bootstrap.py.""" - + def test_rng_validation_edge_cases(self): """Test RNG validation edge cases .""" # Test with integer seed bootstrap = ConcreteBootstrap(rng=42) assert isinstance(bootstrap.rng, np.random.Generator) - + # Test with Generator instance gen = np.random.default_rng(123) bootstrap2 = ConcreteBootstrap(rng=gen) assert bootstrap2.rng is gen - + # Test with None (should create default) bootstrap3 = ConcreteBootstrap(rng=None) assert isinstance(bootstrap3.rng, np.random.Generator) - + # Test RNG serialization for JSON mode data = bootstrap.model_dump(mode="json") assert data["rng"] == 42 # Should return original seed value - + def test_get_params_fallback(self): """Test get_params fallback when sklearn_adapter is None .""" bootstrap = ConcreteBootstrap(n_bootstraps=5, rng=42) - + # Temporarily disable sklearn adapter original_adapter = bootstrap._services.sklearn_adapter bootstrap._services.sklearn_adapter = None - + params = bootstrap.get_params() - + # Should use fallback logic assert params["n_bootstraps"] == 5 assert "rng" in params - + # Restore adapter bootstrap._services.sklearn_adapter = original_adapter - + # Note: Line 314 (NotImplementedError in abstract method) cannot be tested directly # since Python prevents instantiating abstract classes. The line is there for # documentation and will never be executed. - + def test_get_test_params(self): """Test get_test_params methods .""" # BaseTimeSeriesBootstrap.get_test_params params = BaseTimeSeriesBootstrap.get_test_params() assert params == [] # Abstract class returns empty list - + # BlockBasedBootstrap.get_test_params params = BlockBasedBootstrap.get_test_params() assert params == [] # Abstract class returns empty list - + def test_sklearn_transformer_interface(self): """Test sklearn transformer interface methods .""" bootstrap = ConcreteBootstrap(n_bootstraps=3) X = np.random.randn(100) # 1D array for simple bootstrap - - # Test fit method + + # Test fit method fitted = bootstrap.fit(X) assert fitted is bootstrap # Should return self assert hasattr(bootstrap, "_n_samples") assert bootstrap._n_samples == 100 assert bootstrap._n_features == 1 # 1D array has 1 feature assert bootstrap._is_fitted is True - + # Test fit with y y = np.random.randn(100) bootstrap2 = ConcreteBootstrap(n_bootstraps=3) bootstrap2.fit(X, y) assert bootstrap2._is_fitted is True - - # Test transform without fit + + # Test transform without fit bootstrap3 = ConcreteBootstrap(n_bootstraps=3) # Transform should work even without fit samples = bootstrap3.transform(X) assert len(samples) == 3 assert all(isinstance(s, np.ndarray) for s in samples) - - # Test fit_transform + + # Test fit_transform bootstrap4 = ConcreteBootstrap(n_bootstraps=3) samples = bootstrap4.fit_transform(X, y) assert len(samples) == 3 assert bootstrap4._is_fitted is True - + def test_block_length_validation_error(self): """Test block length validation error .""" # Pydantic validates this at construction time # The error message is different from the custom validator with pytest.raises(ValueError, match="Input should be greater than or equal to 1"): ConcreteBlockBootstrap(block_length=0) - + with pytest.raises(ValueError, match="Input should be greater than or equal to 1"): ConcreteBlockBootstrap(block_length=-5) - + def test_bootstrap_with_return_indices(self): """Test bootstrap with return_indices=True to cover more edge cases.""" bootstrap = ConcreteBootstrap(n_bootstraps=2, rng=42) X = np.random.randn(50) - + # Test with return_indices=True results = list(bootstrap.bootstrap(X, return_indices=True)) - + assert len(results) == 2 for sample, indices in results: assert isinstance(sample, np.ndarray) assert isinstance(indices, np.ndarray) assert len(indices) == len(X) - + def test_whole_data_bootstrap(self): """Test WholeDataBootstrap implementation.""" bootstrap = WholeDataBootstrap(n_bootstraps=3, rng=42) X = np.array([1, 2, 3, 4, 5]) - + samples = list(bootstrap.bootstrap(X)) - + assert len(samples) == 3 # Each sample should be same length as original for sample in samples: assert len(sample) == len(X) # All values should come from original data assert all(val in X for val in sample) - + def test_model_dump_json_mode(self): """Test model_dump with JSON mode for numpy serialization.""" bootstrap = ConcreteBootstrap(n_bootstraps=5, rng=42) - + # Test JSON mode serialization data = bootstrap.model_dump(mode="json") - + # Should serialize properly assert isinstance(data, dict) assert data["n_bootstraps"] == 5 assert data["rng"] == 42 # Original seed value - + def test_service_lazy_initialization(self): """Test lazy initialization of services.""" bootstrap = ConcreteBootstrap() - + # Services should not be initialized yet assert not bootstrap._services_initialized - + # Access services services = bootstrap._services - + # Now should be initialized assert bootstrap._services_initialized assert isinstance(services, BootstrapServices) - + def test_rng_init_val_preservation(self): """Test that original RNG value is preserved for sklearn compatibility.""" # Test with integer seed bootstrap = ConcreteBootstrap(rng=123) assert bootstrap._rng_init_val == 123 - + params = bootstrap.get_params() assert params["rng"] == 123 # Should return original value - + # Test set_params with new RNG bootstrap.set_params(rng=456) assert bootstrap._rng_init_val == 456 @@ -664,4 +665,4 @@ def test_rng_init_val_preservation(self): if __name__ == "__main__": # Run tests - pytest.main([__file__, "-v"]) \ No newline at end of file + pytest.main([__file__, "-v"]) diff --git a/tests/unit/test_batch_bootstrap.py b/tests/unit/test_batch_bootstrap.py index 6d1d5a1c..912537a2 100644 --- a/tests/unit/test_batch_bootstrap.py +++ b/tests/unit/test_batch_bootstrap.py @@ -1,7 +1,6 @@ """Tests for batch_bootstrap.py.""" import numpy as np -import pytest from tsbootstrap.batch_bootstrap import ( BatchOptimizedBlockBootstrap, @@ -14,12 +13,8 @@ class TestBatchOptimizedBlockBootstrap: def test_initialization(self): """Test basic initialization.""" - bootstrap = BatchOptimizedBlockBootstrap( - n_bootstraps=10, - block_length=5, - batch_size=5 - ) - + bootstrap = BatchOptimizedBlockBootstrap(n_bootstraps=10, block_length=5, batch_size=5) + assert bootstrap.n_bootstraps == 10 assert bootstrap.block_length == 5 assert bootstrap.batch_size == 5 @@ -29,14 +24,11 @@ def test_bootstrap_generation(self): """Test bootstrap sample generation.""" X = np.random.randn(100) bootstrap = BatchOptimizedBlockBootstrap( - n_bootstraps=6, - block_length=10, - batch_size=3, - rng=42 + n_bootstraps=6, block_length=10, batch_size=3, rng=42 ) - + samples = list(bootstrap.bootstrap(X)) - + assert len(samples) == 6 for sample in samples: assert len(sample) == len(X) @@ -45,41 +37,31 @@ def test_bootstrap_generation(self): def test_batch_size_effect(self): """Test that batch_size is properly used.""" X = np.random.randn(50) - + # Small batch size bootstrap1 = BatchOptimizedBlockBootstrap( - n_bootstraps=4, - block_length=5, - batch_size=2, - rng=42 + n_bootstraps=4, block_length=5, batch_size=2, rng=42 ) - + # Large batch size bootstrap2 = BatchOptimizedBlockBootstrap( - n_bootstraps=4, - block_length=5, - batch_size=4, - rng=42 + n_bootstraps=4, block_length=5, batch_size=4, rng=42 ) - + # Both should produce same results with same seed samples1 = list(bootstrap1.bootstrap(X)) samples2 = list(bootstrap2.bootstrap(X)) - + assert len(samples1) == len(samples2) # Results might differ due to batching implementation def test_multivariate_data(self): """Test with multivariate data.""" X = np.random.randn(100, 3) - bootstrap = BatchOptimizedBlockBootstrap( - n_bootstraps=5, - block_length=10, - batch_size=5 - ) - + bootstrap = BatchOptimizedBlockBootstrap(n_bootstraps=5, block_length=10, batch_size=5) + samples = list(bootstrap.bootstrap(X)) - + assert len(samples) == 5 for sample in samples: assert sample.shape == X.shape @@ -91,12 +73,9 @@ class TestBatchOptimizedModelBootstrap: def test_initialization(self): """Test basic initialization.""" bootstrap = BatchOptimizedModelBootstrap( - n_bootstraps=10, - model_type="ar", - order=2, - batch_size=5 + n_bootstraps=10, model_type="ar", order=2, batch_size=5 ) - + assert bootstrap.n_bootstraps == 10 assert bootstrap.model_type == "ar" assert bootstrap.order == 2 @@ -107,15 +86,11 @@ def test_bootstrap_generation(self): """Test bootstrap sample generation.""" X = np.random.randn(100) bootstrap = BatchOptimizedModelBootstrap( - n_bootstraps=4, - model_type="ar", - order=2, - batch_size=2, - rng=42 + n_bootstraps=4, model_type="ar", order=2, batch_size=2, rng=42 ) - + samples = list(bootstrap.bootstrap(X)) - + assert len(samples) == 4 for sample in samples: assert len(sample) == len(X) @@ -124,23 +99,17 @@ def test_bootstrap_generation(self): def test_different_models(self): """Test with different model types.""" X = np.random.randn(100) - + # AR model ar_bootstrap = BatchOptimizedModelBootstrap( - n_bootstraps=2, - model_type="ar", - order=1, - batch_size=2 + n_bootstraps=2, model_type="ar", order=1, batch_size=2 ) ar_samples = list(ar_bootstrap.bootstrap(X)) assert len(ar_samples) == 2 - + # ARIMA model (MA is not directly supported, use ARIMA with MA component) arima_bootstrap = BatchOptimizedModelBootstrap( - n_bootstraps=2, - model_type="arima", - order=(0, 0, 1), # Pure MA(1) model - batch_size=2 + n_bootstraps=2, model_type="arima", order=(0, 0, 1), batch_size=2 # Pure MA(1) model ) arima_samples = list(arima_bootstrap.bootstrap(X)) assert len(arima_samples) == 2 @@ -150,7 +119,7 @@ def test_get_test_params(self): params = BatchOptimizedBlockBootstrap.get_test_params() assert isinstance(params, list) assert len(params) > 0 - + params = BatchOptimizedModelBootstrap.get_test_params() assert isinstance(params, list) - assert len(params) > 0 \ No newline at end of file + assert len(params) > 0 diff --git a/tests/unit/test_batch_bootstrap_service.py b/tests/unit/test_batch_bootstrap_service.py index 3834d6e3..c5246268 100644 --- a/tests/unit/test_batch_bootstrap_service.py +++ b/tests/unit/test_batch_bootstrap_service.py @@ -1,294 +1,295 @@ """Tests for batch_bootstrap_service.py.""" +from unittest.mock import ANY, Mock, patch + import numpy as np import pytest -from unittest.mock import Mock, patch, ANY from tsbootstrap.services.batch_bootstrap_service import ( - BatchBootstrapService, - IndividualModelWrapper + BatchBootstrapService, + IndividualModelWrapper, ) class TestIndividualModelWrapper: """Tests targeting specific uncovered lines in IndividualModelWrapper.""" - + def test_init_with_params_list_underscore(self): """Test initialization with _params_list attribute .""" mock_backend = Mock() - mock_backend._params_list = [{'param1': 1}, {'param2': 2}] + mock_backend._params_list = [{"param1": 1}, {"param2": 2}] mock_backend._residuals = np.array([[1, 2, 3], [4, 5, 6]]) mock_backend._fitted_values = np.array([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]]) - + wrapper = IndividualModelWrapper(mock_backend, 0, "ar", 1) - - assert wrapper.params == {'param1': 1} + + assert wrapper.params == {"param1": 1} assert wrapper.series_index == 0 assert wrapper.model_type == "ar" assert wrapper.order == 1 - + def test_init_with_params_list_no_underscore(self): """Test initialization with params_list attribute .""" mock_backend = Mock() del mock_backend._params_list # Remove _params_list - mock_backend.params_list = [{'param1': 1}, {'param2': 2}] + mock_backend.params_list = [{"param1": 1}, {"param2": 2}] mock_backend._residuals = np.array([[1, 2, 3], [4, 5, 6]]) mock_backend._fitted_values = np.array([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]]) - + wrapper = IndividualModelWrapper(mock_backend, 1, "arima", (1, 0, 1)) - - assert wrapper.params == {'param2': 2} - + + assert wrapper.params == {"param2": 2} + def test_init_with_params_fallback_dict(self): """Test initialization with params fallback for dict with series_params .""" mock_backend = Mock() del mock_backend._params_list del mock_backend.params_list - mock_backend.params = { - 'series_params': [{'param1': 1}, {'param2': 2}] - } + mock_backend.params = {"series_params": [{"param1": 1}, {"param2": 2}]} mock_backend._residuals = np.array([[1, 2, 3], [4, 5, 6]]) mock_backend._fitted_values = np.array([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]]) - + wrapper = IndividualModelWrapper(mock_backend, 0, "ar", 1) - - assert wrapper.params == {'param1': 1} - + + assert wrapper.params == {"param1": 1} + def test_init_with_params_fallback_direct(self): """Test initialization with params fallback for direct params .""" mock_backend = Mock() del mock_backend._params_list del mock_backend.params_list - mock_backend.params = {'direct_param': 42} + mock_backend.params = {"direct_param": 42} mock_backend._residuals = np.array([[1, 2, 3], [4, 5, 6]]) mock_backend._fitted_values = np.array([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]]) - + wrapper = IndividualModelWrapper(mock_backend, 0, "ar", 1) - - assert wrapper.params == {'direct_param': 42} - + + assert wrapper.params == {"direct_param": 42} + def test_init_residuals_underscore_attribute(self): """Test residual extraction with _residuals attribute .""" mock_backend = Mock() - mock_backend._params_list = [{'param1': 1}, {'param2': 2}] # Need 2 elements for index 1 + mock_backend._params_list = [{"param1": 1}, {"param2": 2}] # Need 2 elements for index 1 # Use a real numpy array - it already has the ndim attribute mock_backend._residuals = np.array([[1, 2, 3], [4, 5, 6]]) mock_backend._fitted_values = np.array([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]]) - + wrapper = IndividualModelWrapper(mock_backend, 1, "ar", 1) - + np.testing.assert_array_equal(wrapper.residuals, np.array([4, 5, 6])) - assert wrapper.params == {'param2': 2} - + assert wrapper.params == {"param2": 2} + def test_init_residuals_no_underscore_attribute(self): """Test residual extraction with residuals attribute .""" mock_backend = Mock() - mock_backend._params_list = [{'param1': 1}, {'param2': 2}] + mock_backend._params_list = [{"param1": 1}, {"param2": 2}] del mock_backend._residuals mock_backend.residuals = np.array([[1, 2, 3], [4, 5, 6]]) mock_backend._fitted_values = np.array([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]]) - + wrapper = IndividualModelWrapper(mock_backend, 0, "ar", 1) - + np.testing.assert_array_equal(wrapper.residuals, np.array([1, 2, 3])) - + def test_init_residuals_1d_fallback(self): """Test residual extraction with 1D array fallback .""" mock_backend = Mock() - mock_backend._params_list = [{'param1': 1}, {'param2': 2}] + mock_backend._params_list = [{"param1": 1}, {"param2": 2}] mock_backend._residuals = np.array([1, 2, 3]) # 1D array mock_backend._fitted_values = np.array([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]]) - + wrapper = IndividualModelWrapper(mock_backend, 0, "ar", 1) - + np.testing.assert_array_equal(wrapper.residuals, np.array([1, 2, 3])) - + def test_init_residuals_exception_handling(self): """Test residual extraction exception handling .""" mock_backend = Mock() - mock_backend._params_list = [{'param1': 1}, {'param2': 2}] + mock_backend._params_list = [{"param1": 1}, {"param2": 2}] # Make residuals access raise an exception mock_backend._residuals = Mock(side_effect=AttributeError("No residuals")) mock_backend.residuals = Mock(side_effect=TypeError("Type error")) mock_backend._fitted_values = np.array([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]]) - + wrapper = IndividualModelWrapper(mock_backend, 0, "ar", 1) - + assert wrapper.residuals is None - + def test_init_fitted_values_underscore_attribute(self): """Test fitted values extraction with _fitted_values attribute .""" mock_backend = Mock() - mock_backend._params_list = [{'param1': 1}, {'param2': 2}] + mock_backend._params_list = [{"param1": 1}, {"param2": 2}] mock_backend._residuals = np.array([[1, 2, 3], [4, 5, 6]]) # Use a real numpy array - it already has the ndim attribute mock_backend._fitted_values = np.array([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]]) - + wrapper = IndividualModelWrapper(mock_backend, 1, "ar", 1) - + np.testing.assert_array_equal(wrapper.fitted_values, np.array([0.4, 0.5, 0.6])) - + def test_init_fitted_values_no_underscore_attribute(self): """Test fitted values extraction with fitted_values attribute .""" mock_backend = Mock() - mock_backend._params_list = [{'param1': 1}, {'param2': 2}] + mock_backend._params_list = [{"param1": 1}, {"param2": 2}] mock_backend._residuals = np.array([[1, 2, 3], [4, 5, 6]]) del mock_backend._fitted_values mock_backend.fitted_values = np.array([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]]) - + wrapper = IndividualModelWrapper(mock_backend, 0, "ar", 1) - + np.testing.assert_array_equal(wrapper.fitted_values, np.array([0.1, 0.2, 0.3])) - + def test_init_fitted_values_1d_fallback(self): """Test fitted values extraction with 1D array fallback .""" mock_backend = Mock() - mock_backend._params_list = [{'param1': 1}, {'param2': 2}] + mock_backend._params_list = [{"param1": 1}, {"param2": 2}] mock_backend._residuals = np.array([[1, 2, 3], [4, 5, 6]]) mock_backend._fitted_values = np.array([0.1, 0.2, 0.3]) # 1D array - + wrapper = IndividualModelWrapper(mock_backend, 0, "ar", 1) - + np.testing.assert_array_equal(wrapper.fitted_values, np.array([0.1, 0.2, 0.3])) - + def test_init_fitted_values_exception_handling(self): """Test fitted values extraction exception handling .""" mock_backend = Mock() - mock_backend._params_list = [{'param1': 1}, {'param2': 2}] + mock_backend._params_list = [{"param1": 1}, {"param2": 2}] mock_backend._residuals = np.array([[1, 2, 3], [4, 5, 6]]) # Make fitted_values access raise an exception mock_backend._fitted_values = Mock(side_effect=AttributeError("No fitted values")) mock_backend.fitted_values = Mock(side_effect=TypeError("Type error")) - + wrapper = IndividualModelWrapper(mock_backend, 0, "ar", 1) - + assert wrapper.fitted_values is None - + def test_predict_multidimensional(self): """Test predict with multidimensional predictions .""" mock_backend = Mock() - mock_backend._params_list = [{'param1': 1}, {'param2': 2}] + mock_backend._params_list = [{"param1": 1}, {"param2": 2}] mock_backend._residuals = np.array([[1, 2, 3], [4, 5, 6]]) mock_backend._fitted_values = np.array([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]]) - + # Mock backend predict to return 2D array mock_backend.predict.return_value = np.array([[1, 2, 3], [4, 5, 6]]) - + wrapper = IndividualModelWrapper(mock_backend, 1, "ar", 1) result = wrapper.predict(steps=3) - + np.testing.assert_array_equal(result, np.array([4, 5, 6])) mock_backend.predict.assert_called_once_with(steps=3, X=None) - + def test_predict_1d_fallback(self): """Test predict with 1D prediction fallback .""" mock_backend = Mock() - mock_backend._params_list = [{'param1': 1}, {'param2': 2}] + mock_backend._params_list = [{"param1": 1}, {"param2": 2}] mock_backend._residuals = np.array([[1, 2, 3], [4, 5, 6]]) mock_backend._fitted_values = np.array([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]]) - + # Mock backend predict to return 1D array mock_backend.predict.return_value = np.array([1, 2, 3]) - + wrapper = IndividualModelWrapper(mock_backend, 0, "ar", 1) result = wrapper.predict(steps=3, X=np.array([1, 2, 3])) - + np.testing.assert_array_equal(result, np.array([1, 2, 3])) # Use ANY to avoid array comparison issues mock_backend.predict.assert_called_once_with(steps=3, X=ANY) - + def test_simulate_multidimensional(self): """Test simulate with multidimensional simulations .""" mock_backend = Mock() - mock_backend._params_list = [{'param1': 1}, {'param2': 2}] + mock_backend._params_list = [{"param1": 1}, {"param2": 2}] mock_backend._residuals = np.array([[1, 2, 3], [4, 5, 6]]) mock_backend._fitted_values = np.array([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]]) - + # Mock backend simulate to return 3D array (n_series, steps, n_paths) - mock_backend.simulate.return_value = np.array([[[1, 2], [2, 3], [3, 4]], [[4, 5], [5, 6], [6, 7]]]) - + mock_backend.simulate.return_value = np.array( + [[[1, 2], [2, 3], [3, 4]], [[4, 5], [5, 6], [6, 7]]] + ) + wrapper = IndividualModelWrapper(mock_backend, 1, "ar", 1) result = wrapper.simulate(steps=3, n_paths=2, random_state=42) - + np.testing.assert_array_equal(result, np.array([[4, 5], [5, 6], [6, 7]])) mock_backend.simulate.assert_called_once_with(steps=3, n_paths=2, X=None, random_state=42) - + def test_simulate_fallback(self): """Test simulate with fallback for lower dimensional arrays .""" mock_backend = Mock() - mock_backend._params_list = [{'param1': 1}, {'param2': 2}] + mock_backend._params_list = [{"param1": 1}, {"param2": 2}] mock_backend._residuals = np.array([[1, 2, 3], [4, 5, 6]]) mock_backend._fitted_values = np.array([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]]) - + # Mock backend simulate to return 2D array mock_backend.simulate.return_value = np.array([[1, 2], [2, 3]]) - + wrapper = IndividualModelWrapper(mock_backend, 0, "ar", 1) result = wrapper.simulate(steps=2, n_paths=2) - + np.testing.assert_array_equal(result, np.array([[1, 2], [2, 3]])) - + def test_forecast_alias(self): """Test forecast method as alias for predict .""" mock_backend = Mock() - mock_backend._params_list = [{'param1': 1}, {'param2': 2}] + mock_backend._params_list = [{"param1": 1}, {"param2": 2}] mock_backend._residuals = np.array([[1, 2, 3], [4, 5, 6]]) mock_backend._fitted_values = np.array([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]]) - + mock_backend.predict.return_value = np.array([1, 2, 3]) - + wrapper = IndividualModelWrapper(mock_backend, 0, "ar", 1) result = wrapper.forecast(steps=3) - + np.testing.assert_array_equal(result, np.array([1, 2, 3])) mock_backend.predict.assert_called_once_with(steps=3, X=None) - + def test_get_prediction_with_backend_method(self): """Test get_prediction when backend has the method .""" mock_backend = Mock() - mock_backend._params_list = [{'param1': 1}, {'param2': 2}] + mock_backend._params_list = [{"param1": 1}, {"param2": 2}] mock_backend._residuals = np.array([[1, 2, 3], [4, 5, 6]]) mock_backend._fitted_values = np.array([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]]) - + mock_backend.get_prediction.return_value = "prediction_result" - + wrapper = IndividualModelWrapper(mock_backend, 0, "ar", 1) result = wrapper.get_prediction(start=0, end=5) - + assert result == "prediction_result" mock_backend.get_prediction.assert_called_once_with(start=0, end=5) - + def test_get_prediction_fallback_with_defaults(self): """Test get_prediction fallback with default parameters .""" mock_backend = Mock() - mock_backend._params_list = [{'param1': 1}, {'param2': 2}] + mock_backend._params_list = [{"param1": 1}, {"param2": 2}] mock_backend._residuals = np.array([1, 2, 3]) # Length 3 mock_backend._fitted_values = np.array([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]]) - + # Remove get_prediction method to trigger fallback del mock_backend.get_prediction mock_backend.predict.return_value = np.array([4, 5, 6]) - + wrapper = IndividualModelWrapper(mock_backend, 0, "ar", 1) result = wrapper.get_prediction() # No start/end provided - + # Should use defaults: start=0, end=len(residuals)=3, steps=3 np.testing.assert_array_equal(result, np.array([4, 5, 6])) mock_backend.predict.assert_called_once_with(steps=3, X=None) - + def test_get_prediction_fallback_with_parameters(self): """Test get_prediction fallback with explicit parameters .""" mock_backend = Mock() - mock_backend._params_list = [{'param1': 1}, {'param2': 2}] + mock_backend._params_list = [{"param1": 1}, {"param2": 2}] mock_backend._residuals = np.array([1, 2, 3, 4, 5]) mock_backend._fitted_values = np.array([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]]) - + del mock_backend.get_prediction mock_backend.predict.return_value = np.array([6, 7]) - + wrapper = IndividualModelWrapper(mock_backend, 0, "ar", 1) result = wrapper.get_prediction(start=1, end=3) - + # steps = end - start = 3 - 1 = 2 np.testing.assert_array_equal(result, np.array([6, 7])) mock_backend.predict.assert_called_once_with(steps=2, X=None) @@ -296,212 +297,209 @@ def test_get_prediction_fallback_with_parameters(self): class TestBatchBootstrapService: """Tests targeting specific uncovered lines in BatchBootstrapService.""" - + def test_init(self): """Test initialization .""" # Test with default use_backend=False service = BatchBootstrapService() assert service.use_backend is False - + # Test with use_backend=True service = BatchBootstrapService(use_backend=True) assert service.use_backend is True - + def test_fit_models_batch_fallback_no_backend(self): """Test fit_models_batch fallback when use_backend=False .""" service = BatchBootstrapService(use_backend=False) - + bootstrap_samples = [ np.array([1, 2, 3, 4, 5]), np.array([2, 3, 4, 5, 6]), - np.array([3, 4, 5, 6, 7]) + np.array([3, 4, 5, 6, 7]), ] - - with patch.object(service, '_fit_models_sequential') as mock_sequential: + + with patch.object(service, "_fit_models_sequential") as mock_sequential: mock_sequential.return_value = ["model1", "model2", "model3"] - + result = service.fit_models_batch(bootstrap_samples, model_type="ar", order=2) - + assert result == ["model1", "model2", "model3"] - mock_sequential.assert_called_once_with( - bootstrap_samples, "ar", 2, None - ) - + mock_sequential.assert_called_once_with(bootstrap_samples, "ar", 2, None) + def test_fit_models_batch_fallback_unsupported_model(self): """Test fit_models_batch fallback for unsupported model type .""" service = BatchBootstrapService(use_backend=True) - - bootstrap_samples = [ - np.array([1, 2, 3, 4, 5]), - np.array([2, 3, 4, 5, 6]) - ] - - with patch.object(service, '_fit_models_sequential') as mock_sequential: + + bootstrap_samples = [np.array([1, 2, 3, 4, 5]), np.array([2, 3, 4, 5, 6])] + + with patch.object(service, "_fit_models_sequential") as mock_sequential: mock_sequential.return_value = ["model1", "model2"] - + # VAR model should trigger fallback result = service.fit_models_batch(bootstrap_samples, model_type="var", order=2) - + assert result == ["model1", "model2"] - mock_sequential.assert_called_once_with( - bootstrap_samples, "var", 2, None - ) - + mock_sequential.assert_called_once_with(bootstrap_samples, "var", 2, None) + def test_fit_models_batch_length_validation(self): """Test fit_models_batch length validation .""" service = BatchBootstrapService(use_backend=True) - + bootstrap_samples = [ - np.array([1, 2, 3, 4, 5]), # length 5 - np.array([2, 3, 4, 5]), # length 4 - different! + np.array([1, 2, 3, 4, 5]), # length 5 + np.array([2, 3, 4, 5]), # length 4 - different! ] - + with pytest.raises(ValueError, match="All bootstrap samples must have same length"): service.fit_models_batch(bootstrap_samples, model_type="ar", order=1) - + with pytest.raises(ValueError, match="Sample 0 has length 5, sample 1 has length 4"): service.fit_models_batch(bootstrap_samples, model_type="ar", order=1) - + def test_fit_models_batch_2d_data_handling(self): """Test fit_models_batch with 2D data handling .""" service = BatchBootstrapService(use_backend=True) - + bootstrap_samples = [ np.array([1, 2, 3, 4, 5]), np.array([2, 3, 4, 5, 6]), ] - - with patch('tsbootstrap.services.batch_bootstrap_service.create_backend') as mock_create: + + with patch("tsbootstrap.services.batch_bootstrap_service.create_backend") as mock_create: mock_backend = Mock() mock_fitted = Mock() - mock_fitted._params_list = [{'param1': 1}, {'param2': 2}] + mock_fitted._params_list = [{"param1": 1}, {"param2": 2}] mock_fitted._residuals = np.array([[1, 2, 3, 4, 5], [2, 3, 4, 5, 6]]) - mock_fitted._fitted_values = np.array([[0.1, 0.2, 0.3, 0.4, 0.5], [0.2, 0.3, 0.4, 0.5, 0.6]]) - + mock_fitted._fitted_values = np.array( + [[0.1, 0.2, 0.3, 0.4, 0.5], [0.2, 0.3, 0.4, 0.5, 0.6]] + ) + mock_backend.fit.return_value = mock_fitted mock_create.return_value = mock_backend - + result = service.fit_models_batch(bootstrap_samples, model_type="ar", order=1) - + # Check that stacked data has correct shape call_args = mock_backend.fit.call_args[0][0] assert call_args.shape == (2, 5) # (n_samples, n_obs) - + assert len(result) == 2 assert all(isinstance(model, IndividualModelWrapper) for model in result) - + def test_fit_models_batch_3d_data_handling(self): """Test fit_models_batch with 3D data handling .""" service = BatchBootstrapService(use_backend=True) - + # Create 3D bootstrap samples (multivariate) bootstrap_samples = [ np.array([[1, 2], [2, 3], [3, 4]]), # shape (3, 2) np.array([[2, 3], [3, 4], [4, 5]]), # shape (3, 2) ] - - with patch('tsbootstrap.services.batch_bootstrap_service.create_backend') as mock_create: + + with patch("tsbootstrap.services.batch_bootstrap_service.create_backend") as mock_create: mock_backend = Mock() mock_fitted = Mock() - mock_fitted._params_list = [{'param1': 1}, {'param2': 2}] + mock_fitted._params_list = [{"param1": 1}, {"param2": 2}] mock_fitted._residuals = np.array([[1, 2, 3], [2, 3, 4]]) mock_fitted._fitted_values = np.array([[0.1, 0.2, 0.3], [0.2, 0.3, 0.4]]) - + mock_backend.fit.return_value = mock_fitted mock_create.return_value = mock_backend - - result = service.fit_models_batch(bootstrap_samples, model_type="arima", order=(1, 0, 1)) - + + result = service.fit_models_batch( + bootstrap_samples, model_type="arima", order=(1, 0, 1) + ) + # Check that 3D data was converted to 2D by taking first variable call_args = mock_backend.fit.call_args[0][0] assert call_args.shape == (2, 3) # (n_samples, n_obs) np.testing.assert_array_equal(call_args[0], [1, 2, 3]) # First variable of first sample - np.testing.assert_array_equal(call_args[1], [2, 3, 4]) # First variable of second sample - + np.testing.assert_array_equal( + call_args[1], [2, 3, 4] + ) # First variable of second sample + def test_fit_models_batch_backend_creation(self): """Test fit_models_batch backend creation and fitting .""" service = BatchBootstrapService(use_backend=True) - + bootstrap_samples = [ np.array([1, 2, 3]), np.array([2, 3, 4]), ] - - with patch('tsbootstrap.services.batch_bootstrap_service.create_backend') as mock_create: + + with patch("tsbootstrap.services.batch_bootstrap_service.create_backend") as mock_create: mock_backend = Mock() mock_fitted = Mock() - mock_fitted._params_list = [{'param1': 1}, {'param2': 2}] + mock_fitted._params_list = [{"param1": 1}, {"param2": 2}] mock_fitted._residuals = np.array([[1, 2, 3], [2, 3, 4]]) mock_fitted._fitted_values = np.array([[0.1, 0.2, 0.3], [0.2, 0.3, 0.4]]) - + mock_backend.fit.return_value = mock_fitted mock_create.return_value = mock_backend - + result = service.fit_models_batch( - bootstrap_samples, - model_type="sarima", + bootstrap_samples, + model_type="sarima", order=(1, 1, 1), - seasonal_order=(1, 0, 1, 12) + seasonal_order=(1, 0, 1, 12), ) - + # Verify backend creation mock_create.assert_called_once_with( - model_type="SARIMA", - order=(1, 1, 1), - force_backend="statsforecast" + model_type="SARIMA", order=(1, 1, 1), force_backend="statsforecast" ) - + # Verify fitting was called mock_backend.fit.assert_called_once() - + # Verify individual model wrappers were created assert len(result) == 2 assert all(isinstance(model, IndividualModelWrapper) for model in result) assert result[0].series_index == 0 assert result[1].series_index == 1 - + def test_fit_models_sequential(self): """Test _fit_models_sequential method .""" service = BatchBootstrapService() - + bootstrap_samples = [ np.array([1, 2, 3, 4, 5]), np.array([2, 3, 4, 5, 6]), ] - - with patch('tsbootstrap.time_series_model.TimeSeriesModel') as mock_ts_model: + + with patch("tsbootstrap.time_series_model.TimeSeriesModel") as mock_ts_model: # Create mock instances mock_instance1 = Mock() mock_instance2 = Mock() mock_fitted1 = Mock() mock_fitted2 = Mock() - + mock_instance1.fit.return_value = mock_fitted1 mock_instance2.fit.return_value = mock_fitted2 - + # Mock the constructor to return our instances mock_ts_model.side_effect = [mock_instance1, mock_instance2] - + result = service._fit_models_sequential( bootstrap_samples, "ar", 2, (1, 0, 1, 12), extra_param="test" ) - + # Verify TimeSeriesModel was called correctly assert mock_ts_model.call_count == 2 # Check call arguments manually to avoid array comparison issues calls = mock_ts_model.call_args_list assert len(calls) == 2 - + # Check first call call0_kwargs = calls[0].kwargs - assert call0_kwargs['model_type'] == "ar" - np.testing.assert_array_equal(call0_kwargs['X'], bootstrap_samples[0]) - - # Check second call + assert call0_kwargs["model_type"] == "ar" + np.testing.assert_array_equal(call0_kwargs["X"], bootstrap_samples[0]) + + # Check second call call1_kwargs = calls[1].kwargs - assert call1_kwargs['model_type'] == "ar" - np.testing.assert_array_equal(call1_kwargs['X'], bootstrap_samples[1]) - + assert call1_kwargs["model_type"] == "ar" + np.testing.assert_array_equal(call1_kwargs["X"], bootstrap_samples[1]) + # Verify fit was called correctly mock_instance1.fit.assert_called_once_with( order=2, seasonal_order=(1, 0, 1, 12), extra_param="test" @@ -509,148 +507,139 @@ def test_fit_models_sequential(self): mock_instance2.fit.assert_called_once_with( order=2, seasonal_order=(1, 0, 1, 12), extra_param="test" ) - + # Verify results assert result == [mock_fitted1, mock_fitted2] - + def test_simulate_batch_with_batch_support(self): """Test simulate_batch when first model has simulate_batch method .""" service = BatchBootstrapService() - + mock_model = Mock() mock_model.simulate_batch.return_value = np.array([[[1, 2], [3, 4]], [[5, 6], [7, 8]]]) - + fitted_models = [mock_model] - + result = service.simulate_batch(fitted_models, steps=2, n_paths=2) - + np.testing.assert_array_equal(result, np.array([[[1, 2], [3, 4]], [[5, 6], [7, 8]]])) mock_model.simulate_batch.assert_called_once_with(steps=2, n_paths=2) - + def test_simulate_batch_fallback_with_simulate(self): """Test simulate_batch fallback with simulate method .""" service = BatchBootstrapService() - + mock_model1 = Mock() mock_model2 = Mock() del mock_model1.simulate_batch # No batch support del mock_model2.simulate_batch - + mock_model1.simulate.return_value = np.array([[1, 2], [3, 4]]) mock_model2.simulate.return_value = np.array([[5, 6], [7, 8]]) - + fitted_models = [mock_model1, mock_model2] - + result = service.simulate_batch(fitted_models, steps=2, n_paths=2) - + expected = np.array([[[1, 2], [3, 4]], [[5, 6], [7, 8]]]) np.testing.assert_array_equal(result, expected) - + mock_model1.simulate.assert_called_once_with(steps=2, n_paths=2) mock_model2.simulate.assert_called_once_with(steps=2, n_paths=2) - + def test_simulate_batch_fallback_with_forecast_single_path(self): """Test simulate_batch fallback with forecast method for single path .""" service = BatchBootstrapService() - + mock_model = Mock() del mock_model.simulate_batch del mock_model.simulate # No simulate method - + mock_model.forecast.return_value = np.array([1, 2, 3]) - + fitted_models = [mock_model] - + result = service.simulate_batch(fitted_models, steps=3, n_paths=1) - + np.testing.assert_array_equal(result, np.array([[1, 2, 3]])) mock_model.forecast.assert_called_once_with(steps=3) - + def test_simulate_batch_fallback_with_forecast_multiple_paths(self): """Test simulate_batch fallback with forecast method for multiple paths .""" service = BatchBootstrapService() - + mock_model = Mock() del mock_model.simulate_batch del mock_model.simulate - + mock_model.forecast.return_value = np.array([1, 2, 3]) - + fitted_models = [mock_model] - + result = service.simulate_batch(fitted_models, steps=3, n_paths=2) - + # Should replicate forecast for multiple paths expected = np.array([[[1, 1], [2, 2], [3, 3]]]) np.testing.assert_array_equal(result, expected) mock_model.forecast.assert_called_once_with(steps=3) - + def test_simulate_batch_fallback_unsupported_model(self): """Test simulate_batch fallback with unsupported model .""" service = BatchBootstrapService() - + mock_model = Mock() del mock_model.simulate_batch del mock_model.simulate del mock_model.forecast # No simulation methods - + fitted_models = [mock_model] - + with pytest.raises(ValueError, match="does not support simulation"): service.simulate_batch(fitted_models, steps=3, n_paths=1) - + def test_comprehensive_integration(self): """Test comprehensive integration scenario.""" service = BatchBootstrapService(use_backend=True) - + # Create realistic bootstrap samples np.random.seed(42) - bootstrap_samples = [ - np.random.randn(20) + i for i in range(2) # Use 2 instead of 3 - ] - - with patch('tsbootstrap.services.batch_bootstrap_service.create_backend') as mock_create: + bootstrap_samples = [np.random.randn(20) + i for i in range(2)] # Use 2 instead of 3 + + with patch("tsbootstrap.services.batch_bootstrap_service.create_backend") as mock_create: mock_backend = Mock() mock_fitted = Mock() - + # Mock fitted backend attributes - mock_fitted._params_list = [ - {'ar_coef': [0.5]}, - {'ar_coef': [0.6]} # Only 2 elements - ] + mock_fitted._params_list = [{"ar_coef": [0.5]}, {"ar_coef": [0.6]}] # Only 2 elements mock_fitted._residuals = np.random.randn(2, 20) # 2 series mock_fitted._fitted_values = np.random.randn(2, 20) # 2 series - + mock_backend.fit.return_value = mock_fitted mock_create.return_value = mock_backend - + # Test batch fitting - fitted_models = service.fit_models_batch( - bootstrap_samples, - model_type="ar", - order=1 - ) - + fitted_models = service.fit_models_batch(bootstrap_samples, model_type="ar", order=1) + assert len(fitted_models) == 2 - + # Test that each model has correct attributes for i, model in enumerate(fitted_models): assert isinstance(model, IndividualModelWrapper) assert model.series_index == i assert model.model_type == "ar" assert model.order == 1 - assert model.params['ar_coef'] == [0.5 + i * 0.1] - + assert model.params["ar_coef"] == [0.5 + i * 0.1] + # Test simulation - create a simple mock that doesn't rely on array indexing mock_sims = [np.random.randn(5, 3) for _ in range(2)] # Individual simulations for i, model in enumerate(fitted_models): model.simulate = Mock(return_value=mock_sims[i]) - + simulations = service.simulate_batch(fitted_models, steps=5, n_paths=3) - + assert simulations.shape == (2, 5, 3) # (n_models, steps, n_paths) if __name__ == "__main__": # Run tests - pytest.main([__file__, "-v"]) \ No newline at end of file + pytest.main([__file__, "-v"]) diff --git a/tests/unit/test_block_bootstrap.py b/tests/unit/test_block_bootstrap.py index 9e83f133..4949fb09 100644 --- a/tests/unit/test_block_bootstrap.py +++ b/tests/unit/test_block_bootstrap.py @@ -19,6 +19,7 @@ import numpy as np import pytest + from tsbootstrap.block_bootstrap import ( BartlettsBootstrap, BlackmanBootstrap, @@ -345,86 +346,81 @@ def test_all_block_bootstrap_composition_based_classes_exist(): class TestBlockBootstrap: """Tests targeting specific uncovered lines in block_bootstrap.py.""" - + def test_get_test_params(self): """Test get_test_params method .""" params = BlockBootstrap.get_test_params() assert len(params) == 1 assert params[0]["n_bootstraps"] == 10 assert params[0]["block_length"] == 10 - + def test_cache_blocks_initialization(self): """Test blocks caching .""" bootstrap = MovingBlockBootstrap( - n_bootstraps=2, + n_bootstraps=2, block_length=5, - combine_generation_and_sampling_flag=False # Force caching + combine_generation_and_sampling_flag=False, # Force caching ) - + # Initially no cached blocks assert bootstrap._blocks is None - + X = np.random.randn(50) # Generate blocks will initialize cache blocks = bootstrap._generate_blocks_if_needed(X) - + # Blocks should be cached assert bootstrap._blocks is not None assert len(bootstrap._blocks) > 0 - + def test_block_generation_caching(self): """Test block generation and caching .""" bootstrap = MovingBlockBootstrap( - n_bootstraps=2, - block_length=5, - combine_generation_and_sampling_flag=False + n_bootstraps=2, block_length=5, combine_generation_and_sampling_flag=False ) - + X = np.random.randn(30) - + # First call generates and caches blocks1 = bootstrap._generate_blocks_if_needed(X) assert bootstrap._blocks is not None - + # Second call should use cached blocks blocks2 = bootstrap._generate_blocks_if_needed(X) # Should be the same blocks assert len(blocks1) == len(blocks2) - + def test_recombine_all_blocks_from_cache(self): """Test _recombine_all_blocks_from_cache .""" bootstrap = MovingBlockBootstrap( - n_bootstraps=3, - block_length=5, - combine_generation_and_sampling_flag=False + n_bootstraps=3, block_length=5, combine_generation_and_sampling_flag=False ) - + X = np.random.randn(50) - + # Generate initial sample to populate cache sample1 = bootstrap._generate_samples_single_bootstrap(X) - + # Now cache should be populated, next samples will use cache sample2 = bootstrap._generate_samples_single_bootstrap(X) sample3 = bootstrap._generate_samples_single_bootstrap(X) - + # All should have same length as X assert len(sample1) == len(X) assert len(sample2) == len(X) assert len(sample3) == len(X) - + def test_circular_block_edge_cases(self): """Test CircularBlockBootstrap edge cases .""" # Test with small data that wraps around X = np.array([1, 2, 3, 4, 5], dtype=float) - + bootstrap = CircularBlockBootstrap( - n_bootstraps=2, - block_length=3 # Smaller block length for small data + n_bootstraps=2, block_length=3 # Smaller block length for small data ) - + samples = list(bootstrap.bootstrap(X)) - + assert len(samples) == 2 for sample in samples: assert len(sample) == len(X) @@ -432,92 +428,86 @@ def test_circular_block_edge_cases(self): # Note: values might be repeated due to block structure unique_vals = np.unique(sample) assert all(val in X for val in unique_vals) - + def test_non_overlapping_block_specific_logic(self): """Test NonOverlappingBlockBootstrap specific logic .""" - bootstrap = NonOverlappingBlockBootstrap( - n_bootstraps=2, - block_length=10 - ) - + bootstrap = NonOverlappingBlockBootstrap(n_bootstraps=2, block_length=10) + # Test with data length that's not multiple of block_length X = np.random.randn(45) # 45 is not divisible by 10 samples = list(bootstrap.bootstrap(X)) - + assert len(samples) == 2 for sample in samples: assert len(sample) == len(X) - + def test_stationary_block_resampling(self): """Test StationaryBlockBootstrap block resampling .""" - bootstrap = StationaryBlockBootstrap( - n_bootstraps=3, - avg_block_length=10 - ) - + bootstrap = StationaryBlockBootstrap(n_bootstraps=3, avg_block_length=10) + X = np.random.randn(100) samples = list(bootstrap.bootstrap(X)) - + assert len(samples) == 3 for sample in samples: assert len(sample) == len(X) assert isinstance(sample, np.ndarray) - + def test_window_function_applications(self): """Test window function applications for various windowed bootstraps.""" X = np.random.randn(50) - - # Test BartlettsBootstrap + + # Test BartlettsBootstrap bartletts = BartlettsBootstrap(n_bootstraps=1, block_length=10) bartletts_samples = list(bartletts.bootstrap(X)) assert len(bartletts_samples[0]) == len(X) - - # Test BlackmanBootstrap + + # Test BlackmanBootstrap # BlackmanBootstrap uses composition and doesn't have an 'a' parameter blackman = BlackmanBootstrap(n_bootstraps=1, block_length=10) blackman_samples = list(blackman.bootstrap(X)) assert len(blackman_samples[0]) == len(X) assert blackman.window_type == "blackman" - - # Test HammingBootstrap + + # Test HammingBootstrap hamming = HammingBootstrap(n_bootstraps=1, block_length=10) hamming_samples = list(hamming.bootstrap(X)) assert len(hamming_samples[0]) == len(X) - - # Test HanningBootstrap + + # Test HanningBootstrap hanning = HanningBootstrap(n_bootstraps=1, block_length=10) hanning_samples = list(hanning.bootstrap(X)) assert len(hanning_samples[0]) == len(X) - - # Test TukeyBootstrap + + # Test TukeyBootstrap tukey = TukeyBootstrap(n_bootstraps=1, block_length=10) assert tukey.alpha == 0.5 # Default alpha tukey_samples = list(tukey.bootstrap(X)) assert len(tukey_samples[0]) == len(X) - + # Test with custom alpha tukey2 = TukeyBootstrap(n_bootstraps=1, block_length=10, alpha=0.7) assert tukey2.alpha == 0.7 - + def test_window_function_compute_length(self): """Test compute_window_length for windowed bootstraps .""" # Create a windowed bootstrap bootstrap = BartlettsBootstrap(n_bootstraps=1, block_length=10) - + # The compute_window_length is used internally # Test that windowed bootstraps work correctly with different block lengths X = np.random.randn(100) - + # Test with different block lengths for block_length in [5, 10, 20]: bootstrap = BartlettsBootstrap(n_bootstraps=1, block_length=block_length) samples = list(bootstrap.bootstrap(X)) assert len(samples[0]) == len(X) - + def test_block_bootstrap_with_multivariate_data(self): """Test block bootstraps with multivariate data.""" X = np.random.randn(100, 3) # Multivariate data - + # Test various block bootstrap methods bootstraps = [ MovingBlockBootstrap(n_bootstraps=1, block_length=10), @@ -526,53 +516,52 @@ def test_block_bootstrap_with_multivariate_data(self): StationaryBlockBootstrap(n_bootstraps=1, avg_block_length=10), BartlettsBootstrap(n_bootstraps=1, block_length=10), ] - + for bootstrap in bootstraps: samples = list(bootstrap.bootstrap(X)) assert len(samples) == 1 assert samples[0].shape == X.shape - + def test_block_length_edge_cases(self): """Test block bootstrap with edge case block lengths.""" X = np.random.randn(50) - + # Test with block_length = 1 (essentially iid bootstrap) bootstrap = MovingBlockBootstrap(n_bootstraps=1, block_length=1) samples = list(bootstrap.bootstrap(X)) assert len(samples[0]) == len(X) - + # Test with block_length = data length bootstrap = MovingBlockBootstrap(n_bootstraps=1, block_length=len(X)) samples = list(bootstrap.bootstrap(X)) assert len(samples[0]) == len(X) - + def test_stationary_block_with_small_avg_length(self): """Test StationaryBlockBootstrap with small average block length.""" bootstrap = StationaryBlockBootstrap( - n_bootstraps=2, - avg_block_length=2 # Very small average + n_bootstraps=2, avg_block_length=2 # Very small average ) - + X = np.random.randn(30) samples = list(bootstrap.bootstrap(X)) - + assert len(samples) == 2 for sample in samples: assert len(sample) == len(X) - + def test_windowed_bootstrap_caching_behavior(self): """Test caching behavior in windowed bootstraps.""" bootstrap = HammingBootstrap( n_bootstraps=3, block_length=8, - combine_generation_and_sampling_flag=False # Force caching + combine_generation_and_sampling_flag=False, # Force caching ) - + X = np.random.randn(40) - + # Generate multiple samples - should use caching after first samples = list(bootstrap.bootstrap(X)) - + assert len(samples) == 3 assert all(len(s) == len(X) for s in samples) # Check that blocks are cached (the attribute is _blocks, not _cache_blocks) @@ -581,192 +570,189 @@ def test_windowed_bootstrap_caching_behavior(self): class TestAdditionalCoverage: """Additional tests for missing lines to reach 95% coverage.""" - + def test_all_get_test_params(self): """Test get_test_params for all bootstrap classes .""" - # MovingBlockBootstrap.get_test_params + # MovingBlockBootstrap.get_test_params params = MovingBlockBootstrap.get_test_params() assert len(params) == 1 assert params[0]["n_bootstraps"] == 10 assert params[0]["block_length"] == 10 - - # StationaryBlockBootstrap.get_test_params + + # StationaryBlockBootstrap.get_test_params params = StationaryBlockBootstrap.get_test_params() assert len(params) == 1 assert params[0]["n_bootstraps"] == 10 assert params[0]["block_length"] == 10 - - # CircularBlockBootstrap.get_test_params + + # CircularBlockBootstrap.get_test_params params = CircularBlockBootstrap.get_test_params() assert len(params) == 1 assert params[0]["n_bootstraps"] == 10 assert params[0]["block_length"] == 10 - - # NonOverlappingBlockBootstrap.get_test_params + + # NonOverlappingBlockBootstrap.get_test_params params = NonOverlappingBlockBootstrap.get_test_params() assert len(params) == 1 assert params[0]["n_bootstraps"] == 10 assert params[0]["block_length"] == 10 - - # BartlettsBootstrap.get_test_params + + # BartlettsBootstrap.get_test_params params = BartlettsBootstrap.get_test_params() assert len(params) == 1 assert params[0]["n_bootstraps"] == 10 assert params[0]["block_length"] == 10 - - # BlackmanBootstrap.get_test_params + + # BlackmanBootstrap.get_test_params params = BlackmanBootstrap.get_test_params() assert len(params) == 1 assert params[0]["n_bootstraps"] == 10 assert params[0]["block_length"] == 10 - - # HammingBootstrap.get_test_params + + # HammingBootstrap.get_test_params params = HammingBootstrap.get_test_params() assert len(params) == 1 assert params[0]["n_bootstraps"] == 10 assert params[0]["block_length"] == 10 - - # HanningBootstrap.get_test_params + + # HanningBootstrap.get_test_params params = HanningBootstrap.get_test_params() assert len(params) == 1 assert params[0]["n_bootstraps"] == 10 assert params[0]["block_length"] == 10 - - # TukeyBootstrap.get_test_params + + # TukeyBootstrap.get_test_params params = TukeyBootstrap.get_test_params() assert len(params) == 1 assert params[0]["n_bootstraps"] == 10 assert params[0]["block_length"] == 10 - + def test_generate_samples_edge_cases(self): """Test edge cases in _generate_samples_single_bootstrap .""" # Test when result is longer than original bootstrap = MovingBlockBootstrap(n_bootstraps=1, block_length=3) X = np.array([1, 2, 3, 4, 5]) - + # Mock the block resample service to return longer data original_resample = bootstrap._block_resample_service.resample_blocks - + def mock_resample(X, blocks, n, block_weights, tapered_weights, rng): # Return block indices and data that results in longer series indices = [0, 1] # Two blocks data = [np.array([1, 2, 3]), np.array([3, 4, 5])] # 6 elements total return indices, data - + bootstrap._block_resample_service.resample_blocks = mock_resample - + # Generate sample - should be truncated to original length sample = bootstrap._generate_samples_single_bootstrap(X) - + # Restore original bootstrap._block_resample_service.resample_blocks = original_resample - + assert len(sample) == len(X) # Should be truncated to 5 - + # Test with empty block data bootstrap2 = MovingBlockBootstrap(n_bootstraps=1, block_length=3) - + def mock_empty_resample(X, blocks, n, block_weights, tapered_weights, rng): return [], [] # Empty blocks - + bootstrap2._block_resample_service.resample_blocks = mock_empty_resample - + # Should return array with same shape as X (uses np.empty_like) sample2 = bootstrap2._generate_samples_single_bootstrap(X) assert sample2.shape == X.shape # The array will be uninitialized but have same shape - + bootstrap2._block_resample_service.resample_blocks = original_resample - + def test_get_params_with_callable_block_weights(self): """Test get_params and set_params with callable block_weights .""" + # Define a callable block weight function def custom_weights(n_blocks): return np.ones(n_blocks) / n_blocks - + # Create bootstrap with callable block_weights bootstrap = MovingBlockBootstrap( - n_bootstraps=2, - block_length=5, - block_weights=custom_weights + n_bootstraps=2, block_length=5, block_weights=custom_weights ) - + # get_params should exclude callable block_weights params = bootstrap.get_params() assert "block_weights" not in params assert "n_bootstraps" in params assert params["n_bootstraps"] == 2 - + # set_params with callable should be handled new_weights = lambda n: np.ones(n) params_with_callable = {"block_weights": new_weights, "n_bootstraps": 3} bootstrap.set_params(**params_with_callable) - + # n_bootstraps should be updated, but callable should be ignored assert bootstrap.n_bootstraps == 3 # The original callable should still be there (set_params filtered it out) assert bootstrap.block_weights is custom_weights - + # Test with array block_weights (non-callable) bootstrap2 = MovingBlockBootstrap( - n_bootstraps=2, - block_length=5, - block_weights=np.array([0.5, 0.5]) + n_bootstraps=2, block_length=5, block_weights=np.array([0.5, 0.5]) ) - + params2 = bootstrap2.get_params() # Array block_weights might be excluded in get_params due to serialization constraints # The important part is that callable weights are filtered out # This test verifies the callable filtering works correctly - + def test_windowed_bootstrap_base_methods(self): """Test WindowedBlockBootstrap base class methods .""" # WindowedBlockBootstrap.get_test_params returns empty list params = WindowedBlockBootstrap.get_test_params() assert params == [] - - # Test _create_tapered_weights when window_service is None + + # Test _create_tapered_weights when window_service is None bootstrap = BartlettsBootstrap(n_bootstraps=1, block_length=5) # Force window service to None and clear cache bootstrap._window_service = None bootstrap._tapered_weights_cache = None - + # Call _create_tapered_weights directly - should recreate service weights_func = bootstrap._create_tapered_weights() assert weights_func is not None assert bootstrap._window_service is not None - + # Test that weights function works weights = weights_func(10) assert len(weights) == 10 assert np.all(weights >= 0) # Weights should be non-negative - + def test_reshape_logic_in_generate_samples(self): """Test reshape logic in _generate_samples_single_bootstrap with extra dimensions.""" bootstrap = MovingBlockBootstrap(n_bootstraps=1, block_length=3) X = np.array([[1], [2], [3], [4], [5]]) # 2D array with shape (5, 1) - + # Mock to return data with extra trailing dimension original_resample = bootstrap._block_resample_service.resample_blocks - + def mock_resample_extra_dim(X, blocks, n, block_weights, tapered_weights, rng): # Return data with extra dimension: shape (5, 1, 1) indices = [0] data = [np.array([[[1]], [[2]], [[3]], [[4]], [[5]]])] # Extra dimension return indices, data - + bootstrap._block_resample_service.resample_blocks = mock_resample_extra_dim - + # Should handle the extra dimension sample = bootstrap._generate_samples_single_bootstrap(X) - + # Restore bootstrap._block_resample_service.resample_blocks = original_resample - + # Should maintain original shape assert sample.shape == X.shape - + if __name__ == "__main__": # Run tests - pytest.main([__file__, "-v"]) \ No newline at end of file + pytest.main([__file__, "-v"]) diff --git a/tests/unit/test_block_bootstrap_services.py b/tests/unit/test_block_bootstrap_services.py index c43a2431..560a4d45 100644 --- a/tests/unit/test_block_bootstrap_services.py +++ b/tests/unit/test_block_bootstrap_services.py @@ -6,98 +6,100 @@ from tsbootstrap.services.block_bootstrap_services import ( BlockGenerationService, BlockResamplingService, - WindowFunctionService, - MarkovBootstrapService, DistributionBootstrapService, + MarkovBootstrapService, StatisticPreservingService, + WindowFunctionService, ) class TestBlockBootstrapServices: """Tests targeting specific uncovered lines in block_bootstrap_services.py.""" - + def test_block_generation_length_validation(self): """Test block_length validation error .""" service = BlockGenerationService() X = np.random.randn(10) # Small array - + # Test with block_length greater than array size - with pytest.raises(ValueError, match="block_length cannot be greater than the size of the input array"): + with pytest.raises( + ValueError, match="block_length cannot be greater than the size of the input array" + ): service.generate_blocks(X, block_length=15) # 15 > 10 - + # Test with block_length equal to array size (should work) blocks = service.generate_blocks(X, block_length=10) assert len(blocks) > 0 - + # Test with valid block_length blocks = service.generate_blocks(X, block_length=5) assert len(blocks) > 0 - + def test_markov_bootstrap_service(self): """Test MarkovBootstrapService .""" - # Test initialization + # Test initialization service = MarkovBootstrapService() assert service.transition_matrix is None - - # Test fit_markov_model + + # Test fit_markov_model X = np.random.randn(50) order = 3 service.fit_markov_model(X, order=order) - - # Should have set transition_matrix + + # Should have set transition_matrix assert service.transition_matrix is not None assert service.transition_matrix.shape == (order, order) assert np.allclose(service.transition_matrix, np.eye(order)) - - # Test generate_markov_sample + + # Test generate_markov_sample rng = np.random.default_rng(42) n_samples = 20 sample = service.generate_markov_sample(n_samples, rng) - + assert isinstance(sample, np.ndarray) assert len(sample) == n_samples - + def test_distribution_bootstrap_service(self): """Test DistributionBootstrapService .""" - # Test initialization + # Test initialization service = DistributionBootstrapService() assert service.distribution is None - - # Test fit_distribution + + # Test fit_distribution residuals = np.random.randn(100) service.fit_distribution(residuals) - - # Should have set distribution + + # Should have set distribution assert service.distribution is not None assert "mean" in service.distribution assert "std" in service.distribution assert service.distribution["mean"] == np.mean(residuals) assert service.distribution["std"] == np.std(residuals) - - # Test sample_from_distribution with fitted distribution + + # Test sample_from_distribution with fitted distribution rng = np.random.default_rng(42) n_samples = 25 sample = service.sample_from_distribution(n_samples, rng) - + assert isinstance(sample, np.ndarray) assert len(sample) == n_samples - + # Test sample_from_distribution without fitted distribution service2 = DistributionBootstrapService() sample2 = service2.sample_from_distribution(n_samples, rng) assert isinstance(sample2, np.ndarray) assert len(sample2) == n_samples - + def test_statistic_preserving_service(self): """Test StatisticPreservingService .""" - # Test initialization + # Test initialization service = StatisticPreservingService() assert service.target_statistics == {} - - # Test compute_statistics + + # Test compute_statistics X = np.array([1.0, 2.0, 3.0, 4.0, 5.0]) stats = service.compute_statistics(X) - + assert isinstance(stats, dict) assert "mean" in stats assert "variance" in stats @@ -107,233 +109,227 @@ def test_statistic_preserving_service(self): assert stats["variance"] == np.var(X) assert stats["skewness"] == 0.0 # Placeholder assert stats["kurtosis"] == 3.0 # Placeholder - - # Test adjust_sample with valid standard deviation + + # Test adjust_sample with valid standard deviation sample = np.array([0.0, 1.0, 2.0, 3.0, 4.0]) target_stats = {"mean": 10.0, "variance": 4.0} - + adjusted_sample = service.adjust_sample(sample, target_stats) - + assert isinstance(adjusted_sample, np.ndarray) assert len(adjusted_sample) == len(sample) # Check that the adjustment actually changed the sample assert not np.array_equal(sample, adjusted_sample) # Check that the mean is close to target assert abs(np.mean(adjusted_sample) - target_stats["mean"]) < 1e-10 - + # Test adjust_sample with zero standard deviation (edge case) constant_sample = np.array([5.0, 5.0, 5.0, 5.0, 5.0]) adjusted_constant = service.adjust_sample(constant_sample, target_stats) - + # Should return the original sample when std is 0 assert np.array_equal(constant_sample, adjusted_constant) - + def test_additional_coverage_for_remaining_lines(self): """Test additional scenarios to reach closer to 95% coverage.""" # Test BlockGenerationService with various parameters service = BlockGenerationService() X = np.random.randn(20) - + # Test with wrap_around_flag blocks = service.generate_blocks(X, block_length=5, wrap_around_flag=True) assert len(blocks) > 0 - + # Test with overlap blocks = service.generate_blocks(X, block_length=5, overlap_flag=True, overlap_length=2) assert len(blocks) > 0 - + def test_edge_cases_and_error_conditions(self): """Test edge cases and error conditions for all services.""" # Test BlockGenerationService with edge cases service = BlockGenerationService() - + # Small array with minimum size (3 is the minimum for BlockGenerator) X_small = np.array([1, 2, 3]) blocks = service.generate_blocks(X_small, block_length=2) assert len(blocks) > 0 - + # Test with None block_length (should use default) X = np.random.randn(100) blocks = service.generate_blocks(X, block_length=None) assert len(blocks) > 0 - + # Test MarkovBootstrapService edge cases markov_service = MarkovBootstrapService() - + # Test with different orders for order in [1, 2, 5]: markov_service.fit_markov_model(X, order=order) assert markov_service.transition_matrix.shape == (order, order) - + # Test DistributionBootstrapService edge cases dist_service = DistributionBootstrapService() - + # Test with constant residuals constant_residuals = np.ones(50) dist_service.fit_distribution(constant_residuals) assert dist_service.distribution["std"] == 0.0 - + # Test StatisticPreservingService edge cases stat_service = StatisticPreservingService() - + # Test with single-value array single_val = np.array([42.0]) stats = stat_service.compute_statistics(single_val) assert stats["mean"] == 42.0 assert stats["variance"] == 0.0 - + # Test adjust_sample with empty target_stats sample = np.array([1, 2, 3]) adjusted = stat_service.adjust_sample(sample, {}) # Should use default values (variance=1.0, mean=0.0) assert not np.array_equal(sample, adjusted) - + def test_block_resampling_service_comprehensive(self): """Test BlockResamplingService .""" - # Test initialization + # Test initialization service = BlockResamplingService() assert service._block_resampler is None - - # Test resample_blocks method + + # Test resample_blocks method X = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]) blocks = [X[:3], X[3:6], X[6:9]] # Three blocks - + # Test basic resampling block_indices, block_data = service.resample_blocks(X, blocks, n=12) - + assert isinstance(block_indices, list) assert isinstance(block_data, list) assert len(block_indices) > 0 assert len(block_data) > 0 - + # Test with custom RNG rng = np.random.default_rng(42) block_indices, block_data = service.resample_blocks(X, blocks, n=12, rng=rng) assert len(block_indices) > 0 - + # Test with block weights block_weights = np.array([0.5, 0.3, 0.2]) block_indices, block_data = service.resample_blocks( X, blocks, n=12, block_weights=block_weights ) assert len(block_indices) > 0 - + # Test with tapered weights function def tapered_weights_func(size): # Function receives the size, not block data return np.ones(size) * 0.8 - + block_indices, block_data = service.resample_blocks( X, blocks, n=12, tapered_weights=tapered_weights_func ) assert len(block_indices) > 0 - + # Test with all parameters block_indices, block_data = service.resample_blocks( - X, blocks, n=15, - block_weights=block_weights, + X, + blocks, + n=15, + block_weights=block_weights, tapered_weights=tapered_weights_func, - rng=rng + rng=rng, ) assert len(block_indices) > 0 assert len(block_data) > 0 - + def test_window_function_service_comprehensive(self): """Test WindowFunctionService .""" service = WindowFunctionService() - + # Test all static window methods block_length = 10 - - # Test bartletts_window + + # Test bartletts_window bartlett_window = service.bartletts_window(block_length) assert isinstance(bartlett_window, np.ndarray) assert len(bartlett_window) == block_length np.testing.assert_array_equal(bartlett_window, np.bartlett(block_length)) - - # Test blackman_window + + # Test blackman_window blackman_window = service.blackman_window(block_length) assert isinstance(blackman_window, np.ndarray) assert len(blackman_window) == block_length np.testing.assert_array_equal(blackman_window, np.blackman(block_length)) - - # Test hamming_window + + # Test hamming_window hamming_window = service.hamming_window(block_length) assert isinstance(hamming_window, np.ndarray) assert len(hamming_window) == block_length np.testing.assert_array_equal(hamming_window, np.hamming(block_length)) - - # Test hanning_window + + # Test hanning_window hanning_window = service.hanning_window(block_length) assert isinstance(hanning_window, np.ndarray) assert len(hanning_window) == block_length np.testing.assert_array_equal(hanning_window, np.hanning(block_length)) - - # Test tukey_window + + # Test tukey_window tukey_window = service.tukey_window(block_length, alpha=0.5) assert isinstance(tukey_window, np.ndarray) assert len(tukey_window) == block_length - + # Test tukey_window with different alpha tukey_window_alpha = service.tukey_window(block_length, alpha=0.25) assert isinstance(tukey_window_alpha, np.ndarray) assert len(tukey_window_alpha) == block_length - - # Test get_window_function method + + # Test get_window_function method window_types = ["bartletts", "blackman", "hamming", "hanning", "tukey"] - + for window_type in window_types: window_func = service.get_window_function(window_type) assert callable(window_func) - + # Test that the function works if window_type == "tukey": # Tukey requires alpha parameter window = window_func(block_length, alpha=0.5) else: window = window_func(block_length) - + assert isinstance(window, np.ndarray) assert len(window) == block_length - - # Test window function mapping + + # Test window function mapping assert service.get_window_function("bartletts") == service.bartletts_window assert service.get_window_function("blackman") == service.blackman_window assert service.get_window_function("hamming") == service.hamming_window assert service.get_window_function("hanning") == service.hanning_window assert service.get_window_function("tukey") == service.tukey_window - - # Test invalid window type + + # Test invalid window type with pytest.raises(ValueError, match="Window type 'invalid' not recognized"): service.get_window_function("invalid") - + with pytest.raises(ValueError, match="Available window functions"): service.get_window_function("unknown") - + with pytest.raises(ValueError, match="For custom windows, extend WindowFunctionService"): service.get_window_function("nonexistent") - + def test_block_generation_service_comprehensive_parameters(self): """Test BlockGenerationService with comprehensive parameter coverage.""" service = BlockGenerationService() X = np.random.randn(50) - + # Test with block_length_distribution parameter - blocks = service.generate_blocks( - X, - block_length=8, - block_length_distribution="exponential" - ) + blocks = service.generate_blocks(X, block_length=8, block_length_distribution="exponential") assert len(blocks) > 0 - + # Test with min_block_length parameter - blocks = service.generate_blocks( - X, - block_length=10, - min_block_length=3 - ) + blocks = service.generate_blocks(X, block_length=10, min_block_length=3) assert len(blocks) > 0 - + # Test with all parameters combined rng = np.random.default_rng(42) blocks = service.generate_blocks( @@ -344,15 +340,15 @@ def test_block_generation_service_comprehensive_parameters(self): overlap_flag=True, overlap_length=3, min_block_length=4, - rng=rng + rng=rng, ) assert len(blocks) > 0 - + # Test default block_length calculation (sqrt of array length) X_large = np.random.randn(144) # sqrt(144) = 12 blocks = service.generate_blocks(X_large, block_length=None) assert len(blocks) > 0 - + def test_service_integration_workflow(self): """Test integration between all services.""" # Initialize all services @@ -362,40 +358,40 @@ def test_service_integration_workflow(self): markov = MarkovBootstrapService() dist = DistributionBootstrapService() stat_preserve = StatisticPreservingService() - + # Generate sample data np.random.seed(42) X = np.random.randn(60) - + # Test workflow: generate blocks blocks = block_gen.generate_blocks(X, block_length=10) assert len(blocks) > 0 - + # Test workflow: resample blocks block_indices, block_data = block_resample.resample_blocks(X, blocks, n=60) assert len(block_indices) > 0 assert len(block_data) > 0 - + # Test workflow: apply window function window = window_func.get_window_function("hanning") weights = window(10) assert len(weights) == 10 - + # Test workflow: use markov bootstrap markov.fit_markov_model(X, order=2) markov_sample = markov.generate_markov_sample(30, np.random.default_rng(42)) assert len(markov_sample) == 30 - + # Test workflow: use distribution bootstrap dist.fit_distribution(X) dist_sample = dist.sample_from_distribution(25, np.random.default_rng(42)) assert len(dist_sample) == 25 - + # Test workflow: preserve statistics original_stats = stat_preserve.compute_statistics(X) adjusted_sample = stat_preserve.adjust_sample(X[:20], original_stats) assert len(adjusted_sample) == 20 - + # Verify all services worked together assert markov.transition_matrix is not None assert dist.distribution is not None @@ -404,4 +400,4 @@ def test_service_integration_workflow(self): if __name__ == "__main__": # Run tests - pytest.main([__file__, "-v"]) \ No newline at end of file + pytest.main([__file__, "-v"]) diff --git a/tests/unit/test_block_generation.py b/tests/unit/test_block_generation.py index 49bf153d..8bd352d6 100644 --- a/tests/unit/test_block_generation.py +++ b/tests/unit/test_block_generation.py @@ -12,7 +12,6 @@ import numpy as np import pytest -from numpy.testing import assert_array_equal from tsbootstrap.block_generator import BlockGenerator from tsbootstrap.block_length_sampler import BlockLengthSampler @@ -28,16 +27,16 @@ def test_fixed_length_blocks(self): sampler = BlockLengthSampler(avg_block_length=10) generator = BlockGenerator(input_length=50, block_length_sampler=sampler) blocks = generator.generate_blocks(overlap_flag=False) - + # Verify blocks are generated assert len(blocks) > 0 - + # Verify all indices are within valid range for block in blocks: assert isinstance(block, np.ndarray) assert len(block) > 0 assert all(0 <= idx < 50 for idx in block) - + # For non-overlapping blocks, verify coverage all_indices = np.concatenate(blocks) assert len(all_indices) >= 50 # Should cover at least the input length @@ -47,29 +46,29 @@ def test_non_overlapping_blocks(self): sampler = BlockLengthSampler(avg_block_length=5) generator = BlockGenerator(input_length=20, block_length_sampler=sampler) blocks = generator.generate_blocks(overlap_flag=False) - + # Verify no overlaps in non-overlapping blocks all_indices = [] for block in blocks: all_indices.extend(block) - + # Each index should appear only once in non-overlapping blocks unique_indices = set(all_indices) - assert len(unique_indices) == len(all_indices), "Found overlapping indices in non-overlapping blocks" + assert len(unique_indices) == len( + all_indices + ), "Found overlapping indices in non-overlapping blocks" def test_circular_blocks(self): """Test circular block generation with wrap-around.""" sampler = BlockLengthSampler(avg_block_length=8) generator = BlockGenerator( - input_length=20, - block_length_sampler=sampler, - wrap_around_flag=True + input_length=20, block_length_sampler=sampler, wrap_around_flag=True ) blocks = generator.generate_blocks(overlap_flag=False) - + # With wrap-around, verify blocks can wrap around the data assert len(blocks) > 0 - + # Check if any block actually wraps around has_wraparound = False for block in blocks: @@ -79,28 +78,25 @@ def test_circular_blocks(self): if not consecutive: has_wraparound = True break - + # Note: wrap-around may not always occur depending on random sampling # so we just verify the mechanism works without errors def test_variable_length_blocks(self): """Test variable-length block generation.""" - sampler = BlockLengthSampler( - avg_block_length=6, - block_length_distribution="geometric" - ) + sampler = BlockLengthSampler(avg_block_length=6, block_length_distribution="geometric") generator = BlockGenerator( - input_length=30, + input_length=30, block_length_sampler=sampler, min_block_length=1, # Explicitly set min_block_length - overlap_length=2 # Explicitly set overlap_length + overlap_length=2, # Explicitly set overlap_length ) blocks = generator.generate_blocks(overlap_flag=True) - + # Verify blocks have different lengths block_lengths = [len(block) for block in blocks] assert len(blocks) > 1 - + # With geometric distribution, we should see some variation in block lengths # (though not guaranteed with small samples) assert min(block_lengths) >= 1 @@ -114,9 +110,7 @@ def test_geometric_distribution(self): """Test geometric block length distribution.""" rng = np.random.default_rng(42) sampler = BlockLengthSampler( - avg_block_length=20, - block_length_distribution="geometric", - rng=rng + avg_block_length=20, block_length_distribution="geometric", rng=rng ) # Sample many block lengths @@ -134,22 +128,19 @@ def test_uniform_distribution(self): """Test uniform block length distribution.""" rng = np.random.default_rng(42) sampler = BlockLengthSampler( - avg_block_length=15, - block_length_distribution="uniform", - rng=rng + avg_block_length=15, block_length_distribution="uniform", rng=rng ) lengths = [sampler.sample_block_length() for _ in range(1000)] - assert all(1 <= length < 30 for length in lengths) # uniform samples 1 to 2*avg_block_length + assert all( + 1 <= length < 30 for length in lengths + ) # uniform samples 1 to 2*avg_block_length assert 14 <= np.mean(lengths) <= 16 # Should be around 15 def test_fixed_length(self): """Test fixed block length (no distribution).""" - sampler = BlockLengthSampler( - avg_block_length=25, - block_length_distribution=None - ) + sampler = BlockLengthSampler(avg_block_length=25, block_length_distribution=None) lengths = [sampler.sample_block_length() for _ in range(100)] assert all(length == 25 for length in lengths) @@ -163,16 +154,16 @@ def test_basic_resampling(self): # Create sample data and blocks X = np.arange(20).reshape(-1, 1) blocks = [np.array([0, 1, 2]), np.array([5, 6, 7]), np.array([10, 11, 12])] - + resampler = BlockResampler(X=X, blocks=blocks) block_indices, block_data = resampler.resample_block_indices_and_data(n=20) - + # Verify output structure assert isinstance(block_indices, list) assert isinstance(block_data, list) assert len(block_indices) > 0 assert len(block_data) == len(block_indices) - + # Verify total length approximately matches requested total_length = sum(len(block) for block in block_indices) assert total_length <= 20 # Should not exceed requested length @@ -182,17 +173,17 @@ def test_weighted_resampling(self): # Create sample data and blocks X = np.arange(15).reshape(-1, 1) blocks = [np.array([0, 1, 2]), np.array([5, 6, 7]), np.array([10, 11, 12])] - + # Heavily weight the first block block_weights = np.array([0.8, 0.1, 0.1]) - + resampler = BlockResampler(X=X, blocks=blocks, block_weights=block_weights) block_indices, block_data = resampler.resample_block_indices_and_data(n=15) - + # Verify resampling works with weights assert len(block_indices) > 0 assert len(block_data) == len(block_indices) - + # With heavy weighting on first block, it should appear more frequently # (statistical test - may occasionally fail due to randomness) first_block_count = sum(1 for block in block_indices if np.array_equal(block, blocks[0])) @@ -203,17 +194,17 @@ def test_tapered_blocks(self): # Create sample data and blocks X = np.arange(12).reshape(-1, 1) blocks = [np.array([0, 1, 2]), np.array([4, 5, 6])] - + # Create tapered weights for each block tapered_weights = [np.array([0.5, 1.0, 0.5]), np.array([0.2, 0.8, 0.2])] - + resampler = BlockResampler(X=X, blocks=blocks, tapered_weights=tapered_weights) block_indices, block_data = resampler.resample_block_indices_and_data(n=12) - + # Verify tapered resampling works assert len(block_indices) > 0 assert len(block_data) == len(block_indices) - + # Verify that data has been modified by tapered weights for i, data_block in enumerate(block_data): assert data_block.shape[1] == 1 # Single feature @@ -228,20 +219,21 @@ def test_transition_matrix_estimation(self): # Skip if dtaidistance is not available try: from tsbootstrap.markov_sampler import dtaidistance_installed + if not dtaidistance_installed: pytest.skip("dtaidistance package not available") except ImportError: pytest.skip("dtaidistance package not available") - + # Create sample blocks for transition calculation blocks = [np.array([[1, 2], [3, 4]]), np.array([[5, 6], [7, 8]])] - + calculator = MarkovTransitionMatrixCalculator() transition_probs = calculator.calculate_transition_probabilities(blocks) assert transition_probs.shape == (2, 2) assert np.allclose(transition_probs.sum(axis=1), 1.0) - + # Verify all probabilities are non-negative assert np.all(transition_probs >= 0) @@ -255,13 +247,13 @@ def test_markov_block_sampling(self): ] sampler = MarkovSampler(random_seed=42, blocks_as_hidden_states_flag=False) - + # Fit the model sampler.fit(blocks, n_states=2) - + # Generate samples samples, states = sampler.sample(n_to_sample=20) - + assert samples.shape[0] == 20 assert len(states) == 20 assert samples.shape[1] == 2 # Same number of features as input blocks @@ -271,21 +263,21 @@ def test_state_detection(self): # Create data with clear regimes high_regime = np.ones((20, 1)) * 10 + np.random.RandomState(42).randn(20, 1) * 0.1 low_regime = np.ones((20, 1)) * 0 + np.random.RandomState(42).randn(20, 1) * 0.1 - + # Combine into single array (as if it's one continuous time series) data = np.vstack([high_regime, low_regime, high_regime]) - + sampler = MarkovSampler(random_seed=42, blocks_as_hidden_states_flag=False) - + # Fit with 2 states to detect the two regimes sampler.fit(data, n_states=2) - + # Generate samples samples, states = sampler.sample(n_to_sample=30) - + assert samples.shape[0] == 30 assert len(states) == 30 assert samples.shape[1] == 1 # Single feature - + # Verify states are valid - assert all(state in [0, 1] for state in states) \ No newline at end of file + assert all(state in [0, 1] for state in states) diff --git a/tests/unit/test_bootstrap.py b/tests/unit/test_bootstrap.py index 3d9c9954..08ef9862 100644 --- a/tests/unit/test_bootstrap.py +++ b/tests/unit/test_bootstrap.py @@ -19,6 +19,7 @@ from hypothesis import given, settings from hypothesis import strategies as st from pydantic import ValidationError + from tsbootstrap.bootstrap import ( BlockResidualBootstrap, BlockSieveBootstrap, @@ -691,31 +692,27 @@ def test_1d_padding_edge_case(self): """Test 1D array padding when bootstrap series is shorter.""" np.random.seed(42) X = np.random.randn(100) # 1D array - - bootstrap = WholeResidualBootstrap( - n_bootstraps=1, - model_type="ar", - order=2 - ) - + + bootstrap = WholeResidualBootstrap(n_bootstraps=1, model_type="ar", order=2) + # We need to mock the reconstruction to return a shorter series # This will trigger the padding logic short_series = X[:80] # Shorter than original - + # Mock the reconstructor to return shorter series original_reconstruct = bootstrap._services.reconstructor.reconstruct_time_series - + def mock_reconstruct(fitted_values, resampled_residuals): return short_series - + bootstrap._services.reconstructor.reconstruct_time_series = mock_reconstruct - + # Generate samples samples = list(bootstrap.bootstrap(X)) - + # Restore bootstrap._services.reconstructor.reconstruct_time_series = original_reconstruct - + # Should be padded to original length assert len(samples[0]) == len(X) # Last 20 values should all be the same (padding) @@ -725,43 +722,36 @@ def test_shape_mismatch_error(self): """Test _pad_to_original_length shape mismatch error.""" np.random.seed(42) X = np.random.randn(100, 3) # 2D array with 3 columns - + bootstrap = WholeResidualBootstrap(n_bootstraps=1, model_type="var", order=2) - + # Directly test the _pad_to_original_length method to ensure line 173 is covered # Create a 1D array that needs padding when X is 2D with multiple columns bootstrapped_1d = np.random.randn(80) # 1D array, shorter than X - + # This should trigger the ValueError at line 173 - with pytest.raises(ValueError, match="Shape mismatch: bootstrapped series is 1D but X has 3 columns"): + with pytest.raises( + ValueError, match="Shape mismatch: bootstrapped series is 1D but X has 3 columns" + ): bootstrap._pad_to_original_length(bootstrapped_1d, X) def test_sieve_bootstrap_edge_cases(self): """Test sieve bootstrap validation edge case.""" # Test max_lag < min_lag validation with pytest.raises(ValueError, match="max_lag must be >= min_lag"): - WholeSieveBootstrap( - n_bootstraps=1, - min_lag=10, - max_lag=5 # Invalid: less than min_lag - ) + WholeSieveBootstrap(n_bootstraps=1, min_lag=10, max_lag=5) # Invalid: less than min_lag def test_sieve_bootstrap_order_selection_flow(self): """Test sieve bootstrap order selection flow.""" np.random.seed(42) X = np.random.randn(100) - + # Create sieve bootstrap with order selection - bootstrap = WholeSieveBootstrap( - n_bootstraps=1, - min_lag=1, - max_lag=5, - criterion="aic" - ) - + bootstrap = WholeSieveBootstrap(n_bootstraps=1, min_lag=1, max_lag=5, criterion="aic") + # Verify order selection happens samples = list(bootstrap.bootstrap(X)) - + # For sieve bootstrap, order is selected dynamically during each bootstrap # The instance order remains None since it's selected per-sample # Verify the bootstrap completed successfully @@ -772,26 +762,27 @@ def test_docstring_example_execution(self): """Execute the docstring example code.""" # Execute the docstring example code directly import numpy as np + from tsbootstrap.bootstrap import WholeResidualBootstrap from tsbootstrap.services.service_container import BootstrapServices - + # Generate sample data np.random.seed(42) n = 100 X = np.cumsum(np.random.randn(n)).reshape(-1, 1) - + # Standard usage with default services bootstrap = WholeResidualBootstrap(n_bootstraps=5, model_type="ar", order=2) samples = list(bootstrap.bootstrap(X)) - + # Advanced usage with custom service configuration custom_services = BootstrapServices.create_for_model_based_bootstrap() - + bootstrap_custom = WholeResidualBootstrap( services=custom_services, n_bootstraps=5, model_type="ar", order=2 ) samples_custom = list(bootstrap_custom.bootstrap(X)) - + # Verify results assert len(samples) == 5 # n_bootstraps=5 assert len(samples_custom) == 5 @@ -803,18 +794,18 @@ def test_block_residual_padding_edge_case(self): """Test edge case for BlockResidualBootstrap padding.""" np.random.seed(42) X = np.random.randn(100) - + # Create block bootstrap that might need padding bootstrap = BlockResidualBootstrap( n_bootstraps=1, model_type="ar", order=10, - block_length=30 # Large blocks might cause short series + block_length=30, # Large blocks might cause short series ) - + # Generate samples samples = list(bootstrap.bootstrap(X)) - + # Should maintain original length assert len(samples[0]) == len(X) @@ -822,14 +813,14 @@ def test_whole_residual_with_large_order(self): """Test WholeResidualBootstrap with order approaching data length.""" np.random.seed(42) X = np.random.randn(200) # Larger dataset to support high order - + # Order that will cause shorter bootstrap series bootstrap = WholeResidualBootstrap( n_bootstraps=1, - model_type="ar", - order=50 # High order but still reasonable for 200 samples + model_type="ar", + order=50, # High order but still reasonable for 200 samples ) - + # Should still work and maintain length samples = list(bootstrap.bootstrap(X)) assert len(samples[0]) == len(X) @@ -837,19 +828,17 @@ def test_whole_residual_with_large_order(self): def test_multivariate_padding_scenarios(self): """Test various multivariate padding scenarios.""" np.random.seed(42) - + # Test different multivariate shapes for n_features in [1, 2, 5]: X = np.random.randn(100, n_features) - + bootstrap = WholeResidualBootstrap( - n_bootstraps=2, - model_type="var" if n_features > 1 else "ar", - order=10 + n_bootstraps=2, model_type="var" if n_features > 1 else "ar", order=10 ) - + samples = list(bootstrap.bootstrap(X)) - + # All samples should maintain shape for sample in samples: assert sample.shape == X.shape @@ -858,14 +847,9 @@ def test_block_sieve_multivariate(self): """Test BlockSieveBootstrap with multivariate data.""" np.random.seed(42) X = np.random.randn(100, 2) - - bootstrap = BlockSieveBootstrap( - n_bootstraps=1, - block_length=10, - min_lag=1, - max_lag=5 - ) - + + bootstrap = BlockSieveBootstrap(n_bootstraps=1, block_length=10, min_lag=1, max_lag=5) + samples = list(bootstrap.bootstrap(X)) assert samples[0].shape == X.shape @@ -877,7 +861,7 @@ def test_invalid_bootstrap_parameters(self): {"n_bootstraps": -1}, # Negative {"model_type": "ar", "order": 0}, # Invalid order ] - + for config in invalid_configs: with pytest.raises(ValueError): WholeResidualBootstrap(**config) @@ -886,92 +870,85 @@ def test_data_too_short_for_model(self): """Test bootstrap with data too short for model order.""" np.random.seed(42) X = np.random.randn(20) # Short but workable - + bootstrap = WholeResidualBootstrap( - n_bootstraps=1, - model_type="ar", - order=5 # Reasonable for this data length + n_bootstraps=1, model_type="ar", order=5 # Reasonable for this data length ) - + # Should handle gracefully samples = list(bootstrap.bootstrap(X)) assert len(samples) == 1 assert len(samples[0]) == len(X) - + def test_demonstrate_service_architecture(self): """Test the demonstrate_service_architecture function.""" from tsbootstrap.bootstrap import demonstrate_service_architecture - + # This function is part of the documentation samples, samples_custom = demonstrate_service_architecture() - + # Verify it returns valid results assert len(list(samples)) == 5 assert len(list(samples_custom)) == 5 - + def test_1d_padding_concatenate(self): """Test 1D padding concatenation logic.""" np.random.seed(42) X = np.random.randn(100) # 1D array - + bootstrap = WholeResidualBootstrap(n_bootstraps=1, model_type="ar", order=2) - + # Directly test the padding method with a 1D array that needs padding short_series = np.random.randn(80) - + # This should use the 1D padding logic (lines 165-166) padded = bootstrap._pad_to_original_length(short_series, X) - + assert len(padded) == 100 # Check that the last 20 values are all the same (padding) assert np.all(padded[-20:] == short_series[-1]) - + def test_block_residual_specific_padding(self): """Test BlockResidualBootstrap padding scenarios.""" np.random.seed(42) X = np.random.randn(100) - + # Create bootstrap with parameters that might trigger padding bootstrap = BlockResidualBootstrap( n_bootstraps=1, - model_type="ar", + model_type="ar", order=20, # High order to ensure shorter series - block_length=15 + block_length=15, ) - + # Mock the block resampler to create a shorter series original_resample = bootstrap._services.residual_resampler.resample_residuals_block - + def mock_resample(residuals, block_length, n_samples): # Return residuals that will result in a shorter series return residuals[:70] # Only 70 samples instead of 100 - + bootstrap._services.residual_resampler.resample_residuals_block = mock_resample - + # Generate sample - should trigger padding samples = list(bootstrap.bootstrap(X)) - + # Restore original bootstrap._services.residual_resampler.resample_residuals_block = original_resample - + # Should maintain original length through padding assert len(samples[0]) == 100 - + def test_sieve_fit_model_order_selection(self): """Test sieve bootstrap _fit_model_if_needed with order selection.""" np.random.seed(42) X = np.random.randn(100) - - bootstrap = WholeSieveBootstrap( - n_bootstraps=1, - min_lag=1, - max_lag=5, - criterion="aic" - ) - + + bootstrap = WholeSieveBootstrap(n_bootstraps=1, min_lag=1, max_lag=5, criterion="aic") + # Directly call _fit_model_if_needed to trigger order selection bootstrap._fit_model_if_needed(X) - + # The order should have been selected and model fitted assert bootstrap._fitted_model is not None assert bootstrap.order is not None diff --git a/tests/unit/test_bootstrap_common.py b/tests/unit/test_bootstrap_common.py index 94ae1ea9..09d920de 100644 --- a/tests/unit/test_bootstrap_common.py +++ b/tests/unit/test_bootstrap_common.py @@ -8,6 +8,7 @@ import numpy as np import pytest + from tsbootstrap.bootstrap_common import BootstrapUtilities diff --git a/tests/unit/test_bootstrap_ext.py b/tests/unit/test_bootstrap_ext.py index 4dbbea92..709569ef 100644 --- a/tests/unit/test_bootstrap_ext.py +++ b/tests/unit/test_bootstrap_ext.py @@ -18,6 +18,7 @@ from hypothesis import given, settings from hypothesis import strategies as st from hypothesis.extra.numpy import arrays + from tsbootstrap.bootstrap_ext import ( BlockDistributionBootstrap, BlockMarkovBootstrap, @@ -33,7 +34,7 @@ class TestMarkovBootstrapService: """Test MarkovBootstrapService class methods. - + This test suite validates the Markov-based bootstrap service, which models time series as Markov chains to capture state-dependent dynamics during resampling. diff --git a/tests/unit/test_bootstrap_factory.py b/tests/unit/test_bootstrap_factory.py index 2236ff66..0b6553de 100644 --- a/tests/unit/test_bootstrap_factory.py +++ b/tests/unit/test_bootstrap_factory.py @@ -2,11 +2,11 @@ import numpy as np import pytest +from pydantic import Field # Import actual bootstrap implementations first to ensure they're registered import tsbootstrap.bootstrap # noqa: F401 import tsbootstrap.bootstrap_ext # noqa: F401 -from pydantic import Field from tsbootstrap.base_bootstrap import BaseTimeSeriesBootstrap from tsbootstrap.bootstrap_factory import ( BootstrapFactory, diff --git a/tests/unit/test_bootstrap_services.py b/tests/unit/test_bootstrap_services.py index ae6eab7e..2d91f624 100644 --- a/tests/unit/test_bootstrap_services.py +++ b/tests/unit/test_bootstrap_services.py @@ -16,246 +16,244 @@ and numerical instabilities near machine precision. """ +from unittest.mock import Mock, patch + import numpy as np import pytest -from unittest.mock import Mock, patch from tsbootstrap.services.bootstrap_services import ( ModelFittingService, ResidualResamplingService, - TimeSeriesReconstructionService, SieveOrderSelectionService, + TimeSeriesReconstructionService, ) class TestModelFittingService: """Tests targeting specific uncovered lines in ModelFittingService.""" - + def test_fit_model_empty_data_error(self): """Test error handling for empty data .""" service = ModelFittingService() - + # Test with completely empty array empty_data = np.array([]) - + with pytest.raises(ValueError, match="Cannot fit time series model on empty data"): service.fit_model(empty_data) - + # Test with zero-size array zero_size_data = np.array([]).reshape(0, 1) - + with pytest.raises(ValueError, match="Cannot fit time series model on empty data"): service.fit_model(zero_size_data) - + def test_fit_model_1d_to_2d_conversion(self): """Test conversion of 1D to 2D data .""" service = ModelFittingService() - + # Create 1D data data_1d = np.array([1.0, 2.0, 3.0, 4.0, 5.0]) - + # Should work without error (internally converts to 2D) - fitted_model, fitted_values, residuals = service.fit_model(data_1d, model_type="ar", order=1) - + fitted_model, fitted_values, residuals = service.fit_model( + data_1d, model_type="ar", order=1 + ) + assert fitted_model is not None assert fitted_values is not None assert residuals is not None assert len(fitted_values) > 0 assert len(residuals) > 0 - + def test_multivariate_ar_to_var_conversion(self): """Test automatic conversion from AR to VAR for multivariate data .""" service = ModelFittingService() - + # Create multivariate data (should trigger VAR model) np.random.seed(42) multivariate_data = np.random.randn(50, 3) # 3 variables - + # Should automatically convert AR to VAR fitted_model, fitted_values, residuals = service.fit_model( multivariate_data, model_type="ar", order=2 ) - + assert fitted_model is not None assert fitted_values.shape[1] == 3 # Should preserve dimensionality assert residuals.shape[1] == 3 - + def test_backend_system_ar_model(self): """Test backend system for AR models .""" service = ModelFittingService(use_backend=True) - + # Create test data np.random.seed(42) data = np.random.randn(30, 1) - + # Mock the backend to avoid dependency issues - with patch('tsbootstrap.backends.adapter.fit_with_backend') as mock_backend: + with patch("tsbootstrap.backends.adapter.fit_with_backend") as mock_backend: # Create a mock fitted backend mock_fitted = Mock() mock_fitted.fitted_values = np.random.randn(30) mock_fitted.residuals = np.random.randn(30) mock_backend.return_value = mock_fitted - + # Test AR model with backend (should convert int order to tuple) fitted_model, fitted_values, residuals = service.fit_model( data, model_type="ar", order=2 ) - + # Verify backend was called mock_backend.assert_called_once() # Check that the results are returned properly assert fitted_model is mock_fitted assert len(fitted_values) == 30 assert len(residuals) == 30 - + def test_backend_system_arima_model(self): """Test backend system for ARIMA models.""" service = ModelFittingService(use_backend=True) - + np.random.seed(42) data = np.random.randn(30, 1) - - with patch('tsbootstrap.backends.adapter.fit_with_backend') as mock_backend: + + with patch("tsbootstrap.backends.adapter.fit_with_backend") as mock_backend: mock_fitted = Mock() mock_fitted.fitted_values = np.random.randn(30) mock_fitted.residuals = np.random.randn(30) mock_backend.return_value = mock_fitted - + # Test ARIMA model with tuple order (should pass through) fitted_model, fitted_values, residuals = service.fit_model( data, model_type="arima", order=(1, 1, 1) ) - + # Verify backend was called and results returned mock_backend.assert_called_once() assert fitted_model is mock_fitted assert len(fitted_values) == 30 assert len(residuals) == 30 - + def test_statsmodels_arima_path(self): """Test original statsmodels implementation .""" service = ModelFittingService(use_backend=False) # Disable backend - + np.random.seed(42) data = np.random.randn(50, 1) - + # Test with int order - fitted_model, fitted_values, residuals = service.fit_model( - data, model_type="ar", order=2 - ) - + fitted_model, fitted_values, residuals = service.fit_model(data, model_type="ar", order=2) + assert fitted_model is not None assert len(fitted_values) > 0 assert len(residuals) > 0 - + # Test with tuple order fitted_model2, fitted_values2, residuals2 = service.fit_model( data, model_type="arima", order=(1, 0, 1) ) - + assert fitted_model2 is not None assert len(fitted_values2) > 0 assert len(residuals2) > 0 - + def test_seasonal_arima_parameters(self): """Test ARIMA with seasonal parameters .""" service = ModelFittingService(use_backend=False) - + np.random.seed(42) # Generate longer series for seasonal model data = np.random.randn(100, 1) - + # Test SARIMA model fitted_model, fitted_values, residuals = service.fit_model( - data, - model_type="sarima", - order=(1, 0, 1), - seasonal_order=(1, 0, 1, 12) + data, model_type="sarima", order=(1, 0, 1), seasonal_order=(1, 0, 1, 12) ) - + assert fitted_model is not None assert len(fitted_values) > 0 assert len(residuals) > 0 - + def test_var_model_multivariate(self): """Test VAR model fitting .""" service = ModelFittingService() - + np.random.seed(42) # Create multivariate data multivariate_data = np.random.randn(50, 3) - + fitted_model, fitted_values, residuals = service.fit_model( multivariate_data, model_type="var", order=2 ) - + assert fitted_model is not None assert fitted_values.shape[1] == 3 # Should preserve dimensions assert residuals.shape[1] == 3 - + def test_var_model_univariate_conversion(self): """Test VAR model with univariate data conversion .""" service = ModelFittingService() - + np.random.seed(42) # Create univariate data (should convert to AR) univariate_data = np.random.randn(50, 1) - + fitted_model, fitted_values, residuals = service.fit_model( univariate_data, model_type="var", order=2 ) - + assert fitted_model is not None assert len(fitted_values) > 0 assert len(residuals) > 0 - + def test_arch_garch_models(self): """Test ARCH/GARCH family models .""" service = ModelFittingService() - + np.random.seed(42) # Generate data with volatility clustering for GARCH models data = np.random.randn(100) * (0.1 + 0.05 * np.abs(np.random.randn(100))) data_2d = data.reshape(-1, 1) - + # Test ARCH model fitted_model, fitted_values, residuals = service.fit_model( data_2d, model_type="arch", order=1 ) - + assert fitted_model is not None assert len(fitted_values) > 0 assert len(residuals) > 0 - + # Test GARCH model fitted_model, fitted_values, residuals = service.fit_model( data_2d, model_type="garch", order=(1, 1) ) - + assert fitted_model is not None assert len(fitted_values) > 0 assert len(residuals) > 0 - + def test_unknown_model_type_error(self): """Test error for unknown model type .""" service = ModelFittingService() - + data = np.random.randn(20, 1) - + with pytest.raises(ValueError, match="Unknown time series model type"): service.fit_model(data, model_type="unknown_model") - + with pytest.raises(ValueError, match="Supported model types include"): service.fit_model(data, model_type="invalid") - + def test_fit_arch_model_types(self): """Test _fit_arch_model with different model types .""" service = ModelFittingService() - + np.random.seed(42) # Create data with more variance for ARCH models data = np.random.randn(100) * 5 # Scale up for better convergence - + # Test ARCH model try: fitted, residuals = service._fit_arch_model(data, "arch", 1) # Use simpler order @@ -264,137 +262,131 @@ def test_fit_arch_model_types(self): except Exception: # ARCH models can be sensitive, so we just test that the method exists pass - + # Test GARCH model with simple order try: fitted, residuals = service._fit_arch_model(data, "garch", 1) assert fitted is not None except Exception: pass - + # The main goal is to test the different model type paths in the code # ARCH models can be finicky with random data, so we focus on coverage - + def test_fit_arch_model_unknown_type_error(self): """Test error for unknown ARCH model type .""" service = ModelFittingService() - + data = np.random.randn(20) - + with pytest.raises(ValueError, match="Unknown ARCH family model type"): service._fit_arch_model(data, "unknown_arch", 1) - + def test_fitted_model_property_error(self): """Test fitted_model property error when not fitted .""" service = ModelFittingService() - + with pytest.raises(ValueError, match="Model has not been fitted yet"): _ = service.fitted_model - + def test_residuals_property_error(self): """Test residuals property error when not fitted .""" service = ModelFittingService() - + with pytest.raises(ValueError, match="Model has not been fitted yet"): _ = service.residuals class TestResidualResamplingService: """Tests targeting specific uncovered lines in ResidualResamplingService.""" - + def test_init_with_rng(self): """Test initialization with custom RNG .""" custom_rng = np.random.default_rng(42) service = ResidualResamplingService(rng=custom_rng) - + assert service.rng is custom_rng - + def test_init_without_rng(self): """Test initialization without RNG (default case).""" service = ResidualResamplingService() - + assert isinstance(service.rng, np.random.Generator) - + def test_resample_residuals_whole_1d(self): """Test whole resampling with 1D residuals .""" service = ResidualResamplingService(rng=np.random.default_rng(42)) - + residuals = np.array([1.0, 2.0, 3.0, 4.0, 5.0]) - + # Test with default n_samples (should use length of residuals) resampled = service.resample_residuals_whole(residuals) assert len(resampled) == len(residuals) - + # Test with custom n_samples resampled = service.resample_residuals_whole(residuals, n_samples=10) assert len(resampled) == 10 - + # All values should be from original residuals assert all(val in residuals for val in resampled) - + def test_resample_residuals_whole_2d(self): """Test whole resampling with 2D residuals.""" service = ResidualResamplingService(rng=np.random.default_rng(42)) - + residuals = np.array([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]]) - + # Test with default n_samples resampled = service.resample_residuals_whole(residuals) assert resampled.shape[0] == residuals.shape[0] assert resampled.shape[1] == residuals.shape[1] - + # Test with custom n_samples resampled = service.resample_residuals_whole(residuals, n_samples=5) assert resampled.shape[0] == 5 assert resampled.shape[1] == 2 - + def test_resample_residuals_block_1d(self): """Test block resampling with 1D residuals .""" service = ResidualResamplingService(rng=np.random.default_rng(42)) - + residuals = np.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]) block_length = 3 - + # Test with default n_samples resampled = service.resample_residuals_block(residuals, block_length) assert len(resampled) == len(residuals) - + # Test with custom n_samples resampled = service.resample_residuals_block(residuals, block_length, n_samples=10) assert len(resampled) == 10 - + def test_resample_residuals_block_2d(self): """Test block resampling with 2D residuals .""" service = ResidualResamplingService(rng=np.random.default_rng(42)) - - residuals = np.array([ - [1.0, 2.0], - [3.0, 4.0], - [5.0, 6.0], - [7.0, 8.0], - [9.0, 10.0] - ]) + + residuals = np.array([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0], [7.0, 8.0], [9.0, 10.0]]) block_length = 2 - + # Test with default n_samples resampled = service.resample_residuals_block(residuals, block_length) assert resampled.shape[0] == residuals.shape[0] assert resampled.shape[1] == residuals.shape[1] - + # Test with custom n_samples resampled = service.resample_residuals_block(residuals, block_length, n_samples=3) assert resampled.shape[0] == 3 assert resampled.shape[1] == 2 - + def test_resample_residuals_block_edge_cases(self): """Test block resampling edge cases.""" service = ResidualResamplingService(rng=np.random.default_rng(42)) - + # Test with block_length equal to residuals length residuals = np.array([1.0, 2.0, 3.0]) resampled = service.resample_residuals_block(residuals, block_length=3) assert len(resampled) == 3 - + # Test with small residuals and large n_samples residuals = np.array([1.0, 2.0]) resampled = service.resample_residuals_block(residuals, block_length=1, n_samples=10) @@ -403,54 +395,54 @@ def test_resample_residuals_block_edge_cases(self): class TestTimeSeriesReconstructionService: """Tests targeting specific uncovered lines in TimeSeriesReconstructionService.""" - + def test_reconstruct_univariate(self): """Test reconstruction with univariate data .""" fitted_values = np.array([1.0, 2.0, 3.0, 4.0, 5.0]) resampled_residuals = np.array([0.1, 0.2, 0.3, 0.4, 0.5]) - + reconstructed = TimeSeriesReconstructionService.reconstruct_time_series( fitted_values, resampled_residuals ) - + expected = fitted_values + resampled_residuals np.testing.assert_array_equal(reconstructed, expected) - + def test_reconstruct_multivariate(self): """Test reconstruction with multivariate data .""" fitted_values = np.array([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]]) resampled_residuals = np.array([[0.1, 0.2], [0.3, 0.4], [0.5, 0.6]]) - + reconstructed = TimeSeriesReconstructionService.reconstruct_time_series( fitted_values, resampled_residuals ) - + expected = fitted_values + resampled_residuals np.testing.assert_array_equal(reconstructed, expected) - + def test_reconstruct_mismatched_lengths(self): """Test reconstruction with mismatched lengths.""" # Fitted values longer than residuals fitted_values = np.array([1.0, 2.0, 3.0, 4.0, 5.0]) resampled_residuals = np.array([0.1, 0.2, 0.3]) - + reconstructed = TimeSeriesReconstructionService.reconstruct_time_series( fitted_values, resampled_residuals ) - + # Should use minimum length assert len(reconstructed) == 3 expected = fitted_values[:3] + resampled_residuals np.testing.assert_array_equal(reconstructed, expected) - + # Residuals longer than fitted values fitted_values = np.array([1.0, 2.0]) resampled_residuals = np.array([0.1, 0.2, 0.3, 0.4]) - + reconstructed = TimeSeriesReconstructionService.reconstruct_time_series( fitted_values, resampled_residuals ) - + assert len(reconstructed) == 2 expected = fitted_values + resampled_residuals[:2] np.testing.assert_array_equal(reconstructed, expected) @@ -458,136 +450,136 @@ def test_reconstruct_mismatched_lengths(self): class TestSieveOrderSelectionService: """Tests targeting specific uncovered lines in SieveOrderSelectionService.""" - + def test_init(self): """Test initialization .""" service = SieveOrderSelectionService() # Should initialize without error assert service is not None - + def test_get_criterion_score_aic(self): """Test _get_criterion_score with AIC .""" service = SieveOrderSelectionService() - + # Mock fitted model with AIC mock_fitted = Mock() mock_fitted.aic = 100.5 - + score = service._get_criterion_score(mock_fitted, "aic") assert score == 100.5 - + # Test case insensitive score = service._get_criterion_score(mock_fitted, "AIC") assert score == 100.5 - + def test_get_criterion_score_bic(self): """Test _get_criterion_score with BIC .""" service = SieveOrderSelectionService() - + mock_fitted = Mock() mock_fitted.bic = 105.2 - + score = service._get_criterion_score(mock_fitted, "bic") assert score == 105.2 - + def test_get_criterion_score_hqic(self): """Test _get_criterion_score with HQIC .""" service = SieveOrderSelectionService() - + mock_fitted = Mock() mock_fitted.hqic = 102.8 - + score = service._get_criterion_score(mock_fitted, "hqic") assert score == 102.8 - + def test_get_criterion_score_unknown_error(self): """Test _get_criterion_score with unknown criterion .""" service = SieveOrderSelectionService() - + mock_fitted = Mock() - + with pytest.raises(ValueError, match="Unknown information criterion"): service._get_criterion_score(mock_fitted, "unknown") - + with pytest.raises(ValueError, match="Supported criteria are"): service._get_criterion_score(mock_fitted, "invalid") - + def test_select_order_basic(self): """Test select_order basic functionality .""" service = SieveOrderSelectionService() - + # Generate AR(2) data for order selection np.random.seed(42) n = 100 data = np.zeros(n) for i in range(2, n): - data[i] = 0.3 * data[i-1] + 0.2 * data[i-2] + np.random.normal(0, 0.1) - + data[i] = 0.3 * data[i - 1] + 0.2 * data[i - 2] + np.random.normal(0, 0.1) + # Select order selected_order = service.select_order(data, min_lag=1, max_lag=5, criterion="aic") - + assert isinstance(selected_order, int) assert 1 <= selected_order <= 5 - + def test_select_order_multivariate_to_univariate(self): """Test select_order with multivariate data conversion .""" service = SieveOrderSelectionService() - + np.random.seed(42) # Create multivariate data (should use first column) multivariate_data = np.random.randn(50, 3) - + selected_order = service.select_order(multivariate_data, min_lag=1, max_lag=3) - + assert isinstance(selected_order, int) assert 1 <= selected_order <= 3 - + def test_select_order_different_criteria(self): """Test select_order with different criteria.""" service = SieveOrderSelectionService() - + np.random.seed(42) data = np.random.randn(50) - + # Test with BIC order_bic = service.select_order(data, min_lag=1, max_lag=3, criterion="bic") assert isinstance(order_bic, int) - + # Test with HQIC order_hqic = service.select_order(data, min_lag=1, max_lag=3, criterion="hqic") assert isinstance(order_hqic, int) - + def test_select_order_exception_handling(self): """Test select_order exception handling .""" service = SieveOrderSelectionService() - + # Create problematic data that might cause fitting issues problematic_data = np.array([0.0] * 20) # Constant data - + # Should handle exceptions gracefully and return a valid order selected_order = service.select_order( problematic_data, min_lag=1, max_lag=3, criterion="aic" ) - + assert isinstance(selected_order, int) assert 1 <= selected_order <= 3 - + def test_select_order_with_exception_handling(self): """Test select_order exception handling without complex mocking.""" service = SieveOrderSelectionService() - + # This test verifies the exception handling code path exists # by testing with data that might cause some orders to fail np.random.seed(42) data = np.array([0.0] * 10 + list(np.random.randn(10))) # Mixed constant and random - + # Should handle any potential exceptions and return a valid order selected_order = service.select_order(data, min_lag=1, max_lag=5) - + assert isinstance(selected_order, int) assert 1 <= selected_order <= 5 if __name__ == "__main__": # Run tests - pytest.main([__file__, "-v"]) \ No newline at end of file + pytest.main([__file__, "-v"]) diff --git a/tests/unit/test_model_scoring_service.py b/tests/unit/test_model_scoring_service.py index a5a6139d..e809e167 100644 --- a/tests/unit/test_model_scoring_service.py +++ b/tests/unit/test_model_scoring_service.py @@ -8,289 +8,289 @@ class TestModelScoringService: """Tests targeting specific uncovered lines in ModelScoringService.""" - + def test_score_basic_functionality(self): """Test basic score functionality with different metrics.""" service = ModelScoringService() - + # Create test data y_true = np.array([1.0, 2.0, 3.0, 4.0, 5.0]) y_pred = np.array([1.1, 2.1, 2.9, 3.9, 5.1]) - + # Test R² metric r2_score = service.score(y_true, y_pred, metric="r2") assert isinstance(r2_score, float) assert r2_score <= 1.0 # R² should be <= 1 - + # Test MSE metric mse_score = service.score(y_true, y_pred, metric="mse") assert isinstance(mse_score, float) assert mse_score >= 0.0 # MSE should be non-negative - + # Test MAE metric mae_score = service.score(y_true, y_pred, metric="mae") assert isinstance(mae_score, float) assert mae_score >= 0.0 # MAE should be non-negative - + # Test RMSE metric rmse_score = service.score(y_true, y_pred, metric="rmse") assert isinstance(rmse_score, float) assert rmse_score >= 0.0 # RMSE should be non-negative assert rmse_score == np.sqrt(mse_score) # RMSE = sqrt(MSE) - + # Test MAPE metric mape_score = service.score(y_true, y_pred, metric="mape") assert isinstance(mape_score, float) assert mape_score >= 0.0 # MAPE should be non-negative - + def test_score_shape_mismatch_error(self): """Test error handling for shape mismatch .""" service = ModelScoringService() - + y_true = np.array([1.0, 2.0, 3.0]) y_pred = np.array([1.0, 2.0]) # Different shape - + with pytest.raises(ValueError, match="Shape mismatch"): service.score(y_true, y_pred) - + # Test with 2D arrays having different shapes y_true_2d = np.array([[1.0, 2.0], [3.0, 4.0]]) y_pred_2d = np.array([[1.0], [2.0]]) # Different shape - + with pytest.raises(ValueError, match="Shape mismatch"): service.score(y_true_2d, y_pred_2d) - + def test_score_array_flattening(self): """Test array flattening for consistent calculations .""" service = ModelScoringService() - + # Test with 2D arrays y_true_2d = np.array([[1.0, 2.0], [3.0, 4.0]]) y_pred_2d = np.array([[1.1, 2.1], [2.9, 3.9]]) - + # Should work with 2D arrays (gets flattened internally) score_2d = service.score(y_true_2d, y_pred_2d, metric="mse") - + # Compare with equivalent 1D arrays y_true_1d = y_true_2d.ravel() y_pred_1d = y_pred_2d.ravel() score_1d = service.score(y_true_1d, y_pred_1d, metric="mse") - + assert np.isclose(score_2d, score_1d) - + def test_score_unknown_metric_error(self): """Test error handling for unknown metric .""" service = ModelScoringService() - + y_true = np.array([1.0, 2.0, 3.0]) y_pred = np.array([1.1, 2.1, 2.9]) - + with pytest.raises(ValueError, match="Unknown metric"): service.score(y_true, y_pred, metric="unknown") - + with pytest.raises(ValueError, match="Available: 'r2', 'mse', 'mae', 'rmse', 'mape'"): service.score(y_true, y_pred, metric="invalid") - + def test_calculate_mse_convenience_method(self): """Test calculate_mse convenience method .""" service = ModelScoringService() - + y_true = np.array([1.0, 2.0, 3.0, 4.0]) y_pred = np.array([1.1, 2.1, 2.9, 3.9]) - + # Test convenience method mse_convenience = service.calculate_mse(y_true, y_pred) - + # Should be same as calling score with metric='mse' mse_score = service.score(y_true, y_pred, metric="mse") - + assert mse_convenience == mse_score - + # Verify the calculation manually expected_mse = np.mean((y_true - y_pred) ** 2) assert np.isclose(mse_convenience, expected_mse) - + def test_calculate_mae_convenience_method(self): """Test calculate_mae convenience method .""" service = ModelScoringService() - + y_true = np.array([1.0, 2.0, 3.0, 4.0]) y_pred = np.array([1.1, 2.1, 2.9, 3.9]) - + # Test convenience method mae_convenience = service.calculate_mae(y_true, y_pred) - + # Should be same as calling score with metric='mae' mae_score = service.score(y_true, y_pred, metric="mae") - + assert mae_convenience == mae_score - + # Verify the calculation manually expected_mae = np.mean(np.abs(y_true - y_pred)) assert np.isclose(mae_convenience, expected_mae) - + def test_r2_score_empty_array(self): """Test R² score with empty array .""" service = ModelScoringService() - + y_true = np.array([]) y_pred = np.array([]) - + r2_score = service._r2_score(y_true, y_pred) assert np.isnan(r2_score) - + def test_r2_score_constant_true_values(self): """Test R² score with constant true values .""" service = ModelScoringService() - + # Case 1: Constant true values, perfect predictions y_true = np.array([5.0, 5.0, 5.0, 5.0]) y_pred = np.array([5.0, 5.0, 5.0, 5.0]) - + r2_score = service._r2_score(y_true, y_pred) assert r2_score == 1.0 # Perfect prediction of constant values - + # Case 2: Constant true values, imperfect predictions y_true = np.array([5.0, 5.0, 5.0, 5.0]) y_pred = np.array([4.0, 6.0, 5.0, 5.5]) - + r2_score = service._r2_score(y_true, y_pred) assert r2_score == 0.0 # Undefined, returns 0 - + def test_r2_score_normal_case(self): """Test R² score normal calculation .""" service = ModelScoringService() - + # Create data with known R² value y_true = np.array([1.0, 2.0, 3.0, 4.0, 5.0]) y_pred = np.array([1.0, 2.0, 3.0, 4.0, 5.0]) # Perfect predictions - + r2_score = service._r2_score(y_true, y_pred) assert np.isclose(r2_score, 1.0) # Perfect fit should give R² = 1 - + # Test with imperfect predictions y_pred_imperfect = np.array([1.1, 1.9, 3.1, 3.9, 5.1]) r2_score_imperfect = service._r2_score(y_true, y_pred_imperfect) assert r2_score_imperfect < 1.0 # Should be less than perfect assert r2_score_imperfect > 0.0 # But still positive for reasonable predictions - + def test_mse_calculation(self): """Test MSE calculation .""" service = ModelScoringService() - + y_true = np.array([1.0, 2.0, 3.0]) y_pred = np.array([1.1, 2.1, 2.9]) - + mse = service._mse(y_true, y_pred) - + # Verify manual calculation expected_mse = np.mean((y_true - y_pred) ** 2) assert np.isclose(mse, expected_mse) - + # Test with perfect predictions mse_perfect = service._mse(y_true, y_true) assert mse_perfect == 0.0 - + def test_mae_calculation(self): """Test MAE calculation .""" service = ModelScoringService() - + y_true = np.array([1.0, 2.0, 3.0]) y_pred = np.array([1.1, 2.1, 2.9]) - + mae = service._mae(y_true, y_pred) - + # Verify manual calculation expected_mae = np.mean(np.abs(y_true - y_pred)) assert np.isclose(mae, expected_mae) - + # Test with perfect predictions mae_perfect = service._mae(y_true, y_true) assert mae_perfect == 0.0 - + def test_rmse_calculation(self): """Test RMSE calculation .""" service = ModelScoringService() - + y_true = np.array([1.0, 2.0, 3.0]) y_pred = np.array([1.1, 2.1, 2.9]) - + rmse = service._rmse(y_true, y_pred) - + # Verify it's sqrt of MSE mse = service._mse(y_true, y_pred) expected_rmse = np.sqrt(mse) assert np.isclose(rmse, expected_rmse) - + # Test with perfect predictions rmse_perfect = service._rmse(y_true, y_true) assert rmse_perfect == 0.0 - + def test_mape_calculation_normal_case(self): """Test MAPE calculation with normal values .""" service = ModelScoringService() - + y_true = np.array([1.0, 2.0, 4.0, 5.0]) y_pred = np.array([1.1, 2.2, 3.8, 5.5]) - + mape = service._mape(y_true, y_pred) - + # Verify manual calculation abs_percentage_errors = np.abs((y_true - y_pred) / y_true) expected_mape = np.mean(abs_percentage_errors) * 100 assert np.isclose(mape, expected_mape) - + # Test with perfect predictions mape_perfect = service._mape(y_true, y_true) assert mape_perfect == 0.0 - + def test_mape_calculation_zero_mask(self): """Test MAPE calculation with zero masking .""" service = ModelScoringService() - + # Test with some zero values in y_true y_true = np.array([0.0, 2.0, 3.0, 0.0, 5.0]) y_pred = np.array([1.0, 2.1, 2.9, 1.0, 5.1]) - + mape = service._mape(y_true, y_pred) - + # Should only consider non-zero true values mask = y_true != 0 expected_errors = np.abs((y_true[mask] - y_pred[mask]) / y_true[mask]) expected_mape = np.mean(expected_errors) * 100 - + assert np.isclose(mape, expected_mape) - + def test_mape_calculation_all_zeros(self): """Test MAPE calculation with all zero true values .""" service = ModelScoringService() - + # All zeros in y_true y_true = np.array([0.0, 0.0, 0.0]) y_pred = np.array([1.0, 2.0, 3.0]) - + mape = service._mape(y_true, y_pred) - + # Should return infinity when all true values are zero assert mape == np.inf - + def test_comprehensive_metric_workflow(self): """Test complete workflow with all metrics.""" service = ModelScoringService() - + # Create realistic test data np.random.seed(42) y_true = np.random.randn(100) * 10 + 50 # Mean around 50 noise = np.random.randn(100) * 2 y_pred = y_true + noise # Add some noise - + # Test all metrics r2 = service.score(y_true, y_pred, metric="r2") mse = service.score(y_true, y_pred, metric="mse") mae = service.score(y_true, y_pred, metric="mae") rmse = service.score(y_true, y_pred, metric="rmse") mape = service.score(y_true, y_pred, metric="mape") - + # Verify relationships assert rmse == np.sqrt(mse) assert 0 <= r2 <= 1 # R² should be reasonable for this data @@ -299,77 +299,77 @@ def test_comprehensive_metric_workflow(self): assert mae >= 0 assert rmse >= 0 assert mape >= 0 - + # Test convenience methods mse_convenience = service.calculate_mse(y_true, y_pred) mae_convenience = service.calculate_mae(y_true, y_pred) - + assert mse_convenience == mse assert mae_convenience == mae - + def test_edge_cases_and_boundary_conditions(self): """Test various edge cases and boundary conditions.""" service = ModelScoringService() - + # Single value arrays y_true_single = np.array([5.0]) y_pred_single = np.array([5.1]) - + for metric in ["r2", "mse", "mae", "rmse", "mape"]: score = service.score(y_true_single, y_pred_single, metric=metric) assert isinstance(score, float) assert not np.isnan(score) or metric == "r2" # R² might be nan for single values - + # Large arrays y_true_large = np.random.randn(10000) y_pred_large = y_true_large + np.random.randn(10000) * 0.1 - + r2_large = service.score(y_true_large, y_pred_large, metric="r2") assert isinstance(r2_large, float) assert not np.isnan(r2_large) - + # Test with negative values y_true_neg = np.array([-5.0, -3.0, -1.0, 1.0, 3.0]) y_pred_neg = np.array([-4.8, -3.2, -0.9, 1.1, 2.9]) - + for metric in ["r2", "mse", "mae", "rmse"]: # MAPE has issues with negative values score = service.score(y_true_neg, y_pred_neg, metric=metric) assert isinstance(score, float) - + # MAPE with negative values (should handle the mask correctly) mape_neg = service.score(y_true_neg, y_pred_neg, metric="mape") assert isinstance(mape_neg, float) - + def test_metric_mathematical_properties(self): """Test mathematical properties of metrics.""" service = ModelScoringService() - + # Create test data y_true = np.array([1.0, 2.0, 3.0, 4.0, 5.0]) - + # Perfect predictions should give optimal scores r2_perfect = service.score(y_true, y_true, metric="r2") mse_perfect = service.score(y_true, y_true, metric="mse") mae_perfect = service.score(y_true, y_true, metric="mae") rmse_perfect = service.score(y_true, y_true, metric="rmse") mape_perfect = service.score(y_true, y_true, metric="mape") - + assert np.isclose(r2_perfect, 1.0) assert np.isclose(mse_perfect, 0.0) assert np.isclose(mae_perfect, 0.0) assert np.isclose(rmse_perfect, 0.0) assert np.isclose(mape_perfect, 0.0) - + # Worse predictions should give worse scores y_pred_bad = y_true + 1.0 # Add constant error - + r2_bad = service.score(y_true, y_pred_bad, metric="r2") mse_bad = service.score(y_true, y_pred_bad, metric="mse") - + assert r2_bad < r2_perfect assert mse_bad > mse_perfect if __name__ == "__main__": # Run tests - pytest.main([__file__, "-v"]) \ No newline at end of file + pytest.main([__file__, "-v"]) diff --git a/tests/unit/test_models.py b/tests/unit/test_models.py index 82a20123..d1f51c8c 100644 --- a/tests/unit/test_models.py +++ b/tests/unit/test_models.py @@ -16,19 +16,12 @@ The goal is a smooth experience where the wrapper handles these gracefully. """ -import os -from unittest.mock import MagicMock, patch - import numpy as np import pytest -from sklearn.base import BaseEstimator from sklearn.base import clone from sklearn.metrics import mean_squared_error -from sklearn.model_selection import GridSearchCV -from sklearn.model_selection import TimeSeriesSplit -from sklearn.model_selection import cross_val_score +from sklearn.model_selection import GridSearchCV, TimeSeriesSplit, cross_val_score from sklearn.pipeline import Pipeline -from sklearn.preprocessing import StandardScaler from tsbootstrap.time_series_model import TimeSeriesModel @@ -366,18 +359,18 @@ def test_time_series_split(self): # Additional coverage tests from phase 2 class TestTimeSeriesModelAdditionalCoverage: """Additional tests for complete coverage of time_series_model.py.""" - + def test_verbose_setter_validation(self): """Test verbose setter with invalid value.""" model = TimeSeriesModel(X=np.random.randn(100), model_type="ar") - + # Test invalid verbose values with pytest.raises(ValueError, match="verbose must be one of"): model.verbose = 3 - + with pytest.raises(ValueError, match="verbose must be one of"): model.verbose = -1 - + # Test valid values model.verbose = 0 assert model.verbose == 0 @@ -385,248 +378,252 @@ def test_verbose_setter_validation(self): assert model.verbose == 1 model.verbose = 2 assert model.verbose == 2 - + def test_validate_order_list_max_lag_exceeded(self): """Test _validate_order with list where max exceeds limit.""" X = np.random.randn(50) model = TimeSeriesModel(X=X, model_type="ar") - + # Calculate what the max_lag should be for this data # max_lag = (N - k - seasonal_terms - trend_parameters) // 2 # For simple AR with no exog: max_lag = 50 // 2 = 25 - + # Test with list of orders where max exceeds limit with pytest.raises(ValueError, match="Maximum allowed lag value exceeded"): model._validate_order([10, 20, 30], len(X), {}) # 30 > 25 - + def test_validate_order_single_value_exceeded(self): """Test _validate_order with single order exceeding limit.""" X = np.random.randn(50) model = TimeSeriesModel(X=X, model_type="ar") - + # Test with single order exceeding limit with pytest.raises(ValueError, match="Maximum allowed lag value exceeded"): model._validate_order(30, len(X), {}) # 30 > 25 - + def test_calculate_terms_seasonal_validation(self): """Test _calculate_terms seasonal validation.""" X = np.random.randn(100) model = TimeSeriesModel(X=X, model_type="ar") - + # Test seasonal=True without period kwargs = {"seasonal": True} - with pytest.raises(ValueError, match="A period must be specified when using seasonal terms"): + with pytest.raises( + ValueError, match="A period must be specified when using seasonal terms" + ): model._calculate_terms(kwargs) - + # Test seasonal=True with period < 2 kwargs = {"seasonal": True, "period": 1} with pytest.raises(ValueError, match="The seasonal period must be >= 2"): model._calculate_terms(kwargs) - + # Test seasonal=True with non-integer period kwargs = {"seasonal": True, "period": 2.5} with pytest.raises(TypeError, match="The seasonal period must be an integer"): model._calculate_terms(kwargs) - + def test_fit_ar_default_order(self): """Test fit_ar with default order.""" X = np.random.randn(100) model = TimeSeriesModel(X=X, model_type="ar", verbose=0) - + # Call fit_ar without order - should use default of 1 result = model.fit_ar() assert result is not None - - @pytest.mark.skip(reason="Backend has issue with data shape handling - not related to sklearn compatibility changes") + + @pytest.mark.skip( + reason="Backend has issue with data shape handling - not related to sklearn compatibility changes" + ) def test_fit_ar_with_backend(self): """Test fit_ar using backend system.""" # Use more data to avoid maxlag issues np.random.seed(42) # For reproducibility X = np.random.randn(200) - + # Test actual backend usage without mocking (this will hit the backend path) # Using old API pattern where X is passed in constructor model = TimeSeriesModel(X=X, model_type="ar", use_backend=True, verbose=0) - + # This should trigger the backend code path and still work # Note: fit_ar is called on the model, not fit() result = model.fit_ar(order=2) assert result is not None - + def test_fit_arima_default_order(self): """Test fit_arima with default order.""" X = np.random.randn(100) model = TimeSeriesModel(X=X, model_type="arima", verbose=0) - + # Call fit_arima without order - should use default (1, 0, 0) result = model.fit_arima() assert result is not None - + def test_fit_arima_invalid_order_length(self): """Test fit_arima with invalid order tuple length.""" X = np.random.randn(100) model = TimeSeriesModel(X=X, model_type="arima") - + # Test with wrong tuple length with pytest.raises(ValueError, match="The order must be a 3-tuple"): model.fit_arima(order=(1, 0)) # Only 2 elements - + with pytest.raises(ValueError, match="The order must be a 3-tuple"): model.fit_arima(order=(1, 0, 0, 1)) # 4 elements - + def test_fit_arima_with_backend(self): """Test fit_arima using backend system.""" X = np.random.randn(100) model = TimeSeriesModel(X=X, model_type="arima", use_backend=True, verbose=0) - + # Test actual backend usage - should work with statsforecast backend result = model.fit_arima(order=(2, 1, 1)) assert result is not None - + def test_fit_sarima_full_functionality(self): """Test fit_sarima with all validations.""" X = np.random.randn(100) model = TimeSeriesModel(X=X, model_type="sarima", verbose=0) - + # Test default orders result = model.fit_sarima() assert result is not None - + # Test invalid non-seasonal order with pytest.raises(ValueError, match="The non-seasonal order must be a 3-tuple"): model.fit_sarima(order=(1, 0)) - + # Test invalid seasonal order with pytest.raises(ValueError, match="The seasonal order must be a 4-tuple"): model.fit_sarima(seasonal_order=(1, 0, 0)) - + # Test seasonal period validation with pytest.raises(ValueError, match="Seasonal period 's' must be greater than 1"): model.fit_sarima(seasonal_order=(1, 0, 0, 1)) - + # Test duplication of order (p >= s and P != 0) with pytest.raises(ValueError, match="could lead to duplication of order"): model.fit_sarima(order=(12, 0, 0), seasonal_order=(1, 0, 0, 12)) - + # Test duplication of order (q >= s and Q != 0) with pytest.raises(ValueError, match="could lead to duplication of order"): model.fit_sarima(order=(0, 0, 12), seasonal_order=(0, 0, 1, 12)) - + def test_fit_sarima_with_backend(self): """Test fit_sarima using backend system.""" X = np.random.randn(100) model = TimeSeriesModel(X=X, model_type="sarima", use_backend=True, verbose=0) - + # Test actual backend usage - should work with statsforecast backend result = model.fit_sarima(order=(1, 1, 1), seasonal_order=(1, 1, 1, 12)) assert result is not None - + def test_fit_arch_all_paths(self): """Test fit_arch with all model types and validations.""" X = np.random.randn(100) model = TimeSeriesModel(X=X, model_type="arch", verbose=0) - + # Test default parameters result = model.fit_arch() assert result is not None - + # Test invalid mean_type with pytest.raises(ValueError, match="mean_type must be one of"): model.fit_arch(mean_type="invalid") - + # Test GARCH model result = model.fit_arch(arch_model_type="GARCH", p=2, q=1) assert result is not None - + # Test EGARCH model result = model.fit_arch(arch_model_type="EGARCH", p=1, q=1) assert result is not None - + # Test TARCH model result = model.fit_arch(arch_model_type="TARCH", p=1, q=1) assert result is not None - + # Test AGARCH model result = model.fit_arch(arch_model_type="AGARCH", p=1, q=1) assert result is not None - + # Test invalid arch_model_type with pytest.raises(ValueError, match="arch_model_type must be one of"): model.fit_arch(arch_model_type="INVALID") - + def test_fit_dispatch_sarima(self): """Test fit method dispatching to sarima.""" X = np.random.randn(100) model = TimeSeriesModel(X=X, model_type="sarima", verbose=0) - + # Test fit with sarima parameters result = model.fit(order=(1, 1, 1), seasonal_order=(1, 0, 1, 12)) assert result is not None - + def test_fit_unsupported_model(self): """Test fit with unsupported model type.""" X = np.random.randn(100) model = TimeSeriesModel(X=X, model_type="ar") - + # Mock the model_type to be unsupported model._model_type = "unsupported" - + with pytest.raises(ValueError, match="Unsupported fitted model type"): model.fit() - + def test_repr_method(self): """Test __repr__ method.""" X = np.random.randn(100) model = TimeSeriesModel(X=X, model_type="ar", verbose=1) - + repr_str = repr(model) assert repr_str == "TimeSeriesModel(model_type=ar, verbose=1)" - + def test_str_method(self): """Test __str__ method.""" X = np.random.randn(100) model = TimeSeriesModel(X=X, model_type="arima", verbose=2) - + str_repr = str(model) assert str_repr == "TimeSeriesModel using model_type=arima with verbosity level 2" - + def test_eq_method_comprehensive(self): """Test __eq__ method with all scenarios.""" X1 = np.random.randn(100) X2 = np.random.randn(100) y1 = np.random.randn(100) y2 = np.random.randn(100) - + # Test equal models model1 = TimeSeriesModel(X=X1, y=y1, model_type="ar", verbose=1) model2 = TimeSeriesModel(X=X1.copy(), y=y1.copy(), model_type="ar", verbose=1) assert model1 == model2 - + # Test different X model3 = TimeSeriesModel(X=X2, y=y1, model_type="ar", verbose=1) assert model1 != model3 - + # Test different y model4 = TimeSeriesModel(X=X1, y=y2, model_type="ar", verbose=1) assert model1 != model4 - + # Test None y values model5 = TimeSeriesModel(X=X1, y=None, model_type="ar", verbose=1) model6 = TimeSeriesModel(X=X1.copy(), y=None, model_type="ar", verbose=1) assert model5 == model6 - + # Test one None, one not None # Models should NOT be equal if one has y and the other doesn't assert model1 != model5 # model1 has y, model5 has y=None - + # Test different model_type model7 = TimeSeriesModel(X=X1, y=y1, model_type="arima", verbose=1) assert model1 != model7 - + # Test different verbose model8 = TimeSeriesModel(X=X1, y=y1, model_type="ar", verbose=2) assert model1 != model8 - + # Test comparison with non-TimeSeriesModel object assert model1 != "not a model" assert model1 != 123 @@ -635,88 +632,88 @@ def test_eq_method_comprehensive(self): class TestTimeSeriesModelEdgeCases: """Additional edge case tests for complete coverage.""" - + def test_multivariate_ar_with_exog(self): """Test AR model with multivariate data and exogenous variables.""" X = np.random.randn(100) # AR models in statsmodels expect 1D data y = np.random.randn(100, 1) # Exogenous - + model = TimeSeriesModel(X=X, y=y, model_type="ar", verbose=0) - + # Should work with exogenous data result = model.fit_ar(order=2) assert result is not None - + def test_var_model_fitting(self): """Test VAR model fitting.""" X = np.random.randn(100, 3) # Multivariate required for VAR - + model = TimeSeriesModel(X=X, model_type="var", verbose=0) result = model.fit_var(order=2) assert result is not None - + def test_arch_model_with_ar_mean(self): """Test ARCH model with AR mean specification.""" X = np.random.randn(200) # Need more data for ARCH - + model = TimeSeriesModel(X=X, model_type="arch", verbose=0) - + # Test with AR mean type result = model.fit_arch(order=2, mean_type="AR", p=1, q=1) assert result is not None - + def test_seasonal_ar_with_calculate_terms(self): """Test AR model with seasonal terms to exercise _calculate_terms.""" X = np.random.randn(100) model = TimeSeriesModel(X=X, model_type="ar", verbose=0) - + # Test valid seasonal configuration kwargs = {"seasonal": True, "period": 12} seasonal_terms, trend_params = model._calculate_terms(kwargs) assert seasonal_terms == 11 # period - 1 assert trend_params == 1 # default trend='c' - + # Test with different trend kwargs = {"seasonal": True, "period": 4, "trend": "ct"} seasonal_terms, trend_params = model._calculate_terms(kwargs) assert seasonal_terms == 3 assert trend_params == 2 # 'ct' gives 2 parameters - + # Test with no trend kwargs = {"seasonal": False, "trend": "n"} seasonal_terms, trend_params = model._calculate_terms(kwargs) assert seasonal_terms == 0 assert trend_params == 0 - + def test_validate_order_with_exog_and_seasonal(self): """Test _validate_order with exogenous variables and seasonal terms.""" X = np.random.randn(100) y = np.random.randn(100, 2) # 2 exogenous variables - + model = TimeSeriesModel(X=X, y=y, model_type="ar", verbose=0) - + # With seasonal terms and exog, max_lag should be reduced kwargs = {"seasonal": True, "period": 12} - + # max_lag = (100 - 2 - 11 - 1) // 2 = 86 // 2 = 43 # So order=50 should exceed this with pytest.raises(ValueError, match="Maximum allowed lag value exceeded"): model._validate_order(50, len(X), kwargs) - + def test_verbose_suppression_levels(self): """Test different verbose suppression levels in _fit_with_verbose_handling.""" X = np.random.randn(100) - + # Test verbose=0 (suppress both stdout and stderr) model = TimeSeriesModel(X=X, model_type="ar", verbose=0) result = model.fit_ar(order=2) assert result is not None - + # Test verbose=1 (suppress stdout only) model.verbose = 1 result = model.fit_ar(order=2) assert result is not None - + # Test verbose=2 (no suppression) model.verbose = 2 result = model.fit_ar(order=2) @@ -725,7 +722,7 @@ def test_verbose_suppression_levels(self): class TestTimeSeriesModelIntegration: """Integration tests for complex scenarios.""" - + def test_full_sarima_workflow(self): """Test complete SARIMA workflow with all features.""" # Generate seasonal data @@ -735,50 +732,47 @@ def test_full_sarima_workflow(self): trend = 0.1 * t noise = np.random.randn(n) X = trend + seasonal_component + noise - + model = TimeSeriesModel(X=X, model_type="sarima", verbose=0) - + # Fit with seasonal components - result = model.fit( - order=(1, 1, 1), - seasonal_order=(1, 1, 1, 12) - ) - + result = model.fit(order=(1, 1, 1), seasonal_order=(1, 1, 1, 12)) + assert result is not None - + def test_model_type_case_handling(self): """Test that model_type preserves original case for sklearn compatibility.""" X = np.random.randn(100) - + # Test with uppercase - now preserves case model = TimeSeriesModel(X=X, model_type="AR") assert model.model_type == "AR" # Preserved for sklearn compatibility - + # Test with mixed case - now preserves case model = TimeSeriesModel(X=X, model_type="ArImA") assert model.model_type == "ArImA" # Preserved for sklearn compatibility - + # Model should still work with case-insensitive model types result = model.fit(order=(1, 0, 1)) assert result is not None - + def test_fit_dispatch_to_non_sarima(self): """Test fit method dispatch to non-sarima models.""" # Make X multivariate for VAR (needs at least 2 columns) X = np.random.randn(100, 3) model = TimeSeriesModel(X=X, model_type="var", verbose=0) - + result = model.fit(order=2) assert result is not None - + def test_eq_method_false_case(self): """Test __eq__ method false case.""" X1 = np.random.randn(100) X2 = np.random.randn(100) - + model1 = TimeSeriesModel(X=X1, model_type="ar", verbose=1) model2 = TimeSeriesModel(X=X2, model_type="ar", verbose=1) - + # These should not be equal due to different X arrays result = model1.__eq__(model2) assert result is False # Explicitly test the False return @@ -786,4 +780,4 @@ def test_eq_method_false_case(self): if __name__ == "__main__": # Run tests - pytest.main([__file__, "-v"]) \ No newline at end of file + pytest.main([__file__, "-v"]) diff --git a/tests/unit/test_numpy_serialization.py b/tests/unit/test_numpy_serialization.py index 518d1cfe..3afd2c8b 100644 --- a/tests/unit/test_numpy_serialization.py +++ b/tests/unit/test_numpy_serialization.py @@ -2,525 +2,498 @@ import numpy as np import pytest -from typing import Protocol -from unittest.mock import Mock -from tsbootstrap.services.numpy_serialization import NumpySerializationService, SerializableModel +from tsbootstrap.services.numpy_serialization import NumpySerializationService class MockPydanticModel: """Mock Pydantic model for testing.""" - + def __init__(self, data: dict): self.data = data - + def model_dump(self, mode: str = "python") -> dict: return self.data class TestNumpySerializationService: """Tests targeting specific uncovered lines in NumpySerializationService.""" - + def test_init_with_strict_mode(self): """Test initialization with strict mode .""" # Test strict mode enabled service = NumpySerializationService(strict_mode=True) assert service.strict_mode is True assert service._serialization_cache == {} - + # Test strict mode disabled service = NumpySerializationService(strict_mode=False) assert service.strict_mode is False assert service._serialization_cache == {} - + def test_serialize_none_value(self): """Test serialization of None values .""" service = NumpySerializationService() - + result = service.serialize_numpy_arrays(None) assert result is None - + def test_serialize_datetime_arrays(self): """Test serialization of datetime64 arrays .""" service = NumpySerializationService() - + # Create datetime64 array - dates = np.array(['2023-01-01', '2023-01-02', '2023-01-03'], dtype='datetime64[D]') + dates = np.array(["2023-01-01", "2023-01-02", "2023-01-03"], dtype="datetime64[D]") result = service.serialize_numpy_arrays(dates) - + assert isinstance(result, list) assert all(isinstance(item, str) for item in result) - assert '2023-01-01' in result[0] - + assert "2023-01-01" in result[0] + def test_serialize_timedelta_arrays(self): """Test serialization of timedelta64 arrays .""" service = NumpySerializationService() - + # Create timedelta64 array - deltas = np.array([1, 2, 3], dtype='timedelta64[D]') + deltas = np.array([1, 2, 3], dtype="timedelta64[D]") result = service.serialize_numpy_arrays(deltas) - + assert isinstance(result, list) assert all(isinstance(item, str) for item in result) - + def test_serialize_regular_arrays(self): """Test serialization of regular numpy arrays .""" service = NumpySerializationService() - + # Test 1D array arr_1d = np.array([1, 2, 3]) result = service.serialize_numpy_arrays(arr_1d) assert result == [1, 2, 3] - + # Test 2D array arr_2d = np.array([[1, 2], [3, 4]]) result = service.serialize_numpy_arrays(arr_2d) assert result == [[1, 2], [3, 4]] - + def test_serialize_numpy_scalars(self): """Test serialization of numpy scalars .""" service = NumpySerializationService() - + # Test integer scalar int_scalar = np.int64(42) result = service.serialize_numpy_arrays(int_scalar) assert result == 42 assert isinstance(result, int) - + # Test float scalar float_scalar = np.float64(3.14) result = service.serialize_numpy_arrays(float_scalar) assert result == 3.14 assert isinstance(result, float) - + # Test boolean scalar bool_scalar = np.bool_(True) result = service.serialize_numpy_arrays(bool_scalar) assert result is True assert isinstance(result, bool) - + def test_serialize_datetime_scalars(self): """Test serialization of datetime64 and timedelta64 scalars .""" service = NumpySerializationService() - + # Test datetime64 scalar - dt_scalar = np.datetime64('2023-01-01') + dt_scalar = np.datetime64("2023-01-01") result = service.serialize_numpy_arrays(dt_scalar) assert isinstance(result, str) - assert '2023-01-01' in result - + assert "2023-01-01" in result + # Test timedelta64 scalar - td_scalar = np.timedelta64(5, 'D') + td_scalar = np.timedelta64(5, "D") result = service.serialize_numpy_arrays(td_scalar) # Note: timedelta64 scalars convert to Python timedelta objects, not strings # The str() conversion happens inside the method assert result is not None - + def test_serialize_random_generator(self): """Test serialization of numpy random generator .""" service = NumpySerializationService() - + rng = np.random.default_rng(42) result = service.serialize_numpy_arrays(rng) assert result is None - + def test_serialize_lists_tuples(self): """Test serialization of lists and tuples recursively .""" service = NumpySerializationService() - + # Test list with numpy arrays input_list = [np.array([1, 2]), np.int64(42), "string"] result = service.serialize_numpy_arrays(input_list) assert result == [[1, 2], 42, "string"] assert isinstance(result, list) - + # Test tuple with numpy arrays input_tuple = (np.array([1, 2]), np.float64(3.14)) result = service.serialize_numpy_arrays(input_tuple) assert result == ([1, 2], 3.14) assert isinstance(result, tuple) - + def test_serialize_dicts(self): """Test serialization of dictionaries recursively .""" service = NumpySerializationService() - + input_dict = { - 'array': np.array([1, 2, 3]), - 'scalar': np.int64(42), - 'nested': { - 'inner_array': np.array([4, 5]), - 'string': 'test' - } + "array": np.array([1, 2, 3]), + "scalar": np.int64(42), + "nested": {"inner_array": np.array([4, 5]), "string": "test"}, } - + result = service.serialize_numpy_arrays(input_dict) expected = { - 'array': [1, 2, 3], - 'scalar': 42, - 'nested': { - 'inner_array': [4, 5], - 'string': 'test' - } + "array": [1, 2, 3], + "scalar": 42, + "nested": {"inner_array": [4, 5], "string": "test"}, } assert result == expected - + def test_serialize_pydantic_models(self): """Test serialization of Pydantic models .""" service = NumpySerializationService() - + # Create mock model with numpy data - model_data = { - 'array': np.array([1, 2, 3]), - 'scalar': np.float64(3.14), - 'string': 'test' - } + model_data = {"array": np.array([1, 2, 3]), "scalar": np.float64(3.14), "string": "test"} mock_model = MockPydanticModel(model_data) - + result = service.serialize_numpy_arrays(mock_model) - expected = { - 'array': [1, 2, 3], - 'scalar': 3.14, - 'string': 'test' - } + expected = {"array": [1, 2, 3], "scalar": 3.14, "string": "test"} assert result == expected - + def test_serialize_other_types(self): """Test serialization returns other types as-is .""" service = NumpySerializationService() - + # Test string result = service.serialize_numpy_arrays("test") assert result == "test" - + # Test int result = service.serialize_numpy_arrays(42) assert result == 42 - + # Test custom object class CustomObj: pass - + obj = CustomObj() result = service.serialize_numpy_arrays(obj) assert result is obj - + def test_check_numeric_dtype_object_array(self): """Test _check_numeric_dtype with object array .""" service = NumpySerializationService() - + # Test object array - obj_array = np.array(['string', 'data'], dtype=object) + obj_array = np.array(["string", "data"], dtype=object) with pytest.raises(TypeError, match="must contain numeric data"): service._check_numeric_dtype(obj_array, "test_param") - + with pytest.raises(TypeError, match="objects"): service._check_numeric_dtype(obj_array, "test_param") - + def test_check_numeric_dtype_string_array(self): """Test _check_numeric_dtype with string array .""" service = NumpySerializationService() - + # Test unicode string array - str_array = np.array(['a', 'b', 'c'], dtype='U1') + str_array = np.array(["a", "b", "c"], dtype="U1") with pytest.raises(TypeError, match="must contain numeric data"): service._check_numeric_dtype(str_array, "test_param") - + with pytest.raises(TypeError, match="strings"): service._check_numeric_dtype(str_array, "test_param") - + # Test byte string array - byte_array = np.array([b'a', b'b'], dtype='S1') + byte_array = np.array([b"a", b"b"], dtype="S1") with pytest.raises(TypeError, match="strings"): service._check_numeric_dtype(byte_array, "test_param") - + def test_validate_array_input_none(self): """Test validate_array_input with None input .""" service = NumpySerializationService() - + with pytest.raises(TypeError, match="cannot be None"): service.validate_array_input(None, "test_param") - + with pytest.raises(TypeError, match="Please provide array-like data"): service.validate_array_input(None) - + def test_validate_array_input_non_array_strict(self): """Test validate_array_input with non-array in strict mode .""" service = NumpySerializationService(strict_mode=True) - + # Test successful conversion result = service.validate_array_input([1, 2, 3]) np.testing.assert_array_equal(result, np.array([1, 2, 3])) - + # Test failed conversion class NonConvertible: def __array__(self): raise ValueError("Cannot convert") - + with pytest.raises(TypeError, match="must be array-like"): service.validate_array_input(NonConvertible()) - + def test_validate_array_input_non_array_permissive(self): """Test validate_array_input with non-array in permissive mode .""" service = NumpySerializationService(strict_mode=False) - + # Test scalar wrapping result = service.validate_array_input(42) np.testing.assert_array_equal(result, np.array([42])) - + # Test completely unconvertible class NonConvertible: def __array__(self): raise ValueError("Cannot convert") - - with pytest.raises(TypeError, match="cannot be converted to a numpy array even in permissive mode"): + + with pytest.raises( + TypeError, match="cannot be converted to a numpy array even in permissive mode" + ): service.validate_array_input(NonConvertible()) - + def test_validate_array_input_string_dtype_check(self): """Test validate_array_input with string data .""" service = NumpySerializationService() - + # The error message is different than expected - it throws the array-like error first with pytest.raises(TypeError, match="must be array-like"): - service.validate_array_input(['a', 'b', 'c']) - + service.validate_array_input(["a", "b", "c"]) + def test_validate_array_input_0d_strict(self): """Test validate_array_input with 0D array in strict mode .""" service = NumpySerializationService(strict_mode=True) - + scalar_array = np.array(42) # 0D array with pytest.raises(ValueError, match="0-dimensional array"): service.validate_array_input(scalar_array) - + with pytest.raises(ValueError, match="scalar"): service.validate_array_input(scalar_array) - + def test_validate_array_input_0d_permissive(self): """Test validate_array_input with 0D array in permissive mode .""" service = NumpySerializationService(strict_mode=False) - + scalar_array = np.array(42) # 0D array result = service.validate_array_input(scalar_array) - + assert result.ndim == 1 assert result.shape == (1,) assert result[0] == 42 - + def test_ensure_2d_1d_input(self): """Test ensure_2d with 1D input .""" service = NumpySerializationService() - + arr_1d = np.array([1, 2, 3]) result = service.ensure_2d(arr_1d) - + assert result.ndim == 2 assert result.shape == (3, 1) np.testing.assert_array_equal(result.ravel(), arr_1d) - + def test_ensure_2d_2d_input(self): """Test ensure_2d with 2D input .""" service = NumpySerializationService() - + arr_2d = np.array([[1, 2], [3, 4]]) result = service.ensure_2d(arr_2d) - + assert result is arr_2d # Should return same array assert result.shape == (2, 2) - + def test_ensure_2d_3d_strict(self): """Test ensure_2d with 3D array in strict mode .""" service = NumpySerializationService(strict_mode=True) - + arr_3d = np.array([[[1, 2]], [[3, 4]]]) with pytest.raises(ValueError, match="has 3 dimensions"): service.ensure_2d(arr_3d) - + with pytest.raises(ValueError, match="must be 1D or 2D"): service.ensure_2d(arr_3d) - + def test_ensure_2d_3d_permissive(self): """Test ensure_2d with 3D array in permissive mode .""" service = NumpySerializationService(strict_mode=False) - + arr_3d = np.array([[[1, 2]], [[3, 4]]]) # Shape (2, 1, 2) result = service.ensure_2d(arr_3d) - + assert result.ndim == 2 assert result.shape[0] == 2 # First dimension preserved assert result.shape[1] == 2 # Flattened other dimensions - + def test_validate_consistent_length_single_array(self): """Test validate_consistent_length with single array .""" service = NumpySerializationService() - + # Should not raise error with single array arr = np.array([1, 2, 3]) service.validate_consistent_length(arr) # Should pass without error - + # Should not raise error with no arrays service.validate_consistent_length() # Should pass without error - + def test_validate_consistent_length_matching(self): """Test validate_consistent_length with matching lengths .""" service = NumpySerializationService() - + arr1 = np.array([1, 2, 3]) arr2 = np.array([4, 5, 6]) arr3 = np.array([7, 8, 9]) - + # Should not raise error service.validate_consistent_length(arr1, arr2, arr3) - + def test_validate_consistent_length_with_none(self): """Test validate_consistent_length with None values .""" service = NumpySerializationService() - + arr1 = np.array([1, 2, 3]) arr2 = None arr3 = np.array([4, 5, 6]) - + # Should not raise error (None is filtered out) service.validate_consistent_length(arr1, arr2, arr3) - + def test_validate_consistent_length_mismatched(self): """Test validate_consistent_length with mismatched lengths .""" service = NumpySerializationService() - - arr1 = np.array([1, 2, 3]) # length 3 - arr2 = np.array([4, 5]) # length 2 - arr3 = np.array([7, 8, 9, 10]) # length 4 - + + arr1 = np.array([1, 2, 3]) # length 3 + arr2 = np.array([4, 5]) # length 2 + arr3 = np.array([7, 8, 9, 10]) # length 4 + with pytest.raises(ValueError, match="same length"): service.validate_consistent_length(arr1, arr2, arr3) - + with pytest.raises(ValueError, match="Received arrays with lengths"): service.validate_consistent_length(arr1, arr2, arr3) - + def test_serialize_model_pydantic(self): """Test serialize_model with Pydantic model .""" service = NumpySerializationService() - - model_data = { - 'array': np.array([1, 2, 3]), - 'scalar': np.float64(3.14), - 'string': 'test' - } + + model_data = {"array": np.array([1, 2, 3]), "scalar": np.float64(3.14), "string": "test"} mock_model = MockPydanticModel(model_data) - + result = service.serialize_model(mock_model) - expected = { - 'array': [1, 2, 3], - 'scalar': 3.14, - 'string': 'test' - } + expected = {"array": [1, 2, 3], "scalar": 3.14, "string": "test"} assert result == expected - + def test_serialize_model_regular_object(self): """Test serialize_model with regular object .""" service = NumpySerializationService() - + class RegularObject: def __init__(self): self.array = np.array([1, 2, 3]) self.scalar = np.int64(42) - self.string = 'test' - self._private = 'hidden' - + self.string = "test" + self._private = "hidden" + obj = RegularObject() result = service.serialize_model(obj) - - assert 'array' in result - assert result['array'] == [1, 2, 3] - assert result['scalar'] == 42 - assert result['string'] == 'test' - assert '_private' in result # Include arrays is True by default - + + assert "array" in result + assert result["array"] == [1, 2, 3] + assert result["scalar"] == 42 + assert result["string"] == "test" + assert "_private" in result # Include arrays is True by default + def test_serialize_model_primitive(self): """Test serialize_model with primitive value .""" service = NumpySerializationService() - + # Test with numpy array arr = np.array([1, 2, 3]) result = service.serialize_model(arr) - assert result == {'value': [1, 2, 3]} - + assert result == {"value": [1, 2, 3]} + # Test with scalar result = service.serialize_model(42) - assert result == {'value': 42} - + assert result == {"value": 42} + def test_serialize_model_exclude_arrays(self): """Test serialize_model with include_arrays=False .""" service = NumpySerializationService() - + class ObjectWithPrivate: def __init__(self): self.public = np.array([1, 2, 3]) self._private = np.array([4, 5, 6]) - self.__dunder = 'hidden' - + self.__dunder = "hidden" + obj = ObjectWithPrivate() result = service.serialize_model(obj, include_arrays=False) - - assert 'public' in result - assert '_private' not in result # Excluded because starts with _ - assert '__dunder' not in result - + + assert "public" in result + assert "_private" not in result # Excluded because starts with _ + assert "__dunder" not in result + def test_comprehensive_edge_cases(self): """Test comprehensive edge cases and integration.""" service = NumpySerializationService() - + # Complex nested structure complex_data = { - 'arrays': { - 'int_array': np.array([1, 2, 3]), - 'float_array': np.array([1.1, 2.2, 3.3]), - 'bool_array': np.array([True, False, True]), - 'datetime_array': np.array(['2023-01-01'], dtype='datetime64[D]') + "arrays": { + "int_array": np.array([1, 2, 3]), + "float_array": np.array([1.1, 2.2, 3.3]), + "bool_array": np.array([True, False, True]), + "datetime_array": np.array(["2023-01-01"], dtype="datetime64[D]"), }, - 'scalars': { - 'np_int': np.int64(42), - 'np_float': np.float64(3.14), - 'np_bool': np.bool_(True), - 'datetime_scalar': np.datetime64('2023-01-01') + "scalars": { + "np_int": np.int64(42), + "np_float": np.float64(3.14), + "np_bool": np.bool_(True), + "datetime_scalar": np.datetime64("2023-01-01"), }, - 'collections': [ + "collections": [ np.array([1, 2]), (np.int64(3), np.float64(4.5)), - {'nested': np.array([5, 6])} + {"nested": np.array([5, 6])}, ], - 'other': { - 'string': 'test', - 'int': 42, - 'rng': np.random.default_rng(42), - 'none': None - } + "other": {"string": "test", "int": 42, "rng": np.random.default_rng(42), "none": None}, } - + result = service.serialize_numpy_arrays(complex_data) - + # Check arrays were converted - assert result['arrays']['int_array'] == [1, 2, 3] - assert result['arrays']['float_array'] == [1.1, 2.2, 3.3] - assert result['arrays']['bool_array'] == [True, False, True] - assert isinstance(result['arrays']['datetime_array'][0], str) - + assert result["arrays"]["int_array"] == [1, 2, 3] + assert result["arrays"]["float_array"] == [1.1, 2.2, 3.3] + assert result["arrays"]["bool_array"] == [True, False, True] + assert isinstance(result["arrays"]["datetime_array"][0], str) + # Check scalars were converted - assert result['scalars']['np_int'] == 42 - assert result['scalars']['np_float'] == 3.14 - assert result['scalars']['np_bool'] is True - assert isinstance(result['scalars']['datetime_scalar'], str) - + assert result["scalars"]["np_int"] == 42 + assert result["scalars"]["np_float"] == 3.14 + assert result["scalars"]["np_bool"] is True + assert isinstance(result["scalars"]["datetime_scalar"], str) + # Check collections - assert result['collections'][0] == [1, 2] - assert result['collections'][1] == (3, 4.5) - assert result['collections'][2]['nested'] == [5, 6] - + assert result["collections"][0] == [1, 2] + assert result["collections"][1] == (3, 4.5) + assert result["collections"][2]["nested"] == [5, 6] + # Check other types - assert result['other']['string'] == 'test' - assert result['other']['int'] == 42 - assert result['other']['rng'] is None - assert result['other']['none'] is None + assert result["other"]["string"] == "test" + assert result["other"]["int"] == 42 + assert result["other"]["rng"] is None + assert result["other"]["none"] is None if __name__ == "__main__": # Run tests - pytest.main([__file__, "-v"]) \ No newline at end of file + pytest.main([__file__, "-v"]) diff --git a/tests/unit/test_ranklags.py b/tests/unit/test_ranklags.py index 62af4950..4b5811bc 100644 --- a/tests/unit/test_ranklags.py +++ b/tests/unit/test_ranklags.py @@ -2,6 +2,7 @@ import numpy as np import pytest + from tsbootstrap.ranklags import RankLags from tsbootstrap.utils.skbase_compat import safe_check_soft_dependencies as _check_soft_dependencies diff --git a/tests/unit/test_rescaling_service.py b/tests/unit/test_rescaling_service.py index 23635fbb..6f209966 100644 --- a/tests/unit/test_rescaling_service.py +++ b/tests/unit/test_rescaling_service.py @@ -8,240 +8,231 @@ class TestRescalingService: """Tests targeting specific uncovered lines in rescaling_service.py.""" - + def test_rescale_residuals_with_factors(self): """Test rescale_residuals method with rescaling factors.""" service = RescalingService() - + # Create test residuals residuals = np.array([0.1, -0.2, 0.3, -0.1, 0.05]) - + # Create rescaling factors rescale_factors = {"shift": 100.0, "scale": 50.0} - - # Test rescale_residuals + + # Test rescale_residuals rescaled_residuals = service.rescale_residuals(residuals, rescale_factors) - + # Should only apply scale, not shift expected = residuals * rescale_factors["scale"] assert np.array_equal(rescaled_residuals, expected) - + # Verify the result is different from input assert not np.array_equal(rescaled_residuals, residuals) - + def test_rescale_residuals_without_factors(self): """Test rescale_residuals method without rescaling factors .""" service = RescalingService() - + residuals = np.array([0.1, -0.2, 0.3]) - + # Test with empty factors rescaled_residuals = service.rescale_residuals(residuals, {}) assert np.array_equal(rescaled_residuals, residuals) - + # Test with None factors rescaled_residuals = service.rescale_residuals(residuals, None) assert np.array_equal(rescaled_residuals, residuals) - + def test_rescale_parameters_with_sigma2(self): """Test rescale_parameters method with sigma2 parameter .""" service = RescalingService() - + # Create test parameters with sigma2 - params = { - "sigma2": 2.0, - "ar": [0.5, 0.3], - "ma": [0.2], - "other_param": 1.0 - } - + params = {"sigma2": 2.0, "ar": [0.5, 0.3], "ma": [0.2], "other_param": 1.0} + rescale_factors = {"shift": 10.0, "scale": 5.0} - - # Test rescale_parameters + + # Test rescale_parameters adjusted_params = service.rescale_parameters(params, rescale_factors) - - # sigma2 should be adjusted by scale^2 + + # sigma2 should be adjusted by scale^2 expected_sigma2 = params["sigma2"] * (rescale_factors["scale"] ** 2) assert adjusted_params["sigma2"] == expected_sigma2 - - # Other parameters should remain unchanged + + # Other parameters should remain unchanged assert adjusted_params["ar"] == params["ar"] assert adjusted_params["ma"] == params["ma"] assert adjusted_params["other_param"] == params["other_param"] - + # Original params should not be modified assert params["sigma2"] == 2.0 - + def test_rescale_parameters_without_sigma2(self): """Test rescale_parameters method without sigma2 parameter.""" service = RescalingService() - + # Create test parameters without sigma2 - params = { - "ar": [0.5, 0.3], - "ma": [0.2], - "intercept": 1.5 - } - + params = {"ar": [0.5, 0.3], "ma": [0.2], "intercept": 1.5} + rescale_factors = {"shift": 10.0, "scale": 5.0} - + # Test rescale_parameters adjusted_params = service.rescale_parameters(params, rescale_factors) - + # All parameters should remain unchanged assert adjusted_params == params - + # Original params should not be modified assert adjusted_params is not params # Should be a copy - + def test_rescale_parameters_without_factors(self): """Test rescale_parameters method without rescaling factors .""" service = RescalingService() - + params = {"sigma2": 2.0, "ar": [0.5]} - + # Test with empty factors adjusted_params = service.rescale_parameters(params, {}) assert adjusted_params == params - - # Test with None factors + + # Test with None factors adjusted_params = service.rescale_parameters(params, None) assert adjusted_params == params - + def test_check_if_rescale_needed_edge_cases(self): """Test edge cases in check_if_rescale_needed method.""" service = RescalingService() - - # Test very small values + + # Test very small values small_data = np.array([0.0001, 0.0002, 0.0003]) needs_rescaling, factors = service.check_if_rescale_needed(small_data) assert needs_rescaling assert "shift" in factors assert "scale" in factors - - # Test very large values + + # Test very large values large_data = np.array([2e6, 3e6, 4e6]) needs_rescaling, factors = service.check_if_rescale_needed(large_data) assert needs_rescaling - - # Test very small standard deviation + + # Test very small standard deviation constant_data = np.array([1000, 1000, 1000]) needs_rescaling, factors = service.check_if_rescale_needed(constant_data) assert needs_rescaling - - # Test very large standard deviation + + # Test very large standard deviation high_variance_data = np.array([-5e6, 0, 5e6]) needs_rescaling, factors = service.check_if_rescale_needed(high_variance_data) assert needs_rescaling - + def test_check_if_rescale_needed_zero_std_protection(self): """Test protection against division by zero in rescale factors .""" service = RescalingService() - + # Create constant data that will have zero std constant_data = np.array([5.0, 5.0, 5.0, 5.0]) needs_rescaling, factors = service.check_if_rescale_needed(constant_data) - + if needs_rescaling: - # Should use minimum scale to avoid division by zero + # Should use minimum scale to avoid division by zero assert factors["scale"] >= 1e-8 - + # Test that rescaling works even with constant data rescaled = service.rescale_data(constant_data, factors) recovered = service.rescale_back_data(rescaled, factors) assert np.allclose(constant_data, recovered, rtol=1e-10) - + def test_rescale_data_edge_cases(self): """Test edge cases in rescale_data method.""" service = RescalingService() - - # Test with empty factors + + # Test with empty factors data = np.array([1, 2, 3]) rescaled = service.rescale_data(data, {}) assert np.array_equal(rescaled, data) - + # Test with None factors rescaled = service.rescale_data(data, None) assert np.array_equal(rescaled, data) - + # Test with missing scale or shift factors_no_scale = {"shift": 5.0} rescaled = service.rescale_data(data, factors_no_scale) expected = (data - 5.0) / 1.0 # Default scale is 1.0 assert np.array_equal(rescaled, expected) - + factors_no_shift = {"scale": 2.0} rescaled = service.rescale_data(data, factors_no_shift) expected = (data - 0.0) / 2.0 # Default shift is 0.0 assert np.array_equal(rescaled, expected) - + def test_rescale_back_data_edge_cases(self): """Test edge cases in rescale_back_data method.""" service = RescalingService() - - # Test with empty factors + + # Test with empty factors data = np.array([1, 2, 3]) rescaled_back = service.rescale_back_data(data, {}) assert np.array_equal(rescaled_back, data) - + # Test with None factors rescaled_back = service.rescale_back_data(data, None) assert np.array_equal(rescaled_back, data) - + # Test with missing scale or shift factors_no_scale = {"shift": 5.0} rescaled_back = service.rescale_back_data(data, factors_no_scale) expected = data * 1.0 + 5.0 # Default scale is 1.0 assert np.array_equal(rescaled_back, expected) - + factors_no_shift = {"scale": 2.0} rescaled_back = service.rescale_back_data(data, factors_no_shift) expected = data * 2.0 + 0.0 # Default shift is 0.0 assert np.array_equal(rescaled_back, expected) - + def test_comprehensive_rescaling_workflow(self): """Test complete rescaling workflow including all methods.""" service = RescalingService() - + # Create test data that needs rescaling original_data = np.array([5000, 6000, 7000, 8000, 9000]) - + # Step 1: Check if rescaling needed needs_rescaling, factors = service.check_if_rescale_needed(original_data) assert needs_rescaling - + # Step 2: Rescale data rescaled_data = service.rescale_data(original_data, factors) - + # Step 3: Test with residuals residuals = np.array([10, -20, 15, -5, 8]) rescaled_residuals = service.rescale_residuals(residuals, factors) - + # Step 4: Test with parameters params = {"sigma2": 4.0, "ar": [0.7], "constant": 2.0} rescaled_params = service.rescale_parameters(params, factors) - + # Step 5: Rescale back recovered_data = service.rescale_back_data(rescaled_data, factors) - + # Verify workflow assert np.allclose(original_data, recovered_data, rtol=1e-10) assert rescaled_params["sigma2"] != params["sigma2"] # Should be adjusted assert rescaled_params["ar"] == params["ar"] # Should remain same assert len(rescaled_residuals) == len(residuals) - + def test_rescaling_with_different_data_types(self): """Test rescaling with different numpy data types.""" service = RescalingService() - + # Test with different dtypes data_types = [ (np.array([1000, 2000, 3000], dtype=np.float32), 1e-6), # Lower precision for float32 (np.array([1000, 2000, 3000], dtype=np.float64), 1e-10), (np.array([1000, 2000, 3000], dtype=np.int32), 1e-10), - (np.array([1000, 2000, 3000], dtype=np.int64), 1e-10) + (np.array([1000, 2000, 3000], dtype=np.int64), 1e-10), ] - + for data, tolerance in data_types: needs_rescaling, factors = service.check_if_rescale_needed(data) if needs_rescaling: @@ -252,4 +243,4 @@ def test_rescaling_with_different_data_types(self): if __name__ == "__main__": # Run tests - pytest.main([__file__, "-v"]) \ No newline at end of file + pytest.main([__file__, "-v"]) diff --git a/tests/unit/test_service_container.py b/tests/unit/test_service_container.py index ad0fdafb..1b7cd4e1 100644 --- a/tests/unit/test_service_container.py +++ b/tests/unit/test_service_container.py @@ -1,7 +1,6 @@ """Tests for service_container.py.""" import pytest -from unittest.mock import Mock from pydantic import BaseModel from tsbootstrap.services.service_container import BootstrapServices @@ -9,120 +8,127 @@ class TestModel(BaseModel): """Test Pydantic model for sklearn adapter testing.""" + param1: int = 1 param2: float = 1.0 class TestServiceContainer: """Tests targeting specific uncovered lines in service_container.py.""" - + def test_with_sklearn_adapter(self): """Test with_sklearn_adapter method .""" # Create a proper Pydantic model test_model = TestModel(param1=5, param2=2.5) - + # Create services instance services = BootstrapServices() - + # Test with_sklearn_adapter method result = services.with_sklearn_adapter(test_model) - + # Should return self for chaining assert result is services - + # Should have created sklearn_adapter assert services.sklearn_adapter is not None - assert hasattr(services.sklearn_adapter, '__class__') - + assert hasattr(services.sklearn_adapter, "__class__") + # The adapter should have been created with the model # Verify it's the correct type from tsbootstrap.services.sklearn_compatibility import SklearnCompatibilityAdapter + assert isinstance(services.sklearn_adapter, SklearnCompatibilityAdapter) - + def test_with_batch_bootstrap(self): """Test with_batch_bootstrap method .""" # Create services instance services = BootstrapServices() - + # Test with_batch_bootstrap method without backend result = services.with_batch_bootstrap(use_backend=False) - + # Should return self for chaining assert result is services - + # Should have created batch_bootstrap service assert services.batch_bootstrap is not None - assert hasattr(services.batch_bootstrap, '__class__') - + assert hasattr(services.batch_bootstrap, "__class__") + # Test with backend enabled services2 = BootstrapServices() result2 = services2.with_batch_bootstrap(use_backend=True) - + # Should return self for chaining assert result2 is services2 - + # Should have created batch_bootstrap service assert services2.batch_bootstrap is not None - + def test_method_chaining_with_new_methods(self): """Test that new methods can be used in method chaining.""" test_model = TestModel() - + # Test chaining with sklearn adapter - services = (BootstrapServices() - .with_sklearn_adapter(test_model) - .with_batch_bootstrap(use_backend=False)) - + services = ( + BootstrapServices() + .with_sklearn_adapter(test_model) + .with_batch_bootstrap(use_backend=False) + ) + # Both services should be present assert services.sklearn_adapter is not None assert services.batch_bootstrap is not None - + def test_sklearn_adapter_with_different_models(self): """Test sklearn adapter with different model types.""" + # Create different Pydantic models class ModelA(BaseModel): param_a: int = 1 - + class ModelB(BaseModel): param_b: str = "test" param_c: float = 1.0 - + test_models = [ModelA(), ModelB(), TestModel()] - + for model in test_models: services = BootstrapServices() result = services.with_sklearn_adapter(model) - + assert result is services assert services.sklearn_adapter is not None - + def test_batch_bootstrap_configuration_options(self): """Test batch bootstrap with different configuration options.""" # Test with backend disabled services1 = BootstrapServices().with_batch_bootstrap(use_backend=False) assert services1.batch_bootstrap is not None - + # Test with backend enabled services2 = BootstrapServices().with_batch_bootstrap(use_backend=True) assert services2.batch_bootstrap is not None - + # Test default parameter (should be False) services3 = BootstrapServices().with_batch_bootstrap() assert services3.batch_bootstrap is not None - + def test_comprehensive_service_creation(self): """Test comprehensive service creation including all methods.""" test_model = TestModel() - + # Create services with the available methods including the new ones - services = (BootstrapServices() - .with_model_fitting(use_backend=False) - .with_residual_resampling() - .with_reconstruction() - .with_sklearn_adapter(test_model) # Line 147-148 - .with_batch_bootstrap(use_backend=True) # Line 224-225 - .with_block_generation()) - + services = ( + BootstrapServices() + .with_model_fitting(use_backend=False) + .with_residual_resampling() + .with_reconstruction() + .with_sklearn_adapter(test_model) # Line 147-148 + .with_batch_bootstrap(use_backend=True) # Line 224-225 + .with_block_generation() + ) + # Verify services are created (using correct attribute names) assert services.model_fitter is not None assert services.residual_resampler is not None @@ -130,30 +136,30 @@ def test_comprehensive_service_creation(self): assert services.sklearn_adapter is not None # New service assert services.batch_bootstrap is not None # New service assert services.block_generator is not None - + def test_factory_methods_with_new_services(self): """Test factory methods still work with new services available.""" # Test create_for_model_based_bootstrap factory services = BootstrapServices.create_for_model_based_bootstrap() - + # Should have core services (using correct attribute names) assert services.validator is not None assert services.model_fitter is not None assert services.residual_resampler is not None assert services.reconstructor is not None - + # New services should be None by default assert services.sklearn_adapter is None assert services.batch_bootstrap is None - + # Test create_for_block_bootstrap factory services2 = BootstrapServices.create_for_block_bootstrap() - + # Should have block-specific services assert services2.validator is not None assert services2.block_generator is not None assert services2.block_resampler is not None - + # New services should be None by default assert services2.sklearn_adapter is None assert services2.batch_bootstrap is None @@ -161,4 +167,4 @@ def test_factory_methods_with_new_services(self): if __name__ == "__main__": # Run tests - pytest.main([__file__, "-v"]) \ No newline at end of file + pytest.main([__file__, "-v"]) diff --git a/tests/unit/test_services.py b/tests/unit/test_services.py index 6da132af..273642d8 100644 --- a/tests/unit/test_services.py +++ b/tests/unit/test_services.py @@ -14,25 +14,26 @@ # Consolidate imports from all service test files import numpy as np import pytest -from numpy.testing import assert_allclose, assert_array_almost_equal +from numpy.testing import assert_allclose -from tsbootstrap.services.bootstrap_services import ( - ModelFittingService, - ResidualResamplingService, - TimeSeriesReconstructionService, - SieveOrderSelectionService, +from tsbootstrap.services import ( + SklearnCompatibilityAdapter, + ValidationService, ) from tsbootstrap.services.block_bootstrap_services import ( BlockGenerationService, BlockResamplingService, WindowFunctionService, ) -from tsbootstrap.services import ( - SklearnCompatibilityAdapter, - ValidationService, +from tsbootstrap.services.bootstrap_services import ( + ModelFittingService, + ResidualResamplingService, + SieveOrderSelectionService, + TimeSeriesReconstructionService, ) from tsbootstrap.services.rescaling_service import RescalingService from tsbootstrap.services.service_container import BootstrapServices + # AsyncBootstrapService not available @@ -143,22 +144,22 @@ class TestSieveOrderSelectionService: def test_select_order(self): """Test order selection for sieve bootstrap.""" service = SieveOrderSelectionService() - + # Generate AR(2) data np.random.seed(42) n = 150 data = np.zeros(n) for i in range(2, n): - data[i] = 0.5 * data[i-1] + 0.3 * data[i-2] + np.random.randn() * 0.1 - + data[i] = 0.5 * data[i - 1] + 0.3 * data[i - 2] + np.random.randn() * 0.1 + # Test order selection selected_order = service.select_order(data, min_lag=1, max_lag=5, criterion="aic") assert 1 <= selected_order <= 5 - + # Test with different criterion selected_order_bic = service.select_order(data, min_lag=1, max_lag=5, criterion="bic") assert 1 <= selected_order_bic <= 5 - + # Test with 2D input (should use first column) data_2d = data.reshape(-1, 1) selected_order_2d = service.select_order(data_2d, min_lag=1, max_lag=3) @@ -171,11 +172,11 @@ class TestBlockGenerationService: def test_generate_fixed_blocks(self): """Test generation of fixed-length blocks.""" service = BlockGenerationService() - + # Test fixed block generation X = np.arange(20) blocks = service.generate_blocks(X, block_length=5) - + assert len(blocks) > 0 # Each block should be length 5 or less (last block may be shorter) for block in blocks: @@ -186,18 +187,18 @@ def test_generate_fixed_blocks(self): def test_generate_variable_blocks(self): """Test generation of variable-length blocks.""" service = BlockGenerationService() - + # Test variable block generation with geometric distribution X = np.arange(30) rng = np.random.default_rng(42) blocks = service.generate_blocks( - X, + X, block_length=None, # Will use sqrt(n) as average block_length_distribution="geometric", min_block_length=2, - rng=rng + rng=rng, ) - + assert len(blocks) > 0 # Check that blocks have different lengths block_lengths = [len(block) for block in blocks] @@ -216,7 +217,7 @@ def test_resample_blocks(self): rng = np.random.default_rng(42) X = np.arange(30) - blocks = [X[i:i+5] for i in range(0, 25, 5)] + blocks = [X[i : i + 5] for i in range(0, 25, 5)] indices, data = service.resample_blocks( X, blocks, n=30, block_weights=None, tapered_weights=None, rng=rng @@ -233,18 +234,20 @@ def test_window_functions(self): """Test various window functions.""" service = WindowFunctionService() block_length = 10 - + # Test all window types window_types = ["bartletts", "blackman", "hamming", "hanning"] - + for window_type in window_types: window_func = service.get_window_function(window_type) weights = window_func(block_length) - + assert len(weights) == block_length - assert all(w >= -1e-10 for w in weights) # Weights should be non-negative (allow for floating point precision) + assert all( + w >= -1e-10 for w in weights + ) # Weights should be non-negative (allow for floating point precision) assert isinstance(weights, np.ndarray) - + # Test invalid window type with pytest.raises(ValueError, match="Window type 'invalid' not recognized"): service.get_window_function("invalid") @@ -253,21 +256,21 @@ def test_tukey_window(self): """Test Tukey window with alpha parameter.""" service = WindowFunctionService() block_length = 10 - + # Test default alpha weights_default = service.tukey_window(block_length) assert len(weights_default) == block_length assert isinstance(weights_default, np.ndarray) - + # Test different alpha values weights_alpha_0 = service.tukey_window(block_length, alpha=0.0) # Rectangular weights_alpha_1 = service.tukey_window(block_length, alpha=1.0) # Hann - + # Alpha=0 should be mostly flat (rectangular) # Alpha=1 should taper more at edges (Hann) assert len(weights_alpha_0) == block_length assert len(weights_alpha_1) == block_length - + # Different alpha values should produce different results assert not np.allclose(weights_alpha_0, weights_alpha_1) @@ -310,65 +313,65 @@ class TestValidationService: def test_validate_array_input(self): """Test array input validation.""" service = ValidationService() - + # Test positive integer validation assert service.validate_positive_int(5, "test_param") == 5 assert service.validate_positive_int(np.int64(10), "test_param") == 10 - + with pytest.raises(ValueError, match="must be a positive integer"): service.validate_positive_int(-1, "test_param") - + with pytest.raises(ValueError, match="must be a positive integer"): service.validate_positive_int(0, "test_param") - + # Test probability validation assert service.validate_probability(0.5, "prob") == 0.5 assert service.validate_probability(0.0, "prob") == 0.0 assert service.validate_probability(1.0, "prob") == 1.0 - + with pytest.raises(ValueError, match="must be a valid probability"): service.validate_probability(-0.1, "prob") - + with pytest.raises(ValueError, match="must be a valid probability"): service.validate_probability(1.1, "prob") - + # Test array shape validation X = np.random.randn(10, 2) service.validate_array_shape(X, (10, 2), "X") # Should not raise - + with pytest.raises(ValueError, match="shape .* does not match expected shape"): service.validate_array_shape(X, (5, 2), "X") def test_validate_invalid_input(self): """Test validation of invalid inputs.""" service = ValidationService() - + # Test block length validation assert service.validate_block_length(5, 20) == 5 - + with pytest.raises(ValueError, match="Block length must be a positive integer"): service.validate_block_length(0, 20) - + with pytest.raises(ValueError, match="cannot be larger than number of samples"): service.validate_block_length(25, 20) - + # Test model order validation assert service.validate_model_order(2) == 2 assert service.validate_model_order((1, 1, 1)) == (1, 1, 1) - + with pytest.raises(ValueError, match="must be non-negative"): service.validate_model_order(-1) - + with pytest.raises(ValueError, match="tuple must have exactly 3 elements"): service.validate_model_order((1, 2)) - + # Test random state validation rng = service.validate_random_state(42) assert isinstance(rng, np.random.Generator) - + rng2 = service.validate_random_state(np.random.default_rng(42)) assert isinstance(rng2, np.random.Generator) - + with pytest.raises(ValueError, match="must be None, int, or np.random.Generator"): service.validate_random_state("invalid") @@ -431,20 +434,20 @@ def test_create_sieve_services(self): def test_create_block_services(self): """Test creation of block bootstrap services.""" services = BootstrapServices.create_for_block_bootstrap() - + # Verify core services are present assert services.numpy_serializer is not None assert services.validator is not None - + # Verify block bootstrap services are present assert services.block_generator is not None assert services.block_resampler is not None assert services.window_function is not None - + # Verify services are of correct type assert isinstance(services.block_generator, BlockGenerationService) assert isinstance(services.block_resampler, BlockResamplingService) assert isinstance(services.window_function, WindowFunctionService) -# AsyncBootstrapService tests not available - module doesn't exist \ No newline at end of file +# AsyncBootstrapService tests not available - module doesn't exist diff --git a/tests/unit/test_sklearn_compatibility.py b/tests/unit/test_sklearn_compatibility.py index 35e85366..830a8eb6 100644 --- a/tests/unit/test_sklearn_compatibility.py +++ b/tests/unit/test_sklearn_compatibility.py @@ -2,13 +2,13 @@ import pytest from pydantic import BaseModel, Field -from unittest.mock import Mock from tsbootstrap.services.sklearn_compatibility import SklearnCompatibilityAdapter class TestModel(BaseModel): """Test Pydantic model for sklearn adapter testing.""" + param1: int = Field(default=5) param2: float = Field(default=2.5) param3: str = Field(default="test") @@ -17,21 +17,22 @@ class TestModel(BaseModel): class NestedTestModel(BaseModel): """Test Pydantic model with nested estimator.""" + model_config = {"arbitrary_types_allowed": True} - + simple_param: int = Field(default=10) nested_estimator: TestModel = Field(default_factory=TestModel) class MockEstimator: """Mock sklearn estimator for nested parameter testing.""" - + def __init__(self, mock_param=42): self.mock_param = mock_param - + def get_params(self, deep=True): return {"mock_param": self.mock_param} - + def set_params(self, **params): for key, value in params.items(): setattr(self, key, value) @@ -40,295 +41,311 @@ def set_params(self, **params): class TestSklearnCompatibilityAdapter: """Tests targeting specific uncovered lines in sklearn_compatibility.py.""" - + def test_init_with_valid_model(self): """Test adapter initialization with valid Pydantic model.""" model = TestModel() adapter = SklearnCompatibilityAdapter(model) - + assert adapter.model is model assert isinstance(adapter.model, BaseModel) - + def test_init_with_invalid_model_type_error(self): """Test adapter initialization with invalid model .""" # Test with non-Pydantic model invalid_model = {"not": "a_pydantic_model"} - - with pytest.raises(TypeError, match="SklearnCompatibilityAdapter requires a Pydantic BaseModel"): + + with pytest.raises( + TypeError, match="SklearnCompatibilityAdapter requires a Pydantic BaseModel" + ): SklearnCompatibilityAdapter(invalid_model) - + # Test with None - with pytest.raises(TypeError, match="SklearnCompatibilityAdapter requires a Pydantic BaseModel"): + with pytest.raises( + TypeError, match="SklearnCompatibilityAdapter requires a Pydantic BaseModel" + ): SklearnCompatibilityAdapter(None) - + # Test with regular object class RegularObject: pass - - with pytest.raises(TypeError, match="SklearnCompatibilityAdapter requires a Pydantic BaseModel"): + + with pytest.raises( + TypeError, match="SklearnCompatibilityAdapter requires a Pydantic BaseModel" + ): SklearnCompatibilityAdapter(RegularObject()) - + def test_get_params_basic_functionality(self): """Test get_params with basic model .""" model = TestModel(param1=10, param2=3.14, param3="hello") adapter = SklearnCompatibilityAdapter(model) - + params = adapter.get_params(deep=True) - + # Should include public parameters assert params["param1"] == 10 assert params["param2"] == 3.14 assert params["param3"] == "hello" - - # Should exclude excluded attributes + + # Should exclude excluded attributes assert "excluded_attr" not in params - + def test_get_params_private_attribute_filtering(self): """Test private attribute filtering in get_params .""" + class ModelWithPrivate(BaseModel): public_param: int = Field(default=1) # We'll test filtering by adding attributes after model creation - + model = ModelWithPrivate() - # Add private attributes directly to the instance + # Add private attributes directly to the instance model._private_param = 2 model.__very_private = 3 - + adapter = SklearnCompatibilityAdapter(model) - + params = adapter.get_params() - + # Only public parameters should be included assert "public_param" in params # Private attributes won't be in model_fields so they won't appear in params assert "_private_param" not in params assert "__very_private" not in params - + def test_get_params_with_nested_estimator(self): """Test get_params with nested estimator .""" + class ModelWithEstimator(BaseModel): model_config = {"arbitrary_types_allowed": True} - + simple_param: int = Field(default=5) estimator: MockEstimator = Field(default_factory=MockEstimator) - + model = ModelWithEstimator() model.estimator = MockEstimator(mock_param=99) adapter = SklearnCompatibilityAdapter(model) - + # Test with deep=True (should include nested parameters) params = adapter.get_params(deep=True) - + assert params["simple_param"] == 5 assert params["estimator__mock_param"] == 99 assert isinstance(params["estimator"], MockEstimator) - + def test_get_params_deep_false(self): """Test get_params with deep=False.""" + class ModelWithEstimator(BaseModel): model_config = {"arbitrary_types_allowed": True} - + simple_param: int = Field(default=5) estimator: MockEstimator = Field(default_factory=MockEstimator) - + model = ModelWithEstimator() model.estimator = MockEstimator(mock_param=99) adapter = SklearnCompatibilityAdapter(model) - + # Test with deep=False (should not include nested parameters) params = adapter.get_params(deep=False) - + assert params["simple_param"] == 5 assert isinstance(params["estimator"], MockEstimator) # Should not have nested parameters assert "estimator__mock_param" not in params - + def test_set_params_empty_params(self): """Test set_params with empty parameters .""" model = TestModel() adapter = SklearnCompatibilityAdapter(model) - + # Should return the model unchanged result = adapter.set_params() assert result is model - + # Should also work with explicit empty dict result = adapter.set_params(**{}) assert result is model - + def test_set_params_simple_parameters(self): """Test set_params with simple parameters .""" model = TestModel(param1=5, param2=2.5) adapter = SklearnCompatibilityAdapter(model) - + # Set simple parameters result = adapter.set_params(param1=15, param2=7.5) - + assert result is model assert model.param1 == 15 assert model.param2 == 7.5 - + def test_set_params_invalid_parameter_error(self): """Test set_params with invalid parameter .""" model = TestModel() adapter = SklearnCompatibilityAdapter(model) - + with pytest.raises(ValueError, match="Parameter 'invalid_param' is not valid"): adapter.set_params(invalid_param=999) - + # Error message should include available parameters with pytest.raises(ValueError, match="Available parameters are"): adapter.set_params(nonexistent=123) - + def test_set_params_nested_parameters(self): """Test set_params with nested parameters .""" + class ModelWithEstimator(BaseModel): model_config = {"arbitrary_types_allowed": True} - + simple_param: int = Field(default=5) estimator: MockEstimator = Field(default_factory=MockEstimator) - + model = ModelWithEstimator() model.estimator = MockEstimator(mock_param=42) adapter = SklearnCompatibilityAdapter(model) - + # Set nested parameter result = adapter.set_params(estimator__mock_param=100) - + assert result is model assert model.estimator.mock_param == 100 - + def test_set_params_nested_without_set_params_method(self): """Test set_params with nested object without set_params method .""" + class InvalidNested: def __init__(self): self.value = 10 - + class ModelWithInvalidNested(BaseModel): model_config = {"arbitrary_types_allowed": True} - + nested: InvalidNested = Field(default_factory=InvalidNested) - + model = ModelWithInvalidNested() adapter = SklearnCompatibilityAdapter(model) - + with pytest.raises(ValueError, match="Cannot set nested parameters for attribute 'nested'"): adapter.set_params(nested__value=20) - + # Error message should mention set_params method requirement with pytest.raises(ValueError, match="doesn't implement the set_params method"): adapter.set_params(nested__some_param=30) - + def test_set_params_multiple_nested_levels(self): """Test set_params with multiple levels of nesting.""" + class DeepNestedModel(BaseModel): model_config = {"arbitrary_types_allowed": True} - + level1: MockEstimator = Field(default_factory=MockEstimator) - + model = DeepNestedModel() adapter = SklearnCompatibilityAdapter(model) - + # Test nested parameter setting adapter.set_params(level1__mock_param=777) assert model.level1.mock_param == 777 - + def test_clone_method(self): """Test clone method .""" model = TestModel(param1=99, param2=3.14, param3="cloned") adapter = SklearnCompatibilityAdapter(model) - + # Clone the model cloned_model = adapter.clone(safe=True) - + # Should be a new instance with same parameters assert cloned_model is not model assert isinstance(cloned_model, TestModel) assert cloned_model.param1 == 99 assert cloned_model.param2 == 3.14 assert cloned_model.param3 == "cloned" - + def test_clone_method_safe_false(self): """Test clone method with safe=False.""" model = TestModel(param1=50, param2=1.5) adapter = SklearnCompatibilityAdapter(model) - - # Clone with safe=False + + # Clone with safe=False cloned_model = adapter.clone(safe=False) - + # Should still create new instance assert cloned_model is not model assert isinstance(cloned_model, TestModel) assert cloned_model.param1 == 50 assert cloned_model.param2 == 1.5 - + def test_complex_workflow_integration(self): """Test complete workflow integration.""" + class ComplexModel(BaseModel): model_config = {"arbitrary_types_allowed": True} - + basic_param: int = Field(default=1) float_param: float = Field(default=0.1) nested_estimator: MockEstimator = Field(default_factory=MockEstimator) - + model = ComplexModel(basic_param=10, float_param=0.5) model.nested_estimator = MockEstimator(mock_param=200) adapter = SklearnCompatibilityAdapter(model) - + # Test get_params params = adapter.get_params(deep=True) - expected_keys = {"basic_param", "float_param", "nested_estimator", "nested_estimator__mock_param"} + expected_keys = { + "basic_param", + "float_param", + "nested_estimator", + "nested_estimator__mock_param", + } assert set(params.keys()) == expected_keys - + # Test set_params with multiple parameter types - adapter.set_params( - basic_param=20, - float_param=0.8, - nested_estimator__mock_param=300 - ) - + adapter.set_params(basic_param=20, float_param=0.8, nested_estimator__mock_param=300) + assert model.basic_param == 20 assert model.float_param == 0.8 assert model.nested_estimator.mock_param == 300 - + # Test clone cloned = adapter.clone() assert cloned.basic_param == 20 assert cloned.float_param == 0.8 # Note: Clone uses get_params(deep=False), so nested estimator gets default values - + def test_field_info_edge_cases(self): """Test edge cases with field info attributes.""" + class EdgeCaseModel(BaseModel): normal_field: int = Field(default=1) # Test fields with various attributes that might not exist - + model = EdgeCaseModel() adapter = SklearnCompatibilityAdapter(model) - + # Should work without errors even with edge case field configurations params = adapter.get_params() assert "normal_field" in params - + # Test setting parameters adapter.set_params(normal_field=999) assert model.normal_field == 999 - + def test_adapter_with_inheritance(self): """Test adapter with inherited Pydantic models.""" + class BaseTestModel(BaseModel): base_param: int = Field(default=1) - + class InheritedModel(BaseTestModel): derived_param: str = Field(default="derived") - + model = InheritedModel(base_param=5, derived_param="test") adapter = SklearnCompatibilityAdapter(model) - + params = adapter.get_params() assert params["base_param"] == 5 assert params["derived_param"] == "test" - + # Test setting inherited parameters adapter.set_params(base_param=10, derived_param="updated") assert model.base_param == 10 @@ -337,4 +354,4 @@ class InheritedModel(BaseTestModel): if __name__ == "__main__": # Run tests - pytest.main([__file__, "-v"]) \ No newline at end of file + pytest.main([__file__, "-v"]) diff --git a/tests/unit/test_time_series_model_sklearn.py b/tests/unit/test_time_series_model_sklearn.py index fe4cd324..bd7f8f9c 100644 --- a/tests/unit/test_time_series_model_sklearn.py +++ b/tests/unit/test_time_series_model_sklearn.py @@ -6,6 +6,7 @@ from sklearn.model_selection import GridSearchCV from sklearn.pipeline import Pipeline from sklearn.preprocessing import StandardScaler + from tsbootstrap.time_series_model_sklearn import TimeSeriesModelSklearn diff --git a/tests/unit/test_time_series_simulator.py b/tests/unit/test_time_series_simulator.py index 8f275710..3ee1fa12 100644 --- a/tests/unit/test_time_series_simulator.py +++ b/tests/unit/test_time_series_simulator.py @@ -7,6 +7,7 @@ from hypothesis import HealthCheck, given, settings from hypothesis import strategies as st from numpy.random import Generator, default_rng + from tsbootstrap import TimeSeriesSimulator from tsbootstrap.utils.odds_and_ends import assert_arrays_compare from tsbootstrap.utils.skbase_compat import safe_check_soft_dependencies as _check_soft_dependencies diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py index a9890a9b..4cd81ae0 100644 --- a/tests/unit/test_utils.py +++ b/tests/unit/test_utils.py @@ -10,20 +10,20 @@ operations across diverse use cases and edge conditions. """ +from unittest.mock import Mock, patch + import numpy as np -import pandas as pd import pytest -from unittest.mock import Mock, patch -from tsbootstrap.utils.validate import ( - validate_integers, - validate_X_and_y, -) # BlockLengthValidator not available # validators module doesn't exist # bootstrap_common and bootstrap_factory modules don't exist # ranklags module doesn't exist from tsbootstrap.utils.auto_order_selector import AutoOrderSelector +from tsbootstrap.utils.validate import ( + validate_integers, + validate_X_and_y, +) class TestValidationFunctions: @@ -40,7 +40,6 @@ def test_validate_integers(self): # These tests need to be rewritten to match actual API pass - def test_validate_bootstrap_input(self): """Test bootstrap input validation.""" # Valid 1D array @@ -98,7 +97,7 @@ def test_order_selection_ar(self): n = 200 data = np.zeros(n) for i in range(3, n): - data[i] = 0.5 * data[i-1] + 0.2 * data[i-2] - 0.1 * data[i-3] + np.random.randn() + data[i] = 0.5 * data[i - 1] + 0.2 * data[i - 2] - 0.1 * data[i - 3] + np.random.randn() selector = AutoOrderSelector(model_type="ar", max_lag=10) selector.fit(data) @@ -144,4 +143,4 @@ def test_predict_interface(self): assert len(predictions) > 0 -# TestBootstrapFactory removed - bootstrap_factory module doesn't exist \ No newline at end of file +# TestBootstrapFactory removed - bootstrap_factory module doesn't exist diff --git a/tests/unit/test_validation.py b/tests/unit/test_validation.py index 124752ba..1ec532aa 100644 --- a/tests/unit/test_validation.py +++ b/tests/unit/test_validation.py @@ -8,148 +8,154 @@ class TestValidationService: """Tests targeting specific uncovered lines in validation.py.""" - + def test_validate_random_state_none(self): """Test validate_random_state with None .""" # Test None case - should return default_rng() result = ValidationService.validate_random_state(None) - + # Should return a Generator assert isinstance(result, np.random.Generator) - + # Should be a different instance each time (new seed) result2 = ValidationService.validate_random_state(None) assert isinstance(result2, np.random.Generator) - + def test_validate_model_order_tuple_negative_values(self): """Test validate_model_order with tuple containing negative values .""" # Test tuple with negative value in first position with pytest.raises(ValueError, match="order\\[0\\] must be non-negative integer"): ValidationService.validate_model_order((-1, 0, 1)) - - # Test tuple with negative value in second position + + # Test tuple with negative value in second position with pytest.raises(ValueError, match="order\\[1\\] must be non-negative integer"): ValidationService.validate_model_order((1, -1, 1)) - + # Test tuple with negative value in third position with pytest.raises(ValueError, match="order\\[2\\] must be non-negative integer"): ValidationService.validate_model_order((1, 0, -1)) - + # Test with non-integer in tuple with pytest.raises(ValueError, match="order\\[0\\] must be non-negative integer"): ValidationService.validate_model_order((1.5, 0, 1)) - + def test_validate_model_order_invalid_type(self): """Test validate_model_order with invalid type .""" # Test with string with pytest.raises(TypeError, match="order must be int or tuple, got str"): ValidationService.validate_model_order("invalid") - - # Test with list + + # Test with list with pytest.raises(TypeError, match="order must be int or tuple, got list"): ValidationService.validate_model_order([1, 0, 1]) - + # Test with float with pytest.raises(TypeError, match="order must be int or tuple, got float"): ValidationService.validate_model_order(1.0) - + # Test with None with pytest.raises(TypeError, match="order must be int or tuple, got NoneType"): ValidationService.validate_model_order(None) - + def test_validate_random_state_comprehensive(self): """Test all paths in validate_random_state for complete coverage.""" - # Test None case + # Test None case result = ValidationService.validate_random_state(None) assert isinstance(result, np.random.Generator) - + # Test int case result = ValidationService.validate_random_state(42) assert isinstance(result, np.random.Generator) - + # Test np.integer case result = ValidationService.validate_random_state(np.int64(42)) assert isinstance(result, np.random.Generator) - + # Test existing Generator case gen = np.random.default_rng(42) result = ValidationService.validate_random_state(gen) assert result is gen - + # Test invalid type - with pytest.raises(ValueError, match="random_state must be None, int, or np.random.Generator"): + with pytest.raises( + ValueError, match="random_state must be None, int, or np.random.Generator" + ): ValidationService.validate_random_state("invalid") - + def test_validate_model_order_edge_cases(self): """Test edge cases for validate_model_order.""" # Test valid int orders assert ValidationService.validate_model_order(0) == 0 assert ValidationService.validate_model_order(1) == 1 assert ValidationService.validate_model_order(np.int64(5)) == 5 - + # Test valid tuple orders assert ValidationService.validate_model_order((1, 1, 1)) == (1, 1, 1) assert ValidationService.validate_model_order((0, 0, 0)) == (0, 0, 0) - assert ValidationService.validate_model_order((np.int64(1), np.int64(0), np.int64(1))) == (1, 0, 1) - + assert ValidationService.validate_model_order((np.int64(1), np.int64(0), np.int64(1))) == ( + 1, + 0, + 1, + ) + # Test invalid single int with pytest.raises(ValueError, match="order must be non-negative"): ValidationService.validate_model_order(-1) - + # Test tuple with wrong length with pytest.raises(ValueError, match="order tuple must have exactly 3 elements"): ValidationService.validate_model_order((1, 0)) - + with pytest.raises(ValueError, match="order tuple must have exactly 3 elements"): ValidationService.validate_model_order((1, 0, 1, 0)) - + def test_other_validation_methods_for_completeness(self): """Test other validation methods to ensure they work correctly.""" # Test validate_positive_int assert ValidationService.validate_positive_int(5, "test") == 5 assert ValidationService.validate_positive_int(np.int64(3), "test") == 3 - + with pytest.raises(ValueError, match="must be a positive integer"): ValidationService.validate_positive_int(0, "test") - + with pytest.raises(ValueError, match="must be a positive integer"): ValidationService.validate_positive_int(-1, "test") - + with pytest.raises(ValueError, match="must be a positive integer"): ValidationService.validate_positive_int(1.5, "test") - + # Test validate_probability assert ValidationService.validate_probability(0.5, "test") == 0.5 assert ValidationService.validate_probability(0.0, "test") == 0.0 assert ValidationService.validate_probability(1.0, "test") == 1.0 - + with pytest.raises(ValueError, match="must be a valid probability"): ValidationService.validate_probability(-0.1, "test") - + with pytest.raises(ValueError, match="must be a valid probability"): ValidationService.validate_probability(1.1, "test") - + # Test validate_array_shape arr = np.array([[1, 2], [3, 4]]) ValidationService.validate_array_shape(arr, (2, 2), "test") # Should not raise - + with pytest.raises(ValueError, match="shape .* does not match expected shape"): ValidationService.validate_array_shape(arr, (2, 3), "test") - + # Test validate_block_length assert ValidationService.validate_block_length(5, 10) == 5 assert ValidationService.validate_block_length(np.int64(3), 10) == 3 - + with pytest.raises(ValueError, match="Block length must be a positive integer"): ValidationService.validate_block_length(0, 10) - + with pytest.raises(ValueError, match="Block length must be a positive integer"): ValidationService.validate_block_length(-1, 10) - + with pytest.raises(ValueError, match="block_length .* cannot be larger than"): ValidationService.validate_block_length(15, 10) if __name__ == "__main__": # Run tests - pytest.main([__file__, "-v"]) \ No newline at end of file + pytest.main([__file__, "-v"]) diff --git a/tests/unit/test_validators.py b/tests/unit/test_validators.py index 2f81142f..53c42f94 100644 --- a/tests/unit/test_validators.py +++ b/tests/unit/test_validators.py @@ -25,6 +25,7 @@ from hypothesis import assume, given from hypothesis import strategies as st from pydantic import BaseModel, ValidationError + from tsbootstrap.validators import ( BlockLengthDistribution, BootstrapIndices,