Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
repos:
# Ruff - Fast Python linter and formatter (replaces autoflake, isort, and many other linters)
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.1.13
rev: v0.15.14
hooks:
# Run the linter with autofix
- id: ruff
args: ['--fix', '--exit-non-zero-on-fix']
# Ruff will use the configuration from pyproject.toml

# Black - Still keep for consistent formatting
- repo: https://github.com/psf/black
rev: 23.11.0
- repo: https://github.com/psf/black-pre-commit-mirror
rev: 26.5.1
hooks:
- id: black

Expand Down
1 change: 1 addition & 0 deletions docs/examples/auto_model_usage.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

import matplotlib.pyplot as plt
import numpy as np

from tsbootstrap.utils.auto_order_selector import AutoOrderSelector


Expand Down
128 changes: 32 additions & 96 deletions examples/performance_comparison_notebook.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,7 @@ def create_performance_notebook():
)

# Setup cell
cells.append(
nbf.v4.new_code_cell(
"""# Import required libraries
cells.append(nbf.v4.new_code_cell("""# Import required libraries
import os
import time
import numpy as np
Expand All @@ -59,14 +57,10 @@ def create_performance_notebook():
# Set random seed for reproducibility
np.random.seed(42)

print("Setup complete!")"""
)
)
print("Setup complete!")"""))

# Performance measurement utilities
cells.append(
nbf.v4.new_code_cell(
"""# Utility functions for performance measurement
cells.append(nbf.v4.new_code_cell("""# Utility functions for performance measurement

def measure_performance(func, *args, n_runs=5, **kwargs):
\"\"\"Measure average performance over multiple runs.\"\"\"
Expand Down Expand Up @@ -122,22 +116,14 @@ def plot_performance_comparison(results_dict, title="Performance Comparison"):
plt.tight_layout()
plt.show()

print("Utility functions loaded!")"""
)
)
print("Utility functions loaded!")"""))

# Example 1: Single Model Fitting
cells.append(
nbf.v4.new_markdown_cell(
"""## Example 1: Single Model Fitting
cells.append(nbf.v4.new_markdown_cell("""## Example 1: Single Model Fitting

First, let's compare the performance of fitting a single ARIMA model using both backends."""
)
)
First, let's compare the performance of fitting a single ARIMA model using both backends."""))

cells.append(
nbf.v4.new_code_cell(
"""# Generate sample time series data
cells.append(nbf.v4.new_code_cell("""# Generate sample time series data
data = np.cumsum(np.random.randn(1000)) # Random walk with 1000 points

print(f"Data shape: {data.shape}")
Expand All @@ -149,13 +135,9 @@ def plot_performance_comparison(results_dict, title="Performance Comparison"):
plt.title("Sample Time Series Data")
plt.xlabel("Time")
plt.ylabel("Value")
plt.show()"""
)
)
plt.show()"""))

cells.append(
nbf.v4.new_code_cell(
"""# Compare single ARIMA model fitting
cells.append(nbf.v4.new_code_cell("""# Compare single ARIMA model fitting

def fit_arima_statsmodels(data):
\"\"\"Fit ARIMA model using statsmodels backend.\"\"\"
Expand Down Expand Up @@ -186,9 +168,7 @@ def fit_arima_statsforecast(data):

print(f"\\nStatsModels: {sm_results['mean']:.3f} ± {sm_results['std']:.3f} seconds")
print(f"StatsForecast: {sf_results['mean']:.3f} ± {sf_results['std']:.3f} seconds")
print(f"Speedup: {sm_results['mean'] / sf_results['mean']:.1f}x faster!")"""
)
)
print(f"Speedup: {sm_results['mean'] / sf_results['mean']:.1f}x faster!")"""))

# Example 2: Batch Processing
cells.append(
Expand All @@ -199,9 +179,7 @@ def fit_arima_statsforecast(data):
)
)

cells.append(
nbf.v4.new_code_cell(
"""# Generate multiple time series
cells.append(nbf.v4.new_code_cell("""# Generate multiple time series
n_series = 100
series_length = 500

Expand All @@ -226,13 +204,9 @@ def fit_arima_statsforecast(data):
ax.set_xlabel("Time")
ax.set_ylabel("Value")
plt.tight_layout()
plt.show()"""
)
)
plt.show()"""))

cells.append(
nbf.v4.new_code_cell(
"""# Compare batch processing performance
cells.append(nbf.v4.new_code_cell("""# Compare batch processing performance

def batch_fit_statsmodels(series_list):
\"\"\"Sequential fitting with statsmodels.\"\"\"
Expand Down Expand Up @@ -275,9 +249,7 @@ def batch_fit_statsforecast(series_list):
print(f"Speedup: {sm_batch_results['mean'] / sf_batch_results['mean']:.1f}x faster!")
print(f"\\nTime per model:")
print(f" StatsModels: {sm_batch_results['mean']/n_series*1000:.1f}ms")
print(f" StatsForecast: {sf_batch_results['mean']/n_series*1000:.1f}ms")"""
)
)
print(f" StatsForecast: {sf_batch_results['mean']/n_series*1000:.1f}ms")"""))

# Example 3: Bootstrap Performance
cells.append(
Expand All @@ -288,9 +260,7 @@ def batch_fit_statsforecast(series_list):
)
)

cells.append(
nbf.v4.new_code_cell(
"""# Compare bootstrap performance
cells.append(nbf.v4.new_code_cell("""# Compare bootstrap performance
data = np.cumsum(np.random.randn(365)) # One year of daily data
n_bootstraps = 500

Expand Down Expand Up @@ -330,22 +300,14 @@ def bootstrap_statsforecast(data, n_bootstraps):

print(f"\\nStatsModels: {sm_bootstrap['mean']:.2f} seconds")
print(f"StatsForecast: {sf_bootstrap['mean']:.2f} seconds")
print(f"Speedup: {sm_bootstrap['mean'] / sf_bootstrap['mean']:.1f}x faster!")"""
)
)
print(f"Speedup: {sm_bootstrap['mean'] / sf_bootstrap['mean']:.1f}x faster!")"""))

# Example 4: Scaling Analysis
cells.append(
nbf.v4.new_markdown_cell(
"""## Example 4: Scaling Analysis
cells.append(nbf.v4.new_markdown_cell("""## Example 4: Scaling Analysis

Let's analyze how performance scales with the number of models."""
)
)
Let's analyze how performance scales with the number of models."""))

cells.append(
nbf.v4.new_code_cell(
"""# Scaling analysis
cells.append(nbf.v4.new_code_cell("""# Scaling analysis
n_series_list = [10, 25, 50, 100, 200]
sm_times = []
sf_times = []
Expand Down Expand Up @@ -407,22 +369,14 @@ def bootstrap_statsforecast(data, n_bootstraps):

print(f"\\nSpeedup increases with scale:")
for n, speedup in zip(n_series_list, speedups):
print(f" {n} models: {speedup:.1f}x faster")"""
)
)
print(f" {n} models: {speedup:.1f}x faster")"""))

# Example 5: Memory Usage
cells.append(
nbf.v4.new_markdown_cell(
"""## Example 5: Memory Usage Comparison
cells.append(nbf.v4.new_markdown_cell("""## Example 5: Memory Usage Comparison

Besides speed, statsforecast also uses memory more efficiently."""
)
)
Besides speed, statsforecast also uses memory more efficiently."""))

cells.append(
nbf.v4.new_code_cell(
"""import psutil
cells.append(nbf.v4.new_code_cell("""import psutil
import gc

def measure_memory_usage(backend_type, n_models=100):
Expand Down Expand Up @@ -495,9 +449,7 @@ def measure_memory_usage(backend_type, n_models=100):
plt.show()

print(f"\\nMemory reduction: {reduction:.1f}%")
print(f"StatsForecast uses {sm_memory/sf_memory:.1f}x less memory!")"""
)
)
print(f"StatsForecast uses {sm_memory/sf_memory:.1f}x less memory!")"""))

# Example 6: Real-world scenario
cells.append(
Expand All @@ -508,9 +460,7 @@ def measure_memory_usage(backend_type, n_models=100):
)
)

cells.append(
nbf.v4.new_code_cell(
"""# Simulate production forecasting pipeline
cells.append(nbf.v4.new_code_cell("""# Simulate production forecasting pipeline
def production_pipeline(use_backend=False):
\"\"\"Simulate a production forecasting pipeline.\"\"\"
results = {
Expand Down Expand Up @@ -624,14 +574,10 @@ def production_pipeline(use_backend=False):
ax2.set_title('Processing Throughput', fontsize=14, fontweight='bold')

plt.tight_layout()
plt.show()"""
)
)
plt.show()"""))

# Summary and conclusions
cells.append(
nbf.v4.new_markdown_cell(
"""## Summary and Conclusions
cells.append(nbf.v4.new_markdown_cell("""## Summary and Conclusions

### Performance Improvements Achieved:

Expand Down Expand Up @@ -674,22 +620,14 @@ def production_pipeline(use_backend=False):
model = TimeSeriesModel(X=data, model_type="arima", use_backend=True)
```

The migration is designed to be gradual and safe, with 100% backward compatibility!"""
)
)
The migration is designed to be gradual and safe, with 100% backward compatibility!"""))

# Add rollout monitoring example
cells.append(
nbf.v4.new_markdown_cell(
"""## Bonus: Monitor Your Rollout
cells.append(nbf.v4.new_markdown_cell("""## Bonus: Monitor Your Rollout

Track the success of your migration with built-in monitoring tools."""
)
)
Track the success of your migration with built-in monitoring tools."""))

cells.append(
nbf.v4.new_code_cell(
"""# Check current rollout status
cells.append(nbf.v4.new_code_cell("""# Check current rollout status
from tsbootstrap.backends.feature_flags import get_rollout_monitor

monitor = get_rollout_monitor()
Expand All @@ -712,9 +650,7 @@ def production_pipeline(use_backend=False):
# Calculate overall speedup from real usage
if report['statsmodels']['avg_duration'] > 0 and report['statsforecast']['avg_duration'] > 0:
real_speedup = report['statsmodels']['avg_duration'] / report['statsforecast']['avg_duration']
print(f"\\nReal-world speedup: {real_speedup:.1f}x")"""
)
)
print(f"\\nReal-world speedup: {real_speedup:.1f}x")"""))

nb.cells = cells
return nb
Expand Down
Loading
Loading