|
| 1 | +from typing import List, TYPE_CHECKING, Union |
| 2 | + |
| 3 | +import numpy as np |
| 4 | +import pandas as pd |
| 5 | + |
| 6 | +from ._util import _data_period |
| 7 | + |
| 8 | +if TYPE_CHECKING: |
| 9 | + from .backtesting import Strategy, Trade |
| 10 | + |
| 11 | + |
def compute_drawdown_duration_peaks(dd: pd.Series):
    """
    Locate drawdown periods in `dd` and measure each one.

    A value of exactly 0 in `dd` is treated as "not in drawdown". A drawdown
    period is the stretch between two consecutive zero positions that are
    more than one bar apart; the last bar of `dd` always counts as a closing
    boundary, so a drawdown still running at the end of the data is included.

    Returns a pair of series `(duration, peak_dd)`, both indexed like `dd`.
    Each holds a value only at the label where a drawdown period ends
    (NaN elsewhere): the index difference between the period's end and
    start, and the maximum `dd` value within the period, respectively.
    """
    # Integer positions of all zero-drawdown bars, with the final bar appended
    # (np.unique also sorts and removes the duplicate if the last bar is zero).
    zero_positions = (dd == 0).values.nonzero()[0]
    boundaries = np.unique(np.r_[zero_positions, len(dd) - 1])
    ends = pd.Series(boundaries, index=dd.index[boundaries])

    # Pair every boundary with the one before it; keep only pairs that
    # enclose at least one bar in between.
    spans = ends.to_frame('iloc')
    spans['prev'] = ends.shift()
    spans = spans[spans['iloc'] > spans['prev'] + 1].astype(int)

    # Nothing qualified as a drawdown: skip the computations below (they do
    # not play well with an empty frame) and hand back two all-NaN series.
    if spans.empty:
        blank = dd.replace(0, np.nan)
        return blank, blank

    labels = dd.index
    end_labels = spans['iloc'].map(labels.__getitem__)
    start_labels = spans['prev'].map(labels.__getitem__)
    spans['duration'] = end_labels - start_labels
    spans['peak_dd'] = spans.apply(
        lambda row: dd.iloc[row['prev']:row['iloc'] + 1].max(), axis=1)

    # Spread the per-period values back over the full index.
    spans = spans.reindex(labels)
    return spans['duration'], spans['peak_dd']
| 26 | + |
| 27 | + |
def geometric_mean(returns: pd.Series) -> float:
    """
    Geometric mean of a series of fractional returns.

    Missing values count as a return of 0. If any growth factor
    (1 + return) is zero or negative, the result is 0. An empty series
    yields NaN.
    """
    growth = 1 + returns.fillna(0)
    if (growth <= 0).any():
        return 0
    # `or np.nan` turns a zero count into NaN instead of dividing by zero.
    count = len(growth) or np.nan
    mean_log_growth = np.log(growth).sum() / count
    return np.exp(mean_log_growth) - 1
| 33 | + |
| 34 | + |
def compute_stats(
        trades: Union[List['Trade'], pd.DataFrame],
        equity: np.ndarray,
        ohlc_data: pd.DataFrame,
        strategy_instance: 'Strategy',
        risk_free_rate: float = 0,
) -> pd.Series:
    """
    Compute summary performance statistics for a backtest.

    Parameters:
        trades: Either a list of trade objects (read via their `size`,
            `entry_bar`, `exit_bar`, `entry_price`, `exit_price`, `pl`,
            `pl_pct`, `entry_time` and `exit_time` attributes) or a trades
            DataFrame. A DataFrame is used as is; the columns read here are
            'EntryBar', 'ExitBar', 'PnL', 'ReturnPct' and 'Duration'.
        equity: Equity values, one per row of `ohlc_data`.
        ohlc_data: Price data; its index and its `Close` column are used.
        strategy_instance: Stored unchanged under the '_strategy' key.
        risk_free_rate: Risk-free rate as a fraction, strictly between
            -1 and 1. Used in the Sharpe and Sortino ratios.

    Returns:
        A `_Stats` series (a `pd.Series` subclass) with the statistics in a
        fixed order, followed by the '_strategy', '_equity_curve' and
        '_trades' entries. Annualized figures are only meaningful when
        `ohlc_data` has a `DatetimeIndex`; otherwise they come out as NaN.
    """
    assert -1 < risk_free_rate < 1

    index = ohlc_data.index
    # Drawdown as a fraction below the running equity peak (0 at a peak).
    dd = 1 - equity / np.maximum.accumulate(equity)
    dd_dur, dd_peaks = compute_drawdown_duration_peaks(pd.Series(dd, index=index))

    equity_df = pd.DataFrame({
        'Equity': equity,
        'DrawdownPct': dd,
        'DrawdownDuration': dd_dur},
        index=index)

    if isinstance(trades, pd.DataFrame):
        trades_df = trades
    else:
        # Came straight from Backtest.run()
        trades_df = pd.DataFrame({
            'Size': [t.size for t in trades],
            'EntryBar': [t.entry_bar for t in trades],
            'ExitBar': [t.exit_bar for t in trades],
            'EntryPrice': [t.entry_price for t in trades],
            'ExitPrice': [t.exit_price for t in trades],
            'PnL': [t.pl for t in trades],
            'ReturnPct': [t.pl_pct for t in trades],
            'EntryTime': [t.entry_time for t in trades],
            'ExitTime': [t.exit_time for t in trades],
        })
        trades_df['Duration'] = trades_df['ExitTime'] - trades_df['EntryTime']
    del trades

    pl = trades_df['PnL']
    returns = trades_df['ReturnPct']
    durations = trades_df['Duration']

    def _round_timedelta(value, _period=_data_period(index)):
        # Round Timedelta values up to the data's resolution; pass anything
        # else (e.g. NaN or plain numbers for non-datetime indexes) through.
        if not isinstance(value, pd.Timedelta):
            return value
        resolution = getattr(_period, 'resolution_string', None) or _period.resolution
        return value.ceil(resolution)

    s = pd.Series(dtype=object)
    s.loc['Start'] = index[0]
    s.loc['End'] = index[-1]
    s.loc['Duration'] = s.End - s.Start

    # Mark every bar covered by at least one trade (entry through exit, inclusive).
    have_position = np.repeat(0, len(index))
    for t in trades_df.itertuples(index=False):
        have_position[t.EntryBar:t.ExitBar + 1] = 1

    s.loc['Exposure Time [%]'] = have_position.mean() * 100  # In "n bars" time, not index time
    s.loc['Equity Final [$]'] = equity[-1]
    s.loc['Equity Peak [$]'] = equity.max()
    s.loc['Return [%]'] = (equity[-1] - equity[0]) / equity[0] * 100
    c = ohlc_data.Close.values
    s.loc['Buy & Hold Return [%]'] = (c[-1] - c[0]) / c[0] * 100  # long-only return

    # Placeholders that make every annualized figure NaN when the index is
    # not datetime-based.
    gmean_day_return: float = 0
    day_returns = np.array(np.nan)
    annual_trading_days = np.nan
    if isinstance(index, pd.DatetimeIndex):
        day_returns = equity_df['Equity'].resample('D').last().dropna().pct_change()
        gmean_day_return = geometric_mean(day_returns)
        # Use 365 days/year when the share of bars falling on Saturday/Sunday
        # exceeds 60% of 2/7; otherwise assume 252 trading days.
        annual_trading_days = float(
            365 if index.dayofweek.to_series().between(5, 6).mean() > 2/7 * .6 else
            252)

    # Annualized return and risk metrics are computed based on the (mostly correct)
    # assumption that the returns are compounded. See: https://dx.doi.org/10.2139/ssrn.3054517
    # Our annualized return matches `empyrical.annual_return(day_returns)` whereas
    # our risk doesn't; they use the simpler approach below.
    daily_growth = 1 + gmean_day_return
    # ddof=1 for a real series of day returns; ddof=0 for the 0-d NaN
    # placeholder, whose empty shape tuple is falsy.
    day_returns_var = day_returns.var(ddof=int(bool(day_returns.shape)))
    annualized_return = daily_growth**annual_trading_days - 1
    s.loc['Return (Ann.) [%]'] = annualized_return * 100
    s.loc['Volatility (Ann.) [%]'] = np.sqrt(
        (day_returns_var + daily_growth**2)**annual_trading_days
        - daily_growth**(2 * annual_trading_days)) * 100
    # s.loc['Return (Ann.) [%]'] = gmean_day_return * annual_trading_days * 100
    # s.loc['Risk (Ann.) [%]'] = day_returns.std(ddof=1) * np.sqrt(annual_trading_days) * 100

    # Our Sharpe mismatches `empyrical.sharpe_ratio()` because they use arithmetic mean return
    # and simple standard deviation.
    # Return and volatility are in percent here, so the fractional risk-free
    # rate is scaled to percent before subtracting.
    s.loc['Sharpe Ratio'] = np.clip(
        (s.loc['Return (Ann.) [%]'] - risk_free_rate * 100)
        / (s.loc['Volatility (Ann.) [%]'] or np.nan),
        0, np.inf)
    # Our Sortino mismatches `empyrical.sortino_ratio()` because they use arithmetic mean return
    downside_risk = (np.sqrt(np.mean(day_returns.clip(-np.inf, 0)**2))
                     * np.sqrt(annual_trading_days))
    # A zero downside risk (no negative day returns) is an expected case;
    # let it yield inf/NaN without emitting a floating-point warning.
    with np.errstate(divide='ignore', invalid='ignore'):
        s.loc['Sortino Ratio'] = np.clip(
            (annualized_return - risk_free_rate) / downside_risk, 0, np.inf)
    max_dd = -np.nan_to_num(dd.max())
    s.loc['Calmar Ratio'] = np.clip(annualized_return / (-max_dd or np.nan), 0, np.inf)
    s.loc['Max. Drawdown [%]'] = max_dd * 100
    s.loc['Avg. Drawdown [%]'] = -dd_peaks.mean() * 100
    s.loc['Max. Drawdown Duration'] = _round_timedelta(dd_dur.max())
    s.loc['Avg. Drawdown Duration'] = _round_timedelta(dd_dur.mean())
    s.loc['# Trades'] = n_trades = len(trades_df)
    s.loc['Win Rate [%]'] = np.nan if not n_trades else (pl > 0).sum() / n_trades * 100
    s.loc['Best Trade [%]'] = returns.max() * 100
    s.loc['Worst Trade [%]'] = returns.min() * 100
    mean_return = geometric_mean(returns)
    s.loc['Avg. Trade [%]'] = mean_return * 100
    s.loc['Max. Trade Duration'] = _round_timedelta(durations.max())
    s.loc['Avg. Trade Duration'] = _round_timedelta(durations.mean())
    # `or np.nan` guards each ratio below against a zero denominator.
    s.loc['Profit Factor'] = (returns[returns > 0].sum()
                              / (abs(returns[returns < 0].sum()) or np.nan))
    s.loc['Expectancy [%]'] = returns.mean() * 100
    s.loc['SQN'] = np.sqrt(n_trades) * pl.mean() / (pl.std() or np.nan)

    s.loc['_strategy'] = strategy_instance
    s.loc['_equity_curve'] = equity_df
    s.loc['_trades'] = trades_df

    s = _Stats(s)
    return s
| 147 | + |
| 148 | + |
| 149 | +class _Stats(pd.Series): |
| 150 | + def __repr__(self): |
| 151 | + # Prevent expansion due to _equity and _trades dfs |
| 152 | + with pd.option_context('max_colwidth', 20): |
| 153 | + return super().__repr__() |
0 commit comments