Files
hyper/backtester.py

369 lines
17 KiB
Python

import argparse
import logging
import os
import sys
import sqlite3
import pandas as pd
import json
from datetime import datetime, timedelta
import itertools
import multiprocessing
from functools import partial
import time
import importlib
import signal
from logging_utils import setup_logging
def _run_trade_simulation(df: pd.DataFrame, capital: float, size_pct: float, leverage_long: int, leverage_short: int, taker_fee_pct: float, maker_fee_pct: float) -> tuple[float, list]:
"""
Simulates a trading strategy with portfolio management, including capital,
position sizing, leverage, and fees.
"""
df.dropna(inplace=True)
if df.empty: return capital, []
df['position_change'] = df['signal'].diff()
trades = []
entry_price = 0
asset_size = 0
current_position = 0 # 0=flat, 1=long, -1=short
equity = capital
for i, row in df.iterrows():
# --- Close Positions ---
if (current_position == 1 and row['signal'] != 1) or \
(current_position == -1 and row['signal'] != -1):
exit_value = asset_size * row['close']
fee = exit_value * (taker_fee_pct / 100)
if current_position == 1: # Closing a long
pnl_usd = (row['close'] - entry_price) * asset_size
equity += pnl_usd - fee
trades.append({'pnl_usd': pnl_usd, 'pnl_pct': (row['close'] - entry_price) / entry_price, 'type': 'long'})
elif current_position == -1: # Closing a short
pnl_usd = (entry_price - row['close']) * asset_size
equity += pnl_usd - fee
trades.append({'pnl_usd': pnl_usd, 'pnl_pct': (entry_price - row['close']) / entry_price, 'type': 'short'})
entry_price = 0
asset_size = 0
current_position = 0
# --- Open New Positions ---
if current_position == 0:
if row['signal'] == 1: # Open Long
margin_to_use = equity * (size_pct / 100)
trade_value = margin_to_use * leverage_long
asset_size = trade_value / row['close']
fee = trade_value * (taker_fee_pct / 100)
equity -= fee
entry_price = row['close']
current_position = 1
elif row['signal'] == -1: # Open Short
margin_to_use = equity * (size_pct / 100)
trade_value = margin_to_use * leverage_short
asset_size = trade_value / row['close']
fee = trade_value * (taker_fee_pct / 100)
equity -= fee
entry_price = row['close']
current_position = -1
return equity, trades
def simulation_worker(params: dict, db_path: str, coin: str, timeframe: str, start_date: str, end_date: str, strategy_class, sim_params: dict) -> tuple[dict, float, list]:
    """
    Worker function that loads data, runs the full simulation, and returns results.

    Args:
        params: Strategy parameter set to evaluate; passed to ``strategy_class``
            and echoed back as the first element of the result.
        db_path: Path of the SQLite database holding the candle tables.
        coin: First half of the table name (``"{coin}_{timeframe}"``).
        timeframe: Second half of the table name.
        start_date: Inclusive lower bound compared against ``datetime_utc``.
        end_date: Inclusive upper bound compared against ``datetime_utc``.
        strategy_class: Class instantiated as ``strategy_class(params)``; the
            instance must provide ``calculate_signals(df)``.
        sim_params: Keyword arguments forwarded to ``_run_trade_simulation``;
            must contain ``'capital'``.

    Returns:
        ``(params, final_equity, trades)``. When the data cannot be loaded or
        the date range is empty, the starting capital and an empty trade list
        are returned instead.
    """
    untouched_result = (params, sim_params['capital'], [])

    # The table name cannot be bound as a SQL parameter, so it is interpolated;
    # coin/timeframe are expected to come from trusted configuration.
    query = f'SELECT datetime_utc, open, high, low, close FROM "{coin}_{timeframe}" WHERE datetime_utc >= ? AND datetime_utc <= ? ORDER BY datetime_utc'
    try:
        conn = sqlite3.connect(db_path)
        try:
            df = pd.read_sql(query, conn, params=(start_date, end_date), parse_dates=['datetime_utc'])
        finally:
            # sqlite3's own context manager only commits/rolls back; the
            # connection has to be closed explicitly to avoid leaking it.
            conn.close()
        if not df.empty:
            df.set_index('datetime_utc', inplace=True)
    except Exception as e:
        # Best effort: a failing worker must not abort the whole optimization.
        print(f"Worker error loading data for params {params}: {e}")
        return untouched_result

    if df.empty:
        return untouched_result

    strategy_instance = strategy_class(params)
    df_with_signals = strategy_instance.calculate_signals(df)
    final_equity, trades = _run_trade_simulation(df_with_signals, **sim_params)
    return (params, final_equity, trades)
def init_worker():
    """Pool initializer: install the 'ignore' disposition for SIGINT in the calling process."""
    ignore_disposition = signal.SIG_IGN
    signal.signal(signal.SIGINT, ignore_disposition)
class Backtester:
    """
    Walk-forward optimizer for a single configured trading strategy.

    Configuration is read from ``_data/backtesting_conf.json`` and
    ``_data/strategies.json``; candles are read from the SQLite database at
    ``_data/market_data.db`` (all paths relative to the working directory).
    """

    # Upper bound on optimization worker processes (the value that used to be
    # hard-coded); the actual count is further limited by CPU count and by the
    # number of parameter combinations.
    MAX_WORKERS = 60

    def __init__(self, log_level: str, strategy_name_to_test: str, start_date: str, sim_params: dict):
        """
        Load configuration and import the strategy class.

        Args:
            log_level: Passed to ``setup_logging``.
            strategy_name_to_test: Key looked up in the backtesting config.
            start_date: Lower bound of the history loaded for the walk-forward run.
            sim_params: Keyword arguments for ``_run_trade_simulation``
                (must include ``'capital'``).

        Exits the process with status 1 when a configuration entry is missing
        or the strategy class cannot be imported.
        """
        setup_logging(log_level, 'Backtester')
        self.db_path = os.path.join("_data", "market_data.db")
        self.simulation_params = sim_params
        self.pool = None
        self.full_history_start_date = start_date

        self.backtest_config = self._load_backtest_config(strategy_name_to_test)
        if self.backtest_config is None:
            # Without this guard the next line would die with an AttributeError.
            logging.error(f"No backtest configuration available for '{strategy_name_to_test}'.")
            sys.exit(1)

        self.strategy_name = self.backtest_config.get('strategy_name')
        self.strategy_config = self._load_strategy_config()
        if self.strategy_config is None:
            logging.error(f"No strategy configuration available for '{self.strategy_name}'.")
            sys.exit(1)

        self.params = self.strategy_config.get('parameters', {})
        self.coin = self.params.get('coin')
        self.timeframe = self.params.get('timeframe')

        try:
            # 'script' is a dotted path whose last component is the class name.
            module_path, class_name = self.backtest_config['script'].rsplit('.', 1)
            module = importlib.import_module(module_path)
            self.strategy_class = getattr(module, class_name)
            logging.info(f"Successfully loaded strategy class '{class_name}'.")
        except (ImportError, AttributeError, KeyError) as e:
            logging.error(f"Could not load strategy script '{self.backtest_config.get('script')}': {e}")
            sys.exit(1)

    def _load_backtest_config(self, name_to_test: str):
        """Return the entry ``name_to_test`` from the backtesting config, or None if the file is unreadable or the key is absent."""
        config_path = os.path.join("_data", "backtesting_conf.json")
        try:
            with open(config_path, 'r', encoding='utf-8') as f:
                return json.load(f).get(name_to_test)
        except (FileNotFoundError, json.JSONDecodeError) as e:
            logging.error(f"Could not load backtesting configuration: {e}")
            return None

    def _load_strategy_config(self):
        """Return the entry for ``self.strategy_name`` from the strategies file, or None if the file is unreadable or the key is absent."""
        config_path = os.path.join("_data", "strategies.json")
        try:
            with open(config_path, 'r', encoding='utf-8') as f:
                return json.load(f).get(self.strategy_name)
        except (FileNotFoundError, json.JSONDecodeError) as e:
            logging.error(f"Could not load strategy configuration: {e}")
            return None

    def run_walk_forward_optimization(self, optimization_weeks: int, testing_weeks: int, step_weeks: int):
        """
        Run the walk-forward loop and print per-period and summary reports.

        For each period the best parameters are searched on an in-sample
        window of ``optimization_weeks`` and then evaluated on the following
        ``testing_weeks``; the window start then advances by ``step_weeks``.
        Returns None; results are printed/logged.
        """
        # NOTE(review): the end bound is a bare date string; whether rows later
        # on the current day match depends on how datetime_utc is stored.
        full_df = self.load_data(self.full_history_start_date, datetime.now().strftime("%Y-%m-%d"))
        if full_df.empty:
            return

        optimization_delta = timedelta(weeks=optimization_weeks)
        testing_delta = timedelta(weeks=testing_weeks)
        step_delta = timedelta(weeks=step_weeks)

        # Collected for the aggregate report, which is not implemented yet.
        all_out_of_sample_trades = []
        all_period_summaries = []

        current_date = full_df.index[0]
        end_date = full_df.index[-1]
        period_num = 1

        while current_date + optimization_delta + testing_delta <= end_date:
            logging.info(f"\n--- Starting Walk-Forward Period {period_num} ---")
            in_sample_start = current_date
            in_sample_end = in_sample_start + optimization_delta
            out_of_sample_end = in_sample_end + testing_delta

            in_sample_df = full_df[in_sample_start:in_sample_end]
            # Label slicing includes both endpoints, so the test window must
            # start strictly after in_sample_end; otherwise the boundary bar
            # would be part of both the optimization and the test data.
            oos_rows = (full_df.index > in_sample_end) & (full_df.index <= out_of_sample_end)
            out_of_sample_df = full_df[oos_rows]
            if in_sample_df.empty or out_of_sample_df.empty:
                break

            logging.info(f"In-Sample (Optimization): {in_sample_df.index[0].date()} to {in_sample_df.index[-1].date()}")
            logging.info(f"Out-of-Sample (Testing): {out_of_sample_df.index[0].date()} to {out_of_sample_df.index[-1].date()}")

            best_result = self._find_best_params(in_sample_df)
            if not best_result:
                all_period_summaries.append({"period": period_num, "params": "None Found"})
                current_date += step_delta
                period_num += 1
                continue

            print("\n--- [1] In-Sample Optimization Result ---")
            print(f"Best Parameters Found: {best_result['params']}")
            self._generate_report(best_result['final_equity'], best_result['trades_list'], "In-Sample Performance with Best Params")

            logging.info("\n--- [2] Forward Testing on Out-of-Sample Data ---")
            # Copy so the strategy cannot alter the shared full-history frame.
            df_with_signals = self.strategy_class(best_result['params']).calculate_signals(out_of_sample_df.copy())
            final_equity_oos, out_of_sample_trades = _run_trade_simulation(df_with_signals, **self.simulation_params)
            all_out_of_sample_trades.extend(out_of_sample_trades)
            oos_summary = self._generate_report(final_equity_oos, out_of_sample_trades, "Out-of-Sample Performance")

            # Store the summary for the final table
            all_period_summaries.append({"period": period_num, "params": best_result['params'], **oos_summary})
            current_date += step_delta
            period_num += 1

        # TODO: aggregate walk-forward report over all_out_of_sample_trades;
        # it needs equity tracking across periods before it can be produced.
        print("\n" + "="*50)
        print("="*50)

        # --- Final summary table of best parameters and performance per period ---
        print("\n--- Summary of Best Parameters and Performance per Period ---")
        header = f"{'#':<3} | {'Best Parameters':<30} | {'Trades':>8} | {'Longs':>6} | {'Shorts':>7} | {'Win %':>8} | {'L Win %':>9} | {'S Win %':>9} | {'Return %':>10} | {'Equity':>15}"
        print(header)
        print("-" * len(header))
        for item in all_period_summaries:
            # Periods without a usable parameter set only carry 'period' and
            # 'params'; every metric column then shows 'N/A'.
            params_str = str(item.get('params', 'N/A'))
            trades = item.get('num_trades', 'N/A')
            longs = item.get('num_longs', 'N/A')
            shorts = item.get('num_shorts', 'N/A')
            win_rate = f"{item.get('win_rate', 0):.2f}%" if 'win_rate' in item else 'N/A'
            long_win_rate = f"{item.get('long_win_rate', 0):.2f}%" if 'long_win_rate' in item else 'N/A'
            short_win_rate = f"{item.get('short_win_rate', 0):.2f}%" if 'short_win_rate' in item else 'N/A'
            return_pct = f"{item.get('return_pct', 0):.2f}%" if 'return_pct' in item else 'N/A'
            equity = f"${item.get('final_equity', 0):,.2f}" if 'final_equity' in item else 'N/A'
            print(f"{item['period']:<3} | {params_str:<30} | {trades:>8} | {longs:>6} | {shorts:>7} | {win_rate:>8} | {long_win_rate:>9} | {short_win_rate:>9} | {return_pct:>10} | {equity:>15}")

    def _find_best_params(self, df: pd.DataFrame) -> dict:
        """
        Grid-search the configured parameter ranges in parallel.

        Only the first and last index values of ``df`` are used: each worker
        reloads that date range from the database itself.

        Returns:
            ``{'params', 'final_equity', 'trades_list'}`` for the combination
            with the highest final equity among those that produced at least
            one trade, or None when no combination traded.
        """
        param_configs = self.backtest_config.get('optimization_params', {})
        param_names = list(param_configs.keys())
        # 'end' is treated as inclusive.
        param_ranges = [range(p['start'], p['end'] + 1, p['step']) for p in param_configs.values()]
        param_dicts = [dict(zip(param_names, combo)) for combo in itertools.product(*param_ranges)]
        logging.info(f"Optimizing on {len(param_dicts)} combinations...")
        if not param_dicts:
            return None

        # Never start more processes than there are CPUs or tasks.
        num_workers = max(1, min(self.MAX_WORKERS, os.cpu_count() or 1, len(param_dicts)))
        worker = partial(
            simulation_worker,
            db_path=self.db_path, coin=self.coin, timeframe=self.timeframe,
            start_date=df.index[0].isoformat(), end_date=df.index[-1].isoformat(),
            strategy_class=self.strategy_class,
            sim_params=self.simulation_params
        )

        # self.pool stays set while workers are alive so an external handler
        # can terminate them; it is always reset once they are gone.
        self.pool = multiprocessing.Pool(processes=num_workers, initializer=init_worker)
        try:
            all_results = self.pool.map(worker, param_dicts)
            self.pool.close()
        except BaseException:
            self.pool.terminate()
            raise
        finally:
            self.pool.join()
            self.pool = None

        results = [
            {'params': params, 'final_equity': final_equity, 'trades_list': trades}
            for params, final_equity, trades in all_results
            if trades
        ]
        if not results:
            return None
        return max(results, key=lambda x: x['final_equity'])

    def load_data(self, start_date, end_date):
        """
        Load all columns of the ``"{coin}_{timeframe}"`` table between the two
        bounds (inclusive), indexed by ``datetime_utc``.

        Returns an empty DataFrame when nothing matches or loading fails.
        """
        table_name = f"{self.coin}_{self.timeframe}"
        logging.info(f"Loading full dataset for {table_name}...")
        query = f'SELECT * FROM "{table_name}" WHERE datetime_utc >= ? AND datetime_utc <= ? ORDER BY datetime_utc'
        try:
            conn = sqlite3.connect(self.db_path)
            try:
                df = pd.read_sql(query, conn, params=(start_date, end_date), parse_dates=['datetime_utc'])
            finally:
                # sqlite3's context manager does not close the connection.
                conn.close()
            if df.empty:
                return pd.DataFrame()
            df.set_index('datetime_utc', inplace=True)
            return df
        except Exception as e:
            logging.error(f"Failed to load data for backtest: {e}")
            return pd.DataFrame()

    def _generate_report(self, final_equity: float, trades: list, title: str) -> dict:
        """
        Calculates, prints, and returns a detailed performance report.

        A trade counts as a win when its 'pnl_pct' is strictly positive.
        With no trades, the starting capital is reported as final equity and
        all other metrics are zero.
        """
        print(f"\n--- {title} ---")
        initial_capital = self.simulation_params['capital']
        if not trades:
            print("No trades were executed during this period.")
            print(f"Final Equity: ${initial_capital:,.2f}")
            return {"num_trades": 0, "num_longs": 0, "num_shorts": 0, "win_rate": 0, "long_win_rate": 0, "short_win_rate": 0, "return_pct": 0, "final_equity": initial_capital}

        num_trades = len(trades)
        long_trades = [t for t in trades if t.get('type') == 'long']
        short_trades = [t for t in trades if t.get('type') == 'short']

        total_wins = sum(1 for t in trades if t['pnl_pct'] > 0)
        long_wins = sum(1 for t in long_trades if t['pnl_pct'] > 0)
        short_wins = sum(1 for t in short_trades if t['pnl_pct'] > 0)

        win_rate = (total_wins / num_trades) * 100
        long_win_rate = (long_wins / len(long_trades)) * 100 if long_trades else 0
        short_win_rate = (short_wins / len(short_trades)) * 100 if short_trades else 0
        total_return_pct = ((final_equity - initial_capital) / initial_capital) * 100

        print(f"Final Equity: ${final_equity:,.2f}")
        print(f"Total Return: {total_return_pct:.2f}%")
        print(f"Total Trades: {num_trades} (Longs: {len(long_trades)}, Shorts: {len(short_trades)})")
        print(f"Win Rate (Overall): {win_rate:.2f}%")
        print(f"Win Rate (Longs): {long_win_rate:.2f}%")
        print(f"Win Rate (Shorts): {short_win_rate:.2f}%")

        # Return a dictionary of the key metrics for the summary table
        return {
            "num_trades": num_trades,
            "num_longs": len(long_trades),
            "num_shorts": len(short_trades),
            "win_rate": win_rate,
            "long_win_rate": long_win_rate,
            "short_win_rate": short_win_rate,
            "return_pct": total_return_pct,
            "final_equity": final_equity
        }
if __name__ == "__main__":
    # Command-line entry point: parse options, build the backtester, run the
    # walk-forward optimization and make sure worker processes are cleaned up.
    cli = argparse.ArgumentParser(description="Run a Walk-Forward Optimization for a trading strategy.")
    cli.add_argument("--strategy", required=True, help="The name of the backtest config to run.")
    cli.add_argument("--start-date", default="2020-08-01", help="The overall start date for historical data.")

    # Walk-forward window sizes, all integer week counts.
    for week_flag, week_default in (
        ("--optimization-weeks", 4),
        ("--testing-weeks", 1),
        ("--step-weeks", 1),
    ):
        cli.add_argument(week_flag, type=int, default=week_default)

    cli.add_argument("--log-level", default="normal", choices=['off', 'normal', 'debug'])

    # Simulation settings: (destination name, value type, default). The same
    # table drives both the CLI flags and the sim_params dictionary below.
    simulation_options = (
        ("capital", float, 1000),
        ("size_pct", float, 50),
        ("leverage_long", int, 3),
        ("leverage_short", int, 2),
        ("taker_fee_pct", float, 0.045),
        ("maker_fee_pct", float, 0.015),
    )
    for option_name, option_type, option_default in simulation_options:
        cli.add_argument("--" + option_name.replace("_", "-"), type=option_type, default=option_default)

    args = cli.parse_args()
    sim_params = {option_name: getattr(args, option_name) for option_name, _, _ in simulation_options}

    backtester = Backtester(
        log_level=args.log_level,
        strategy_name_to_test=args.strategy,
        start_date=args.start_date,
        sim_params=sim_params,
    )
    try:
        backtester.run_walk_forward_optimization(
            optimization_weeks=args.optimization_weeks,
            testing_weeks=args.testing_weeks,
            step_weeks=args.step_weeks,
        )
    except KeyboardInterrupt:
        logging.info("\nBacktest optimization cancelled by user.")
    finally:
        # A pool that is still referenced here was interrupted mid-run.
        if backtester.pool:
            logging.info("Terminating worker processes...")
            backtester.pool.terminate()
            backtester.pool.join()
            logging.info("Worker processes terminated.")