"""Walk-forward optimization backtester for trading strategies.

The script reads candle data from the SQLite database ``_data/market_data.db``
(one table per ``"<coin>_<timeframe>"``), the backtest definition from
``_data/backtesting_conf.json`` and the strategy definition from
``_data/strategies.json``. For every walk-forward period it grid-searches the
strategy parameters on the in-sample window (in parallel worker processes) and
then replays the best parameter set on the following out-of-sample window.
"""
import argparse
import contextlib
import importlib
import itertools
import json
import logging
import multiprocessing
import os
import signal
import sqlite3
import sys
import time
from datetime import datetime, timedelta
from functools import partial
from typing import Optional

import pandas as pd

from logging_utils import setup_logging

# Upper bound on optimizer worker processes. The actual pool size is further
# capped by the CPU count and by the number of parameter combinations.
MAX_WORKER_PROCESSES = 60


def _run_trade_simulation(df: pd.DataFrame, capital: float, size_pct: float,
                          leverage_long: int, leverage_short: int,
                          taker_fee_pct: float, maker_fee_pct: float) -> tuple[float, list]:
    """Simulate trading the ``signal`` column of *df* with position sizing and fees.

    Rows containing any NaN are ignored. For each remaining row, an open
    position is closed at ``close`` when the signal no longer matches it, and a
    new position is opened at ``close`` when flat and the signal is ``1``
    (long) or ``-1`` (short). A reversal therefore closes and re-opens on the
    same row.

    Args:
        df: Frame with at least ``signal`` and ``close`` columns. It is not
            modified.
        capital: Starting equity.
        size_pct: Percentage of current equity used as margin per trade.
        leverage_long: Multiplier applied to the margin for long trades.
        leverage_short: Multiplier applied to the margin for short trades.
        taker_fee_pct: Fee in percent of traded value, charged on every entry
            and every exit.
        maker_fee_pct: Accepted for interface compatibility; currently unused
            because every fill is charged the taker fee.

    Returns:
        ``(final_equity, trades)`` where each trade is a dict with ``pnl_usd``
        (before fees), ``pnl_pct`` (unleveraged price move) and ``type``
        (``'long'`` or ``'short'``). A position still open after the last row
        is not closed: only its entry fee is reflected in the equity and it
        does not appear in ``trades``.
    """
    # Work on a filtered copy so the caller's frame is left untouched.
    clean = df.dropna()
    if clean.empty:
        return capital, []

    taker_fee_rate = taker_fee_pct / 100
    size_fraction = size_pct / 100

    trades = []
    entry_price = 0
    asset_size = 0
    current_position = 0  # 0=flat, 1=long, -1=short
    equity = capital

    # Only these two columns drive the simulation; iterating the raw arrays
    # avoids building a Series per row as iterrows() does.
    signals = clean['signal'].to_numpy()
    closes = clean['close'].to_numpy()

    for signal_value, close in zip(signals, closes):
        # --- Close Positions ---
        if current_position != 0 and signal_value != current_position:
            exit_fee = (asset_size * close) * taker_fee_rate
            # Multiplying by the direction (+1/-1) flips the sign for shorts.
            price_move = (close - entry_price) * current_position
            pnl_usd = price_move * asset_size
            equity += pnl_usd - exit_fee
            trades.append({
                'pnl_usd': pnl_usd,
                'pnl_pct': price_move / entry_price,
                'type': 'long' if current_position == 1 else 'short',
            })
            entry_price = 0
            asset_size = 0
            current_position = 0

        # --- Open New Positions ---
        if current_position == 0 and signal_value in (1, -1):
            going_long = signal_value == 1
            leverage = leverage_long if going_long else leverage_short
            margin_to_use = equity * size_fraction
            trade_value = margin_to_use * leverage
            asset_size = trade_value / close
            equity -= trade_value * taker_fee_rate
            entry_price = close
            current_position = 1 if going_long else -1

    return equity, trades


def simulation_worker(params: dict, db_path: str, coin: str, timeframe: str,
                      start_date: str, end_date: str, strategy_class,
                      sim_params: dict) -> tuple[dict, float, list]:
    """Load candles, compute signals for *params* and run the trade simulation.

    Intended to run inside a pool worker: it opens its own database connection
    and reports load failures via ``print``.

    Args:
        params: Strategy parameter set to evaluate.
        db_path: Path of the SQLite database.
        coin: First part of the table name ``"<coin>_<timeframe>"``.
        timeframe: Second part of the table name.
        start_date: Inclusive lower bound for ``datetime_utc``.
        end_date: Inclusive upper bound for ``datetime_utc``.
        strategy_class: Class instantiated as ``strategy_class(params)``; its
            ``calculate_signals(df)`` result is fed to the simulation.
        sim_params: Keyword arguments for ``_run_trade_simulation``.

    Returns:
        ``(params, final_equity, trades)``. When the data cannot be loaded or
        is empty, the starting capital and an empty trade list are returned.
    """
    no_result = (params, sim_params['capital'], [])
    # Table names cannot be bound as SQL parameters, hence the interpolation.
    query = (
        f'SELECT datetime_utc, open, high, low, close FROM "{coin}_{timeframe}" '
        'WHERE datetime_utc >= ? AND datetime_utc <= ? ORDER BY datetime_utc'
    )
    try:
        # sqlite3's own context manager only commits/rolls back; closing()
        # guarantees the connection is released after each task.
        with contextlib.closing(sqlite3.connect(db_path)) as conn:
            df = pd.read_sql(query, conn, params=(start_date, end_date),
                             parse_dates=['datetime_utc'])
    except Exception as e:
        print(f"Worker error loading data for params {params}: {e}")
        return no_result

    if df.empty:
        return no_result
    df.set_index('datetime_utc', inplace=True)

    strategy_instance = strategy_class(params)
    df_with_signals = strategy_instance.calculate_signals(df)
    final_equity, trades = _run_trade_simulation(df_with_signals, **sim_params)
    return (params, final_equity, trades)


def init_worker():
    """Make pool workers ignore SIGINT so Ctrl+C is handled by the parent only."""
    signal.signal(signal.SIGINT, signal.SIG_IGN)


class Backtester:
    """Runs a walk-forward optimization for one configured strategy."""

    def __init__(self, log_level: str, strategy_name_to_test: str, start_date: str,
                 sim_params: dict):
        """Load configuration and import the strategy class.

        Exits the process with status 1 when the backtest configuration, the
        strategy configuration or the strategy class cannot be loaded.

        Args:
            log_level: Passed to ``setup_logging``.
            strategy_name_to_test: Key of the entry in ``backtesting_conf.json``.
            start_date: Start of the full history to load.
            sim_params: Keyword arguments for ``_run_trade_simulation``.
        """
        setup_logging(log_level, 'Backtester')
        self.db_path = os.path.join("_data", "market_data.db")
        self.simulation_params = sim_params
        self.pool = None  # live multiprocessing.Pool while an optimization runs
        self.full_history_start_date = start_date

        self.backtest_config = self._load_backtest_config(strategy_name_to_test)
        if self.backtest_config is None:
            logging.error(f"No backtest configuration available for '{strategy_name_to_test}'.")
            sys.exit(1)

        self.strategy_name = self.backtest_config.get('strategy_name')
        self.strategy_config = self._load_strategy_config()
        if self.strategy_config is None:
            logging.error(f"No strategy configuration available for '{self.strategy_name}'.")
            sys.exit(1)

        self.params = self.strategy_config.get('parameters', {})
        self.coin = self.params.get('coin')
        self.timeframe = self.params.get('timeframe')

        try:
            module_path, class_name = self.backtest_config['script'].rsplit('.', 1)
            module = importlib.import_module(module_path)
            self.strategy_class = getattr(module, class_name)
            logging.info(f"Successfully loaded strategy class '{class_name}'.")
        except (ImportError, AttributeError, KeyError) as e:
            logging.error(
                f"Could not load strategy script '{self.backtest_config.get('script')}': {e}"
            )
            sys.exit(1)

    @staticmethod
    def _load_json_entry(config_path: str, key, description: str):
        """Return ``json[key]`` from *config_path*, or ``None`` on failure.

        A missing file or invalid JSON is logged as
        ``"Could not load <description> configuration: ..."``.
        """
        try:
            with open(config_path, 'r', encoding='utf-8') as f:
                return json.load(f).get(key)
        except (FileNotFoundError, json.JSONDecodeError) as e:
            logging.error(f"Could not load {description} configuration: {e}")
            return None

    def _load_backtest_config(self, name_to_test: str):
        """Return the backtest entry named *name_to_test*, or ``None``."""
        config_path = os.path.join("_data", "backtesting_conf.json")
        return self._load_json_entry(config_path, name_to_test, "backtesting")

    def _load_strategy_config(self):
        """Return the strategy entry for ``self.strategy_name``, or ``None``."""
        config_path = os.path.join("_data", "strategies.json")
        return self._load_json_entry(config_path, self.strategy_name, "strategy")

    def run_walk_forward_optimization(self, optimization_weeks: int, testing_weeks: int,
                                      step_weeks: int):
        """Optimize on rolling in-sample windows and test on the following window.

        Starting at the first loaded candle, each period optimizes over
        ``optimization_weeks``, forward-tests the best parameters over the
        next ``testing_weeks`` and then advances by ``step_weeks``. Per-period
        reports and a final summary table are printed.

        Args:
            optimization_weeks: Length of the in-sample window.
            testing_weeks: Length of the out-of-sample window.
            step_weeks: Distance between consecutive period starts; must be
                positive, otherwise the method logs an error and returns.
        """
        if step_weeks <= 0:
            # A non-positive step would never advance the window.
            logging.error(f"step_weeks must be positive, got {step_weeks}.")
            return

        full_df = self.load_data(self.full_history_start_date,
                                 datetime.now().strftime("%Y-%m-%d"))
        if full_df.empty:
            return

        optimization_delta = timedelta(weeks=optimization_weeks)
        testing_delta = timedelta(weeks=testing_weeks)
        step_delta = timedelta(weeks=step_weeks)

        all_out_of_sample_trades = []
        all_period_summaries = []

        current_date = full_df.index[0]
        end_date = full_df.index[-1]
        period_num = 1

        while current_date + optimization_delta + testing_delta <= end_date:
            logging.info(f"\n--- Starting Walk-Forward Period {period_num} ---")

            in_sample_start = current_date
            in_sample_end = in_sample_start + optimization_delta
            out_of_sample_end = in_sample_end + testing_delta

            # NOTE(review): label slices include both endpoints, so a candle
            # stamped exactly at in_sample_end is part of both windows.
            # Confirm this one-bar overlap is intended.
            in_sample_df = full_df[in_sample_start:in_sample_end]
            out_of_sample_df = full_df[in_sample_end:out_of_sample_end]

            if in_sample_df.empty or out_of_sample_df.empty:
                break

            logging.info(
                f"In-Sample (Optimization): {in_sample_df.index[0].date()} "
                f"to {in_sample_df.index[-1].date()}"
            )
            logging.info(
                f"Out-of-Sample (Testing): {out_of_sample_df.index[0].date()} "
                f"to {out_of_sample_df.index[-1].date()}"
            )

            best_result = self._find_best_params(in_sample_df)
            if best_result:
                print("\n--- [1] In-Sample Optimization Result ---")
                print(f"Best Parameters Found: {best_result['params']}")
                self._generate_report(best_result['final_equity'], best_result['trades_list'],
                                      "In-Sample Performance with Best Params")

                logging.info("\n--- [2] Forward Testing on Out-of-Sample Data ---")
                strategy = self.strategy_class(best_result['params'])
                df_with_signals = strategy.calculate_signals(out_of_sample_df.copy())
                final_equity_oos, out_of_sample_trades = _run_trade_simulation(
                    df_with_signals, **self.simulation_params
                )
                all_out_of_sample_trades.extend(out_of_sample_trades)
                oos_summary = self._generate_report(final_equity_oos, out_of_sample_trades,
                                                    "Out-of-Sample Performance")

                # Store the summary for the final table
                all_period_summaries.append(
                    {"period": period_num, "params": best_result['params'], **oos_summary}
                )
            else:
                all_period_summaries.append({"period": period_num, "params": "None Found"})

            current_date += step_delta
            period_num += 1

        # TODO: print a "FINAL AGGREGATE WALK-FORWARD PERFORMANCE" report from
        # all_out_of_sample_trades once aggregate equity tracking exists.
        print("\n" + "=" * 50)
        print("=" * 50)

        self._print_period_summary_table(all_period_summaries)

    @staticmethod
    def _print_period_summary_table(period_summaries: list) -> None:
        """Print one table row per walk-forward period; missing metrics show as N/A."""

        def formatted(item: dict, key: str, prefix: str, spec: str, suffix: str) -> str:
            if key not in item:
                return 'N/A'
            return f"{prefix}{format(item[key], spec)}{suffix}"

        print("\n--- Summary of Best Parameters and Performance per Period ---")
        header = (
            f"{'#':<3} | {'Best Parameters':<30} | {'Trades':>8} | {'Longs':>6} | "
            f"{'Shorts':>7} | {'Win %':>8} | {'L Win %':>9} | {'S Win %':>9} | "
            f"{'Return %':>10} | {'Equity':>15}"
        )
        print(header)
        print("-" * len(header))

        for item in period_summaries:
            params_str = str(item.get('params', 'N/A'))
            trades = item.get('num_trades', 'N/A')
            longs = item.get('num_longs', 'N/A')
            shorts = item.get('num_shorts', 'N/A')
            win_rate = formatted(item, 'win_rate', '', '.2f', '%')
            long_win_rate = formatted(item, 'long_win_rate', '', '.2f', '%')
            short_win_rate = formatted(item, 'short_win_rate', '', '.2f', '%')
            return_pct = formatted(item, 'return_pct', '', '.2f', '%')
            equity = formatted(item, 'final_equity', '$', ',.2f', '')

            print(
                f"{item['period']:<3} | {params_str:<30} | {trades:>8} | {longs:>6} | "
                f"{shorts:>7} | {win_rate:>8} | {long_win_rate:>9} | {short_win_rate:>9} | "
                f"{return_pct:>10} | {equity:>15}"
            )

    def _find_best_params(self, df: pd.DataFrame) -> Optional[dict]:
        """Grid-search the configured parameter ranges over the period of *df*.

        Every combination of the integer ranges in the backtest config's
        ``optimization_params`` (``start``..``end`` inclusive, by ``step``) is
        evaluated by ``simulation_worker`` in a process pool. Workers reload
        the candles from the database between the first and last index of *df*.

        Returns:
            A dict with ``params``, ``final_equity`` and ``trades_list`` for
            the combination with the highest final equity, considering only
            combinations that closed at least one trade; ``None`` if there is
            no such combination.
        """
        param_configs = self.backtest_config.get('optimization_params', {})
        param_names = list(param_configs.keys())
        param_ranges = [
            range(p['start'], p['end'] + 1, p['step']) for p in param_configs.values()
        ]
        param_dicts = [
            dict(zip(param_names, combo)) for combo in itertools.product(*param_ranges)
        ]

        logging.info(f"Optimizing on {len(param_dicts)} combinations...")
        if not param_dicts:
            return None

        # Never start more processes than there are CPUs or tasks to run.
        num_workers = max(1, min(MAX_WORKER_PROCESSES, os.cpu_count() or 1, len(param_dicts)))
        # The pool is stored on self so the entry script can terminate it if
        # the run is interrupted or fails.
        self.pool = multiprocessing.Pool(processes=num_workers, initializer=init_worker)

        worker = partial(
            simulation_worker,
            db_path=self.db_path,
            coin=self.coin,
            timeframe=self.timeframe,
            start_date=df.index[0].isoformat(),
            end_date=df.index[-1].isoformat(),
            strategy_class=self.strategy_class,
            sim_params=self.simulation_params,
        )

        all_results = self.pool.map(worker, param_dicts)

        self.pool.close()
        self.pool.join()
        self.pool = None

        results = [
            {'params': params, 'final_equity': final_equity, 'trades_list': trades}
            for params, final_equity, trades in all_results
            if trades
        ]
        if not results:
            return None
        return max(results, key=lambda x: x['final_equity'])

    def load_data(self, start_date, end_date):
        """Load all columns of the strategy's table between the two dates.

        Returns:
            A frame indexed by ``datetime_utc``, or an empty frame when no rows
            match or loading fails (the failure is logged).
        """
        table_name = f"{self.coin}_{self.timeframe}"
        logging.info(f"Loading full dataset for {table_name}...")
        # Table names cannot be bound as SQL parameters, hence the interpolation.
        query = (
            f'SELECT * FROM "{table_name}" '
            'WHERE datetime_utc >= ? AND datetime_utc <= ? ORDER BY datetime_utc'
        )
        try:
            # closing() releases the connection; sqlite3's own context manager
            # only manages the transaction.
            with contextlib.closing(sqlite3.connect(self.db_path)) as conn:
                df = pd.read_sql(query, conn, params=(start_date, end_date),
                                 parse_dates=['datetime_utc'])
            if df.empty:
                return pd.DataFrame()
            df.set_index('datetime_utc', inplace=True)
            return df
        except Exception as e:
            logging.error(f"Failed to load data for backtest: {e}")
            return pd.DataFrame()

    def _generate_report(self, final_equity: float, trades: list, title: str) -> dict:
        """Calculates, prints, and returns a detailed performance report.

        Args:
            final_equity: Equity at the end of the simulated period.
            trades: Trade dicts as produced by ``_run_trade_simulation``.
            title: Heading printed above the report.

        Returns:
            Dict with ``num_trades``, ``num_longs``, ``num_shorts``,
            ``win_rate``, ``long_win_rate``, ``short_win_rate``, ``return_pct``
            and ``final_equity``. With no trades, all metrics are 0 and the
            initial capital is reported as final equity.
        """

        def win_rate_of(subset: list):
            # A trade counts as a win when its price move was positive.
            if not subset:
                return 0
            winners = sum(1 for t in subset if t['pnl_pct'] > 0)
            return (winners / len(subset)) * 100

        print(f"\n--- {title} ---")
        initial_capital = self.simulation_params['capital']

        if not trades:
            print("No trades were executed during this period.")
            print(f"Final Equity: ${initial_capital:,.2f}")
            return {"num_trades": 0, "num_longs": 0, "num_shorts": 0, "win_rate": 0,
                    "long_win_rate": 0, "short_win_rate": 0, "return_pct": 0,
                    "final_equity": initial_capital}

        num_trades = len(trades)
        long_trades = [t for t in trades if t.get('type') == 'long']
        short_trades = [t for t in trades if t.get('type') == 'short']

        win_rate = win_rate_of(trades)
        long_win_rate = win_rate_of(long_trades)
        short_win_rate = win_rate_of(short_trades)

        total_return_pct = ((final_equity - initial_capital) / initial_capital) * 100

        print(f"Final Equity: ${final_equity:,.2f}")
        print(f"Total Return: {total_return_pct:.2f}%")
        print(f"Total Trades: {num_trades} (Longs: {len(long_trades)}, Shorts: {len(short_trades)})")
        print(f"Win Rate (Overall): {win_rate:.2f}%")
        print(f"Win Rate (Longs): {long_win_rate:.2f}%")
        print(f"Win Rate (Shorts): {short_win_rate:.2f}%")

        # Return a dictionary of the key metrics for the summary table
        return {
            "num_trades": num_trades,
            "num_longs": len(long_trades),
            "num_shorts": len(short_trades),
            "win_rate": win_rate,
            "long_win_rate": long_win_rate,
            "short_win_rate": short_win_rate,
            "return_pct": total_return_pct,
            "final_equity": final_equity,
        }


def main():
    """Parse command-line arguments and run the walk-forward optimization."""
    parser = argparse.ArgumentParser(
        description="Run a Walk-Forward Optimization for a trading strategy."
    )
    parser.add_argument("--strategy", required=True,
                        help="The name of the backtest config to run.")
    parser.add_argument("--start-date", default="2020-08-01",
                        help="The overall start date for historical data.")
    parser.add_argument("--optimization-weeks", type=int, default=4)
    parser.add_argument("--testing-weeks", type=int, default=1)
    parser.add_argument("--step-weeks", type=int, default=1)
    parser.add_argument("--log-level", default="normal", choices=['off', 'normal', 'debug'])
    parser.add_argument("--capital", type=float, default=1000)
    parser.add_argument("--size-pct", type=float, default=50)
    parser.add_argument("--leverage-long", type=int, default=3)
    parser.add_argument("--leverage-short", type=int, default=2)
    parser.add_argument("--taker-fee-pct", type=float, default=0.045)
    parser.add_argument("--maker-fee-pct", type=float, default=0.015)
    args = parser.parse_args()

    sim_params = {
        "capital": args.capital,
        "size_pct": args.size_pct,
        "leverage_long": args.leverage_long,
        "leverage_short": args.leverage_short,
        "taker_fee_pct": args.taker_fee_pct,
        "maker_fee_pct": args.maker_fee_pct,
    }

    backtester = Backtester(
        log_level=args.log_level,
        strategy_name_to_test=args.strategy,
        start_date=args.start_date,
        sim_params=sim_params,
    )

    try:
        backtester.run_walk_forward_optimization(
            optimization_weeks=args.optimization_weeks,
            testing_weeks=args.testing_weeks,
            step_weeks=args.step_weeks,
        )
    except KeyboardInterrupt:
        logging.info("\nBacktest optimization cancelled by user.")
    finally:
        # A pool is only still attached if an optimization did not finish.
        if backtester.pool:
            logging.info("Terminating worker processes...")
            backtester.pool.terminate()
            backtester.pool.join()
            logging.info("Worker processes terminated.")


if __name__ == "__main__":
    main()