# Source listing: 369 lines, 17 KiB, Python
import argparse
import importlib
import itertools
import json
import logging
import multiprocessing
import os
import signal
import sqlite3
import sys
import time
from contextlib import closing
from datetime import datetime, timedelta
from functools import partial

import pandas as pd

from logging_utils import setup_logging


def _run_trade_simulation(df: pd.DataFrame, capital: float, size_pct: float, leverage_long: int, leverage_short: int, taker_fee_pct: float, maker_fee_pct: float) -> tuple[float, list]:
    """
    Simulates a trading strategy with portfolio management, including capital,
    position sizing, leverage, and fees.

    Args:
        df: Frame with at least a ``signal`` column (1 = long, -1 = short,
            anything else = flat) and a ``close`` column. Rows containing NaN
            in any column are ignored. The frame itself is never modified.
        capital: Starting equity.
        size_pct: Percentage of current equity committed as margin per trade.
        leverage_long: Multiplier applied to the margin of long trades.
        leverage_short: Multiplier applied to the margin of short trades.
        taker_fee_pct: Fee, in percent of traded value, charged on every entry
            and every exit.
        maker_fee_pct: Accepted for interface compatibility; not used, every
            fill is charged the taker fee.

    Returns:
        ``(final_equity, trades)`` where each trade is a dict with the keys
        ``pnl_usd``, ``pnl_pct`` and ``type`` (``'long'`` or ``'short'``).
        A position that is still open on the last row is not closed, so its
        unrealized PnL is not part of the returned equity.
    """
    # dropna() without inplace returns a new frame, leaving the caller's data intact.
    clean = df.dropna()
    if clean.empty:
        return capital, []

    fee_rate = taker_fee_pct / 100
    margin_fraction = size_pct / 100

    trades = []
    entry_price = 0
    asset_size = 0
    current_position = 0  # 0=flat, 1=long, -1=short
    equity = capital

    # Only two columns are read per bar, so walk them directly instead of
    # materialising a Series for every row.
    bars = zip(clean['signal'].to_numpy(), clean['close'].to_numpy())
    for signal_value, close in bars:
        # --- Close Positions ---
        if current_position != 0 and signal_value != current_position:
            fee = asset_size * close * fee_rate

            if current_position == 1:  # Closing a long
                price_move = close - entry_price
                trade_type = 'long'
            else:  # Closing a short
                price_move = entry_price - close
                trade_type = 'short'

            pnl_usd = price_move * asset_size
            equity += pnl_usd - fee
            trades.append({'pnl_usd': pnl_usd, 'pnl_pct': price_move / entry_price, 'type': trade_type})

            entry_price = 0
            asset_size = 0
            current_position = 0

        # --- Open New Positions ---
        # Runs after the close step, so a signal flip reverses on the same bar.
        if current_position == 0 and (signal_value == 1 or signal_value == -1):
            going_long = signal_value == 1
            leverage = leverage_long if going_long else leverage_short

            # Position size is based on equity before the entry fee is charged.
            trade_value = equity * margin_fraction * leverage
            asset_size = trade_value / close
            equity -= trade_value * fee_rate
            entry_price = close
            current_position = 1 if going_long else -1

    return equity, trades
def simulation_worker(params: dict, db_path: str, coin: str, timeframe: str, start_date: str, end_date: str, strategy_class, sim_params: dict) -> tuple[dict, float, list]:
    """
    Worker function that loads data, runs the full simulation, and returns results.

    Args:
        params: Strategy parameter set to evaluate; returned unchanged so the
            caller can match results to inputs.
        db_path: Path of the SQLite database holding the candle tables.
        coin: Together with ``timeframe`` forms the table name ``"{coin}_{timeframe}"``.
        timeframe: See ``coin``.
        start_date: Inclusive lower bound compared against ``datetime_utc``.
        end_date: Inclusive upper bound compared against ``datetime_utc``.
        strategy_class: Callable taking ``params`` and returning an object with
            a ``calculate_signals(df)`` method.
        sim_params: Keyword arguments for ``_run_trade_simulation``; must
            contain ``'capital'``.

    Returns:
        ``(params, final_equity, trades)``. When the data cannot be loaded or
        the query returns no rows, the starting capital and an empty trade list
        are returned instead.
    """
    query = f'SELECT datetime_utc, open, high, low, close FROM "{coin}_{timeframe}" WHERE datetime_utc >= ? AND datetime_utc <= ? ORDER BY datetime_utc'
    try:
        # A sqlite3 connection used as a context manager only commits or rolls
        # back; closing() is what actually releases the handle after each task.
        with closing(sqlite3.connect(db_path)) as conn:
            df = pd.read_sql(query, conn, params=(start_date, end_date), parse_dates=['datetime_utc'])
    except Exception as e:
        # Best effort: a failed load counts as "no trades" so one bad task
        # does not abort the whole optimisation run.
        print(f"Worker error loading data for params {params}: {e}")
        return (params, sim_params['capital'], [])

    if df.empty:
        return (params, sim_params['capital'], [])

    df.set_index('datetime_utc', inplace=True)

    strategy_instance = strategy_class(params)
    df_with_signals = strategy_instance.calculate_signals(df)

    final_equity, trades = _run_trade_simulation(df_with_signals, **sim_params)
    return (params, final_equity, trades)
def init_worker():
    """Pool initializer: make the calling process ignore SIGINT."""
    ignore_handler = signal.SIG_IGN
    signal.signal(signal.SIGINT, ignore_handler)
class Backtester:
    """
    Walk-forward optimizer for a trading strategy.

    For each period the parameter grid from the backtest configuration is
    evaluated on an in-sample window, and the best parameter set is then
    forward-tested on the out-of-sample window that follows it.
    """

    # Upper bound on pool size; the actual size is also capped by the number
    # of CPUs and the number of parameter combinations.
    MAX_WORKERS = 60

    def __init__(self, log_level: str, strategy_name_to_test: str, start_date: str, sim_params: dict):
        """
        Load the backtest and strategy configuration and import the strategy class.

        Args:
            log_level: Passed to ``setup_logging``.
            strategy_name_to_test: Key of the entry in ``_data/backtesting_conf.json``.
            start_date: Lower bound of the history loaded for the walk-forward run.
            sim_params: Keyword arguments forwarded to ``_run_trade_simulation``.

        Exits the process with status 1 if a configuration entry is missing or
        the strategy class cannot be imported.
        """
        setup_logging(log_level, 'Backtester')
        self.db_path = os.path.join("_data", "market_data.db")
        self.simulation_params = sim_params

        self.backtest_config = self._load_backtest_config(strategy_name_to_test)
        if self.backtest_config is None:
            # Without this guard the next line fails with an opaque AttributeError.
            logging.error(f"No backtest configuration found for '{strategy_name_to_test}'.")
            sys.exit(1)

        self.strategy_name = self.backtest_config.get('strategy_name')
        self.strategy_config = self._load_strategy_config()
        if self.strategy_config is None:
            logging.error(f"No strategy configuration found for '{self.strategy_name}'.")
            sys.exit(1)

        self.params = self.strategy_config.get('parameters', {})
        self.coin = self.params.get('coin')
        self.timeframe = self.params.get('timeframe')
        # Holds the live multiprocessing pool so the caller can terminate it on interrupt.
        self.pool = None
        self.full_history_start_date = start_date
        try:
            module_path, class_name = self.backtest_config['script'].rsplit('.', 1)
            module = importlib.import_module(module_path)
            self.strategy_class = getattr(module, class_name)
            logging.info(f"Successfully loaded strategy class '{class_name}'.")
        except (ImportError, AttributeError, KeyError, ValueError) as e:
            # ValueError covers a 'script' value without a dot (unpacking fails).
            logging.error(f"Could not load strategy script '{self.backtest_config.get('script')}': {e}")
            sys.exit(1)

    @staticmethod
    def _load_json_entry(config_path: str, key, label: str):
        """Return ``json.load(config_path).get(key)``, or None (after logging) if the file is missing or invalid."""
        try:
            with open(config_path, 'r') as f:
                return json.load(f).get(key)
        except (FileNotFoundError, json.JSONDecodeError) as e:
            logging.error(f"Could not load {label} configuration: {e}")
            return None

    def _load_backtest_config(self, name_to_test: str):
        """Return the entry for ``name_to_test`` from ``_data/backtesting_conf.json``, or None."""
        config_path = os.path.join("_data", "backtesting_conf.json")
        return self._load_json_entry(config_path, name_to_test, "backtesting")

    def _load_strategy_config(self):
        """Return the entry for ``self.strategy_name`` from ``_data/strategies.json``, or None."""
        config_path = os.path.join("_data", "strategies.json")
        return self._load_json_entry(config_path, self.strategy_name, "strategy")

    @classmethod
    def _worker_count(cls, task_count: int) -> int:
        """Return the pool size: at least 1, at most MAX_WORKERS, the CPU count and ``task_count``."""
        available = os.cpu_count() or 1
        return max(1, min(cls.MAX_WORKERS, available, task_count))

    @staticmethod
    def _split_window(full_df: pd.DataFrame, window_start, optimization_delta: timedelta, testing_delta: timedelta):
        """
        Split one walk-forward window into (in_sample_df, out_of_sample_df).

        The in-sample frame covers ``[window_start, window_start + optimization_delta]``
        inclusive. The out-of-sample frame starts strictly after that boundary,
        so the boundary bar is never part of both frames.
        """
        in_sample_end = window_start + optimization_delta
        out_of_sample_end = in_sample_end + testing_delta

        in_sample_df = full_df.loc[window_start:in_sample_end]
        after_boundary = (full_df.index > in_sample_end) & (full_df.index <= out_of_sample_end)
        return in_sample_df, full_df[after_boundary]

    @staticmethod
    def _print_period_summaries(all_period_summaries: list) -> None:
        """Print one table row per walk-forward period; missing metrics are shown as 'N/A'."""

        def metric(item, key, pattern):
            return pattern.format(item[key]) if key in item else 'N/A'

        print("\n--- Summary of Best Parameters and Performance per Period ---")
        header = f"{'#':<3} | {'Best Parameters':<30} | {'Trades':>8} | {'Longs':>6} | {'Shorts':>7} | {'Win %':>8} | {'L Win %':>9} | {'S Win %':>9} | {'Return %':>10} | {'Equity':>15}"
        print(header)
        print("-" * len(header))
        for item in all_period_summaries:
            params_str = str(item.get('params', 'N/A'))
            trades = item.get('num_trades', 'N/A')
            longs = item.get('num_longs', 'N/A')
            shorts = item.get('num_shorts', 'N/A')
            win_rate = metric(item, 'win_rate', "{:.2f}%")
            long_win_rate = metric(item, 'long_win_rate', "{:.2f}%")
            short_win_rate = metric(item, 'short_win_rate', "{:.2f}%")
            return_pct = metric(item, 'return_pct', "{:.2f}%")
            equity = metric(item, 'final_equity', "${:,.2f}")
            print(f"{item['period']:<3} | {params_str:<30} | {trades:>8} | {longs:>6} | {shorts:>7} | {win_rate:>8} | {long_win_rate:>9} | {short_win_rate:>9} | {return_pct:>10} | {equity:>15}")

    def run_walk_forward_optimization(self, optimization_weeks: int, testing_weeks: int, step_weeks: int):
        """
        Run the walk-forward loop over the full history and print the reports.

        Args:
            optimization_weeks: Length of each in-sample (optimization) window.
            testing_weeks: Length of each out-of-sample (testing) window.
            step_weeks: How far the window start advances between periods.

        Raises:
            ValueError: If ``step_weeks`` is not positive (the window would never advance).
        """
        if step_weeks <= 0:
            raise ValueError("step_weeks must be a positive number of weeks")

        full_df = self.load_data(self.full_history_start_date, datetime.now().strftime("%Y-%m-%d"))
        if full_df.empty:
            return

        optimization_delta = timedelta(weeks=optimization_weeks)
        testing_delta = timedelta(weeks=testing_weeks)
        step_delta = timedelta(weeks=step_weeks)

        all_out_of_sample_trades = []
        all_period_summaries = []

        current_date = full_df.index[0]
        end_date = full_df.index[-1]

        period_num = 0
        while current_date + optimization_delta + testing_delta <= end_date:
            # Advance the cursor up front so every path through the body moves forward.
            window_start = current_date
            current_date += step_delta
            period_num += 1

            logging.info(f"\n--- Starting Walk-Forward Period {period_num} ---")

            in_sample_df, out_of_sample_df = self._split_window(
                full_df, window_start, optimization_delta, testing_delta
            )
            if in_sample_df.empty or out_of_sample_df.empty:
                break

            logging.info(f"In-Sample (Optimization): {in_sample_df.index[0].date()} to {in_sample_df.index[-1].date()}")
            logging.info(f"Out-of-Sample (Testing): {out_of_sample_df.index[0].date()} to {out_of_sample_df.index[-1].date()}")

            best_result = self._find_best_params(in_sample_df)
            if not best_result:
                all_period_summaries.append({"period": period_num, "params": "None Found"})
                continue

            print("\n--- [1] In-Sample Optimization Result ---")
            print(f"Best Parameters Found: {best_result['params']}")
            self._generate_report(best_result['final_equity'], best_result['trades_list'], "In-Sample Performance with Best Params")

            logging.info("\n--- [2] Forward Testing on Out-of-Sample Data ---")
            df_with_signals = self.strategy_class(best_result['params']).calculate_signals(out_of_sample_df.copy())
            final_equity_oos, out_of_sample_trades = _run_trade_simulation(df_with_signals, **self.simulation_params)

            all_out_of_sample_trades.extend(out_of_sample_trades)
            oos_summary = self._generate_report(final_equity_oos, out_of_sample_trades, "Out-of-Sample Performance")

            # Store the summary for the final table
            all_period_summaries.append({"period": period_num, "params": best_result['params'], **oos_summary})

        # TODO: aggregate report over all_out_of_sample_trades; it needs an
        # equity figure spanning all periods, which is not tracked yet.
        print("\n" + "="*50)
        print("="*50)

        self._print_period_summaries(all_period_summaries)

    def _find_best_params(self, df: pd.DataFrame) -> 'dict | None':
        """
        Evaluate every parameter combination over the date range of ``df``.

        Only the first and last index values of ``df`` are used; each worker
        reloads its own data from the database for that range.

        Returns:
            A dict with keys ``params``, ``final_equity`` and ``trades_list``
            for the combination with the highest final equity, or None if no
            combination produced any trade.
        """
        param_configs = self.backtest_config.get('optimization_params', {})
        param_names = list(param_configs)
        param_ranges = [range(p['start'], p['end'] + 1, p['step']) for p in param_configs.values()]
        param_dicts = [dict(zip(param_names, combo)) for combo in itertools.product(*param_ranges)]

        logging.info(f"Optimizing on {len(param_dicts)} combinations...")
        if not param_dicts:
            # An empty range leaves nothing to evaluate; skip spawning a pool.
            return None

        # NOTE(review): isoformat() emits a 'T' date/time separator. If
        # datetime_utc is stored as text with a space separator, SQLite's string
        # comparison would shift these bounds - confirm the stored format.
        worker = partial(
            simulation_worker,
            db_path=self.db_path, coin=self.coin, timeframe=self.timeframe,
            start_date=df.index[0].isoformat(), end_date=df.index[-1].isoformat(),
            strategy_class=self.strategy_class,
            sim_params=self.simulation_params
        )

        # Kept on self so the caller's interrupt handler can terminate it.
        self.pool = multiprocessing.Pool(processes=self._worker_count(len(param_dicts)), initializer=init_worker)
        all_results = self.pool.map(worker, param_dicts)

        self.pool.close()
        self.pool.join()
        self.pool = None

        # Parameter sets that never traded are not candidates.
        results = [
            {'params': params, 'final_equity': final_equity, 'trades_list': trades}
            for params, final_equity, trades in all_results
            if trades
        ]
        if not results:
            return None
        return max(results, key=lambda x: x['final_equity'])

    def load_data(self, start_date, end_date):
        """
        Load all columns of the ``"{coin}_{timeframe}"`` table between two dates.

        Returns:
            A DataFrame indexed by ``datetime_utc``, or an empty DataFrame when
            there are no rows or the query fails (the error is logged).
        """
        table_name = f"{self.coin}_{self.timeframe}"
        logging.info(f"Loading full dataset for {table_name}...")
        try:
            # closing() releases the handle; the connection's own context
            # manager only commits or rolls back.
            with closing(sqlite3.connect(self.db_path)) as conn:
                query = f'SELECT * FROM "{table_name}" WHERE datetime_utc >= ? AND datetime_utc <= ? ORDER BY datetime_utc'
                df = pd.read_sql(query, conn, params=(start_date, end_date), parse_dates=['datetime_utc'])
            if df.empty:
                return pd.DataFrame()
            df.set_index('datetime_utc', inplace=True)
            return df
        except Exception as e:
            logging.error(f"Failed to load data for backtest: {e}")
            return pd.DataFrame()

    def _generate_report(self, final_equity: float, trades: list, title: str) -> dict:
        """
        Calculates, prints, and returns a detailed performance report.

        Args:
            final_equity: Equity at the end of the simulated period.
            trades: Trade dicts with ``pnl_pct`` and ``type`` keys.
            title: Heading printed above the report.

        Returns:
            Dict with ``num_trades``, ``num_longs``, ``num_shorts``, ``win_rate``,
            ``long_win_rate``, ``short_win_rate``, ``return_pct`` and
            ``final_equity``. With no trades, every metric is 0 and
            ``final_equity`` is the initial capital.
        """
        print(f"\n--- {title} ---")

        initial_capital = self.simulation_params['capital']

        if not trades:
            print("No trades were executed during this period.")
            print(f"Final Equity: ${initial_capital:,.2f}")
            return {"num_trades": 0, "num_longs": 0, "num_shorts": 0, "win_rate": 0, "long_win_rate": 0, "short_win_rate": 0, "return_pct": 0, "final_equity": initial_capital}

        def win_rate_of(subset):
            # A trade counts as a win only when its percentage PnL is strictly positive.
            if not subset:
                return 0
            return (sum(1 for t in subset if t['pnl_pct'] > 0) / len(subset)) * 100

        num_trades = len(trades)
        long_trades = [t for t in trades if t.get('type') == 'long']
        short_trades = [t for t in trades if t.get('type') == 'short']

        win_rate = win_rate_of(trades)
        long_win_rate = win_rate_of(long_trades)
        short_win_rate = win_rate_of(short_trades)

        # Guard against a zero starting capital instead of dividing by it.
        total_return_pct = ((final_equity - initial_capital) / initial_capital) * 100 if initial_capital else 0.0

        print(f"Final Equity: ${final_equity:,.2f}")
        print(f"Total Return: {total_return_pct:.2f}%")
        print(f"Total Trades: {num_trades} (Longs: {len(long_trades)}, Shorts: {len(short_trades)})")
        print(f"Win Rate (Overall): {win_rate:.2f}%")
        print(f"Win Rate (Longs): {long_win_rate:.2f}%")
        print(f"Win Rate (Shorts): {short_win_rate:.2f}%")

        # Return a dictionary of the key metrics for the summary table
        return {
            "num_trades": num_trades,
            "num_longs": len(long_trades),
            "num_shorts": len(short_trades),
            "win_rate": win_rate,
            "long_win_rate": long_win_rate,
            "short_win_rate": short_win_rate,
            "return_pct": total_return_pct,
            "final_equity": final_equity
        }


if __name__ == "__main__":
    # --- Command line definition ---
    cli = argparse.ArgumentParser(description="Run a Walk-Forward Optimization for a trading strategy.")
    cli.add_argument("--strategy", required=True, help="The name of the backtest config to run.")
    cli.add_argument("--start-date", default="2020-08-01", help="The overall start date for historical data.")
    for week_flag, week_default in (
        ("--optimization-weeks", 4),
        ("--testing-weeks", 1),
        ("--step-weeks", 1),
    ):
        cli.add_argument(week_flag, type=int, default=week_default)
    cli.add_argument("--log-level", default="normal", choices=['off', 'normal', 'debug'])

    # Simulation settings: (flag, value type, default). Each one becomes a
    # keyword argument of the trade simulation under its underscored name.
    simulation_options = (
        ("--capital", float, 1000),
        ("--size-pct", float, 50),
        ("--leverage-long", int, 3),
        ("--leverage-short", int, 2),
        ("--taker-fee-pct", float, 0.045),
        ("--maker-fee-pct", float, 0.015),
    )
    for sim_flag, sim_type, sim_default in simulation_options:
        cli.add_argument(sim_flag, type=sim_type, default=sim_default)

    args = cli.parse_args()

    # argparse stores "--size-pct" as args.size_pct, which is also the key the
    # simulation expects.
    sim_params = {}
    for sim_flag, _, _ in simulation_options:
        dest = sim_flag.lstrip("-").replace("-", "_")
        sim_params[dest] = getattr(args, dest)

    backtester = Backtester(
        log_level=args.log_level,
        strategy_name_to_test=args.strategy,
        start_date=args.start_date,
        sim_params=sim_params,
    )

    try:
        backtester.run_walk_forward_optimization(
            optimization_weeks=args.optimization_weeks,
            testing_weeks=args.testing_weeks,
            step_weeks=args.step_weeks,
        )
    except KeyboardInterrupt:
        logging.info("\nBacktest optimization cancelled by user.")
    finally:
        # A pool is still attached only if the run stopped mid-optimization.
        leftover_pool = backtester.pool
        if leftover_pool:
            logging.info("Terminating worker processes...")
            leftover_pool.terminate()
            leftover_pool.join()
            logging.info("Worker processes terminated.")