import argparse
import logging
import os
import sys
import sqlite3
import pandas as pd
import json
from datetime import datetime, timedelta
import itertools
import multiprocessing
from functools import partial
import time

from logging_utils import setup_logging

def _run_single_simulation(df: pd.DataFrame, params: dict) -> list:
    """
    Core simulation logic. Takes a DataFrame and parameters, returns a list of trades.
    This is a pure function to be used by different data loaders.
    """
    # Work on a copy so the caller's DataFrame is never mutated. This matters in
    # _find_best_params, where one bound DataFrame is reused across many parameter sets.
    df = df.copy()

    fast_ma_period = params.get('fast', 0)
    slow_ma_period = params.get('slow', 0)
    sma_period = params.get('sma_period', 0)

    if fast_ma_period and slow_ma_period:
        # Moving-average crossover: long while the fast SMA is above the slow SMA.
        df['fast_sma'] = df['close'].rolling(window=fast_ma_period).mean()
        df['slow_sma'] = df['close'].rolling(window=slow_ma_period).mean()
        df['signal'] = (df['fast_sma'] > df['slow_sma']).astype(int)
    elif sma_period:
        # Single-SMA filter: long while price is above its own SMA.
        df['sma'] = df['close'].rolling(window=sma_period).mean()
        df['signal'] = (df['close'] > df['sma']).astype(int)
    else:
        return []

    df.dropna(inplace=True)
    if df.empty:
        return []

    # diff() of the 0/1 signal: +1 marks an entry bar, -1 marks an exit bar.
    df['position'] = df['signal'].diff()
    trades = []
    entry_price = 0

    for _, row in df.iterrows():
        if row['position'] == 1:
            if entry_price == 0:  # Only enter if flat
                entry_price = row['close']
        elif row['position'] == -1:
            if entry_price != 0:  # Only exit if in a position
                pnl = (row['close'] - entry_price) / entry_price
                trades.append({'pnl_pct': pnl})
                entry_price = 0

    # A position still open at the end of the data is not counted as a trade.
    return trades

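# Illustrative usage of _run_single_simulation (the 'fast'/'slow'/'sma_period'
# keys match what the function reads above; the DataFrame name and values here
# are placeholders):
#   trades = _run_single_simulation(candles_df, {'fast': 10, 'slow': 30})
#   trades = _run_single_simulation(candles_df, {'sma_period': 20})
# Each returned trade is a dict of the form {'pnl_pct': 0.0123}.
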
def simulation_worker(params: dict, db_path: str, coin: str, timeframe: str, start_date: str, end_date: str) -> tuple[dict, list]:
    """
    A worker function for multiprocessing. It loads its own data from the DB
    and then runs the simulation, returning the parameters and results together.
    """
    df = pd.DataFrame()
    try:
        with sqlite3.connect(db_path) as conn:
            query = f'SELECT datetime_utc, close FROM "{coin}_{timeframe}" WHERE date(datetime_utc) >= ? AND date(datetime_utc) <= ? ORDER BY datetime_utc'
            df = pd.read_sql(query, conn, params=(start_date, end_date), parse_dates=['datetime_utc'])
            if not df.empty:
                df.set_index('datetime_utc', inplace=True)
    except Exception as e:
        print(f"Worker error loading data for params {params}: {e}")
        return (params, [])

    if df.empty:
        return (params, [])

    trades = _run_single_simulation(df, params)
    return (params, trades)

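# Illustrative dispatch of simulation_worker (not used in this module, which
# binds _run_single_simulation directly in _find_best_params instead); the
# coin/timeframe/date values below are placeholders:
#   load_and_run = partial(simulation_worker, db_path="_data/market_data.db",
#                          coin='BTC', timeframe='1h',
#                          start_date='2021-01-01', end_date='2021-12-31')
#   with multiprocessing.Pool() as pool:
#       results = pool.map(load_and_run, param_dicts)
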
class Backtester:
    """
    A class to run a Walk-Forward Optimization, which is the gold standard
    for testing the robustness of a trading strategy.
    """

    def __init__(self, log_level: str, strategy_name_to_test: str):
        setup_logging(log_level, 'Backtester')
        self.db_path = os.path.join("_data", "market_data.db")

        self.backtest_config = self._load_backtest_config(strategy_name_to_test)
        if not self.backtest_config:
            logging.error(f"Backtest configuration for '{strategy_name_to_test}' not found.")
            sys.exit(1)

        self.strategy_name = self.backtest_config.get('strategy_name')
        self.strategy_config = self._load_strategy_config()
        if not self.strategy_config:
            logging.error(f"Strategy '{self.strategy_name}' not found.")
            sys.exit(1)

        self.params = self.strategy_config.get('parameters', {})
        self.coin = self.params.get('coin')
        self.timeframe = self.params.get('timeframe')
        self.pool = None

    def _load_backtest_config(self, name_to_test: str) -> dict | None:
        config_path = os.path.join("_data", "backtesting_conf.json")
        try:
            with open(config_path, 'r') as f:
                return json.load(f).get(name_to_test)
        except (FileNotFoundError, json.JSONDecodeError) as e:
            logging.error(f"Could not load backtesting configuration: {e}")
            return None

    def _load_strategy_config(self) -> dict | None:
        config_path = os.path.join("_data", "strategies.json")
        try:
            with open(config_path, 'r') as f:
                return json.load(f).get(self.strategy_name)
        except (FileNotFoundError, json.JSONDecodeError) as e:
            logging.error(f"Could not load strategy configuration: {e}")
            return None

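    # The two loaders above expect JSON shaped roughly as follows (illustrative,
    # inferred from how the keys are read in this module; not an authoritative schema):
    #   backtesting_conf.json:
    #     {"my_backtest": {"strategy_name": "my_strategy",
    #                      "optimization_params": {"fast": {"start": 5, "end": 25, "step": 5},
    #                                              "slow": {"start": 30, "end": 60, "step": 10}}}}
    #   strategies.json:
    #     {"my_strategy": {"parameters": {"coin": "BTC", "timeframe": "1h", ...}}}
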
    def run_walk_forward_optimization(self, num_periods=10, in_sample_pct=0.9):
        """
        Main function to orchestrate the walk-forward analysis.
        """
        full_df = self.load_data("2020-01-01", datetime.now().strftime("%Y-%m-%d"))
        if full_df.empty:
            return

        period_length = len(full_df) // num_periods
        all_out_of_sample_trades = []

        for i in range(num_periods):
            logging.info(f"\n--- Starting Walk-Forward Period {i+1}/{num_periods} ---")

            # 1. Define the In-Sample (training) and Out-of-Sample (testing) periods
            start_index = i * period_length
            in_sample_end_index = start_index + int(period_length * in_sample_pct)
            out_of_sample_end_index = start_index + period_length

            if in_sample_end_index >= len(full_df) or out_of_sample_end_index > len(full_df):
                logging.warning("Not enough data for the full final period. Ending analysis.")
                break

            in_sample_df = full_df.iloc[start_index:in_sample_end_index]
            out_of_sample_df = full_df.iloc[in_sample_end_index:out_of_sample_end_index]

            logging.info(f"In-Sample: {in_sample_df.index[0].date()} to {in_sample_df.index[-1].date()}")
            logging.info(f"Out-of-Sample: {out_of_sample_df.index[0].date()} to {out_of_sample_df.index[-1].date()}")

            # 2. Find the best parameters on the In-Sample data
            best_params = self._find_best_params(in_sample_df)
            if not best_params:
                logging.warning("No profitable parameters found in this period. Skipping.")
                continue

            # 3. Test the best parameters on the Out-of-Sample data
            logging.info(f"Testing best params {best_params} on Out-of-Sample data...")
            out_of_sample_trades = _run_single_simulation(out_of_sample_df.copy(), best_params)
            all_out_of_sample_trades.extend(out_of_sample_trades)
            self._generate_report(out_of_sample_trades, f"Period {i+1} Out-of-Sample Results")

        # 4. Generate a final report for all combined out-of-sample trades
        print("\n" + "="*50)
        self._generate_report(all_out_of_sample_trades, "AGGREGATE WALK-FORWARD PERFORMANCE")
        print("="*50)

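    # Worked example for the slicing in run_walk_forward_optimization above
    # (illustrative numbers): with 10,000 candles, num_periods=10 and
    # in_sample_pct=0.9, each period covers 1,000 rows; the first 900 are used
    # to pick parameters and the final 100 are traded out-of-sample.
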
    def _find_best_params(self, df: pd.DataFrame) -> dict | None:
        """Runs a multi-core optimization on a given slice of data."""
        param_configs = self.backtest_config.get('optimization_params', {})
        param_names = list(param_configs.keys())
        param_ranges = [range(p['start'], p['end'] + 1, p['step']) for p in param_configs.values()]

        all_combinations = list(itertools.product(*param_ranges))
        param_dicts = [dict(zip(param_names, combo)) for combo in all_combinations]

        logging.info(f"Optimizing on {len(all_combinations)} combinations...")

        num_cores = multiprocessing.cpu_count()  # scale to the host rather than a hard-coded worker count
        self.pool = multiprocessing.Pool(processes=num_cores)

        # Bind the data slice once; _run_single_simulation copies it internally,
        # so repeated tasks in the same worker process cannot corrupt each other's input.
        worker = partial(_run_single_simulation, df.copy())
        all_trades_results = self.pool.map(worker, param_dicts)

        self.pool.close()
        self.pool.join()
        self.pool = None

        results = []
        for i, trades in enumerate(all_trades_results):
            if trades:
                results.append({'params': param_dicts[i], 'pnl': sum(t['pnl_pct'] for t in trades)})

        if not results:
            return None
        return max(results, key=lambda x: x['pnl'])['params']

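    # Illustrative grid size for _find_best_params above: fast=5..25 step 5
    # (5 values) and slow=30..60 step 10 (4 values) expand, via
    # itertools.product, into 5 * 4 = 20 parameter combinations.
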
    def load_data(self, start_date, end_date):
        # This is a simplified version for the main data load
        table_name = f"{self.coin}_{self.timeframe}"
        logging.info(f"Loading full dataset for {table_name}...")
        try:
            with sqlite3.connect(self.db_path) as conn:
                query = f'SELECT * FROM "{table_name}" WHERE date(datetime_utc) >= ? AND date(datetime_utc) <= ? ORDER BY datetime_utc'
                df = pd.read_sql(query, conn, params=(start_date, end_date), parse_dates=['datetime_utc'])
                if df.empty:
                    logging.warning("No data found for the specified date range.")
                    return pd.DataFrame()
                df.set_index('datetime_utc', inplace=True)
                return df
        except Exception as e:
            logging.error(f"Failed to load data for backtest: {e}")
            return pd.DataFrame()

    def _generate_report(self, trades: list, title: str):
        """Calculates and prints key performance metrics."""
        print(f"\n--- {title} ---")
        if not trades:
            print("No trades were executed during this period.")
            return

        num_trades = len(trades)
        wins = [t for t in trades if t['pnl_pct'] > 0]
        total_pnl = sum(t['pnl_pct'] for t in trades)

        print(f"Total Trades: {num_trades}")
        print(f"Win Rate: {len(wins) / num_trades * 100:.2f}%")
        print(f"Total PNL (Cumulative %): {total_pnl * 100:.2f}%")

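# Example invocation (the script file name and strategy key here are illustrative):
#   python backtester.py --strategy my_backtest --log-level debug
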
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Run a Walk-Forward Optimization for a trading strategy.")
    parser.add_argument("--strategy", required=True, help="The name of the backtest config to run (from backtesting_conf.json).")
    parser.add_argument("--log-level", default="normal", choices=['off', 'normal', 'debug'])
    args = parser.parse_args()

    backtester = Backtester(
        log_level=args.log_level,
        strategy_name_to_test=args.strategy
    )

    try:
        backtester.run_walk_forward_optimization()
    except KeyboardInterrupt:
        logging.info("\nWalk-Forward Optimization cancelled by user.")
    finally:
        if backtester.pool:
            logging.info("Terminating worker processes...")
            backtester.pool.terminate()
            backtester.pool.join()
            logging.info("Worker processes terminated.")