live market websocket and monitoring wallets

2025-10-20 20:46:48 +02:00
parent 64f7866083
commit 70f3d48336
13 changed files with 996 additions and 183 deletions
--- a/backtester.py
+++ b/backtester.py
@ -10,97 +10,125 @@ import itertools
 import multiprocessing
 from functools import partial
 import time
+import importlib
+import signal

 from logging_utils import setup_logging

-def _run_single_simulation(df: pd.DataFrame, params: dict) -> list:
+def _run_trade_simulation(df: pd.DataFrame, capital: float, size_pct: float, leverage_long: int, leverage_short: int, taker_fee_pct: float, maker_fee_pct: float) -> tuple[float, list]:
    """
-    Core simulation logic. Takes a DataFrame and parameters, returns a list of trades.
-    This is a pure function to be used by different data loaders.
+    Simulates a trading strategy with portfolio management, including capital,
+    position sizing, leverage, and fees.
    """
-    fast_ma_period = params.get('fast', 0)
-    slow_ma_period = params.get('slow', 0)
-    sma_period = params.get('sma_period', 0)
-    
-    if fast_ma_period and slow_ma_period:
-        df['fast_sma'] = df['close'].rolling(window=fast_ma_period).mean()
-        df['slow_sma'] = df['close'].rolling(window=slow_ma_period).mean()
-        df['signal'] = (df['fast_sma'] > df['slow_sma']).astype(int)
-    elif sma_period:
-        df['sma'] = df['close'].rolling(window=sma_period).mean()
-        df['signal'] = (df['close'] > df['sma']).astype(int)
-    else:
-        return []
-        
    df.dropna(inplace=True)
-    if df.empty: return []
+    if df.empty: return capital, []

-    df['position'] = df['signal'].diff()
+    df['position_change'] = df['signal'].diff()
    trades = []
    entry_price = 0
-    
-    for i, row in df.iterrows():
-        if row['position'] == 1:
-            if entry_price == 0: # Only enter if flat
-                entry_price = row['close']
-        elif row['position'] == -1:
-            if entry_price != 0: # Only exit if in a position
-                pnl = (row['close'] - entry_price) / entry_price
-                trades.append({'pnl_pct': pnl})
-                entry_price = 0
-    
-    return trades
+    asset_size = 0
+    current_position = 0 # 0=flat, 1=long, -1=short
+    equity = capital

-def simulation_worker(params: dict, db_path: str, coin: str, timeframe: str, start_date: str, end_date: str) -> tuple[dict, list]:
+    for i, row in df.iterrows():
+        # --- Close Positions ---
+        if (current_position == 1 and row['signal'] != 1) or \
+           (current_position == -1 and row['signal'] != -1):
+            
+            exit_value = asset_size * row['close']
+            fee = exit_value * (taker_fee_pct / 100)
+            
+            if current_position == 1: # Closing a long
+                pnl_usd = (row['close'] - entry_price) * asset_size
+                equity += pnl_usd - fee
+                trades.append({'pnl_usd': pnl_usd, 'pnl_pct': (row['close'] - entry_price) / entry_price, 'type': 'long'})
+            
+            elif current_position == -1: # Closing a short
+                pnl_usd = (entry_price - row['close']) * asset_size
+                equity += pnl_usd - fee
+                trades.append({'pnl_usd': pnl_usd, 'pnl_pct': (entry_price - row['close']) / entry_price, 'type': 'short'})
+
+            entry_price = 0
+            asset_size = 0
+            current_position = 0
+
+        # --- Open New Positions ---
+        if current_position == 0:
+            if row['signal'] == 1: # Open Long
+                margin_to_use = equity * (size_pct / 100)
+                trade_value = margin_to_use * leverage_long
+                asset_size = trade_value / row['close']
+                fee = trade_value * (taker_fee_pct / 100)
+                equity -= fee
+                entry_price = row['close']
+                current_position = 1
+            elif row['signal'] == -1: # Open Short
+                margin_to_use = equity * (size_pct / 100)
+                trade_value = margin_to_use * leverage_short
+                asset_size = trade_value / row['close']
+                fee = trade_value * (taker_fee_pct / 100)
+                equity -= fee
+                entry_price = row['close']
+                current_position = -1
+
+    return equity, trades
+
+
+def simulation_worker(params: dict, db_path: str, coin: str, timeframe: str, start_date: str, end_date: str, strategy_class, sim_params: dict) -> tuple[dict, float, list]:
    """
-    A worker function for multiprocessing. It loads its own data from the DB
-    and then runs the simulation, returning the parameters and results together.
+    Worker function that loads data, runs the full simulation, and returns results.
    """
    df = pd.DataFrame()
    try:
        with sqlite3.connect(db_path) as conn:
-            query = f'SELECT datetime_utc, close FROM "{coin}_{timeframe}" WHERE date(datetime_utc) >= ? AND date(datetime_utc) <= ? ORDER BY datetime_utc'
+            query = f'SELECT datetime_utc, open, high, low, close FROM "{coin}_{timeframe}" WHERE datetime_utc >= ? AND datetime_utc <= ? ORDER BY datetime_utc'
            df = pd.read_sql(query, conn, params=(start_date, end_date), parse_dates=['datetime_utc'])
            if not df.empty:
                df.set_index('datetime_utc', inplace=True)
    except Exception as e:
        print(f"Worker error loading data for params {params}: {e}")
-        return (params, [])
+        return (params, sim_params['capital'], [])

    if df.empty:
-        return (params, [])
+        return (params, sim_params['capital'], [])
    
-    trades = _run_single_simulation(df, params)
-    return (params, trades)
+    strategy_instance = strategy_class(params)
+    df_with_signals = strategy_instance.calculate_signals(df)
+    
+    final_equity, trades = _run_trade_simulation(df_with_signals, **sim_params)
+    return (params, final_equity, trades)
+
+
+def init_worker():
+    signal.signal(signal.SIGINT, signal.SIG_IGN)


 class Backtester:
-    """
-    A class to run a Walk-Forward Optimization, which is the gold standard
-    for testing the robustness of a trading strategy.
-    """
-
-    def __init__(self, log_level: str, strategy_name_to_test: str):
+    def __init__(self, log_level: str, strategy_name_to_test: str, start_date: str, sim_params: dict):
        setup_logging(log_level, 'Backtester')
        self.db_path = os.path.join("_data", "market_data.db")
+        self.simulation_params = sim_params
        
        self.backtest_config = self._load_backtest_config(strategy_name_to_test)
-        if not self.backtest_config:
-            logging.error(f"Backtest configuration for '{strategy_name_to_test}' not found.")
-            sys.exit(1)
-
+        # ... (rest of __init__ is unchanged)
        self.strategy_name = self.backtest_config.get('strategy_name')
        self.strategy_config = self._load_strategy_config()
-        if not self.strategy_config:
-            logging.error(f"Strategy '{self.strategy_name}' not found.")
-            sys.exit(1)
-
        self.params = self.strategy_config.get('parameters', {})
        self.coin = self.params.get('coin')
        self.timeframe = self.params.get('timeframe')
        self.pool = None
-
-    def _load_backtest_config(self, name_to_test: str) -> dict:
+        self.full_history_start_date = start_date
+        try:
+            module_path, class_name = self.backtest_config['script'].rsplit('.', 1)
+            module = importlib.import_module(module_path)
+            self.strategy_class = getattr(module, class_name)
+            logging.info(f"Successfully loaded strategy class '{class_name}'.")
+        except (ImportError, AttributeError, KeyError) as e:
+            logging.error(f"Could not load strategy script '{self.backtest_config.get('script')}': {e}")
+            sys.exit(1)
+            
+    def _load_backtest_config(self, name_to_test: str):
+        # ... (unchanged)
        config_path = os.path.join("_data", "backtesting_conf.json")
        try:
            with open(config_path, 'r') as f: return json.load(f).get(name_to_test)
@ -108,7 +136,8 @@ class Backtester:
            logging.error(f"Could not load backtesting configuration: {e}")
            return None
            
-    def _load_strategy_config(self) -> dict:
+    def _load_strategy_config(self):
+        # ... (unchanged)
        config_path = os.path.join("_data", "strategies.json")
        try:
            with open(config_path, 'r') as f: return json.load(f).get(self.strategy_name)
@ -116,53 +145,86 @@ class Backtester:
            logging.error(f"Could not load strategy configuration: {e}")
            return None

-    def run_walk_forward_optimization(self, num_periods=10, in_sample_pct=0.9):
-        """
-        Main function to orchestrate the walk-forward analysis.
-        """
-        full_df = self.load_data("2020-01-01", datetime.now().strftime("%Y-%m-%d"))
+    def run_walk_forward_optimization(self, optimization_weeks: int, testing_weeks: int, step_weeks: int):
+        # ... (unchanged, will now use the new simulation logic via the worker)
+        full_df = self.load_data(self.full_history_start_date, datetime.now().strftime("%Y-%m-%d"))
        if full_df.empty: return

-        period_length = len(full_df) // num_periods
-        all_out_of_sample_trades = []
-        
-        for i in range(num_periods):
-            logging.info(f"\n--- Starting Walk-Forward Period {i+1}/{num_periods} ---")
-            
-            # 1. Define the In-Sample (training) and Out-of-Sample (testing) periods
-            start_index = i * period_length
-            in_sample_end_index = start_index + int(period_length * in_sample_pct)
-            out_of_sample_end_index = start_index + period_length
+        optimization_delta = timedelta(weeks=optimization_weeks)
+        testing_delta = timedelta(weeks=testing_weeks)
+        step_delta = timedelta(weeks=step_weeks)

-            if in_sample_end_index >= len(full_df) or out_of_sample_end_index > len(full_df):
-                logging.warning("Not enough data for the full final period. Ending analysis.")
+        all_out_of_sample_trades = []
+        all_period_summaries = []
+
+        current_date = full_df.index[0]
+        end_date = full_df.index[-1]
+        
+        period_num = 1
+        while current_date + optimization_delta + testing_delta <= end_date:
+            logging.info(f"\n--- Starting Walk-Forward Period {period_num} ---")
+            
+            in_sample_start = current_date
+            in_sample_end = in_sample_start + optimization_delta
+            out_of_sample_end = in_sample_end + testing_delta
+
+            in_sample_df = full_df[in_sample_start:in_sample_end]
+            out_of_sample_df = full_df[in_sample_end:out_of_sample_end]
+
+            if in_sample_df.empty or out_of_sample_df.empty:
                break

-            in_sample_df = full_df.iloc[start_index:in_sample_end_index]
-            out_of_sample_df = full_df.iloc[in_sample_end_index:out_of_sample_end_index]
-
-            logging.info(f"In-Sample: {in_sample_df.index[0].date()} to {in_sample_df.index[-1].date()}")
-            logging.info(f"Out-of-Sample: {out_of_sample_df.index[0].date()} to {out_of_sample_df.index[-1].date()}")
+            logging.info(f"In-Sample (Optimization): {in_sample_df.index[0].date()} to {in_sample_df.index[-1].date()}")
+            logging.info(f"Out-of-Sample (Testing):  {out_of_sample_df.index[0].date()} to {out_of_sample_df.index[-1].date()}")
            
-            # 2. Find the best parameters on the In-Sample data
-            best_params = self._find_best_params(in_sample_df)
-            if not best_params:
-                logging.warning("No profitable parameters found in this period. Skipping.")
+            best_result = self._find_best_params(in_sample_df)
+            if not best_result:
+                all_period_summaries.append({"period": period_num, "params": "None Found"})
+                current_date += step_delta
+                period_num += 1
                continue
+            
+            print("\n--- [1] In-Sample Optimization Result ---")
+            print(f"Best Parameters Found: {best_result['params']}")
+            self._generate_report(best_result['final_equity'], best_result['trades_list'], "In-Sample Performance with Best Params")

-            # 3. Test the best parameters on the Out-of-Sample data
-            logging.info(f"Testing best params {best_params} on Out-of-Sample data...")
-            out_of_sample_trades = _run_single_simulation(out_of_sample_df.copy(), best_params)
+            logging.info(f"\n--- [2] Forward Testing on Out-of-Sample Data ---")
+            df_with_signals = self.strategy_class(best_result['params']).calculate_signals(out_of_sample_df.copy())
+            final_equity_oos, out_of_sample_trades = _run_trade_simulation(df_with_signals, **self.simulation_params)
+            
            all_out_of_sample_trades.extend(out_of_sample_trades)
-            self._generate_report(out_of_sample_trades, f"Period {i+1} Out-of-Sample Results")
+            oos_summary = self._generate_report(final_equity_oos, out_of_sample_trades, "Out-of-Sample Performance")
+            
+            # Store the summary for the final table
+            summary_to_store = {"period": period_num, "params": best_result['params'], **oos_summary}
+            all_period_summaries.append(summary_to_store)
+
+            current_date += step_delta
+            period_num += 1
        
-        # 4. Generate a final report for all combined out-of-sample trades
+        # ... (Final reports will be generated here, but need to adapt to equity tracking)
        print("\n" + "="*50)
-        self._generate_report(all_out_of_sample_trades, "AGGREGATE WALK-FORWARD PERFORMANCE")
+        # self._generate_report(all_out_of_sample_trades, "FINAL AGGREGATE WALK-FORWARD PERFORMANCE")
        print("="*50)

+        # --- ADDED: Final summary table of best parameters and performance per period ---
+        print("\n--- Summary of Best Parameters and Performance per Period ---")
+        header = f"{'#':<3} | {'Best Parameters':<30} | {'Trades':>8} | {'Longs':>6} | {'Shorts':>7} | {'Win %':>8} | {'L Win %':>9} | {'S Win %':>9} | {'Return %':>10} | {'Equity':>15}"
+        print(header)
+        print("-" * len(header))
+        for item in all_period_summaries:
+            params_str = str(item.get('params', 'N/A'))
+            trades = item.get('num_trades', 'N/A')
+            longs = item.get('num_longs', 'N/A')
+            shorts = item.get('num_shorts', 'N/A')
+            win_rate = f"{item.get('win_rate', 0):.2f}%" if 'win_rate' in item else 'N/A'
+            long_win_rate = f"{item.get('long_win_rate', 0):.2f}%" if 'long_win_rate' in item else 'N/A'
+            short_win_rate = f"{item.get('short_win_rate', 0):.2f}%" if 'short_win_rate' in item else 'N/A'
+            return_pct = f"{item.get('return_pct', 0):.2f}%" if 'return_pct' in item else 'N/A'
+            equity = f"${item.get('final_equity', 0):,.2f}" if 'final_equity' in item else 'N/A'
+            print(f"{item['period']:<3} | {params_str:<30} | {trades:>8} | {longs:>6} | {shorts:>7} | {win_rate:>8} | {long_win_rate:>9} | {short_win_rate:>9} | {return_pct:>10} | {equity:>15}")
+            
    def _find_best_params(self, df: pd.DataFrame) -> dict:
-        """Runs a multi-core optimization on a given slice of data."""
        param_configs = self.backtest_config.get('optimization_params', {})
        param_names = list(param_configs.keys())
        param_ranges = [range(p['start'], p['end'] + 1, p['step']) for p in param_configs.values()]
@ -173,71 +235,130 @@ class Backtester:
        logging.info(f"Optimizing on {len(all_combinations)} combinations...")

        num_cores = 60
-        self.pool = multiprocessing.Pool(processes=num_cores)
-        
-        worker = partial(_run_single_simulation, df.copy())
-        all_trades_results = self.pool.map(worker, param_dicts)
+        self.pool = multiprocessing.Pool(processes=num_cores, initializer=init_worker)
        
+        worker = partial(
+            simulation_worker,
+            db_path=self.db_path, coin=self.coin, timeframe=self.timeframe,
+            start_date=df.index[0].isoformat(), end_date=df.index[-1].isoformat(),
+            strategy_class=self.strategy_class,
+            sim_params=self.simulation_params
+        )
+
+        all_results = self.pool.map(worker, param_dicts)
+
        self.pool.close()
        self.pool.join()
        self.pool = None

-        results = []
-        for i, trades in enumerate(all_trades_results):
-            if trades:
-                results.append({'params': param_dicts[i], 'pnl': sum(t['pnl_pct'] for t in trades)})
-
+        results = [{'params': params, 'final_equity': final_equity, 'trades_list': trades} for params, final_equity, trades in all_results if trades]
        if not results: return None
-        return max(results, key=lambda x: x['pnl'])['params']
-
+        return max(results, key=lambda x: x['final_equity'])
+        
    def load_data(self, start_date, end_date):
-        # This is a simplified version for the main data load
+        # ... (unchanged)
        table_name = f"{self.coin}_{self.timeframe}"
        logging.info(f"Loading full dataset for {table_name}...")
        try:
            with sqlite3.connect(self.db_path) as conn:
-                query = f'SELECT * FROM "{table_name}" WHERE date(datetime_utc) >= ? AND date(datetime_utc) <= ? ORDER BY datetime_utc'
+                query = f'SELECT * FROM "{table_name}" WHERE datetime_utc >= ? AND datetime_utc <= ? ORDER BY datetime_utc'
                df = pd.read_sql(query, conn, params=(start_date, end_date), parse_dates=['datetime_utc'])
-                if df.empty:
-                    logging.warning("No data found for the specified date range.")
-                    return pd.DataFrame()
+                if df.empty: return pd.DataFrame()
                df.set_index('datetime_utc', inplace=True)
                return df
        except Exception as e:
            logging.error(f"Failed to load data for backtest: {e}")
            return pd.DataFrame()

-    def _generate_report(self, trades: list, title: str):
-        """Calculates and prints key performance metrics."""
+    def _generate_report(self, final_equity: float, trades: list, title: str) -> dict:
+        """Calculates, prints, and returns a detailed performance report."""
        print(f"\n--- {title} ---")
+        
+        initial_capital = self.simulation_params['capital']
+        
        if not trades:
            print("No trades were executed during this period.")
-            return
+            print(f"Final Equity:           ${initial_capital:,.2f}")
+            return {"num_trades": 0, "num_longs": 0, "num_shorts": 0, "win_rate": 0, "long_win_rate": 0, "short_win_rate": 0, "return_pct": 0, "final_equity": initial_capital}

        num_trades = len(trades)
-        wins = [t for t in trades if t['pnl_pct'] > 0]
-        total_pnl = sum(t['pnl_pct'] for t in trades)
+        long_trades = [t for t in trades if t.get('type') == 'long']
+        short_trades = [t for t in trades if t.get('type') == 'short']
        
-        print(f"Total Trades:           {num_trades}")
-        print(f"Win Rate:               {(len(wins) / num_trades) * 100 if num_trades > 0 else 0:.2f}%")
-        print(f"Total PNL (Cumulative %): {total_pnl * 100:.2f}%")
+        pnls_pct = pd.Series([t['pnl_pct'] for t in trades])
+        
+        wins = pnls_pct[pnls_pct > 0]
+        win_rate = (len(wins) / num_trades) * 100 if num_trades > 0 else 0
+        
+        long_wins = len([t for t in long_trades if t['pnl_pct'] > 0])
+        short_wins = len([t for t in short_trades if t['pnl_pct'] > 0])
+        long_win_rate = (long_wins / len(long_trades)) * 100 if long_trades else 0
+        short_win_rate = (short_wins / len(short_trades)) * 100 if short_trades else 0
+        
+        total_return_pct = ((final_equity - initial_capital) / initial_capital) * 100
+        
+        print(f"Final Equity:           ${final_equity:,.2f}")
+        print(f"Total Return:           {total_return_pct:.2f}%")
+        print(f"Total Trades:           {num_trades} (Longs: {len(long_trades)}, Shorts: {len(short_trades)})")
+        print(f"Win Rate (Overall):     {win_rate:.2f}%")
+        print(f"Win Rate (Longs):       {long_win_rate:.2f}%")
+        print(f"Win Rate (Shorts):      {short_win_rate:.2f}%")
+        
+        # Return a dictionary of the key metrics for the summary table
+        return {
+            "num_trades": num_trades,
+            "num_longs": len(long_trades),
+            "num_shorts": len(short_trades),
+            "win_rate": win_rate,
+            "long_win_rate": long_win_rate,
+            "short_win_rate": short_win_rate,
+            "return_pct": total_return_pct,
+            "final_equity": final_equity
+        }


 if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Run a Walk-Forward Optimization for a trading strategy.")
-    parser.add_argument("--strategy", required=True, help="The name of the backtest config to run (from backtesting_conf.json).")
+    parser.add_argument("--strategy", required=True, help="The name of the backtest config to run.")
+    parser.add_argument("--start-date", default="2020-08-01", help="The overall start date for historical data.")
+    parser.add_argument("--optimization-weeks", type=int, default=4)
+    parser.add_argument("--testing-weeks", type=int, default=1)
+    parser.add_argument("--step-weeks", type=int, default=1)
    parser.add_argument("--log-level", default="normal", choices=['off', 'normal', 'debug'])
+    
+    parser.add_argument("--capital", type=float, default=1000)
+    parser.add_argument("--size-pct", type=float, default=50)
+    parser.add_argument("--leverage-long", type=int, default=3)
+    parser.add_argument("--leverage-short", type=int, default=2)
+    parser.add_argument("--taker-fee-pct", type=float, default=0.045)
+    parser.add_argument("--maker-fee-pct", type=float, default=0.015)
+
    args = parser.parse_args()

+    sim_params = {
+        "capital": args.capital,
+        "size_pct": args.size_pct,
+        "leverage_long": args.leverage_long,
+        "leverage_short": args.leverage_short,
+        "taker_fee_pct": args.taker_fee_pct,
+        "maker_fee_pct": args.maker_fee_pct
+    }
+
    backtester = Backtester(
        log_level=args.log_level,
-        strategy_name_to_test=args.strategy
+        strategy_name_to_test=args.strategy,
+        start_date=args.start_date,
+        sim_params=sim_params
    )
    
    try:
-        backtester.run_walk_forward_optimization()
+        backtester.run_walk_forward_optimization(
+            optimization_weeks=args.optimization_weeks,
+            testing_weeks=args.testing_weeks,
+            step_weeks=args.step_weeks
+        )
    except KeyboardInterrupt:
-        logging.info("\nWalk-Forward Optimization cancelled by user.")
+        logging.info("\nBacktest optimization cancelled by user.")
    finally:
        if backtester.pool:
            logging.info("Terminating worker processes...")