# hyper/data_fetcher.py
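"""Fetch historical Hyperliquid candle data into a local SQLite database.

Example invocation (run from the script's directory so that logging_utils
and coin_precision.json resolve; flags match the argparse setup below):

    python data_fetcher.py --coins BTC ETH --interval 1m --days 7
"""
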
import argparse
import json
import logging
import os
import sys
import time
import sqlite3
import pandas as pd
from datetime import datetime, timedelta, timezone
from hyperliquid.info import Info
from hyperliquid.utils import constants
from hyperliquid.utils.error import ClientError
# Assuming logging_utils.py is in the same directory
from logging_utils import setup_logging


class CandleFetcherDB:
"""
Fetches 1-minute candle data and saves/updates it directly in an SQLite database.
"""
def __init__(self, coins_to_fetch: list, interval: str, days_back: int):
self.info = Info(constants.MAINNET_API_URL, skip_ws=True)
self.coins = self._resolve_coins(coins_to_fetch)
self.interval = interval
self.days_back = days_back
        os.makedirs("_data", exist_ok=True)  # sqlite3.connect fails if the directory is missing
        self.db_path = os.path.join("_data", "market_data.db")
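        # Map Hyperliquid's short candle keys (t/o/h/l/c/v/n) to descriptive
        # column names used in the SQLite tables.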
self.column_rename_map = {
            't': 'timestamp_ms', 'o': 'open', 'h': 'high', 'l': 'low',
            'c': 'close', 'v': 'volume', 'n': 'number_of_trades'
}
def _resolve_coins(self, coins_arg: list) -> list:
"""Determines the final list of coins to fetch."""
if coins_arg and "all" in [c.lower() for c in coins_arg]:
logging.info("Fetching data for all available coins.")
try:
with open("coin_precision.json", 'r') as f:
return list(json.load(f).keys())
except FileNotFoundError:
logging.error("'coin_precision.json' not found. Please run list_coins.py first.")
sys.exit(1)
else:
logging.info(f"Fetching data for specified coins: {coins_arg}")
return coins_arg
def run(self):
"""Starts the data fetching process and reports status after each coin."""
with sqlite3.connect(self.db_path, timeout=10) as self.conn:
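            # WAL mode lets other processes read while this writer appends.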
self.conn.execute("PRAGMA journal_mode=WAL;")
for coin in self.coins:
logging.info(f"--- Starting process for {coin} ---")
num_updated = self._update_data_for_coin(coin)
self._report_status(coin, num_updated)
                time.sleep(1)  # brief pause between coins to ease API load
def _report_status(self, last_coin: str, num_updated: int):
"""Saves the status of the fetcher run to a JSON file."""
status_file = os.path.join("_data", "fetcher_status.json")
status = {
"last_updated_coin": last_coin,
"num_updated_candles": num_updated,
"last_run_timestamp_utc": datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S')
}
try:
with open(status_file, 'w', encoding='utf-8') as f:
json.dump(status, f, indent=4)
logging.info(f"Updated status file. Last processed coin: {last_coin} ({num_updated} candles)")
except IOError as e:
logging.error(f"Failed to write status file: {e}")
    def _get_start_time(self, coin: str) -> tuple[int, bool]:
"""Checks the database for an existing table and returns the last timestamp."""
table_name = f"{coin}_{self.interval}"
try:
cursor = self.conn.cursor()
            cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name=?;", (table_name,))
if cursor.fetchone():
query = f'SELECT MAX(timestamp_ms) FROM "{table_name}"'
last_ts = pd.read_sql(query, self.conn).iloc[0, 0]
if pd.notna(last_ts):
logging.info(f"Existing table '{table_name}' found. Resuming from timestamp: {last_ts}")
return int(last_ts), True
except Exception as e:
logging.warning(f"Could not read from table '{table_name}'. Re-fetching history. Error: {e}")
        start_dt = datetime.now(timezone.utc) - timedelta(days=self.days_back)
start_ms = int(start_dt.timestamp() * 1000)
logging.info(f"No valid data in '{table_name}'. Fetching last {self.days_back} days.")
return start_ms, False
def _update_data_for_coin(self, coin: str) -> int:
"""Fetches, processes, and saves new candle data for a single coin to the database."""
start_time_ms, table_existed = self._get_start_time(coin)
end_time_ms = int(time.time() * 1000)
if start_time_ms >= end_time_ms:
logging.warning(f"Start time is in the future for {coin}. Skipping.")
return 0
all_candles = self._fetch_candles_aggressively(coin, start_time_ms, end_time_ms)
if not all_candles:
logging.info(f"No new data found for {coin}.")
return 0
df = pd.DataFrame(all_candles)
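        # Consecutive batches can overlap at the boundary timestamp; keep the
        # most recent copy of each candle.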
df.drop_duplicates(subset=['t'], keep='last', inplace=True)
if table_existed:
df = df[df['t'] > start_time_ms]
df.sort_values(by='t', inplace=True)
if not df.empty:
return self._save_to_sqlite_with_pandas(df, coin, table_existed)
else:
logging.info(f"No new candles to append for {coin}.")
return 0
def _fetch_candles_aggressively(self, coin, start_ms, end_ms):
"""Uses a greedy loop to fetch data efficiently."""
all_candles = []
current_start_time = start_ms
while current_start_time < end_ms:
candle_batch = self._fetch_batch_with_retry(coin, current_start_time, end_ms)
if not candle_batch:
break
all_candles.extend(candle_batch)
last_ts = candle_batch[-1]["t"]
if last_ts < current_start_time:
break
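            # Advance one millisecond past the last candle so the next batch
            # does not refetch it.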
current_start_time = last_ts + 1
time.sleep(0.25)
return all_candles
def _fetch_batch_with_retry(self, coin, start_ms, end_ms):
"""Performs a single API call with a retry mechanism."""
max_retries = 3
for attempt in range(max_retries):
try:
return self.info.candles_snapshot(coin, self.interval, start_ms, end_ms)
except ClientError as e:
if e.status_code == 429 and attempt < max_retries - 1:
logging.warning("Rate limited. Retrying...")
time.sleep(2)
else:
logging.error(f"API Error for {coin}: {e}.")
return None
return None
def _save_to_sqlite_with_pandas(self, df: pd.DataFrame, coin: str, is_append: bool) -> int:
"""Saves a pandas DataFrame to an SQLite table and returns the number of saved rows."""
table_name = f"{coin}_{self.interval}"
try:
df.rename(columns=self.column_rename_map, inplace=True)
            df['datetime_utc'] = pd.to_datetime(df['timestamp_ms'], unit='ms')  # naive datetimes, interpreted as UTC
final_df = df[['datetime_utc', 'timestamp_ms', 'open', 'high', 'low', 'close', 'volume', 'number_of_trades']]
write_mode = 'append' if is_append else 'replace'
final_df.to_sql(table_name, self.conn, if_exists=write_mode, index=False)
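            # IF NOT EXISTS keeps repeated appends idempotent; the index
            # speeds up time-range queries on the candle tables.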
self.conn.execute(f'CREATE INDEX IF NOT EXISTS "idx_{table_name}_time" ON "{table_name}"(datetime_utc);')
num_saved = len(final_df)
logging.info(f"Successfully saved {num_saved} candles to table '{table_name}'")
return num_saved
except Exception as e:
logging.error(f"Failed to write to SQLite table '{table_name}': {e}")
return 0
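
# Example of reading the stored candles back (a minimal sketch; assumes the
# "<COIN>_<interval>" table naming used above, e.g. "BTC_1m"):
#
#   import sqlite3
#   import pandas as pd
#   with sqlite3.connect(os.path.join("_data", "market_data.db")) as conn:
#       df = pd.read_sql('SELECT * FROM "BTC_1m" ORDER BY timestamp_ms', conn)
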
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Fetch historical candle data and save to SQLite.")
parser.add_argument(
"--coins",
nargs='+',
default=["BTC", "ETH"],
help="List of coins to fetch (e.g., BTC ETH), or 'all' to fetch all coins."
)
parser.add_argument("--interval", default="1m", help="Candle interval (e.g., 1m, 5m, 1h).")
parser.add_argument("--days", type=int, default=7, help="Number of days of history to fetch for new coins.")
parser.add_argument(
"--log-level",
default="normal",
choices=['off', 'normal', 'debug'],
help="Set the logging level."
)
args = parser.parse_args()
setup_logging(args.log_level, 'DataFetcherDB')
fetcher = CandleFetcherDB(coins_to_fetch=args.coins, interval=args.interval, days_back=args.days)
fetcher.run()