import argparse
import json
import logging
import os
import sqlite3
import sys
import time
from datetime import datetime, timedelta, timezone

import pandas as pd
from hyperliquid.info import Info
from hyperliquid.utils import constants
from hyperliquid.utils.error import ClientError

# Assuming logging_utils.py is in the same directory
from logging_utils import setup_logging


class CandleFetcherDB:
    """
    Fetches candle data at a configurable interval and saves/updates it
    directly in an SQLite database.
    """

    def __init__(self, coins_to_fetch: list, interval: str, days_back: int):
        self.info = Info(constants.MAINNET_API_URL, skip_ws=True)
        self.coins = self._resolve_coins(coins_to_fetch)
        self.interval = interval
        self.days_back = days_back
        self.db_path = os.path.join("_data", "market_data.db")
        # sqlite3.connect() does not create missing parent directories.
        os.makedirs(os.path.dirname(self.db_path), exist_ok=True)
        # Map the API's terse candle keys to descriptive column names.
        self.column_rename_map = {
            't': 'timestamp_ms', 'o': 'open', 'h': 'high', 'l': 'low',
            'c': 'close', 'v': 'volume', 'n': 'number_of_trades'
        }

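    # An illustrative raw candle dict from the API -- key names taken from the
    # rename map above, values invented for the example:
    #   {"t": 1700000000000, "o": "37000.0", "h": "37100.0", "l": "36950.0",
    #    "c": "37050.0", "v": "12.5", "n": 240}
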
    def _resolve_coins(self, coins_arg: list) -> list:
        """Determines the final list of coins to fetch."""
        if coins_arg and "all" in [c.lower() for c in coins_arg]:
            logging.info("Fetching data for all available coins.")
            try:
                with open("coin_precision.json", 'r') as f:
                    return list(json.load(f).keys())
            except FileNotFoundError:
                logging.error("'coin_precision.json' not found. Please run list_coins.py first.")
                sys.exit(1)
        else:
            logging.info(f"Fetching data for specified coins: {coins_arg}")
            return coins_arg

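    # Note: only the keys of coin_precision.json are used; it is assumed to map
    # coin symbols (e.g. "BTC", "ETH") to precision metadata written by
    # list_coins.py -- the value format does not matter here.
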
    def run(self):
        """Starts the data fetching process and reports status after each coin."""
        with sqlite3.connect(self.db_path, timeout=10) as self.conn:
            # WAL mode lets other processes read the database while this fetcher writes.
            self.conn.execute("PRAGMA journal_mode=WAL;")
            for coin in self.coins:
                logging.info(f"--- Starting process for {coin} ---")
                num_updated = self._update_data_for_coin(coin)
                self._report_status(coin, num_updated)
                time.sleep(1)  # pause between coins to ease API rate-limit pressure
        # sqlite3's context manager commits/rolls back but does not close the connection.
        self.conn.close()

    def _report_status(self, last_coin: str, num_updated: int):
        """Saves the status of the fetcher run to a JSON file."""
        status_file = os.path.join("_data", "fetcher_status.json")
        status = {
            "last_updated_coin": last_coin,
            "num_updated_candles": num_updated,
            "last_run_timestamp_utc": datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S')
        }
        try:
            with open(status_file, 'w', encoding='utf-8') as f:
                json.dump(status, f, indent=4)
            logging.info(f"Updated status file. Last processed coin: {last_coin} ({num_updated} candles)")
        except IOError as e:
            logging.error(f"Failed to write status file: {e}")

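    # The resulting fetcher_status.json looks like this (illustrative values):
    #   {
    #       "last_updated_coin": "BTC",
    #       "num_updated_candles": 1440,
    #       "last_run_timestamp_utc": "2024-01-01 12:00:00"
    #   }
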
    def _get_start_time(self, coin: str) -> tuple[int, bool]:
        """Checks the database for an existing table and returns (start_ms, table_existed)."""
        table_name = f"{coin}_{self.interval}"
        try:
            cursor = self.conn.cursor()
            # Parameterized query avoids quoting issues with coin-derived table names.
            cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name=?;", (table_name,))
            if cursor.fetchone():
                query = f'SELECT MAX(timestamp_ms) FROM "{table_name}"'
                last_ts = pd.read_sql(query, self.conn).iloc[0, 0]
                if pd.notna(last_ts):
                    logging.info(f"Existing table '{table_name}' found. Resuming from timestamp: {last_ts}")
                    return int(last_ts), True
        except Exception as e:
            logging.warning(f"Could not read from table '{table_name}'. Re-fetching history. Error: {e}")

        # No usable data: fall back to fetching the configured number of days.
        start_dt = datetime.now(timezone.utc) - timedelta(days=self.days_back)
        start_ms = int(start_dt.timestamp() * 1000)
        logging.info(f"No valid data in '{table_name}'. Fetching last {self.days_back} days.")
        return start_ms, False

    def _update_data_for_coin(self, coin: str) -> int:
        """Fetches, processes, and saves new candle data for a single coin to the database."""
        start_time_ms, table_existed = self._get_start_time(coin)
        end_time_ms = int(time.time() * 1000)

        if start_time_ms >= end_time_ms:
            logging.warning(f"Start time is in the future for {coin}. Skipping.")
            return 0

        all_candles = self._fetch_candles_aggressively(coin, start_time_ms, end_time_ms)

        if not all_candles:
            logging.info(f"No new data found for {coin}.")
            return 0

        df = pd.DataFrame(all_candles)
        df.drop_duplicates(subset=['t'], keep='last', inplace=True)
        if table_existed:
            # The fetch starts at the last stored timestamp, so drop that
            # overlapping candle to avoid duplicating it in the table.
            df = df[df['t'] > start_time_ms]
        df.sort_values(by='t', inplace=True)

        if not df.empty:
            return self._save_to_sqlite_with_pandas(df, coin, table_existed)
        else:
            logging.info(f"No new candles to append for {coin}.")
            return 0

    def _fetch_candles_aggressively(self, coin, start_ms, end_ms):
        """Pages through the requested time window in batches until it is fully covered."""
        all_candles = []
        current_start_time = start_ms
        while current_start_time < end_ms:
            candle_batch = self._fetch_batch_with_retry(coin, current_start_time, end_ms)
            if not candle_batch:
                break
            all_candles.extend(candle_batch)
            last_ts = candle_batch[-1]["t"]
            if last_ts < current_start_time:
                # Defensive check: a batch ending before its requested start
                # would otherwise loop forever.
                break
            # Advance the window just past the last candle received.
            current_start_time = last_ts + 1
            time.sleep(0.25)  # small pause between batches to respect rate limits
        return all_candles

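    # Rough sizing: a 7-day backfill at 1m resolution is 7 * 1440 = 10,080 candles,
    # so if the API caps each response, several batches per coin are expected.
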
    def _fetch_batch_with_retry(self, coin, start_ms, end_ms):
        """Performs a single API call with a retry mechanism for rate limiting."""
        max_retries = 3
        for attempt in range(max_retries):
            try:
                return self.info.candles_snapshot(coin, self.interval, start_ms, end_ms)
            except ClientError as e:
                # HTTP 429: back off briefly and retry; any other client error
                # is fatal for this batch.
                if e.status_code == 429 and attempt < max_retries - 1:
                    logging.warning(f"Rate limited. Retrying ({attempt + 1}/{max_retries - 1})...")
                    time.sleep(2)
                else:
                    logging.error(f"API Error for {coin}: {e}.")
                    return None
        return None

    def _save_to_sqlite_with_pandas(self, df: pd.DataFrame, coin: str, is_append: bool) -> int:
        """Saves a pandas DataFrame to an SQLite table and returns the number of saved rows."""
        table_name = f"{coin}_{self.interval}"
        try:
            df.rename(columns=self.column_rename_map, inplace=True)
            # Epoch milliseconds are UTC by definition, so the derived naive
            # datetimes represent UTC wall-clock times.
            df['datetime_utc'] = pd.to_datetime(df['timestamp_ms'], unit='ms')
            final_df = df[['datetime_utc', 'timestamp_ms', 'open', 'high', 'low', 'close', 'volume', 'number_of_trades']]

            write_mode = 'append' if is_append else 'replace'
            final_df.to_sql(table_name, self.conn, if_exists=write_mode, index=False)

            self.conn.execute(f'CREATE INDEX IF NOT EXISTS "idx_{table_name}_time" ON "{table_name}"(datetime_utc);')

            num_saved = len(final_df)
            logging.info(f"Successfully saved {num_saved} candles to table '{table_name}'")
            return num_saved
        except Exception as e:
            logging.error(f"Failed to write to SQLite table '{table_name}': {e}")
            return 0


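# Resulting schema: one table per coin, named "<COIN>_<interval>" (e.g. "BTC_1m"),
# with columns datetime_utc, timestamp_ms, open, high, low, close, volume,
# number_of_trades, indexed on datetime_utc.
#
# Example invocations (script filename assumed):
#   python candle_fetcher_db.py --coins BTC ETH --interval 1m --days 7
#   python candle_fetcher_db.py --coins all --log-level debug
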
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Fetch historical candle data and save to SQLite.")
    parser.add_argument(
        "--coins",
        nargs='+',
        default=["BTC", "ETH"],
        help="List of coins to fetch (e.g., BTC ETH), or 'all' to fetch all coins."
    )
    parser.add_argument("--interval", default="1m", help="Candle interval (e.g., 1m, 5m, 1h).")
    parser.add_argument("--days", type=int, default=7, help="Number of days of history to fetch for new coins.")
    parser.add_argument(
        "--log-level",
        default="normal",
        choices=['off', 'normal', 'debug'],
        help="Set the logging level."
    )

    args = parser.parse_args()
    setup_logging(args.log_level, 'DataFetcherDB')

    fetcher = CandleFetcherDB(coins_to_fetch=args.coins, interval=args.interval, days_back=args.days)
    fetcher.run()