Initial commit
data_fetcher.py · 195 lines · Normal file
@@ -0,0 +1,195 @@
import argparse
import json
import logging
import os
import sqlite3
import sys
import time
from datetime import datetime, timedelta, timezone

import pandas as pd
from hyperliquid.info import Info
from hyperliquid.utils import constants
from hyperliquid.utils.error import ClientError

# Assuming logging_utils.py is in the same directory
from logging_utils import setup_logging


class CandleFetcherDB:
    """
    Fetches candle data at a configurable interval and saves/updates it
    directly in an SQLite database.
    """

    def __init__(self, coins_to_fetch: list, interval: str, days_back: int):
        self.info = Info(constants.MAINNET_API_URL, skip_ws=True)
        self.coins = self._resolve_coins(coins_to_fetch)
        self.interval = interval
        self.days_back = days_back
        self.db_path = os.path.join("_data", "market_data.db")
        # sqlite3.connect() cannot create missing directories, so ensure _data exists.
        os.makedirs(os.path.dirname(self.db_path), exist_ok=True)
        self.column_rename_map = {
            't': 'timestamp_ms',
            'o': 'open',
            'h': 'high',
            'l': 'low',
            'c': 'close',
            'v': 'volume',
            'n': 'number_of_trades',
        }

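    # For reference, a raw candle from the API is assumed to look roughly like
    # the dict below (keys inferred from column_rename_map above; the exact
    # field set and string-encoded prices are an assumption, not confirmed here):
    #
    #   {'t': 1700000000000, 'o': '37000.0', 'h': '37120.5', 'l': '36950.0',
    #    'c': '37080.0', 'v': '12.5', 'n': 42}
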
    def _resolve_coins(self, coins_arg: list) -> list:
        """Determines the final list of coins to fetch."""
        if coins_arg and "all" in [c.lower() for c in coins_arg]:
            logging.info("Fetching data for all available coins.")
            try:
                with open("coin_precision.json", 'r') as f:
                    return list(json.load(f).keys())
            except FileNotFoundError:
                logging.error("'coin_precision.json' not found. Please run list_coins.py first.")
                sys.exit(1)
        else:
            logging.info(f"Fetching data for specified coins: {coins_arg}")
            return coins_arg

    def run(self):
        """Starts the data fetching process and reports status after each coin."""
        # Note: sqlite3's context manager commits or rolls back the transaction
        # on exit; it does not close the connection.
        with sqlite3.connect(self.db_path, timeout=10) as self.conn:
            self.conn.execute("PRAGMA journal_mode=WAL;")
            for coin in self.coins:
                logging.info(f"--- Starting process for {coin} ---")
                num_updated = self._update_data_for_coin(coin)
                self._report_status(coin, num_updated)
                time.sleep(1)  # Pause between coins to go easy on the API.

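    # WAL mode allows a concurrent reader (e.g. a dashboard process) to query
    # the database while this fetcher is writing, at the cost of extra
    # -wal/-shm files alongside the .db file.
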
    def _report_status(self, last_coin: str, num_updated: int):
        """Saves the status of the fetcher run to a JSON file."""
        status_file = os.path.join("_data", "fetcher_status.json")
        status = {
            "last_updated_coin": last_coin,
            "num_updated_candles": num_updated,
            "last_run_timestamp_utc": datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S'),
        }
        try:
            with open(status_file, 'w', encoding='utf-8') as f:
                json.dump(status, f, indent=4)
            logging.info(f"Updated status file. Last processed coin: {last_coin} ({num_updated} candles)")
        except IOError as e:
            logging.error(f"Failed to write status file: {e}")

    def _get_start_time(self, coin: str) -> tuple[int, bool]:
        """Checks the database for an existing table and returns the last timestamp."""
        table_name = f"{coin}_{self.interval}"
        try:
            cursor = self.conn.cursor()
            # Parameterized query avoids interpolating the table name into the SQL string.
            cursor.execute(
                "SELECT name FROM sqlite_master WHERE type='table' AND name=?;",
                (table_name,),
            )
            if cursor.fetchone():
                query = f'SELECT MAX(timestamp_ms) FROM "{table_name}"'
                last_ts = pd.read_sql(query, self.conn).iloc[0, 0]
                if pd.notna(last_ts):
                    logging.info(f"Existing table '{table_name}' found. Resuming from timestamp: {last_ts}")
                    return int(last_ts), True
        except Exception as e:
            logging.warning(f"Could not read from table '{table_name}'. Re-fetching history. Error: {e}")

        start_dt = datetime.now(timezone.utc) - timedelta(days=self.days_back)
        start_ms = int(start_dt.timestamp() * 1000)
        logging.info(f"No valid data in '{table_name}'. Fetching last {self.days_back} days.")
        return start_ms, False

    def _update_data_for_coin(self, coin: str) -> int:
        """Fetches, processes, and saves new candle data for a single coin to the database."""
        start_time_ms, table_existed = self._get_start_time(coin)
        end_time_ms = int(time.time() * 1000)

        if start_time_ms >= end_time_ms:
            logging.warning(f"Start time is in the future for {coin}. Skipping.")
            return 0

        all_candles = self._fetch_candles_aggressively(coin, start_time_ms, end_time_ms)

        if not all_candles:
            logging.info(f"No new data found for {coin}.")
            return 0

        df = pd.DataFrame(all_candles)
        df.drop_duplicates(subset=['t'], keep='last', inplace=True)
        if table_existed:
            # Keep only candles strictly newer than the stored maximum, so the
            # candle at the resume timestamp is not inserted twice.
            df = df[df['t'] > start_time_ms]
        df.sort_values(by='t', inplace=True)

        if not df.empty:
            return self._save_to_sqlite_with_pandas(df, coin, table_existed)
        else:
            logging.info(f"No new candles to append for {coin}.")
            return 0

    def _fetch_candles_aggressively(self, coin, start_ms, end_ms):
        """Fetches data in consecutive greedy batches until the window is covered."""
        all_candles = []
        current_start_time = start_ms
        while current_start_time < end_ms:
            candle_batch = self._fetch_batch_with_retry(coin, current_start_time, end_ms)
            if not candle_batch:
                break
            all_candles.extend(candle_batch)
            last_ts = candle_batch[-1]["t"]
            if last_ts < current_start_time:
                # Defensive guard against a non-advancing response; the API is
                # expected to return candles in ascending timestamp order.
                break
            current_start_time = last_ts + 1
            time.sleep(0.25)  # Small delay between batches to respect rate limits.
        return all_candles

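    # Worked trace of the loop above, with hypothetical timestamps: a batch
    # covering t = 1_000..5_000 returns last_ts = 5_000, so the next request
    # starts at 5_001; once a batch comes back empty, the window is exhausted
    # and the loop exits with everything collected so far.
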
    def _fetch_batch_with_retry(self, coin, start_ms, end_ms):
        """Performs a single API call with a retry mechanism."""
        max_retries = 3
        for attempt in range(max_retries):
            try:
                return self.info.candles_snapshot(coin, self.interval, start_ms, end_ms)
            except ClientError as e:
                # HTTP 429: rate limited. Retry after a pause; give up on anything else.
                if e.status_code == 429 and attempt < max_retries - 1:
                    logging.warning("Rate limited. Retrying...")
                    time.sleep(2)
                else:
                    logging.error(f"API Error for {coin}: {e}.")
                    return None
        return None

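    # A minimal exponential-backoff variant of the retry above (a sketch, not
    # what this module does -- it uses a fixed 2-second pause):
    #
    #     for attempt in range(max_retries):
    #         try:
    #             return self.info.candles_snapshot(coin, self.interval, start_ms, end_ms)
    #         except ClientError as e:
    #             if e.status_code == 429 and attempt < max_retries - 1:
    #                 time.sleep(2 ** attempt)  # 1s, 2s, 4s, ...
    #             else:
    #                 return None
    #     return None
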
    def _save_to_sqlite_with_pandas(self, df: pd.DataFrame, coin: str, is_append: bool) -> int:
        """Saves a pandas DataFrame to an SQLite table and returns the number of saved rows."""
        table_name = f"{coin}_{self.interval}"
        try:
            df.rename(columns=self.column_rename_map, inplace=True)
            df['datetime_utc'] = pd.to_datetime(df['timestamp_ms'], unit='ms')
            final_df = df[['datetime_utc', 'timestamp_ms', 'open', 'high', 'low', 'close', 'volume', 'number_of_trades']]

            write_mode = 'append' if is_append else 'replace'
            final_df.to_sql(table_name, self.conn, if_exists=write_mode, index=False)

            self.conn.execute(f'CREATE INDEX IF NOT EXISTS "idx_{table_name}_time" ON "{table_name}"(datetime_utc);')
            # An index on timestamp_ms would also speed up the MAX(timestamp_ms)
            # resume query in _get_start_time.

            num_saved = len(final_df)
            logging.info(f"Successfully saved {num_saved} candles to table '{table_name}'")
            return num_saved
        except Exception as e:
            logging.error(f"Failed to write to SQLite table '{table_name}': {e}")
            return 0


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Fetch historical candle data and save to SQLite.")
    parser.add_argument(
        "--coins",
        nargs='+',
        default=["BTC", "ETH"],
        help="List of coins to fetch (e.g., BTC ETH), or 'all' to fetch all coins."
    )
    parser.add_argument("--interval", default="1m", help="Candle interval (e.g., 1m, 5m, 1h).")
    parser.add_argument("--days", type=int, default=7, help="Number of days of history to fetch for new coins.")
    parser.add_argument(
        "--log-level",
        default="normal",
        choices=['off', 'normal', 'debug'],
        help="Set the logging level."
    )

    args = parser.parse_args()
    setup_logging(args.log_level, 'DataFetcherDB')

    fetcher = CandleFetcherDB(coins_to_fetch=args.coins, interval=args.interval, days_back=args.days)
    fetcher.run()
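
A typical invocation, followed by a quick sanity check of the resulting table (a sketch; the table name BTC_1m follows the "{coin}_{interval}" pattern used above):

    python data_fetcher.py --coins BTC ETH --interval 1m --days 7

    import sqlite3
    import pandas as pd

    conn = sqlite3.connect("_data/market_data.db")
    df = pd.read_sql('SELECT * FROM "BTC_1m" ORDER BY timestamp_ms DESC LIMIT 5', conn)
    print(df[['datetime_utc', 'close', 'volume']])
    conn.close()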