Files
hyper/market_cap_fetcher.py
2025-10-18 15:10:46 +02:00

284 lines
12 KiB
Python

import argparse
import logging
import os
import sys
import sqlite3
import pandas as pd
import requests
import time
from datetime import datetime, timezone, timedelta
import json
# Assuming logging_utils.py is in the same directory
from logging_utils import setup_logging
class MarketCapFetcher:
    """
    Fetches historical daily market cap data from the CoinGecko API and
    incrementally updates the SQLite database. It processes individual coins,
    aggregates stablecoins, and captures total market cap metrics.

    Tables written (all with columns ``datetime_utc`` and ``market_cap``):
      * ``<SYMBOL>_market_cap``      - one table per requested coin
      * ``STABLECOINS_market_cap``   - per-day sum over STABLECOIN_ID_MAP
      * ``TOTAL_market_cap_daily``   - one global snapshot per UTC day
    """

    # Ticker symbol -> CoinGecko coin ID for individually tracked coins.
    COIN_ID_MAP = {
        "BTC": "bitcoin",
        "ETH": "ethereum",
        "SOL": "solana",
        "BNB": "binancecoin",
        "HYPE": "hyperliquid",
        # NOTE(review): "astar" appears to be the ID of Astar Network (ticker
        # ASTR); confirm this is really the coin intended for "ASTER".
        "ASTER": "astar",
        "ZEC": "zcash",
        "PUMP": "pump-fun",  # Correct ID is 'pump-fun'
        "SUI": "sui",
    }

    # Ticker symbol -> CoinGecko coin ID for the stablecoins that are summed
    # into the STABLECOINS_market_cap table.
    STABLECOIN_ID_MAP = {
        "USDT": "tether",
        "USDC": "usd-coin",
        "USDE": "ethena-usde",
        "DAI": "dai",
        "PYUSD": "paypal-usd",
    }

    # Seconds to wait for a CoinGecko response before giving up; without a
    # timeout `requests` can block forever on a stalled connection.
    REQUEST_TIMEOUT = 30

    # Largest history window (in days) requested from the API in one call.
    HISTORY_DAYS = 365

    def __init__(self, log_level: str, coins: list):
        """
        Configure logging, paths and API credentials.

        Args:
            log_level: Level name forwarded to ``setup_logging``.
            coins: Coin symbols to process; symbols missing from
                COIN_ID_MAP are skipped (with a warning) by ``run``.

        Exits the process with status 1 when the COINGECKO_API_KEY
        environment variable is not set.
        """
        setup_logging(log_level, 'MarketCapFetcher')
        self.coins_to_fetch = coins
        self.db_path = os.path.join("_data", "market_data.db")
        self.api_base_url = "https://api.coingecko.com/api/v3"
        self.api_key = os.environ.get("COINGECKO_API_KEY")
        if not self.api_key:
            logging.error("CoinGecko API key not found. Please set the COINGECKO_API_KEY environment variable.")
            sys.exit(1)

    def run(self):
        """
        Main execution function to process all configured coins and update the database.
        """
        logging.info("Starting historical market cap fetch process from CoinGecko...")

        # sqlite3.connect() fails when the parent directory does not exist.
        os.makedirs(os.path.dirname(self.db_path) or ".", exist_ok=True)

        conn = sqlite3.connect(self.db_path)
        try:
            # `with conn` only commits/rolls back; the connection itself is
            # closed explicitly in the `finally` below.
            with conn:
                conn.execute("PRAGMA journal_mode=WAL;")

                # 1. Process individual coins
                for coin_symbol in self.coins_to_fetch:
                    coin_id = self.COIN_ID_MAP.get(coin_symbol.upper())
                    if not coin_id:
                        logging.warning(f"No CoinGecko ID found for '{coin_symbol}'. Skipping.")
                        continue
                    logging.info(f"--- Processing {coin_symbol} ({coin_id}) ---")
                    try:
                        self._update_market_cap_for_coin(coin_id, coin_symbol, conn)
                    except Exception as e:
                        # Best effort: one failing coin must not stop the others.
                        logging.error(f"An unexpected error occurred while processing {coin_symbol}: {e}")
                    time.sleep(2)  # pause between API calls

                # 2. Process and aggregate stablecoins
                self._update_stablecoin_aggregate(conn)

                # 3. Process total market cap metrics
                self._update_total_market_cap(conn)

                # 4. Save a summary of the latest data
                self._save_summary(conn)
        finally:
            conn.close()

        logging.info("--- Market cap fetch process complete ---")

    def _save_summary(self, conn):
        """
        Queries the last record from each market cap table and saves a summary to a JSON file.

        The summary maps each table name to its most recent row and adds a
        ``summary_last_updated_utc`` timestamp. Errors are logged, not raised.
        """
        logging.info("--- Generating Market Cap Summary ---")
        summary_data = {}
        summary_file_path = os.path.join("_data", "market_cap_data.json")
        try:
            cursor = conn.cursor()
            cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND (name LIKE '%_market_cap' OR name LIKE 'TOTAL_%');")
            tables = [row[0] for row in cursor.fetchall()]

            for table_name in tables:
                try:
                    df_last = pd.read_sql(f'SELECT * FROM "{table_name}" ORDER BY datetime_utc DESC LIMIT 1', conn)
                    if not df_last.empty:
                        summary_data[table_name] = df_last.to_dict('records')[0]
                except Exception as e:
                    logging.error(f"Could not read last record from table '{table_name}': {e}")

            if summary_data:
                summary_data['summary_last_updated_utc'] = datetime.now(timezone.utc).isoformat()
                with open(summary_file_path, 'w', encoding='utf-8') as f:
                    json.dump(summary_data, f, indent=4)
                logging.info(f"Successfully saved market cap summary to '{summary_file_path}'")
            else:
                logging.warning("No data found to create a summary.")
        except Exception as e:
            logging.error(f"Failed to generate summary: {e}")

    def _update_total_market_cap(self, conn):
        """
        Fetches the current total market cap and saves it for the current date.

        At most one row per UTC calendar day is stored; if today's row already
        exists the API is not called. Errors are logged, not raised.
        """
        logging.info("--- Processing Total Market Cap ---")
        table_name = "TOTAL_market_cap_daily"
        try:
            today_date = datetime.now(timezone.utc).date()

            if self._table_exists(conn, table_name):
                # Check if we already have a record for today
                cursor = conn.cursor()
                cursor.execute(f"SELECT 1 FROM \"{table_name}\" WHERE date(datetime_utc) = ? LIMIT 1", (today_date.isoformat(),))
                if cursor.fetchone():
                    logging.info(f"Total market cap for {today_date} already exists. Skipping.")
                    return

            logging.info("Fetching current global market data...")
            global_data = self._get_json(f"{self.api_base_url}/global").get('data', {})
            total_mc = global_data.get('total_market_cap', {}).get('usd')

            if total_mc:
                df_total = pd.DataFrame([{
                    'datetime_utc': pd.to_datetime(today_date),
                    'market_cap': total_mc
                }])
                df_total.to_sql(table_name, conn, if_exists='append', index=False)
                logging.info(f"Saved total market cap for {today_date}: ${total_mc:,.2f}")
            else:
                logging.warning("Global market data did not contain a USD total market cap. Nothing saved.")
        except requests.exceptions.RequestException as e:
            logging.error(f"Failed to fetch global market data: {e}")
        except Exception as e:
            logging.error(f"An error occurred while updating total market cap: {e}")

    def _update_stablecoin_aggregate(self, conn):
        """Fetches data for all stablecoins and saves the aggregated market cap."""
        logging.info("--- Processing aggregated stablecoin market cap ---")

        # Collect the frames and combine once, instead of re-concatenating an
        # ever-growing frame on every iteration.
        frames = []
        missing = []
        for symbol, coin_id in self.STABLECOIN_ID_MAP.items():
            logging.info(f"Fetching historical data for stablecoin: {symbol}...")
            df = self._fetch_historical_data(coin_id, days=self.HISTORY_DAYS)
            if df.empty:
                missing.append(symbol)
            else:
                frames.append(df)
            time.sleep(2)  # pause between API calls

        if not frames:
            logging.warning("No data fetched for any stablecoins. Cannot create aggregate.")
            return
        if missing:
            logging.warning(f"Stablecoin aggregate is missing data for: {', '.join(missing)}.")

        aggregated_df = self._aggregate_daily_market_cap(frames)

        table_name = "STABLECOINS_market_cap"
        last_date_in_db = self._get_last_date_from_db(table_name, conn)
        if last_date_in_db:
            aggregated_df = aggregated_df[aggregated_df['datetime_utc'] > last_date_in_db]

        if not aggregated_df.empty:
            aggregated_df.to_sql(table_name, conn, if_exists='append', index=False)
            logging.info(f"Successfully saved {len(aggregated_df)} daily records to '{table_name}'.")
        else:
            logging.info("Aggregated stablecoin data is already up-to-date.")

    @staticmethod
    def _aggregate_daily_market_cap(frames: list) -> pd.DataFrame:
        """
        Sum per-coin market caps into one row per calendar day.

        Args:
            frames: One DataFrame per coin, each with a datetime64
                ``datetime_utc`` column and a ``market_cap`` column.

        Returns:
            DataFrame with ``datetime_utc`` (midnight of each day) and the
            summed ``market_cap``, sorted by day. Empty when no input has rows.

        Each coin contributes exactly one value per day (its latest point of
        that day). Without this, a coin that has both a midnight point and a
        later same-day point would be counted twice in that day's total.
        """
        per_coin_daily = []
        for frame in frames:
            if frame.empty:
                continue
            daily = frame[['datetime_utc', 'market_cap']].sort_values('datetime_utc', kind='stable')
            daily = daily.assign(datetime_utc=daily['datetime_utc'].dt.normalize())
            per_coin_daily.append(daily.drop_duplicates(subset='datetime_utc', keep='last'))

        if not per_coin_daily:
            return pd.DataFrame(columns=['datetime_utc', 'market_cap'])

        combined = pd.concat(per_coin_daily, ignore_index=True)
        return combined.groupby('datetime_utc', as_index=False)['market_cap'].sum()

    def _update_market_cap_for_coin(self, coin_id: str, coin_symbol: str, conn):
        """
        Fetches and appends new market cap data for a single coin.

        Args:
            coin_id: CoinGecko coin ID used in the API request.
            coin_symbol: Symbol used to build the ``<symbol>_market_cap`` table name.
            conn: Open sqlite3 connection.
        """
        table_name = f"{coin_symbol}_market_cap"
        last_date_in_db = self._get_last_date_from_db(table_name, conn)

        days_to_fetch = self.HISTORY_DAYS
        if last_date_in_db:
            # Stored timestamps are naive UTC, so "now" must be naive UTC too;
            # local time would skew the delta by the machine's UTC offset.
            now_utc = datetime.now(timezone.utc).replace(tzinfo=None)
            delta_days = (now_utc - last_date_in_db).days
            if delta_days <= 0:
                logging.info(f"Market cap data for '{coin_symbol}' is already up-to-date.")
                return
            days_to_fetch = min(delta_days + 1, self.HISTORY_DAYS)
        else:
            logging.info(f"No existing data found. Fetching initial {days_to_fetch} days for {coin_symbol}.")

        df = self._fetch_historical_data(coin_id, days=days_to_fetch)
        if df.empty:
            logging.warning(f"No market cap data returned from API for {coin_symbol}.")
            return

        if last_date_in_db:
            # Only rows strictly newer than what is already stored.
            df = df[df['datetime_utc'] > last_date_in_db]

        if not df.empty:
            df.to_sql(table_name, conn, if_exists='append', index=False)
            logging.info(f"Successfully saved {len(df)} new daily market cap records for {coin_symbol}.")
        else:
            logging.info(f"Data was fetched, but no new records needed saving for '{coin_symbol}'.")

    @staticmethod
    def _table_exists(conn, table_name: str) -> bool:
        """Return True when a table with exactly this name exists in the database."""
        cursor = conn.cursor()
        # Bound parameter instead of string interpolation into the SQL text.
        cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name=?;", (table_name,))
        return cursor.fetchone() is not None

    def _get_last_date_from_db(self, table_name: str, conn) -> "pd.Timestamp | None":
        """
        Gets the most recent date from a market cap table as a pandas Timestamp.

        Returns None when the table does not exist, is empty, or cannot be
        read (the error is logged). The result is always timezone-naive.
        """
        try:
            if not self._table_exists(conn, table_name):
                return None
            last_date_str = conn.execute(f'SELECT MAX(datetime_utc) FROM "{table_name}"').fetchone()[0]
            if not last_date_str:
                return None
            last_date = pd.to_datetime(last_date_str)
            if last_date.tzinfo is not None:
                # Callers compare against naive UTC values.
                last_date = last_date.tz_convert(None)
            return last_date
        except Exception as e:
            logging.error(f"Could not read last date from table '{table_name}': {e}")
            return None

    def _get_json(self, url: str, params: dict = None):
        """
        GET ``url`` with the API key header and return the decoded JSON body.

        Raises:
            requests.exceptions.RequestException: on connection errors,
                timeouts or non-2xx responses.
            ValueError: when the body is not valid JSON.
        """
        headers = {"x-cg-demo-api-key": self.api_key}
        response = requests.get(url, params=params, headers=headers, timeout=self.REQUEST_TIMEOUT)
        response.raise_for_status()
        return response.json()

    def _fetch_historical_data(self, coin_id: str, days: int) -> pd.DataFrame:
        """
        Fetches historical market chart data from CoinGecko for a specified number of days.

        Returns:
            DataFrame with ``datetime_utc`` and ``market_cap`` columns, or an
            empty DataFrame when the request fails or returns no data.
        """
        url = f"{self.api_base_url}/coins/{coin_id}/market_chart"
        params = {"vs_currency": "usd", "days": days, "interval": "daily"}
        try:
            logging.debug(f"Fetching last {days} days for {coin_id}...")
            # The query parameters must actually be sent; otherwise the
            # requested `days` window has no effect on the call.
            data = self._get_json(url, params=params)
        except (requests.exceptions.RequestException, ValueError) as e:
            logging.error(f"API request failed for {coin_id}: {e}.")
            return pd.DataFrame()
        return self._market_caps_to_frame(data.get('market_caps', []))

    @staticmethod
    def _market_caps_to_frame(market_caps) -> pd.DataFrame:
        """
        Convert a list of ``[timestamp_ms, market_cap]`` pairs to a DataFrame.

        Returns a frame with ``datetime_utc`` (naive, parsed from epoch
        milliseconds) and ``market_cap``; rows with an identical timestamp are
        collapsed to the last one. Empty input yields an empty DataFrame.
        """
        if not market_caps:
            return pd.DataFrame()
        df = pd.DataFrame(market_caps, columns=['timestamp_ms', 'market_cap'])
        df['datetime_utc'] = pd.to_datetime(df['timestamp_ms'], unit='ms')
        df = df.drop_duplicates(subset=['datetime_utc'], keep='last')
        return df[['datetime_utc', 'market_cap']]
if __name__ == "__main__":
    # Command-line entry point: parse the coin list and log level, then run
    # a single fetch/update pass.
    default_symbols = ["BTC", "ETH", "SOL", "BNB", "HYPE", "ASTER", "ZEC", "PUMP", "SUI"]

    cli = argparse.ArgumentParser(description="Fetch historical market cap data from CoinGecko.")
    # One or more coin symbols; defaults to the full tracked set.
    cli.add_argument(
        "--coins",
        nargs='+',
        default=default_symbols,
        help="List of coin symbols to fetch (e.g., BTC ETH).",
    )
    # Verbosity name handed to the fetcher's logging setup.
    cli.add_argument(
        "--log-level",
        default="normal",
        choices=['off', 'normal', 'debug'],
        help="Set the logging level for the script.",
    )
    options = cli.parse_args()

    MarketCapFetcher(log_level=options.log_level, coins=options.coins).run()