hyper/!migrate_to_sqlite.py

import argparse
import logging
import os
import sys
import sqlite3
import pandas as pd
# Assuming logging_utils.py is in the same directory
from logging_utils import setup_logging
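
# Expected input layout (inferred from the code below): one CSV per coin at
# _data/candles/<COIN>_1m.csv, each containing at least a 'datetime_utc' column.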


class Migrator:
    """
    Reads 1-minute candle data from CSV files and migrates it into an
    SQLite database for improved performance and easier access.
    """

    def __init__(self, log_level: str):
        setup_logging(log_level, 'Migrator')
        self.source_folder = os.path.join("_data", "candles")
        self.db_path = os.path.join("_data", "market_data.db")

    def run(self):
        """
        Main execution function to find all CSV files and migrate them to the database.
        """
        if not os.path.exists(self.source_folder):
            logging.error(f"Source data folder '{self.source_folder}' not found. "
                          "Please ensure data has been fetched first.")
            sys.exit(1)

        csv_files = [f for f in os.listdir(self.source_folder) if f.endswith('_1m.csv')]
        if not csv_files:
            logging.warning("No 1-minute CSV files found in the source folder to migrate.")
            return

        logging.info(f"Found {len(csv_files)} source CSV files to migrate to SQLite.")

        # Connect to the SQLite database (it will be created if it doesn't exist)
        with sqlite3.connect(self.db_path) as conn:
            for file_name in csv_files:
                coin = file_name.split('_')[0]
                table_name = f"{coin}_1m"
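                # e.g. a source file 'BTC_1m.csv' (hypothetical coin) becomes table 'BTC_1m'.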
                file_path = os.path.join(self.source_folder, file_name)
                logging.info(f"Migrating '{file_name}' to table '{table_name}'...")
                try:
                    # 1. Load the entire CSV file into a pandas DataFrame.
                    df = pd.read_csv(file_path)
                    if df.empty:
                        logging.warning(f"CSV file '{file_name}' is empty. Skipping.")
                        continue

                    # 2. Convert the timestamp column to a proper datetime object.
                    df['datetime_utc'] = pd.to_datetime(df['datetime_utc'])
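                    # Note: when written through a plain sqlite3 connection, pandas
                    # typically stores datetime64 values as ISO-8601 TEXT in SQLite,
                    # which is the representation the index created below operates on.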
                    # 3. Write the DataFrame to the SQLite database.
                    # 'replace' will drop the table first if it exists and create a new one.
                    # This is ideal for a migration script to ensure a clean import.
                    df.to_sql(
                        table_name,
                        conn,
                        if_exists='replace',
                        index=False  # Do not write the pandas DataFrame index as a column
                    )

                    # 4. (Optional but Recommended) Create an index on the timestamp for fast queries.
                    logging.debug(f"Creating index on 'datetime_utc' for table '{table_name}'...")
                    conn.execute(f"CREATE INDEX IF NOT EXISTS idx_{table_name}_time ON {table_name}(datetime_utc);")

                    logging.info(f"Successfully migrated {len(df)} rows to '{table_name}'.")
                except Exception as e:
                    logging.error(f"Failed to process and migrate file '{file_name}': {e}")

        logging.info("--- Database migration complete ---")


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Migrate 1-minute candle data from CSV files to an SQLite database.")
    parser.add_argument(
        "--log-level",
        default="normal",
        choices=['off', 'normal', 'debug'],
        help="Set the logging level for the script."
    )
    args = parser.parse_args()

    migrator = Migrator(log_level=args.log_level)
    migrator.run()
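
# --- Example (hypothetical, not executed): reading the migrated data back ---
# A minimal sketch of how downstream code could query a migrated table, assuming
# a coin named 'BTC' produced a 'BTC_1m' table via this script:
#
#   import sqlite3
#   import pandas as pd
#
#   with sqlite3.connect("_data/market_data.db") as conn:
#       df = pd.read_sql_query(
#           "SELECT * FROM BTC_1m WHERE datetime_utc >= ? ORDER BY datetime_utc",
#           conn,
#           params=("2024-01-01 00:00:00",),
#           parse_dates=["datetime_utc"],
#       )
#   print(df.head())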