Initial commit
This commit is contained in:
92
!migrate_to_sqlite.py
Normal file
92
!migrate_to_sqlite.py
Normal file
@ -0,0 +1,92 @@
|
||||
import argparse
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
import sqlite3
|
||||
import pandas as pd
|
||||
|
||||
# Assuming logging_utils.py is in the same directory
|
||||
from logging_utils import setup_logging
|
||||
|
||||
class Migrator:
    """
    Reads 1-minute candle data from CSV files and migrates it into an
    SQLite database for improved performance and easier access.
    """

    def __init__(self, log_level: str):
        """
        Configure logging and resolve the source/destination paths.

        Args:
            log_level: Verbosity passed to setup_logging
                ('off', 'normal', or 'debug').
        """
        setup_logging(log_level, 'Migrator')
        # Hard-coded project layout: source CSVs live under _data/candles
        # and the database is created alongside them in _data/.
        self.source_folder = os.path.join("_data", "candles")
        self.db_path = os.path.join("_data", "market_data.db")

    def run(self):
        """
        Main execution function to find all CSV files and migrate them to the database.

        Exits the process with status 1 when the source folder is missing.
        Individual files that fail to parse are logged and skipped so one
        bad file cannot abort the rest of the migration.
        """
        if not os.path.exists(self.source_folder):
            logging.error(f"Source data folder '{self.source_folder}' not found. "
                          "Please ensure data has been fetched first.")
            sys.exit(1)

        csv_files = [f for f in os.listdir(self.source_folder) if f.endswith('_1m.csv')]

        if not csv_files:
            logging.warning("No 1-minute CSV files found in the source folder to migrate.")
            return

        logging.info(f"Found {len(csv_files)} source CSV files to migrate to SQLite.")

        # Connect to the SQLite database (it will be created if it doesn't exist).
        # NOTE: 'with sqlite3.connect(...)' only manages the transaction, NOT the
        # connection lifetime, so the connection is closed explicitly in 'finally'.
        conn = sqlite3.connect(self.db_path)
        try:
            with conn:
                for file_name in csv_files:
                    self._migrate_file(conn, file_name)
        finally:
            conn.close()

        logging.info("--- Database migration complete ---")

    def _migrate_file(self, conn, file_name: str):
        """Load one CSV file and write it to its own table, then index it."""
        coin = file_name.split('_')[0]
        table_name = f"{coin}_1m"
        file_path = os.path.join(self.source_folder, file_name)

        logging.info(f"Migrating '{file_name}' to table '{table_name}'...")

        try:
            # 1. Load the entire CSV file into a pandas DataFrame.
            df = pd.read_csv(file_path)

            if df.empty:
                logging.warning(f"CSV file '{file_name}' is empty. Skipping.")
                return

            # 2. Convert the timestamp column to a proper datetime object.
            df['datetime_utc'] = pd.to_datetime(df['datetime_utc'])

            # 3. Write the DataFrame to the SQLite database.
            # 'replace' drops any existing table first and creates a new one,
            # which is ideal for a migration script: a clean import every run.
            df.to_sql(
                table_name,
                conn,
                if_exists='replace',
                index=False  # Do not write the pandas DataFrame index as a column
            )

            # 4. Create an index on the timestamp for fast range queries.
            # Identifiers are double-quoted so a coin symbol containing
            # unusual characters cannot break (or inject into) the SQL.
            logging.debug(f"Creating index on 'datetime_utc' for table '{table_name}'...")
            conn.execute(
                f'CREATE INDEX IF NOT EXISTS "idx_{table_name}_time" '
                f'ON "{table_name}"(datetime_utc);'
            )

            logging.info(f"Successfully migrated {len(df)} rows to '{table_name}'.")

        except Exception as e:
            # Broad catch is deliberate: one malformed file should not stop
            # the migration of the remaining files.
            logging.error(f"Failed to process and migrate file '{file_name}': {e}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Command-line entry point: parse the log level, then run the migration.
    cli = argparse.ArgumentParser(
        description="Migrate 1-minute candle data from CSV files to an SQLite database."
    )
    cli.add_argument(
        "--log-level",
        choices=['off', 'normal', 'debug'],
        default="normal",
        help="Set the logging level for the script.",
    )
    options = cli.parse_args()

    Migrator(log_level=options.log_level).run()
|
||||
Reference in New Issue
Block a user