timestamp_ms column added to all tables as primary key
This commit is contained in:
50
resampler.py
50
resampler.py
@@ -37,7 +37,7 @@ class Resampler:
|
||||
|
||||
def _ensure_tables_exist(self):
|
||||
"""
|
||||
Ensures all resampled tables exist with a PRIMARY KEY on datetime_utc.
|
||||
Ensures all resampled tables exist with a PRIMARY KEY on timestamp_ms.
|
||||
Attempts to migrate existing tables if the schema is incorrect.
|
||||
"""
|
||||
with sqlite3.connect(self.db_path) as conn:
|
||||
@@ -48,13 +48,22 @@ class Resampler:
|
||||
cursor.execute(f"PRAGMA table_info('{table_name}')")
|
||||
columns = cursor.fetchall()
|
||||
if columns:
|
||||
pk_found = any(col[1] == 'datetime_utc' and col[5] == 1 for col in columns)
|
||||
# --- FIX: Check for the correct PRIMARY KEY on timestamp_ms ---
|
||||
pk_found = any(col[1] == 'timestamp_ms' and col[5] == 1 for col in columns)
|
||||
if not pk_found:
|
||||
logging.warning(f"Schema migration needed for table '{table_name}'.")
|
||||
try:
|
||||
conn.execute(f'ALTER TABLE "{table_name}" RENAME TO "{table_name}_old"')
|
||||
self._create_resampled_table(conn, table_name)
|
||||
conn.execute(f'INSERT OR IGNORE INTO "{table_name}" SELECT * FROM "{table_name}_old"')
|
||||
# Copy data, ensuring to create the timestamp_ms
|
||||
logging.info(f" -> Migrating data for '{table_name}'...")
|
||||
old_df = pd.read_sql(f'SELECT * FROM "{table_name}_old"', conn, parse_dates=['datetime_utc'])
|
||||
if not old_df.empty:
|
||||
old_df['timestamp_ms'] = (old_df['datetime_utc'].astype('int64') // 10**6)
|
||||
# Keep only unique timestamps, preserving the last entry
|
||||
old_df.drop_duplicates(subset=['timestamp_ms'], keep='last', inplace=True)
|
||||
old_df.to_sql(table_name, conn, if_exists='append', index=False)
|
||||
logging.info(f" -> Data migration complete.")
|
||||
conn.execute(f'DROP TABLE "{table_name}_old"')
|
||||
conn.commit()
|
||||
logging.info(f"Successfully migrated schema for '{table_name}'.")
|
||||
@@ -67,9 +76,11 @@ class Resampler:
|
||||
|
||||
def _create_resampled_table(self, conn, table_name):
|
||||
"""Creates a new resampled table with the correct schema."""
|
||||
# --- FIX: Set PRIMARY KEY on timestamp_ms for performance and uniqueness ---
|
||||
conn.execute(f'''
|
||||
CREATE TABLE "{table_name}" (
|
||||
datetime_utc TEXT PRIMARY KEY,
|
||||
datetime_utc TEXT,
|
||||
timestamp_ms INTEGER PRIMARY KEY,
|
||||
open REAL,
|
||||
high REAL,
|
||||
low REAL,
|
||||
@@ -123,22 +134,21 @@ class Resampler:
|
||||
source_table_name = f"{coin}_1m"
|
||||
logging.debug(f" Updating {tf_name} table...")
|
||||
|
||||
last_timestamp = self._get_last_timestamp(conn, target_table_name)
|
||||
last_timestamp_ms = self._get_last_timestamp(conn, target_table_name)
|
||||
|
||||
query = f'SELECT * FROM "{source_table_name}"'
|
||||
params = ()
|
||||
if last_timestamp:
|
||||
query += ' WHERE datetime_utc >= ?'
|
||||
if last_timestamp_ms:
|
||||
query += ' WHERE timestamp_ms >= ?'
|
||||
# Go back one interval to rebuild the last (potentially partial) candle
|
||||
try:
|
||||
# --- FIX: Try the fast method first ---
|
||||
interval_delta = pd.to_timedelta(tf_code)
|
||||
query_start_date = last_timestamp - interval_delta
|
||||
interval_delta_ms = pd.to_timedelta(tf_code).total_seconds() * 1000
|
||||
except ValueError:
|
||||
# --- FIX: Fall back to the safe method for special timeframes ---
|
||||
logging.debug(f"Cannot create timedelta for '{tf_code}'. Using safe 32-day lookback.")
|
||||
query_start_date = last_timestamp - timedelta(days=32)
|
||||
# Fall back to a safe 32-day lookback for special timeframes
|
||||
interval_delta_ms = timedelta(days=32).total_seconds() * 1000
|
||||
|
||||
params = (query_start_date.strftime('%Y-%m-%d %H:%M:%S'),)
|
||||
query_start_ms = last_timestamp_ms - interval_delta_ms
|
||||
params = (query_start_ms,)
|
||||
|
||||
df_1m = pd.read_sql(query, conn, params=params, parse_dates=['datetime_utc'])
|
||||
|
||||
@@ -155,14 +165,15 @@ class Resampler:
|
||||
for index, row in resampled_df.iterrows():
|
||||
records_to_upsert.append((
|
||||
index.strftime('%Y-%m-%d %H:%M:%S'),
|
||||
int(index.timestamp() * 1000), # Generate timestamp_ms
|
||||
row['open'], row['high'], row['low'], row['close'],
|
||||
row['volume'], row['number_of_trades']
|
||||
))
|
||||
|
||||
cursor = conn.cursor()
|
||||
cursor.executemany(f'''
|
||||
INSERT OR REPLACE INTO "{target_table_name}" (datetime_utc, open, high, low, close, volume, number_of_trades)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?)
|
||||
INSERT OR REPLACE INTO "{target_table_name}" (datetime_utc, timestamp_ms, open, high, low, close, volume, number_of_trades)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
|
||||
''', records_to_upsert)
|
||||
conn.commit()
|
||||
|
||||
@@ -203,10 +214,11 @@ class Resampler:
|
||||
logging.info(f" - {tf_name:<10}: {total:,} candles")
|
||||
|
||||
def _get_last_timestamp(self, conn, table_name):
|
||||
"""Gets the timestamp of the last entry in a table as a pandas Timestamp."""
|
||||
"""Gets the millisecond timestamp of the last entry in a table."""
|
||||
try:
|
||||
timestamp_str = pd.read_sql(f'SELECT MAX(datetime_utc) FROM "{table_name}"', conn).iloc[0, 0]
|
||||
return pd.to_datetime(timestamp_str) if timestamp_str else None
|
||||
# --- FIX: Query for the integer timestamp_ms, not the text datetime_utc ---
|
||||
timestamp_ms = pd.read_sql(f'SELECT MAX(timestamp_ms) FROM "{table_name}"', conn).iloc[0, 0]
|
||||
return int(timestamp_ms) if pd.notna(timestamp_ms) else None
|
||||
except (pd.io.sql.DatabaseError, IndexError):
|
||||
return None
|
||||
|
||||
|
||||
Reference in New Issue
Block a user