data from files - not working
app.py (338 changed lines)
@@ -3,6 +3,8 @@ import logging
 import asyncio
 import os
 import json
+import csv
+import re
 from flask import Flask, render_template, request
 from flask_socketio import SocketIO
 from binance import Client
@@ -13,6 +15,8 @@ from datetime import datetime, timedelta
 # --- Configuration ---
 SYMBOL = 'ETHUSDT'
 HISTORY_FILE = 'historical_data_1m.json'
+DATA_FOLDER = 'data'
+USER_PREFERENCES_FILE = 'user_preferences.json'
 RESTART_TIMEOUT_S = 15
 BINANCE_WS_URL = f"wss://stream.binance.com:9443/ws/{SYMBOL.lower()}@trade"
 
@@ -28,54 +32,276 @@ socketio = SocketIO(app, async_mode='threading')
 app_initialized = False
 app_init_lock = Lock()
 current_bar = {}  # To track the currently forming 1-minute candle
+selected_csv_file = None  # Currently selected CSV file
+csv_file_lock = Lock()  # Lock for CSV file operations
+
+# --- Utility Functions ---
+def get_available_csv_files():
+    """Get list of available CSV files with their start dates."""
+    csv_files = []
+    if not os.path.exists(DATA_FOLDER):
+        os.makedirs(DATA_FOLDER)
+        return csv_files
+
+    for filename in os.listdir(DATA_FOLDER):
+        if filename.endswith('.csv') and SYMBOL in filename:
+            # Extract date from filename like ETHUSDT_20250101.csv
+            match = re.search(r'(\d{8})', filename)
+            if match:
+                date_str = match.group(1)
+                try:
+                    start_date = datetime.strptime(date_str, '%Y%m%d')
+                    file_path = os.path.join(DATA_FOLDER, filename)
+                    file_size = os.path.getsize(file_path)
+                    csv_files.append({
+                        'filename': filename,
+                        'start_date_str': start_date.strftime('%Y-%m-%d'),
+                        'date_str': date_str,
+                        'size': file_size,
+                        'display_name': f"{start_date.strftime('%Y-%m-%d')} ({filename})"
+                    })
+                    logging.info(f"Found CSV file: {filename}, size: {file_size}, date: {date_str}")
+                except ValueError:
+                    logging.warning(f"Could not parse date from filename: {filename}")
+                    continue
+
+    # Sort by start date (newest first)
+    csv_files.sort(key=lambda x: x['date_str'], reverse=True)
+    logging.info(f"Available CSV files: {[f['filename'] for f in csv_files]}")
+    return csv_files
+
+def get_default_csv_file():
+    """Get the default CSV file (smallest one or last used)."""
+    # Try to load last used file
+    if os.path.exists(USER_PREFERENCES_FILE):
+        try:
+            with open(USER_PREFERENCES_FILE, 'r') as f:
+                prefs = json.load(f)
+            last_file = prefs.get('last_csv_file')
+            if last_file and os.path.exists(os.path.join(DATA_FOLDER, last_file)):
+                logging.info(f"Using last selected file: {last_file}")
+                return last_file
+        except Exception:
+            pass
+
+    # Fall back to smallest file
+    csv_files = get_available_csv_files()
+    if csv_files:
+        # Filter to exclude the large Binance file for better performance
+        filtered_files = [f for f in csv_files if not f['filename'].endswith('_Binance.csv')]
+        if filtered_files:
+            smallest_file = min(filtered_files, key=lambda x: x['size'])
+            logging.info(f"Using smallest filtered file: {smallest_file['filename']} ({smallest_file['size']} bytes)")
+        else:
+            smallest_file = min(csv_files, key=lambda x: x['size'])
+            logging.info(f"Using smallest file: {smallest_file['filename']} ({smallest_file['size']} bytes)")
+        return smallest_file['filename']
+
+    logging.warning("No CSV files found")
+    return None
+
+def save_user_preference(csv_filename):
+    """Save the user's CSV file preference."""
+    prefs = {}
+    if os.path.exists(USER_PREFERENCES_FILE):
+        try:
+            with open(USER_PREFERENCES_FILE, 'r') as f:
+                prefs = json.load(f)
+        except Exception:
+            pass
+
+    prefs['last_csv_file'] = csv_filename
+    with open(USER_PREFERENCES_FILE, 'w') as f:
+        json.dump(prefs, f)
+
+def read_csv_data(csv_filename):
+    """Read historical data from CSV file."""
+    csv_path = os.path.join(DATA_FOLDER, csv_filename)
+    if not os.path.exists(csv_path):
+        return []
+
+    klines = []
+    try:
+        with open(csv_path, 'r', newline='', encoding='utf-8') as csvfile:
+            reader = csv.DictReader(csvfile)
+            for row in reader:
+                # Convert CSV row to kline format
+                open_time = datetime.strptime(row['Open time'], '%Y-%m-%d %H:%M:%S')
+                close_time = datetime.strptime(row['Close time'].split('.')[0], '%Y-%m-%d %H:%M:%S')
+
+                # =================================================================
+                # --- FIX START: Convert string values to numeric types ---
+                # The original code passed the string values from the CSV directly.
+                # This caused the historical data to be misinterpreted by the chart.
+                # By converting to float/int here, we ensure data consistency.
+                # =================================================================
+                kline = [
+                    int(open_time.timestamp() * 1000),          # Open time (ms)
+                    float(row['Open']),                         # Open
+                    float(row['High']),                         # High
+                    float(row['Low']),                          # Low
+                    float(row['Close']),                        # Close
+                    float(row['Volume']),                       # Volume
+                    int(close_time.timestamp() * 1000),         # Close time (ms)
+                    float(row['Quote asset volume']),           # Quote asset volume
+                    int(row['Number of trades']),               # Number of trades
+                    float(row['Taker buy base asset volume']),  # Taker buy base asset volume
+                    float(row['Taker buy quote asset volume']), # Taker buy quote asset volume
+                    float(row['Ignore'])                        # Ignore
+                ]
+                # --- FIX END ---
+                # =================================================================
+                klines.append(kline)
+    except Exception as e:
+        logging.error(f"Error reading CSV file {csv_filename}: {e}")
+        return []
+
+    return klines
+
+def append_to_csv(csv_filename, candle_data):
+    """Append new candle data to CSV file."""
+    csv_path = os.path.join(DATA_FOLDER, csv_filename)
+
+    try:
+        with csv_file_lock:
+            # Convert candle data to CSV row
+            open_time = datetime.fromtimestamp(candle_data['time'])
+            close_time = open_time.replace(second=59, microsecond=999000)
+
+            row = [
+                open_time.strftime('%Y-%m-%d %H:%M:%S'),
+                candle_data['open'],
+                candle_data['high'],
+                candle_data['low'],
+                candle_data['close'],
+                0.0,  # Volume (placeholder)
+                close_time.strftime('%Y-%m-%d %H:%M:%S.%f')[:-3],
+                0.0,  # Quote asset volume (placeholder)
+                1,    # Number of trades (placeholder)
+                0.0,  # Taker buy base asset volume (placeholder)
+                0.0,  # Taker buy quote asset volume (placeholder)
+                0.0   # Ignore
+            ]
+
+            # Check if file exists and has header
+            file_exists = os.path.exists(csv_path)
+
+            with open(csv_path, 'a', newline='', encoding='utf-8') as csvfile:
+                writer = csv.writer(csvfile)
+
+                # Write header if file is new
+                if not file_exists:
+                    headers = [
+                        'Open time', 'Open', 'High', 'Low', 'Close', 'Volume',
+                        'Close time', 'Quote asset volume', 'Number of trades',
+                        'Taker buy base asset volume', 'Taker buy quote asset volume', 'Ignore'
+                    ]
+                    writer.writerow(headers)
+
+                writer.writerow(row)
+
+    except Exception as e:
+        logging.error(f"Error appending to CSV file {csv_filename}: {e}")
+
+def fill_missing_data(csv_filename):
+    """Fill missing data by downloading from Binance."""
+    global selected_csv_file
+
+    try:
+        logging.info(f"Checking for missing data in {csv_filename}")
+
+        # Get the start date from filename
+        match = re.search(r'(\d{8})', csv_filename)
+        if not match:
+            return []  # callers expect a list of klines
+
+        date_str = match.group(1)
+        start_date = datetime.strptime(date_str, '%Y%m%d')
+
+        # Read existing data
+        existing_data = read_csv_data(csv_filename)
+
+        # Determine what data we need to fetch
+        if existing_data:
+            # Get the last timestamp from existing data
+            last_timestamp = existing_data[-1][0] // 1000  # Convert to seconds
+            fetch_start = datetime.fromtimestamp(last_timestamp) + timedelta(minutes=1)
+        else:
+            fetch_start = start_date
+
+        # Fetch missing data up to current time
+        now = datetime.now()
+        if fetch_start >= now:
+            logging.info(f"No missing data for {csv_filename}")
+            return existing_data
+
+        logging.info(f"Fetching missing data from {fetch_start} to {now}")
+
+        client = Client()
+        missing_klines = client.get_historical_klines(
+            SYMBOL,
+            Client.KLINE_INTERVAL_1MINUTE,
+            start_str=fetch_start.strftime('%Y-%m-%d %H:%M:%S'),
+            end_str=now.strftime('%Y-%m-%d %H:%M:%S')
+        )
+
+        if missing_klines:
+            # Append missing data to CSV
+            csv_path = os.path.join(DATA_FOLDER, csv_filename)
+            with csv_file_lock:
+                with open(csv_path, 'a', newline='', encoding='utf-8') as csvfile:
+                    writer = csv.writer(csvfile)
+
+                    for kline in missing_klines:
+                        open_time = datetime.fromtimestamp(kline[0] / 1000)
+                        close_time = datetime.fromtimestamp(kline[6] / 1000)
+
+                        row = [
+                            open_time.strftime('%Y-%m-%d %H:%M:%S'),
+                            kline[1], kline[2], kline[3], kline[4], kline[5],
+                            close_time.strftime('%Y-%m-%d %H:%M:%S.%f')[:-3],
+                            kline[7], kline[8], kline[9], kline[10], kline[11]
+                        ]
+                        writer.writerow(row)
+
+            logging.info(f"Added {len(missing_klines)} missing candles to {csv_filename}")
+            existing_data.extend(missing_klines)
+
+        return existing_data
+
+    except Exception as e:
+        logging.error(f"Error filling missing data for {csv_filename}: {e}")
+        return existing_data if 'existing_data' in locals() else []
+
 # --- Historical Data Streaming ---
 def stream_historical_data(sid):
     """
-    Fetches the last week of historical 1-minute kline data from Binance,
-    saves it to a file, and sends it to the connected client.
+    Loads historical data from the selected CSV file and sends it to the client.
     """
+    global selected_csv_file
+
     try:
         logging.info(f"Starting historical data stream for SID={sid}")
-        client = Client()
-
-        # --- NEW SOLUTION: Load data for the last week ---
-        logging.info(f"Fetching historical data for the last 7 days for SID={sid}")
-        # The `python-binance` library allows using relative date strings.
-        # This single call is more efficient for this use case.
-        all_klines = client.get_historical_klines(
-            SYMBOL,
-            Client.KLINE_INTERVAL_1MINUTE,
-            start_str="8 weeks ago UTC"  # Fetches data starting from 8 weeks ago until now
-        )
-
-        # --- ORIGINAL SOLUTION COMMENTED OUT ---
-        # num_chunks = 6
-        # chunk_size_days = 15
-        # end_date = datetime.utcnow()
-        # all_klines = []
-        #
-        # for i in range(num_chunks):
-        #     start_date = end_date - timedelta(days=chunk_size_days)
-        #     logging.info(f"Fetching chunk {i + 1}/{num_chunks} for SID={sid}")
-        #     new_klines = client.get_historical_klines(SYMBOL, Client.KLINE_INTERVAL_1MINUTE, str(start_date), str(end_date))
-        #     if new_klines:
-        #         all_klines.extend(new_klines)
-        #     # The progress emission is no longer needed for a single API call
-        #     # socketio.emit('history_progress', {'progress': ((i + 1) / num_chunks) * 100}, to=sid)
-        #     end_date = start_date
-        #     socketio.sleep(0.05)
-        # --- END OF ORIGINAL SOLUTION ---
-
-        # The rest of the function processes the `all_klines` data as before
-        seen = set()
-        unique_klines = [kline for kline in sorted(all_klines, key=lambda x: x[0]) if tuple(kline) not in seen and not seen.add(tuple(kline))]
-
-        with open(HISTORY_FILE, 'w') as f:
-            json.dump(unique_klines, f)
-
-        logging.info(f"Finished data stream for SID={sid}. Sending final payload of {len(unique_klines)} klines.")
-        socketio.emit('history_finished', {'klines_1m': unique_klines}, to=sid)
+        # Get selected CSV file or default
+        if not selected_csv_file:
+            selected_csv_file = get_default_csv_file()
+
+        if not selected_csv_file:
+            # No CSV files available, create a default one
+            logging.warning("No CSV files available, creating default file")
+            selected_csv_file = f"ETHUSDT_{datetime.now().strftime('%Y%m%d')}.csv"
+
+        logging.info(f"Using CSV file: {selected_csv_file}")
+
+        # Fill missing data and get all klines
+        all_klines = fill_missing_data(selected_csv_file)
+
+        # Send progress update
+        socketio.emit('history_progress', {'progress': 100}, to=sid)
+
+        logging.info(f"Finished data stream for SID={sid}. Sending final payload of {len(all_klines)} klines.")
+        socketio.emit('history_finished', {'klines_1m': all_klines}, to=sid)
     except Exception as e:
         logging.error(f"Error in stream_historical_data for SID={sid}: {e}", exc_info=True)
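Editor's note on the FIX block above: csv.DictReader yields every field as a string, and string OHLC values compare lexicographically rather than numerically, which is exactly the kind of corruption the chart was showing. A minimal sketch of the failure mode (illustrative, not part of the commit):

```python
import csv
import io

# Two values straddling a power of ten expose the problem clearly.
sample = io.StringIO(
    "Open time,Open,High,Low,Close\n"
    "2025-01-01 00:00:00,9999.5,10001.0,9998.0,10000.2\n"
)
row = next(csv.DictReader(sample))

# As strings, '10001.0' < '9998.0' because '1' sorts before '9':
assert not (row['High'] > row['Low'])
# Cast to float (as the FIX block now does) and the comparison is correct:
assert float(row['High']) > float(row['Low'])
```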
@@ -104,10 +330,14 @@ def binance_listener_thread():
 
         if not current_bar or candle_timestamp > current_bar.get("time", 0):
             if current_bar:
-                # The previous candle is now closed, emit it
+                # The previous candle is now closed, emit it and save to CSV
                 logging.info(f"Candle closed at {current_bar['close']}. Emitting 'candle_closed' event.")
                 socketio.emit('candle_closed', current_bar)
+
+                # Append to selected CSV file
+                if selected_csv_file:
+                    append_to_csv(selected_csv_file, current_bar)
+
             current_bar = {"time": candle_timestamp, "open": price, "high": price, "low": price, "close": price}
         else:
             current_bar['high'] = max(current_bar.get('high', price), price)
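For reference, the bar persisted above is the plain dict maintained by the listener; a sketch of its shape, with illustrative values:

```python
# The in-memory bar built by binance_listener_thread(); 'time' is the
# minute-bucket start in seconds. On rollover it is emitted and, with this
# commit, also written by append_to_csv(), which pads the volume and trade
# columns with placeholders as defined above.
current_bar = {
    "time": 1735689600,
    "open": 3500.1,
    "high": 3510.9,
    "low": 3499.0,
    "close": 3505.5,
}
```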
@@ -135,6 +365,40 @@ def handle_connect():
         app_initialized = True
     socketio.start_background_task(target=stream_historical_data, sid=request.sid)
 
+
+@socketio.on('get_csv_files')
+def handle_get_csv_files():
+    """Send available CSV files to client."""
+    logging.info(f"Received get_csv_files request from SID={request.sid}")
+    csv_files = get_available_csv_files()
+    default_file = get_default_csv_file()
+    logging.info(f"Sending CSV files list: {len(csv_files)} files, default: {default_file}")
+    socketio.emit('csv_files_list', {
+        'files': csv_files,
+        'selected': default_file
+    })
+
+
+@socketio.on('select_csv_file')
+def handle_select_csv_file(data):
+    """Handle CSV file selection by user."""
+    global selected_csv_file
+
+    logging.info(f"Received select_csv_file request from SID={request.sid} with data: {data}")
+    filename = data.get('filename')
+    if filename:
+        csv_files = get_available_csv_files()
+        valid_files = [f['filename'] for f in csv_files]
+
+        if filename in valid_files:
+            selected_csv_file = filename
+            save_user_preference(filename)
+            logging.info(f"User selected CSV file: {filename}")
+
+            # Stream new historical data
+            socketio.start_background_task(target=stream_historical_data, sid=request.sid)
+        else:
+            logging.error(f"Invalid CSV file selected: {filename}")
+            socketio.emit('error', {'message': f'Invalid CSV file: {filename}'})
+
+
 # --- Flask Routes ---
 @app.route('/')
 def index():
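A quick way to exercise the new get_csv_files / select_csv_file events end to end is a headless client. A sketch assuming the python-socketio client package and the app listening on http://localhost:5000 (both are assumptions, not part of the commit):

```python
import socketio  # pip install "python-socketio[client]"

sio = socketio.Client()

@sio.on('csv_files_list')
def on_csv_files(data):
    # Server responds with {'files': [...], 'selected': '<filename>'}
    print('available:', [f['filename'] for f in data['files']])
    if data['selected']:
        # Re-selecting a file triggers a fresh history stream for this client
        sio.emit('select_csv_file', {'filename': data['selected']})

@sio.on('history_finished')
def on_history(data):
    print('received', len(data['klines_1m']), 'klines')
    sio.disconnect()

sio.connect('http://localhost:5000')
sio.emit('get_csv_files')
sio.wait()
```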

data/data_miner.py (new file, 102 lines)
@@ -0,0 +1,102 @@
+import csv
+from datetime import datetime
+
+
+def filter_csv_by_date(input_file, output_file, start_date_str):
+    """
+    Reads a large CSV file line by line, filters by a start date,
+    and writes the results to a new file.
+
+    Args:
+        input_file (str): Path to the large input CSV.
+        output_file (str): Path to the output CSV file.
+        start_date_str (str): The start date in 'YYYY-MM-DD' format.
+    """
+    try:
+        # Convert the start date string into a datetime object for comparison
+        start_date = datetime.strptime(start_date_str, '%Y-%m-%d')
+        print(f"Filtering for dates on or after {start_date_str}...")
+        print(f"Output will be saved to: {output_file}")
+
+        # Open the input and output files
+        with open(input_file, 'r', newline='') as infile, \
+             open(output_file, 'w', newline='') as outfile:
+
+            reader = csv.reader(infile)
+            writer = csv.writer(outfile)
+
+            # 1. Read and write the header
+            header = next(reader)
+            writer.writerow(header)
+
+            # Find the index of the 'Open time' column
+            try:
+                date_column_index = header.index('Open time')
+            except ValueError:
+                print("Error: 'Open time' column not found in the header.")
+                return
+
+            # 2. Process the rest of the file line by line
+            processed_lines = 0
+            written_lines = 0
+            for row in reader:
+                processed_lines += 1
+
+                # Avoid errors from empty or malformed rows
+                if not row:
+                    continue
+
+                try:
+                    # Get the date string from the correct column
+                    row_date_str = row[date_column_index]
+                    # Convert the row's date string to a datetime object
+                    row_date = datetime.strptime(row_date_str, '%Y-%m-%d %H:%M:%S')
+
+                    # 3. Compare dates and write to new file if it's a match
+                    if row_date >= start_date:
+                        writer.writerow(row)
+                        written_lines += 1
+
+                except (ValueError, IndexError) as e:
+                    # This will catch errors if a date is in the wrong format
+                    # or if a row doesn't have enough columns.
+                    print(f"Skipping malformed row {processed_lines + 1}: {row}. Error: {e}")
+                    continue
+
+                # Optional: Print progress for very long operations
+                if processed_lines % 5000000 == 0:
+                    print(f"Processed {processed_lines:,} lines...")
+
+        print("\n--- Processing Complete ---")
+        print(f"Total lines processed: {processed_lines:,}")
+        print(f"Total lines written: {written_lines:,}")
+        print(f"Filtered data saved to: {output_file}")
+
+    except FileNotFoundError:
+        print(f"Error: The file '{input_file}' was not found.")
+    except Exception as e:
+        print(f"An unexpected error occurred: {e}")
+
+
+# --- Configuration ---
+# 1. Replace with the name of your large input file
+input_filename = 'ETHUSDT_1m_Binance.csv'
+
+# 2. Provide the start date in YYYY-MM-DD format
+start_date_filter = '2025-07-01'  # <-- REPLACE THIS
+
+# 3. The output filename is generated automatically in the requested format
+if start_date_filter != 'YYYY-MM-DD':
+    # This line removes the hyphens for the filename
+    filename_date_part = start_date_filter.replace('-', '')
+    output_filename = f'ETHUSDT_{filename_date_part}.csv'
+else:
+    output_filename = 'ETHUSDT_unfiltered.csv'
+
+
+# --- Run the script ---
+if start_date_filter == 'YYYY-MM-DD':
+    print("Please update the 'start_date_filter' variable in the script with a date like '2025-07-01'.")
+else:
+    filter_csv_by_date(input_filename, output_filename, start_date_filter)
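One caveat: the configuration block runs at module scope, so the filter starts the moment this file is imported. A sketch of driving it programmatically instead, assuming a suggested `if __name__ == '__main__':` guard (not in the commit) is added first:

```python
# Assumes data_miner.py has been given the (suggested) guard:
#
#     if __name__ == '__main__':
#         filter_csv_by_date(input_filename, output_filename, start_date_filter)
#
# Run from inside the data/ folder so the import resolves.
from data_miner import filter_csv_by_date

filter_csv_by_date(
    input_file='ETHUSDT_1m_Binance.csv',  # the large Binance dump named above
    output_file='ETHUSDT_20250701.csv',   # matches the ETHUSDT_<YYYYMMDD>.csv pattern app.py scans for
    start_date_str='2025-07-01',
)
```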
File diff suppressed because one or more lines are too long
@@ -1,4 +1,4 @@
 /**
  * Aggregates fine-grained candle data into a larger timeframe.
  * For example, it can convert 1-minute candles into 5-minute candles.
  *
@@ -7,7 +7,7 @@
  * @param {number} intervalMinutes - The desired new candle interval in minutes (e.g., 5 for 5m).
  * @returns {Array<Object>} A new array of aggregated candle objects.
  */
 function aggregateCandles(data, intervalMinutes) {
     if (!data || data.length === 0 || !intervalMinutes || intervalMinutes < 1) {
         return [];
     }
@@ -17,8 +17,19 @@ function aggregateCandles(data, intervalMinutes) {
     let currentAggCandle = null;
 
     data.forEach(candle => {
+        // Validate candle data
+        if (!candle || !candle.time ||
+            isNaN(candle.open) || isNaN(candle.high) ||
+            isNaN(candle.low) || isNaN(candle.close) ||
+            candle.open <= 0 || candle.high <= 0 ||
+            candle.low <= 0 || candle.close <= 0) {
+            console.warn('Skipping invalid candle during aggregation:', candle);
+            return; // Skip this candle
+        }
+
         // Calculate the timestamp for the start of the interval bucket
-        const bucketTimestamp = candle.time - (candle.time % intervalSeconds);
+        // Properly align to interval boundaries (e.g., 5-min intervals start at :00, :05, :10, etc.)
+        const bucketTimestamp = Math.floor(candle.time / intervalSeconds) * intervalSeconds;
 
         if (!currentAggCandle || bucketTimestamp !== currentAggCandle.time) {
             // If a previous aggregated candle exists, push it to the results
@@ -47,4 +58,5 @@ function aggregateCandles(data, intervalMinutes) {
     }
+
     return aggregated;
 }
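A note on the bucket rewrite above: for the positive Unix timestamps used here, the old and new expressions compute the same value, so this is a readability change rather than a behavioral one. A quick check (editor's sketch):

```python
interval = 5 * 60  # 5-minute buckets, in seconds

for t in (1735689600, 1735689661, 1735689899):
    old_bucket = t - (t % interval)          # original JS expression
    new_bucket = (t // interval) * interval  # Math.floor(t / interval) * interval
    assert old_bucket == new_bucket
    print(t, '->', new_bucket)               # 1735689661 -> 1735689600, etc.
```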
@@ -130,6 +130,15 @@
             <div id="progress-container" class="progress-bar-container">
                 <div class="progress-bar"></div>
             </div>
+
+            <!-- CSV File Selection Dropdown -->
+            <div style="margin-top: 15px; width: 100%;">
+                <label for="csv-file-select" style="display: block; margin-bottom: 5px; font-size: 12px; color: var(--text-secondary);">Data Source:</label>
+                <select id="csv-file-select" style="width: 100%; background-color: var(--button-bg); border: 1px solid var(--border-color); color: var(--text-primary); padding: 6px; border-radius: 4px; font-size: 12px; cursor: pointer;">
+                    <option value="">Loading...</option>
+                </select>
+                <div id="csv-info" style="font-size: 10px; color: var(--text-secondary); margin-top: 3px; text-align: center;"></div>
+            </div>
         </div>
         <div class="control-cell" id="indicator-cell-1"></div>
         <div class="control-cell" id="indicator-cell-2"></div>
@@ -192,6 +201,8 @@
         const modalInput = document.getElementById('timeframe-input');
         const modalPreviewText = document.getElementById('timeframe-preview-text');
         const modalConfirmBtn = document.getElementById('timeframe-confirm-btn');
+        const csvFileSelect = document.getElementById('csv-file-select');
+        const csvInfoDiv = document.getElementById('csv-info');
 
         function openModal(initialValue = '') {
             modalOverlay.style.display = 'flex';
@@ -246,19 +257,88 @@
         manager.populateDropdowns();
 
         const socket = io();
-        socket.on('connect', () => console.log('Socket.IO connected.'));
+        socket.on('connect', () => {
+            console.log('Socket.IO connected.');
+            // Request available CSV files
+            socket.emit('get_csv_files');
+        });
 
         socket.on('history_progress', (data) => {
            if (data && data.progress) progressBar.style.width = `${data.progress}%`;
         });
 
+        socket.on('csv_files_list', (data) => {
+            console.log('Received CSV files list:', data);
+            populateCsvDropdown(data.files, data.selected);
+        });
+
+        function populateCsvDropdown(files, selectedFile) {
+            csvFileSelect.innerHTML = '';
+
+            if (files.length === 0) {
+                const option = document.createElement('option');
+                option.value = '';
+                option.textContent = 'No CSV files available';
+                csvFileSelect.appendChild(option);
+                csvInfoDiv.textContent = '';
+                return;
+            }
+
+            files.forEach(file => {
+                const option = document.createElement('option');
+                option.value = file.filename;
+                option.textContent = file.display_name;
+                if (file.filename === selectedFile) {
+                    option.selected = true;
+                    // Show info about selected file
+                    const sizeInMB = (file.size / (1024 * 1024)).toFixed(1);
+                    csvInfoDiv.textContent = `${sizeInMB} MB - ${file.filename}`;
+                }
+                csvFileSelect.appendChild(option);
+            });
+        }
+
+        csvFileSelect.addEventListener('change', (e) => {
+            const selectedFile = e.target.value;
+            if (selectedFile) {
+                console.log('User selected CSV file:', selectedFile);
+
+                // Update info display
+                const selectedOption = e.target.selectedOptions[0];
+                const files = Array.from(e.target.options).map(option => ({
+                    filename: option.value,
+                    display_name: option.textContent,
+                    size: 0 // Will be updated by server response
+                }));
+
+                socket.emit('select_csv_file', { filename: selectedFile });
+
+                // Show loading state
+                progressContainer.style.display = 'block';
+                progressBar.style.width = '0%';
+                csvInfoDiv.textContent = 'Loading...';
+            }
+        });
+
         socket.on('history_finished', (data) => {
             if (!data || !data.klines_1m) return;
             progressBar.style.width = '100%';
-            baseCandleData1m = data.klines_1m.map(k => ({
-                time: k[0] / 1000, open: parseFloat(k[1]), high: parseFloat(k[2]),
-                low: parseFloat(k[3]), close: parseFloat(k[4])
-            }));
+            baseCandleData1m = data.klines_1m
+                .map(k => ({
+                    time: k[0] / 1000,
+                    open: parseFloat(k[1]),
+                    high: parseFloat(k[2]),
+                    low: parseFloat(k[3]),
+                    close: parseFloat(k[4])
+                }))
+                .filter(candle => {
+                    // Filter out invalid candles with null, undefined, or NaN values
+                    return candle.time &&
+                        !isNaN(candle.open) && !isNaN(candle.high) &&
+                        !isNaN(candle.low) && !isNaN(candle.close) &&
+                        candle.open > 0 && candle.high > 0 &&
+                        candle.low > 0 && candle.close > 0;
+                });
             updateChartForTimeframe(true);
             setTimeout(() => { progressContainer.style.display = 'none'; }, 500);
         });
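The parse-and-validate pipeline added to the history_finished handler, rendered in Python for reference (editor's sketch; the kline layout matches what read_csv_data() emits):

```python
import math

def to_candles(klines_1m):
    """Mirror of the client-side map + filter: build candle dicts, drop junk."""
    candles = [
        {
            'time': k[0] / 1000,  # ms -> s, matching the chart's time scale
            'open': float(k[1]),
            'high': float(k[2]),
            'low': float(k[3]),
            'close': float(k[4]),
        }
        for k in klines_1m
    ]
    return [
        c for c in candles
        if c['time'] > 0 and all(
            not math.isnan(c[f]) and c[f] > 0
            for f in ('open', 'high', 'low', 'close')
        )
    ]

print(to_candles([[1735689600000, '3500.1', '3510.9', '3499.0', '3505.5']]))
```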
@@ -267,6 +347,16 @@
         function handleLiveUpdate(update) {
             if (baseCandleData1m.length === 0 || displayedCandleData.length === 0) return;
+
+            // Validate the update data
+            if (!update || !update.time ||
+                isNaN(update.open) || isNaN(update.high) ||
+                isNaN(update.low) || isNaN(update.close) ||
+                update.open <= 0 || update.high <= 0 ||
+                update.low <= 0 || update.close <= 0) {
+                console.warn('Invalid update data received:', update);
+                return;
+            }
+
             // First, ensure the base 1m data is up-to-date.
             const lastBaseCandle = baseCandleData1m[baseCandleData1m.length - 1];
             if (update.time > lastBaseCandle.time) {
@@ -278,20 +368,21 @@
             const candleDurationSeconds = currentTimeframeMinutes * 60;
             let lastDisplayedCandle = displayedCandleData[displayedCandleData.length - 1];
+
+            // Calculate which bucket this update belongs to using simple division
+            const updateBucketTime = Math.floor(update.time / candleDurationSeconds) * candleDurationSeconds;
+
             // Check if the update belongs to the currently forming displayed candle
-            if (update.time >= lastDisplayedCandle.time && update.time < lastDisplayedCandle.time + candleDurationSeconds) {
+            if (updateBucketTime === lastDisplayedCandle.time) {
                 // It does, so just update the High, Low, and Close prices
                 lastDisplayedCandle.high = Math.max(lastDisplayedCandle.high, update.high);
                 lastDisplayedCandle.low = Math.min(lastDisplayedCandle.low, update.low);
                 lastDisplayedCandle.close = update.close;
                 candlestickSeries.update(lastDisplayedCandle);
-            } else if (update.time >= lastDisplayedCandle.time + candleDurationSeconds) {
+            } else if (updateBucketTime > lastDisplayedCandle.time) {
                 // This update is for a NEW candle.
-                const newCandleTime = Math.floor(update.time / candleDurationSeconds) * candleDurationSeconds;
 
                 // Create the new candle. Its O,H,L,C are all from this first tick.
                 const newCandle = {
-                    time: newCandleTime,
+                    time: updateBucketTime,
                     open: update.open,
                     high: update.high,
                     low: update.low,
@@ -383,11 +474,22 @@
 
         function updateChartForTimeframe(isFullReset = false) {
             if (baseCandleData1m.length === 0) return;
+
+            try {
             const visibleTimeRange = isFullReset ? null : chart.timeScale().getVisibleTimeRange();
             const newCandleData = aggregateCandles(baseCandleData1m, currentTimeframeMinutes);
 
-            if (newCandleData.length > 0) {
-                displayedCandleData = newCandleData;
+            // Validate the aggregated data
+            const validCandleData = newCandleData.filter(candle => {
+                return candle && candle.time &&
+                    !isNaN(candle.open) && !isNaN(candle.high) &&
+                    !isNaN(candle.low) && !isNaN(candle.close) &&
+                    candle.open > 0 && candle.high > 0 &&
+                    candle.low > 0 && candle.close > 0;
+            });
+
+            if (validCandleData.length > 0) {
+                displayedCandleData = validCandleData;
                 candlestickSeries.setData(displayedCandleData);
                 chartTitle.textContent = `{{ symbol }} Chart (${currentTimeframeMinutes}m)`;
                 manager.recalculateAllAfterHistory(baseCandleData1m, displayedCandleData);
@@ -396,6 +498,13 @@
             } else {
                 chart.timeScale().fitContent();
             }
+            } else {
+                console.warn('No valid candle data available for timeframe:', currentTimeframeMinutes);
+            }
+            } catch (error) {
+                console.error('Error updating chart for timeframe:', error);
+                console.error('Current timeframe:', currentTimeframeMinutes);
+                console.error('Base data length:', baseCandleData1m.length);
             }
         }