Initial commit

Kalzu Rekku
2026-01-13 21:03:27 +02:00
commit abb60348f6
8 changed files with 713 additions and 0 deletions

onramp/README.md Normal file

@@ -0,0 +1,39 @@
# Key Features of this Implementation:
## File Rotation Handling:
The FileTailer continuously checks the directory for the newest file matching the pattern. If a new hourly file appears, it automatically switches to it.
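As a rough illustration of the selection step (it mirrors `FileTailer.get_latest_file` further down; the example filename is only an assumption about the hourly naming scheme):

```python
import glob
import os

# Minimal sketch: pick the newest matching file by lexicographic sort.
# This works as long as filenames embed a sortable timestamp, e.g.
# publicTrade.BTCUSDT_2026-01-13_21.jsonl (hypothetical example name).
pattern = os.path.join("../input/output", "publicTrade.BTCUSDT_*.jsonl")
files = sorted(glob.glob(pattern))
newest = files[-1] if files else None
print(newest)
```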
## OHLCV Logic:
- Open: Set when a candle is first created for that timestamp bucket.
- High/Low: Updated via max() and min().
- Close: Updated with every new trade.
- Volume: Accumulated across all trades in the bucket.
- Buy Volume: Calculated by filtering trades where S == "Buy".

## SQLite Upsert:
Uses ON CONFLICT(timeframe, timestamp) DO UPDATE. This is crucial because trades for the same minute arrive sequentially; the existing row must be updated rather than creating duplicates.
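As a quick sanity check of what the upsert produces, a minimal sketch that reads back the most recent 1-minute candles from market_data.db (column names follow the schema created in onramp.py):

```python
import sqlite3

# Read back the five most recent 1-minute candles written by the upsert.
with sqlite3.connect("market_data.db") as conn:
    rows = conn.execute(
        "SELECT timestamp, open, high, low, close, volume, buy_volume "
        "FROM candles WHERE timeframe = ? ORDER BY timestamp DESC LIMIT 5",
        ("1m",),
    ).fetchall()

for ts, o, h, low, c, vol, buy_vol in rows:
    print(ts, o, h, low, c, vol, buy_vol)
```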
## Robustness:
- Wrapped in try-except blocks to prevent the service from crashing on a single malformed line.
- Comprehensive logging to both file and console.
- Configurable polling interval (default 200 ms).
# To get the live, unfinished candle data:
```bash
echo "live" | nc 127.0.0.1 9999
```
To get the general health/status:
```bash
echo "status" | nc 127.0.0.1 9999
```
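For scripted access, a minimal Python equivalent of the nc calls above (the `query` helper is illustrative only; host and port assume the defaults in config.json):

```python
import json
import socket

def query(command: str, host: str = "127.0.0.1", port: int = 9999) -> dict:
    """Send a single command ("status" or "live") and parse the JSON reply."""
    with socket.create_connection((host, port), timeout=5.0) as sock:
        sock.sendall(command.encode("utf-8"))
        chunks = []
        while True:
            chunk = sock.recv(4096)
            if not chunk:  # the server closes the connection after replying
                break
            chunks.append(chunk)
    return json.loads(b"".join(chunks).decode("utf-8"))

if __name__ == "__main__":
    print(query("status"))
```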

onramp/config.json Normal file

@@ -0,0 +1,9 @@
{
    "input_directory": "../input/output",
    "file_pattern": "publicTrade.BTCUSDT_*.jsonl",
    "database_path": "market_data.db",
    "status_host": "127.0.0.1",
    "status_port": 9999,
    "poll_interval_ms": 200,
    "symbol": "BTCUSDT"
}

onramp/onramp.py Normal file

@@ -0,0 +1,199 @@
import os
import json
import time
import glob
import sqlite3
import logging
import socket
import threading
from datetime import datetime
from collections import defaultdict
# Setup Logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s [%(levelname)s] %(message)s',
    handlers=[logging.FileHandler("processor.log"), logging.StreamHandler()]
)
logger = logging.getLogger(__name__)
class CandleAggregator:
    def __init__(self, db_path):
        self.db_path = db_path
        self.lock = threading.Lock()  # Ensure thread safety
        self.timeframes = {
            "1m": 60,
            "5m": 300,
            "15m": 900,
            "1h": 3600
        }
        self.init_db()
        # Cache structure: {timeframe: {timestamp: {data}}}
        self.cache = defaultdict(dict)

    def init_db(self):
        with sqlite3.connect(self.db_path) as conn:
            conn.execute("""
                CREATE TABLE IF NOT EXISTS candles (
                    timeframe TEXT,
                    timestamp INTEGER,
                    open REAL, high REAL, low REAL, close REAL,
                    volume REAL, buy_volume REAL,
                    PRIMARY KEY (timeframe, timestamp)
                )
            """)
            conn.commit()

    def process_trade(self, ts_ms, price, volume, side):
        ts_s = ts_ms // 1000
        is_buy = 1 if side.lower() == "buy" else 0
        with self.lock:
            for tf_name, seconds in self.timeframes.items():
                # Bucket the trade into the candle that starts at a multiple of the timeframe
                candle_ts = (ts_s // seconds) * seconds
                current = self.cache[tf_name].get(candle_ts)
                if not current:
                    current = {
                        "timestamp": candle_ts,
                        "open": price, "high": price, "low": price, "close": price,
                        "volume": volume, "buy_volume": volume if is_buy else 0.0
                    }
                else:
                    current["high"] = max(current["high"], price)
                    current["low"] = min(current["low"], price)
                    current["close"] = price
                    current["volume"] += volume
                    if is_buy:
                        current["buy_volume"] += volume
                self.cache[tf_name][candle_ts] = current
                self.save_to_db(tf_name, candle_ts, current)

    def save_to_db(self, timeframe, ts, data):
        try:
            with sqlite3.connect(self.db_path) as conn:
                conn.execute("""
                    INSERT INTO candles (timeframe, timestamp, open, high, low, close, volume, buy_volume)
                    VALUES (?, ?, ?, ?, ?, ?, ?, ?)
                    ON CONFLICT(timeframe, timestamp) DO UPDATE SET
                        high = excluded.high, low = excluded.low, close = excluded.close,
                        volume = excluded.volume, buy_volume = excluded.buy_volume
                """, (timeframe, ts, data['open'], data['high'], data['low'],
                      data['close'], data['volume'], data['buy_volume']))
        except Exception as e:
            logger.error(f"Database error: {e}")

    def get_live_snapshot(self):
        """Returns the current state of all active candles in the cache."""
        with self.lock:
            # We return a copy to avoid dictionary size mutation errors during JSON serialization
            return json.loads(json.dumps(self.cache))
class StatusServer:
    def __init__(self, host, port, stats_ref, aggregator):
        self.host = host
        self.port = port
        self.stats = stats_ref
        self.aggregator = aggregator
        self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        self.sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)

    def start(self):
        self.sock.bind((self.host, self.port))
        self.sock.listen(5)
        threading.Thread(target=self._serve, daemon=True).start()
        logger.info(f"Network server started on {self.host}:{self.port}")

    def _serve(self):
        while True:
            try:
                client, addr = self.sock.accept()
                # Set a timeout so a slow client doesn't hang the server
                client.settimeout(2.0)
                # Receive command (e.g., "status" or "live")
                data = client.recv(1024).decode('utf-8').strip()
                response = {}
                if data == "live":
                    response = {
                        "type": "live_candles",
                        "data": self.aggregator.get_live_snapshot()
                    }
                else:
                    # Default to status info
                    response = {
                        "type": "status",
                        "uptime_start": self.stats['start_time'],
                        "last_file": self.stats['last_file'],
                        "total_trades": self.stats['lines_count'],
                        "last_ts": self.stats['last_ts']
                    }
                # sendall ensures the full JSON payload is written before closing
                client.sendall(json.dumps(response).encode('utf-8'))
                client.close()
            except Exception as e:
                logger.error(f"Server error: {e}")
class FileTailer:
    def __init__(self, config):
        self.config = config
        self.aggregator = CandleAggregator(config['database_path'])
        self.stats = {
            "start_time": datetime.now().isoformat(),
            "last_file": None, "lines_count": 0, "last_ts": None
        }
        self.status_server = StatusServer(
            config['status_host'],
            config['status_port'],
            self.stats,
            self.aggregator
        )

    def get_latest_file(self):
        path_pattern = os.path.join(self.config['input_directory'], self.config['file_pattern'])
        files = sorted(glob.glob(path_pattern))
        return files[-1] if files else None

    def run(self):
        self.status_server.start()
        current_file_path = self.get_latest_file()
        last_position = 0
        while True:
            # Check whether a newer file (e.g. the next hourly file) has appeared
            newest_file = self.get_latest_file()
            if newest_file and newest_file != current_file_path:
                logger.info(f"Rotating to: {newest_file}")
                current_file_path = newest_file
                last_position = 0
            if current_file_path and os.path.exists(current_file_path):
                self.stats['last_file'] = current_file_path
                with open(current_file_path, 'r') as f:
                    # Resume from where the previous poll stopped
                    f.seek(last_position)
                    while True:
                        line = f.readline()
                        if not line:
                            break
                        self.process_line(line)
                    last_position = f.tell()
            time.sleep(self.config['poll_interval_ms'] / 1000.0)

    def process_line(self, line):
        try:
            payload = json.loads(line)
            if "data" not in payload:
                return
            for trade in payload["data"]:
                self.aggregator.process_trade(
                    trade["T"], float(trade["p"]), float(trade["v"]), trade["S"]
                )
                self.stats['last_ts'] = trade["T"]
            self.stats['lines_count'] += 1
        except Exception as e:
            logger.error(f"Line processing error: {e}")


if __name__ == "__main__":
    with open("config.json", "r") as cf:
        conf = json.load(cf)
    FileTailer(conf).run()