#!/usr/bin/env python3
"""
Backfill Missing Indicators

Calculates RSI and Bollinger Bands for existing data.
"""

import sqlite3
import pandas as pd
import numpy as np
import talib
from datetime import datetime


def backfill_indicators(candles_db: str, analysis_db: str):
    """Backfill RSI and Bollinger Bands for all timeframes."""
    print("šŸ”§ BACKFILLING MISSING INDICATORS")
    print("=" * 70)

    # Connect to databases
    conn_candles = sqlite3.connect(candles_db)
    conn_analysis = sqlite3.connect(analysis_db)

    # Get all timeframes present in the analysis table
    cursor = conn_analysis.cursor()
    cursor.execute("SELECT DISTINCT timeframe FROM analysis ORDER BY timeframe")
    timeframes = [row[0] for row in cursor.fetchall()]

    total_updated = 0

    for timeframe in timeframes:
        print(f"\nšŸ“Š Processing {timeframe}...")

        # Fetch candle data
        df_candles = pd.read_sql_query(
            "SELECT timestamp, close, high, low FROM candles WHERE timeframe = ? ORDER BY timestamp",
            conn_candles,
            params=(timeframe,)
        )

        if len(df_candles) < 20:
            print(f"   āš ļø Skipping - insufficient data ({len(df_candles)} rows)")
            continue

        print(f"   āœ“ Loaded {len(df_candles)} candles")

        # Calculate RSI
        df_candles['rsi_14'] = talib.RSI(df_candles['close'].values, timeperiod=14)

        # Calculate Bollinger Bands
        bb_upper, bb_middle, bb_lower = talib.BBANDS(
            df_candles['close'].values,
            timeperiod=20,
            nbdevup=2,
            nbdevdn=2,
            matype=0
        )
        df_candles['bb_upper'] = bb_upper
        df_candles['bb_middle'] = bb_middle
        df_candles['bb_lower'] = bb_lower

        # Calculate BB squeeze:
        # a bar is "squeezed" when its band width sits in the lowest 20% of the
        # trailing 100-bar window. min_periods=20 lets shorter histories still
        # produce a value, matching the len(x) >= 20 guard in the lambda.
        df_candles['bb_width'] = df_candles['bb_upper'] - df_candles['bb_lower']
        df_candles['bb_width_rank'] = df_candles['bb_width'].rolling(
            window=100, min_periods=20
        ).apply(
            lambda x: int(x.iloc[-1] <= x.quantile(0.2)) if len(x) >= 20 else 0,
            raw=False
        )
        df_candles['bb_squeeze'] = df_candles['bb_width_rank'].fillna(0).astype(int)

        # Update analysis database
        cursor_update = conn_analysis.cursor()
        updated = 0

        for _, row in df_candles.iterrows():
            cursor_update.execute("""
                UPDATE analysis
                SET rsi_14 = ?, bb_upper = ?, bb_middle = ?, bb_lower = ?, bb_squeeze = ?
                WHERE timeframe = ? AND timestamp = ?
""", ( float(row['rsi_14']) if not pd.isna(row['rsi_14']) else None, float(row['bb_upper']) if not pd.isna(row['bb_upper']) else None, float(row['bb_middle']) if not pd.isna(row['bb_middle']) else None, float(row['bb_lower']) if not pd.isna(row['bb_lower']) else None, int(row['bb_squeeze']), timeframe, int(row['timestamp']) )) updated += cursor_update.rowcount conn_analysis.commit() total_updated += updated print(f" āœ… Updated {updated} rows") # Show sample latest = df_candles.iloc[-1] print(f" Latest RSI: {latest['rsi_14']:.2f}" if not pd.isna(latest['rsi_14']) else " Latest RSI: NULL") print(f" Latest BB: Upper=${latest['bb_upper']:.2f}, Lower=${latest['bb_lower']:.2f}" if not pd.isna(latest['bb_upper']) else " Latest BB: NULL") conn_candles.close() conn_analysis.close() print(f"\n{'='*70}") print(f"āœ… BACKFILL COMPLETE!") print(f" Total rows updated: {total_updated}") print(f"{'='*70}") def verify_backfill(analysis_db: str): """Verify the backfill worked""" print("\nšŸ” VERIFICATION") print("=" * 70) conn = sqlite3.connect(analysis_db) cursor = conn.cursor() cursor.execute("SELECT DISTINCT timeframe FROM analysis") timeframes = [row[0] for row in cursor.fetchall()] for tf in timeframes: # Count NULL values cursor.execute(""" SELECT COUNT(*) as total, SUM(CASE WHEN rsi_14 IS NULL THEN 1 ELSE 0 END) as rsi_null, SUM(CASE WHEN bb_upper IS NULL THEN 1 ELSE 0 END) as bb_null FROM analysis WHERE timeframe = ? """, (tf,)) total, rsi_null, bb_null = cursor.fetchone() print(f"\n{tf}:") print(f" Total rows: {total}") print(f" RSI NULL: {rsi_null} ({rsi_null/total*100:.1f}%)" if total > 0 else " RSI NULL: N/A") print(f" BB NULL: {bb_null} ({bb_null/total*100:.1f}%)" if total > 0 else " BB NULL: N/A") # Get latest values cursor.execute(""" SELECT rsi_14, bb_upper, bb_lower, bb_squeeze FROM analysis WHERE timeframe = ? ORDER BY timestamp DESC LIMIT 1 """, (tf,)) row = cursor.fetchone() if row and row[0] is not None: print(f" āœ… Latest: RSI={row[0]:.2f}, BB_upper=${row[1]:.2f}, BB_squeeze={row[3]}") else: print(f" āŒ Latest values still NULL") conn.close() def main(): import json # Load config try: with open("config.json", "r") as f: config = json.load(f) candles_db = config.get("candles_db", "../onramp/market_data.db") analysis_db = config.get("analysis_db", "../analysis/analysis.db") except FileNotFoundError: print("āŒ config.json not found, using default paths") candles_db = "../onramp/market_data.db" analysis_db = "../analysis/analysis.db" print(f"Candles DB: {candles_db}") print(f"Analysis DB: {analysis_db}") try: backfill_indicators(candles_db, analysis_db) verify_backfill(analysis_db) print("\nšŸ’” NEXT STEPS:") print("=" * 70) print("1. Run the signal debugger again:") print(" python3 signal_debugger.py") print("\n2. Restart the signal generator:") print(" pkill -f signals.py") print(" ./signals.py") print("\n3. Update your analysis pipeline to calculate these indicators") print(" going forward so you don't need to backfill again") except Exception as e: print(f"\nāŒ Error: {e}") import traceback traceback.print_exc() if __name__ == "__main__": main()