#!/usr/bin/env python3
"""
Backfill Missing Indicators

Calculates RSI and Bollinger Bands for existing data
"""

import sqlite3

import pandas as pd
import talib
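
# Assumed schema, inferred from the queries in this script (the actual tables
# may contain additional columns):
#   candles(timeframe, timestamp, close, high, low, ...)
#   analysis(timeframe, timestamp, rsi_14, bb_upper, bb_middle, bb_lower, bb_squeeze, ...)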


def backfill_indicators(candles_db: str, analysis_db: str):
    """Backfill RSI and Bollinger Bands for all timeframes"""

    print("🔧 BACKFILLING MISSING INDICATORS")
    print("=" * 70)

    # Connect to databases
    conn_candles = sqlite3.connect(candles_db)
    conn_analysis = sqlite3.connect(analysis_db)

    # Get all timeframes
    cursor = conn_analysis.cursor()
    cursor.execute("SELECT DISTINCT timeframe FROM analysis ORDER BY timeframe")
    timeframes = [row[0] for row in cursor.fetchall()]

    total_updated = 0

    for timeframe in timeframes:
        print(f"\n📊 Processing {timeframe}...")

        # Fetch candle data
        df_candles = pd.read_sql_query(
            "SELECT timestamp, close, high, low FROM candles WHERE timeframe = ? ORDER BY timestamp",
            conn_candles,
            params=(timeframe,)
        )

        if len(df_candles) < 20:
            print(f" ⚠️ Skipping - insufficient data ({len(df_candles)} rows)")
            continue

        print(f" ✓ Loaded {len(df_candles)} candles")

        # Calculate RSI (14-period)
        df_candles['rsi_14'] = talib.RSI(df_candles['close'].values, timeperiod=14)

        # Calculate Bollinger Bands (20-period SMA, 2 standard deviations)
        bb_upper, bb_middle, bb_lower = talib.BBANDS(
            df_candles['close'].values,
            timeperiod=20,
            nbdevup=2,
            nbdevdn=2,
            matype=0
        )

        df_candles['bb_upper'] = bb_upper
        df_candles['bb_middle'] = bb_middle
        df_candles['bb_lower'] = bb_lower

        # Calculate BB squeeze:
        # squeeze = 1 when the current band width is at or below the 20th percentile
        # of widths over the trailing 100 candles (min_periods=20 lets the flag be
        # computed once at least 20 widths exist; earlier rows default to 0).
        df_candles['bb_width'] = df_candles['bb_upper'] - df_candles['bb_lower']
        df_candles['bb_width_rank'] = df_candles['bb_width'].rolling(window=100, min_periods=20).apply(
            lambda x: int(x.iloc[-1] <= x.quantile(0.2)),
            raw=False
        )
        df_candles['bb_squeeze'] = df_candles['bb_width_rank'].fillna(0).astype(int)

        # Update analysis database
        cursor_update = conn_analysis.cursor()
        updated = 0
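
        # The UPDATE below matches rows on (timeframe, timestamp). On a large analysis
        # table, an index like the hypothetical one below speeds up these per-row updates:
        #   CREATE INDEX IF NOT EXISTS idx_analysis_tf_ts ON analysis(timeframe, timestamp)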

        for _, row in df_candles.iterrows():
            cursor_update.execute("""
                UPDATE analysis
                SET rsi_14 = ?, bb_upper = ?, bb_middle = ?, bb_lower = ?, bb_squeeze = ?
                WHERE timeframe = ? AND timestamp = ?
            """, (
                float(row['rsi_14']) if not pd.isna(row['rsi_14']) else None,
                float(row['bb_upper']) if not pd.isna(row['bb_upper']) else None,
                float(row['bb_middle']) if not pd.isna(row['bb_middle']) else None,
                float(row['bb_lower']) if not pd.isna(row['bb_lower']) else None,
                int(row['bb_squeeze']),
                timeframe,
                int(row['timestamp'])
            ))
            updated += cursor_update.rowcount

        conn_analysis.commit()
        total_updated += updated

        print(f" ✅ Updated {updated} rows")

        # Show sample
        latest = df_candles.iloc[-1]
        print(f" Latest RSI: {latest['rsi_14']:.2f}" if not pd.isna(latest['rsi_14']) else " Latest RSI: NULL")
        print(f" Latest BB: Upper=${latest['bb_upper']:.2f}, Lower=${latest['bb_lower']:.2f}" if not pd.isna(latest['bb_upper']) else " Latest BB: NULL")

    conn_candles.close()
    conn_analysis.close()

    print(f"\n{'='*70}")
    print("✅ BACKFILL COMPLETE!")
    print(f" Total rows updated: {total_updated}")
    print(f"{'='*70}")


def verify_backfill(analysis_db: str):
    """Verify the backfill worked"""
    print("\n🔍 VERIFICATION")
    print("=" * 70)

    conn = sqlite3.connect(analysis_db)
    cursor = conn.cursor()

    cursor.execute("SELECT DISTINCT timeframe FROM analysis")
    timeframes = [row[0] for row in cursor.fetchall()]

    for tf in timeframes:
        # Count NULL values
        cursor.execute("""
            SELECT
                COUNT(*) as total,
                SUM(CASE WHEN rsi_14 IS NULL THEN 1 ELSE 0 END) as rsi_null,
                SUM(CASE WHEN bb_upper IS NULL THEN 1 ELSE 0 END) as bb_null
            FROM analysis
            WHERE timeframe = ?
        """, (tf,))

        total, rsi_null, bb_null = cursor.fetchone()

        print(f"\n{tf}:")
        print(f" Total rows: {total}")
        print(f" RSI NULL: {rsi_null} ({rsi_null/total*100:.1f}%)" if total > 0 else " RSI NULL: N/A")
        print(f" BB NULL: {bb_null} ({bb_null/total*100:.1f}%)" if total > 0 else " BB NULL: N/A")

        # Get latest values
        cursor.execute("""
            SELECT rsi_14, bb_upper, bb_lower, bb_squeeze
            FROM analysis
            WHERE timeframe = ?
            ORDER BY timestamp DESC
            LIMIT 1
        """, (tf,))

        row = cursor.fetchone()
        if row and row[0] is not None:
            print(f" ✅ Latest: RSI={row[0]:.2f}, BB_upper=${row[1]:.2f}, BB_squeeze={row[3]}")
        else:
            print(" ❌ Latest values still NULL")

    conn.close()


def main():
    import json

    # Load config
    try:
        with open("config.json", "r") as f:
            config = json.load(f)
        candles_db = config.get("candles_db", "../onramp/market_data.db")
        analysis_db = config.get("analysis_db", "../analysis/analysis.db")
    except FileNotFoundError:
        print("❌ config.json not found, using default paths")
        candles_db = "../onramp/market_data.db"
        analysis_db = "../analysis/analysis.db"

    print(f"Candles DB: {candles_db}")
    print(f"Analysis DB: {analysis_db}")

    try:
        backfill_indicators(candles_db, analysis_db)
        verify_backfill(analysis_db)

        print("\n💡 NEXT STEPS:")
        print("=" * 70)
        print("1. Run the signal debugger again:")
        print(" python3 signal_debugger.py")
        print("\n2. Restart the signal generator:")
        print(" pkill -f signals.py")
        print(" ./signals.py")
        print("\n3. Update your analysis pipeline to calculate these indicators")
        print(" going forward so you don't need to backfill again")

    except Exception as e:
        print(f"\n❌ Error: {e}")
        import traceback
        traceback.print_exc()


if __name__ == "__main__":
    main()