Lowered signal thresholds far enough that signals started flowing. Added a few debug scripts to see why signals were not firing. Fixed analyst.py indicator calculation to use TA-Lib.

2026-01-18 18:25:10 +02:00
parent f827728f51
commit ea1bd19d13
5 changed files with 749 additions and 32 deletions
@@ -121,38 +121,44 @@ analysis_conn.commit()

 # ========== Technical Indicator Functions ==========
 def compute_indicators(df):
-    close = df['close']
+    """Compute indicators using TA-Lib for accuracy"""
+    import talib
+    
+    close = df['close'].values
+    high = df['high'].values
+    low = df['low'].values
+    volume = df['volume'].values
+    
    # EMA and SMA
-    df['ema_9'] = close.ewm(span=9, adjust=False).mean()
-    df['ema_21'] = close.ewm(span=21, adjust=False).mean()
-    df['sma_50'] = close.rolling(window=50, min_periods=1).mean()
-    df['sma_200'] = close.rolling(window=200, min_periods=1).mean()
-    # RSI (14): using 14-period gains/losses and RSI formula (100 - 100/(1+RS)):contentReference[oaicite:3]{index=3}
-    delta = close.diff()
-    gain = delta.clip(lower=0)
-    loss = -delta.clip(upper=0)
-    avg_gain = gain.rolling(window=14, min_periods=14).mean()
-    avg_loss = loss.rolling(window=14, min_periods=14).mean()
-    rs = avg_gain / avg_loss.replace(0, pd.NA)
-    df['rsi_14'] = 100 - (100 / (1 + rs))
+    df['ema_9'] = talib.EMA(close, timeperiod=9)
+    df['ema_21'] = talib.EMA(close, timeperiod=21)
+    df['sma_50'] = talib.SMA(close, timeperiod=50)
+    df['sma_200'] = talib.SMA(close, timeperiod=200)
+    
+    # RSI (14) - Proper calculation
+    df['rsi_14'] = talib.RSI(close, timeperiod=14)
+    
    # MACD (12,26,9)
-    ema12 = close.ewm(span=12, adjust=False).mean()
-    ema26 = close.ewm(span=26, adjust=False).mean()
-    macd_line = ema12 - ema26
-    df['macd'] = macd_line
-    df['macd_signal'] = macd_line.ewm(span=9, adjust=False).mean()
-    df['macd_hist'] = df['macd'] - df['macd_signal']
+    macd, macd_signal, macd_hist = talib.MACD(close, fastperiod=12, slowperiod=26, signalperiod=9)
+    df['macd'] = macd
+    df['macd_signal'] = macd_signal
+    df['macd_hist'] = macd_hist
+    
    # Bollinger Bands (20,2)
-    df['bb_middle'] = close.rolling(window=20, min_periods=20).mean()
-    bb_std = close.rolling(window=20, min_periods=20).std()
-    df['bb_upper'] = df['bb_middle'] + 2 * bb_std
-    df['bb_lower'] = df['bb_middle'] - 2 * bb_std
-    # Bollinger Squeeze: detect when BB width is lowest over 20 periods:contentReference[oaicite:4]{index=4}
-    bb_width = df['bb_upper'] - df['bb_lower']
-    rolling_min_width = bb_width.rolling(window=20, min_periods=20).min()
-    df['bb_squeeze'] = (bb_width <= rolling_min_width).astype(int)
-    # Volume moving average (20)
-    df['volume_ma_20'] = df['volume'].rolling(window=20, min_periods=1).mean()
+    bb_upper, bb_middle, bb_lower = talib.BBANDS(close, timeperiod=20, nbdevup=2, nbdevdn=2, matype=0)
+    df['bb_upper'] = bb_upper
+    df['bb_middle'] = bb_middle
+    df['bb_lower'] = bb_lower
+    
+    # Bollinger Squeeze
+    bb_width = bb_upper - bb_lower
+    bb_width_series = pd.Series(bb_width)
+    rolling_min_width = bb_width_series.rolling(window=20, min_periods=20).min()
+    df['bb_squeeze'] = (bb_width_series <= rolling_min_width).fillna(0).astype(int)
+    
+    # Volume MA
+    df['volume_ma_20'] = talib.SMA(volume, timeperiod=20)
+    
    return df

 # ========== Health Check Server ==========
@@ -0,0 +1,196 @@
+#!/usr/bin/env python3
+"""
+Backfill Missing Indicators
+Calculates RSI and Bollinger Bands for existing data
+"""
+
+import sqlite3
+import pandas as pd
+import numpy as np
+import talib
+from datetime import datetime
+
+
+def backfill_indicators(candles_db: str, analysis_db: str):
+    """Backfill RSI and Bollinger Bands for all timeframes"""
+    
+    print("🔧 BACKFILLING MISSING INDICATORS")
+    print("=" * 70)
+    
+    # Connect to databases
+    conn_candles = sqlite3.connect(candles_db)
+    conn_analysis = sqlite3.connect(analysis_db)
+    
+    # Get all timeframes
+    cursor = conn_analysis.cursor()
+    cursor.execute("SELECT DISTINCT timeframe FROM analysis ORDER BY timeframe")
+    timeframes = [row[0] for row in cursor.fetchall()]
+    
+    total_updated = 0
+    
+    for timeframe in timeframes:
+        print(f"\n📊 Processing {timeframe}...")
+        
+        # Fetch candle data
+        df_candles = pd.read_sql_query(
+            "SELECT timestamp, close, high, low FROM candles WHERE timeframe = ? ORDER BY timestamp",
+            conn_candles,
+            params=(timeframe,)
+        )
+        
+        if len(df_candles) < 20:
+            print(f"  ⚠️  Skipping - insufficient data ({len(df_candles)} rows)")
+            continue
+        
+        print(f"  ✓ Loaded {len(df_candles)} candles")
+        
+        # Calculate RSI
+        df_candles['rsi_14'] = talib.RSI(df_candles['close'].values, timeperiod=14)
+        
+        # Calculate Bollinger Bands
+        bb_upper, bb_middle, bb_lower = talib.BBANDS(
+            df_candles['close'].values,
+            timeperiod=20,
+            nbdevup=2,
+            nbdevdn=2,
+            matype=0
+        )
+        
+        df_candles['bb_upper'] = bb_upper
+        df_candles['bb_middle'] = bb_middle
+        df_candles['bb_lower'] = bb_lower
+        
+        # Calculate BB Squeeze
+        # Squeeze = when BB width is in the lowest 20% of recent widths
+        df_candles['bb_width'] = df_candles['bb_upper'] - df_candles['bb_lower']
+        df_candles['bb_width_rank'] = df_candles['bb_width'].rolling(window=100).apply(
+            lambda x: (x.iloc[-1] <= x.quantile(0.2)).astype(int) if len(x) >= 20 else 0,
+            raw=False
+        )
+        df_candles['bb_squeeze'] = df_candles['bb_width_rank'].fillna(0).astype(int)
+        
+        # Update analysis database
+        cursor_update = conn_analysis.cursor()
+        updated = 0
+        
+        for _, row in df_candles.iterrows():
+            cursor_update.execute("""
+                UPDATE analysis 
+                SET rsi_14 = ?, bb_upper = ?, bb_middle = ?, bb_lower = ?, bb_squeeze = ?
+                WHERE timeframe = ? AND timestamp = ?
+            """, (
+                float(row['rsi_14']) if not pd.isna(row['rsi_14']) else None,
+                float(row['bb_upper']) if not pd.isna(row['bb_upper']) else None,
+                float(row['bb_middle']) if not pd.isna(row['bb_middle']) else None,
+                float(row['bb_lower']) if not pd.isna(row['bb_lower']) else None,
+                int(row['bb_squeeze']),
+                timeframe,
+                int(row['timestamp'])
+            ))
+            updated += cursor_update.rowcount
+        
+        conn_analysis.commit()
+        total_updated += updated
+        
+        print(f"  ✅ Updated {updated} rows")
+        
+        # Show sample
+        latest = df_candles.iloc[-1]
+        print(f"     Latest RSI: {latest['rsi_14']:.2f}" if not pd.isna(latest['rsi_14']) else "     Latest RSI: NULL")
+        print(f"     Latest BB: Upper=${latest['bb_upper']:.2f}, Lower=${latest['bb_lower']:.2f}" if not pd.isna(latest['bb_upper']) else "     Latest BB: NULL")
+    
+    conn_candles.close()
+    conn_analysis.close()
+    
+    print(f"\n{'='*70}")
+    print(f"✅ BACKFILL COMPLETE!")
+    print(f"   Total rows updated: {total_updated}")
+    print(f"{'='*70}")
+
+
+def verify_backfill(analysis_db: str):
+    """Verify the backfill worked"""
+    print("\n🔍 VERIFICATION")
+    print("=" * 70)
+    
+    conn = sqlite3.connect(analysis_db)
+    cursor = conn.cursor()
+    
+    cursor.execute("SELECT DISTINCT timeframe FROM analysis")
+    timeframes = [row[0] for row in cursor.fetchall()]
+    
+    for tf in timeframes:
+        # Count NULL values
+        cursor.execute("""
+            SELECT 
+                COUNT(*) as total,
+                SUM(CASE WHEN rsi_14 IS NULL THEN 1 ELSE 0 END) as rsi_null,
+                SUM(CASE WHEN bb_upper IS NULL THEN 1 ELSE 0 END) as bb_null
+            FROM analysis 
+            WHERE timeframe = ?
+        """, (tf,))
+        
+        total, rsi_null, bb_null = cursor.fetchone()
+        
+        print(f"\n{tf}:")
+        print(f"  Total rows: {total}")
+        print(f"  RSI NULL: {rsi_null} ({rsi_null/total*100:.1f}%)" if total > 0 else "  RSI NULL: N/A")
+        print(f"  BB NULL: {bb_null} ({bb_null/total*100:.1f}%)" if total > 0 else "  BB NULL: N/A")
+        
+        # Get latest values
+        cursor.execute("""
+            SELECT rsi_14, bb_upper, bb_lower, bb_squeeze
+            FROM analysis 
+            WHERE timeframe = ?
+            ORDER BY timestamp DESC 
+            LIMIT 1
+        """, (tf,))
+        
+        row = cursor.fetchone()
+        if row and row[0] is not None:
+            print(f"  ✅ Latest: RSI={row[0]:.2f}, BB_upper=${row[1]:.2f}, BB_squeeze={row[3]}")
+        else:
+            print(f"  ❌ Latest values still NULL")
+    
+    conn.close()
+
+
+def main():
+    import json
+    
+    # Load config
+    try:
+        with open("config.json", "r") as f:
+            config = json.load(f)
+        candles_db = config.get("candles_db", "../onramp/market_data.db")
+        analysis_db = config.get("analysis_db", "../analysis/analysis.db")
+    except FileNotFoundError:
+        print("❌ config.json not found, using default paths")
+        candles_db = "../onramp/market_data.db"
+        analysis_db = "../analysis/analysis.db"
+    
+    print(f"Candles DB:  {candles_db}")
+    print(f"Analysis DB: {analysis_db}")
+    
+    try:
+        backfill_indicators(candles_db, analysis_db)
+        verify_backfill(analysis_db)
+        
+        print("\n💡 NEXT STEPS:")
+        print("=" * 70)
+        print("1. Run the signal debugger again:")
+        print("   python3 signal_debugger.py")
+        print("\n2. Restart the signal generator:")
+        print("   pkill -f signals.py")
+        print("   ./signals.py")
+        print("\n3. Update your analysis pipeline to calculate these indicators")
+        print("   going forward so you don't need to backfill again")
+        
+    except Exception as e:
+        print(f"\n❌ Error: {e}")
+        import traceback
+        traceback.print_exc()
+
+
+# Run the backfill only when this file is executed as a script, not on import.
+if __name__ == "__main__":
+    main()