""" Data reader for fetching market data from Redis Streams """ import logging from typing import Optional, List, Dict, Any from datetime import datetime, timedelta import pandas as pd import redis import orjson import requests import time from .config import config logger = logging.getLogger(__name__) class MarketDataReader: """Read and aggregate market data from Redis Streams""" def __init__(self): self.redis_client = redis.Redis( host=config.REDIS_HOST, port=config.REDIS_PORT, db=config.REDIS_DB, decode_responses=False, ) def fetch_historical_klines_from_api( self, symbol: str = 'BTCUSDT', interval: str = '5m', limit: int = 200 ) -> pd.DataFrame: """ Fetch historical kline data from Binance API Args: symbol: Trading pair (e.g., 'BTCUSDT') interval: Kline interval (e.g., '5m', '15m', '1h', '4h') limit: Number of candles to fetch (max 1500) Returns: DataFrame with historical OHLCV data """ try: # Binance API endpoint url = 'https://fapi.binance.com/fapi/v1/klines' params = { 'symbol': symbol, 'interval': interval, 'limit': min(limit, 1500) # API limit } logger.info(f"Fetching {limit} historical candles from Binance API ({symbol} {interval})...") response = requests.get(url, params=params, timeout=10) response.raise_for_status() data = response.json() # Parse API response klines = [] for item in data: klines.append({ 'timestamp': datetime.fromtimestamp(item[0] / 1000), 'open': float(item[1]), 'high': float(item[2]), 'low': float(item[3]), 'close': float(item[4]), 'volume': float(item[5]), 'quote_volume': float(item[7]), 'trades': int(item[8]), 'is_closed': True, # Historical data is always closed }) df = pd.DataFrame(klines) if not df.empty: df.set_index('timestamp', inplace=True) df.sort_index(inplace=True) logger.info(f"✅ Fetched {len(df)} candles from Binance API") return df except Exception as e: logger.error(f"Error fetching from Binance API: {e}") return pd.DataFrame() def read_kline_stream( self, stream_key: str, count: int = None, use_api_fallback: bool = True ) -> pd.DataFrame: """ Read kline data from Redis Stream and convert to DataFrame Only includes completed candles (x: true). If insufficient data, fetches historical data from Binance API. 
    def read_kline_stream(
        self,
        stream_key: str,
        count: Optional[int] = None,
        use_api_fallback: bool = True
    ) -> pd.DataFrame:
        """
        Read kline data from a Redis Stream and convert it to a DataFrame

        Only completed candles (x: true) are included. If the stream holds
        insufficient data, historical candles are fetched from the Binance API.

        Args:
            stream_key: Redis stream key (e.g., 'binance:raw:kline:5m')
            count: Number of recent candles to fetch (default: LOOKBACK_PERIODS)
            use_api_fallback: Whether to fetch from the API if Redis data is insufficient

        Returns:
            DataFrame with OHLCV data
        """
        if count is None:
            count = config.LOOKBACK_PERIODS

        try:
            # Read MORE messages than needed to account for duplicates and
            # unfinished candles: multiply by 10 so enough unique completed
            # candles survive the filtering below.
            messages = self.redis_client.xrevrange(stream_key, count=count * 10)

            if not messages:
                logger.warning(f"No data found in stream: {stream_key}")
                # Fall back to the API
                if use_api_fallback:
                    return self._fetch_from_api_with_interval(stream_key, count)
                return pd.DataFrame()

            # Parse messages newest-first (xrevrange order) and ONLY keep
            # completed candles (x: true); chronological order is restored
            # by sort_index below.
            klines = []
            seen_timestamps = set()

            for msg_id, fields in messages:
                data = orjson.loads(fields[b'data'])
                k = data.get('k', {})

                # IMPORTANT: only keep completed candles
                if not k.get('x', False):
                    continue

                # Deduplicate by open timestamp
                timestamp = k['t']
                if timestamp in seen_timestamps:
                    continue
                seen_timestamps.add(timestamp)

                klines.append({
                    'timestamp': datetime.fromtimestamp(k['t'] / 1000),
                    'open': float(k['o']),
                    'high': float(k['h']),
                    'low': float(k['l']),
                    'close': float(k['c']),
                    'volume': float(k['v']),
                    'quote_volume': float(k['q']),
                    'trades': int(k['n']),
                    'is_closed': k['x'],
                })

                # Stop once we have enough candles; iterating newest-first
                # guarantees these are the most recent ones.
                if len(klines) >= count:
                    break

            # Create the DataFrame
            df = pd.DataFrame(klines)

            if df.empty:
                logger.warning(f"No completed candles found in stream: {stream_key}")
                # Fall back to the API
                if use_api_fallback:
                    return self._fetch_from_api_with_interval(stream_key, count)
                return df

            df.set_index('timestamp', inplace=True)
            df.sort_index(inplace=True)

            logger.info(f"Loaded {len(df)} completed candles from {stream_key}")
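            # Merge semantics for the fallback below (illustrative example):
            # if the API returns candles for 10:00-10:20 and Redis holds
            # 10:15-10:25, concat([api_df, df]) followed by keep='last' on
            # duplicated timestamps keeps the Redis copies of 10:15/10:20
            # plus the API-only rows before them.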
            # If still insufficient, supplement with API data
            if len(df) < count and use_api_fallback:
                logger.warning(f"Insufficient data: {len(df)}/{count} candles. Fetching from API...")
                api_df = self._fetch_from_api_with_interval(stream_key, count)
                if not api_df.empty:
                    # Merge Redis and API data, preferring Redis for overlapping periods
                    combined = pd.concat([api_df, df])
                    combined = combined[~combined.index.duplicated(keep='last')]
                    combined.sort_index(inplace=True)
                    logger.info(f"Combined data: {len(combined)} candles (Redis: {len(df)}, API: {len(api_df)})")
                    return combined

            return df

        except Exception as e:
            logger.error(f"Error reading kline stream {stream_key}: {e}")
            return pd.DataFrame()

    def _fetch_from_api_with_interval(self, stream_key: str, count: int) -> pd.DataFrame:
        """Extract the interval from a stream key and fetch from the API"""
        # Extract interval from the stream key (e.g., 'binance:raw:kline:5m' -> '5m')
        try:
            interval = stream_key.split(':')[-1]
            return self.fetch_historical_klines_from_api(
                symbol='BTCUSDT',
                interval=interval,
                limit=count
            )
        except Exception as e:
            logger.error(f"Error extracting interval from {stream_key}: {e}")
            return pd.DataFrame()

    def read_latest_depth(self) -> Optional[Dict[str, Any]]:
        """
        Read the latest order book depth data

        Returns:
            Dict with bids and asks, or None if no data
        """
        try:
            messages = self.redis_client.xrevrange(config.DEPTH_KEY, count=1)
            if not messages:
                return None

            msg_id, fields = messages[0]
            data = orjson.loads(fields[b'data'])

            return {
                'timestamp': datetime.fromtimestamp(data['E'] / 1000),
                'bids': [[float(p), float(q)] for p, q in data['b']],
                'asks': [[float(p), float(q)] for p, q in data['a']],
            }

        except Exception as e:
            logger.error(f"Error reading depth data: {e}")
            return None

    def read_recent_trades(self, count: int = 100) -> List[Dict[str, Any]]:
        """
        Read recent trade data

        Args:
            count: Number of recent trades to fetch

        Returns:
            List of trade dictionaries
        """
        try:
            messages = self.redis_client.xrevrange(config.TRADE_KEY, count=count)
            if not messages:
                return []

            trades = []
            for msg_id, fields in messages:
                data = orjson.loads(fields[b'data'])
                trades.append({
                    'timestamp': datetime.fromtimestamp(data['T'] / 1000),
                    'price': float(data['p']),
                    'quantity': float(data['q']),
                    'is_buyer_maker': data['m'],  # True = aggressive sell, False = aggressive buy
                })

            return trades

        except Exception as e:
            logger.error(f"Error reading trade data: {e}")
            return []

    def get_multi_timeframe_data(self) -> Dict[str, pd.DataFrame]:
        """
        Fetch data across multiple timeframes

        Returns:
            Dict mapping timeframe to DataFrame
        """
        # Different timeframes need different amounts of data:
        # shorter timeframes get 200 candles for detailed analysis,
        # longer ones fewer (100 for 1d, 65 for 1w).
        timeframes = {
            '5m': (config.KLINE_5M_KEY, 200),
            '15m': (config.KLINE_15M_KEY, 200),
            '1h': (config.KLINE_1H_KEY, 200),
            '4h': (config.KLINE_4H_KEY, 200),
            '1d': (config.KLINE_1D_KEY, 100),  # 100 days ≈ 3+ months
            '1w': (config.KLINE_1W_KEY, 65),   # 65 weeks ≈ 15 months
        }

        data = {}
        for tf, (key, count) in timeframes.items():
            df = self.read_kline_stream(key, count=count)
            if not df.empty:
                data[tf] = df

        return data

    def get_latest_price(self) -> Optional[float]:
        """Get the latest close price from the 5m kline stream"""
        try:
            df = self.read_kline_stream(config.KLINE_5M_KEY, count=1)
            if not df.empty:
                return float(df.iloc[-1]['close'])
        except Exception as e:
            logger.error(f"Error getting latest price: {e}")
        return None
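
# ---------------------------------------------------------------------------
# Minimal smoke test (a sketch, not part of the library API). Assumes a local
# Redis instance already populated by the streaming ingester, plus the key
# names defined in .config; run it as a module (python -m <your_package>.reader,
# package name depends on your layout) so the relative import resolves.
# ---------------------------------------------------------------------------
if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)

    reader = MarketDataReader()

    # Latest traded price (falls back to the Binance API if Redis is empty)
    price = reader.get_latest_price()
    print(f"Latest price: {price}")

    # One DataFrame per configured timeframe
    for tf, frame in reader.get_multi_timeframe_data().items():
        print(f"{tf}: {len(frame)} candles, last close {frame['close'].iloc[-1]}")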