# tradusai/analysis/data_reader.py
"""
Data reader for fetching market data from Redis Streams
"""
import logging
from typing import Optional, List, Dict, Any
from datetime import datetime, timedelta
import pandas as pd
import redis
import orjson
import requests
import time
from .config import config
logger = logging.getLogger(__name__)


class MarketDataReader:
    """Read and aggregate market data from Redis Streams."""

    def __init__(self):
        self.redis_client = redis.Redis(
            host=config.REDIS_HOST,
            port=config.REDIS_PORT,
            db=config.REDIS_DB,
            decode_responses=False,
        )

    def fetch_historical_klines_from_api(
        self, symbol: str = 'BTCUSDT', interval: str = '5m', limit: int = 200
    ) -> pd.DataFrame:
        """
        Fetch historical kline data from the Binance futures API.

        Args:
            symbol: Trading pair (e.g., 'BTCUSDT')
            interval: Kline interval (e.g., '5m', '15m', '1h', '4h')
            limit: Number of candles to fetch (capped at the API maximum of 1500)

        Returns:
            DataFrame with historical OHLCV data, indexed by timestamp
        """
        try:
            url = 'https://fapi.binance.com/fapi/v1/klines'
            params = {
                'symbol': symbol,
                'interval': interval,
                'limit': min(limit, 1500),  # API maximum per request
            }
            logger.info(f"Fetching {limit} historical candles from Binance API ({symbol} {interval})...")
            response = requests.get(url, params=params, timeout=10)
            response.raise_for_status()
            data = response.json()

            # Each kline is a flat list; pick out the OHLCV fields by position
            klines = []
            for item in data:
                klines.append({
                    'timestamp': datetime.fromtimestamp(item[0] / 1000),
                    'open': float(item[1]),
                    'high': float(item[2]),
                    'low': float(item[3]),
                    'close': float(item[4]),
                    'volume': float(item[5]),
                    'quote_volume': float(item[7]),
                    'trades': int(item[8]),
                    'is_closed': True,  # Historical candles are always closed
                })

            df = pd.DataFrame(klines)
            if not df.empty:
                df.set_index('timestamp', inplace=True)
                df.sort_index(inplace=True)
            logger.info(f"✅ Fetched {len(df)} candles from Binance API")
            return df
        except Exception as e:
            logger.error(f"Error fetching from Binance API: {e}")
            return pd.DataFrame()
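
    # Illustrative usage (assumes network access to fapi.binance.com):
    #     reader = MarketDataReader()
    #     df = reader.fetch_historical_klines_from_api('BTCUSDT', '1h', limit=200)
    #     print(df[['close', 'volume']].tail())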

    def read_kline_stream(
        self, stream_key: str, count: Optional[int] = None, use_api_fallback: bool = True
    ) -> pd.DataFrame:
        """
        Read kline data from a Redis Stream and convert it to a DataFrame.

        Only completed candles (x: true) are included. If the stream holds
        fewer candles than requested, historical data is fetched from the
        Binance API to fill the gap.

        Args:
            stream_key: Redis stream key (e.g., 'binance:raw:kline:5m')
            count: Number of recent candles to fetch (default: LOOKBACK_PERIODS)
            use_api_fallback: Whether to fetch from the API when Redis data is insufficient

        Returns:
            DataFrame with OHLCV data, indexed by timestamp
        """
        if count is None:
            count = config.LOOKBACK_PERIODS
        try:
            # Over-fetch by 10x: each candle is re-published many times while
            # it is still open, so only a fraction of the messages are closed,
            # unique candles.
            messages = self.redis_client.xrevrange(stream_key, count=count * 10)
            if not messages:
                logger.warning(f"No data found in stream: {stream_key}")
                if use_api_fallback:
                    return self._fetch_from_api_with_interval(stream_key, count)
                return pd.DataFrame()

            # Parse messages, keeping ONLY completed candles (x: true).
            # Iterate newest-first (XREVRANGE order) so the most recent closed
            # candles are kept; the DataFrame is sorted chronologically below.
            klines = []
            seen_timestamps = set()
            for msg_id, fields in messages:
                data = orjson.loads(fields[b'data'])
                k = data.get('k', {})
                # Skip candles that are still open
                if not k.get('x', False):
                    continue
                # Deduplicate by open timestamp
                timestamp = k['t']
                if timestamp in seen_timestamps:
                    continue
                seen_timestamps.add(timestamp)
                klines.append({
                    'timestamp': datetime.fromtimestamp(k['t'] / 1000),
                    'open': float(k['o']),
                    'high': float(k['h']),
                    'low': float(k['l']),
                    'close': float(k['c']),
                    'volume': float(k['v']),
                    'quote_volume': float(k['q']),
                    'trades': int(k['n']),
                    'is_closed': k['x'],
                })
                # Stop once we have enough candles
                if len(klines) >= count:
                    break

            df = pd.DataFrame(klines)
            if df.empty:
                logger.warning(f"No completed candles found in stream: {stream_key}")
                if use_api_fallback:
                    return self._fetch_from_api_with_interval(stream_key, count)
                return df

            df.set_index('timestamp', inplace=True)
            df.sort_index(inplace=True)
            logger.info(f"Loaded {len(df)} completed candles from {stream_key}")

            # If still insufficient, supplement with API data
            if len(df) < count and use_api_fallback:
                logger.warning(f"Insufficient data: {len(df)}/{count} candles. Fetching from API...")
                api_df = self._fetch_from_api_with_interval(stream_key, count)
                if not api_df.empty:
                    # Merge Redis and API data, preferring Redis rows where
                    # the timestamps overlap
                    combined = pd.concat([api_df, df])
                    combined = combined[~combined.index.duplicated(keep='last')]
                    combined.sort_index(inplace=True)
                    logger.info(f"Combined data: {len(combined)} candles (Redis: {len(df)}, API: {len(api_df)})")
                    return combined
            return df
        except Exception as e:
            logger.error(f"Error reading kline stream {stream_key}: {e}")
            return pd.DataFrame()
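
    # Illustrative usage (assumes a collector is populating the stream):
    #     reader = MarketDataReader()
    #     df_5m = reader.read_kline_stream('binance:raw:kline:5m', count=200)
    #     if not df_5m.empty:
    #         print(df_5m['close'].iloc[-1])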

    def _fetch_from_api_with_interval(self, stream_key: str, count: int) -> pd.DataFrame:
        """Extract the interval from a stream key and fetch that timeframe from the API."""
        try:
            # e.g. 'binance:raw:kline:5m' -> '5m'
            interval = stream_key.split(':')[-1]
            return self.fetch_historical_klines_from_api(
                symbol='BTCUSDT',
                interval=interval,
                limit=count,
            )
        except Exception as e:
            logger.error(f"Error extracting interval from {stream_key}: {e}")
            return pd.DataFrame()

    def read_latest_depth(self) -> Optional[Dict[str, Any]]:
        """
        Read the latest order book depth snapshot.

        Returns:
            Dict with 'timestamp', 'bids', and 'asks', or None if no data
        """
        try:
            messages = self.redis_client.xrevrange(config.DEPTH_KEY, count=1)
            if not messages:
                return None
            msg_id, fields = messages[0]
            data = orjson.loads(fields[b'data'])
            return {
                'timestamp': datetime.fromtimestamp(data['E'] / 1000),
                'bids': [[float(p), float(q)] for p, q in data['b']],
                'asks': [[float(p), float(q)] for p, q in data['a']],
            }
        except Exception as e:
            logger.error(f"Error reading depth data: {e}")
            return None
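
    # Illustrative usage: derive a mid price and a simple book imbalance from
    # the snapshot (bids/asks are [price, quantity] lists, best level first):
    #     depth = reader.read_latest_depth()
    #     if depth:
    #         mid = (depth['bids'][0][0] + depth['asks'][0][0]) / 2
    #         bid_qty = sum(q for _, q in depth['bids'])
    #         ask_qty = sum(q for _, q in depth['asks'])
    #         imbalance = (bid_qty - ask_qty) / (bid_qty + ask_qty)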

    def read_recent_trades(self, count: int = 100) -> List[Dict[str, Any]]:
        """
        Read recent trade data.

        Args:
            count: Number of recent trades to fetch

        Returns:
            List of trade dictionaries, newest first
        """
        try:
            messages = self.redis_client.xrevrange(config.TRADE_KEY, count=count)
            if not messages:
                return []
            trades = []
            for msg_id, fields in messages:
                data = orjson.loads(fields[b'data'])
                trades.append({
                    'timestamp': datetime.fromtimestamp(data['T'] / 1000),
                    'price': float(data['p']),
                    'quantity': float(data['q']),
                    # True: buyer was the maker, i.e. a taker sell;
                    # False: a taker buy
                    'is_buyer_maker': data['m'],
                })
            return trades
        except Exception as e:
            logger.error(f"Error reading trade data: {e}")
            return []
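
    # Illustrative usage: split taker buy/sell volume from recent trades:
    #     trades = reader.read_recent_trades(count=500)
    #     sell_vol = sum(t['quantity'] for t in trades if t['is_buyer_maker'])
    #     buy_vol = sum(t['quantity'] for t in trades if not t['is_buyer_maker'])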

    def get_multi_timeframe_data(self) -> Dict[str, pd.DataFrame]:
        """
        Fetch data for multiple timeframes.

        Returns:
            Dict mapping timeframe label to DataFrame
        """
        # Different timeframes need different amounts of data:
        # shorter timeframes get 200 candles for detailed analysis,
        # longer timeframes need fewer (100 for 1d, 65 for 1w).
        timeframes = {
            '5m': (config.KLINE_5M_KEY, 200),
            '15m': (config.KLINE_15M_KEY, 200),
            '1h': (config.KLINE_1H_KEY, 200),
            '4h': (config.KLINE_4H_KEY, 200),
            '1d': (config.KLINE_1D_KEY, 100),  # 100 days ≈ 3+ months
            '1w': (config.KLINE_1W_KEY, 65),   # 65 weeks ≈ 15 months
        }
        data = {}
        for tf, (key, count) in timeframes.items():
            df = self.read_kline_stream(key, count=count)
            if not df.empty:
                data[tf] = df
        return data
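
    # Illustrative usage: pick out a single timeframe from the result:
    #     mtf = reader.get_multi_timeframe_data()
    #     if '1h' in mtf:
    #         print(mtf['1h'][['open', 'high', 'low', 'close']].tail(3))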

    def get_latest_price(self) -> Optional[float]:
        """Get the latest close price from the 5m kline stream."""
        try:
            df = self.read_kline_stream(config.KLINE_5M_KEY, count=1)
            if not df.empty:
                return float(df.iloc[-1]['close'])
        except Exception as e:
            logger.error(f"Error getting latest price: {e}")
        return None
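

# Minimal smoke test, as a sketch: it assumes a reachable Redis instance as
# configured in .config and, for the API fallback path, network access to
# fapi.binance.com. Because of the relative import above, run it as a module:
#     python -m tradusai.analysis.data_reader
if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)
    reader = MarketDataReader()
    print(f"Latest price: {reader.get_latest_price()}")
    for tf, df in reader.get_multi_timeframe_data().items():
        print(f"{tf}: {len(df)} candles, last close {df['close'].iloc[-1]}")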