"""
|
|
Data reader for fetching market data from Redis Streams
|
|
"""
|
|
import logging
|
|
from typing import Optional, List, Dict, Any
|
|
from datetime import datetime, timedelta
|
|
import pandas as pd
|
|
import redis
|
|
import orjson
|
|
import requests
|
|
import time
|
|
|
|
from .config import config
|
|
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
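
# NOTE: this module assumes `.config.config` exposes the connection and
# stream settings referenced below: REDIS_HOST, REDIS_PORT, REDIS_DB,
# LOOKBACK_PERIODS, DEPTH_KEY, TRADE_KEY, and KLINE_5M_KEY, KLINE_15M_KEY,
# KLINE_1H_KEY, KLINE_4H_KEY, KLINE_1D_KEY, KLINE_1W_KEY.
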
class MarketDataReader:
    """Read and aggregate market data from Redis Streams."""

    def __init__(self):
        self.redis_client = redis.Redis(
            host=config.REDIS_HOST,
            port=config.REDIS_PORT,
            db=config.REDIS_DB,
            decode_responses=False,  # keep raw bytes; payloads are decoded with orjson
        )
    def fetch_historical_klines_from_api(
        self, symbol: str = 'BTCUSDT', interval: str = '5m', limit: int = 200
    ) -> pd.DataFrame:
        """
        Fetch historical kline data from the Binance Futures API.

        Args:
            symbol: Trading pair (e.g., 'BTCUSDT')
            interval: Kline interval (e.g., '5m', '15m', '1h', '4h')
            limit: Number of candles to fetch (max 1500)

        Returns:
            DataFrame with historical OHLCV data, indexed by timestamp
        """
        try:
            # Binance Futures API endpoint
            url = 'https://fapi.binance.com/fapi/v1/klines'

            params = {
                'symbol': symbol,
                'interval': interval,
                'limit': min(limit, 1500),  # API maximum
            }

            logger.info(f"Fetching {limit} historical candles from Binance API ({symbol} {interval})...")
            response = requests.get(url, params=params, timeout=10)
            response.raise_for_status()

            data = response.json()

            # Parse the API response
            klines = []
            for item in data:
                klines.append({
                    'timestamp': datetime.fromtimestamp(item[0] / 1000),
                    'open': float(item[1]),
                    'high': float(item[2]),
                    'low': float(item[3]),
                    'close': float(item[4]),
                    'volume': float(item[5]),
                    'quote_volume': float(item[7]),
                    'trades': int(item[8]),
                    'is_closed': True,  # historical candles are always closed
                })

            df = pd.DataFrame(klines)
            if not df.empty:
                df.set_index('timestamp', inplace=True)
                df.sort_index(inplace=True)
                logger.info(f"✅ Fetched {len(df)} candles from Binance API")

            return df

        except Exception as e:
            logger.error(f"Error fetching from Binance API: {e}")
            return pd.DataFrame()
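
    # For reference (Binance REST docs): each /fapi/v1/klines element is a
    # 12-field array [open_time, open, high, low, close, volume, close_time,
    # quote_volume, trades, taker_buy_base, taker_buy_quote, ignore], which is
    # why the parser above indexes item[1] through item[8].
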
    def read_kline_stream(
        self, stream_key: str, count: Optional[int] = None, use_api_fallback: bool = True
    ) -> pd.DataFrame:
        """
        Read kline data from a Redis Stream and convert it to a DataFrame.
        Only completed candles (x: true) are included. If there is not enough
        data in Redis, historical data is fetched from the Binance API.

        Args:
            stream_key: Redis stream key (e.g., 'binance:raw:kline:5m')
            count: Number of recent candles to fetch (default: LOOKBACK_PERIODS)
            use_api_fallback: Whether to fetch from the API if Redis data is insufficient

        Returns:
            DataFrame with OHLCV data
        """
        if count is None:
            count = config.LOOKBACK_PERIODS

        try:
            # Read MORE messages than needed to account for duplicates:
            # multiply by 10 so enough unique candles remain after filtering
            messages = self.redis_client.xrevrange(stream_key, count=count * 10)

            if not messages:
                logger.warning(f"No data found in stream: {stream_key}")
                # Fall back to the API
                if use_api_fallback:
                    return self._fetch_from_api_with_interval(stream_key, count)
                return pd.DataFrame()

            # Parse messages, keeping ONLY completed candles (x: true)
            klines = []
            seen_timestamps = set()

            for msg_id, fields in reversed(messages):  # reverse into chronological order
                data = orjson.loads(fields[b'data'])
                k = data.get('k', {})

                # IMPORTANT: only keep completed candles
                if not k.get('x', False):
                    continue

                # Deduplicate by open timestamp
                timestamp = k['t']
                if timestamp in seen_timestamps:
                    continue
                seen_timestamps.add(timestamp)

                klines.append({
                    'timestamp': datetime.fromtimestamp(k['t'] / 1000),
                    'open': float(k['o']),
                    'high': float(k['h']),
                    'low': float(k['l']),
                    'close': float(k['c']),
                    'volume': float(k['v']),
                    'quote_volume': float(k['q']),
                    'trades': int(k['n']),
                    'is_closed': k['x'],
                })

                # Stop once we have enough candles
                if len(klines) >= count:
                    break

            # Create the DataFrame
            df = pd.DataFrame(klines)

            if df.empty:
                logger.warning(f"No completed candles found in stream: {stream_key}")
                # Fall back to the API
                if use_api_fallback:
                    return self._fetch_from_api_with_interval(stream_key, count)
                return df

            df.set_index('timestamp', inplace=True)
            df.sort_index(inplace=True)

            logger.info(f"Loaded {len(df)} completed candles from {stream_key}")

            # If still insufficient, supplement with API data
            if len(df) < count and use_api_fallback:
                logger.warning(f"Insufficient data: {len(df)}/{count} candles. Fetching from API...")
                api_df = self._fetch_from_api_with_interval(stream_key, count)

                if not api_df.empty:
                    # Merge Redis and API data, preferring Redis rows for overlapping periods
                    combined = pd.concat([api_df, df])
                    combined = combined[~combined.index.duplicated(keep='last')]
                    combined.sort_index(inplace=True)
                    logger.info(f"Combined data: {len(combined)} candles (Redis: {len(df)}, API: {len(api_df)})")
                    return combined

            return df

        except Exception as e:
            logger.error(f"Error reading kline stream {stream_key}: {e}")
            return pd.DataFrame()
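
    # The parser above assumes each stream entry's `data` field holds a
    # Binance kline websocket event, roughly:
    #   {"E": 1672515782136,
    #    "k": {"t": 1672515780000, "o": "16800.1", "h": "16802.0",
    #          "l": "16799.5", "c": "16801.2", "v": "12.3", "q": "206650.0",
    #          "n": 142, "x": false}}
    # (illustrative values; only the keys referenced above matter here)
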
    def _fetch_from_api_with_interval(self, stream_key: str, count: int) -> pd.DataFrame:
        """Extract the interval from a stream key and fetch that timeframe from the API."""
        try:
            # Extract the interval from the stream key (e.g., 'binance:raw:kline:5m' -> '5m')
            interval = stream_key.split(':')[-1]
            return self.fetch_historical_klines_from_api(
                symbol='BTCUSDT',  # symbol is currently fixed to BTCUSDT
                interval=interval,
                limit=count,
            )
        except Exception as e:
            logger.error(f"Error fetching {stream_key} data from API: {e}")
            return pd.DataFrame()
    def read_latest_depth(self) -> Optional[Dict[str, Any]]:
        """
        Read the latest order book depth data.

        Returns:
            Dict with bids and asks, or None if no data
        """
        try:
            messages = self.redis_client.xrevrange(config.DEPTH_KEY, count=1)

            if not messages:
                return None

            msg_id, fields = messages[0]
            data = orjson.loads(fields[b'data'])

            return {
                'timestamp': datetime.fromtimestamp(data['E'] / 1000),
                'bids': [[float(p), float(q)] for p, q in data['b']],
                'asks': [[float(p), float(q)] for p, q in data['a']],
            }

        except Exception as e:
            logger.error(f"Error reading depth data: {e}")
            return None
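
    # Depth entries are assumed to follow the Binance depth event layout:
    # 'E' is the event time in ms, and 'b'/'a' are [price, quantity] string
    # pairs for bids and asks, e.g. "b": [["16800.10", "0.525"], ...].
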
    def read_recent_trades(self, count: int = 100) -> List[Dict[str, Any]]:
        """
        Read recent trade data.

        Args:
            count: Number of recent trades to fetch

        Returns:
            List of trade dictionaries, most recent first
        """
        try:
            messages = self.redis_client.xrevrange(config.TRADE_KEY, count=count)

            if not messages:
                return []

            trades = []
            for msg_id, fields in messages:
                data = orjson.loads(fields[b'data'])

                trades.append({
                    'timestamp': datetime.fromtimestamp(data['T'] / 1000),
                    'price': float(data['p']),
                    'quantity': float(data['q']),
                    # True means the buyer was the maker (an aggressive sell);
                    # False means an aggressive buy
                    'is_buyer_maker': data['m'],
                })

            return trades

        except Exception as e:
            logger.error(f"Error reading trade data: {e}")
            return []
    def get_multi_timeframe_data(self) -> Dict[str, pd.DataFrame]:
        """
        Fetch data for multiple timeframes.

        Returns:
            Dict mapping timeframe to DataFrame
        """
        # Different timeframes need different amounts of data:
        # shorter timeframes get 200 candles for detailed analysis,
        # longer timeframes fewer (100 for 1d, 65 for 1w)
        timeframes = {
            '5m': (config.KLINE_5M_KEY, 200),
            '15m': (config.KLINE_15M_KEY, 200),
            '1h': (config.KLINE_1H_KEY, 200),
            '4h': (config.KLINE_4H_KEY, 200),
            '1d': (config.KLINE_1D_KEY, 100),  # 100 days ≈ 3+ months
            '1w': (config.KLINE_1W_KEY, 65),   # 65 weeks ≈ 15 months
        }

        data = {}
        for tf, (key, count) in timeframes.items():
            df = self.read_kline_stream(key, count=count)
            if not df.empty:
                data[tf] = df

        return data
    def get_latest_price(self) -> Optional[float]:
        """Get the latest close price from the 5m kline stream."""
        try:
            df = self.read_kline_stream(config.KLINE_5M_KEY, count=1)
            if not df.empty:
                return float(df.iloc[-1]['close'])
        except Exception as e:
            logger.error(f"Error getting latest price: {e}")
        return None
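

# A minimal smoke-test sketch, assuming a reachable Redis instance that the
# upstream collector has populated (host and stream keys come from .config).
# Illustrative usage only, not part of the module's public surface.
if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)

    reader = MarketDataReader()

    # Multi-timeframe klines: {'5m': DataFrame, '15m': DataFrame, ...}
    for tf, frame in reader.get_multi_timeframe_data().items():
        print(f"{tf}: {len(frame)} candles, last close = {frame['close'].iloc[-1]}")

    # Latest 5m close price (None if no data is available)
    print(f"latest price: {reader.get_latest_price()}")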