# trading.ai/data_fetcher.py
# 2025-08-14 10:06:19 +08:00
#
# 297 lines, 12 KiB, Python
#
# Repository-viewer notice: this file contains ambiguous Unicode characters,
# i.e. characters that might be confused with other characters. If this is
# intentional, the warning can be safely ignored.

from binance.client import Client
from binance.exceptions import BinanceAPIException, BinanceOrderException
import pandas as pd
import time
import logging
from datetime import datetime, timedelta
from typing import List, Dict, Optional
import requests
class BinanceDataFetcher:
    """Fetch spot-market data from Binance through a python-binance ``Client``.

    The fetcher ranks USDT pairs (by 24h quote volume or by CoinGecko market
    cap), downloads OHLCV candles for one or several timeframes and exposes
    small helpers for the current price and 24h statistics.

    Most methods are best-effort: they log the failure and return an empty
    value (``[]``, ``{}``, ``None`` or an empty ``DataFrame``) instead of
    raising. Only ``__init__`` re-raises.
    """

    # Stablecoin/USDT pairs that are excluded from every ranking.
    STABLE_COINS = frozenset({
        'USDCUSDT', 'BUSDUSDT', 'TUSDUSDT', 'PAXUSDT', 'DAIUSDT',
        'FDUSDUSDT', 'USTCUSDT', 'SUSDUSDT', 'GUSDUSDT', 'USDPUSDT',
    })
    # Timeframes fetched when the caller does not specify any.
    DEFAULT_TIMEFRAMES = ('4h', '1h', '15m')
    # Minimum 24h quote volume (in USDT) for the volume ranking.
    DEFAULT_MIN_QUOTE_VOLUME = 20_000_000
    # Minimum 24h quote volume (in USDT) for the market-cap ranking.
    MARKET_CAP_MIN_QUOTE_VOLUME = 1_000_000
    COINGECKO_MARKETS_URL = "https://api.coingecko.com/api/v3/coins/markets"
    HTTP_TIMEOUT_SECONDS = 10
    # Column layout of one kline row as returned by ``Client.get_klines``.
    KLINE_COLUMNS = (
        'timestamp', 'open', 'high', 'low', 'close', 'volume',
        'close_time', 'quote_asset_volume', 'number_of_trades',
        'taker_buy_base_asset_volume', 'taker_buy_quote_asset_volume', 'ignore',
    )
    # Columns kept in the DataFrames handed back to callers.
    OHLCV_COLUMNS = ('open', 'high', 'low', 'close', 'volume')

    def __init__(self, api_key=None, secret=None):
        """Create the Binance client and verify connectivity with a ping.

        Args:
            api_key: Binance API key. Optional.
            secret: Binance API secret. Optional.

        Raises:
            Exception: whatever the client construction or ``ping`` raised;
                the error is logged and printed before being re-raised.
        """
        try:
            if api_key and secret:
                self.client = Client(api_key, secret)
            else:
                # Without credentials a public client is used; it can only
                # access market data.
                self.client = Client()
            # Connectivity test.
            self.client.ping()
            print("Binance API连接成功")
        except Exception as e:
            logging.error(f"Binance API初始化失败: {e}")
            print(f"Binance API初始化失败: {e}")
            raise

    @classmethod
    def _liquid_usdt_tickers(cls, tickers, min_quote_volume):
        """Return ``(symbol, quote_volume)`` for tradable, liquid USDT pairs.

        A ticker qualifies when its symbol ends with ``USDT``, is not listed
        in ``STABLE_COINS`` and its ``quoteVolume`` is strictly greater than
        ``min_quote_volume``. Input order is preserved.
        """
        liquid = []
        for ticker in tickers:
            symbol = ticker['symbol']
            if not symbol.endswith('USDT') or symbol in cls.STABLE_COINS:
                continue
            quote_volume = float(ticker['quoteVolume'])
            if quote_volume > min_quote_volume:
                liquid.append((symbol, quote_volume))
        return liquid

    @staticmethod
    def _match_by_market_cap(market_data, tradable_symbols, limit) -> List[str]:
        """Map CoinGecko market rows onto Binance symbols, best rank first.

        For every coin two candidate symbols are tried in order:
        ``<SYMBOL>USDT`` and ``<ID without dashes>USDT``. A Binance symbol is
        claimed at most once, so when several coins share a ticker the one
        that appears first in ``market_data`` wins. Matching stops as soon as
        ``limit`` symbols were found.
        """
        ranked = []  # (market_cap_rank, binance_symbol)
        claimed = set()
        for coin in market_data:
            rank = coin.get('market_cap_rank')
            candidates = (
                f"{coin['symbol'].upper()}USDT",
                f"{coin['id'].upper().replace('-', '')}USDT",
            )
            for candidate in candidates:
                if candidate in tradable_symbols and candidate not in claimed:
                    claimed.add(candidate)
                    ranked.append((rank, candidate))
                    print(f"匹配成功: {coin['name']}({coin['symbol']}) -> {candidate} (排名#{rank})")
                    break
            if len(ranked) >= limit:
                break
        # Coins without a rank are pushed to the end.
        ranked.sort(key=lambda item: item[0] if item[0] else 999999)
        return [symbol for _, symbol in ranked[:limit]]

    def get_top_usdt_pairs(self, limit=100, *, min_quote_volume=None) -> List[str]:
        """Return the USDT pairs with the highest 24h quote volume.

        Stablecoin pairs are excluded.

        Args:
            limit: maximum number of symbols to return.
            min_quote_volume: pairs must trade strictly more than this many
                USDT in 24h. Defaults to ``DEFAULT_MIN_QUOTE_VOLUME``.

        Returns:
            Symbols sorted by descending quote volume, or ``[]`` on any error.
        """
        if min_quote_volume is None:
            min_quote_volume = self.DEFAULT_MIN_QUOTE_VOLUME
        try:
            print(f"正在获取前{limit}个USDT交易对...")
            # 24h ticker statistics for every symbol.
            tickers = self.client.get_ticker()
            print(f"获取到{len(tickers)}个交易对")
            liquid = self._liquid_usdt_tickers(tickers, min_quote_volume)
            liquid.sort(key=lambda item: item[1], reverse=True)
            top_pairs = [symbol for symbol, _ in liquid[:limit]]
            logging.info(f"获取到{len(top_pairs)}个USDT交易对(已排除稳定币)")
            print(f"成功获取{len(top_pairs)}个USDT交易对(已排除稳定币)")
            return top_pairs
        except Exception as e:
            logging.error(f"获取交易对失败: {e}")
            print(f"获取交易对失败: {e}")
            return []

    def _convert_timeframe(self, timeframe: str) -> str:
        """Translate a timeframe string into a python-binance interval.

        Unknown timeframes fall back to the 4-hour interval; a warning is
        logged so the substitution does not go unnoticed.
        """
        timeframe_mapping = {
            '1m': Client.KLINE_INTERVAL_1MINUTE,
            '3m': Client.KLINE_INTERVAL_3MINUTE,
            '5m': Client.KLINE_INTERVAL_5MINUTE,
            '15m': Client.KLINE_INTERVAL_15MINUTE,
            '30m': Client.KLINE_INTERVAL_30MINUTE,
            '1h': Client.KLINE_INTERVAL_1HOUR,
            '2h': Client.KLINE_INTERVAL_2HOUR,
            '4h': Client.KLINE_INTERVAL_4HOUR,
            '6h': Client.KLINE_INTERVAL_6HOUR,
            '8h': Client.KLINE_INTERVAL_8HOUR,
            '12h': Client.KLINE_INTERVAL_12HOUR,
            '1d': Client.KLINE_INTERVAL_1DAY,
            '3d': Client.KLINE_INTERVAL_3DAY,
            '1w': Client.KLINE_INTERVAL_1WEEK,
            '1M': Client.KLINE_INTERVAL_1MONTH,
        }
        interval = timeframe_mapping.get(timeframe)
        if interval is None:
            logging.warning(f"未知的时间周期 {timeframe!r}, 回退到 4h")
            interval = Client.KLINE_INTERVAL_4HOUR
        return interval

    def fetch_ohlcv_data(self, symbol: str, timeframe: str, limit: int = 500) -> pd.DataFrame:
        """Download candles for one symbol and timeframe.

        Args:
            symbol: Binance symbol, e.g. ``BTCUSDT``.
            timeframe: timeframe string understood by ``_convert_timeframe``.
            limit: number of candles requested.

        Returns:
            DataFrame indexed by ``timestamp`` (converted from milliseconds)
            with numeric ``open``, ``high``, ``low``, ``close`` and ``volume``
            columns. An empty DataFrame is returned on any error.
        """
        try:
            interval = self._convert_timeframe(timeframe)
            klines = self.client.get_klines(
                symbol=symbol,
                interval=interval,
                limit=limit,
            )
            df = pd.DataFrame(klines, columns=list(self.KLINE_COLUMNS))
            df['timestamp'] = pd.to_datetime(df['timestamp'], unit='ms')
            for col in self.OHLCV_COLUMNS:
                df[col] = pd.to_numeric(df[col])
            df.set_index('timestamp', inplace=True)
            # Keep only the columns callers need.
            return df[list(self.OHLCV_COLUMNS)]
        except BinanceAPIException as e:
            logging.error(f"获取{symbol} {timeframe}数据失败: {e}")
            print(f"获取{symbol} {timeframe}数据失败: {e}")
            return pd.DataFrame()
        except Exception as e:
            logging.error(f"获取{symbol} {timeframe}数据时发生未知错误: {e}")
            return pd.DataFrame()

    def get_multi_timeframe_data(self, symbol: str, timeframes: Optional[List[str]] = None) -> Dict[str, pd.DataFrame]:
        """Fetch candles for several timeframes of one symbol.

        Args:
            symbol: Binance symbol.
            timeframes: timeframes to fetch; ``DEFAULT_TIMEFRAMES`` when
                ``None``.

        Returns:
            Mapping of timeframe to DataFrame. Timeframes whose download came
            back empty are omitted.
        """
        if timeframes is None:
            timeframes = list(self.DEFAULT_TIMEFRAMES)
        data = {}
        for tf in timeframes:
            df = self.fetch_ohlcv_data(symbol, tf)
            if not df.empty:
                data[tf] = df
            time.sleep(0.1)  # stay below the API rate limit
        return data

    def get_current_price(self, symbol: str) -> Optional[float]:
        """Return the latest price of ``symbol``, or ``None`` on any error."""
        try:
            ticker = self.client.get_symbol_ticker(symbol=symbol)
            return float(ticker['price'])
        except Exception as e:
            logging.error(f"获取{symbol}当前价格失败: {e}")
            return None

    def get_24h_stats(self, symbol: str) -> Dict:
        """Return 24h statistics of ``symbol`` as floats.

        Returns:
            Dict with ``price_change_percent``, ``volume``, ``quote_volume``,
            ``high_24h`` and ``low_24h``; ``{}`` on any error.
        """
        try:
            ticker = self.client.get_ticker(symbol=symbol)
            return {
                'price_change_percent': float(ticker['priceChangePercent']),
                'volume': float(ticker['volume']),
                'quote_volume': float(ticker['quoteVolume']),
                'high_24h': float(ticker['highPrice']),
                'low_24h': float(ticker['lowPrice']),
            }
        except Exception as e:
            logging.error(f"获取{symbol} 24h统计数据失败: {e}")
            return {}

    def batch_fetch_data(self, symbols: List[str], timeframes: Optional[List[str]] = None) -> Dict[str, Dict]:
        """Fetch multi-timeframe candles and 24h statistics for many symbols.

        Args:
            symbols: Binance symbols to process, in order.
            timeframes: timeframes to fetch; ``DEFAULT_TIMEFRAMES`` when
                ``None``.

        Returns:
            Mapping of symbol to ``{'timeframes', 'volume_24h_usd', 'stats'}``.
            Symbols for which no candles could be fetched, or whose processing
            raised, are omitted.
        """
        if timeframes is None:
            timeframes = list(self.DEFAULT_TIMEFRAMES)
        all_data = {}
        total = len(symbols)
        for position, symbol in enumerate(symbols, start=1):
            try:
                logging.info(f"正在获取 {symbol} 数据 ({position}/{total})")
                print(f"正在获取 {symbol} 数据 ({position}/{total})")
                timeframe_data = self.get_multi_timeframe_data(symbol, timeframes)
                if timeframe_data:
                    stats = self.get_24h_stats(symbol)
                    # 24h quote volume; 0 when the statistics call failed.
                    quote_volume = stats.get('quote_volume', 0)
                    all_data[symbol] = {
                        'timeframes': timeframe_data,
                        'volume_24h_usd': quote_volume,
                        'stats': stats,
                    }
                    logging.info(f"{symbol} 24小时交易量: ${quote_volume:,.0f}")
                # Throttle requests to avoid being rate limited.
                time.sleep(0.2)
            except Exception as e:
                logging.error(f"获取{symbol}数据时出错: {e}")
                print(f"获取{symbol}数据时出错: {e}")
                continue
        logging.info(f"成功获取{len(all_data)}个币种的数据")
        print(f"成功获取{len(all_data)}个币种的数据")
        return all_data

    def get_top_market_cap_usdt_pairs(self, limit=100) -> List[str]:
        """Return USDT pairs of the highest market-cap coins.

        Market-cap ranks come from CoinGecko and are matched against the
        liquid, non-stablecoin USDT pairs on Binance. When fewer than
        ``limit`` pairs match, the list is topped up from the volume ranking.
        A non-200 CoinGecko response or any exception falls back to
        ``get_top_usdt_pairs(limit)``.

        Args:
            limit: maximum number of symbols to return.

        Returns:
            Unique symbols, market-cap matches first (best rank first).
        """
        try:
            print(f"正在获取市值排名前{limit}个币种...")
            params = {
                'vs_currency': 'usd',
                'order': 'market_cap_desc',
                # Over-fetch so enough coins remain after matching.
                'per_page': min(limit * 2, 250),
                'page': 1,
                'sparkline': 'false',
            }
            response = requests.get(
                self.COINGECKO_MARKETS_URL,
                params=params,
                timeout=self.HTTP_TIMEOUT_SECONDS,
            )
            if response.status_code != 200:
                print(f"CoinGecko API请求失败: {response.status_code}")
                return self.get_top_usdt_pairs(limit)
            market_data = response.json()
            print(f"从CoinGecko获取到{len(market_data)}个币种的市值数据")

            binance_tickers = self.client.get_ticker()
            binance_usdt_symbols = {
                symbol
                for symbol, _ in self._liquid_usdt_tickers(
                    binance_tickers, self.MARKET_CAP_MIN_QUOTE_VOLUME
                )
            }
            print(f"Binance支持的USDT交易对: {len(binance_usdt_symbols)}")

            top_pairs = self._match_by_market_cap(market_data, binance_usdt_symbols, limit)
            print(f"成功匹配{len(top_pairs)}个市值排名前{limit}的USDT交易对")

            if len(top_pairs) < limit:
                print(f"市值匹配数量不足({len(top_pairs)}/{limit}),用交易量排序补充...")
                already_listed = set(top_pairs)
                for pair in self.get_top_usdt_pairs(limit):
                    if len(top_pairs) >= limit:
                        break
                    if pair not in already_listed:
                        already_listed.add(pair)
                        top_pairs.append(pair)
            return top_pairs[:limit]
        except Exception as e:
            logging.error(f"获取市值排名失败: {e}")
            print(f"获取市值排名失败: {e},回退到按交易量获取")
            # Fall back to the plain volume ranking.
            return self.get_top_usdt_pairs(limit)

    def get_exchange_info(self):
        """Return the client's exchange info, or ``None`` on any error."""
        try:
            return self.client.get_exchange_info()
        except Exception as e:
            logging.error(f"获取交易所信息失败: {e}")
            return None