"""
News and sentiment service - fetches cryptocurrency and stock related news.
"""

import asyncio
import html
import re
import traceback
import xml.etree.ElementTree as ET
from datetime import datetime, timedelta
from email.utils import parsedate_to_datetime
from typing import List, Dict, Any, Optional

import aiohttp

from app.utils.logger import logger
from app.config import get_settings


class NewsService:
    """News and sentiment service.

    Aggregates cryptocurrency news from three RSS feeds (BlockBeats,
    Cointelegraph, CoinDesk) and searches stock news through the Brave
    Search API. Results are cached in memory for ``_cache_duration``.

    Every news item is a dict with the keys ``title``, ``description``,
    ``time`` (``datetime`` or ``None``), ``time_str``, ``link`` and ``source``.
    """

    # BlockBeats newsflash RSS
    BLOCKBEATS_RSS = "https://api.theblockbeats.news/v2/rss/newsflash"

    # Cointelegraph RSS - English crypto news
    COINTELEGRAPH_RSS = "https://cointelegraph.com/rss"

    # CoinDesk RSS - English crypto news
    COINDESK_RSS = "https://www.coindesk.com/arc/outboundfeeds/rss/"

    # Brave Search API
    BRAVE_SEARCH_API = "https://api.search.brave.com/res/v1/web/search"

    # Total time budget for a single HTTP request, in seconds
    REQUEST_TIMEOUT_SECONDS = 10

    # Upper bound sent as the Brave `count` parameter
    BRAVE_MAX_COUNT = 20

    # Date layouts tried first by _parse_rss_date, in order
    _DATE_FORMATS = (
        '%a, %d %b %Y %H:%M:%S %z',
        '%a, %d %b %Y %H:%M:%S',
        '%Y-%m-%d %H:%M:%S',
    )

    # Trading pair -> keywords that mark a news item as related to it
    SYMBOL_KEYWORDS = {
        'BTCUSDT': ('BTC', 'Bitcoin', '比特币'),
        'ETHUSDT': ('ETH', 'Ethereum', '以太坊'),
        'SOLUSDT': ('SOL', 'Solana'),
        'BNBUSDT': ('BNB', 'Binance', '币安'),
        'ADAUSDT': ('ADA', 'Cardano'),
        'XRPUSDT': ('XRP', 'Ripple'),
        'DOGEUSDT': ('DOGE', 'Dogecoin', '狗狗币'),
        'MATICUSDT': ('MATIC', 'Polygon'),
    }

    # Generic cryptocurrency keywords used by get_crypto_news
    CRYPTO_KEYWORDS = ('crypto', 'cryptocurrency', '加密货币', 'blockchain', '区块链')

    # Market-wide keywords used by filter_relevant_news
    MARKET_KEYWORDS = (
        '市场', '行情', '反弹', '下跌', '暴跌', '暴涨', '清算',
        '资金费率', '多单', '空单', '杠杆', '爆仓',
        '美联储', 'Fed', '利率', '通胀',
        '监管', 'SEC', 'ETF',
        '鲸鱼', '巨鲸', '大户',
        '交易所', 'Binance', 'Coinbase',
    )

    def __init__(self):
        """Initialise empty caches and load the application settings."""
        # 'crypto' holds the merged feed list, 'stock' maps cache key -> list
        self._cache: Dict[str, Any] = {'crypto': [], 'stock': {}}
        # Timestamp of the last crypto refresh attempt
        self._cache_time: Optional[datetime] = None
        # Per-key timestamps for stock searches, so that one cache cannot
        # make the other one look fresh
        self._stock_cache_time: Dict[str, datetime] = {}
        self._cache_duration = timedelta(minutes=5)  # cache for 5 minutes
        self.settings = get_settings()
        logger.info("新闻舆情服务初始化完成")

    # ------------------------------------------------------------------
    # Small shared helpers
    # ------------------------------------------------------------------

    def _is_fresh(self, stamp: Optional[datetime]) -> bool:
        """Return True when *stamp* is set and younger than the cache duration."""
        return stamp is not None and datetime.now() - stamp < self._cache_duration

    @staticmethod
    def _to_local_naive(moment: Optional[datetime]) -> Optional[datetime]:
        """Convert an aware datetime to naive local time; pass naive/None through.

        Naive values are returned unchanged because the code cannot tell
        which zone they were meant for.
        """
        if moment is None or moment.tzinfo is None:
            return moment
        try:
            return moment.astimezone().replace(tzinfo=None)
        except (OverflowError, OSError, ValueError):
            # Out-of-range timestamp for the platform: fall back to wall clock
            return moment.replace(tzinfo=None)

    @classmethod
    def _sort_key(cls, news: Dict[str, Any]) -> datetime:
        """Sort key that is safe for a mix of aware, naive and missing times."""
        return cls._to_local_naive(news.get('time')) or datetime.min

    @staticmethod
    def _contains_keyword(text: str, keyword: str) -> bool:
        """Case-insensitive keyword test.

        Keywords made of 1-5 ASCII letters/digits (tickers, acronyms) must
        not be glued to another ASCII letter/digit, so ``ETH`` does not
        match "method"; an optional plural ``s`` is allowed ("ETFs").
        Neighbouring CJK characters do not block a match. Longer or
        non-ASCII keywords use plain substring matching.
        """
        if re.fullmatch(r'[A-Za-z0-9]{1,5}', keyword):
            pattern = rf'(?<![A-Za-z0-9]){re.escape(keyword)}s?(?![A-Za-z0-9])'
            return re.search(pattern, text, flags=re.IGNORECASE) is not None
        return keyword.lower() in text.lower()

    # ------------------------------------------------------------------
    # Crypto news (RSS)
    # ------------------------------------------------------------------

    async def get_latest_news(self, limit: int = 20) -> List[Dict[str, Any]]:
        """
        Fetch the latest cryptocurrency news, merged from all RSS sources.

        Sources:
        1. BlockBeats - Chinese newsflash
        2. Cointelegraph - English news
        3. CoinDesk - English news

        Args:
            limit: maximum number of items to return

        Returns:
            News items, newest first. Items without a time sort last.
        """
        if self._is_fresh(self._cache_time):
            return self._cache.get('crypto', [])[:limit]

        try:
            # Fetch all sources concurrently
            results = await asyncio.gather(
                self._fetch_blockbeats_news(),
                self._fetch_cointelegraph_news(),
                self._fetch_coindesk_news(),
                return_exceptions=True,
            )

            all_news: List[Dict[str, Any]] = []
            for result in results:
                if isinstance(result, list):
                    all_news.extend(result)
                elif isinstance(result, Exception):
                    logger.warning(f"获取新闻失败: {result}")

            # Stamp the attempt even on failure so a dead feed is not
            # retried on every call
            self._cache_time = datetime.now()

            if not all_news:
                # Keep whatever was cached instead of replacing it with []
                logger.warning("所有新闻源均未返回数据,继续使用缓存")
                return self._cache.get('crypto', [])[:limit]

            all_news.sort(key=self._sort_key, reverse=True)
            self._cache['crypto'] = all_news

            logger.info(f"获取到 {len(all_news)} 条加密货币新闻(律动+Cointelegraph+CoinDesk)")
            return all_news[:limit]

        except Exception as e:
            logger.error(f"获取新闻失败: {e}")
            # Fall back to the cache
            return self._cache.get('crypto', [])[:limit]

    async def _fetch_blockbeats_news(self) -> List[Dict[str, Any]]:
        """Fetch the BlockBeats newsflash feed (all items)."""
        return await self._fetch_rss_feed(
            self.BLOCKBEATS_RSS,
            source='律动BlockBeats',
            label='律动快讯',
            unit='律动快讯',
        )

    async def _fetch_cointelegraph_news(self) -> List[Dict[str, Any]]:
        """Fetch the Cointelegraph feed (first 20 items)."""
        return await self._fetch_rss_feed(
            self.COINTELEGRAPH_RSS,
            source='Cointelegraph',
            label=' Cointelegraph ',
            unit=' Cointelegraph 新闻',
            max_items=20,
        )

    async def _fetch_coindesk_news(self) -> List[Dict[str, Any]]:
        """Fetch the CoinDesk feed (first 20 items)."""
        return await self._fetch_rss_feed(
            self.COINDESK_RSS,
            source='CoinDesk',
            label=' CoinDesk ',
            unit=' CoinDesk 新闻',
            max_items=20,
        )

    async def _fetch_rss_feed(self, url: str, source: str, label: str, unit: str,
                              max_items: Optional[int] = None) -> List[Dict[str, Any]]:
        """Download one RSS feed and convert it to news items.

        Args:
            url: feed URL
            source: value stored under the ``source`` key of every item
            label: feed name interpolated into error log messages
            unit: text interpolated after the item count in the success log
            max_items: keep only the first N ``<item>`` elements (None = all)

        Returns:
            Parsed items, or [] on any HTTP, network or parse failure
            (failures are logged, never raised).
        """
        content = ''
        try:
            timeout = aiohttp.ClientTimeout(total=self.REQUEST_TIMEOUT_SECONDS)
            async with aiohttp.ClientSession(timeout=timeout) as session:
                async with session.get(url) as response:
                    if response.status != 200:
                        logger.error(f"获取{label}失败: HTTP {response.status}")
                        return []
                    content = await response.text()

            news_list = self._parse_rss_items(content, source, max_items)
            logger.info(f"获取到 {len(news_list)} 条{unit}")
            return news_list

        except ET.ParseError as e:
            logger.error(f"解析{label} XML 失败: {e}")
            # Log the head of the payload to help debugging
            logger.debug(f"RSS 内容前 500 字符: {content[:500]}")
            return []
        except Exception as e:
            logger.error(f"获取{label}失败: {e}")
            logger.debug(traceback.format_exc())
            return []

    def _parse_rss_items(self, content: str, source: str,
                         max_items: Optional[int] = None) -> List[Dict[str, Any]]:
        """Turn RSS XML text into news items.

        Items without a ``<title>`` element are skipped. ``time`` keeps the
        datetime as parsed from ``<pubDate>``; ``time_str`` is rendered in
        local time so items from feeds in different zones are comparable.

        Raises:
            xml.etree.ElementTree.ParseError: if *content* is not valid XML.
        """
        # NOTE(review): the feeds are remote input parsed with the stdlib
        # ElementTree; consider a hardened parser if sources are untrusted.
        root = ET.fromstring(content)
        channel = root.find('channel')
        if channel is None:
            return []

        entries = channel.findall('item')
        if max_items is not None:
            entries = entries[:max_items]

        news_list: List[Dict[str, Any]] = []
        for entry in entries:
            raw_title = entry.findtext('title')
            if raw_title is None:
                # No <title> element at all
                continue

            description = self._clean_html(
                self._clean_cdata(entry.findtext('description') or ''))
            pub_time = self._parse_rss_date(
                self._clean_cdata(entry.findtext('pubDate') or ''))
            local_time = self._to_local_naive(pub_time)

            news_list.append({
                'title': self._clean_cdata(raw_title),
                'description': description[:500],  # cap the length
                'time': pub_time,
                'time_str': local_time.strftime('%m-%d %H:%M') if local_time else '',
                'link': self._clean_cdata(entry.findtext('link') or ''),
                'source': source,
            })
        return news_list

    def _clean_cdata(self, text: str) -> str:
        """Strip literal CDATA wrappers and surrounding whitespace."""
        if not text:
            return ''
        text = re.sub(r'<!\[CDATA\[(.*?)\]\]>', r'\1', text, flags=re.DOTALL)
        return text.strip()

    def _clean_html(self, text: str) -> str:
        """Remove HTML tags, decode entities and collapse whitespace."""
        if not text:
            return ''
        text = re.sub(r'<[^>]+>', '', text)
        text = html.unescape(text)
        text = re.sub(r'\s+', ' ', text)
        return text.strip()

    def _parse_rss_date(self, date_str: str) -> Optional[datetime]:
        """Parse an RSS date such as "Sat, 07 Feb 2026 00:30:33 +0800".

        The fixed layouts in ``_DATE_FORMATS`` are tried first; anything
        they reject (for example a named zone like "GMT") is handed to
        ``email.utils.parsedate_to_datetime``.

        Returns:
            An aware datetime when the string carries a zone, a naive one
            when it does not, or None when it cannot be parsed.
        """
        if not date_str:
            return None

        for fmt in self._DATE_FORMATS:
            try:
                return datetime.strptime(date_str, fmt)
            except ValueError:
                continue

        try:
            return parsedate_to_datetime(date_str)
        except (TypeError, ValueError, IndexError, OverflowError):
            return None

    def filter_relevant_news(self, news_list: List[Dict[str, Any]],
                             symbols: Optional[List[str]] = None,
                             hours: int = 4) -> List[Dict[str, Any]]:
        """
        Keep recent news related to the given symbols or to the market.

        Matching items get a ``related_symbol`` key set in place: the first
        matching entry of *symbols*, otherwise 'MARKET' when a market-wide
        keyword matches. Items with no time are not age-filtered.

        Args:
            news_list: news items
            symbols: trading pairs of interest (e.g. ['BTCUSDT', 'ETHUSDT'])
            hours: keep only items from the last N hours

        Returns:
            The matching items, in input order.
        """
        if not news_list:
            return []

        cutoff_time = datetime.now() - timedelta(hours=hours)
        filtered: List[Dict[str, Any]] = []

        for news in news_list:
            # Convert (not just strip) the zone before comparing with local now
            news_time = self._to_local_naive(news.get('time'))
            if news_time is not None and news_time < cutoff_time:
                continue

            content = f"{news.get('title') or ''} {news.get('description') or ''}"

            related = None
            for symbol in symbols or ():
                keywords = self.SYMBOL_KEYWORDS.get(symbol, ())
                if any(self._contains_keyword(content, kw) for kw in keywords):
                    related = symbol
                    break

            if related is None and any(
                    self._contains_keyword(content, kw) for kw in self.MARKET_KEYWORDS):
                related = 'MARKET'

            if related is not None:
                news['related_symbol'] = related
                filtered.append(news)

        return filtered

    # ------------------------------------------------------------------
    # Stock news (Brave Search)
    # ------------------------------------------------------------------

    async def search_stock_news(self, symbol: str, stock_name: str = '',
                                max_results: int = 10) -> List[Dict[str, Any]]:
        """
        Search stock news through the Brave Search API.

        Args:
            symbol: ticker (e.g. AAPL, 0700.HK)
            stock_name: stock name (optional)
            max_results: maximum number of results

        Returns:
            News items, or [] when no API key is configured or the request
            fails. ``time`` is the fetch time because the parsed response
            fields carry no publication time.
        """
        api_key = self.settings.brave_api_key
        if not api_key:
            logger.warning("未配置 Brave API Key,跳过新闻搜索")
            return []

        cache_key = f"{symbol}_{stock_name}"
        stock_cache = self._cache.setdefault('stock', {})
        if cache_key in stock_cache and self._is_fresh(self._stock_cache_time.get(cache_key)):
            return stock_cache[cache_key][:max_results]

        # Build the query according to the market the ticker belongs to
        if symbol.endswith('.HK'):
            # Hong Kong stocks
            if stock_name:
                query = f"{stock_name} 港股 新闻 最新"
            else:
                query = f"{symbol.replace('.HK', '')} 港股 新闻 最新"
        else:
            # US stocks
            if stock_name:
                query = f"{stock_name} 股票 {symbol} news latest"
            else:
                query = f"{symbol} stock news latest"

        try:
            headers = {
                'Accept': 'application/json',
                'Accept-Encoding': 'gzip',
                'X-Subscription-Token': api_key
            }

            params = {
                'q': query,
                # Keep `count` inside the range the API accepts
                'count': max(1, min(max_results, self.BRAVE_MAX_COUNT)),
                'text_decorations': 'false',  # sent as a string
                'search_lang': 'zh-hans',  # Brave uses zh-hans rather than zh-CN
                # 'result_filter': 'news' is not sent: unsupported on the free plan
                'freshness': 'pd'  # past 24 hours
            }

            timeout = aiohttp.ClientTimeout(total=self.REQUEST_TIMEOUT_SECONDS)
            async with aiohttp.ClientSession(timeout=timeout) as session:
                async with session.get(
                    self.BRAVE_SEARCH_API,
                    headers=headers,
                    params=params,
                ) as response:
                    if response.status != 200:
                        logger.error(f"Brave Search API 请求失败: HTTP {response.status}")
                        return []

                    data = await response.json()

            # Tolerate a missing or null "web"/"results" field
            web_results = (data.get('web') or {}).get('results') or []
            fetched_at = datetime.now()
            fetched_str = fetched_at.strftime('%m-%d %H:%M')

            news_list = [
                {
                    'title': item.get('title', ''),
                    'description': self._clean_html(item.get('description', ''))[:500],
                    'time': fetched_at,
                    'time_str': fetched_str,
                    'link': item.get('url', ''),
                    'source': 'Brave Search'
                }
                for item in web_results
            ]

            logger.info(f"Brave Search 搜索 {symbol} 获取到 {len(news_list)} 条新闻")

            stock_cache[cache_key] = news_list
            self._stock_cache_time[cache_key] = fetched_at

            return news_list[:max_results]

        except aiohttp.ClientError as e:
            logger.error(f"Brave Search API 请求失败: {e}")
            return []
        except Exception as e:
            logger.error(f"搜索股票新闻失败: {e}")
            logger.debug(traceback.format_exc())
            return []

    async def get_crypto_news(self, symbol: str, limit: int = 10) -> Dict[str, Any]:
        """
        Fetch news related to one cryptocurrency.

        An item matches when its title or description contains a keyword
        of *symbol* or a generic cryptocurrency keyword.

        Args:
            symbol: trading pair (e.g. BTCUSDT)
            limit: maximum number of results

        Returns:
            A dict with ``articles`` (list of news items) and ``total``.
        """
        try:
            # Over-fetch so that filtering still leaves enough items
            all_news = await self.get_latest_news(limit=limit * 2)

            keywords = tuple(self.SYMBOL_KEYWORDS.get(symbol, ())) + tuple(self.CRYPTO_KEYWORDS)

            filtered_news: List[Dict[str, Any]] = []
            for news in all_news:
                content = f"{news.get('title') or ''} {news.get('description') or ''}"
                if any(self._contains_keyword(content, kw) for kw in keywords):
                    filtered_news.append(news)
                    if len(filtered_news) >= limit:
                        break

            return {
                'articles': filtered_news[:limit],
                'total': len(filtered_news)
            }

        except Exception as e:
            logger.warning(f"获取加密货币新闻失败: {e}")
            return {'articles': [], 'total': 0}

    def format_news_for_llm(self, news_list: List[Dict[str, Any]],
                            max_items: int = 10) -> str:
        """
        Render news items as Markdown text for an LLM prompt.

        Args:
            news_list: news items
            max_items: maximum number of items to render

        Returns:
            The formatted text, or a fixed placeholder when the list is empty.
        """
        if not news_list:
            return "暂无相关新闻"

        lines = ["## 最新市场新闻\n"]

        for i, news in enumerate(news_list[:max_items], 1):
            time_str = news.get('time_str', '')
            title = news.get('title', '')
            desc = (news.get('description') or '')[:200]  # cap description length

            lines.append(f"### {i}. [{time_str}] {title}")
            if desc:
                lines.append(f"{desc}")
            lines.append("")

        return "\n".join(lines)


# Module-level singleton, created lazily by get_news_service()
_news_service: Optional[NewsService] = None


def get_news_service() -> NewsService:
    """Return the shared NewsService instance, creating it on first use."""
    global _news_service
    service = _news_service
    if service is None:
        # First call: build the instance and remember it for later callers
        service = _news_service = NewsService()
    return service