"""
新闻舆情服务 - 获取加密货币相关新闻
"""
import re
import html
import aiohttp
import xml.etree.ElementTree as ET
from typing import List, Dict, Any, Optional
from datetime import datetime, timedelta
from app.utils.logger import logger
class NewsService:
    """News & sentiment service: fetches cryptocurrency-related newsflash items via RSS."""

    # BlockBeats newsflash RSS endpoint
    BLOCKBEATS_RSS = "https://api.theblockbeats.news/v2/rss/newsflash"

    def __init__(self):
        """Initialize the service with an empty in-memory cache."""
        self._cache: List[Dict[str, Any]] = []       # most recent successful fetch
        self._cache_time: Optional[datetime] = None  # timestamp of that fetch
        self._cache_duration = timedelta(minutes=5)  # cache TTL: 5 minutes
        logger.info("新闻舆情服务初始化完成")

    async def get_latest_news(self, limit: int = 20) -> List[Dict[str, Any]]:
        """
        Return the latest news items, serving from the cache when still fresh.

        Args:
            limit: Maximum number of items to return.

        Returns:
            A list of news dicts (possibly empty). On a fetch failure the
            stale cache is returned as a best-effort fallback.
        """
        # Serve from cache if it has not expired yet.
        if self._cache and self._cache_time:
            if datetime.now() - self._cache_time < self._cache_duration:
                return self._cache[:limit]
        try:
            news = await self._fetch_blockbeats_news()
            self._cache = news
            self._cache_time = datetime.now()
            return news[:limit]
        except Exception as e:
            logger.error(f"获取新闻失败: {e}")
            # Degrade gracefully: prefer stale news over no news at all.
            return self._cache[:limit] if self._cache else []

    async def _fetch_blockbeats_news(self) -> List[Dict[str, Any]]:
        """
        Fetch the BlockBeats RSS feed and parse it into a list of news dicts.

        Returns:
            A list of dicts with keys ``title``, ``description``, ``time``,
            ``time_str``, ``link`` and ``source``; empty on any error.
        """
        news_list: List[Dict[str, Any]] = []
        try:
            # Explicit total timeout instead of a bare int (the int form of
            # the per-request ``timeout`` argument is deprecated in aiohttp).
            timeout = aiohttp.ClientTimeout(total=10)
            async with aiohttp.ClientSession(timeout=timeout) as session:
                async with session.get(self.BLOCKBEATS_RSS) as response:
                    if response.status != 200:
                        logger.error(f"获取律动快讯失败: HTTP {response.status}")
                        return []
                    content = await response.text()
            # Parse the RSS XML payload (done after the connection is closed).
            root = ET.fromstring(content)
            channel = root.find('channel')
            if channel is None:
                return []
            for item in channel.findall('item'):
                title_elem = item.find('title')
                desc_elem = item.find('description')
                pub_date_elem = item.find('pubDate')
                link_elem = item.find('link')
                # An item without a title is useless for display; skip it.
                if title_elem is None:
                    continue
                title = self._clean_cdata(title_elem.text or '')
                # Description: strip CDATA wrapper, then HTML markup.
                description = ''
                if desc_elem is not None and desc_elem.text:
                    description = self._clean_html(self._clean_cdata(desc_elem.text))
                # Publication time (may be absent or unparseable -> None).
                pub_time = None
                if pub_date_elem is not None and pub_date_elem.text:
                    pub_time = self._parse_rss_date(self._clean_cdata(pub_date_elem.text))
                link = ''
                if link_elem is not None and link_elem.text:
                    link = self._clean_cdata(link_elem.text)
                news_list.append({
                    'title': title,
                    'description': description[:500],  # cap description length
                    'time': pub_time,
                    'time_str': pub_time.strftime('%m-%d %H:%M') if pub_time else '',
                    'link': link,
                    'source': '律动BlockBeats'
                })
            logger.info(f"获取到 {len(news_list)} 条律动快讯")
            return news_list
        except Exception as e:
            logger.error(f"解析律动快讯失败: {e}")
            return []

    def _clean_cdata(self, text: str) -> str:
        """
        Strip an XML CDATA wrapper (``<![CDATA[ ... ]]>``) from *text*.

        Bug fix: the previous pattern was an empty string while the
        replacement referenced group 1, so ``re.sub`` raised
        ``re.error: invalid group reference`` on every non-empty input.
        """
        if not text:
            return ''
        # Unwrap CDATA; DOTALL so the payload may span multiple lines.
        text = re.sub(r'<!\[CDATA\[(.*?)\]\]>', r'\1', text, flags=re.DOTALL)
        return text.strip()

    def _clean_html(self, text: str) -> str:
        """Remove HTML tags, decode HTML entities, and collapse whitespace."""
        if not text:
            return ''
        text = re.sub(r'<[^>]+>', '', text)  # drop HTML tags
        text = html.unescape(text)           # decode entities (&amp; -> &)
        text = re.sub(r'\s+', ' ', text)     # collapse runs of whitespace
        return text.strip()

    def _parse_rss_date(self, date_str: str) -> Optional[datetime]:
        """
        Parse an RSS ``pubDate`` string, e.g. ``Sat, 07 Feb 2026 00:30:33 +0800``.

        Returns:
            A datetime (timezone-aware if the string carried an offset),
            or None when no known format matches.
        """
        if not date_str:
            return None
        formats = [
            '%a, %d %b %Y %H:%M:%S %z',  # standard RFC 822 RSS date
            '%a, %d %b %Y %H:%M:%S',     # same, without a timezone
            '%Y-%m-%d %H:%M:%S'          # plain ISO-like fallback
        ]
        for fmt in formats:
            try:
                return datetime.strptime(date_str, fmt)
            except ValueError:
                continue
        return None

    def filter_relevant_news(self, news_list: List[Dict[str, Any]],
                             symbols: Optional[List[str]] = None,
                             hours: int = 4) -> List[Dict[str, Any]]:
        """
        Keep only news that is recent and relevant to watched symbols or the
        broader market.

        Args:
            news_list: News items as produced by ``get_latest_news``.
            symbols: Trading pairs of interest (e.g. ['BTCUSDT', 'ETHUSDT']).
            hours: Only keep news from the last ``hours`` hours.

        Returns:
            The filtered list; matched items gain a ``related_symbol`` key
            ('MARKET' for generic market-moving news).
        """
        if not news_list:
            return []
        cutoff_time = datetime.now() - timedelta(hours=hours)
        filtered = []
        # Per-symbol keyword aliases (Chinese and English).
        symbol_keywords = {
            'BTCUSDT': ['比特币', 'BTC', 'Bitcoin'],
            'ETHUSDT': ['以太坊', 'ETH', 'Ethereum'],
            'BNBUSDT': ['BNB', 'Binance'],
            'SOLUSDT': ['SOL', 'Solana'],
        }
        # Generic keywords that affect the whole market.
        market_keywords = [
            '市场', '行情', '反弹', '下跌', '暴跌', '暴涨', '清算',
            '资金费率', '多单', '空单', '杠杆', '爆仓',
            '美联储', 'Fed', '利率', '通胀',
            '监管', 'SEC', 'ETF',
            '鲸鱼', '巨鲸', '大户',
            '交易所', 'Binance', 'Coinbase'
        ]
        for news in news_list:
            # Time filter: drop items older than the cutoff (items with no
            # parsed time pass through).
            if news.get('time'):
                news_time = news['time']
                if news_time.tzinfo:
                    # NOTE(review): drops the UTC offset instead of converting;
                    # assumes feed timestamps are effectively local — confirm.
                    news_time = news_time.replace(tzinfo=None)
                if news_time < cutoff_time:
                    continue
            # Lower-case once instead of per keyword comparison.
            content = (news.get('title', '') + ' ' + news.get('description', '')).lower()
            is_relevant = False
            # 1) Match against the watched symbols' keyword aliases.
            if symbols:
                for symbol in symbols:
                    for kw in symbol_keywords.get(symbol, []):
                        if kw.lower() in content:
                            is_relevant = True
                            news['related_symbol'] = symbol
                            break
                    if is_relevant:
                        break
            # 2) Otherwise, match generic market-moving keywords.
            if not is_relevant:
                for kw in market_keywords:
                    if kw.lower() in content:
                        is_relevant = True
                        news['related_symbol'] = 'MARKET'
                        break
            if is_relevant:
                filtered.append(news)
        return filtered

    def format_news_for_llm(self, news_list: List[Dict[str, Any]],
                            max_items: int = 10) -> str:
        """
        Render news items as markdown for LLM consumption.

        Args:
            news_list: News items to render.
            max_items: Maximum number of items to include.

        Returns:
            A markdown string, or a placeholder when there is no news.
        """
        if not news_list:
            return "暂无相关新闻"
        lines = ["## 最新市场新闻\n"]
        for i, news in enumerate(news_list[:max_items], 1):
            time_str = news.get('time_str', '')
            title = news.get('title', '')
            desc = news.get('description', '')[:200]  # cap description length
            lines.append(f"### {i}. [{time_str}] {title}")
            if desc:
                lines.append(f"{desc}")
            lines.append("")
        return "\n".join(lines)
# Module-level singleton holder
_news_service: Optional[NewsService] = None


def get_news_service() -> NewsService:
    """Return the shared NewsService instance, creating it lazily on first use."""
    global _news_service
    if _news_service is not None:
        return _news_service
    _news_service = NewsService()
    return _news_service