astock-agent/backend/app/data/sina_client.py
2026-04-23 23:24:54 +08:00

250 lines
8.3 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""新浪财经数据客户端。
作为东方财富/腾讯的兜底数据源:
- 分钟 K 线:用于盘中量能证据
- 实时行情:可按股票列表批量拉取
新浪接口属于公开网页行情接口,字段稳定性弱于正式数据服务,因此只作为 fallback。
"""
import json
import logging
import re
from collections import defaultdict
import httpx
import pandas as pd
from app.data.cache import cache
from app.data.models import StockQuote
logger = logging.getLogger(__name__)
# Sina endpoint queried by get_min_kline for minute-level K-line bars; the
# response body is parsed as JSON by _parse_kline_payload.
SINA_KLINE_URL = "https://quotes.sina.cn/cn/api/json_v2.php/CN_MarketDataService.getKLineData"
# Base URL of the Sina real-time quote endpoint; get_realtime_quotes_batch
# appends a comma-separated list of Sina symbols directly after "list=".
SINA_QUOTE_URL = "https://hq.sinajs.cn/list="
# Headers sent with every Sina request made by this module.
# NOTE(review): presumably the Referer is needed for Sina to accept the
# request - confirm before removing it.
HEADERS = {
"Referer": "https://finance.sina.com.cn",
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36",
}
def _ts_code_to_sina(ts_code: str) -> str:
code, market = ts_code.split(".")
return f"{market.lower()}{code}"
async def get_min_kline(ts_code: str, period: str = "5", count: int = 48) -> pd.DataFrame:
    """Fetch minute-level K-line bars for one stock from Sina.

    This is a best-effort fallback source: every failure (bad code, network
    error, unparsable payload) is logged as a warning and reported as an empty
    DataFrame rather than raised.

    Args:
        ts_code: Stock code in ``<code>.<market>`` form, e.g. ``"600000.SH"``.
        period: Bar size as a string; one of ``"5"``, ``"15"``, ``"30"``,
            ``"60"``. Any other value falls back to ``"5"``.
        count: Maximum number of most-recent bars to return. Values ``<= 0``
            yield an empty DataFrame.

    Returns:
        A DataFrame with columns ``time``, ``open``, ``close``, ``high``,
        ``low``, ``volume`` and ``amount`` (one row per bar), or an empty
        DataFrame when nothing could be fetched. Successful results are stored
        in the shared cache under a key built from code, scale and count.
    """
    if count <= 0:
        # ``rows[-0:]`` would return every row and a negative count would
        # slice from the front, so treat non-positive counts as "no bars".
        return pd.DataFrame()

    scale = period if period in {"5", "15", "30", "60"} else "5"
    cache_key = f"sina_min_kline:{ts_code}:{scale}:{count}"
    cached = cache.get(cache_key)
    if cached is not None:
        return cached

    try:
        # Built inside the try so a malformed ts_code follows the same
        # best-effort path as any other failure.
        params = {
            "symbol": _ts_code_to_sina(ts_code),
            "scale": scale,
            "ma": "no",
            "datalen": str(count),
        }
        async with httpx.AsyncClient(timeout=10, follow_redirects=True) as client:
            resp = await client.get(SINA_KLINE_URL, params=params, headers=HEADERS)
            resp.raise_for_status()
            rows = _parse_kline_payload(resp.text)
        if not rows:
            return pd.DataFrame()

        records = []
        for item in rows[-count:]:
            amount = item.get("amount")
            volume = item.get("volume", 0)
            records.append({
                "time": item.get("day", ""),
                "open": float(item.get("open", 0) or 0),
                "close": float(item.get("close", 0) or 0),
                "high": float(item.get("high", 0) or 0),
                "low": float(item.get("low", 0) or 0),
                "volume": float(volume or 0),
                # Sina minute bars often carry only ``volume``; when ``amount``
                # is missing or empty, volume is used as a stand-in so that
                # downstream volume-distribution analysis still has a value.
                "amount": float(amount if amount not in (None, "") else volume or 0),
            })
        df = pd.DataFrame(records)
        cache.set(cache_key, df, ttl=180)
        return df
    except Exception as e:
        logger.warning("新浪分钟K线获取失败 %s: %s", ts_code, e)
        return pd.DataFrame()
async def get_realtime_quotes_batch(ts_codes: list[str]) -> dict[str, StockQuote]:
    """Fetch real-time quotes from Sina for many stocks at once.

    Codes are requested in batches of 80 symbols. A failing batch (network
    error, bad status, malformed code) is logged as a warning and skipped; the
    remaining batches are still processed.

    Args:
        ts_codes: Stock codes in ``<code>.<market>`` form.

    Returns:
        Mapping from the input code to its parsed quote. Codes for which Sina
        returned no usable data are simply absent.
    """
    results: dict[str, StockQuote] = {}
    if not ts_codes:
        return results

    batch_size = 80
    # One client for all batches so connections are reused instead of being
    # re-established for every 80-symbol request.
    async with httpx.AsyncClient(timeout=10, follow_redirects=True) as client:
        for start in range(0, len(ts_codes), batch_size):
            batch = ts_codes[start:start + batch_size]
            try:
                # Convert each code once, inside the try, so one malformed
                # code costs only its own batch.
                symbol_by_code = {code: _ts_code_to_sina(code) for code in batch}
                symbols = ",".join(symbol_by_code.values())
                resp = await client.get(f"{SINA_QUOTE_URL}{symbols}", headers=HEADERS)
                resp.raise_for_status()
                # The body is decoded as GBK before parsing.
                resp.encoding = "gbk"
                quote_map = _parse_quote_payload(resp.text)
                for ts_code, symbol in symbol_by_code.items():
                    quote = _quote_from_fields(ts_code, quote_map.get(symbol))
                    if quote:
                        results[ts_code] = quote
            except Exception as e:
                logger.warning("新浪批量行情获取失败: %s", e)
    return results
def _aggregate_industry_rows(
    quotes: dict,
    code_to_industry: dict[str, str],
    code_to_name: dict[str, str],
) -> list[dict]:
    """Roll per-stock quotes up into one summary row per industry, best first."""
    grouped = defaultdict(lambda: {
        "amount": 0.0,
        "pct_sum": 0.0,
        "count": 0,
        "up_count": 0,
        "down_count": 0,
        "leading": None,
    })
    for ts_code, quote in quotes.items():
        industry = code_to_industry.get(ts_code)
        if not industry:
            continue
        bucket = grouped[industry]
        bucket["amount"] += quote.amount or 0
        bucket["pct_sum"] += quote.pct_chg
        bucket["count"] += 1
        if quote.pct_chg > 0:
            bucket["up_count"] += 1
        elif quote.pct_chg < 0:
            bucket["down_count"] += 1
        # Track the stock with the highest percentage change in the industry.
        leading = bucket["leading"]
        if leading is None or quote.pct_chg > leading["pct_chg"]:
            bucket["leading"] = {
                "ts_code": ts_code,
                "name": quote.name or code_to_name.get(ts_code, ts_code),
                "pct_chg": quote.pct_chg,
            }

    rows = []
    # NOTE(review): sector_code is numbered by first-seen order of the
    # industries, before sorting, so it is not a stable identifier across
    # calls - confirm that no consumer relies on it.
    for idx, (industry, bucket) in enumerate(grouped.items(), start=1):
        count = bucket["count"] or 1
        leading = bucket["leading"] or {}
        rows.append({
            "sector_code": f"SINA_{idx:03d}",
            "sector_name": industry,
            "board_type": "industry",
            # Unweighted mean of the member stocks' percentage changes.
            "pct_change": round(bucket["pct_sum"] / count, 2),
            "amount": round(bucket["amount"], 2),
            "turnover_rate": 0,
            "up_count": int(bucket["up_count"]),
            "down_count": int(bucket["down_count"]),
            "leading_stock_name": leading.get("name", ""),
            "leading_stock_code": leading.get("ts_code", ""),
            "leading_stock_pct": float(leading.get("pct_chg", 0) or 0),
            "source": "sina",
        })
    rows.sort(key=lambda item: (item["pct_change"], item["amount"]), reverse=True)
    return rows


async def get_sector_realtime_ranking_by_industry(limit: int = 20) -> list[dict]:
    """Build today's industry ranking from Sina quotes and a static industry map.

    The stock-to-industry mapping comes from ``tushare_client.get_stock_basic()``;
    prices come from Sina real-time quotes. Per the original description this
    is the fallback used when the East Money sector ranking is unavailable, and
    it does not depend on Tushare intraday data.

    Args:
        limit: Maximum number of industries to return.

    Returns:
        Industry rows sorted by average percentage change, then total amount,
        both descending. An empty list is returned (and not cached) when the
        industry mapping or the quotes are unavailable.
    """
    # Function-scope import, kept as in the original.
    from app.data.tushare_client import tushare_client

    cache_key = f"sina_sector_industry:{limit}"
    cached = cache.get(cache_key)
    if cached is not None:
        return cached

    stock_basic = tushare_client.get_stock_basic()
    if stock_basic is None or stock_basic.empty or "industry" not in stock_basic.columns:
        logger.warning("新浪行业榜兜底失败: 股票静态行业映射为空")
        return []

    stock_basic = stock_basic.dropna(subset=["industry"])
    code_to_industry: dict[str, str] = {}
    code_to_name: dict[str, str] = {}
    for _, row in stock_basic.iterrows():
        ts_code = row.get("ts_code")
        industry = row.get("industry")
        # Skip rows without a usable string code (e.g. a NaN cell) or industry.
        if not isinstance(ts_code, str) or not ts_code or not industry:
            continue
        code_to_industry[ts_code] = str(industry)
        code_to_name[ts_code] = str(row.get("name") or ts_code)

    quotes = await get_realtime_quotes_batch(list(code_to_industry))
    if not quotes:
        return []

    result = _aggregate_industry_rows(quotes, code_to_industry, code_to_name)[:limit]
    cache.set(cache_key, result, ttl=180)
    logger.info("新浪行业实时榜兜底: 获取 %s 个行业", len(result))
    return result
def _parse_kline_payload(text: str) -> list[dict]:
text = (text or "").strip()
if not text:
return []
if text.startswith("["):
return json.loads(text)
start = text.find("[")
end = text.rfind("]")
if start >= 0 and end > start:
return json.loads(text[start:end + 1])
return []
def _parse_quote_payload(text: str) -> dict[str, list[str]]:
result: dict[str, list[str]] = {}
pattern = re.compile(r'var hq_str_([^=]+)="([^"]*)";')
for symbol, payload in pattern.findall(text or ""):
if payload:
result[symbol] = payload.split(",")
return result
def _quote_from_fields(ts_code: str, fields: list[str] | None) -> StockQuote | None:
    """Build a ``StockQuote`` from the comma-split fields of one Sina quote.

    Field positions read here: 0 name, 1 open, 2 previous close, 3 current
    price, 4 high, 5 low, 8 volume, 9 amount.

    Args:
        ts_code: Code stored on the resulting quote.
        fields: Field list for the stock, or ``None`` when Sina returned none.

    Returns:
        The parsed quote, or ``None`` when the field list is missing, has
        fewer than 32 entries, contains a non-numeric value in a numeric
        position, or reports a price that is not positive.
    """
    if not fields or len(fields) < 32:
        return None
    try:
        name = fields[0]
        open_price = float(fields[1] or 0)
        pre_close = float(fields[2] or 0)
        price = float(fields[3] or 0)
        # A non-positive price is treated as "no usable quote".
        if price <= 0:
            return None
        high = float(fields[4] or 0)
        low = float(fields[5] or 0)
        volume = float(fields[8] or 0)
        # NOTE(review): the raw amount is scaled down by 10,000, presumably to
        # match the unit used by the other quote sources - confirm.
        amount = float(fields[9] or 0) / 10000
        # Percentage change against the previous close; 0 when that is unknown.
        pct_chg = ((price - pre_close) / pre_close * 100) if pre_close else 0
        return StockQuote(
            ts_code=ts_code,
            name=name,
            price=price,
            pct_chg=round(pct_chg, 2),
            volume=volume,
            amount=amount,
            # Turnover rate is not derived from this payload; always 0 here.
            turnover_rate=0,
            high=high,
            low=low,
            open=open_price,
            pre_close=pre_close,
        )
    except (IndexError, ValueError):
        return None