alphax/app/services/llm_insights.py
2026-05-16 14:52:10 +08:00

634 lines
24 KiB
Python

"""Async cached LLM explanation layer."""
import json
import os
from datetime import datetime, timedelta
import requests
from app.core.opportunity_lifecycle import normalize_action_status
from app.db.altcoin_db import get_conn, _derive_execution_fields
from app.db.llm_insights import compute_input_hash, get_any_insight, get_insights_for_targets, get_latest_insight_by_type, upsert_insight
PROMPTS = {
"recommendation_explain_v1": "recommendation_explain_v1",
"sentiment_explain_v1": "sentiment_explain_v1",
"sentiment_batch_analyze_v1": "sentiment_batch_analyze_v1",
"review_memo_v1": "review_memo_v1",
}
def _env_bool(name, default=False):
value = os.getenv(name)
if value is None:
return default
return str(value).strip().lower() in ("1", "true", "yes", "on")
def get_llm_params():
"""Runtime LLM config. This is system config, not strategy config."""
return {
"enabled": _env_bool("ALPHAX_LLM_ENABLED", False),
"base_url": os.getenv("ALPHAX_LLM_BASE_URL", "https://api.openai.com/v1").strip(),
"api_key_env": os.getenv("ALPHAX_LLM_API_KEY_ENV", "ALPHAX_LLM_API_KEY").strip(),
"model": os.getenv("ALPHAX_LLM_MODEL", "gpt-4o-mini").strip(),
"timeout": int(os.getenv("ALPHAX_LLM_TIMEOUT", "20") or "20"),
"max_tokens": int(os.getenv("ALPHAX_LLM_MAX_TOKENS", "900") or "900"),
}
def get_llm_module_enabled(module_name):
if not _env_bool("ALPHAX_LLM_ENABLED", False):
return False
env_name = f"ALPHAX_LLM_{str(module_name or '').upper()}_ENABLED"
return _env_bool(env_name, True)
def _dump_json(value):
return json.dumps(value or {}, ensure_ascii=False, sort_keys=True, default=str)
def _get_target_key(value):
if value is None:
return ""
return str(value)
def _json_fallback(value, fallback=None):
try:
return json.loads(value) if isinstance(value, str) else (value if value is not None else fallback)
except Exception:
return fallback
def _parse_insight_payload(content):
if not isinstance(content, dict):
return {}
if isinstance(content.get("content"), dict):
return content["content"]
return content
def _call_llm_json(prompt_version, payload):
params = get_llm_params()
api_key = os.getenv(str(params.get("api_key_env") or "OPENAI_API_KEY"), "").strip()
if not params.get("enabled", False) or not api_key:
return {"status": "skipped", "error": "llm_disabled_or_missing_key"}
base_url = str(params.get("base_url") or "").rstrip("/")
model = str(params.get("model") or "").strip()
timeout = int(params.get("timeout") or 20)
max_tokens = int(params.get("max_tokens") or 900)
system_prompt = (
"You are a crypto research assistant. Return strict JSON only. "
"Do not change trading decisions, scores, or strategy state."
)
user_prompt = _dump_json({
"prompt_version": prompt_version,
"input": payload,
"output_schema_hint": "JSON object with concise Chinese fields only",
})
try:
resp = requests.post(
f"{base_url}/chat/completions",
headers={"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"},
json={
"model": model,
"messages": [
{"role": "system", "content": system_prompt},
{"role": "user", "content": user_prompt},
],
"temperature": 0.2,
"max_tokens": max_tokens,
"response_format": {"type": "json_object"},
},
timeout=timeout,
)
if resp.status_code >= 400:
return {"status": "failed", "error": f"http_{resp.status_code}", "raw": resp.text[:1000], "model": model}
data = resp.json()
content = (((data.get("choices") or [{}])[0]).get("message") or {}).get("content") or "{}"
parsed = json.loads(content)
if not isinstance(parsed, dict):
raise ValueError("llm_output_not_object")
return {"status": "success", "content": parsed, "model": model}
except json.JSONDecodeError as exc:
return {"status": "failed", "error": f"invalid_json:{exc}", "model": model}
except Exception as exc:
return {"status": "failed", "error": str(exc)[:1000], "model": model}
def _should_generate_recommendation(row):
action_status = normalize_action_status(row.get("action_status") or row.get("entry_plan", {}).get("entry_action") or "持有", row.get("status") or "active")
execution_status = str(row.get("execution_status") or "")
observe_tier = str(row.get("observe_tier") or "")
state_reason = str(row.get("state_reason") or row.get("execution_reason") or "")
entry_window = row.get("entry_window") or {}
if execution_status in ("buy_now", "wait_pullback", "invalid") or action_status in ("可即刻买入", "等回踩", "衰减") or row.get("display_bucket") == "realtime":
return True
if observe_tier == "strong" and ("回踩" in state_reason or "入场" in state_reason or "失效" in state_reason):
return True
if isinstance(entry_window, dict) and entry_window.get("status") == "active":
return True
if "重点观察" in state_reason:
return True
return False
def _should_generate_sentiment(row):
importance = str(row.get("importance") or "").upper()
source = str(row.get("source") or "").lower()
title = str(row.get("title") or "")
if importance in ("A", "S", "RISK"):
return True
if "binance" in source:
return True
if any(k in title.lower() for k in ("listing", "launch", "mainnet", "upgrade", "partnership", "hack", "exploit", "burn", "合约", "上币", "主网", "升级", "合作", "黑客", "漏洞")):
return True
return False
def _is_internal_sentiment_event(row):
event_type = str(row.get("event_type") or "")
title = str(row.get("title") or "")
source = str(row.get("source") or "")
return (
event_type in ("market_heat", "theme_expansion", "theme_direct", "llm_sentiment_candidate")
or source == "llm_sentiment"
or title.startswith("[主题扩散:")
)
def _should_generate_review(item):
metrics = item.get("metrics") or {}
release_decision = str(item.get("release_decision") or "")
failure_count = int(metrics.get("fail_count") or 0)
hit_count = int(metrics.get("hit_count") or 0)
pollution = item.get("pollution_summary") or {}
if release_decision in ("gray", "release", "hold"):
return True
if failure_count > 0 or hit_count > 0:
return True
if int(pollution.get("contaminated_symbol_count") or 0) > 0:
return True
return False
def _build_recommendation_payload(row):
entry_plan = row.get("entry_plan") or _json_fallback(row.get("entry_plan_json"), {}) or {}
signals = row.get("signals") or _json_fallback(row.get("signals"), []) or []
if isinstance(signals, str):
signals = _json_fallback(signals, []) or []
return {
"target_type": "recommendation",
"target_id": row.get("id"),
"symbol": row.get("symbol"),
"rec_time": row.get("rec_time"),
"status": row.get("status"),
"action_status": row.get("action_status"),
"execution_status": row.get("execution_status"),
"execution_label": row.get("execution_label"),
"execution_reason": row.get("execution_reason"),
"rec_score": row.get("rec_score"),
"entry_price": row.get("entry_price"),
"current_price": row.get("current_price"),
"stop_loss": row.get("stop_loss"),
"tp1": row.get("tp1"),
"tp2": row.get("tp2"),
"observe_tier": row.get("observe_tier"),
"observe_reason": row.get("observe_reason"),
"state_reason": row.get("state_reason"),
"entry_window": row.get("entry_window"),
"market_context": row.get("market_context"),
"derivatives_context": row.get("derivatives_context"),
"sector_context": row.get("sector_context"),
"entry_plan": entry_plan,
"signals": signals,
}
def _build_sentiment_payload(row):
return {
"target_type": "sentiment",
"target_id": row.get("event_id") or row.get("id"),
"source": row.get("source"),
"source_label": row.get("source_label"),
"event_type": row.get("event_type"),
"importance": row.get("importance"),
"title": row.get("title"),
"related_symbol": row.get("related_symbol"),
"related_base": row.get("related_base"),
"decision": row.get("decision"),
"tech_score": row.get("tech_score"),
"published_at": row.get("published_at"),
"detected_at": row.get("detected_at"),
"relation_tag": row.get("relation_tag"),
"in_active": row.get("in_active"),
"in_screened": row.get("in_screened"),
}
def _build_sentiment_batch_payload(hours=24, limit=40):
conn = get_conn()
events = []
try:
rows = conn.execute(
"""
SELECT id, source, symbol, title, url, published_at, detected_at, importance,
event_type, decision, tech_score, rec_id, pushed
FROM event_news
WHERE detected_at >= %s
ORDER BY published_at::timestamp DESC, id DESC
LIMIT %s
""",
((datetime.now() - timedelta(hours=int(hours or 24))).isoformat(), int(limit or 40)),
).fetchall()
except Exception:
rows = []
for raw in rows:
row = {
"event_id": f"event_news:{raw[0]}",
"source": raw[1],
"related_symbol": raw[2],
"related_base": (str(raw[2] or "").split("/")[0] or "").upper(),
"title": raw[3],
"url": raw[4],
"published_at": raw[5],
"detected_at": raw[6],
"importance": raw[7],
"event_type": raw[8],
"decision": raw[9],
"tech_score": raw[10],
"rec_id": raw[11],
"pushed": bool(raw[12]),
}
if _is_internal_sentiment_event(row):
continue
events.append(row)
try:
trend_rows = conn.execute(
"""
SELECT id, symbol, name, trend_rank, trend_score, market_cap_rank, detected_at, extra_json
FROM sentiment_events
WHERE detected_at = (SELECT MAX(detected_at) FROM sentiment_events WHERE source='coingecko')
ORDER BY trend_rank
LIMIT 20
"""
).fetchall()
except Exception:
trend_rows = []
conn.close()
trend_news = []
for raw in trend_rows:
extra = _json_fallback(raw[7], {}) or {}
for n in (extra.get("news") or [])[:3]:
title = n.get("title") or ""
if not title:
continue
trend_news.append({
"event_id": f"sentiment_event:{raw[0]}:{n.get('url') or title}",
"source": n.get("source") or "news",
"related_symbol": f"{str(raw[1] or '').upper()}/USDT",
"related_base": str(raw[1] or "").upper(),
"related_name": raw[2] or raw[1],
"title": title[:180],
"url": n.get("url") or "",
"published_at": n.get("published") or "",
"detected_at": raw[6],
"importance": "B",
"event_type": "news",
"trend_rank": raw[3],
"trend_score": raw[4],
"market_cap_rank": raw[5],
"price_usd": extra.get("price_usd", 0),
"change_24h_pct": extra.get("change_24h_pct", 0),
})
combined = events + trend_news
seen = set()
deduped = []
for item in combined:
key = ((item.get("title") or "").strip().lower(), item.get("related_base"), item.get("source"))
if key in seen:
continue
seen.add(key)
deduped.append(item)
deduped = deduped[: int(limit or 40)]
return {
"target_type": "sentiment_batch",
"target_id": f"sentiment_batch:{int(hours or 24)}h",
"hours": int(hours or 24),
"generated_at": datetime.now().isoformat(),
"event_count": len(deduped),
"events": deduped,
"instructions": {
"role": "作为加密市场舆情分析师,判断这些新闻对山寨币行情的影响。",
"focus": [
"归纳主线叙事和受影响币种",
"区分利好、利空、风险和噪音",
"给出可信度和短线影响窗口",
"指出哪些币种需要触发技术检查",
"不要给买卖指令,只做舆情影响分析",
],
"expected_schema": {
"market_mood": "risk_on|neutral|risk_off",
"summary": "中文摘要",
"hot_themes": [{"theme": "", "impact": "", "symbols": [], "confidence": 0}],
"coin_impacts": [{"symbol": "", "direction": "positive|negative|risk|neutral", "reason": "", "confidence": 0, "need_technical_check": False}],
"risk_events": [{"title": "", "symbols": [], "risk_type": "", "severity": "low|medium|high"}],
"watchlist": [{"symbol": "", "why": "", "trigger": ""}],
},
},
}
def _build_review_payload(item):
return {
"target_type": "review",
"target_id": item.get("id") or item.get("created_at") or item.get("run_date"),
"run_date": item.get("run_date"),
"created_at": item.get("created_at"),
"title": item.get("title"),
"summary": item.get("summary"),
"metrics": item.get("metrics") or {},
"findings": item.get("findings") or [],
"problems": item.get("problems") or [],
"actions": item.get("actions") or [],
"candidate_rules": item.get("candidate_rules") or [],
"success_analysis": item.get("success_analysis") or {},
"failure_analysis": item.get("failure_analysis") or {},
"pollution_summary": item.get("pollution_summary") or {},
"version_change_summary": item.get("version_change_summary") or "",
}
def generate_recommendation_insights(limit=30):
if not get_llm_module_enabled("recommendations"):
return {"status": "skipped", "reason": "module_disabled", "processed": 0}
conn = get_conn()
rows = conn.execute(
"""
SELECT r.*,
lpc.price AS latest_cache_price,
lpc.updated_at AS latest_cache_updated_at
FROM recommendation r
LEFT JOIN latest_price_cache lpc ON lpc.symbol = r.symbol
WHERE r.status='active' AND COALESCE(r.display_bucket,'watch_pool') != 'history'
ORDER BY r.rec_time DESC
"""
).fetchall()
conn.close()
items = []
seen = set()
for row in rows:
item = _derive_execution_fields(dict(row))
if not _should_generate_recommendation(item):
continue
if str(item.get("id")) in seen:
continue
seen.add(str(item.get("id")))
items.append(item)
if limit and len(items) >= int(limit):
break
processed = 0
for row in items:
payload = _build_recommendation_payload(row)
input_hash = compute_input_hash(payload)
cached = get_any_insight("recommendation", payload["target_id"], PROMPTS["recommendation_explain_v1"], input_hash)
if cached:
continue
result = _call_llm_json(PROMPTS["recommendation_explain_v1"], payload)
upsert_insight(
"recommendation",
payload["target_id"],
PROMPTS["recommendation_explain_v1"],
PROMPTS["recommendation_explain_v1"],
input_hash,
result.get("status") or "failed",
input_payload=payload,
content=result.get("content") if result.get("status") == "success" else {"raw": result.get("raw", "")},
error=result.get("error", ""),
model=result.get("model", ""),
)
processed += 1
return {"status": "success", "processed": processed, "scanned": len(items)}
def generate_sentiment_insights(limit=30):
if not get_llm_module_enabled("sentiment"):
return {"status": "skipped", "reason": "module_disabled", "processed": 0}
conn = get_conn()
try:
rows = conn.execute(
"""
SELECT id AS event_id, source, symbol, title, url, published_at, detected_at, importance,
event_type, decision, tech_score, rec_id, pushed
FROM event_news
ORDER BY published_at::timestamp DESC, id DESC
LIMIT 120
"""
).fetchall()
except Exception:
rows = []
finally:
conn.close()
processed = 0
for raw in rows:
row = {
"event_id": f"event_news:{raw[0]}",
"source": raw[1],
"symbol": raw[2],
"title": raw[3],
"published_at": raw[5],
"detected_at": raw[6],
"importance": raw[7],
"event_type": raw[8],
"decision": raw[9],
"tech_score": raw[10],
"rec_id": raw[11],
"pushed": raw[12],
"source_label": "Binance公告" if "binance" in str(raw[1]).lower() else str(raw[1] or ""),
"related_symbol": raw[2],
"related_base": (str(raw[2] or "").split("/")[0] or "").upper(),
"in_active": False,
"in_screened": False,
"relation_tag": "",
}
if not _should_generate_sentiment(row):
continue
payload = _build_sentiment_payload(row)
input_hash = compute_input_hash(payload)
cached = get_any_insight("sentiment", payload["target_id"], PROMPTS["sentiment_explain_v1"], input_hash)
if cached:
continue
result = _call_llm_json(PROMPTS["sentiment_explain_v1"], payload)
upsert_insight(
"sentiment",
payload["target_id"],
PROMPTS["sentiment_explain_v1"],
PROMPTS["sentiment_explain_v1"],
input_hash,
result.get("status") or "failed",
input_payload=payload,
content=result.get("content") if result.get("status") == "success" else {"raw": result.get("raw", "")},
error=result.get("error", ""),
model=result.get("model", ""),
)
processed += 1
if limit and processed >= int(limit):
break
return {"status": "success", "processed": processed}
def generate_sentiment_batch_analysis(limit=40, hours=24):
if not get_llm_module_enabled("sentiment"):
return {"status": "skipped", "reason": "module_disabled", "processed": 0}
payload = _build_sentiment_batch_payload(hours=hours, limit=limit)
if not payload.get("events"):
return {"status": "skipped", "reason": "no_sentiment_events", "processed": 0}
input_hash = compute_input_hash(payload)
cached = get_any_insight("sentiment_batch", payload["target_id"], PROMPTS["sentiment_batch_analyze_v1"], input_hash)
if cached:
candidate_result = {"queued": 0, "skipped": 0, "symbols": []}
if cached.get("status") == "success":
try:
from app.services.event_driven_screener import enqueue_llm_sentiment_candidates
candidate_result = enqueue_llm_sentiment_candidates(
cached.get("content") or {},
source_insight_id=str(cached.get("id") or input_hash),
)
except Exception as exc:
candidate_result = {"queued": 0, "skipped": 0, "symbols": [], "error": str(exc)[:300]}
return {
"status": "success",
"processed": 0,
"cached": True,
"event_count": payload.get("event_count", 0),
"candidate_events": candidate_result,
}
result = _call_llm_json(PROMPTS["sentiment_batch_analyze_v1"], payload)
candidate_result = {"queued": 0, "skipped": 0, "symbols": []}
if result.get("status") == "success":
try:
from app.services.event_driven_screener import enqueue_llm_sentiment_candidates
candidate_result = enqueue_llm_sentiment_candidates(
result.get("content") or {},
source_insight_id=input_hash,
)
except Exception as exc:
candidate_result = {"queued": 0, "skipped": 0, "symbols": [], "error": str(exc)[:300]}
upsert_insight(
"sentiment_batch",
payload["target_id"],
PROMPTS["sentiment_batch_analyze_v1"],
PROMPTS["sentiment_batch_analyze_v1"],
input_hash,
result.get("status") or "failed",
input_payload=payload,
content=result.get("content") if result.get("status") == "success" else {"raw": result.get("raw", "")},
error=result.get("error", ""),
model=result.get("model", ""),
)
return {
"status": "success",
"processed": 1,
"event_count": payload.get("event_count", 0),
"candidate_events": candidate_result,
}
def generate_review_memos(limit=10):
if not get_llm_module_enabled("review"):
return {"status": "skipped", "reason": "module_disabled", "processed": 0}
from app.db.review_queries import get_strategy_iteration_logs
logs = get_strategy_iteration_logs(limit=max(limit or 10, 1))
processed = 0
for item in logs:
if not _should_generate_review(item):
continue
payload = _build_review_payload(item)
input_hash = compute_input_hash(payload)
cached = get_any_insight("review", payload["target_id"], PROMPTS["review_memo_v1"], input_hash)
if cached:
continue
result = _call_llm_json(PROMPTS["review_memo_v1"], payload)
upsert_insight(
"review",
payload["target_id"],
PROMPTS["review_memo_v1"],
PROMPTS["review_memo_v1"],
input_hash,
result.get("status") or "failed",
input_payload=payload,
content=result.get("content") if result.get("status") == "success" else {"raw": result.get("raw", "")},
error=result.get("error", ""),
model=result.get("model", ""),
)
processed += 1
return {"status": "success", "processed": processed}
def run(scope="recommendations", limit=30):
scope = str(scope or "").strip()
if scope == "recommendations":
return generate_recommendation_insights(limit=limit)
if scope == "sentiment":
return generate_sentiment_batch_analysis(limit=limit)
if scope == "sentiment-events":
return generate_sentiment_insights(limit=limit)
if scope == "review":
return generate_review_memos(limit=limit)
raise ValueError(f"unknown llm scope: {scope}")
def attach_recommendation_insights(items):
ids = [str(item.get("id")) for item in items or [] if item.get("id") is not None]
insights = get_insights_for_targets("recommendation", ids, PROMPTS["recommendation_explain_v1"])
for item in items or []:
insight = insights.get(str(item.get("id")))
if insight:
item["llm_insight"] = insight
return items
def attach_sentiment_insights(items):
ids = [str(item.get("event_id") or item.get("id")) for item in items or [] if (item.get("event_id") or item.get("id")) is not None]
insights = get_insights_for_targets("sentiment", ids, PROMPTS["sentiment_explain_v1"])
for item in items or []:
insight = insights.get(str(item.get("event_id") or item.get("id")))
if insight:
item["llm_insight"] = insight
return items
def get_latest_review_memo():
return get_latest_insight_by_type("review", PROMPTS["review_memo_v1"])
def get_latest_sentiment_batch_analysis():
return get_latest_insight_by_type("sentiment_batch", PROMPTS["sentiment_batch_analyze_v1"])
def get_latest_sentiment_batch_attempt():
return get_latest_insight_by_type("sentiment_batch", PROMPTS["sentiment_batch_analyze_v1"], success_only=False)
__all__ = [
"PROMPTS",
"attach_recommendation_insights",
"attach_sentiment_insights",
"generate_recommendation_insights",
"generate_review_memos",
"generate_sentiment_batch_analysis",
"generate_sentiment_insights",
"get_latest_sentiment_batch_analysis",
"get_latest_sentiment_batch_attempt",
"get_latest_review_memo",
"run",
]