# alphax/app/db/review_center.py

"""Review center read models.

This module keeps the new review/iteration semantics explicit:
- opportunity review describes whether the system found useful opportunities;
- paper trading review is the only place where PnL is treated as execution PnL;
- evidence attribution describes whether onchain/sentiment/LLM evidence helped.
"""

from __future__ import annotations

import json
from datetime import datetime, timedelta

from app.db.paper_trading import get_paper_trading_summary
from app.db.schema import get_conn


def _safe_int(value, default=0):
    try:
        return int(value or 0)
    except Exception:
        return default


def _safe_float(value, default=0.0):
    try:
        return float(value or 0)
    except Exception:
        return default


def _loads(value, default=None):
    try:
        if isinstance(value, (dict, list)):
            return value
        if isinstance(value, str) and value.strip():
            return json.loads(value)
    except Exception:
        pass
    return default if default is not None else {}


def _since(days):
    """Return the ISO-8601 timestamp marking the start of the look-back window.

    ``days`` is coerced with ``_safe_int`` (fallback 30) and clamped to the
    range 1..365 before being subtracted from the current local time.
    """
    window_days = _safe_int(days, 30)
    if window_days > 365:
        window_days = 365
    if window_days < 1:
        window_days = 1
    cutoff = datetime.now() - timedelta(days=window_days)
    return cutoff.isoformat()


def _bucket_count(rows, key, fallback="unknown"):
    counts = {}
    for row in rows:
        value = (row.get(key) or fallback) if isinstance(row, dict) else fallback
        counts[value] = counts.get(value, 0) + 1
    return [{"name": k, "count": v} for k, v in sorted(counts.items(), key=lambda x: (-x[1], x[0]))]


def _dedupe_by_symbol(rows, limit=0):
    items = []
    seen = set()
    for row in rows:
        symbol = str(row.get("symbol") or "").strip().upper()
        key = symbol or f"row:{row.get('id')}"
        if key in seen:
            continue
        seen.add(key)
        items.append(row)
        if limit and len(items) >= int(limit):
            break
    return items


def _opportunity_review(conn, since):
    """Assemble the opportunity-review section of the dashboard.

    Queries ``recommendation``, ``review_log``, ``missed_explosions`` and
    ``paper_trades`` for rows at or after ``since`` and summarises them.

    Args:
        conn: Connection whose ``execute(sql, params)`` result exposes
            ``fetchall()``; rows must be convertible with ``dict()``.
        since: Lower-bound value bound to each query's ``%s`` placeholder.

    Returns:
        A dict with ``definition``, ``summary``, ``status_distribution``,
        ``outcome_distribution``, ``missed_explosions`` and ``recent_reviews``.
        ``missed_explosion_count`` is the size of the per-symbol de-duplicated
        list, which is capped at 20 rows.
    """
    rec_cursor = conn.execute(
        """
        SELECT id, symbol, rec_time, status, display_bucket, execution_status, action_status,
               entry_triggered, strategy_version, signal_codes_json, signal_labels_json,
               market_context_json, derivatives_context_json, sector_context_json
        FROM recommendation
        WHERE rec_time >= %s
        ORDER BY rec_time DESC, id DESC
        """,
        (since,),
    )
    rec_rows = [dict(record) for record in rec_cursor.fetchall()]

    review_cursor = conn.execute(
        """
        SELECT *
        FROM review_log
        WHERE review_time >= %s
        ORDER BY review_time DESC, id DESC
        """,
        (since,),
    )
    review_rows = [dict(record) for record in review_cursor.fetchall()]

    missed_cursor = conn.execute(
        """
        SELECT *
        FROM missed_explosions
        WHERE detect_time >= %s
        ORDER BY gain_pct DESC, detect_time DESC
        LIMIT 200
        """,
        (since,),
    )
    missed_rows = _dedupe_by_symbol(
        [dict(record) for record in missed_cursor.fetchall()],
        limit=20,
    )

    # Recommendation ids that have at least one paper trade opened in the window.
    executed_ids = set()
    trade_cursor = conn.execute("SELECT recommendation_id FROM paper_trades WHERE opened_at >= %s", (since,))
    for trade in trade_cursor.fetchall():
        if trade.get("recommendation_id"):
            executed_ids.add(int(trade["recommendation_id"]))

    # Tally recommendations per bucket in one pass; a row may fall into several.
    buy_now_count = 0
    wait_pullback_count = 0
    observe_count = 0
    invalid_count = 0
    executed_count = 0
    for rec in rec_rows:
        exec_status = rec.get("execution_status")
        if exec_status == "buy_now":
            buy_now_count += 1
        if exec_status == "wait_pullback":
            wait_pullback_count += 1
        if exec_status == "observe" or rec.get("display_bucket") == "watch_pool":
            observe_count += 1
        if exec_status == "invalid" or rec.get("status") in ("expired", "invalid", "archived"):
            invalid_count += 1
        if int(rec.get("id") or 0) in executed_ids:
            executed_count += 1

    outcome_distribution = _bucket_count(review_rows, "outcome", "未复盘")

    # Only reviews with one of these three outcomes count toward the hit rate.
    effective_total = 0
    hit_count = 0
    for review in review_rows:
        outcome = review.get("outcome")
        if outcome in ("爆发", "失败", "横盘"):
            effective_total += 1
            if outcome == "爆发":
                hit_count += 1
    if effective_total:
        hit_rate = round(hit_count / effective_total * 100, 2)
    else:
        hit_rate = 0

    summary = {
        "total_opportunities": len(rec_rows),
        "buy_now_count": buy_now_count,
        "wait_pullback_count": wait_pullback_count,
        "observe_count": observe_count,
        "invalid_count": invalid_count,
        "paper_executed_count": executed_count,
        "reviewed_count": len(review_rows),
        "effective_review_count": effective_total,
        "opportunity_hit_rate": hit_rate,
        "missed_explosion_count": len(missed_rows),
    }
    return {
        "definition": "机会复盘只评价机会发现、确认和漏选，不代表交易收益。",
        "summary": summary,
        "status_distribution": _bucket_count(rec_rows, "execution_status", "unknown"),
        "outcome_distribution": outcome_distribution,
        "missed_explosions": missed_rows[:10],
        "recent_reviews": review_rows[:12],
    }


def _paper_review(conn, since, days):
    """Assemble the paper-trading section of the dashboard.

    Args:
        conn: Connection whose ``execute(sql, params)`` result exposes
            ``fetchall()``; rows must be convertible with ``dict()``.
        since: Lower-bound value bound to each query's ``%s`` placeholder.
        days: Window length forwarded to ``get_paper_trading_summary``.

    Returns:
        A dict with ``definition``, ``summary``, ``exit_reasons``,
        ``event_types``, ``recent_trades`` (up to 20 rows) and
        ``recent_events`` (up to 30 rows).
    """
    summary = get_paper_trading_summary(days=days)

    trade_cursor = conn.execute(
        """
        SELECT *
        FROM paper_trades
        WHERE opened_at >= %s
        ORDER BY opened_at DESC, id DESC
        LIMIT 20
        """,
        (since,),
    )
    trades = [dict(record) for record in trade_cursor.fetchall()]

    event_cursor = conn.execute(
        """
        SELECT *
        FROM paper_trade_events
        WHERE event_time >= %s
        ORDER BY event_time DESC, id DESC
        LIMIT 30
        """,
        (since,),
    )
    events = [dict(record) for record in event_cursor.fetchall()]

    # Exit reasons are only counted for trades whose status is "closed".
    closed_trades = []
    for trade in trades:
        if trade.get("status") == "closed":
            closed_trades.append(trade)

    return {
        "definition": "模拟交易复盘是唯一收益口径，基于 paper_trades 的开仓、平仓、移动止盈事件。",
        "summary": summary,
        "exit_reasons": _bucket_count(closed_trades, "exit_reason", "unknown"),
        "event_types": _bucket_count(events, "event_type", "unknown"),
        "recent_trades": trades,
        "recent_events": events,
    }


def _evidence_review(conn, since):
    """Assemble the multi-source evidence section of the dashboard.

    Reads up to 80 rows each from ``event_news``, ``onchain_events``,
    ``onchain_raw_events`` and ``llm_insights`` at or after ``since`` and
    derives counts and distributions from those samples.

    Args:
        conn: Connection whose ``execute(sql, params)`` result exposes
            ``fetchall()``; rows must be convertible with ``dict()``.
        since: Lower-bound value bound to each query's ``%s`` placeholder.

    Returns:
        A dict with ``definition``, ``summary``, per-source distributions and
        the eight most recent news, on-chain and LLM rows.
    """
    news_cursor = conn.execute(
        """
        SELECT source, symbol, importance, event_type, decision, tech_score, processed, detected_at, title
        FROM event_news
        WHERE detected_at >= %s
        ORDER BY detected_at DESC, id DESC
        LIMIT 80
        """,
        (since,),
    )
    news_rows = [dict(record) for record in news_cursor.fetchall()]

    onchain_cursor = conn.execute(
        """
        SELECT source, chain, symbol, signal_code, signal_label, direction, value_usd,
               confidence, severity, detected_at
        FROM onchain_events
        WHERE detected_at >= %s
        ORDER BY detected_at DESC, id DESC
        LIMIT 80
        """,
        (since,),
    )
    onchain_rows = [dict(record) for record in onchain_cursor.fetchall()]

    raw_cursor = conn.execute(
        """
        SELECT source, chain, event_type, symbol_guess, mapped_symbol, mapping_status,
               importance, detected_at, title
        FROM onchain_raw_events
        WHERE detected_at >= %s
        ORDER BY importance DESC, detected_at DESC, id DESC
        LIMIT 80
        """,
        (since,),
    )
    raw_onchain_rows = [dict(record) for record in raw_cursor.fetchall()]

    llm_cursor = conn.execute(
        """
        SELECT target_type, insight_type, status, model, prompt_version, updated_at
        FROM llm_insights
        WHERE updated_at >= %s
        ORDER BY updated_at DESC, id DESC
        LIMIT 80
        """,
        (since,),
    )
    llm_rows = [dict(record) for record in llm_cursor.fetchall()]

    top_grades = ("A", "S")

    # Raw events count as mapped when flagged so or when a symbol is attached.
    mapped_raw_count = 0
    for raw in raw_onchain_rows:
        if raw.get("mapping_status") == "mapped" or raw.get("mapped_symbol"):
            mapped_raw_count += 1

    # On-chain signals qualify through confidence >= 70 or an A/S severity.
    high_onchain_count = 0
    for signal in onchain_rows:
        if _safe_int(signal.get("confidence")) >= 70:
            high_onchain_count += 1
        elif str(signal.get("severity") or "").upper() in top_grades:
            high_onchain_count += 1

    # News qualifies through its decision or an A/S importance grade.
    actionable_news_count = 0
    for article in news_rows:
        if article.get("decision") in ("recommend", "observe", "risk"):
            actionable_news_count += 1
        elif str(article.get("importance") or "").upper() in top_grades:
            actionable_news_count += 1

    llm_success_count = sum(1 for run in llm_rows if run.get("status") == "success")

    summary = {
        "news_count": len(news_rows),
        "actionable_news_count": actionable_news_count,
        "onchain_signal_count": len(onchain_rows),
        "high_confidence_onchain_count": high_onchain_count,
        "raw_onchain_count": len(raw_onchain_rows),
        "mapped_raw_onchain_count": mapped_raw_count,
        "llm_runs": len(llm_rows),
        "llm_success_count": llm_success_count,
    }
    return {
        "definition": "多源归因只判断证据贡献：舆情、链上、LLM 是否帮助发现/解释机会，不直接生成交易收益。",
        "summary": summary,
        "news_sources": _bucket_count(news_rows, "source", "unknown"),
        "news_decisions": _bucket_count(news_rows, "decision", "unprocessed"),
        "onchain_sources": _bucket_count(onchain_rows, "source", "unknown"),
        "onchain_signals": _bucket_count(onchain_rows, "signal_code", "unknown")[:12],
        "raw_mapping": _bucket_count(raw_onchain_rows, "mapping_status", "unknown"),
        "llm_status": _bucket_count(llm_rows, "status", "unknown"),
        "recent_news": news_rows[:8],
        "recent_onchain": onchain_rows[:8],
        "recent_llm": llm_rows[:8],
    }


def _iteration_review(conn, since):
    """Assemble the strategy-iteration section of the dashboard.

    Loads the 12 newest ``strategy_iteration_log`` rows at or after ``since``
    and the 30 newest ``strategy_rule_candidate`` rows (the candidate query is
    not filtered by ``since``). Each log row gains decoded copies of its
    ``*_json`` columns under the same name without the ``_json`` suffix.

    Args:
        conn: Connection whose ``execute(sql, params)`` result exposes
            ``fetchall()``; rows must be convertible with ``dict()``.
        since: Lower-bound value bound to the log query's ``%s`` placeholder.

    Returns:
        A dict with ``definition``, ``summary``, ``release_decisions``,
        ``candidate_status``, ``recent_logs`` and ``recent_candidates``.
    """
    log_cursor = conn.execute(
        """
        SELECT *
        FROM strategy_iteration_log
        WHERE created_at >= %s
        ORDER BY created_at DESC, id DESC
        LIMIT 12
        """,
        (since,),
    )
    logs = [dict(record) for record in log_cursor.fetchall()]

    candidate_cursor = conn.execute(
        """
        SELECT *
        FROM strategy_rule_candidate
        ORDER BY created_at DESC, id DESC
        LIMIT 30
        """
    )
    candidates = [dict(record) for record in candidate_cursor.fetchall()]

    # (source column, decoded key, factory for a fresh fallback container)
    json_columns = (
        ("metrics_json", "metrics", dict),
        ("findings_json", "findings", list),
        ("problems_json", "problems", list),
        ("actions_json", "actions", list),
        ("candidate_rules_json", "candidate_rules", list),
    )
    for entry in logs:
        for column, target, make_fallback in json_columns:
            entry[target] = _loads(entry.get(column), make_fallback())

    gray_count = 0
    active_count = 0
    for candidate in candidates:
        status = candidate.get("status")
        if status == "gray":
            gray_count += 1
        if status == "active":
            active_count += 1

    # With no logs in the window the release decision falls back to "hold".
    latest = logs[0] if logs else {}
    summary = {
        "iteration_count": len(logs),
        "candidate_count": len(candidates),
        "gray_count": gray_count,
        "active_count": active_count,
        "latest_release_decision": latest.get("release_decision") or "hold",
        "latest_release_reason": latest.get("release_reason") or "",
    }
    return {
        "definition": "策略迭代只产生候选假设和发布闸门结论，不直接等于收益提升。",
        "summary": summary,
        "release_decisions": _bucket_count(logs, "release_decision", "unknown"),
        "candidate_status": _bucket_count(candidates, "status", "candidate"),
        "recent_logs": logs,
        "recent_candidates": candidates[:12],
    }


def get_review_center_dashboard(days=30):
    """Build the complete review-center dashboard payload.

    Args:
        days: Look-back window in days; coerced with ``_safe_int`` (fallback 30)
            and clamped to 1..365.

    Returns:
        A dict with the effective ``days``, a ``generated_at`` timestamp
        (second precision), the guiding ``principles`` and the four sections
        ``opportunity``, ``paper_trading``, ``evidence`` and ``iteration``.
    """
    days = min(max(_safe_int(days, 30), 1), 365)
    cutoff = _since(days)

    connection = get_conn()
    try:
        sections = {
            "opportunity": _opportunity_review(connection, cutoff),
            "paper_trading": _paper_review(connection, cutoff, days),
            "evidence": _evidence_review(connection, cutoff),
            "iteration": _iteration_review(connection, cutoff),
        }
    finally:
        # Always release the connection, even when a section builder raises.
        connection.close()

    dashboard = {
        "days": days,
        "generated_at": datetime.now().isoformat(timespec="seconds"),
        "principles": [
            "机会归档不计算交易收益，只记录发现、确认、失效和漏选。",
            "真实收益口径只来自模拟交易或未来真实交易账本。",
            "链上、舆情、LLM 属于证据层，只做发现和解释，不直接改变推荐状态。",
            "策略迭代只发布经过样本约束和灰度闸门验证的规则。",
        ],
    }
    dashboard.update(sections)
    return dashboard


# Public API: only the dashboard builder is exported via ``from ... import *``.
__all__ = ["get_review_center_dashboard"]