# alphax/app/core/signal_quality.py

"""信号时效指数衰减 + 假突破/假破位概率模型（多空双向）。

## 信号时效衰减
替换硬截断的 stale 判断，改为指数衰减：
- 1H 信号：前 2 小时权重 1.0，之后每小时 ×0.6
- 4H 信号：前 8 小时权重 1.0，之后每 4 小时 ×0.5
- 15m 信号：前 30 分钟权重 1.0，之后每 15 分钟 ×0.5
- D1 信号：前 24 小时权重 1.0，之后每天 ×0.7

## 假突破/假破位概率模型
事前估计突破质量（多空通用）：
- 成交量倍数（突破时量 vs 均量）
- 突破幅度 vs ATR
- 时段流动性（亚洲时段流动性薄）
- 前方供需区距离
- 连续突破尝试次数（多次失败后的突破更可靠）
"""

from __future__ import annotations

import math
from datetime import datetime, timezone
from typing import Optional

import numpy as np
import pandas as pd

from app.config.config_loader import _get_section as _get_cfg_section


# ===========================================================================
# Part 1: 信号时效指数衰减
# ===========================================================================

def _decay_config() -> dict:
    """Build the decay settings for every supported timeframe.

    Values are read from the ``signal_decay`` config section. If the loader
    raises, or returns something falsy, every setting takes its built-in
    default.

    Returns:
        A dict with one profile per timeframe key ("1h", "4h", "15m", "1d"),
        each holding ``grace_hours``, ``decay_rate`` and ``period_hours`` as
        floats, plus a top-level ``min_weight`` float (the floor below which
        a signal counts as expired).
    """
    try:
        section = _get_cfg_section("signal_decay") or {}
    except Exception:
        section = {}

    # (timeframe key, default grace hours, default decay rate, default period hours)
    profile_defaults = (
        ("1h", 2, 0.6, 1),
        ("4h", 8, 0.5, 4),
        ("15m", 0.5, 0.5, 0.25),
        ("1d", 24, 0.7, 24),
    )

    settings: dict = {}
    for tf, grace_default, rate_default, period_default in profile_defaults:
        settings[tf] = {
            "grace_hours": float(section.get(f"{tf}_grace_hours", grace_default)),
            "decay_rate": float(section.get(f"{tf}_decay_rate", rate_default)),
            "period_hours": float(section.get(f"{tf}_period_hours", period_default)),
        }

    # Minimum weight below which a signal is considered expired.
    settings["min_weight"] = float(section.get("min_weight", 0.05))
    return settings


def compute_signal_decay(age_hours: float, timeframe: str = "1h") -> float:
    """Return the time-decay weight of a signal given its age.

    The weight stays at 1.0 for the whole grace period of the timeframe and
    then shrinks geometrically as ``decay_rate ** periods``, where
    ``periods`` is the time elapsed after the grace period expressed in
    ``period_hours`` units. The result is floored at the configured
    ``min_weight``, so it never reaches zero.

    Args:
        age_hours: Hours elapsed since the signal fired.
        timeframe: Signal timeframe key ("15m", "1h", "4h", "1d"). Matching
            ignores case and surrounding whitespace; unknown values fall
            back to the "1h" profile.

    Returns:
        Weight in the range [min_weight, 1.0].
    """
    cfg = _decay_config()
    min_weight = cfg["min_weight"]

    # Normalise the key so "4H" or " 1D " pick their own profile instead of
    # silently falling back to the 1h one.
    tf_key = str(timeframe).strip().lower()
    tf_cfg = cfg.get(tf_key)
    if not isinstance(tf_cfg, dict):
        # Unknown timeframe, or a non-profile key such as "min_weight".
        tf_cfg = cfg["1h"]

    grace = tf_cfg["grace_hours"]
    decay_rate = tf_cfg["decay_rate"]
    period = tf_cfg["period_hours"]

    if age_hours <= grace:
        return 1.0

    # Number of decay periods elapsed after the grace window; a non-positive
    # period falls back to treating one hour as one period.
    elapsed = age_hours - grace
    periods = elapsed / period if period > 0 else elapsed

    if decay_rate <= 0:
        # math.pow raises ValueError for a negative base with a fractional
        # exponent; a non-positive rate is treated as immediate full decay.
        weight = 0.0
    else:
        # A rate above 1 would be clamped to 1.0 below anyway; capping it
        # here avoids OverflowError for very old signals.
        weight = math.pow(min(decay_rate, 1.0), periods)

    return max(min_weight, min(1.0, weight))


def apply_decay_to_score(base_score: float, age_hours: float, timeframe: str = "1h") -> float:
    """Scale a factor score by the time-decay weight of its signal.

    Args:
        base_score: Original score; may be positive or negative.
        age_hours: Signal age in hours.
        timeframe: Signal timeframe key passed on to ``compute_signal_decay``.

    Returns:
        ``base_score`` multiplied by the decay weight, rounded to three
        decimal places.
    """
    decayed = base_score * compute_signal_decay(age_hours, timeframe)
    return round(decayed, 3)


def is_signal_expired(age_hours: float, timeframe: str = "1h") -> bool:
    """Return True once the decay weight has reached the configured ``min_weight`` floor."""
    expiry_floor = _decay_config()["min_weight"]
    return compute_signal_decay(age_hours, timeframe) <= expiry_floor


def signal_freshness_label(age_hours: float, timeframe: str = "1h") -> str:
    """Return a human-readable freshness label for a signal.

    Args:
        age_hours: Signal age in hours.
        timeframe: Signal timeframe key passed on to ``compute_signal_decay``.

    Returns:
        One of "新鲜" (weight >= 0.9), "有效" (>= 0.5), "衰减中" (>= 0.2),
        "即将过期" (above the expiry floor) or "已过期" (at or below the
        configured ``min_weight``).
    """
    weight = compute_signal_decay(age_hours, timeframe)

    # Use the configured floor instead of a hard-coded 0.05 and test it
    # first, so the label agrees with is_signal_expired() when min_weight
    # is overridden in config.
    if weight <= _decay_config()["min_weight"]:
        return "已过期"
    if weight >= 0.9:
        return "新鲜"
    if weight >= 0.5:
        return "有效"
    if weight >= 0.2:
        return "衰减中"
    return "即将过期"


# ===========================================================================
# Part 2: 假突破/假破位概率模型（多空双向）
# ===========================================================================

def _breakout_quality_config() -> dict:
    """Build the settings used by the breakout-quality model.

    Values are read from the ``breakout_quality`` config section. If the
    loader raises, or returns something falsy, every setting takes its
    built-in default.

    Returns:
        A flat dict of thresholds and weights; the two low-liquidity hours
        and the prior-failure lookback are ints, everything else is a float.
    """
    try:
        section = _get_cfg_section("breakout_quality") or {}
    except Exception:
        section = {}

    # (key, converter, default)
    spec = (
        # Volume requirements
        ("vol_ratio_strong", float, 3.0),
        ("vol_ratio_weak", float, 1.5),
        # ATR requirements
        ("atr_breakout_strong", float, 1.5),
        ("atr_breakout_weak", float, 0.5),
        # Time-of-day window (UTC hours) treated as low liquidity
        ("low_liquidity_start_utc", int, 0),
        ("low_liquidity_end_utc", int, 8),
        # Prior attempts
        ("prior_fail_lookback", int, 20),
        ("prior_fail_bonus_per_attempt", float, 8),
        # Nearby zone penalty
        ("zone_distance_close_pct", float, 2.0),
        ("zone_distance_far_pct", float, 5.0),
        # Tier thresholds
        ("high_quality_min", float, 70),
        ("low_quality_max", float, 40),
        # Factor-score weights
        ("weight_high_quality", float, 3.0),
        ("weight_low_quality_penalty", float, -4.0),
    )
    return {key: convert(section.get(key, default)) for key, convert, default in spec}


def estimate_breakout_quality(
    df: pd.DataFrame,
    breakout_bar_index: int = -1,
    breakout_level: float = 0,
    direction: str = "long",
    atr: float = 0,
    nearby_zones: Optional[list[dict]] = None,
) -> dict:
    """Score how likely a breakout (long) or breakdown (short) is genuine.

    Five additive factors make up a 0-100 quality score:

    * volume (0-25): breakout-bar volume relative to the mean volume of up
      to 20 preceding bars;
    * magnitude (0-25): distance of the close beyond ``breakout_level`` (or
      the bar body when no level is given), in ATR units;
    * timing (3 / 10 / 15): hour of the bar - the configured low-liquidity
      window scores 3, hours 13-20 score 15, anything else (or no usable
      ``timestamp`` column) scores 10;
    * prior attempts (0-20): earlier bars in the lookback window that
      touched the level but closed back on the wrong side;
    * zone clearance (2 / 8 / 15, or 12 without zone data): distance from
      the close to the nearest opposing supply/demand zone ahead of price.

    Args:
        df: Kline DataFrame with ``open``, ``high``, ``low``, ``close`` and
            ``volume`` columns, optionally ``timestamp``.
        breakout_bar_index: Positional index of the breakout bar; -1 means
            the latest bar. Other negative values are rejected.
        breakout_level: Price level being broken (resistance for long,
            support for short); 0 means "unknown".
        direction: "long" (breakout above) or "short" (breakdown below).
        atr: Pre-computed ATR. When not positive, a 14-bar ATR is derived
            from ``df`` using bars up to and including the breakout bar.
        nearby_zones: Supply/demand zones near the breakout; the keys read
            here are ``type``, ``top`` and ``btm``.

    Returns:
        A dict with keys:

        * ``quality_score``: float in 0-100, higher = more likely genuine;
        * ``quality_tier``: "high" / "medium" / "low";
        * ``factors``: ``volume_score``, ``volume_ratio``,
          ``magnitude_score``, ``atr_ratio``, ``timing_score``,
          ``prior_attempts_score``, ``prior_attempts``,
          ``zone_clearance_score``;
        * ``fake_probability``: float in 0-1;
        * ``signal``: short description, empty when nothing is notable;
        * ``recommendation``: "可即刻买入" / "可即刻做空" / "等确认" / "观察".

        When ``df`` is None, has fewer than 20 rows, or the index is out of
        range, a neutral default (score 50, tier "medium", empty
        ``factors``) is returned.
    """
    cfg = _breakout_quality_config()
    result = {
        "quality_score": 50.0,
        "quality_tier": "medium",
        "factors": {},
        "fake_probability": 0.5,
        "signal": "",
        "recommendation": "等确认",
    }

    if df is None or len(df) < 20:
        return result

    # Resolve the breakout bar.
    if breakout_bar_index == -1:
        breakout_bar_index = len(df) - 1
    if breakout_bar_index < 0 or breakout_bar_index >= len(df):
        return result

    bar = df.iloc[breakout_bar_index]
    bar_close = float(bar["close"])
    bar_open = float(bar["open"])
    bar_high = float(bar["high"])
    bar_low = float(bar["low"])
    bar_vol = float(bar["volume"])

    # Derive ATR when the caller did not supply a usable one. `not (x > 0)`
    # also catches NaN, which a plain `x <= 0` test lets through.
    if atr is None or not (atr > 0):
        prev_close = df["close"].shift(1)
        true_range = pd.concat(
            [
                df["high"] - df["low"],
                (df["high"] - prev_close).abs(),
                (df["low"] - prev_close).abs(),
            ],
            axis=1,
        ).max(axis=1)
        # Read the ATR at the breakout bar, not at the last row, so a
        # historical breakout is not scored with future volatility.
        # min_periods=1 keeps early bars usable.
        atr = float(true_range.rolling(14, min_periods=1).mean().iloc[breakout_bar_index])
        if not (atr > 0):
            atr = bar_high - bar_low

    # Average volume of up to 20 bars preceding the breakout bar.
    vol_window = min(20, len(df) - 1)
    avg_vol = float(df["volume"].iloc[max(0, breakout_bar_index - vol_window):breakout_bar_index].mean())
    if avg_vol <= 0:
        avg_vol = float(df["volume"].mean())

    # --- Factor 1: Volume (0-25 points) ---
    vol_ratio = bar_vol / avg_vol if avg_vol > 0 else 1
    if vol_ratio >= cfg["vol_ratio_strong"]:
        volume_score = 25.0
    elif vol_ratio >= cfg["vol_ratio_weak"]:
        volume_score = 10.0 + (vol_ratio - cfg["vol_ratio_weak"]) / (cfg["vol_ratio_strong"] - cfg["vol_ratio_weak"]) * 15
    else:
        volume_score = max(0, vol_ratio / cfg["vol_ratio_weak"] * 10)

    # --- Factor 2: Breakout magnitude vs ATR (0-25 points) ---
    if direction == "long":
        magnitude = bar_close - breakout_level if breakout_level > 0 else bar_close - bar_open
    else:
        magnitude = breakout_level - bar_close if breakout_level > 0 else bar_open - bar_close

    atr_ratio = magnitude / atr if atr > 0 else 0
    if atr_ratio >= cfg["atr_breakout_strong"]:
        magnitude_score = 25.0
    elif atr_ratio >= cfg["atr_breakout_weak"]:
        magnitude_score = 8.0 + (atr_ratio - cfg["atr_breakout_weak"]) / (cfg["atr_breakout_strong"] - cfg["atr_breakout_weak"]) * 17
    else:
        magnitude_score = max(0, atr_ratio / cfg["atr_breakout_weak"] * 8)

    # --- Factor 3: Timing / session (3, 10 or 15 points) ---
    timing_score = 10.0  # neutral default when the hour cannot be determined
    try:
        if "timestamp" in df.columns:
            ts = df["timestamp"].iloc[breakout_bar_index]
            if not hasattr(ts, "hour"):
                # NOTE(review): integer epochs are parsed by pd.Timestamp as
                # nanoseconds; confirm the column is not seconds/milliseconds.
                ts = pd.Timestamp(ts)
            if isinstance(ts, datetime) and ts.tzinfo is not None:
                # The configured window is in UTC hours, so convert
                # timezone-aware timestamps. Naive ones are used as-is
                # (presumably already UTC - verify against the data feed).
                ts = ts.astimezone(timezone.utc)
            hour_utc = ts.hour

            low_liq_start = cfg["low_liquidity_start_utc"]
            low_liq_end = cfg["low_liquidity_end_utc"]
            if low_liq_start <= low_liq_end:
                in_low_liquidity = low_liq_start <= hour_utc < low_liq_end
            else:
                # Window wraps past midnight, e.g. 22 -> 6.
                in_low_liquidity = hour_utc >= low_liq_start or hour_utc < low_liq_end

            if in_low_liquidity:
                timing_score = 3.0  # thin session = higher fake probability
            elif 13 <= hour_utc <= 20:
                timing_score = 15.0  # treated as the most reliable session
            else:
                timing_score = 10.0
    except Exception:
        # Best effort: an unparseable timestamp must not fail the estimate.
        timing_score = 10.0

    # --- Factor 4: Prior failed attempts (0-20 points) ---
    # More earlier rejections at this level = more reliable once it breaks.
    prior_attempts = 0
    lookback = min(cfg["prior_fail_lookback"], breakout_bar_index)
    if breakout_level > 0 and lookback > 0:
        window = df.iloc[breakout_bar_index - lookback:breakout_bar_index]
        if direction == "long":
            # Bars that touched the level but failed to close above it.
            rejected = (window["high"] >= breakout_level) & (window["close"] < breakout_level)
        else:
            # Bars that touched the level but failed to close below it.
            rejected = (window["low"] <= breakout_level) & (window["close"] > breakout_level)
        prior_attempts = int(rejected.sum())

    prior_score = min(20.0, prior_attempts * cfg["prior_fail_bonus_per_attempt"])

    # --- Factor 5: Zone clearance (2, 8 or 15 points; 12 without zone data) ---
    # A close opposing zone ahead of price makes the move more likely to fail.
    zone_score = 12.0
    if nearby_zones:
        closest_opposing_dist = float("inf")
        for zone in nearby_zones:
            zone_type = zone.get("type")
            if direction == "long" and zone_type == "supply":
                # Supply zone above price acts as resistance.
                zone_dist = (float(zone.get("btm", 0)) - bar_close) / bar_close * 100 if bar_close > 0 else 999
            elif direction == "short" and zone_type == "demand":
                # Demand zone below price acts as support.
                zone_dist = (bar_close - float(zone.get("top", 0))) / bar_close * 100 if bar_close > 0 else 999
            else:
                continue
            if 0 < zone_dist < closest_opposing_dist:
                closest_opposing_dist = zone_dist

        if closest_opposing_dist < cfg["zone_distance_close_pct"]:
            zone_score = 2.0  # very close opposing zone = high fake risk
        elif closest_opposing_dist < cfg["zone_distance_far_pct"]:
            zone_score = 8.0
        else:
            zone_score = 15.0  # clear path

    # --- Combine ---
    quality_score = volume_score + magnitude_score + timing_score + prior_score + zone_score
    quality_score = max(0, min(100, quality_score))

    if quality_score >= cfg["high_quality_min"]:
        quality_tier = "high"
        recommendation = "可即刻买入" if direction == "long" else "可即刻做空"
        fake_prob = max(0.05, (100 - quality_score) / 100 * 0.4)
    elif quality_score <= cfg["low_quality_max"]:
        quality_tier = "low"
        recommendation = "观察"
        fake_prob = min(0.9, (100 - quality_score) / 100)
    else:
        quality_tier = "medium"
        recommendation = "等确认"
        fake_prob = (100 - quality_score) / 100 * 0.7

    # Build the signal text from the notable factors only.
    dir_label = "突破" if direction == "long" else "破位"
    signal_parts = []
    if volume_score >= 20:
        signal_parts.append(f"放量{vol_ratio:.1f}x")
    elif volume_score < 8:
        signal_parts.append(f"量不足{vol_ratio:.1f}x")
    if magnitude_score >= 20:
        signal_parts.append(f"幅度{atr_ratio:.1f}ATR")
    elif magnitude_score < 8:
        signal_parts.append(f"幅度弱{atr_ratio:.1f}ATR")
    if prior_attempts >= 2:
        signal_parts.append(f"第{prior_attempts+1}次尝试")
    if timing_score <= 5:
        signal_parts.append("亚洲时段")

    tier_label = {"high": "高", "low": "低"}.get(quality_tier, "中")
    signal = f"{dir_label}质量{tier_label}({', '.join(signal_parts)})" if signal_parts else ""

    result.update({
        "quality_score": round(quality_score, 1),
        "quality_tier": quality_tier,
        "factors": {
            "volume_score": round(volume_score, 1),
            "volume_ratio": round(vol_ratio, 2),
            "magnitude_score": round(magnitude_score, 1),
            "atr_ratio": round(atr_ratio, 2),
            "timing_score": round(timing_score, 1),
            "prior_attempts_score": round(prior_score, 1),
            "prior_attempts": prior_attempts,
            "zone_clearance_score": round(zone_score, 1),
        },
        "fake_probability": round(fake_prob, 3),
        "signal": signal,
        "recommendation": recommendation,
    })

    return result


# ---------------------------------------------------------------------------
# Factor scoring interface
# ---------------------------------------------------------------------------

def breakout_quality_factor_score(quality_data: dict) -> tuple[float, str]:
    """Translate a breakout-quality result into a factor score delta.

    Args:
        quality_data: Dict carrying ``quality_tier`` and ``signal`` keys, as
            produced by ``estimate_breakout_quality``; a missing tier is
            treated as "medium".

    Returns:
        ``(score_delta, signal_label)``: the configured bonus for the "high"
        tier, the configured penalty for the "low" tier, and ``(0.0, "")``
        for anything else.
    """
    settings = _breakout_quality_config()
    tier = quality_data.get("quality_tier", "medium")

    # Only the two extreme tiers move the score.
    if tier != "high" and tier != "low":
        return 0.0, ""

    label = quality_data.get("signal", "")
    weight_key = "weight_high_quality" if tier == "high" else "weight_low_quality_penalty"
    return settings[weight_key], label