"""催化事件存储与主题分数聚合。""" from __future__ import annotations import logging from datetime import datetime, timedelta from sqlalchemy import text from app.catalyst.mapper import analyze_catalyst from app.catalyst.models import CatalystAnalysis, CatalystInput, ThemeCatalystScore from app.db import tables from app.db.database import get_db logger = logging.getLogger(__name__) async def ingest_catalyst(item: CatalystInput, use_llm: bool = True) -> CatalystAnalysis: analysis = await analyze_catalyst(item, use_llm=use_llm) await save_catalyst(analysis) return analysis async def save_catalyst(analysis: CatalystAnalysis) -> int: async with get_db() as db: result = await db.execute( tables.catalysts_table.insert().values( title=analysis.title, summary=analysis.summary, source=analysis.source, url=analysis.url, published_at=analysis.published_at, catalyst_type=analysis.catalyst_type, strength=analysis.strength, freshness=analysis.freshness, confidence=analysis.confidence, raw_text=analysis.raw_text, llm_reason=analysis.llm_reason, is_active=True, ) ) catalyst_id = int(result.inserted_primary_key[0]) if analysis.themes: await db.execute( tables.theme_catalysts_table.insert(), [ { "catalyst_id": catalyst_id, "theme_id": theme.theme_id, "theme_name": theme.theme_name, "relevance": theme.relevance, "reason": theme.reason, } for theme in analysis.themes ], ) await db.commit() return catalyst_id async def get_recent_catalysts(limit: int = 30, hours: int = 72) -> list[dict]: since = datetime.now() - timedelta(hours=hours) async with get_db() as db: result = await db.execute( text( "SELECT c.*, " "GROUP_CONCAT(tc.theme_name || ':' || ROUND(tc.relevance, 0), ',') AS themes " "FROM catalysts c " "LEFT JOIN theme_catalysts tc ON tc.catalyst_id = c.id " "WHERE c.is_active = 1 AND COALESCE(c.published_at, c.created_at) >= :since " "GROUP BY c.id " "ORDER BY COALESCE(c.published_at, c.created_at) DESC, c.id DESC " "LIMIT :limit" ), {"since": since, "limit": limit}, ) rows = result.mappings().all() return [dict(row) for row in rows] async def build_theme_catalyst_scores(hours: int = 72, limit: int = 20) -> list[ThemeCatalystScore]: since = datetime.now() - timedelta(hours=hours) async with get_db() as db: rows = ( await db.execute( text( "SELECT tc.theme_id, tc.theme_name, " "COUNT(*) AS catalyst_count, " "SUM((c.strength * 0.45 + c.freshness * 0.25 + c.confidence * 0.15 + tc.relevance * 0.15)) AS raw_score, " "GROUP_CONCAT(SUBSTR(COALESCE(tc.reason, c.summary, c.title), 1, 60), ' | ') AS reasons " "FROM theme_catalysts tc " "JOIN catalysts c ON c.id = tc.catalyst_id " "WHERE c.is_active = 1 AND COALESCE(c.published_at, c.created_at) >= :since " "GROUP BY tc.theme_id, tc.theme_name " "ORDER BY raw_score DESC " "LIMIT :limit" ), {"since": since, "limit": limit}, ) ).mappings().all() scores = [] for row in rows: raw = float(row.get("raw_score") or 0) count = int(row.get("catalyst_count") or 0) normalized = min(raw / max(count, 1), 100) reasons = [ item.strip() for item in str(row.get("reasons") or "").split("|") if item.strip() ][:3] scores.append(ThemeCatalystScore( theme_id=row["theme_id"], theme_name=row["theme_name"], catalyst_score=round(normalized, 1), catalyst_count=count, top_reasons=reasons, generated_by="catalyst_layer", )) return scores