astock-agent/backend/app/catalyst/service.py
2026-05-14 11:10:17 +08:00

121 lines
4.4 KiB
Python

"""Catalyst event storage and per-theme score aggregation."""
from __future__ import annotations
import logging
from datetime import datetime, timedelta
from sqlalchemy import text
from app.catalyst.mapper import analyze_catalyst
from app.catalyst.models import CatalystAnalysis, CatalystInput, ThemeCatalystScore
from app.db import tables
from app.db.database import get_db
logger = logging.getLogger(__name__)
async def ingest_catalyst(item: CatalystInput, use_llm: bool = True) -> CatalystAnalysis:
    """Analyze a catalyst item, persist the analysis, and return it.

    Args:
        item: Catalyst input forwarded to ``analyze_catalyst``.
        use_llm: Passed through unchanged to ``analyze_catalyst``.

    Returns:
        The analysis produced by ``analyze_catalyst``, after it has been
        handed to ``save_catalyst``.
    """
    result = await analyze_catalyst(item, use_llm=use_llm)
    # The row id returned by save_catalyst is discarded; callers get the analysis.
    await save_catalyst(result)
    return result
async def save_catalyst(analysis: CatalystAnalysis) -> int:
    """Insert a catalyst row plus its theme links and commit.

    Args:
        analysis: Analysis whose fields are copied into the catalysts table;
            each entry of ``analysis.themes`` becomes one theme_catalysts row.

    Returns:
        The primary key of the newly inserted catalyst row.
    """
    async with get_db() as db:
        catalyst_values = {
            "title": analysis.title,
            "summary": analysis.summary,
            "source": analysis.source,
            "url": analysis.url,
            "published_at": analysis.published_at,
            "catalyst_type": analysis.catalyst_type,
            "strength": analysis.strength,
            "freshness": analysis.freshness,
            "confidence": analysis.confidence,
            "raw_text": analysis.raw_text,
            "llm_reason": analysis.llm_reason,
            # New rows are always stored as active.
            "is_active": True,
        }
        insert_stmt = tables.catalysts_table.insert().values(**catalyst_values)
        insert_result = await db.execute(insert_stmt)
        new_id = int(insert_result.inserted_primary_key[0])

        if analysis.themes:
            # One link row per theme, all pointing at the catalyst just inserted.
            link_rows = []
            for theme in analysis.themes:
                link_rows.append(
                    {
                        "catalyst_id": new_id,
                        "theme_id": theme.theme_id,
                        "theme_name": theme.theme_name,
                        "relevance": theme.relevance,
                        "reason": theme.reason,
                    }
                )
            await db.execute(tables.theme_catalysts_table.insert(), link_rows)

        # Single commit covers both the catalyst row and its theme links.
        await db.commit()
        return new_id
async def get_recent_catalysts(limit: int = 30, hours: int = 72) -> list[dict]:
    """Return recent active catalysts, newest first, with their themes.

    Args:
        limit: Maximum number of catalyst rows to return.
        hours: Look-back window; a catalyst qualifies when its
            ``published_at`` (or ``created_at`` when that is NULL) is at or
            after ``now - hours``.

    Returns:
        One plain dict per catalyst row. Each dict carries every catalysts
        column plus a ``themes`` entry: a comma-joined string of
        ``theme_name:rounded_relevance`` items built by ``GROUP_CONCAT``.
    """
    cutoff = datetime.now() - timedelta(hours=hours)
    statement = text(
        "SELECT c.*, "
        "GROUP_CONCAT(tc.theme_name || ':' || ROUND(tc.relevance, 0), ',') AS themes "
        "FROM catalysts c "
        "LEFT JOIN theme_catalysts tc ON tc.catalyst_id = c.id "
        "WHERE c.is_active = 1 AND COALESCE(c.published_at, c.created_at) >= :since "
        "GROUP BY c.id "
        "ORDER BY COALESCE(c.published_at, c.created_at) DESC, c.id DESC "
        "LIMIT :limit"
    )
    params = {"since": cutoff, "limit": limit}
    async with get_db() as db:
        cursor = await db.execute(statement, params)
        records = cursor.mappings().all()
    # Convert the row mappings into ordinary dicts for the caller.
    return [dict(record) for record in records]
async def build_theme_catalyst_scores(hours: int = 72, limit: int = 20) -> list[ThemeCatalystScore]:
    """Aggregate recent active catalysts into one score per theme.

    For every (theme_id, theme_name) pair the query sums a weighted
    per-catalyst score::

        strength * 0.45 + freshness * 0.25 + confidence * 0.15 + relevance * 0.15

    The reported ``catalyst_score`` is that sum divided by the number of
    catalysts for the theme (i.e. the average), capped at 100 and rounded to
    one decimal place. Note that the SQL ranks and limits themes by the
    *summed* score, not by the average that is reported.

    Args:
        hours: Look-back window; a catalyst qualifies when its
            ``published_at`` (or ``created_at`` when that is NULL) is at or
            after ``now - hours``.
        limit: Maximum number of themes to return.

    Returns:
        One ``ThemeCatalystScore`` per theme, in the order produced by the
        query, each with at most three reason snippets.
    """
    # Must stay identical to the separator literal passed to GROUP_CONCAT in
    # the query below. Splitting on the full separator (rather than a bare
    # "|") keeps a reason that itself contains a pipe character in one piece.
    reason_separator = " | "
    since = datetime.now() - timedelta(hours=hours)
    async with get_db() as db:
        rows = (
            await db.execute(
                text(
                    "SELECT tc.theme_id, tc.theme_name, "
                    "COUNT(*) AS catalyst_count, "
                    "SUM((c.strength * 0.45 + c.freshness * 0.25 + c.confidence * 0.15 + tc.relevance * 0.15)) AS raw_score, "
                    "GROUP_CONCAT(SUBSTR(COALESCE(tc.reason, c.summary, c.title), 1, 60), ' | ') AS reasons "
                    "FROM theme_catalysts tc "
                    "JOIN catalysts c ON c.id = tc.catalyst_id "
                    "WHERE c.is_active = 1 AND COALESCE(c.published_at, c.created_at) >= :since "
                    "GROUP BY tc.theme_id, tc.theme_name "
                    "ORDER BY raw_score DESC "
                    "LIMIT :limit"
                ),
                {"since": since, "limit": limit},
            )
        ).mappings().all()

    scores = []
    for row in rows:
        raw = float(row.get("raw_score") or 0)
        count = int(row.get("catalyst_count") or 0)
        # Average per catalyst; max(count, 1) guards against division by zero.
        normalized = min(raw / max(count, 1), 100)
        # Each snippet was already truncated to 60 characters in SQL; keep
        # the first three non-empty ones.
        reasons = [
            part.strip()
            for part in str(row.get("reasons") or "").split(reason_separator)
            if part.strip()
        ][:3]
        scores.append(
            ThemeCatalystScore(
                theme_id=row["theme_id"],
                theme_name=row["theme_name"],
                catalyst_score=round(normalized, 1),
                catalyst_count=count,
                top_reasons=reasons,
                generated_by="catalyst_layer",
            )
        )
    return scores