This commit is contained in:
aaron 2026-05-26 21:10:06 +08:00
parent 46ceae0c9a
commit e6c16a0dbb
5 changed files with 190 additions and 16 deletions

View File

@ -34,6 +34,17 @@ docker compose up --build
Compose 会启动两个服务:`dispatcher` 负责 Web/API/管理台,`worker` 负责周期性处理失败重试。 Compose 会启动两个服务:`dispatcher` 负责 Web/API/管理台,`worker` 负责周期性处理失败重试。
## Delivery Reliability
系统采用“先入库、后发送”的至少一次投递模型:
- TradingView webhook 命中规则后,会先写入 `alerts` 和 `deliveries`,再异步发送飞书。
- 未发送的飞书会保留在 `deliveries`,状态为 `pending`、`processing`、`retry` 或 `failed`。
- 每次实际发送尝试都会写入 `delivery_attempts`,包括 HTTP 状态、飞书返回体、错误原因和下次重试时间。
- 发送失败会按 `RETRY_BACKOFF_SECONDS` 延迟重试,直到达到 `MAX_DELIVERY_ATTEMPTS`。
- 飞书 HTTP 200 但响应体业务码非 0 时,也会按失败处理并进入重试。
- 管理台「日志」页面可以查看未发送、重试中、失败的分发记录,并可手动立即重发。
## TradingView Payload ## TradingView Payload
```json ```json

View File

@ -136,8 +136,26 @@ class Database:
FOREIGN KEY(target_id) REFERENCES webhook_targets(id) ON DELETE SET NULL FOREIGN KEY(target_id) REFERENCES webhook_targets(id) ON DELETE SET NULL
); );
CREATE TABLE IF NOT EXISTS delivery_attempts (
id INTEGER PRIMARY KEY AUTOINCREMENT,
delivery_id INTEGER NOT NULL,
alert_id INTEGER NOT NULL,
attempt_no INTEGER NOT NULL,
status TEXT NOT NULL,
response_code INTEGER,
response_body TEXT,
error TEXT,
attempted_at TEXT NOT NULL,
next_attempt_at TEXT,
FOREIGN KEY(delivery_id) REFERENCES deliveries(id) ON DELETE CASCADE,
FOREIGN KEY(alert_id) REFERENCES alerts(id) ON DELETE CASCADE
);
CREATE INDEX IF NOT EXISTS idx_deliveries_retry CREATE INDEX IF NOT EXISTS idx_deliveries_retry
ON deliveries(status, next_attempt_at); ON deliveries(status, next_attempt_at);
CREATE INDEX IF NOT EXISTS idx_delivery_attempts_delivery
ON delivery_attempts(delivery_id, attempt_no);
""" """
) )
existing_columns = { existing_columns = {

View File

@ -103,6 +103,30 @@ def build_feishu_message(alert: dict[str, Any], rule: dict[str, Any] | None = No
} }
def feishu_response_error(response_body: str | None) -> str | None:
if not response_body:
return None
try:
payload = json.loads(response_body)
except json.JSONDecodeError:
return None
if not isinstance(payload, dict):
return None
code = payload.get("code", payload.get("StatusCode", payload.get("status_code")))
if code in (None, 0, "0"):
return None
message = (
payload.get("msg")
or payload.get("message")
or payload.get("StatusMessage")
or payload.get("status_msg")
or "unknown error"
)
return f"feishu_error: code={code}, message={message}"
class Dispatcher: class Dispatcher:
def __init__(self, db: Database, settings: Settings): def __init__(self, db: Database, settings: Settings):
self.db = db self.db = db
@ -224,9 +248,11 @@ class Dispatcher:
delivery_ids.append(int(delivery.lastrowid)) delivery_ids.append(int(delivery.lastrowid))
if rule and not delivery_ids: if rule and not delivery_ids:
status = "unmatched"
error = "Matched rule has no webhook targets."
conn.execute( conn.execute(
"UPDATE alerts SET status = ?, error = ? WHERE id = ?", "UPDATE alerts SET status = ?, error = ? WHERE id = ?",
("unmatched", "Matched rule has no webhook targets.", alert_id), (status, error, alert_id),
) )
if self.settings.dispatch_inline: if self.settings.dispatch_inline:
@ -269,20 +295,21 @@ class Dispatcher:
""", """,
(now, limit), (now, limit),
).fetchall() ).fetchall()
delivery_ids = [row["id"] for row in rows] claimed_rows = []
if delivery_ids: for row in rows:
placeholders = ",".join("?" for _ in delivery_ids) cur = conn.execute(
conn.execute( """
f"""
UPDATE deliveries UPDATE deliveries
SET status = 'processing', updated_at = ? SET status = 'processing', updated_at = ?
WHERE id IN ({placeholders}) WHERE id = ?
AND status IN ('pending', 'retry') AND status IN ('pending', 'retry')
""", """,
(now, *delivery_ids), (now, row["id"]),
) )
if cur.rowcount == 1:
claimed_rows.append(row)
jobs = [(dict(row), from_json(row["payload"], {})) for row in rows] jobs = [(dict(row), from_json(row["payload"], {})) for row in claimed_rows]
if not jobs: if not jobs:
return 0 return 0
worker_count = max(1, min(concurrency, len(jobs))) worker_count = max(1, min(concurrency, len(jobs)))
@ -369,6 +396,10 @@ class Dispatcher:
if response_code >= 400: if response_code >= 400:
status = "failed" status = "failed"
error = f"Feishu webhook returned HTTP {response_code}" error = f"Feishu webhook returned HTTP {response_code}"
else:
error = feishu_response_error(response_body)
if error:
status = "failed"
except urllib.error.HTTPError as exc: except urllib.error.HTTPError as exc:
response_code = exc.code response_code = exc.code
response_body = exc.read(2048).decode(errors="replace") response_body = exc.read(2048).decode(errors="replace")
@ -390,7 +421,28 @@ class Dispatcher:
next_time = datetime.now(UTC) + timedelta(seconds=self.settings.retry_backoff_seconds * attempts) next_time = datetime.now(UTC) + timedelta(seconds=self.settings.retry_backoff_seconds * attempts)
next_attempt_at = next_time.replace(microsecond=0).isoformat() next_attempt_at = next_time.replace(microsecond=0).isoformat()
attempted_at = now_iso()
with self.db.connect() as conn: with self.db.connect() as conn:
conn.execute(
"""
INSERT INTO delivery_attempts (
delivery_id, alert_id, attempt_no, status, response_code,
response_body, error, attempted_at, next_attempt_at
)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
""",
(
delivery["id"],
delivery["alert_id"],
attempts,
status,
response_code,
response_body,
error,
attempted_at,
next_attempt_at,
),
)
conn.execute( conn.execute(
""" """
UPDATE deliveries UPDATE deliveries
@ -402,11 +454,11 @@ class Dispatcher:
status, status,
attempts, attempts,
next_attempt_at, next_attempt_at,
now_iso(), attempted_at,
response_code, response_code,
response_body, response_body,
error, error,
now_iso(), attempted_at,
delivery["id"], delivery["id"],
), ),
) )

View File

@ -605,7 +605,17 @@ class Handler(BaseHTTPRequestHandler):
).fetchall() ).fetchall()
else: else:
deliveries = conn.execute( deliveries = conn.execute(
f"SELECT * FROM deliveries {delivery_where_sql} ORDER BY id DESC LIMIT ? OFFSET ?", f"""
SELECT d.*,
(
SELECT COUNT(*)
FROM delivery_attempts da
WHERE da.delivery_id = d.id
) AS attempt_records
FROM deliveries d
{delivery_where_sql}
ORDER BY d.id DESC LIMIT ? OFFSET ?
""",
(*delivery_params, page_size, offset), (*delivery_params, page_size, offset),
).fetchall() ).fetchall()
return { return {
@ -619,7 +629,12 @@ class Handler(BaseHTTPRequestHandler):
with self.context.db.connect() as conn: with self.context.db.connect() as conn:
alerts = conn.execute("SELECT * FROM alerts ORDER BY id DESC LIMIT 100").fetchall() alerts = conn.execute("SELECT * FROM alerts ORDER BY id DESC LIMIT 100").fetchall()
deliveries = conn.execute("SELECT * FROM deliveries ORDER BY id DESC LIMIT 200").fetchall() deliveries = conn.execute("SELECT * FROM deliveries ORDER BY id DESC LIMIT 200").fetchall()
return {"alerts": [dict(row) for row in alerts], "deliveries": [dict(row) for row in deliveries]} attempts = conn.execute("SELECT * FROM delivery_attempts ORDER BY id DESC LIMIT 200").fetchall()
return {
"alerts": [dict(row) for row in alerts],
"deliveries": [dict(row) for row in deliveries],
"delivery_attempts": [dict(row) for row in attempts],
}
def render_dashboard(self) -> None: def render_dashboard(self) -> None:
host = self.headers.get("Host", f"localhost:{self.context.settings.port}") host = self.headers.get("Host", f"localhost:{self.context.settings.port}")
@ -927,18 +942,18 @@ class Handler(BaseHTTPRequestHandler):
<td><details class="payload-details"><summary>查看</summary><pre>{html.escape(raw_payload)}</pre></details></td> <td><details class="payload-details"><summary>查看</summary><pre>{html.escape(raw_payload)}</pre></details></td>
</tr>""" </tr>"""
delivery_rows = "".join( delivery_rows = "".join(
f"""<tr><td>{row['id']}</td><td>{row['alert_id']}</td><td>{html.escape(row['target_name'])}</td><td><span class='status'>{html.escape(row['status'])}</span></td><td>{row['attempts']}</td><td>{html.escape(str(row['response_code'] or ''))}</td><td>{format_display_time(self.context.settings, row['last_attempt_at'])}</td><td>{html.escape(row['error'] or '')}</td><td>{format_display_time(self.context.settings, row['next_attempt_at'])}</td><td><form class="inline" method="post" action="/deliveries/retry-one"><input type="hidden" name="id" value="{row['id']}"><button class="small-button" type="submit" {'disabled' if row['status'] == 'sent' else ''}>立即重发</button></form></td></tr>""" f"""<tr><td>{row['id']}</td><td>{row['alert_id']}</td><td>{html.escape(row['target_name'])}</td><td><span class='status'>{html.escape(row['status'])}</span></td><td>{row['attempts']}</td><td>{row.get('attempt_records', row['attempts'])}</td><td>{html.escape(str(row['response_code'] or ''))}</td><td>{format_display_time(self.context.settings, row['last_attempt_at'])}</td><td>{html.escape(row['error'] or '')}</td><td>{format_display_time(self.context.settings, row['next_attempt_at'])}</td><td><form class="inline" method="post" action="/deliveries/retry-one"><input type="hidden" name="id" value="{row['id']}"><button class="small-button" type="submit" {'disabled' if row['status'] == 'sent' else ''}>立即重发</button></form></td></tr>"""
for row in logs["deliveries"] for row in logs["deliveries"]
) )
alert_empty = '<tr><td colspan="8" class="empty-cell">暂无 Alert 日志</td></tr>' alert_empty = '<tr><td colspan="8" class="empty-cell">暂无 Alert 日志</td></tr>'
delivery_empty = '<tr><td colspan="10" class="empty-cell">暂无分发日志</td></tr>' delivery_empty = '<tr><td colspan="11" class="empty-cell">暂无分发日志</td></tr>'
alert_active = " active" if active_tab == "alerts" else "" alert_active = " active" if active_tab == "alerts" else ""
delivery_active = " active" if active_tab == "deliveries" else "" delivery_active = " active" if active_tab == "deliveries" else ""
active_table = ( active_table = (
f"""<table><thead><tr><th>ID</th><th>品种</th><th>周期</th><th>策略</th><th>状态</th><th>错误</th><th>时间</th><th>原始 Alert</th></tr></thead><tbody>{alert_rows or alert_empty}</tbody></table> f"""<table><thead><tr><th>ID</th><th>品种</th><th>周期</th><th>策略</th><th>状态</th><th>错误</th><th>时间</th><th>原始 Alert</th></tr></thead><tbody>{alert_rows or alert_empty}</tbody></table>
{render_pagination("/logs", "alerts", page, logs["alert_total"], LOG_PAGE_SIZE, filters)}""" {render_pagination("/logs", "alerts", page, logs["alert_total"], LOG_PAGE_SIZE, filters)}"""
if active_tab == "alerts" if active_tab == "alerts"
else f"""<table><thead><tr><th>ID</th><th>Alert</th><th>目标</th><th>状态</th><th>次数</th><th>HTTP</th><th>发送时间</th><th>错误</th><th>下次重试</th><th>操作</th></tr></thead><tbody>{delivery_rows or delivery_empty}</tbody></table> else f"""<table><thead><tr><th>ID</th><th>Alert</th><th>目标</th><th>状态</th><th>次数</th><th>明细</th><th>HTTP</th><th>发送时间</th><th>错误</th><th>下次重试</th><th>操作</th></tr></thead><tbody>{delivery_rows or delivery_empty}</tbody></table>
{render_pagination("/logs", "deliveries", page, logs["delivery_total"], LOG_PAGE_SIZE, filters)}""" {render_pagination("/logs", "deliveries", page, logs["delivery_total"], LOG_PAGE_SIZE, filters)}"""
) )
timezone_label = html.escape(display_timezone_label(self.context.settings)) timezone_label = html.escape(display_timezone_label(self.context.settings))

View File

@ -3,6 +3,7 @@ from __future__ import annotations
import os import os
import tempfile import tempfile
import unittest import unittest
from unittest.mock import patch
from app.config import Settings from app.config import Settings
from app.db import Database, now_iso, to_json from app.db import Database, now_iso, to_json
@ -163,6 +164,69 @@ class DispatcherTest(unittest.TestCase):
self.assertEqual(delivery["attempts"], 1) self.assertEqual(delivery["attempts"], 1)
self.assertTrue(delivery["error"].startswith("network_error:") or delivery["error"].startswith("send_error:")) self.assertTrue(delivery["error"].startswith("network_error:") or delivery["error"].startswith("send_error:"))
def test_feishu_business_error_is_retried_even_with_http_200(self) -> None:
# Checks that an HTTP 200 reply whose JSON body carries code 9499 leaves the
# delivery in "retry" state and records one matching delivery_attempts row.
# NOTE(review): indentation is lost in this view, so the exact nesting of the
# with-blocks below cannot be confirmed from here.
# Stand-in for the object returned by urllib.request.urlopen: a context
# manager that reports status 200 and yields a fixed error body.
class Response:
def __enter__(self) -> "Response":
return self
def __exit__(self, exc_type: object, exc: object, traceback: object) -> None:
return None
def getcode(self) -> int:
return 200
def read(self, size: int = -1) -> bytes:
return b'{"code":9499,"msg":"bad sign"}'
self.add_rule(self.add_target(url="https://open.feishu.cn/open-apis/bot/v2/hook/test"))
# Replace urlopen so no real HTTP request is made while the alert is handled.
with patch("urllib.request.urlopen", return_value=Response()):
result = self.dispatcher.receive_alert(
{"timeframe": "5m", "symbol": "BTCUSDT", "strategy": "breakout", "action": "buy"}
)
processed = self.dispatcher.process_due_deliveries(limit=10)
# Read back the delivery row and one of its attempt rows for the assertions.
with self.db.connect() as conn:
delivery = conn.execute("SELECT * FROM deliveries WHERE alert_id = ?", (result["alert_id"],)).fetchone()
attempt = conn.execute(
"SELECT * FROM delivery_attempts WHERE delivery_id = ?",
(delivery["id"],),
).fetchone()
# Exactly one delivery was processed and it is scheduled for retry.
self.assertEqual(processed, 1)
self.assertEqual(delivery["status"], "retry")
self.assertEqual(delivery["attempts"], 1)
self.assertIn("feishu_error: code=9499", delivery["error"])
# The attempt row carries the same attempt number, status and error text.
self.assertEqual(attempt["attempt_no"], 1)
self.assertEqual(attempt["status"], "retry")
self.assertIn("feishu_error: code=9499", attempt["error"])
def test_retry_attempts_are_recorded_individually(self) -> None:
# Checks that two consecutive failed sends produce two delivery_attempts rows
# (attempt_no 1 and 2) with statuses "retry" then "failed".
# NOTE(review): the final "failed" status presumably relies on the fixture
# capping attempts at 2 — confirm against setUp, which is not visible here.
# NOTE(review): indentation is lost in this view; retry_delivery_now is
# presumably still inside the patch block — confirm in the real file.
self.add_rule(self.add_target(url="https://open.feishu.cn/open-apis/bot/v2/hook/test"))
# Every urlopen call raises TimeoutError, so each send attempt fails.
with patch("urllib.request.urlopen", side_effect=TimeoutError("timed out")):
result = self.dispatcher.receive_alert(
{"timeframe": "5m", "symbol": "BTCUSDT", "strategy": "breakout", "action": "buy"}
)
self.dispatcher.process_due_deliveries(limit=10)
# Look up the delivery id so the same delivery can be retried by hand.
with self.db.connect() as conn:
delivery = conn.execute(
"SELECT * FROM deliveries WHERE alert_id = ?",
(result["alert_id"],),
).fetchone()
self.dispatcher.retry_delivery_now(delivery["id"])
# Re-read the delivery and all of its attempt rows in attempt order.
with self.db.connect() as conn:
delivery = conn.execute("SELECT * FROM deliveries WHERE alert_id = ?", (result["alert_id"],)).fetchone()
attempts = conn.execute(
"SELECT * FROM delivery_attempts WHERE delivery_id = ? ORDER BY attempt_no",
(delivery["id"],),
).fetchall()
self.assertEqual(delivery["status"], "failed")
self.assertEqual(delivery["attempts"], 2)
# One row per attempt, each keeping the status it had at the time.
self.assertEqual([row["attempt_no"] for row in attempts], [1, 2])
self.assertEqual([row["status"] for row in attempts], ["retry", "failed"])
def test_rule_can_dispatch_to_multiple_targets(self) -> None: def test_rule_can_dispatch_to_multiple_targets(self) -> None:
target_a = self.add_target("ops-a") target_a = self.add_target("ops-a")
target_b = self.add_target("ops-b") target_b = self.add_target("ops-b")
@ -177,6 +241,20 @@ class DispatcherTest(unittest.TestCase):
count = conn.execute("SELECT COUNT(*) AS c FROM deliveries WHERE alert_id = ?", (result["alert_id"],)).fetchone()["c"] count = conn.execute("SELECT COUNT(*) AS c FROM deliveries WHERE alert_id = ?", (result["alert_id"],)).fetchone()["c"]
self.assertEqual(count, 2) self.assertEqual(count, 2)
def test_rule_with_missing_targets_returns_unmatched(self) -> None:
    """Check that a rule wired only to target id 999 yields an unmatched alert.

    The alert must come back with status "unmatched", no delivery ids, and
    the stored alert row must carry the same status plus the explanatory
    error text.
    """
    # NOTE(review): id 999 is presumably absent from the fixture's targets.
    self.add_rule_with_targets([999])

    incoming = {"timeframe": "5m", "symbol": "BTCUSDT", "strategy": "breakout", "action": "buy"}
    outcome = self.dispatcher.receive_alert(incoming)

    with self.db.connect() as conn:
        cursor = conn.execute("SELECT * FROM alerts WHERE id = ?", (outcome["alert_id"],))
        stored_alert = cursor.fetchone()

    self.assertEqual(outcome["status"], "unmatched")
    self.assertEqual(outcome["delivery_ids"], [])
    self.assertEqual(stored_alert["status"], "unmatched")
    self.assertEqual(stored_alert["error"], "Matched rule has no webhook targets.")
def test_legacy_disabled_target_is_still_dispatchable(self) -> None: def test_legacy_disabled_target_is_still_dispatchable(self) -> None:
target_id = self.add_target() target_id = self.add_target()
with self.db.connect() as conn: with self.db.connect() as conn: