280 lines
8.9 KiB
Python
280 lines
8.9 KiB
Python
"""
|
||
Global exception handler.

Logs exceptions that reach ``sys.excepthook`` unhandled and sends a Feishu
notification for them.
|
||
"""
|
||
import sys
|
||
import traceback
|
||
from datetime import datetime
|
||
from typing import Optional
|
||
from threading import Lock
|
||
|
||
from app.utils.logger import logger
|
||
|
||
|
||
class GlobalExceptionHandler:
    """Singleton that logs uncaught exceptions and forwards them to Feishu.

    Every call to ``GlobalExceptionHandler()`` returns the same instance.
    Notifications are rate limited: after one has been dispatched, further
    exceptions are only logged until ``error_cooldown`` seconds have elapsed.

    Attributes:
        feishu_service: Object exposing an awaitable ``send_text(text)``;
            ``None`` until ``set_feishu_service`` is called.
        enabled: When ``False`` exceptions are logged but never sent.
        last_error_time: ``datetime`` of the last dispatched notification,
            or ``None`` if none has been dispatched yet.
        error_cooldown: Minimum number of seconds between notifications.
    """

    # Upper bound on the traceback text embedded in a notification.
    MAX_STACK_TRACE_CHARS = 3000

    _instance = None
    _lock = Lock()
    _initialized = False

    def __new__(cls):
        """Return the shared instance, creating it on first use."""
        if cls._instance is None:
            with cls._lock:
                # Re-check under the lock: another thread may have won the race.
                if cls._instance is None:
                    cls._instance = super().__new__(cls)
        return cls._instance

    def __init__(self):
        """Initialise the shared state exactly once."""
        with GlobalExceptionHandler._lock:
            if GlobalExceptionHandler._initialized:
                return

            self.feishu_service = None
            self.enabled = True
            self.last_error_time = None
            self.error_cooldown = 300  # seconds between notifications

            # Flip the flag only after every attribute exists, so a concurrent
            # caller never observes a half-initialised instance.
            GlobalExceptionHandler._initialized = True

        logger.info("全局异常处理器初始化完成")

    def set_feishu_service(self, feishu_service):
        """Attach the service used to deliver notifications.

        Args:
            feishu_service: Object whose ``send_text(text)`` returns an
                awaitable.
        """
        self.feishu_service = feishu_service
        logger.info("异常处理器已连接飞书服务")

    def set_enabled(self, enabled: bool):
        """Turn notification delivery on or off (logging always happens)."""
        self.enabled = enabled
        logger.info(f"异常通知已{'启用' if enabled else '禁用'}")

    def set_cooldown(self, seconds: int):
        """Set the minimum number of seconds between two notifications."""
        self.error_cooldown = seconds
        logger.info(f"错误通知冷却时间已设置为 {seconds} 秒")

    def handle_exception(self, exc_type, exc_value, exc_traceback):
        """Log an uncaught exception and, if allowed, send a notification.

        The signature matches ``sys.excepthook``.

        Args:
            exc_type: Exception class.
            exc_value: Exception instance.
            exc_traceback: Traceback object (may be ``None``).
        """
        # A user-initiated interrupt (including subclasses) is not an error.
        if isinstance(exc_type, type) and issubclass(exc_type, KeyboardInterrupt):
            logger.info("用户主动中断程序")
            return

        if not self.enabled:
            logger.warning("异常通知已禁用,仅记录日志")
            self._log_exception(exc_type, exc_value, exc_traceback)
            return

        if self.last_error_time is not None:
            time_since_last = (datetime.now() - self.last_error_time).total_seconds()
            if time_since_last < self.error_cooldown:
                logger.warning(f"错误通知冷却中(剩余 {int(self.error_cooldown - time_since_last)} 秒)")
                self._log_exception(exc_type, exc_value, exc_traceback)
                return

        self._log_exception(exc_type, exc_value, exc_traceback)

        # Start the cooldown only when a notification was actually dispatched;
        # otherwise a missing service or a failed send would silence the next
        # real notification.
        if self._send_error_notification(exc_type, exc_value, exc_traceback):
            self.last_error_time = datetime.now()

    def _log_exception(self, exc_type, exc_value, exc_traceback):
        """Write the exception type, message, time and full traceback to the log."""
        logger.error("=" * 60)
        logger.error("❌ 未捕获的异常")
        logger.error("=" * 60)
        logger.error(f"异常类型: {exc_type.__name__}")
        logger.error(f"异常信息: {str(exc_value)}")
        logger.error(f"发生时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")

        tb_lines = traceback.format_exception(exc_type, exc_value, exc_traceback)
        logger.error("堆栈跟踪:\n" + "".join(tb_lines))
        logger.error("=" * 60)

    def _send_error_notification(self, exc_type, exc_value, exc_traceback):
        """Build the alert text and hand it to the Feishu service.

        Never raises: any failure is logged instead.

        Returns:
            ``True`` if the message was sent (or scheduled on a running event
            loop), ``False`` if no service is configured or sending failed.
        """
        if not self.feishu_service:
            logger.warning("飞书服务未设置,无法发送错误通知")
            return False

        try:
            exc_name = exc_type.__name__
            exc_msg = str(exc_value)
            exc_time = datetime.now().strftime('%Y-%m-%d %H:%M:%S')

            stack_trace = "".join(
                traceback.format_exception(exc_type, exc_value, exc_traceback)
            )
            limit = self.MAX_STACK_TRACE_CHARS
            if len(stack_trace) > limit:
                # Tracebacks list the most recent call last, so keep the tail:
                # it holds the frame that raised and the error line itself.
                stack_trace = "... (堆栈信息过长,已截断)\n" + stack_trace[-limit:]

            formatted_stack = "```\n" + stack_trace + "\n```"

            message = "\n".join(
                (
                    "🚨 **系统异常报警**",
                    "",
                    f"**异常类型**: {exc_name}",
                    f"**异常信息**: {exc_msg}",
                    f"**发生时间**: {exc_time}",
                    "",
                    "**堆栈跟踪**:",
                    formatted_stack,
                    "",
                    "⚠️ 请及时处理系统异常",
                )
            )

            self._dispatch_text(message)
        except Exception as e:
            logger.error(f"发送异常通知失败: {e}")
            return False

        logger.info("✅ 已发送异常通知到飞书")
        return True

    def _dispatch_text(self, text):
        """Run ``feishu_service.send_text(text)`` on a suitable event loop."""
        import asyncio

        # Only this lookup is guarded, so a RuntimeError raised by send_text
        # itself can no longer trigger a second send attempt.
        try:
            running_loop = asyncio.get_running_loop()
        except RuntimeError:
            running_loop = None

        if running_loop is None:
            # No loop is running in this thread: drive the coroutine to completion.
            asyncio.run(self.feishu_service.send_text(text))
            return

        # A loop is already running here, so we cannot block on it; schedule
        # the send and report a failure once it completes.
        pending = asyncio.run_coroutine_threadsafe(
            self.feishu_service.send_text(text), running_loop
        )
        pending.add_done_callback(self._log_dispatch_failure)

    @staticmethod
    def _log_dispatch_failure(pending):
        """Log the exception, if any, of a scheduled send."""
        if pending.cancelled():
            return
        error = pending.exception()
        if error is not None:
            logger.error(f"发送异常通知失败: {error}")
|
||
|
||
|
||
# Module-level cache of the shared handler; filled in lazily by
# get_exception_handler().
_exception_handler: Optional[GlobalExceptionHandler] = None


def get_exception_handler() -> GlobalExceptionHandler:
    """Return the module's shared handler, constructing it on first call."""
    global _exception_handler
    shared = _exception_handler
    if shared is None:
        shared = _exception_handler = GlobalExceptionHandler()
    return shared
|
||
|
||
|
||
def setup_global_exception_handler():
    """Install the shared handler as ``sys.excepthook``.

    After this call, exceptions that reach ``sys.excepthook`` are passed to
    the shared handler's ``handle_exception``.
    """
    shared_handler = get_exception_handler()

    def _excepthook(exc_type, exc_value, exc_traceback):
        """Forward the excepthook arguments to the shared handler."""
        shared_handler.handle_exception(exc_type, exc_value, exc_traceback)

    sys.excepthook = _excepthook

    logger.info("✅ 全局异常处理器已安装")
|
||
|
||
|
||
def init_error_notifier(feishu_service, enabled: bool = True, cooldown: int = 300):
    """Configure the shared handler for Feishu delivery.

    Args:
        feishu_service: Feishu service instance used to send notifications.
        enabled: Whether exception notifications are sent.
        cooldown: Minimum number of seconds between notifications.
    """
    notifier = get_exception_handler()

    # Apply the settings in the same order as before so the log lines
    # emitted by each setter keep their sequence.
    notifier.set_feishu_service(feishu_service)
    notifier.set_enabled(enabled)
    notifier.set_cooldown(cooldown)

    logger.info("错误通知器初始化完成")
|
||
|
||
|
||
def notify_error(title: str, message: str, level: str = "error"):
    """Send a notification for an error that was caught but should be reported.

    Nothing is sent when the handler is disabled, has no Feishu service, or is
    still inside its cooldown window. The function never raises: failures are
    logged. The cooldown timestamp is updated only after the message was sent
    or scheduled on a running event loop.

    Args:
        title: Notification title.
        message: Notification body.
        level: One of ``"error"``, ``"warning"`` or ``"info"``; selects the
            icon. Any other value falls back to a generic icon.
    """
    handler = get_exception_handler()
    service = handler.feishu_service

    if not service or not handler.enabled:
        logger.debug(f"错误通知未启用或飞书服务未设置: {title}")
        return

    last_sent = handler.last_error_time
    if last_sent is not None:
        time_since_last = (datetime.now() - last_sent).total_seconds()
        if time_since_last < handler.error_cooldown:
            logger.debug(f"错误通知冷却中,跳过: {title}")
            return

    def _log_async_failure(pending):
        """Log the exception, if any, of a send scheduled on a running loop."""
        if pending.cancelled():
            return
        error = pending.exception()
        if error is not None:
            logger.error(f"发送手动错误通知失败: {error}")

    try:
        icons = {
            "error": "🚨",
            "warning": "⚠️",
            "info": "ℹ️",
        }
        icon = icons.get(level, "📌")

        formatted_message = "\n".join(
            (
                f"{icon} **{title}**",
                "",
                f"{message}",
                "",
                f"**时间**: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
            )
        )

        import asyncio

        # Guard only the loop lookup, so a RuntimeError raised by send_text
        # itself is reported once instead of triggering a second send.
        try:
            running_loop = asyncio.get_running_loop()
        except RuntimeError:
            running_loop = None

        if running_loop is None:
            # No loop is running in this thread: drive the coroutine to completion.
            asyncio.run(service.send_text(formatted_message))
        else:
            # A loop is already running here; schedule the send and log a
            # failure once it completes.
            pending = asyncio.run_coroutine_threadsafe(
                service.send_text(formatted_message), running_loop
            )
            pending.add_done_callback(_log_async_failure)
    except Exception as e:
        logger.error(f"发送手动错误通知失败: {e}")
    else:
        logger.info(f"✅ 已发送错误通知: {title}")
        handler.last_error_time = datetime.now()
|