327 lines
13 KiB
Python
327 lines
13 KiB
Python
import os
|
||
import json
|
||
import requests
|
||
from typing import Dict, Any, List, Optional, Tuple
|
||
import time
|
||
import logging
|
||
import datetime
|
||
|
||
from cryptoai.utils.config_loader import ConfigLoader
|
||
|
||
# 配置日志
|
||
logging.basicConfig(
|
||
level=logging.INFO,
|
||
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
|
||
handlers=[
|
||
logging.FileHandler("qwen_token_usage.log"),
|
||
logging.StreamHandler()
|
||
]
|
||
)
|
||
|
||
class QwenAPI:
|
||
"""Qwen API交互类,用于进行大语言模型调用"""
|
||
|
||
def __init__(self):
|
||
"""
|
||
初始化Qwen API
|
||
|
||
Args:
|
||
api_key: Qwen API密钥
|
||
model: 使用的模型名称
|
||
"""
|
||
|
||
config_loader = ConfigLoader()
|
||
self.qwen_config = config_loader.get_qwen_config()
|
||
|
||
self.api_key = self.qwen_config['api_key']
|
||
self.model = self.qwen_config['model']
|
||
self.base_url = "https://dashscope.aliyuncs.com/api/v1"
|
||
self.headers = {
|
||
"Content-Type": "application/json",
|
||
"Authorization": f"Bearer {self.api_key}"
|
||
}
|
||
|
||
# Token 使用统计
|
||
self.token_usage = {
|
||
"total_prompt_tokens": 0,
|
||
"total_completion_tokens": 0,
|
||
"total_tokens": 0,
|
||
"calls": []
|
||
}
|
||
|
||
# 创建日志记录器
|
||
self.logger = logging.getLogger("QwenAPI")
|
||
|
||
def streaming_call(self, user_prompt: str):
|
||
"""
|
||
流式调用Qwen API
|
||
"""
|
||
|
||
system_prompt = "你是一个专业的区块链分析高手"
|
||
|
||
try:
|
||
endpoint = f"{self.base_url}/services/aigc/text-generation/generation"
|
||
payload = {
|
||
"model": self.model,
|
||
"input": {
|
||
"messages": [
|
||
{"role": "system", "content": system_prompt},
|
||
{"role": "user", "content": user_prompt}
|
||
]
|
||
},
|
||
"parameters": {
|
||
"stream": True
|
||
}
|
||
}
|
||
|
||
response = requests.post(endpoint, headers=self.headers, json=payload, stream=True)
|
||
response.raise_for_status()
|
||
|
||
for line in response.iter_lines():
|
||
if line:
|
||
# 解码二进制数据为字符串
|
||
line = line.decode('utf-8')
|
||
|
||
# 跳过空行和心跳检查行
|
||
if not line or line == "data: [DONE]":
|
||
continue
|
||
|
||
# 移除 "data: " 前缀
|
||
if line.startswith("data: "):
|
||
line = line[6:]
|
||
|
||
try:
|
||
# 解析JSON数据
|
||
data = json.loads(line)
|
||
|
||
# 提取content内容
|
||
if (data.get("output") and
|
||
data["output"].get("choices") and
|
||
len(data["output"]["choices"]) > 0 and
|
||
data["output"]["choices"][0].get("message") and
|
||
data["output"]["choices"][0]["message"].get("content")):
|
||
|
||
content = data["output"]["choices"][0]["message"]["content"]
|
||
yield content
|
||
except json.JSONDecodeError as e:
|
||
self.logger.error(f"解析JSON时出错: {e}, 原始数据: {line}")
|
||
continue
|
||
|
||
except Exception as e:
|
||
self.logger.error(f"流式调用Qwen API时出错: {e}")
|
||
raise e
|
||
|
||
|
||
def call_model(self, prompt: str, system_prompt: str = None, task_type: str = "未知任务", symbol: str = "未知", temperature: float = 0.2, max_tokens: int = 2000) -> Tuple[Dict[str, Any], Dict[str, Any]]:
|
||
"""
|
||
调用Qwen大语言模型
|
||
|
||
Args:
|
||
prompt: 用户提示词
|
||
system_prompt: 系统提示词,如果为None则使用默认值
|
||
task_type: 任务类型,用于记录
|
||
symbol: 交易对符号,用于记录
|
||
temperature: 采样温度,控制输出随机性
|
||
max_tokens: 最大生成token数
|
||
|
||
Returns:
|
||
(API响应, token使用信息)
|
||
"""
|
||
if system_prompt is None:
|
||
system_prompt = "你是一个专业的加密货币分析助手,擅长分析市场趋势、预测价格走向和提供交易建议。请始终使用中文回复,并确保输出格式规范的JSON。"
|
||
|
||
usage_info = {}
|
||
|
||
try:
|
||
endpoint = f"{self.base_url}/services/aigc/text-generation/generation"
|
||
|
||
payload = {
|
||
"model": self.model,
|
||
"input": {
|
||
"messages": [
|
||
{"role": "system", "content": system_prompt},
|
||
{"role": "user", "content": prompt}
|
||
]
|
||
},
|
||
"parameters": {
|
||
"temperature": temperature,
|
||
"max_tokens": max_tokens
|
||
}
|
||
}
|
||
|
||
start_time = time.time()
|
||
response = requests.post(endpoint, headers=self.headers, json=payload)
|
||
response.raise_for_status()
|
||
response_data = response.json()
|
||
end_time = time.time()
|
||
|
||
# 记录token使用情况
|
||
if 'usage' in response_data:
|
||
prompt_tokens = response_data['usage'].get('input_tokens', 0)
|
||
completion_tokens = response_data['usage'].get('output_tokens', 0)
|
||
total_tokens = prompt_tokens + completion_tokens
|
||
|
||
usage_info = {
|
||
"prompt_tokens": prompt_tokens,
|
||
"completion_tokens": completion_tokens,
|
||
"total_tokens": total_tokens,
|
||
"task_type": task_type,
|
||
"symbol": symbol,
|
||
"model": self.model,
|
||
"timestamp": datetime.datetime.now().isoformat(),
|
||
"duration_seconds": round(end_time - start_time, 2)
|
||
}
|
||
|
||
# 更新总计
|
||
self.token_usage["total_prompt_tokens"] += prompt_tokens
|
||
self.token_usage["total_completion_tokens"] += completion_tokens
|
||
self.token_usage["total_tokens"] += total_tokens
|
||
self.token_usage["calls"].append(usage_info)
|
||
|
||
# 记录到日志
|
||
self.logger.info(
|
||
f"Qwen API调用 - 任务: {task_type}, 符号: {symbol}, "
|
||
f"输入tokens: {prompt_tokens}, 输出tokens: {completion_tokens}, "
|
||
f"总tokens: {total_tokens}, 耗时: {round(end_time - start_time, 2)}秒"
|
||
)
|
||
|
||
return response_data, usage_info
|
||
|
||
except Exception as e:
|
||
error_msg = f"调用Qwen API时出错: {e}"
|
||
self.logger.error(error_msg)
|
||
return {}, usage_info
|
||
|
||
def extract_text_from_response(self, response: Dict[str, Any]) -> str:
|
||
"""
|
||
从响应中提取文本数据
|
||
"""
|
||
try:
|
||
if 'output' in response and 'choices' in response['output'] and len(response['output']['choices']) > 0:
|
||
content = response['output']['choices'][0]['message']['content']
|
||
|
||
# 如果内容以```markdown开头,则去掉```
|
||
if content.startswith('```markdown'):
|
||
content = content[len('```markdown'):]
|
||
# 如果内容以```结尾,则去掉```
|
||
if content.endswith('```'):
|
||
content = content[:-len('```')]
|
||
|
||
return content
|
||
else:
|
||
return {"error": "无法从响应中提取文本", "raw_content": response}
|
||
except Exception as e:
|
||
return {"error": str(e), "raw_content": response}
|
||
|
||
|
||
def extract_json_from_response(self, response: Dict[str, Any]) -> Dict[str, Any]:
|
||
"""
|
||
从响应中提取JSON数据
|
||
|
||
Args:
|
||
response: API响应
|
||
|
||
Returns:
|
||
提取的JSON数据
|
||
"""
|
||
try:
|
||
if 'output' in response and 'choices' in response['output'] and len(response['output']['choices']) > 0:
|
||
content = response['output']['choices'][0]['message']['content']
|
||
|
||
# 尝试从响应中提取JSON
|
||
start_idx = content.find('{')
|
||
end_idx = content.rfind('}') + 1
|
||
|
||
if start_idx != -1 and end_idx != -1:
|
||
json_str = content[start_idx:end_idx]
|
||
return json.loads(json_str)
|
||
|
||
return {"error": "无法从响应中提取JSON", "raw_content": content}
|
||
|
||
return {"error": "API响应格式不正确", "raw_response": response}
|
||
|
||
except Exception as e:
|
||
error_msg = f"解析响应时出错: {e}"
|
||
self.logger.error(error_msg)
|
||
return {"error": str(e), "raw_response": response}
|
||
|
||
def get_token_usage_stats(self) -> Dict[str, Any]:
|
||
"""
|
||
获取Token使用统计信息
|
||
|
||
Returns:
|
||
包含使用统计的字典
|
||
"""
|
||
return {
|
||
"total_prompt_tokens": self.token_usage["total_prompt_tokens"],
|
||
"total_completion_tokens": self.token_usage["total_completion_tokens"],
|
||
"total_tokens": self.token_usage["total_tokens"],
|
||
"total_calls": len(self.token_usage["calls"]),
|
||
"average_tokens_per_call": self.token_usage["total_tokens"] / len(self.token_usage["calls"]) if self.token_usage["calls"] else 0,
|
||
"detailed_calls": self.token_usage["calls"][-10:] # 仅返回最近10次调用详情
|
||
}
|
||
|
||
def export_token_usage(self, file_path: str = None, format: str = "json") -> str:
|
||
"""
|
||
导出Token使用数据到文件
|
||
|
||
Args:
|
||
file_path: 文件路径,如果为None则自动生成
|
||
format: 导出格式,支持'json'或'csv'
|
||
|
||
Returns:
|
||
导出文件的路径
|
||
"""
|
||
if file_path is None:
|
||
timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
|
||
file_path = f"qwen_token_usage_{timestamp}.{format}"
|
||
|
||
try:
|
||
if format.lower() == "json":
|
||
with open(file_path, 'w', encoding='utf-8') as f:
|
||
json.dump(self.token_usage, f, indent=2, ensure_ascii=False)
|
||
elif format.lower() == "csv":
|
||
import csv
|
||
|
||
with open(file_path, 'w', newline='', encoding='utf-8') as f:
|
||
writer = csv.writer(f)
|
||
# 写入表头
|
||
writer.writerow([
|
||
"timestamp", "task_type", "symbol", "model",
|
||
"prompt_tokens", "completion_tokens", "total_tokens",
|
||
"duration_seconds"
|
||
])
|
||
|
||
# 写入数据
|
||
for call in self.token_usage["calls"]:
|
||
writer.writerow([
|
||
call.get("timestamp", ""),
|
||
call.get("task_type", ""),
|
||
call.get("symbol", ""),
|
||
call.get("model", ""),
|
||
call.get("prompt_tokens", 0),
|
||
call.get("completion_tokens", 0),
|
||
call.get("total_tokens", 0),
|
||
call.get("duration_seconds", 0)
|
||
])
|
||
|
||
# 写入总计
|
||
writer.writerow([])
|
||
writer.writerow([
|
||
f"总计 (调用次数: {len(self.token_usage['calls'])})",
|
||
"", "", "",
|
||
self.token_usage["total_prompt_tokens"],
|
||
self.token_usage["total_completion_tokens"],
|
||
self.token_usage["total_tokens"],
|
||
""
|
||
])
|
||
else:
|
||
raise ValueError(f"不支持的格式: {format},仅支持 'json' 或 'csv'")
|
||
|
||
self.logger.info(f"Token使用数据已导出到: {file_path}")
|
||
return file_path
|
||
|
||
except Exception as e:
|
||
error_msg = f"导出Token使用数据时出错: {e}"
|
||
self.logger.error(error_msg)
|
||
return "" |