import base64
import json
import logging
import re

from tencentcloud.common import credential
from tencentcloud.common.profile.client_profile import ClientProfile
from tencentcloud.common.profile.http_profile import HttpProfile
from tencentcloud.ocr.v20181119 import ocr_client, models

from app.core.config import settings

logger = logging.getLogger(__name__)


class OCRService:
    """Recognize parcel pickup codes in screenshots via Tencent Cloud OCR."""

    # A pickup code is either a hyphenated shelf code (e.g. "15-4-223") or a
    # plain run of 4-8 digits.  Explicit ASCII lookarounds are used instead of
    # ``\b`` because ``\b`` treats CJK characters as word characters, so a
    # code glued to Chinese text ("取件码1234") would never be matched.
    _PICKUP_CODE_RE = re.compile(
        r"(?<![0-9A-Za-z_])"
        r"(?:\d{1,2}-\d{1,2}-\d{2,3}|\d{4,8})"
        r"(?![0-9A-Za-z_])"
    )

    # Substrings that mark a recognized text line as a station name.
    # "站" already covers "驿站" and "站点"; they are listed for readability.
    _STATION_KEYWORDS = ("驿站", "站点", "仓", "站", "分拨", "分拣")

    # Keyword found in the recognized text -> app type identifier.
    _APP_KEYWORDS = {
        "菜鸟": "CAINIAO",
        "京东": "JD",
        "顺丰": "SF",
    }

    def __init__(self):
        """Build the Tencent Cloud OCR client from the project settings."""
        cred = credential.Credential(
            settings.TENCENT_SECRET_ID, settings.TENCENT_SECRET_KEY
        )
        http_profile = HttpProfile()
        http_profile.endpoint = "ocr.tencentcloudapi.com"
        client_profile = ClientProfile()
        client_profile.httpProfile = http_profile
        self.client = ocr_client.OcrClient(
            cred, settings.TENCENT_REGION, client_profile
        )

    async def recognize_pickup_code(self, image_content: bytes) -> dict:
        """Run OCR on a pickup-code image and extract the pickup information.

        Args:
            image_content: Raw bytes of the image to recognize.

        Returns:
            A dict with the keys ``"stations"`` (list of
            ``{"station_name", "pickup_codes"}`` dicts) and ``"app_type"``,
            as produced by ``_extract_pickup_info``.

        Raises:
            Exception: Any failure while calling the OCR API or parsing its
                response, re-raised with a "识别失败" message and the original
                exception chained as the cause.
        """
        try:
            # The API takes the image as a base64 string.
            req = models.GeneralAccurateOCRRequest()
            req.ImageBase64 = base64.b64encode(image_content).decode()

            # NOTE(review): this looks like a synchronous SDK call, so it
            # blocks the event loop while the request is in flight.  Consider
            # offloading it to an executor once the client's thread-safety
            # has been confirmed.
            resp = self.client.GeneralAccurateOCR(req)
            result = json.loads(resp.to_json_string())
            logger.debug("OCR response: %s", result)

            # Collect the recognized text lines; tolerate a null list.
            text_list = [
                item["DetectedText"]
                for item in result.get("TextDetections") or []
            ]
            return self._extract_pickup_info(text_list)
        except Exception as e:
            raise Exception(f"识别失败: {e}") from e

    def _is_valid_pickup_code(self, text: str) -> bool:
        """Return True if *text* contains something shaped like a pickup code.

        Accepted shapes are ``xx-x-xxx``-style hyphenated codes and plain
        4-8 digit numbers that are not part of a longer ASCII alphanumeric
        token.
        """
        return self._PICKUP_CODE_RE.search(text) is not None

    def _extract_pickup_info(self, text_list: list) -> dict:
        """Group the pickup codes found in OCR text lines by station.

        The lines are scanned in order.  A line containing a station keyword
        starts a new station; every pickup code found afterwards (including
        on the station line itself) belongs to that station until the next
        station line.  Codes seen before any station line are reported under
        ``station_name`` ``None`` instead of being dropped.  Stations without
        any code are not reported.

        Args:
            text_list: Recognized text lines, in reading order.

        Returns:
            ``{"stations": [{"station_name": str | None,
            "pickup_codes": [str, ...]}, ...], "app_type": str | None}``.
        """
        stations = []
        app_type = None
        current_station = None
        current_codes = []

        for text in text_list:
            # App type: the first matching keyword of a line is used, and a
            # later line containing a keyword overrides an earlier one.
            for keyword, candidate in self._APP_KEYWORDS.items():
                if keyword in text:
                    app_type = candidate
                    break

            if any(keyword in text for keyword in self._STATION_KEYWORDS):
                # Flush what has been collected so far.  current_station is
                # None here when the codes appeared before any station line.
                if current_codes:
                    stations.append({
                        "station_name": current_station,
                        "pickup_codes": current_codes,
                    })
                current_station = text
                current_codes = []

            # Extract from the raw text: stripping the separators first
            # would glue unrelated numbers together and produce bogus codes.
            for match in self._PICKUP_CODE_RE.finditer(text):
                code = match.group()
                if code not in current_codes:
                    current_codes.append(code)

        # Flush the last group (station_name stays None if none was found).
        if current_codes:
            stations.append({
                "station_name": current_station,
                "pickup_codes": current_codes,
            })

        # No app keyword was seen: a hyphenated code is taken as a hint that
        # the screenshot comes from Cainiao.
        if stations and not app_type:
            has_hyphenated_code = any(
                "-" in code
                for station in stations
                for code in station["pickup_codes"]
            )
            if has_hyphenated_code:
                app_type = "CAINIAO"

        return {"stations": stations, "app_type": app_type}


# Shared module-level instance; the OCR client is created at import time.
ocr_service = OCRService()