154 lines
5.7 KiB
Python
154 lines
5.7 KiB
Python
import base64
import json
import logging
import re

from tencentcloud.common import credential
from tencentcloud.common.profile.client_profile import ClientProfile
from tencentcloud.common.profile.http_profile import HttpProfile
from tencentcloud.ocr.v20181119 import ocr_client, models

from app.core.config import settings
|
||
|
||
class OCRService:
    """Extract parcel pickup codes from images using Tencent Cloud OCR.

    An image is sent to the ``GeneralAccurateOCR`` API and the detected text
    lines are then grouped into stations and the pickup codes that follow them.
    """

    _log = logging.getLogger(__name__)

    # Accepted pickup-code formats, in the order codes are reported per line.
    # Explicit lookarounds are used instead of ``\b``: in ``str`` patterns CJK
    # characters count as word characters, so ``\b`` never matches between
    # "码" and a digit and a line such as "取件码1234" would be rejected.
    # ASCII letters, digits and "_" still block a match, exactly as ``\b`` did.
    _CODE_PATTERNS = (
        # Shelf-style codes, e.g. 15-4-223.
        re.compile(r"(?<![\dA-Za-z_])\d{1,2}-\d{1,2}-\d{2,3}(?![\dA-Za-z_])"),
        # Plain numeric codes of 4 to 8 digits.
        re.compile(r"(?<![\dA-Za-z_])\d{4,8}(?![\dA-Za-z_])"),
    )

    # Keyword found in the OCR text -> app identifier returned to the caller.
    _APP_KEYWORDS = {
        "菜鸟": "CAINIAO",
        "京东": "JD",
        "顺丰": "SF",
    }

    # A line containing any of these substrings is treated as a station name.
    _STATION_KEYWORDS = ("驿站", "站点", "仓", "站", "分拨", "分拣")

    def __init__(self):
        """Build the Tencent Cloud OCR client from the application settings."""
        cred = credential.Credential(settings.TENCENT_SECRET_ID, settings.TENCENT_SECRET_KEY)
        http_profile = HttpProfile()
        http_profile.endpoint = "ocr.tencentcloudapi.com"

        client_profile = ClientProfile()
        client_profile.httpProfile = http_profile
        self.client = ocr_client.OcrClient(cred, settings.TENCENT_REGION, client_profile)

    async def recognize_pickup_code(self, image_content: bytes) -> dict:
        """Run OCR on a pickup-code image and parse the detected text.

        Args:
            image_content: Raw bytes of the image to recognise.

        Returns:
            The dict produced by ``_extract_pickup_info``: ``"stations"`` (a
            list of ``{"station_name", "pickup_codes"}`` dicts) and
            ``"app_type"``.

        Raises:
            Exception: If the OCR request or the parsing fails. The original
                error is attached as ``__cause__``.
        """
        try:
            req = models.GeneralAccurateOCRRequest()
            # The API takes the image as a base64 string.
            req.ImageBase64 = base64.b64encode(image_content).decode()

            # NOTE(review): this SDK call is not awaited and its result is used
            # immediately, so it appears to block the event loop for the whole
            # request - consider moving it to a worker thread once the
            # client's thread-safety has been confirmed.
            resp = self.client.GeneralAccurateOCR(req)
            result = json.loads(resp.to_json_string())
            self._log.debug("OCR response: %s", result)

            text_list = [
                item["DetectedText"]
                for item in result.get("TextDetections") or []
            ]
            return self._extract_pickup_info(text_list)
        except Exception as e:
            raise Exception(f"识别失败: {str(e)}") from e

    def _find_pickup_codes(self, text: str) -> list:
        """Return every pickup code in ``text``, hyphenated codes first.

        The original text is searched directly. Stripping non-digit characters
        first would glue unrelated numbers on the same line into bogus codes.
        """
        return [
            match.group()
            for pattern in self._CODE_PATTERNS
            for match in pattern.finditer(text)
        ]

    def _is_valid_pickup_code(self, text: str) -> bool:
        """Return True if ``text`` contains a pickup code in a known format.

        Accepted formats are ``xx-x-xxx``-style shelf codes (e.g. 15-4-223)
        and plain runs of 4 to 8 digits.
        """
        return any(pattern.search(text) for pattern in self._CODE_PATTERNS)

    def _extract_pickup_info(self, text_list: list) -> dict:
        """Group OCR text lines into stations and their pickup codes.

        Lines are processed in order. A line containing a station keyword
        starts a new station, and codes found on that line and the following
        lines are attached to it. Codes seen before any station line are
        reported under a station whose ``station_name`` is ``None``. Stations
        without any code are omitted.

        Args:
            text_list: Detected text lines, in reading order.

        Returns:
            ``{"stations": [{"station_name": str | None,
            "pickup_codes": [str, ...]}, ...], "app_type": str | None}``.
            ``app_type`` is ``"CAINIAO"``, ``"JD"`` or ``"SF"`` when a matching
            keyword is present (a later line overrides an earlier one). If no
            keyword is present it falls back to ``"CAINIAO"`` when any code
            contains a hyphen, and is ``None`` otherwise.
        """
        stations = []
        current_station = None
        current_codes = []
        pickup_info = {
            "stations": [],  # list of stations
            "app_type": None,  # app the screenshot came from
        }

        for text in text_list:
            # Detect the app type. Only the first keyword per line is used.
            for keyword, app_type in self._APP_KEYWORDS.items():
                if keyword in text:
                    pickup_info["app_type"] = app_type
                    break

            if any(keyword in text for keyword in self._STATION_KEYWORDS):
                # Flush what has been collected so far. This includes codes
                # seen before the first station line, which are kept under a
                # None station name instead of being discarded.
                if current_codes:
                    stations.append({
                        "station_name": current_station,
                        "pickup_codes": current_codes,
                    })
                current_station = text
                current_codes = []

            # Collect the codes on this line, skipping duplicates.
            for code in self._find_pickup_codes(text):
                if code not in current_codes:
                    current_codes.append(code)

        # Save the trailing group. station_name stays None if no station line
        # was ever seen.
        if current_codes:
            stations.append({
                "station_name": current_station,
                "pickup_codes": current_codes,
            })

        # Without an explicit app keyword, a hyphenated code implies Cainiao.
        if not pickup_info["app_type"] and any(
            "-" in code
            for station in stations
            for code in station["pickup_codes"]
        ):
            pickup_info["app_type"] = "CAINIAO"

        pickup_info["stations"] = stations
        return pickup_info
|
||
|
||
# Shared module-level instance. It is created at import time, so importing this
# module builds the Tencent Cloud OCR client from the application settings.
ocr_service = OCRService()