基于YOLO、ByteTrack及PaddleOCR的车辆追踪与车牌智能识别全流程

2025年10月22日 0条评论 3.01k次阅读 0人点赞 bluemoon

PlateTrack 集成了目标检测、车牌识别和视频处理等一系列任务，有以下几个流程：

🚗 对道路中的车辆进行二次车牌检测
📝 使用 OCR 技术识别车牌号码：自动补全省份信息，过滤无效识别结果，完美支持中文车牌显示
📹 生成带有识别结果的标注视频
📊 输出详细的检测数据文件

项目结构如下：


PlateTrack/
├── src/
│   ├── plate_track.py          # 主程序文件
│   ├── config.py               # 配置文件
│   └── utils.py                # 工具函数
├── models/
│   ├── vehicle_detector.pt     # 车辆检测模型
│   └── license_plate_detector.pt # 车牌检测模型
├── data/
│   ├── input_videos/           # 输入视频
│   ├── output_videos/          # 输出视频
│   └── detection_results/      # 检测结果
├── docs/
│   └── README.md
├── requirements.txt
└── run_demo.py                 # 演示脚本

快速开始：

克隆项目

git clone https://github.com/username/PlateTrack.git
cd PlateTrack

安装依赖与python包

pip install -r requirements.txt

自定义配置

修改 config.py 中的配置参数如下：


# 输入检测的视频或图像序列
IMAGES_DIR = './data/input_videos/images-2'
# 输入图像序列或视频的车辆目标检测结果
FULL_DETECTION_TXT_PATH = './data/input_videos/images-2.txt'
# 车牌检测权重
PLATE_DETECTION_MODEL_PATH = './models/license_plate_detector.pt'
# 输出车牌检测视频
OUTPUT_VIDEO_PATH = './data/output_videos/output_traffic_video.mp4'
# 输出车牌检测标签文件
OUTPUT_TXT_PATH = './data/detection_results/images-car-plate-final.txt'
# 需要进行车牌二次检测的车辆类别，包括汽车、面包车、卡车、公交
VEHICLE_CLASSES = ['Car', 'Van', 'Truck', 'Bus']
# 首字补全，在第一个汉字识别效果不佳或者无法识别的时候，自动补全车牌号第一个汉字区号
DEFAULT_PROVINCE = '苏'

代码思路: 图像预处理

该函数主要处理车牌图像，OCR在识别文字的时候会有图像的尺寸要求，这个函数将不符合尺寸的车牌放大处理，同时加入了对比度调整、锐化、去噪等操作，使图像更加清晰


def preprocess_plate_image(plate_img):
    """车牌的预处理：放大、对比度、锐化、去噪"""
    if plate_img is None or plate_img.size == 0:
        return plate_img
    # 创建输入图像的拷贝
    processed = plate_img.copy()
    # 获取图像的高度、宽度信息
    h, w = processed.shape[:2]
    # 放大小车牌，目标至少150x40
    if w < 150 or h < 40:
        scale_w = max(150 / w, 1.0)
        scale_h = max(40 / h, 1.0)
        # 额外放大两倍确保清晰
        scale = max(scale_w, scale_h) * 2.0
        new_w = int(w * scale)
        new_h = int(h * scale)
        # 三次插值算法放大
        processed = cv2.resize(processed, (new_w, new_h), interpolation=cv2.INTER_CUBIC)
    # 对比度增强与轻微锐化、去噪
    # 像素值 = alpha × 原像素值 + beta，其中 alpha 控制对比度，beta 控制亮度
    processed = cv2.convertScaleAbs(processed, alpha=1.5, beta=20)
    # 图像锐化，增强边缘和细节
    kernel = np.array([[-1, -1, -1], [-1, 9, -1], [-1, -1, -1]])
    processed = cv2.filter2D(processed, -1, kernel)
    # 图像轻微去噪，使用双边滤波函数，在去噪的同时保持图像边缘清晰
    processed = cv2.bilateralFilter(processed, 5, 50, 50)
    return processed

使用PaddleOCR进行车牌识别

OCR初始化


# 导入OCR
from PIL import Image, ImageDraw, ImageFont
# ---------- 初始化 OCR ----------
try:
    ocr = PaddleOCR(
        lang='ch', # 支持中英文识别
        use_doc_orientation_classify=False,
        use_doc_unwarping=False,
        text_det_limit_side_len=64
    )
    print("PaddleOCR 初始化成功")
except Exception as e:
    print(f"PaddleOCR 初始化失败，请检查安装: {e}")
    ocr = None

OCR识别，输入裁剪处理后的车牌图像输出车牌号文本


def recognize_license_plate(license_plate_image: np.ndarray, save_debug_path: str = None) -> str:
    """
    对单个车牌图像执行预处理 + OCR + 后处理（自动补首字、过滤非车牌）
    返回识别出的车牌字符串（或空字符串）
    """
    if ocr is None or license_plate_image is None or license_plate_image.size == 0:
        return ""

    # 预处理
    processed = preprocess_plate_image(license_plate_image)

    # # 调试图像
    # if save_debug_path:
    #     cv2.imwrite(save_debug_path, license_plate_image)
    #     cv2.imwrite(save_debug_path.replace('.jpg', '_proc.jpg'), processed)

    try:
        # PaddleOCR
        result = ocr.predict(processed,
                             use_doc_orientation_classify=False, # 禁用文档方向分类
                             use_doc_unwarping=False, # 禁用文档弯曲校正
                             use_textline_orientation=False, # 禁用文本行方向检测
                             text_det_thresh=0.3, # 文本检测置信度阈值
                             text_det_box_thresh=0.5, # 文本检测框置信度阈值
                             text_rec_score_thresh=0.3) # 文本识别置信度阈值

        # result是list(dict)
        # 遍历检测到的文本区域，按score降序优先使用高置信度结果
        candidates = []
        if result and len(result) > 0:
            item = result[0]
            # PaddleOCR返回rec_texts/rec_scores
            if 'rec_texts' in item and item['rec_texts']:
                texts = item['rec_texts']
                # 获取置信度
                scores = item.get('rec_scores', [1.0] * len(texts))
                for t, s in zip(texts, scores):
                    # 文本和置信度配对
                    candidates.append((t, float(s)))
            else:
                # 老格式：每个element里包含text/score
                for r in result:
                    if isinstance(r, dict) and 'text' in r and 'score' in r:
                        candidates.append((r['text'], float(r['score'])))

        # 按元组的第二个元素（置信度）降序排序
        candidates = sorted(candidates, key=lambda x: x[1], reverse=True)

        for text, score in candidates:
            if not text or score < 0.25:
                continue
            # 去空格和分隔符
            cleaned = text.strip().replace(' ', '').replace('-', '').replace('·', '').replace('.', '')
            # 常见OCR错误替换
            cleaned = cleaned.replace('O', '0').replace('I', '1').replace('l', '1').replace('Z', '2').replace('S', '5').replace('B', '8').replace('0', 'D')
            # 若首字不是中文则自动补省份（Unicode范围）
            if not ('\u4e00' <= cleaned[0] <= '\u9fff'):
                cleaned = DEFAULT_PROVINCE + cleaned
            # 统一转成字母大写
            cleaned = cleaned.upper()
            # 过滤非车牌内容,将路上的广告牌、标志牌常见字样过滤掉，比如禁止、减速等
            if not is_valid_plate(cleaned):
                # 如果被过滤，继续尝试下一个候选项
                continue

            return cleaned # 返回有效的车牌字符串

    except Exception as e:
        print(f"OCR 识别异常: {e}")

    return ""

使用opencv和PIL绘制编码视频


# putText函数只能绘制英文
# 所以需要将cv图片转换为PIL格式
# cv2的颜色模式为BGR，PIL的颜色格式为RGB
# --- 绘制框与标签 ---
# BGR格式
            if obj_class in VEHICLE_CLASSES:
                color = (84, 218, 250)
            elif obj_class == 'Pedestrian':
                color = (195, 202, 129)
            else:
                color = (94, 111, 229)
            # 绘制矩形框
            cv2.rectangle(frame_to_draw, (x1_obj, y1_obj), (x2_obj, y2_obj), color, 2)
            # 主标签绘制（类型和ID）
            label_text = f"{obj_class}{obj_id}"
            # 绘制主标签背景与文本
            (text_w, text_h), baseline = cv2.getTextSize(label_text, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 2)
            cv2.rectangle(frame_to_draw,
                        (x1_obj, max(0, y1_obj - text_h - baseline - 5)),
                        (x1_obj + text_w, y1_obj),
                        color, -1)
            cv2.putText(frame_to_draw, label_text, (x1_obj, max(0, y1_obj - baseline - 5)),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)
            # 如果有车牌号，在车辆框下方单独显示
            if plate_text:
                plate_color = (27, 55, 121)               
                # 支持中文
                try:                   
                    # 将OpenCV图像转换为PIL图像
                    pil_img = Image.fromarray(cv2.cvtColor(frame_to_draw, cv2.COLOR_BGR2RGB))
                    draw = ImageDraw.Draw(pil_img)                 
                    # 加载中文字体
                    font_path = "/home/kemove/devdata1/zrl/MOT/ByteTrack/ultralytics/SIMHEI.TTF"  # 黑体字体文件，可以从系统字体目录获取或下载
                    font_size = 25
                    font = ImageFont.truetype(font_path, font_size)                  
                    # 计算车牌文本位置（车辆框下方）
                    plate_label = f"{plate_text}"
                    bbox = draw.textbbox((0, 0), plate_label, font=font)
                    text_width = bbox[2] - bbox[0]
                    text_height = bbox[3] - bbox[1]                 
                    # 计算背景矩形位置
                    bg_x1 = x1_obj
                    bg_y1 = y2_obj + 5
                    bg_x2 = x1_obj + text_width + 10
                    bg_y2 = y2_obj + 5 + text_height + 10          
                    # 绘制背景矩形
                    draw.rectangle([bg_x1, bg_y1, bg_x2, bg_y2], fill=plate_color)    
                    # 绘制车牌文本
                    draw.text((bg_x1 + 5, bg_y1 + 5), plate_label, font=font, fill=(255, 255, 255))         
                    # 将PIL图像转换回OpenCV格式
                    frame_to_draw = cv2.cvtColor(np.array(pil_img), cv2.COLOR_RGB2BGR)

图像坐标、车辆坐标以及车牌坐标的转换

首先这里有一个辅助函数wh_to_xyxy，目标检测的标签给出的格式为（x,y,w,h）分别代表检测框的左上x，左上y，宽度w，高度h，需要用函数wh_to_xyxy将其转换为（x1,y1,x2,y2）代表左上x，左上y，右下x，右下y


# ---------- 辅助函数 ----------
def wh_to_xyxy(x, y, w, h):
    """将 (x,y,w,h) 转为 (x1,y1,x2,y2)"""
    return int(x), int(y), int(x + w), int(y + h)

（1）车牌检测结果的相对坐标处理
- 相对坐标指的是相对于裁剪后车辆图像的车牌的坐标
- 车牌检测是在cropped裁剪后的车辆图像上进行的，所以检测到的车牌的坐标是相对于这个裁剪区域的
简单示意图如下：


if plate_results and len(plate_results[0].boxes) > 0:
    for box in plate_results[0].boxes:
        coords = box.xyxy.cpu().numpy()[0]
        conf = float(box.conf.cpu().numpy()[0])
        # 获取车牌相对于车辆检测结果的相对坐标x1_rel, y1_rel, x2_rel, y2_rel
        x1_rel, y1_rel, x2_rel, y2_rel = map(int, coords)
        w_rel = max(1, x2_rel - x1_rel)
        h_rel = max(1, y2_rel - y1_rel)
        ar = w_rel / h_rel

假设：
- 原图尺寸：1920×1080
- 车辆在原图中的位置：x1_obj=100, y1_obj=200, x2_obj=500, y2_obj=600
- 车牌在裁剪图像中的检测位置：x1_rel=50, y1_rel=100, x2_rel=150, y2_rel=130
（2）相对坐标转绝对坐标


x1_abs = x1_rel + x1_crop
y1_abs = y1_rel + y1_crop
x2_abs = x2_rel + x2_crop
y2_abs = y2_rel + y2_crop

- 裁剪区域：从原图的(100,200)到(500,600)，尺寸400×400
- 车牌在裁剪图像中的位置：(50,100)到(150,130)
由此计算出车牌在整个图像坐标系的绝对坐标，不再以车辆的范围（100，200）为原点，而是以（0，0）为绝对坐标的原点，可得绝对坐标x1_abs、y1_abs、x2_abs、y2_abs：
- x1_abs = 50 + 100 = 150 （车牌左上角在原图中的x坐标）
- y1_abs = 100 + 200 = 300 （车牌左上角在原图中的y坐标）
- x2_abs = 150 + 100 = 250 （车牌右下角在原图中的x坐标）
- y2_abs = 130 + 200 = 330 （车牌右下角在原图中的y坐标）
（3）坐标扩展和最终裁剪


expand = 2
y1c = max(0, y1_abs - expand)
x1c = max(0, x1_abs - expand)
y2c = min(H, y2_abs + expand)
x2c = min(W, x2_abs + expand)
plate_img = frame[y1c:y2c, x1c:x2c]

- 车牌检测框可能裁剪得太紧，边缘字符可能被截断
- 扩展2个像素可以确保完整包含车牌字符，计算过程如下：
- y1c = max(0, 300 - 2) = 298 （扩展后的上边界）
- x1c = max(0, 150 - 2) = 148 （扩展后的左边界）
- y2c = min(1080, 330 + 2) = 332（扩展后的下边界）
- x2c = min(1920, 250 + 2) = 252（扩展后的右边界）
其中max和min函数是边界保护：
- max(0, ...)：确保不会出现负坐标（超出图像左边界或上边界）
- min(W, ...) 和 min(H, ...)：确保不会超出图像右边界或下边界

主要算法流程


# ---------- 主流程 ----------
def main():
    # 验证路径
    if not os.path.exists(IMAGES_DIR):
        print(f"图像目录不存在: {IMAGES_DIR}")
        return
    if not os.path.exists(FULL_DETECTION_TXT_PATH):
        print(f"检测结果 TXT 不存在: {FULL_DETECTION_TXT_PATH}")
        return

    # 加载检测结果 TXT
    try:
        df = pd.read_csv(
            FULL_DETECTION_TXT_PATH,
            sep=',',
            header=None,
            names=['frame_name', 'class', 'id', 'x', 'y', 'w', 'h']
        )
        print(f"加载检测记录数: {len(df)}")
    except Exception as e:
        print(f"读取 TXT 失败: {e}")
        return

    # 加载车牌检测模型
    try:
        plate_model = YOLO(PLATE_DETECTION_MODEL_PATH)
        print("车牌检测模型加载成功")
    except Exception as e:
        print(f"加载车牌检测模型失败: {e}")
        return

    # 视频写入器准备
    frame_names = sorted([f for f in os.listdir(IMAGES_DIR) if os.path.isfile(os.path.join(IMAGES_DIR, f))])
    if not frame_names:
        print("图像文件夹为空")
        return

    first_frame = cv2.imread(os.path.join(IMAGES_DIR, frame_names[0]))
    if first_frame is None:
        print("无法读取首帧")
        return
    H, W = first_frame.shape[:2]
    FPS = 10
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    video_writer = cv2.VideoWriter(OUTPUT_VIDEO_PATH, fourcc, FPS, (W, H))

    final_lines = []

    # 创建进度条
    print("开始处理视频帧...")
    progress_bar = tqdm(total=len(frame_names), desc="处理进度", unit="帧", ncols=100)
    
    # 统计信息
    total_vehicles = 0
    plates_detected = 0

    # 遍历每帧
    for frame_name in frame_names:
        frame_path = os.path.join(IMAGES_DIR, frame_name)
        frame = cv2.imread(frame_path)
        if frame is None:
            progress_bar.update(1)
            continue

        frame_to_draw = frame.copy()
        current_df = df[df['frame_name'] == frame_name]

        for _, row in current_df.iterrows():
            obj_x, obj_y, obj_w, obj_h = row['x'], row['y'], row['w'], row['h']
            obj_id = row['id']
            obj_class = row['class']
            x1_obj, y1_obj, x2_obj, y2_obj = wh_to_xyxy(obj_x, obj_y, obj_w, obj_h)

            plate_text = ""
            best_plate_box_abs = None

            # 仅对车辆类别进行车牌检测 + OCR
            if obj_class in VEHICLE_CLASSES:
                total_vehicles += 1
                
                # 裁剪车辆 ROI
                x1_crop = max(0, x1_obj)
                y1_crop = max(0, y1_obj)
                x2_crop = min(W, x2_obj)
                y2_crop = min(H, y2_obj)
                cropped = frame[y1_crop:y2_crop, x1_crop:x2_crop]
                if cropped is not None and cropped.size > 0:
                    # 在裁剪区域运行车牌检测（返回可能多个框）
                    plate_results = plate_model(cropped, conf=0.1, verbose=False)

                    # 解析候选框并做筛选
                    candidate_boxes = []
                    if plate_results and len(plate_results[0].boxes) > 0:
                        for box in plate_results[0].boxes:
                            # xyxy, conf
                            coords = box.xyxy.cpu().numpy()[0]
                            conf = float(box.conf.cpu().numpy()[0])
                            x1_rel, y1_rel, x2_rel, y2_rel = map(int, coords)
                            w_rel = max(1, x2_rel - x1_rel)
                            h_rel = max(1, y2_rel - y1_rel)
                            ar = w_rel / h_rel

                            # 过滤候选：置信度、长宽比、位置（车牌一般在车辆下半部）
                            if conf < 0.35:
                                continue
                            if ar < 2.0 or ar > 8.0:
                                continue
                            if y1_rel < (cropped.shape[0] * 0.5):  # 上半部丢弃
                                continue

                            candidate_boxes.append((x1_rel, y1_rel, x2_rel, y2_rel, conf))

                    # 若有候选框，按置信度从高到低尝试 OCR
                    if candidate_boxes:
                        candidate_boxes = sorted(candidate_boxes, key=lambda x: x[4], reverse=True)
                        for i, (x1_rel, y1_rel, x2_rel, y2_rel, conf) in enumerate(candidate_boxes):
                            # 映射回原图坐标
                            x1_abs = x1_rel + x1_crop
                            y1_abs = y1_rel + y1_crop
                            x2_abs = x2_rel + x2_crop
                            y2_abs = y2_rel + y2_crop

                            # 适当扩展一点像素避免切得太紧
                            expand = 2
                            y1c = max(0, y1_abs - expand)
                            x1c = max(0, x1_abs - expand)
                            y2c = min(H, y2_abs + expand)
                            x2c = min(W, x2_abs + expand)
                            plate_img = frame[y1c:y2c, x1c:x2c]

                            if plate_img is None or plate_img.size == 0:
                                continue

                            debug_save = os.path.join(DEBUG_OUTPUT_DIR, f"plate_{frame_name.split('.')[0]}_{obj_id}_{i}.jpg")
                            plate_rec = recognize_license_plate(plate_img, save_debug_path=debug_save)

                            if plate_rec:
                                plate_text = plate_rec
                                best_plate_box_abs = [x1_abs, y1_abs, x2_abs, y2_abs]
                                plates_detected += 1
                                break  # 找到一个可信的就可以停止尝试

            # 记录 TXT 行（无论是否识别到车牌都记录）
            original_line = f"{frame_name},{obj_class},{obj_id},{obj_x},{obj_y},{obj_w},{obj_h}"
            final_lines.append(f"{original_line},{plate_text}\n")

            # --- 绘制框与标签 ---
            # BGR格式
            if obj_class in VEHICLE_CLASSES:
                color = (84, 218, 250)
            elif obj_class == 'Pedestrian':
                color = (195, 202, 129)
            else:
                color = (94, 111, 229)
            # color = (94, 111, 229) if obj_class in VEHICLE_CLASSES elif obj_class is Pedestrian (133, 150, 241)  
            cv2.rectangle(frame_to_draw, (x1_obj, y1_obj), (x2_obj, y2_obj), color, 2)

            # 主标签（类型和ID）
            label_text = f"{obj_class}{obj_id}"
            # if plate_text:
            #     label_text += f"|PLATE:{plate_text}"

            # 绘制主标签背景与文本
            (text_w, text_h), baseline = cv2.getTextSize(label_text, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 2)
            cv2.rectangle(frame_to_draw,
                        (x1_obj, max(0, y1_obj - text_h - baseline - 5)),
                        (x1_obj + text_w, y1_obj),
                        color, -1)
            cv2.putText(frame_to_draw, label_text, (x1_obj, max(0, y1_obj - baseline - 5)),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)

            # 如果有车牌号，在车辆框下方单独显示
            if plate_text:
                plate_color = (27, 55, 121)
                
                # 支持中文
                try:
                    
                    # 将OpenCV图像转换为PIL图像
                    pil_img = Image.fromarray(cv2.cvtColor(frame_to_draw, cv2.COLOR_BGR2RGB))
                    draw = ImageDraw.Draw(pil_img)
                    
                    # 加载中文字体
                    font_path = "./SIMHEI.TTF"  # 黑体字体文件，可以从系统字体目录获取或下载
                    font_size = 25
                    font = ImageFont.truetype(font_path, font_size)
                    
                    # 计算车牌文本位置（车辆框下方）
                    plate_label = f"{plate_text}"
                    bbox = draw.textbbox((0, 0), plate_label, font=font)
                    text_width = bbox[2] - bbox[0]
                    text_height = bbox[3] - bbox[1]
                    
                    # 计算背景矩形位置
                    bg_x1 = x1_obj
                    bg_y1 = y2_obj + 5
                    bg_x2 = x1_obj + text_width + 10
                    bg_y2 = y2_obj + 5 + text_height + 10
                    
                    # 绘制背景矩形
                    draw.rectangle([bg_x1, bg_y1, bg_x2, bg_y2], fill=plate_color)
                    
                    # 绘制车牌文本
                    draw.text((bg_x1 + 5, bg_y1 + 5), plate_label, font=font, fill=(255, 255, 255))
                    
                    # 将PIL图像转换回OpenCV格式
                    frame_to_draw = cv2.cvtColor(np.array(pil_img), cv2.COLOR_RGB2BGR)
                    
                except ImportError:
                    # 如果PIL不可用，回退到OpenCV（不支持中文）
                    print("PIL未安装,中文显示不正常,请安装: pip install Pillow")
                    
                    plate_label = f"PLATE:{plate_text}"
                    (text_w_plate, text_h_plate), baseline_plate = cv2.getTextSize(plate_label, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 2)
                    
                    # 绘制车牌背景
                    cv2.rectangle(frame_to_draw,
                                (x1_obj, y2_obj + 5),
                                (x1_obj + text_w_plate, y2_obj + 5 + text_h_plate + baseline_plate + 5),
                                plate_color, -1)
                    
                    # 绘制车牌文本
                    cv2.putText(frame_to_draw, plate_label, 
                                (x1_obj, y2_obj + 5 + text_h_plate + baseline_plate),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)

        # 写入视频帧
        video_writer.write(frame_to_draw)
        
        # 更新进度条
        progress_bar.update(1)
        progress_bar.set_postfix({
            '车辆数': total_vehicles,
            '车牌识别': plates_detected,
            '识别率': f"{plates_detected/max(total_vehicles,1)*100:.1f}%"
        })

    # 关闭进度条
    progress_bar.close()

    # 释放视频并写入 TXT
    video_writer.release()
    with open(OUTPUT_TXT_PATH, 'w') as f:
        f.writelines(final_lines)

    print("\n处理完成")
    print(f"视频保存: {OUTPUT_VIDEO_PATH}")
    print(f"TXT 保存: {OUTPUT_TXT_PATH} (格式: frame,class,id,x,y,w,h,plate_text)")
    print(f"统计信息: 总共处理 {len(frame_names)} 帧，检测到 {total_vehicles} 辆车，成功识别 {plates_detected} 个车牌")

回声收集者

基于YOLO、ByteTrack及PaddleOCR的车辆追踪与车牌智能识别全流程

本作品采用知识共享署名-相同方式共享 4.0 国际许可协议进行许可

发表回复取消回复

回声收集者

本作品采用 知识共享署名-相同方式共享 4.0 国际许可协议 进行许可

发表回复 取消回复

本作品采用知识共享署名-相同方式共享 4.0 国际许可协议进行许可

发表回复取消回复