无人机自主降落引导标识视觉识别定位

无人机自主降落是无人机迈向全自主化的核心技术之一。针对无人机在复杂环境下精准降落的工程难题，本项目提出融合传统图像处理与深度学习的混合视觉引导方案，获南京航空航天大学校级创新创业项目立项并获经费支持。
realtime_detect_coords.py Python · 实时检测与坐标解算
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""实时目标检测 + 坐标计算 - 双模式（YOLO + WeChatQRCode）

模式说明：
1. YOLO模式：正常检测 sign/Yaw 标识
2. WeChatQRCode模式：YOLO检测不到时，使用腾讯深度学习二维码检测（EPnP精确定位）

倾角计算：
- sign 中心距离 Yaw 中心是 200mm
- 根据两中心点的图像位置计算无人机相对标识板的倾角
"""

import os
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"

import sys
# 确保脚本所在目录在 sys.path 最前面，保证 modules.repvit_c3k2 可被找到
_SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
if _SCRIPT_DIR not in sys.path:
    sys.path.insert(0, _SCRIPT_DIR)

import cv2
import numpy as np
import time
import math
import re

# ============================================================
# Ultralytics Patch 注入（修复 RepViTC3k2 模块）
# ============================================================
def inject_repvit_patch():
    """Register the custom ``RepViTC3k2`` module with ``ultralytics.nn.tasks``.

    The function is idempotent per process: it returns immediately when the
    ``_repvit_patch_applied`` marker is already set on the tasks module.

    Two paths, chosen by inspecting the text of the installed ``tasks.py``:

    * The import line is already present in the file: ``'RepViTC3k2'`` is
      appended to the module attribute ``base_modules`` (created with a
      default set if missing) and to ``repeat_modules`` when that exists.
    * Otherwise: the import line is inserted right after the first line of
      the file that matches ``^import ...``, the file is written back and
      the module is reloaded.

    Any exception is caught and reported on stdout; nothing is raised.

    NOTE(review): the second path rewrites a file inside the installed
    ultralytics package without a backup - confirm this is acceptable for
    the deployment environment.
    """
    import ultralytics.nn.tasks as tasks_module

    # Skip if this process has already applied the patch.
    if hasattr(tasks_module, '_repvit_patch_applied'):
        return

    # Import statement that gets injected into tasks.py.
    patch_code = '''
from modules.repvit_c3k2 import RepViTC3k2
'''

    # Path of the tasks.py file that sits next to the imported module.
    tasks_file = os.path.dirname(tasks_module.__file__) + "/tasks.py"

    try:
        with open(tasks_file, 'r', encoding='utf-8') as f:
            tasks_content = f.read()

        # Has the import already been written into the file?
        if 'from modules.repvit_c3k2 import RepViTC3k2' in tasks_content:
            # Import is present; only the module-name sets need extending.
            if 'RepViTC3k2' in str(tasks_module.__dict__.get('base_modules', set())):
                return  # fully patched already (marker is NOT set on this path)

            # Create base_modules with a default set if the module lacks it.
            # NOTE(review): presumably ultralytics reads these module-level
            # attributes when parsing a model - verify against the installed version.
            if not hasattr(tasks_module, 'base_modules'):
                tasks_module.base_modules = frozenset({'Conv', 'C2f', 'C3', 'C3k2', 'SPPF', 'C2PSA', 'Detect'})

            tasks_module.base_modules = frozenset(list(tasks_module.base_modules) + ['RepViTC3k2'])
            # Also extend repeat_modules when the module defines it.
            if hasattr(tasks_module, 'repeat_modules'):
                tasks_module.repeat_modules = frozenset(list(tasks_module.repeat_modules) + ['RepViTC3k2'])
        else:
            # Inject the import after the first top-of-line "import ..." statement.
            import_section = re.search(r'^(import .+)$', tasks_content, re.MULTILINE)
            if import_section:
                insert_pos = import_section.end()
                tasks_content = tasks_content[:insert_pos] + patch_code + '\n' + tasks_content[insert_pos:]

                with open(tasks_file, 'w', encoding='utf-8') as f:
                    f.write(tasks_content)

                # Reload so the running process sees the modified source.
                import importlib
                importlib.reload(tasks_module)

        # NOTE(review): the marker is set (and success is printed) even when
        # no "import" line was found above and nothing was injected.
        tasks_module._repvit_patch_applied = True
        print("[OK] RepViT patch injected")
    except Exception as e:
        # Best effort: report the failure and let the script continue.
        print(f"[!] RepViT patch failed: {e}")
        print("    (This does not affect running, model loaded successfully)")

# Apply the patch at import time, before YOLO is imported below.
inject_repvit_patch()

from ultralytics import YOLO

# ============================================================
# Camera calibration parameters (calibrated 2026-03, RMS=0.584px)
# IMPORTANT: the calibration was performed at 1920x1080
# ============================================================
CALIB_WIDTH = 1920    # image width used during calibration
CALIB_HEIGHT = 1080   # image height used during calibration
FX_BASE = 1459.05      # focal length along x (at calibration resolution)
FY_BASE = 1456.91      # focal length along y (at calibration resolution)
CX_BASE = 969.07       # principal point x (at calibration resolution)
CY_BASE = 531.88       # principal point y (at calibration resolution)
# Distortion coefficients passed to the OpenCV calls in this file.
# NOTE(review): presumably in OpenCV order (k1, k2, p1, p2, k3) - confirm.
DIST_COEFFS = np.array([0.1736, -0.4863, 0.00128, 0.00054, 0.4735])

# Run-time intrinsics (rescaled to the actual resolution by
# update_camera_params_for_resolution)
FX = FX_BASE
FY = FY_BASE
CX = CX_BASE
CY = CY_BASE

# Camera intrinsic matrix
CAMERA_K = np.array([
    [FX, 0, CX],
    [0, FY, CY],
    [0, 0, 1]
], dtype=np.float64)


def update_camera_params_for_resolution(w, h, verbose=False):
    """Rescale the module-level camera intrinsics to a ``w`` x ``h`` image.

    Intrinsics are expressed in pixels, so they scale proportionally with
    the image size. Example: calibrated at 1920x1080 and running at
    1280x720 gives a horizontal factor of 1280/1920 = 0.667.

    Args:
        w: current image width in pixels.
        h: current image height in pixels.
        verbose: when true, print the rescaled values.

    Side effects:
        Rebinds the globals ``FX``, ``FY``, ``CX``, ``CY`` and ``CAMERA_K``.
    """
    global FX, FY, CX, CY, CAMERA_K

    scale_x, scale_y = w / CALIB_WIDTH, h / CALIB_HEIGHT

    # Horizontal quantities follow the width ratio, vertical ones the height ratio.
    FX, CX = FX_BASE * scale_x, CX_BASE * scale_x
    FY, CY = FY_BASE * scale_y, CY_BASE * scale_y

    intrinsics = np.zeros((3, 3), dtype=np.float64)
    intrinsics[0, 0] = FX
    intrinsics[0, 2] = CX
    intrinsics[1, 1] = FY
    intrinsics[1, 2] = CY
    intrinsics[2, 2] = 1
    CAMERA_K = intrinsics

    if not verbose:
        return

    print(f"[Camera] 内参已缩放到 {w}x{h}:")
    print(f"         fx={FX:.1f}, fy={FY:.1f}")
    print(f"         cx={CX:.1f}, cy={CY:.1f}")
    print(f"         (标定分辨率: {CALIB_WIDTH}x{CALIB_HEIGHT}, 缩放: {scale_x:.3f})")

# ============================================================
# Physical size of the targets
# ============================================================
SIGN_DIAMETER = 0.6   # diameter of the circular "sign" marker: 600mm = 0.6m
YAW_DIAMETER = 0.09   # diameter of the "Yaw" heading indicator: 90mm = 0.09m
QR_SIZE = 0.113       # QR code edge length: 113mm = 0.113m
QR_CONTENT = "getqrcode12138"  # text encoded in the QR code
SIGN_TO_YAW_DISTANCE = 0.2     # sign centre to Yaw centre: 200mm = 0.2m

# ============================================================
# Class-id to name mapping
# ============================================================
CLASS_NAMES = {0: "sign", 1: "Yaw"}

# ============================================================
# Distance threshold configuration
# ============================================================
# Distance threshold (m) for switching to OpenCV mode.
# NOTE(review): not referenced in the part of the file reviewed here.
CLOSE_DISTANCE_THRESHOLD = 0.6

# ============================================================
# Model configuration (RepViT v5 only)
# ============================================================
BASE_DIR = os.path.dirname(os.path.abspath(__file__))

# 'path' is the weights file expected next to this script.
MODEL_CONFIG = {
    'name': 'YOLO26n + RepViT v5',
    'path': os.path.join(BASE_DIR, 'best.pt'),
    'type': 'repvit'
}


# ============================================================
# WeChatQRCode 检测器（内容：getqrcode12138，边长113mm）
# ============================================================
class WeChatQRCodeDetector:
    """Locate the landing pad's QR code with OpenCV's WeChatQRCode detector.

    The detector comes from ``cv2.wechat_qrcode_WeChatQRCode`` (part of
    opencv-contrib-python). Only a code whose decoded text equals
    ``QR_CONTENT`` is accepted; its physical edge length is ``QR_SIZE``.

    Attributes:
        detector: the OpenCV detector instance, or ``None`` when it could
            not be constructed (``detect`` then always reports failure).
        last_error: the most recent exception swallowed by ``detect``,
            kept for debugging; ``None`` if none occurred.
    """

    def __init__(self):
        """Create the OpenCV detector; on failure keep ``detector = None``."""
        print("[WeChatQRCode] Initializing...")
        self.last_error = None
        try:
            self.detector = cv2.wechat_qrcode_WeChatQRCode()
            print("[WeChatQRCode] Initialized successfully (models auto-downloaded)")
        except Exception as e:
            print(f"[WeChatQRCode] Init failed: {e}")
            self.detector = None

    def detect(self, frame):
        """Find the QR code whose content is ``QR_CONTENT`` in ``frame``.

        A two-level image pyramid is used: the frame is first tried at 50%
        size and, if nothing valid is found, at full size. Corners found on
        the reduced image are mapped back to ``frame`` coordinates.

        Every decoded code is checked, not just the first one, so other QR
        codes in view do not hide the target.

        Args:
            frame: image array (``frame.shape[:2]`` is height, width).

        Returns:
            ``(ok, corners, center, distance)``. On success ``corners`` is a
            4x2 float32 array ordered by ``_order_corners``, ``center`` is
            the integer-truncated mean of the corners and ``distance`` is
            the value from ``_calc_distance`` (may be ``None``). On failure
            the tuple is ``(False, None, None, None)``.
        """
        if self.detector is None:
            return False, None, None, None

        # Image pyramid: small first, then full size.
        for scale in (0.5, 1.0):
            try:
                if scale != 1.0:
                    small_h = int(frame.shape[0] * scale)
                    small_w = int(frame.shape[1] * scale)
                    img = cv2.resize(frame, (small_w, small_h))
                else:
                    img = frame

                decoded, vertices = self.detector.detectAndDecode(img)
                if decoded is None or vertices is None:
                    continue

                # Pick the first decoded entry that carries the expected
                # content and a well-formed quadrilateral.
                corners = None
                for text, quad in zip(decoded, vertices):
                    if text != QR_CONTENT:
                        continue
                    quad = np.asarray(quad, dtype=np.float32).reshape(-1, 2)
                    if quad.shape[0] == 4:
                        corners = quad
                        break
                if corners is None:
                    continue

                # Map corners found on the reduced image back to frame coordinates.
                if scale != 1.0:
                    corners = corners / scale

                corners = self._order_corners(corners)
                center = np.mean(corners, axis=0)
                dist = self._calc_distance(corners)

                return True, corners, center.astype(int), dist

            except Exception as e:
                # Best effort per pyramid level: remember the error for
                # debugging and fall through to the next scale.
                self.last_error = e
                continue

        return False, None, None, None

    def _order_corners(self, pts):
        """Return the four points ordered top-left, top-right, bottom-right, bottom-left.

        Points are sorted by their angle around the centroid (ascending
        ``arctan2`` in image coordinates) and then rotated so that the point
        with the smallest ``x + y`` comes first.
        """
        center = pts.mean(axis=0)
        angles = np.arctan2(pts[:, 1] - center[1], pts[:, 0] - center[0])
        pts = pts[np.argsort(angles)]
        # Start the sequence at the top-left corner (smallest x + y).
        start = np.argmin(pts[:, 0] + pts[:, 1])
        return np.roll(pts, -start, axis=0)

    def _calc_distance(self, corners):
        """Estimate the distance from the mean edge length in pixels.

        Uses ``QR_SIZE * FX / mean_edge_px``. Returns ``None`` when the mean
        edge is shorter than one pixel.
        """
        # Edge i runs from corner i to corner i+1 (wrapping around).
        edges = np.linalg.norm(np.roll(corners, -1, axis=0) - corners, axis=1)
        avg_px = float(np.mean(edges))
        if avg_px < 1:
            return None
        return (QR_SIZE * FX) / avg_px


# ============================================================
# 倾角计算器（基于 sign 和 Yaw 中心点）
# ============================================================
class TiltCalculator:
    """Derive tilt angles from the image positions of the sign and Yaw centres.

    Geometry assumed by the calculation:
    - the real distance between the two centres is ``SIGN_TO_YAW_DISTANCE``;
    - the Yaw marker sits directly above the sign centre on the board.

    Sign convention of the returned angles:
    - pitch: rotation about X, positive = nose up, negative = nose down;
      Yaw appearing above the sign in the image (dv < 0) gives pitch > 0.
    - roll: rotation about Y, positive = right, negative = left;
      Yaw appearing left of the sign in the image (du < 0) gives roll > 0.
    """

    def __init__(self):
        """Store the physical sign-to-Yaw centre distance (metres)."""
        self.sign_to_yaw_distance = SIGN_TO_YAW_DISTANCE

    def calculate_tilt(self, sign_center, yaw_center, avg_distance):
        """Compute pitch/roll and a height estimate from the two centres.

        The pixel coordinates must be expressed at the same image scale as
        the current global intrinsics ``FX``/``FY``.

        Args:
            sign_center: ``(u, v)`` pixel position of the sign centre, or ``None``.
            yaw_center: ``(u, v)`` pixel position of the Yaw centre, or ``None``.
            avg_distance: average estimated distance in metres. Accepted for
                interface compatibility; the calculation does not use it.

        Returns:
            ``None`` when either centre is missing or the two centres are
            less than one pixel apart; otherwise a dict with
            ``pitch_deg``, ``roll_deg`` (degrees, sign convention as in the
            class docstring), ``pixel_distance``, ``estimated_height``
            (metres), ``offset_x`` and ``offset_y`` (Yaw minus sign, pixels).
        """
        if sign_center is None or yaw_center is None:
            return None

        u_sign, v_sign = sign_center
        u_yaw, v_yaw = yaw_center

        # Image-space offset of the Yaw centre relative to the sign centre.
        du = u_yaw - u_sign
        dv = v_yaw - v_sign
        pixel_distance = np.sqrt(du**2 + dv**2)

        if pixel_distance < 1:
            return None

        # Pinhole relation: pixel_distance / f = real_distance / height.
        avg_f = (FX + FY) / 2
        estimated_height = (self.sign_to_yaw_distance * avg_f) / pixel_distance

        # Image v grows downwards and u grows to the right, so both offsets
        # are negated to obtain "nose up" / "right" as positive angles.
        pitch_deg = np.degrees(np.arctan2(-dv, avg_f))
        roll_deg = np.degrees(np.arctan2(-du, avg_f))

        return {
            'pitch_deg': pitch_deg,
            'roll_deg': roll_deg,
            'pixel_distance': pixel_distance,
            'estimated_height': estimated_height,
            'offset_x': du,
            'offset_y': dv
        }


# ============================================================
# EPnP姿态估计器
# ============================================================
class EPnPEstimator:
    """Pose estimator for the QR code, built on ``cv2.solvePnP``.

    Despite the class name, the solver flag used is
    ``cv2.SOLVEPNP_ITERATIVE``.
    """

    def __init__(self):
        """Build the 3D model of the QR code corners on the z=0 plane."""
        half = QR_SIZE / 2
        # Same corner order as the detector output: TL, TR, BR, BL.
        model = [
            (-half, -half, 0),
            (half, -half, 0),
            (half, half, 0),
            (-half, half, 0),
        ]
        self.object_points = np.array(model, dtype=np.float32)

    def estimate_pose(self, image_points):
        """Solve the pose of the QR code from its four image corners.

        Args:
            image_points: four ``(u, v)`` points, array-like of shape (4, 2).

        Returns:
            ``(rvec, tvec)`` - rotation vector and translation vector as
            returned by ``cv2.solvePnP`` - or ``(None, None)`` when the
            input is missing, does not hold four points, or solving fails.
        """
        if image_points is None:
            return None, None
        if len(image_points) != 4:
            return None, None

        # solvePnP is fed float32 copies of both point sets.
        pts_2d = np.array(image_points, dtype=np.float32)
        pts_3d = np.array(self.object_points, dtype=np.float32)

        try:
            ok, rvec, tvec = cv2.solvePnP(
                pts_3d,
                pts_2d,
                CAMERA_K.astype(np.float32),
                DIST_COEFFS.astype(np.float32),
                flags=cv2.SOLVEPNP_ITERATIVE
            )
        except Exception as e:
            print(f"EPnP estimation failed: {e}")
            return None, None

        if not ok:
            return None, None
        return rvec, tvec

    def get_pose_info(self, rvec, tvec):
        """Convert ``rvec``/``tvec`` into position, distance and angles.

        Returns:
            ``None`` if either input is ``None``; otherwise a dict with
            ``camera_pos`` (``-R.T @ tvec`` flattened), ``distance`` (norm
            of ``tvec``) and ``yaw``/``pitch``/``roll`` in degrees, each
            taken from the rotation-matrix entries shown below.
        """
        if rvec is None or tvec is None:
            return None

        rot, _ = cv2.Rodrigues(rvec)

        info = {}
        info['camera_pos'] = (-rot.T @ tvec).flatten()
        info['distance'] = np.linalg.norm(tvec.flatten())
        # Angle extraction from the rotation matrix, in degrees.
        info['yaw'] = np.degrees(np.arctan2(-rot[2, 0], rot[0, 0]))
        info['pitch'] = np.degrees(np.arctan2(rot[2, 1], rot[2, 2]))
        info['roll'] = np.degrees(np.arctan2(rot[1, 0], rot[0, 0]))
        return info


# ============================================================
# 坐标计算函数
# ============================================================
def pixel_to_camera(pixel_u, pixel_v, pixel_w, pixel_h, cls_name):
    """Convert a detection box into bearing angles and camera-frame coordinates.

    Args:
        pixel_u, pixel_v: box centre in pixels.
        pixel_w, pixel_h: box width and height in pixels.
        cls_name: ``"Yaw"`` selects ``YAW_DIAMETER``; any other name
            selects ``SIGN_DIAMETER``.

    Returns:
        ``(theta_x, theta_y, distance, X, Y, Z)``: the two bearing angles
        in degrees relative to the principal point, the distance estimated
        from the larger box side (0.0 when the box has no size), and the
        resulting camera-frame coordinates with ``Z == distance``.
    """
    # Bearing of the centre relative to the principal point.
    theta_x = math.degrees(math.atan2(pixel_u - CX, FX))
    theta_y = math.degrees(math.atan2(pixel_v - CY, FY))

    real_diameter = YAW_DIAMETER if cls_name == "Yaw" else SIGN_DIAMETER

    # The larger box side is taken as the marker's diameter in pixels.
    diameter_pixels = max(pixel_w, pixel_h)
    distance = (real_diameter * FX) / diameter_pixels if diameter_pixels > 0 else 0.0

    lateral_x = distance * math.tan(math.radians(theta_x))
    lateral_y = distance * math.tan(math.radians(theta_y))

    return theta_x, theta_y, distance, lateral_x, lateral_y, distance


def undistort_points(pts, w, h):
    """Remove lens distortion from pixel points, staying in the ``CAMERA_K`` frame.

    The undistorted points are re-projected with ``CAMERA_K`` itself so the
    result can be interpreted with the global ``FX``/``FY``/``CX``/``CY``
    (as ``pixel_to_camera`` does). Re-projecting with a different "optimal"
    camera matrix would put the points in a pixel frame those intrinsics do
    not describe.

    Args:
        pts: array-like of ``(u, v)`` pixel points, any shape reshapeable
            to (N, 2).
        w, h: image size; kept for interface compatibility, not needed for
            the computation.

    Returns:
        Array of shape (N, 2) with the undistorted pixel coordinates.
    """
    src = np.asarray(pts, dtype=np.float64).reshape(1, -1, 2)
    undistorted = cv2.undistortPoints(src, CAMERA_K, DIST_COEFFS, P=CAMERA_K)
    return undistorted.reshape(-1, 2)


# ============================================================
# 主检测器类
# ============================================================
class RealtimeDetector:
    def __init__(self, model_config):
        """Load the model, create the helper objects and open camera 0.

        Args:
            model_config: dict with at least ``'name'`` (display name) and
                ``'path'`` (weights file passed to ``YOLO``).

        A model that fails to load leaves ``self.model = None``; a camera
        that fails to open is only reported on stdout.
        """
        self.model_config = model_config
        self.model_name = model_config['name']

        # Load the YOLO model right away.
        print(f"\n[Loading YOLO model: {self.model_name}]")
        try:
            self.model = YOLO(model_config['path'])
            print(f"  OK - Model loaded")
        except Exception as e:
            print(f"  FAIL: {e}")
            self.model = None

        # QR detector, pose estimator and tilt calculator.
        self.qr_detector = WeChatQRCodeDetector()
        self.epnp = EPnPEstimator()
        self.tilt_calc = TiltCalculator()

        # Mode state: "qr" or "yolo".
        self.mode = "qr"
        self.qr_frames = 0
        self.yolo_frames = 0

        # Camera: DirectShow backend on Windows, default backend elsewhere.
        self.camera_index = 0
        import platform
        if platform.system() == "Windows":
            self.cap = cv2.VideoCapture(self.camera_index, cv2.CAP_DSHOW)
        else:
            self.cap = cv2.VideoCapture(self.camera_index)
        if not self.cap.isOpened():
            print("[!] Cannot open camera")
        else:
            print(f"[OK] Camera {self.camera_index} opened")

        # Request 1280x720; the driver is free to pick something else.
        self.cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1280)
        self.cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 720)

        # Scale the intrinsics to the resolution the camera actually
        # reports, falling back to the requested size when it reports none
        # (e.g. the camera could not be opened).
        actual_w = int(self.cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        actual_h = int(self.cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        if actual_w <= 0 or actual_h <= 0:
            actual_w, actual_h = 1280, 720
        update_camera_params_for_resolution(actual_w, actual_h, verbose=True)

        # Number of initial frames to discard while the camera warms up.
        self._warmup_frames = 5

        self.running = True
        self.conf = 0.25
        self.iou = 0.7
        self.frame_count = 0
        self.last_time = time.time()
        self.show_coords = True
        self.show_tilt = True
        self.is_recording = False
        self.video_writer = None
        self.record_start_time = None
        self.record_width = None
        self.record_height = None

        # Undistortion remap tables (built lazily) and on/off switch.
        self.map_x, self.map_y = None, None
        self.undistort_enabled = False

        # YOLO input size.
        self.imgsz = 640

    def preprocess_undistort(self, frame):
        """Return an undistorted copy of ``frame``.

        The remap tables are built from the calibration constants scaled to
        the size of ``frame`` itself, rather than from the global
        ``CAMERA_K``, which other code rescales to a different working
        resolution. The tables are cached and rebuilt only when the frame
        size changes.
        """
        h, w = frame.shape[:2]
        if self.map_x is None or getattr(self, '_undistort_size', None) != (w, h):
            scale_x = w / CALIB_WIDTH
            scale_y = h / CALIB_HEIGHT
            k_frame = np.array([
                [FX_BASE * scale_x, 0, CX_BASE * scale_x],
                [0, FY_BASE * scale_y, CY_BASE * scale_y],
                [0, 0, 1]
            ], dtype=np.float64)
            # Same matrix as source and target: the output keeps the
            # original pinhole parameters, only the distortion is removed.
            self.map_x, self.map_y = cv2.initUndistortRectifyMap(
                k_frame, DIST_COEFFS, None, k_frame, (w, h), cv2.CV_32FC1
            )
            self._undistort_size = (w, h)
        return cv2.remap(frame, self.map_x, self.map_y, cv2.INTER_LINEAR)

    def switch_camera(self):
        """Release the current capture and reopen camera 0.

        The intrinsics are rescaled to the resolution the reopened camera
        reports, and cached undistortion tables are dropped so they are
        rebuilt for the new stream.
        """
        if self.cap is not None:
            self.cap.release()
        self.camera_index = 0
        import platform
        if platform.system() == "Windows":
            self.cap = cv2.VideoCapture(self.camera_index, cv2.CAP_DSHOW)
        else:
            self.cap = cv2.VideoCapture(self.camera_index)

        # Request 1280x720; the driver is free to pick something else.
        self.cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1280)
        self.cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 720)

        # Use the reported size, falling back to the requested one when the
        # camera reports none.
        actual_w = int(self.cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        actual_h = int(self.cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        if actual_w <= 0 or actual_h <= 0:
            actual_w, actual_h = 1280, 720
        update_camera_params_for_resolution(actual_w, actual_h, verbose=True)

        # Remap tables belong to the previous stream's frame size.
        self.map_x, self.map_y = None, None

        print(f"Switched to camera #{self.camera_index}")

    def adjust_conf(self, delta):
        self.conf = max(0.0, min(1.0, self.conf + delta))
        print(f"Confidence: {self.conf:.2f}")

    def toggle_recording(self):
        if self.is_recording:
            self.stop_recording()
        else:
            self.start_recording()

    def start_recording(self):
        """Open an ``.mp4`` writer in ``BASE_DIR`` and mark recording as active.

        Does nothing when a recording is already running. The file name is
        built from the model name and a timestamp; frame size and FPS come
        from the capture device (30 FPS is used when the device reports a
        non-positive FPS). If the writer cannot be opened, the failure is
        printed and the recording state is left unchanged.
        """
        if self.is_recording:
            return

        stamp = time.strftime("%Y%m%d_%H%M%S")
        # Spaces become underscores; parentheses are dropped.
        tag = self.model_name
        for old, new in ((" ", "_"), ("(", ""), (")", "")):
            tag = tag.replace(old, new)
        filename = f"drone_detection_{tag}_{stamp}.mp4"
        filepath = os.path.join(BASE_DIR, filename)

        frame_width = int(self.cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_height = int(self.cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        reported_fps = self.cap.get(cv2.CAP_PROP_FPS)
        writer_fps = int(reported_fps) if reported_fps > 0 else 30

        codec = cv2.VideoWriter_fourcc(*'mp4v')
        self.video_writer = cv2.VideoWriter(
            filepath, codec, writer_fps, (frame_width, frame_height)
        )

        if not self.video_writer.isOpened():
            print(f"[!] VideoWriter failed to open: {filename}")
            self.video_writer = None
            return

        # Remember the frame size the writer was opened with.
        self.record_width, self.record_height = frame_width, frame_height

        self.is_recording = True
        self.record_start_time = time.time()
        print(f"\n[*] Recording: {filename}")

    def stop_recording(self):
        if not self.is_recording:
            return

        if self.video_writer is not None:
            self.video_writer.release()
            self.video_writer = None
            self.record_width = None
            self.record_height = None

        elapsed_time = time.time() - self.record_start_time
        print(f"\n[ ] Recording stopped - Duration: {elapsed_time:.1f}s")

        self.is_recording = False
        self.record_start_time = None

    def toggle_undistort(self):
        """切换去畸变"""
        self.undistort_enabled = not self.undistort_enabled
        print(f"Undistort: {'ON' if self.undistort_enabled else 'OFF'}")

    def detect(self, frame):
        """Run QR-code and YOLO detection on one frame and merge the results.

        Processing steps:
        1. Optionally undistort the frame.
        2. Resize it to 640 pixels wide (aspect ratio kept) and rescale the
           global intrinsics to that size.
        3. Run the QR detector, then YOLO (if a model is loaded), on the
           resized frame.
        4. If the QR code was found, its data takes priority (pose from
           ``self.epnp``); otherwise the YOLO detections are used.

        Pixel coordinates in the returned dict (boxes, centres, QR corners)
        are expressed in the coordinates of the input frame. Tilt is
        computed at the 640-wide working scale, where the intrinsics are
        valid; its pixel fields are then converted to input-frame pixels.

        Args:
            frame: camera image.

        Returns:
            dict with keys ``'mode'`` (``'qr'`` or ``'yolo'``),
            ``'detections'`` (list of dicts), ``'qr_detection'`` (dict or
            ``None``), ``'epnp_pose'`` (dict or ``None``) and
            ``'tilt_info'`` (dict or ``None``).
        """
        result = {
            'mode': 'none',
            'detections': [],
            'qr_detection': None,
            'epnp_pose': None,
            'tilt_info': None
        }

        # Optional undistortion of the whole frame.
        already_undistorted = bool(self.undistort_enabled)
        if already_undistorted:
            frame = self.preprocess_undistort(frame)

        # ========== Bring the frame to a 640-wide working resolution ==========
        orig_h, orig_w = frame.shape[:2]
        target_w = 640
        scale_ratio = target_w / orig_w
        # Factor that maps working-scale pixels back to input-frame pixels.
        inv_scale = orig_w / target_w
        target_h = int(orig_h * scale_ratio)
        frame_resized = cv2.resize(frame, (target_w, target_h))

        # Intrinsics must match the working resolution.
        update_camera_params_for_resolution(target_w, target_h)

        # ========== QR detection ==========
        qr_success, corners, center, qr_dist = self.qr_detector.detect(frame_resized)

        # ========== YOLO detection (always runs when a model is loaded) ==========
        # Centres are kept at working scale for the tilt computation.
        sign_center = None
        yaw_center = None
        sign_distance = None
        yaw_distance = None

        if self.model is not None:
            yolo_results = self.model.predict(
                frame_resized,
                conf=self.conf,
                iou=self.iou,
                imgsz=self.imgsz,
                verbose=False
            )

            if yolo_results and yolo_results[0].boxes is not None:
                for box in yolo_results[0].boxes:
                    cls_id = int(box.cls[0])
                    cls_name = CLASS_NAMES.get(cls_id, f"class_{cls_id}")
                    conf = float(box.conf[0])

                    x1, y1, x2, y2 = box.xyxy[0].tolist()
                    center_u = (x1 + x2) / 2
                    center_v = (y1 + y2) / 2
                    pw = x2 - x1
                    ph = y2 - y1

                    # Undistort the centre, unless the whole frame was
                    # already remapped (doing it twice would over-correct).
                    if already_undistorted:
                        u_undist, v_undist = center_u, center_v
                    else:
                        pts = np.array([[center_u, center_v]])
                        pts_undist = undistort_points(pts, target_w, target_h)
                        u_undist = pts_undist[0, 0]
                        v_undist = pts_undist[0, 1]

                    theta_x, theta_y, distance, X, Y, Z = pixel_to_camera(
                        u_undist, v_undist, pw, ph, cls_name
                    )

                    # Remember sign / Yaw centres for the tilt computation.
                    if cls_name == "sign":
                        sign_center = (center_u, center_v)
                        sign_distance = distance
                    elif cls_name == "Yaw":
                        yaw_center = (center_u, center_v)
                        yaw_distance = distance

                    # Report geometry in input-frame pixels (used for drawing).
                    result['detections'].append({
                        'cls_id': cls_id,
                        'cls_name': cls_name,
                        'conf': conf,
                        'bbox': (x1 * inv_scale, y1 * inv_scale,
                                 x2 * inv_scale, y2 * inv_scale),
                        'bbox_size': (pw * inv_scale, ph * inv_scale),
                        'center': (center_u * inv_scale, center_v * inv_scale),
                        'coords': (theta_x, theta_y, distance, X, Y, Z)
                    })

                self.yolo_frames += 1

        # ========== Tilt from the sign / Yaw centres ==========
        tilt_info = None
        if (sign_center is not None and yaw_center is not None
                and sign_distance and yaw_distance):
            avg_distance = (sign_distance + yaw_distance) / 2
            if avg_distance > 0:
                tilt_info = self.tilt_calc.calculate_tilt(
                    sign_center, yaw_center, avg_distance
                )
                if tilt_info is not None:
                    # Pixel-valued fields are reported in input-frame pixels.
                    for key in ('pixel_distance', 'offset_x', 'offset_y'):
                        if key in tilt_info:
                            tilt_info[key] = tilt_info[key] * inv_scale

        # ========== QR-first strategy ==========
        if qr_success and corners is not None:
            # QR geometry in input-frame pixels.
            corners_orig = corners * inv_scale
            center_orig = (center[0] * inv_scale, center[1] * inv_scale)

            # Pose is solved at working scale, where the intrinsics apply.
            # NOTE(review): the estimator always applies DIST_COEFFS, also
            # when the frame was already undistorted above - confirm whether
            # that combination is intended.
            rvec, tvec = self.epnp.estimate_pose(corners)
            if rvec is not None:
                epnp_pose = self.epnp.get_pose_info(rvec, tvec)
                result['qr_detection'] = {
                    'corners': corners_orig,
                    'center': center_orig,
                    'distance': epnp_pose['distance'] if epnp_pose else qr_dist
                }
                result['epnp_pose'] = epnp_pose

                # The QR result replaces the YOLO detections.
                result['detections'] = [{
                    'cls_name': 'qrcode',
                    'conf': 0.95,
                    'bbox': None,
                    'center': center_orig,
                    'coords': (epnp_pose['yaw'], epnp_pose['pitch'],
                              result['qr_detection']['distance'],
                              0, 0, result['qr_detection']['distance'])
                }]
                result['mode'] = "qr"
                self.mode = "qr"
                self.qr_frames += 1

                # Tilt from YOLO's sign / Yaw is still reported if available.
                if tilt_info is not None:
                    result['tilt_info'] = tilt_info

                return result

            # Pose solving failed but the QR code is present.
            result['qr_detection'] = {
                'corners': corners_orig,
                'center': center_orig,
                'distance': qr_dist
            }
            result['mode'] = "qr"
            self.mode = "qr"
            self.qr_frames += 1
            return result

        # No QR result: fall back to the YOLO data.
        self.mode = "yolo"
        result['mode'] = "yolo"
        if tilt_info is not None:
            result['tilt_info'] = tilt_info
        return result

    def draw_ui(self, frame, detections, qr_detection=None, epnp_pose=None, tilt_info=None):
        """Draw the status bars, panels and QR outline onto ``frame``.

        Args:
            frame: image to draw on (modified in place).
            detections: list of detection dicts with ``'cls_name'``,
                ``'conf'``, ``'coords'`` and optionally ``'bbox_size'``.
            qr_detection: dict with ``'corners'`` and ``'distance'``, or
                ``None``. Drawn only when ``epnp_pose`` is also given.
            epnp_pose: dict with ``'yaw'`` and ``'pitch'``, or ``None``.
            tilt_info: dict with ``'pitch_deg'``, ``'roll_deg'`` and
                ``'estimated_height'``, or ``None``.

        Returns:
            The same ``frame`` object.

        Side effects:
            Updates ``self.last_time``, which is used to compute the FPS.
        """
        h, w = frame.shape[:2]

        # Top status bar.
        bar_h = 45
        cv2.rectangle(frame, (0, 0), (w, bar_h), (20, 20, 30), -1)

        # Model name (top-left).
        cv2.putText(frame, f"{self.model_name}", (12, 30),
                   cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 100), 2)

        # Confidence threshold.
        cv2.putText(frame, f"Conf:{self.conf:.2f}", (320, 30),
                   cv2.FONT_HERSHEY_SIMPLEX, 0.55, (200, 200, 200), 2)

        # Current mode (QR / YOLO).
        if self.mode == "qr":
            mode_color = (0, 255, 255)
            mode_text = "QR"
        else:
            mode_color = (0, 200, 100)
            mode_text = "YOLO"
        cv2.putText(frame, f"{mode_text}", (450, 30),
                   cv2.FONT_HERSHEY_SIMPLEX, 0.6, mode_color, 2)

        # FPS from the time since the previous call; a zero or negative
        # interval (coarse clock) is shown as 0 instead of dividing by zero.
        current_time = time.time()
        frame_dt = current_time - self.last_time
        fps = 1.0 / frame_dt if self.last_time > 0 and frame_dt > 0 else 0
        self.last_time = current_time
        fps_color = (150, 255, 150) if fps > 15 else ((200, 200, 100) if fps > 5 else (200, 100, 100))
        cv2.putText(frame, f"FPS:{int(fps)}", (550, 30),
                   cv2.FONT_HERSHEY_SIMPLEX, 0.55, fps_color, 2)

        # YOLO input size.
        cv2.putText(frame, f"sz:{self.imgsz}", (650, 30),
                   cv2.FONT_HERSHEY_SIMPLEX, 0.5, (180, 180, 180), 1)

        # Recording indicator.
        if self.is_recording:
            cv2.circle(frame, (w - 25, 22), 8, (0, 0, 255), -1)
            cv2.putText(frame, f"REC", (w - 80, 30),
                       cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2)

        # QR detection outline and pose summary.
        if qr_detection is not None and epnp_pose is not None:
            corners = qr_detection['corners'].astype(np.int32)
            # QR outline.
            cv2.polylines(frame, [corners], True, (0, 255, 255), 3)

            # Numbered corner markers.
            for i, corner in enumerate(corners):
                cv2.circle(frame, tuple(corner), 5, (0, 150, 255), -1)
                cv2.putText(frame, str(i+1), tuple(corner + 8),
                           cv2.FONT_HERSHEY_SIMPLEX, 0.4, (0, 200, 255), 1)

            # QR distance and angles.
            info = f"QR: {qr_detection['distance']:.2f}m | Yaw:{epnp_pose['yaw']:.1f}deg Pitch:{epnp_pose['pitch']:.1f}deg"
            cv2.rectangle(frame, (5, 55), (620, 85), (30, 30, 40), -1)
            cv2.putText(frame, info, (12, 78),
                       cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 255), 1)

        # ========== Tilt panel ==========
        if self.show_tilt and tilt_info is not None:
            pitch = tilt_info['pitch_deg']
            roll = tilt_info['roll_deg']
            height = tilt_info['estimated_height']

            tilt_panel_h = 65
            cv2.rectangle(frame, (w - 220, bar_h + 10), (w - 10, bar_h + 10 + tilt_panel_h), (15, 15, 35), -1)

            tilt_text = f"TILT"
            cv2.putText(frame, tilt_text, (w - 215, bar_h + 35),
                       cv2.FONT_HERSHEY_SIMPLEX, 0.5, (200, 200, 100), 2)

            # Pitch; the colour changes once the magnitude reaches 10 degrees.
            pitch_color = (100, 200, 255) if abs(pitch) < 10 else (100, 100, 255)
            pitch_dir = "UP" if pitch > 0 else "DOWN"
            cv2.putText(frame, f"P:{pitch:+.1f}deg {pitch_dir}", (w - 215, bar_h + 52),
                       cv2.FONT_HERSHEY_SIMPLEX, 0.4, pitch_color, 1)

            # Roll; same colour rule as pitch.
            roll_color = (100, 200, 255) if abs(roll) < 10 else (100, 100, 255)
            roll_dir = "RIGHT" if roll > 0 else "LEFT"
            cv2.putText(frame, f"R:{roll:+.1f}deg {roll_dir}", (w - 215, bar_h + 65),
                       cv2.FONT_HERSHEY_SIMPLEX, 0.4, roll_color, 1)

        # ========== Coordinate panel ==========
        if self.show_coords and detections:
            panel_h = len(detections) * 75 + 20
            panel_w = 480
            cv2.rectangle(frame, (8, bar_h + 10), (panel_w, bar_h + 10 + panel_h), (15, 15, 25), -1)

            # Per-class label colours, keyed by lower-cased class name so
            # that the model's "Yaw" class gets its own colour.
            class_colors = {
                "sign": (0, 200, 100),
                "yaw": (180, 100, 255),
                "qrcode": (0, 255, 255),
            }

            y_offset = bar_h + 40
            for det in detections:
                cls_name = det['cls_name']
                conf = det['conf']
                coords = det['coords']
                bbox_size = det.get('bbox_size', (0, 0))
                theta_x, theta_y, distance, X, Y, Z = coords

                color = class_colors.get(cls_name.lower(), (200, 200, 200))

                # Class label and confidence.
                cv2.putText(frame, f"[{cls_name.upper()}] {conf:.2f}", (15, y_offset),
                           cv2.FONT_HERSHEY_SIMPLEX, 0.55, color, 2)

                # Box size in pixels (debug aid).
                info0 = f"  px:{bbox_size[0]:.0f}x{bbox_size[1]:.0f}"
                cv2.putText(frame, info0, (200, y_offset),
                           cv2.FONT_HERSHEY_SIMPLEX, 0.45, (150, 150, 150), 1)

                # Distance, bearing angles and camera-frame coordinates.
                info1 = f"  {distance:.2f}m | ({theta_x:+.1f}deg, {theta_y:+.1f}deg)"
                info2 = f"  X:{X:+.2f} Y:{Y:+.2f} Z:{Z:.2f}"
                cv2.putText(frame, info1, (20, y_offset + 22),
                           cv2.FONT_HERSHEY_SIMPLEX, 0.45, (220, 220, 220), 1)
                cv2.putText(frame, info2, (20, y_offset + 44),
                           cv2.FONT_HERSHEY_SIMPLEX, 0.45, (180, 180, 255), 1)

                y_offset += 75

        # Bottom key-hint bar.
        cv2.rectangle(frame, (0, h - 32), (w, h), (15, 15, 25), -1)
        cv2.putText(frame, "[C]am  [R]ec  [U]ndist  [D]ata  [T]ilt  [S]kip  [I]sz  [Q]uit",
                   (15, h - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (180, 180, 180), 1)

        return frame

    def run(self):
        """Run the interactive capture -> detect -> draw -> display loop.

        Prints a startup banner, opens the preview window, and then, for every
        frame read from ``self.cap``, calls ``self.detect`` and
        ``self.draw_ui``, overlays the detection boxes and the sign-to-Yaw
        line, optionally writes the frame to ``self.video_writer`` and
        dispatches keyboard commands until ``self.running`` becomes false.

        The active recording, the capture device and the windows are released
        in a ``finally`` clause, so they are cleaned up even when the loop is
        interrupted by an exception or Ctrl+C.
        """
        print("\n" + "=" * 60)
        print("  Drone Landing Detection (WeChatQRCode-First, YOLO-Fallback)")
        print("=" * 60)
        print(f"  Model: {self.model_name}")
        print(f"  Camera: fx={FX}, fy={FY}, cx={CX}, cy={CY}")
        print(f"  Sign: {SIGN_DIAMETER*1000:.0f}mm | Yaw: {YAW_DIAMETER*1000:.0f}mm")
        print(f"  QR Code: {QR_SIZE*1000:.0f}mm (content: {QR_CONTENT})")
        print(f"  Tilt: Sign->Yaw distance = {SIGN_TO_YAW_DISTANCE*1000:.0f}mm")
        print("  Strategy: WeChatQRCode First -> YOLO Fallback")
        print("  Resolution: Auto-scale to 640px width")
        print("=" * 60)
        print("\nKeys:")
        print("  R: record  C: camera  U: undistort  D: coords  T: tilt")
        print("  Y: YOLO input size (320/480/640)")
        print("  +/-: confidence  Q: quit")
        print("=" * 60 + "\n")

        window_name = "Drone Landing + WeChatQRCode"
        cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
        cv2.resizeWindow(window_name, 1280, 720)
        cv2.setWindowProperty(window_name, cv2.WND_PROP_TOPMOST, 1)

        def handle_key(key):
            """Apply one keyboard command; ``key`` is ``cv2.waitKey(...) & 0xFF``."""
            if key in (ord('q'), ord('Q'), 27):
                # Accept both cases, like every other command; 27 is ESC.
                self.running = False
            elif key in (ord('r'), ord('R')):
                self.toggle_recording()
            elif key in (ord('c'), ord('C')):
                self.switch_camera()
            elif key in (ord('d'), ord('D')):
                self.show_coords = not self.show_coords
                print(f"Coords display: {'ON' if self.show_coords else 'OFF'}")
            elif key in (ord('t'), ord('T')):
                self.show_tilt = not self.show_tilt
                print(f"Tilt display: {'ON' if self.show_tilt else 'OFF'}")
            elif key in (ord('u'), ord('U')):
                self.toggle_undistort()
            elif key in (ord('+'), ord('=')):
                self.adjust_conf(0.05)
            elif key == ord('-'):
                self.adjust_conf(-0.05)
            elif key in (ord('y'), ord('Y')):
                # Cycle the YOLO input size; an unknown current size is
                # treated as the last entry so the next press selects 320.
                sizes = [320, 480, 640]
                idx = sizes.index(self.imgsz) if self.imgsz in sizes else 2
                self.imgsz = sizes[(idx + 1) % len(sizes)]
                print(f"YOLO imgsz: {self.imgsz}")

        try:
            while self.running:
                ret, frame = self.cap.read()
                if not ret:
                    # Wait ~0.3 s before retrying, but do it through waitKey so
                    # the window keeps processing events and the user can still
                    # quit or switch camera while no frames are arriving.
                    handle_key(cv2.waitKey(300) & 0xFF)
                    continue

                # Warm-up phase: discard the first few frames.
                if self._warmup_frames > 0:
                    self._warmup_frames -= 1
                    continue

                self.frame_count += 1

                # Detection runs on every frame.
                result = self.detect(frame)
                detections = result['detections']

                # Draw the UI overlay.
                display_frame = self.draw_ui(
                    frame,
                    detections,
                    result.get('qr_detection'),
                    result.get('epnp_pose'),
                    result.get('tilt_info')
                )

                # Draw the detection boxes (only detections that carry a bbox).
                for det in detections:
                    if det.get('bbox') is not None:
                        x1, y1, x2, y2 = det['bbox']
                        cls_name = det['cls_name']
                        conf = det['conf']
                        center = det['center']

                        color = (0, 200, 100) if cls_name == "sign" else (180, 100, 255)
                        cv2.rectangle(display_frame, (int(x1), int(y1)), (int(x2), int(y2)), color, 2)
                        label = f"{cls_name.upper()} {conf:.2f}"
                        cv2.putText(display_frame, label, (int(x1), int(y1)-8),
                                   cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

                        # Mark the box centre.
                        cv2.circle(display_frame, (int(center[0]), int(center[1])), 5, color, -1)

                # Draw the sign -> Yaw connecting line when tilt info exists.
                tilt_info = result.get('tilt_info')
                if tilt_info is not None:
                    sign_det = None
                    yaw_det = None
                    for det in detections:
                        # Compare case-insensitively: the class is spelled
                        # "Yaw" in some places of this file and "yaw" in others.
                        name = det['cls_name'].lower()
                        if name == 'sign':
                            sign_det = det
                        elif name == 'yaw':
                            yaw_det = det

                    # Line between the sign centre and the Yaw centre.
                    if sign_det and yaw_det:
                        pt1 = (int(sign_det['center'][0]), int(sign_det['center'][1]))
                        pt2 = (int(yaw_det['center'][0]), int(yaw_det['center'][1]))
                        cv2.line(display_frame, pt1, pt2, (255, 200, 0), 2)
                        cv2.circle(display_frame, pt1, 6, (0, 200, 100), -1)  # sign centre - green
                        cv2.circle(display_frame, pt2, 6, (180, 100, 255), -1)  # yaw centre - purple

                # Recording.
                if self.is_recording and self.video_writer is not None:
                    dw, dh = display_frame.shape[1], display_frame.shape[0]
                    if dw != self.record_width or dh != self.record_height:
                        # Frame size changed: start a new VideoWriter (and a
                        # new output file) that matches the current size.
                        self.video_writer.release()
                        timestamp = time.strftime("%Y%m%d_%H%M%S")
                        filepath = os.path.join(BASE_DIR, f"drone_detection_{timestamp}.mp4")
                        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
                        self.video_writer = cv2.VideoWriter(filepath, fourcc, 30, (dw, dh))
                        self.record_width, self.record_height = dw, dh
                        print(f"[!] VideoWriter resized to {dw}x{dh}")
                    self.video_writer.write(display_frame)
                elif self.is_recording and self.video_writer is None:
                    print("[!] Recording error: video_writer is None")

                cv2.imshow(window_name, display_frame)

                # Keyboard handling.
                handle_key(cv2.waitKey(1) & 0xFF)
        finally:
            # Always finalize the recording and free the camera and windows,
            # including when the loop is aborted by an exception or Ctrl+C.
            try:
                if self.is_recording:
                    self.stop_recording()
            finally:
                self.cap.release()
                cv2.destroyAllWindows()

        print("\nExited")


if __name__ == "__main__":
    # Script entry point: verify that the configured model file exists, then
    # build the detector and hand control to its interactive loop.
    if not os.path.exists(MODEL_CONFIG['path']):
        print(f"\n[!] Model file not found: {MODEL_CONFIG['path']}")
        # sys.exit instead of the bare exit(): the latter is injected by the
        # `site` module for interactive use and is not guaranteed to exist
        # (e.g. under `python -S` or in frozen/embedded builds).
        sys.exit(1)
    print(f"[OK] Using model: {MODEL_CONFIG['name']}")

    detector = RealtimeDetector(MODEL_CONFIG)
    detector.run()
无人机自主降落引导标识视觉识别定位

项目背景

核心技术贡献

技术栈

系统演示 - 检测效果

引导标识设计

检测效果视频

核心代码展示

项目成果