YOLOv8 Application Development

Using Qualcomm's Quick AI Application Builder (QAI AppBuilder) tool, the wxWidgets library, and a YOLOv8 model, we will quickly build an app that captures frames from a camera, runs object detection on them, and displays the results on screen.

Environment Setup

sudo apt update -y
sudo apt install -y g++ make pkg-config wx3.2-headers libwxgtk3.2-dev
sudo apt install -y libopencv-core-dev libopencv-calib3d-dev libopencv-dnn-dev
sudo apt install -y libopencv-objdetect-dev libopencv-photo-dev libopencv-stitching-dev
sudo apt install -y libopencv-video-dev libgstreamer-plugins-base1.0-dev

Download the QAI AppBuilder Tool and Extract It to Your Home Directory

# Click the link above to download the archive ai-engine-direct-helper.tar and place it at ~/ai-engine-direct-helper.tar
# Extract it
cd ~
tar -xvf ai-engine-direct-helper.tar

Edit ai-engine-direct-helper/src/LibAppBuilder.cpp and remove the empty main function in it. This source file is compiled into our program later, but its main function is not needed and would conflict with the one in main.cpp. A sketch of the placeholder to delete is shown below.
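
The placeholder entry point should look roughly like this (the exact body may differ between releases of ai-engine-direct-helper; this is only a sketch of what to look for):

// Empty placeholder main() in LibAppBuilder.cpp -- delete it so it does not
// collide with the main() defined in our main.cpp at link time.
int main(int argc, char** argv)
{
    return 0;
}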

Download the QAIRT Toolkit and Extract It to Your Home Directory

# Click the link above to download the archive v2.37.1.250807.zip and place it at ~/v2.37.1.250807.zip
# Extract it (this is a zip archive, so use unzip rather than tar)
cd ~
unzip v2.37.1.250807.zip

Write the Code

Create a main.cpp file with the following content:

#include <iostream>
#include <filesystem>
#include <string>
#include <vector>
#include <cstddef>
#include <opencv2/opencv.hpp>
#include <opencv2/core/utils/logger.hpp>
#include <algorithm>
#include <map>
#include <thread>

#include <wx/wx.h>
#include <wx/dcclient.h>
#include <wx/log.h>
#include <wx/rawbmp.h>
#include <wx/thread.h>

#include <gst/gst.h>
#include <gst/app/gstappsink.h>
#include <gst/video/video-info.h>

#include <chrono>
#include <atomic>

// QAI AppBuilder API (LibAppBuilder, SetLogLevel, SetPerfProfileGlobal, ...);
// this header ships with ai-engine-direct-helper/src
#include "LibAppBuilder.hpp"

namespace fs = std::filesystem;
const std::string MODEL_NAME = "yolov8_det";
const int IMAGE_SIZE = 640;
unsigned long frameCounter = 0;

// Class mapping for YOLOv8
std::map<int, std::string> class_map = {
    {0, "person"},
    {1, "bicycle"},
    {2, "car"},
    {3, "motorcycle"},
    {4, "airplane"},
    {5, "bus"},
    {6, "train"},
    {7, "truck"},
    {8, "boat"},
    {9, "traffic light"},
    {10, "fire hydrant"},
    {11, "stop sign"},
    {12, "parking meter"},
    {13, "bench"},
    {14, "bird"},
    {15, "cat"},
    {16, "dog"},
    {17, "horse"},
    {18, "sheep"},
    {19, "cow"},
    {20, "elephant"},
    {21, "bear"},
    {22, "zebra"},
    {23, "giraffe"},
    {24, "backpack"},
    {25, "umbrella"},
    {26, "handbag"},
    {27, "tie"},
    {28, "suitcase"},
    {29, "frisbee"},
    {30, "skis"},
    {31, "snowboard"},
    {32, "sports ball"},
    {33, "kite"},
    {34, "baseball bat"},
    {35, "baseball glove"},
    {36, "skateboard"},
    {37, "surfboard"},
    {38, "tennis racket"},
    {39, "bottle"},
    {40, "wine glass"},
    {41, "cup"},
    {42, "fork"},
    {43, "knife"},
    {44, "spoon"},
    {45, "bowl"},
    {46, "banana"},
    {47, "apple"},
    {48, "sandwich"},
    {49, "orange"},
    {50, "broccoli"},
    {51, "carrot"},
    {52, "hot dog"},
    {53, "pizza"},
    {54, "donut"},
    {55, "cake"},
    {56, "chair"},
    {57, "couch"},
    {58, "potted plant"},
    {59, "bed"},
    {60, "dining table"},
    {61, "toilet"},
    {62, "tv"},
    {63, "laptop"},
    {64, "mouse"},
    {65, "remote"},
    {66, "keyboard"},
    {67, "cell phone"},
    {68, "microwave"},
    {69, "oven"},
    {70, "toaster"},
    {71, "sink"},
    {72, "refrigerator"},
    {73, "book"},
    {74, "clock"},
    {75, "vase"},
    {76, "scissors"},
    {77, "teddy bear"},
    {78, "hair drier"},
    {79, "toothbrush"}
};

struct Detection {
    float x1, y1, x2, y2;
    float score;
    int class_id;
};

float calculateIoU(const Detection& box1, const Detection& box2) {
    float inter_x1 = std::max(box1.x1, box2.x1);
    float inter_y1 = std::max(box1.y1, box2.y1);
    float inter_x2 = std::min(box1.x2, box2.x2);
    float inter_y2 = std::min(box1.y2, box2.y2);

    float inter_area = std::max(0.0f, inter_x2 - inter_x1) * std::max(0.0f, inter_y2 - inter_y1);
    float box1_area = (box1.x2 - box1.x1) * (box1.y2 - box1.y1);
    float box2_area = (box2.x2 - box2.x1) * (box2.y2 - box2.y1);

    // A tiny epsilon guards against 0/0 when both boxes are degenerate
    return inter_area / (box1_area + box2_area - inter_area + 1e-9f);
}
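
// Worked example for calculateIoU(): two unit squares, the second shifted
// right by 0.5, overlap in a 0.5 x 1.0 strip, so
// IoU = 0.5 / (1.0 + 1.0 - 0.5) = 1/3.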

std::vector<Detection> nonMaximumSuppression(const std::vector<Detection>& detections,
                                             float score_threshold = 0.45,
                                             float iou_threshold = 0.7) {
    std::vector<Detection> filtered_detections;
    std::vector<Detection> candidates;

    // Filter by score threshold
    for (const auto& det : detections) {
        if (det.score >= score_threshold) {
            candidates.push_back(det);
        }
    }

    // Sort by score in descending order
    std::sort(candidates.begin(), candidates.end(),
              [](const Detection& a, const Detection& b) { return a.score > b.score; });

    // NMS
    while (!candidates.empty()) {
        // Take the highest scoring detection
        Detection best_det = candidates[0];
        filtered_detections.push_back(best_det);
        candidates.erase(candidates.begin());

        // Remove overlapping detections
        for (auto it = candidates.begin(); it != candidates.end();) {
            if (calculateIoU(best_det, *it) > iou_threshold) {
                it = candidates.erase(it);
            } else {
                ++it;
            }
        }
    }

    return filtered_detections;
}
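
// Example of the thresholds above: with iou_threshold = 0.7, two boxes of the
// same object with IoU 0.8 count as duplicates and only the higher-scoring
// one is kept, while two boxes with IoU 0.5 both survive as separate
// detections.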

void drawBoxOnImage(cv::Mat& image, const Detection& det,
                    float scale_x = 1.0f, float scale_y = 1.0f) {
    // Scale coordinates back to original image size
    int x1 = static_cast<int>(det.x1 * scale_x);
    int y1 = static_cast<int>(det.y1 * scale_y);
    int x2 = static_cast<int>(det.x2 * scale_x);
    int y2 = static_cast<int>(det.y2 * scale_y);

    // Clamp coordinates to image boundaries
    x1 = std::max(0, std::min(image.cols - 1, x1));
    y1 = std::max(0, std::min(image.rows - 1, y1));
    x2 = std::max(0, std::min(image.cols - 1, x2));
    y2 = std::max(0, std::min(image.rows - 1, y2));

    if (x1 >= x2 || y1 >= y2) return; // Skip invalid boxes

    // Draw rectangle (BGR color: green)
    cv::rectangle(image, cv::Point(x1, y1), cv::Point(x2, y2), cv::Scalar(0, 255, 0), 2);

    // Create label text
    std::string label = class_map[det.class_id] + " " + std::to_string(det.score).substr(0, 4);

    // Draw label background
    int baseline = 0;
    cv::Size text_size = cv::getTextSize(label, cv::FONT_HERSHEY_SIMPLEX, 0.5, 2, &baseline);
    cv::rectangle(image, cv::Point(x1, y1 - text_size.height - baseline),
                  cv::Point(x1 + text_size.width, y1), cv::Scalar(0, 255, 0), cv::FILLED);

    // Draw label text
    cv::putText(image, label, cv::Point(x1, y1 - baseline),
                cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0), 2);
}

class YOLOv8Detector {
private:
    LibAppBuilder libAppBuilder;
    bool initialized = false;

public:
    YOLOv8Detector() = default;

    ~YOLOv8Detector() {
        if (initialized) {
            libAppBuilder.ModelDestroy(MODEL_NAME);
        }
    }

    bool Initialize() {
        // Get current working directory
        fs::path execution_ws = fs::current_path();
        fs::path backend_lib_path = execution_ws / "libQnnHtp.so";
        fs::path system_lib_path = execution_ws / "libQnnSystem.so";
        fs::path model_path = execution_ws / (MODEL_NAME + ".bin");

        // Disable OpenCV logging
        cv::utils::logging::setLogLevel(cv::utils::logging::LOG_LEVEL_ERROR);

        SetLogLevel(1);
        SetProfilingLevel(1);    // ProfilingLevel::BASIC

        std::cout << "Initializing YOLOv8 model..." << std::endl;
        int ret = libAppBuilder.ModelInitialize(MODEL_NAME, model_path.string(),
                                                backend_lib_path.string(), system_lib_path.string());

        if (ret < 0) {
            std::cout << "Failed to initialize YOLOv8 model" << std::endl;
            return false;
        }

        initialized = true;
        std::cout << "YOLOv8 model initialized successfully" << std::endl;
        return true;
    }

    std::vector<Detection> Detect(cv::Mat& image) {
        if (!initialized) return {};

        frameCounter++;

        // Save original dimensions for scaling
        int orig_height = image.rows;
        int orig_width = image.cols;
        float scale_x = static_cast<float>(orig_width) / IMAGE_SIZE;
        float scale_y = static_cast<float>(orig_height) / IMAGE_SIZE;

        // Resize image to model input size
        cv::Mat resized_image;
        cv::resize(image, resized_image, cv::Size(IMAGE_SIZE, IMAGE_SIZE),
                   0, 0, cv::INTER_LINEAR);

        // Convert BGR to RGB and normalize to [0, 1]
        cv::Mat rgb_image;
        cv::cvtColor(resized_image, rgb_image, cv::COLOR_BGR2RGB);
        cv::Mat float_image;
        rgb_image.convertTo(float_image, CV_32FC3, 1.0 / 255.0);

        // Prepare input tensor (NHWC format)
        std::vector<float> input_data(IMAGE_SIZE * IMAGE_SIZE * 3);
        memcpy(input_data.data(), float_image.data, input_data.size() * sizeof(float));

        // Set performance profile
        SetPerfProfileGlobal("burst");

        // Prepare input/output buffers
        std::vector<uint8_t*> inputBuffers;
        std::vector<uint8_t*> outputBuffers;
        std::vector<size_t> outputSize;

        inputBuffers.push_back(reinterpret_cast<uint8_t*>(input_data.data()));

        // Perform inference
        std::string perfProfile = "burst";
        int ret = libAppBuilder.ModelInference(MODEL_NAME, inputBuffers, outputBuffers,
                                               outputSize, perfProfile);

        // Release performance profile
        RelPerfProfileGlobal();

        if (ret < 0) {
            std::cout << "Inference failed" << std::endl;
            return {};
        }

        // Parse output tensors
        if (outputBuffers.size() < 3) {
            std::cout << "Unexpected number of output buffers: " << outputBuffers.size() << std::endl;
            return {};
        }

        // Get output data
        float* scores_data = reinterpret_cast<float*>(outputBuffers[0]);
        float* class_ids_data = reinterpret_cast<float*>(outputBuffers[1]);
        float* boxes_data = reinterpret_cast<float*>(outputBuffers[2]);

        // Assuming output shape is [1, 8400, ...]
        const int num_predictions = 8400;

        // Collect all detections
        std::vector<Detection> all_detections;
        for (int i = 0; i < num_predictions; i++) {
            Detection det;
            det.score = scores_data[i];
            det.class_id = static_cast<int>(class_ids_data[i]);

            // Box coordinates: [x1, y1, x2, y2]
            det.x1 = boxes_data[i * 4];
            det.y1 = boxes_data[i * 4 + 1];
            det.x2 = boxes_data[i * 4 + 2];
            det.y2 = boxes_data[i * 4 + 3];

            // Scale back to original image coordinates
            det.x1 *= scale_x;
            det.y1 *= scale_y;
            det.x2 *= scale_x;
            det.y2 *= scale_y;

            all_detections.push_back(det);
        }

        // Apply NMS
        std::vector<Detection> filtered_detections = nonMaximumSuppression(all_detections);

        // Free output buffers
        for (auto buffer : outputBuffers) {
            free(buffer);
        }

        return filtered_detections;
    }
};
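
// Standalone usage sketch (hypothetical; handy for testing the detector
// without the GUI). Assumes yolov8_det.bin, libQnnHtp.so and libQnnSystem.so
// are in the working directory and "test.jpg" is an image you supply:
//
//   YOLOv8Detector detector;
//   if (detector.Initialize()) {
//       cv::Mat img = cv::imread("test.jpg");
//       for (const auto& det : detector.Detect(img))
//           drawBoxOnImage(img, det);
//       cv::imwrite("result.jpg", img);
//   }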

class CameraFrame : public wxFrame
{
public:
    CameraFrame(const wxString& title);
    ~CameraFrame();

private:
    wxMutex m_imageMutex;
    wxImage m_frameImage;
    GstElement *m_pipeline = nullptr;
    GstElement *m_appsink = nullptr;
    YOLOv8Detector m_detector;

    bool InitGstPipeline();
    static GstFlowReturn NewSampleCallback(GstAppSink *sink, gpointer user_data);
    void ConvertGstBufferToWxImage(GstBuffer *buffer, GstCaps *caps);
    void OnPaint(wxPaintEvent& event);
    void OnClose(wxCloseEvent& event);

    DECLARE_EVENT_TABLE()
};

class CameraApp : public wxApp
{
public:
    virtual bool OnInit();
};

IMPLEMENT_APP(CameraApp)

BEGIN_EVENT_TABLE(CameraFrame, wxFrame)
    EVT_PAINT(CameraFrame::OnPaint)
    EVT_CLOSE(CameraFrame::OnClose)
END_EVENT_TABLE()

bool CameraApp::OnInit()
{
    gst_init(nullptr, nullptr);

    CameraFrame *frame = new CameraFrame(wxT("YOLOv8 Real-time Detection (640x480)"));
    frame->Show(true);
    return true;
}

CameraFrame::CameraFrame(const wxString& title)
    : wxFrame(nullptr, wxID_ANY, title, wxDefaultPosition, wxSize(640, 480))
{
    // Initialize detector
    if (!m_detector.Initialize()) {
        wxLogError("Failed to initialize YOLOv8 detector");
        Close();
        return;
    }

    // Initialize pipeline and start
    if (InitGstPipeline()) {
        gst_element_set_state(m_pipeline, GST_STATE_PLAYING);
        //wxLogMessage("GStreamer pipeline started successfully");
    } else {
        wxLogError("Failed to init GStreamer pipeline");
        Close();
    }
}

CameraFrame::~CameraFrame()
{
    if (m_pipeline) {
        gst_element_set_state(m_pipeline, GST_STATE_NULL);
        gst_object_unref(m_pipeline);
    }
    gst_deinit();
}

static void OnBusError(GstBus* bus, GstMessage* msg, gpointer data)
{
    GError* err = nullptr;
    gchar* debug_info = nullptr;
    gst_message_parse_error(msg, &err, &debug_info);
    wxLogError("GStreamer error: %s", err ? err->message : "Unknown error");
    g_error_free(err);
    g_free(debug_info);
}

bool CameraFrame::InitGstPipeline()
{
    const gchar *pipelineDesc = "qtiqmmfsrc name=camsrc ! "
                                "video/x-raw,format=NV12,width=640,height=480,framerate=30/1 ! "
                                "videoconvert ! "
                                "video/x-raw,format=BGR ! "
                                "appsink name=appsink sync=false max-buffers=1 drop=true";

    // Create pipeline
    m_pipeline = gst_parse_launch(pipelineDesc, nullptr);
    if (!m_pipeline) return false;

    GstBus* bus = gst_pipeline_get_bus(GST_PIPELINE(m_pipeline));
    gst_bus_add_signal_watch(bus);
    g_signal_connect(bus, "message::error", G_CALLBACK(OnBusError), this);
    gst_object_unref(bus);

    // Get appsink element
    m_appsink = gst_bin_get_by_name(GST_BIN(m_pipeline), "appsink");
    if (!m_appsink) return false;

    // Configure appsink
    gst_app_sink_set_emit_signals(GST_APP_SINK(m_appsink), TRUE);
    gst_app_sink_set_drop(GST_APP_SINK(m_appsink), TRUE);
    gst_app_sink_set_max_buffers(GST_APP_SINK(m_appsink), 1);
    GstAppSinkCallbacks callbacks = {nullptr, nullptr, NewSampleCallback, nullptr, nullptr, nullptr};
    gst_app_sink_set_callbacks(GST_APP_SINK(m_appsink), &callbacks, this, nullptr);

    return true;
}
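
// The capture pipeline above can be sanity-checked from a shell before
// running the app (assuming gst-launch-1.0 and the qtiqmmfsrc plugin are
// installed):
//
//   gst-launch-1.0 qtiqmmfsrc ! \
//       'video/x-raw,format=NV12,width=640,height=480,framerate=30/1' ! \
//       videoconvert ! 'video/x-raw,format=BGR' ! fakesink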

GstFlowReturn CameraFrame::NewSampleCallback(GstAppSink *sink, gpointer user_data)
{
    CameraFrame *frame = static_cast<CameraFrame*>(user_data);
    GstSample *sample = gst_app_sink_pull_sample(sink);
    if (!sample) return GST_FLOW_ERROR;

    GstBuffer *buffer = gst_sample_get_buffer(sample);
    GstCaps *caps = gst_sample_get_caps(sample);

    if (buffer && caps) {
        frame->ConvertGstBufferToWxImage(buffer, caps);
        // Refresh() must run on the GUI thread; this callback fires on a
        // GStreamer streaming thread, so marshal the repaint via CallAfter().
        frame->CallAfter([frame]() { frame->Refresh(); });
    }

    gst_sample_unref(sample);
    return GST_FLOW_OK;
}

void CameraFrame::ConvertGstBufferToWxImage(GstBuffer *buffer, GstCaps *caps)
{
    GstVideoInfo info;
    if (!gst_video_info_from_caps(&info, caps)) {
        wxLogWarning("Failed to get video info from caps");
        return;
    }

    // Validate video dimensions
    if (info.width <= 0 || info.height <= 0) {
        wxLogWarning("Invalid video dimensions: %dx%d", info.width, info.height);
        return;
    }

    GstMapInfo map;
    if (!gst_buffer_map(buffer, &map, GST_MAP_READ)) {
        wxLogWarning("Failed to map GST buffer");
        return;
    }

    // Validate data size (RGB format should be width×height×3)
    size_t expectedSize = info.width * info.height * 3;
    if (map.size < expectedSize) {
        wxLogWarning("Buffer size mismatch: expected %zu, got %zu", expectedSize, map.size);
        gst_buffer_unmap(buffer, &map);
        return;
    }

    // Convert GST buffer to cv::Mat for detection
    cv::Mat frame(info.height, info.width, CV_8UC3, map.data);

    // Perform detection
    auto detections = m_detector.Detect(frame);

    // Draw detections on frame
    for (const auto& det : detections) {
        drawBoxOnImage(frame, det);
    }

    // Convert BGR to RGB for wxImage
    cv::cvtColor(frame, frame, cv::COLOR_BGR2RGB);

    wxMutexLocker locker(m_imageMutex);
    m_frameImage = wxImage(info.width, info.height, frame.data, true).Copy();

    gst_buffer_unmap(buffer, &map);
}

void CameraFrame::OnPaint(wxPaintEvent& event)
{
    wxPaintDC dc(this);
    wxMutexLocker locker(m_imageMutex);
    if (!m_frameImage.IsOk() || m_frameImage.GetWidth() <= 0 || m_frameImage.GetHeight() <= 0) {
        // Draw default background
        dc.SetBackground(wxBrush(wxColour(50, 50, 50)));
        dc.Clear();

        // Draw status text
        dc.SetTextForeground(wxColour(255, 255, 255));
        dc.DrawText(wxT("Waiting for camera feed..."), wxPoint(20, 20));
        return;
    }

    // Draw image, scaled to window size
    wxSize clientSize = GetClientSize();
    if (clientSize.GetWidth() <= 0 || clientSize.GetHeight() <= 0)
        return;

    wxBitmap bmp(m_frameImage.Scale(clientSize.GetWidth(), clientSize.GetHeight(), wxIMAGE_QUALITY_BILINEAR));
    dc.DrawBitmap(bmp, 0, 0, false);
}

void CameraFrame::OnClose(wxCloseEvent& event)
{
    Destroy();
}

Create the Makefile

Create a Makefile with the following content (note that the recipe lines must be indented with a tab character, as make requires):

SRCS += main.cpp
SRCS += $(wildcard /home/pi/ai-engine-direct-helper/src/*.cpp)
SRCS += $(wildcard /home/pi/ai-engine-direct-helper/src/Log/*.cpp)
SRCS += $(wildcard /home/pi/ai-engine-direct-helper/src/PAL/src/linux/*.cpp)
SRCS += $(wildcard /home/pi/ai-engine-direct-helper/src/Utils/*.cpp)
SRCS += $(wildcard /home/pi/ai-engine-direct-helper/src/WrapperUtils/*.cpp)
SRCS += $(wildcard /home/pi/ai-engine-direct-helper/src/PAL/src/common/*.cpp)
OBJS = $(patsubst %.cpp, %.o, $(SRCS))
DEPS = $(patsubst %.cpp, %.d, $(SRCS))
CXXFLAGS += -I/usr/include/opencv4
CXXFLAGS += -I/usr/include/wx-3.2
CXXFLAGS += -I/home/pi/qairt/2.37.1.250807/include/QNN
CXXFLAGS += -I/home/pi/ai-engine-direct-helper/src
CXXFLAGS += -I/home/pi/ai-engine-direct-helper/src/Utils
CXXFLAGS += -I/home/pi/ai-engine-direct-helper/src/WrapperUtils 
CXXFLAGS += -I/home/pi/ai-engine-direct-helper/src/Log
CXXFLAGS += -I/home/pi/ai-engine-direct-helper/src/PAL/include
LDFLAGS += -lopencv_core -lopencv_highgui -lopencv_imgcodecs -lopencv_imgproc -lopencv_videoio
CXXFLAGS += `wx-config --cxxflags` `pkg-config --cflags gstreamer-1.0 gstreamer-app-1.0 gstreamer-video-1.0` -Wall -g
LDFLAGS += `wx-config --libs` `pkg-config --libs gstreamer-1.0 gstreamer-app-1.0 gstreamer-video-1.0`

TARGET = wx_yolo8

all: $(TARGET)
%.o: %.cpp
	g++ -c -g -MMD $(CXXFLAGS) $< -o $@

$(TARGET): $(OBJS)
	g++ -o $@ $^ $(LDFLAGS)

sinclude $(DEPS)
.PHONY: clean

clean:
	$(RM) $(TARGET) $(OBJS) $(DEPS)

Build the Program

Run make to build the program; this produces the executable wx_yolo8.

Download the Model File

Download the model file yolov8_det.bin and place it in the directory from which you will run the program; the code loads it from the current working directory.

Download the Runtime Libraries

Download the following runtime library files. The code above loads libQnnHtp.so and libQnnSystem.so from the working directory, and the run command below adds ./lib to LD_LIBRARY_PATH so the remaining libraries can be resolved from there:

libQnnHtp.so
libQnnHtpV68Skel.so
libQnnHtpV68Stub.so
libQnnSystem.so
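
A plausible layout of the run directory after these steps (an assumption on our part: the Skel/Stub libraries go into ./lib, which the LD_LIBRARY_PATH=lib setting in the run command suggests):

wx_yolo8
yolov8_det.bin
libQnnHtp.so
libQnnSystem.so
lib/
    libQnnHtpV68Skel.so
    libQnnHtpV68Stub.so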

Run the Program

Run the following commands. The first line locates the Xauthority file that mutter created for the desktop session of user 1001, so that the program, launched with sudo, is allowed to connect to the display:

FILE_PATH=$(sudo find /run/user/1001 -type f -name '.mutter-*' | head -n 1)
sudo XAUTHORITY="$FILE_PATH" \
     LD_LIBRARY_PATH=lib:$LD_LIBRARY_PATH \
     GST_PLUGIN_PATH=/usr/lib/gstreamer-1.0:$GST_PLUGIN_PATH \
     DISPLAY=:0 ./wx_yolo8

Result