From 734b5b410a4a9cdcef4f5c66fec41b27f2d05c4f Mon Sep 17 00:00:00 2001 From: Xiaoxia Date: Tue, 20 Jan 2026 22:44:37 +0800 Subject: [PATCH] Support both esp_video and esp32_camera (#1671) * Update project version to 2.2.1 and refactor camera component handling - Incremented project version from 2.2.0 to 2.2.1 in CMakeLists.txt. - Removed legacy esp32_camera component and replaced it with esp_video for ESP32-S3 and ESP32-P4 boards. - Updated board implementations to utilize the new esp_video component, ensuring compatibility and improved functionality. - Cleaned up Kconfig options related to camera selection, streamlining the configuration process. - Enhanced camera initialization logic across various board files to support the new component structure. * Refactor camera handling in AtomS3R CAM/M12 EchoBase board - Replaced the legacy EspVideo component with the new Esp32Camera class for improved camera functionality. - Updated camera initialization logic to utilize a more structured configuration approach, enhancing clarity and maintainability. - Removed outdated comments and code related to the previous camera implementation in the README file. * Update camera configuration for atoms3r-cam-m12-echo-base - Removed outdated camera configuration options from config.json to streamline the setup. - Retained essential partition table configuration for improved clarity. * Enhance Esp32Camera functionality and memory management - Added esp_timer.h for improved timing functionality. - Streamlined camera initialization by removing redundant frame buffer setup and logging. - Improved memory allocation for JPEG encoding and added error handling for unsupported pixel formats. - Updated comments for clarity and consistency, ensuring better understanding of the code flow. --- CMakeLists.txt | 2 +- main/CMakeLists.txt | 25 +- main/Kconfig.projbuild | 25 - main/boards/atk-dnesp32s3/atk_dnesp32s3.cc | 6 +- .../atoms3r-cam-m12-echo-base/README.md | 6 - .../atoms3r_cam_m12_echo_base.cc | 66 +- .../atoms3r-cam-m12-echo-base/config.json | 5 +- .../compact_wifi_board_s3cam.cc | 6 +- main/boards/common/esp32_camera.cc | 972 ++------------- main/boards/common/esp32_camera.h | 39 +- main/boards/common/esp32s3_camera.cc | 413 ------- main/boards/common/esp32s3_camera.h | 53 - main/boards/common/esp_video.cc | 1041 +++++++++++++++++ main/boards/common/esp_video.h | 53 + main/boards/df-k10/df_k10_board.cc | 6 +- main/boards/df-s3-ai-cam/df_s3_ai_cam.cc | 6 +- main/boards/echoear/EchoEar.cc | 6 +- .../esp-p4-function-ev-board.cc | 8 +- .../esp-s3-lcd-ev-board-2.cc | 6 +- .../esp-s3-lcd-ev-board.cc | 6 +- .../boards/esp-sparkbot/esp_sparkbot_board.cc | 6 +- .../esp32-s3-touch-lcd-3.5.cc | 6 +- main/boards/esp32s3-korvo2-v3/config.json | 8 - .../esp32s3_korvo2_v3_board.cc | 6 +- .../kevin-sp-v3-dev/kevin-sp-v3_board.cc | 6 +- main/boards/kevin-sp-v4-dev/config.json | 8 - .../kevin-sp-v4-dev/kevin-sp-v4_board.cc | 6 +- main/boards/lichuang-dev/config.json | 5 +- .../boards/lichuang-dev/lichuang_dev_board.cc | 64 +- .../lilygo-t-cameraplus-s3.cc | 6 +- .../boards/m5stack-core-s3/m5stack_core_s3.cc | 6 +- main/boards/m5stack-tab5/m5stack_tab5.cc | 6 +- main/boards/otto-robot/otto_robot.cc | 6 +- .../boards/waveshare-p4-nano/esp32-p4-nano.cc | 6 +- .../esp32-p4-wifi6-touch-lcd-4b.cc | 6 +- .../esp32-p4-wifi6-touch-lcd-7b.cc | 6 +- .../esp32-p4-wifi6-touch-lcd-xc.cc | 6 +- .../esp32-s3-audio_board.cc | 6 +- .../waveshare-s3-touch-lcd-3.5b.cc | 6 +- main/display/lvgl_display/jpg/image_to_jpeg.h | 2 +- main/mcp_server.cc | 2 +- 41 files changed, 1348 insertions(+), 1581 deletions(-) delete mode 100644 main/boards/common/esp32s3_camera.cc delete mode 100644 main/boards/common/esp32s3_camera.h create mode 100644 main/boards/common/esp_video.cc create mode 100644 main/boards/common/esp_video.h diff --git a/CMakeLists.txt b/CMakeLists.txt index c6392e1a..90c5c94c 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -9,5 +9,5 @@ include($ENV{IDF_PATH}/tools/cmake/project.cmake) # "Trim" the build. Include the minimal set of components, main, and anything it depends on. idf_build_set_property(MINIMAL_BUILD ON) -set(PROJECT_VER "2.2.0") +set(PROJECT_VER "2.2.1") project(xiaozhi) diff --git a/main/CMakeLists.txt b/main/CMakeLists.txt index a885970c..14d6b30b 100644 --- a/main/CMakeLists.txt +++ b/main/CMakeLists.txt @@ -52,8 +52,6 @@ list(APPEND SOURCES "boards/common/axp2101.cc" "boards/common/backlight.cc" "boards/common/button.cc" - "boards/common/esp32_camera.cc" - "boards/common/esp32s3_camera.cc" "boards/common/i2c_device.cc" "boards/common/knob.cc" "boards/common/power_save_timer.cc" @@ -758,29 +756,18 @@ if(CONFIG_IDF_TARGET_ESP32) "led/gpio_led.cc" "display/lvgl_display/jpg/image_to_jpeg.cpp" "display/lvgl_display/jpg/jpeg_to_image.c" - "boards/common/esp32_camera.cc" - "boards/common/esp32s3_camera.cc" "boards/common/nt26_board.cc" ) endif() -# ESP32-S3: 根据 Kconfig 选择使用哪个摄像头组件 -if(CONFIG_IDF_TARGET_ESP32S3) - if(CONFIG_XIAOZHI_USE_ESP_CAMERA) - # 使用 esp_camera 组件,排除 esp32_camera.cc - list(REMOVE_ITEM SOURCES "boards/common/esp32_camera.cc") - elseif(CONFIG_XIAOZHI_USE_ESP_VIDEO) - # 使用 esp_video 组件,排除 esp32s3_camera.cc - list(REMOVE_ITEM SOURCES "boards/common/esp32s3_camera.cc") - else() - # 默认使用 esp_camera 组件 - list(REMOVE_ITEM SOURCES "boards/common/esp32_camera.cc") - endif() +# Include EspVideo if target is ESP32S3 or ESP32P4 +if(CONFIG_IDF_TARGET_ESP32S3 OR CONFIG_IDF_TARGET_ESP32P4) + list(APPEND SOURCES "boards/common/esp_video.cc") endif() -# ESP32-P4: 只能使用 esp_video 组件,排除 esp32s3_camera.cc -if(CONFIG_IDF_TARGET_ESP32P4) - list(REMOVE_ITEM SOURCES "boards/common/esp32s3_camera.cc") +# Include Esp32Camera if target is ESP32S3 +if(CONFIG_IDF_TARGET_ESP32S3) + list(APPEND SOURCES "boards/common/esp32_camera.cc") endif() idf_component_register(SRCS ${SOURCES} diff --git a/main/Kconfig.projbuild b/main/Kconfig.projbuild index f8463c6a..4189da67 100644 --- a/main/Kconfig.projbuild +++ b/main/Kconfig.projbuild @@ -737,31 +737,6 @@ menu "Camera Configuration" comment "Warning: Please read the help text before modifying these settings." - choice XIAOZHI_CAMERA_COMPONENT - prompt "Camera Component Selection" - default XIAOZHI_USE_ESP_VIDEO if IDF_TARGET_ESP32S3 - default XIAOZHI_USE_ESP_VIDEO if IDF_TARGET_ESP32P4 - help - Select the camera component to use. - ESP32-S3 can choose between esp_camera (legacy) or esp_video (new). - ESP32-P4 only supports esp_video. - - config XIAOZHI_USE_ESP_CAMERA - bool "Use esp_camera (legacy component)" - depends on IDF_TARGET_ESP32S3 - help - Use the legacy esp32-camera component. - This is the traditional camera driver for ESP32-S3. - - config XIAOZHI_USE_ESP_VIDEO - bool "Use esp_video (new component)" - depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4 - help - Use the new esp_video component. - This component provides V4L2-like interface and is required for ESP32-P4. - On ESP32-S3, it provides additional features but may require different camera configuration. - endchoice - config XIAOZHI_CAMERA_ALLOW_JPEG_INPUT bool "Allow JPEG Input" default n diff --git a/main/boards/atk-dnesp32s3/atk_dnesp32s3.cc b/main/boards/atk-dnesp32s3/atk_dnesp32s3.cc index cd9b7598..833c6698 100644 --- a/main/boards/atk-dnesp32s3/atk_dnesp32s3.cc +++ b/main/boards/atk-dnesp32s3/atk_dnesp32s3.cc @@ -6,7 +6,7 @@ #include "config.h" #include "i2c_device.h" #include "led/single_led.h" -#include "esp32_camera.h" +#include "esp_video.h" #include #include @@ -49,7 +49,7 @@ private: Button boot_button_; LcdDisplay* display_; XL9555* xl9555_; - Esp32Camera* camera_; + EspVideo* camera_; void InitializeI2c() { // Initialize I2C peripheral @@ -179,7 +179,7 @@ private: .dvp = &dvp_config, }; - camera_ = new Esp32Camera(video_config); + camera_ = new EspVideo(video_config); } public: diff --git a/main/boards/atoms3r-cam-m12-echo-base/README.md b/main/boards/atoms3r-cam-m12-echo-base/README.md index 83738ec4..6da32806 100644 --- a/main/boards/atoms3r-cam-m12-echo-base/README.md +++ b/main/boards/atoms3r-cam-m12-echo-base/README.md @@ -14,12 +14,6 @@ AtomS3R CAM、AtomS3R M12 是 M5Stack 推出的基于 ESP32-S3-PICO-1-N8R8 的 两款开发版均**不带屏幕、不带额外按键**,需要使用语音唤醒。必要时,需要使用 `idf.py monitor` 查看 log 以确定运行状态。 -> ![NOTE] -> -> 自版本 [待定] 起,由于依赖库不支持 OV3660 传感器,AtomS3R M12 无法使用摄像头识别功能。 -> -> AtomS3R CAM 不受影响;使用旧版本小智固件的 AtomS3R M12 不受影响。 - ## 配置、编译命令 **配置编译目标为 ESP32S3** diff --git a/main/boards/atoms3r-cam-m12-echo-base/atoms3r_cam_m12_echo_base.cc b/main/boards/atoms3r-cam-m12-echo-base/atoms3r_cam_m12_echo_base.cc index b9f6233f..c889288f 100644 --- a/main/boards/atoms3r-cam-m12-echo-base/atoms3r_cam_m12_echo_base.cc +++ b/main/boards/atoms3r-cam-m12-echo-base/atoms3r_cam_m12_echo_base.cc @@ -126,47 +126,33 @@ private: } void InitializeCamera() { - static esp_cam_ctlr_dvp_pin_config_t dvp_pin_config = { - .data_width = CAM_CTLR_DATA_WIDTH_8, - .data_io = { - [0] = CAMERA_PIN_D0, - [1] = CAMERA_PIN_D1, - [2] = CAMERA_PIN_D2, - [3] = CAMERA_PIN_D3, - [4] = CAMERA_PIN_D4, - [5] = CAMERA_PIN_D5, - [6] = CAMERA_PIN_D6, - [7] = CAMERA_PIN_D7, - }, - .vsync_io = CAMERA_PIN_VSYNC, - .de_io = CAMERA_PIN_HREF, - .pclk_io = CAMERA_PIN_PCLK, - .xclk_io = CAMERA_PIN_XCLK, - }; + camera_config_t config = {}; + config.pin_d0 = CAMERA_PIN_D0; + config.pin_d1 = CAMERA_PIN_D1; + config.pin_d2 = CAMERA_PIN_D2; + config.pin_d3 = CAMERA_PIN_D3; + config.pin_d4 = CAMERA_PIN_D4; + config.pin_d5 = CAMERA_PIN_D5; + config.pin_d6 = CAMERA_PIN_D6; + config.pin_d7 = CAMERA_PIN_D7; + config.pin_xclk = CAMERA_PIN_XCLK; + config.pin_pclk = CAMERA_PIN_PCLK; + config.pin_vsync = CAMERA_PIN_VSYNC; + config.pin_href = CAMERA_PIN_HREF; + config.pin_sccb_sda = CAMERA_PIN_SIOD; + config.pin_sccb_scl = CAMERA_PIN_SIOC; + config.sccb_i2c_port = 1; + config.pin_pwdn = CAMERA_PIN_PWDN; + config.pin_reset = CAMERA_PIN_RESET; + config.xclk_freq_hz = XCLK_FREQ_HZ; + config.pixel_format = PIXFORMAT_RGB565; + config.frame_size = FRAMESIZE_QVGA; + config.jpeg_quality = 12; + config.fb_count = 1; + config.fb_location = CAMERA_FB_IN_PSRAM; + config.grab_mode = CAMERA_GRAB_WHEN_EMPTY; - esp_video_init_sccb_config_t sccb_config = { - .init_sccb = true, - .i2c_config = { - .port = 1, - .scl_pin = CAMERA_PIN_SIOC, - .sda_pin = CAMERA_PIN_SIOD, - }, - .freq = 100000, - }; - - esp_video_init_dvp_config_t dvp_config = { - .sccb_config = sccb_config, - .reset_pin = CAMERA_PIN_RESET, - .pwdn_pin = CAMERA_PIN_PWDN, - .dvp_pin = dvp_pin_config, - .xclk_freq = XCLK_FREQ_HZ, - }; - - esp_video_init_config_t video_config = { - .dvp = &dvp_config, - }; - - camera_ = new Esp32Camera(video_config); + camera_ = new Esp32Camera(config); camera_->SetHMirror(false); } diff --git a/main/boards/atoms3r-cam-m12-echo-base/config.json b/main/boards/atoms3r-cam-m12-echo-base/config.json index 1c798134..b8d66299 100644 --- a/main/boards/atoms3r-cam-m12-echo-base/config.json +++ b/main/boards/atoms3r-cam-m12-echo-base/config.json @@ -5,10 +5,7 @@ "name": "atoms3r-cam-m12-echo-base", "sdkconfig_append": [ "CONFIG_ESPTOOLPY_FLASHSIZE_8MB=y", - "CONFIG_PARTITION_TABLE_CUSTOM_FILENAME=\"partitions/v2/8m.csv\"", - "CONFIG_CAMERA_GC0308=y", - "CONFIG_CAMERA_GC0308_AUTO_DETECT_DVP_INTERFACE_SENSOR=y", - "CONFIG_CAMERA_GC0308_DVP_YUV422_320X240_20FPS=y" + "CONFIG_PARTITION_TABLE_CUSTOM_FILENAME=\"partitions/v2/8m.csv\"" ] } ] diff --git a/main/boards/bread-compact-wifi-s3cam/compact_wifi_board_s3cam.cc b/main/boards/bread-compact-wifi-s3cam/compact_wifi_board_s3cam.cc index 98b76f47..8a44c4ed 100644 --- a/main/boards/bread-compact-wifi-s3cam/compact_wifi_board_s3cam.cc +++ b/main/boards/bread-compact-wifi-s3cam/compact_wifi_board_s3cam.cc @@ -8,7 +8,7 @@ #include "mcp_server.h" #include "lamp_controller.h" #include "led/single_led.h" -#include "esp32_camera.h" +#include "esp_video.h" #include #include @@ -65,7 +65,7 @@ private: Button boot_button_; LcdDisplay* display_; - Esp32Camera* camera_; + EspVideo* camera_; void InitializeSpi() { spi_bus_config_t buscfg = {}; @@ -165,7 +165,7 @@ private: .dvp = &dvp_config, }; - camera_ = new Esp32Camera(video_config); + camera_ = new EspVideo(video_config); camera_->SetHMirror(false); } diff --git a/main/boards/common/esp32_camera.cc b/main/boards/common/esp32_camera.cc index 15cdad01..60a5050a 100644 --- a/main/boards/common/esp32_camera.cc +++ b/main/boards/common/esp32_camera.cc @@ -1,388 +1,52 @@ #include "sdkconfig.h" -// esp32_camera (使用 esp_video 组件) 用于 ESP32-P4,或 ESP32-S3 选择使用 esp_video 时 -#if defined(CONFIG_IDF_TARGET_ESP32P4) || (defined(CONFIG_IDF_TARGET_ESP32S3) && defined(CONFIG_XIAOZHI_USE_ESP_VIDEO)) -#include -#include -#include -#include -#include -#include #include #include #include +#include +#include -#include "esp_imgfx_color_convert.h" -#include "esp_video_device.h" -#include "esp_video_init.h" -#include "linux/videodev2.h" - +#include "esp32_camera.h" #include "board.h" #include "display.h" -#include "esp32_camera.h" -#include "esp_jpeg_common.h" -#include "jpg/image_to_jpeg.h" -#include "jpg/jpeg_to_image.h" #include "lvgl_display.h" #include "mcp_server.h" #include "system_info.h" - -#ifdef CONFIG_XIAOZHI_ENABLE_CAMERA_DEBUG_MODE -#undef LOG_LOCAL_LEVEL -#define LOG_LOCAL_LEVEL MAX(CONFIG_LOG_DEFAULT_LEVEL, ESP_LOG_DEBUG) -#endif // CONFIG_XIAOZHI_ENABLE_CAMERA_DEBUG_MODE -#include // should be after LOCAL_LOG_LEVEL definition - -#ifdef CONFIG_XIAOZHI_ENABLE_ROTATE_CAMERA_IMAGE -#ifdef CONFIG_IDF_TARGET_ESP32P4 -#include "driver/ppa.h" -#if defined(CONFIG_XIAOZHI_CAMERA_IMAGE_ROTATION_ANGLE_90) -#define IMAGE_ROTATION_ANGLE (PPA_SRM_ROTATION_ANGLE_270) -#elif defined(CONFIG_XIAOZHI_CAMERA_IMAGE_ROTATION_ANGLE_270) -#define IMAGE_ROTATION_ANGLE (PPA_SRM_ROTATION_ANGLE_90) -#else -#error "CONFIG_XIAOZHI_CAMERA_IMAGE_ROTATION_ANGLE is not set" -#endif // angle -#else // target -#include "esp_imgfx_rotate.h" -#if defined(CONFIG_XIAOZHI_CAMERA_IMAGE_ROTATION_ANGLE_90) -#define IMAGE_ROTATION_ANGLE (90) -#elif defined(CONFIG_XIAOZHI_CAMERA_IMAGE_ROTATION_ANGLE_270) -#define IMAGE_ROTATION_ANGLE (270) -#else -#error "CONFIG_XIAOZHI_CAMERA_IMAGE_ROTATION_ANGLE is not set" -#endif // angle -#endif // target -#endif // CONFIG_XIAOZHI_ENABLE_ROTATE_CAMERA_IMAGE - +#include "jpg/image_to_jpeg.h" +#include "esp_timer.h" #define TAG "Esp32Camera" -#if defined(CONFIG_CAMERA_SENSOR_SWAP_PIXEL_BYTE_ORDER) || defined(CONFIG_XIAOZHI_ENABLE_CAMERA_ENDIANNESS_SWAP) -#warning \ - "CAMERA_SENSOR_SWAP_PIXEL_BYTE_ORDER or CONFIG_XIAOZHI_ENABLE_CAMERA_ENDIANNESS_SWAP is enabled, which may cause image corruption in YUV422 format!" -#endif +Esp32Camera::Esp32Camera(const camera_config_t &config) { + esp_err_t err = esp_camera_init(&config); + if (err != ESP_OK) { + ESP_LOGE(TAG, "esp_camera_init failed with error 0x%x", err); + return; + } -#if CONFIG_XIAOZHI_ENABLE_CAMERA_DEBUG_MODE -#define CAM_PRINT_FOURCC(pixelformat) \ - char fourcc[5]; \ - fourcc[0] = pixelformat & 0xFF; \ - fourcc[1] = (pixelformat >> 8) & 0xFF; \ - fourcc[2] = (pixelformat >> 16) & 0xFF; \ - fourcc[3] = (pixelformat >> 24) & 0xFF; \ - fourcc[4] = '\0'; \ - ESP_LOGD(TAG, "FOURCC: '%c%c%c%c'", fourcc[0], fourcc[1], fourcc[2], fourcc[3]); - -// for compatibility with old esp_video version -#ifndef MAP_FAILED -#define MAP_FAILED nullptr -#endif - -__attribute__((weak)) esp_err_t esp_video_deinit(void) { - return ESP_ERR_NOT_SUPPORTED; -} -// end of for compatibility with old esp_video version - -static void log_available_video_devices() { - for (int i = 0; i < 50; i++) { - char path[16]; - snprintf(path, sizeof(path), "/dev/video%d", i); - int fd = open(path, O_RDONLY); - if (fd >= 0) { - ESP_LOGD(TAG, "found video device: %s", path); - close(fd); + sensor_t *s = esp_camera_sensor_get(); + if (s) { + if (s->id.PID == GC0308_PID) { + s->set_hmirror(s, 0); // Control camera mirror: 1 for mirror, 0 for normal } - } -} -#else -#define CAM_PRINT_FOURCC(pixelformat) (void)0; -#endif // CONFIG_XIAOZHI_ENABLE_CAMERA_DEBUG_MODE - -Esp32Camera::Esp32Camera(const esp_video_init_config_t& config) { - if (esp_video_init(&config) != ESP_OK) { - ESP_LOGE(TAG, "esp_video_init failed"); - return; + ESP_LOGI(TAG, "Camera initialized: format=%d", config.pixel_format); } -#ifdef CONFIG_XIAOZHI_ENABLE_CAMERA_DEBUG_MODE - esp_log_level_set(TAG, ESP_LOG_DEBUG); -#endif // CONFIG_XIAOZHI_ENABLE_CAMERA_DEBUG_MODE - - const char* video_device_name = nullptr; - - if (false) { /* 用于构建 else if */ - } -#if CONFIG_ESP_VIDEO_ENABLE_MIPI_CSI_VIDEO_DEVICE - else if (config.csi != nullptr) { - video_device_name = ESP_VIDEO_MIPI_CSI_DEVICE_NAME; - } -#endif -#if CONFIG_ESP_VIDEO_ENABLE_DVP_VIDEO_DEVICE - else if (config.dvp != nullptr) { - video_device_name = ESP_VIDEO_DVP_DEVICE_NAME; - } -#endif -#if CONFIG_ESP_VIDEO_ENABLE_HW_JPEG_VIDEO_DEVICE - else if (config.jpeg != nullptr) { - video_device_name = ESP_VIDEO_JPEG_DEVICE_NAME; - } -#endif -#if CONFIG_ESP_VIDEO_ENABLE_SPI_VIDEO_DEVICE - else if (config.spi != nullptr) { - video_device_name = ESP_VIDEO_SPI_DEVICE_NAME; - } -#endif -#if CONFIG_ESP_VIDEO_ENABLE_USB_UVC_VIDEO_DEVICE - else if (config.usb_uvc != nullptr) { - video_device_name = ESP_VIDEO_USB_UVC_DEVICE_NAME(0); - } -#endif - - if (video_device_name == nullptr) { - ESP_LOGE(TAG, "no video device is enabled"); - return; - } - - video_fd_ = open(video_device_name, O_RDWR); - - if (video_fd_ < 0) { - ESP_LOGE(TAG, "open %s failed, errno=%d(%s)", video_device_name, errno, strerror(errno)); -#if CONFIG_XIAOZHI_ENABLE_CAMERA_DEBUG_MODE - log_available_video_devices(); -#endif // CONFIG_XIAOZHI_ENABLE_CAMERA_DEBUG_MODE - return; - } - - struct v4l2_capability cap = {}; - if (ioctl(video_fd_, VIDIOC_QUERYCAP, &cap) != 0) { - ESP_LOGE(TAG, "VIDIOC_QUERYCAP failed, errno=%d(%s)", errno, strerror(errno)); - close(video_fd_); - video_fd_ = -1; - return; - } - - ESP_LOGD( - TAG, - "VIDIOC_QUERYCAP: driver=%s, card=%s, bus_info=%s, version=0x%08lx, capabilities=0x%08lx, device_caps=0x%08lx", - cap.driver, cap.card, cap.bus_info, cap.version, cap.capabilities, cap.device_caps); - - struct v4l2_format format = {}; - format.type = V4L2_BUF_TYPE_VIDEO_CAPTURE; - if (ioctl(video_fd_, VIDIOC_G_FMT, &format) != 0) { - ESP_LOGE(TAG, "VIDIOC_G_FMT failed, errno=%d(%s)", errno, strerror(errno)); - close(video_fd_); - video_fd_ = -1; - return; - } - ESP_LOGD(TAG, "VIDIOC_G_FMT: pixelformat=0x%08lx, width=%ld, height=%ld", format.fmt.pix.pixelformat, - format.fmt.pix.width, format.fmt.pix.height); - CAM_PRINT_FOURCC(format.fmt.pix.pixelformat); - - struct v4l2_format setformat = {}; - setformat.type = V4L2_BUF_TYPE_VIDEO_CAPTURE; -#ifdef CONFIG_XIAOZHI_ENABLE_ROTATE_CAMERA_IMAGE - sensor_width_ = format.fmt.pix.width; - sensor_height_ = format.fmt.pix.height; -#endif // CONFIG_XIAOZHI_ENABLE_ROTATE_CAMERA_IMAGE - setformat.fmt.pix.width = format.fmt.pix.width; - setformat.fmt.pix.height = format.fmt.pix.height; - - struct v4l2_fmtdesc fmtdesc = {}; - fmtdesc.type = V4L2_BUF_TYPE_VIDEO_CAPTURE; - fmtdesc.index = 0; - uint32_t best_fmt = 0; - int best_rank = 1 << 30; // large number - - // 注: 当前版本 esp_video 中 YUV422P 实际输出为 YUYV。 -#if defined(CONFIG_XIAOZHI_ENABLE_ROTATE_CAMERA_IMAGE) && defined(CONFIG_SOC_PPA_SUPPORTED) - auto get_rank = [](uint32_t fmt) -> int { - switch (fmt) { - case V4L2_PIX_FMT_RGB24: - return 0; - case V4L2_PIX_FMT_RGB565: - return 1; -#ifdef CONFIG_XIAOZHI_ENABLE_HARDWARE_JPEG_ENCODER - case V4L2_PIX_FMT_YUV420: // 软件 JPEG 编码器不支持 YUV420 格式 - return 2; -#endif // CONFIG_XIAOZHI_ENABLE_HARDWARE_JPEG_ENCODER - case V4L2_PIX_FMT_GREY: - case V4L2_PIX_FMT_YUV422P: - default: - return 1 << 29; // unsupported - } - }; -#else - auto get_rank = [](uint32_t fmt) -> int { - switch (fmt) { - case V4L2_PIX_FMT_YUV422P: - return 10; - case V4L2_PIX_FMT_RGB565: - return 11; - case V4L2_PIX_FMT_RGB24: - return 12; -#ifdef CONFIG_XIAOZHI_ENABLE_HARDWARE_JPEG_ENCODER - case V4L2_PIX_FMT_YUV420: - return 13; -#endif // CONFIG_XIAOZHI_ENABLE_HARDWARE_JPEG_ENCODER -#ifdef CONFIG_XIAOZHI_CAMERA_ALLOW_JPEG_INPUT - case V4L2_PIX_FMT_JPEG: - return 5; -#endif // CONFIG_XIAOZHI_CAMERA_ALLOW_JPEG_INPUT - case V4L2_PIX_FMT_GREY: - return 20; - default: - return 1 << 29; // unsupported - } - }; -#endif - while (ioctl(video_fd_, VIDIOC_ENUM_FMT, &fmtdesc) == 0) { - ESP_LOGD(TAG, "VIDIOC_ENUM_FMT: pixelformat=0x%08lx, description=%s", fmtdesc.pixelformat, fmtdesc.description); - CAM_PRINT_FOURCC(fmtdesc.pixelformat); - int rank = get_rank(fmtdesc.pixelformat); - if (rank < best_rank) { - best_rank = rank; - best_fmt = fmtdesc.pixelformat; - } - fmtdesc.index++; - } - if (best_rank < (1 << 29)) { - setformat.fmt.pix.pixelformat = best_fmt; - sensor_format_ = best_fmt; - } - - if (!setformat.fmt.pix.pixelformat) { - ESP_LOGE(TAG, "no supported pixel format found"); - close(video_fd_); - video_fd_ = -1; - sensor_format_ = 0; - return; - } - - ESP_LOGD(TAG, "selected pixel format: 0x%08lx", setformat.fmt.pix.pixelformat); - - if (ioctl(video_fd_, VIDIOC_S_FMT, &setformat) != 0) { - ESP_LOGE(TAG, "VIDIOC_S_FMT failed, errno=%d(%s)", errno, strerror(errno)); - close(video_fd_); - video_fd_ = -1; - sensor_format_ = 0; - return; - } - -#ifdef CONFIG_XIAOZHI_ENABLE_ROTATE_CAMERA_IMAGE - frame_.width = setformat.fmt.pix.height; - frame_.height = setformat.fmt.pix.width; -#else - frame_.width = setformat.fmt.pix.width; - frame_.height = setformat.fmt.pix.height; -#endif - - // 申请缓冲并mmap - struct v4l2_requestbuffers req = {}; - req.count = strcmp(video_device_name, ESP_VIDEO_MIPI_CSI_DEVICE_NAME) == 0 ? 2 : 1; - req.type = V4L2_BUF_TYPE_VIDEO_CAPTURE; - req.memory = V4L2_MEMORY_MMAP; - if (ioctl(video_fd_, VIDIOC_REQBUFS, &req) != 0) { - ESP_LOGE(TAG, "VIDIOC_REQBUFS failed"); - close(video_fd_); - video_fd_ = -1; - sensor_format_ = 0; - return; - } - mmap_buffers_.resize(req.count); - for (uint32_t i = 0; i < req.count; i++) { - struct v4l2_buffer buf = {}; - buf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE; - buf.memory = V4L2_MEMORY_MMAP; - buf.index = i; - if (ioctl(video_fd_, VIDIOC_QUERYBUF, &buf) != 0) { - ESP_LOGE(TAG, "VIDIOC_QUERYBUF failed"); - close(video_fd_); - video_fd_ = -1; - sensor_format_ = 0; - return; - } - void* start = mmap(NULL, buf.length, PROT_READ | PROT_WRITE, MAP_SHARED, video_fd_, buf.m.offset); - if (start == MAP_FAILED) { - ESP_LOGE(TAG, "mmap failed"); - close(video_fd_); - video_fd_ = -1; - sensor_format_ = 0; - return; - } - mmap_buffers_[i].start = start; - mmap_buffers_[i].length = buf.length; - - if (ioctl(video_fd_, VIDIOC_QBUF, &buf) != 0) { - ESP_LOGE(TAG, "VIDIOC_QBUF failed"); - close(video_fd_); - video_fd_ = -1; - sensor_format_ = 0; - return; - } - } - - int type = V4L2_BUF_TYPE_VIDEO_CAPTURE; - if (ioctl(video_fd_, VIDIOC_STREAMON, &type) != 0) { - ESP_LOGE(TAG, "VIDIOC_STREAMON failed"); - close(video_fd_); - video_fd_ = -1; - sensor_format_ = 0; - return; - } - -#ifdef CONFIG_ESP_VIDEO_ENABLE_ISP_VIDEO_DEVICE - // 当启用 ISP 时,ISP 需要一些照片来初始化参数,因此开启后后台拍摄5s照片并丢弃 - xTaskCreate( - [](void* arg) { - Esp32Camera* self = static_cast(arg); - uint16_t capture_count = 0; - TickType_t start = xTaskGetTickCount(); - TickType_t duration = 5000 / portTICK_PERIOD_MS; // 5s - while ((xTaskGetTickCount() - start) < duration) { - struct v4l2_buffer buf = {}; - buf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE; - buf.memory = V4L2_MEMORY_MMAP; - if (ioctl(self->video_fd_, VIDIOC_DQBUF, &buf) != 0) { - ESP_LOGE(TAG, "VIDIOC_DQBUF failed during init"); - vTaskDelay(10 / portTICK_PERIOD_MS); - continue; - } - if (ioctl(self->video_fd_, VIDIOC_QBUF, &buf) != 0) { - ESP_LOGE(TAG, "VIDIOC_QBUF failed during init"); - } - capture_count++; - } - ESP_LOGI(TAG, "Camera init success, captured %d frames in %lums", capture_count, - (unsigned long)((xTaskGetTickCount() - start) * portTICK_PERIOD_MS)); - self->streaming_on_ = true; - vTaskDelete(NULL); - }, - "CameraInitTask", 4096, this, 5, nullptr); -#else - ESP_LOGI(TAG, "Camera init success"); streaming_on_ = true; -#endif // CONFIG_ESP_VIDEO_ENABLE_ISP_VIDEO_DEVICE } Esp32Camera::~Esp32Camera() { - if (streaming_on_ && video_fd_ >= 0) { - int type = V4L2_BUF_TYPE_VIDEO_CAPTURE; - ioctl(video_fd_, VIDIOC_STREAMOFF, &type); - } - for (auto& b : mmap_buffers_) { - if (b.start && b.length) { - munmap(b.start, b.length); + if (streaming_on_) { + if (current_fb_) { + esp_camera_fb_return(current_fb_); + current_fb_ = nullptr; } + esp_camera_deinit(); + streaming_on_ = false; } - if (video_fd_ >= 0) { - close(video_fd_); - video_fd_ = -1; - } - sensor_format_ = 0; - esp_video_deinit(); } -void Esp32Camera::SetExplainUrl(const std::string& url, const std::string& token) { +void Esp32Camera::SetExplainUrl(const std::string &url, const std::string &token) { explain_url_ = url; explain_token_ = token; } @@ -392,532 +56,123 @@ bool Esp32Camera::Capture() { encoder_thread_.join(); } - if (!streaming_on_ || video_fd_ < 0) { + if (!streaming_on_) { return false; } - for (int i = 0; i < 3; i++) { - struct v4l2_buffer buf = {}; - buf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE; - buf.memory = V4L2_MEMORY_MMAP; - if (ioctl(video_fd_, VIDIOC_DQBUF, &buf) != 0) { - ESP_LOGE(TAG, "VIDIOC_DQBUF failed"); + // Get the latest frame, discard old frames for real-time performance + for (int i = 0; i < 2; i++) { + if (current_fb_) { + esp_camera_fb_return(current_fb_); + } + current_fb_ = esp_camera_fb_get(); + if (!current_fb_) { + ESP_LOGE(TAG, "Camera capture failed"); return false; } - if (i == 2) { - // 保存帧副本到PSRAM - if (frame_.data) { - heap_caps_free(frame_.data); - frame_.data = nullptr; - frame_.format = 0; - } - frame_.len = buf.bytesused; - frame_.data = (uint8_t*)heap_caps_malloc(frame_.len, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT); - if (!frame_.data) { - ESP_LOGE(TAG, "alloc frame copy failed: need allocate %lu bytes", buf.bytesused); - if (ioctl(video_fd_, VIDIOC_QBUF, &buf) != 0) { - ESP_LOGE(TAG, "Cleanup: VIDIOC_QBUF failed"); - } - return false; - } - -#ifdef CONFIG_XIAOZHI_ENABLE_ROTATE_CAMERA_IMAGE - ESP_LOGW(TAG, "mmap_buffers_[buf.index].length = %d, sensor_width = %d, sensor_height = %d", - mmap_buffers_[buf.index].length, sensor_width_, sensor_height_); -#else - ESP_LOGW(TAG, "mmap_buffers_[buf.index].length = %d, frame.width = %d, frame.height = %d", - mmap_buffers_[buf.index].length, frame_.width, frame_.height); -#endif // CONFIG_XIAOZHI_ENABLE_ROTATE_CAMERA_IMAGE - ESP_LOG_BUFFER_HEXDUMP(TAG, mmap_buffers_[buf.index].start, MIN(mmap_buffers_[buf.index].length, 256), - ESP_LOG_DEBUG); - - switch (sensor_format_) { - case V4L2_PIX_FMT_RGB565: - case V4L2_PIX_FMT_RGB24: - case V4L2_PIX_FMT_YUYV: - case V4L2_PIX_FMT_YUV420: - case V4L2_PIX_FMT_GREY: -#ifdef CONFIG_XIAOZHI_CAMERA_ALLOW_JPEG_INPUT - case V4L2_PIX_FMT_JPEG: -#endif // CONFIG_XIAOZHI_CAMERA_ALLOW_JPEG_INPUT -#ifdef CONFIG_XIAOZHI_ENABLE_CAMERA_ENDIANNESS_SWAP - { - auto src16 = (uint16_t*)mmap_buffers_[buf.index].start; - auto dst16 = (uint16_t*)frame_.data; - size_t count = (size_t)mmap_buffers_[buf.index].length / 2; - for (size_t i = 0; i < count; i++) { - dst16[i] = __builtin_bswap16(src16[i]); - } - } -#else - memcpy(frame_.data, mmap_buffers_[buf.index].start, - MIN(mmap_buffers_[buf.index].length, frame_.len)); -#endif // CONFIG_XIAOZHI_ENABLE_CAMERA_ENDIANNESS_SWAP - frame_.format = sensor_format_; - break; - case V4L2_PIX_FMT_YUV422P: { - // 这个格式是 422 YUYV,不是 planer - frame_.format = V4L2_PIX_FMT_YUYV; -#ifdef CONFIG_XIAOZHI_ENABLE_CAMERA_ENDIANNESS_SWAP - { - auto src16 = (uint16_t*)mmap_buffers_[buf.index].start; - auto dst16 = (uint16_t*)frame_.data; - size_t count = (size_t)mmap_buffers_[buf.index].length / 2; - for (size_t i = 0; i < count; i++) { - dst16[i] = __builtin_bswap16(src16[i]); - } - } -#else - memcpy(frame_.data, mmap_buffers_[buf.index].start, - MIN(mmap_buffers_[buf.index].length, frame_.len)); -#endif // CONFIG_XIAOZHI_ENABLE_CAMERA_ENDIANNESS_SWAP - break; - } - case V4L2_PIX_FMT_RGB565X: { - // 大端序的 RGB565 需要转换为小端序 - // 目前 esp_video 的大小端都会返回格式为 RGB565,不会返回格式为 RGB565X,此 case 用于未来版本兼容 - auto src16 = (uint16_t*)mmap_buffers_[buf.index].start; - auto dst16 = (uint16_t*)frame_.data; - size_t pixel_count = (size_t)frame_.width * (size_t)frame_.height; - for (size_t i = 0; i < pixel_count; i++) { - dst16[i] = __builtin_bswap16(src16[i]); - } - frame_.format = V4L2_PIX_FMT_RGB565; - break; - } - default: - ESP_LOGE(TAG, "unsupported sensor format: 0x%08lx", sensor_format_); - if (ioctl(video_fd_, VIDIOC_QBUF, &buf) != 0) { - ESP_LOGE(TAG, "Cleanup: VIDIOC_QBUF failed"); - } - return false; - } - -#ifdef CONFIG_XIAOZHI_ENABLE_ROTATE_CAMERA_IMAGE -#ifndef CONFIG_SOC_PPA_SUPPORTED - uint8_t* rotate_dst = - (uint8_t*)heap_caps_aligned_alloc(64, frame_.len, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT); - if (rotate_dst == nullptr) { - ESP_LOGE(TAG, "Failed to allocate memory for rotate image"); - if (ioctl(video_fd_, VIDIOC_QBUF, &buf) != 0) { - ESP_LOGE(TAG, "Cleanup: VIDIOC_QBUF failed"); - } - return false; - } - uint8_t* rotate_src = (uint8_t*)frame_.data; - - esp_imgfx_rotate_cfg_t rotate_cfg = { - .in_res = - { - .width = static_cast(sensor_width_), - .height = static_cast(sensor_height_), - }, - .degree = IMAGE_ROTATION_ANGLE, - }; - switch (frame_.format) { - case V4L2_PIX_FMT_RGB565: - rotate_cfg.in_pixel_fmt = ESP_IMGFX_PIXEL_FMT_RGB565_LE; - break; - case V4L2_PIX_FMT_YUYV: - rotate_cfg.in_pixel_fmt = ESP_IMGFX_PIXEL_FMT_RGB565_LE; - break; - case V4L2_PIX_FMT_GREY: - rotate_cfg.in_pixel_fmt = ESP_IMGFX_PIXEL_FMT_Y; - break; - case V4L2_PIX_FMT_RGB24: - rotate_cfg.in_pixel_fmt = ESP_IMGFX_PIXEL_FMT_RGB888; - break; - default: - ESP_LOGE(TAG, "unsupported sensor format: 0x%08lx", sensor_format_); - if (ioctl(video_fd_, VIDIOC_QBUF, &buf) != 0) { - ESP_LOGE(TAG, "Cleanup: VIDIOC_QBUF failed"); - } - return false; - } - esp_imgfx_rotate_handle_t rotate_handle = nullptr; - esp_imgfx_err_t imgfx_err = esp_imgfx_rotate_open(&rotate_cfg, &rotate_handle); - if (imgfx_err != ESP_IMGFX_ERR_OK || rotate_handle == nullptr) { - ESP_LOGE(TAG, "esp_imgfx_rotate_create failed"); - if (ioctl(video_fd_, VIDIOC_QBUF, &buf) != 0) { - ESP_LOGE(TAG, "Cleanup: VIDIOC_QBUF failed"); - } - return false; - } - - esp_imgfx_data_t rotate_input_data = { - .data = rotate_src, - .data_len = frame_.len, - }; - esp_imgfx_data_t rotate_output_data = { - .data = rotate_dst, - .data_len = frame_.len, - }; - - imgfx_err = esp_imgfx_rotate_process(rotate_handle, &rotate_input_data, &rotate_output_data); - if (imgfx_err != ESP_IMGFX_ERR_OK) { - ESP_LOGE(TAG, "esp_imgfx_rotate_process failed"); - heap_caps_free(rotate_dst); - rotate_dst = nullptr; - if (ioctl(video_fd_, VIDIOC_QBUF, &buf) != 0) { - ESP_LOGE(TAG, "Cleanup: VIDIOC_QBUF failed"); - } - esp_imgfx_rotate_close(rotate_handle); - rotate_handle = nullptr; - return false; - } - - frame_.data = rotate_dst; - - heap_caps_free(rotate_src); - rotate_src = nullptr; - - esp_imgfx_rotate_close(rotate_handle); - rotate_handle = nullptr; -#else // CONFIG_SOC_PPA_SUPPORTED - uint8_t* rotate_src = nullptr; - - ppa_srm_color_mode_t ppa_color_mode; - switch (frame_.format) { - case V4L2_PIX_FMT_RGB565: - rotate_src = (uint8_t*)frame_.data; - ppa_color_mode = PPA_SRM_COLOR_MODE_RGB565; - break; - case V4L2_PIX_FMT_RGB24: - rotate_src = (uint8_t*)frame_.data; - ppa_color_mode = PPA_SRM_COLOR_MODE_RGB888; - break; - case V4L2_PIX_FMT_YUYV: { - ESP_LOGW(TAG, "YUYV format is not supported for PPA rotation, using software conversion to RGB888"); - rotate_src = (uint8_t*)heap_caps_malloc(frame_.width * frame_.height * 3, - MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT); - if (rotate_src == nullptr) { - ESP_LOGE(TAG, "Failed to allocate memory for rotate image"); - if (ioctl(video_fd_, VIDIOC_QBUF, &buf) != 0) { - ESP_LOGE(TAG, "Cleanup: VIDIOC_QBUF failed"); - } - return false; - } - esp_imgfx_color_convert_cfg_t convert_cfg = { - .in_res = {.width = static_cast(frame_.width), - .height = static_cast(frame_.height)}, - .in_pixel_fmt = ESP_IMGFX_PIXEL_FMT_YUYV, - .out_pixel_fmt = ESP_IMGFX_PIXEL_FMT_RGB888, - }; - esp_imgfx_color_convert_handle_t convert_handle = nullptr; - esp_imgfx_err_t err = esp_imgfx_color_convert_open(&convert_cfg, &convert_handle); - if (err != ESP_IMGFX_ERR_OK || convert_handle == nullptr) { - ESP_LOGE(TAG, "esp_imgfx_color_convert_open failed"); - heap_caps_free(rotate_src); - rotate_src = nullptr; - if (ioctl(video_fd_, VIDIOC_QBUF, &buf) != 0) { - ESP_LOGE(TAG, "Cleanup: VIDIOC_QBUF failed"); - } - return false; - } - esp_imgfx_data_t convert_input_data = { - .data = frame_.data, - .data_len = frame_.len, - }; - esp_imgfx_data_t convert_output_data = { - .data = rotate_src, - .data_len = static_cast(frame_.width * frame_.height * 3), - }; - err = esp_imgfx_color_convert_process(convert_handle, &convert_input_data, &convert_output_data); - if (err != ESP_IMGFX_ERR_OK) { - ESP_LOGE(TAG, "esp_imgfx_color_convert_process failed"); - heap_caps_free(rotate_src); - rotate_src = nullptr; - esp_imgfx_color_convert_close(convert_handle); - convert_handle = nullptr; - if (ioctl(video_fd_, VIDIOC_QBUF, &buf) != 0) { - ESP_LOGE(TAG, "Cleanup: VIDIOC_QBUF failed"); - } - return false; - } - esp_imgfx_color_convert_close(convert_handle); - convert_handle = nullptr; - ppa_color_mode = PPA_SRM_COLOR_MODE_RGB888; - heap_caps_free(frame_.data); - frame_.data = rotate_src; - frame_.len = frame_.width * frame_.height * 3; - break; - } - default: - ESP_LOGE(TAG, "unsupported sensor format for PPA rotation: 0x%08lx", sensor_format_); - if (ioctl(video_fd_, VIDIOC_QBUF, &buf) != 0) { - ESP_LOGE(TAG, "Cleanup: VIDIOC_QBUF failed"); - } - return false; - } - - uint8_t* rotate_dst = (uint8_t*)heap_caps_malloc( - frame_.width * frame_.height * 2, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT | MALLOC_CAP_CACHE_ALIGNED); - if (rotate_dst == nullptr) { - ESP_LOGE(TAG, "Failed to allocate memory for rotate image"); - if (ioctl(video_fd_, VIDIOC_QBUF, &buf) != 0) { - ESP_LOGE(TAG, "Cleanup: VIDIOC_QBUF failed"); - } - return false; - } - - ppa_client_handle_t ppa_client = nullptr; - ppa_client_config_t client_cfg = { - .oper_type = PPA_OPERATION_SRM, - .max_pending_trans_num = 1, - }; - esp_err_t err = ppa_register_client(&client_cfg, &ppa_client); - if (err != ESP_OK || ppa_client == nullptr) { - ESP_LOGE(TAG, "ppa_register_client failed: %d", (int)err); - heap_caps_free(rotate_dst); - rotate_dst = nullptr; - if (ioctl(video_fd_, VIDIOC_QBUF, &buf) != 0) { - ESP_LOGE(TAG, "Cleanup: VIDIOC_QBUF failed"); - } - return false; - } - - ppa_srm_rotation_angle_t ppa_angle = IMAGE_ROTATION_ANGLE; - - ppa_srm_oper_config_t srm_cfg = {}; - srm_cfg.in.buffer = (void*)rotate_src; - srm_cfg.in.pic_w = sensor_width_; - srm_cfg.in.pic_h = sensor_height_; - srm_cfg.in.block_w = sensor_width_; - srm_cfg.in.block_h = sensor_height_; - srm_cfg.in.block_offset_x = 0; - srm_cfg.in.block_offset_y = 0; - srm_cfg.in.srm_cm = ppa_color_mode; - - srm_cfg.out.buffer = (void*)rotate_dst; - srm_cfg.out.buffer_size = frame_.len; - srm_cfg.out.pic_w = frame_.width; - srm_cfg.out.pic_h = frame_.height; - srm_cfg.out.block_offset_x = 0; - srm_cfg.out.block_offset_y = 0; - srm_cfg.out.srm_cm = PPA_SRM_COLOR_MODE_RGB565; - - // 等比例缩放 1.0 - srm_cfg.scale_x = 1.0f; - srm_cfg.scale_y = 1.0f; - srm_cfg.rotation_angle = ppa_angle; - srm_cfg.mode = PPA_TRANS_MODE_BLOCKING; - srm_cfg.user_data = nullptr; - - err = ppa_do_scale_rotate_mirror(ppa_client, &srm_cfg); - if (err != ESP_OK) { - ESP_LOGE(TAG, "ppa_do_scale_rotate_mirror failed: %d", (int)err); - heap_caps_free(rotate_dst); - rotate_dst = nullptr; - (void)ppa_unregister_client(ppa_client); - if (ioctl(video_fd_, VIDIOC_QBUF, &buf) != 0) { - ESP_LOGE(TAG, "Cleanup: VIDIOC_QBUF failed"); - } - return false; - } - - (void)ppa_unregister_client(ppa_client); - - frame_.data = rotate_dst; - frame_.len = frame_.width * frame_.height * 2; - frame_.format = V4L2_PIX_FMT_RGB565; - heap_caps_free(rotate_src); - rotate_src = nullptr; -#endif // CONFIG_SOC_PPA_SUPPORTED -#endif // CONFIG_XIAOZHI_ENABLE_ROTATE_CAMERA_IMAGE - } - - if (ioctl(video_fd_, VIDIOC_QBUF, &buf) != 0) { - ESP_LOGE(TAG, "VIDIOC_QBUF failed"); - } } - // 显示预览图片 - auto display = dynamic_cast(Board::GetInstance().GetDisplay()); - if (display != nullptr) { - if (!frame_.data) { - ESP_LOGE(TAG, "frame.data is null"); + // Perform byte swapping for RGB565 format and prepare preview image + if (current_fb_->format == PIXFORMAT_RGB565) { + size_t pixel_count = current_fb_->width * current_fb_->height; + size_t data_size = pixel_count * 2; + + uint8_t *preview_data = (uint8_t *)heap_caps_malloc(data_size, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT); + if (preview_data == nullptr) { + ESP_LOGE(TAG, "Failed to allocate memory for preview image"); return false; } - uint16_t w = frame_.width; - uint16_t h = frame_.height; - size_t lvgl_image_size = frame_.len; - size_t stride = ((w * 2) + 3) & ~3; // 4字节对齐 - lv_color_format_t color_format = LV_COLOR_FORMAT_RGB565; - uint8_t* data = nullptr; - switch (frame_.format) { - // LVGL 显示 YUV 系的图像似乎都有问题,暂时转换为 RGB565 显示 - case V4L2_PIX_FMT_YUYV: - case V4L2_PIX_FMT_YUV420: - case V4L2_PIX_FMT_RGB24: { - color_format = LV_COLOR_FORMAT_RGB565; - data = (uint8_t*)heap_caps_malloc(w * h * 2, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT); - if (data == nullptr) { - ESP_LOGE(TAG, "Failed to allocate memory for preview image"); - return false; - } - esp_imgfx_color_convert_cfg_t convert_cfg = { - .in_res = {.width = static_cast(frame_.width), - .height = static_cast(frame_.height)}, - .in_pixel_fmt = static_cast(frame_.format), - .out_pixel_fmt = ESP_IMGFX_PIXEL_FMT_RGB565_LE, - .color_space_std = ESP_IMGFX_COLOR_SPACE_STD_BT601, - }; - esp_imgfx_color_convert_handle_t convert_handle = nullptr; - esp_imgfx_err_t err = esp_imgfx_color_convert_open(&convert_cfg, &convert_handle); - if (err != ESP_IMGFX_ERR_OK || convert_handle == nullptr) { - ESP_LOGE(TAG, "esp_imgfx_color_convert_open failed"); - heap_caps_free(data); - data = nullptr; - return false; - } - esp_imgfx_data_t convert_input_data = { - .data = frame_.data, - .data_len = frame_.len, - }; - esp_imgfx_data_t convert_output_data = { - .data = data, - .data_len = static_cast(w * h * 2), - }; - err = esp_imgfx_color_convert_process(convert_handle, &convert_input_data, &convert_output_data); - if (err != ESP_IMGFX_ERR_OK) { - ESP_LOGE(TAG, "esp_imgfx_color_convert_process failed"); - heap_caps_free(data); - data = nullptr; - esp_imgfx_color_convert_close(convert_handle); - convert_handle = nullptr; - return false; - } - esp_imgfx_color_convert_close(convert_handle); - convert_handle = nullptr; - lvgl_image_size = w * h * 2; - break; - } - - case V4L2_PIX_FMT_RGB565: - data = (uint8_t*)heap_caps_malloc(w * h * 2, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT); - if (data == nullptr) { - ESP_LOGE(TAG, "Failed to allocate memory for preview image"); - return false; - } - memcpy(data, frame_.data, frame_.len); - lvgl_image_size = frame_.len; // fallthrough 时兼顾 YUYV 与 RGB565 - break; - -#ifdef CONFIG_XIAOZHI_CAMERA_ALLOW_JPEG_INPUT - case V4L2_PIX_FMT_JPEG: { - uint8_t* out_data = nullptr; // out data is allocated by jpeg_to_image - size_t out_len = 0; - size_t out_width = 0; - size_t out_height = 0; - size_t out_stride = 0; - - esp_err_t ret = - jpeg_to_image(frame_.data, frame_.len, &out_data, &out_len, &out_width, &out_height, &out_stride); - if (ret != ESP_OK) { - ESP_LOGE(TAG, "Failed to decode JPEG image: %d (%s)", (int)ret, esp_err_to_name(ret)); - if (out_data) { - heap_caps_free(out_data); - out_data = nullptr; - } - return false; - } - - data = out_data; - w = out_width; - h = out_height; - lvgl_image_size = out_len; - stride = out_stride; - break; - } -#endif - default: - ESP_LOGE(TAG, "unsupported frame format: 0x%08lx", frame_.format); - return false; + uint16_t *src = (uint16_t *)current_fb_->buf; + uint16_t *dst = (uint16_t *)preview_data; + for (size_t i = 0; i < pixel_count; i++) { + // Copy data from driver buffer to preview buffer with byte swapping + dst[i] = __builtin_bswap16(src[i]); } - auto image = std::make_unique(data, lvgl_image_size, w, h, stride, color_format); - display->SetPreviewImage(std::move(image)); + // Display preview image + auto display = dynamic_cast(Board::GetInstance().GetDisplay()); + if (display != nullptr) { + display->SetPreviewImage(std::make_unique(preview_data, data_size, current_fb_->width, current_fb_->height, current_fb_->width * 2, LV_COLOR_FORMAT_RGB565)); + } else { + heap_caps_free(preview_data); + } + } else if (current_fb_->format == PIXFORMAT_JPEG) { + // JPEG format preview usually requires decoding, skip preview display for now, just log + ESP_LOGW(TAG, "JPEG capture success, len=%zu, but not supported for preview", current_fb_->len); } + + ESP_LOGI(TAG, "Captured frame: %dx%d, len=%zu, format=%d", + current_fb_->width, current_fb_->height, current_fb_->len, current_fb_->format); + return true; } bool Esp32Camera::SetHMirror(bool enabled) { - if (video_fd_ < 0) - return false; - struct v4l2_ext_controls ctrls = {}; - struct v4l2_ext_control ctrl = {}; - ctrl.id = V4L2_CID_HFLIP; - ctrl.value = enabled ? 1 : 0; - ctrls.ctrl_class = V4L2_CTRL_CLASS_USER; - ctrls.count = 1; - ctrls.controls = &ctrl; - if (ioctl(video_fd_, VIDIOC_S_EXT_CTRLS, &ctrls) != 0) { - ESP_LOGE(TAG, "set HFLIP failed"); + sensor_t *s = esp_camera_sensor_get(); + if (!s) { return false; } + s->set_hmirror(s, enabled ? 1 : 0); return true; } bool Esp32Camera::SetVFlip(bool enabled) { - if (video_fd_ < 0) - return false; - struct v4l2_ext_controls ctrls = {}; - struct v4l2_ext_control ctrl = {}; - ctrl.id = V4L2_CID_VFLIP; - ctrl.value = enabled ? 1 : 0; - ctrls.ctrl_class = V4L2_CTRL_CLASS_USER; - ctrls.count = 1; - ctrls.controls = &ctrl; - if (ioctl(video_fd_, VIDIOC_S_EXT_CTRLS, &ctrls) != 0) { - ESP_LOGE(TAG, "set VFLIP failed"); + sensor_t *s = esp_camera_sensor_get(); + if (!s) { return false; } + s->set_vflip(s, enabled ? 1 : 0); return true; } -/** - * @brief 将摄像头捕获的图像发送到远程服务器进行AI分析和解释 - * - * 该函数将当前摄像头缓冲区中的图像编码为JPEG格式,并通过HTTP POST请求 - * 以multipart/form-data的形式发送到指定的解释服务器。服务器将根据提供的 - * 问题对图像进行AI分析并返回结果。 - * - * 实现特点: - * - 使用独立线程编码JPEG,与主线程分离 - * - 采用分块传输编码(chunked transfer encoding)优化内存使用 - * - 通过队列机制实现编码线程和发送线程的数据同步 - * - 支持设备ID、客户端ID和认证令牌的HTTP头部配置 - * - * @param question 要向AI提出的关于图像的问题,将作为表单字段发送 - * @return std::string 服务器返回的JSON格式响应字符串 - * 成功时包含AI分析结果,失败时包含错误信息 - * 格式示例:{"success": true, "result": "分析结果"} - * {"success": false, "message": "错误信息"} - * - * @note 调用此函数前必须先调用SetExplainUrl()设置服务器URL - * @note 函数会等待之前的编码线程完成后再开始新的处理 - * @warning 如果摄像头缓冲区为空或网络连接失败,将返回错误信息 - */ -std::string Esp32Camera::Explain(const std::string& question) { +std::string Esp32Camera::Explain(const std::string &question) { if (explain_url_.empty()) { throw std::runtime_error("Image explain URL or token is not set"); } - // 创建局部的 JPEG 队列, 40 entries is about to store 512 * 40 = 20480 bytes of JPEG data + if (current_fb_ == nullptr) { + throw std::runtime_error("No camera frame captured"); + } + + // Create local JPEG queue QueueHandle_t jpeg_queue = xQueueCreate(40, sizeof(JpegChunk)); if (jpeg_queue == nullptr) { ESP_LOGE(TAG, "Failed to create JPEG queue"); throw std::runtime_error("Failed to create JPEG queue"); } - // We spawn a thread to encode the image to JPEG using optimized encoder (cost about 500ms and 8KB SRAM) + // Start encoding thread encoder_thread_ = std::thread([this, jpeg_queue]() { - uint16_t w = frame_.width ? frame_.width : 320; - uint16_t h = frame_.height ? frame_.height : 240; - v4l2_pix_fmt_t enc_fmt = frame_.format; - bool ok = image_to_jpeg_cb( - frame_.data, frame_.len, w, h, enc_fmt, 80, + int64_t start_time = esp_timer_get_time(); + uint16_t w = current_fb_->width; + uint16_t h = current_fb_->height; + v4l2_pix_fmt_t enc_fmt; + switch (current_fb_->format) { + case PIXFORMAT_RGB565: + enc_fmt = V4L2_PIX_FMT_RGB565; + break; + case PIXFORMAT_YUV422: + enc_fmt = V4L2_PIX_FMT_YUYV; // YUV422 is actually YUYV format + break; + case PIXFORMAT_YUV420: + enc_fmt = V4L2_PIX_FMT_YUV420; + break; + case PIXFORMAT_GRAYSCALE: + enc_fmt = V4L2_PIX_FMT_GREY; + break; + case PIXFORMAT_JPEG: + enc_fmt = V4L2_PIX_FMT_JPEG; + break; + case PIXFORMAT_RGB888: + enc_fmt = V4L2_PIX_FMT_RGB24; + break; + default: + ESP_LOGE(TAG, "Unsupported pixel format: %d", current_fb_->format); + return; + } + + bool ok = image_to_jpeg_cb(current_fb_->buf, current_fb_->len, w, h, enc_fmt, 80, [](void* arg, size_t index, const void* data, size_t len) -> size_t { auto jpeg_queue = static_cast(arg); JpegChunk chunk = {.data = nullptr, .len = len}; @@ -934,21 +189,20 @@ std::string Esp32Camera::Explain(const std::string& question) { } xQueueSend(jpeg_queue, &chunk, portMAX_DELAY); return len; - }, - jpeg_queue); + }, jpeg_queue); if (!ok) { JpegChunk chunk = {.data = nullptr, .len = 0}; xQueueSend(jpeg_queue, &chunk, portMAX_DELAY); } + int64_t end_time = esp_timer_get_time(); + ESP_LOGI(TAG, "JPEG encoding time: %ld ms", int((end_time - start_time) / 1000)); }); auto network = Board::GetInstance().GetNetwork(); auto http = network->CreateHttp(3); - // 构造multipart/form-data请求体 std::string boundary = "----ESP32_CAMERA_BOUNDARY"; - // 配置HTTP客户端,使用分块传输编码 http->SetHeader("Device-Id", SystemInfo::GetMacAddress().c_str()); http->SetHeader("Client-Id", Board::GetInstance().GetUuid().c_str()); if (!explain_token_.empty()) { @@ -958,7 +212,6 @@ std::string Esp32Camera::Explain(const std::string& question) { http->SetHeader("Transfer-Encoding", "chunked"); if (!http->Open("POST", explain_url_)) { ESP_LOGE(TAG, "Failed to connect to explain URL"); - // Clear the queue encoder_thread_.join(); JpegChunk chunk; while (xQueueReceive(jpeg_queue, &chunk, portMAX_DELAY) == pdPASS) { @@ -973,7 +226,6 @@ std::string Esp32Camera::Explain(const std::string& question) { } { - // 第一块:question字段 std::string question_field; question_field += "--" + boundary + "\r\n"; question_field += "Content-Disposition: form-data; name=\"question\"\r\n"; @@ -982,7 +234,6 @@ std::string Esp32Camera::Explain(const std::string& question) { http->Write(question_field.c_str(), question_field.size()); } { - // 第二块:文件字段头部 std::string file_header; file_header += "--" + boundary + "\r\n"; file_header += "Content-Disposition: form-data; name=\"file\"; filename=\"camera.jpg\"\r\n"; @@ -991,7 +242,6 @@ std::string Esp32Camera::Explain(const std::string& question) { http->Write(file_header.c_str(), file_header.size()); } - // 第三块:JPEG数据 size_t total_sent = 0; bool saw_terminator = false; while (true) { @@ -1002,15 +252,13 @@ std::string Esp32Camera::Explain(const std::string& question) { } if (chunk.data == nullptr) { saw_terminator = true; - break; // The last chunk + break; } - http->Write((const char*)chunk.data, chunk.len); + http->Write((const char *)chunk.data, chunk.len); total_sent += chunk.len; heap_caps_free(chunk.data); } - // Wait for the encoder thread to finish encoder_thread_.join(); - // 清理队列 vQueueDelete(jpeg_queue); if (!saw_terminator || total_sent == 0) { @@ -1019,12 +267,10 @@ std::string Esp32Camera::Explain(const std::string& question) { } { - // 第四块:multipart尾部 std::string multipart_footer; multipart_footer += "\r\n--" + boundary + "--\r\n"; http->Write(multipart_footer.c_str(), multipart_footer.size()); } - // 结束块 http->Write("", 0); if (http->GetStatusCode() != 200) { @@ -1035,10 +281,8 @@ std::string Esp32Camera::Explain(const std::string& question) { std::string result = http->ReadAll(); http->Close(); - // Get remain task stack size size_t remain_stack_size = uxTaskGetStackHighWaterMark(nullptr); - ESP_LOGI(TAG, "Explain image size=%d bytes, compressed size=%d, remain stack size=%d, question=%s\n%s", - (int)frame_.len, (int)total_sent, (int)remain_stack_size, question.c_str(), result.c_str()); + ESP_LOGI(TAG, "Explain image size=%dx%d, compressed size=%d, remain stack size=%d, question=%s\n%s", + current_fb_->width, current_fb_->height, (int)total_sent, (int)remain_stack_size, question.c_str(), result.c_str()); return result; } -#endif // CONFIG_IDF_TARGET_ESP32P4 || (CONFIG_IDF_TARGET_ESP32S3 && CONFIG_XIAOZHI_USE_ESP_VIDEO) diff --git a/main/boards/common/esp32_camera.h b/main/boards/common/esp32_camera.h index d5ca0961..308578bc 100644 --- a/main/boards/common/esp32_camera.h +++ b/main/boards/common/esp32_camera.h @@ -1,7 +1,5 @@ #pragma once #include "sdkconfig.h" -// esp32_camera (使用 esp_video 组件) 用于 ESP32-P4,或 ESP32-S3 选择使用 esp_video 时 -#if defined(CONFIG_IDF_TARGET_ESP32P4) || (defined(CONFIG_IDF_TARGET_ESP32S3) && defined(CONFIG_XIAOZHI_USE_ESP_VIDEO)) #include #include @@ -12,46 +10,31 @@ #include #include "camera.h" +#include "esp_camera.h" #include "jpg/image_to_jpeg.h" -#include "esp_video_init.h" -struct JpegChunk { - uint8_t* data; +struct JpegChunk +{ + uint8_t *data; size_t len; }; -class Esp32Camera : public Camera { +class Esp32Camera : public Camera +{ private: - struct FrameBuffer { - uint8_t *data = nullptr; - size_t len = 0; - uint16_t width = 0; - uint16_t height = 0; - v4l2_pix_fmt_t format = 0; - } frame_; - v4l2_pix_fmt_t sensor_format_ = 0; -#ifdef CONFIG_XIAOZHI_ENABLE_ROTATE_CAMERA_IMAGE - uint16_t sensor_width_ = 0; - uint16_t sensor_height_ = 0; -#endif // CONFIG_XIAOZHI_ENABLE_ROTATE_CAMERA_IMAGE - int video_fd_ = -1; bool streaming_on_ = false; - struct MmapBuffer { void *start = nullptr; size_t length = 0; }; - std::vector mmap_buffers_; std::string explain_url_; std::string explain_token_; std::thread encoder_thread_; + camera_fb_t *current_fb_ = nullptr; public: - Esp32Camera(const esp_video_init_config_t& config); + Esp32Camera(const camera_config_t &config); ~Esp32Camera(); - virtual void SetExplainUrl(const std::string& url, const std::string& token); - virtual bool Capture(); - // 翻转控制函数 + virtual void SetExplainUrl(const std::string &url, const std::string &token) override; + virtual bool Capture() override; virtual bool SetHMirror(bool enabled) override; virtual bool SetVFlip(bool enabled) override; - virtual std::string Explain(const std::string& question); + virtual std::string Explain(const std::string &question) override; }; - -#endif // ndef CONFIG_IDF_TARGET_ESP32 diff --git a/main/boards/common/esp32s3_camera.cc b/main/boards/common/esp32s3_camera.cc deleted file mode 100644 index 02e8ac38..00000000 --- a/main/boards/common/esp32s3_camera.cc +++ /dev/null @@ -1,413 +0,0 @@ -#include "sdkconfig.h" - -// esp32s3_camera (使用 esp_camera 组件) 仅用于 ESP32-S3 且选择使用 esp_camera 时 -#if defined(CONFIG_IDF_TARGET_ESP32S3) && defined(CONFIG_XIAOZHI_USE_ESP_CAMERA) - -#include -#include -#include -#include - -#include "esp32s3_camera.h" -#include "board.h" -#include "display.h" -#include "lvgl_display.h" -#include "mcp_server.h" -#include "system_info.h" -#include "jpg/image_to_jpeg.h" - -#define TAG "Esp32S3Camera" - -// V4L2 兼容的格式定义 -#define V4L2_PIX_FMT_RGB565 0x50424752 // 'RGBP' -#define V4L2_PIX_FMT_YUYV 0x56595559 // 'YUYV' -#define V4L2_PIX_FMT_JPEG 0x4745504A // 'JPEG' -#define V4L2_PIX_FMT_RGB24 0x33424752 // 'RGB3' -#define V4L2_PIX_FMT_GREY 0x59455247 // 'GREY' - -static uint32_t pixformat_to_v4l2(pixformat_t fmt) -{ - switch (fmt) - { - case PIXFORMAT_RGB565: - return V4L2_PIX_FMT_RGB565; - case PIXFORMAT_YUV422: - return V4L2_PIX_FMT_YUYV; - case PIXFORMAT_JPEG: - return V4L2_PIX_FMT_JPEG; - case PIXFORMAT_RGB888: - return V4L2_PIX_FMT_RGB24; - case PIXFORMAT_GRAYSCALE: - return V4L2_PIX_FMT_GREY; - default: - return 0; - } -} - -Esp32S3Camera::Esp32S3Camera(const camera_config_t &config) -{ - esp_err_t err = esp_camera_init(&config); - if (err != ESP_OK) - { - ESP_LOGE(TAG, "esp_camera_init failed with error 0x%x", err); - return; - } - - sensor_t *s = esp_camera_sensor_get(); - if (s) - { - frame_.width = config.frame_size == FRAMESIZE_QVGA ? 320 : config.frame_size == FRAMESIZE_VGA ? 640 - : config.frame_size == FRAMESIZE_SVGA ? 800 - : config.frame_size == FRAMESIZE_XGA ? 1024 - : config.frame_size == FRAMESIZE_HD ? 1280 - : config.frame_size == FRAMESIZE_SXGA ? 1280 - : config.frame_size == FRAMESIZE_UXGA ? 1600 - : 320; - frame_.height = config.frame_size == FRAMESIZE_QVGA ? 240 : config.frame_size == FRAMESIZE_VGA ? 480 - : config.frame_size == FRAMESIZE_SVGA ? 600 - : config.frame_size == FRAMESIZE_XGA ? 768 - : config.frame_size == FRAMESIZE_HD ? 720 - : config.frame_size == FRAMESIZE_SXGA ? 1024 - : config.frame_size == FRAMESIZE_UXGA ? 1200 - : 240; - frame_.format = config.pixel_format; - ESP_LOGI(TAG, "Camera initialized: %dx%d, format=%d", frame_.width, frame_.height, config.pixel_format); - } - - streaming_on_ = true; - ESP_LOGI(TAG, "ESP32-S3 Camera init success"); -} - -Esp32S3Camera::~Esp32S3Camera() -{ - if (streaming_on_) - { - if (current_fb_) - { - esp_camera_fb_return(current_fb_); - current_fb_ = nullptr; - } - esp_camera_deinit(); - streaming_on_ = false; - } - if (frame_.data) - { - heap_caps_free(frame_.data); - frame_.data = nullptr; - } -} - -void Esp32S3Camera::SetExplainUrl(const std::string &url, const std::string &token) -{ - explain_url_ = url; - explain_token_ = token; -} - -bool Esp32S3Camera::Capture() -{ - if (encoder_thread_.joinable()) - { - encoder_thread_.join(); - } - - if (!streaming_on_) - { - return false; - } - - // 释放之前的帧 - if (current_fb_) - { - esp_camera_fb_return(current_fb_); - current_fb_ = nullptr; - } - - // 丢弃前两帧,获取最新帧 - for (int i = 0; i < 3; i++) - { - camera_fb_t *fb = esp_camera_fb_get(); - if (!fb) - { - ESP_LOGE(TAG, "Camera capture failed"); - return false; - } - if (i < 2) - { - esp_camera_fb_return(fb); - } - else - { - current_fb_ = fb; - } - } - - if (!current_fb_) - { - ESP_LOGE(TAG, "Failed to get frame buffer"); - return false; - } - - // 保存帧副本到 PSRAM - if (frame_.data) - { - heap_caps_free(frame_.data); - frame_.data = nullptr; - } - - frame_.len = current_fb_->len; - frame_.width = current_fb_->width; - frame_.height = current_fb_->height; - frame_.format = current_fb_->format; - - frame_.data = (uint8_t *)heap_caps_malloc(frame_.len, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT); - if (!frame_.data) - { - ESP_LOGE(TAG, "Failed to allocate %zu bytes for frame copy", frame_.len); - esp_camera_fb_return(current_fb_); - current_fb_ = nullptr; - return false; - } - memcpy(frame_.data, current_fb_->buf, frame_.len); - - // 释放原始帧 - esp_camera_fb_return(current_fb_); - current_fb_ = nullptr; - - // 对 RGB565 格式进行字节交换 (Big Endian <-> Little Endian) - // 这样 frame_.data 就是已交换的数据,显示和上传都使用相同的数据 - if (frame_.format == PIXFORMAT_RGB565) - { - uint8_t *data = frame_.data; - size_t pixel_count = frame_.width * frame_.height; - for (size_t i = 0; i < pixel_count; i++) - { - uint8_t temp = data[2 * i]; - data[2 * i] = data[2 * i + 1]; - data[2 * i + 1] = temp; - } - } - - ESP_LOGD(TAG, "Captured frame: %dx%d, len=%zu, format=%d", - frame_.width, frame_.height, frame_.len, frame_.format); - - // 显示预览图片 - auto display = dynamic_cast(Board::GetInstance().GetDisplay()); - if (display != nullptr) - { - if (!frame_.data) - { - ESP_LOGE(TAG, "frame.data is null"); - return false; - } - - uint16_t w = frame_.width; - uint16_t h = frame_.height; - size_t lvgl_image_size = frame_.len; - size_t stride = ((w * 2) + 3) & ~3; // 4字节对齐 - lv_color_format_t color_format = LV_COLOR_FORMAT_RGB565; - uint8_t *data = nullptr; - - switch (frame_.format) - { - case PIXFORMAT_RGB565: - // frame_.data 已经在捕获阶段完成了字节交换,直接复制即可 - data = (uint8_t *)heap_caps_malloc(w * h * 2, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT); - if (data == nullptr) - { - ESP_LOGE(TAG, "Failed to allocate memory for preview image"); - return false; - } - memcpy(data, frame_.data, frame_.len); - lvgl_image_size = frame_.len; - break; - - case PIXFORMAT_JPEG: - // JPEG 格式需要解码 - 跳过预览显示 - ESP_LOGD(TAG, "JPEG format preview not supported, skipping display"); - return true; - - default: - ESP_LOGE(TAG, "Unsupported frame format for preview: %d", frame_.format); - return true; // 仍然返回 true,因为捕获成功 - } - - if (data) - { - auto image = std::make_unique(data, lvgl_image_size, w, h, stride, color_format); - display->SetPreviewImage(std::move(image)); - } - } - return true; -} - -bool Esp32S3Camera::SetHMirror(bool enabled) -{ - sensor_t *s = esp_camera_sensor_get(); - if (!s) - { - return false; - } - s->set_hmirror(s, enabled ? 1 : 0); - return true; -} - -bool Esp32S3Camera::SetVFlip(bool enabled) -{ - sensor_t *s = esp_camera_sensor_get(); - if (!s) - { - return false; - } - s->set_vflip(s, enabled ? 1 : 0); - return true; -} - -std::string Esp32S3Camera::Explain(const std::string &question) -{ - if (explain_url_.empty()) - { - throw std::runtime_error("Image explain URL or token is not set"); - } - - // 创建局部的 JPEG 队列 - QueueHandle_t jpeg_queue = xQueueCreate(40, sizeof(JpegChunk)); - if (jpeg_queue == nullptr) - { - ESP_LOGE(TAG, "Failed to create JPEG queue"); - throw std::runtime_error("Failed to create JPEG queue"); - } - - // 转换格式为 v4l2 兼容格式 - uint32_t v4l2_format = pixformat_to_v4l2(frame_.format); - - // 启动编码线程 - encoder_thread_ = std::thread([this, jpeg_queue, v4l2_format]() - { - uint16_t w = frame_.width ? frame_.width : 320; - uint16_t h = frame_.height ? frame_.height : 240; - bool ok = image_to_jpeg_cb( - frame_.data, frame_.len, w, h, static_cast(v4l2_format), 80, - [](void* arg, size_t index, const void* data, size_t len) -> size_t { - auto jpeg_queue = static_cast(arg); - JpegChunk chunk = {.data = nullptr, .len = len}; - if (index == 0 && data != nullptr && len > 0) { - chunk.data = (uint8_t*)heap_caps_aligned_alloc(16, len, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT); - if (chunk.data == nullptr) { - ESP_LOGE(TAG, "Failed to allocate %zu bytes for JPEG chunk", len); - chunk.len = 0; - } else { - memcpy(chunk.data, data, len); - } - } else { - chunk.len = 0; - } - xQueueSend(jpeg_queue, &chunk, portMAX_DELAY); - return len; - }, - jpeg_queue); - - if (!ok) { - JpegChunk chunk = {.data = nullptr, .len = 0}; - xQueueSend(jpeg_queue, &chunk, portMAX_DELAY); - } }); - - auto network = Board::GetInstance().GetNetwork(); - auto http = network->CreateHttp(3); - std::string boundary = "----ESP32_CAMERA_BOUNDARY"; - - http->SetHeader("Device-Id", SystemInfo::GetMacAddress().c_str()); - http->SetHeader("Client-Id", Board::GetInstance().GetUuid().c_str()); - if (!explain_token_.empty()) - { - http->SetHeader("Authorization", "Bearer " + explain_token_); - } - http->SetHeader("Content-Type", "multipart/form-data; boundary=" + boundary); - http->SetHeader("Transfer-Encoding", "chunked"); - if (!http->Open("POST", explain_url_)) - { - ESP_LOGE(TAG, "Failed to connect to explain URL"); - encoder_thread_.join(); - JpegChunk chunk; - while (xQueueReceive(jpeg_queue, &chunk, portMAX_DELAY) == pdPASS) - { - if (chunk.data != nullptr) - { - heap_caps_free(chunk.data); - } - else - { - break; - } - } - vQueueDelete(jpeg_queue); - throw std::runtime_error("Failed to connect to explain URL"); - } - - { - std::string question_field; - question_field += "--" + boundary + "\r\n"; - question_field += "Content-Disposition: form-data; name=\"question\"\r\n"; - question_field += "\r\n"; - question_field += question + "\r\n"; - http->Write(question_field.c_str(), question_field.size()); - } - { - std::string file_header; - file_header += "--" + boundary + "\r\n"; - file_header += "Content-Disposition: form-data; name=\"file\"; filename=\"camera.jpg\"\r\n"; - file_header += "Content-Type: image/jpeg\r\n"; - file_header += "\r\n"; - http->Write(file_header.c_str(), file_header.size()); - } - - size_t total_sent = 0; - bool saw_terminator = false; - while (true) - { - JpegChunk chunk; - if (xQueueReceive(jpeg_queue, &chunk, portMAX_DELAY) != pdPASS) - { - ESP_LOGE(TAG, "Failed to receive JPEG chunk"); - break; - } - if (chunk.data == nullptr) - { - saw_terminator = true; - break; - } - http->Write((const char *)chunk.data, chunk.len); - total_sent += chunk.len; - heap_caps_free(chunk.data); - } - encoder_thread_.join(); - vQueueDelete(jpeg_queue); - - if (!saw_terminator || total_sent == 0) - { - ESP_LOGE(TAG, "JPEG encoder failed or produced empty output"); - throw std::runtime_error("Failed to encode image to JPEG"); - } - - { - std::string multipart_footer; - multipart_footer += "\r\n--" + boundary + "--\r\n"; - http->Write(multipart_footer.c_str(), multipart_footer.size()); - } - http->Write("", 0); - - if (http->GetStatusCode() != 200) - { - ESP_LOGE(TAG, "Failed to upload photo, status code: %d", http->GetStatusCode()); - throw std::runtime_error("Failed to upload photo"); - } - - std::string result = http->ReadAll(); - http->Close(); - - size_t remain_stack_size = uxTaskGetStackHighWaterMark(nullptr); - ESP_LOGI(TAG, "Explain image size=%d bytes, compressed size=%d, remain stack size=%d, question=%s\n%s", - (int)frame_.len, (int)total_sent, (int)remain_stack_size, question.c_str(), result.c_str()); - return result; -} - -#endif // CONFIG_IDF_TARGET_ESP32S3 && CONFIG_XIAOZHI_USE_ESP_CAMERA diff --git a/main/boards/common/esp32s3_camera.h b/main/boards/common/esp32s3_camera.h deleted file mode 100644 index 09749428..00000000 --- a/main/boards/common/esp32s3_camera.h +++ /dev/null @@ -1,53 +0,0 @@ -#pragma once -#include "sdkconfig.h" - -// esp32s3_camera (使用 esp_camera 组件) 仅用于 ESP32-S3 且选择使用 esp_camera 时 -#if defined(CONFIG_IDF_TARGET_ESP32S3) && defined(CONFIG_XIAOZHI_USE_ESP_CAMERA) - -#include -#include -#include -#include - -#include -#include - -#include "camera.h" -#include "esp_camera.h" - -struct JpegChunk -{ - uint8_t *data; - size_t len; -}; - -class Esp32S3Camera : public Camera -{ -private: - struct FrameBuffer - { - uint8_t *data = nullptr; - size_t len = 0; - uint16_t width = 0; - uint16_t height = 0; - pixformat_t format = PIXFORMAT_RGB565; - } frame_; - - bool streaming_on_ = false; - std::string explain_url_; - std::string explain_token_; - std::thread encoder_thread_; - camera_fb_t *current_fb_ = nullptr; - -public: - Esp32S3Camera(const camera_config_t &config); - ~Esp32S3Camera(); - - virtual void SetExplainUrl(const std::string &url, const std::string &token) override; - virtual bool Capture() override; - virtual bool SetHMirror(bool enabled) override; - virtual bool SetVFlip(bool enabled) override; - virtual std::string Explain(const std::string &question) override; -}; - -#endif // CONFIG_IDF_TARGET_ESP32S3 && CONFIG_XIAOZHI_USE_ESP_CAMERA diff --git a/main/boards/common/esp_video.cc b/main/boards/common/esp_video.cc new file mode 100644 index 00000000..9fd4b121 --- /dev/null +++ b/main/boards/common/esp_video.cc @@ -0,0 +1,1041 @@ +#include "sdkconfig.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "esp_imgfx_color_convert.h" +#include "esp_video_device.h" +#include "esp_video_init.h" +#include "linux/videodev2.h" + +#include "board.h" +#include "display.h" +#include "esp_video.h" +#include "esp_jpeg_common.h" +#include "jpg/image_to_jpeg.h" +#include "jpg/jpeg_to_image.h" +#include "lvgl_display.h" +#include "mcp_server.h" +#include "system_info.h" + +#ifdef CONFIG_XIAOZHI_ENABLE_CAMERA_DEBUG_MODE +#undef LOG_LOCAL_LEVEL +#define LOG_LOCAL_LEVEL MAX(CONFIG_LOG_DEFAULT_LEVEL, ESP_LOG_DEBUG) +#endif // CONFIG_XIAOZHI_ENABLE_CAMERA_DEBUG_MODE +#include // should be after LOCAL_LOG_LEVEL definition + +#ifdef CONFIG_XIAOZHI_ENABLE_ROTATE_CAMERA_IMAGE +#ifdef CONFIG_IDF_TARGET_ESP32P4 +#include "driver/ppa.h" +#if defined(CONFIG_XIAOZHI_CAMERA_IMAGE_ROTATION_ANGLE_90) +#define IMAGE_ROTATION_ANGLE (PPA_SRM_ROTATION_ANGLE_270) +#elif defined(CONFIG_XIAOZHI_CAMERA_IMAGE_ROTATION_ANGLE_270) +#define IMAGE_ROTATION_ANGLE (PPA_SRM_ROTATION_ANGLE_90) +#else +#error "CONFIG_XIAOZHI_CAMERA_IMAGE_ROTATION_ANGLE is not set" +#endif // angle +#else // target +#include "esp_imgfx_rotate.h" +#if defined(CONFIG_XIAOZHI_CAMERA_IMAGE_ROTATION_ANGLE_90) +#define IMAGE_ROTATION_ANGLE (90) +#elif defined(CONFIG_XIAOZHI_CAMERA_IMAGE_ROTATION_ANGLE_270) +#define IMAGE_ROTATION_ANGLE (270) +#else +#error "CONFIG_XIAOZHI_CAMERA_IMAGE_ROTATION_ANGLE is not set" +#endif // angle +#endif // target +#endif // CONFIG_XIAOZHI_ENABLE_ROTATE_CAMERA_IMAGE + + +#define TAG "EspVideo" + +#if defined(CONFIG_CAMERA_SENSOR_SWAP_PIXEL_BYTE_ORDER) || defined(CONFIG_XIAOZHI_ENABLE_CAMERA_ENDIANNESS_SWAP) +#warning \ + "CAMERA_SENSOR_SWAP_PIXEL_BYTE_ORDER or CONFIG_XIAOZHI_ENABLE_CAMERA_ENDIANNESS_SWAP is enabled, which may cause image corruption in YUV422 format!" +#endif + +#if CONFIG_XIAOZHI_ENABLE_CAMERA_DEBUG_MODE +#define CAM_PRINT_FOURCC(pixelformat) \ + char fourcc[5]; \ + fourcc[0] = pixelformat & 0xFF; \ + fourcc[1] = (pixelformat >> 8) & 0xFF; \ + fourcc[2] = (pixelformat >> 16) & 0xFF; \ + fourcc[3] = (pixelformat >> 24) & 0xFF; \ + fourcc[4] = '\0'; \ + ESP_LOGD(TAG, "FOURCC: '%c%c%c%c'", fourcc[0], fourcc[1], fourcc[2], fourcc[3]); + +// for compatibility with old esp_video version +#ifndef MAP_FAILED +#define MAP_FAILED nullptr +#endif + +__attribute__((weak)) esp_err_t esp_video_deinit(void) { + return ESP_ERR_NOT_SUPPORTED; +} +// end of for compatibility with old esp_video version + +static void log_available_video_devices() { + for (int i = 0; i < 50; i++) { + char path[16]; + snprintf(path, sizeof(path), "/dev/video%d", i); + int fd = open(path, O_RDONLY); + if (fd >= 0) { + ESP_LOGD(TAG, "found video device: %s", path); + close(fd); + } + } +} +#else +#define CAM_PRINT_FOURCC(pixelformat) (void)0; +#endif // CONFIG_XIAOZHI_ENABLE_CAMERA_DEBUG_MODE + +EspVideo::EspVideo(const esp_video_init_config_t& config) { + if (esp_video_init(&config) != ESP_OK) { + ESP_LOGE(TAG, "esp_video_init failed"); + return; + } + +#ifdef CONFIG_XIAOZHI_ENABLE_CAMERA_DEBUG_MODE + esp_log_level_set(TAG, ESP_LOG_DEBUG); +#endif // CONFIG_XIAOZHI_ENABLE_CAMERA_DEBUG_MODE + + const char* video_device_name = nullptr; + + if (false) { /* 用于构建 else if */ + } +#if CONFIG_ESP_VIDEO_ENABLE_MIPI_CSI_VIDEO_DEVICE + else if (config.csi != nullptr) { + video_device_name = ESP_VIDEO_MIPI_CSI_DEVICE_NAME; + } +#endif +#if CONFIG_ESP_VIDEO_ENABLE_DVP_VIDEO_DEVICE + else if (config.dvp != nullptr) { + video_device_name = ESP_VIDEO_DVP_DEVICE_NAME; + } +#endif +#if CONFIG_ESP_VIDEO_ENABLE_HW_JPEG_VIDEO_DEVICE + else if (config.jpeg != nullptr) { + video_device_name = ESP_VIDEO_JPEG_DEVICE_NAME; + } +#endif +#if CONFIG_ESP_VIDEO_ENABLE_SPI_VIDEO_DEVICE + else if (config.spi != nullptr) { + video_device_name = ESP_VIDEO_SPI_DEVICE_NAME; + } +#endif +#if CONFIG_ESP_VIDEO_ENABLE_USB_UVC_VIDEO_DEVICE + else if (config.usb_uvc != nullptr) { + video_device_name = ESP_VIDEO_USB_UVC_DEVICE_NAME(0); + } +#endif + + if (video_device_name == nullptr) { + ESP_LOGE(TAG, "no video device is enabled"); + return; + } + + video_fd_ = open(video_device_name, O_RDWR); + + if (video_fd_ < 0) { + ESP_LOGE(TAG, "open %s failed, errno=%d(%s)", video_device_name, errno, strerror(errno)); +#if CONFIG_XIAOZHI_ENABLE_CAMERA_DEBUG_MODE + log_available_video_devices(); +#endif // CONFIG_XIAOZHI_ENABLE_CAMERA_DEBUG_MODE + return; + } + + struct v4l2_capability cap = {}; + if (ioctl(video_fd_, VIDIOC_QUERYCAP, &cap) != 0) { + ESP_LOGE(TAG, "VIDIOC_QUERYCAP failed, errno=%d(%s)", errno, strerror(errno)); + close(video_fd_); + video_fd_ = -1; + return; + } + + ESP_LOGD( + TAG, + "VIDIOC_QUERYCAP: driver=%s, card=%s, bus_info=%s, version=0x%08lx, capabilities=0x%08lx, device_caps=0x%08lx", + cap.driver, cap.card, cap.bus_info, cap.version, cap.capabilities, cap.device_caps); + + struct v4l2_format format = {}; + format.type = V4L2_BUF_TYPE_VIDEO_CAPTURE; + if (ioctl(video_fd_, VIDIOC_G_FMT, &format) != 0) { + ESP_LOGE(TAG, "VIDIOC_G_FMT failed, errno=%d(%s)", errno, strerror(errno)); + close(video_fd_); + video_fd_ = -1; + return; + } + ESP_LOGD(TAG, "VIDIOC_G_FMT: pixelformat=0x%08lx, width=%ld, height=%ld", format.fmt.pix.pixelformat, + format.fmt.pix.width, format.fmt.pix.height); + CAM_PRINT_FOURCC(format.fmt.pix.pixelformat); + + struct v4l2_format setformat = {}; + setformat.type = V4L2_BUF_TYPE_VIDEO_CAPTURE; +#ifdef CONFIG_XIAOZHI_ENABLE_ROTATE_CAMERA_IMAGE + sensor_width_ = format.fmt.pix.width; + sensor_height_ = format.fmt.pix.height; +#endif // CONFIG_XIAOZHI_ENABLE_ROTATE_CAMERA_IMAGE + setformat.fmt.pix.width = format.fmt.pix.width; + setformat.fmt.pix.height = format.fmt.pix.height; + + struct v4l2_fmtdesc fmtdesc = {}; + fmtdesc.type = V4L2_BUF_TYPE_VIDEO_CAPTURE; + fmtdesc.index = 0; + uint32_t best_fmt = 0; + int best_rank = 1 << 30; // large number + + // 注: 当前版本 esp_video 中 YUV422P 实际输出为 YUYV。 +#if defined(CONFIG_XIAOZHI_ENABLE_ROTATE_CAMERA_IMAGE) && defined(CONFIG_SOC_PPA_SUPPORTED) + auto get_rank = [](uint32_t fmt) -> int { + switch (fmt) { + case V4L2_PIX_FMT_RGB24: + return 0; + case V4L2_PIX_FMT_RGB565: + return 1; +#ifdef CONFIG_XIAOZHI_ENABLE_HARDWARE_JPEG_ENCODER + case V4L2_PIX_FMT_YUV420: // 软件 JPEG 编码器不支持 YUV420 格式 + return 2; +#endif // CONFIG_XIAOZHI_ENABLE_HARDWARE_JPEG_ENCODER + case V4L2_PIX_FMT_GREY: + case V4L2_PIX_FMT_YUV422P: + default: + return 1 << 29; // unsupported + } + }; +#else + auto get_rank = [](uint32_t fmt) -> int { + switch (fmt) { + case V4L2_PIX_FMT_YUV422P: + return 10; + case V4L2_PIX_FMT_RGB565: + return 11; + case V4L2_PIX_FMT_RGB24: + return 12; +#ifdef CONFIG_XIAOZHI_ENABLE_HARDWARE_JPEG_ENCODER + case V4L2_PIX_FMT_YUV420: + return 13; +#endif // CONFIG_XIAOZHI_ENABLE_HARDWARE_JPEG_ENCODER +#ifdef CONFIG_XIAOZHI_CAMERA_ALLOW_JPEG_INPUT + case V4L2_PIX_FMT_JPEG: + return 5; +#endif // CONFIG_XIAOZHI_CAMERA_ALLOW_JPEG_INPUT + case V4L2_PIX_FMT_GREY: + return 20; + default: + return 1 << 29; // unsupported + } + }; +#endif + while (ioctl(video_fd_, VIDIOC_ENUM_FMT, &fmtdesc) == 0) { + ESP_LOGD(TAG, "VIDIOC_ENUM_FMT: pixelformat=0x%08lx, description=%s", fmtdesc.pixelformat, fmtdesc.description); + CAM_PRINT_FOURCC(fmtdesc.pixelformat); + int rank = get_rank(fmtdesc.pixelformat); + if (rank < best_rank) { + best_rank = rank; + best_fmt = fmtdesc.pixelformat; + } + fmtdesc.index++; + } + if (best_rank < (1 << 29)) { + setformat.fmt.pix.pixelformat = best_fmt; + sensor_format_ = best_fmt; + } + + if (!setformat.fmt.pix.pixelformat) { + ESP_LOGE(TAG, "no supported pixel format found"); + close(video_fd_); + video_fd_ = -1; + sensor_format_ = 0; + return; + } + + ESP_LOGD(TAG, "selected pixel format: 0x%08lx", setformat.fmt.pix.pixelformat); + + if (ioctl(video_fd_, VIDIOC_S_FMT, &setformat) != 0) { + ESP_LOGE(TAG, "VIDIOC_S_FMT failed, errno=%d(%s)", errno, strerror(errno)); + close(video_fd_); + video_fd_ = -1; + sensor_format_ = 0; + return; + } + +#ifdef CONFIG_XIAOZHI_ENABLE_ROTATE_CAMERA_IMAGE + frame_.width = setformat.fmt.pix.height; + frame_.height = setformat.fmt.pix.width; +#else + frame_.width = setformat.fmt.pix.width; + frame_.height = setformat.fmt.pix.height; +#endif + + // 申请缓冲并mmap + struct v4l2_requestbuffers req = {}; + req.count = strcmp(video_device_name, ESP_VIDEO_MIPI_CSI_DEVICE_NAME) == 0 ? 2 : 1; + req.type = V4L2_BUF_TYPE_VIDEO_CAPTURE; + req.memory = V4L2_MEMORY_MMAP; + if (ioctl(video_fd_, VIDIOC_REQBUFS, &req) != 0) { + ESP_LOGE(TAG, "VIDIOC_REQBUFS failed"); + close(video_fd_); + video_fd_ = -1; + sensor_format_ = 0; + return; + } + mmap_buffers_.resize(req.count); + for (uint32_t i = 0; i < req.count; i++) { + struct v4l2_buffer buf = {}; + buf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE; + buf.memory = V4L2_MEMORY_MMAP; + buf.index = i; + if (ioctl(video_fd_, VIDIOC_QUERYBUF, &buf) != 0) { + ESP_LOGE(TAG, "VIDIOC_QUERYBUF failed"); + close(video_fd_); + video_fd_ = -1; + sensor_format_ = 0; + return; + } + void* start = mmap(NULL, buf.length, PROT_READ | PROT_WRITE, MAP_SHARED, video_fd_, buf.m.offset); + if (start == MAP_FAILED) { + ESP_LOGE(TAG, "mmap failed"); + close(video_fd_); + video_fd_ = -1; + sensor_format_ = 0; + return; + } + mmap_buffers_[i].start = start; + mmap_buffers_[i].length = buf.length; + + if (ioctl(video_fd_, VIDIOC_QBUF, &buf) != 0) { + ESP_LOGE(TAG, "VIDIOC_QBUF failed"); + close(video_fd_); + video_fd_ = -1; + sensor_format_ = 0; + return; + } + } + + int type = V4L2_BUF_TYPE_VIDEO_CAPTURE; + if (ioctl(video_fd_, VIDIOC_STREAMON, &type) != 0) { + ESP_LOGE(TAG, "VIDIOC_STREAMON failed"); + close(video_fd_); + video_fd_ = -1; + sensor_format_ = 0; + return; + } + +#ifdef CONFIG_ESP_VIDEO_ENABLE_ISP_VIDEO_DEVICE + // 当启用 ISP 时,ISP 需要一些照片来初始化参数,因此开启后后台拍摄5s照片并丢弃 + xTaskCreate( + [](void* arg) { + EspVideo* self = static_cast(arg); + uint16_t capture_count = 0; + TickType_t start = xTaskGetTickCount(); + TickType_t duration = 5000 / portTICK_PERIOD_MS; // 5s + while ((xTaskGetTickCount() - start) < duration) { + struct v4l2_buffer buf = {}; + buf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE; + buf.memory = V4L2_MEMORY_MMAP; + if (ioctl(self->video_fd_, VIDIOC_DQBUF, &buf) != 0) { + ESP_LOGE(TAG, "VIDIOC_DQBUF failed during init"); + vTaskDelay(10 / portTICK_PERIOD_MS); + continue; + } + if (ioctl(self->video_fd_, VIDIOC_QBUF, &buf) != 0) { + ESP_LOGE(TAG, "VIDIOC_QBUF failed during init"); + } + capture_count++; + } + ESP_LOGI(TAG, "Camera init success, captured %d frames in %lums", capture_count, + (unsigned long)((xTaskGetTickCount() - start) * portTICK_PERIOD_MS)); + self->streaming_on_ = true; + vTaskDelete(NULL); + }, + "CameraInitTask", 4096, this, 5, nullptr); +#else + ESP_LOGI(TAG, "Camera init success"); + streaming_on_ = true; +#endif // CONFIG_ESP_VIDEO_ENABLE_ISP_VIDEO_DEVICE +} + +EspVideo::~EspVideo() { + if (streaming_on_ && video_fd_ >= 0) { + int type = V4L2_BUF_TYPE_VIDEO_CAPTURE; + ioctl(video_fd_, VIDIOC_STREAMOFF, &type); + } + for (auto& b : mmap_buffers_) { + if (b.start && b.length) { + munmap(b.start, b.length); + } + } + if (video_fd_ >= 0) { + close(video_fd_); + video_fd_ = -1; + } + sensor_format_ = 0; + esp_video_deinit(); +} + +void EspVideo::SetExplainUrl(const std::string& url, const std::string& token) { + explain_url_ = url; + explain_token_ = token; +} + +bool EspVideo::Capture() { + if (encoder_thread_.joinable()) { + encoder_thread_.join(); + } + + if (!streaming_on_ || video_fd_ < 0) { + return false; + } + + for (int i = 0; i < 3; i++) { + struct v4l2_buffer buf = {}; + buf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE; + buf.memory = V4L2_MEMORY_MMAP; + if (ioctl(video_fd_, VIDIOC_DQBUF, &buf) != 0) { + ESP_LOGE(TAG, "VIDIOC_DQBUF failed"); + return false; + } + if (i == 2) { + // 保存帧副本到PSRAM + if (frame_.data) { + heap_caps_free(frame_.data); + frame_.data = nullptr; + frame_.format = 0; + } + frame_.len = buf.bytesused; + frame_.data = (uint8_t*)heap_caps_malloc(frame_.len, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT); + if (!frame_.data) { + ESP_LOGE(TAG, "alloc frame copy failed: need allocate %lu bytes", buf.bytesused); + if (ioctl(video_fd_, VIDIOC_QBUF, &buf) != 0) { + ESP_LOGE(TAG, "Cleanup: VIDIOC_QBUF failed"); + } + return false; + } + +#ifdef CONFIG_XIAOZHI_ENABLE_ROTATE_CAMERA_IMAGE + ESP_LOGW(TAG, "mmap_buffers_[buf.index].length = %d, sensor_width = %d, sensor_height = %d", + mmap_buffers_[buf.index].length, sensor_width_, sensor_height_); +#else + ESP_LOGW(TAG, "mmap_buffers_[buf.index].length = %d, frame.width = %d, frame.height = %d", + mmap_buffers_[buf.index].length, frame_.width, frame_.height); +#endif // CONFIG_XIAOZHI_ENABLE_ROTATE_CAMERA_IMAGE + ESP_LOG_BUFFER_HEXDUMP(TAG, mmap_buffers_[buf.index].start, MIN(mmap_buffers_[buf.index].length, 256), + ESP_LOG_DEBUG); + + switch (sensor_format_) { + case V4L2_PIX_FMT_RGB565: + case V4L2_PIX_FMT_RGB24: + case V4L2_PIX_FMT_YUYV: + case V4L2_PIX_FMT_YUV420: + case V4L2_PIX_FMT_GREY: +#ifdef CONFIG_XIAOZHI_CAMERA_ALLOW_JPEG_INPUT + case V4L2_PIX_FMT_JPEG: +#endif // CONFIG_XIAOZHI_CAMERA_ALLOW_JPEG_INPUT +#ifdef CONFIG_XIAOZHI_ENABLE_CAMERA_ENDIANNESS_SWAP + { + auto src16 = (uint16_t*)mmap_buffers_[buf.index].start; + auto dst16 = (uint16_t*)frame_.data; + size_t count = (size_t)mmap_buffers_[buf.index].length / 2; + for (size_t i = 0; i < count; i++) { + dst16[i] = __builtin_bswap16(src16[i]); + } + } +#else + memcpy(frame_.data, mmap_buffers_[buf.index].start, + MIN(mmap_buffers_[buf.index].length, frame_.len)); +#endif // CONFIG_XIAOZHI_ENABLE_CAMERA_ENDIANNESS_SWAP + frame_.format = sensor_format_; + break; + case V4L2_PIX_FMT_YUV422P: { + // 这个格式是 422 YUYV,不是 planer + frame_.format = V4L2_PIX_FMT_YUYV; +#ifdef CONFIG_XIAOZHI_ENABLE_CAMERA_ENDIANNESS_SWAP + { + auto src16 = (uint16_t*)mmap_buffers_[buf.index].start; + auto dst16 = (uint16_t*)frame_.data; + size_t count = (size_t)mmap_buffers_[buf.index].length / 2; + for (size_t i = 0; i < count; i++) { + dst16[i] = __builtin_bswap16(src16[i]); + } + } +#else + memcpy(frame_.data, mmap_buffers_[buf.index].start, + MIN(mmap_buffers_[buf.index].length, frame_.len)); +#endif // CONFIG_XIAOZHI_ENABLE_CAMERA_ENDIANNESS_SWAP + break; + } + case V4L2_PIX_FMT_RGB565X: { + // 大端序的 RGB565 需要转换为小端序 + // 目前 esp_video 的大小端都会返回格式为 RGB565,不会返回格式为 RGB565X,此 case 用于未来版本兼容 + auto src16 = (uint16_t*)mmap_buffers_[buf.index].start; + auto dst16 = (uint16_t*)frame_.data; + size_t pixel_count = (size_t)frame_.width * (size_t)frame_.height; + for (size_t i = 0; i < pixel_count; i++) { + dst16[i] = __builtin_bswap16(src16[i]); + } + frame_.format = V4L2_PIX_FMT_RGB565; + break; + } + default: + ESP_LOGE(TAG, "unsupported sensor format: 0x%08lx", sensor_format_); + if (ioctl(video_fd_, VIDIOC_QBUF, &buf) != 0) { + ESP_LOGE(TAG, "Cleanup: VIDIOC_QBUF failed"); + } + return false; + } + +#ifdef CONFIG_XIAOZHI_ENABLE_ROTATE_CAMERA_IMAGE +#ifndef CONFIG_SOC_PPA_SUPPORTED + uint8_t* rotate_dst = + (uint8_t*)heap_caps_aligned_alloc(64, frame_.len, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT); + if (rotate_dst == nullptr) { + ESP_LOGE(TAG, "Failed to allocate memory for rotate image"); + if (ioctl(video_fd_, VIDIOC_QBUF, &buf) != 0) { + ESP_LOGE(TAG, "Cleanup: VIDIOC_QBUF failed"); + } + return false; + } + uint8_t* rotate_src = (uint8_t*)frame_.data; + + esp_imgfx_rotate_cfg_t rotate_cfg = { + .in_res = + { + .width = static_cast(sensor_width_), + .height = static_cast(sensor_height_), + }, + .degree = IMAGE_ROTATION_ANGLE, + }; + switch (frame_.format) { + case V4L2_PIX_FMT_RGB565: + rotate_cfg.in_pixel_fmt = ESP_IMGFX_PIXEL_FMT_RGB565_LE; + break; + case V4L2_PIX_FMT_YUYV: + rotate_cfg.in_pixel_fmt = ESP_IMGFX_PIXEL_FMT_RGB565_LE; + break; + case V4L2_PIX_FMT_GREY: + rotate_cfg.in_pixel_fmt = ESP_IMGFX_PIXEL_FMT_Y; + break; + case V4L2_PIX_FMT_RGB24: + rotate_cfg.in_pixel_fmt = ESP_IMGFX_PIXEL_FMT_RGB888; + break; + default: + ESP_LOGE(TAG, "unsupported sensor format: 0x%08lx", sensor_format_); + if (ioctl(video_fd_, VIDIOC_QBUF, &buf) != 0) { + ESP_LOGE(TAG, "Cleanup: VIDIOC_QBUF failed"); + } + return false; + } + esp_imgfx_rotate_handle_t rotate_handle = nullptr; + esp_imgfx_err_t imgfx_err = esp_imgfx_rotate_open(&rotate_cfg, &rotate_handle); + if (imgfx_err != ESP_IMGFX_ERR_OK || rotate_handle == nullptr) { + ESP_LOGE(TAG, "esp_imgfx_rotate_create failed"); + if (ioctl(video_fd_, VIDIOC_QBUF, &buf) != 0) { + ESP_LOGE(TAG, "Cleanup: VIDIOC_QBUF failed"); + } + return false; + } + + esp_imgfx_data_t rotate_input_data = { + .data = rotate_src, + .data_len = frame_.len, + }; + esp_imgfx_data_t rotate_output_data = { + .data = rotate_dst, + .data_len = frame_.len, + }; + + imgfx_err = esp_imgfx_rotate_process(rotate_handle, &rotate_input_data, &rotate_output_data); + if (imgfx_err != ESP_IMGFX_ERR_OK) { + ESP_LOGE(TAG, "esp_imgfx_rotate_process failed"); + heap_caps_free(rotate_dst); + rotate_dst = nullptr; + if (ioctl(video_fd_, VIDIOC_QBUF, &buf) != 0) { + ESP_LOGE(TAG, "Cleanup: VIDIOC_QBUF failed"); + } + esp_imgfx_rotate_close(rotate_handle); + rotate_handle = nullptr; + return false; + } + + frame_.data = rotate_dst; + + heap_caps_free(rotate_src); + rotate_src = nullptr; + + esp_imgfx_rotate_close(rotate_handle); + rotate_handle = nullptr; +#else // CONFIG_SOC_PPA_SUPPORTED + uint8_t* rotate_src = nullptr; + + ppa_srm_color_mode_t ppa_color_mode; + switch (frame_.format) { + case V4L2_PIX_FMT_RGB565: + rotate_src = (uint8_t*)frame_.data; + ppa_color_mode = PPA_SRM_COLOR_MODE_RGB565; + break; + case V4L2_PIX_FMT_RGB24: + rotate_src = (uint8_t*)frame_.data; + ppa_color_mode = PPA_SRM_COLOR_MODE_RGB888; + break; + case V4L2_PIX_FMT_YUYV: { + ESP_LOGW(TAG, "YUYV format is not supported for PPA rotation, using software conversion to RGB888"); + rotate_src = (uint8_t*)heap_caps_malloc(frame_.width * frame_.height * 3, + MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT); + if (rotate_src == nullptr) { + ESP_LOGE(TAG, "Failed to allocate memory for rotate image"); + if (ioctl(video_fd_, VIDIOC_QBUF, &buf) != 0) { + ESP_LOGE(TAG, "Cleanup: VIDIOC_QBUF failed"); + } + return false; + } + esp_imgfx_color_convert_cfg_t convert_cfg = { + .in_res = {.width = static_cast(frame_.width), + .height = static_cast(frame_.height)}, + .in_pixel_fmt = ESP_IMGFX_PIXEL_FMT_YUYV, + .out_pixel_fmt = ESP_IMGFX_PIXEL_FMT_RGB888, + }; + esp_imgfx_color_convert_handle_t convert_handle = nullptr; + esp_imgfx_err_t err = esp_imgfx_color_convert_open(&convert_cfg, &convert_handle); + if (err != ESP_IMGFX_ERR_OK || convert_handle == nullptr) { + ESP_LOGE(TAG, "esp_imgfx_color_convert_open failed"); + heap_caps_free(rotate_src); + rotate_src = nullptr; + if (ioctl(video_fd_, VIDIOC_QBUF, &buf) != 0) { + ESP_LOGE(TAG, "Cleanup: VIDIOC_QBUF failed"); + } + return false; + } + esp_imgfx_data_t convert_input_data = { + .data = frame_.data, + .data_len = frame_.len, + }; + esp_imgfx_data_t convert_output_data = { + .data = rotate_src, + .data_len = static_cast(frame_.width * frame_.height * 3), + }; + err = esp_imgfx_color_convert_process(convert_handle, &convert_input_data, &convert_output_data); + if (err != ESP_IMGFX_ERR_OK) { + ESP_LOGE(TAG, "esp_imgfx_color_convert_process failed"); + heap_caps_free(rotate_src); + rotate_src = nullptr; + esp_imgfx_color_convert_close(convert_handle); + convert_handle = nullptr; + if (ioctl(video_fd_, VIDIOC_QBUF, &buf) != 0) { + ESP_LOGE(TAG, "Cleanup: VIDIOC_QBUF failed"); + } + return false; + } + esp_imgfx_color_convert_close(convert_handle); + convert_handle = nullptr; + ppa_color_mode = PPA_SRM_COLOR_MODE_RGB888; + heap_caps_free(frame_.data); + frame_.data = rotate_src; + frame_.len = frame_.width * frame_.height * 3; + break; + } + default: + ESP_LOGE(TAG, "unsupported sensor format for PPA rotation: 0x%08lx", sensor_format_); + if (ioctl(video_fd_, VIDIOC_QBUF, &buf) != 0) { + ESP_LOGE(TAG, "Cleanup: VIDIOC_QBUF failed"); + } + return false; + } + + uint8_t* rotate_dst = (uint8_t*)heap_caps_malloc( + frame_.width * frame_.height * 2, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT | MALLOC_CAP_CACHE_ALIGNED); + if (rotate_dst == nullptr) { + ESP_LOGE(TAG, "Failed to allocate memory for rotate image"); + if (ioctl(video_fd_, VIDIOC_QBUF, &buf) != 0) { + ESP_LOGE(TAG, "Cleanup: VIDIOC_QBUF failed"); + } + return false; + } + + ppa_client_handle_t ppa_client = nullptr; + ppa_client_config_t client_cfg = { + .oper_type = PPA_OPERATION_SRM, + .max_pending_trans_num = 1, + }; + esp_err_t err = ppa_register_client(&client_cfg, &ppa_client); + if (err != ESP_OK || ppa_client == nullptr) { + ESP_LOGE(TAG, "ppa_register_client failed: %d", (int)err); + heap_caps_free(rotate_dst); + rotate_dst = nullptr; + if (ioctl(video_fd_, VIDIOC_QBUF, &buf) != 0) { + ESP_LOGE(TAG, "Cleanup: VIDIOC_QBUF failed"); + } + return false; + } + + ppa_srm_rotation_angle_t ppa_angle = IMAGE_ROTATION_ANGLE; + + ppa_srm_oper_config_t srm_cfg = {}; + srm_cfg.in.buffer = (void*)rotate_src; + srm_cfg.in.pic_w = sensor_width_; + srm_cfg.in.pic_h = sensor_height_; + srm_cfg.in.block_w = sensor_width_; + srm_cfg.in.block_h = sensor_height_; + srm_cfg.in.block_offset_x = 0; + srm_cfg.in.block_offset_y = 0; + srm_cfg.in.srm_cm = ppa_color_mode; + + srm_cfg.out.buffer = (void*)rotate_dst; + srm_cfg.out.buffer_size = frame_.len; + srm_cfg.out.pic_w = frame_.width; + srm_cfg.out.pic_h = frame_.height; + srm_cfg.out.block_offset_x = 0; + srm_cfg.out.block_offset_y = 0; + srm_cfg.out.srm_cm = PPA_SRM_COLOR_MODE_RGB565; + + // 等比例缩放 1.0 + srm_cfg.scale_x = 1.0f; + srm_cfg.scale_y = 1.0f; + srm_cfg.rotation_angle = ppa_angle; + srm_cfg.mode = PPA_TRANS_MODE_BLOCKING; + srm_cfg.user_data = nullptr; + + err = ppa_do_scale_rotate_mirror(ppa_client, &srm_cfg); + if (err != ESP_OK) { + ESP_LOGE(TAG, "ppa_do_scale_rotate_mirror failed: %d", (int)err); + heap_caps_free(rotate_dst); + rotate_dst = nullptr; + (void)ppa_unregister_client(ppa_client); + if (ioctl(video_fd_, VIDIOC_QBUF, &buf) != 0) { + ESP_LOGE(TAG, "Cleanup: VIDIOC_QBUF failed"); + } + return false; + } + + (void)ppa_unregister_client(ppa_client); + + frame_.data = rotate_dst; + frame_.len = frame_.width * frame_.height * 2; + frame_.format = V4L2_PIX_FMT_RGB565; + heap_caps_free(rotate_src); + rotate_src = nullptr; +#endif // CONFIG_SOC_PPA_SUPPORTED +#endif // CONFIG_XIAOZHI_ENABLE_ROTATE_CAMERA_IMAGE + } + + if (ioctl(video_fd_, VIDIOC_QBUF, &buf) != 0) { + ESP_LOGE(TAG, "VIDIOC_QBUF failed"); + } + } + + // 显示预览图片 + auto display = dynamic_cast(Board::GetInstance().GetDisplay()); + if (display != nullptr) { + if (!frame_.data) { + ESP_LOGE(TAG, "frame.data is null"); + return false; + } + uint16_t w = frame_.width; + uint16_t h = frame_.height; + size_t lvgl_image_size = frame_.len; + size_t stride = ((w * 2) + 3) & ~3; // 4字节对齐 + lv_color_format_t color_format = LV_COLOR_FORMAT_RGB565; + uint8_t* data = nullptr; + + switch (frame_.format) { + // LVGL 显示 YUV 系的图像似乎都有问题,暂时转换为 RGB565 显示 + case V4L2_PIX_FMT_YUYV: + case V4L2_PIX_FMT_YUV420: + case V4L2_PIX_FMT_RGB24: { + color_format = LV_COLOR_FORMAT_RGB565; + data = (uint8_t*)heap_caps_malloc(w * h * 2, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT); + if (data == nullptr) { + ESP_LOGE(TAG, "Failed to allocate memory for preview image"); + return false; + } + esp_imgfx_color_convert_cfg_t convert_cfg = { + .in_res = {.width = static_cast(frame_.width), + .height = static_cast(frame_.height)}, + .in_pixel_fmt = static_cast(frame_.format), + .out_pixel_fmt = ESP_IMGFX_PIXEL_FMT_RGB565_LE, + .color_space_std = ESP_IMGFX_COLOR_SPACE_STD_BT601, + }; + esp_imgfx_color_convert_handle_t convert_handle = nullptr; + esp_imgfx_err_t err = esp_imgfx_color_convert_open(&convert_cfg, &convert_handle); + if (err != ESP_IMGFX_ERR_OK || convert_handle == nullptr) { + ESP_LOGE(TAG, "esp_imgfx_color_convert_open failed"); + heap_caps_free(data); + data = nullptr; + return false; + } + esp_imgfx_data_t convert_input_data = { + .data = frame_.data, + .data_len = frame_.len, + }; + esp_imgfx_data_t convert_output_data = { + .data = data, + .data_len = static_cast(w * h * 2), + }; + err = esp_imgfx_color_convert_process(convert_handle, &convert_input_data, &convert_output_data); + if (err != ESP_IMGFX_ERR_OK) { + ESP_LOGE(TAG, "esp_imgfx_color_convert_process failed"); + heap_caps_free(data); + data = nullptr; + esp_imgfx_color_convert_close(convert_handle); + convert_handle = nullptr; + return false; + } + esp_imgfx_color_convert_close(convert_handle); + convert_handle = nullptr; + lvgl_image_size = w * h * 2; + break; + } + + case V4L2_PIX_FMT_RGB565: + data = (uint8_t*)heap_caps_malloc(w * h * 2, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT); + if (data == nullptr) { + ESP_LOGE(TAG, "Failed to allocate memory for preview image"); + return false; + } + memcpy(data, frame_.data, frame_.len); + lvgl_image_size = frame_.len; // fallthrough 时兼顾 YUYV 与 RGB565 + break; + +#ifdef CONFIG_XIAOZHI_CAMERA_ALLOW_JPEG_INPUT + case V4L2_PIX_FMT_JPEG: { + uint8_t* out_data = nullptr; // out data is allocated by jpeg_to_image + size_t out_len = 0; + size_t out_width = 0; + size_t out_height = 0; + size_t out_stride = 0; + + esp_err_t ret = + jpeg_to_image(frame_.data, frame_.len, &out_data, &out_len, &out_width, &out_height, &out_stride); + if (ret != ESP_OK) { + ESP_LOGE(TAG, "Failed to decode JPEG image: %d (%s)", (int)ret, esp_err_to_name(ret)); + if (out_data) { + heap_caps_free(out_data); + out_data = nullptr; + } + return false; + } + + data = out_data; + w = out_width; + h = out_height; + lvgl_image_size = out_len; + stride = out_stride; + break; + } +#endif + default: + ESP_LOGE(TAG, "unsupported frame format: 0x%08lx", frame_.format); + return false; + } + + auto image = std::make_unique(data, lvgl_image_size, w, h, stride, color_format); + display->SetPreviewImage(std::move(image)); + } + return true; +} + +bool EspVideo::SetHMirror(bool enabled) { + if (video_fd_ < 0) + return false; + struct v4l2_ext_controls ctrls = {}; + struct v4l2_ext_control ctrl = {}; + ctrl.id = V4L2_CID_HFLIP; + ctrl.value = enabled ? 1 : 0; + ctrls.ctrl_class = V4L2_CTRL_CLASS_USER; + ctrls.count = 1; + ctrls.controls = &ctrl; + if (ioctl(video_fd_, VIDIOC_S_EXT_CTRLS, &ctrls) != 0) { + ESP_LOGE(TAG, "set HFLIP failed"); + return false; + } + return true; +} + +bool EspVideo::SetVFlip(bool enabled) { + if (video_fd_ < 0) + return false; + struct v4l2_ext_controls ctrls = {}; + struct v4l2_ext_control ctrl = {}; + ctrl.id = V4L2_CID_VFLIP; + ctrl.value = enabled ? 1 : 0; + ctrls.ctrl_class = V4L2_CTRL_CLASS_USER; + ctrls.count = 1; + ctrls.controls = &ctrl; + if (ioctl(video_fd_, VIDIOC_S_EXT_CTRLS, &ctrls) != 0) { + ESP_LOGE(TAG, "set VFLIP failed"); + return false; + } + return true; +} + +/** + * @brief 将摄像头捕获的图像发送到远程服务器进行AI分析和解释 + * + * 该函数将当前摄像头缓冲区中的图像编码为JPEG格式,并通过HTTP POST请求 + * 以multipart/form-data的形式发送到指定的解释服务器。服务器将根据提供的 + * 问题对图像进行AI分析并返回结果。 + * + * 实现特点: + * - 使用独立线程编码JPEG,与主线程分离 + * - 采用分块传输编码(chunked transfer encoding)优化内存使用 + * - 通过队列机制实现编码线程和发送线程的数据同步 + * - 支持设备ID、客户端ID和认证令牌的HTTP头部配置 + * + * @param question 要向AI提出的关于图像的问题,将作为表单字段发送 + * @return std::string 服务器返回的JSON格式响应字符串 + * 成功时包含AI分析结果,失败时包含错误信息 + * 格式示例:{"success": true, "result": "分析结果"} + * {"success": false, "message": "错误信息"} + * + * @note 调用此函数前必须先调用SetExplainUrl()设置服务器URL + * @note 函数会等待之前的编码线程完成后再开始新的处理 + * @warning 如果摄像头缓冲区为空或网络连接失败,将返回错误信息 + */ +std::string EspVideo::Explain(const std::string& question) { + if (explain_url_.empty()) { + throw std::runtime_error("Image explain URL or token is not set"); + } + + // 创建局部的 JPEG 队列, 40 entries is about to store 512 * 40 = 20480 bytes of JPEG data + QueueHandle_t jpeg_queue = xQueueCreate(40, sizeof(JpegChunk)); + if (jpeg_queue == nullptr) { + ESP_LOGE(TAG, "Failed to create JPEG queue"); + throw std::runtime_error("Failed to create JPEG queue"); + } + + // We spawn a thread to encode the image to JPEG using optimized encoder (cost about 500ms and 8KB SRAM) + encoder_thread_ = std::thread([this, jpeg_queue]() { + uint16_t w = frame_.width ? frame_.width : 320; + uint16_t h = frame_.height ? frame_.height : 240; + v4l2_pix_fmt_t enc_fmt = frame_.format; + bool ok = image_to_jpeg_cb( + frame_.data, frame_.len, w, h, enc_fmt, 80, + [](void* arg, size_t index, const void* data, size_t len) -> size_t { + auto jpeg_queue = static_cast(arg); + JpegChunk chunk = {.data = nullptr, .len = len}; + if (index == 0 && data != nullptr && len > 0) { + chunk.data = (uint8_t*)heap_caps_aligned_alloc(16, len, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT); + if (chunk.data == nullptr) { + ESP_LOGE(TAG, "Failed to allocate %zu bytes for JPEG chunk", len); + chunk.len = 0; + } else { + memcpy(chunk.data, data, len); + } + } else { + chunk.len = 0; // Sentinel or error + } + xQueueSend(jpeg_queue, &chunk, portMAX_DELAY); + return len; + }, + jpeg_queue); + + if (!ok) { + JpegChunk chunk = {.data = nullptr, .len = 0}; + xQueueSend(jpeg_queue, &chunk, portMAX_DELAY); + } + }); + + auto network = Board::GetInstance().GetNetwork(); + auto http = network->CreateHttp(3); + // 构造multipart/form-data请求体 + std::string boundary = "----ESP32_CAMERA_BOUNDARY"; + + // 配置HTTP客户端,使用分块传输编码 + http->SetHeader("Device-Id", SystemInfo::GetMacAddress().c_str()); + http->SetHeader("Client-Id", Board::GetInstance().GetUuid().c_str()); + if (!explain_token_.empty()) { + http->SetHeader("Authorization", "Bearer " + explain_token_); + } + http->SetHeader("Content-Type", "multipart/form-data; boundary=" + boundary); + http->SetHeader("Transfer-Encoding", "chunked"); + if (!http->Open("POST", explain_url_)) { + ESP_LOGE(TAG, "Failed to connect to explain URL"); + // Clear the queue + encoder_thread_.join(); + JpegChunk chunk; + while (xQueueReceive(jpeg_queue, &chunk, portMAX_DELAY) == pdPASS) { + if (chunk.data != nullptr) { + heap_caps_free(chunk.data); + } else { + break; + } + } + vQueueDelete(jpeg_queue); + throw std::runtime_error("Failed to connect to explain URL"); + } + + { + // 第一块:question字段 + std::string question_field; + question_field += "--" + boundary + "\r\n"; + question_field += "Content-Disposition: form-data; name=\"question\"\r\n"; + question_field += "\r\n"; + question_field += question + "\r\n"; + http->Write(question_field.c_str(), question_field.size()); + } + { + // 第二块:文件字段头部 + std::string file_header; + file_header += "--" + boundary + "\r\n"; + file_header += "Content-Disposition: form-data; name=\"file\"; filename=\"camera.jpg\"\r\n"; + file_header += "Content-Type: image/jpeg\r\n"; + file_header += "\r\n"; + http->Write(file_header.c_str(), file_header.size()); + } + + // 第三块:JPEG数据 + size_t total_sent = 0; + bool saw_terminator = false; + while (true) { + JpegChunk chunk; + if (xQueueReceive(jpeg_queue, &chunk, portMAX_DELAY) != pdPASS) { + ESP_LOGE(TAG, "Failed to receive JPEG chunk"); + break; + } + if (chunk.data == nullptr) { + saw_terminator = true; + break; // The last chunk + } + http->Write((const char*)chunk.data, chunk.len); + total_sent += chunk.len; + heap_caps_free(chunk.data); + } + // Wait for the encoder thread to finish + encoder_thread_.join(); + // 清理队列 + vQueueDelete(jpeg_queue); + + if (!saw_terminator || total_sent == 0) { + ESP_LOGE(TAG, "JPEG encoder failed or produced empty output"); + throw std::runtime_error("Failed to encode image to JPEG"); + } + + { + // 第四块:multipart尾部 + std::string multipart_footer; + multipart_footer += "\r\n--" + boundary + "--\r\n"; + http->Write(multipart_footer.c_str(), multipart_footer.size()); + } + // 结束块 + http->Write("", 0); + + if (http->GetStatusCode() != 200) { + ESP_LOGE(TAG, "Failed to upload photo, status code: %d", http->GetStatusCode()); + throw std::runtime_error("Failed to upload photo"); + } + + std::string result = http->ReadAll(); + http->Close(); + + // Get remain task stack size + size_t remain_stack_size = uxTaskGetStackHighWaterMark(nullptr); + ESP_LOGI(TAG, "Explain image size=%d bytes, compressed size=%d, remain stack size=%d, question=%s\n%s", + (int)frame_.len, (int)total_sent, (int)remain_stack_size, question.c_str(), result.c_str()); + return result; +} diff --git a/main/boards/common/esp_video.h b/main/boards/common/esp_video.h new file mode 100644 index 00000000..063276e9 --- /dev/null +++ b/main/boards/common/esp_video.h @@ -0,0 +1,53 @@ +#pragma once +#include "sdkconfig.h" + +#include +#include +#include +#include + +#include +#include + +#include "camera.h" +#include "jpg/image_to_jpeg.h" +#include "esp_video_init.h" + +struct JpegChunk { + uint8_t* data; + size_t len; +}; + +class EspVideo : public Camera { +private: + struct FrameBuffer { + uint8_t *data = nullptr; + size_t len = 0; + uint16_t width = 0; + uint16_t height = 0; + v4l2_pix_fmt_t format = 0; + } frame_; + v4l2_pix_fmt_t sensor_format_ = 0; +#ifdef CONFIG_XIAOZHI_ENABLE_ROTATE_CAMERA_IMAGE + uint16_t sensor_width_ = 0; + uint16_t sensor_height_ = 0; +#endif // CONFIG_XIAOZHI_ENABLE_ROTATE_CAMERA_IMAGE + int video_fd_ = -1; + bool streaming_on_ = false; + struct MmapBuffer { void *start = nullptr; size_t length = 0; }; + std::vector mmap_buffers_; + std::string explain_url_; + std::string explain_token_; + std::thread encoder_thread_; + +public: + EspVideo(const esp_video_init_config_t& config); + ~EspVideo(); + + virtual void SetExplainUrl(const std::string& url, const std::string& token); + virtual bool Capture(); + // 翻转控制函数 + virtual bool SetHMirror(bool enabled) override; + virtual bool SetVFlip(bool enabled) override; + virtual std::string Explain(const std::string& question); +}; diff --git a/main/boards/df-k10/df_k10_board.cc b/main/boards/df-k10/df_k10_board.cc index 43b41357..9844e013 100644 --- a/main/boards/df-k10/df_k10_board.cc +++ b/main/boards/df-k10/df_k10_board.cc @@ -6,7 +6,7 @@ #include "application.h" #include "button.h" #include "config.h" -#include "esp32_camera.h" +#include "esp_video.h" #include "led/circular_strip.h" #include "assets/lang_config.h" @@ -27,7 +27,7 @@ private: LcdDisplay *display_; button_handle_t btn_a; button_handle_t btn_b; - Esp32Camera* camera_; + EspVideo* camera_; button_driver_t* btn_a_driver_ = nullptr; button_driver_t* btn_b_driver_ = nullptr; @@ -209,7 +209,7 @@ private: .dvp = &dvp_config, }; - camera_ = new Esp32Camera(video_config); + camera_ = new EspVideo(video_config); } void InitializeIli9341Display() { diff --git a/main/boards/df-s3-ai-cam/df_s3_ai_cam.cc b/main/boards/df-s3-ai-cam/df_s3_ai_cam.cc index c82bcaaf..bf221212 100644 --- a/main/boards/df-s3-ai-cam/df_s3_ai_cam.cc +++ b/main/boards/df-s3-ai-cam/df_s3_ai_cam.cc @@ -4,7 +4,7 @@ #include "application.h" #include "button.h" #include "config.h" -#include "esp32_camera.h" +#include "esp_video.h" #include "led/gpio_led.h" #include @@ -16,7 +16,7 @@ class DfrobotEsp32S3AiCam : public WifiBoard { private: Button boot_button_; - Esp32Camera* camera_; + EspVideo* camera_; void InitializeButtons() { boot_button_.OnClick([this]() { @@ -70,7 +70,7 @@ class DfrobotEsp32S3AiCam : public WifiBoard { .dvp = &dvp_config, }; - camera_ = new Esp32Camera(video_config); + camera_ = new EspVideo(video_config); camera_->SetVFlip(1); } diff --git a/main/boards/echoear/EchoEar.cc b/main/boards/echoear/EchoEar.cc index d3882b72..0eb8a9b3 100644 --- a/main/boards/echoear/EchoEar.cc +++ b/main/boards/echoear/EchoEar.cc @@ -6,7 +6,7 @@ #include "button.h" #include "config.h" #include "backlight.h" -#include "esp32_camera.h" +#include "esp_video.h" #include @@ -390,7 +390,7 @@ private: PwmBacklight* backlight_ = nullptr; esp_timer_handle_t touchpad_timer_; esp_lcd_touch_handle_t tp; // LCD touch handle - Esp32Camera* camera_ = nullptr; + EspVideo* camera_ = nullptr; void InitializeI2c() { @@ -605,7 +605,7 @@ private: .usb_uvc = &usb_uvc_config, }; - camera_ = new Esp32Camera(video_config); + camera_ = new EspVideo(video_config); } #endif // CONFIG_ESP_VIDEO_ENABLE_USB_UVC_VIDEO_DEVICE diff --git a/main/boards/esp-p4-function-ev-board/esp-p4-function-ev-board.cc b/main/boards/esp-p4-function-ev-board/esp-p4-function-ev-board.cc index 0a6405a8..d473fd4f 100644 --- a/main/boards/esp-p4-function-ev-board/esp-p4-function-ev-board.cc +++ b/main/boards/esp-p4-function-ev-board/esp-p4-function-ev-board.cc @@ -10,7 +10,7 @@ #include "application.h" #include "button.h" #include "config.h" -#include "esp32_camera.h" +#include "esp_video.h" #include #include @@ -45,7 +45,7 @@ private: Button boot_button_; LcdDisplay *display_ = nullptr; esp_lcd_touch_handle_t tp_ = nullptr; - Esp32Camera* camera_ = nullptr; + EspVideo* camera_ = nullptr; void InitializeI2cBuses() { @@ -115,7 +115,7 @@ private: ESP_LOGE(TAG, "Failed to initialize BSP camera: %s", esp_err_to_name(ret)); ESP_LOGI(TAG, "Attempting alternative camera initialization"); - // Alternative: Direct Esp32Camera initialization if BSP fails + // Alternative: Direct EspVideo initialization if BSP fails // This provides more control over camera configuration static esp_cam_ctlr_dvp_pin_config_t dvp_pin_config = { .data_width = CAM_CTLR_DATA_WIDTH_8, @@ -154,7 +154,7 @@ private: }; // Try to create camera with direct configuration - camera_ = new Esp32Camera(video_config); + camera_ = new EspVideo(video_config); ESP_LOGI(TAG, "Camera initialized with direct configuration"); } else { ESP_LOGI(TAG, "Camera initialized successfully via BSP"); diff --git a/main/boards/esp-s3-lcd-ev-board-2/esp-s3-lcd-ev-board-2.cc b/main/boards/esp-s3-lcd-ev-board-2/esp-s3-lcd-ev-board-2.cc index 6e7690b4..9cf9545d 100644 --- a/main/boards/esp-s3-lcd-ev-board-2/esp-s3-lcd-ev-board-2.cc +++ b/main/boards/esp-s3-lcd-ev-board-2/esp-s3-lcd-ev-board-2.cc @@ -5,7 +5,7 @@ #include "button.h" #include "led/single_led.h" #include "pin_config.h" -#include "esp32_camera.h" +#include "esp_video.h" #include "config.h" @@ -27,7 +27,7 @@ private: i2c_master_bus_handle_t i2c_bus_; Button boot_button_; LcdDisplay* display_; - Esp32Camera* camera_; + EspVideo* camera_; //add support ev board lcd esp_io_expander_handle_t expander = NULL; @@ -218,7 +218,7 @@ private: esp_video_init_config_t video_config = { .usb_uvc = &usb_uvc_config, }; - camera_ = new Esp32Camera(video_config); + camera_ = new EspVideo(video_config); } #endif // CONFIG_ESP_VIDEO_ENABLE_USB_UVC_VIDEO_DEVICE diff --git a/main/boards/esp-s3-lcd-ev-board/esp-s3-lcd-ev-board.cc b/main/boards/esp-s3-lcd-ev-board/esp-s3-lcd-ev-board.cc index 72b61ec0..2562df3e 100644 --- a/main/boards/esp-s3-lcd-ev-board/esp-s3-lcd-ev-board.cc +++ b/main/boards/esp-s3-lcd-ev-board/esp-s3-lcd-ev-board.cc @@ -5,7 +5,7 @@ #include "button.h" #include "led/single_led.h" #include "pin_config.h" -#include "esp32_camera.h" +#include "esp_video.h" #include "config.h" @@ -25,7 +25,7 @@ private: i2c_master_bus_handle_t codec_i2c_bus_; Button boot_button_; LcdDisplay* display_; - Esp32Camera* camera_; + EspVideo* camera_; //add support ev board lcd esp_io_expander_handle_t expander = NULL; @@ -188,7 +188,7 @@ private: .usb_uvc = &usb_uvc_config, }; - camera_ = new Esp32Camera(video_config); + camera_ = new EspVideo(video_config); } #endif // CONFIG_ESP_VIDEO_ENABLE_USB_UVC_VIDEO_DEVICE diff --git a/main/boards/esp-sparkbot/esp_sparkbot_board.cc b/main/boards/esp-sparkbot/esp_sparkbot_board.cc index cef94861..ac90cde8 100644 --- a/main/boards/esp-sparkbot/esp_sparkbot_board.cc +++ b/main/boards/esp-sparkbot/esp_sparkbot_board.cc @@ -14,7 +14,7 @@ #include #include -#include "esp32_camera.h" +#include "esp_video.h" #define TAG "esp_sparkbot" @@ -45,7 +45,7 @@ private: i2c_master_bus_handle_t i2c_bus_; Button boot_button_; Display* display_; - Esp32Camera* camera_; + EspVideo* camera_; light_mode_t light_mode_ = LIGHT_MODE_ALWAYS_ON; void InitializeI2c() { @@ -162,7 +162,7 @@ private: .dvp = &dvp_config, }; - camera_ = new Esp32Camera(video_config); + camera_ = new EspVideo(video_config); Settings settings("sparkbot", false); // 考虑到部分复刻使用了不可动摄像头的设计,默认启用翻转 diff --git a/main/boards/esp32-s3-touch-lcd-3.5/esp32-s3-touch-lcd-3.5.cc b/main/boards/esp32-s3-touch-lcd-3.5/esp32-s3-touch-lcd-3.5.cc index 66e49dc8..1cab3cd7 100644 --- a/main/boards/esp32-s3-touch-lcd-3.5/esp32-s3-touch-lcd-3.5.cc +++ b/main/boards/esp32-s3-touch-lcd-3.5/esp32-s3-touch-lcd-3.5.cc @@ -24,7 +24,7 @@ #include #include -#include "esp32_camera.h" +#include "esp_video.h" #define TAG "waveshare_lcd_3_5" @@ -106,7 +106,7 @@ private: esp_io_expander_handle_t io_expander = NULL; LcdDisplay* display_; PowerSaveTimer* power_save_timer_; - Esp32Camera* camera_; + EspVideo* camera_; void InitializePowerSaveTimer() { power_save_timer_ = new PowerSaveTimer(-1, 60, 300); @@ -209,7 +209,7 @@ private: .dvp = &dvp_config, }; - camera_ = new Esp32Camera(video_config); + camera_ = new EspVideo(video_config); } diff --git a/main/boards/esp32s3-korvo2-v3/config.json b/main/boards/esp32s3-korvo2-v3/config.json index e5b40958..0560539a 100644 --- a/main/boards/esp32s3-korvo2-v3/config.json +++ b/main/boards/esp32s3-korvo2-v3/config.json @@ -4,14 +4,6 @@ { "name": "esp32s3-korvo2-v3", "sdkconfig_append": [ - "CONFIG_XIAOZHI_USE_ESP_CAMERA=y", - "CONFIG_CAMERA_OV2640=y", - "CONFIG_CAMERA_OV3660=y", - "CONFIG_CAMERA_OV3660_AUTO_DETECT_DVP_INTERFACE_SENSOR=y", - "CONFIG_CAMERA_OV3660_DVP_RGB565_240X240_24FPS=y", - "CONFIG_CAMERA_OV2640_AUTO_DETECT_DVP_INTERFACE_SENSOR=y", - "CONFIG_CAMERA_OV2640_DVP_RGB565_240X240_25FPS=y", - "CONFIG_XIAOZHI_ENABLE_CAMERA_ENDIANNESS_SWAP=y" ] } ] diff --git a/main/boards/esp32s3-korvo2-v3/esp32s3_korvo2_v3_board.cc b/main/boards/esp32s3-korvo2-v3/esp32s3_korvo2_v3_board.cc index 92fc972b..82ecf990 100644 --- a/main/boards/esp32s3-korvo2-v3/esp32s3_korvo2_v3_board.cc +++ b/main/boards/esp32s3-korvo2-v3/esp32s3_korvo2_v3_board.cc @@ -13,7 +13,7 @@ #include #include #include -#include "esp32s3_camera.h" +#include "esp32_camera.h" #include "power_manager.h" #include "power_save_timer.h" @@ -60,7 +60,7 @@ private: i2c_master_bus_handle_t i2c_bus_; LcdDisplay* display_; esp_io_expander_handle_t io_expander_ = NULL; - Esp32S3Camera* camera_; + Esp32Camera* camera_; PowerSaveTimer* power_save_timer_; PowerManager* power_manager_; void InitializePowerManager() { @@ -385,7 +385,7 @@ private: .sccb_i2c_port = (i2c_port_t)1, }; - camera_ = new Esp32S3Camera(camera_config); + camera_ = new Esp32Camera(camera_config); if(camera_ != nullptr) { camera_->SetVFlip(true); diff --git a/main/boards/kevin-sp-v3-dev/kevin-sp-v3_board.cc b/main/boards/kevin-sp-v3-dev/kevin-sp-v3_board.cc index 95f2ef8d..775f20e9 100644 --- a/main/boards/kevin-sp-v3-dev/kevin-sp-v3_board.cc +++ b/main/boards/kevin-sp-v3-dev/kevin-sp-v3_board.cc @@ -12,7 +12,7 @@ #include #include #include -#include "esp32s3_camera.h" +#include "esp32_camera.h" #define TAG "kevin-sp-v3" @@ -22,7 +22,7 @@ private: i2c_master_bus_handle_t display_i2c_bus_; Button boot_button_; LcdDisplay* display_; - Esp32S3Camera* camera_; + Esp32Camera* camera_; void InitializeSpi() { spi_bus_config_t buscfg = {}; @@ -116,7 +116,7 @@ private: .sccb_i2c_port = (i2c_port_t)1, }; - camera_ = new Esp32S3Camera(camera_config); + camera_ = new Esp32Camera(camera_config); } public: diff --git a/main/boards/kevin-sp-v4-dev/config.json b/main/boards/kevin-sp-v4-dev/config.json index 82f80015..9b7205a7 100644 --- a/main/boards/kevin-sp-v4-dev/config.json +++ b/main/boards/kevin-sp-v4-dev/config.json @@ -4,14 +4,6 @@ { "name": "kevin-sp-v4-dev", "sdkconfig_append": [ - "CONFIG_XIAOZHI_USE_ESP_CAMERA=y", - "CONFIG_CAMERA_OV2640=y", - "CONFIG_CAMERA_OV3660=y", - "CONFIG_CAMERA_OV3660_AUTO_DETECT_DVP_INTERFACE_SENSOR=y", - "CONFIG_CAMERA_OV3660_DVP_RGB565_240X240_24FPS=y", - "CONFIG_CAMERA_OV2640_AUTO_DETECT_DVP_INTERFACE_SENSOR=y", - "CONFIG_CAMERA_OV2640_DVP_RGB565_240X240_25FPS=y", - "CONFIG_XIAOZHI_ENABLE_CAMERA_ENDIANNESS_SWAP=y" ] } ] diff --git a/main/boards/kevin-sp-v4-dev/kevin-sp-v4_board.cc b/main/boards/kevin-sp-v4-dev/kevin-sp-v4_board.cc index ca76838d..645d21d4 100644 --- a/main/boards/kevin-sp-v4-dev/kevin-sp-v4_board.cc +++ b/main/boards/kevin-sp-v4-dev/kevin-sp-v4_board.cc @@ -10,7 +10,7 @@ #include #include #include -#include "esp32s3_camera.h" +#include "esp32_camera.h" #define TAG "kevin-sp-v4" @@ -19,7 +19,7 @@ private: Button boot_button_; LcdDisplay* display_; i2c_master_bus_handle_t i2c_bus_; - Esp32S3Camera* camera_; + Esp32Camera* camera_; void InitializeCodecI2c() { // Initialize I2C peripheral @@ -130,7 +130,7 @@ private: .sccb_i2c_port = (i2c_port_t)1, }; - camera_ = new Esp32S3Camera(camera_config); + camera_ = new Esp32Camera(camera_config); } public: diff --git a/main/boards/lichuang-dev/config.json b/main/boards/lichuang-dev/config.json index b35065b7..e2a7090e 100644 --- a/main/boards/lichuang-dev/config.json +++ b/main/boards/lichuang-dev/config.json @@ -4,10 +4,7 @@ { "name": "lichuang-dev", "sdkconfig_append": [ - "CONFIG_USE_DEVICE_AEC=y", - "CONFIG_CAMERA_GC0308=y", - "CONFIG_CAMERA_GC0308_AUTO_DETECT_DVP_INTERFACE_SENSOR=y", - "CONFIG_CAMERA_GC0308_DVP_YUV422_640X480_16FPS=y" + "CONFIG_USE_DEVICE_AEC=y" ] } ] diff --git a/main/boards/lichuang-dev/lichuang_dev_board.cc b/main/boards/lichuang-dev/lichuang_dev_board.cc index 42f82884..42b287e4 100644 --- a/main/boards/lichuang-dev/lichuang_dev_board.cc +++ b/main/boards/lichuang-dev/lichuang_dev_board.cc @@ -208,43 +208,35 @@ private: // Open camera power pca9557_->SetOutputState(2, 0); - static esp_cam_ctlr_dvp_pin_config_t dvp_pin_config = { - .data_width = CAM_CTLR_DATA_WIDTH_8, - .data_io = { - [0] = CAMERA_PIN_D0, - [1] = CAMERA_PIN_D1, - [2] = CAMERA_PIN_D2, - [3] = CAMERA_PIN_D3, - [4] = CAMERA_PIN_D4, - [5] = CAMERA_PIN_D5, - [6] = CAMERA_PIN_D6, - [7] = CAMERA_PIN_D7, - }, - .vsync_io = CAMERA_PIN_VSYNC, - .de_io = CAMERA_PIN_HREF, - .pclk_io = CAMERA_PIN_PCLK, - .xclk_io = CAMERA_PIN_XCLK, - }; + camera_config_t config = {}; + config.ledc_channel = LEDC_CHANNEL_2; + config.ledc_timer = LEDC_TIMER_2; + config.pin_d0 = CAMERA_PIN_D0; + config.pin_d1 = CAMERA_PIN_D1; + config.pin_d2 = CAMERA_PIN_D2; + config.pin_d3 = CAMERA_PIN_D3; + config.pin_d4 = CAMERA_PIN_D4; + config.pin_d5 = CAMERA_PIN_D5; + config.pin_d6 = CAMERA_PIN_D6; + config.pin_d7 = CAMERA_PIN_D7; + config.pin_xclk = CAMERA_PIN_XCLK; + config.pin_pclk = CAMERA_PIN_PCLK; + config.pin_vsync = CAMERA_PIN_VSYNC; + config.pin_href = CAMERA_PIN_HREF; + config.pin_sccb_sda = -1; + config.pin_sccb_scl = CAMERA_PIN_SIOC; + config.sccb_i2c_port = 1; + config.pin_pwdn = CAMERA_PIN_PWDN; + config.pin_reset = CAMERA_PIN_RESET; + config.xclk_freq_hz = XCLK_FREQ_HZ; + config.pixel_format = PIXFORMAT_RGB565; + config.frame_size = FRAMESIZE_VGA; + config.jpeg_quality = 12; + config.fb_count = 1; + config.fb_location = CAMERA_FB_IN_PSRAM; + config.grab_mode = CAMERA_GRAB_WHEN_EMPTY; - esp_video_init_sccb_config_t sccb_config = { - .init_sccb = false, - .i2c_handle = i2c_bus_, - .freq = 100000, - }; - - esp_video_init_dvp_config_t dvp_config = { - .sccb_config = sccb_config, - .reset_pin = CAMERA_PIN_RESET, - .pwdn_pin = CAMERA_PIN_PWDN, - .dvp_pin = dvp_pin_config, - .xclk_freq = XCLK_FREQ_HZ, - }; - - esp_video_init_config_t video_config = { - .dvp = &dvp_config, - }; - - camera_ = new Esp32Camera(video_config); + camera_ = new Esp32Camera(config); } void InitializeTools() { diff --git a/main/boards/lilygo-t-cameraplus-s3/lilygo-t-cameraplus-s3.cc b/main/boards/lilygo-t-cameraplus-s3/lilygo-t-cameraplus-s3.cc index 98ebac36..12ac777e 100644 --- a/main/boards/lilygo-t-cameraplus-s3/lilygo-t-cameraplus-s3.cc +++ b/main/boards/lilygo-t-cameraplus-s3/lilygo-t-cameraplus-s3.cc @@ -8,7 +8,7 @@ #include "i2c_device.h" #include "sy6970.h" #include "pin_config.h" -#include "esp32_camera.h" +#include "esp_video.h" #include "ir_filter_controller.h" #include @@ -73,7 +73,7 @@ private: Button boot_button_; Button key1_button_; PowerSaveTimer* power_save_timer_; - Esp32Camera* camera_; + EspVideo* camera_; void InitializePowerSaveTimer() { power_save_timer_ = new PowerSaveTimer(-1, 60, -1); @@ -270,7 +270,7 @@ private: .dvp = &dvp_config, }; - camera_ = new Esp32Camera(video_config); + camera_ = new EspVideo(video_config); camera_->SetVFlip(1); camera_->SetHMirror(1); } diff --git a/main/boards/m5stack-core-s3/m5stack_core_s3.cc b/main/boards/m5stack-core-s3/m5stack_core_s3.cc index 4f4c7ff7..7ca34b44 100644 --- a/main/boards/m5stack-core-s3/m5stack_core_s3.cc +++ b/main/boards/m5stack-core-s3/m5stack_core_s3.cc @@ -13,7 +13,7 @@ #include #include #include -#include "esp32_camera.h" +#include "esp_video.h" #define TAG "M5StackCoreS3Board" @@ -123,7 +123,7 @@ private: Aw9523* aw9523_; Ft6336* ft6336_; LcdDisplay* display_; - Esp32Camera* camera_; + EspVideo* camera_; esp_timer_handle_t touchpad_timer_; PowerSaveTimer* power_save_timer_; @@ -326,7 +326,7 @@ private: .dvp = &dvp_config, }; - camera_ = new Esp32Camera(video_config); + camera_ = new EspVideo(video_config); camera_->SetHMirror(false); } diff --git a/main/boards/m5stack-tab5/m5stack_tab5.cc b/main/boards/m5stack-tab5/m5stack_tab5.cc index 69a33849..843824ec 100644 --- a/main/boards/m5stack-tab5/m5stack_tab5.cc +++ b/main/boards/m5stack-tab5/m5stack_tab5.cc @@ -7,7 +7,7 @@ #include "application.h" #include "button.h" #include "config.h" -#include "esp32_camera.h" +#include "esp_video.h" #include "esp_video_init.h" #include "esp_cam_sensor_xclk.h" @@ -89,7 +89,7 @@ private: i2c_master_bus_handle_t i2c_bus_; Button boot_button_; LcdDisplay* display_; - Esp32Camera* camera_ = nullptr; + EspVideo* camera_ = nullptr; Pi4ioe1* pi4ioe1_; Pi4ioe2* pi4ioe2_; esp_lcd_touch_handle_t touch_ = nullptr; @@ -463,7 +463,7 @@ private: .csi = &csi_config, }; - camera_ = new Esp32Camera(video_config); + camera_ = new EspVideo(video_config); } public: diff --git a/main/boards/otto-robot/otto_robot.cc b/main/boards/otto-robot/otto_robot.cc index bf892c71..69cdf3ec 100644 --- a/main/boards/otto-robot/otto_robot.cc +++ b/main/boards/otto-robot/otto_robot.cc @@ -18,7 +18,7 @@ #include "power_manager.h" #include "system_reset.h" #include "wifi_board.h" -#include "esp32_camera.h" +#include "esp_video.h" #include "websocket_control_server.h" #define TAG "OttoRobot" @@ -34,7 +34,7 @@ private: HardwareConfig hw_config_; AudioCodec* audio_codec_; i2c_master_bus_handle_t i2c_bus_; - Esp32Camera *camera_; + EspVideo *camera_; bool has_camera_; bool DetectHardwareVersion() { @@ -247,7 +247,7 @@ private: .dvp = &dvp_config, }; - camera_ = new Esp32Camera(video_config); + camera_ = new EspVideo(video_config); camera_->SetVFlip(true); return true; } catch (...) { diff --git a/main/boards/waveshare-p4-nano/esp32-p4-nano.cc b/main/boards/waveshare-p4-nano/esp32-p4-nano.cc index 5389b9cf..9732d010 100644 --- a/main/boards/waveshare-p4-nano/esp32-p4-nano.cc +++ b/main/boards/waveshare-p4-nano/esp32-p4-nano.cc @@ -6,7 +6,7 @@ #include "button.h" #include "config.h" -#include "esp32_camera.h" +#include "esp_video.h" #include "esp_video_init.h" #include "esp_cam_sensor_xclk.h" @@ -69,7 +69,7 @@ private: i2c_master_bus_handle_t codec_i2c_bus_; Button boot_button_; LcdDisplay *display__; - Esp32Camera* camera_ = nullptr; + EspVideo* camera_ = nullptr; CustomBacklight *backlight_; void InitializeCodecI2c() { @@ -215,7 +215,7 @@ private: .csi = &base_csi_config, }; - camera_ = new Esp32Camera(cam_config); + camera_ = new EspVideo(cam_config); } void InitializeButtons() { boot_button_.OnClick([this]() { diff --git a/main/boards/waveshare-p4-wifi6-touch-lcd-4b/esp32-p4-wifi6-touch-lcd-4b.cc b/main/boards/waveshare-p4-wifi6-touch-lcd-4b/esp32-p4-wifi6-touch-lcd-4b.cc index 711930d5..999ec27a 100644 --- a/main/boards/waveshare-p4-wifi6-touch-lcd-4b/esp32-p4-wifi6-touch-lcd-4b.cc +++ b/main/boards/waveshare-p4-wifi6-touch-lcd-4b/esp32-p4-wifi6-touch-lcd-4b.cc @@ -6,7 +6,7 @@ #include "button.h" #include "config.h" -#include "esp32_camera.h" +#include "esp_video.h" #include "esp_video_init.h" #include "esp_cam_sensor_xclk.h" @@ -27,7 +27,7 @@ private: i2c_master_bus_handle_t i2c_bus_; Button boot_button_; LcdDisplay *display_; - Esp32Camera* camera_ = nullptr; + EspVideo* camera_ = nullptr; void InitializeCodecI2c() { // Initialize I2C peripheral @@ -168,7 +168,7 @@ private: .csi = &base_csi_config, }; - camera_ = new Esp32Camera(cam_config); + camera_ = new EspVideo(cam_config); } void InitializeButtons() { boot_button_.OnClick([this]() { diff --git a/main/boards/waveshare-p4-wifi6-touch-lcd-7b/esp32-p4-wifi6-touch-lcd-7b.cc b/main/boards/waveshare-p4-wifi6-touch-lcd-7b/esp32-p4-wifi6-touch-lcd-7b.cc index 61f83ffd..84227e10 100644 --- a/main/boards/waveshare-p4-wifi6-touch-lcd-7b/esp32-p4-wifi6-touch-lcd-7b.cc +++ b/main/boards/waveshare-p4-wifi6-touch-lcd-7b/esp32-p4-wifi6-touch-lcd-7b.cc @@ -6,7 +6,7 @@ #include "button.h" #include "config.h" -#include "esp32_camera.h" +#include "esp_video.h" #include "esp_video_init.h" #include "esp_cam_sensor_xclk.h" @@ -27,7 +27,7 @@ private: i2c_master_bus_handle_t i2c_bus_; Button boot_button_; LcdDisplay *display_; - Esp32Camera* camera_ = nullptr; + EspVideo* camera_ = nullptr; esp_err_t i2c_device_probe(uint8_t addr) { return i2c_master_probe(i2c_bus_, addr, 100); @@ -189,7 +189,7 @@ private: .csi = &base_csi_config, }; - camera_ = new Esp32Camera(cam_config); + camera_ = new EspVideo(cam_config); } void InitializeButtons() { boot_button_.OnClick([this]() { diff --git a/main/boards/waveshare-p4-wifi6-touch-lcd-xc/esp32-p4-wifi6-touch-lcd-xc.cc b/main/boards/waveshare-p4-wifi6-touch-lcd-xc/esp32-p4-wifi6-touch-lcd-xc.cc index dc3ed4e7..57b21481 100644 --- a/main/boards/waveshare-p4-wifi6-touch-lcd-xc/esp32-p4-wifi6-touch-lcd-xc.cc +++ b/main/boards/waveshare-p4-wifi6-touch-lcd-xc/esp32-p4-wifi6-touch-lcd-xc.cc @@ -5,7 +5,7 @@ // #include "display/no_display.h" #include "button.h" -#include "esp32_camera.h" +#include "esp_video.h" #include "esp_video_init.h" #include "esp_cam_sensor_xclk.h" @@ -27,7 +27,7 @@ private: i2c_master_bus_handle_t i2c_bus_; Button boot_button_; LcdDisplay *display_; - Esp32Camera* camera_ = nullptr; + EspVideo* camera_ = nullptr; void InitializeCodecI2c() { // Initialize I2C peripheral @@ -170,7 +170,7 @@ private: .csi = &base_csi_config, }; - camera_ = new Esp32Camera(cam_config); + camera_ = new EspVideo(cam_config); } void InitializeButtons() { boot_button_.OnClick([this]() { diff --git a/main/boards/waveshare-s3-audio-board/esp32-s3-audio_board.cc b/main/boards/waveshare-s3-audio-board/esp32-s3-audio_board.cc index d2969e25..e983fb4b 100644 --- a/main/boards/waveshare-s3-audio-board/esp32-s3-audio_board.cc +++ b/main/boards/waveshare-s3-audio-board/esp32-s3-audio_board.cc @@ -15,7 +15,7 @@ #include #include #include "esp_io_expander_tca95xx_16bit.h" -#include "esp32_camera.h" +#include "esp_video.h" #include "led/circular_strip.h" #include "esp_lcd_jd9853.h" @@ -31,7 +31,7 @@ private: i2c_master_bus_handle_t i2c_bus_; esp_io_expander_handle_t io_expander = NULL; LcdDisplay* display_; - Esp32Camera* camera_; + EspVideo* camera_; void InitializeI2c() { // Initialize I2C peripheral @@ -192,7 +192,7 @@ private: .dvp = &dvp_config, }; - camera_ = new Esp32Camera(video_config); + camera_ = new EspVideo(video_config); } public: diff --git a/main/boards/waveshare-s3-touch-lcd-3.5b/waveshare-s3-touch-lcd-3.5b.cc b/main/boards/waveshare-s3-touch-lcd-3.5b/waveshare-s3-touch-lcd-3.5b.cc index e52dd8fa..fc2d360e 100644 --- a/main/boards/waveshare-s3-touch-lcd-3.5b/waveshare-s3-touch-lcd-3.5b.cc +++ b/main/boards/waveshare-s3-touch-lcd-3.5b/waveshare-s3-touch-lcd-3.5b.cc @@ -28,7 +28,7 @@ #include #include -#include "esp32_camera.h" +#include "esp_video.h" #define TAG "waveshare_lcd_3_5b" @@ -108,7 +108,7 @@ private: esp_io_expander_handle_t io_expander = NULL; LcdDisplay* display_; PowerSaveTimer* power_save_timer_; - Esp32Camera* camera_; + EspVideo* camera_; void InitializePowerSaveTimer() { power_save_timer_ = new PowerSaveTimer(-1, 60, 300); @@ -212,7 +212,7 @@ private: .dvp = &dvp_config, }; - camera_ = new Esp32Camera(video_config); + camera_ = new EspVideo(video_config); } diff --git a/main/display/lvgl_display/jpg/image_to_jpeg.h b/main/display/lvgl_display/jpg/image_to_jpeg.h index 27914694..27c5d313 100644 --- a/main/display/lvgl_display/jpg/image_to_jpeg.h +++ b/main/display/lvgl_display/jpg/image_to_jpeg.h @@ -7,7 +7,7 @@ #include #include -#if defined(CONFIG_IDF_TARGET_ESP32P4) +#if defined(CONFIG_IDF_TARGET_ESP32P4) || defined(CONFIG_IDF_TARGET_ESP32S3) // ESP32-P4 使用 esp_video 组件提供的 V4L2 头文件 #include #else diff --git a/main/mcp_server.cc b/main/mcp_server.cc index f09d3a33..7cd7ba00 100644 --- a/main/mcp_server.cc +++ b/main/mcp_server.cc @@ -100,7 +100,7 @@ void McpServer::AddCommonTools() { auto camera = board.GetCamera(); if (camera) { AddTool("self.camera.take_photo", - "Take a photo and explain it. Use this tool after the user asks you to see something.\n" + "Always remember you have a camera. If the user asks you to see something, use this tool to take a photo and then explain it.\n" "Args:\n" " `question`: The question that you want to ask about the photo.\n" "Return:\n"