feat(audio): Use esp_audio_codec and esp_audio_effects to replace 78opus (#1632)

feat: Add ESP-SensairShuttle (#1620)
* feat: Add ESP-SensairShuttle
* fix: fix board name
2026-01-07 18:45:34 +08:00 · 2026-01-02 12:19:46 +08:00
15 changed files with 1033 additions and 107 deletions
--- a/main/CMakeLists.txt
+++ b/main/CMakeLists.txt
@@ -212,6 +212,11 @@ elseif(CONFIG_BOARD_TYPE_ECHOEAR)
    set(BUILTIN_TEXT_FONT font_puhui_20_4)
    set(BUILTIN_ICON_FONT font_awesome_20_4)
    set(DEFAULT_EMOJI_COLLECTION twemoji_64)
+elseif(CONFIG_BOARD_TYPE_ESP_SENSAIRSHUTTLE)
+    set(BOARD_TYPE "esp-sensairshuttle")
+    set(BUILTIN_TEXT_FONT font_puhui_basic_16_4)
+    set(BUILTIN_ICON_FONT font_awesome_16_4)
+    set(DEFAULT_EMOJI_COLLECTION twemoji_32)
 elseif(CONFIG_BOARD_TYPE_WAVESHARE_S3_AUDIO_BOARD)
    set(BOARD_TYPE "waveshare-s3-audio-board")
    set(BUILTIN_TEXT_FONT font_puhui_basic_16_4)
--- a/main/Kconfig.projbuild
+++ b/main/Kconfig.projbuild
@@ -150,6 +150,9 @@ choice BOARD_TYPE
    config BOARD_TYPE_ESP_SPARKBOT
        bool "Espressif SparkBot"
        depends on IDF_TARGET_ESP32S3
+    config BOARD_TYPE_ESP_SENSAIRSHUTTLE
+        bool "Espressif ESP-SensairShuttle"
+        depends on IDF_TARGET_ESP32C5
    config BOARD_TYPE_ESP_SPOT_S3
        bool "Espressif Spot-S3"
        depends on IDF_TARGET_ESP32S3
@@ -582,7 +585,7 @@ choice DISPLAY_STYLE

    config USE_EMOTE_MESSAGE_STYLE
        bool "Emote animation style"
-        depends on BOARD_TYPE_ESP_BOX_3 || BOARD_TYPE_ECHOEAR || BOARD_TYPE_LICHUANG_DEV_S3
+        depends on BOARD_TYPE_ESP_BOX_3 || BOARD_TYPE_ECHOEAR || BOARD_TYPE_LICHUANG_DEV_S3 || BOARD_TYPE_ESP_SENSAIRSHUTTLE
 endchoice

 choice WAKE_WORD_TYPE
--- a/main/audio/audio_service.cc
+++ b/main/audio/audio_service.cc
@@ -2,6 +2,26 @@
 #include <esp_log.h>
 #include <cstring>

+#define RATE_CVT_CFG(_src_rate, _dest_rate, _channel)        \
+    (esp_ae_rate_cvt_cfg_t)                                  \
+    {                                                        \
+        .src_rate        = (uint32_t)(_src_rate),            \
+        .dest_rate       = (uint32_t)(_dest_rate),           \
+        .channel         = (uint8_t)(_channel),              \
+        .bits_per_sample = ESP_AUDIO_BIT16,                  \
+        .complexity      = 2,                                \
+        .perf_type       = ESP_AE_RATE_CVT_PERF_TYPE_SPEED,  \
+    }
+
+#define OPUS_DEC_CFG(_sample_rate, _frame_duration_ms)                                                    \
+    (esp_opus_dec_cfg_t)                                                                                  \
+    {                                                                                                     \
+        .sample_rate    = (uint32_t)(_sample_rate),                                                       \
+        .channel        = ESP_AUDIO_MONO,                                                                 \
+        .frame_duration = (esp_opus_dec_frame_duration_t)AS_OPUS_GET_FRAME_DRU_ENUM(_frame_duration_ms),  \
+        .self_delimited = false,                                                                          \
+    }
+
 #if CONFIG_USE_AUDIO_PROCESSOR
 #include "processors/afe_audio_processor.h"
 #else
@@ -17,7 +37,6 @@

 #define TAG "AudioService"

-
 AudioService::AudioService() {
    event_group_ = xEventGroupCreate();
 }
@@ -26,21 +45,51 @@ AudioService::~AudioService() {
    if (event_group_ != nullptr) {
        vEventGroupDelete(event_group_);
    }
+    if (opus_encoder_ != nullptr) {
+        esp_opus_enc_close(opus_encoder_);
+    }
+    if (opus_decoder_ != nullptr) {
+        esp_opus_dec_close(opus_decoder_);
+    }
+    if (input_resampler_ != nullptr) {
+        esp_ae_rate_cvt_close(input_resampler_);
+    }
+    if (output_resampler_ != nullptr) {
+        esp_ae_rate_cvt_close(output_resampler_);
+    }
 }
-

 void AudioService::Initialize(AudioCodec* codec) {
    codec_ = codec;
    codec_->Start();

-    /* Setup the audio codec */
-    opus_decoder_ = std::make_unique<OpusDecoderWrapper>(codec->output_sample_rate(), 1, OPUS_FRAME_DURATION_MS);
-    opus_encoder_ = std::make_unique<OpusEncoderWrapper>(16000, 1, OPUS_FRAME_DURATION_MS);
-    opus_encoder_->SetComplexity(0);
+    esp_opus_dec_cfg_t opus_dec_cfg = OPUS_DEC_CFG(codec->output_sample_rate(), OPUS_FRAME_DURATION_MS);
+    auto ret = esp_opus_dec_open(&opus_dec_cfg, sizeof(esp_opus_dec_cfg_t), &opus_decoder_);
+    if (opus_decoder_ == nullptr) {
+        ESP_LOGE(TAG, "Failed to create audio decoder, error code: %d", ret);
+    } else {
+        decoder_sample_rate_ = codec->output_sample_rate();
+        decoder_duration_ms_ = OPUS_FRAME_DURATION_MS;
+        decoder_frame_size_ = decoder_sample_rate_ / 1000 * OPUS_FRAME_DURATION_MS;
+    }
+    esp_opus_enc_config_t opus_enc_cfg = AS_OPUS_ENC_CONFIG();
+    ret = esp_opus_enc_open(&opus_enc_cfg, sizeof(esp_opus_enc_config_t), &opus_encoder_);
+    if (opus_encoder_ == nullptr) {
+        ESP_LOGE(TAG, "Failed to create audio encoder, error code: %d", ret);
+    } else {
+        encoder_sample_rate_ = 16000;
+        encoder_duration_ms_ = OPUS_FRAME_DURATION_MS;
+        esp_opus_enc_get_frame_size(opus_encoder_, &encoder_frame_size_, &encoder_outbuf_size_);
+        encoder_frame_size_ = encoder_frame_size_ / sizeof(int16_t);
+    }

    if (codec->input_sample_rate() != 16000) {
-        input_resampler_.Configure(codec->input_sample_rate(), 16000);
-        reference_resampler_.Configure(codec->input_sample_rate(), 16000);
+        esp_ae_rate_cvt_cfg_t input_resampler_cfg = RATE_CVT_CFG(
+            codec->input_sample_rate(), ESP_AUDIO_SAMPLE_RATE_16K, codec->input_channels());
+        auto resampler_ret = esp_ae_rate_cvt_open(&input_resampler_cfg, &input_resampler_);
+        if (input_resampler_ == nullptr) {
+            ESP_LOGE(TAG, "Failed to create input resampler, error code: %d", resampler_ret);
+        }
    }

 #if CONFIG_USE_AUDIO_PROCESSOR
@@ -114,7 +163,7 @@ void AudioService::Start() {
        AudioService* audio_service = (AudioService*)arg;
        audio_service->OpusCodecTask();
        vTaskDelete(NULL);
-    }, "opus_codec", 2048 * 13, this, 2, &opus_codec_task_handle_);
+    }, "opus_codec", 2048 * 12, this, 2, &opus_codec_task_handle_);
 }

 void AudioService::Stop() {
@@ -144,25 +193,15 @@ bool AudioService::ReadAudioData(std::vector<int16_t>& data, int sample_rate, in
        if (!codec_->InputData(data)) {
            return false;
        }
-        if (codec_->input_channels() == 2) {
-            auto mic_channel = std::vector<int16_t>(data.size() / 2);
-            auto reference_channel = std::vector<int16_t>(data.size() / 2);
-            for (size_t i = 0, j = 0; i < mic_channel.size(); ++i, j += 2) {
-                mic_channel[i] = data[j];
-                reference_channel[i] = data[j + 1];
-            }
-            auto resampled_mic = std::vector<int16_t>(input_resampler_.GetOutputSamples(mic_channel.size()));
-            auto resampled_reference = std::vector<int16_t>(reference_resampler_.GetOutputSamples(reference_channel.size()));
-            input_resampler_.Process(mic_channel.data(), mic_channel.size(), resampled_mic.data());
-            reference_resampler_.Process(reference_channel.data(), reference_channel.size(), resampled_reference.data());
-            data.resize(resampled_mic.size() + resampled_reference.size());
-            for (size_t i = 0, j = 0; i < resampled_mic.size(); ++i, j += 2) {
-                data[j] = resampled_mic[i];
-                data[j + 1] = resampled_reference[i];
-            }
-        } else {
-            auto resampled = std::vector<int16_t>(input_resampler_.GetOutputSamples(data.size()));
-            input_resampler_.Process(data.data(), data.size(), resampled.data());
+        if (input_resampler_ != nullptr) {
+            uint32_t in_sample_num = data.size() / codec_->input_channels();
+            uint32_t output_samples = 0;
+            esp_ae_rate_cvt_get_max_out_sample_num(input_resampler_, in_sample_num, &output_samples);
+            auto resampled = std::vector<int16_t>(output_samples * codec_->input_channels());
+            uint32_t actual_output = output_samples;
+            esp_ae_rate_cvt_process(input_resampler_, (esp_ae_sample_t)data.data(), in_sample_num,
+                                   (esp_ae_sample_t)resampled.data(), &actual_output);
+            resampled.resize(actual_output * codec_->input_channels());
            data = std::move(resampled);
        }
    } else {
@@ -316,25 +355,49 @@ void AudioService::OpusCodecTask() {
            task->timestamp = packet->timestamp;

            SetDecodeSampleRate(packet->sample_rate, packet->frame_duration);
-            if (opus_decoder_->Decode(std::move(packet->payload), task->pcm)) {
-                // Resample if the sample rate is different
-                if (opus_decoder_->sample_rate() != codec_->output_sample_rate()) {
-                    int target_size = output_resampler_.GetOutputSamples(task->pcm.size());
+            if (opus_decoder_ != nullptr) {
+                task->pcm.resize(decoder_frame_size_);
+                esp_audio_dec_in_raw_t raw = {
+                    .buffer = (uint8_t *)(packet->payload.data()),
+                    .len = (uint32_t)(packet->payload.size()),
+                    .consumed = 0,
+                    .frame_recover = ESP_AUDIO_DEC_RECOVERY_NONE,
+                };
+                esp_audio_dec_out_frame_t out_frame = {
+                    .buffer = (uint8_t *)(task->pcm.data()),
+                    .len = (uint32_t)(task->pcm.size() * sizeof(int16_t)),
+                    .decoded_size = 0,
+                };
+                esp_audio_dec_info_t dec_info = {};
+                std::unique_lock<std::mutex> decoder_lock(decoder_mutex_);
+                auto ret = esp_opus_dec_decode(opus_decoder_, &raw, &out_frame, &dec_info);
+                decoder_lock.unlock();
+                if (ret == ESP_AUDIO_ERR_OK) {
+                    task->pcm.resize(out_frame.decoded_size / sizeof(int16_t));
+                    if (decoder_sample_rate_ != codec_->output_sample_rate() && output_resampler_ != nullptr) {
+                        uint32_t target_size = 0;
+                        esp_ae_rate_cvt_get_max_out_sample_num(output_resampler_, task->pcm.size(), &target_size);
                        std::vector<int16_t> resampled(target_size);
-                    output_resampler_.Process(task->pcm.data(), task->pcm.size(), resampled.data());
+                        uint32_t actual_output = target_size;
+                        esp_ae_rate_cvt_process(output_resampler_, (esp_ae_sample_t)task->pcm.data(), task->pcm.size(),
+                                                (esp_ae_sample_t)resampled.data(), &actual_output);
+                        resampled.resize(actual_output);
                        task->pcm = std::move(resampled);
                    }
-
                    lock.lock();
                    audio_playback_queue_.push_back(std::move(task));
                    audio_queue_cv_.notify_all();
+                    debug_statistics_.decode_count++;
                } else {
-                ESP_LOGE(TAG, "Failed to decode audio");
+                    ESP_LOGE(TAG, "Failed to decode audio after resize, error code: %d", ret);
+                    lock.lock();
+                }
+            } else {
+                ESP_LOGE(TAG, "Audio decoder is not configured");
                lock.lock();
            }
            debug_statistics_.decode_count++;
        }
-        
        /* Encode the audio to send queue */
        if (!audio_encode_queue_.empty() && audio_send_queue_.size() < MAX_SEND_PACKETS_IN_QUEUE) {
            auto task = std::move(audio_encode_queue_.front());
@@ -346,24 +409,42 @@ void AudioService::OpusCodecTask() {
            packet->frame_duration = OPUS_FRAME_DURATION_MS;
            packet->sample_rate = 16000;
            packet->timestamp = task->timestamp;
-            if (!opus_encoder_->Encode(std::move(task->pcm), packet->payload)) {
-                ESP_LOGE(TAG, "Failed to encode audio");
-                continue;
-            }
+
+            if (opus_encoder_ != nullptr && task->pcm.size() == encoder_frame_size_) {
+                std::vector<uint8_t> buf(encoder_outbuf_size_);
+                esp_audio_enc_in_frame_t in = {
+                    .buffer = (uint8_t *)(task->pcm.data()),
+                    .len = (uint32_t)(encoder_frame_size_ * sizeof(int16_t)),
+                };
+                esp_audio_enc_out_frame_t out = {
+                    .buffer = buf.data(),
+                    .len = (uint32_t)encoder_outbuf_size_,
+                    .encoded_bytes = 0,
+                };
+                auto ret = esp_opus_enc_process(opus_encoder_, &in, &out);
+                if (ret == ESP_AUDIO_ERR_OK) {
+                    packet->payload.assign(buf.data(), buf.data() + out.encoded_bytes);

                    if (task->type == kAudioTaskTypeEncodeToSendQueue) {
                        {
-                    std::lock_guard<std::mutex> lock(audio_queue_mutex_);
+                            std::lock_guard<std::mutex> lock2(audio_queue_mutex_);
                            audio_send_queue_.push_back(std::move(packet));
                        }
                        if (callbacks_.on_send_queue_available) {
                            callbacks_.on_send_queue_available();
                        }
                    } else if (task->type == kAudioTaskTypeEncodeToTestingQueue) {
-                std::lock_guard<std::mutex> lock(audio_queue_mutex_);
+                        std::lock_guard<std::mutex> lock2(audio_queue_mutex_);
                        audio_testing_queue_.push_back(std::move(packet));
                    }
                    debug_statistics_.encode_count++;
+                } else {
+                    ESP_LOGE(TAG, "Failed to encode audio, error code: %d", ret);
+                }
+            } else {
+                ESP_LOGE(TAG, "Failed to encode audio: encoder not configured or invalid frame size (got %u, expected %u)",
+                         task->pcm.size(), encoder_frame_size_);
+            }
            lock.lock();
        }
    }
@@ -372,17 +453,38 @@ void AudioService::OpusCodecTask() {
 }

 void AudioService::SetDecodeSampleRate(int sample_rate, int frame_duration) {
-    if (opus_decoder_->sample_rate() == sample_rate && opus_decoder_->duration_ms() == frame_duration) {
+    if (decoder_sample_rate_ == sample_rate && decoder_duration_ms_ == frame_duration) {
        return;
    }
-
-    opus_decoder_.reset();
-    opus_decoder_ = std::make_unique<OpusDecoderWrapper>(sample_rate, 1, frame_duration);
+    std::unique_lock<std::mutex> decoder_lock(decoder_mutex_);
+    if (opus_decoder_ != nullptr) {
+        esp_opus_dec_close(opus_decoder_);
+        opus_decoder_ = nullptr;
+    }
+    decoder_lock.unlock();
+    esp_opus_dec_cfg_t opus_dec_cfg = OPUS_DEC_CFG(sample_rate, frame_duration);
+    auto ret = esp_opus_dec_open(&opus_dec_cfg, sizeof(esp_opus_dec_cfg_t), &opus_decoder_);
+    if (opus_decoder_ == nullptr) {
+        ESP_LOGE(TAG, "Failed to create audio decoder, error code: %d", ret);
+        return;
+    }
+    decoder_sample_rate_ = sample_rate;
+    decoder_duration_ms_ = frame_duration;
+    decoder_frame_size_ = decoder_sample_rate_ / 1000 * frame_duration;

    auto codec = Board::GetInstance().GetAudioCodec();
-    if (opus_decoder_->sample_rate() != codec->output_sample_rate()) {
-        ESP_LOGI(TAG, "Resampling audio from %d to %d", opus_decoder_->sample_rate(), codec->output_sample_rate());
-        output_resampler_.Configure(opus_decoder_->sample_rate(), codec->output_sample_rate());
+    if (decoder_sample_rate_ != codec->output_sample_rate()) {
+        ESP_LOGI(TAG, "Resampling audio from %d to %d", decoder_sample_rate_, codec->output_sample_rate());
+        if (output_resampler_ != nullptr) {
+            esp_ae_rate_cvt_close(output_resampler_);
+            output_resampler_ = nullptr;
+        }
+        esp_ae_rate_cvt_cfg_t output_resampler_cfg = RATE_CVT_CFG(
+            decoder_sample_rate_, codec->output_sample_rate(), ESP_AUDIO_MONO);
+        auto resampler_ret = esp_ae_rate_cvt_open(&output_resampler_cfg, &output_resampler_);
+        if (output_resampler_ == nullptr) {
+            ESP_LOGE(TAG, "Failed to create output resampler, error code: %d", resampler_ret);
+        }
    }
 }

@@ -390,7 +492,6 @@ void AudioService::PushTaskToEncodeQueue(AudioTaskType type, std::vector<int16_t
    auto task = std::make_unique<AudioTask>();
    task->type = type;
    task->pcm = std::move(pcm);
-    
    /* Push the task to the encode queue */
    std::unique_lock<std::mutex> lock(audio_queue_mutex_);

@@ -580,13 +681,11 @@ void AudioService::PlaySound(const std::string_view& ogg) {
                // 解析OpusHead包
                if (pkt_len >= 19 && std::memcmp(pkt_ptr, "OpusHead", 8) == 0) {
                    seen_head = true;
-                    
                    // OpusHead结构：[0-7] "OpusHead", [8] version, [9] channel_count, [10-11] pre_skip
                    // [12-15] input_sample_rate, [16-17] output_gain, [18] mapping_family
                    if (pkt_len >= 12) {
                        uint8_t version = pkt_ptr[8];
                        uint8_t channel_count = pkt_ptr[9];
-                        
                        if (pkt_len >= 16) {
                            // 读取输入采样率 (little-endian)
                            sample_rate = pkt_ptr[12] | (pkt_ptr[13] << 8) |
@@ -626,7 +725,11 @@ bool AudioService::IsIdle() {

 void AudioService::ResetDecoder() {
    std::lock_guard<std::mutex> lock(audio_queue_mutex_);
-    opus_decoder_->ResetState();
+    std::unique_lock<std::mutex> decoder_lock(decoder_mutex_);
+    if (opus_decoder_ != nullptr) {
+        esp_opus_dec_reset(opus_decoder_);
+    }
+    decoder_lock.unlock();
    timestamp_queue_.clear();
    audio_decode_queue_.clear();
    audio_playback_queue_.clear();
--- a/main/audio/audio_service.h
+++ b/main/audio/audio_service.h
@@ -12,10 +12,11 @@
 #include <freertos/event_groups.h>
 #include <esp_timer.h>
 #include <model_path.h>
-
-#include <opus_encoder.h>
-#include <opus_decoder.h>
-#include <opus_resampler.h>
+#include "esp_audio_enc.h"
+#include "esp_opus_enc.h"
+#include "esp_opus_dec.h"
+#include "esp_ae_rate_cvt.h"
+#include "esp_audio_types.h"

 #include "audio_codec.h"
 #include "audio_processor.h"
@@ -46,12 +47,34 @@
 #define AUDIO_POWER_TIMEOUT_MS 15000
 #define AUDIO_POWER_CHECK_INTERVAL_MS 1000

-
 #define AS_EVENT_AUDIO_TESTING_RUNNING      (1 << 0)
 #define AS_EVENT_WAKE_WORD_RUNNING          (1 << 1)
 #define AS_EVENT_AUDIO_PROCESSOR_RUNNING    (1 << 2)
 #define AS_EVENT_PLAYBACK_NOT_EMPTY         (1 << 3)

+#define AS_OPUS_GET_FRAME_DRU_ENUM(duration_ms)                   \
+    ((duration_ms) == 5 ? ESP_OPUS_ENC_FRAME_DURATION_5_MS :      \
+     (duration_ms) == 10 ? ESP_OPUS_ENC_FRAME_DURATION_10_MS :    \
+     (duration_ms) == 20 ? ESP_OPUS_ENC_FRAME_DURATION_20_MS :    \
+     (duration_ms) == 40 ? ESP_OPUS_ENC_FRAME_DURATION_40_MS :    \
+     (duration_ms) == 60 ? ESP_OPUS_ENC_FRAME_DURATION_60_MS :    \
+     (duration_ms) == 80 ? ESP_OPUS_ENC_FRAME_DURATION_80_MS :    \
+     (duration_ms) == 100 ? ESP_OPUS_ENC_FRAME_DURATION_100_MS :  \
+     (duration_ms) == 120 ? ESP_OPUS_ENC_FRAME_DURATION_120_MS : -1)
+
+#define AS_OPUS_ENC_CONFIG() {                                                                                    \
+        .sample_rate        = ESP_AUDIO_SAMPLE_RATE_16K,                                                          \
+        .channel            = ESP_AUDIO_MONO,                                                                     \
+        .bits_per_sample    = ESP_AUDIO_BIT16,                                                                    \
+        .bitrate            = ESP_OPUS_BITRATE_AUTO,                                                              \
+        .frame_duration     = (esp_opus_enc_frame_duration_t)AS_OPUS_GET_FRAME_DRU_ENUM(OPUS_FRAME_DURATION_MS),  \
+        .application_mode   = ESP_OPUS_ENC_APPLICATION_AUDIO,                                                     \
+        .complexity         = 0,                                                                                  \
+        .enable_fec         = false,                                                                              \
+        .enable_dtx         = true,                                                                               \
+        .enable_vbr         = true,                                                                               \
+    }
+
 struct AudioServiceCallbacks {
    std::function<void(void)> on_send_queue_available;
    std::function<void(const std::string&)> on_wake_word_detected;
@@ -116,11 +139,20 @@ private:
    std::unique_ptr<AudioProcessor> audio_processor_;
    std::unique_ptr<WakeWord> wake_word_;
    std::unique_ptr<AudioDebugger> audio_debugger_;
-    std::unique_ptr<OpusEncoderWrapper> opus_encoder_;
-    std::unique_ptr<OpusDecoderWrapper> opus_decoder_;
-    OpusResampler input_resampler_;
-    OpusResampler reference_resampler_;
-    OpusResampler output_resampler_;
+    void* opus_encoder_ = nullptr;
+    void* opus_decoder_ = nullptr;
+    std::mutex decoder_mutex_;
+    esp_ae_rate_cvt_handle_t input_resampler_ = nullptr;
+    esp_ae_rate_cvt_handle_t output_resampler_ = nullptr;
+    
+    // Encoder/Decoder state
+    int encoder_sample_rate_ = 16000;
+    int encoder_duration_ms_ = OPUS_FRAME_DURATION_MS;
+    int encoder_frame_size_ = 0;
+    int encoder_outbuf_size_ = 0;
+    int decoder_sample_rate_ = 0;
+    int decoder_duration_ms_ = OPUS_FRAME_DURATION_MS;
+    int decoder_frame_size_ = 0;
    DebugStatistics debug_statistics_;
    srmodel_list_t* models_list_ = nullptr;

--- a/main/audio/wake_words/afe_wake_word.cc
+++ b/main/audio/wake_words/afe_wake_word.cc
@@ -1,6 +1,5 @@
 #include "afe_wake_word.h"
 #include "audio_service.h"
-
 #include <esp_log.h>
 #include <sstream>

@@ -157,7 +156,7 @@ void AfeWakeWord::StoreWakeWordData(const int16_t* data, size_t samples) {
 }

 void AfeWakeWord::EncodeWakeWordData() {
-    const size_t stack_size = 4096 * 7;
+    const size_t stack_size = 4096 * 6;
    wake_word_opus_.clear();
    if (wake_word_encode_task_stack_ == nullptr) {
        wake_word_encode_task_stack_ = (StackType_t*)heap_caps_malloc(stack_size, MALLOC_CAP_SPIRAM);
@@ -172,20 +171,62 @@ void AfeWakeWord::EncodeWakeWordData() {
        auto this_ = (AfeWakeWord*)arg;
        {
            auto start_time = esp_timer_get_time();
-            auto encoder = std::make_unique<OpusEncoderWrapper>(16000, 1, OPUS_FRAME_DURATION_MS);
-            encoder->SetComplexity(0); // 0 is the fastest
-
-            int packets = 0;
-            for (auto& pcm: this_->wake_word_pcm_) {
-                encoder->Encode(std::move(pcm), [this_](std::vector<uint8_t>&& opus) {
+            // Create encoder
+            esp_opus_enc_config_t opus_enc_cfg = AS_OPUS_ENC_CONFIG();
+            void* encoder_handle = nullptr;
+            auto ret = esp_opus_enc_open(&opus_enc_cfg, sizeof(esp_opus_enc_config_t), &encoder_handle);
+            if (encoder_handle == nullptr) {
+                ESP_LOGE(TAG, "Failed to create audio encoder, error code: %d", ret);
                std::lock_guard<std::mutex> lock(this_->wake_word_mutex_);
-                    this_->wake_word_opus_.emplace_back(std::move(opus));
+                this_->wake_word_opus_.push_back(std::vector<uint8_t>());
+                this_->wake_word_cv_.notify_all();
+                return;
+            }
+            
+            // Get frame size
+            int frame_size = 0;
+            int outbuf_size = 0;
+            esp_opus_enc_get_frame_size(encoder_handle, &frame_size, &outbuf_size);
+            frame_size = frame_size / sizeof(int16_t);
+            
+            // Encode all PCM data
+            int packets = 0;
+            std::vector<int16_t> in_buffer;
+            esp_audio_enc_in_frame_t in = {};
+            esp_audio_enc_out_frame_t out = {};
+            
+            for (auto& pcm: this_->wake_word_pcm_) {
+                if (in_buffer.empty()) {
+                    in_buffer = std::move(pcm);
+                } else {
+                    in_buffer.reserve(in_buffer.size() + pcm.size());
+                    in_buffer.insert(in_buffer.end(), pcm.begin(), pcm.end());
+                }
+                
+                while (in_buffer.size() >= frame_size) {
+                    std::vector<uint8_t> opus_buf(outbuf_size);
+                    in.buffer = (uint8_t *)(in_buffer.data());
+                    in.len = (uint32_t)(frame_size * sizeof(int16_t));
+                    out.buffer = opus_buf.data();
+                    out.len = outbuf_size;
+                    out.encoded_bytes = 0;
+                    
+                    ret = esp_opus_enc_process(encoder_handle, &in, &out);
+                    if (ret == ESP_AUDIO_ERR_OK) {
+                        std::lock_guard<std::mutex> lock(this_->wake_word_mutex_);
+                        this_->wake_word_opus_.emplace_back(opus_buf.data(), opus_buf.data() + out.encoded_bytes);
                        this_->wake_word_cv_.notify_all();
-                });
                        packets++;
+                    } else {
+                        ESP_LOGE(TAG, "Failed to encode audio, error code: %d", ret);
+                    }
+                    
+                    in_buffer.erase(in_buffer.begin(), in_buffer.begin() + frame_size);
+                }
            }
            this_->wake_word_pcm_.clear();
-
+            // Close encoder
+            esp_opus_enc_close(encoder_handle);
            auto end_time = esp_timer_get_time();
            ESP_LOGI(TAG, "Encode wake word opus %d packets in %ld ms", packets, (long)((end_time - start_time) / 1000));

--- a/main/audio/wake_words/custom_wake_word.cc
+++ b/main/audio/wake_words/custom_wake_word.cc
@@ -9,10 +9,8 @@
 #include <esp_mn_speech_commands.h>
 #include <cJSON.h>

-
 #define TAG "CustomWakeWord"

-
 CustomWakeWord::CustomWakeWord()
    : wake_word_pcm_(), wake_word_opus_() {
 }
@@ -218,20 +216,56 @@ void CustomWakeWord::EncodeWakeWordData() {
        auto this_ = (CustomWakeWord*)arg;
        {
            auto start_time = esp_timer_get_time();
-            auto encoder = std::make_unique<OpusEncoderWrapper>(16000, 1, OPUS_FRAME_DURATION_MS);
-            encoder->SetComplexity(0); // 0 is the fastest
-
-            int packets = 0;
-            for (auto& pcm: this_->wake_word_pcm_) {
-                encoder->Encode(std::move(pcm), [this_](std::vector<uint8_t>&& opus) {
+            // Create encoder
+            esp_opus_enc_config_t opus_enc_cfg = AS_OPUS_ENC_CONFIG();
+            void* encoder_handle = nullptr;
+            auto ret = esp_opus_enc_open(&opus_enc_cfg, sizeof(esp_opus_enc_config_t), &encoder_handle);
+            if (encoder_handle == nullptr) {
+                ESP_LOGE(TAG, "Failed to create audio encoder, error code: %d", ret);
                std::lock_guard<std::mutex> lock(this_->wake_word_mutex_);
-                    this_->wake_word_opus_.emplace_back(std::move(opus));
+                this_->wake_word_opus_.push_back(std::vector<uint8_t>());
+                this_->wake_word_cv_.notify_all();
+                return;
+            }
+            // Get frame size
+            int frame_size = 0;
+            int outbuf_size = 0;
+            esp_opus_enc_get_frame_size(encoder_handle, &frame_size, &outbuf_size);
+            frame_size = frame_size / sizeof(int16_t);
+            // Encode all PCM data
+            int packets = 0;
+            std::vector<int16_t> in_buffer;
+            esp_audio_enc_in_frame_t in = {};
+            esp_audio_enc_out_frame_t out = {};
+            for (auto& pcm: this_->wake_word_pcm_) {
+                if (in_buffer.empty()) {
+                    in_buffer = std::move(pcm);
+                } else {
+                    in_buffer.reserve(in_buffer.size() + pcm.size());
+                    in_buffer.insert(in_buffer.end(), pcm.begin(), pcm.end());
+                }
+                while (in_buffer.size() >= frame_size) {
+                    std::vector<uint8_t> opus_buf(outbuf_size);
+                    in.buffer = (uint8_t *)(in_buffer.data());
+                    in.len = (uint32_t)(frame_size * sizeof(int16_t));
+                    out.buffer = opus_buf.data();
+                    out.len = outbuf_size;
+                    out.encoded_bytes = 0;
+                    ret = esp_opus_enc_process(encoder_handle, &in, &out);
+                    if (ret == ESP_AUDIO_ERR_OK) {
+                        std::lock_guard<std::mutex> lock(this_->wake_word_mutex_);
+                        this_->wake_word_opus_.emplace_back(opus_buf.data(), opus_buf.data() + out.encoded_bytes);
                        this_->wake_word_cv_.notify_all();
-                });
                        packets++;
+                    } else {
+                        ESP_LOGE(TAG, "Failed to encode audio, error code: %d", ret);
+                    }
+                    in_buffer.erase(in_buffer.begin(), in_buffer.begin() + frame_size);
+                }
            }
            this_->wake_word_pcm_.clear();
-
+            // Close encoder
+            esp_opus_enc_close(encoder_handle);
            auto end_time = esp_timer_get_time();
            ESP_LOGI(TAG, "Encode wake word opus %d packets in %ld ms", packets, (long)((end_time - start_time) / 1000));

--- a/main/boards/common/ml307_board.cc
+++ b/main/boards/common/ml307_board.cc
@@ -9,7 +9,6 @@
 #include <freertos/FreeRTOS.h>
 #include <freertos/task.h>
 #include <font_awesome.h>
-#include <opus_encoder.h>
 #include <utility>

 static const char *TAG = "Ml307Board";
--- a/main/boards/esp-sensairshuttle/README.md
+++ b/main/boards/esp-sensairshuttle/README.md
@@ -0,0 +1,39 @@
+# ESP-SensairShuttle
+
+## 简介
+
+<div align="center">
+    <a href="https://docs.espressif.com/projects/esp-dev-kits/zh_CN/latest/esp32c5/esp-sensairshuttle/index.html">
+        <b> 开发板文档 </b>
+    </a>
+    |
+    <a href="#传感器--shuttleboard-子板支持">
+        <b> 传感器 & <i>ShuttleBoard</i> 文档 </b>
+    </a>
+</div>
+
+ESP-SensairShuttle 是乐鑫携手 Bosch Sensortec 面向**动作感知**与**大模型人机交互**场景联合推出的开发板。
+
+ESP-SensairShuttle 主控采用乐鑫 ESP32-C5-WROOM-1-N16R8 模组，具有 2.4 & 5 GHz 双频 Wi-Fi 6 (802.11ax)、Bluetooth® 5 (LE)、Zigbee 及 Thread (802.15.4) 无线通信能力。
+
+## 传感器 & _ShuttleBoard_ 子板支持
+
+即将推出，敬请期待。
+
+## 配置、编译命令
+
+由于 ESP-SensairShuttle 需要配置较多的 sdkconfig 选项，推荐使用编译脚本编译。
+
+**编译**
+
+```bash
+python ./scripts/release.py esp-sensairshuttle
+```
+
+如需手动编译，请参考 `main/boards/esp-sensairshuttle/config.json` 修改 menuconfig 对应选项。
+
+**烧录**
+
+```bash
+idf.py flash
+```
--- a/main/boards/esp-sensairshuttle/adc_pdm_audio_codec.cc
+++ b/main/boards/esp-sensairshuttle/adc_pdm_audio_codec.cc
@@ -0,0 +1,249 @@
+#include "adc_pdm_audio_codec.h"
+
+#include <esp_log.h>
+#include <esp_timer.h>
+#include <driver/i2c.h>
+#include <driver/i2c_master.h>
+#include <driver/i2s_tdm.h>
+#include "adc_mic.h"
+#include "driver/i2s_pdm.h"
+#include "soc/gpio_sig_map.h"
+#include "soc/io_mux_reg.h"
+#include "hal/rtc_io_hal.h"
+#include "hal/gpio_ll.h"
+#include "settings.h"
+#include "config.h"
+
+static const char TAG[] = "AdcPdmAudioCodec";
+
+#define BSP_I2S_GPIO_CFG(_dout)       \
+    {                          \
+        .clk = GPIO_NUM_NC,    \
+        .dout = _dout,  \
+        .invert_flags = {      \
+            .clk_inv = false, \
+        },                     \
+    }
+
+/**
+ * @brief Mono I2S PDM TX configuration initializer
+ *
+ * Builds the clock, slot, and GPIO settings for a PDM TX channel that outputs on _dout.
+ */
+#define BSP_I2S_DUPLEX_MONO_CFG(_sample_rate, _dout)                                                         \
+    {                                                                                                 \
+        .clk_cfg = I2S_PDM_TX_CLK_DEFAULT_CONFIG(_sample_rate),                                          \
+        .slot_cfg = I2S_PDM_TX_SLOT_DEFAULT_CONFIG(I2S_DATA_BIT_WIDTH_16BIT, I2S_SLOT_MODE_MONO), \
+        .gpio_cfg = BSP_I2S_GPIO_CFG(_dout),                                                                 \
+    }
+
+/**
+ * Builds the audio path for the board: an ADC-backed input codec device and an I2S PDM TX
+ * output codec device.
+ *
+ * @param input_sample_rate   Sample rate in Hz passed to the ADC data interface.
+ * @param output_sample_rate  Sample rate in Hz used for the PDM TX clock configuration.
+ * @param adc_mic_channel     ADC channel (on ADC_UNIT_1) that is sampled for input.
+ * @param pdm_speak_p         GPIO used as the PDM data output.
+ * @param pdm_speak_n         GPIO that receives the inverted data output, or GPIO_NUM_NC to skip it.
+ * @param pa_ctl              PA control GPIO configured as a plain output, or GPIO_NUM_NC to skip it.
+ *
+ * If either codec device cannot be created the error is logged and construction stops early,
+ * leaving the remaining handles (including the output timer) null.
+ */
+AdcPdmAudioCodec::AdcPdmAudioCodec(int input_sample_rate, int output_sample_rate,
+    uint32_t adc_mic_channel, gpio_num_t pdm_speak_p,gpio_num_t pdm_speak_n, gpio_num_t pa_ctl) {
+
+    input_reference_ = false;
+    input_sample_rate_ = input_sample_rate;
+    output_sample_rate_ = output_sample_rate;
+
+    // --- Input: ADC data interface wrapped in a codec device ---
+    uint8_t adc_channel[1] = {0};
+    adc_channel[0] = adc_mic_channel;
+
+    audio_codec_adc_cfg_t cfg = {
+        .handle = NULL,
+        .max_store_buf_size = 1024 * 2,
+        .conv_frame_size = 1024,
+        .unit_id = ADC_UNIT_1,
+        .adc_channel_list = adc_channel,
+        .adc_channel_num = sizeof(adc_channel) / sizeof(adc_channel[0]),
+        .sample_rate_hz = (uint32_t)input_sample_rate,
+    };
+    const audio_codec_data_if_t *adc_if = audio_codec_new_adc_data(&cfg);
+
+    esp_codec_dev_cfg_t codec_dev_cfg = {
+        .dev_type = ESP_CODEC_DEV_TYPE_IN,
+        .data_if = adc_if,
+    };
+    input_dev_ = esp_codec_dev_new(&codec_dev_cfg);
+    if (!input_dev_) {
+        ESP_LOGE(TAG, "Failed to create codec device");
+        return;
+    }
+
+    // --- Output: I2S channel in PDM TX mode wrapped in a codec device ---
+    i2s_chan_config_t chan_cfg = I2S_CHANNEL_DEFAULT_CONFIG(I2S_NUM_0, I2S_ROLE_MASTER);
+    chan_cfg.auto_clear = true; // Auto clear the legacy data in the DMA buffer
+    ESP_ERROR_CHECK(i2s_new_channel(&chan_cfg, &tx_handle_, NULL));
+
+    i2s_pdm_tx_config_t pdm_cfg_default = BSP_I2S_DUPLEX_MONO_CFG((uint32_t)output_sample_rate, pdm_speak_p);
+    pdm_cfg_default.clk_cfg.up_sample_fs = AUDIO_PDM_UPSAMPLE_FS;
+    pdm_cfg_default.slot_cfg.sd_scale = I2S_PDM_SIG_SCALING_MUL_4;
+    pdm_cfg_default.slot_cfg.hp_scale = I2S_PDM_SIG_SCALING_MUL_4;
+    pdm_cfg_default.slot_cfg.lp_scale = I2S_PDM_SIG_SCALING_MUL_4;
+    pdm_cfg_default.slot_cfg.sinc_scale = I2S_PDM_SIG_SCALING_MUL_4;
+    const i2s_pdm_tx_config_t *p_i2s_cfg = &pdm_cfg_default;
+
+    ESP_ERROR_CHECK(i2s_channel_init_pdm_tx_mode(tx_handle_, p_i2s_cfg));
+
+    audio_codec_i2s_cfg_t i2s_cfg = {
+        .port = I2S_NUM_0,
+        .rx_handle = NULL,
+        .tx_handle = tx_handle_,
+    };
+
+    const audio_codec_data_if_t *i2s_data_if = audio_codec_new_i2s_data(&i2s_cfg);
+
+    codec_dev_cfg.dev_type = ESP_CODEC_DEV_TYPE_OUT;
+    codec_dev_cfg.codec_if = NULL;
+    codec_dev_cfg.data_if = i2s_data_if;
+    output_dev_ = esp_codec_dev_new(&codec_dev_cfg);
+    if (!output_dev_) {
+        // Mirror the input path: report the failure instead of carrying a null handle silently.
+        ESP_LOGE(TAG, "Failed to create output codec device");
+        return;
+    }
+
+    output_volume_ = 100;
+    if (pa_ctl != GPIO_NUM_NC) {
+        pa_ctrl_pin_ = pa_ctl;
+        gpio_config_t io_conf = {};
+        io_conf.intr_type = GPIO_INTR_DISABLE;
+        io_conf.mode = GPIO_MODE_OUTPUT;
+        io_conf.pin_bit_mask = (1ULL << pa_ctrl_pin_);
+        io_conf.pull_down_en = GPIO_PULLDOWN_DISABLE;
+        io_conf.pull_up_en = GPIO_PULLUP_DISABLE;
+        gpio_config(&io_conf);
+    }
+    gpio_set_drive_capability(pdm_speak_p, GPIO_DRIVE_CAP_0);
+
+    if (pdm_speak_n != GPIO_NUM_NC) {
+        // NOTE(review): the IO MUX register is hard-coded to GPIO10 and does not follow
+        // pdm_speak_n - confirm this is intended when the N pin is a different GPIO.
+        PIN_FUNC_SELECT(IO_MUX_GPIO10_REG, PIN_FUNC_GPIO);
+        gpio_set_direction(pdm_speak_n, GPIO_MODE_OUTPUT);
+        // Route the SD OUT signal to the N pin with output inversion enabled.
+        esp_rom_gpio_connect_out_signal(pdm_speak_n, I2SO_SD_OUT_IDX, 1, 0);
+        gpio_set_drive_capability(pdm_speak_n, GPIO_DRIVE_CAP_0);
+    }
+
+    // Create the output timer; its callback is OutputTimerCallback with this object as argument.
+    esp_timer_create_args_t output_timer_args = {
+        .callback = &AdcPdmAudioCodec::OutputTimerCallback,
+        .arg = this,
+        .dispatch_method = ESP_TIMER_TASK,
+        .name = "output_timer"
+    };
+    ESP_ERROR_CHECK(esp_timer_create(&output_timer_args, &output_timer_));
+
+    ESP_LOGI(TAG, "AdcPdmAudioCodec initialized");
+}
+
+/**
+ * Releases the output timer and both codec devices.
+ *
+ * Every handle is checked before use: the constructor can return early (for example when the
+ * input codec device cannot be created), which leaves later handles null, and a null handle
+ * must not reach the ESP_ERROR_CHECK'd close calls.
+ */
+AdcPdmAudioCodec::~AdcPdmAudioCodec() {
+    // Stop and delete the timer first so its callback can no longer touch the devices.
+    if (output_timer_) {
+        esp_timer_stop(output_timer_);
+        esp_timer_delete(output_timer_);
+        output_timer_ = nullptr;
+    }
+
+    if (output_dev_) {
+        ESP_ERROR_CHECK(esp_codec_dev_close(output_dev_));
+        esp_codec_dev_delete(output_dev_);
+        output_dev_ = nullptr;
+    }
+    if (input_dev_) {
+        ESP_ERROR_CHECK(esp_codec_dev_close(input_dev_));
+        esp_codec_dev_delete(input_dev_);
+        input_dev_ = nullptr;
+    }
+}
+
+// Applies the volume to the output codec device (aborting via ESP_ERROR_CHECK on failure),
+// then forwards the same value to the base-class implementation.
+void AdcPdmAudioCodec::SetOutputVolume(int volume) {
+    ESP_ERROR_CHECK(esp_codec_dev_set_out_vol(output_dev_, volume));
+    AudioCodec::SetOutputVolume(volume);
+}
+
+/**
+ * Opens or closes the input codec device.
+ *
+ * Does nothing when the requested state already matches input_enabled_. When enabling, the
+ * device is opened as 16-bit, single channel at input_sample_rate_. The base-class
+ * EnableInput is called afterwards in both directions.
+ */
+void AdcPdmAudioCodec::EnableInput(bool enable) {
+    if (input_enabled_ == enable) {
+        return;
+    }
+
+    if (!enable) {
+        ESP_ERROR_CHECK(esp_codec_dev_close(input_dev_));
+    } else {
+        esp_codec_dev_sample_info_t fs = {
+            .bits_per_sample = 16,
+            .channel = 1,
+            .channel_mask = ESP_CODEC_DEV_MAKE_CHANNEL_MASK(0),
+            .sample_rate = (uint32_t)input_sample_rate_,
+            .mclk_multiple = 0,
+        };
+        ESP_ERROR_CHECK(esp_codec_dev_open(input_dev_, &fs));
+    }
+
+    AudioCodec::EnableInput(enable);
+}
+
+/**
+ * Opens or closes the output path.
+ *
+ * Does nothing when the requested state already matches output_enabled_.
+ *
+ * Enabling: opens the output codec device (16-bit, single channel, output_sample_rate_),
+ * applies output_volume_, reconfigures the PDM TX clock with AUDIO_PDM_UPSAMPLE_FS, drives the
+ * PA control pin high when one is configured, and arms the one-shot output timer.
+ *
+ * Disabling: stops the timer, drives the PA control pin low when one is configured, and closes
+ * the output codec device.
+ */
+void AdcPdmAudioCodec::EnableOutput(bool enable) {
+    if (output_enabled_ == enable) {
+        return;
+    }
+
+    if (!enable) {
+        // Stop the timer when the output is being disabled.
+        if (output_timer_) {
+            esp_timer_stop(output_timer_);
+        }
+        if (pa_ctrl_pin_ != GPIO_NUM_NC) {
+            gpio_set_level(pa_ctrl_pin_, 0);
+        }
+        ESP_ERROR_CHECK(esp_codec_dev_close(output_dev_));
+        AudioCodec::EnableOutput(enable);
+        return;
+    }
+
+    // Play 16bit 1 channel
+    esp_codec_dev_sample_info_t fs = {
+        .bits_per_sample = 16,
+        .channel = 1,
+        .channel_mask = 0,
+        .sample_rate = (uint32_t)output_sample_rate_,
+        .mclk_multiple = 0,
+    };
+    ESP_ERROR_CHECK(esp_codec_dev_open(output_dev_, &fs));
+    ESP_ERROR_CHECK(esp_codec_dev_set_out_vol(output_dev_, output_volume_));
+
+    // Force the PDM TX clock to be reconfigured from the board settings, overriding the default
+    // up_sample_fs that the third-party library applies in set_fmt.
+    // If the channel is already enabled: disable it first, reconfigure, then enable it again.
+    ESP_ERROR_CHECK_WITHOUT_ABORT(i2s_channel_disable(tx_handle_));
+    i2s_pdm_tx_clk_config_t clk_cfg = I2S_PDM_TX_CLK_DEFAULT_CONFIG((uint32_t)output_sample_rate_);
+    clk_cfg.up_sample_fs = AUDIO_PDM_UPSAMPLE_FS;
+    ESP_ERROR_CHECK(i2s_channel_reconfig_pdm_tx_clock(tx_handle_, &clk_cfg));
+    ESP_ERROR_CHECK(i2s_channel_enable(tx_handle_));
+
+    if (pa_ctrl_pin_ != GPIO_NUM_NC) {
+        gpio_set_level(pa_ctrl_pin_, 1);
+    }
+
+    // Arm the timer when the output is being enabled.
+    if (output_timer_) {
+        esp_timer_start_once(output_timer_, TIMER_TIMEOUT_US);
+    }
+
+    AudioCodec::EnableOutput(enable);
+}
+
+/**
+ * Reads `samples` 16-bit samples from the input codec device into `dest`.
+ *
+ * When input is disabled nothing is read and `dest` is left untouched. A read error is only
+ * logged (no abort). The return value is always `samples`.
+ */
+int AdcPdmAudioCodec::Read(int16_t* dest, int samples) {
+    if (!input_enabled_) {
+        return samples;
+    }
+    ESP_ERROR_CHECK_WITHOUT_ABORT(esp_codec_dev_read(input_dev_, (void*)dest, samples * sizeof(int16_t)));
+    return samples;
+}
+/**
+ * Writes `samples` 16-bit samples from `data` to the output codec device.
+ *
+ * When output is disabled nothing is written. A write error is only logged (no abort). After
+ * each write the one-shot output timer is stopped and re-armed with TIMER_TIMEOUT_US. The
+ * return value is always `samples`.
+ */
+int AdcPdmAudioCodec::Write(const int16_t* data, int samples) {
+    if (!output_enabled_) {
+        return samples;
+    }
+
+    ESP_ERROR_CHECK_WITHOUT_ABORT(esp_codec_dev_write(output_dev_, (void*)data, samples * sizeof(int16_t)));
+
+    // Reset the output timer.
+    if (output_timer_) {
+        esp_timer_stop(output_timer_);
+        esp_timer_start_once(output_timer_, TIMER_TIMEOUT_US);
+    }
+    return samples;
+}
+
+/**
+ * Loads the stored output volume and enables both directions.
+ *
+ * The volume is read from the "audio" settings namespace under "output_volume", with the
+ * current output_volume_ as the fallback. A value of zero or below is replaced with 10.
+ */
+void AdcPdmAudioCodec::Start() {
+    Settings settings("audio", false);
+    output_volume_ = settings.GetInt("output_volume", output_volume_);
+
+    const bool volume_too_small = output_volume_ <= 0;
+    if (volume_too_small) {
+        ESP_LOGW(TAG, "Output volume value (%d) is too small, setting to default (10)", output_volume_);
+        output_volume_ = 10;
+    }
+
+    EnableInput(true);
+    EnableOutput(true);
+    ESP_LOGI(TAG, "Audio codec started");
+}
+
+// Output timer callback: `arg` is the AdcPdmAudioCodec registered at timer creation.
+// Disables the output if it is still enabled when the timer fires.
+void AdcPdmAudioCodec::OutputTimerCallback(void* arg) {
+    auto* self = static_cast<AdcPdmAudioCodec*>(arg);
+    if (self == nullptr || !self->output_enabled_) {
+        return;
+    }
+    self->EnableOutput(false);
+}
--- a/main/boards/esp-sensairshuttle/adc_pdm_audio_codec.h
+++ b/main/boards/esp-sensairshuttle/adc_pdm_audio_codec.h
@@ -0,0 +1,37 @@
+// Include guard is specific to this header so it cannot collide with another codec header
+// that uses a box-codec guard name.
+#ifndef _ADC_PDM_AUDIO_CODEC_H
+#define _ADC_PDM_AUDIO_CODEC_H
+
+#include "audio_codec.h"
+
+#include <esp_codec_dev.h>
+#include <esp_codec_dev_defaults.h>
+#include <esp_timer.h>
+
+/**
+ * AudioCodec implementation that holds one input and one output esp_codec_dev handle, an
+ * optional PA control GPIO, and a one-shot timer that is re-armed on output activity.
+ */
+class AdcPdmAudioCodec : public AudioCodec {
+private:
+    esp_codec_dev_handle_t output_dev_ = nullptr;
+    esp_codec_dev_handle_t input_dev_ = nullptr;
+    gpio_num_t pa_ctrl_pin_ = GPIO_NUM_NC;  // GPIO_NUM_NC means no PA control pin is used
+
+    // Output timer state
+    esp_timer_handle_t output_timer_ = nullptr;
+    static constexpr uint64_t TIMER_TIMEOUT_US = 120000; // 120ms = 120000us
+
+    // Timer callback; `arg` is the owning AdcPdmAudioCodec instance
+    static void OutputTimerCallback(void* arg);
+
+    virtual int Read(int16_t* dest, int samples) override;
+    virtual int Write(const int16_t* data, int samples) override;
+
+public:
+    AdcPdmAudioCodec(int input_sample_rate, int output_sample_rate,
+        uint32_t adc_mic_channel, gpio_num_t pdm_speak_p, gpio_num_t pdm_speak_n, gpio_num_t pa_ctl);
+    virtual ~AdcPdmAudioCodec();
+
+    virtual void SetOutputVolume(int volume) override;
+    virtual void EnableInput(bool enable) override;
+    virtual void EnableOutput(bool enable) override;
+    void Start();
+};
+
+#endif // _ADC_PDM_AUDIO_CODEC_H
--- a/main/boards/esp-sensairshuttle/config.h
+++ b/main/boards/esp-sensairshuttle/config.h
@@ -0,0 +1,40 @@
+#ifndef _BOARD_CONFIG_H_
+#define _BOARD_CONFIG_H_
+
+#include <driver/gpio.h>
+
+#define AUDIO_INPUT_SAMPLE_RATE  16000
+#define AUDIO_OUTPUT_SAMPLE_RATE 24000
+
+#define AUDIO_PDM_UPSAMPLE_FS    480
+
+#define AUDIO_ADC_MIC_CHANNEL       5
+#define AUDIO_PDM_SPEAK_P_GPIO      GPIO_NUM_7
+#define AUDIO_PDM_SPEAK_N_GPIO      GPIO_NUM_8
+#define AUDIO_PA_CTL_GPIO           GPIO_NUM_1
+
+#define BOOT_BUTTON_GPIO            GPIO_NUM_28
+#define DISPLAY_MOSI_PIN            GPIO_NUM_23
+#define DISPLAY_CLK_PIN             GPIO_NUM_24
+#define DISPLAY_DC_PIN              GPIO_NUM_26
+#define DISPLAY_RST_PIN             GPIO_NUM_NC
+#define DISPLAY_CS_PIN              GPIO_NUM_25
+
+#define LCD_TP_SCL GPIO_NUM_3
+#define LCD_TP_SDA GPIO_NUM_2
+
+#define LCD_TYPE_ST7789_SERIAL
+#define DISPLAY_WIDTH           284
+#define DISPLAY_HEIGHT          240
+#define DISPLAY_MIRROR_X        false
+#define DISPLAY_MIRROR_Y        true
+#define DISPLAY_SWAP_XY         true
+
+#define DISPLAY_INVERT_COLOR    true
+#define DISPLAY_RGB_ORDER       LCD_RGB_ELEMENT_ORDER_RGB
+#define DISPLAY_OFFSET_X        36
+#define DISPLAY_OFFSET_Y        0
+#define DISPLAY_BACKLIGHT_OUTPUT_INVERT false
+#define DISPLAY_SPI_MODE        0
+
+#endif // _BOARD_CONFIG_H_
--- a/main/boards/esp-sensairshuttle/config.json
+++ b/main/boards/esp-sensairshuttle/config.json
@@ -0,0 +1,28 @@
+{
+    "target": "esp32c5",
+    "builds": [
+        {
+            "name": "esp-sensairshuttle",
+            "sdkconfig_append": [
+                "CONFIG_IDF_TARGET=\"esp32c5\"",
+                "CONFIG_ESP_WIFI_DYNAMIC_RX_BUFFER_NUM=6",
+                "CONFIG_ESP_WIFI_AMPDU_TX_ENABLED=n",
+                "CONFIG_ESP_WIFI_ENABLE_WPA3_SAE=n",
+                "CONFIG_ESP_WIFI_ESPNOW_MAX_ENCRYPT_NUM=0",
+                "CONFIG_ESP_WIFI_ENTERPRISE_SUPPORT=n",
+                "CONFIG_FREERTOS_IDLE_TASK_STACKSIZE=768",
+                "CONFIG_LWIP_TCPIP_TASK_STACK_SIZE=2048",
+                "CONFIG_MBEDTLS_DYNAMIC_FREE_CONFIG_DATA=y",
+                "CONFIG_SPIRAM=y",
+                "CONFIG_SPIRAM_MALLOC_ALWAYSINTERNAL=3072",
+                "CONFIG_SPIRAM_TRY_ALLOCATE_WIFI_LWIP=y",
+                "CONFIG_LWIP_IPV6=n",
+                "CONFIG_USE_ESP_WAKE_WORD=y",
+                "CONFIG_SR_WN_WN9S_HIESP=y",
+                "CONFIG_USE_EMOTE_MESSAGE_STYLE=y",
+                "CONFIG_FLASH_CUSTOM_ASSETS=y",
+                "CONFIG_CUSTOM_ASSETS_FILE=\"https://dl.espressif.com/AE/wn9_nihaoxiaozhi_tts-font_puhui_common_20_4-echoear.bin\""
+            ]
+        }
+    ]
+}
--- a/main/boards/esp-sensairshuttle/esp-sensairshuttle.cc
+++ b/main/boards/esp-sensairshuttle/esp-sensairshuttle.cc
@@ -0,0 +1,315 @@
+#include "wifi_board.h"
+#include "adc_pdm_audio_codec.h"
+#include "application.h"
+#include "button.h"
+#include "config.h"
+#include "mcp_server.h"
+#include <wifi_station.h>
+#include <esp_log.h>
+#include <driver/i2c_master.h>
+#include <driver/spi_common.h>
+#include <esp_wifi.h>
+#include <esp_event.h>
+
+#include "display/lcd_display.h"
+#include <esp_lcd_panel_vendor.h>
+#include <esp_lcd_panel_io.h>
+#include <esp_lcd_panel_ops.h>
+#include "esp_lcd_ili9341.h"
+
+#include "display/emote_display.h"
+
+#include "assets/lang_config.h"
+#include "anim_player.h"
+#include "led_strip.h"
+#include "driver/rmt_tx.h"
+#include "i2c_device.h"
+
+#include <freertos/FreeRTOS.h>
+#include <freertos/task.h>
+
+#include "sdkconfig.h"
+
+constexpr char TAG[] = "ESP_SensairShuttle";
+
+// Panel initialization command table. It is handed to the LCD driver through
+// ili9341_vendor_config_t::init_cmds when the panel is created.
+// NOTE(review): the per-entry register names below were written by the original author and
+// are not verifiable from this file - confirm them against the panel controller datasheet.
+static const ili9341_lcd_init_cmd_t vendor_specific_init[] = {
+    // {cmd, { data }, data_size, delay_ms}
+    {0x11, NULL, 0, 120},                                          // Sleep Out
+    {0x36, (uint8_t []){0x00}, 1, 0},                              // Memory Data Access Control
+    {0x3A, (uint8_t []){0x05}, 1, 0},                              // Interface Pixel Format (16-bit)
+    {0xB2, (uint8_t []){0x0C, 0x0C, 0x00, 0x33, 0x33}, 5, 0},      // Porch Setting
+    {0xB7, (uint8_t []){0x05}, 1, 0},                              // Gate Control
+    {0xBB, (uint8_t []){0x21}, 1, 0},                              // VCOM Setting
+    {0xC0, (uint8_t []){0x2C}, 1, 0},                              // LCM Control
+    {0xC2, (uint8_t []){0x01}, 1, 0},                              // VDV and VRH Command Enable
+    {0xC3, (uint8_t []){0x15}, 1, 0},                              // VRH Set
+    {0xC6, (uint8_t []){0x0F}, 1, 0},                              // Frame Rate Control
+    {0xD0, (uint8_t []){0xA7}, 1, 0},                              // Power Control 1
+    {0xD0, (uint8_t []){0xA4, 0xA1}, 2, 0},                        // Power Control 1
+    {0xD6, (uint8_t []){0xA1}, 1, 0},                              // Gate output GND in sleep mode
+    {
+        0xE0, (uint8_t [])
+        {
+            0xF0, 0x05, 0x0E, 0x08, 0x0A, 0x17, 0x39, 0x54,
+            0x4E, 0x37, 0x12, 0x12, 0x31, 0x37
+        }, 14, 0
+    },                                                             // Positive Gamma Control
+    {
+        0xE1, (uint8_t [])
+        {
+            0xF0, 0x10, 0x14, 0x0D, 0x0B, 0x05, 0x39, 0x44,
+            0x4D, 0x38, 0x14, 0x14, 0x2E, 0x35
+        }, 14, 0
+    },                                                             // Negative Gamma Control
+    {0xE4, (uint8_t []){0x23, 0x00, 0x00}, 3, 0},                  // Gate position control
+    {0x21, NULL, 0, 0},                                            // Display Inversion On
+    {0x29, NULL, 0, 0},                                            // Display On
+    {0x2C, NULL, 0, 0},                                            // Memory Write
+};
+
+/**
+ * Polling helper for the touch controller reached through I2cDevice.
+ *
+ * UpdateTouchPoint() reads the latest touch registers; CheckTouchEvent() turns the change in
+ * touch state between two consecutive calls into a press / release / hold event.
+ *
+ * The read buffer is a fixed-size member rather than a heap allocation, so the class needs no
+ * destructor and an accidental copy cannot lead to a double free.
+ */
+class Cst816d : public I2cDevice {
+public:
+    /// Last decoded touch report: number of touch points and the x/y of the reported point.
+    struct TouchPoint_t {
+        int num = 0;
+        int x = -1;
+        int y = -1;
+    };
+
+    enum TouchEvent {
+        TOUCH_NONE,
+        TOUCH_PRESS,
+        TOUCH_RELEASE,
+        TOUCH_HOLD
+    };
+
+    Cst816d(i2c_master_bus_handle_t i2c_bus, uint8_t addr) : I2cDevice(i2c_bus, addr)
+    {
+    }
+
+    /// Reads kReadLength bytes starting at register 0x02 and decodes them into the touch point.
+    void UpdateTouchPoint()
+    {
+        ReadRegs(0x02, read_buffer_, kReadLength);
+        // Byte 0: low nibble is the touch count.
+        tp_.num = read_buffer_[0] & 0x0F;
+        // Bytes 1-2 and 3-4: 12-bit coordinates (low nibble of the first byte is the high part).
+        tp_.x = ((read_buffer_[1] & 0x0F) << 8) | read_buffer_[2];
+        tp_.y = ((read_buffer_[3] & 0x0F) << 8) | read_buffer_[4];
+    }
+
+    const TouchPoint_t &GetTouchPoint()
+    {
+        return tp_;
+    }
+
+    /**
+     * Compares the current touch state with the state seen on the previous call.
+     *
+     * @return TOUCH_PRESS on a not-touched -> touched transition (also increments the press
+     *         counter), TOUCH_RELEASE on touched -> not-touched, TOUCH_HOLD while the touch
+     *         persists, TOUCH_NONE otherwise.
+     */
+    TouchEvent CheckTouchEvent()
+    {
+        bool is_touched = (tp_.num > 0);
+        TouchEvent event = TOUCH_NONE;
+
+        if (is_touched && !was_touched_) {
+            // Press event (transition from not touched to touched)
+            press_count_++;
+            event = TOUCH_PRESS;
+            ESP_LOGI(TAG, "TOUCH PRESS - count: %d, x: %d, y: %d", press_count_, tp_.x, tp_.y);
+        } else if (!is_touched && was_touched_) {
+            // Release event (transition from touched to not touched)
+            event = TOUCH_RELEASE;
+            ESP_LOGI(TAG, "TOUCH RELEASE - total presses: %d", press_count_);
+        } else if (is_touched && was_touched_) {
+            // Continuous touch (hold)
+            event = TOUCH_HOLD;
+            ESP_LOGD(TAG, "TOUCH HOLD - x: %d, y: %d", tp_.x, tp_.y);
+        }
+
+        // Update previous state
+        was_touched_ = is_touched;
+        return event;
+    }
+
+    int GetPressCount() const
+    {
+        return press_count_;
+    }
+
+    void ResetPressCount()
+    {
+        press_count_ = 0;
+    }
+
+private:
+    // Number of bytes fetched per touch report.
+    static constexpr size_t kReadLength = 6;
+
+    uint8_t read_buffer_[kReadLength] = {};
+    TouchPoint_t tp_;
+
+    // Touch state tracking
+    bool was_touched_ = false;
+    int press_count_ = 0;
+};
+
+/**
+ * Board definition for ESP-SensairShuttle.
+ *
+ * The constructor brings up, in order: the I2C bus, the touch controller with its polling
+ * task, the boot button handler, the SPI bus, and the LCD panel with its display object.
+ * A touch release and a boot-button click share the same behaviour: enter Wi-Fi configuration
+ * while the device state is kDeviceStateStarting, otherwise toggle the chat state.
+ */
+class EspSensairShuttle : public WifiBoard {
+private:
+    // Handles start out null so an unset handle is distinguishable from a valid one.
+    i2c_master_bus_handle_t i2c_bus_ = nullptr;
+    Cst816d* cst816d_ = nullptr;
+    Display* display_ = nullptr;
+    Button boot_button_;
+
+    /// Creates the I2C master bus on the LCD_TP_SDA / LCD_TP_SCL pins.
+    void InitializeI2c()
+    {
+        i2c_master_bus_config_t i2c_bus_cfg = {
+            .i2c_port = I2C_NUM_0,
+            .sda_io_num = LCD_TP_SDA,
+            .scl_io_num = LCD_TP_SCL,
+            .clk_source = I2C_CLK_SRC_DEFAULT,
+            .glitch_ignore_cnt = 7,
+            .intr_priority = 0,
+            .trans_queue_depth = 0,
+            .flags = {
+                .enable_internal_pullup = 1,
+            },
+        };
+        ESP_ERROR_CHECK(i2c_new_master_bus(&i2c_bus_cfg, &i2c_bus_));
+    }
+
+    /// Task body: polls the touch controller every 50 ms and reacts to release events.
+    /// `arg` is the Cst816d instance to poll.
+    static void touch_event_task(void* arg)
+    {
+        Cst816d* touchpad = static_cast<Cst816d*>(arg);
+        if (touchpad == nullptr) {
+            ESP_LOGE(TAG, "Invalid touchpad pointer in touch_event_task");
+            vTaskDelete(NULL);
+            return;
+        }
+
+        while (true) {
+            touchpad->UpdateTouchPoint();
+            auto touch_event = touchpad->CheckTouchEvent();
+
+            if (touch_event == Cst816d::TOUCH_RELEASE) {
+                auto &app = Application::GetInstance();
+                auto &board = static_cast<EspSensairShuttle &>(Board::GetInstance());
+
+                if (app.GetDeviceState() == kDeviceStateStarting) {
+                    board.EnterWifiConfigMode();
+                } else {
+                    app.ToggleChatState();
+                }
+            }
+
+            vTaskDelay(pdMS_TO_TICKS(50)); // Poll every 50ms
+        }
+    }
+
+    /// Creates the touch controller object (I2C address 0x15) and starts its polling task.
+    void InitializeCst816dTouchPad()
+    {
+        cst816d_ = new Cst816d(i2c_bus_, 0x15);
+        // Without the task no touch events are ever produced, so report a failed creation.
+        if (xTaskCreate(touch_event_task, "touch_task", 2 * 1024, cst816d_, 5, NULL) != pdPASS) {
+            ESP_LOGE(TAG, "Failed to create touch task");
+        }
+    }
+
+    /// Registers the boot-button click handler.
+    void InitializeButtons()
+    {
+        boot_button_.OnClick([this]() {
+            auto& app = Application::GetInstance();
+            if (app.GetDeviceState() == kDeviceStateStarting) {
+                ESP_LOGI(TAG, "Boot button pressed, enter WiFi configuration mode");
+                EnterWifiConfigMode();
+                return;
+            }
+            app.ToggleChatState();
+        });
+    }
+
+    /// Initializes SPI2_HOST for the display (MOSI and clock only; no MISO or quad lines).
+    void InitializeSpi()
+    {
+        spi_bus_config_t buscfg = {};
+        buscfg.mosi_io_num = DISPLAY_MOSI_PIN;
+        buscfg.miso_io_num = GPIO_NUM_NC;
+        buscfg.sclk_io_num = DISPLAY_CLK_PIN;
+        buscfg.quadwp_io_num = GPIO_NUM_NC;
+        buscfg.quadhd_io_num = GPIO_NUM_NC;
+        buscfg.max_transfer_sz = DISPLAY_WIDTH * 10 * sizeof(uint16_t);
+        ESP_ERROR_CHECK(spi_bus_initialize(SPI2_HOST, &buscfg, SPI_DMA_CH_AUTO));
+    }
+
+    /// Creates the panel IO and panel driver, applies orientation settings from config.h, and
+    /// constructs the display object selected by CONFIG_USE_EMOTE_MESSAGE_STYLE.
+    void InitializeLcdDisplay()
+    {
+        esp_lcd_panel_io_handle_t panel_io = nullptr;
+        esp_lcd_panel_handle_t panel = nullptr;
+
+        ESP_LOGD(TAG, "Install panel IO");
+        esp_lcd_panel_io_spi_config_t io_config = {};
+        io_config.cs_gpio_num = DISPLAY_CS_PIN;
+        io_config.dc_gpio_num = DISPLAY_DC_PIN;
+        io_config.spi_mode = DISPLAY_SPI_MODE;
+        io_config.pclk_hz = 40 * 1000 * 1000;
+        io_config.trans_queue_depth = 10;
+        io_config.lcd_cmd_bits = 8;
+        io_config.lcd_param_bits = 8;
+        ESP_ERROR_CHECK(esp_lcd_new_panel_io_spi(SPI2_HOST, &io_config, &panel_io));
+
+        ESP_LOGD(TAG, "Install LCD driver");
+        const ili9341_vendor_config_t vendor_config = {
+            .init_cmds = &vendor_specific_init[0],
+            .init_cmds_size = sizeof(vendor_specific_init) / sizeof(ili9341_lcd_init_cmd_t),
+        };
+
+        esp_lcd_panel_dev_config_t panel_config = {};
+        panel_config.reset_gpio_num = DISPLAY_RST_PIN;
+        panel_config.rgb_ele_order = DISPLAY_RGB_ORDER;
+        panel_config.bits_per_pixel = 16;
+        panel_config.vendor_config = (void *) &vendor_config;
+        ESP_ERROR_CHECK(esp_lcd_new_panel_ili9341(panel_io, &panel_config, &panel));
+
+        esp_lcd_panel_reset(panel);
+        esp_lcd_panel_init(panel);
+        esp_lcd_panel_invert_color(panel, DISPLAY_INVERT_COLOR);
+        esp_lcd_panel_set_gap(panel, DISPLAY_OFFSET_X, DISPLAY_OFFSET_Y);
+        esp_lcd_panel_mirror(panel, DISPLAY_MIRROR_X, DISPLAY_MIRROR_Y);
+        esp_lcd_panel_swap_xy(panel, DISPLAY_SWAP_XY);
+        ESP_LOGI(TAG, "LCD panel create success, %p", panel);
+
+#ifdef CONFIG_USE_EMOTE_MESSAGE_STYLE
+        display_ = new emote::EmoteDisplay(panel, panel_io, DISPLAY_WIDTH, DISPLAY_HEIGHT);
+#else
+        display_ = new SpiLcdDisplay(panel_io, panel,
+            DISPLAY_WIDTH, DISPLAY_HEIGHT, 0, 0, DISPLAY_MIRROR_X, DISPLAY_MIRROR_Y, DISPLAY_SWAP_XY);
+#endif
+
+    }
+
+public:
+    EspSensairShuttle() : boot_button_(BOOT_BUTTON_GPIO) {
+        InitializeI2c();
+        InitializeCst816dTouchPad();
+        InitializeButtons();
+        InitializeSpi();
+        InitializeLcdDisplay();
+    }
+
+    /// Returns the board's audio codec, constructed on first use from the config.h constants.
+    virtual AudioCodec* GetAudioCodec() override
+    {
+        static AdcPdmAudioCodec audio_codec(
+            AUDIO_INPUT_SAMPLE_RATE,
+            AUDIO_OUTPUT_SAMPLE_RATE,
+            AUDIO_ADC_MIC_CHANNEL,
+            AUDIO_PDM_SPEAK_P_GPIO,
+            AUDIO_PDM_SPEAK_N_GPIO,
+            AUDIO_PA_CTL_GPIO);
+        return &audio_codec;
+    }
+
+    virtual Display* GetDisplay() override
+    {
+        return display_;
+    }
+
+    Cst816d* GetTouchpad()
+    {
+        return cst816d_;
+    }
+};
+
+DECLARE_BOARD(EspSensairShuttle);
--- a/main/idf_component.yml
+++ b/main/idf_component.yml
@@ -19,7 +19,8 @@ dependencies:
  espressif/esp_lcd_panel_io_additions: ^1.0.1
  78/esp_lcd_nv3023: ~1.0.0
  78/esp-wifi-connect: ~3.0.2
-  78/esp-opus-encoder: ~2.4.1
+  espressif/esp_audio_effects: ~1.2.0
+  espressif/esp_audio_codec: ~2.4.0
  78/esp-ml307: ~3.5.3
  78/xiaozhi-fonts: ~1.5.5
  espressif/led_strip: ~3.0.1
--- a/scripts/release.py
+++ b/scripts/release.py
@@ -278,7 +278,7 @@ if __name__ == "__main__":

    # Compile mode
    board_type_input: str = args.board
-    name_filter: str | None = args.name
+    name_filter: Optional[str] = args.name

    # Check board_type in CMakeLists
    if board_type_input != "all" and not _board_type_exists(board_type_input):
Author	SHA1	Message	Date
majingjing123	906d819454	feat(audio): Use esp_audio_codec and esp_audio_effects to replace 78opus (#1632 )	2026-01-07 18:45:34 +08:00
laride	be88719932	feat: Add ESP-SensairShuttle (#1620 ) * feat: Add ESP-SensairShuttle * fix: fix board name	2026-01-02 12:19:46 +08:00