Enhance audio processing and wake word detection (#1739)

* Enhance audio processing and wake word detection
  - Set task priority in Application::Run to improve responsiveness.
  - Log detected wake words with their state in HandleWakeWordDetectedEvent.
  - Streamline audio feeding in AudioService to handle both wake word and audio processor events.
  - Implement input buffering in AfeAudioProcessor, AfeWakeWord, CustomWakeWord, and EspWakeWord to manage audio data more efficiently.
  - Clear input buffers on stop to prevent residual data issues.
* Refactor audio processing to enhance thread safety and state management
  - Implement early return checks in Feed methods of AfeAudioProcessor, AfeWakeWord, CustomWakeWord, and EspWakeWord to prevent processing when not running.
  - Introduce std::atomic for running state in CustomWakeWord and EspWakeWord to ensure thread-safe access.
  - Consolidate input buffer management with mutex locks to avoid race conditions during Stop and Feed operations.
* Refactor listening mode handling and wake word detection configuration
  - Replace direct mode setting logic with a new GetDefaultListeningMode method for improved clarity and maintainability.
  - Update HandleToggleChatEvent, HandleWakeWordDetectedEvent, and ContinueWakeWordInvoke to utilize the new method for determining listening mode.
  - Introduce Kconfig option WAKE_WORD_DETECTION_IN_LISTENING to enable or disable wake word detection during listening mode, enhancing configurability.
2026-02-04 14:28:21 +08:00
parent 37110a9d05
commit 2b025c4ea6
13 changed files with 167 additions and 61 deletions
--- a/main/application.cc
+++ b/main/application.cc
@@ -691,7 +691,7 @@ void Application::HandleToggleChatEvent() {
    }

    if (state == kDeviceStateIdle) {
-        ListeningMode mode = aec_mode_ == kAecOff ? kListeningModeAutoStop : kListeningModeRealtime;
+        ListeningMode mode = GetDefaultListeningMode();
        if (!protocol_->IsAudioChannelOpened()) {
            SetDeviceState(kDeviceStateConnecting);
            // Schedule to let the state change be processed first (UI update)
@@ -777,7 +777,9 @@ void Application::HandleWakeWordDetectedEvent() {
    }

    auto state = GetDeviceState();
-    
+    auto wake_word = audio_service_.GetLastWakeWord();
+    ESP_LOGI(TAG, "Wake word detected: %s (state: %d)", wake_word.c_str(), (int)state);
+
    if (state == kDeviceStateIdle) {
        audio_service_.EncodeWakeWord();
        auto wake_word = audio_service_.GetLastWakeWord();
@@ -793,8 +795,22 @@ void Application::HandleWakeWordDetectedEvent() {
        }
        // Channel already opened, continue directly
        ContinueWakeWordInvoke(wake_word);
-    } else if (state == kDeviceStateSpeaking) {
+    } else if (state == kDeviceStateSpeaking || state == kDeviceStateListening) {
        AbortSpeaking(kAbortReasonWakeWordDetected);
+        // Clear send queue to avoid sending residues to server
+        while (audio_service_.PopPacketFromSendQueue());
+
+        if (state == kDeviceStateListening) {
+            protocol_->SendStartListening(GetDefaultListeningMode());
+            audio_service_.ResetDecoder();
+            audio_service_.PlaySound(Lang::Sounds::OGG_POPUP);
+            // Re-enable wake word detection as it was stopped by the detection itself
+            audio_service_.EnableWakeWordDetection(true);
+        } else {
+            // Play popup sound and start listening again
+            play_popup_on_listening_ = true;
+            SetListeningMode(GetDefaultListeningMode());
+        }
    } else if (state == kDeviceStateActivating) {
        // Restart the activation check if the wake word is detected during activation
        SetDeviceState(kDeviceStateIdle);
@@ -822,12 +838,15 @@ void Application::ContinueWakeWordInvoke(const std::string& wake_word) {
    }
    // Set the chat state to wake word detected
    protocol_->SendWakeWordDetected(wake_word);
-    SetListeningMode(aec_mode_ == kAecOff ? kListeningModeAutoStop : kListeningModeRealtime);
+
+    // Set flag to play popup sound after state changes to listening
+    play_popup_on_listening_ = true;
+    SetListeningMode(GetDefaultListeningMode());
 #else
    // Set flag to play popup sound after state changes to listening
    // (PlaySound here would be cleared by ResetDecoder in EnableVoiceProcessing)
    play_popup_on_listening_ = true;
-    SetListeningMode(aec_mode_ == kAecOff ? kListeningModeAutoStop : kListeningModeRealtime);
+    SetListeningMode(GetDefaultListeningMode());
 #endif
 }

@@ -859,7 +878,7 @@ void Application::HandleStateChangedEvent() {
            display->SetEmotion("neutral");

            // Make sure the audio processor is running
-            if (!audio_service_.IsAudioProcessorRunning()) {
+            if (play_popup_on_listening_ || !audio_service_.IsAudioProcessorRunning()) {
                // For auto mode, wait for playback queue to be empty before enabling voice processing
                // This prevents audio truncation when STOP arrives late due to network jitter
                if (listening_mode_ == kListeningModeAutoStop) {
@@ -869,9 +888,16 @@ void Application::HandleStateChangedEvent() {
                // Send the start listening command
                protocol_->SendStartListening(listening_mode_);
                audio_service_.EnableVoiceProcessing(true);
-                audio_service_.EnableWakeWordDetection(false);
            }

+#ifdef CONFIG_WAKE_WORD_DETECTION_IN_LISTENING
+            // Enable wake word detection in listening mode (configured via Kconfig)
+            audio_service_.EnableWakeWordDetection(audio_service_.IsAfeWakeWord());
+#else
+            // Disable wake word detection in listening mode
+            audio_service_.EnableWakeWordDetection(false);
+#endif
+            
            // Play popup sound after ResetDecoder (in EnableVoiceProcessing) has been called
            if (play_popup_on_listening_) {
                play_popup_on_listening_ = false;
@@ -919,6 +945,10 @@ void Application::SetListeningMode(ListeningMode mode) {
    SetDeviceState(kDeviceStateListening);
 }

+ListeningMode Application::GetDefaultListeningMode() const {
+    return aec_mode_ == kAecOff ? kListeningModeAutoStop : kListeningModeRealtime;
+}
+
 void Application::Reboot() {
    ESP_LOGI(TAG, "Rebooting...");
    // Disconnect the audio channel