Refactor audio channel handling and wake word detection in Application class (#1722)

- Introduced ContinueOpenAudioChannel and ContinueWakeWordInvoke methods to streamline audio channel management and wake word processing.
- Updated HandleToggleChatEvent, HandleStartListeningEvent, HandleWakeWordDetectedEvent, and WakeWordInvoke to schedule the channel-opening step after the state change, so the UI can update first and stays responsive.
- Simplified the logic for setting listening modes based on audio channel state, improving code clarity and maintainability.
2026-02-01 14:55:47 +08:00
parent b6c61fe390
commit 0883a36537
2 changed files with 75 additions and 50 deletions
--- a/main/application.cc
+++ b/main/application.cc
@@ -691,14 +691,16 @@ void Application::HandleToggleChatEvent() {
    }

    if (state == kDeviceStateIdle) {
+        ListeningMode mode = aec_mode_ == kAecOff ? kListeningModeAutoStop : kListeningModeRealtime;
        if (!protocol_->IsAudioChannelOpened()) {
            SetDeviceState(kDeviceStateConnecting);
-            if (!protocol_->OpenAudioChannel()) {
-                return;
-            }
+            // Schedule to let the state change be processed first (UI update)
+            Schedule([this, mode]() {
+                ContinueOpenAudioChannel(mode);
+            });
+            return;
        }
-
-        SetListeningMode(aec_mode_ == kAecOff ? kListeningModeAutoStop : kListeningModeRealtime);
+        SetListeningMode(mode);
    } else if (state == kDeviceStateSpeaking) {
        AbortSpeaking(kAbortReasonNone);
    } else if (state == kDeviceStateListening) {
@@ -706,6 +708,21 @@ void Application::HandleToggleChatEvent() {
    }
 }

+// Second half of a "start listening" request. Callers switch the device to
+// kDeviceStateConnecting and defer this through Schedule(); it then makes sure
+// the audio channel is open and enters the requested listening mode.
+void Application::ContinueOpenAudioChannel(ListeningMode mode) {
+    // The request is stale if the device left the connecting state while this
+    // call was waiting in the schedule queue.
+    const bool still_connecting = GetDeviceState() == kDeviceStateConnecting;
+    if (!still_connecting) {
+        return;
+    }
+
+    // Short-circuit: only try to open the channel when it is not open yet.
+    const bool channel_ready =
+        protocol_->IsAudioChannelOpened() || protocol_->OpenAudioChannel();
+    if (channel_ready) {
+        SetListeningMode(mode);
+    }
+}
+
 void Application::HandleStartListeningEvent() {
    auto state = GetDeviceState();
    
@@ -726,11 +743,12 @@ void Application::HandleStartListeningEvent() {
    if (state == kDeviceStateIdle) {
        if (!protocol_->IsAudioChannelOpened()) {
            SetDeviceState(kDeviceStateConnecting);
-            if (!protocol_->OpenAudioChannel()) {
-                return;
-            }
+            // Schedule to let the state change be processed first (UI update)
+            Schedule([this]() {
+                ContinueOpenAudioChannel(kListeningModeManualStop);
+            });
+            return;
        }
-
        SetListeningMode(kListeningModeManualStop);
    } else if (state == kDeviceStateSpeaking) {
        AbortSpeaking(kAbortReasonNone);
@@ -762,31 +780,19 @@ void Application::HandleWakeWordDetectedEvent() {
    
    if (state == kDeviceStateIdle) {
        audio_service_.EncodeWakeWord();
+        auto wake_word = audio_service_.GetLastWakeWord();

        if (!protocol_->IsAudioChannelOpened()) {
            SetDeviceState(kDeviceStateConnecting);
-            if (!protocol_->OpenAudioChannel()) {
-                audio_service_.EnableWakeWordDetection(true);
-                return;
-            }
+            // Schedule to let the state change be processed first (UI update),
+            // then continue with OpenAudioChannel which may block for ~1 second
+            Schedule([this, wake_word]() {
+                ContinueWakeWordInvoke(wake_word);
+            });
+            return;
        }
-
-        auto wake_word = audio_service_.GetLastWakeWord();
-        ESP_LOGI(TAG, "Wake word detected: %s", wake_word.c_str());
-#if CONFIG_SEND_WAKE_WORD_DATA
-        // Encode and send the wake word data to the server
-        while (auto packet = audio_service_.PopWakeWordPacket()) {
-            protocol_->SendAudio(std::move(packet));
-        }
-        // Set the chat state to wake word detected
-        protocol_->SendWakeWordDetected(wake_word);
-        SetListeningMode(aec_mode_ == kAecOff ? kListeningModeAutoStop : kListeningModeRealtime);
-#else
-        // Set flag to play popup sound after state changes to listening
-        // (PlaySound here would be cleared by ResetDecoder in EnableVoiceProcessing)
-        play_popup_on_listening_ = true;
-        SetListeningMode(aec_mode_ == kAecOff ? kListeningModeAutoStop : kListeningModeRealtime);
-#endif
+        // Channel already opened, continue directly
+        ContinueWakeWordInvoke(wake_word);
    } else if (state == kDeviceStateSpeaking) {
        AbortSpeaking(kAbortReasonWakeWordDetected);
    } else if (state == kDeviceStateActivating) {
@@ -795,6 +801,36 @@ void Application::HandleWakeWordDetectedEvent() {
    }
 }

+/**
+ * Completes a wake-word invocation: makes sure the audio channel is open and
+ * then switches to a listening mode chosen from aec_mode_.
+ *
+ * This function is reached in two ways:
+ *  - deferred through Schedule() after the caller set kDeviceStateConnecting
+ *    because the audio channel was not open yet, or
+ *  - called directly while the device is still in kDeviceStateIdle because the
+ *    audio channel was already open.
+ * Both entry states must be accepted; guarding on kDeviceStateConnecting alone
+ * silently drops the wake word on the direct path.
+ *
+ * @param wake_word The detected wake word; it is logged, and sent to the
+ *                  server when CONFIG_SEND_WAKE_WORD_DATA is enabled.
+ */
+void Application::ContinueWakeWordInvoke(const std::string& wake_word) {
+    const auto state = GetDeviceState();
+    const bool channel_opened = protocol_->IsAudioChannelOpened();
+
+    // Deferred path: the caller moved us to Connecting before scheduling.
+    // Direct path: still Idle, which is only valid when the channel is open.
+    // Any other combination means the state changed in the meantime.
+    const bool deferred_call = (state == kDeviceStateConnecting);
+    const bool direct_call = (state == kDeviceStateIdle) && channel_opened;
+    if (!deferred_call && !direct_call) {
+        return;
+    }
+
+    if (!channel_opened && !protocol_->OpenAudioChannel()) {
+        // Opening failed: turn wake word detection back on before giving up.
+        audio_service_.EnableWakeWordDetection(true);
+        return;
+    }
+
+    ESP_LOGI(TAG, "Wake word detected: %s", wake_word.c_str());
+#if CONFIG_SEND_WAKE_WORD_DATA
+    // Encode and send the wake word data to the server
+    while (auto packet = audio_service_.PopWakeWordPacket()) {
+        protocol_->SendAudio(std::move(packet));
+    }
+    // Set the chat state to wake word detected
+    protocol_->SendWakeWordDetected(wake_word);
+#else
+    // Set flag to play popup sound after state changes to listening
+    // (PlaySound here would be cleared by ResetDecoder in EnableVoiceProcessing)
+    play_popup_on_listening_ = true;
+#endif
+    SetListeningMode(aec_mode_ == kAecOff ? kListeningModeAutoStop : kListeningModeRealtime);
+}
+
 void Application::HandleStateChangedEvent() {
    DeviceState new_state = state_machine_.GetState();
    clock_ticks_ = 0;
@@ -960,27 +996,14 @@ void Application::WakeWordInvoke(const std::string& wake_word) {

        if (!protocol_->IsAudioChannelOpened()) {
            SetDeviceState(kDeviceStateConnecting);
-            if (!protocol_->OpenAudioChannel()) {
-                audio_service_.EnableWakeWordDetection(true);
-                return;
-            }
+            // Schedule to let the state change be processed first (UI update)
+            Schedule([this, wake_word]() {
+                ContinueWakeWordInvoke(wake_word);
+            });
+            return;
        }
-
-        ESP_LOGI(TAG, "Wake word detected: %s", wake_word.c_str());
-#if CONFIG_USE_AFE_WAKE_WORD || CONFIG_USE_CUSTOM_WAKE_WORD
-        // Encode and send the wake word data to the server
-        while (auto packet = audio_service_.PopWakeWordPacket()) {
-            protocol_->SendAudio(std::move(packet));
-        }
-        // Set the chat state to wake word detected
-        protocol_->SendWakeWordDetected(wake_word);
-        SetListeningMode(aec_mode_ == kAecOff ? kListeningModeAutoStop : kListeningModeRealtime);
-#else
-        // Set flag to play popup sound after state changes to listening
-        // (PlaySound here would be cleared by ResetDecoder in EnableVoiceProcessing)
-        play_popup_on_listening_ = true;
-        SetListeningMode(aec_mode_ == kAecOff ? kListeningModeAutoStop : kListeningModeRealtime);
-#endif
+        // Channel already opened, continue directly
+        ContinueWakeWordInvoke(wake_word);
    } else if (state == kDeviceStateSpeaking) {
        Schedule([this]() {
            AbortSpeaking(kAbortReasonNone);
--- a/main/application.h
+++ b/main/application.h
@@ -153,6 +153,8 @@ private:
    void HandleNetworkDisconnectedEvent();
    void HandleActivationDoneEvent();
    void HandleWakeWordDetectedEvent();
+    void ContinueOpenAudioChannel(ListeningMode mode);
+    void ContinueWakeWordInvoke(const std::string& wake_word);

    // Activation task (runs in background)
    void ActivationTask();