From 0883a36537904f0e2068c8cf15ce20411544ec1f Mon Sep 17 00:00:00 2001
From: Xiaoxia
Date: Sun, 1 Feb 2026 14:55:47 +0800
Subject: [PATCH] Refactor audio channel handling and wake word detection in Application class (#1722)

- Introduced ContinueOpenAudioChannel and ContinueWakeWordInvoke methods to streamline audio channel management and wake word processing.
- Updated HandleToggleChatEvent and HandleWakeWordDetectedEvent to utilize scheduling for state changes, improving UI responsiveness.
- Simplified logic for setting listening modes based on audio channel state, enhancing code clarity and maintainability.
---
 main/application.cc | 127 +++++++++++++++++++++++++++-----------------
 main/application.h  |   2 +
 2 files changed, 79 insertions(+), 50 deletions(-)

diff --git a/main/application.cc b/main/application.cc
index 3538d9d3..89d5eb62 100644
--- a/main/application.cc
+++ b/main/application.cc
@@ -691,14 +691,16 @@ void Application::HandleToggleChatEvent() {
     }
 
     if (state == kDeviceStateIdle) {
+        ListeningMode mode = aec_mode_ == kAecOff ? kListeningModeAutoStop : kListeningModeRealtime;
         if (!protocol_->IsAudioChannelOpened()) {
             SetDeviceState(kDeviceStateConnecting);
-            if (!protocol_->OpenAudioChannel()) {
-                return;
-            }
+            // Schedule to let the state change be processed first (UI update)
+            Schedule([this, mode]() {
+                ContinueOpenAudioChannel(mode);
+            });
+            return;
         }
-
-        SetListeningMode(aec_mode_ == kAecOff ? kListeningModeAutoStop : kListeningModeRealtime);
+        SetListeningMode(mode);
     } else if (state == kDeviceStateSpeaking) {
         AbortSpeaking(kAbortReasonNone);
     } else if (state == kDeviceStateListening) {
@@ -706,6 +708,21 @@ void Application::HandleToggleChatEvent() {
     }
 }
 
+void Application::ContinueOpenAudioChannel(ListeningMode mode) {
+    // Check state again in case it was changed during scheduling
+    if (GetDeviceState() != kDeviceStateConnecting) {
+        return;
+    }
+
+    if (!protocol_->IsAudioChannelOpened()) {
+        if (!protocol_->OpenAudioChannel()) {
+            return;
+        }
+    }
+
+    SetListeningMode(mode);
+}
+
 void Application::HandleStartListeningEvent() {
     auto state = GetDeviceState();
 
@@ -726,11 +743,12 @@ void Application::HandleStartListeningEvent() {
     if (state == kDeviceStateIdle) {
         if (!protocol_->IsAudioChannelOpened()) {
             SetDeviceState(kDeviceStateConnecting);
-            if (!protocol_->OpenAudioChannel()) {
-                return;
-            }
+            // Schedule to let the state change be processed first (UI update)
+            Schedule([this]() {
+                ContinueOpenAudioChannel(kListeningModeManualStop);
+            });
+            return;
         }
-
         SetListeningMode(kListeningModeManualStop);
     } else if (state == kDeviceStateSpeaking) {
         AbortSpeaking(kAbortReasonNone);
@@ -762,31 +780,19 @@ void Application::HandleWakeWordDetectedEvent() {
 
     if (state == kDeviceStateIdle) {
         audio_service_.EncodeWakeWord();
+        auto wake_word = audio_service_.GetLastWakeWord();
 
         if (!protocol_->IsAudioChannelOpened()) {
             SetDeviceState(kDeviceStateConnecting);
-            if (!protocol_->OpenAudioChannel()) {
-                audio_service_.EnableWakeWordDetection(true);
-                return;
-            }
+            // Schedule to let the state change be processed first (UI update),
+            // then continue with OpenAudioChannel which may block for ~1 second
+            Schedule([this, wake_word]() {
+                ContinueWakeWordInvoke(wake_word);
+            });
+            return;
         }
-
-        auto wake_word = audio_service_.GetLastWakeWord();
-        ESP_LOGI(TAG, "Wake word detected: %s", wake_word.c_str());
-#if CONFIG_SEND_WAKE_WORD_DATA
-        // Encode and send the wake word data to the server
-        while (auto packet = audio_service_.PopWakeWordPacket()) {
-            protocol_->SendAudio(std::move(packet));
-        }
-        // Set the chat state to wake word detected
-        protocol_->SendWakeWordDetected(wake_word);
-        SetListeningMode(aec_mode_ == kAecOff ? kListeningModeAutoStop : kListeningModeRealtime);
-#else
-        // Set flag to play popup sound after state changes to listening
-        // (PlaySound here would be cleared by ResetDecoder in EnableVoiceProcessing)
-        play_popup_on_listening_ = true;
-        SetListeningMode(aec_mode_ == kAecOff ? kListeningModeAutoStop : kListeningModeRealtime);
-#endif
+        // Channel already opened, continue directly
+        ContinueWakeWordInvoke(wake_word);
     } else if (state == kDeviceStateSpeaking) {
         AbortSpeaking(kAbortReasonWakeWordDetected);
     } else if (state == kDeviceStateActivating) {
@@ -795,6 +801,40 @@ void Application::HandleWakeWordDetectedEvent() {
     }
 }
 
+void Application::ContinueWakeWordInvoke(const std::string& wake_word) {
+    // Reached either from a scheduled task (the caller entered the connecting
+    // state first) or directly while idle with the audio channel already open.
+    // Any other state means things changed while the task was queued: bail out.
+    const auto state = GetDeviceState();
+    const bool direct_call = state == kDeviceStateIdle && protocol_->IsAudioChannelOpened();
+    if (state != kDeviceStateConnecting && !direct_call) {
+        return;
+    }
+
+    if (!protocol_->IsAudioChannelOpened()) {
+        if (!protocol_->OpenAudioChannel()) {
+            audio_service_.EnableWakeWordDetection(true);
+            return;
+        }
+    }
+
+    ESP_LOGI(TAG, "Wake word detected: %s", wake_word.c_str());
+#if CONFIG_SEND_WAKE_WORD_DATA
+    // Encode and send the wake word data to the server
+    while (auto packet = audio_service_.PopWakeWordPacket()) {
+        protocol_->SendAudio(std::move(packet));
+    }
+    // Set the chat state to wake word detected
+    protocol_->SendWakeWordDetected(wake_word);
+    SetListeningMode(aec_mode_ == kAecOff ? kListeningModeAutoStop : kListeningModeRealtime);
+#else
+    // Set flag to play popup sound after state changes to listening
+    // (PlaySound here would be cleared by ResetDecoder in EnableVoiceProcessing)
+    play_popup_on_listening_ = true;
+    SetListeningMode(aec_mode_ == kAecOff ? kListeningModeAutoStop : kListeningModeRealtime);
+#endif
+}
+
 void Application::HandleStateChangedEvent() {
     DeviceState new_state = state_machine_.GetState();
     clock_ticks_ = 0;
@@ -960,27 +1000,14 @@ void Application::WakeWordInvoke(const std::string& wake_word) {
 
         if (!protocol_->IsAudioChannelOpened()) {
             SetDeviceState(kDeviceStateConnecting);
-            if (!protocol_->OpenAudioChannel()) {
-                audio_service_.EnableWakeWordDetection(true);
-                return;
-            }
+            // Schedule to let the state change be processed first (UI update)
+            Schedule([this, wake_word]() {
+                ContinueWakeWordInvoke(wake_word);
+            });
+            return;
         }
-
-        ESP_LOGI(TAG, "Wake word detected: %s", wake_word.c_str());
-#if CONFIG_USE_AFE_WAKE_WORD || CONFIG_USE_CUSTOM_WAKE_WORD
-        // Encode and send the wake word data to the server
-        while (auto packet = audio_service_.PopWakeWordPacket()) {
-            protocol_->SendAudio(std::move(packet));
-        }
-        // Set the chat state to wake word detected
-        protocol_->SendWakeWordDetected(wake_word);
-        SetListeningMode(aec_mode_ == kAecOff ? kListeningModeAutoStop : kListeningModeRealtime);
-#else
-        // Set flag to play popup sound after state changes to listening
-        // (PlaySound here would be cleared by ResetDecoder in EnableVoiceProcessing)
-        play_popup_on_listening_ = true;
-        SetListeningMode(aec_mode_ == kAecOff ? kListeningModeAutoStop : kListeningModeRealtime);
-#endif
+        // Channel already opened, continue directly
+        ContinueWakeWordInvoke(wake_word);
     } else if (state == kDeviceStateSpeaking) {
         Schedule([this]() {
             AbortSpeaking(kAbortReasonNone);
diff --git a/main/application.h b/main/application.h
index 33bda4cf..bcb81112 100644
--- a/main/application.h
+++ b/main/application.h
@@ -153,6 +153,8 @@ private:
     void HandleNetworkDisconnectedEvent();
     void HandleActivationDoneEvent();
     void HandleWakeWordDetectedEvent();
+    void ContinueOpenAudioChannel(ListeningMode mode);
+    void ContinueWakeWordInvoke(const std::string& wake_word);
 
     // Activation task (runs in background)
     void ActivationTask();