Enhance audio processing and wake word detection (#1739)

* Enhance audio processing and wake word detection

- Set task priority in Application::Run to improve responsiveness.
- Log detected wake words with their state in HandleWakeWordDetectedEvent.
- Streamline audio feeding in AudioService to handle both wake word and audio processor events.
- Implement input buffering in AfeAudioProcessor, AfeWakeWord, CustomWakeWord, and EspWakeWord to manage audio data more efficiently.
- Clear input buffers on stop to prevent residual data issues.

* Refactor audio processing to enhance thread safety and state management

- Implement early return checks in Feed methods of AfeAudioProcessor, AfeWakeWord, CustomWakeWord, and EspWakeWord to prevent processing when not running.
- Introduce std::atomic for running state in CustomWakeWord and EspWakeWord to ensure thread-safe access.
- Consolidate input buffer management with mutex locks to avoid race conditions during Stop and Feed operations.

* Refactor listening mode handling and wake word detection configuration

- Replace direct mode setting logic with a new GetDefaultListeningMode method for improved clarity and maintainability.
- Update HandleToggleChatEvent, HandleWakeWordDetectedEvent, and ContinueWakeWordInvoke to utilize the new method for determining listening mode.
- Introduce Kconfig option WAKE_WORD_DETECTION_IN_LISTENING to enable or disable wake word detection during listening mode, enhancing configurability.
This commit is contained in:
Xiaoxia
2026-02-04 14:28:21 +08:00
committed by GitHub
parent 37110a9d05
commit 2b025c4ea6
13 changed files with 167 additions and 61 deletions

View File

@@ -691,7 +691,7 @@ void Application::HandleToggleChatEvent() {
}
if (state == kDeviceStateIdle) {
ListeningMode mode = aec_mode_ == kAecOff ? kListeningModeAutoStop : kListeningModeRealtime;
ListeningMode mode = GetDefaultListeningMode();
if (!protocol_->IsAudioChannelOpened()) {
SetDeviceState(kDeviceStateConnecting);
// Schedule to let the state change be processed first (UI update)
@@ -777,7 +777,9 @@ void Application::HandleWakeWordDetectedEvent() {
}
auto state = GetDeviceState();
auto wake_word = audio_service_.GetLastWakeWord();
ESP_LOGI(TAG, "Wake word detected: %s (state: %d)", wake_word.c_str(), (int)state);
if (state == kDeviceStateIdle) {
audio_service_.EncodeWakeWord();
auto wake_word = audio_service_.GetLastWakeWord();
@@ -793,8 +795,22 @@ void Application::HandleWakeWordDetectedEvent() {
}
// Channel already opened, continue directly
ContinueWakeWordInvoke(wake_word);
} else if (state == kDeviceStateSpeaking) {
} else if (state == kDeviceStateSpeaking || state == kDeviceStateListening) {
AbortSpeaking(kAbortReasonWakeWordDetected);
// Clear send queue to avoid sending residues to server
while (audio_service_.PopPacketFromSendQueue());
if (state == kDeviceStateListening) {
protocol_->SendStartListening(GetDefaultListeningMode());
audio_service_.ResetDecoder();
audio_service_.PlaySound(Lang::Sounds::OGG_POPUP);
// Re-enable wake word detection as it was stopped by the detection itself
audio_service_.EnableWakeWordDetection(true);
} else {
// Play popup sound and start listening again
play_popup_on_listening_ = true;
SetListeningMode(GetDefaultListeningMode());
}
} else if (state == kDeviceStateActivating) {
// Restart the activation check if the wake word is detected during activation
SetDeviceState(kDeviceStateIdle);
@@ -822,12 +838,15 @@ void Application::ContinueWakeWordInvoke(const std::string& wake_word) {
}
// Set the chat state to wake word detected
protocol_->SendWakeWordDetected(wake_word);
SetListeningMode(aec_mode_ == kAecOff ? kListeningModeAutoStop : kListeningModeRealtime);
// Set flag to play popup sound after state changes to listening
play_popup_on_listening_ = true;
SetListeningMode(GetDefaultListeningMode());
#else
// Set flag to play popup sound after state changes to listening
// (PlaySound here would be cleared by ResetDecoder in EnableVoiceProcessing)
play_popup_on_listening_ = true;
SetListeningMode(aec_mode_ == kAecOff ? kListeningModeAutoStop : kListeningModeRealtime);
SetListeningMode(GetDefaultListeningMode());
#endif
}
@@ -859,7 +878,7 @@ void Application::HandleStateChangedEvent() {
display->SetEmotion("neutral");
// Make sure the audio processor is running
if (!audio_service_.IsAudioProcessorRunning()) {
if (play_popup_on_listening_ || !audio_service_.IsAudioProcessorRunning()) {
// For auto mode, wait for playback queue to be empty before enabling voice processing
// This prevents audio truncation when STOP arrives late due to network jitter
if (listening_mode_ == kListeningModeAutoStop) {
@@ -869,9 +888,16 @@ void Application::HandleStateChangedEvent() {
// Send the start listening command
protocol_->SendStartListening(listening_mode_);
audio_service_.EnableVoiceProcessing(true);
audio_service_.EnableWakeWordDetection(false);
}
#ifdef CONFIG_WAKE_WORD_DETECTION_IN_LISTENING
// Enable wake word detection in listening mode (configured via Kconfig)
audio_service_.EnableWakeWordDetection(audio_service_.IsAfeWakeWord());
#else
// Disable wake word detection in listening mode
audio_service_.EnableWakeWordDetection(false);
#endif
// Play popup sound after ResetDecoder (in EnableVoiceProcessing) has been called
if (play_popup_on_listening_) {
play_popup_on_listening_ = false;
@@ -919,6 +945,10 @@ void Application::SetListeningMode(ListeningMode mode) {
SetDeviceState(kDeviceStateListening);
}
ListeningMode Application::GetDefaultListeningMode() const {
return aec_mode_ == kAecOff ? kListeningModeAutoStop : kListeningModeRealtime;
}
void Application::Reboot() {
ESP_LOGI(TAG, "Rebooting...");
// Disconnect the audio channel