forked from xiaozhi/xiaozhi-esp32
Enhance audio processing and wake word detection (#1739)
* Enhance audio processing and wake word detection - Set task priority in Application::Run to improve responsiveness. - Log detected wake words with their state in HandleWakeWordDetectedEvent. - Streamline audio feeding in AudioService to handle both wake word and audio processor events. - Implement input buffering in AfeAudioProcessor, AfeWakeWord, CustomWakeWord, and EspWakeWord to manage audio data more efficiently. - Clear input buffers on stop to prevent residual data issues. * Refactor audio processing to enhance thread safety and state management - Implement early return checks in Feed methods of AfeAudioProcessor, AfeWakeWord, CustomWakeWord, and EspWakeWord to prevent processing when not running. - Introduce std::atomic for running state in CustomWakeWord and EspWakeWord to ensure thread-safe access. - Consolidate input buffer management with mutex locks to avoid race conditions during Stop and Feed operations. * Refactor listening mode handling and wake word detection configuration - Replace direct mode setting logic with a new GetDefaultListeningMode method for improved clarity and maintainability. - Update HandleToggleChatEvent, HandleWakeWordDetectedEvent, and ContinueWakeWordInvoke to utilize the new method for determining listening mode. - Introduce Kconfig option WAKE_WORD_DETECTION_IN_LISTENING to enable or disable wake word detection during listening mode, enhancing configurability.
This commit is contained in:
@@ -691,7 +691,7 @@ void Application::HandleToggleChatEvent() {
|
||||
}
|
||||
|
||||
if (state == kDeviceStateIdle) {
|
||||
ListeningMode mode = aec_mode_ == kAecOff ? kListeningModeAutoStop : kListeningModeRealtime;
|
||||
ListeningMode mode = GetDefaultListeningMode();
|
||||
if (!protocol_->IsAudioChannelOpened()) {
|
||||
SetDeviceState(kDeviceStateConnecting);
|
||||
// Schedule to let the state change be processed first (UI update)
|
||||
@@ -777,7 +777,9 @@ void Application::HandleWakeWordDetectedEvent() {
|
||||
}
|
||||
|
||||
auto state = GetDeviceState();
|
||||
|
||||
auto wake_word = audio_service_.GetLastWakeWord();
|
||||
ESP_LOGI(TAG, "Wake word detected: %s (state: %d)", wake_word.c_str(), (int)state);
|
||||
|
||||
if (state == kDeviceStateIdle) {
|
||||
audio_service_.EncodeWakeWord();
|
||||
auto wake_word = audio_service_.GetLastWakeWord();
|
||||
@@ -793,8 +795,22 @@ void Application::HandleWakeWordDetectedEvent() {
|
||||
}
|
||||
// Channel already opened, continue directly
|
||||
ContinueWakeWordInvoke(wake_word);
|
||||
} else if (state == kDeviceStateSpeaking) {
|
||||
} else if (state == kDeviceStateSpeaking || state == kDeviceStateListening) {
|
||||
AbortSpeaking(kAbortReasonWakeWordDetected);
|
||||
// Clear send queue to avoid sending residues to server
|
||||
while (audio_service_.PopPacketFromSendQueue());
|
||||
|
||||
if (state == kDeviceStateListening) {
|
||||
protocol_->SendStartListening(GetDefaultListeningMode());
|
||||
audio_service_.ResetDecoder();
|
||||
audio_service_.PlaySound(Lang::Sounds::OGG_POPUP);
|
||||
// Re-enable wake word detection as it was stopped by the detection itself
|
||||
audio_service_.EnableWakeWordDetection(true);
|
||||
} else {
|
||||
// Play popup sound and start listening again
|
||||
play_popup_on_listening_ = true;
|
||||
SetListeningMode(GetDefaultListeningMode());
|
||||
}
|
||||
} else if (state == kDeviceStateActivating) {
|
||||
// Restart the activation check if the wake word is detected during activation
|
||||
SetDeviceState(kDeviceStateIdle);
|
||||
@@ -822,12 +838,15 @@ void Application::ContinueWakeWordInvoke(const std::string& wake_word) {
|
||||
}
|
||||
// Set the chat state to wake word detected
|
||||
protocol_->SendWakeWordDetected(wake_word);
|
||||
SetListeningMode(aec_mode_ == kAecOff ? kListeningModeAutoStop : kListeningModeRealtime);
|
||||
|
||||
// Set flag to play popup sound after state changes to listening
|
||||
play_popup_on_listening_ = true;
|
||||
SetListeningMode(GetDefaultListeningMode());
|
||||
#else
|
||||
// Set flag to play popup sound after state changes to listening
|
||||
// (PlaySound here would be cleared by ResetDecoder in EnableVoiceProcessing)
|
||||
play_popup_on_listening_ = true;
|
||||
SetListeningMode(aec_mode_ == kAecOff ? kListeningModeAutoStop : kListeningModeRealtime);
|
||||
SetListeningMode(GetDefaultListeningMode());
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -859,7 +878,7 @@ void Application::HandleStateChangedEvent() {
|
||||
display->SetEmotion("neutral");
|
||||
|
||||
// Make sure the audio processor is running
|
||||
if (!audio_service_.IsAudioProcessorRunning()) {
|
||||
if (play_popup_on_listening_ || !audio_service_.IsAudioProcessorRunning()) {
|
||||
// For auto mode, wait for playback queue to be empty before enabling voice processing
|
||||
// This prevents audio truncation when STOP arrives late due to network jitter
|
||||
if (listening_mode_ == kListeningModeAutoStop) {
|
||||
@@ -869,9 +888,16 @@ void Application::HandleStateChangedEvent() {
|
||||
// Send the start listening command
|
||||
protocol_->SendStartListening(listening_mode_);
|
||||
audio_service_.EnableVoiceProcessing(true);
|
||||
audio_service_.EnableWakeWordDetection(false);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_WAKE_WORD_DETECTION_IN_LISTENING
|
||||
// Enable wake word detection in listening mode (configured via Kconfig)
|
||||
audio_service_.EnableWakeWordDetection(audio_service_.IsAfeWakeWord());
|
||||
#else
|
||||
// Disable wake word detection in listening mode
|
||||
audio_service_.EnableWakeWordDetection(false);
|
||||
#endif
|
||||
|
||||
// Play popup sound after ResetDecoder (in EnableVoiceProcessing) has been called
|
||||
if (play_popup_on_listening_) {
|
||||
play_popup_on_listening_ = false;
|
||||
@@ -919,6 +945,10 @@ void Application::SetListeningMode(ListeningMode mode) {
|
||||
SetDeviceState(kDeviceStateListening);
|
||||
}
|
||||
|
||||
ListeningMode Application::GetDefaultListeningMode() const {
|
||||
return aec_mode_ == kAecOff ? kListeningModeAutoStop : kListeningModeRealtime;
|
||||
}
|
||||
|
||||
void Application::Reboot() {
|
||||
ESP_LOGI(TAG, "Rebooting...");
|
||||
// Disconnect the audio channel
|
||||
|
||||
Reference in New Issue
Block a user