mirror of
https://github.com/78/xiaozhi-esp32.git
synced 2026-02-18 18:08:11 +00:00
Compare commits
3 Commits
fix_setupu
...
fix_afe
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
9e96f0f027 | ||
|
|
f39c112970 | ||
|
|
f7e258979e |
@@ -680,6 +680,16 @@ config SEND_WAKE_WORD_DATA
|
|||||||
help
|
help
|
||||||
Send wake word data to the server as the first message of the conversation and wait for response
|
Send wake word data to the server as the first message of the conversation and wait for response
|
||||||
|
|
||||||
|
config WAKE_WORD_DETECTION_IN_LISTENING
|
||||||
|
bool "Enable Wake Word Detection in Listening Mode"
|
||||||
|
default n
|
||||||
|
depends on USE_AFE_WAKE_WORD || USE_CUSTOM_WAKE_WORD
|
||||||
|
help
|
||||||
|
Enable wake word detection while in listening mode.
|
||||||
|
When enabled, the device can detect wake word during listening,
|
||||||
|
which allows interrupting the current conversation.
|
||||||
|
When disabled (default), wake word detection is turned off during listening.
|
||||||
|
|
||||||
config USE_AUDIO_PROCESSOR
|
config USE_AUDIO_PROCESSOR
|
||||||
bool "Enable Audio Noise Reduction"
|
bool "Enable Audio Noise Reduction"
|
||||||
default y
|
default y
|
||||||
|
|||||||
@@ -691,7 +691,7 @@ void Application::HandleToggleChatEvent() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (state == kDeviceStateIdle) {
|
if (state == kDeviceStateIdle) {
|
||||||
ListeningMode mode = aec_mode_ == kAecOff ? kListeningModeAutoStop : kListeningModeRealtime;
|
ListeningMode mode = GetDefaultListeningMode();
|
||||||
if (!protocol_->IsAudioChannelOpened()) {
|
if (!protocol_->IsAudioChannelOpened()) {
|
||||||
SetDeviceState(kDeviceStateConnecting);
|
SetDeviceState(kDeviceStateConnecting);
|
||||||
// Schedule to let the state change be processed first (UI update)
|
// Schedule to let the state change be processed first (UI update)
|
||||||
@@ -777,7 +777,9 @@ void Application::HandleWakeWordDetectedEvent() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
auto state = GetDeviceState();
|
auto state = GetDeviceState();
|
||||||
|
auto wake_word = audio_service_.GetLastWakeWord();
|
||||||
|
ESP_LOGI(TAG, "Wake word detected: %s (state: %d)", wake_word.c_str(), (int)state);
|
||||||
|
|
||||||
if (state == kDeviceStateIdle) {
|
if (state == kDeviceStateIdle) {
|
||||||
audio_service_.EncodeWakeWord();
|
audio_service_.EncodeWakeWord();
|
||||||
auto wake_word = audio_service_.GetLastWakeWord();
|
auto wake_word = audio_service_.GetLastWakeWord();
|
||||||
@@ -793,8 +795,22 @@ void Application::HandleWakeWordDetectedEvent() {
|
|||||||
}
|
}
|
||||||
// Channel already opened, continue directly
|
// Channel already opened, continue directly
|
||||||
ContinueWakeWordInvoke(wake_word);
|
ContinueWakeWordInvoke(wake_word);
|
||||||
} else if (state == kDeviceStateSpeaking) {
|
} else if (state == kDeviceStateSpeaking || state == kDeviceStateListening) {
|
||||||
AbortSpeaking(kAbortReasonWakeWordDetected);
|
AbortSpeaking(kAbortReasonWakeWordDetected);
|
||||||
|
// Clear send queue to avoid sending residues to server
|
||||||
|
while (audio_service_.PopPacketFromSendQueue());
|
||||||
|
|
||||||
|
if (state == kDeviceStateListening) {
|
||||||
|
protocol_->SendStartListening(GetDefaultListeningMode());
|
||||||
|
audio_service_.ResetDecoder();
|
||||||
|
audio_service_.PlaySound(Lang::Sounds::OGG_POPUP);
|
||||||
|
// Re-enable wake word detection as it was stopped by the detection itself
|
||||||
|
audio_service_.EnableWakeWordDetection(true);
|
||||||
|
} else {
|
||||||
|
// Play popup sound and start listening again
|
||||||
|
play_popup_on_listening_ = true;
|
||||||
|
SetListeningMode(GetDefaultListeningMode());
|
||||||
|
}
|
||||||
} else if (state == kDeviceStateActivating) {
|
} else if (state == kDeviceStateActivating) {
|
||||||
// Restart the activation check if the wake word is detected during activation
|
// Restart the activation check if the wake word is detected during activation
|
||||||
SetDeviceState(kDeviceStateIdle);
|
SetDeviceState(kDeviceStateIdle);
|
||||||
@@ -822,12 +838,15 @@ void Application::ContinueWakeWordInvoke(const std::string& wake_word) {
|
|||||||
}
|
}
|
||||||
// Set the chat state to wake word detected
|
// Set the chat state to wake word detected
|
||||||
protocol_->SendWakeWordDetected(wake_word);
|
protocol_->SendWakeWordDetected(wake_word);
|
||||||
SetListeningMode(aec_mode_ == kAecOff ? kListeningModeAutoStop : kListeningModeRealtime);
|
|
||||||
|
// Set flag to play popup sound after state changes to listening
|
||||||
|
play_popup_on_listening_ = true;
|
||||||
|
SetListeningMode(GetDefaultListeningMode());
|
||||||
#else
|
#else
|
||||||
// Set flag to play popup sound after state changes to listening
|
// Set flag to play popup sound after state changes to listening
|
||||||
// (PlaySound here would be cleared by ResetDecoder in EnableVoiceProcessing)
|
// (PlaySound here would be cleared by ResetDecoder in EnableVoiceProcessing)
|
||||||
play_popup_on_listening_ = true;
|
play_popup_on_listening_ = true;
|
||||||
SetListeningMode(aec_mode_ == kAecOff ? kListeningModeAutoStop : kListeningModeRealtime);
|
SetListeningMode(GetDefaultListeningMode());
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -859,7 +878,7 @@ void Application::HandleStateChangedEvent() {
|
|||||||
display->SetEmotion("neutral");
|
display->SetEmotion("neutral");
|
||||||
|
|
||||||
// Make sure the audio processor is running
|
// Make sure the audio processor is running
|
||||||
if (!audio_service_.IsAudioProcessorRunning()) {
|
if (play_popup_on_listening_ || !audio_service_.IsAudioProcessorRunning()) {
|
||||||
// For auto mode, wait for playback queue to be empty before enabling voice processing
|
// For auto mode, wait for playback queue to be empty before enabling voice processing
|
||||||
// This prevents audio truncation when STOP arrives late due to network jitter
|
// This prevents audio truncation when STOP arrives late due to network jitter
|
||||||
if (listening_mode_ == kListeningModeAutoStop) {
|
if (listening_mode_ == kListeningModeAutoStop) {
|
||||||
@@ -869,9 +888,16 @@ void Application::HandleStateChangedEvent() {
|
|||||||
// Send the start listening command
|
// Send the start listening command
|
||||||
protocol_->SendStartListening(listening_mode_);
|
protocol_->SendStartListening(listening_mode_);
|
||||||
audio_service_.EnableVoiceProcessing(true);
|
audio_service_.EnableVoiceProcessing(true);
|
||||||
audio_service_.EnableWakeWordDetection(false);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef CONFIG_WAKE_WORD_DETECTION_IN_LISTENING
|
||||||
|
// Enable wake word detection in listening mode (configured via Kconfig)
|
||||||
|
audio_service_.EnableWakeWordDetection(audio_service_.IsAfeWakeWord());
|
||||||
|
#else
|
||||||
|
// Disable wake word detection in listening mode
|
||||||
|
audio_service_.EnableWakeWordDetection(false);
|
||||||
|
#endif
|
||||||
|
|
||||||
// Play popup sound after ResetDecoder (in EnableVoiceProcessing) has been called
|
// Play popup sound after ResetDecoder (in EnableVoiceProcessing) has been called
|
||||||
if (play_popup_on_listening_) {
|
if (play_popup_on_listening_) {
|
||||||
play_popup_on_listening_ = false;
|
play_popup_on_listening_ = false;
|
||||||
@@ -919,6 +945,10 @@ void Application::SetListeningMode(ListeningMode mode) {
|
|||||||
SetDeviceState(kDeviceStateListening);
|
SetDeviceState(kDeviceStateListening);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
ListeningMode Application::GetDefaultListeningMode() const {
|
||||||
|
return aec_mode_ == kAecOff ? kListeningModeAutoStop : kListeningModeRealtime;
|
||||||
|
}
|
||||||
|
|
||||||
void Application::Reboot() {
|
void Application::Reboot() {
|
||||||
ESP_LOGI(TAG, "Rebooting...");
|
ESP_LOGI(TAG, "Rebooting...");
|
||||||
// Disconnect the audio channel
|
// Disconnect the audio channel
|
||||||
|
|||||||
@@ -165,6 +165,7 @@ private:
|
|||||||
void InitializeProtocol();
|
void InitializeProtocol();
|
||||||
void ShowActivationCode(const std::string& code, const std::string& message);
|
void ShowActivationCode(const std::string& code, const std::string& message);
|
||||||
void SetListeningMode(ListeningMode mode);
|
void SetListeningMode(ListeningMode mode);
|
||||||
|
ListeningMode GetDefaultListeningMode() const;
|
||||||
|
|
||||||
// State change handler called by state machine
|
// State change handler called by state machine
|
||||||
void OnStateChanged(DeviceState old_state, DeviceState new_state);
|
void OnStateChanged(DeviceState old_state, DeviceState new_state);
|
||||||
|
|||||||
@@ -265,27 +265,18 @@ void AudioService::AudioInputTask() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Feed the wake word */
|
/* Feed the wake word and/or audio processor */
|
||||||
if (bits & AS_EVENT_WAKE_WORD_RUNNING) {
|
if (bits & (AS_EVENT_WAKE_WORD_RUNNING | AS_EVENT_AUDIO_PROCESSOR_RUNNING)) {
|
||||||
|
int samples = 160; // 10ms
|
||||||
std::vector<int16_t> data;
|
std::vector<int16_t> data;
|
||||||
int samples = wake_word_->GetFeedSize();
|
if (ReadAudioData(data, 16000, samples)) {
|
||||||
if (samples > 0) {
|
if (bits & AS_EVENT_WAKE_WORD_RUNNING) {
|
||||||
if (ReadAudioData(data, 16000, samples)) {
|
|
||||||
wake_word_->Feed(data);
|
wake_word_->Feed(data);
|
||||||
continue;
|
|
||||||
}
|
}
|
||||||
}
|
if (bits & AS_EVENT_AUDIO_PROCESSOR_RUNNING) {
|
||||||
}
|
|
||||||
|
|
||||||
/* Feed the audio processor */
|
|
||||||
if (bits & AS_EVENT_AUDIO_PROCESSOR_RUNNING) {
|
|
||||||
std::vector<int16_t> data;
|
|
||||||
int samples = audio_processor_->GetFeedSize();
|
|
||||||
if (samples > 0) {
|
|
||||||
if (ReadAudioData(data, 16000, samples)) {
|
|
||||||
audio_processor_->Feed(std::move(data));
|
audio_processor_->Feed(std::move(data));
|
||||||
continue;
|
|
||||||
}
|
}
|
||||||
|
continue;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -92,7 +92,18 @@ void AfeAudioProcessor::Feed(std::vector<int16_t>&& data) {
|
|||||||
if (afe_data_ == nullptr) {
|
if (afe_data_ == nullptr) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
afe_iface_->feed(afe_data_, data.data());
|
|
||||||
|
std::lock_guard<std::mutex> lock(input_buffer_mutex_);
|
||||||
|
// Check running state inside lock to avoid TOCTOU race with Stop()
|
||||||
|
if (!IsRunning()) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
input_buffer_.insert(input_buffer_.end(), data.begin(), data.end());
|
||||||
|
size_t chunk_size = afe_iface_->get_feed_chunksize(afe_data_) * codec_->input_channels();
|
||||||
|
while (input_buffer_.size() >= chunk_size) {
|
||||||
|
afe_iface_->feed(afe_data_, input_buffer_.data());
|
||||||
|
input_buffer_.erase(input_buffer_.begin(), input_buffer_.begin() + chunk_size);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void AfeAudioProcessor::Start() {
|
void AfeAudioProcessor::Start() {
|
||||||
@@ -101,9 +112,12 @@ void AfeAudioProcessor::Start() {
|
|||||||
|
|
||||||
void AfeAudioProcessor::Stop() {
|
void AfeAudioProcessor::Stop() {
|
||||||
xEventGroupClearBits(event_group_, PROCESSOR_RUNNING);
|
xEventGroupClearBits(event_group_, PROCESSOR_RUNNING);
|
||||||
|
|
||||||
|
std::lock_guard<std::mutex> lock(input_buffer_mutex_);
|
||||||
if (afe_data_ != nullptr) {
|
if (afe_data_ != nullptr) {
|
||||||
afe_iface_->reset_buffer(afe_data_);
|
afe_iface_->reset_buffer(afe_data_);
|
||||||
}
|
}
|
||||||
|
input_buffer_.clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
bool AfeAudioProcessor::IsRunning() {
|
bool AfeAudioProcessor::IsRunning() {
|
||||||
|
|||||||
@@ -9,6 +9,7 @@
|
|||||||
#include <string>
|
#include <string>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <functional>
|
#include <functional>
|
||||||
|
#include <mutex>
|
||||||
|
|
||||||
#include "audio_processor.h"
|
#include "audio_processor.h"
|
||||||
#include "audio_codec.h"
|
#include "audio_codec.h"
|
||||||
@@ -37,6 +38,8 @@ private:
|
|||||||
AudioCodec* codec_ = nullptr;
|
AudioCodec* codec_ = nullptr;
|
||||||
int frame_samples_ = 0;
|
int frame_samples_ = 0;
|
||||||
bool is_speaking_ = false;
|
bool is_speaking_ = false;
|
||||||
|
std::vector<int16_t> input_buffer_;
|
||||||
|
std::mutex input_buffer_mutex_;
|
||||||
std::vector<int16_t> output_buffer_;
|
std::vector<int16_t> output_buffer_;
|
||||||
|
|
||||||
void AudioProcessorTask();
|
void AudioProcessorTask();
|
||||||
|
|||||||
@@ -3,6 +3,7 @@
|
|||||||
|
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <functional>
|
#include <functional>
|
||||||
|
#include <atomic>
|
||||||
|
|
||||||
#include "audio_processor.h"
|
#include "audio_processor.h"
|
||||||
#include "audio_codec.h"
|
#include "audio_codec.h"
|
||||||
@@ -27,7 +28,7 @@ private:
|
|||||||
int frame_samples_ = 0;
|
int frame_samples_ = 0;
|
||||||
std::function<void(std::vector<int16_t>&& data)> output_callback_;
|
std::function<void(std::vector<int16_t>&& data)> output_callback_;
|
||||||
std::function<void(bool speaking)> vad_state_change_callback_;
|
std::function<void(bool speaking)> vad_state_change_callback_;
|
||||||
bool is_running_ = false;
|
std::atomic<bool> is_running_ = false;
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
@@ -99,19 +99,30 @@ void AfeWakeWord::Start() {
|
|||||||
|
|
||||||
void AfeWakeWord::Stop() {
|
void AfeWakeWord::Stop() {
|
||||||
xEventGroupClearBits(event_group_, DETECTION_RUNNING_EVENT);
|
xEventGroupClearBits(event_group_, DETECTION_RUNNING_EVENT);
|
||||||
|
|
||||||
|
std::lock_guard<std::mutex> lock(input_buffer_mutex_);
|
||||||
if (afe_data_ != nullptr) {
|
if (afe_data_ != nullptr) {
|
||||||
afe_iface_->reset_buffer(afe_data_);
|
afe_iface_->reset_buffer(afe_data_);
|
||||||
}
|
}
|
||||||
|
input_buffer_.clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
void AfeWakeWord::Feed(const std::vector<int16_t>& data) {
|
void AfeWakeWord::Feed(const std::vector<int16_t>& data) {
|
||||||
if (afe_data_ == nullptr) {
|
if (afe_data_ == nullptr) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::lock_guard<std::mutex> lock(input_buffer_mutex_);
|
||||||
|
// Check running state inside lock to avoid TOCTOU race with Stop()
|
||||||
if (!(xEventGroupGetBits(event_group_) & DETECTION_RUNNING_EVENT)) {
|
if (!(xEventGroupGetBits(event_group_) & DETECTION_RUNNING_EVENT)) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
afe_iface_->feed(afe_data_, data.data());
|
input_buffer_.insert(input_buffer_.end(), data.begin(), data.end());
|
||||||
|
size_t chunk_size = afe_iface_->get_feed_chunksize(afe_data_) * codec_->input_channels();
|
||||||
|
while (input_buffer_.size() >= chunk_size) {
|
||||||
|
afe_iface_->feed(afe_data_, input_buffer_.data());
|
||||||
|
input_buffer_.erase(input_buffer_.begin(), input_buffer_.begin() + chunk_size);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t AfeWakeWord::GetFeedSize() {
|
size_t AfeWakeWord::GetFeedSize() {
|
||||||
|
|||||||
@@ -44,6 +44,8 @@ private:
|
|||||||
std::function<void(const std::string& wake_word)> wake_word_detected_callback_;
|
std::function<void(const std::string& wake_word)> wake_word_detected_callback_;
|
||||||
AudioCodec* codec_ = nullptr;
|
AudioCodec* codec_ = nullptr;
|
||||||
std::string last_detected_wake_word_;
|
std::string last_detected_wake_word_;
|
||||||
|
std::vector<int16_t> input_buffer_;
|
||||||
|
std::mutex input_buffer_mutex_;
|
||||||
|
|
||||||
TaskHandle_t wake_word_encode_task_ = nullptr;
|
TaskHandle_t wake_word_encode_task_ = nullptr;
|
||||||
StaticTask_t* wake_word_encode_task_buffer_ = nullptr;
|
StaticTask_t* wake_word_encode_task_buffer_ = nullptr;
|
||||||
|
|||||||
@@ -138,49 +138,64 @@ void CustomWakeWord::Start() {
|
|||||||
|
|
||||||
void CustomWakeWord::Stop() {
|
void CustomWakeWord::Stop() {
|
||||||
running_ = false;
|
running_ = false;
|
||||||
|
|
||||||
|
std::lock_guard<std::mutex> lock(input_buffer_mutex_);
|
||||||
|
input_buffer_.clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
void CustomWakeWord::Feed(const std::vector<int16_t>& data) {
|
void CustomWakeWord::Feed(const std::vector<int16_t>& data) {
|
||||||
if (multinet_model_data_ == nullptr || !running_) {
|
if (multinet_model_data_ == nullptr) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::lock_guard<std::mutex> lock(input_buffer_mutex_);
|
||||||
|
// Check running state inside lock to avoid TOCTOU race with Stop()
|
||||||
|
if (!running_) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
esp_mn_state_t mn_state;
|
|
||||||
// If input channels is 2, we need to fetch the left channel data
|
// If input channels is 2, we need to fetch the left channel data
|
||||||
if (codec_->input_channels() == 2) {
|
if (codec_->input_channels() == 2) {
|
||||||
auto mono_data = std::vector<int16_t>(data.size() / 2);
|
for (size_t i = 0; i < data.size(); i += 2) {
|
||||||
for (size_t i = 0, j = 0; i < mono_data.size(); ++i, j += 2) {
|
input_buffer_.push_back(data[i]);
|
||||||
mono_data[i] = data[j];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
StoreWakeWordData(mono_data);
|
|
||||||
mn_state = multinet_->detect(multinet_model_data_, const_cast<int16_t*>(mono_data.data()));
|
|
||||||
} else {
|
} else {
|
||||||
StoreWakeWordData(data);
|
input_buffer_.insert(input_buffer_.end(), data.begin(), data.end());
|
||||||
mn_state = multinet_->detect(multinet_model_data_, const_cast<int16_t*>(data.data()));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (mn_state == ESP_MN_STATE_DETECTING) {
|
int chunksize = multinet_->get_samp_chunksize(multinet_model_data_);
|
||||||
return;
|
while (input_buffer_.size() >= chunksize) {
|
||||||
} else if (mn_state == ESP_MN_STATE_DETECTED) {
|
std::vector<int16_t> chunk(input_buffer_.begin(), input_buffer_.begin() + chunksize);
|
||||||
esp_mn_results_t *mn_result = multinet_->get_results(multinet_model_data_);
|
StoreWakeWordData(chunk);
|
||||||
for (int i = 0; i < mn_result->num && running_; i++) {
|
|
||||||
ESP_LOGI(TAG, "Custom wake word detected: command_id=%d, string=%s, prob=%f",
|
esp_mn_state_t mn_state = multinet_->detect(multinet_model_data_, chunk.data());
|
||||||
mn_result->command_id[i], mn_result->string, mn_result->prob[i]);
|
|
||||||
auto& command = commands_[mn_result->command_id[i] - 1];
|
if (mn_state == ESP_MN_STATE_DETECTED) {
|
||||||
if (command.action == "wake") {
|
esp_mn_results_t *mn_result = multinet_->get_results(multinet_model_data_);
|
||||||
last_detected_wake_word_ = command.text;
|
for (int i = 0; i < mn_result->num && running_; i++) {
|
||||||
running_ = false;
|
ESP_LOGI(TAG, "Custom wake word detected: command_id=%d, string=%s, prob=%f",
|
||||||
|
mn_result->command_id[i], mn_result->string, mn_result->prob[i]);
|
||||||
if (wake_word_detected_callback_) {
|
auto& command = commands_[mn_result->command_id[i] - 1];
|
||||||
wake_word_detected_callback_(last_detected_wake_word_);
|
if (command.action == "wake") {
|
||||||
|
last_detected_wake_word_ = command.text;
|
||||||
|
running_ = false;
|
||||||
|
input_buffer_.clear();
|
||||||
|
|
||||||
|
if (wake_word_detected_callback_) {
|
||||||
|
wake_word_detected_callback_(last_detected_wake_word_);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
multinet_->clean(multinet_model_data_);
|
||||||
|
} else if (mn_state == ESP_MN_STATE_TIMEOUT) {
|
||||||
|
ESP_LOGD(TAG, "Command word detection timeout, cleaning state");
|
||||||
|
multinet_->clean(multinet_model_data_);
|
||||||
}
|
}
|
||||||
multinet_->clean(multinet_model_data_);
|
|
||||||
} else if (mn_state == ESP_MN_STATE_TIMEOUT) {
|
if (!running_) {
|
||||||
ESP_LOGD(TAG, "Command word detection timeout, cleaning state");
|
break;
|
||||||
multinet_->clean(multinet_model_data_);
|
}
|
||||||
|
input_buffer_.erase(input_buffer_.begin(), input_buffer_.begin() + chunksize);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -53,6 +53,8 @@ private:
|
|||||||
AudioCodec* codec_ = nullptr;
|
AudioCodec* codec_ = nullptr;
|
||||||
std::string last_detected_wake_word_;
|
std::string last_detected_wake_word_;
|
||||||
std::atomic<bool> running_ = false;
|
std::atomic<bool> running_ = false;
|
||||||
|
std::vector<int16_t> input_buffer_;
|
||||||
|
std::mutex input_buffer_mutex_;
|
||||||
|
|
||||||
TaskHandle_t wake_word_encode_task_ = nullptr;
|
TaskHandle_t wake_word_encode_task_ = nullptr;
|
||||||
StaticTask_t* wake_word_encode_task_buffer_ = nullptr;
|
StaticTask_t* wake_word_encode_task_buffer_ = nullptr;
|
||||||
|
|||||||
@@ -54,21 +54,44 @@ void EspWakeWord::Start() {
|
|||||||
|
|
||||||
void EspWakeWord::Stop() {
|
void EspWakeWord::Stop() {
|
||||||
running_ = false;
|
running_ = false;
|
||||||
|
|
||||||
|
std::lock_guard<std::mutex> lock(input_buffer_mutex_);
|
||||||
|
input_buffer_.clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
void EspWakeWord::Feed(const std::vector<int16_t>& data) {
|
void EspWakeWord::Feed(const std::vector<int16_t>& data) {
|
||||||
if (wakenet_data_ == nullptr || !running_) {
|
if (wakenet_data_ == nullptr) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
int res = wakenet_iface_->detect(wakenet_data_, (int16_t *)data.data());
|
std::lock_guard<std::mutex> lock(input_buffer_mutex_);
|
||||||
if (res > 0) {
|
// Check running state inside lock to avoid TOCTOU race with Stop()
|
||||||
last_detected_wake_word_ = wakenet_iface_->get_word_name(wakenet_data_, res);
|
if (!running_) {
|
||||||
running_ = false;
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
if (wake_word_detected_callback_) {
|
if (codec_->input_channels() == 2) {
|
||||||
wake_word_detected_callback_(last_detected_wake_word_);
|
for (size_t i = 0; i < data.size(); i += 2) {
|
||||||
|
input_buffer_.push_back(data[i]);
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
input_buffer_.insert(input_buffer_.end(), data.begin(), data.end());
|
||||||
|
}
|
||||||
|
|
||||||
|
int chunksize = wakenet_iface_->get_samp_chunksize(wakenet_data_);
|
||||||
|
while (input_buffer_.size() >= chunksize) {
|
||||||
|
int res = wakenet_iface_->detect(wakenet_data_, input_buffer_.data());
|
||||||
|
if (res > 0) {
|
||||||
|
last_detected_wake_word_ = wakenet_iface_->get_word_name(wakenet_data_, res);
|
||||||
|
running_ = false;
|
||||||
|
input_buffer_.clear();
|
||||||
|
|
||||||
|
if (wake_word_detected_callback_) {
|
||||||
|
wake_word_detected_callback_(last_detected_wake_word_);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
input_buffer_.erase(input_buffer_.begin(), input_buffer_.begin() + chunksize);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -9,6 +9,7 @@
|
|||||||
#include <vector>
|
#include <vector>
|
||||||
#include <functional>
|
#include <functional>
|
||||||
#include <atomic>
|
#include <atomic>
|
||||||
|
#include <mutex>
|
||||||
|
|
||||||
#include "audio_codec.h"
|
#include "audio_codec.h"
|
||||||
#include "wake_word.h"
|
#include "wake_word.h"
|
||||||
@@ -37,6 +38,8 @@ private:
|
|||||||
|
|
||||||
std::function<void(const std::string& wake_word)> wake_word_detected_callback_;
|
std::function<void(const std::string& wake_word)> wake_word_detected_callback_;
|
||||||
std::string last_detected_wake_word_;
|
std::string last_detected_wake_word_;
|
||||||
|
std::vector<int16_t> input_buffer_;
|
||||||
|
std::mutex input_buffer_mutex_;
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
Reference in New Issue
Block a user