Compare commits

...

7 Commits

Author SHA1 Message Date
Xiaoxia
1deb477e0e Merge pull request #13 from 78/mqtt
add settings
2024-11-15 05:31:23 +08:00
Terrence
58de3852c5 add settings 2024-11-15 04:44:53 +08:00
Xiaoxia
01fe53c4c7 Merge pull request #12 from 78/mqtt
bump to 0.8.0
2024-11-14 23:23:51 +08:00
Terrence
ec918748f1 bump to 0.8.0 2024-11-14 23:15:43 +08:00
Xiaoxia
ddb375173e Merge pull request #10 from Justin-sky/main
Delete useless code.
2024-11-07 01:53:25 +08:00
Terrence
35cd80feb7 fixbug: internal memory not enough 2024-11-06 22:48:21 +08:00
Justin
77044b410f Delete useless code. 2024-11-06 15:55:20 +08:00
42 changed files with 1287 additions and 542 deletions

View File

@@ -4,7 +4,7 @@
# CMakeLists in this exact order for cmake to work correctly
cmake_minimum_required(VERSION 3.16)
set(PROJECT_VER "0.7.1")
set(PROJECT_VER "0.8.1")
include($ENV{IDF_PATH}/tools/cmake/project.cmake)
project(xiaozhi)

View File

@@ -8,12 +8,15 @@ set(SOURCES "audio_codec.cc"
"board.cc"
"boards/wifi_board.cc"
"boards/ml307_board.cc"
"protocol.cc"
"protocols/mqtt_protocol.cc"
"system_info.cc"
"system_reset.cc"
"application.cc"
"button.cc"
"led.cc"
"ota.cc"
"settings.cc"
"main.cc"
)
set(INCLUDE_DIRS ".")
@@ -29,6 +32,8 @@ elseif(CONFIG_BOARD_TYPE_KEVIN_BOX_0)
set(BOARD_TYPE "kevin-box-0")
elseif(CONFIG_BOARD_TYPE_KEVIN_BOX_1)
set(BOARD_TYPE "kevin-box-1")
elseif(CONFIG_BOARD_TYPE_KEVIN_BOX_2)
set(BOARD_TYPE "kevin-box-2")
elseif(CONFIG_BOARD_TYPE_LICHUANG_DEV)
set(BOARD_TYPE "lichuang-dev")
endif()

View File

@@ -33,6 +33,8 @@ choice BOARD_TYPE
bool "Kevin Box 0"
config BOARD_TYPE_KEVIN_BOX_1
bool "Kevin Box 1"
config BOARD_TYPE_KEVIN_BOX_2
bool "Kevin Box 2"
config BOARD_TYPE_LICHUANG_DEV
bool "立创开发板"
endchoice

View File

@@ -2,6 +2,7 @@
#include "system_info.h"
#include "ml307_ssl_transport.h"
#include "audio_codec.h"
#include "protocols/mqtt_protocol.h"
#include <cstring>
#include <esp_log.h>
@@ -27,12 +28,8 @@ Application::Application() {
}
Application::~Application() {
if (update_display_timer_ != nullptr) {
esp_timer_stop(update_display_timer_);
esp_timer_delete(update_display_timer_);
}
if (ws_client_ != nullptr) {
delete ws_client_;
if (protocol_ != nullptr) {
delete protocol_;
}
if (opus_decoder_ != nullptr) {
opus_decoder_destroy(opus_decoder_);
@@ -40,9 +37,6 @@ Application::~Application() {
if (audio_encode_task_stack_ != nullptr) {
heap_caps_free(audio_encode_task_stack_);
}
if (main_loop_task_stack_ != nullptr) {
heap_caps_free(main_loop_task_stack_);
}
vEventGroupDelete(event_group_);
}
@@ -50,24 +44,34 @@ Application::~Application() {
void Application::CheckNewVersion() {
// Check if there is a new firmware version available
ota_.SetPostData(Board::GetInstance().GetJson());
ota_.CheckVersion();
if (ota_.HasNewVersion()) {
// Wait for the chat state to be idle
while (chat_state_ != kChatStateIdle) {
vTaskDelay(100);
while (true) {
if (ota_.CheckVersion()) {
if (ota_.HasNewVersion()) {
// Wait for the chat state to be idle
while (chat_state_ != kChatStateIdle) {
vTaskDelay(100);
}
SetChatState(kChatStateUpgrading);
ota_.StartUpgrade([](int progress, size_t speed) {
char buffer[64];
snprintf(buffer, sizeof(buffer), "Upgrading...\n %d%% %zuKB/s", progress, speed / 1024);
auto display = Board::GetInstance().GetDisplay();
display->SetText(buffer);
});
// If upgrade success, the device will reboot and never reach here
ESP_LOGI(TAG, "Firmware upgrade failed...");
SetChatState(kChatStateIdle);
} else {
ota_.MarkCurrentVersionValid();
}
return;
}
SetChatState(kChatStateUpgrading);
ota_.StartUpgrade([](int progress, size_t speed) {
char buffer[64];
snprintf(buffer, sizeof(buffer), "Upgrading...\n %d%% %zuKB/s", progress, speed / 1024);
auto display = Board::GetInstance().GetDisplay();
display->SetText(buffer);
});
// If upgrade success, the device will reboot and never reach here
ESP_LOGI(TAG, "Firmware upgrade failed...");
SetChatState(kChatStateIdle);
} else {
ota_.MarkCurrentVersionValid();
// Check again in 60 seconds
vTaskDelay(pdMS_TO_TICKS(60000));
}
}
@@ -88,49 +92,36 @@ void Application::Alert(const std::string&& title, const std::string&& message)
void Application::PlayLocalFile(const char* data, size_t size) {
ESP_LOGI(TAG, "PlayLocalFile: %zu bytes", size);
SetDecodeSampleRate(16000);
auto codec = Board::GetInstance().GetAudioCodec();
codec->EnableOutput(true);
for (const char* p = data; p < data + size; ) {
auto p3 = (BinaryProtocol3*)p;
p += sizeof(BinaryProtocol3);
auto payload_size = ntohs(p3->payload_size);
std::string opus;
opus.resize(payload_size);
memcpy(opus.data(), p3->payload, payload_size);
p += payload_size;
{
std::lock_guard<std::mutex> lock(mutex_);
auto packet = new AudioPacket();
packet->type = kAudioPacketTypeStart;
audio_decode_queue_.push_back(packet);
}
ParseBinaryProtocol3(data, size);
{
std::lock_guard<std::mutex> lock(mutex_);
auto packet = new AudioPacket();
packet->type = kAudioPacketTypeStop;
audio_decode_queue_.push_back(packet);
cv_.notify_all();
audio_decode_queue_.emplace_back(std::move(opus));
}
cv_.notify_all();
}
void Application::ToggleChatState() {
Schedule([this]() {
if (chat_state_ == kChatStateIdle) {
SetChatState(kChatStateConnecting);
StartWebSocketClient();
if (ws_client_ && ws_client_->IsConnected()) {
if (protocol_->OpenAudioChannel()) {
opus_encoder_.ResetState();
#ifdef CONFIG_USE_AFE_SR
audio_processor_.Start();
#endif
SetChatState(kChatStateListening);
ESP_LOGI(TAG, "Communication started");
} else {
SetChatState(kChatStateIdle);
}
} else if (chat_state_ == kChatStateSpeaking) {
AbortSpeaking();
} else if (chat_state_ == kChatStateListening) {
if (ws_client_ && ws_client_->IsConnected()) {
ws_client_->Close();
}
protocol_->CloseAudioChannel();
}
});
}
@@ -143,21 +134,19 @@ void Application::Start() {
builtin_led->SetBlue();
builtin_led->StartContinuousBlink(100);
/* Setup the display */
auto display = board.GetDisplay();
display->SetupUI();
/* Setup the audio codec */
auto codec = board.GetAudioCodec();
opus_decode_sample_rate_ = codec->output_sample_rate();
opus_decoder_ = opus_decoder_create(opus_decode_sample_rate_, 1, NULL);
opus_encoder_.Configure(codec->input_sample_rate(), 1);
opus_encoder_.Configure(16000, 1);
if (codec->input_sample_rate() != 16000) {
input_resampler_.Configure(codec->input_sample_rate(), 16000);
reference_resampler_.Configure(codec->input_sample_rate(), 16000);
}
codec->EnableInput(true);
codec->EnableOutput(true);
codec->EnableOutput(false);
codec->OnInputData([this, codec](std::vector<int16_t>&& data) {
if (codec->input_sample_rate() != 16000) {
if (codec->input_channels() == 2) {
@@ -200,8 +189,7 @@ void Application::Start() {
#endif
});
// OPUS encoder / decoder use a lot of stack memory
const size_t opus_stack_size = 4096 * 8;
const size_t opus_stack_size = 4096 * 8; // OPUS encoder / decoder use a lot of stack memory
audio_encode_task_stack_ = (StackType_t*)heap_caps_malloc(opus_stack_size, MALLOC_CAP_SPIRAM);
audio_encode_task_ = xTaskCreateStatic([](void* arg) {
Application* app = (Application*)arg;
@@ -209,33 +197,32 @@ void Application::Start() {
vTaskDelete(NULL);
}, "opus_encode", opus_stack_size, this, 1, audio_encode_task_stack_, &audio_encode_task_buffer_);
xTaskCreate([](void* arg) {
Application* app = (Application*)arg;
app->AudioPlayTask();
vTaskDelete(NULL);
}, "play_audio", 4096 * 4, this, 4, NULL);
codec->Start();
/* Wait for the network to be ready */
board.StartNetwork();
// Blink the LED to indicate the device is running
builtin_led->SetGreen();
builtin_led->BlinkOnce();
const size_t main_loop_stack_size = 4096 * 2;
main_loop_task_stack_ = (StackType_t*)heap_caps_malloc(main_loop_stack_size, MALLOC_CAP_SPIRAM);
xTaskCreateStatic([](void* arg) {
xTaskCreate([](void* arg) {
Application* app = (Application*)arg;
app->MainLoop();
vTaskDelete(NULL);
}, "main_loop", main_loop_stack_size, this, 1, main_loop_task_stack_, &main_loop_task_buffer_);
}, "main_loop", 4096 * 2, this, 1, nullptr);
// Launch a task to check for new firmware version
// Check for new firmware version or get the MQTT broker address
xTaskCreate([](void* arg) {
Application* app = (Application*)arg;
app->CheckNewVersion();
vTaskDelete(NULL);
}, "check_new_version", 4096 * 2, this, 1, NULL);
}, "check_new_version", 4096 * 2, this, 1, nullptr);
#ifdef CONFIG_USE_AFE_SR
audio_processor_.Initialize(codec->input_channels(), codec->input_reference());
audio_processor_.OnOutput([this](std::vector<int16_t>&& data) {
std::lock_guard<std::mutex> lock(mutex_);
audio_encode_queue_.emplace_back(std::move(data));
cv_.notify_all();
});
wake_word_detect_.Initialize(codec->input_channels(), codec->input_reference());
wake_word_detect_.OnVadStateChange([this](bool speaking) {
Schedule([this, speaking]() {
@@ -254,24 +241,18 @@ void Application::Start() {
wake_word_detect_.OnWakeWordDetected([this]() {
Schedule([this]() {
if (chat_state_ == kChatStateIdle) {
// Encode the wake word data and start websocket client at the same time
// They both consume a lot of time (700ms), so we can do them in parallel
SetChatState(kChatStateConnecting);
wake_word_detect_.EncodeWakeWordData();
SetChatState(kChatStateConnecting);
if (ws_client_ == nullptr) {
StartWebSocketClient();
}
if (ws_client_ && ws_client_->IsConnected()) {
auto encoded = wake_word_detect_.GetWakeWordStream();
// Send the wake word data to the server
ws_client_->Send(encoded.data(), encoded.size(), true);
if (protocol_->OpenAudioChannel()) {
std::string opus;
// Encode and send the wake word data to the server
while (wake_word_detect_.GetWakeWordOpus(opus)) {
protocol_->SendAudio(opus);
}
opus_encoder_.ResetState();
// Send a ready message to indicate the server that the wake word data is sent
SetChatState(kChatStateWakeWordDetected);
// If connected, the hello message is already sent, so we can start communication
audio_processor_.Start();
ESP_LOGI(TAG, "Audio processor started");
} else {
SetChatState(kChatStateIdle);
}
@@ -284,20 +265,70 @@ void Application::Start() {
});
});
wake_word_detect_.StartDetection();
audio_processor_.Initialize(codec->input_channels(), codec->input_reference());
audio_processor_.OnOutput([this](std::vector<int16_t>&& data) {
Schedule([this, data = std::move(data)]() {
if (chat_state_ == kChatStateListening) {
std::lock_guard<std::mutex> lock(mutex_);
audio_encode_queue_.emplace_back(std::move(data));
cv_.notify_all();
}
});
});
#endif
chat_state_ = kChatStateIdle;
// Initialize the protocol
display->SetText("Starting\nProtocol...");
protocol_ = new MqttProtocol();
protocol_->OnIncomingAudio([this](const std::string& data) {
std::lock_guard<std::mutex> lock(mutex_);
audio_decode_queue_.emplace_back(std::move(data));
cv_.notify_all();
});
protocol_->OnAudioChannelOpened([this, codec]() {
if (protocol_->GetServerSampleRate() != codec->output_sample_rate()) {
ESP_LOGW(TAG, "服务器的音频采样率 %d 与设备输出的采样率 %d 不一致,重采样后可能会失真",
protocol_->GetServerSampleRate(), codec->output_sample_rate());
}
SetDecodeSampleRate(protocol_->GetServerSampleRate());
});
protocol_->OnAudioChannelClosed([this]() {
Schedule([this]() {
SetChatState(kChatStateIdle);
});
});
protocol_->OnIncomingJson([this](const cJSON* root) {
// Parse JSON data
auto type = cJSON_GetObjectItem(root, "type");
if (strcmp(type->valuestring, "tts") == 0) {
auto state = cJSON_GetObjectItem(root, "state");
if (strcmp(state->valuestring, "start") == 0) {
Schedule([this]() {
skip_to_end_ = false;
SetChatState(kChatStateSpeaking);
});
} else if (strcmp(state->valuestring, "stop") == 0) {
Schedule([this]() {
auto codec = Board::GetInstance().GetAudioCodec();
codec->WaitForOutputDone();
if (chat_state_ == kChatStateSpeaking) {
SetChatState(kChatStateListening);
}
});
} else if (strcmp(state->valuestring, "sentence_start") == 0) {
auto text = cJSON_GetObjectItem(root, "text");
if (text != NULL) {
ESP_LOGI(TAG, ">> %s", text->valuestring);
}
}
} else if (strcmp(type->valuestring, "stt") == 0) {
auto text = cJSON_GetObjectItem(root, "text");
if (text != NULL) {
ESP_LOGI(TAG, ">> %s", text->valuestring);
}
} else if (strcmp(type->valuestring, "llm") == 0) {
auto emotion = cJSON_GetObjectItem(root, "emotion");
if (emotion != NULL) {
ESP_LOGD(TAG, "EMOTION: %s", emotion->valuestring);
}
}
});
// Blink the LED to indicate the device is running
builtin_led->SetGreen();
builtin_led->BlinkOnce();
SetChatState(kChatStateIdle);
display->UpdateDisplay();
}
@@ -325,16 +356,11 @@ void Application::MainLoop() {
void Application::AbortSpeaking() {
ESP_LOGI(TAG, "Abort speaking");
skip_to_end_ = true;
protocol_->SendAbort();
if (ws_client_ && ws_client_->IsConnected()) {
cJSON* root = cJSON_CreateObject();
cJSON_AddStringToObject(root, "type", "abort");
char* json = cJSON_PrintUnformatted(root);
ws_client_->Send(json);
cJSON_Delete(root);
free(json);
}
skip_to_end_ = true;
auto codec = Board::GetInstance().GetAudioCodec();
codec->ClearOutputQueue();
}
void Application::SetChatState(ChatState state) {
@@ -363,6 +389,9 @@ void Application::SetChatState(ChatState state) {
case kChatStateIdle:
builtin_led->TurnOff();
display->SetText("I'm\nIdle.");
#ifdef CONFIG_USE_AFE_SR
audio_processor_.Stop();
#endif
break;
case kChatStateConnecting:
builtin_led->SetBlue();
@@ -373,11 +402,17 @@ void Application::SetChatState(ChatState state) {
builtin_led->SetRed();
builtin_led->TurnOn();
display->SetText("I'm\nListening...");
#ifdef CONFIG_USE_AFE_SR
audio_processor_.Start();
#endif
break;
case kChatStateSpeaking:
builtin_led->SetGreen();
builtin_led->TurnOn();
display->SetText("I'm\nSpeaking...");
#ifdef CONFIG_USE_AFE_SR
audio_processor_.Stop();
#endif
break;
case kChatStateWakeWordDetected:
builtin_led->SetBlue();
@@ -389,37 +424,17 @@ void Application::SetChatState(ChatState state) {
break;
}
if (ws_client_ && ws_client_->IsConnected()) {
cJSON* root = cJSON_CreateObject();
cJSON_AddStringToObject(root, "type", "state");
cJSON_AddStringToObject(root, "state", state_str[chat_state_]);
char* json = cJSON_PrintUnformatted(root);
ws_client_->Send(json);
cJSON_Delete(root);
free(json);
}
}
BinaryProtocol3* Application::AllocateBinaryProtocol3(const uint8_t* payload, size_t payload_size) {
auto protocol = (BinaryProtocol3*)heap_caps_malloc(sizeof(BinaryProtocol3) + payload_size, MALLOC_CAP_SPIRAM);
assert(protocol != nullptr);
protocol->type = 0;
protocol->reserved = 0;
protocol->payload_size = htons(payload_size);
assert(sizeof(BinaryProtocol3) == 4UL);
memcpy(protocol->payload, payload, payload_size);
return protocol;
protocol_->SendState(state_str[chat_state_]);
}
void Application::AudioEncodeTask() {
ESP_LOGI(TAG, "Audio encode task started");
const int max_audio_play_queue_size_ = 2; // avoid decoding too fast
auto codec = Board::GetInstance().GetAudioCodec();
while (true) {
std::unique_lock<std::mutex> lock(mutex_);
cv_.wait(lock, [this]() {
return !audio_encode_queue_.empty() || (!audio_decode_queue_.empty() && audio_play_queue_.size() < max_audio_play_queue_size_);
return !audio_encode_queue_.empty() || !audio_decode_queue_.empty();
});
if (!audio_encode_queue_.empty()) {
@@ -427,108 +442,38 @@ void Application::AudioEncodeTask() {
audio_encode_queue_.pop_front();
lock.unlock();
// Encode audio data
opus_encoder_.Encode(pcm, [this](const uint8_t* opus, size_t opus_size) {
auto protocol = AllocateBinaryProtocol3(opus, opus_size);
Schedule([this, protocol, opus_size]() {
if (ws_client_ && ws_client_->IsConnected()) {
if (!ws_client_->Send(protocol, sizeof(BinaryProtocol3) + opus_size, true)) {
ESP_LOGE(TAG, "Failed to send audio data");
}
}
heap_caps_free(protocol);
Schedule([this, data = std::string(reinterpret_cast<const char*>(opus), opus_size)]() {
protocol_->SendAudio(data);
});
});
} else if (!audio_decode_queue_.empty()) {
auto packet = std::move(audio_decode_queue_.front());
auto opus = std::move(audio_decode_queue_.front());
audio_decode_queue_.pop_front();
lock.unlock();
if (packet->type == kAudioPacketTypeData && !skip_to_end_) {
int frame_size = opus_decode_sample_rate_ * opus_duration_ms_ / 1000;
packet->pcm.resize(frame_size);
int ret = opus_decode(opus_decoder_, packet->opus.data(), packet->opus.size(), packet->pcm.data(), frame_size, 0);
if (ret < 0) {
ESP_LOGE(TAG, "Failed to decode audio, error code: %d", ret);
delete packet;
continue;
}
if (opus_decode_sample_rate_ != codec->output_sample_rate()) {
int target_size = output_resampler_.GetOutputSamples(frame_size);
std::vector<int16_t> resampled(target_size);
output_resampler_.Process(packet->pcm.data(), frame_size, resampled.data());
packet->pcm = std::move(resampled);
}
if (skip_to_end_) {
continue;
}
std::lock_guard<std::mutex> lock(mutex_);
audio_play_queue_.push_back(packet);
cv_.notify_all();
}
}
}
int frame_size = opus_decode_sample_rate_ * OPUS_FRAME_DURATION_MS / 1000;
std::vector<int16_t> pcm(frame_size);
void Application::HandleAudioPacket(AudioPacket* packet) {
switch (packet->type)
{
case kAudioPacketTypeData: {
if (skip_to_end_) {
break;
}
// This will block until the audio device has finished playing the audio
auto codec = Board::GetInstance().GetAudioCodec();
codec->OutputData(packet->pcm);
break;
}
case kAudioPacketTypeStart:
break_speaking_ = false;
skip_to_end_ = false;
Schedule([this]() {
SetChatState(kChatStateSpeaking);
});
break;
case kAudioPacketTypeStop:
Schedule([this]() {
if (ws_client_ && ws_client_->IsConnected()) {
SetChatState(kChatStateListening);
} else {
SetChatState(kChatStateIdle);
int ret = opus_decode(opus_decoder_, (const unsigned char*)opus.data(), opus.size(), pcm.data(), frame_size, 0);
if (ret < 0) {
ESP_LOGE(TAG, "Failed to decode audio, error code: %d", ret);
continue;
}
});
break;
case kAudioPacketTypeSentenceStart:
ESP_LOGI(TAG, "<< %s", packet->text.c_str());
break;
case kAudioPacketTypeSentenceEnd:
if (break_speaking_) {
skip_to_end_ = true;
// Resample if the sample rate is different
if (opus_decode_sample_rate_ != codec->output_sample_rate()) {
int target_size = output_resampler_.GetOutputSamples(frame_size);
std::vector<int16_t> resampled(target_size);
output_resampler_.Process(pcm.data(), frame_size, resampled.data());
pcm = std::move(resampled);
}
codec->OutputData(pcm);
}
break;
default:
ESP_LOGI(TAG, "Unknown packet type: %d", packet->type);
break;
}
delete packet;
}
void Application::AudioPlayTask() {
ESP_LOGI(TAG, "Audio play task started");
while (true) {
std::unique_lock<std::mutex> lock(mutex_);
cv_.wait(lock, [this]() {
return !audio_play_queue_.empty();
});
auto packet = std::move(audio_play_queue_.front());
audio_play_queue_.pop_front();
cv_.notify_all();
lock.unlock();
HandleAudioPacket(packet);
}
}
@@ -547,127 +492,3 @@ void Application::SetDecodeSampleRate(int sample_rate) {
output_resampler_.Configure(opus_decode_sample_rate_, codec->output_sample_rate());
}
}
void Application::ParseBinaryProtocol3(const char* data, size_t size) {
for (const char* p = data; p < data + size; ) {
auto protocol = (BinaryProtocol3*)p;
p += sizeof(BinaryProtocol3);
auto packet = new AudioPacket();
packet->type = kAudioPacketTypeData;
auto payload_size = ntohs(protocol->payload_size);
packet->opus.resize(payload_size);
memcpy(packet->opus.data(), protocol->payload, payload_size);
p += payload_size;
std::lock_guard<std::mutex> lock(mutex_);
audio_decode_queue_.push_back(packet);
}
}
void Application::StartWebSocketClient() {
if (ws_client_ != nullptr) {
ESP_LOGW(TAG, "WebSocket client already exists");
delete ws_client_;
}
std::string url = CONFIG_WEBSOCKET_URL;
std::string token = "Bearer " + std::string(CONFIG_WEBSOCKET_ACCESS_TOKEN);
ws_client_ = Board::GetInstance().CreateWebSocket();
ws_client_->SetHeader("Authorization", token.c_str());
ws_client_->SetHeader("Protocol-Version", std::to_string(PROTOCOL_VERSION).c_str());
ws_client_->SetHeader("Device-Id", SystemInfo::GetMacAddress().c_str());
ws_client_->OnConnected([this]() {
ESP_LOGI(TAG, "Websocket connected");
// Send hello message to describe the client
// keys: message type, version, wakeup_model, audio_params (format, sample_rate, channels)
std::string message = "{";
message += "\"type\":\"hello\",";
message += "\"audio_params\":{";
message += "\"format\":\"opus\", \"sample_rate\":16000, \"channels\":1";
message += "}}";
ws_client_->Send(message);
});
ws_client_->OnData([this](const char* data, size_t len, bool binary) {
if (binary) {
ParseBinaryProtocol3(data, len);
cv_.notify_all();
} else {
// Parse JSON data
auto root = cJSON_Parse(data);
auto type = cJSON_GetObjectItem(root, "type");
if (type != NULL) {
if (strcmp(type->valuestring, "tts") == 0) {
auto packet = new AudioPacket();
auto state = cJSON_GetObjectItem(root, "state");
if (strcmp(state->valuestring, "start") == 0) {
packet->type = kAudioPacketTypeStart;
auto sample_rate = cJSON_GetObjectItem(root, "sample_rate");
if (sample_rate != NULL) {
SetDecodeSampleRate(sample_rate->valueint);
}
// If the device is speaking, we need to skip the last session
skip_to_end_ = true;
} else if (strcmp(state->valuestring, "stop") == 0) {
packet->type = kAudioPacketTypeStop;
} else if (strcmp(state->valuestring, "sentence_end") == 0) {
packet->type = kAudioPacketTypeSentenceEnd;
} else if (strcmp(state->valuestring, "sentence_start") == 0) {
packet->type = kAudioPacketTypeSentenceStart;
packet->text = cJSON_GetObjectItem(root, "text")->valuestring;
}
std::lock_guard<std::mutex> lock(mutex_);
audio_decode_queue_.push_back(packet);
cv_.notify_all();
} else if (strcmp(type->valuestring, "stt") == 0) {
auto text = cJSON_GetObjectItem(root, "text");
if (text != NULL) {
ESP_LOGI(TAG, ">> %s", text->valuestring);
}
} else if (strcmp(type->valuestring, "llm") == 0) {
auto emotion = cJSON_GetObjectItem(root, "emotion");
if (emotion != NULL) {
ESP_LOGD(TAG, "EMOTION: %s", emotion->valuestring);
}
} else {
ESP_LOGW(TAG, "Unknown message type: %s", type->valuestring);
}
} else {
ESP_LOGE(TAG, "Missing message type, data: %s", data);
}
cJSON_Delete(root);
}
});
ws_client_->OnError([this](int error) {
ESP_LOGE(TAG, "Websocket error: %d", error);
});
ws_client_->OnDisconnected([this]() {
ESP_LOGI(TAG, "Websocket disconnected");
Schedule([this]() {
auto codec = Board::GetInstance().GetAudioCodec();
codec->EnableOutput(false);
#ifdef CONFIG_USE_AFE_SR
audio_processor_.Stop();
#endif
delete ws_client_;
ws_client_ = nullptr;
SetChatState(kChatStateIdle);
});
});
if (!ws_client_->Connect(url.c_str())) {
ESP_LOGE(TAG, "Failed to connect to websocket server");
return;
}
// 建立语音通道后打开音频输出,避免待机时喇叭底噪
auto codec = Board::GetInstance().GetAudioCodec();
codec->EnableOutput(true);
}

View File

@@ -11,8 +11,8 @@
#include "opus_encoder.h"
#include "opus_resampler.h"
#include <web_socket.h>
#include "protocol.h"
#include "display.h"
#include "board.h"
#include "ota.h"
@@ -22,10 +22,6 @@
#include "audio_processor.h"
#endif
#define DETECTION_RUNNING 1
#define COMMUNICATION_RUNNING 2
#define PROTOCOL_VERSION 3
struct BinaryProtocol3 {
uint8_t type;
uint8_t reserved;
@@ -33,24 +29,6 @@ struct BinaryProtocol3 {
uint8_t payload[];
} __attribute__((packed));
enum AudioPacketType {
kAudioPacketTypeUnkonwn = 0,
kAudioPacketTypeStart,
kAudioPacketTypeStop,
kAudioPacketTypeData,
kAudioPacketTypeSentenceStart,
kAudioPacketTypeSentenceEnd
};
struct AudioPacket {
AudioPacketType type = kAudioPacketTypeUnkonwn;
std::string text;
std::vector<uint8_t> opus;
std::vector<int16_t> pcm;
uint32_t timestamp;
};
enum ChatState {
kChatStateUnknown,
kChatStateIdle,
@@ -61,6 +39,8 @@ enum ChatState {
kChatStateUpgrading
};
#define OPUS_FRAME_DURATION_MS 60
class Application {
public:
static Application& GetInstance() {
@@ -91,44 +71,31 @@ private:
std::mutex mutex_;
std::condition_variable_any cv_;
std::list<std::function<void()>> main_tasks_;
WebSocket* ws_client_ = nullptr;
Protocol* protocol_ = nullptr;
EventGroupHandle_t event_group_;
volatile ChatState chat_state_ = kChatStateUnknown;
volatile bool break_speaking_ = false;
bool skip_to_end_ = false;
esp_timer_handle_t update_display_timer_ = nullptr;
// Audio encode / decode
TaskHandle_t audio_encode_task_ = nullptr;
StaticTask_t audio_encode_task_buffer_;
StackType_t* audio_encode_task_stack_ = nullptr;
std::list<std::vector<int16_t>> audio_encode_queue_;
std::list<AudioPacket*> audio_decode_queue_;
std::list<AudioPacket*> audio_play_queue_;
std::list<std::string> audio_decode_queue_;
OpusEncoder opus_encoder_;
OpusDecoder* opus_decoder_ = nullptr;
int opus_duration_ms_ = 60;
int opus_decode_sample_rate_ = -1;
OpusResampler input_resampler_;
OpusResampler reference_resampler_;
OpusResampler output_resampler_;
TaskHandle_t main_loop_task_ = nullptr;
StaticTask_t main_loop_task_buffer_;
StackType_t* main_loop_task_stack_ = nullptr;
void MainLoop();
BinaryProtocol3* AllocateBinaryProtocol3(const uint8_t* payload, size_t payload_size);
void ParseBinaryProtocol3(const char* data, size_t size);
void SetDecodeSampleRate(int sample_rate);
void StartWebSocketClient();
void CheckNewVersion();
void AudioEncodeTask();
void AudioPlayTask();
void HandleAudioPacket(AudioPacket* packet);
void PlayLocalFile(const char* data, size_t size);
};

View File

@@ -1,22 +1,59 @@
#include "audio_codec.h"
#include "board.h"
#include "settings.h"
#include <esp_log.h>
#include <cstring>
#include <driver/i2s_common.h>
#define TAG "AudioCodec"
AudioCodec::AudioCodec() {
audio_event_group_ = xEventGroupCreate();
}
AudioCodec::~AudioCodec() {
if (audio_input_task_ != nullptr) {
vTaskDelete(audio_input_task_);
}
if (audio_output_task_ != nullptr) {
vTaskDelete(audio_output_task_);
}
if (audio_event_group_ != nullptr) {
vEventGroupDelete(audio_event_group_);
}
}
void AudioCodec::OnInputData(std::function<void(std::vector<int16_t>&& data)> callback) {
on_input_data_ = callback;
}
void AudioCodec::OutputData(std::vector<int16_t>& data) {
std::lock_guard<std::mutex> lock(audio_output_queue_mutex_);
audio_output_queue_.emplace_back(std::move(data));
audio_output_queue_cv_.notify_one();
}
IRAM_ATTR bool AudioCodec::on_sent(i2s_chan_handle_t handle, i2s_event_data_t *event, void *user_ctx) {
auto audio_codec = (AudioCodec*)user_ctx;
xEventGroupSetBits(audio_codec->audio_event_group_, AUDIO_EVENT_OUTPUT_DONE);
return false;
}
void AudioCodec::Start() {
Settings settings("audio", false);
output_volume_ = settings.GetInt("output_volume", output_volume_);
// 注册音频输出回调
i2s_event_callbacks_t callbacks = {};
callbacks.on_sent = on_sent;
i2s_channel_register_event_callback(tx_handle_, &callbacks, this);
ESP_ERROR_CHECK(i2s_channel_enable(tx_handle_));
ESP_ERROR_CHECK(i2s_channel_enable(rx_handle_));
EnableInput(true);
EnableOutput(true);
// 创建音频输入任务
if (audio_input_task_ == nullptr) {
@@ -25,15 +62,19 @@ void AudioCodec::OnInputData(std::function<void(std::vector<int16_t>&& data)> ca
audio_device->InputTask();
}, "audio_input", 4096 * 2, this, 3, &audio_input_task_);
}
}
void AudioCodec::OutputData(std::vector<int16_t>& data) {
Write(data.data(), data.size());
// 创建音频输出任务
if (audio_output_task_ == nullptr) {
xTaskCreate([](void* arg) {
auto audio_device = (AudioCodec*)arg;
audio_device->OutputTask();
}, "audio_output", 4096 * 2, this, 3, &audio_output_task_);
}
}
void AudioCodec::InputTask() {
int duration = 30;
int input_frame_size = input_sample_rate_ / 1000 * duration * input_channels_;
while (true) {
std::vector<int16_t> input_data(input_frame_size);
int samples = Read(input_data.data(), input_data.size());
@@ -45,9 +86,51 @@ void AudioCodec::InputTask() {
}
}
void AudioCodec::OutputTask() {
while (true) {
std::unique_lock<std::mutex> lock(audio_output_queue_mutex_);
if (!audio_output_queue_cv_.wait_for(lock, std::chrono::seconds(30), [this]() {
return !audio_output_queue_.empty();
})) {
// If timeout, disable output
EnableOutput(false);
continue;
}
auto data = std::move(audio_output_queue_.front());
audio_output_queue_.pop_front();
lock.unlock();
if (!output_enabled_) {
EnableOutput(true);
}
xEventGroupClearBits(audio_event_group_, AUDIO_EVENT_OUTPUT_DONE);
Write(data.data(), data.size());
audio_output_queue_cv_.notify_all();
}
}
void AudioCodec::WaitForOutputDone() {
// Wait for the output queue to be empty and the output is done
std::unique_lock<std::mutex> lock(audio_output_queue_mutex_);
audio_output_queue_cv_.wait(lock, [this]() {
return audio_output_queue_.empty();
});
lock.unlock();
xEventGroupWaitBits(audio_event_group_, AUDIO_EVENT_OUTPUT_DONE, pdFALSE, pdFALSE, portMAX_DELAY);
}
void AudioCodec::ClearOutputQueue() {
std::lock_guard<std::mutex> lock(audio_output_queue_mutex_);
audio_output_queue_.clear();
}
void AudioCodec::SetOutputVolume(int volume) {
output_volume_ = volume;
ESP_LOGI(TAG, "Set output volume to %d", output_volume_);
Settings settings("audio", true);
settings.SetInt("output_volume", output_volume_);
}
void AudioCodec::EnableInput(bool enable) {

View File

@@ -3,24 +3,32 @@
#include <freertos/FreeRTOS.h>
#include <freertos/task.h>
#include <freertos/event_groups.h>
#include <driver/i2s_std.h>
#include <vector>
#include <string>
#include <functional>
#include <list>
#include <mutex>
#include <condition_variable>
#include "board.h"
#define AUDIO_EVENT_OUTPUT_DONE (1 << 0)
class AudioCodec {
public:
AudioCodec();
virtual ~AudioCodec();
virtual void SetOutputVolume(int volume);
virtual void EnableInput(bool enable);
virtual void EnableOutput(bool enable);
void Start();
void OnInputData(std::function<void(std::vector<int16_t>&& data)> callback);
void OutputData(std::vector<int16_t>& data);
void WaitForOutputDone();
void ClearOutputQueue();
inline bool duplex() const { return duplex_; }
inline bool input_reference() const { return input_reference_; }
@@ -32,11 +40,21 @@ public:
private:
TaskHandle_t audio_input_task_ = nullptr;
TaskHandle_t audio_output_task_ = nullptr;
std::function<void(std::vector<int16_t>&& data)> on_input_data_;
std::list<std::vector<int16_t>> audio_output_queue_;
std::mutex audio_output_queue_mutex_;
std::condition_variable audio_output_queue_cv_;
EventGroupHandle_t audio_event_group_ = nullptr;
IRAM_ATTR static bool on_sent(i2s_chan_handle_t handle, i2s_event_data_t *event, void *user_ctx);
void InputTask();
void OutputTask();
protected:
i2s_chan_handle_t tx_handle_ = nullptr;
i2s_chan_handle_t rx_handle_ = nullptr;
bool duplex_ = false;
bool input_reference_ = false;
bool input_enabled_ = false;
@@ -49,6 +67,8 @@ protected:
virtual int Read(int16_t* dest, int samples) = 0;
virtual int Write(const int16_t* data, int samples) = 0;
virtual void EnableInput(bool enable);
virtual void EnableOutput(bool enable);
};
#endif // _AUDIO_CODEC_H

View File

@@ -2,6 +2,7 @@
#include <esp_log.h>
#include <driver/i2c.h>
#include <driver/i2s_tdm.h>
static const char TAG[] = "BoxAudioCodec";
@@ -174,8 +175,6 @@ void BoxAudioCodec::CreateDuplexChannels(gpio_num_t mclk, gpio_num_t bclk, gpio_
ESP_ERROR_CHECK(i2s_channel_init_std_mode(tx_handle_, &std_cfg));
ESP_ERROR_CHECK(i2s_channel_init_tdm_mode(rx_handle_, &tdm_cfg));
ESP_ERROR_CHECK(i2s_channel_enable(tx_handle_));
ESP_ERROR_CHECK(i2s_channel_enable(rx_handle_));
ESP_LOGI(TAG, "Duplex channels created");
}
@@ -200,7 +199,7 @@ void BoxAudioCodec::EnableInput(bool enable) {
fs.channel_mask |= ESP_CODEC_DEV_MAKE_CHANNEL_MASK(1);
}
ESP_ERROR_CHECK(esp_codec_dev_open(input_dev_, &fs));
ESP_ERROR_CHECK(esp_codec_dev_set_in_channel_gain(input_dev_, ESP_CODEC_DEV_MAKE_CHANNEL_MASK(0), 30.0));
ESP_ERROR_CHECK(esp_codec_dev_set_in_channel_gain(input_dev_, ESP_CODEC_DEV_MAKE_CHANNEL_MASK(0), 40.0));
} else {
ESP_ERROR_CHECK(esp_codec_dev_close(input_dev_));
}

View File

@@ -3,16 +3,11 @@
#include "audio_codec.h"
#include <driver/i2s_std.h>
#include <driver/i2s_tdm.h>
#include <esp_codec_dev.h>
#include <esp_codec_dev_defaults.h>
class BoxAudioCodec : public AudioCodec {
private:
i2s_chan_handle_t tx_handle_ = nullptr;
i2s_chan_handle_t rx_handle_ = nullptr;
const audio_codec_data_if_t* data_if_ = nullptr;
const audio_codec_ctrl_if_t* out_ctrl_if_ = nullptr;
const audio_codec_if_t* out_codec_if_ = nullptr;
@@ -27,6 +22,8 @@ private:
virtual int Read(int16_t* dest, int samples) override;
virtual int Write(const int16_t* data, int samples) override;
virtual void EnableInput(bool enable) override;
virtual void EnableOutput(bool enable) override;
public:
BoxAudioCodec(void* i2c_master_handle, int input_sample_rate, int output_sample_rate,
@@ -35,8 +32,6 @@ public:
virtual ~BoxAudioCodec();
virtual void SetOutputVolume(int volume) override;
virtual void EnableInput(bool enable) override;
virtual void EnableOutput(bool enable) override;
};
#endif // _BOX_AUDIO_CODEC_H

View File

@@ -130,14 +130,11 @@ NoAudioCodec::NoAudioCodec(int input_sample_rate, int output_sample_rate, gpio_n
std_cfg.gpio_cfg.dout = I2S_GPIO_UNUSED;
std_cfg.gpio_cfg.din = mic_din;
ESP_ERROR_CHECK(i2s_channel_init_std_mode(rx_handle_, &std_cfg));
ESP_ERROR_CHECK(i2s_channel_enable(tx_handle_));
ESP_ERROR_CHECK(i2s_channel_enable(rx_handle_));
ESP_LOGI(TAG, "Simplex channels created");
}
int NoAudioCodec::Write(const int16_t* data, int samples) {
int32_t buffer[samples];
std::vector<int32_t> buffer(samples);
// output_volume_: 0-100
// volume_factor_: 0-65536
@@ -154,15 +151,15 @@ int NoAudioCodec::Write(const int16_t* data, int samples) {
}
size_t bytes_written;
ESP_ERROR_CHECK(i2s_channel_write(tx_handle_, buffer, samples * sizeof(int32_t), &bytes_written, portMAX_DELAY));
ESP_ERROR_CHECK(i2s_channel_write(tx_handle_, buffer.data(), samples * sizeof(int32_t), &bytes_written, portMAX_DELAY));
return bytes_written / sizeof(int32_t);
}
int NoAudioCodec::Read(int16_t* dest, int samples) {
size_t bytes_read;
int32_t bit32_buffer[samples];
if (i2s_channel_read(rx_handle_, bit32_buffer, samples * sizeof(int32_t), &bytes_read, portMAX_DELAY) != ESP_OK) {
std::vector<int32_t> bit32_buffer(samples);
if (i2s_channel_read(rx_handle_, bit32_buffer.data(), samples * sizeof(int32_t), &bytes_read, portMAX_DELAY) != ESP_OK) {
ESP_LOGE(TAG, "Read Failed!");
return 0;
}

View File

@@ -3,14 +3,10 @@
#include "audio_codec.h"
#include <driver/i2s_std.h>
#include <driver/gpio.h>
class NoAudioCodec : public AudioCodec {
private:
i2s_chan_handle_t tx_handle_ = nullptr;
i2s_chan_handle_t rx_handle_ = nullptr;
virtual int Write(const int16_t* data, int samples) override;
virtual int Read(int16_t* dest, int samples) override;

View File

@@ -7,7 +7,7 @@
// static const char *TAG = "Board";
bool Board::GetBatteryVoltage(int &voltage, bool& charging) {
bool Board::GetBatteryLevel(int &level, bool& charging) {
return false;
}

View File

@@ -3,6 +3,8 @@
#include <http.h>
#include <web_socket.h>
#include <mqtt.h>
#include <udp.h>
#include <string>
#include "led.h"
@@ -36,8 +38,10 @@ public:
virtual Display* GetDisplay() = 0;
virtual Http* CreateHttp() = 0;
virtual WebSocket* CreateWebSocket() = 0;
virtual Mqtt* CreateMqtt() = 0;
virtual Udp* CreateUdp() = 0;
virtual bool GetNetworkState(std::string& network_name, int& signal_quality, std::string& signal_quality_text) = 0;
virtual bool GetBatteryVoltage(int &voltage, bool& charging);
virtual bool GetBatteryLevel(int &level, bool& charging);
virtual std::string GetJson();
};

View File

@@ -5,7 +5,7 @@
#define AUDIO_INPUT_SAMPLE_RATE 16000
#define AUDIO_OUTPUT_SAMPLE_RATE 16000
#define AUDIO_DEFAULT_OUTPUT_VOLUME 90
#define AUDIO_DEFAULT_OUTPUT_VOLUME 80
#define AUDIO_INPUT_REFERENCE true

View File

@@ -10,16 +10,11 @@
#include <esp_spiffs.h>
#include <driver/gpio.h>
#include <driver/i2c_master.h>
#include <esp_adc/adc_oneshot.h>
#include <esp_adc/adc_cali.h>
#include <esp_adc/adc_cali_scheme.h>
static const char *TAG = "KevinBoxBoard";
class KevinBoxBoard : public Ml307Board {
private:
adc_oneshot_unit_handle_t adc1_handle_;
adc_cali_handle_t adc1_cali_handle_;
i2c_master_bus_handle_t display_i2c_bus_;
i2c_master_bus_handle_t codec_i2c_bus_;
Button boot_button_;
@@ -50,27 +45,6 @@ private:
gpio_set_level(GPIO_NUM_15, 1);
}
void InitializeADC() {
adc_oneshot_unit_init_cfg_t init_config1 = {};
init_config1.unit_id = ADC_UNIT_1;
ESP_ERROR_CHECK(adc_oneshot_new_unit(&init_config1, &adc1_handle_));
//-------------ADC1 Config---------------//
adc_oneshot_chan_cfg_t config = {
.atten = ADC_ATTEN_DB_12,
.bitwidth = ADC_BITWIDTH_DEFAULT,
};
ESP_ERROR_CHECK(adc_oneshot_config_channel(adc1_handle_, ADC_CHANNEL_0, &config));
adc_cali_curve_fitting_config_t cali_config = {
.unit_id = ADC_UNIT_1,
.chan = ADC_CHANNEL_0,
.atten = ADC_ATTEN_DB_12,
.bitwidth = ADC_BITWIDTH_DEFAULT,
};
ESP_ERROR_CHECK(adc_cali_create_scheme_curve_fitting(&cali_config, &adc1_cali_handle_));
}
void InitializeDisplayI2c() {
i2c_master_bus_config_t bus_config = {
.i2c_port = I2C_NUM_0,
@@ -153,7 +127,6 @@ public:
ESP_LOGI(TAG, "Initializing KevinBoxBoard");
InitializeDisplayI2c();
InitializeCodecI2c();
InitializeADC();
MountStorage();
Enable4GModule();
@@ -178,14 +151,6 @@ public:
static Ssd1306Display display(display_i2c_bus_, DISPLAY_WIDTH, DISPLAY_HEIGHT, DISPLAY_MIRROR_X, DISPLAY_MIRROR_Y);
return &display;
}
virtual bool GetBatteryVoltage(int &voltage, bool& charging) override {
ESP_ERROR_CHECK(adc_oneshot_get_calibrated_result(adc1_handle_, adc1_cali_handle_, ADC_CHANNEL_0, &voltage));
voltage *= 3;
charging = false;
ESP_LOGI(TAG, "Battery voltage: %d, Charging: %d", voltage, charging);
return true;
}
};
DECLARE_BOARD(KevinBoxBoard);

View File

@@ -10,16 +10,11 @@
#include <esp_spiffs.h>
#include <driver/gpio.h>
#include <driver/i2c_master.h>
#include <esp_adc/adc_oneshot.h>
#include <esp_adc/adc_cali.h>
#include <esp_adc/adc_cali_scheme.h>
static const char *TAG = "KevinBoxBoard";
class KevinBoxBoard : public Ml307Board {
private:
adc_oneshot_unit_handle_t adc1_handle_;
adc_cali_handle_t adc1_cali_handle_;
i2c_master_bus_handle_t display_i2c_bus_;
i2c_master_bus_handle_t codec_i2c_bus_;
Button boot_button_;
@@ -51,27 +46,6 @@ private:
gpio_set_level(GPIO_NUM_18, 1);
}
void InitializeADC() {
adc_oneshot_unit_init_cfg_t init_config1 = {};
init_config1.unit_id = ADC_UNIT_1;
ESP_ERROR_CHECK(adc_oneshot_new_unit(&init_config1, &adc1_handle_));
//-------------ADC1 Config---------------//
adc_oneshot_chan_cfg_t config = {
.atten = ADC_ATTEN_DB_12,
.bitwidth = ADC_BITWIDTH_DEFAULT,
};
ESP_ERROR_CHECK(adc_oneshot_config_channel(adc1_handle_, ADC_CHANNEL_0, &config));
adc_cali_curve_fitting_config_t cali_config = {
.unit_id = ADC_UNIT_1,
.chan = ADC_CHANNEL_0,
.atten = ADC_ATTEN_DB_12,
.bitwidth = ADC_BITWIDTH_DEFAULT,
};
ESP_ERROR_CHECK(adc_cali_create_scheme_curve_fitting(&cali_config, &adc1_cali_handle_));
}
void InitializeDisplayI2c() {
i2c_master_bus_config_t bus_config = {
.i2c_port = I2C_NUM_0,
@@ -154,19 +128,9 @@ public:
ESP_LOGI(TAG, "Initializing KevinBoxBoard");
InitializeDisplayI2c();
InitializeCodecI2c();
InitializeADC();
MountStorage();
Enable4GModule();
gpio_config_t charging_io = {
.pin_bit_mask = (1ULL << 2),
.mode = GPIO_MODE_INPUT,
.pull_up_en = GPIO_PULLUP_ENABLE,
.pull_down_en = GPIO_PULLDOWN_DISABLE,
.intr_type = GPIO_INTR_DISABLE,
};
gpio_config(&charging_io);
InitializeButtons();
Ml307Board::Initialize();
@@ -188,14 +152,6 @@ public:
static Ssd1306Display display(display_i2c_bus_, DISPLAY_WIDTH, DISPLAY_HEIGHT, DISPLAY_MIRROR_X, DISPLAY_MIRROR_Y);
return &display;
}
virtual bool GetBatteryVoltage(int &voltage, bool& charging) override {
ESP_ERROR_CHECK(adc_oneshot_get_calibrated_result(adc1_handle_, adc1_cali_handle_, ADC_CHANNEL_0, &voltage));
voltage *= 3;
charging = gpio_get_level(GPIO_NUM_2) == 0;
ESP_LOGI(TAG, "Battery voltage: %d, Charging: %d", voltage, charging);
return true;
}
};
DECLARE_BOARD(KevinBoxBoard);

View File

@@ -0,0 +1,84 @@
#include "axp2101.h"
#include <esp_log.h>
static const char *TAG = "AXP2101";
bool Axp2101::Initialize(i2c_master_bus_handle_t i2c_bus, int i2c_device_address) {
i2c_device_config_t axp2101_cfg = {
.dev_addr_length = I2C_ADDR_BIT_LEN_7,
.device_address = (uint16_t)i2c_device_address,
.scl_speed_hz = 100000,
.scl_wait_us = 0,
.flags = {
.disable_ack_check = 0,
},
};
ESP_ERROR_CHECK(i2c_master_bus_add_device(i2c_bus, &axp2101_cfg, &i2c_device_));
WriteReg(0x93, 0x1c); // 配置aldo2输出为3.3v
uint8_t value = ReadReg(0x90); // XPOWERS_AXP2101_LDO_ONOFF_CTRL0
value = value | 0x02; // set bit 1 (ALDO2)
WriteReg(0x90, value); // and power channels now enabled
WriteReg(0x64, 0x03); // CV charger voltage setting to 42V
value = ReadReg(0x62);
ESP_LOGI(TAG, "axp2101 read 0x62 get: 0x%X", value);
WriteReg(0x61, 0x05); // set Main battery precharge current to 125mA
WriteReg(0x62, 0x10); // set Main battery charger current to 900mA ( 0x08-200mA, 0x09-300mA, 0x0A-400mA )
WriteReg(0x63, 0x15); // set Main battery term charge current to 125mA
value = ReadReg(0x62);
ESP_LOGI(TAG, "axp2101 read 0x62 get: 0x%X", value);
value = ReadReg(0x18);
ESP_LOGI(TAG, "axp2101 read 0x18 get: 0x%X", value);
value = value & 0b11100000;
value = value | 0b00001110;
WriteReg(0x18, value);
value = ReadReg(0x18);
ESP_LOGI(TAG, "axp2101 read 0x18 get: 0x%X", value);
WriteReg(0x14, 0x00); // set minimum system voltage to 4.1V (default 4.7V), for poor USB cables
WriteReg(0x15, 0x00); // set input voltage limit to 3.88v, for poor USB cables
WriteReg(0x16, 0x05); // set input voltage limit to 3.88v, for poor USB cables
WriteReg(0x24, 0x01); // set Vsys for PWROFF threshold to 3.2V (default - 2.6V and kill battery)
WriteReg(0x50, 0x14); // set TS pin to EXTERNAL input (not temperature)
return true;
}
void Axp2101::WriteReg(uint8_t reg, uint8_t value) {
uint8_t buffer[2];
buffer[0] = reg;
buffer[1] = value;
ESP_ERROR_CHECK(i2c_master_transmit(i2c_device_, buffer, 2, 100));
}
uint8_t Axp2101::ReadReg(uint8_t reg) {
uint8_t buffer[1];
ESP_ERROR_CHECK(i2c_master_transmit_receive(i2c_device_, &reg, 1, buffer, 1, 100));
return buffer[0];
}
bool Axp2101::IsCharging() {
uint8_t value = ReadReg(0x01);
return (value & 0b01100000) == 0b00100000;
}
bool Axp2101::IsChargingDone() {
uint8_t value = ReadReg(0x01);
return (value & 0b00000111) == 0b00000100;
}
int Axp2101::GetBatteryLevel() {
uint8_t value = ReadReg(0xA4);
return value;
}
void Axp2101::PowerOff() {
uint8_t value = ReadReg(0x10);
value = value | 0x01;
WriteReg(0x10, value);
}

View File

@@ -0,0 +1,22 @@
#ifndef __AXP2101_H__
#define __AXP2101_H__
#include <driver/i2c_master.h>
class Axp2101 {
public:
Axp2101() = default;
bool Initialize(i2c_master_bus_handle_t i2c_bus, int i2c_device_address);
bool IsCharging();
bool IsChargingDone();
int GetBatteryLevel();
void PowerOff();
private:
i2c_master_dev_handle_t i2c_device_ = nullptr;
void WriteReg(uint8_t reg, uint8_t value);
uint8_t ReadReg(uint8_t reg);
};
#endif

View File

@@ -0,0 +1,41 @@
#ifndef _BOARD_CONFIG_H_
#define _BOARD_CONFIG_H_
#include <driver/gpio.h>
#define AUDIO_INPUT_SAMPLE_RATE 24000
#define AUDIO_OUTPUT_SAMPLE_RATE 24000
#define AUDIO_INPUT_REFERENCE true
#define AUDIO_I2S_GPIO_MCLK GPIO_NUM_40
#define AUDIO_I2S_GPIO_WS GPIO_NUM_47
#define AUDIO_I2S_GPIO_BCLK GPIO_NUM_38
#define AUDIO_I2S_GPIO_DIN GPIO_NUM_39
#define AUDIO_I2S_GPIO_DOUT GPIO_NUM_48
#define AUDIO_CODEC_PA_PIN GPIO_NUM_9
#define AUDIO_CODEC_I2C_SDA_PIN GPIO_NUM_42
#define AUDIO_CODEC_I2C_SCL_PIN GPIO_NUM_41
#define AUDIO_CODEC_ES8311_ADDR ES8311_CODEC_DEFAULT_ADDR
#define AUDIO_CODEC_ES7210_ADDR ES7210_CODEC_DEFAULT_ADDR
#define BUILTIN_LED_GPIO GPIO_NUM_3
#define BOOT_BUTTON_GPIO GPIO_NUM_0
#define VOLUME_UP_BUTTON_GPIO GPIO_NUM_1
#define VOLUME_DOWN_BUTTON_GPIO GPIO_NUM_2
#define DISPLAY_SDA_PIN GPIO_NUM_7
#define DISPLAY_SCL_PIN GPIO_NUM_8
#define DISPLAY_WIDTH 128
#define DISPLAY_HEIGHT 64
#define DISPLAY_MIRROR_X false
#define DISPLAY_MIRROR_Y false
#define ML307_RX_PIN GPIO_NUM_5
#define ML307_TX_PIN GPIO_NUM_6
#define AXP2101_I2C_ADDR 0x34
#endif // _BOARD_CONFIG_H_

View File

@@ -0,0 +1,168 @@
#include "boards/ml307_board.h"
#include "audio_codecs/box_audio_codec.h"
#include "display/ssd1306_display.h"
#include "application.h"
#include "button.h"
#include "led.h"
#include "config.h"
#include "axp2101.h"
#include <esp_log.h>
#include <esp_spiffs.h>
#include <driver/gpio.h>
#include <driver/i2c_master.h>
static const char *TAG = "KevinBoxBoard";
class KevinBoxBoard : public Ml307Board {
private:
i2c_master_bus_handle_t display_i2c_bus_;
i2c_master_bus_handle_t codec_i2c_bus_;
Axp2101 axp2101_;
Button boot_button_;
Button volume_up_button_;
Button volume_down_button_;
uint8_t _data_buffer[2];
void MountStorage() {
// Mount the storage partition
esp_vfs_spiffs_conf_t conf = {
.base_path = "/storage",
.partition_label = "storage",
.max_files = 5,
.format_if_mount_failed = true,
};
esp_vfs_spiffs_register(&conf);
}
void Enable4GModule() {
// Make GPIO HIGH to enable the 4G module
gpio_config_t ml307_enable_config = {
.pin_bit_mask = (1ULL << 4),
.mode = GPIO_MODE_OUTPUT,
.pull_up_en = GPIO_PULLUP_DISABLE,
.pull_down_en = GPIO_PULLDOWN_DISABLE,
.intr_type = GPIO_INTR_DISABLE,
};
gpio_config(&ml307_enable_config);
gpio_set_level(GPIO_NUM_4, 1);
}
void InitializeDisplayI2c() {
i2c_master_bus_config_t bus_config = {
.i2c_port = I2C_NUM_0,
.sda_io_num = DISPLAY_SDA_PIN,
.scl_io_num = DISPLAY_SCL_PIN,
.clk_source = I2C_CLK_SRC_DEFAULT,
.glitch_ignore_cnt = 7,
.intr_priority = 0,
.trans_queue_depth = 0,
.flags = {
.enable_internal_pullup = 1,
},
};
ESP_ERROR_CHECK(i2c_new_master_bus(&bus_config, &display_i2c_bus_));
}
void InitializeCodecI2c() {
// Initialize I2C peripheral
i2c_master_bus_config_t i2c_bus_cfg = {
.i2c_port = I2C_NUM_1,
.sda_io_num = AUDIO_CODEC_I2C_SDA_PIN,
.scl_io_num = AUDIO_CODEC_I2C_SCL_PIN,
.clk_source = I2C_CLK_SRC_DEFAULT,
.glitch_ignore_cnt = 7,
.intr_priority = 0,
.trans_queue_depth = 0,
.flags = {
.enable_internal_pullup = 1,
},
};
ESP_ERROR_CHECK(i2c_new_master_bus(&i2c_bus_cfg, &codec_i2c_bus_));
}
void InitializeButtons() {
boot_button_.OnClick([this]() {
Application::GetInstance().ToggleChatState();
});
volume_up_button_.OnClick([this]() {
auto codec = GetAudioCodec();
auto volume = codec->output_volume() + 10;
if (volume > 100) {
volume = 100;
}
codec->SetOutputVolume(volume);
GetDisplay()->ShowNotification("Volume\n" + std::to_string(volume));
});
volume_up_button_.OnLongPress([this]() {
auto codec = GetAudioCodec();
codec->SetOutputVolume(100);
GetDisplay()->ShowNotification("Volume\n100");
});
volume_down_button_.OnClick([this]() {
auto codec = GetAudioCodec();
auto volume = codec->output_volume() - 10;
if (volume < 0) {
volume = 0;
}
codec->SetOutputVolume(volume);
GetDisplay()->ShowNotification("Volume\n" + std::to_string(volume));
});
volume_down_button_.OnLongPress([this]() {
auto codec = GetAudioCodec();
codec->SetOutputVolume(0);
GetDisplay()->ShowNotification("Volume\n0");
});
}
public:
KevinBoxBoard() : Ml307Board(ML307_TX_PIN, ML307_RX_PIN, 4096),
boot_button_(BOOT_BUTTON_GPIO),
volume_up_button_(VOLUME_UP_BUTTON_GPIO),
volume_down_button_(VOLUME_DOWN_BUTTON_GPIO) {
}
virtual void Initialize() override {
ESP_LOGI(TAG, "Initializing KevinBoxBoard");
InitializeDisplayI2c();
InitializeCodecI2c();
axp2101_.Initialize(codec_i2c_bus_, AXP2101_I2C_ADDR);
MountStorage();
Enable4GModule();
InitializeButtons();
Ml307Board::Initialize();
}
virtual Led* GetBuiltinLed() override {
static Led led(BUILTIN_LED_GPIO);
return &led;
}
virtual AudioCodec* GetAudioCodec() override {
static BoxAudioCodec audio_codec(codec_i2c_bus_, AUDIO_INPUT_SAMPLE_RATE, AUDIO_OUTPUT_SAMPLE_RATE,
AUDIO_I2S_GPIO_MCLK, AUDIO_I2S_GPIO_BCLK, AUDIO_I2S_GPIO_WS, AUDIO_I2S_GPIO_DOUT, AUDIO_I2S_GPIO_DIN,
AUDIO_CODEC_PA_PIN, AUDIO_CODEC_ES8311_ADDR, AUDIO_CODEC_ES7210_ADDR, AUDIO_INPUT_REFERENCE);
return &audio_codec;
}
virtual Display* GetDisplay() override {
static Ssd1306Display display(display_i2c_bus_, DISPLAY_WIDTH, DISPLAY_HEIGHT, DISPLAY_MIRROR_X, DISPLAY_MIRROR_Y);
return &display;
}
virtual bool GetBatteryLevel(int &level, bool& charging) override {
level = axp2101_.GetBatteryLevel();
charging = axp2101_.IsCharging();
ESP_LOGI(TAG, "Battery level: %d, Charging: %d", level, charging);
return true;
}
};
DECLARE_BOARD(KevinBoxBoard);

View File

@@ -5,7 +5,7 @@
#define AUDIO_INPUT_SAMPLE_RATE 24000
#define AUDIO_OUTPUT_SAMPLE_RATE 24000
#define AUDIO_DEFAULT_OUTPUT_VOLUME 90
#define AUDIO_DEFAULT_OUTPUT_VOLUME 80
#define AUDIO_INPUT_REFERENCE true

View File

@@ -109,7 +109,7 @@ public:
audio_codec = new BoxAudioCodec(i2c_bus_, AUDIO_INPUT_SAMPLE_RATE, AUDIO_OUTPUT_SAMPLE_RATE,
AUDIO_I2S_GPIO_MCLK, AUDIO_I2S_GPIO_BCLK, AUDIO_I2S_GPIO_WS, AUDIO_I2S_GPIO_DOUT, AUDIO_I2S_GPIO_DIN,
GPIO_NUM_NC, AUDIO_CODEC_ES8311_ADDR, AUDIO_CODEC_ES7210_ADDR, AUDIO_INPUT_REFERENCE);
// audio_codec->SetOutputVolume(AUDIO_DEFAULT_OUTPUT_VOLUME);
audio_codec->SetOutputVolume(AUDIO_DEFAULT_OUTPUT_VOLUME);
}
return audio_codec;
}

View File

@@ -6,6 +6,8 @@
#include <ml307_http.h>
#include <ml307_ssl_transport.h>
#include <web_socket.h>
#include <ml307_mqtt.h>
#include <ml307_udp.h>
static const char *TAG = "Ml307Board";
@@ -83,6 +85,14 @@ WebSocket* Ml307Board::CreateWebSocket() {
return new WebSocket(new Ml307SslTransport(modem_, 0));
}
Mqtt* Ml307Board::CreateMqtt() {
return new Ml307Mqtt(modem_, 0);
}
Udp* Ml307Board::CreateUdp() {
return new Ml307Udp(modem_, 0);
}
bool Ml307Board::GetNetworkState(std::string& network_name, int& signal_quality, std::string& signal_quality_text) {
if (!modem_.network_ready()) {
return false;
@@ -96,16 +106,11 @@ bool Ml307Board::GetNetworkState(std::string& network_name, int& signal_quality,
std::string Ml307Board::GetBoardJson() {
// Set the board type for OTA
std::string board_type = BOARD_TYPE;
std::string module_name = modem_.GetModuleName();
std::string carrier_name = modem_.GetCarrierName();
std::string imei = modem_.GetImei();
std::string iccid = modem_.GetIccid();
int csq = modem_.GetCsq();
std::string board_json = std::string("{\"type\":\"" + board_type + "\",");
board_json += "\"revision\":\"" + module_name + "\",";
board_json += "\"carrier\":\"" + carrier_name + "\",";
board_json += "\"csq\":\"" + std::to_string(csq) + "\",";
board_json += "\"imei\":\"" + imei + "\",";
board_json += "\"iccid\":\"" + iccid + "\"}";
board_json += "\"revision\":\"" + modem_.GetModuleName() + "\",";
board_json += "\"carrier\":\"" + modem_.GetCarrierName() + "\",";
board_json += "\"csq\":\"" + std::to_string(modem_.GetCsq()) + "\",";
board_json += "\"imei\":\"" + modem_.GetImei() + "\",";
board_json += "\"iccid\":\"" + modem_.GetIccid() + "\"}";
return board_json;
}

View File

@@ -17,6 +17,8 @@ public:
virtual void StartNetwork() override;
virtual Http* CreateHttp() override;
virtual WebSocket* CreateWebSocket() override;
virtual Mqtt* CreateMqtt() override;
virtual Udp* CreateUdp() override;
virtual bool GetNetworkState(std::string& network_name, int& signal_quality, std::string& signal_quality_text) override;
};

View File

@@ -5,6 +5,8 @@
#include <freertos/FreeRTOS.h>
#include <freertos/task.h>
#include <esp_http.h>
#include <esp_mqtt.h>
#include <esp_udp.h>
#include <tcp_transport.h>
#include <tls_transport.h>
#include <web_socket.h>
@@ -39,12 +41,15 @@ void WifiBoard::StartNetwork() {
display->SetText(std::string("Connect to WiFi\n") + wifi_station.GetSsid());
wifi_station.Start();
if (!wifi_station.IsConnected()) {
application.Alert("Info", "Configuring WiFi");
builtin_led->SetBlue();
builtin_led->Blink(1000, 500);
auto& wifi_ap = WifiConfigurationAp::GetInstance();
wifi_ap.SetSsidPrefix("Xiaozhi");
wifi_ap.Start();
// 播报配置 WiFi 的提示
application.Alert("Info", "Configuring WiFi");
// 显示 WiFi 配置 AP 的 SSID 和 Web 服务器 URL
display->SetText(wifi_ap.GetSsid() + "\n" + wifi_ap.GetWebServerUrl());
// Wait forever until reset after configuration
while (true) {
vTaskDelay(pdMS_TO_TICKS(1000));
@@ -69,6 +74,14 @@ WebSocket* WifiBoard::CreateWebSocket() {
}
}
Mqtt* WifiBoard::CreateMqtt() {
return new EspMqtt();
}
Udp* WifiBoard::CreateUdp() {
return new EspUdp();
}
bool WifiBoard::GetNetworkState(std::string& network_name, int& signal_quality, std::string& signal_quality_text) {
if (wifi_config_mode_) {
auto& wifi_ap = WifiConfigurationAp::GetInstance();

View File

@@ -14,6 +14,8 @@ public:
virtual void StartNetwork() override;
virtual Http* CreateHttp() override;
virtual WebSocket* CreateWebSocket() override;
virtual Mqtt* CreateMqtt() override;
virtual Udp* CreateUdp() override;
virtual bool GetNetworkState(std::string& network_name, int& signal_quality, std::string& signal_quality_text) override;
};

View File

@@ -123,10 +123,10 @@ void Display::UpdateDisplay() {
}
}
int battery_voltage;
int battery_level;
bool charging;
if (board.GetBatteryVoltage(battery_voltage, charging)) {
text += "\n" + std::to_string(battery_voltage) + "mV";
if (board.GetBatteryLevel(battery_level, charging)) {
text += "\nPower " + std::to_string(battery_level) + "%";
if (charging) {
text += " (Charging)";
}

View File

@@ -48,8 +48,8 @@ St7789Display::St7789Display(esp_lcd_panel_io_handle_t panel_io, esp_lcd_panel_h
.mirror_y = mirror_y_,
},
.flags = {
.buff_dma = 0,
.buff_spiram = 1,
.buff_dma = 1,
.buff_spiram = 0,
.sw_rotate = 0,
.full_refresh = 0,
.direct_mode = 0,

View File

@@ -2,7 +2,7 @@
dependencies:
78/esp-wifi-connect: "~1.3.0"
78/esp-opus-encoder: "~1.1.0"
78/esp-ml307: "~1.4.0"
78/esp-ml307: "~1.6.0"
espressif/led_strip: "^2.4.1"
espressif/esp_codec_dev: "^1.3.1"
espressif/esp-sr: "^1.9.0"

View File

@@ -18,6 +18,7 @@ extern "C" void app_main(void)
// Initialize NVS flash for WiFi configuration
esp_err_t ret = nvs_flash_init();
if (ret == ESP_ERR_NVS_NO_FREE_PAGES || ret == ESP_ERR_NVS_NEW_VERSION_FOUND) {
ESP_LOGW(TAG, "Erasing NVS flash to fix corruption");
ESP_ERROR_CHECK(nvs_flash_erase());
ret = nvs_flash_init();
}

View File

@@ -1,6 +1,7 @@
#include "ota.h"
#include "system_info.h"
#include "board.h"
#include "settings.h"
#include <cJSON.h>
#include <esp_log.h>
@@ -34,13 +35,13 @@ void Ota::SetPostData(const std::string& post_data) {
post_data_ = post_data;
}
void Ota::CheckVersion() {
bool Ota::CheckVersion() {
std::string current_version = esp_app_get_description()->version;
ESP_LOGI(TAG, "Current version: %s", current_version.c_str());
if (check_version_url_.length() < 10) {
ESP_LOGE(TAG, "Check version URL is not properly set");
return;
return false;
}
auto http = Board::GetInstance().CreateHttp();
@@ -67,25 +68,40 @@ void Ota::CheckVersion() {
cJSON *root = cJSON_Parse(response.c_str());
if (root == NULL) {
ESP_LOGE(TAG, "Failed to parse JSON response");
return;
return false;
}
cJSON *mqtt = cJSON_GetObjectItem(root, "mqtt");
if (mqtt != NULL) {
Settings settings("mqtt", true);
cJSON *item = NULL;
cJSON_ArrayForEach(item, mqtt) {
if (item->type == cJSON_String) {
if (settings.GetString(item->string) != item->valuestring) {
settings.SetString(item->string, item->valuestring);
}
}
}
has_mqtt_config_ = true;
}
cJSON *firmware = cJSON_GetObjectItem(root, "firmware");
if (firmware == NULL) {
ESP_LOGE(TAG, "Failed to get firmware object");
cJSON_Delete(root);
return;
return false;
}
cJSON *version = cJSON_GetObjectItem(firmware, "version");
if (version == NULL) {
ESP_LOGE(TAG, "Failed to get version object");
cJSON_Delete(root);
return;
return false;
}
cJSON *url = cJSON_GetObjectItem(firmware, "url");
if (url == NULL) {
ESP_LOGE(TAG, "Failed to get url object");
cJSON_Delete(root);
return;
return false;
}
firmware_version_ = version->valuestring;
@@ -99,6 +115,7 @@ void Ota::CheckVersion() {
} else {
ESP_LOGI(TAG, "Current is the latest version");
}
return true;
}
void Ota::MarkCurrentVersionValid() {
@@ -148,11 +165,12 @@ void Ota::Upgrade(const std::string& firmware_url) {
return;
}
char buffer[4096];
std::vector<char> buffer(4096);
size_t total_read = 0, recent_read = 0;
auto last_calc_time = esp_timer_get_time();
while (true) {
int ret = http->Read(buffer, sizeof(buffer));
taskYIELD(); // Avoid watchdog timeout
int ret = http->Read(buffer.data(), buffer.size());
if (ret < 0) {
ESP_LOGE(TAG, "Failed to read HTTP data: %s", esp_err_to_name(ret));
delete http;
@@ -178,7 +196,7 @@ void Ota::Upgrade(const std::string& firmware_url) {
if (!image_header_checked) {
image_header.append(buffer, ret);
image_header.append(buffer.data(), ret);
if (image_header.size() >= sizeof(esp_image_header_t) + sizeof(esp_image_segment_header_t) + sizeof(esp_app_desc_t)) {
esp_app_desc_t new_app_info;
memcpy(&new_app_info, image_header.data() + sizeof(esp_image_header_t) + sizeof(esp_image_segment_header_t), sizeof(esp_app_desc_t));
@@ -201,7 +219,7 @@ void Ota::Upgrade(const std::string& firmware_url) {
image_header_checked = true;
}
}
auto err = esp_ota_write(update_handle, buffer, ret);
auto err = esp_ota_write(update_handle, buffer.data(), ret);
if (err != ESP_OK) {
ESP_LOGE(TAG, "Failed to write OTA data: %s", esp_err_to_name(err));
esp_ota_abort(update_handle);

View File

@@ -13,14 +13,16 @@ public:
void SetCheckVersionUrl(std::string check_version_url);
void SetHeader(const std::string& key, const std::string& value);
void SetPostData(const std::string& post_data);
void CheckVersion();
bool CheckVersion();
bool HasNewVersion() { return has_new_version_; }
bool HasMqttConfig() { return has_mqtt_config_; }
void StartUpgrade(std::function<void(int progress, size_t speed)> callback);
void MarkCurrentVersionValid();
private:
std::string check_version_url_;
bool has_new_version_ = false;
bool has_mqtt_config_ = false;
std::string firmware_version_;
std::string firmware_url_;
std::string post_data_;

0
main/protocol.cc Normal file
View File

27
main/protocol.h Normal file
View File

@@ -0,0 +1,27 @@
#ifndef PROTOCOL_H
#define PROTOCOL_H
#include <cJSON.h>
#include <string>
#include <functional>
class Protocol {
public:
virtual ~Protocol() = default;
virtual void OnIncomingAudio(std::function<void(const std::string& data)> callback) = 0;
virtual void OnIncomingJson(std::function<void(const cJSON* root)> callback) = 0;
virtual void SendAudio(const std::string& data) = 0;
virtual void SendText(const std::string& text) = 0;
virtual void SendState(const std::string& state) = 0;
virtual void SendAbort() = 0;
virtual bool OpenAudioChannel() = 0;
virtual void CloseAudioChannel() = 0;
virtual void OnAudioChannelOpened(std::function<void()> callback) = 0;
virtual void OnAudioChannelClosed(std::function<void()> callback) = 0;
virtual bool IsAudioChannelOpened() const = 0;
virtual int GetServerSampleRate() const = 0;
};
#endif // PROTOCOL_H

View File

@@ -0,0 +1,326 @@
#include "mqtt_protocol.h"
#include "board.h"
#include "application.h"
#include "settings.h"
#include <esp_log.h>
#include <ml307_mqtt.h>
#include <ml307_udp.h>
#include <cstring>
#include <arpa/inet.h>
#define TAG "MQTT"
MqttProtocol::MqttProtocol() {
event_group_handle_ = xEventGroupCreate();
StartMqttClient();
}
MqttProtocol::~MqttProtocol() {
ESP_LOGI(TAG, "MqttProtocol deinit");
if (udp_ != nullptr) {
delete udp_;
}
if (mqtt_ != nullptr) {
delete mqtt_;
}
vEventGroupDelete(event_group_handle_);
}
bool MqttProtocol::StartMqttClient() {
if (mqtt_ != nullptr) {
ESP_LOGW(TAG, "Mqtt client already started");
delete mqtt_;
}
Settings settings("mqtt", false);
endpoint_ = settings.GetString("endpoint");
client_id_ = settings.GetString("client_id");
username_ = settings.GetString("username");
password_ = settings.GetString("password");
subscribe_topic_ = settings.GetString("subscribe_topic");
publish_topic_ = settings.GetString("publish_topic");
if (endpoint_.empty()) {
ESP_LOGE(TAG, "MQTT endpoint is not specified");
return false;
}
mqtt_ = Board::GetInstance().CreateMqtt();
mqtt_->SetKeepAlive(90);
mqtt_->OnDisconnected([this]() {
ESP_LOGI(TAG, "Disconnected from endpoint");
});
mqtt_->OnMessage([this](const std::string& topic, const std::string& payload) {
cJSON* root = cJSON_Parse(payload.c_str());
if (root == nullptr) {
ESP_LOGE(TAG, "Failed to parse json message %s", payload.c_str());
return;
}
cJSON* type = cJSON_GetObjectItem(root, "type");
if (type == nullptr) {
ESP_LOGE(TAG, "Message type is not specified");
cJSON_Delete(root);
return;
}
if (strcmp(type->valuestring, "hello") == 0) {
ParseServerHello(root);
} else if (strcmp(type->valuestring, "goodbye") == 0) {
auto session_id = cJSON_GetObjectItem(root, "session_id");
if (session_id == nullptr || session_id_ == session_id->valuestring) {
if (on_audio_channel_closed_ != nullptr) {
on_audio_channel_closed_();
}
}
} else if (on_incoming_json_ != nullptr) {
on_incoming_json_(root);
}
cJSON_Delete(root);
});
ESP_LOGI(TAG, "Connecting to endpoint %s", endpoint_.c_str());
if (!mqtt_->Connect(endpoint_, 8883, client_id_, username_, password_)) {
ESP_LOGE(TAG, "Failed to connect to endpoint");
return false;
}
ESP_LOGI(TAG, "Connected to endpoint");
if (!subscribe_topic_.empty()) {
mqtt_->Subscribe(subscribe_topic_, 2);
}
return true;
}
void MqttProtocol::SendText(const std::string& text) {
if (publish_topic_.empty()) {
return;
}
mqtt_->Publish(publish_topic_, text);
}
void MqttProtocol::SendAudio(const std::string& data) {
std::lock_guard<std::mutex> lock(channel_mutex_);
if (udp_ == nullptr) {
return;
}
std::string nonce(aes_nonce_);
*(uint16_t*)&nonce[2] = htons(data.size());
*(uint32_t*)&nonce[12] = htonl(++local_sequence_);
std::string encrypted;
encrypted.resize(aes_nonce_.size() + data.size());
memcpy(encrypted.data(), nonce.data(), nonce.size());
size_t nc_off = 0;
uint8_t stream_block[16] = {0};
if (mbedtls_aes_crypt_ctr(&aes_ctx_, data.size(), &nc_off, (uint8_t*)nonce.c_str(), stream_block,
(uint8_t*)data.data(), (uint8_t*)&encrypted[nonce.size()]) != 0) {
ESP_LOGE(TAG, "Failed to encrypt audio data");
return;
}
udp_->Send(encrypted);
}
void MqttProtocol::SendState(const std::string& state) {
std::string message = "{";
message += "\"session_id\":\"" + session_id_ + "\",";
message += "\"type\":\"state\",";
message += "\"state\":\"" + state + "\"";
message += "}";
SendText(message);
}
void MqttProtocol::SendAbort() {
std::string message = "{";
message += "\"session_id\":\"" + session_id_ + "\",";
message += "\"type\":\"abort\"";
message += "}";
SendText(message);
}
void MqttProtocol::CloseAudioChannel() {
{
std::lock_guard<std::mutex> lock(channel_mutex_);
if (udp_ != nullptr) {
delete udp_;
udp_ = nullptr;
}
}
std::string message = "{";
message += "\"session_id\":\"" + session_id_ + "\",";
message += "\"type\":\"goodbye\"";
message += "}";
SendText(message);
if (on_audio_channel_closed_ != nullptr) {
on_audio_channel_closed_();
}
}
bool MqttProtocol::OpenAudioChannel() {
if (mqtt_ == nullptr || !mqtt_->IsConnected()) {
ESP_LOGI(TAG, "MQTT is not connected, try to connect now");
if (!StartMqttClient()) {
ESP_LOGE(TAG, "Failed to connect to MQTT");
return false;
}
}
session_id_ = "";
// 发送 hello 消息申请 UDP 通道
std::string message = "{";
message += "\"type\":\"hello\",";
message += "\"version\": 3,";
message += "\"transport\":\"udp\",";
message += "\"audio_params\":{";
message += "\"format\":\"opus\", \"sample_rate\":16000, \"channels\":1, \"frame_duration\":" + std::to_string(OPUS_FRAME_DURATION_MS);
message += "}}";
SendText(message);
// 等待服务器响应
EventBits_t bits = xEventGroupWaitBits(event_group_handle_, MQTT_PROTOCOL_SERVER_HELLO_EVENT, pdTRUE, pdFALSE, pdMS_TO_TICKS(10000));
if (!(bits & MQTT_PROTOCOL_SERVER_HELLO_EVENT)) {
ESP_LOGE(TAG, "Failed to receive server hello");
return false;
}
std::lock_guard<std::mutex> lock(channel_mutex_);
if (udp_ != nullptr) {
delete udp_;
}
udp_ = Board::GetInstance().CreateUdp();
udp_->OnMessage([this](const std::string& data) {
if (data.size() < sizeof(aes_nonce_)) {
ESP_LOGE(TAG, "Invalid audio packet size: %zu", data.size());
return;
}
if (data[0] != 0x01) {
ESP_LOGE(TAG, "Invalid audio packet type: %x", data[0]);
return;
}
uint32_t sequence = ntohl(*(uint32_t*)&data[12]);
if (sequence < remote_sequence_) {
ESP_LOGW(TAG, "Received audio packet with old sequence: %lu, expected: %lu", sequence, remote_sequence_);
return;
}
if (sequence != remote_sequence_ + 1) {
ESP_LOGW(TAG, "Received audio packet with wrong sequence: %lu, expected: %lu", sequence, remote_sequence_ + 1);
}
std::string decrypted;
size_t decrypted_size = data.size() - aes_nonce_.size();
size_t nc_off = 0;
uint8_t stream_block[16] = {0};
decrypted.resize(decrypted_size);
auto nonce = (uint8_t*)data.data();
auto encrypted = (uint8_t*)data.data() + aes_nonce_.size();
int ret = mbedtls_aes_crypt_ctr(&aes_ctx_, decrypted_size, &nc_off, nonce, stream_block, encrypted, (uint8_t*)decrypted.data());
if (ret != 0) {
ESP_LOGE(TAG, "Failed to decrypt audio data, ret: %d", ret);
return;
}
if (on_incoming_audio_ != nullptr) {
on_incoming_audio_(decrypted);
}
remote_sequence_ = sequence;
});
udp_->Connect(udp_server_, udp_port_);
if (on_audio_channel_opened_ != nullptr) {
on_audio_channel_opened_();
}
return true;
}
void MqttProtocol::OnIncomingJson(std::function<void(const cJSON* root)> callback) {
on_incoming_json_ = callback;
}
void MqttProtocol::OnIncomingAudio(std::function<void(const std::string& data)> callback) {
on_incoming_audio_ = callback;
}
void MqttProtocol::OnAudioChannelOpened(std::function<void()> callback) {
on_audio_channel_opened_ = callback;
}
void MqttProtocol::OnAudioChannelClosed(std::function<void()> callback) {
on_audio_channel_closed_ = callback;
}
void MqttProtocol::ParseServerHello(const cJSON* root) {
auto transport = cJSON_GetObjectItem(root, "transport");
if (transport == nullptr || strcmp(transport->valuestring, "udp") != 0) {
ESP_LOGE(TAG, "Unsupported transport: %s", transport->valuestring);
return;
}
auto session_id = cJSON_GetObjectItem(root, "session_id");
if (session_id != nullptr) {
session_id_ = session_id->valuestring;
}
// Get sample rate from hello message
auto audio_params = cJSON_GetObjectItem(root, "audio_params");
if (audio_params != NULL) {
auto sample_rate = cJSON_GetObjectItem(audio_params, "sample_rate");
if (sample_rate != NULL) {
server_sample_rate_ = sample_rate->valueint;
}
}
auto udp = cJSON_GetObjectItem(root, "udp");
if (udp == nullptr) {
ESP_LOGE(TAG, "UDP is not specified");
return;
}
udp_server_ = cJSON_GetObjectItem(udp, "server")->valuestring;
udp_port_ = cJSON_GetObjectItem(udp, "port")->valueint;
auto key = cJSON_GetObjectItem(udp, "key")->valuestring;
auto nonce = cJSON_GetObjectItem(udp, "nonce")->valuestring;
// auto encryption = cJSON_GetObjectItem(udp, "encryption")->valuestring;
// ESP_LOGI(TAG, "UDP server: %s, port: %d, encryption: %s", udp_server_.c_str(), udp_port_, encryption);
aes_nonce_ = DecodeHexString(nonce);
mbedtls_aes_init(&aes_ctx_);
mbedtls_aes_setkey_enc(&aes_ctx_, (const unsigned char*)DecodeHexString(key).c_str(), 128);
local_sequence_ = 0;
remote_sequence_ = 0;
xEventGroupSetBits(event_group_handle_, MQTT_PROTOCOL_SERVER_HELLO_EVENT);
}
int MqttProtocol::GetServerSampleRate() const {
return server_sample_rate_;
}
static const char hex_chars[] = "0123456789ABCDEF";
// 辅助函数,将单个十六进制字符转换为对应的数值
static inline uint8_t CharToHex(char c) {
if (c >= '0' && c <= '9') return c - '0';
if (c >= 'A' && c <= 'F') return c - 'A' + 10;
if (c >= 'a' && c <= 'f') return c - 'a' + 10;
return 0; // 对于无效输入返回0
}
std::string MqttProtocol::DecodeHexString(const std::string& hex_string) {
std::string decoded;
decoded.reserve(hex_string.size() / 2);
for (size_t i = 0; i < hex_string.size(); i += 2) {
char byte = (CharToHex(hex_string[i]) << 4) | CharToHex(hex_string[i + 1]);
decoded.push_back(byte);
}
return decoded;
}
bool MqttProtocol::IsAudioChannelOpened() const {
return udp_ != nullptr;
}

View File

@@ -0,0 +1,73 @@
#ifndef MQTT_PROTOCOL_H
#define MQTT_PROTOCOL_H
#include "protocol.h"
#include <mqtt.h>
#include <udp.h>
#include <cJSON.h>
#include <mbedtls/aes.h>
#include <freertos/FreeRTOS.h>
#include <freertos/event_groups.h>
#include <functional>
#include <string>
#include <map>
#include <mutex>
#define MQTT_PING_INTERVAL_SECONDS 90
#define MQTT_RECONNECT_INTERVAL_MS 10000
#define MQTT_PROTOCOL_SERVER_HELLO_EVENT (1 << 0)
class MqttProtocol : public Protocol {
public:
MqttProtocol();
~MqttProtocol();
void OnIncomingAudio(std::function<void(const std::string& data)> callback);
void OnIncomingJson(std::function<void(const cJSON* root)> callback);
void SendAudio(const std::string& data);
void SendText(const std::string& text);
void SendState(const std::string& state);
void SendAbort();
bool OpenAudioChannel();
void CloseAudioChannel();
void OnAudioChannelOpened(std::function<void()> callback);
void OnAudioChannelClosed(std::function<void()> callback);
bool IsAudioChannelOpened() const;
int GetServerSampleRate() const;
private:
EventGroupHandle_t event_group_handle_;
std::function<void(const cJSON* root)> on_incoming_json_;
std::function<void(const std::string& data)> on_incoming_audio_;
std::function<void()> on_audio_channel_opened_;
std::function<void()> on_audio_channel_closed_;
std::string endpoint_;
std::string client_id_;
std::string username_;
std::string password_;
std::string subscribe_topic_;
std::string publish_topic_;
std::mutex channel_mutex_;
Mqtt* mqtt_ = nullptr;
Udp* udp_ = nullptr;
mbedtls_aes_context aes_ctx_;
std::string aes_nonce_;
std::string udp_server_;
int udp_port_;
uint32_t local_sequence_;
uint32_t remote_sequence_;
std::string session_id_;
int server_sample_rate_ = 16000;
bool StartMqttClient();
void ParseServerHello(const cJSON* root);
std::string DecodeHexString(const std::string& hex_string);
};
#endif // MQTT_PROTOCOL_H

63
main/settings.cc Normal file
View File

@@ -0,0 +1,63 @@
#include "settings.h"
#include <esp_log.h>
#include <nvs_flash.h>
#define TAG "Settings"
Settings::Settings(const std::string& ns, bool read_write) : ns_(ns), read_write_(read_write) {
nvs_open(ns.c_str(), read_write_ ? NVS_READWRITE : NVS_READONLY, &nvs_handle_);
}
Settings::~Settings() {
if (nvs_handle_ != 0) {
if (read_write_) {
ESP_ERROR_CHECK(nvs_commit(nvs_handle_));
}
nvs_close(nvs_handle_);
}
}
std::string Settings::GetString(const std::string& key, const std::string& default_value) {
if (nvs_handle_ == 0) {
return default_value;
}
size_t length = 0;
if (nvs_get_str(nvs_handle_, key.c_str(), nullptr, &length) != ESP_OK) {
return default_value;
}
std::string value;
value.resize(length);
ESP_ERROR_CHECK(nvs_get_str(nvs_handle_, key.c_str(), value.data(), &length));
return value;
}
void Settings::SetString(const std::string& key, const std::string& value) {
if (read_write_) {
ESP_ERROR_CHECK(nvs_set_str(nvs_handle_, key.c_str(), value.c_str()));
} else {
ESP_LOGW(TAG, "Namespace %s is not open for writing", ns_.c_str());
}
}
int32_t Settings::GetInt(const std::string& key, int32_t default_value) {
if (nvs_handle_ == 0) {
return default_value;
}
int32_t value;
if (nvs_get_i32(nvs_handle_, key.c_str(), &value) != ESP_OK) {
return default_value;
}
return value;
}
void Settings::SetInt(const std::string& key, int32_t value) {
if (read_write_) {
ESP_ERROR_CHECK(nvs_set_i32(nvs_handle_, key.c_str(), value));
} else {
ESP_LOGW(TAG, "Namespace %s is not open for writing", ns_.c_str());
}
}

23
main/settings.h Normal file
View File

@@ -0,0 +1,23 @@
#ifndef SETTINGS_H
#define SETTINGS_H
#include <string>
#include <nvs_flash.h>
class Settings {
public:
Settings(const std::string& ns, bool read_write = false);
~Settings();
std::string GetString(const std::string& key, const std::string& default_value = "");
void SetString(const std::string& key, const std::string& value);
int32_t GetInt(const std::string& key, int32_t default_value = 0);
void SetInt(const std::string& key, int32_t value);
private:
std::string ns_;
nvs_handle_t nvs_handle_ = 0;
bool read_write_ = false;
};
#endif

View File

@@ -26,6 +26,9 @@ WakeWordDetect::~WakeWordDetect() {
if (wake_word_encode_task_stack_ != nullptr) {
free(wake_word_encode_task_stack_);
}
if (audio_detection_task_stack_ != nullptr) {
heap_caps_free(audio_detection_task_stack_);
}
vEventGroupDelete(event_group_);
}
@@ -77,11 +80,13 @@ void WakeWordDetect::Initialize(int channels, bool reference) {
afe_detection_data_ = esp_afe_sr_v1.create_from_config(&afe_config);
xTaskCreate([](void* arg) {
const size_t audio_detection_task_stack_size = 4096 * 2;
audio_detection_task_stack_ = (StackType_t*)heap_caps_malloc(audio_detection_task_stack_size, MALLOC_CAP_SPIRAM);
xTaskCreateStatic([](void* arg) {
auto this_ = (WakeWordDetect*)arg;
this_->AudioDetectionTask();
vTaskDelete(NULL);
}, "audio_detection", 4096 * 2, this, 1, NULL);
}, "audio_detection", audio_detection_task_stack_size, this, 1, audio_detection_task_stack_, &audio_detection_task_buffer_);
}
void WakeWordDetect::OnWakeWordDetected(std::function<void()> callback) {
@@ -165,6 +170,8 @@ void WakeWordDetect::StoreWakeWordData(uint16_t* data, size_t samples) {
}
void WakeWordDetect::EncodeWakeWordData() {
xEventGroupClearBits(event_group_, WAKE_WORD_ENCODED_EVENT);
wake_word_opus_.clear();
if (wake_word_encode_task_stack_ == nullptr) {
wake_word_encode_task_stack_ = (StackType_t*)malloc(4096 * 8);
}
@@ -175,34 +182,34 @@ void WakeWordDetect::EncodeWakeWordData() {
OpusEncoder* encoder = new OpusEncoder();
encoder->Configure(16000, 1, 60);
encoder->SetComplexity(0);
this_->wake_word_opus_.resize(4096 * 4);
size_t offset = 0;
for (auto& pcm: this_->wake_word_pcm_) {
encoder->Encode(pcm, [this_, &offset](const uint8_t* opus, size_t opus_size) {
size_t protocol_size = sizeof(BinaryProtocol3) + opus_size;
if (offset + protocol_size < this_->wake_word_opus_.size()) {
auto protocol = (BinaryProtocol3*)(&this_->wake_word_opus_[offset]);
protocol->type = 0;
protocol->reserved = 0;
protocol->payload_size = htons(opus_size);
memcpy(protocol->payload, opus, opus_size);
offset += protocol_size;
}
encoder->Encode(pcm, [this_](const uint8_t* opus, size_t opus_size) {
std::lock_guard<std::mutex> lock(this_->wake_word_mutex_);
this_->wake_word_opus_.emplace_back(std::string(reinterpret_cast<const char*>(opus), opus_size));
this_->wake_word_cv_.notify_one();
});
}
this_->wake_word_pcm_.clear();
this_->wake_word_opus_.resize(offset);
auto end_time = esp_timer_get_time();
ESP_LOGI(TAG, "Encode wake word opus: %zu bytes in %lld ms", this_->wake_word_opus_.size(), (end_time - start_time) / 1000);
ESP_LOGI(TAG, "Encode wake word opus %zu packets in %lld ms", this_->wake_word_opus_.size(), (end_time - start_time) / 1000);
xEventGroupSetBits(this_->event_group_, WAKE_WORD_ENCODED_EVENT);
this_->wake_word_cv_.notify_one();
delete encoder;
vTaskDelete(NULL);
}, "encode_detect_packets", 4096 * 8, this, 1, wake_word_encode_task_stack_, &wake_word_encode_task_buffer_);
}
const std::string&& WakeWordDetect::GetWakeWordStream() {
xEventGroupWaitBits(event_group_, WAKE_WORD_ENCODED_EVENT, pdTRUE, pdTRUE, portMAX_DELAY);
return std::move(wake_word_opus_);
bool WakeWordDetect::GetWakeWordOpus(std::string& opus) {
std::unique_lock<std::mutex> lock(wake_word_mutex_);
wake_word_cv_.wait(lock, [this]() {
return !wake_word_opus_.empty() || (xEventGroupGetBits(event_group_) & WAKE_WORD_ENCODED_EVENT);
});
if (wake_word_opus_.empty()) {
return false;
}
opus.swap(wake_word_opus_.front());
wake_word_opus_.pop_front();
return true;
}

View File

@@ -12,6 +12,8 @@
#include <string>
#include <vector>
#include <functional>
#include <mutex>
#include <condition_variable>
class WakeWordDetect {
@@ -27,7 +29,7 @@ public:
void StopDetection();
bool IsDetectionRunning();
void EncodeWakeWordData();
const std::string&& GetWakeWordStream();
bool GetWakeWordOpus(std::string& opus);
private:
esp_afe_sr_data_t* afe_detection_data_ = nullptr;
@@ -40,11 +42,17 @@ private:
int channels_;
bool reference_;
TaskHandle_t audio_detection_task_ = nullptr;
StaticTask_t audio_detection_task_buffer_;
StackType_t* audio_detection_task_stack_ = nullptr;
TaskHandle_t wake_word_encode_task_ = nullptr;
StaticTask_t wake_word_encode_task_buffer_;
StackType_t* wake_word_encode_task_stack_ = nullptr;
std::list<std::vector<int16_t>> wake_word_pcm_;
std::string wake_word_opus_;
std::list<std::string> wake_word_opus_;
std::mutex wake_word_mutex_;
std::condition_variable wake_word_cv_;
void StoreWakeWordData(uint16_t* data, size_t size);
void AudioDetectionTask();

48
release.py Normal file
View File

@@ -0,0 +1,48 @@
import sys
import os
import json
def get_board_type():
with open("build/compile_commands.json") as f:
data = json.load(f)
for item in data:
if not item["file"].endswith("main.cc"):
continue
command = item["command"]
# extract -DBOARD_TYPE=xxx
board_type = command.split("-DBOARD_TYPE=\\\"")[1].split("\\\"")[0].strip()
return board_type
return None
def get_project_version():
with open("CMakeLists.txt") as f:
for line in f:
if line.startswith("set(PROJECT_VER"):
return line.split("\"")[1].split("\"")[0].strip()
return None
def merge_bin():
if os.system("idf.py merge-bin") != 0:
print("merge bin failed")
sys.exit(1)
def zip_bin(board_type, project_version):
if not os.path.exists("releases"):
os.makedirs("releases")
output_path = f"releases/v{project_version}_{board_type}.zip"
if os.path.exists(output_path):
os.remove(output_path)
if os.system(f"zip -j {output_path} build/merged-binary.bin") != 0:
print("zip bin failed")
sys.exit(1)
print(f"zip bin to {output_path} done")
if __name__ == "__main__":
merge_bin()
board_type = get_board_type()
print("board type:", board_type)
project_version = get_project_version()
print("project version:", project_version)
zip_bin(board_type, project_version)

View File

@@ -64,6 +64,8 @@ def get_board_name(folder):
return "bread-compact-wifi"
elif "KevinBox1" in basename:
return "kevin-box-1"
if basename.startswith("v0.7"):
return basename.split("_")[1]
raise Exception(f"Unknown board name: {basename}")
def read_binary(dir_path):