Compare commits

...

11 Commits

Author SHA1 Message Date
Terrence
7af366b7b2 fix: ES7120_SEL_MIC1 => ES7210_SEL_MIC1 2025-08-23 16:05:49 +08:00
Xiaoxia
ddbb24942d v1.8.9: Upgrade component versions (#1118) 2025-08-23 07:12:14 +08:00
Ben
610a4a0703 Update README.md (#1115)
delete '的'
2025-08-22 18:49:26 +08:00
香草味的纳西妲喵
7cd37427b2 feat: 添加批量转换OGG音频的相关脚本,移动声波配网HTML文件到scripts文件夹下 (#1107)
* feat: 添加批量转换OGG音频的相关脚本,移动声波配网HTML文件到scripts文件夹下

* Rename

* moved README.md
2025-08-22 00:53:18 +08:00
laride
2d772dad68 fix: resolve some audio issues on esp-hi (#1027)
* fix: resolve crash when closing codec dev on esp-hi

* fix: fix incorrect status display in non-zh-CN languages

* fix: reduce noise when not in Speaking state
2025-08-19 11:50:00 +08:00
Terrence
156eb15f58 fix: dual mic without afe 2025-08-16 03:08:00 +08:00
Xiaoxia
c59c515706 v1.8.8: release with esp-sr==2.1.4 and without font placeholder (#1086) 2025-08-15 04:50:33 +08:00
Terrence
44b8d5e4c1 fix: c3 wakeword not working with esp-sr 2.1.5 2025-08-15 01:07:00 +08:00
Xiaoxia
cc07ef447e Revert "camera 优化:在原有的RGB565处理下,容易超时改为JPEG格式 (#1029)" (#1085)
This reverts commit d6b1414967.
2025-08-14 22:23:29 +08:00
Ky1eYang
cf4afde88e add: 添加声音检测的可视化以及声波demod的准确度 (#1077)
Co-authored-by: yangkaiyue <yangkaiyue1@tenclass.com>
2025-08-14 22:11:56 +08:00
Dong Ning
d6b1414967 camera 优化:在原有的RGB565处理下,容易超时改为JPEG格式 (#1029)
* camera 优化

feat(camera): 修改摄像头配置为JPEG格式并优化图像处理逻辑

将摄像头输出格式从RGB565改为JPEG以提高传输效率,同时调整JPEG质量为10
重构预览图像处理逻辑,支持直接处理JPEG格式并自动转换为RGB565
优化Explain方法中的JPEG队列处理,减少内存分配和拷贝操作

* 修复代码缩进

调整代码缩进格式以提升可读性

* fix(esp32_camera): 修复RGB565格式预览图像的字节序问题

添加字节交换处理,将大端序转换为小端序,确保预览图像显示正确

* 使用旧的处理方式

* refactor(esp32_camera): 移除preview_buffer_直接使用preview_image_.data
2025-08-14 22:11:15 +08:00
30 changed files with 1089 additions and 49 deletions

View File

@@ -4,7 +4,7 @@
# CMakeLists in this exact order for cmake to work correctly
cmake_minimum_required(VERSION 3.16)
set(PROJECT_VER "1.8.7")
set(PROJECT_VER "1.8.9")
# Add this line to disable the specific warning
add_compile_options(-Wno-missing-field-initializers)

View File

@@ -130,7 +130,7 @@
## 大模型配置
如果你已经拥有一个小智 AI 聊天机器人设备,并且已接入官方服务器,可以登录 [xiaozhi.me](https://xiaozhi.me) 控制台进行配置。
如果你已经拥有一个小智 AI 聊天机器人设备,并且已接入官方服务器,可以登录 [xiaozhi.me](https://xiaozhi.me) 控制台进行配置。
👉 [后台操作视频教程(旧版界面)](https://www.bilibili.com/video/BV1jUCUY2EKM/)

View File

@@ -540,6 +540,12 @@ void AudioService::SetCallbacks(AudioServiceCallbacks& callbacks) {
}
void AudioService::PlaySound(const std::string_view& ogg) {
if (!codec_->output_enabled()) {
esp_timer_stop(audio_power_timer_);
esp_timer_start_periodic(audio_power_timer_, AUDIO_POWER_CHECK_INTERVAL_MS * 1000);
codec_->EnableOutput(true);
}
const uint8_t* buf = reinterpret_cast<const uint8_t*>(ogg.data());
size_t size = ogg.size();
size_t offset = 0;

View File

@@ -64,7 +64,7 @@ BoxAudioCodec::BoxAudioCodec(void* i2c_master_handle, int input_sample_rate, int
es7210_codec_cfg_t es7210_cfg = {};
es7210_cfg.ctrl_if = in_ctrl_if_;
es7210_cfg.mic_selected = ES7120_SEL_MIC1 | ES7120_SEL_MIC2 | ES7120_SEL_MIC3 | ES7120_SEL_MIC4;
es7210_cfg.mic_selected = ES7210_SEL_MIC1 | ES7210_SEL_MIC2 | ES7210_SEL_MIC3 | ES7210_SEL_MIC4;
in_codec_if_ = es7210_codec_new(&es7210_cfg);
assert(in_codec_if_ != NULL);

View File

@@ -13,11 +13,6 @@ void NoAudioProcessor::Feed(std::vector<int16_t>&& data) {
return;
}
if (data.size() != frame_samples_) {
ESP_LOGE(TAG, "Feed data size is not equal to frame size, feed size: %u, frame size: %u", data.size(), frame_samples_);
return;
}
if (codec_->input_channels() == 2) {
// If input channels is 2, we need to fetch the left channel data
auto mono_data = std::vector<int16_t>(data.size() / 2);

View File

@@ -28,7 +28,7 @@ Esp32Camera::Esp32Camera(const camera_config_t& config) {
memset(&preview_image_, 0, sizeof(preview_image_));
preview_image_.header.magic = LV_IMAGE_HEADER_MAGIC;
preview_image_.header.cf = LV_COLOR_FORMAT_RGB565;
preview_image_.header.flags = LV_IMAGE_FLAGS_ALLOCATED | LV_IMAGE_FLAGS_MODIFIABLE;
preview_image_.header.flags = 0;
switch (config.frame_size) {
case FRAMESIZE_SVGA:

View File

@@ -141,7 +141,7 @@ void ElectronEmojiDisplay::SetChatMessage(const char* role, const char* content)
}
lv_label_set_text(chat_message_label_, content);
lv_obj_clear_flag(chat_message_label_, LV_OBJ_FLAG_HIDDEN);
lv_obj_remove_flag(chat_message_label_, LV_OBJ_FLAG_HIDDEN);
ESP_LOGI(TAG, "设置聊天消息 [%s]: %s", role, content);
}
@@ -163,7 +163,7 @@ void ElectronEmojiDisplay::SetIcon(const char* icon) {
}
lv_label_set_text(chat_message_label_, icon_message.c_str());
lv_obj_clear_flag(chat_message_label_, LV_OBJ_FLAG_HIDDEN);
lv_obj_remove_flag(chat_message_label_, LV_OBJ_FLAG_HIDDEN);
ESP_LOGI(TAG, "设置图标: %s", icon);
}

View File

@@ -141,8 +141,7 @@ void AdcPdmAudioCodec::EnableInput(bool enable) {
};
ESP_ERROR_CHECK(esp_codec_dev_open(input_dev_, &fs));
} else {
// ESP_ERROR_CHECK(esp_codec_dev_close(input_dev_));
return;
ESP_ERROR_CHECK(esp_codec_dev_close(input_dev_));
}
AudioCodec::EnableInput(enable);
}

View File

@@ -3,6 +3,7 @@
#include <esp_log.h>
#include "mmap_generate_emoji.h"
#include "emoji_display.h"
#include "assets/lang_config.h"
#include <esp_lcd_panel_io.h>
#include <freertos/FreeRTOS.h>
@@ -146,9 +147,9 @@ void EmojiWidget::SetEmotion(const char* emotion)
void EmojiWidget::SetStatus(const char* status)
{
if (player_) {
if (strcmp(status, "聆听中...") == 0) {
if (strcmp(status, Lang::Strings::LISTENING) == 0) {
player_->StartPlayer(MMAP_EMOJI_ASKING_AAF, true, 15);
} else if (strcmp(status, "待命") == 0) {
} else if (strcmp(status, Lang::Strings::STANDBY) == 0) {
player_->StartPlayer(MMAP_EMOJI_WAKE_AAF, true, 15);
}
}

View File

@@ -23,6 +23,7 @@
#include "servo_dog_ctrl.h"
#include "led_strip.h"
#include "driver/rmt_tx.h"
#include "device_state_event.h"
#include "sdkconfig.h"
@@ -284,13 +285,14 @@ private:
ESP_LOGI(TAG, "Create emoji widget, panel: %p, panel_io: %p", panel, panel_io);
display_ = new anim::EmojiWidget(panel, panel_io);
#if CONFIG_ESP_CONSOLE_NONE
servo_dog_ctrl_config_t config = {
.fl_gpio_num = FL_GPIO_NUM,
.fr_gpio_num = FR_GPIO_NUM,
.bl_gpio_num = BL_GPIO_NUM,
.br_gpio_num = BR_GPIO_NUM,
};
#if CONFIG_ESP_CONSOLE_NONE
servo_dog_ctrl_init(&config);
#endif
}
@@ -378,7 +380,7 @@ private:
int r = properties["r"].value<int>();
int g = properties["g"].value<int>();
int b = properties["b"].value<int>();
led_on_ = true;
SetLedColor(r, g, b);
return true;
@@ -395,6 +397,11 @@ public:
InitializeSpi();
InitializeLcdDisplay();
InitializeTools();
DeviceStateEventManager::GetInstance().RegisterStateChangeCallback([this](DeviceState previous_state, DeviceState current_state) {
ESP_LOGD(TAG, "Device state changed from %d to %d", previous_state, current_state);
this->GetAudioCodec()->EnableOutput(current_state == kDeviceStateSpeaking);
});
}
virtual AudioCodec* GetAudioCodec() override

View File

@@ -64,7 +64,7 @@ CoreS3AudioCodec::CoreS3AudioCodec(void* i2c_master_handle, int input_sample_rat
es7210_codec_cfg_t es7210_cfg = {};
es7210_cfg.ctrl_if = in_ctrl_if_;
es7210_cfg.mic_selected = ES7120_SEL_MIC1 | ES7120_SEL_MIC2 | ES7120_SEL_MIC3;
es7210_cfg.mic_selected = ES7210_SEL_MIC1 | ES7210_SEL_MIC2 | ES7210_SEL_MIC3;
in_codec_if_ = es7210_codec_new(&es7210_cfg);
assert(in_codec_if_ != NULL);

View File

@@ -66,7 +66,7 @@ Tab5AudioCodec::Tab5AudioCodec(void* i2c_master_handle, int input_sample_rate, i
es7210_codec_cfg_t es7210_cfg = {};
es7210_cfg.ctrl_if = in_ctrl_if_;
es7210_cfg.mic_selected = ES7120_SEL_MIC1 | ES7120_SEL_MIC2 | ES7120_SEL_MIC3 | ES7120_SEL_MIC4;
es7210_cfg.mic_selected = ES7210_SEL_MIC1 | ES7210_SEL_MIC2 | ES7210_SEL_MIC3 | ES7210_SEL_MIC4;
in_codec_if_ = es7210_codec_new(&es7210_cfg);
assert(in_codec_if_ != NULL);

View File

@@ -142,7 +142,7 @@ void OttoEmojiDisplay::SetChatMessage(const char* role, const char* content) {
}
lv_label_set_text(chat_message_label_, content);
lv_obj_clear_flag(chat_message_label_, LV_OBJ_FLAG_HIDDEN);
lv_obj_remove_flag(chat_message_label_, LV_OBJ_FLAG_HIDDEN);
ESP_LOGI(TAG, "设置聊天消息 [%s]: %s", role, content);
}
@@ -164,7 +164,7 @@ void OttoEmojiDisplay::SetIcon(const char* icon) {
}
lv_label_set_text(chat_message_label_, icon_message.c_str());
lv_obj_clear_flag(chat_message_label_, LV_OBJ_FLAG_HIDDEN);
lv_obj_remove_flag(chat_message_label_, LV_OBJ_FLAG_HIDDEN);
ESP_LOGI(TAG, "设置图标: %s", icon);
}

View File

@@ -15,7 +15,7 @@ public:
void SetupHighTempWarningPopup() {
// 创建高温警告弹窗
high_temp_popup_ = lv_obj_create(lv_scr_act()); // 使用当前屏幕
high_temp_popup_ = lv_obj_create(lv_screen_active()); // 使用当前屏幕
lv_obj_set_scrollbar_mode(high_temp_popup_, LV_SCROLLBAR_MODE_OFF);
lv_obj_set_size(high_temp_popup_, LV_HOR_RES * 0.9, fonts_.text_font->line_height * 2);
lv_obj_align(high_temp_popup_, LV_ALIGN_BOTTOM_MID, 0, 0);
@@ -47,7 +47,7 @@ public:
void ShowHighTempWarning() {
if (high_temp_popup_ && lv_obj_has_flag(high_temp_popup_, LV_OBJ_FLAG_HIDDEN)) {
lv_obj_clear_flag(high_temp_popup_, LV_OBJ_FLAG_HIDDEN);
lv_obj_remove_flag(high_temp_popup_, LV_OBJ_FLAG_HIDDEN);
}
}

View File

@@ -21,7 +21,7 @@ Display::Display() {
Display *display = static_cast<Display*>(arg);
DisplayLockGuard lock(display);
lv_obj_add_flag(display->notification_label_, LV_OBJ_FLAG_HIDDEN);
lv_obj_clear_flag(display->status_label_, LV_OBJ_FLAG_HIDDEN);
lv_obj_remove_flag(display->status_label_, LV_OBJ_FLAG_HIDDEN);
},
.arg = this,
.dispatch_method = ESP_TIMER_TASK,
@@ -67,7 +67,7 @@ void Display::SetStatus(const char* status) {
return;
}
lv_label_set_text(status_label_, status);
lv_obj_clear_flag(status_label_, LV_OBJ_FLAG_HIDDEN);
lv_obj_remove_flag(status_label_, LV_OBJ_FLAG_HIDDEN);
lv_obj_add_flag(notification_label_, LV_OBJ_FLAG_HIDDEN);
last_status_update_time_ = std::chrono::system_clock::now();
@@ -83,7 +83,7 @@ void Display::ShowNotification(const char* notification, int duration_ms) {
return;
}
lv_label_set_text(notification_label_, notification);
lv_obj_clear_flag(notification_label_, LV_OBJ_FLAG_HIDDEN);
lv_obj_remove_flag(notification_label_, LV_OBJ_FLAG_HIDDEN);
lv_obj_add_flag(status_label_, LV_OBJ_FLAG_HIDDEN);
esp_timer_stop(notification_timer_);
@@ -157,7 +157,7 @@ void Display::UpdateStatusBar(bool update_all) {
if (low_battery_popup_ != nullptr) {
if (strcmp(icon, FONT_AWESOME_BATTERY_EMPTY) == 0 && discharging) {
if (lv_obj_has_flag(low_battery_popup_, LV_OBJ_FLAG_HIDDEN)) { // 如果低电量提示框隐藏,则显示
lv_obj_clear_flag(low_battery_popup_, LV_OBJ_FLAG_HIDDEN);
lv_obj_remove_flag(low_battery_popup_, LV_OBJ_FLAG_HIDDEN);
app.PlaySound(Lang::Sounds::OGG_LOW_BATTERY);
}
} else {

View File

@@ -105,7 +105,7 @@ SpiLcdDisplay::SpiLcdDisplay(esp_lcd_panel_io_handle_t panel_io, esp_lcd_panel_h
ESP_LOGI(TAG, "Initialize LVGL port");
lvgl_port_cfg_t port_cfg = ESP_LVGL_PORT_INIT_CONFIG();
port_cfg.task_priority = 1;
port_cfg.timer_period_ms = 50;
port_cfg.timer_period_ms = 40;
lvgl_port_init(&port_cfg);
ESP_LOGI(TAG, "Adding LCD display");
@@ -814,11 +814,11 @@ void LcdDisplay::SetPreviewImage(const lv_img_dsc_t* img_dsc) {
}
if (img_dsc != nullptr) {
// zoom factor 0.5
lv_image_set_scale(preview_image_, 128 * width_ / img_dsc->header.w);
// 设置图片源并显示预览图片
lv_image_set_src(preview_image_, img_dsc);
lv_obj_clear_flag(preview_image_, LV_OBJ_FLAG_HIDDEN);
// zoom factor 0.5
lv_image_set_scale(preview_image_, 128 * width_ / img_dsc->header.w);
lv_obj_remove_flag(preview_image_, LV_OBJ_FLAG_HIDDEN);
// 隐藏emotion_label_
if (emotion_label_ != nullptr) {
lv_obj_add_flag(emotion_label_, LV_OBJ_FLAG_HIDDEN);
@@ -827,7 +827,7 @@ void LcdDisplay::SetPreviewImage(const lv_img_dsc_t* img_dsc) {
// 隐藏预览图片并显示emotion_label_
lv_obj_add_flag(preview_image_, LV_OBJ_FLAG_HIDDEN);
if (emotion_label_ != nullptr) {
lv_obj_clear_flag(emotion_label_, LV_OBJ_FLAG_HIDDEN);
lv_obj_remove_flag(emotion_label_, LV_OBJ_FLAG_HIDDEN);
}
}
}
@@ -883,7 +883,7 @@ void LcdDisplay::SetEmotion(const char* emotion) {
#if !CONFIG_USE_WECHAT_MESSAGE_STYLE
// 显示emotion_label_隐藏preview_image_
lv_obj_clear_flag(emotion_label_, LV_OBJ_FLAG_HIDDEN);
lv_obj_remove_flag(emotion_label_, LV_OBJ_FLAG_HIDDEN);
if (preview_image_ != nullptr) {
lv_obj_add_flag(preview_image_, LV_OBJ_FLAG_HIDDEN);
}
@@ -900,7 +900,7 @@ void LcdDisplay::SetIcon(const char* icon) {
#if !CONFIG_USE_WECHAT_MESSAGE_STYLE
// 显示emotion_label_隐藏preview_image_
lv_obj_clear_flag(emotion_label_, LV_OBJ_FLAG_HIDDEN);
lv_obj_remove_flag(emotion_label_, LV_OBJ_FLAG_HIDDEN);
if (preview_image_ != nullptr) {
lv_obj_add_flag(preview_image_, LV_OBJ_FLAG_HIDDEN);
}

View File

@@ -23,7 +23,7 @@ OledDisplay::OledDisplay(esp_lcd_panel_io_handle_t panel_io, esp_lcd_panel_handl
lvgl_port_cfg_t port_cfg = ESP_LVGL_PORT_INIT_CONFIG();
port_cfg.task_priority = 1;
port_cfg.task_stack = 6144;
port_cfg.timer_period_ms = 50;
port_cfg.timer_period_ms = 40;
lvgl_port_init(&port_cfg);
ESP_LOGI(TAG, "Adding OLED display");
@@ -112,7 +112,7 @@ void OledDisplay::SetChatMessage(const char* role, const char* content) {
lv_obj_add_flag(content_right_, LV_OBJ_FLAG_HIDDEN);
} else {
lv_label_set_text(chat_message_label_, content_str.c_str());
lv_obj_clear_flag(content_right_, LV_OBJ_FLAG_HIDDEN);
lv_obj_remove_flag(content_right_, LV_OBJ_FLAG_HIDDEN);
}
}
}

View File

@@ -13,27 +13,27 @@ dependencies:
espressif/esp_io_expander_tca9554: ==2.0.0
espressif/esp_lcd_panel_io_additions: ^1.0.1
78/esp_lcd_nv3023: ~1.0.0
78/esp-wifi-connect: ~2.5.0
78/esp-wifi-connect: ~2.5.1
78/esp-opus-encoder: ~2.4.1
78/esp-ml307: ~3.2.6
78/esp-ml307: ~3.2.8
78/xiaozhi-fonts: ~1.4.0
espressif/led_strip: ^2.5.5
espressif/esp_codec_dev: ~1.3.6
espressif/esp-sr: ~2.1.4
espressif/led_strip: ~3.0.1
espressif/esp_codec_dev: ~1.4.0
espressif/esp-sr: ~2.1.5
espressif/button: ~4.1.3
espressif/knob: ^1.0.0
espressif/esp32-camera: ^2.0.15
espressif/esp32-camera: ~2.1.2
espressif/esp_lcd_touch_ft5x06: ~1.0.7
espressif/esp_lcd_touch_gt911: ^1
espressif/esp_lcd_touch_gt1151: ^1
waveshare/esp_lcd_touch_cst9217: ^1.0.3
espressif/esp_lcd_touch_cst816s: ^1.0.6
lvgl/lvgl: ~9.2.2
lvgl/lvgl: ~9.3.0
esp_lvgl_port: ~2.6.0
espressif/esp_io_expander_tca95xx_16bit: ^2.0.0
espressif2022/image_player: ==1.1.0~1
espressif2022/esp_emote_gfx: ^1.0.0
espressif/adc_mic: ^0.2.0
espressif/adc_mic: ^0.2.1
espressif/esp_mmap_assets: '>=1.2'
txp666/otto-emoji-gif-component: ~1.0.2
espressif/adc_battery_estimation: ^0.2.0

View File

@@ -15,7 +15,7 @@ CircularStrip::CircularStrip(gpio_num_t gpio, uint8_t max_leds) : max_leds_(max_
led_strip_config_t strip_config = {};
strip_config.strip_gpio_num = gpio;
strip_config.max_leds = max_leds_;
strip_config.led_pixel_format = LED_PIXEL_FORMAT_GRB;
strip_config.color_component_format = LED_STRIP_COLOR_COMPONENT_FMT_GRB;
strip_config.led_model = LED_MODEL_WS2812;
led_strip_rmt_config_t rmt_config = {};

View File

@@ -18,7 +18,7 @@ SingleLed::SingleLed(gpio_num_t gpio) {
led_strip_config_t strip_config = {};
strip_config.strip_gpio_num = gpio;
strip_config.max_leds = 1;
strip_config.led_pixel_format = LED_PIXEL_FORMAT_GRB;
strip_config.color_component_format = LED_STRIP_COLOR_COMPONENT_FMT_GRB;
strip_config.led_model = LED_MODEL_WS2812;
led_strip_rmt_config_t rmt_config = {};

View File

@@ -55,13 +55,15 @@ std::unique_ptr<Http> Ota::SetupHttp() {
auto network = board.GetNetwork();
auto http = network->CreateHttp(0);
auto user_agent = std::string(BOARD_NAME "/") + app_desc->version;
http->SetHeader("Activation-Version", has_serial_number_ ? "2" : "1");
http->SetHeader("Device-Id", SystemInfo::GetMacAddress().c_str());
http->SetHeader("Client-Id", board.GetUuid());
if (has_serial_number_) {
http->SetHeader("Serial-Number", serial_number_.c_str());
ESP_LOGI(TAG, "Setup HTTP, User-Agent: %s, Serial-Number: %s", user_agent.c_str(), serial_number_.c_str());
}
http->SetHeader("User-Agent", std::string(BOARD_NAME "/") + app_desc->version);
http->SetHeader("User-Agent", user_agent);
http->SetHeader("Accept-Language", Lang::CODE);
http->SetHeader("Content-Type", "application/json");

View File

@@ -0,0 +1,280 @@
"""
实时AFSK解调器 - 基于Goertzel算法
"""
import numpy as np
from collections import deque
class TraceGoertzel:
    """Streaming Goertzel filter: estimates the amplitude of a single frequency bin."""

    def __init__(self, freq: float, n: int):
        """
        Args:
            freq: normalized frequency (target frequency / sample rate)
            n: window size in samples
        """
        self.freq = freq
        self.n = n
        # Pre-computed recurrence coefficients.
        self.k = int(freq * n)
        self.w = 2.0 * np.pi * freq
        self.cw = np.cos(self.w)
        self.sw = np.sin(self.w)
        self.c = 2.0 * self.cw
        # The two most recent filter states S[n-1], S[n-2].
        self.zs = deque([0.0, 0.0], maxlen=2)

    def reset(self):
        """Zero the filter state."""
        self.zs.clear()
        self.zs.extend([0.0, 0.0])

    def __call__(self, xs):
        """
        Run the recurrence S[n] = x[n] + C*S[n-1] - S[n-2] over a window of
        samples and return the resulting amplitude estimate.
        """
        self.reset()
        for sample in xs:
            prev1, prev2 = self.zs[-1], self.zs[-2]
            self.zs.append(float(sample + self.c * prev1 - prev2))
        return self.amp

    @property
    def amp(self) -> float:
        """Amplitude of the target bin derived from the current filter state."""
        prev1, prev2 = self.zs[-1], self.zs[-2]
        in_phase = self.cw * prev1 - prev2
        quadrature = self.sw * prev1
        return np.sqrt(in_phase ** 2 + quadrature ** 2) / (self.n / 2.0)
class PairGoertzel:
    """Dual-tone Goertzel demodulator: emits mark/space amplitudes once per bit."""

    def __init__(self, f_sample: int, f_space: int, f_mark: int,
                 bit_rate: int, win_size: int):
        """
        Args:
            f_sample: sample rate (Hz)
            f_space: space tone frequency (logical 0)
            f_mark: mark tone frequency (logical 1)
            bit_rate: bits per second
            win_size: Goertzel window length in samples
        """
        assert f_sample % bit_rate == 0, "采样频率必须是比特率的整数倍"
        self.Fs = f_sample
        self.F0 = f_space
        self.F1 = f_mark
        self.bit_rate = bit_rate
        # Samples per bit period.
        self.n_per_bit = int(f_sample // bit_rate)
        # Normalized tone frequencies.
        f0 = f_space / f_sample
        f1 = f_mark / f_sample
        # One Goertzel filter per tone.
        self.g0 = TraceGoertzel(freq=f0, n=win_size)
        self.g1 = TraceGoertzel(freq=f1, n=win_size)
        # Rolling window of the most recent samples.
        self.in_buffer = deque(maxlen=win_size)
        self.out_count = 0
        print(f"PairGoertzel initialized: f0={f0:.6f}, f1={f1:.6f}, win_size={win_size}, n_per_bit={self.n_per_bit}")

    def __call__(self, s: float):
        """
        Push one sample. Once per bit period, return
        (space amplitude, mark amplitude, mark probability);
        between bit boundaries return (0, 0, None).
        """
        self.in_buffer.append(s)
        self.out_count += 1
        if self.out_count < self.n_per_bit:
            return 0, 0, None
        self.out_count = 0
        amp0 = self.g0(self.in_buffer)  # space-tone amplitude
        amp1 = self.g1(self.in_buffer)  # mark-tone amplitude
        return amp0, amp1, amp1 / (amp0 + amp1 + 1e-8)
class RealTimeAFSKDecoder:
    """Real-time AFSK decoder triggered by a start-frame bit pattern."""

    def __init__(self, f_sample: int = 16000, mark_freq: int = 1800,
                 space_freq: int = 1500, bitrate: int = 100,
                 s_goertzel: int = 9, threshold: float = 0.5):
        """
        Initialize the real-time AFSK decoder.

        Args:
            f_sample: sample rate (Hz)
            mark_freq: mark tone frequency (logical 1)
            space_freq: space tone frequency (logical 0)
            bitrate: bit rate (bits/s)
            s_goertzel: Goertzel window factor
                (win_size = f_sample // mark_freq * s_goertzel)
            threshold: decision threshold on the mark probability
        """
        self.f_sample = f_sample
        self.mark_freq = mark_freq
        self.space_freq = space_freq
        self.bitrate = bitrate
        self.threshold = threshold
        # Goertzel window size, scaled to cover s_goertzel mark-tone periods.
        win_size = int(f_sample / mark_freq * s_goertzel)
        # Dual-tone demodulator producing one probability per bit period.
        self.demodulator = PairGoertzel(f_sample, space_freq, mark_freq,
                                        bitrate, win_size)
        # Frame delimiters: start = 0x01 0x02, end = 0x03 0x04 (as bit strings).
        self.start_bytes = b'\x01\x02'
        self.end_bytes = b'\x03\x04'
        self.start_bits = "".join(format(int(x), '08b') for x in self.start_bytes)
        self.end_bits = "".join(format(int(x), '08b') for x in self.end_bytes)
        # State machine
        self.state = "idle"  # idle / entering
        # Demodulation results
        self.buffer_prelude:deque = deque(maxlen=len(self.start_bits))  # rolling window used to spot start/end frames
        self.indicators = []  # history of per-bit mark probabilities
        self.signal_bits = ""  # payload bits collected while in "entering"
        self.text_cache = ""  # text already decoded for the current frame
        # Decode results
        self.decoded_messages = []
        self.total_bits_received = 0
        print(f"Decoder initialized: win_size={win_size}")
        print(f"Start frame: {self.start_bits} (from {self.start_bytes.hex()})")
        print(f"End frame: {self.end_bits} (from {self.end_bytes.hex()})")

    def process_audio(self, samples: np.array) -> str:
        """
        Feed a chunk of audio samples and return any newly decoded text.

        Args:
            samples: iterable of (normalized) PCM samples
        Returns:
            the newly decoded text (empty string if nothing new)
        """
        new_text = ""
        # Process one sample at a time.
        for sample in samples:
            amp0, amp1, p1_prob = self.demodulator(sample)
            # A probability is emitted once per bit period; slice it into a bit.
            if p1_prob is not None:
                bit = '1' if p1_prob > self.threshold else '0'
                match self.state:
                    case "idle":
                        self.buffer_prelude.append(bit)
                        pass
                    case "entering":
                        self.buffer_prelude.append(bit)
                        self.signal_bits += bit
                        self.total_bits_received += 1
                    case _:
                        pass
                self.indicators.append(p1_prob)
                # State machine: start frame arms collection; end frame (or a
                # 256-bit overflow guard) returns to idle.
                if self.state == "idle" and "".join(self.buffer_prelude) == self.start_bits:
                    self.state = "entering"
                    self.text_cache = ""
                    self.signal_bits = ""  # clear the payload bit buffer
                    self.buffer_prelude.clear()
                elif self.state == "entering" and ("".join(self.buffer_prelude) == self.end_bits or len(self.signal_bits) >= 256):
                    self.state = "idle"
                    self.buffer_prelude.clear()
        # Attempt a decode once at least one byte has accumulated; only the
        # suffix beyond what was previously decoded is returned.
        if len(self.signal_bits) >= 8:
            text = self._decode_bits_to_text(self.signal_bits)
            if len(text) > len(self.text_cache):
                new_text = text[len(self.text_cache) - len(text):]
                self.text_cache = text
        return new_text

    def _decode_bits_to_text(self, bits: str) -> str:
        """
        Decode a bit string into printable-ASCII text.

        Args:
            bits: bit string ('0'/'1' characters)
        Returns:
            the decoded text (non-printable bytes are dropped)
        """
        if len(bits) < 8:
            return ""
        decoded_text = ""
        byte_count = len(bits) // 8
        for i in range(byte_count):
            # Take the next 8 bits.
            byte_bits = bits[i*8:(i+1)*8]
            # Bits -> byte value.
            byte_val = int(byte_bits, 2)
            # Keep printable ASCII only.
            if 32 <= byte_val <= 126:
                decoded_text += chr(byte_val)
            elif byte_val == 0:  # NULs are skipped
                continue
            else:
                # Other non-printable bytes are silently dropped.
                pass
                # decoded_text += f"\\x{byte_val:02X}"
        return decoded_text

    def clear(self):
        """Reset all decode state (does not reset the demodulator filters)."""
        self.indicators = []
        self.signal_bits = ""
        self.decoded_messages = []
        self.total_bits_received = 0
        print("解码器状态已清空")

    def get_stats(self) -> dict:
        """Return a snapshot of decoder statistics for display."""
        return {
            'prelude_bits': "".join(self.buffer_prelude),
            "state": self.state,
            # NOTE(review): text_cache is a str, so this sums 1 per character
            # and equals len(self.text_cache) — confirm intent.
            'total_chars': sum(len(msg) for msg in self.text_cache),
            'buffer_bits': len(self.signal_bits),
            'mark_freq': self.mark_freq,
            'space_freq': self.space_freq,
            'bitrate': self.bitrate,
            'threshold': self.threshold,
        }

View File

@@ -0,0 +1,444 @@
import sys
import numpy as np
import asyncio
import wave
from collections import deque
import qasync
import matplotlib
matplotlib.use('qtagg')
from matplotlib.backends.backend_qtagg import FigureCanvasQTAgg as FigureCanvas
from matplotlib.backends.backend_qtagg import NavigationToolbar2QT as NavigationToolbar # noqa: F401
from matplotlib.figure import Figure
from PyQt6.QtWidgets import (QApplication, QMainWindow, QVBoxLayout, QWidget,
QHBoxLayout, QLineEdit, QPushButton, QLabel, QTextEdit)
from PyQt6.QtCore import QTimer
# 导入解码器
from demod import RealTimeAFSKDecoder
class UDPServerProtocol(asyncio.DatagramProtocol):
    """Datagram protocol that forwards audio bytes from a single client into a queue."""

    def __init__(self, data_queue):
        self.client_address = None  # Pinned to the first peer that sends data.
        self.data_queue: deque = data_queue

    def connection_made(self, transport):
        self.transport = transport

    def datagram_received(self, data, addr):
        # Lock onto the first client we hear from.
        if self.client_address is None:
            self.client_address = addr
            print(f"接受来自 {addr} 的连接")
        # Drop datagrams from anyone else.
        if addr != self.client_address:
            print(f"忽略来自未知地址 {addr} 的数据")
            return
        self.data_queue.extend(data)
class MatplotlibWidget(QWidget):
    """Qt widget embedding a Matplotlib figure; periodically drains the UDP
    byte queue to plot waveform/spectrum and feed the AFSK decoder."""

    def __init__(self, parent=None):
        super().__init__(parent)
        # Matplotlib Figure and its Qt canvas container.
        self.figure = Figure()
        self.canvas = FigureCanvas(self.figure)
        # Navigation toolbar is currently disabled.
        # self.toolbar = NavigationToolbar(self.canvas, self)
        self.toolbar = None
        layout = QVBoxLayout()
        # FIX: the original unconditionally called layout.addWidget(self.toolbar)
        # with toolbar == None, which PyQt6 rejects; only add it when present.
        if self.toolbar is not None:
            layout.addWidget(self.toolbar)
        layout.addWidget(self.canvas)
        self.setLayout(layout)
        # Audio parameters.
        self.freq = 16000  # sample rate (Hz)
        self.time_window = 20  # seconds of audio shown
        # Raw byte queue (2 bytes per 16-bit sample) filled by the UDP protocol.
        self.wave_data = deque(maxlen=self.freq * self.time_window * 2)
        # Normalized float samples used for plotting/saving.
        self.signals = deque(maxlen=self.freq * self.time_window)
        # Two stacked subplots: time domain (top) and frequency domain (bottom).
        self.ax1 = self.figure.add_subplot(2, 1, 1)
        self.ax2 = self.figure.add_subplot(2, 1, 2)
        self.ax1.set_title('Real-time Audio Waveform')
        self.ax1.set_xlabel('Sample Index')
        self.ax1.set_ylabel('Amplitude')
        self.line_time, = self.ax1.plot([], [])
        self.ax1.grid(True, alpha=0.3)
        self.ax2.set_title('Real-time Frequency Spectrum')
        self.ax2.set_xlabel('Frequency (Hz)')
        self.ax2.set_ylabel('Magnitude')
        self.line_freq, = self.ax2.plot([], [])
        self.ax2.grid(True, alpha=0.3)
        self.figure.tight_layout()
        # Refresh timer: redraw every 100 ms.
        self.timer = QTimer(self)
        self.timer.setInterval(100)
        self.timer.timeout.connect(self.update_plot)
        # AFSK decoder fed from the drained samples.
        self.decoder = RealTimeAFSKDecoder(
            f_sample=self.freq,
            mark_freq=1800,
            space_freq=1500,
            bitrate=100,
            s_goertzel=9,
            threshold=0.5
        )
        # Called with newly decoded text; assigned by MainWindow.
        self.decode_callback = None

    def start_plotting(self):
        """Start the periodic redraw timer."""
        self.timer.start()

    def stop_plotting(self):
        """Stop the periodic redraw timer."""
        self.timer.stop()

    def update_plot(self):
        """Drain pending bytes, run the decoder, and redraw both subplots."""
        if len(self.wave_data) >= 2:
            # Drain an even number of bytes so samples stay aligned.
            even = len(self.wave_data) // 2 * 2
            print(f"length of wave_data: {len(self.wave_data)}")
            drained = [self.wave_data.popleft() for _ in range(even)]
            # Little-endian 16-bit PCM, normalized to [-1, 1).
            signal = np.frombuffer(bytearray(drained), dtype='<i2') / 32768
            # Feed the new samples to the decoder; returns only the new text.
            decoded_text_new = self.decoder.process_audio(signal)
            if decoded_text_new and self.decode_callback:
                self.decode_callback(decoded_text_new)
            self.signals.extend(signal.tolist())  # keep for plotting/saving
        if len(self.signals) > 0:
            # Show at most time_window seconds to keep the plot readable.
            signal = np.array(self.signals)
            max_samples = min(len(signal), self.freq * self.time_window)
            if len(signal) > max_samples:
                signal = signal[-max_samples:]
            # Time-domain plot.
            x = np.arange(len(signal))
            self.line_time.set_data(x, signal)
            # Auto-scale the time-domain axes.
            if len(signal) > 0:
                self.ax1.set_xlim(0, len(signal))
                y_min, y_max = np.min(signal), np.max(signal)
                if y_min != y_max:
                    margin = (y_max - y_min) * 0.1
                    self.ax1.set_ylim(y_min - margin, y_max + margin)
                else:
                    self.ax1.set_ylim(-1, 1)
            # Frequency-domain plot (magnitude spectrum via FFT).
            if len(signal) > 1:
                fft_signal = np.abs(np.fft.fft(signal))
                frequencies = np.fft.fftfreq(len(signal), 1/self.freq)
                # Keep only non-negative frequencies.
                positive_freq_idx = frequencies >= 0
                freq_positive = frequencies[positive_freq_idx]
                fft_positive = fft_signal[positive_freq_idx]
                self.line_freq.set_data(freq_positive, fft_positive)
                # Auto-scale the frequency axes, capped at 4 kHz for readability.
                if len(fft_positive) > 0:
                    max_freq_show = min(4000, self.freq // 2)
                    freq_mask = freq_positive <= max_freq_show
                    if np.any(freq_mask):
                        self.ax2.set_xlim(0, max_freq_show)
                        fft_masked = fft_positive[freq_mask]
                        if len(fft_masked) > 0:
                            fft_max = np.max(fft_masked)
                            if fft_max > 0:
                                self.ax2.set_ylim(0, fft_max * 1.1)
                            else:
                                self.ax2.set_ylim(0, 1)
            self.canvas.draw()
class MainWindow(QMainWindow):
    """Main application window: plot area, UDP listen controls, and the
    real-time AFSK decode panel."""

    def __init__(self):
        super().__init__()
        self.setWindowTitle("Acoustic Check")
        self.setGeometry(100, 100, 1000, 800)
        # Central widget and top-level layout.
        main_widget = QWidget()
        self.setCentralWidget(main_widget)
        main_layout = QVBoxLayout(main_widget)
        # Plot area.
        self.matplotlib_widget = MatplotlibWidget()
        main_layout.addWidget(self.matplotlib_widget)
        # Control panel: address/port inputs, listen toggle, status, save.
        control_panel = QWidget()
        control_layout = QHBoxLayout(control_panel)
        control_layout.addWidget(QLabel("监听地址:"))
        self.address_input = QLineEdit("0.0.0.0")
        self.address_input.setFixedWidth(120)
        control_layout.addWidget(self.address_input)
        control_layout.addWidget(QLabel("端口:"))
        self.port_input = QLineEdit("8000")
        self.port_input.setFixedWidth(80)
        control_layout.addWidget(self.port_input)
        self.listen_button = QPushButton("开始监听")
        self.listen_button.clicked.connect(self.toggle_listening)
        control_layout.addWidget(self.listen_button)
        self.status_label = QLabel("状态: 未连接")
        control_layout.addWidget(self.status_label)
        self.data_label = QLabel("接收数据: 0 bytes")
        control_layout.addWidget(self.data_label)
        self.save_button = QPushButton("保存音频")
        self.save_button.clicked.connect(self.save_audio)
        self.save_button.setEnabled(False)
        control_layout.addWidget(self.save_button)
        control_layout.addStretch()
        main_layout.addWidget(control_panel)
        # Decode result panel.
        decode_panel = QWidget()
        decode_layout = QVBoxLayout(decode_panel)
        decode_title = QLabel("实时AFSK解码结果:")
        decode_title.setStyleSheet("font-weight: bold; font-size: 14px;")
        decode_layout.addWidget(decode_title)
        self.decode_text = QTextEdit()
        self.decode_text.setMaximumHeight(150)
        self.decode_text.setReadOnly(True)
        self.decode_text.setStyleSheet("font-family: 'Courier New', monospace; font-size: 12px;")
        decode_layout.addWidget(self.decode_text)
        decode_control_layout = QHBoxLayout()
        self.clear_decode_button = QPushButton("清空解码")
        self.clear_decode_button.clicked.connect(self.clear_decode_text)
        decode_control_layout.addWidget(self.clear_decode_button)
        self.decode_stats_label = QLabel("解码统计: 0 bits, 0 chars")
        decode_control_layout.addWidget(self.decode_stats_label)
        decode_control_layout.addStretch()
        decode_layout.addLayout(decode_control_layout)
        main_layout.addWidget(decode_panel)
        # Route decoded text from the plot widget into the decode panel.
        self.matplotlib_widget.decode_callback = self.on_decode_text
        # UDP state.
        self.udp_transport = None
        self.is_listening = False
        # Statistics refresh timer (once per second).
        self.stats_timer = QTimer(self)
        self.stats_timer.setInterval(1000)
        self.stats_timer.timeout.connect(self.update_stats)

    def on_decode_text(self, new_text: str):
        """Append newly decoded text to the panel, keeping the last 1000 chars."""
        if new_text:
            current_text = self.decode_text.toPlainText()
            updated_text = current_text + new_text
            if len(updated_text) > 1000:
                updated_text = updated_text[-1000:]
            self.decode_text.setPlainText(updated_text)
            # Scroll to the bottom.
            cursor = self.decode_text.textCursor()
            cursor.movePosition(cursor.MoveOperation.End)
            self.decode_text.setTextCursor(cursor)

    def clear_decode_text(self):
        """Clear the decode panel and reset the decoder state."""
        self.decode_text.clear()
        if hasattr(self.matplotlib_widget, 'decoder'):
            self.matplotlib_widget.decoder.clear()
        self.decode_stats_label.setText("解码统计: 0 bits, 0 chars")

    def update_decode_stats(self):
        """Refresh the decoder statistics label."""
        if hasattr(self.matplotlib_widget, 'decoder'):
            stats = self.matplotlib_widget.decoder.get_stats()
            stats_text = (
                f"前置: {stats['prelude_bits']} , 已接收{stats['total_chars']} chars, "
                f"缓冲: {stats['buffer_bits']} bits, 状态: {stats['state']}"
            )
            self.decode_stats_label.setText(stats_text)

    def toggle_listening(self):
        """Start or stop UDP listening depending on the current state."""
        if not self.is_listening:
            self.start_listening()
        else:
            self.stop_listening()

    async def start_listening_async(self):
        """Create the UDP endpoint on the configured address/port."""
        try:
            address = self.address_input.text().strip()
            port = int(self.port_input.text().strip())
            loop = asyncio.get_running_loop()
            self.udp_transport, protocol = await loop.create_datagram_endpoint(
                lambda: UDPServerProtocol(self.matplotlib_widget.wave_data),
                local_addr=(address, port)
            )
            self.status_label.setText(f"状态: 监听中 ({address}:{port})")
            print(f"UDP服务器启动, 监听 {address}:{port}")
        except Exception as e:
            # Roll back the UI to the stopped state on failure.
            self.status_label.setText(f"状态: 启动失败 - {str(e)}")
            print(f"UDP服务器启动失败: {e}")
            self.is_listening = False
            self.listen_button.setText("开始监听")
            self.address_input.setEnabled(True)
            self.port_input.setEnabled(True)

    def start_listening(self):
        """Validate inputs, flip the UI to listening, and launch the server."""
        try:
            int(self.port_input.text().strip())  # validate port number format
        except ValueError:
            self.status_label.setText("状态: 端口号必须是数字")
            return
        self.is_listening = True
        self.listen_button.setText("停止监听")
        self.address_input.setEnabled(False)
        self.port_input.setEnabled(False)
        self.save_button.setEnabled(True)
        # Drop any stale bytes from a previous session.
        self.matplotlib_widget.wave_data.clear()
        # Start plotting and statistics updates.
        self.matplotlib_widget.start_plotting()
        self.stats_timer.start()
        # Launch the UDP server on the running (qasync) loop.
        loop = asyncio.get_event_loop()
        loop.create_task(self.start_listening_async())

    def stop_listening(self):
        """Stop the UDP server, plotting, and statistics updates."""
        self.is_listening = False
        self.listen_button.setText("开始监听")
        self.address_input.setEnabled(True)
        self.port_input.setEnabled(True)
        # Tear down the UDP server.
        if self.udp_transport:
            self.udp_transport.close()
            self.udp_transport = None
        # Stop plotting and statistics updates.
        self.matplotlib_widget.stop_plotting()
        self.matplotlib_widget.wave_data.clear()
        self.stats_timer.stop()
        self.status_label.setText("状态: 已停止")

    def update_stats(self):
        """Refresh the received-data counter and decode statistics."""
        data_size = len(self.matplotlib_widget.signals)
        self.data_label.setText(f"接收数据: {data_size} 采样")
        self.update_decode_stats()

    def save_audio(self):
        """Save the currently buffered audio as a 16-bit mono WAV file."""
        if len(self.matplotlib_widget.signals) > 0:
            try:
                signal_data = np.array(self.matplotlib_widget.signals)
                # FIX: signals hold normalized floats in [-1, 1) (see
                # update_plot's `/ 32768`); the original wrote float64 bytes
                # into a WAV declared as 2-byte PCM, producing a corrupt file.
                # Convert back to little-endian int16 before writing.
                pcm = np.clip(signal_data * 32768.0, -32768, 32767).astype('<i2')
                with wave.open("received_audio.wav", "wb") as wf:
                    wf.setnchannels(1)  # mono
                    wf.setsampwidth(2)  # 2 bytes per sample (16-bit PCM)
                    wf.setframerate(self.matplotlib_widget.freq)  # sample rate
                    wf.writeframes(pcm.tobytes())  # write PCM data
                self.status_label.setText("状态: 音频已保存为 received_audio.wav")
                print("音频已保存为 received_audio.wav")
            except Exception as e:
                self.status_label.setText(f"状态: 保存失败 - {str(e)}")
                print(f"保存音频失败: {e}")
        else:
            self.status_label.setText("状态: 没有足够的数据可保存")
async def main():
    """Async entry point: create the Qt application, install a qasync
    event loop so Qt events and asyncio coroutines share one loop, show
    the main window, and run until the application quits.
    """
    app = QApplication(sys.argv)
    # Replace the default asyncio loop with a qasync loop driven by Qt,
    # so widget callbacks and coroutines can interleave.
    loop = qasync.QEventLoop(app)
    asyncio.set_event_loop(loop)
    window = MainWindow()
    window.show()
    try:
        with loop:
            # NOTE(review): awaiting run_forever() relies on qasync's
            # QEventLoop returning an awaitable here — confirm against
            # the installed qasync version.
            await loop.run_forever()
    except KeyboardInterrupt:
        print("程序被用户中断")
    finally:
        # Ensure the UDP transport is released even on abnormal exit.
        if window.udp_transport:
            window.udp_transport.close()

View File

@@ -0,0 +1,18 @@
#!/usr/bin/env python3
"""
音频实时监听与绘图系统主程序
基于Qt GUI + Matplotlib + UDP接收 + AFSK解码字符串
"""
import sys
import asyncio
from graphic import main
if __name__ == '__main__':
    # Drive the GUI's async entry point; map failures to exit status.
    try:
        asyncio.run(main())
    except KeyboardInterrupt:
        # Ctrl-C is a normal way to quit; no error exit code.
        print("程序被用户中断")
    except Exception as e:
        # Report any other failure and exit non-zero.
        print(f"程序执行出错: {e}")
        sys.exit(1)

View File

@@ -0,0 +1,23 @@
# 声波测试
该GUI用于测试接收小智设备通过`udp`回传的`pcm`并转为时域/频域显示, 可以保存窗口长度的声音, 用于判断噪音频率分布和测试声波传输ASCII的准确度。
固件测试需要打开`USE_AUDIO_DEBUGGER`, 并设置好`AUDIO_DEBUG_UDP_SERVER`是本机地址.
声波`demod`可以通过`sonic_wifi_config.html`或者上传至`PinMe`的[小智声波配网](https://iqf7jnhi.pinit.eth.limo)来输出声波测试
# 声波解码测试记录
> `✓`代表在I2S DIN接收原始PCM信号时就能成功解码, `△`代表需要降噪或额外操作可稳定解码, `X`代表降噪后效果也不好(可能能解部分但非常不稳定)。
> 个别ADC需要I2C配置阶段做更精细的降噪调整, 由于设备不通用暂只按照boards内提供的config测试
| 设备 | ADC | MIC | 效果 | 备注 |
| ---- | ---- | --- | --- | ---- |
| bread-compact | INMP441 | 集成MEMS MIC | ✓ |
| atk-dnesp32s3-box | ES8311 | | ✓ |
| magiclick-2p5 | ES8311 | | ✓ |
| lichuang-dev | ES7210 | | △ | 测试时需要关掉INPUT_REFERENCE
| kevin-box-2 | ES7210 | | △ | 测试时需要关掉INPUT_REFERENCE
| m5stack-core-s3 | ES7210 | | △ | 测试时需要关掉INPUT_REFERENCE
| xmini-c3 | ES8311 | | △ | 需降噪
| atoms3r-echo-base | ES8311 | | △ | 需降噪
| atk-dnesp32s3-box0 | ES8311 | | X | 能接收且解码, 但是丢包率很高
| movecall-moji-esp32s3 | ES8311 | | X | 能接收且解码, 但是丢包率很高

View File

@@ -0,0 +1,4 @@
matplotlib==3.10.5
numpy==2.3.2
PyQt6==6.9.1
qasync==0.27.1

View File

@@ -0,0 +1,29 @@
# ogg_covertor 小智AI OGG 批量转换器
本脚本为OGG批量转换工具,支持将输入的音频文件转换为小智可使用的OGG格式
基于Python第三方库`ffmpeg-python`实现
支持OGG和音频之间的互转、响度调节等功能
# 创建并激活虚拟环境
```bash
# 创建虚拟环境
python -m venv venv
# 激活虚拟环境
source venv/bin/activate # Mac/Linux
venv\Scripts\activate # Windows
```
# 安装依赖
请在虚拟环境中执行
```bash
pip install ffmpeg-python
```
# 运行脚本
```bash
python ogg_covertor.py
```

View File

@@ -0,0 +1,230 @@
import tkinter as tk
from tkinter import ttk, filedialog, messagebox
import os
import threading
import sys
import ffmpeg
class AudioConverterApp:
    """Tkinter GUI for batch audio conversion.

    Encodes arbitrary audio files to the OGG/Opus format expected by the
    XiaoZhi firmware (16 kHz, mono, 16 kbps, 60 ms frames) and decodes
    OGG files back to WAV. Conversion runs on a worker thread so the UI
    stays responsive; progress is printed into the log pane.
    """

    def __init__(self, master):
        """Build window state, widgets and stdout redirection.

        master: the Tk root window.
        """
        self.master = master
        master.title("小智AI OGG音频批量转换工具")
        master.geometry("680x600")  # initial window size
        # Widget-bound state variables.
        self.mode = tk.StringVar(value="audio_to_ogg")
        self.output_dir = tk.StringVar()
        self.output_dir.set(os.path.abspath("output"))
        self.enable_loudnorm = tk.BooleanVar(value=True)
        self.target_lufs = tk.DoubleVar(value=-16.0)
        # Build the UI and mirror stdout into the log pane.
        self.create_widgets()
        self.redirect_output()

    def create_widgets(self):
        """Create and lay out all widgets (grid geometry manager)."""
        # Conversion-mode selector.
        mode_frame = ttk.LabelFrame(self.master, text="转换模式")
        mode_frame.grid(row=0, column=0, padx=10, pady=5, sticky="ew")
        ttk.Radiobutton(mode_frame, text="音频转到OGG", variable=self.mode,
                        value="audio_to_ogg", command=self.toggle_settings,
                        width=12).grid(row=0, column=0, padx=5)
        ttk.Radiobutton(mode_frame, text="OGG转回音频", variable=self.mode,
                        value="ogg_to_audio", command=self.toggle_settings,
                        width=12).grid(row=0, column=1, padx=5)
        # Loudness-normalization settings (only used when encoding).
        self.loudnorm_frame = ttk.Frame(self.master)
        self.loudnorm_frame.grid(row=1, column=0, padx=10, pady=5, sticky="ew")
        ttk.Checkbutton(self.loudnorm_frame, text="启用响度调整",
                        variable=self.enable_loudnorm, width=15
                        ).grid(row=0, column=0, padx=2)
        ttk.Entry(self.loudnorm_frame, textvariable=self.target_lufs,
                  width=6).grid(row=0, column=1, padx=2)
        ttk.Label(self.loudnorm_frame, text="LUFS").grid(row=0, column=2, padx=2)
        # Input-file selection area.
        file_frame = ttk.LabelFrame(self.master, text="输入文件")
        file_frame.grid(row=2, column=0, padx=10, pady=5, sticky="nsew")
        # File-list operation buttons.
        ttk.Button(file_frame, text="选择文件", command=self.select_files,
                   width=12).grid(row=0, column=0, padx=5, pady=2)
        ttk.Button(file_frame, text="移除选中", command=self.remove_selected,
                   width=12).grid(row=0, column=1, padx=5, pady=2)
        ttk.Button(file_frame, text="清空列表", command=self.clear_files,
                   width=12).grid(row=0, column=2, padx=5, pady=2)
        # File list: a Treeview with a pseudo-checkbox column.
        self.tree = ttk.Treeview(file_frame, columns=("selected", "filename"),
                                 show="headings", height=8)
        self.tree.heading("selected", text="选中", anchor=tk.W)
        self.tree.heading("filename", text="文件名", anchor=tk.W)
        self.tree.column("selected", width=60, anchor=tk.W)
        self.tree.column("filename", width=600, anchor=tk.W)
        self.tree.grid(row=1, column=0, columnspan=3, sticky="nsew", padx=5, pady=2)
        self.tree.bind("<ButtonRelease-1>", self.on_tree_click)
        # Output-directory chooser.
        output_frame = ttk.LabelFrame(self.master, text="输出目录")
        output_frame.grid(row=3, column=0, padx=10, pady=5, sticky="ew")
        ttk.Entry(output_frame, textvariable=self.output_dir, width=60
                  ).grid(row=0, column=0, padx=5, sticky="ew")
        ttk.Button(output_frame, text="浏览", command=self.select_output_dir,
                   width=8).grid(row=0, column=1, padx=5)
        # Conversion buttons.
        button_frame = ttk.Frame(self.master)
        button_frame.grid(row=4, column=0, padx=10, pady=10, sticky="ew")
        ttk.Button(button_frame, text="转换全部文件", command=lambda: self.start_conversion(True),
                   width=15).pack(side=tk.LEFT, padx=5)
        ttk.Button(button_frame, text="转换选中文件", command=lambda: self.start_conversion(False),
                   width=15).pack(side=tk.LEFT, padx=5)
        # Log pane.
        log_frame = ttk.LabelFrame(self.master, text="日志")
        log_frame.grid(row=5, column=0, padx=10, pady=5, sticky="nsew")
        self.log_text = tk.Text(log_frame, height=14, width=80)
        self.log_text.pack(fill=tk.BOTH, expand=True)
        # Layout weights: file list and log grow with the window.
        self.master.columnconfigure(0, weight=1)
        self.master.rowconfigure(2, weight=1)
        self.master.rowconfigure(5, weight=3)
        file_frame.columnconfigure(0, weight=1)
        file_frame.rowconfigure(1, weight=1)

    def toggle_settings(self):
        """Show the loudness settings only in audio→OGG mode."""
        if self.mode.get() == "audio_to_ogg":
            self.loudnorm_frame.grid()
        else:
            self.loudnorm_frame.grid_remove()

    def select_files(self):
        """Open a file dialog and append chosen files to the list.

        The full path is kept in the item's tags; only the basename is
        shown. NOTE(review): '*.mogg' looks like a typo for '*.mp3' —
        confirm the intended input formats.
        """
        file_types = [
            ("音频文件", "*.wav *.mogg *.ogg *.flac") if self.mode.get() == "audio_to_ogg"
            else ("ogg文件", "*.ogg")
        ]
        files = filedialog.askopenfilenames(filetypes=file_types)
        for f in files:
            self.tree.insert("", tk.END, values=("[ ]", os.path.basename(f)), tags=(f,))

    def on_tree_click(self, event):
        """Toggle the pseudo-checkbox when its cell is clicked."""
        region = self.tree.identify("region", event.x, event.y)
        if region == "cell":
            col = self.tree.identify_column(event.x)
            item = self.tree.identify_row(event.y)
            if col == "#1":  # the "selected" column was clicked
                current_val = self.tree.item(item, "values")[0]
                new_val = "[√]" if current_val == "[ ]" else "[ ]"
                self.tree.item(item, values=(new_val, self.tree.item(item, "values")[1]))

    def remove_selected(self):
        """Remove every checked row from the file list."""
        to_remove = []
        for item in self.tree.get_children():
            if self.tree.item(item, "values")[0] == "[√]":
                to_remove.append(item)
        for item in reversed(to_remove):
            self.tree.delete(item)

    def clear_files(self):
        """Remove all rows from the file list."""
        for item in self.tree.get_children():
            self.tree.delete(item)

    def select_output_dir(self):
        """Let the user pick the output directory."""
        path = filedialog.askdirectory()
        if path:
            self.output_dir.set(path)

    def redirect_output(self):
        """Mirror sys.stdout into the log Text widget and the console."""
        class StdoutRedirector:
            def __init__(self, text_widget):
                self.text_widget = text_widget
                self.original_stdout = sys.stdout

            def write(self, message):
                self.text_widget.insert(tk.END, message)
                self.text_widget.see(tk.END)
                self.original_stdout.write(message)

            def flush(self):
                self.original_stdout.flush()
        sys.stdout = StdoutRedirector(self.log_text)

    def start_conversion(self, convert_all):
        """Collect input paths and run the conversion on a worker thread.

        convert_all: True converts every listed file, False only the
        checked ones.
        """
        input_files = []
        for item in self.tree.get_children():
            if convert_all or self.tree.item(item, "values")[0] == "[√]":
                input_files.append(self.tree.item(item, "tags")[0])
        if not input_files:
            msg = "没有找到可转换的文件" if convert_all else "没有选中任何文件"
            messagebox.showwarning("警告", msg)
            return
        os.makedirs(self.output_dir.get(), exist_ok=True)
        try:
            if self.mode.get() == "audio_to_ogg":
                # Loudness target is only forwarded when the checkbox is on.
                target_lufs = self.target_lufs.get() if self.enable_loudnorm.get() else None
                thread = threading.Thread(target=self.convert_audio_to_ogg, args=(target_lufs, input_files))
            else:
                thread = threading.Thread(target=self.convert_ogg_to_audio, args=(input_files,))
            thread.start()
        except Exception as e:
            print(f"转换初始化失败: {str(e)}")

    def convert_audio_to_ogg(self, target_lufs, input_files):
        """Encode each input file to 16 kHz mono OGG/Opus.

        target_lufs: EBU R128 loudness target in LUFS, or None to skip
        normalization. Fix: this value was previously collected by the
        UI but never applied; it now drives an ffmpeg 'loudnorm' filter.
        Progress messages also interpolate the real file names (the old
        f-strings contained no placeholders).
        """
        for input_path in input_files:
            try:
                filename = os.path.basename(input_path)
                base_name = os.path.splitext(filename)[0]
                output_path = os.path.join(self.output_dir.get(), f"{base_name}.ogg")
                print(f"正在转换: {filename}")
                stream = ffmpeg.input(input_path)
                if target_lufs is not None:
                    # Apply EBU R128 loudness normalization to the target.
                    stream = stream.filter('loudnorm', I=target_lufs)
                (
                    stream
                    .output(output_path, acodec='libopus', audio_bitrate='16k', ac=1, ar=16000, frame_duration=60)
                    .run(overwrite_output=True)
                )
                print(f"转换成功: {output_path}\n")
            except Exception as e:
                print(f"转换失败: {str(e)}\n")

    def convert_ogg_to_audio(self, input_files):
        """Decode each OGG input back to a plain WAV file.

        Fix: this method was a copy-paste of the forward conversion — it
        re-encoded to '.ogg' with libopus, so "OGG转回音频" never
        produced a decoded file. It now writes '<name>.wav' and lets
        ffmpeg pick the PCM codec from the extension.
        """
        for input_path in input_files:
            try:
                filename = os.path.basename(input_path)
                base_name = os.path.splitext(filename)[0]
                output_path = os.path.join(self.output_dir.get(), f"{base_name}.wav")
                print(f"正在转换: {filename}")
                (
                    ffmpeg
                    .input(input_path)
                    .output(output_path)
                    .run(overwrite_output=True)
                )
                print(f"转换成功: {output_path}\n")
            except Exception as e:
                print(f"转换失败: {str(e)}\n")
if __name__ == "__main__":
    # Entry point: create the root window, attach the app, run the loop.
    root_window = tk.Tk()
    converter = AudioConverterApp(root_window)
    root_window.mainloop()

View File

@@ -47,11 +47,13 @@ CONFIG_LV_USE_CLIB_MALLOC=y
CONFIG_LV_USE_CLIB_STRING=y
CONFIG_LV_USE_CLIB_SPRINTF=y
CONFIG_LV_USE_IMGFONT=y
CONFIG_LV_USE_ASSERT_STYLE=y
CONFIG_LV_USE_GIF=y
# Use compressed font
CONFIG_LV_FONT_FMT_TXT_LARGE=y
CONFIG_LV_USE_FONT_COMPRESSED=y
CONFIG_LV_USE_FONT_PLACEHOLDER=y
CONFIG_LV_USE_FONT_COMPRESSED=n
CONFIG_LV_USE_FONT_PLACEHOLDER=n
# Disable extra widgets to save flash size
CONFIG_LV_USE_ANIMIMG=n