增加流式ogg解封装支持 (#1705)

* 增加流式ogg解封装支持

* 增加TF卡引脚连接说明

* 修复圆角图标屏幕导致的显示问题

* 优化聊天消息显示

* 修改解封装实现

---------

Co-authored-by: smalllin0 <aslinqf@163.com>
This commit is contained in:
小林同志
2026-02-05 00:12:40 +08:00
committed by GitHub
parent 6f71868bad
commit 49cd6625f4
7 changed files with 416 additions and 92 deletions

View File

@@ -1,6 +1,7 @@
# Define source files
set(SOURCES "audio/audio_codec.cc"
"audio/audio_service.cc"
"audio/demuxer/ogg_demuxer.cc"
"audio/codecs/no_audio_codec.cc"
"audio/codecs/box_audio_codec.cc"
"audio/codecs/es8311_audio_codec.cc"
@@ -38,7 +39,7 @@ set(SOURCES "audio/audio_codec.cc"
"main.cc"
)
set(INCLUDE_DIRS "." "display" "display/lvgl_display" "display/lvgl_display/jpg" "audio" "protocols")
set(INCLUDE_DIRS "." "display" "display/lvgl_display" "display/lvgl_display/jpg" "audio" "audio/demuxer" "protocols")
# Add board common files
list(APPEND SOURCES

View File

@@ -39,6 +39,15 @@
/// Creates the event group and registers the handler that receives every
/// packet produced by the Ogg demuxer.
AudioService::AudioService() {
    event_group_ = xEventGroupCreate();

    // Each demuxed packet is copied into an owned AudioStreamPacket (the
    // pointer handed over by the demuxer is only borrowed) and then pushed to
    // the decode queue.
    auto forward_packet = [this](const uint8_t* bytes, int rate, size_t length) {
        auto pkt = std::make_unique<AudioStreamPacket>();
        pkt->payload.resize(length);
        std::memcpy(pkt->payload.data(), bytes, length);
        pkt->frame_duration = 60;  // NOTE(review): presumably milliseconds — confirm
        pkt->sample_rate = rate;
        PushPacketToDecodeQueue(std::move(pkt), true);
    };
    demuxer_.OnDemuxerFinished(forward_packet);
}
AudioService::~AudioService() {
@@ -636,94 +645,10 @@ void AudioService::PlaySound(const std::string_view& ogg) {
codec_->EnableOutput(true);
}
const uint8_t* buf = reinterpret_cast<const uint8_t*>(ogg.data());
const auto* buf = reinterpret_cast<const uint8_t*>(ogg.data());
size_t size = ogg.size();
size_t offset = 0;
auto find_page = [&](size_t start)->size_t {
for (size_t i = start; i + 4 <= size; ++i) {
if (buf[i] == 'O' && buf[i+1] == 'g' && buf[i+2] == 'g' && buf[i+3] == 'S') return i;
}
return static_cast<size_t>(-1);
};
bool seen_head = false;
bool seen_tags = false;
int sample_rate = 16000; // 默认值
while (true) {
size_t pos = find_page(offset);
if (pos == static_cast<size_t>(-1)) break;
offset = pos;
if (offset + 27 > size) break;
const uint8_t* page = buf + offset;
uint8_t page_segments = page[26];
size_t seg_table_off = offset + 27;
if (seg_table_off + page_segments > size) break;
size_t body_size = 0;
for (size_t i = 0; i < page_segments; ++i) body_size += page[27 + i];
size_t body_off = seg_table_off + page_segments;
if (body_off + body_size > size) break;
// Parse packets using lacing
size_t cur = body_off;
size_t seg_idx = 0;
while (seg_idx < page_segments) {
size_t pkt_len = 0;
size_t pkt_start = cur;
bool continued = false;
do {
uint8_t l = page[27 + seg_idx++];
pkt_len += l;
cur += l;
continued = (l == 255);
} while (continued && seg_idx < page_segments);
if (pkt_len == 0) continue;
const uint8_t* pkt_ptr = buf + pkt_start;
if (!seen_head) {
// 解析OpusHead包
if (pkt_len >= 19 && std::memcmp(pkt_ptr, "OpusHead", 8) == 0) {
seen_head = true;
// OpusHead结构[0-7] "OpusHead", [8] version, [9] channel_count, [10-11] pre_skip
// [12-15] input_sample_rate, [16-17] output_gain, [18] mapping_family
if (pkt_len >= 12) {
uint8_t version = pkt_ptr[8];
uint8_t channel_count = pkt_ptr[9];
if (pkt_len >= 16) {
// 读取输入采样率 (little-endian)
sample_rate = pkt_ptr[12] | (pkt_ptr[13] << 8) |
(pkt_ptr[14] << 16) | (pkt_ptr[15] << 24);
ESP_LOGI(TAG, "OpusHead: version=%d, channels=%d, sample_rate=%d",
version, channel_count, sample_rate);
}
}
}
continue;
}
if (!seen_tags) {
// Expect OpusTags in second packet
if (pkt_len >= 8 && std::memcmp(pkt_ptr, "OpusTags", 8) == 0) {
seen_tags = true;
}
continue;
}
// Audio packet (Opus)
auto packet = std::make_unique<AudioStreamPacket>();
packet->sample_rate = sample_rate;
packet->frame_duration = 60;
packet->payload.resize(pkt_len);
std::memcpy(packet->payload.data(), pkt_ptr, pkt_len);
PushPacketToDecodeQueue(std::move(packet), true);
}
offset = body_off + body_size;
}
demuxer_.Reset();
demuxer_.Process(buf, size);
}
bool AudioService::IsIdle() {

View File

@@ -23,7 +23,7 @@
#include "processors/audio_debugger.h"
#include "wake_word.h"
#include "protocol.h"
#include "ogg_demuxer.h"
/*
* There are two types of audio data flow:
@@ -146,6 +146,8 @@ private:
std::mutex input_resampler_mutex_;
esp_ae_rate_cvt_handle_t input_resampler_ = nullptr;
esp_ae_rate_cvt_handle_t output_resampler_ = nullptr;
OggDemuxer demuxer_;
// Encoder/Decoder state
int encoder_sample_rate_ = 16000;

View File

@@ -0,0 +1,311 @@
#include "ogg_demuxer.h"
#include "esp_log.h"
#define TAG "OggDemuxer"
/// @brief Return the demuxer to its initial state.
///
/// Forgets any previously seen OpusHead/OpusTags, restores the default sample
/// rate, zeroes all scratch buffers and counters, and restarts the search for
/// the 4-byte "OggS" capture pattern.
void OggDemuxer::Reset()
{
    // Stream-level Opus information back to its defaults.
    opus_info_.head_seen = false;
    opus_info_.tags_seen = false;
    opus_info_.sample_rate = 48000;

    // Wipe the scratch buffers.
    memset(ctx_.packet_buf, 0, sizeof(ctx_.packet_buf));
    memset(ctx_.seg_table, 0, sizeof(ctx_.seg_table));
    memset(ctx_.header, 0, sizeof(ctx_.header));

    // Packet / page bookkeeping.
    ctx_.packet_continued = false;
    ctx_.packet_len = 0;
    ctx_.body_offset = 0;
    ctx_.body_size = 0;
    ctx_.seg_remaining = 0;
    ctx_.seg_index = 0;
    ctx_.seg_count = 0;
    ctx_.data_offset = 0;

    // The next thing to look for is the 4-byte "OggS" marker.
    ctx_.bytes_needed = 4;
    state_ = ParseState::FIND_PAGE;
}
/// @brief Feed one chunk of an Ogg stream carrying Opus to the demuxer.
///
/// Parsing is incremental: all progress lives in ctx_ / state_, so a page
/// header, a segment table, a segment or a packet may be split across calls at
/// any byte boundary. Every page goes through
/// FIND_PAGE ("OggS" capture pattern) -> PARSE_HEADER (27-byte page header) ->
/// PARSE_SEGMENTS (lacing table) -> PARSE_DATA (page body).
/// Packets are reassembled in ctx_.packet_buf. The OpusHead and OpusTags
/// packets are consumed internally; once both have been seen, every further
/// packet is handed to the callback registered with OnDemuxerFinished(). The
/// pointer passed to the callback refers to the internal packet buffer, which
/// is reused as soon as the callback returns.
///
/// @param data Input bytes.
/// @param size Number of bytes available at @p data.
/// @return Number of bytes consumed. This is smaller than @p size only when
///         parsing was abandoned early (packet buffer overflow, or an
///         inconsistent FIND_PAGE state); in the overflow case the demuxer has
///         already fallen back to FIND_PAGE, so the caller may pass the
///         remaining bytes in again.
size_t OggDemuxer::Process(const uint8_t* data, size_t size)
{
    size_t processed = 0;  // bytes of `data` consumed so far

    while (processed < size) {
        switch (state_) {
        case ParseState::FIND_PAGE: {
            if (ctx_.bytes_needed < 4) {
                // A candidate marker straddling a chunk boundary is held in
                // ctx_.header; top it up from the new data.
                size_t to_copy = std::min(size - processed, ctx_.bytes_needed);
                memcpy(ctx_.header + (4 - ctx_.bytes_needed), data + processed, to_copy);
                processed += to_copy;
                ctx_.bytes_needed -= to_copy;

                if (ctx_.bytes_needed == 0) {
                    if (memcmp(ctx_.header, "OggS", 4) == 0) {
                        state_ = ParseState::PARSE_HEADER;
                        ctx_.data_offset = 4;
                        ctx_.bytes_needed = 27 - 4;  // 23 more bytes complete the page header
                    } else {
                        // Mismatch: slide the 4-byte window by one and fetch one more byte.
                        memmove(ctx_.header, ctx_.header + 1, 3);
                        ctx_.bytes_needed = 1;
                    }
                } else {
                    // Input exhausted before the candidate could be completed.
                    return processed;
                }
            } else if (ctx_.bytes_needed == 4) {
                // Scan the chunk for a complete "OggS".
                bool found = false;
                size_t i = 0;
                size_t remaining = size - processed;

                for (; i + 4 <= remaining; i++) {
                    if (memcmp(data + processed + i, "OggS", 4) == 0) {
                        found = true;
                        break;
                    }
                }

                if (found) {
                    // Skip everything before the marker and the marker itself.
                    // The marker bytes are not stored: header[0..3] is never read.
                    processed += i;
                    processed += 4;
                    state_ = ParseState::PARSE_HEADER;
                    ctx_.data_offset = 4;
                    ctx_.bytes_needed = 27 - 4;  // 23 more bytes complete the page header
                } else {
                    // No full marker: keep the trailing (up to 3) bytes, which
                    // may be the start of a marker continued in the next chunk.
                    size_t partial_len = remaining - i;
                    if (partial_len > 0) {
                        memcpy(ctx_.header, data + processed + i, partial_len);
                        ctx_.bytes_needed = 4 - partial_len;
                        processed += i + partial_len;
                    } else {
                        processed += i;  // every byte has been searched
                    }
                    return processed;
                }
            } else {
                ESP_LOGE(TAG, "OggDemuxer run in error state: bytes_needed=%zu", ctx_.bytes_needed);
                Reset();
                return processed;
            }
            break;
        }

        case ParseState::PARSE_HEADER: {
            size_t available = size - processed;

            if (available < ctx_.bytes_needed) {
                // Not enough input: stash what is there and wait for more.
                memcpy(ctx_.header + ctx_.data_offset, data + processed, available);
                ctx_.data_offset += available;
                ctx_.bytes_needed -= available;
                processed += available;
                return processed;
            } else {
                // The rest of the 27-byte header is available.
                size_t to_copy = ctx_.bytes_needed;
                memcpy(ctx_.header + ctx_.data_offset, data + processed, to_copy);
                processed += to_copy;
                ctx_.data_offset += to_copy;
                ctx_.bytes_needed = 0;

                // Byte 4 is the stream structure version and must be 0.
                if (ctx_.header[4] != 0) {
                    ESP_LOGE(TAG, "无效的Ogg版本: %d", ctx_.header[4]);
                    state_ = ParseState::FIND_PAGE;
                    ctx_.bytes_needed = 4;
                    ctx_.data_offset = 0;
                    break;
                }

                // Byte 26 is the number of entries in the segment table. It is
                // a single byte, so it can never exceed 255 and needs no
                // upper-bound check.
                ctx_.seg_count = ctx_.header[26];
                if (ctx_.seg_count > 0) {
                    state_ = ParseState::PARSE_SEGMENTS;
                    ctx_.bytes_needed = ctx_.seg_count;
                    ctx_.data_offset = 0;
                } else {
                    // Page without segments: go straight to the next page.
                    state_ = ParseState::FIND_PAGE;
                    ctx_.bytes_needed = 4;
                    ctx_.data_offset = 0;
                }
            }
            break;
        }

        case ParseState::PARSE_SEGMENTS: {
            size_t available = size - processed;

            if (available < ctx_.bytes_needed) {
                memcpy(ctx_.seg_table + ctx_.data_offset, data + processed, available);
                ctx_.data_offset += available;
                ctx_.bytes_needed -= available;
                processed += available;
                return processed;  // wait for the rest of the table
            } else {
                size_t to_copy = ctx_.bytes_needed;
                memcpy(ctx_.seg_table + ctx_.data_offset, data + processed, to_copy);
                processed += to_copy;
                ctx_.data_offset += to_copy;
                ctx_.bytes_needed = 0;

                state_ = ParseState::PARSE_DATA;
                ctx_.seg_index = 0;
                ctx_.data_offset = 0;

                // Total body size is the sum of all lacing values.
                ctx_.body_size = 0;
                for (size_t i = 0; i < ctx_.seg_count; ++i) {
                    ctx_.body_size += ctx_.seg_table[i];
                }
                ctx_.body_offset = 0;
                ctx_.seg_remaining = 0;
            }
            break;
        }

        case ParseState::PARSE_DATA: {
            while (ctx_.seg_index < ctx_.seg_count) {
                // A zero-length lacing value consumes no input bytes, so it
                // must still be handled after the chunk is exhausted.
                // Otherwise a packet whose size is a multiple of 255 and that
                // ends exactly at the end of the input is never terminated
                // and never delivered.
                const bool pending_empty_segment =
                    ctx_.seg_remaining == 0 && ctx_.seg_table[ctx_.seg_index] == 0;
                if (processed >= size && !pending_empty_segment) {
                    break;
                }

                // Either resume a partially read segment or start a new one.
                uint8_t seg_len = ctx_.seg_table[ctx_.seg_index];
                if (ctx_.seg_remaining > 0) {
                    seg_len = static_cast<uint8_t>(ctx_.seg_remaining);
                } else {
                    ctx_.seg_remaining = seg_len;
                }

                // Refuse to overrun the fixed packet buffer.
                if (ctx_.packet_len + seg_len > sizeof(ctx_.packet_buf)) {
                    ESP_LOGE(TAG, "包缓冲区溢出: %zu + %u > %zu", ctx_.packet_len, seg_len, sizeof(ctx_.packet_buf));
                    state_ = ParseState::FIND_PAGE;
                    ctx_.packet_len = 0;
                    ctx_.packet_continued = false;
                    ctx_.seg_remaining = 0;
                    ctx_.bytes_needed = 4;
                    return processed;
                }

                size_t to_copy = std::min(size - processed, static_cast<size_t>(seg_len));
                memcpy(ctx_.packet_buf + ctx_.packet_len, data + processed, to_copy);
                processed += to_copy;
                ctx_.packet_len += to_copy;
                ctx_.body_offset += to_copy;
                ctx_.seg_remaining -= to_copy;

                if (ctx_.seg_remaining > 0) {
                    // Segment incomplete: wait for more input.
                    return processed;
                }

                // Segment complete. A lacing value of 255 means the packet
                // continues in the next segment; anything smaller ends it.
                if (ctx_.seg_table[ctx_.seg_index] == 255) {
                    ctx_.packet_continued = true;
                } else {
                    if (ctx_.packet_len) {
                        const bool has_magic = ctx_.packet_len >= 8;
                        if (!opus_info_.head_seen && has_magic &&
                            memcmp(ctx_.packet_buf, "OpusHead", 8) == 0) {
                            opus_info_.head_seen = true;
                            if (ctx_.packet_len >= 19) {
                                // Bytes 12..15: input sample rate, little-endian.
                                // Assembled as unsigned so the top byte cannot
                                // overflow a signed int during the shift.
                                const uint32_t rate =
                                    static_cast<uint32_t>(ctx_.packet_buf[12]) |
                                    (static_cast<uint32_t>(ctx_.packet_buf[13]) << 8) |
                                    (static_cast<uint32_t>(ctx_.packet_buf[14]) << 16) |
                                    (static_cast<uint32_t>(ctx_.packet_buf[15]) << 24);
                                opus_info_.sample_rate = static_cast<int>(rate);
                                ESP_LOGI(TAG, "OpusHead found, sample_rate=%d", opus_info_.sample_rate);
                            }
                        } else if (!opus_info_.tags_seen && has_magic &&
                                   memcmp(ctx_.packet_buf, "OpusTags", 8) == 0) {
                            opus_info_.tags_seen = true;
                            ESP_LOGI(TAG, "OpusTags found.");
                        } else if (opus_info_.head_seen && opus_info_.tags_seen) {
                            // Regular audio packet.
                            if (on_demuxer_finished_) {
                                on_demuxer_finished_(ctx_.packet_buf, opus_info_.sample_rate, ctx_.packet_len);
                            }
                        } else {
                            ESP_LOGW(TAG, "当前Ogg容器未解析到OpusHead/OpusTags丢弃");
                        }
                    }
                    ctx_.packet_len = 0;
                    ctx_.packet_continued = false;
                }

                ctx_.seg_index++;
                ctx_.seg_remaining = 0;
            }

            if (ctx_.seg_index == ctx_.seg_count) {
                // All segments of this page handled; sanity-check the body size.
                if (ctx_.body_offset < ctx_.body_size) {
                    ESP_LOGW(TAG, "数据体不完整: %zu/%zu",
                             ctx_.body_offset, ctx_.body_size);
                }

                // A packet spanning pages keeps its accumulated bytes;
                // otherwise start the next page with an empty packet buffer.
                if (!ctx_.packet_continued) {
                    ctx_.packet_len = 0;
                }

                state_ = ParseState::FIND_PAGE;
                ctx_.bytes_needed = 4;
                ctx_.data_offset = 0;
            }
            break;
        }
        }
    }

    return processed;
}

View File

@@ -0,0 +1,63 @@
#ifndef OGG_DEMUXER_H_
#define OGG_DEMUXER_H_
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <functional>
#include <utility>
#include <vector>
/// Incremental (streaming) Ogg demuxer for Opus streams.
///
/// Input is pushed in arbitrary-sized chunks through Process(); each complete
/// packet that follows the OpusHead and OpusTags packets is reported through
/// the callback registered with OnDemuxerFinished().
class OggDemuxer {
private:
    /// Stage of the page parser.
    enum class ParseState : int8_t {
        FIND_PAGE,       ///< searching for the "OggS" capture pattern
        PARSE_HEADER,    ///< collecting the 27-byte page header
        PARSE_SEGMENTS,  ///< collecting the segment (lacing) table
        PARSE_DATA       ///< reading the page body segment by segment
    };

    /// Stream-level Opus information.
    struct Opus_t {
        bool head_seen{false};   ///< an OpusHead packet has been parsed
        bool tags_seen{false};   ///< an OpusTags packet has been parsed
        int sample_rate{48000};  ///< sample rate read from OpusHead (default 48000)
    };

    /// Parser working state. Fixed-size buffers are used to avoid dynamic allocation.
    struct context_t {
        bool packet_continued{false};  ///< current packet spans multiple segments
        uint8_t header[27];            ///< Ogg page header
        uint8_t seg_table[255];        ///< segment table of the current page
        uint8_t packet_buf[8192];      ///< 8 KB packet reassembly buffer
        size_t packet_len = 0;         ///< bytes accumulated in packet_buf
        size_t seg_count = 0;          ///< number of segments in the current page
        size_t seg_index = 0;          ///< index of the segment being processed
        size_t data_offset = 0;        ///< bytes already read in the current parse stage
        size_t bytes_needed = 0;       ///< bytes still required by the current parse stage
        size_t seg_remaining = 0;      ///< bytes still to read for the current segment
        size_t body_size = 0;          ///< total size of the page body
        size_t body_offset = 0;        ///< bytes of the page body already read
    };

public:
    /// Constructs a demuxer in its initial (reset) state.
    OggDemuxer() {
        Reset();
    }

    /// @brief Reset all parser state and scratch buffers.
    void Reset();

    /// @brief Feed a chunk of stream data.
    /// @param data Input bytes.
    /// @param size Number of bytes at @p data.
    /// @return Number of bytes consumed.
    size_t Process(const uint8_t* data, size_t size);

    /// @brief Set the callback invoked for every demuxed packet.
    /// @param on_demuxer_finished Receives a pointer to the packet bytes, the
    ///        stream sample rate and the packet length. The pointer refers to
    ///        the demuxer's internal buffer, so the bytes must be copied if
    ///        they are needed after the callback returns.
    void OnDemuxerFinished(std::function<void(const uint8_t* data, int sample_rate, size_t len)> on_demuxer_finished) {
        // The parameter is a by-value sink: move it instead of copying the
        // callable (and whatever state it captured) a second time.
        on_demuxer_finished_ = std::move(on_demuxer_finished);
    }

private:
    ParseState state_ = ParseState::FIND_PAGE;
    context_t ctx_;
    Opus_t opus_info_;
    std::function<void(const uint8_t*, int, size_t)> on_demuxer_finished_;
};
#endif

View File

@@ -21,4 +21,19 @@
2. 关闭设备电源后,长按电源键不松手;
3. 在烧录工具中选择对应的串口COM Port
4. 点击烧录按钮,选择 UART 模式;
5. 烧录完成前请勿松开电源键。
5. 烧录完成前请勿松开电源键。
## 引脚
- 1-9
- 1. DAT2 NC
- 2. CD/DAT3 片选,低电平有效。(未知)
- 3. CMD IO48,Command/Response Line,主机通过此线向TF卡发送命令和数据
- 4. VDD 供电
- 5. CLK IO47,时钟,由主机产生,用于同步数据通信
- 6. VSS GND
- 7. DAT0 IO21,SPI_MISO,TF卡通过此线向主机返回响应和数据
- 8. DAT1 NC
引脚编号从右向左依次为 1-9。

View File

@@ -41,8 +41,15 @@ public:
{
DisplayLockGuard lock(this);
lv_obj_set_style_pad_left(status_bar_, LV_HOR_RES * 0.167, 0);
lv_obj_set_style_pad_right(status_bar_, LV_HOR_RES * 0.167, 0);
// 状态栏容器适配
lv_obj_set_style_pad_left(top_bar_, LV_HOR_RES * 0.12, 0); // 左侧填充12%
lv_obj_set_style_pad_right(top_bar_, LV_HOR_RES * 0.12, 0); // 右侧填充12%
// 表情容器上移适配
lv_obj_align(emoji_box_, LV_ALIGN_CENTER, 0, -50); // 向上偏移50
// 消息栏适配
lv_obj_align(bottom_bar_, LV_ALIGN_BOTTOM_MID, 0, -40); // 向上偏移40
}
};