Initial commit

This commit is contained in:
2026-04-26 21:35:04 +08:00
commit da6ca1b09a
1483 changed files with 115719 additions and 0 deletions

View File

@@ -0,0 +1,467 @@
#include <esp_attr.h>
#include <esp_heap_caps.h>
#include <esp_log.h>
#include <stddef.h>
#include <string.h>
#include <utility>
#include "esp_jpeg_common.h"
#include "esp_jpeg_enc.h"
#include "esp_imgfx_color_convert.h"
#if CONFIG_XIAOZHI_ENABLE_HARDWARE_JPEG_ENCODER
#include "driver/jpeg_encode.h"
#endif
#include "image_to_jpeg.h"
#define TAG "image_to_jpeg"
// Allocate `size` bytes for encoder working buffers.
// The default heap is tried first; when the build enables SPIRAM allocation the
// request is retried explicitly against SPIRAM. Returns NULL if every attempt fails.
static void* malloc_psram(size_t size) {
    void* block = malloc(size);
#if (CONFIG_SPIRAM_SUPPORT && (CONFIG_SPIRAM_USE_CAPS_ALLOC || CONFIG_SPIRAM_USE_MALLOC))
    if (block == NULL) {
        block = heap_caps_malloc(size, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT);
    }
#endif
    return block;
}
// Widen a 5-bit channel value to 8 bits by shifting it into the top bits and
// repeating its upper bits in the low bits, so 0 maps to 0 and 31 maps to 255.
static __always_inline uint8_t expand_5_to_8(uint8_t v) {
    const int shifted_up = v << 3;
    const int replicated_low = v >> 2;
    return (uint8_t)(shifted_up | replicated_low);
}
// Widen a 6-bit channel value to 8 bits by shifting it into the top bits and
// repeating its upper bits in the low bits, so 0 maps to 0 and 63 maps to 255.
static __always_inline uint8_t expand_6_to_8(uint8_t v) {
    const int shifted_up = v << 2;
    const int replicated_low = v >> 4;
    return (uint8_t)(shifted_up | replicated_low);
}
/**
 * @brief Stage a raw frame in a newly allocated buffer laid out for the software JPEG encoder.
 *
 * Handling per input format:
 *  - V4L2_PIX_FMT_GREY: copied as-is, reported as JPEG_PIXEL_FORMAT_GRAY.
 *  - V4L2_PIX_FMT_YUYV: copied as-is, reported as JPEG_PIXEL_FORMAT_YCbYCr.
 *  - V4L2_PIX_FMT_UYVY: every 4-byte group is reordered from Cb Y0 Cr Y1 to Y0 Cb Y1 Cr.
 *  - V4L2_PIX_FMT_YUV422P: the Y, U and V planes are interleaved into Y0 Cb Y1 Cr.
 *  - V4L2_PIX_FMT_RGB24 / RGB565 / RGB565X: converted to YUYV through esp_imgfx_color_convert
 *    (configured with ESP_IMGFX_COLOR_SPACE_STD_BT601).
 *
 * @param src      Source pixels holding one width x height frame in `format`.
 * @param width    Frame width in pixels.
 * @param height   Frame height in pixels.
 * @param format   V4L2 FourCC describing `src`.
 * @param out_fmt  Optional; receives the encoder pixel format of the returned buffer.
 * @param out_size Optional; receives the returned buffer size in bytes. It is written as 0 when
 *                 the format is unsupported and left untouched on the other failure paths.
 * @return Buffer from jpeg_calloc_align(size, 16), to be released with jpeg_free_align(), or
 *         NULL when allocation fails, colour conversion fails, or the format is unsupported.
 *
 * @note The UYVY and YUV422P paths consume pixels in pairs; they assume an even pixel count /
 *       even width -- TODO confirm that callers guarantee this.
 */
static uint8_t* convert_input_to_encoder_buf(const uint8_t* src, uint16_t width, uint16_t height, v4l2_pix_fmt_t format,
                                             jpeg_pixel_format_t* out_fmt, int* out_size) {
    // GREY is handed to the encoder unchanged as JPEG_PIXEL_FORMAT_GRAY.
    if (format == V4L2_PIX_FMT_GREY) {
        int sz = (int)width * (int)height;
        uint8_t* buf = (uint8_t*)jpeg_calloc_align(sz, 16);
        if (!buf)
            return NULL;
        memcpy(buf, src, sz);
        if (out_fmt)
            *out_fmt = JPEG_PIXEL_FORMAT_GRAY;
        if (out_size)
            *out_size = sz;
        return buf;
    }

    // V4L2 YUYV (Y Cb Y Cr) already matches JPEG_PIXEL_FORMAT_YCbYCr.
    if (format == V4L2_PIX_FMT_YUYV) {
        int sz = (int)width * (int)height * 2;
        uint8_t* buf = (uint8_t*)jpeg_calloc_align(sz, 16);
        if (!buf)
            return NULL;
        memcpy(buf, src, sz);
        if (out_fmt)
            *out_fmt = JPEG_PIXEL_FORMAT_YCbYCr;
        if (out_size)
            *out_size = sz;
        return buf;
    }

    // V4L2 UYVY (Cb Y Cr Y) -> reorder into YUYV and feed it as YCbYCr.
    // UYVY is not expected to occur in the current version.
    if (format == V4L2_PIX_FMT_UYVY) [[unlikely]] {
        int sz = (int)width * (int)height * 2;
        const uint8_t* s = src;
        uint8_t* buf = (uint8_t*)jpeg_calloc_align(sz, 16);
        if (!buf)
            return NULL;
        uint8_t* d = buf;
        for (int i = 0; i < sz; i += 4) {
            // src: Cb, Y0, Cr, Y1 -> dst: Y0, Cb, Y1, Cr
            d[0] = s[1];
            d[1] = s[0];
            d[2] = s[3];
            d[3] = s[2];
            s += 4;
            d += 4;
        }
        if (out_fmt)
            *out_fmt = JPEG_PIXEL_FORMAT_YCbYCr;
        if (out_size)
            *out_size = sz;
        return buf;
    }

    // V4L2 YUV422P (planar 4:2:2) -> interleave into YUYV (YCbYCr).
    // YUV422P is not expected to occur in the current version.
    if (format == V4L2_PIX_FMT_YUV422P) [[unlikely]] {
        int sz = (int)width * (int)height * 2;
        // Plane layout: full-resolution Y, then U and V at half horizontal resolution.
        const uint8_t* y_plane = src;
        const uint8_t* u_plane = y_plane + (int)width * (int)height;
        const uint8_t* v_plane = u_plane + ((int)width / 2) * (int)height;
        uint8_t* buf = (uint8_t*)jpeg_calloc_align(sz, 16);
        if (!buf)
            return NULL;
        uint8_t* dst = buf;
        for (int y = 0; y < height; y++) {
            const uint8_t* y_row = y_plane + y * (int)width;
            const uint8_t* u_row = u_plane + y * ((int)width / 2);
            const uint8_t* v_row = v_plane + y * ((int)width / 2);
            for (int x = 0; x < width; x += 2) {
                uint8_t y0 = y_row[x + 0];
                uint8_t y1 = y_row[x + 1];
                uint8_t cb = u_row[x / 2];
                uint8_t cr = v_row[x / 2];
                dst[0] = y0;
                dst[1] = cb;
                dst[2] = y1;
                dst[3] = cr;
                dst += 4;
            }
        }
        if (out_fmt)
            *out_fmt = JPEG_PIXEL_FORMAT_YCbYCr;
        if (out_size)
            *out_size = sz;
        return buf;
    }

    // RGB input is converted to YUV422 (YCbYCr) before encoding.
    // See https://github.com/78/xiaozhi-esp32/issues/1380#issuecomment-3497156378
    if (format == V4L2_PIX_FMT_RGB24 || format == V4L2_PIX_FMT_RGB565 || format == V4L2_PIX_FMT_RGB565X) {
        esp_imgfx_pixel_fmt_t in_pixel_fmt = ESP_IMGFX_PIXEL_FMT_RGB888;
        uint32_t src_len = 0;
        // Widen before multiplying so the byte count is never computed in signed int.
        const uint32_t pixel_count = static_cast<uint32_t>(width) * static_cast<uint32_t>(height);
        switch (format) {
            case V4L2_PIX_FMT_RGB24:
                in_pixel_fmt = ESP_IMGFX_PIXEL_FMT_RGB888;
                src_len = pixel_count * 3;
                break;
            case V4L2_PIX_FMT_RGB565:
                in_pixel_fmt = ESP_IMGFX_PIXEL_FMT_RGB565_LE;
                src_len = pixel_count * 2;
                break;
            [[unlikely]] case V4L2_PIX_FMT_RGB565X:  // RGB565X is not expected in the current version
                in_pixel_fmt = ESP_IMGFX_PIXEL_FMT_RGB565_BE;
                src_len = pixel_count * 2;
                break;
            [[unlikely]] default:
                // The enclosing condition admits only the three formats handled above.
                ESP_LOGE(TAG, "[Unreachable Case] unsupported format: 0x%08lx", format);
                std::unreachable();
        }

        int sz = (int)width * (int)height * 2;
        uint8_t* buf = (uint8_t*)jpeg_calloc_align(sz, 16);
        if (!buf)
            return nullptr;

        esp_imgfx_color_convert_cfg_t convert_cfg = {
            .in_res = {.width = static_cast<int16_t>(width),
                       .height = static_cast<int16_t>(height)},
            .in_pixel_fmt = in_pixel_fmt,
            .out_pixel_fmt = ESP_IMGFX_PIXEL_FMT_YUYV,
            .color_space_std = ESP_IMGFX_COLOR_SPACE_STD_BT601,
        };
        esp_imgfx_color_convert_handle_t convert_handle = nullptr;
        esp_imgfx_err_t err = esp_imgfx_color_convert_open(&convert_cfg, &convert_handle);
        if (err != ESP_IMGFX_ERR_OK || convert_handle == nullptr) {
            ESP_LOGE(TAG, "esp_imgfx_color_convert_open failed");
            jpeg_free_align(buf);
            return nullptr;
        }

        esp_imgfx_data_t convert_input_data = {
            .data = const_cast<uint8_t*>(src),
            .data_len = src_len,
        };
        esp_imgfx_data_t convert_output_data = {
            .data = buf,
            .data_len = static_cast<uint32_t>(sz),
        };
        err = esp_imgfx_color_convert_process(convert_handle, &convert_input_data, &convert_output_data);

        // The converter is only needed for this one frame: close it before looking at
        // the result so the handle is released on the failure path as well.
        esp_imgfx_color_convert_close(convert_handle);
        convert_handle = nullptr;

        if (err != ESP_IMGFX_ERR_OK) {
            ESP_LOGE(TAG, "esp_imgfx_color_convert_process failed");
            jpeg_free_align(buf);
            return nullptr;
        }

        if (out_fmt)
            *out_fmt = JPEG_PIXEL_FORMAT_YCbYCr;
        if (out_size)
            *out_size = sz;
        return buf;
    }

    ESP_LOGE(TAG, "unsupported format: 0x%08lx", format);
    if (out_size)
        *out_size = 0;
    return nullptr;
}
#if CONFIG_XIAOZHI_ENABLE_HARDWARE_JPEG_ENCODER
static jpeg_encoder_handle_t s_hw_jpeg_handle = NULL;
static bool hw_jpeg_ensure_inited(void) {
if (s_hw_jpeg_handle) {
return true;
}
jpeg_encode_engine_cfg_t eng_cfg = {
.intr_priority = 0,
.timeout_ms = 100,
};
esp_err_t er = jpeg_new_encoder_engine(&eng_cfg, &s_hw_jpeg_handle);
if (er != ESP_OK) {
ESP_LOGE(TAG, "jpeg_new_encoder_engine failed: %d", (int)er);
s_hw_jpeg_handle = NULL;
return false;
}
return true;
}
static uint8_t* convert_input_to_hw_encoder_buf(const uint8_t* src, uint16_t width, uint16_t height, v4l2_pix_fmt_t format,
jpeg_enc_input_format_t* out_fmt, int* out_size) {
if (format == V4L2_PIX_FMT_GREY) {
int sz = (int)width * (int)height;
uint8_t* buf = (uint8_t*)malloc_psram(sz);
if (!buf)
return NULL;
memcpy(buf, src, sz);
if (out_fmt)
*out_fmt = JPEG_ENCODE_IN_FORMAT_GRAY;
if (out_size)
*out_size = sz;
return buf;
}
if (format == V4L2_PIX_FMT_RGB24) {
int sz = (int)width * (int)height * 3;
uint8_t* buf = (uint8_t*)malloc_psram(sz);
if (!buf) {
ESP_LOGE(TAG, "malloc_psram failed");
return NULL;
}
memcpy(buf, src, sz);
if (out_fmt)
*out_fmt = JPEG_ENCODE_IN_FORMAT_RGB888;
if (out_size)
*out_size = sz;
return buf;
}
if (format == V4L2_PIX_FMT_RGB565) {
int sz = (int)width * (int)height * 2;
uint8_t* buf = (uint8_t*)malloc_psram(sz);
if (!buf)
return NULL;
memcpy(buf, src, sz);
if (out_fmt)
*out_fmt = JPEG_ENCODE_IN_FORMAT_RGB565;
if (out_size)
*out_size = sz;
return buf;
}
if (format == V4L2_PIX_FMT_YUYV) {
// 硬件需要 | Y1 V Y0 U | 的“大端”格式,因此需要 bswap16
int sz = (int)width * (int)height * 2;
uint16_t* buf = (uint16_t*)malloc_psram(sz);
if (!buf)
return NULL;
const uint16_t* bsrc = (const uint16_t*)src;
for (int i = 0; i < sz / 2; i++) {
buf[i] = __builtin_bswap16(bsrc[i]);
}
if (out_fmt)
*out_fmt = JPEG_ENCODE_IN_FORMAT_YUV422;
if (out_size)
*out_size = sz;
return (uint8_t*)buf;
}
return NULL;
}
static bool encode_with_hw_jpeg(const uint8_t* src, size_t src_len, uint16_t width, uint16_t height,
v4l2_pix_fmt_t format, uint8_t quality, uint8_t** jpg_out, size_t* jpg_out_len,
jpg_out_cb cb, void* cb_arg) {
if (quality < 1)
quality = 1;
if (quality > 100)
quality = 100;
jpeg_enc_input_format_t enc_src_type = JPEG_ENCODE_IN_FORMAT_RGB888;
int enc_in_size = 0;
uint8_t* enc_in = convert_input_to_hw_encoder_buf(src, width, height, format, &enc_src_type, &enc_in_size);
if (!enc_in) {
ESP_LOGW(TAG, "hw jpeg: unsupported format, fallback to sw");
return false;
}
if (!hw_jpeg_ensure_inited()) {
free(enc_in);
return false;
}
jpeg_encode_cfg_t enc_cfg = {0};
enc_cfg.width = width;
enc_cfg.height = height;
enc_cfg.src_type = enc_src_type;
enc_cfg.image_quality = quality;
enc_cfg.sub_sample = (enc_src_type == JPEG_ENCODE_IN_FORMAT_GRAY) ? JPEG_DOWN_SAMPLING_GRAY : JPEG_DOWN_SAMPLING_YUV422;
size_t out_cap = (size_t)width * (size_t)height * 3 / 2 + 64 * 1024;
if (out_cap < 128 * 1024)
out_cap = 128 * 1024;
jpeg_encode_memory_alloc_cfg_t jpeg_enc_output_mem_cfg = { .buffer_direction = JPEG_ENC_ALLOC_OUTPUT_BUFFER };
size_t out_cap_aligned = 0;
uint8_t* outbuf = (uint8_t*)jpeg_alloc_encoder_mem(out_cap, &jpeg_enc_output_mem_cfg, &out_cap_aligned);
if (!outbuf) {
free(enc_in);
ESP_LOGE(TAG, "alloc out buffer failed");
return false;
}
uint32_t out_len = 0;
esp_err_t er = jpeg_encoder_process(s_hw_jpeg_handle, &enc_cfg, enc_in, (uint32_t)enc_in_size, outbuf, (uint32_t)out_cap_aligned, &out_len);
free(enc_in);
if (er != ESP_OK) {
free(outbuf);
ESP_LOGE(TAG, "jpeg_encoder_process failed: %d", (int)er);
return false;
}
if (cb) {
cb(cb_arg, 0, outbuf, (size_t)out_len);
cb(cb_arg, 1, NULL, 0);
free(outbuf);
if (jpg_out)
*jpg_out = NULL;
if (jpg_out_len)
*jpg_out_len = 0;
return true;
}
if (jpg_out && jpg_out_len) {
*jpg_out = outbuf;
*jpg_out_len = (size_t)out_len;
return true;
}
free(outbuf);
return true;
}
#endif // CONFIG_XIAOZHI_ENABLE_HARDWARE_JPEG_ENCODER
static bool encode_with_esp_new_jpeg(const uint8_t* src, size_t src_len, uint16_t width, uint16_t height,
v4l2_pix_fmt_t format, uint8_t quality, uint8_t** jpg_out, size_t* jpg_out_len,
jpg_out_cb cb, void* cb_arg) {
if (quality < 1)
quality = 1;
if (quality > 100)
quality = 100;
jpeg_pixel_format_t enc_src_type = JPEG_PIXEL_FORMAT_RGB888;
int enc_in_size = 0;
uint8_t* enc_in = convert_input_to_encoder_buf(src, width, height, format, &enc_src_type, &enc_in_size);
if (!enc_in) {
ESP_LOGE(TAG, "alloc/convert input failed");
return false;
}
jpeg_enc_config_t cfg = DEFAULT_JPEG_ENC_CONFIG();
cfg.width = width;
cfg.height = height;
cfg.src_type = enc_src_type;
cfg.subsampling = (enc_src_type == JPEG_PIXEL_FORMAT_GRAY) ? JPEG_SUBSAMPLE_GRAY : JPEG_SUBSAMPLE_420;
cfg.quality = quality;
cfg.rotate = JPEG_ROTATE_0D;
cfg.task_enable = false;
jpeg_enc_handle_t h = NULL;
jpeg_error_t ret = jpeg_enc_open(&cfg, &h);
if (ret != JPEG_ERR_OK) {
jpeg_free_align(enc_in);
ESP_LOGE(TAG, "jpeg_enc_open failed: %d", (int)ret);
return false;
}
// 估算输出缓冲区:宽高的 1.5 倍 + 64KB
size_t out_cap = (size_t)width * (size_t)height * 3 / 2 + 64 * 1024;
if (out_cap < 128 * 1024)
out_cap = 128 * 1024;
uint8_t* outbuf = (uint8_t*)malloc_psram(out_cap);
if (!outbuf) {
jpeg_enc_close(h);
jpeg_free_align(enc_in);
ESP_LOGE(TAG, "alloc out buffer failed");
return false;
}
int out_len = 0;
ret = jpeg_enc_process(h, enc_in, enc_in_size, outbuf, (int)out_cap, &out_len);
jpeg_enc_close(h);
jpeg_free_align(enc_in);
if (ret != JPEG_ERR_OK) {
free(outbuf);
ESP_LOGE(TAG, "jpeg_enc_process failed: %d", (int)ret);
return false;
}
if (cb) {
cb(cb_arg, 0, outbuf, (size_t)out_len);
cb(cb_arg, 1, NULL, 0); // 结束信号
free(outbuf);
if (jpg_out)
*jpg_out = NULL;
if (jpg_out_len)
*jpg_out_len = 0;
return true;
}
if (jpg_out && jpg_out_len) {
*jpg_out = outbuf;
*jpg_out_len = (size_t)out_len;
return true;
}
free(outbuf);
return true;
}
// Convert a raw frame to JPEG and return it in a newly allocated buffer (*out, *out_len).
// When JPEG input is allowed by the build and the frame is already JPEG, a copy of the
// input is returned. Otherwise the hardware encoder (if enabled) is tried first and the
// software encoder is used as fallback. Returns true on success.
bool image_to_jpeg(uint8_t* src, size_t src_len, uint16_t width, uint16_t height, v4l2_pix_fmt_t format,
                   uint8_t quality, uint8_t** out, size_t* out_len) {
#ifdef CONFIG_XIAOZHI_CAMERA_ALLOW_JPEG_INPUT
    if (format == V4L2_PIX_FMT_JPEG) {
        // Already JPEG: hand back a private SPIRAM copy of the input bytes.
        void* copy = heap_caps_malloc(src_len, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT);
        if (copy == NULL) {
            ESP_LOGE(TAG, "Failed to allocate memory for JPEG output");
            return false;
        }
        memcpy(copy, src, src_len);
        *out = (uint8_t*)copy;
        *out_len = src_len;
        return true;
    }
#endif  // CONFIG_XIAOZHI_CAMERA_ALLOW_JPEG_INPUT

#if CONFIG_XIAOZHI_ENABLE_HARDWARE_JPEG_ENCODER
    const bool hw_done = encode_with_hw_jpeg(src, src_len, width, height, format, quality, out, out_len, NULL, NULL);
    if (hw_done) {
        return true;
    }
    // Hardware path declined or failed: fall through to esp_new_jpeg.
#endif

    return encode_with_esp_new_jpeg(src, src_len, width, height, format, quality, out, out_len, NULL, NULL);
}
// Convert a raw frame to JPEG and deliver the result through `cb`.
// The callback is invoked with index 0 for the JPEG data and then with index 1, a null data
// pointer and length 0 as the end marker. When JPEG input is allowed by the build and the
// frame is already JPEG, the input bytes are forwarded unchanged. Otherwise the hardware
// encoder (if enabled) is tried first, then the software encoder. Returns true on success.
bool image_to_jpeg_cb(uint8_t* src, size_t src_len, uint16_t width, uint16_t height, v4l2_pix_fmt_t format,
                      uint8_t quality, jpg_out_cb cb, void* arg) {
#ifdef CONFIG_XIAOZHI_CAMERA_ALLOW_JPEG_INPUT
    if (format == V4L2_PIX_FMT_JPEG) {
        // Already JPEG: pass the caller's bytes straight through.
        cb(arg, 0, src, src_len);
        cb(arg, 1, nullptr, 0);  // end-of-data signal
        return true;
    }
#endif  // CONFIG_XIAOZHI_CAMERA_ALLOW_JPEG_INPUT

#if CONFIG_XIAOZHI_ENABLE_HARDWARE_JPEG_ENCODER
    const bool hw_done = encode_with_hw_jpeg(src, src_len, width, height, format, quality, NULL, NULL, cb, arg);
    if (hw_done) {
        return true;
    }
    // Hardware path declined or failed: fall through to esp_new_jpeg.
#endif

    return encode_with_esp_new_jpeg(src, src_len, width, height, format, quality, NULL, NULL, cb, arg);
}

View File

@@ -0,0 +1,86 @@
// image_to_jpeg.h - 图像到JPEG转换的高效编码接口
// 节省约8KB SRAM的JPEG编码实现
#pragma once
#include "sdkconfig.h"
#ifndef CONFIG_IDF_TARGET_ESP32
#include <stdint.h>
#include <stddef.h>
#if defined(CONFIG_IDF_TARGET_ESP32P4) || defined(CONFIG_IDF_TARGET_ESP32S3)
// ESP32-P4 and ESP32-S3: use the V4L2 header (provided by the esp_video component).
#include <linux/videodev2.h>
#else
// All other targets: define the commonly used V4L2 pixel-format codes locally.
// Each value packs four ASCII characters, first character in the lowest byte.
#define V4L2_PIX_FMT_RGB565 0x50424752 // 'RGBP'
// NOTE(review): the bytes of this value spell 'PBGR' (the RGB565 code byte-reversed);
// Linux defines RGB565X as 'RGBR' -- confirm nothing depends on the upstream value.
#define V4L2_PIX_FMT_RGB565X 0x52474250 // 'PBGR'
#define V4L2_PIX_FMT_RGB24 0x33424752 // 'RGB3'
#define V4L2_PIX_FMT_YUYV 0x56595559 // 'YUYV'
// NOTE(review): Linux defines YUV422P as '422P'; this local code is 'YU16' -- confirm.
#define V4L2_PIX_FMT_YUV422P 0x36315559 // 'YU16'
#define V4L2_PIX_FMT_YUV420 0x32315559 // 'YU12'
#define V4L2_PIX_FMT_GREY 0x59455247 // 'GREY'
#define V4L2_PIX_FMT_UYVY 0x59565955 // 'UYVY'
#define V4L2_PIX_FMT_JPEG 0x4745504A // 'JPEG'
#endif
// Pixel format identifier: one of the V4L2_PIX_FMT_* codes.
typedef uint32_t v4l2_pix_fmt_t;
#ifdef __cplusplus
extern "C"
{
#endif
// JPEG output callback type.
// arg:   user-defined argument passed through unchanged
// index: index of the current call; the encoders in this component pass 0 for the JPEG
//        data and 1 for the end-of-data marker
// data:  JPEG data chunk (NULL on the end-of-data call)
// len:   length of the chunk in bytes (0 on the end-of-data call)
// Returns: number of bytes actually processed
typedef size_t (*jpg_out_cb)(void *arg, size_t index, const void *data, size_t len);
/**
 * @brief Convert a raw image to JPEG.
 *
 * Encodes the image with the optimized JPEG encoder. Main characteristics:
 * - Saves about 8KB of SRAM by using heap allocation instead of static variables
 * - Supports several input pixel formats
 * - High-quality JPEG output
 *
 * @param src Source image data
 * @param src_len Length of the source image data in bytes
 * @param width Image width in pixels
 * @param height Image height in pixels
 * @param format Pixel format of the source (V4L2_PIX_FMT_RGB565, V4L2_PIX_FMT_RGB24, etc.)
 * @param quality JPEG quality (1-100)
 * @param out Receives a pointer to the JPEG data (must be released by the caller)
 * @param out_len Receives the length of the JPEG data in bytes
 *
 * @return true on success, false on failure
 */
bool image_to_jpeg(uint8_t *src, size_t src_len, uint16_t width, uint16_t height,
v4l2_pix_fmt_t format, uint8_t quality, uint8_t **out, size_t *out_len);
/**
 * @brief Convert a raw image to JPEG, callback version.
 *
 * The JPEG output is handed to a callback instead of being returned in a buffer.
 * - Saves about 8KB of SRAM by using heap allocation instead of static variables
 * - The caller does not have to provide or own an output buffer
 * - The callback is invoked with index 0 for the JPEG data and then with index 1,
 *   data == NULL and len == 0 to signal the end of the output
 *
 * @param src Source image data
 * @param src_len Length of the source image data in bytes
 * @param width Image width in pixels
 * @param height Image height in pixels
 * @param format Pixel format of the source (one of the V4L2_PIX_FMT_* codes)
 * @param quality JPEG quality (1-100)
 * @param cb Output callback
 * @param arg User argument passed to the callback
 *
 * @return true on success, false on failure
 */
bool image_to_jpeg_cb(uint8_t *src, size_t src_len, uint16_t width, uint16_t height,
v4l2_pix_fmt_t format, uint8_t quality, jpg_out_cb cb, void *arg);
#ifdef __cplusplus
}
#endif
#endif // ndef CONFIG_IDF_TARGET_ESP32

View File

@@ -0,0 +1,264 @@
#include <esp_check.h>
#include <esp_err.h>
#include <esp_heap_caps.h>
#include <sys/param.h>
#include "esp_jpeg_common.h"
#include "esp_jpeg_dec.h"
#include "jpeg_to_image.h"
#ifdef CONFIG_XIAOZHI_ENABLE_CAMERA_DEBUG_MODE
#undef LOG_LOCAL_LEVEL
#define LOG_LOCAL_LEVEL MAX(CONFIG_LOG_DEFAULT_LEVEL, ESP_LOG_DEBUG)
#endif // CONFIG_XIAOZHI_ENABLE_CAMERA_DEBUG_MODE
#include <esp_log.h>
#ifdef CONFIG_XIAOZHI_ENABLE_HARDWARE_JPEG_DECODER
#include "driver/jpeg_decode.h"
#endif
#define TAG "jpeg_to_image"
/*
 * Decode a JPEG bitstream to RGB565 (little-endian) with the software decoder.
 *
 * On success *out receives a buffer from jpeg_calloc_align() holding width * height * 2
 * bytes, *out_len that byte count, *width / *height the dimensions from the JPEG header and
 * *stride width * 2. On failure every output is zeroed / set to NULL and any partially
 * created resources are released.
 *
 * Returns ESP_OK, ESP_FAIL (decoder open or decode error), ESP_ERR_INVALID_ARG (header
 * could not be parsed) or ESP_ERR_NO_MEM (output buffer allocation failed).
 */
static esp_err_t decode_with_new_jpeg(const uint8_t* src, size_t src_len, uint8_t** out, size_t* out_len, size_t* width,
                                      size_t* height, size_t* stride) {
    ESP_LOGD(TAG, "Decoding JPEG with software decoder");

    esp_err_t status = ESP_OK;
    uint8_t* pixels = NULL;
    jpeg_dec_handle_t decoder = NULL;
    jpeg_dec_io_t io = {0};
    jpeg_dec_header_info_t info = {0};

    jpeg_dec_config_t dec_cfg = DEFAULT_JPEG_DEC_CONFIG();
    dec_cfg.output_type = JPEG_PIXEL_FORMAT_RGB565_LE;
    dec_cfg.rotate = JPEG_ROTATE_0D;

    if (jpeg_dec_open(&dec_cfg, &decoder) != JPEG_ERR_OK) {
        ESP_LOGE(TAG, "Failed to open JPEG decoder");
        status = ESP_FAIL;
        goto finish;
    }

    io.inbuf = (uint8_t*)src;
    io.inbuf_len = (int)src_len;
    if (jpeg_dec_parse_header(decoder, &io, &info) != JPEG_ERR_OK) {
        ESP_LOGE(TAG, "Failed to parse JPEG header");
        status = ESP_ERR_INVALID_ARG;
        goto finish;
    }
    ESP_LOGD(TAG, "JPEG header info: width=%d, height=%d", info.width, info.height);

    /* Two bytes per pixel for RGB565. */
    pixels = jpeg_calloc_align(info.width * info.height * 2, 16);
    if (pixels == NULL) {
        ESP_LOGE(TAG, "Failed to allocate memory for JPEG output buffer");
        status = ESP_ERR_NO_MEM;
        goto finish;
    }

    io.outbuf = pixels;
    if (jpeg_dec_process(decoder, &io) != JPEG_ERR_OK) {
        ESP_LOGE(TAG, "Failed to decode JPEG");
        status = ESP_FAIL;
        goto finish;
    }
    ESP_LOG_BUFFER_HEXDUMP(TAG, pixels, MIN(info.width * info.height * 2, 256), ESP_LOG_DEBUG);

    /* Success: ownership of the pixel buffer moves to the caller. */
    *out = pixels;
    pixels = NULL;
    *out_len = (size_t)(info.width * info.height * 2);
    *width = (size_t)info.width;
    *height = (size_t)info.height;
    *stride = (size_t)info.width * 2;

finish:
    if (decoder) {
        jpeg_dec_close(decoder);
        decoder = NULL;
    }
    if (status != ESP_OK) {
        if (pixels) {
            jpeg_free_align(pixels);
            pixels = NULL;
        }
        *out = NULL;
        *out_len = 0;
        *width = 0;
        *height = 0;
        *stride = 0;
    }
    return status;
}
#ifdef CONFIG_XIAOZHI_ENABLE_HARDWARE_JPEG_DECODER
/*
 * Decode a JPEG bitstream to RGB565 with the hardware decoder.
 *
 * A decoder engine is created for the call and deleted before returning. The bitstream is
 * first copied into a buffer obtained from jpeg_alloc_decoder_mem().
 *
 * Output geometry depends on the sampling method found in the header:
 *  - GRAY / YUV444: buffer is width * height * 2 bytes, *stride = width * 2.
 *  - YUV422 / YUV420: width and height are rounded up to a multiple of 16 for the buffer
 *    size and the stride.
 * *width and *height always report the (unrounded) header dimensions.
 *
 * On success *out owns a buffer from jpeg_alloc_decoder_mem(). On failure every output is
 * zeroed / set to NULL, all resources are released and an esp_err_t describing the failure
 * is returned (ESP_ERR_NO_MEM, ESP_ERR_NOT_SUPPORTED, ESP_ERR_INVALID_SIZE, or the error
 * reported by the driver).
 */
static esp_err_t decode_with_hardware_jpeg(const uint8_t* src, size_t src_len, uint8_t** out, size_t* out_len,
                                           size_t* width, size_t* height, size_t* stride) {
    ESP_LOGD(TAG, "Decoding JPEG with hardware decoder");
    esp_err_t ret = ESP_OK;
    jpeg_decoder_handle_t jpeg_dec = NULL;
    uint8_t* bit_stream = NULL;
    uint8_t* out_buf = NULL;
    size_t out_buf_len = 0;
    size_t tx_buffer_size = 0;
    size_t rx_buffer_size = 0;
    jpeg_decode_engine_cfg_t eng_cfg = {
        .intr_priority = 1,
        .timeout_ms = 1000,
    };
    jpeg_decode_cfg_t decode_cfg_rgb = {
        .output_format = JPEG_DECODE_OUT_FORMAT_RGB565,
        .rgb_order = JPEG_DEC_RGB_ELEMENT_ORDER_BGR,
    };

    ret = jpeg_new_decoder_engine(&eng_cfg, &jpeg_dec);
    if (ret != ESP_OK) {
        ESP_LOGE(TAG, "Failed to create JPEG decoder engine");
        goto jpeg_hw_dec_failed;
    }

    jpeg_decode_memory_alloc_cfg_t tx_mem_cfg = {
        .buffer_direction = JPEG_DEC_ALLOC_INPUT_BUFFER,
    };
    jpeg_decode_memory_alloc_cfg_t rx_mem_cfg = {
        .buffer_direction = JPEG_DEC_ALLOC_OUTPUT_BUFFER,
    };

    /* The bitstream is copied into decoder-allocated memory before decoding. */
    bit_stream = (uint8_t*)jpeg_alloc_decoder_mem(src_len, &tx_mem_cfg, &tx_buffer_size);
    if (bit_stream == NULL || tx_buffer_size < src_len) {
        ESP_LOGE(TAG, "Failed to allocate memory for JPEG bit stream");
        ret = ESP_ERR_NO_MEM;
        goto jpeg_hw_dec_failed;
    }
    memcpy(bit_stream, src, src_len);

    jpeg_decode_picture_info_t header_info;
    ESP_GOTO_ON_ERROR(jpeg_decoder_get_info(bit_stream, src_len, &header_info), jpeg_hw_dec_failed, TAG,
                      "Failed to get JPEG header info");
    ESP_LOGD(TAG, "JPEG header info: width=%d, height=%d, sample_method=%d", header_info.width, header_info.height,
             (int)header_info.sample_method);

    switch (header_info.sample_method) {
        case JPEG_DOWN_SAMPLING_GRAY:
        case JPEG_DOWN_SAMPLING_YUV444:
            out_buf_len = header_info.width * header_info.height * 2;
            *stride = header_info.width * 2;
            break;
        case JPEG_DOWN_SAMPLING_YUV422:
        case JPEG_DOWN_SAMPLING_YUV420:
            /* Both dimensions are rounded up to the next multiple of 16. */
            out_buf_len = ((header_info.width + 15) & ~15) * ((header_info.height + 15) & ~15) * 2;
            *stride = ((header_info.width + 15) & ~15) * 2;
            break;
        default:
            ESP_LOGE(TAG, "Unsupported JPEG sample method");
            ret = ESP_ERR_NOT_SUPPORTED;
            goto jpeg_hw_dec_failed;
    }

    out_buf = (uint8_t*)jpeg_alloc_decoder_mem(out_buf_len, &rx_mem_cfg, &rx_buffer_size);
    if (out_buf == NULL || rx_buffer_size < out_buf_len) {
        ESP_LOGE(TAG, "Failed to allocate memory for JPEG output buffer");
        ret = ESP_ERR_NO_MEM;
        goto jpeg_hw_dec_failed;
    }

    uint32_t out_size = 0;
    ESP_GOTO_ON_ERROR(
        jpeg_decoder_process(jpeg_dec, &decode_cfg_rgb, bit_stream, src_len, out_buf, out_buf_len, &out_size),
        jpeg_hw_dec_failed, TAG, "Failed to decode JPEG");
    /* out_buf_len is size_t, out_size is uint32_t: use the matching conversion specifiers. */
    ESP_LOGD(TAG, "Expected %zu bytes, got %" PRIu32 " bytes", out_buf_len, out_size);
    if (out_size != out_buf_len) {
        ESP_LOGE(TAG, "Decoded image size mismatch: Expected %zu bytes, got %" PRIu32 " bytes", out_buf_len, out_size);
        ret = ESP_ERR_INVALID_SIZE;
        goto jpeg_hw_dec_failed;
    }

    if (header_info.sample_method == JPEG_DOWN_SAMPLING_GRAY) {
        /*
         * Convert GRAY8 to RGB565 in place. Walking from the last pixel to the first keeps
         * the 2-byte writes at 2*i from overwriting gray samples that are still unread.
         * The loop form also does nothing when the pixel count is zero.
         * NOTE(review): this treats the first width*height bytes as 8-bit gray even though
         * the size check above expects width*height*2 bytes -- confirm the decoder's gray
         * output layout.
         */
        for (uint32_t i = header_info.width * header_info.height; i-- > 0;) {
            uint8_t r = (out_buf[i] >> 3) & 0x1F;
            uint8_t g = (out_buf[i] >> 2) & 0x3F;
            /* b is same as r */
            uint16_t rgb565 = (r << 11) | (g << 5) | r;
            out_buf[2 * i + 1] = (rgb565 >> 8) & 0xFF;
            out_buf[2 * i] = rgb565 & 0xFF;
        }
        out_size = header_info.width * header_info.height * 2;
        ESP_LOGD(TAG, "Converted GRAY8 to RGB565, new size: %" PRIu32, out_size);
    }

    ESP_LOG_BUFFER_HEXDUMP(TAG, out_buf, MIN(out_size, 256), ESP_LOG_DEBUG);

    /* Success: hand the pixel buffer to the caller and release everything else. */
    *out = out_buf;
    out_buf = NULL;
    *out_len = (size_t)out_size;
    jpeg_del_decoder_engine(jpeg_dec);
    jpeg_dec = NULL;
    heap_caps_free(bit_stream);
    bit_stream = NULL;
    *width = header_info.width;
    *height = header_info.height;
    return ret;

jpeg_hw_dec_failed:
    if (out_buf) {
        heap_caps_free(out_buf);
        out_buf = NULL;
    }
    if (bit_stream) {
        heap_caps_free(bit_stream);
        bit_stream = NULL;
    }
    if (jpeg_dec) {
        jpeg_del_decoder_engine(jpeg_dec);
        jpeg_dec = NULL;
    }
    *out = NULL;
    *out_len = 0;
    *width = 0;
    *height = 0;
    *stride = 0;
    return ret;
}
#endif // CONFIG_XIAOZHI_ENABLE_HARDWARE_JPEG_DECODER
/*
 * Decode a JPEG bitstream into an RGB565 pixel buffer.
 * Rejects NULL pointers and an empty input with ESP_ERR_INVALID_ARG. When the hardware
 * decoder is enabled it is tried first; any hardware failure falls back to the software
 * decoder, whose result is returned.
 */
esp_err_t jpeg_to_image(const uint8_t* src, size_t src_len, uint8_t** out, size_t* out_len, size_t* width,
                        size_t* height, size_t* stride) {
#ifdef CONFIG_XIAOZHI_ENABLE_CAMERA_DEBUG_MODE
    esp_log_level_set(TAG, ESP_LOG_DEBUG);
#endif  // CONFIG_XIAOZHI_ENABLE_CAMERA_DEBUG_MODE

    if (!src || !src_len || !out || !out_len || !width || !height || !stride) {
        ESP_LOGE(TAG, "Invalid parameters");
        return ESP_ERR_INVALID_ARG;
    }

#ifdef CONFIG_XIAOZHI_ENABLE_HARDWARE_JPEG_DECODER
    if (decode_with_hardware_jpeg(src, src_len, out, out_len, width, height, stride) == ESP_OK) {
        return ESP_OK;
    }
    ESP_LOGW(TAG, "Failed to decode with hardware JPEG, fallback to software decoder");
    // Hardware path failed: continue with esp_new_jpeg below.
#endif

    return decode_with_new_jpeg(src, src_len, out, out_len, width, height, stride);
}

View File

@@ -0,0 +1,62 @@
#include "sdkconfig.h"
#ifndef CONFIG_IDF_TARGET_ESP32
#include <esp_err.h>
#ifdef __cplusplus
extern "C" {
#endif
/**
 * @brief Decodes a JPEG image from memory to raw RGB565 pixel data
 *
 * This function attempts to decode a JPEG image using hardware acceleration first (if enabled),
 * falling back to a software decoder if hardware decoding fails or is unavailable.
 *
 * @param[in] src Pointer to the JPEG bitstream in memory
 * @param[in] src_len Length of the JPEG bitstream in bytes
 * @param[out] out Pointer to a buffer pointer that will be set to the decoded image data.
 *                 This buffer is allocated internally and MUST be freed by the caller using heap_caps_free().
 * @param[out] out_len Pointer to a variable that will receive the size of the decoded image data in bytes
 * @param[out] width Pointer to a variable that will receive the image width in pixels
 * @param[out] height Pointer to a variable that will receive the image height in pixels
 * @param[out] stride Pointer to a variable that will receive the image stride in bytes
 *
 * @return ESP_OK on successful decoding
 * @return ESP_ERR_INVALID_ARG on invalid parameters (any NULL pointer or src_len == 0) or an unparsable header
 * @return ESP_ERR_NO_MEM on memory allocation failure
 * @return ESP_FAIL on failure
 *
 * @attention Memory Management for `*out`:
 * - The function allocates memory for the decoded image internally
 * - On success, the caller takes ownership of this memory and SHOULD free it using heap_caps_free()
 * - On failure, `*out` is guaranteed to be NULL and no freeing is required
 * - The return value is an esp_err_t: compare it against ESP_OK, do not test it as a boolean
 * - Example usage:
 * @code{.c}
 * uint8_t *image = NULL;
 * size_t len, width, height, stride;
 * if (jpeg_to_image(jpeg_data, jpeg_len, &image, &len, &width, &height, &stride) == ESP_OK) {
 *     // Use image data...
 *     heap_caps_free(image); // Critical: use heap_caps_free
 * }
 * @endcode
 *
 * @note Configuration dependency:
 * - When CONFIG_XIAOZHI_ENABLE_HARDWARE_JPEG_DECODER is enabled, hardware acceleration is attempted first
 * - Both hardware and software paths allocate memory that requires heap_caps_free() for deallocation
 *   (NOTE(review): the software path allocates with jpeg_calloc_align(); confirm heap_caps_free() is the
 *   matching release for that buffer)
 * - The decoded image format is always RGB565 (2 bytes per pixel)
 *
 * @note When using hardware decoder, the decoded buffer might be aligned up to 16-pixel boundaries.
 * For YUV420 or YUV422 compressed images, both width and height of the buffer are rounded up to the nearest
 * multiple of 16: `stride` and `out_len` reflect the rounded size, while `width` and `height` still report
 * the dimensions from the JPEG header. Use `stride` (not `width * 2`) to step between rows.
 * See details at
 * <https://docs.espressif.com/projects/esp-idf/en/stable/esp32p4/api-reference/peripherals/jpeg.html#jpeg-decoder-engine>
 *
 */
esp_err_t jpeg_to_image(const uint8_t* src, size_t src_len, uint8_t** out, size_t* out_len, size_t* width,
size_t* height, size_t* stride);
#ifdef __cplusplus
}
#endif
#endif // CONFIG_IDF_TARGET_ESP32