英文:
Simple way to compute an RGB buffer from an I420 frame with FFmpeg
问题
我正在尝试直接从AVFrame计算rgb缓冲区。有些地方出错了,因为得到的图像是错误的。从AVFrame->data[0]中提取灰度图像可以正常工作。然而,我无法提取彩色图像。
// Converts one YCbCr sample to 8-bit RGB using full-range (JPEG-style)
// coefficients.
//   Y, Cr, Cb - input components; the chroma components carry a +128 bias.
//   R, G, B   - receive the result, truncated to an integer and then limited
//               to the range [0, 255].
inline void YCrCb_to_RGB8(int Y, int Cr, int Cb, int& R, int& G, int& B){
    // Centre the chroma components around zero.
    const int redDiff  = Cr - 128;
    const int blueDiff = Cb - 128;

    const int red   = (int)(Y + 1.402 * redDiff);
    const int green = (int)(Y - 0.344136 * blueDiff - 0.71414 * redDiff);
    const int blue  = (int)(Y + 1.772 * blueDiff);

    // Saturate each channel to what fits in one byte.
    R = red   < 0 ? 0 : (red   > 255 ? 255 : red);
    G = green < 0 ? 0 : (green > 255 ? 255 : green);
    B = blue  < 0 ? 0 : (blue  > 255 ? 255 : blue);
}
// Fills `buffer` with packed 8-bit RGB triplets (3 bytes per pixel, rows stored
// back to back) computed from the three planes of `pFrame`.
//   pFrame - source frame; width, height, data[0..2] and linesize[1..2] are read.
//   buffer - destination; must hold at least width * height * 3 bytes.
// Always returns 0.
int getRGB8buffer(AVFrame* pFrame, byte* buffer){
    const int cols = pFrame->width;
    const int rows = pFrame->height;
    byte* out = buffer;

    for (int row = 0; row < rows; ++row)
    {
        // Plane 0 is addressed with the frame width as its row pitch. Planes 1
        // and 2 use their own linesize and are sampled once per 2x2 pixel block.
        const int plane0Base = row * cols;
        const int plane1Base = (row / 2) * pFrame->linesize[1];
        const int plane2Base = (row / 2) * pFrame->linesize[2];

        for (int col = 0; col < cols; ++col)
        {
            const int luma = pFrame->data[0][col + plane0Base];
            const int cr   = pFrame->data[1][col / 2 + plane1Base]; // plane 1 is used as Cr here
            const int cb   = pFrame->data[2][col / 2 + plane2Base]; // plane 2 is used as Cb here

            int red, green, blue;
            YCrCb_to_RGB8(luma, cr, cb, red, green, blue);

            out[0] = red;
            out[1] = green;
            out[2] = blue;
            out += 3;
        }
    }
    return 0;
}
当我使用以下代码将得到的图像保存为ppm格式时:
// Writes an RGB image to `filename` as a PPM ("P6") file.
//   buf      - first byte of the top image row (3 bytes per pixel)
//   wrap     - distance in bytes between the starts of consecutive rows in buf
//   xsize    - image width in pixels
//   ysize    - image height in pixels
//   filename - path of the file to create
// Always returns 0; the results of fopen/fprintf/fwrite are not checked.
int save_RGB_frame(unsigned char* buf, int wrap, int xsize,int ysize, const char* filename){
    FILE* out = fopen(filename, "w");

    // PPM header: magic number, width and height, maximum channel value.
    // Format reference: https://en.wikipedia.org/wiki/Netpbm#PPM_example
    fprintf(out, "P6\n%d %d\n%d\n", xsize, ysize, 255);

    // Emit one image row per iteration so bytes between rows are skipped.
    int row = 0;
    while (row < ysize) {
        fwrite(buf + row * wrap, 1, xsize*3, out);
        ++row;
    }

    fclose(out);
    return 0;
}
得到的图像是错误的。
结果图像的链接:https://github.com/hacenesh/ffmpeg_question/blob/main/img_2028144.ppm
英文:
I'm trying to compute an RGB buffer directly from an AVFrame. Something is going wrong, since the obtained image is wrong. Extracting a grey image from AVFrame->data[0] works fine. However, I'm not able to extract a colored image.
// Converts one YCbCr sample to 8-bit RGB using full-range (JPEG-style)
// coefficients.
//   Y, Cr, Cb - input components; the chroma components carry a +128 bias.
//   R, G, B   - receive the result, truncated to an integer and then limited
//               to the range [0, 255].
inline void YCrCb_to_RGB8(int Y, int Cr, int Cb, int& R, int& G, int& B){
    // Centre the chroma components around zero.
    const int redDiff  = Cr - 128;
    const int blueDiff = Cb - 128;

    const int red   = (int)(Y + 1.402 * redDiff);
    const int green = (int)(Y - 0.344136 * blueDiff - 0.71414 * redDiff);
    const int blue  = (int)(Y + 1.772 * blueDiff);

    // Saturate each channel to what fits in one byte.
    R = red   < 0 ? 0 : (red   > 255 ? 255 : red);
    G = green < 0 ? 0 : (green > 255 ? 255 : green);
    B = blue  < 0 ? 0 : (blue  > 255 ? 255 : blue);
}
// Fills `buffer` with packed 8-bit RGB triplets (3 bytes per pixel, rows stored
// back to back) computed from the three planes of `pFrame`.
//   pFrame - source frame; width, height, data[0..2] and linesize[1..2] are read.
//   buffer - destination; must hold at least width * height * 3 bytes.
// Always returns 0.
int getRGB8buffer(AVFrame* pFrame, byte* buffer){
    const int cols = pFrame->width;
    const int rows = pFrame->height;
    byte* out = buffer;

    for (int row = 0; row < rows; ++row)
    {
        // Plane 0 is addressed with the frame width as its row pitch. Planes 1
        // and 2 use their own linesize and are sampled once per 2x2 pixel block.
        const int plane0Base = row * cols;
        const int plane1Base = (row / 2) * pFrame->linesize[1];
        const int plane2Base = (row / 2) * pFrame->linesize[2];

        for (int col = 0; col < cols; ++col)
        {
            const int luma = pFrame->data[0][col + plane0Base];
            const int cr   = pFrame->data[1][col / 2 + plane1Base]; // plane 1 is used as Cr here
            const int cb   = pFrame->data[2][col / 2 + plane2Base]; // plane 2 is used as Cb here

            int red, green, blue;
            YCrCb_to_RGB8(luma, cr, cb, red, green, blue);

            out[0] = red;
            out[1] = green;
            out[2] = blue;
            out += 3;
        }
    }
    return 0;
}
When I save the obtained image as ppm using
// Writes an RGB image to `filename` as a PPM ("P6") file.
//   buf      - first byte of the top image row (3 bytes per pixel)
//   wrap     - distance in bytes between the starts of consecutive rows in buf
//   xsize    - image width in pixels
//   ysize    - image height in pixels
//   filename - path of the file to create
// Always returns 0; the results of fopen/fprintf/fwrite are not checked.
int save_RGB_frame(unsigned char* buf, int wrap, int xsize,int ysize, const char* filename){
    FILE* out = fopen(filename, "w");

    // PPM header: magic number, width and height, maximum channel value.
    // Format reference: https://en.wikipedia.org/wiki/Netpbm#PPM_example
    fprintf(out, "P6\n%d %d\n%d\n", xsize, ysize, 255);

    // Emit one image row per iteration so bytes between rows are skipped.
    int row = 0;
    while (row < ysize) {
        fwrite(buf + row * wrap, 1, xsize*3, out);
        ++row;
    }

    fclose(out);
    return 0;
}
The resulting image is wrong
Link to the resulting image: https://github.com/hacenesh/ffmpeg_question/blob/main/img_2028144.ppm
答案1
得分: 3
主要问题是使用fopen(filename, "w")
而不是f = fopen(filename, "wb")
。
在Windows操作系统中,二进制文件和文本文件之间有重要区别。默认的"w"
选项将文件打开为文本文件。在写入文本文件时,每个新行字符\n
都会转换为两个字符\r\n
。这些额外的字符会破坏图像的整体结构。请注意:如果你使用Linux,"wb"
和"w"
应该是相同的。
save_RGB_frame
的修正代码:
// Writes a packed 8-bit RGB image to `filename` as a binary PPM ("P6") file.
//
//   buf      - first byte of the top image row (3 bytes per pixel, R G B order)
//   wrap     - distance in bytes between the starts of consecutive rows in buf
//   xsize    - image width in pixels
//   ysize    - image height in pixels
//   filename - path of the file to create or overwrite
//
// Returns 0 on success, or -1 if an argument is invalid, the file cannot be
// opened, or any write fails.
int save_RGB_frame(unsigned char* buf, int wrap, int xsize, int ysize, const char* filename)
{
    // A non-positive size would reach fwrite as a huge unsigned byte count.
    if (buf == nullptr || filename == nullptr || xsize <= 0 || ysize <= 0)
        return -1;

    // "wb" is required on Windows: in text mode ("w") every '\n' byte in the
    // pixel data is expanded to "\r\n", which corrupts the image.
    FILE* f = fopen(filename, "wb");
    if (f == nullptr)
        return -1;

    int status = 0;

    // PPM header: magic number, width and height, maximum channel value.
    // Format reference: https://en.wikipedia.org/wiki/Netpbm#PPM_example
    if (fprintf(f, "P6\n%d %d\n%d\n", xsize, ysize, 255) < 0)
        status = -1;

    // Write row by row so that any padding between rows (wrap > xsize*3) is skipped.
    const size_t rowBytes = static_cast<size_t>(xsize) * 3;
    for (int i = 0; status == 0 && i < ysize; i++)
    {
        // Widen before multiplying so large images cannot overflow int.
        const unsigned char* row = buf + static_cast<long long>(i) * wrap;
        if (fwrite(row, 1, rowBytes, f) != rowBytes)
            status = -1;
    }

    // fclose flushes buffered data, so its failure is a write failure too.
    if (fclose(f) != 0)
        status = -1;

    return status;
}
保存为PPM图像文件:
// `buffer` holds packed RGB rows (3 bytes per pixel), so the row stride passed as `wrap` is width*3.
save_RGB_frame(buffer, width*3, width, height, "img.ppm");
getRGB8buffer
中的问题:
YUV420p格式的平面顺序是Y
,然后是U
,然后是V
。
U
对应Cb
,V
对应Cr
,所以顺序是:Y
,Cb
,Cr
。
getRGB8buffer
的修正代码:
int getRGB8buffer(const AVFrame* pFrame, byte* buffer)
{
const int width = pFrame->width, height = pFrame->height;
int Y, Cr, Cb;
int R, G, B;
int pixel = 0;
for (int y = 0; y < height; y++)
{
for (int x = 0; x < width; x++)
{
// YUV420p平面顺序是Y,Cb,Cr(不是Y,Cr,Cb)。
Y = pFrame->data[0][x + y * pFrame->linesize[0]]; //使用pFrame->linesize[0]更好。
Cb = pFrame->data[1][x / 2 + ((int)(y / 2)) * pFrame->linesize[1]];
Cr = pFrame->data[2][x / 2 + ((int)(y / 2)) * pFrame->linesize[2]];
YCrCb_to_RGB8(Y, Cr, Cb, R, G, B);
buffer[pixel * 3 + 0] = R;
buffer[pixel * 3 + 1] = G;
buffer[pixel * 3 + 2] = B;
pixel++;
}
}
return 0;
}
YCrCb_to_RGB8
中的问题:
你问题中的转换公式使用的是JPEG(全范围)转换公式。
FFmpeg默认使用BT.601“有限范围”转换公式。在“有限范围”中,Y的范围是[16, 235],而在“全范围”中是[0, 255]。
在这里使用“有限范围”(“电视范围”)比使用“全范围”(PC范围/ JPEG范围)要常见得多。
BT.601在高清视频中可能比BT.709少见,但BT.601是FFmpeg的默认转换(我们将坚持使用BT.601)。
请注意:我们在这里使用的转换公式与MATLAB函数ycbcr2rgb相同。
// Converts one YCbCr sample to 8-bit RGB with the BT.601 "limited range"
// formula (Y nominally in [16, 235]), the same conversion as MATLAB's ycbcr2rgb.
//   Y, Cr, Cb - input components.
//   R, G, B   - receive the result, rounded to the nearest integer and then
//               limited to the range [0, 255].
inline void YCrCb_to_RGB8(int Y, int Cr, int Cb, int& R, int& G, int& B)
{
    // Remove the offsets: 16 for limited-range luma, 128 for both chroma components.
    const double luma     = static_cast<double>(Y - 16);
    const double blueDiff = static_cast<double>(Cb - 128);
    const double redDiff  = static_cast<double>(Cr - 128);

    // BT.601 limited-range coefficients; 1.1644 stretches the luma range to full scale.
    const int red   = static_cast<int>(std::round(1.1644*luma + 1.5960*redDiff));
    const int green = static_cast<int>(std::round(1.1644*luma - 0.3918*blueDiff - 0.8130*redDiff));
    const int blue  = static_cast<int>(std::round(1.1644*luma + 2.0172*blueDiff));

    // Saturate each channel to what fits in one byte.
    R = std::min(std::max(red, 0), 255);
    G = std::min(std::max(green, 0), 255);
    B = std::min(std::max(blue, 0), 255);
}
英文:
The main issue is using fopen(filename, "w")
instead of f = fopen(filename, "wb")
.
In Windows OS, there is an important distinction between binary file and text file.
The default "w"
option, opens the file as text file.
When writing to text file, each new line character \n
is converted to two characters \r\n
.
The additional characters mess up the entire structure of the image.
Note: If you are using Linux, "wb"
and "w"
are supposed to behave the same.
Corrected code of save_RGB_frame
:
// Writes a packed 8-bit RGB image to `filename` as a binary PPM ("P6") file.
//
//   buf      - first byte of the top image row (3 bytes per pixel, R G B order)
//   wrap     - distance in bytes between the starts of consecutive rows in buf
//   xsize    - image width in pixels
//   ysize    - image height in pixels
//   filename - path of the file to create or overwrite
//
// Returns 0 on success, or -1 if an argument is invalid, the file cannot be
// opened, or any write fails.
int save_RGB_frame(unsigned char* buf, int wrap, int xsize, int ysize, const char* filename)
{
    // A non-positive size would reach fwrite as a huge unsigned byte count.
    if (buf == nullptr || filename == nullptr || xsize <= 0 || ysize <= 0)
        return -1;

    // "wb" is required on Windows: in text mode ("w") every '\n' byte in the
    // pixel data is expanded to "\r\n", which corrupts the image.
    FILE* f = fopen(filename, "wb");
    if (f == nullptr)
        return -1;

    int status = 0;

    // PPM header: magic number, width and height, maximum channel value.
    // Format reference: https://en.wikipedia.org/wiki/Netpbm#PPM_example
    if (fprintf(f, "P6\n%d %d\n%d\n", xsize, ysize, 255) < 0)
        status = -1;

    // Write row by row so that any padding between rows (wrap > xsize*3) is skipped.
    const size_t rowBytes = static_cast<size_t>(xsize) * 3;
    for (int i = 0; status == 0 && i < ysize; i++)
    {
        // Widen before multiplying so large images cannot overflow int.
        const unsigned char* row = buf + static_cast<long long>(i) * wrap;
        if (fwrite(row, 1, rowBytes, f) != rowBytes)
            status = -1;
    }

    // fclose flushes buffered data, so its failure is a write failure too.
    if (fclose(f) != 0)
        status = -1;

    return status;
}
Saving to PPM image file:
// `buffer` holds packed RGB rows (3 bytes per pixel), so the row stride passed as `wrap` is width*3.
save_RGB_frame(buffer, width*3, width, height, "img.ppm");
Issues in getRGB8buffer
:
The order of the planes in YUV420p format is Y
then U
then V
.
U
corresponds to Cb
and V
corresponds to Cr
, so the order is: Y
, Cb
, Cr
.
Corrected code of getRGB8buffer
:
int getRGB8buffer(const AVFrame* pFrame, byte* buffer)
{
const int width = pFrame->width, height = pFrame->height;
int Y, Cr, Cb;
int R, G, B;
int pixel = 0;
for (int y = 0; y < height; y++)
{
for (int x = 0; x < width; x++)
{
//YUV420p planes ordering is Y, Cb, Cr (not Y, Cr, Cb).
//Y = pFrame->data[0][x + y * width];
//Cr = pFrame->data[1][x / 2 + ((int)(y / 2)) * pFrame->linesize[1]];
//Cb = pFrame->data[2][x / 2 + ((int)(y / 2)) * pFrame->linesize[2]];
Y = pFrame->data[0][x + y * pFrame->linesize[0]]; //Using pFrame->linesize[0] is prefered.
Cb = pFrame->data[1][x / 2 + ((int)(y / 2)) * pFrame->linesize[1]];
Cr = pFrame->data[2][x / 2 + ((int)(y / 2)) * pFrame->linesize[2]];
YCrCb_to_RGB8(Y, Cr, Cb, R, G, B);
buffer[pixel * 3 + 0] = R;
buffer[pixel * 3 + 1] = G;
buffer[pixel * 3 + 2] = B;
pixel++;
}
}
return 0;
}
Issues in YCrCb_to_RGB8
:
The conversion formula in your question is the JPEG ("full range") conversion formula.
The default conversion formula used by FFmpeg applies BT.601 "limited range" conversion formula.
In "limited range", the Y range is [16, 235], as opposed to "full range", where it is [0, 255].
Using "limited range" ("TV range") is much more common compared to "full range" (PC range / JPEG range).
BT.601 may be less common than BT.709 for HD videos, but BT.601 is FFmpeg's default conversion (we are going to stick with BT.601).
Note: The conversion formula we are using here is the same as MATLAB function ycbcr2rgb.
// Converts one YCbCr sample to 8-bit RGB with the BT.601 "limited range"
// formula (Y nominally in [16, 235]), the same conversion as MATLAB's ycbcr2rgb.
//   Y, Cr, Cb - input components.
//   R, G, B   - receive the result, rounded to the nearest integer and then
//               limited to the range [0, 255].
inline void YCrCb_to_RGB8(int Y, int Cr, int Cb, int& R, int& G, int& B)
{
    // Remove the offsets: 16 for limited-range luma, 128 for both chroma components.
    const double luma     = static_cast<double>(Y - 16);
    const double blueDiff = static_cast<double>(Cb - 128);
    const double redDiff  = static_cast<double>(Cr - 128);

    // BT.601 limited-range coefficients; 1.1644 stretches the luma range to full scale.
    const int red   = static_cast<int>(std::round(1.1644*luma + 1.5960*redDiff));
    const int green = static_cast<int>(std::round(1.1644*luma - 0.3918*blueDiff - 0.8130*redDiff));
    const int blue  = static_cast<int>(std::round(1.1644*luma + 2.0172*blueDiff));

    // Saturate each channel to what fits in one byte.
    R = std::min(std::max(red, 0), 255);
    G = std::min(std::max(green, 0), 255);
    B = std::min(std::max(blue, 0), 255);
}
通过集体智慧和协作来改善编程学习和解决问题的方式。致力于成为全球开发者共同参与的知识库,让每个人都能够通过互相帮助和分享经验来进步。
评论