当前位置：首页 > news >正文

avformat_open_input 代码再分析

news 来源：原创 2024/7/9 7:04:02

------------------------------------------------------------
author: hjjdebug
date : 2024年 07月 03日星期三 15:25:06 CST
description: avformat_open_input 代码再分析
------------------------------------------------------------
前言:
avformat_open_input 所做的工作是
1.必须要读取到一定的数据
2.要判定这些数据是什么格式
3.读取数据文件的头部.
当然，这是我分析完代码后得到的结论.
具体的分析过程为下面6项:

1. 在哪里和怎样查找到协议的? 是通过文件名称(url)找到协议的.

2. 在哪里和怎样查找到demuxer的?

3. 在哪里和怎样读取数据的？

4. 是怎样分析数据的？

5. 数据指针怎样rewind?

6. 实现部分怎样读取文件头?

就把它作为大纲目录了.
------------------------------------------------------------
1. 在哪里和怎样查找到协议的? 是通过文件名称(url)找到协议的.
------------------------------------------------------------
入门有点跳跃性,跟着试验即可.
1.0:直接找到枚举协议函数.
在libavformat/目录下有一个protocols.c文件, 其中有函数
const char *avio_enum_protocols(void **opaque, int output)
其中opaque 是当前指针或空(当开始查找时）,
每调用一次，函数内会更新*opaque 内容,它就是在枚举一个列表. 这个列表的名字叫url_protocols
output,是说是否枚举的是输出协议，为1为输出协议，为0为输入协议.
返回值是协议的名称.

都分析到这种程度了,代码也好分析了,拷贝过来看一下吧. (遗憾的是,代码没有调用到这里,而是从别处逃逸了!)
// Enumerate the names of the registered input or output protocols.
// opaque: iteration cursor; *opaque is NULL on the first call and is updated on every call.
// output: non-zero selects protocols that provide url_write, zero selects those that provide url_read.
// Returns the protocol name, or NULL (with *opaque reset to NULL) once the table is exhausted.
const char *avio_enum_protocols(void **opaque, int output)
{
const URLProtocol **p = *opaque; // *opaque holds a pointer into a table of URLProtocol pointers, so p walks that pointer table
p = p ? p + 1 : url_protocols; // start at url_protocols when p is NULL, otherwise advance one slot (8 bytes on a 64-bit CPU)
*opaque = p;
if (!*p) {
*opaque = NULL;
return NULL;
}
if ((output && (*p)->url_write) || (!output && (*p)->url_read))
return (*p)->name;
return avio_enum_protocols(opaque, output); // entry does not match the requested direction: recurse to the next one (opaque is already advanced)
}
1.1: 谁调用了它?
在avio_enum_protocols设断点观察，其调用栈: 没抓住!
用普通的ts 文件测试，惊奇地发现根本没有调用到这里,
那就给个udp 数据流做输入吧. 还是没有中断, 那它的协议是从哪来的呢？
跟踪调试，原来协议的确定不走这里，而是...
协议存储在URLContext->prot 中
// Look up the protocol for filename and allocate a URLContext for it.
// On success the result of url_alloc_for_protocol() is returned; when no protocol
// matches, *puc is set to NULL and AVERROR_PROTOCOL_NOT_FOUND is returned.
int ffurl_alloc(URLContext **puc, const char *filename, int flags,
const AVIOInterruptCB *int_cb)
{
const URLProtocol *p = NULL;

p = url_find_protocol(filename); // splits the file name into a protocol part and a file part, builds a protocol table (honouring the black/white lists) and searches it for the protocol
if (p)
{// protocol found, so allocate the URLContext; per the author's trace the protocol ends up in (*puc)->prot and the private class/size (priv_class) is set up and initialised as well (url_alloc_for_protocol is not shown here)
return url_alloc_for_protocol(puc, p, filename, flags, int_cb);
}

*puc = NULL;
return AVERROR_PROTOCOL_NOT_FOUND;
}
拷贝一下它的调用栈吧. url_alloc_for_protocol 负责创建URLContext 对象
0 in url_alloc_for_protocol of libavformat/avio.c:153
1 in ffurl_alloc of libavformat/avio.c:303
2 in ffurl_open_whitelist of libavformat/avio.c:316
3 in ffio_open_whitelist of libavformat/aviobuf.c:1152
4 in io_open_default of libavformat/options.c:191
5 in init_input of libavformat/utils.c:445
6 in avformat_open_input of libavformat/utils.c:548
7 in main of main.cpp:22

之所以没有用avio_enum_protocols 拦截到，是因为它走了url_find_protocol(filename).
该函数主要功能如下:

//白名单，黑名单都是空，但还是根据url_protocols 表重新构建了一个协议表protocols,用完后要记着删除.
protocols = ffurl_get_protocols(NULL, NULL);
if (!protocols)
return NULL;
// 枚举协议用了一个循环,所以就不走avio_enum_protocols 了.就是它另建了一条协议枚举路.所以它用这种方式逃逸了.
for (i = 0; protocols[i]; i++) {
const URLProtocol *up = protocols[i];
if (!strcmp(proto_str, up->name)) { // proto_str 是从文件名中分离的部分.
av_freep(&protocols);
return up;
}
if (up->flags & URL_PROTOCOL_FLAG_NESTED_SCHEME &&
!strcmp(proto_nested, up->name)) {
av_freep(&protocols);
return up;
}
}
这样就看到了协议是怎样确定的.!

------------------------------------------------------------
2. 在哪里和怎样查找到demuxer的?
------------------------------------------------------------
在libavformat/目录下有一个allformats.c文件, 其中有函数
const AVInputFormat *av_demuxer_iterate(void **opaque)
其中opaque 是当前指针或空(当开始查找时）,
返回的是AVInputFormat 指针, opaque也会被更新.
函数的意义就是给我上一次demuxer指针, 我给你返回一个新demuxer指针

/*
 * Iterate over the demuxers in demuxer_list (abridged excerpt).
 *
 * opaque: iteration cursor. *opaque stores the index of the next entry,
 *         cast to a pointer, so it must hold NULL (index 0) on the first call.
 * Returns the demuxer at the current index and advances the cursor, or
 * returns NULL without touching the cursor when no entry is available.
 */
const AVInputFormat *av_demuxer_iterate(void **opaque) // abridged
{
    /* The bound is taken from the same table that is indexed below. */
    static const uintptr_t size = sizeof(demuxer_list)/sizeof(demuxer_list[0]) - 1;
    uintptr_t i = (uintptr_t)*opaque;
    const AVInputFormat *f = NULL;

    if (i < size) {
        f = demuxer_list[i];
    }
    if (f)
        *opaque = (void*)(i + 1); // bump the index so the next call returns the following entry
    return f;
}

2.1: 谁调用了它?
在av_demuxer_iterate设断点观察，其调用栈:
0 in av_demuxer_iterate of libavformat/allformats.c:564
1 in av_probe_input_format3 of libavformat/format.c:160
2 in av_probe_input_format2 of libavformat/format.c:208
3 in av_probe_input_buffer2 of libavformat/format.c:280
4 in init_input of libavformat/utils.c:450
5 in avformat_open_input of libavformat/utils.c:548
6 in main of main.cpp:22

在av_probe_input_format3 中,有
/* Excerpt: walk every demuxer and keep the one whose read_probe scores highest. */
while ((fmt1 = av_demuxer_iterate(&i))) {
    score = 0; /* a demuxer without read_probe keeps a score of 0 */
    if (fmt1->read_probe) {
        score = fmt1->read_probe(&lpd);
    }
    if (score > score_max) {
        /* strictly greater: on a tie the earlier demuxer is kept */
        score_max = score;
        fmt = (AVInputFormat*)fmt1;
    }
}
我们看到了它枚举的过程，并且通过 read_probe 来确定分数,分数高者为指定格式.
格式会返回到调用参数中.

在av_probe_input_buffer2 中结尾处,有
ret2 = ffio_rewind_with_probe_data(pb, &buf, buf_offset);
它要把probe 数据返回给缓冲,以便后续还可以使用这些数据.
想看清buf,buf_offset,pb 的来历，就要跟踪一下av_probe_input_buffer2 的代码了.

/*
 * Probe the input format by reading progressively larger chunks from pb
 * (abridged excerpt; the listing omits some of the original function).
 *
 * pb:             I/O context the probe data is read from
 * fmt:            receives the detected input format; probing stops once *fmt is set
 * filename:       stored in the probe data (an empty string is used when NULL)
 * logctx:         context passed to av_log
 * offset:         number of leading bytes excluded from the probe data
 * max_probe_size: upper bound for the probe buffer size
 *
 * Returns the probe score on success, or a negative error code.
 */
int av_probe_input_buffer2(AVIOContext *pb, ff_const59 AVInputFormat **fmt,
                           const char *filename, void *logctx,
                           unsigned int offset, unsigned int max_probe_size)
{
    AVProbeData pd = { filename ? filename : "" }; // note: this is pd (probe data), not pb
    uint8_t *buf = NULL;
    int ret = 0, probe_size, buf_offset = 0;
    int score = 0;
    int ret2;

    /* The probe size doubles every round and is clamped to max_probe_size. */
    for (probe_size = PROBE_BUF_MIN; probe_size <= max_probe_size && !*fmt;
         probe_size = FFMIN(probe_size << 1, FFMAX(max_probe_size, probe_size + 1))) {
        /* Read probe data. */
        // (re)allocate buf to probe_size plus padding (probe_size was 2048 in the author's trace)
        if ((ret = av_reallocp(&buf, probe_size + AVPROBE_PADDING_SIZE)) < 0)
            goto fail;
        // read from pb, appending after the bytes gathered in earlier rounds
        if ((ret = avio_read(pb, buf + buf_offset, probe_size - buf_offset)) < 0)
            goto fail;
        buf_offset += ret; // running total of bytes read
        if (buf_offset < offset)
            continue;
        pd.buf_size = buf_offset - offset; // size of the data handed to the probers
        pd.buf      = &buf[offset];

        /* zero the padding that follows the probe data */
        memset(pd.buf + pd.buf_size, 0, AVPROBE_PADDING_SIZE);

        /* Guess file format. */
        *fmt = av_probe_input_format2(&pd, 1, &score); // infer the format from the probe data
        if (*fmt) {
            av_log(logctx, AV_LOG_DEBUG,
                   "Format %s probed with size=%d and score=%d\n",
                   (*fmt)->name, probe_size, score);
        }
    }

    if (!*fmt)
        ret = AVERROR_INVALIDDATA;
fail:
    /* Rewind. Reuse probe buffer to avoid seeking. */
    // Hand the buf_offset bytes that were read back to pb so later reads see them again.
    // buf is passed by address: ffio_rewind_with_probe_data either installs it as
    // pb's buffer or frees it on its early-error path.
    ret2 = ffio_rewind_with_probe_data(pb, &buf, buf_offset);
    if (ret >= 0)
        ret = ret2;
    // This releases pd.mime_type only; it does not free the probe buffer.
    // NOTE(review): pd.mime_type is never assigned in this excerpt; upstream
    // presumably fills it in code omitted here - confirm against libavformat/format.c.
    av_freep(&pd.mime_type);
    return ret < 0 ? ret : score;
}

// The rewind is fairly involved, so it is analysed in detail; see the comments below.
// It joins the data that was already consumed (bufp, buf_size) with the data still unread in
// s->buffer into one (re)allocated block, instead of merely moving s->buf_ptr back,
// which makes it more capable: more data can be restored.
// Returns 0 on success, AVERROR(EINVAL) when the two buffers neither touch nor overlap,
// and AVERROR(ENOMEM) when the reallocation fails.
int ffio_rewind_with_probe_data(AVIOContext *s, unsigned char **bufp, int buf_size)
{
// size of the data being handed back; buf_size was 2048 in the author's trace
int64_t buffer_start;
int buffer_size;
int overlap, new_size, alloc_size;
uint8_t *buf = *bufp;

buffer_size = s->buf_end - s->buffer; // number of bytes currently held in the AVIO buffer (32768 in the author's trace)

/* the buffers must touch or overlap */
   // buffer_start = s->pos - buffer_size; it was 0 in the author's trace, with s->pos
   // (total bytes read so far) at 32768.
   // If buffer_start is larger than buf_size the rewind is rejected: the probe buffer is
   // freed and EINVAL returned. So yes, once s->pos has run far enough ahead the data cannot be restored.
   // That is acceptable here because only probe data is rewound, and probe data is not large.
if ((buffer_start = s->pos - buffer_size) > buf_size) {
av_freep(bufp);
return AVERROR(EINVAL);
}

overlap = buf_size - buffer_start; // overlap was 2048 in the author's trace
new_size = buf_size + buffer_size - overlap; // new_size was 32768 in the author's trace
// alloc_size stayed at 32768 in the trace;
// when the data handed back is large, new_size wins and alloc_size grows accordingly.
alloc_size = FFMAX(s->buffer_size, new_size);
if (alloc_size > buf_size) // grow the probe buffer via av_realloc_f; the existing probe data is kept
if (!(buf = (*bufp) = av_realloc_f(buf, 1, alloc_size)))
return AVERROR(ENOMEM);

if (new_size > buf_size) {
// append the part of the AVIO buffer that lies beyond the overlap to the tail of the probe buffer
memcpy(buf + buf_size, s->buffer + overlap, buffer_size - overlap);
buf_size = new_size;
}

av_free(s->buffer); // release the old AVIOContext buffer
s->buf_ptr = s->buffer = buf; // point the AVIOContext at the merged buffer, read pointer at its start
s->buffer_size = alloc_size;
s->pos = buf_size; // total bytes read so far; buf_size equals new_size here whenever the copy above ran
s->buf_end = s->buf_ptr + buf_size;
s->eof_reached = 0;

return 0;
}
--------------------------------------------------------------------------
3. 在哪里和怎样读取数据的？
--------------------------------------------------------------------------
读取数据上层是由avio_read 来操作的, 下层会由protocol分发到具体协议去完成.
所以我们在avio_read 下断点.
0 in avio_read of libavformat/aviobuf.c:634
1 in av_probe_input_buffer2 of libavformat/format.c:262
2 in init_input of libavformat/utils.c:450
3 in avformat_open_input of libavformat/utils.c:548
4 in main of main.cpp:22
是的，我们一下就来到需要的地方，找到了av_probe_input_buffer2 函数.
这个函数前边分析过，它有3个要点.
1. 就是这里的读取探测数据
2. 就是查询输入格式: *fmt=av_probe_input_format2(&pd,1,&score);
3. 就是把探测数据还释放回缓冲, 就是说要消除avio_read带来的对avioctx缓存的影响.
------------------------------------------------------------
4. 是怎样分析数据的？
------------------------------------------------------------
先找到读取数据的地方, 后面肯定就是分析数据了.
av_probe_input_format2(&pd,1,&score) 就是对数据的分析
当然你也可以下内存断点查询到它对数据的访问.
------------------------------------------------------------
5. 数据指针怎样rewind?
------------------------------------------------------------
参考前边ffio_rewind_with_probe_data 代码注释

------------------------------------------------------------
6. 实现部分怎样读取文件头?
------------------------------------------------------------
直接浏览avformat_open_input 代码，前边都是init_input 的关键代码,
执行完成后，我们快速杀到read_header 部分
if ((ret = s->iformat->read_header(s)) < 0)
调用输入格式的read_header, 这对应着mpegts_read_header
0 in mpegts_read_header of libavformat/mpegts.c:3075
1 in avformat_open_input of libavformat/utils.c:609
2 in main of main.cpp:22

跟踪代码，
int mpegts_read_header(AVFormatContext *s)
感兴趣的是:
mpegts_open_section_filter(ts, SDT_PID, sdt_cb, ts, 1);
ts 是mpegts 私有对象指针 , 其实又该标注代码了, 不过copy 代码也没什么意思.
sec_filter 就是一种filter, pes_filter 也是一种filter
它实际是先要打开一个filter
filter = mpegts_open_filter(ts, pid, MPEGTS_SECTION)
所谓打开一个filter, 就是分配一个filter内存，把pid 填进去,把类型填进去.
filter = av_mallocz(sizeof(MpegTSFilter));
filter->type = type;
filter->pid = pid;
filter->es_id = -1;
filter->last_cc = -1;
filter->last_pcr= -1;
那ts 是干什么的? filter 指针要挂到ts->pids[pid] 位置处,这样拿到ts 就可以根据pid 拿到filter
ts->pids[pid] = filter;
创建完filter, 再初始化section_filter 部分.
拿到section 地址sec = &filter->u.section_filter;
填充sec 内容
sec->section_cb = section_cb;
sec->opaque = opaque;
sec->section_buf = av_mallocz(MAX_SECTION_SIZE); //又分配了一个内存
sec->check_crc = check_crc;
sec->last_ver = -1;

小插曲:
观察到s->probe_size=5000000, 5M 是从哪里来的?
查代码,原来是AVFormatContext 的默认值

AVFormatContext 类是一个AVClass 类，一种标准的ffmpeg 类定义形式, ffmpeg 定义了几百个这种类对象.
查看其类指针.
(gdb) p s->av_class
$33 = (const AVClass *) 0x7ffff7f835c0 <av_format_context_class>
其类对象 <av_format_context_class>

查看该类对象的具体内容
(gdb) p *s->av_class
$34 = {
class_name = 0x7ffff7f2ddac "AVFormatContext",
option = 0x7ffff7f82380 <avformat_options>,
... }

对象名称: AVFormatContext
对象 options 选项 static const AVOption avformat_options[]
在如下位置定义:
{"probesize", "set probing size", OFFSET(probesize), AV_OPT_TYPE_INT64, {.i64 = 5000000 }, 32, INT64_MAX, D},

ffmpeg 这样定义类的好处是, 为类分配了内存之后，一个调用就能完成所有成员的赋值.
另一个好处是从命令行可以通过选项修改该类的初始化值.

下面是：
handle_packets(ts, probesize / ts->raw_packet_size); //nb_packets=5000000/188=26595个包，很大
/*
 * Read and dispatch TS packets in a loop (abridged excerpt).
 *
 * The loop ends when:
 *   - the packet counter reaches nb_packets (ret = AVERROR(EAGAIN)),
 *   - ts->stop_parse is greater than 0,
 *   - read_packet() or handle_packet() returns non-zero.
 * ts->last_pos records the I/O position after the loop.
 *
 * Returns the last status code: 0, AVERROR(EAGAIN), or the non-zero value
 * returned by read_packet()/handle_packet().
 */
static int handle_packets(MpegTSContext *ts, int64_t nb_packets)
{
    uint8_t packet[TS_PACKET_SIZE + AV_INPUT_BUFFER_PADDING_SIZE];
    const uint8_t *data;
    AVFormatContext *s = ts->stream;
    int64_t packet_num = 0;
    int ret = 0;

    for (;;) {
        packet_num++;
        if (packet_num >= nb_packets) {
            /* packet budget used up */
            ret = AVERROR(EAGAIN);
            break;
        }
        if (ts->stop_parse > 0)
            break;

        ret = read_packet(s, packet, ts->raw_packet_size, &data);
        if (ret != 0)
            break;
        ret = handle_packet(ts, data, avio_tell(s->pb));
        /* called before ret is checked, i.e. even when handle_packet failed */
        finished_reading_packet(s, ts->raw_packet_size);
        if (ret != 0)
            break;
    }
    ts->last_pos = avio_tell(s->pb);
    return ret;
}

观察代码, handle_packets 就是不断的读包, 处理包,直到ts->stop_parse 大于0 (或者包计数达到nb_packets, 或者读包/处理包返回非0) 为止.
何为读包read_packet(s,packet,ts->raw_packet_size,&data);?
就是
len = ffio_read_indirect(s->pb, buf, TS_PACKET_SIZE, data);
该函数就是从AVIO 缓冲中读取了TS_PACKET_SIZE 个数据, 该函数就分析到此为止了.
至于它是怎样把数据读取到缓冲的,那是AVIOContext 的事，希望你已经掌握了AVIOContext.

第一次handle_packet，
static int handle_packet(MpegTSContext *ts, const uint8_t *packet, int64_t pos)
其参数
(gdb) info args
ts = 0x55555556a3c0
packet = 0x555555562508 "GP"
pos = 188

一番分析后，调用了:
write_section_data(ts, tss, p, p_end - p, 1);
看一个sdt包的分析过程吧
0 in sdt_cb of libavformat/mpegts.c:2711
1 in write_section_data of libavformat/mpegts.c:466
2 in handle_packet of libavformat/mpegts.c:2810
3 in handle_packets of libavformat/mpegts.c:2975
4 in mpegts_read_header of libavformat/mpegts.c:3093
5 in avformat_open_input of libavformat/utils.c:609
6 in main of main.cpp:22
static void sdt_cb(MpegTSFilter *filter, const uint8_t *section, int section_len)
{
    // Excerpt: only one descriptor branch is shown. The declarations and the
    // parsing that set up ts, sid, desc_tag, p, p_end, name and provider_name
    // are omitted from this listing.

    switch (desc_tag) {
    case 0x48:
        provider_name = getstr8(&p, p_end);
        name          = getstr8(&p, p_end);
        if (name && provider_name) {
            // program is newly allocated here; per the author's analysis
            // av_new_program also adds it to the programs array of the
            // AVFormatContext (ts->stream), which is why it is not freed below.
            AVProgram *program = av_new_program(ts->stream, sid);
            if (program) {
                av_dict_set(&program->metadata, "service_name", name, 0);
                av_dict_set(&program->metadata, "service_provider",
                            provider_name, 0);
            }
        }
        // both strings are released here, after the av_dict_set calls
        av_free(name);
        av_free(provider_name);
        break;
    default:
        break;
    }
}

其它的pat_cb, pmt_cb也都改变了AVFormatContext 中的相应参数.
想详细了解cb的操作需要继续跟踪代码和阅读ts的标准规范文档.
分析到pmt_cb 时, 会设置循环退出条件ts->stop_parse=2,从而退出handle_packets()
这样框架就分析完毕了! 这样感觉就脚踏实地了.