1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "media/ffmpeg/ffmpeg_common.h"
7 #include "base/basictypes.h"
8 #include "base/logging.h"
9 #include "base/metrics/histogram.h"
10 #include "base/strings/string_number_conversions.h"
11 #include "media/base/decoder_buffer.h"
12 #include "media/base/video_frame.h"
13 #include "media/base/video_util.h"
17 // Why FF_INPUT_BUFFER_PADDING_SIZE? FFmpeg assumes all input buffers are
18 // padded. Check here to ensure FFmpeg only receives data padded to its
// specifications.
20 COMPILE_ASSERT(DecoderBuffer::kPaddingSize >= FF_INPUT_BUFFER_PADDING_SIZE,
21 decoder_buffer_padding_size_does_not_fit_ffmpeg_requirement);
23 // Alignment requirement by FFmpeg for input and output buffers. This needs to
24 // be updated to match FFmpeg when it changes.
25 #if defined(ARCH_CPU_ARM_FAMILY)
26 static const int kFFmpegBufferAddressAlignment = 16;
28 static const int kFFmpegBufferAddressAlignment = 32;
31 // Check here to ensure FFmpeg only receives data aligned to its specifications.
// DecoderBuffer's alignment must be at least FFmpeg's alignment and a whole
// multiple of it.
33 DecoderBuffer::kAlignmentSize >= kFFmpegBufferAddressAlignment &&
34 DecoderBuffer::kAlignmentSize % kFFmpegBufferAddressAlignment == 0,
35 decoder_buffer_alignment_size_does_not_fit_ffmpeg_requirement);
37 // Allows faster SIMD YUV convert. Also, FFmpeg overreads/-writes occasionally.
38 // See video_get_buffer() in libavcodec/utils.c.
39 static const int kFFmpegOutputBufferPaddingSize = 16;
// VideoFrame must provide at least the output padding defined above.
41 COMPILE_ASSERT(VideoFrame::kFrameSizePadding >= kFFmpegOutputBufferPaddingSize,
42 video_frame_padding_size_does_not_fit_ffmpeg_requirement);
// VideoFrame's address alignment must be at least FFmpeg's alignment and a
// whole multiple of it.
45 VideoFrame::kFrameAddressAlignment >= kFFmpegBufferAddressAlignment &&
46 VideoFrame::kFrameAddressAlignment % kFFmpegBufferAddressAlignment == 0,
47 video_frame_address_alignment_does_not_fit_ffmpeg_requirement);
// One microsecond expressed as an AVRational (1 / kMicrosecondsPerSecond of a
// second). Used as the fixed time base when rescaling timestamps to and from
// base::TimeDelta.
49 static const AVRational kMicrosBase = { 1, base::Time::kMicrosecondsPerSecond };
51 base::TimeDelta ConvertFromTimeBase(const AVRational& time_base,
53 int64 microseconds = av_rescale_q(timestamp, time_base, kMicrosBase);
54 return base::TimeDelta::FromMicroseconds(microseconds);
57 int64 ConvertToTimeBase(const AVRational& time_base,
58 const base::TimeDelta& timestamp) {
59 return av_rescale_q(timestamp.InMicroseconds(), kMicrosBase, time_base);
62 // Converts an FFmpeg audio codec ID into its corresponding supported codec id.
// Codec IDs that match none of the cases are logged at DVLOG(1) and reported
// as kUnknownAudioCodec.
63 AudioCodec CodecIDToAudioCodec(AVCodecID codec_id) {
69 case AV_CODEC_ID_VORBIS:
71 case AV_CODEC_ID_PCM_U8:
72 case AV_CODEC_ID_PCM_S16LE:
73 case AV_CODEC_ID_PCM_S24LE:
74 case AV_CODEC_ID_PCM_F32LE:
// The big-endian PCM variants each have their own AudioCodec value.
76 case AV_CODEC_ID_PCM_S16BE:
77 return kCodecPCM_S16BE;
78 case AV_CODEC_ID_PCM_S24BE:
79 return kCodecPCM_S24BE;
80 case AV_CODEC_ID_FLAC:
82 case AV_CODEC_ID_AMR_NB:
84 case AV_CODEC_ID_AMR_WB:
86 case AV_CODEC_ID_GSM_MS:
88 case AV_CODEC_ID_PCM_MULAW:
89 return kCodecPCM_MULAW;
90 case AV_CODEC_ID_OPUS:
93 DVLOG(1) << "Unknown audio CodecID: " << codec_id;
95 return kUnknownAudioCodec;
// Maps |audio_codec| to the FFmpeg AVCodecID used for it. |sample_format| is
// consulted by the nested switch to choose between the PCM codec IDs.
// Returns AV_CODEC_ID_NONE, after logging at DVLOG(1), when nothing matches.
98 static AVCodecID AudioCodecToCodecID(AudioCodec audio_codec,
99 SampleFormat sample_format) {
100 switch (audio_codec) {
102 return AV_CODEC_ID_AAC;
104 return AV_CODEC_ID_MP3;
106 switch (sample_format) {
107 case kSampleFormatU8:
108 return AV_CODEC_ID_PCM_U8;
109 case kSampleFormatS16:
110 return AV_CODEC_ID_PCM_S16LE;
// 32-bit samples select the 24-bit little-endian PCM codec ID.
// NOTE(review): presumably because 24-bit PCM is decoded into 32-bit
// samples -- confirm.
111 case kSampleFormatS32:
112 return AV_CODEC_ID_PCM_S24LE;
113 case kSampleFormatF32:
114 return AV_CODEC_ID_PCM_F32LE;
116 DVLOG(1) << "Unsupported sample format: " << sample_format;
119 case kCodecPCM_S16BE:
120 return AV_CODEC_ID_PCM_S16BE;
121 case kCodecPCM_S24BE:
122 return AV_CODEC_ID_PCM_S24BE;
124 return AV_CODEC_ID_VORBIS;
126 return AV_CODEC_ID_FLAC;
128 return AV_CODEC_ID_AMR_NB;
130 return AV_CODEC_ID_AMR_WB;
132 return AV_CODEC_ID_GSM_MS;
133 case kCodecPCM_MULAW:
134 return AV_CODEC_ID_PCM_MULAW;
136 return AV_CODEC_ID_OPUS;
138 DVLOG(1) << "Unknown AudioCodec: " << audio_codec;
140 return AV_CODEC_ID_NONE;
143 // Converts an FFmpeg video codec ID into its corresponding supported codec id.
// Codec IDs that match none of the cases are logged at DVLOG(1) and reported
// as kUnknownVideoCodec.
144 VideoCodec CodecIDToVideoCodec(AVCodecID codec_id) {
146 case AV_CODEC_ID_H264:
148 case AV_CODEC_ID_THEORA:
150 case AV_CODEC_ID_MPEG4:
152 case AV_CODEC_ID_VP8:
154 case AV_CODEC_ID_VP9:
157 DVLOG(1) << "Unknown video CodecID: " << codec_id;
159 return kUnknownVideoCodec;
// Maps |video_codec| to the FFmpeg AVCodecID used for it. Returns
// AV_CODEC_ID_NONE, after logging at DVLOG(1), when nothing matches.
162 static AVCodecID VideoCodecToCodecID(VideoCodec video_codec) {
163 switch (video_codec) {
165 return AV_CODEC_ID_H264;
167 return AV_CODEC_ID_THEORA;
169 return AV_CODEC_ID_MPEG4;
171 return AV_CODEC_ID_VP8;
173 return AV_CODEC_ID_VP9;
175 DVLOG(1) << "Unknown VideoCodec: " << video_codec;
177 return AV_CODEC_ID_NONE;
180 static VideoCodecProfile ProfileIDToVideoCodecProfile(int profile) {
181 // Clear out the CONSTRAINED & INTRA flags which are strict subsets of the
182 // corresponding profiles with which they're used.
183 profile &= ~FF_PROFILE_H264_CONSTRAINED;
184 profile &= ~FF_PROFILE_H264_INTRA;
186 case FF_PROFILE_H264_BASELINE:
187 return H264PROFILE_BASELINE;
188 case FF_PROFILE_H264_MAIN:
189 return H264PROFILE_MAIN;
190 case FF_PROFILE_H264_EXTENDED:
191 return H264PROFILE_EXTENDED;
192 case FF_PROFILE_H264_HIGH:
193 return H264PROFILE_HIGH;
194 case FF_PROFILE_H264_HIGH_10:
195 return H264PROFILE_HIGH10PROFILE;
196 case FF_PROFILE_H264_HIGH_422:
197 return H264PROFILE_HIGH422PROFILE;
198 case FF_PROFILE_H264_HIGH_444_PREDICTIVE:
199 return H264PROFILE_HIGH444PREDICTIVEPROFILE;
201 DVLOG(1) << "Unknown profile id: " << profile;
203 return VIDEO_CODEC_PROFILE_UNKNOWN;
206 static int VideoCodecProfileToProfileID(VideoCodecProfile profile) {
208 case H264PROFILE_BASELINE:
209 return FF_PROFILE_H264_BASELINE;
210 case H264PROFILE_MAIN:
211 return FF_PROFILE_H264_MAIN;
212 case H264PROFILE_EXTENDED:
213 return FF_PROFILE_H264_EXTENDED;
214 case H264PROFILE_HIGH:
215 return FF_PROFILE_H264_HIGH;
216 case H264PROFILE_HIGH10PROFILE:
217 return FF_PROFILE_H264_HIGH_10;
218 case H264PROFILE_HIGH422PROFILE:
219 return FF_PROFILE_H264_HIGH_422;
220 case H264PROFILE_HIGH444PREDICTIVEPROFILE:
221 return FF_PROFILE_H264_HIGH_444_PREDICTIVE;
223 DVLOG(1) << "Unknown VideoCodecProfile: " << profile;
225 return FF_PROFILE_UNKNOWN;
228 SampleFormat AVSampleFormatToSampleFormat(AVSampleFormat sample_format) {
229 switch (sample_format) {
230 case AV_SAMPLE_FMT_U8:
231 return kSampleFormatU8;
232 case AV_SAMPLE_FMT_S16:
233 return kSampleFormatS16;
234 case AV_SAMPLE_FMT_S32:
235 return kSampleFormatS32;
236 case AV_SAMPLE_FMT_FLT:
237 return kSampleFormatF32;
238 case AV_SAMPLE_FMT_S16P:
239 return kSampleFormatPlanarS16;
240 case AV_SAMPLE_FMT_FLTP:
241 return kSampleFormatPlanarF32;
243 DVLOG(1) << "Unknown AVSampleFormat: " << sample_format;
245 return kUnknownSampleFormat;
248 static AVSampleFormat SampleFormatToAVSampleFormat(SampleFormat sample_format) {
249 switch (sample_format) {
250 case kSampleFormatU8:
251 return AV_SAMPLE_FMT_U8;
252 case kSampleFormatS16:
253 return AV_SAMPLE_FMT_S16;
254 case kSampleFormatS32:
255 return AV_SAMPLE_FMT_S32;
256 case kSampleFormatF32:
257 return AV_SAMPLE_FMT_FLT;
258 case kSampleFormatPlanarS16:
259 return AV_SAMPLE_FMT_S16P;
260 case kSampleFormatPlanarF32:
261 return AV_SAMPLE_FMT_FLTP;
263 DVLOG(1) << "Unknown SampleFormat: " << sample_format;
265 return AV_SAMPLE_FMT_NONE;
// Initializes |config| from an audio |codec_context|. The codec, sample
// format and channel layout are derived through the conversion helpers in
// this file; the sample rate and extradata are taken from |codec_context|
// directly.
268 static void AVCodecContextToAudioDecoderConfig(
269 const AVCodecContext* codec_context,
271 AudioDecoderConfig* config,
273 DCHECK_EQ(codec_context->codec_type, AVMEDIA_TYPE_AUDIO);
275 AudioCodec codec = CodecIDToAudioCodec(codec_context->codec_id);
277 SampleFormat sample_format =
278 AVSampleFormatToSampleFormat(codec_context->sample_fmt);
280 ChannelLayout channel_layout = ChannelLayoutToChromeChannelLayout(
281 codec_context->channel_layout, codec_context->channels);
283 if (codec == kCodecOpus) {
284 // |codec_context->sample_fmt| is not set by FFmpeg because Opus decoding is
285 // not enabled in FFmpeg, so we need to manually set the sample format.
286 sample_format = kSampleFormatS16;
// |seek_preroll| and |delay| are scaled by 1000000.0 / sample_rate, i.e. they
// are treated as sample counts and converted to microseconds. Both stay at
// the default-constructed TimeDelta when the source value is not positive.
// NOTE(review): |sample_rate| is not checked for zero before the division --
// confirm callers guarantee a positive rate.
289 base::TimeDelta seek_preroll;
290 if (codec_context->seek_preroll > 0) {
291 seek_preroll = base::TimeDelta::FromMicroseconds(
292 codec_context->seek_preroll * 1000000.0 / codec_context->sample_rate);
295 base::TimeDelta codec_delay;
296 if (codec_context->delay > 0) {
297 codec_delay = base::TimeDelta::FromMicroseconds(
298 codec_context->delay * 1000000.0 / codec_context->sample_rate);
301 config->Initialize(codec,
304 codec_context->sample_rate,
305 codec_context->extradata,
306 codec_context->extradata_size,
// Opus is excluded from this check because its sample format was overridden
// above instead of being taken from |codec_context->sample_fmt|.
311 if (codec != kCodecOpus) {
312 DCHECK_EQ(av_get_bytes_per_sample(codec_context->sample_fmt) * 8,
313 config->bits_per_channel());
// Initializes |config| from |stream| by delegating to
// AVCodecContextToAudioDecoderConfig() with the stream's codec context.
317 void AVStreamToAudioDecoderConfig(
318 const AVStream* stream,
319 AudioDecoderConfig* config,
321 bool is_encrypted = false;
// Looks up the "enc_key_id" entry in the stream metadata.
// NOTE(review): presumably the presence of this entry marks the stream as
// encrypted -- confirm.
322 AVDictionaryEntry* key = av_dict_get(stream->metadata, "enc_key_id", NULL, 0);
325 return AVCodecContextToAudioDecoderConfig(
326 stream->codec, is_encrypted, config, record_stats);
329 void AudioDecoderConfigToAVCodecContext(const AudioDecoderConfig& config,
330 AVCodecContext* codec_context) {
331 codec_context->codec_type = AVMEDIA_TYPE_AUDIO;
332 codec_context->codec_id = AudioCodecToCodecID(config.codec(),
333 config.sample_format());
334 codec_context->sample_fmt = SampleFormatToAVSampleFormat(
335 config.sample_format());
337 // TODO(scherkus): should we set |channel_layout|? I'm not sure if FFmpeg uses
338 // said information to decode.
339 codec_context->channels =
340 ChannelLayoutToChannelCount(config.channel_layout());
341 codec_context->sample_rate = config.samples_per_second();
343 if (config.extra_data()) {
344 codec_context->extradata_size = config.extra_data_size();
345 codec_context->extradata = reinterpret_cast<uint8_t*>(
346 av_malloc(config.extra_data_size() + FF_INPUT_BUFFER_PADDING_SIZE));
347 memcpy(codec_context->extradata, config.extra_data(),
348 config.extra_data_size());
349 memset(codec_context->extradata + config.extra_data_size(), '\0',
350 FF_INPUT_BUFFER_PADDING_SIZE);
352 codec_context->extradata = NULL;
353 codec_context->extradata_size = 0;
// Initializes |config| from a video |stream|: codec and profile, pixel
// format, coded/visible/natural sizes and the codec extradata.
357 void AVStreamToVideoDecoderConfig(
358 const AVStream* stream,
359 VideoDecoderConfig* config,
361 gfx::Size coded_size(stream->codec->coded_width, stream->codec->coded_height);
363 // TODO(vrk): This assumes decoded frame data starts at (0, 0), which is true
364 // for now, but may not always be true forever. Fix this in the future.
365 gfx::Rect visible_rect(stream->codec->width, stream->codec->height);
// Prefer the stream-level sample aspect ratio, then the codec-level one;
// otherwise keep the 1:1 default.
367 AVRational aspect_ratio = { 1, 1 };
368 if (stream->sample_aspect_ratio.num)
369 aspect_ratio = stream->sample_aspect_ratio;
370 else if (stream->codec->sample_aspect_ratio.num)
371 aspect_ratio = stream->codec->sample_aspect_ratio;
373 VideoCodec codec = CodecIDToVideoCodec(stream->codec->codec_id);
// VP8 and VP9 get fixed profiles; the FFmpeg profile id is only translated
// through ProfileIDToVideoCodecProfile().
375 VideoCodecProfile profile = VIDEO_CODEC_PROFILE_UNKNOWN;
376 if (codec == kCodecVP8)
377 profile = VP8PROFILE_MAIN;
378 else if (codec == kCodecVP9)
379 profile = VP9PROFILE_MAIN;
381 profile = ProfileIDToVideoCodecProfile(stream->codec->profile);
383 gfx::Size natural_size = GetNaturalSize(
384 visible_rect.size(), aspect_ratio.num, aspect_ratio.den);
387 UMA_HISTOGRAM_ENUMERATION("Media.VideoColorRange",
388 stream->codec->color_range,
392 VideoFrame::Format format = PixelFormatToVideoFormat(stream->codec->pix_fmt);
// For VP9 the pixel format is forced to YV12 and the coded size is replaced
// by the natural size.
393 if (codec == kCodecVP9) {
394 // TODO(tomfinegan): libavcodec doesn't know about VP9.
395 format = VideoFrame::YV12;
396 coded_size = natural_size;
399 bool is_encrypted = false;
// Looks up the "enc_key_id" entry in the stream metadata.
// NOTE(review): presumably the presence of this entry marks the stream as
// encrypted -- confirm.
400 AVDictionaryEntry* key = av_dict_get(stream->metadata, "enc_key_id", NULL, 0);
// A metadata "alpha_mode" value of "1" selects the YV12A format.
404 AVDictionaryEntry* webm_alpha =
405 av_dict_get(stream->metadata, "alpha_mode", NULL, 0);
406 if (webm_alpha && !strcmp(webm_alpha->value, "1")) {
407 format = VideoFrame::YV12A;
410 config->Initialize(codec,
413 coded_size, visible_rect, natural_size,
414 stream->codec->extradata, stream->codec->extradata_size,
419 void VideoDecoderConfigToAVCodecContext(
420 const VideoDecoderConfig& config,
421 AVCodecContext* codec_context) {
422 codec_context->codec_type = AVMEDIA_TYPE_VIDEO;
423 codec_context->codec_id = VideoCodecToCodecID(config.codec());
424 codec_context->profile = VideoCodecProfileToProfileID(config.profile());
425 codec_context->coded_width = config.coded_size().width();
426 codec_context->coded_height = config.coded_size().height();
427 codec_context->pix_fmt = VideoFormatToPixelFormat(config.format());
429 if (config.extra_data()) {
430 codec_context->extradata_size = config.extra_data_size();
431 codec_context->extradata = reinterpret_cast<uint8_t*>(
432 av_malloc(config.extra_data_size() + FF_INPUT_BUFFER_PADDING_SIZE));
433 memcpy(codec_context->extradata, config.extra_data(),
434 config.extra_data_size());
435 memset(codec_context->extradata + config.extra_data_size(), '\0',
436 FF_INPUT_BUFFER_PADDING_SIZE);
438 codec_context->extradata = NULL;
439 codec_context->extradata_size = 0;
443 ChannelLayout ChannelLayoutToChromeChannelLayout(int64_t layout, int channels) {
445 case AV_CH_LAYOUT_MONO:
446 return CHANNEL_LAYOUT_MONO;
447 case AV_CH_LAYOUT_STEREO:
448 return CHANNEL_LAYOUT_STEREO;
449 case AV_CH_LAYOUT_2_1:
450 return CHANNEL_LAYOUT_2_1;
451 case AV_CH_LAYOUT_SURROUND:
452 return CHANNEL_LAYOUT_SURROUND;
453 case AV_CH_LAYOUT_4POINT0:
454 return CHANNEL_LAYOUT_4_0;
455 case AV_CH_LAYOUT_2_2:
456 return CHANNEL_LAYOUT_2_2;
457 case AV_CH_LAYOUT_QUAD:
458 return CHANNEL_LAYOUT_QUAD;
459 case AV_CH_LAYOUT_5POINT0:
460 return CHANNEL_LAYOUT_5_0;
461 case AV_CH_LAYOUT_5POINT1:
462 return CHANNEL_LAYOUT_5_1;
463 case AV_CH_LAYOUT_5POINT0_BACK:
464 return CHANNEL_LAYOUT_5_0_BACK;
465 case AV_CH_LAYOUT_5POINT1_BACK:
466 return CHANNEL_LAYOUT_5_1_BACK;
467 case AV_CH_LAYOUT_7POINT0:
468 return CHANNEL_LAYOUT_7_0;
469 case AV_CH_LAYOUT_7POINT1:
470 return CHANNEL_LAYOUT_7_1;
471 case AV_CH_LAYOUT_7POINT1_WIDE:
472 return CHANNEL_LAYOUT_7_1_WIDE;
473 case AV_CH_LAYOUT_STEREO_DOWNMIX:
474 return CHANNEL_LAYOUT_STEREO_DOWNMIX;
475 case AV_CH_LAYOUT_2POINT1:
476 return CHANNEL_LAYOUT_2POINT1;
477 case AV_CH_LAYOUT_3POINT1:
478 return CHANNEL_LAYOUT_3_1;
479 case AV_CH_LAYOUT_4POINT1:
480 return CHANNEL_LAYOUT_4_1;
481 case AV_CH_LAYOUT_6POINT0:
482 return CHANNEL_LAYOUT_6_0;
483 case AV_CH_LAYOUT_6POINT0_FRONT:
484 return CHANNEL_LAYOUT_6_0_FRONT;
485 case AV_CH_LAYOUT_HEXAGONAL:
486 return CHANNEL_LAYOUT_HEXAGONAL;
487 case AV_CH_LAYOUT_6POINT1:
488 return CHANNEL_LAYOUT_6_1;
489 case AV_CH_LAYOUT_6POINT1_BACK:
490 return CHANNEL_LAYOUT_6_1_BACK;
491 case AV_CH_LAYOUT_6POINT1_FRONT:
492 return CHANNEL_LAYOUT_6_1_FRONT;
493 case AV_CH_LAYOUT_7POINT0_FRONT:
494 return CHANNEL_LAYOUT_7_0_FRONT;
495 #ifdef AV_CH_LAYOUT_7POINT1_WIDE_BACK
496 case AV_CH_LAYOUT_7POINT1_WIDE_BACK:
497 return CHANNEL_LAYOUT_7_1_WIDE_BACK;
499 case AV_CH_LAYOUT_OCTAGONAL:
500 return CHANNEL_LAYOUT_OCTAGONAL;
502 // FFmpeg channel_layout is 0 for .wav and .mp3. Attempt to guess layout
503 // based on the channel count.
504 return GuessChannelLayout(channels);
508 VideoFrame::Format PixelFormatToVideoFormat(PixelFormat pixel_format) {
509 switch (pixel_format) {
510 case PIX_FMT_YUV422P:
511 return VideoFrame::YV16;
512 // TODO(scherkus): We should be paying attention to the color range of each
513 // format and scaling as appropriate when rendering. Regular YUV has a range
514 // of 16-239 where as YUVJ has a range of 0-255.
515 case PIX_FMT_YUV420P:
516 case PIX_FMT_YUVJ420P:
517 return VideoFrame::YV12;
518 case PIX_FMT_YUVA420P:
519 return VideoFrame::YV12A;
521 DVLOG(1) << "Unsupported PixelFormat: " << pixel_format;
523 return VideoFrame::UNKNOWN;
526 PixelFormat VideoFormatToPixelFormat(VideoFrame::Format video_format) {
527 switch (video_format) {
528 case VideoFrame::YV16:
529 return PIX_FMT_YUV422P;
530 case VideoFrame::YV12:
531 return PIX_FMT_YUV420P;
532 case VideoFrame::YV12A:
533 return PIX_FMT_YUVA420P;
535 DVLOG(1) << "Unsupported VideoFrame::Format: " << video_format;