- add sources.
[platform/framework/web/crosswalk.git] / src / media / filters / ffmpeg_audio_decoder.cc
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "media/filters/ffmpeg_audio_decoder.h"
6
7 #include "base/bind.h"
8 #include "base/callback_helpers.h"
9 #include "base/location.h"
10 #include "base/message_loop/message_loop_proxy.h"
11 #include "media/base/audio_buffer.h"
12 #include "media/base/audio_bus.h"
13 #include "media/base/audio_decoder_config.h"
14 #include "media/base/audio_timestamp_helper.h"
15 #include "media/base/bind_to_loop.h"
16 #include "media/base/decoder_buffer.h"
17 #include "media/base/demuxer.h"
18 #include "media/base/limits.h"
19 #include "media/base/pipeline.h"
20 #include "media/base/sample_format.h"
21 #include "media/ffmpeg/ffmpeg_common.h"
22 #include "media/filters/ffmpeg_glue.h"
23
24 namespace media {
25
// Helper structure for managing multiple decoded audio frames per packet.
// One demuxer packet may decode into several AudioBuffers; each queue entry
// pairs the buffer with the status to deliver when it is handed to the
// pending read callback.
struct QueuedAudioBuffer {
  AudioDecoder::Status status;        // Status reported alongside |buffer|.
  scoped_refptr<AudioBuffer> buffer;  // May be NULL when |status| != kOk.
};
31
32 // Returns true if the decode result was end of stream.
33 static inline bool IsEndOfStream(int result,
34                                  int decoded_size,
35                                  const scoped_refptr<DecoderBuffer>& input) {
36   // Three conditions to meet to declare end of stream for this decoder:
37   // 1. FFmpeg didn't read anything.
38   // 2. FFmpeg didn't output anything.
39   // 3. An end of stream buffer is received.
40   return result == 0 && decoded_size == 0 && input->end_of_stream();
41 }
42
// Return the number of channels from the data in |frame|.
static inline int DetermineChannels(AVFrame* frame) {
#if defined(CHROMIUM_NO_AVFRAME_CHANNELS)
  // When use_system_ffmpeg==1, libav's AVFrame doesn't have channels field.
  // Derive the count from the channel layout bitmask instead.
  return av_get_channel_layout_nb_channels(frame->channel_layout);
#else
  return frame->channels;
#endif
}
52
53 // Called by FFmpeg's allocation routine to allocate a buffer. Uses
54 // AVCodecContext.opaque to get the object reference in order to call
55 // GetAudioBuffer() to do the actual allocation.
56 static int GetAudioBufferImpl(struct AVCodecContext* s,
57                               AVFrame* frame,
58                               int flags) {
59   DCHECK(s->codec->capabilities & CODEC_CAP_DR1);
60   DCHECK_EQ(s->codec_type, AVMEDIA_TYPE_AUDIO);
61   FFmpegAudioDecoder* decoder = static_cast<FFmpegAudioDecoder*>(s->opaque);
62   return decoder->GetAudioBuffer(s, frame, flags);
63 }
64
// Called by FFmpeg's allocation routine to free a buffer. |opaque| is the
// AudioBuffer allocated, so unref it.
static void ReleaseAudioBufferImpl(void* opaque, uint8* data) {
  // Swapping the raw pointer into a (initially NULL) scoped_refptr adopts the
  // reference that GetAudioBuffer() leaked into |opaque|; it is released when
  // |buffer| goes out of scope.
  scoped_refptr<AudioBuffer> buffer;
  buffer.swap(reinterpret_cast<AudioBuffer**>(&opaque));
}
71
// All decoder state starts zeroed/none; Initialize() and ConfigureDecoder()
// populate it before any decoding happens.
FFmpegAudioDecoder::FFmpegAudioDecoder(
    const scoped_refptr<base::MessageLoopProxy>& message_loop)
    : message_loop_(message_loop),
      weak_factory_(this),
      demuxer_stream_(NULL),
      bytes_per_channel_(0),
      channel_layout_(CHANNEL_LAYOUT_NONE),
      channels_(0),
      samples_per_second_(0),
      av_sample_format_(0),
      last_input_timestamp_(kNoTimestamp()),
      output_frames_to_drop_(0) {
}
85
86 void FFmpegAudioDecoder::Initialize(
87     DemuxerStream* stream,
88     const PipelineStatusCB& status_cb,
89     const StatisticsCB& statistics_cb) {
90   DCHECK(message_loop_->BelongsToCurrentThread());
91   PipelineStatusCB initialize_cb = BindToCurrentLoop(status_cb);
92
93   FFmpegGlue::InitializeFFmpeg();
94
95   if (demuxer_stream_) {
96     // TODO(scherkus): initialization currently happens more than once in
97     // PipelineIntegrationTest.BasicPlayback.
98     LOG(ERROR) << "Initialize has already been called.";
99     CHECK(false);
100   }
101
102   weak_this_ = weak_factory_.GetWeakPtr();
103   demuxer_stream_ = stream;
104
105   if (!ConfigureDecoder()) {
106     status_cb.Run(DECODER_ERROR_NOT_SUPPORTED);
107     return;
108   }
109
110   statistics_cb_ = statistics_cb;
111   initialize_cb.Run(PIPELINE_OK);
112 }
113
// Satisfies a single decode request. At most one read may be outstanding at a
// time; the callback fires asynchronously with either queued audio or data
// decoded from the next demuxer packet.
void FFmpegAudioDecoder::Read(const ReadCB& read_cb) {
  DCHECK(message_loop_->BelongsToCurrentThread());
  DCHECK(!read_cb.is_null());
  CHECK(read_cb_.is_null()) << "Overlapping decodes are not supported.";

  // Bind to the current loop so completion is always delivered asynchronously.
  read_cb_ = BindToCurrentLoop(read_cb);

  // If we don't have any queued audio from the last packet we decoded, ask for
  // more data from the demuxer to satisfy this read.
  if (queued_audio_.empty()) {
    ReadFromDemuxerStream();
    return;
  }

  // Otherwise satisfy the read immediately from the front of the queue.
  base::ResetAndReturn(&read_cb_).Run(
      queued_audio_.front().status, queued_audio_.front().buffer);
  queued_audio_.pop_front();
}
132
// Bits per channel of the decoded audio (stored internally as bytes).
int FFmpegAudioDecoder::bits_per_channel() {
  DCHECK(message_loop_->BelongsToCurrentThread());
  return bytes_per_channel_ * 8;
}
137
// Channel layout of the decoded audio, as captured by ConfigureDecoder().
ChannelLayout FFmpegAudioDecoder::channel_layout() {
  DCHECK(message_loop_->BelongsToCurrentThread());
  return channel_layout_;
}
142
// Sample rate of the decoded audio, as captured by ConfigureDecoder().
int FFmpegAudioDecoder::samples_per_second() {
  DCHECK(message_loop_->BelongsToCurrentThread());
  return samples_per_second_;
}
147
// Flushes codec and timestamp state and drops any queued output so the next
// Read() starts clean. |closure| is invoked asynchronously when done.
void FFmpegAudioDecoder::Reset(const base::Closure& closure) {
  DCHECK(message_loop_->BelongsToCurrentThread());
  base::Closure reset_cb = BindToCurrentLoop(closure);

  avcodec_flush_buffers(codec_context_.get());
  ResetTimestampState();
  queued_audio_.clear();
  reset_cb.Run();
}
157
FFmpegAudioDecoder::~FFmpegAudioDecoder() {
  // TODO(scherkus): should we require Stop() to be called? this might end up
  // getting called on a random thread due to refcounting.
  ReleaseFFmpegResources();
}
163
// get_buffer2 implementation: allocates an AudioBuffer for FFmpeg to decode
// into, wiring frame->data[]/extended_data[] to point at the buffer's channel
// planes, and hands ownership of one AudioBuffer reference to an AVBufferRef.
// Returns 0 on success or a negative AVERROR on bad parameters.
int FFmpegAudioDecoder::GetAudioBuffer(AVCodecContext* codec,
                                       AVFrame* frame,
                                       int flags) {
  // Since this routine is called by FFmpeg when a buffer is required for audio
  // data, use the values supplied by FFmpeg (ignoring the current settings).
  // RunDecodeLoop() gets to determine if the buffer is useable or not.
  AVSampleFormat format = static_cast<AVSampleFormat>(frame->format);
  SampleFormat sample_format = AVSampleFormatToSampleFormat(format);
  int channels = DetermineChannels(frame);
  if ((channels <= 0) || (channels >= limits::kMaxChannels)) {
    // NOTE(review): a channel count equal to limits::kMaxChannels is also
    // rejected here -- confirm whether the limit is meant to be inclusive.
    DLOG(ERROR) << "Requested number of channels (" << channels
                << ") exceeds limit.";
    return AVERROR(EINVAL);
  }

  int bytes_per_channel = SampleFormatToBytesPerChannel(sample_format);
  if (frame->nb_samples <= 0)
    return AVERROR(EINVAL);

  // Determine how big the buffer should be and allocate it. FFmpeg may adjust
  // how big each channel data is in order to meet the alignment policy, so
  // we need to take this into consideration.
  int buffer_size_in_bytes =
      av_samples_get_buffer_size(&frame->linesize[0],
                                 channels,
                                 frame->nb_samples,
                                 format,
                                 AudioBuffer::kChannelAlignment);
  // NOTE(review): if |sample_format| were unknown, |bytes_per_channel| could
  // be zero and this division would fault -- verify callers guarantee a known
  // format before this callback runs.
  int frames_required = buffer_size_in_bytes / bytes_per_channel / channels;
  DCHECK_GE(frames_required, frame->nb_samples);
  scoped_refptr<AudioBuffer> buffer =
      AudioBuffer::CreateBuffer(sample_format, channels, frames_required);

  // Initialize the data[] and extended_data[] fields to point into the memory
  // allocated for AudioBuffer. |number_of_planes| will be 1 for interleaved
  // audio and equal to |channels| for planar audio.
  int number_of_planes = buffer->channel_data().size();
  if (number_of_planes <= AV_NUM_DATA_POINTERS) {
    DCHECK_EQ(frame->extended_data, frame->data);
    for (int i = 0; i < number_of_planes; ++i)
      frame->data[i] = buffer->channel_data()[i];
  } else {
    // There are more channels than can fit into data[], so allocate
    // extended_data[] and fill appropriately.
    frame->extended_data = static_cast<uint8**>(
        av_malloc(number_of_planes * sizeof(*frame->extended_data)));
    int i = 0;
    for (; i < AV_NUM_DATA_POINTERS; ++i)
      frame->extended_data[i] = frame->data[i] = buffer->channel_data()[i];
    for (; i < number_of_planes; ++i)
      frame->extended_data[i] = buffer->channel_data()[i];
  }

  // Now create an AVBufferRef for the data just allocated. It will own the
  // reference to the AudioBuffer object. The swap leaks |buffer|'s reference
  // into |opaque|; ReleaseAudioBufferImpl() reclaims it when FFmpeg frees the
  // AVBufferRef.
  void* opaque = NULL;
  buffer.swap(reinterpret_cast<AudioBuffer**>(&opaque));
  frame->buf[0] = av_buffer_create(
      frame->data[0], buffer_size_in_bytes, ReleaseAudioBufferImpl, opaque, 0);
  return 0;
}
225
// Requests the next encoded packet from the demuxer; BufferReady() receives
// it. Only called while a read callback is pending.
void FFmpegAudioDecoder::ReadFromDemuxerStream() {
  DCHECK(!read_cb_.is_null());
  demuxer_stream_->Read(base::Bind(
      &FFmpegAudioDecoder::BufferReady, weak_this_));
}
231
// Demuxer read completion. Handles abort, config change (drain + reconfigure),
// and normal packets: validates/tracks input timestamps, runs the decode loop,
// and either satisfies the pending read from |queued_audio_| or requests more
// data from the demuxer.
void FFmpegAudioDecoder::BufferReady(
    DemuxerStream::Status status,
    const scoped_refptr<DecoderBuffer>& input) {
  DCHECK(message_loop_->BelongsToCurrentThread());
  DCHECK(!read_cb_.is_null());
  DCHECK(queued_audio_.empty());
  // |input| must be non-NULL exactly when the demuxer reports kOk.
  DCHECK_EQ(status != DemuxerStream::kOk, !input.get()) << status;

  if (status == DemuxerStream::kAborted) {
    DCHECK(!input.get());
    base::ResetAndReturn(&read_cb_).Run(kAborted, NULL);
    return;
  }

  if (status == DemuxerStream::kConfigChanged) {
    DCHECK(!input.get());

    // Send a "end of stream" buffer to the decode loop
    // to output any remaining data still in the decoder.
    RunDecodeLoop(DecoderBuffer::CreateEOSBuffer(), true);

    DVLOG(1) << "Config changed.";

    if (!ConfigureDecoder()) {
      base::ResetAndReturn(&read_cb_).Run(kDecodeError, NULL);
      return;
    }

    ResetTimestampState();

    // If draining produced no output, fetch more data for the pending read;
    // otherwise satisfy it from the drained audio below.
    if (queued_audio_.empty()) {
      ReadFromDemuxerStream();
      return;
    }

    base::ResetAndReturn(&read_cb_).Run(
        queued_audio_.front().status, queued_audio_.front().buffer);
    queued_audio_.pop_front();
    return;
  }

  DCHECK_EQ(status, DemuxerStream::kOk);
  DCHECK(input.get());

  // Make sure we are notified if http://crbug.com/49709 returns.  Issue also
  // occurs with some damaged files.
  if (!input->end_of_stream() && input->timestamp() == kNoTimestamp() &&
      output_timestamp_helper_->base_timestamp() == kNoTimestamp()) {
    DVLOG(1) << "Received a buffer without timestamps!";
    base::ResetAndReturn(&read_cb_).Run(kDecodeError, NULL);
    return;
  }

  if (!input->end_of_stream()) {
    if (last_input_timestamp_ == kNoTimestamp() &&
        codec_context_->codec_id == AV_CODEC_ID_VORBIS &&
        input->timestamp() < base::TimeDelta()) {
      // Dropping frames for negative timestamps as outlined in section A.2
      // in the Vorbis spec. http://xiph.org/vorbis/doc/Vorbis_I_spec.html
      // Round to the nearest whole frame count.
      output_frames_to_drop_ = floor(
          0.5 + -input->timestamp().InSecondsF() * samples_per_second_);
    } else {
      if (last_input_timestamp_ != kNoTimestamp() &&
          input->timestamp() < last_input_timestamp_) {
        const base::TimeDelta diff = input->timestamp() - last_input_timestamp_;
        DLOG(WARNING)
            << "Input timestamps are not monotonically increasing! "
            << " ts " << input->timestamp().InMicroseconds() << " us"
            << " diff " << diff.InMicroseconds() << " us";
      }

      last_input_timestamp_ = input->timestamp();
    }
  }

  RunDecodeLoop(input, false);

  // We exhausted the provided packet, but it wasn't enough for a frame.  Ask
  // for more data in order to fulfill this read.
  if (queued_audio_.empty()) {
    ReadFromDemuxerStream();
    return;
  }

  // Execute callback to return the first frame we decoded.
  base::ResetAndReturn(&read_cb_).Run(
      queued_audio_.front().status, queued_audio_.front().buffer);
  queued_audio_.pop_front();
}
321
// (Re)configures |codec_context_| from the demuxer stream's current audio
// config. Returns false if the config is invalid, encrypted, represents an
// unsupported midstream change, or FFmpeg fails to open the codec.
bool FFmpegAudioDecoder::ConfigureDecoder() {
  const AudioDecoderConfig& config = demuxer_stream_->audio_decoder_config();

  if (!config.IsValidConfig()) {
    DLOG(ERROR) << "Invalid audio stream -"
                << " codec: " << config.codec()
                << " channel layout: " << config.channel_layout()
                << " bits per channel: " << config.bits_per_channel()
                << " samples per second: " << config.samples_per_second();
    return false;
  }

  if (config.is_encrypted()) {
    DLOG(ERROR) << "Encrypted audio stream not supported";
    return false;
  }

  // Only in-place codec changes are supported: sample size, channel layout,
  // and sample rate must stay the same across a reconfigure.
  if (codec_context_.get() &&
      (bytes_per_channel_ != config.bytes_per_channel() ||
       channel_layout_ != config.channel_layout() ||
       samples_per_second_ != config.samples_per_second())) {
    DVLOG(1) << "Unsupported config change :";
    DVLOG(1) << "\tbytes_per_channel : " << bytes_per_channel_
             << " -> " << config.bytes_per_channel();
    DVLOG(1) << "\tchannel_layout : " << channel_layout_
             << " -> " << config.channel_layout();
    DVLOG(1) << "\tsample_rate : " << samples_per_second_
             << " -> " << config.samples_per_second();
    return false;
  }

  // Release existing decoder resources if necessary.
  ReleaseFFmpegResources();

  // Initialize AVCodecContext structure.
  codec_context_.reset(avcodec_alloc_context3(NULL));
  AudioDecoderConfigToAVCodecContext(config, codec_context_.get());

  // Route FFmpeg's buffer allocation through GetAudioBuffer() and keep frames
  // refcounted so AudioBuffer ownership can be tied to AVBufferRefs.
  codec_context_->opaque = this;
  codec_context_->get_buffer2 = GetAudioBufferImpl;
  codec_context_->refcounted_frames = 1;

  AVCodec* codec = avcodec_find_decoder(codec_context_->codec_id);
  if (!codec || avcodec_open2(codec_context_.get(), codec, NULL) < 0) {
    DLOG(ERROR) << "Could not initialize audio decoder: "
                << codec_context_->codec_id;
    return false;
  }

  // Success!
  av_frame_.reset(avcodec_alloc_frame());
  channel_layout_ = config.channel_layout();
  samples_per_second_ = config.samples_per_second();
  output_timestamp_helper_.reset(
      new AudioTimestampHelper(config.samples_per_second()));

  // Store initial values to guard against midstream configuration changes.
  channels_ = codec_context_->channels;
  if (channels_ != ChannelLayoutToChannelCount(channel_layout_)) {
    DLOG(ERROR) << "Audio configuration specified "
                << ChannelLayoutToChannelCount(channel_layout_)
                << " channels, but FFmpeg thinks the file contains "
                << channels_ << " channels";
    return false;
  }
  av_sample_format_ = codec_context_->sample_fmt;
  sample_format_ = AVSampleFormatToSampleFormat(
      static_cast<AVSampleFormat>(av_sample_format_));
  bytes_per_channel_ = SampleFormatToBytesPerChannel(sample_format_);

  return true;
}
394
// Frees the codec context and scratch frame; safe to call when already reset.
void FFmpegAudioDecoder::ReleaseFFmpegResources() {
  codec_context_.reset();
  av_frame_.reset();
}
399
// Clears timestamp tracking so the next decoded buffer re-establishes the
// output timeline.
void FFmpegAudioDecoder::ResetTimestampState() {
  output_timestamp_helper_->SetBaseTimestamp(kNoTimestamp());
  last_input_timestamp_ = kNoTimestamp();
  output_frames_to_drop_ = 0;
}
405
// Feeds |input| to avcodec_decode_audio4() until the packet is exhausted,
// appending each decoded AudioBuffer (or an error/EOS entry) to
// |queued_audio_|. |skip_eos_append| suppresses the EOS output buffer when
// draining before a config change.
void FFmpegAudioDecoder::RunDecodeLoop(
    const scoped_refptr<DecoderBuffer>& input,
    bool skip_eos_append) {
  AVPacket packet;
  av_init_packet(&packet);
  if (input->end_of_stream()) {
    // A NULL/zero packet tells FFmpeg to flush out any delayed frames.
    packet.data = NULL;
    packet.size = 0;
  } else {
    packet.data = const_cast<uint8*>(input->data());
    packet.size = input->data_size();
  }

  // Each audio packet may contain several frames, so we must call the decoder
  // until we've exhausted the packet.  Regardless of the packet size we always
  // want to hand it to the decoder at least once, otherwise we would end up
  // skipping end of stream packets since they have a size of zero.
  do {
    int frame_decoded = 0;
    int result = avcodec_decode_audio4(
        codec_context_.get(), av_frame_.get(), &frame_decoded, &packet);

    if (result < 0) {
      DCHECK(!input->end_of_stream())
          << "End of stream buffer produced an error! "
          << "This is quite possibly a bug in the audio decoder not handling "
          << "end of stream AVPackets correctly.";

      DLOG(WARNING)
          << "Failed to decode an audio frame with timestamp: "
          << input->timestamp().InMicroseconds() << " us, duration: "
          << input->duration().InMicroseconds() << " us, packet size: "
          << input->data_size() << " bytes";

      break;
    }

    // Update packet size and data pointer in case we need to call the decoder
    // with the remaining bytes from this packet.
    packet.size -= result;
    packet.data += result;

    // Establish the output timeline from the first timestamped input.
    if (output_timestamp_helper_->base_timestamp() == kNoTimestamp() &&
        !input->end_of_stream()) {
      DCHECK(input->timestamp() != kNoTimestamp());
      if (output_frames_to_drop_ > 0) {
        // Currently Vorbis is the only codec that causes us to drop samples.
        // If we have to drop samples it always means the timeline starts at 0.
        DCHECK_EQ(codec_context_->codec_id, AV_CODEC_ID_VORBIS);
        output_timestamp_helper_->SetBaseTimestamp(base::TimeDelta());
      } else {
        output_timestamp_helper_->SetBaseTimestamp(input->timestamp());
      }
    }

    scoped_refptr<AudioBuffer> output;
    int decoded_frames = 0;
    int original_frames = 0;
    int channels = DetermineChannels(av_frame_.get());
    if (frame_decoded) {
      // Reject frames whose parameters no longer match what the decoder was
      // configured with.
      if (av_frame_->sample_rate != samples_per_second_ ||
          channels != channels_ ||
          av_frame_->format != av_sample_format_) {
        DLOG(ERROR) << "Unsupported midstream configuration change!"
                    << " Sample Rate: " << av_frame_->sample_rate << " vs "
                    << samples_per_second_
                    << ", Channels: " << channels << " vs "
                    << channels_
                    << ", Sample Format: " << av_frame_->format << " vs "
                    << av_sample_format_;

        // This is an unrecoverable error, so bail out.
        QueuedAudioBuffer queue_entry = { kDecodeError, NULL };
        queued_audio_.push_back(queue_entry);
        av_frame_unref(av_frame_.get());
        break;
      }

      // Get the AudioBuffer that the data was decoded into. Adjust the number
      // of frames, in case fewer than requested were actually decoded.
      output = reinterpret_cast<AudioBuffer*>(
          av_buffer_get_opaque(av_frame_->buf[0]));
      DCHECK_EQ(channels_, output->channel_count());
      original_frames = av_frame_->nb_samples;
      int unread_frames = output->frame_count() - original_frames;
      DCHECK_GE(unread_frames, 0);
      if (unread_frames > 0)
        output->TrimEnd(unread_frames);

      // If there are frames to drop, get rid of as many as we can.
      if (output_frames_to_drop_ > 0) {
        int drop = std::min(output->frame_count(), output_frames_to_drop_);
        output->TrimStart(drop);
        output_frames_to_drop_ -= drop;
      }

      decoded_frames = output->frame_count();
      av_frame_unref(av_frame_.get());
    }

    // WARNING: |av_frame_| no longer has valid data at this point.

    if (decoded_frames > 0) {
      // Set the timestamp/duration once all the extra frames have been
      // discarded.
      output->set_timestamp(output_timestamp_helper_->GetTimestamp());
      output->set_duration(
          output_timestamp_helper_->GetFrameDuration(decoded_frames));
      output_timestamp_helper_->AddFrames(decoded_frames);
    } else if (IsEndOfStream(result, original_frames, input) &&
               !skip_eos_append) {
      DCHECK_EQ(packet.size, 0);
      output = AudioBuffer::CreateEOSBuffer();
    } else {
      // In case all the frames in the buffer were dropped.
      output = NULL;
    }

    if (output.get()) {
      QueuedAudioBuffer queue_entry = { kOk, output };
      queued_audio_.push_back(queue_entry);
    }

    // Decoding finished successfully, update statistics.
    if (result > 0) {
      PipelineStatistics statistics;
      statistics.audio_bytes_decoded = result;
      statistics_cb_.Run(statistics);
    }
  } while (packet.size > 0);
}
537
538 }  // namespace media