- add sources.
[platform/framework/web/crosswalk.git] / src / media / cdm / ppapi / ffmpeg_cdm_audio_decoder.cc
1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "media/cdm/ppapi/ffmpeg_cdm_audio_decoder.h"
6
7 #include <algorithm>
8
9 #include "base/logging.h"
10 #include "media/base/audio_bus.h"
11 #include "media/base/audio_timestamp_helper.h"
12 #include "media/base/buffers.h"
13 #include "media/base/data_buffer.h"
14 #include "media/base/limits.h"
15 #include "media/ffmpeg/ffmpeg_common.h"
16
17 // Include FFmpeg header files.
18 extern "C" {
19 // Temporarily disable possible loss of data warning.
20 MSVC_PUSH_DISABLE_WARNING(4244);
21 #include <libavcodec/avcodec.h>
22 MSVC_POP_WARNING();
23 }  // extern "C"
24
25 namespace media {
26
// Upper bound on the channel count accepted by IsValidConfig(): the largest
// number of channels that has a defined layout in src/media.
static const int kMaxChannels = 8;
29
30 static AVCodecID CdmAudioCodecToCodecID(
31     cdm::AudioDecoderConfig::AudioCodec audio_codec) {
32   switch (audio_codec) {
33     case cdm::AudioDecoderConfig::kCodecVorbis:
34       return AV_CODEC_ID_VORBIS;
35     case cdm::AudioDecoderConfig::kCodecAac:
36       return AV_CODEC_ID_AAC;
37     case cdm::AudioDecoderConfig::kUnknownAudioCodec:
38     default:
39       NOTREACHED() << "Unsupported cdm::AudioCodec: " << audio_codec;
40       return AV_CODEC_ID_NONE;
41   }
42 }
43
44 static void CdmAudioDecoderConfigToAVCodecContext(
45     const cdm::AudioDecoderConfig& config,
46     AVCodecContext* codec_context) {
47   codec_context->codec_type = AVMEDIA_TYPE_AUDIO;
48   codec_context->codec_id = CdmAudioCodecToCodecID(config.codec);
49
50   switch (config.bits_per_channel) {
51     case 8:
52       codec_context->sample_fmt = AV_SAMPLE_FMT_U8;
53       break;
54     case 16:
55       codec_context->sample_fmt = AV_SAMPLE_FMT_S16;
56       break;
57     case 32:
58       codec_context->sample_fmt = AV_SAMPLE_FMT_S32;
59       break;
60     default:
61       DVLOG(1) << "CdmAudioDecoderConfigToAVCodecContext() Unsupported bits "
62                   "per channel: " << config.bits_per_channel;
63       codec_context->sample_fmt = AV_SAMPLE_FMT_NONE;
64   }
65
66   codec_context->channels = config.channel_count;
67   codec_context->sample_rate = config.samples_per_second;
68
69   if (config.extra_data) {
70     codec_context->extradata_size = config.extra_data_size;
71     codec_context->extradata = reinterpret_cast<uint8_t*>(
72         av_malloc(config.extra_data_size + FF_INPUT_BUFFER_PADDING_SIZE));
73     memcpy(codec_context->extradata, config.extra_data,
74            config.extra_data_size);
75     memset(codec_context->extradata + config.extra_data_size, '\0',
76            FF_INPUT_BUFFER_PADDING_SIZE);
77   } else {
78     codec_context->extradata = NULL;
79     codec_context->extradata_size = 0;
80   }
81 }
82
83 static cdm::AudioFormat AVSampleFormatToCdmAudioFormat(
84     AVSampleFormat sample_format) {
85   switch (sample_format) {
86     case AV_SAMPLE_FMT_U8:
87       return cdm::kAudioFormatU8;
88     case AV_SAMPLE_FMT_S16:
89       return cdm::kAudioFormatS16;
90     case AV_SAMPLE_FMT_S32:
91       return cdm::kAudioFormatS32;
92     case AV_SAMPLE_FMT_FLT:
93       return cdm::kAudioFormatF32;
94     case AV_SAMPLE_FMT_S16P:
95       return cdm::kAudioFormatPlanarS16;
96     case AV_SAMPLE_FMT_FLTP:
97       return cdm::kAudioFormatPlanarF32;
98     default:
99       DVLOG(1) << "Unknown AVSampleFormat: " << sample_format;
100   }
101   return cdm::kUnknownAudioFormat;
102 }
103
104 static void CopySamples(cdm::AudioFormat cdm_format,
105                         int decoded_audio_size,
106                         const AVFrame& av_frame,
107                         uint8_t* output_buffer) {
108   switch (cdm_format) {
109     case cdm::kAudioFormatU8:
110     case cdm::kAudioFormatS16:
111     case cdm::kAudioFormatS32:
112     case cdm::kAudioFormatF32:
113       memcpy(output_buffer, av_frame.data[0], decoded_audio_size);
114       break;
115     case cdm::kAudioFormatPlanarS16:
116     case cdm::kAudioFormatPlanarF32: {
117       const int decoded_size_per_channel =
118           decoded_audio_size / av_frame.channels;
119       for (int i = 0; i < av_frame.channels; ++i) {
120         memcpy(output_buffer,
121                av_frame.extended_data[i],
122                decoded_size_per_channel);
123         output_buffer += decoded_size_per_channel;
124       }
125       break;
126     }
127     default:
128       NOTREACHED() << "Unsupported CDM Audio Format!";
129       memset(output_buffer, 0, decoded_audio_size);
130   }
131 }
132
133 FFmpegCdmAudioDecoder::FFmpegCdmAudioDecoder(CdmHost* host)
134     : is_initialized_(false),
135       host_(host),
136       samples_per_second_(0),
137       channels_(0),
138       av_sample_format_(0),
139       bytes_per_frame_(0),
140       last_input_timestamp_(kNoTimestamp()),
141       output_bytes_to_drop_(0) {
142 }
143
144 FFmpegCdmAudioDecoder::~FFmpegCdmAudioDecoder() {
145   ReleaseFFmpegResources();
146 }
147
148 bool FFmpegCdmAudioDecoder::Initialize(const cdm::AudioDecoderConfig& config) {
149   DVLOG(1) << "Initialize()";
150   if (!IsValidConfig(config)) {
151     LOG(ERROR) << "Initialize(): invalid audio decoder configuration.";
152     return false;
153   }
154
155   if (is_initialized_) {
156     LOG(ERROR) << "Initialize(): Already initialized.";
157     return false;
158   }
159
160   // Initialize AVCodecContext structure.
161   codec_context_.reset(avcodec_alloc_context3(NULL));
162   CdmAudioDecoderConfigToAVCodecContext(config, codec_context_.get());
163
164   // MP3 decodes to S16P which we don't support, tell it to use S16 instead.
165   if (codec_context_->sample_fmt == AV_SAMPLE_FMT_S16P)
166     codec_context_->request_sample_fmt = AV_SAMPLE_FMT_S16;
167
168   AVCodec* codec = avcodec_find_decoder(codec_context_->codec_id);
169   if (!codec || avcodec_open2(codec_context_.get(), codec, NULL) < 0) {
170     DLOG(ERROR) << "Could not initialize audio decoder: "
171                 << codec_context_->codec_id;
172     return false;
173   }
174
175   // Ensure avcodec_open2() respected our format request.
176   if (codec_context_->sample_fmt == AV_SAMPLE_FMT_S16P) {
177     DLOG(ERROR) << "Unable to configure a supported sample format: "
178                 << codec_context_->sample_fmt;
179     return false;
180   }
181
182   // Success!
183   av_frame_.reset(avcodec_alloc_frame());
184   samples_per_second_ = config.samples_per_second;
185   bytes_per_frame_ = codec_context_->channels * config.bits_per_channel / 8;
186   output_timestamp_helper_.reset(
187       new AudioTimestampHelper(config.samples_per_second));
188   is_initialized_ = true;
189
190   // Store initial values to guard against midstream configuration changes.
191   channels_ = codec_context_->channels;
192   av_sample_format_ = codec_context_->sample_fmt;
193
194   return true;
195 }
196
197 void FFmpegCdmAudioDecoder::Deinitialize() {
198   DVLOG(1) << "Deinitialize()";
199   ReleaseFFmpegResources();
200   is_initialized_ = false;
201   ResetTimestampState();
202 }
203
204 void FFmpegCdmAudioDecoder::Reset() {
205   DVLOG(1) << "Reset()";
206   avcodec_flush_buffers(codec_context_.get());
207   ResetTimestampState();
208 }
209
210 // static
211 bool FFmpegCdmAudioDecoder::IsValidConfig(
212     const cdm::AudioDecoderConfig& config) {
213   return config.codec != cdm::AudioDecoderConfig::kUnknownAudioCodec &&
214          config.channel_count > 0 &&
215          config.channel_count <= kMaxChannels &&
216          config.bits_per_channel > 0 &&
217          config.bits_per_channel <= limits::kMaxBitsPerSample &&
218          config.samples_per_second > 0 &&
219          config.samples_per_second <= limits::kMaxSampleRate;
220 }
221
222 cdm::Status FFmpegCdmAudioDecoder::DecodeBuffer(
223     const uint8_t* compressed_buffer,
224     int32_t compressed_buffer_size,
225     int64_t input_timestamp,
226     cdm::AudioFrames* decoded_frames) {
227   DVLOG(1) << "DecodeBuffer()";
228   const bool is_end_of_stream = !compressed_buffer;
229   base::TimeDelta timestamp =
230       base::TimeDelta::FromMicroseconds(input_timestamp);
231
232   bool is_vorbis = codec_context_->codec_id == AV_CODEC_ID_VORBIS;
233   if (!is_end_of_stream) {
234     if (last_input_timestamp_ == kNoTimestamp()) {
235       if (is_vorbis && timestamp < base::TimeDelta()) {
236         // Dropping frames for negative timestamps as outlined in section A.2
237         // in the Vorbis spec. http://xiph.org/vorbis/doc/Vorbis_I_spec.html
238         int frames_to_drop = floor(
239             0.5 + -timestamp.InSecondsF() * samples_per_second_);
240         output_bytes_to_drop_ = bytes_per_frame_ * frames_to_drop;
241       } else {
242         last_input_timestamp_ = timestamp;
243       }
244     } else if (timestamp != kNoTimestamp()) {
245       if (timestamp < last_input_timestamp_) {
246         base::TimeDelta diff = timestamp - last_input_timestamp_;
247         DVLOG(1) << "Input timestamps are not monotonically increasing! "
248                  << " ts " << timestamp.InMicroseconds() << " us"
249                  << " diff " << diff.InMicroseconds() << " us";
250         return cdm::kDecodeError;
251       }
252
253       last_input_timestamp_ = timestamp;
254     }
255   }
256
257   AVPacket packet;
258   av_init_packet(&packet);
259   packet.data = const_cast<uint8_t*>(compressed_buffer);
260   packet.size = compressed_buffer_size;
261
262   // Tell the CDM what AudioFormat we're using.
263   const cdm::AudioFormat cdm_format = AVSampleFormatToCdmAudioFormat(
264       static_cast<AVSampleFormat>(av_sample_format_));
265   DCHECK_NE(cdm_format, cdm::kUnknownAudioFormat);
266   decoded_frames->SetFormat(cdm_format);
267
268   // Each audio packet may contain several frames, so we must call the decoder
269   // until we've exhausted the packet.  Regardless of the packet size we always
270   // want to hand it to the decoder at least once, otherwise we would end up
271   // skipping end of stream packets since they have a size of zero.
272   do {
273     // Reset frame to default values.
274     avcodec_get_frame_defaults(av_frame_.get());
275
276     int frame_decoded = 0;
277     int result = avcodec_decode_audio4(
278         codec_context_.get(), av_frame_.get(), &frame_decoded, &packet);
279
280     if (result < 0) {
281       DCHECK(!is_end_of_stream)
282           << "End of stream buffer produced an error! "
283           << "This is quite possibly a bug in the audio decoder not handling "
284           << "end of stream AVPackets correctly.";
285
286       DLOG(ERROR)
287           << "Error decoding an audio frame with timestamp: "
288           << timestamp.InMicroseconds() << " us, duration: "
289           << timestamp.InMicroseconds() << " us, packet size: "
290           << compressed_buffer_size << " bytes";
291
292       return cdm::kDecodeError;
293     }
294
295     // Update packet size and data pointer in case we need to call the decoder
296     // with the remaining bytes from this packet.
297     packet.size -= result;
298     packet.data += result;
299
300     if (output_timestamp_helper_->base_timestamp() == kNoTimestamp() &&
301         !is_end_of_stream) {
302       DCHECK(timestamp != kNoTimestamp());
303       if (output_bytes_to_drop_ > 0) {
304         // Currently Vorbis is the only codec that causes us to drop samples.
305         // If we have to drop samples it always means the timeline starts at 0.
306         DCHECK_EQ(codec_context_->codec_id, AV_CODEC_ID_VORBIS);
307         output_timestamp_helper_->SetBaseTimestamp(base::TimeDelta());
308       } else {
309         output_timestamp_helper_->SetBaseTimestamp(timestamp);
310       }
311     }
312
313     int decoded_audio_size = 0;
314     if (frame_decoded) {
315       if (av_frame_->sample_rate != samples_per_second_ ||
316           av_frame_->channels != channels_ ||
317           av_frame_->format != av_sample_format_) {
318         DLOG(ERROR) << "Unsupported midstream configuration change!"
319                     << " Sample Rate: " << av_frame_->sample_rate << " vs "
320                     << samples_per_second_
321                     << ", Channels: " << av_frame_->channels << " vs "
322                     << channels_
323                     << ", Sample Format: " << av_frame_->format << " vs "
324                     << av_sample_format_;
325         return cdm::kDecodeError;
326       }
327
328       decoded_audio_size = av_samples_get_buffer_size(
329           NULL, codec_context_->channels, av_frame_->nb_samples,
330           codec_context_->sample_fmt, 1);
331     }
332
333     if (decoded_audio_size > 0 && output_bytes_to_drop_ > 0) {
334       DCHECK_EQ(decoded_audio_size % bytes_per_frame_, 0)
335           << "Decoder didn't output full frames";
336
337       int dropped_size = std::min(decoded_audio_size, output_bytes_to_drop_);
338       decoded_audio_size -= dropped_size;
339       output_bytes_to_drop_ -= dropped_size;
340     }
341
342     if (decoded_audio_size > 0) {
343       DCHECK_EQ(decoded_audio_size % bytes_per_frame_, 0)
344           << "Decoder didn't output full frames";
345
346       base::TimeDelta output_timestamp =
347           output_timestamp_helper_->GetTimestamp();
348       output_timestamp_helper_->AddFrames(decoded_audio_size /
349                                           bytes_per_frame_);
350
351       // If we've exhausted the packet in the first decode we can write directly
352       // into the frame buffer instead of a multistep serialization approach.
353       if (serialized_audio_frames_.empty() && !packet.size) {
354         const uint32_t buffer_size = decoded_audio_size + sizeof(int64) * 2;
355         decoded_frames->SetFrameBuffer(host_->Allocate(buffer_size));
356         if (!decoded_frames->FrameBuffer()) {
357           LOG(ERROR) << "DecodeBuffer() CdmHost::Allocate failed.";
358           return cdm::kDecodeError;
359         }
360         decoded_frames->FrameBuffer()->SetSize(buffer_size);
361         uint8_t* output_buffer = decoded_frames->FrameBuffer()->Data();
362
363         const int64 timestamp = output_timestamp.InMicroseconds();
364         memcpy(output_buffer, &timestamp, sizeof(timestamp));
365         output_buffer += sizeof(timestamp);
366
367         const int64 output_size = decoded_audio_size;
368         memcpy(output_buffer, &output_size, sizeof(output_size));
369         output_buffer += sizeof(output_size);
370
371         // Copy the samples and return success.
372         CopySamples(
373             cdm_format, decoded_audio_size, *av_frame_, output_buffer);
374         return cdm::kSuccess;
375       }
376
377       // There are still more frames to decode, so we need to serialize them in
378       // a secondary buffer since we don't know their sizes ahead of time (which
379       // is required to allocate the FrameBuffer object).
380       SerializeInt64(output_timestamp.InMicroseconds());
381       SerializeInt64(decoded_audio_size);
382
383       const size_t previous_size = serialized_audio_frames_.size();
384       serialized_audio_frames_.resize(previous_size + decoded_audio_size);
385       uint8_t* output_buffer = &serialized_audio_frames_[0] + previous_size;
386       CopySamples(
387           cdm_format, decoded_audio_size, *av_frame_, output_buffer);
388     }
389   } while (packet.size > 0);
390
391   if (!serialized_audio_frames_.empty()) {
392     decoded_frames->SetFrameBuffer(
393         host_->Allocate(serialized_audio_frames_.size()));
394     if (!decoded_frames->FrameBuffer()) {
395       LOG(ERROR) << "DecodeBuffer() CdmHost::Allocate failed.";
396       return cdm::kDecodeError;
397     }
398     memcpy(decoded_frames->FrameBuffer()->Data(),
399            &serialized_audio_frames_[0],
400            serialized_audio_frames_.size());
401     decoded_frames->FrameBuffer()->SetSize(serialized_audio_frames_.size());
402     serialized_audio_frames_.clear();
403
404     return cdm::kSuccess;
405   }
406
407   return cdm::kNeedMoreData;
408 }
409
410 void FFmpegCdmAudioDecoder::ResetTimestampState() {
411   output_timestamp_helper_->SetBaseTimestamp(kNoTimestamp());
412   last_input_timestamp_ = kNoTimestamp();
413   output_bytes_to_drop_ = 0;
414 }
415
416 void FFmpegCdmAudioDecoder::ReleaseFFmpegResources() {
417   DVLOG(1) << "ReleaseFFmpegResources()";
418
419   codec_context_.reset();
420   av_frame_.reset();
421 }
422
423 void FFmpegCdmAudioDecoder::SerializeInt64(int64 value) {
424   const size_t previous_size = serialized_audio_frames_.size();
425   serialized_audio_frames_.resize(previous_size + sizeof(value));
426   memcpy(&serialized_audio_frames_[0] + previous_size, &value, sizeof(value));
427 }
428
429 }  // namespace media