src/media/cdm/ppapi/ffmpeg_cdm_audio_decoder.cc

   1 // Copyright 2013 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #include "media/cdm/ppapi/ffmpeg_cdm_audio_decoder.h"
   6
   7 #include <algorithm>
   8
   9 #include "base/logging.h"
  10 #include "media/base/audio_bus.h"
  11 #include "media/base/audio_timestamp_helper.h"
  12 #include "media/base/buffers.h"
  13 #include "media/base/data_buffer.h"
  14 #include "media/base/limits.h"
  15 #include "media/ffmpeg/ffmpeg_common.h"
  16
  17 // Include FFmpeg header files.
  18 extern "C" {
  19 // Temporarily disable possible loss of data warning.
  20 MSVC_PUSH_DISABLE_WARNING(4244);
  21 #include <libavcodec/avcodec.h>
  22 MSVC_POP_WARNING();
  23 }  // extern "C"
  24
  25 namespace media {
  26
// Maximum number of channels with defined layout in src/media.
// FFmpegCdmAudioDecoder::IsValidConfig() rejects any configuration whose
// channel_count exceeds this value.
static const int kMaxChannels = 8;
  29
  30 static AVCodecID CdmAudioCodecToCodecID(
  31     cdm::AudioDecoderConfig::AudioCodec audio_codec) {
  32   switch (audio_codec) {
  33     case cdm::AudioDecoderConfig::kCodecVorbis:
  34       return AV_CODEC_ID_VORBIS;
  35     case cdm::AudioDecoderConfig::kCodecAac:
  36       return AV_CODEC_ID_AAC;
  37     case cdm::AudioDecoderConfig::kUnknownAudioCodec:
  38     default:
  39       NOTREACHED() << "Unsupported cdm::AudioCodec: " << audio_codec;
  40       return AV_CODEC_ID_NONE;
  41   }
  42 }
  43
  44 static void CdmAudioDecoderConfigToAVCodecContext(
  45     const cdm::AudioDecoderConfig& config,
  46     AVCodecContext* codec_context) {
  47   codec_context->codec_type = AVMEDIA_TYPE_AUDIO;
  48   codec_context->codec_id = CdmAudioCodecToCodecID(config.codec);
  49
  50   switch (config.bits_per_channel) {
  51     case 8:
  52       codec_context->sample_fmt = AV_SAMPLE_FMT_U8;
  53       break;
  54     case 16:
  55       codec_context->sample_fmt = AV_SAMPLE_FMT_S16;
  56       break;
  57     case 32:
  58       codec_context->sample_fmt = AV_SAMPLE_FMT_S32;
  59       break;
  60     default:
  61       DVLOG(1) << "CdmAudioDecoderConfigToAVCodecContext() Unsupported bits "
  62                   "per channel: " << config.bits_per_channel;
  63       codec_context->sample_fmt = AV_SAMPLE_FMT_NONE;
  64   }
  65
  66   codec_context->channels = config.channel_count;
  67   codec_context->sample_rate = config.samples_per_second;
  68
  69   if (config.extra_data) {
  70     codec_context->extradata_size = config.extra_data_size;
  71     codec_context->extradata = reinterpret_cast<uint8_t*>(
  72         av_malloc(config.extra_data_size + FF_INPUT_BUFFER_PADDING_SIZE));
  73     memcpy(codec_context->extradata, config.extra_data,
  74            config.extra_data_size);
  75     memset(codec_context->extradata + config.extra_data_size, '\0',
  76            FF_INPUT_BUFFER_PADDING_SIZE);
  77   } else {
  78     codec_context->extradata = NULL;
  79     codec_context->extradata_size = 0;
  80   }
  81 }
  82
  83 static cdm::AudioFormat AVSampleFormatToCdmAudioFormat(
  84     AVSampleFormat sample_format) {
  85   switch (sample_format) {
  86     case AV_SAMPLE_FMT_U8:
  87       return cdm::kAudioFormatU8;
  88     case AV_SAMPLE_FMT_S16:
  89       return cdm::kAudioFormatS16;
  90     case AV_SAMPLE_FMT_S32:
  91       return cdm::kAudioFormatS32;
  92     case AV_SAMPLE_FMT_FLT:
  93       return cdm::kAudioFormatF32;
  94     case AV_SAMPLE_FMT_S16P:
  95       return cdm::kAudioFormatPlanarS16;
  96     case AV_SAMPLE_FMT_FLTP:
  97       return cdm::kAudioFormatPlanarF32;
  98     default:
  99       DVLOG(1) << "Unknown AVSampleFormat: " << sample_format;
 100   }
 101   return cdm::kUnknownAudioFormat;
 102 }
 103
 104 static void CopySamples(cdm::AudioFormat cdm_format,
 105                         int decoded_audio_size,
 106                         const AVFrame& av_frame,
 107                         uint8_t* output_buffer) {
 108   switch (cdm_format) {
 109     case cdm::kAudioFormatU8:
 110     case cdm::kAudioFormatS16:
 111     case cdm::kAudioFormatS32:
 112     case cdm::kAudioFormatF32:
 113       memcpy(output_buffer, av_frame.data[0], decoded_audio_size);
 114       break;
 115     case cdm::kAudioFormatPlanarS16:
 116     case cdm::kAudioFormatPlanarF32: {
 117       const int decoded_size_per_channel =
 118           decoded_audio_size / av_frame.channels;
 119       for (int i = 0; i < av_frame.channels; ++i) {
 120         memcpy(output_buffer,
 121                av_frame.extended_data[i],
 122                decoded_size_per_channel);
 123         output_buffer += decoded_size_per_channel;
 124       }
 125       break;
 126     }
 127     default:
 128       NOTREACHED() << "Unsupported CDM Audio Format!";
 129       memset(output_buffer, 0, decoded_audio_size);
 130   }
 131 }
 132
// Constructs an uninitialized decoder that keeps |host| for later use
// (DecodeBuffer() calls host_->Allocate() to obtain output buffers).
// All stream parameters start at zero and the timestamp state starts at
// kNoTimestamp(); Initialize() must succeed before decoding.
FFmpegCdmAudioDecoder::FFmpegCdmAudioDecoder(CdmHost* host)
    : is_initialized_(false),
      host_(host),
      samples_per_second_(0),
      channels_(0),
      av_sample_format_(0),
      bytes_per_frame_(0),
      last_input_timestamp_(kNoTimestamp()),
      output_bytes_to_drop_(0) {
}
 143
// Releases the FFmpeg codec context and frame held by this decoder.
FFmpegCdmAudioDecoder::~FFmpegCdmAudioDecoder() {
  ReleaseFFmpegResources();
}
 147
 148 bool FFmpegCdmAudioDecoder::Initialize(const cdm::AudioDecoderConfig& config) {
 149   DVLOG(1) << "Initialize()";
 150   if (!IsValidConfig(config)) {
 151     LOG(ERROR) << "Initialize(): invalid audio decoder configuration.";
 152     return false;
 153   }
 154
 155   if (is_initialized_) {
 156     LOG(ERROR) << "Initialize(): Already initialized.";
 157     return false;
 158   }
 159
 160   // Initialize AVCodecContext structure.
 161   codec_context_.reset(avcodec_alloc_context3(NULL));
 162   CdmAudioDecoderConfigToAVCodecContext(config, codec_context_.get());
 163
 164   // MP3 decodes to S16P which we don't support, tell it to use S16 instead.
 165   if (codec_context_->sample_fmt == AV_SAMPLE_FMT_S16P)
 166     codec_context_->request_sample_fmt = AV_SAMPLE_FMT_S16;
 167
 168   AVCodec* codec = avcodec_find_decoder(codec_context_->codec_id);
 169   if (!codec || avcodec_open2(codec_context_.get(), codec, NULL) < 0) {
 170     DLOG(ERROR) << "Could not initialize audio decoder: "
 171                 << codec_context_->codec_id;
 172     return false;
 173   }
 174
 175   // Ensure avcodec_open2() respected our format request.
 176   if (codec_context_->sample_fmt == AV_SAMPLE_FMT_S16P) {
 177     DLOG(ERROR) << "Unable to configure a supported sample format: "
 178                 << codec_context_->sample_fmt;
 179     return false;
 180   }
 181
 182   // Success!
 183   av_frame_.reset(avcodec_alloc_frame());
 184   samples_per_second_ = config.samples_per_second;
 185   bytes_per_frame_ = codec_context_->channels * config.bits_per_channel / 8;
 186   output_timestamp_helper_.reset(
 187       new AudioTimestampHelper(config.samples_per_second));
 188   is_initialized_ = true;
 189
 190   // Store initial values to guard against midstream configuration changes.
 191   channels_ = codec_context_->channels;
 192   av_sample_format_ = codec_context_->sample_fmt;
 193
 194   return true;
 195 }
 196
 197 void FFmpegCdmAudioDecoder::Deinitialize() {
 198   DVLOG(1) << "Deinitialize()";
 199   ReleaseFFmpegResources();
 200   is_initialized_ = false;
 201   ResetTimestampState();
 202 }
 203
 204 void FFmpegCdmAudioDecoder::Reset() {
 205   DVLOG(1) << "Reset()";
 206   avcodec_flush_buffers(codec_context_.get());
 207   ResetTimestampState();
 208 }
 209
 210 // static
 211 bool FFmpegCdmAudioDecoder::IsValidConfig(
 212     const cdm::AudioDecoderConfig& config) {
 213   return config.codec != cdm::AudioDecoderConfig::kUnknownAudioCodec &&
 214          config.channel_count > 0 &&
 215          config.channel_count <= kMaxChannels &&
 216          config.bits_per_channel > 0 &&
 217          config.bits_per_channel <= limits::kMaxBitsPerSample &&
 218          config.samples_per_second > 0 &&
 219          config.samples_per_second <= limits::kMaxSampleRate;
 220 }
 221
 222 cdm::Status FFmpegCdmAudioDecoder::DecodeBuffer(
 223     const uint8_t* compressed_buffer,
 224     int32_t compressed_buffer_size,
 225     int64_t input_timestamp,
 226     cdm::AudioFrames* decoded_frames) {
 227   DVLOG(1) << "DecodeBuffer()";
 228   const bool is_end_of_stream = !compressed_buffer;
 229   base::TimeDelta timestamp =
 230       base::TimeDelta::FromMicroseconds(input_timestamp);
 231
 232   bool is_vorbis = codec_context_->codec_id == AV_CODEC_ID_VORBIS;
 233   if (!is_end_of_stream) {
 234     if (last_input_timestamp_ == kNoTimestamp()) {
 235       if (is_vorbis && timestamp < base::TimeDelta()) {
 236         // Dropping frames for negative timestamps as outlined in section A.2
 237         // in the Vorbis spec. http://xiph.org/vorbis/doc/Vorbis_I_spec.html
 238         int frames_to_drop = floor(
 239             0.5 + -timestamp.InSecondsF() * samples_per_second_);
 240         output_bytes_to_drop_ = bytes_per_frame_ * frames_to_drop;
 241       } else {
 242         last_input_timestamp_ = timestamp;
 243       }
 244     } else if (timestamp != kNoTimestamp()) {
 245       if (timestamp < last_input_timestamp_) {
 246         base::TimeDelta diff = timestamp - last_input_timestamp_;
 247         DVLOG(1) << "Input timestamps are not monotonically increasing! "
 248                  << " ts " << timestamp.InMicroseconds() << " us"
 249                  << " diff " << diff.InMicroseconds() << " us";
 250         return cdm::kDecodeError;
 251       }
 252
 253       last_input_timestamp_ = timestamp;
 254     }
 255   }
 256
 257   AVPacket packet;
 258   av_init_packet(&packet);
 259   packet.data = const_cast<uint8_t*>(compressed_buffer);
 260   packet.size = compressed_buffer_size;
 261
 262   // Tell the CDM what AudioFormat we're using.
 263   const cdm::AudioFormat cdm_format = AVSampleFormatToCdmAudioFormat(
 264       static_cast<AVSampleFormat>(av_sample_format_));
 265   DCHECK_NE(cdm_format, cdm::kUnknownAudioFormat);
 266   decoded_frames->SetFormat(cdm_format);
 267
 268   // Each audio packet may contain several frames, so we must call the decoder
 269   // until we've exhausted the packet.  Regardless of the packet size we always
 270   // want to hand it to the decoder at least once, otherwise we would end up
 271   // skipping end of stream packets since they have a size of zero.
 272   do {
 273     // Reset frame to default values.
 274     avcodec_get_frame_defaults(av_frame_.get());
 275
 276     int frame_decoded = 0;
 277     int result = avcodec_decode_audio4(
 278         codec_context_.get(), av_frame_.get(), &frame_decoded, &packet);
 279
 280     if (result < 0) {
 281       DCHECK(!is_end_of_stream)
 282           << "End of stream buffer produced an error! "
 283           << "This is quite possibly a bug in the audio decoder not handling "
 284           << "end of stream AVPackets correctly.";
 285
 286       DLOG(ERROR)
 287           << "Error decoding an audio frame with timestamp: "
 288           << timestamp.InMicroseconds() << " us, duration: "
 289           << timestamp.InMicroseconds() << " us, packet size: "
 290           << compressed_buffer_size << " bytes";
 291
 292       return cdm::kDecodeError;
 293     }
 294
 295     // Update packet size and data pointer in case we need to call the decoder
 296     // with the remaining bytes from this packet.
 297     packet.size -= result;
 298     packet.data += result;
 299
 300     if (output_timestamp_helper_->base_timestamp() == kNoTimestamp() &&
 301         !is_end_of_stream) {
 302       DCHECK(timestamp != kNoTimestamp());
 303       if (output_bytes_to_drop_ > 0) {
 304         // Currently Vorbis is the only codec that causes us to drop samples.
 305         // If we have to drop samples it always means the timeline starts at 0.
 306         DCHECK_EQ(codec_context_->codec_id, AV_CODEC_ID_VORBIS);
 307         output_timestamp_helper_->SetBaseTimestamp(base::TimeDelta());
 308       } else {
 309         output_timestamp_helper_->SetBaseTimestamp(timestamp);
 310       }
 311     }
 312
 313     int decoded_audio_size = 0;
 314     if (frame_decoded) {
 315       if (av_frame_->sample_rate != samples_per_second_ ||
 316           av_frame_->channels != channels_ ||
 317           av_frame_->format != av_sample_format_) {
 318         DLOG(ERROR) << "Unsupported midstream configuration change!"
 319                     << " Sample Rate: " << av_frame_->sample_rate << " vs "
 320                     << samples_per_second_
 321                     << ", Channels: " << av_frame_->channels << " vs "
 322                     << channels_
 323                     << ", Sample Format: " << av_frame_->format << " vs "
 324                     << av_sample_format_;
 325         return cdm::kDecodeError;
 326       }
 327
 328       decoded_audio_size = av_samples_get_buffer_size(
 329           NULL, codec_context_->channels, av_frame_->nb_samples,
 330           codec_context_->sample_fmt, 1);
 331     }
 332
 333     if (decoded_audio_size > 0 && output_bytes_to_drop_ > 0) {
 334       DCHECK_EQ(decoded_audio_size % bytes_per_frame_, 0)
 335           << "Decoder didn't output full frames";
 336
 337       int dropped_size = std::min(decoded_audio_size, output_bytes_to_drop_);
 338       decoded_audio_size -= dropped_size;
 339       output_bytes_to_drop_ -= dropped_size;
 340     }
 341
 342     if (decoded_audio_size > 0) {
 343       DCHECK_EQ(decoded_audio_size % bytes_per_frame_, 0)
 344           << "Decoder didn't output full frames";
 345
 346       base::TimeDelta output_timestamp =
 347           output_timestamp_helper_->GetTimestamp();
 348       output_timestamp_helper_->AddFrames(decoded_audio_size /
 349                                           bytes_per_frame_);
 350
 351       // If we've exhausted the packet in the first decode we can write directly
 352       // into the frame buffer instead of a multistep serialization approach.
 353       if (serialized_audio_frames_.empty() && !packet.size) {
 354         const uint32_t buffer_size = decoded_audio_size + sizeof(int64) * 2;
 355         decoded_frames->SetFrameBuffer(host_->Allocate(buffer_size));
 356         if (!decoded_frames->FrameBuffer()) {
 357           LOG(ERROR) << "DecodeBuffer() CdmHost::Allocate failed.";
 358           return cdm::kDecodeError;
 359         }
 360         decoded_frames->FrameBuffer()->SetSize(buffer_size);
 361         uint8_t* output_buffer = decoded_frames->FrameBuffer()->Data();
 362
 363         const int64 timestamp = output_timestamp.InMicroseconds();
 364         memcpy(output_buffer, &timestamp, sizeof(timestamp));
 365         output_buffer += sizeof(timestamp);
 366
 367         const int64 output_size = decoded_audio_size;
 368         memcpy(output_buffer, &output_size, sizeof(output_size));
 369         output_buffer += sizeof(output_size);
 370
 371         // Copy the samples and return success.
 372         CopySamples(
 373             cdm_format, decoded_audio_size, *av_frame_, output_buffer);
 374         return cdm::kSuccess;
 375       }
 376
 377       // There are still more frames to decode, so we need to serialize them in
 378       // a secondary buffer since we don't know their sizes ahead of time (which
 379       // is required to allocate the FrameBuffer object).
 380       SerializeInt64(output_timestamp.InMicroseconds());
 381       SerializeInt64(decoded_audio_size);
 382
 383       const size_t previous_size = serialized_audio_frames_.size();
 384       serialized_audio_frames_.resize(previous_size + decoded_audio_size);
 385       uint8_t* output_buffer = &serialized_audio_frames_[0] + previous_size;
 386       CopySamples(
 387           cdm_format, decoded_audio_size, *av_frame_, output_buffer);
 388     }
 389   } while (packet.size > 0);
 390
 391   if (!serialized_audio_frames_.empty()) {
 392     decoded_frames->SetFrameBuffer(
 393         host_->Allocate(serialized_audio_frames_.size()));
 394     if (!decoded_frames->FrameBuffer()) {
 395       LOG(ERROR) << "DecodeBuffer() CdmHost::Allocate failed.";
 396       return cdm::kDecodeError;
 397     }
 398     memcpy(decoded_frames->FrameBuffer()->Data(),
 399            &serialized_audio_frames_[0],
 400            serialized_audio_frames_.size());
 401     decoded_frames->FrameBuffer()->SetSize(serialized_audio_frames_.size());
 402     serialized_audio_frames_.clear();
 403
 404     return cdm::kSuccess;
 405   }
 406
 407   return cdm::kNeedMoreData;
 408 }
 409
 410 void FFmpegCdmAudioDecoder::ResetTimestampState() {
 411   output_timestamp_helper_->SetBaseTimestamp(kNoTimestamp());
 412   last_input_timestamp_ = kNoTimestamp();
 413   output_bytes_to_drop_ = 0;
 414 }
 415
// Drops ownership of the FFmpeg codec context and the decode frame by
// resetting both smart pointers. Called from the destructor and from
// Deinitialize().
void FFmpegCdmAudioDecoder::ReleaseFFmpegResources() {
  DVLOG(1) << "ReleaseFFmpegResources()";

  codec_context_.reset();
  av_frame_.reset();
}
 422
 423 void FFmpegCdmAudioDecoder::SerializeInt64(int64 value) {
 424   const size_t previous_size = serialized_audio_frames_.size();
 425   serialized_audio_frames_.resize(previous_size + sizeof(value));
 426   memcpy(&serialized_audio_frames_[0] + previous_size, &value, sizeof(value));
 427 }
 428
 429 }  // namespace media