media/filters/audio_file_reader.cc

   1 // Copyright 2012 The Chromium Authors
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
#include "media/filters/audio_file_reader.h"

#include <stddef.h>
#include <string.h>

#include <algorithm>
#include <cmath>
#include <memory>
#include <vector>

#include "base/bind.h"
#include "base/callback.h"
#include "base/logging.h"
#include "base/numerics/safe_math.h"
#include "base/time/time.h"
#include "media/base/audio_bus.h"
#include "media/base/audio_sample_types.h"
#include "media/ffmpeg/ffmpeg_common.h"
#include "media/ffmpeg/ffmpeg_decoding_loop.h"
  22
  23 namespace media {
  24
  25 // AAC(M4A) decoding specific constants.
  26 static const int kAACPrimingFrameCount = 2112;
  27 static const int kAACRemainderFrameCount = 519;
  28
  29 AudioFileReader::AudioFileReader(FFmpegURLProtocol* protocol)
  30     : stream_index_(0),
  31       protocol_(protocol),
  32       audio_codec_(AudioCodec::kUnknown),
  33       channels_(0),
  34       sample_rate_(0),
  35       av_sample_format_(0) {}
  36
  37 AudioFileReader::~AudioFileReader() {
  38   Close();
  39 }
  40
  41 bool AudioFileReader::Open() {
  42   return OpenDemuxer() && OpenDecoder();
  43 }
  44
  45 bool AudioFileReader::OpenDemuxer() {
  46   glue_ = std::make_unique<FFmpegGlue>(protocol_);
  47   AVFormatContext* format_context = glue_->format_context();
  48
  49   // Open FFmpeg AVFormatContext.
  50   if (!glue_->OpenContext()) {
  51     DLOG(WARNING) << "AudioFileReader::Open() : error in avformat_open_input()";
  52     return false;
  53   }
  54
  55   const int result = avformat_find_stream_info(format_context, NULL);
  56   if (result < 0) {
  57     DLOG(WARNING)
  58         << "AudioFileReader::Open() : error in avformat_find_stream_info()";
  59     return false;
  60   }
  61
  62   // Calling avformat_find_stream_info can uncover new streams. We wait till now
  63   // to find the first audio stream, if any.
  64   codec_context_.reset();
  65   bool found_stream = false;
  66   for (size_t i = 0; i < format_context->nb_streams; ++i) {
  67     if (format_context->streams[i]->codecpar->codec_type ==
  68         AVMEDIA_TYPE_AUDIO) {
  69       stream_index_ = i;
  70       found_stream = true;
  71       break;
  72     }
  73   }
  74
  75   if (!found_stream)
  76     return false;
  77
  78   // Get the codec context.
  79   codec_context_ =
  80       AVStreamToAVCodecContext(format_context->streams[stream_index_]);
  81   if (!codec_context_)
  82     return false;
  83
  84   DCHECK_EQ(codec_context_->codec_type, AVMEDIA_TYPE_AUDIO);
  85   return true;
  86 }
  87
  88 bool AudioFileReader::OpenDecoder() {
  89   const AVCodec* codec = avcodec_find_decoder(codec_context_->codec_id);
  90   if (codec) {
  91     // MP3 decodes to S16P which we don't support, tell it to use S16 instead.
  92     if (codec_context_->sample_fmt == AV_SAMPLE_FMT_S16P)
  93       codec_context_->request_sample_fmt = AV_SAMPLE_FMT_S16;
  94
  95     const int result = avcodec_open2(codec_context_.get(), codec, nullptr);
  96     if (result < 0) {
  97       DLOG(WARNING) << "AudioFileReader::Open() : could not open codec -"
  98                     << " result: " << result;
  99       return false;
 100     }
 101
 102     // Ensure avcodec_open2() respected our format request.
 103     if (codec_context_->sample_fmt == AV_SAMPLE_FMT_S16P) {
 104       DLOG(ERROR) << "AudioFileReader::Open() : unable to configure a"
 105                   << " supported sample format - "
 106                   << codec_context_->sample_fmt;
 107       return false;
 108     }
 109   } else {
 110     DLOG(WARNING) << "AudioFileReader::Open() : could not find codec.";
 111     return false;
 112   }
 113
 114   // Verify the channel layout is supported by Chrome.  Acts as a sanity check
 115   // against invalid files.  See http://crbug.com/171962
 116   if (ChannelLayoutToChromeChannelLayout(
 117           codec_context_->ch_layout.u.mask,
 118           codec_context_->ch_layout.nb_channels) ==
 119       CHANNEL_LAYOUT_UNSUPPORTED) {
 120     return false;
 121   }
 122
 123   // Store initial values to guard against midstream configuration changes.
 124   channels_ = codec_context_->ch_layout.nb_channels;
 125   audio_codec_ = CodecIDToAudioCodec(codec_context_->codec_id);
 126   sample_rate_ = codec_context_->sample_rate;
 127   av_sample_format_ = codec_context_->sample_fmt;
 128   return true;
 129 }
 130
 131 bool AudioFileReader::HasKnownDuration() const {
 132   return glue_->format_context()->duration != AV_NOPTS_VALUE;
 133 }
 134
 135 void AudioFileReader::Close() {
 136   codec_context_.reset();
 137   glue_.reset();
 138 }
 139
 140 int AudioFileReader::Read(
 141     std::vector<std::unique_ptr<AudioBus>>* decoded_audio_packets,
 142     int packets_to_read) {
 143   DCHECK(glue_ && codec_context_)
 144       << "AudioFileReader::Read() : reader is not opened!";
 145
 146   FFmpegDecodingLoop decode_loop(codec_context_.get());
 147
 148   int total_frames = 0;
 149   auto frame_ready_cb =
 150       base::BindRepeating(&AudioFileReader::OnNewFrame, base::Unretained(this),
 151                           &total_frames, decoded_audio_packets);
 152
 153   AVPacket packet;
 154   int packets_read = 0;
 155   while (packets_read++ < packets_to_read && ReadPacket(&packet)) {
 156     const auto status = decode_loop.DecodePacket(&packet, frame_ready_cb);
 157     av_packet_unref(&packet);
 158
 159     if (status != FFmpegDecodingLoop::DecodeStatus::kOkay)
 160       break;
 161   }
 162
 163   return total_frames;
 164 }
 165
 166 base::TimeDelta AudioFileReader::GetDuration() const {
 167   const AVRational av_time_base = {1, AV_TIME_BASE};
 168
 169   DCHECK_NE(glue_->format_context()->duration, AV_NOPTS_VALUE);
 170   base::CheckedNumeric<int64_t> estimated_duration_us =
 171       glue_->format_context()->duration;
 172
 173   if (audio_codec_ == AudioCodec::kAAC) {
 174     // For certain AAC-encoded files, FFMPEG's estimated frame count might not
 175     // be sufficient to capture the entire audio content that we want. This is
 176     // especially noticeable for short files (< 10ms) resulting in silence
 177     // throughout the decoded buffer. Thus we add the priming frames and the
 178     // remainder frames to the estimation.
 179     // (See: crbug.com/513178)
 180     estimated_duration_us += ceil(
 181         1000000.0 *
 182         static_cast<double>(kAACPrimingFrameCount + kAACRemainderFrameCount) /
 183         sample_rate());
 184   } else {
 185     // Add one microsecond to avoid rounding-down errors which can occur when
 186     // |duration| has been calculated from an exact number of sample-frames.
 187     // One microsecond is much less than the time of a single sample-frame
 188     // at any real-world sample-rate.
 189     estimated_duration_us += 1;
 190   }
 191
 192   return ConvertFromTimeBase(av_time_base, estimated_duration_us.ValueOrDie());
 193 }
 194
 195 int AudioFileReader::GetNumberOfFrames() const {
 196   return base::ClampCeil(GetDuration().InSecondsF() * sample_rate());
 197 }
 198
 199 bool AudioFileReader::OpenDemuxerForTesting() {
 200   return OpenDemuxer();
 201 }
 202
 203 bool AudioFileReader::ReadPacketForTesting(AVPacket* output_packet) {
 204   return ReadPacket(output_packet);
 205 }
 206
 207 bool AudioFileReader::ReadPacket(AVPacket* output_packet) {
 208   while (av_read_frame(glue_->format_context(), output_packet) >= 0) {
 209     // Skip packets from other streams.
 210     if (output_packet->stream_index != stream_index_) {
 211       av_packet_unref(output_packet);
 212       continue;
 213     }
 214     return true;
 215   }
 216   return false;
 217 }
 218
 219 bool AudioFileReader::OnNewFrame(
 220     int* total_frames,
 221     std::vector<std::unique_ptr<AudioBus>>* decoded_audio_packets,
 222     AVFrame* frame) {
 223   int frames_read = frame->nb_samples;
 224   if (frames_read < 0)
 225     return false;
 226
 227   const int channels = frame->ch_layout.nb_channels;
 228   if (frame->sample_rate != sample_rate_ || channels != channels_ ||
 229       frame->format != av_sample_format_) {
 230     DLOG(ERROR) << "Unsupported midstream configuration change!"
 231                 << " Sample Rate: " << frame->sample_rate << " vs "
 232                 << sample_rate_ << ", Channels: " << channels << " vs "
 233                 << channels_ << ", Sample Format: " << frame->format << " vs "
 234                 << av_sample_format_;
 235
 236     // This is an unrecoverable error, so bail out.  We'll return
 237     // whatever we've decoded up to this point.
 238     return false;
 239   }
 240
 241   // AAC decoding doesn't properly trim the last packet in a stream, so if we
 242   // have duration information, use it to set the correct length to avoid extra
 243   // silence from being output. In the case where we are also discarding some
 244   // portion of the packet (as indicated by a negative pts), we further want to
 245   // adjust the duration downward by however much exists before zero.
 246   if (audio_codec_ == AudioCodec::kAAC && frame->duration) {
 247     const base::TimeDelta pkt_duration = ConvertFromTimeBase(
 248         glue_->format_context()->streams[stream_index_]->time_base,
 249         frame->duration + std::min(static_cast<int64_t>(0), frame->pts));
 250     const base::TimeDelta frame_duration =
 251         base::Seconds(frames_read / static_cast<double>(sample_rate_));
 252
 253     if (pkt_duration < frame_duration && pkt_duration.is_positive()) {
 254       const int new_frames_read =
 255           base::ClampFloor(frames_read * (pkt_duration / frame_duration));
 256       DVLOG(2) << "Shrinking AAC frame from " << frames_read << " to "
 257                << new_frames_read << " based on packet duration.";
 258       frames_read = new_frames_read;
 259
 260       // The above process may delete the entire packet.
 261       if (!frames_read)
 262         return true;
 263     }
 264   }
 265
 266   // Deinterleave each channel and convert to 32bit floating-point with
 267   // nominal range -1.0 -> +1.0.  If the output is already in float planar
 268   // format, just copy it into the AudioBus.
 269   decoded_audio_packets->emplace_back(AudioBus::Create(channels, frames_read));
 270   AudioBus* audio_bus = decoded_audio_packets->back().get();
 271
 272   if (codec_context_->sample_fmt == AV_SAMPLE_FMT_FLT) {
 273     audio_bus->FromInterleaved<Float32SampleTypeTraits>(
 274         reinterpret_cast<float*>(frame->data[0]), frames_read);
 275   } else if (codec_context_->sample_fmt == AV_SAMPLE_FMT_FLTP) {
 276     for (int ch = 0; ch < audio_bus->channels(); ++ch) {
 277       memcpy(audio_bus->channel(ch), frame->extended_data[ch],
 278              sizeof(float) * frames_read);
 279     }
 280   } else {
 281     int bytes_per_sample = av_get_bytes_per_sample(codec_context_->sample_fmt);
 282     switch (bytes_per_sample) {
 283       case 1:
 284         audio_bus->FromInterleaved<UnsignedInt8SampleTypeTraits>(
 285             reinterpret_cast<const uint8_t*>(frame->data[0]), frames_read);
 286         break;
 287       case 2:
 288         audio_bus->FromInterleaved<SignedInt16SampleTypeTraits>(
 289             reinterpret_cast<const int16_t*>(frame->data[0]), frames_read);
 290         break;
 291       case 4:
 292         audio_bus->FromInterleaved<SignedInt32SampleTypeTraits>(
 293             reinterpret_cast<const int32_t*>(frame->data[0]), frames_read);
 294         break;
 295       default:
 296         NOTREACHED() << "Unsupported bytes per sample encountered: "
 297                      << bytes_per_sample;
 298         audio_bus->ZeroFrames(frames_read);
 299     }
 300   }
 301
 302   (*total_frames) += frames_read;
 303   return true;
 304 }
 305
 306 bool AudioFileReader::SeekForTesting(base::TimeDelta seek_time) {
 307   // Use the AVStream's time_base, since |codec_context_| does not have
 308   // time_base populated until after OpenDecoder().
 309   return av_seek_frame(
 310              glue_->format_context(), stream_index_,
 311              ConvertToTimeBase(GetAVStreamForTesting()->time_base, seek_time),
 312              AVSEEK_FLAG_BACKWARD) >= 0;
 313 }
 314
 315 const AVStream* AudioFileReader::GetAVStreamForTesting() const {
 316   return glue_->format_context()->streams[stream_index_];
 317 }
 318
 319 }  // namespace media