media/filters/audio_file_reader.cc

   1 // Copyright 2012 The Chromium Authors
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #include "media/filters/audio_file_reader.h"
   6
   7 #include <stddef.h>
   8
   9 #include <cmath>
  10 #include <memory>
  11 #include <vector>
  12
  13 #include "base/functional/bind.h"
  14 #include "base/functional/callback.h"
  15 #include "base/logging.h"
  16 #include "base/numerics/safe_math.h"
  17 #include "base/time/time.h"
  18 #include "media/base/audio_bus.h"
  19 #include "media/base/audio_sample_types.h"
  20 #include "media/base/media_switches.h"
  21 #include "media/ffmpeg/ffmpeg_common.h"
  22 #include "media/ffmpeg/ffmpeg_decoding_loop.h"
  23
  24 namespace media {
  25
  26 // AAC(M4A) decoding specific constants.
  27 static const int kAACPrimingFrameCount = 2112;
  28 static const int kAACRemainderFrameCount = 519;
  29
  30 AudioFileReader::AudioFileReader(FFmpegURLProtocol* protocol)
  31     : stream_index_(0),
  32       protocol_(protocol),
  33       audio_codec_(AudioCodec::kUnknown),
  34       channels_(0),
  35       sample_rate_(0),
  36       av_sample_format_(0) {}
  37
  38 AudioFileReader::~AudioFileReader() {
  39   Close();
  40 }
  41
  42 bool AudioFileReader::Open() {
  43   return OpenDemuxer() && OpenDecoder();
  44 }
  45
  46 bool AudioFileReader::OpenDemuxer() {
  47   glue_ = std::make_unique<FFmpegGlue>(protocol_);
  48   AVFormatContext* format_context = glue_->format_context();
  49
  50   // Open FFmpeg AVFormatContext.
  51   if (!glue_->OpenContext()) {
  52     DLOG(WARNING) << "AudioFileReader::Open() : error in avformat_open_input()";
  53     return false;
  54   }
  55
  56   const int result = avformat_find_stream_info(format_context, NULL);
  57   if (result < 0) {
  58     DLOG(WARNING)
  59         << "AudioFileReader::Open() : error in avformat_find_stream_info()";
  60     return false;
  61   }
  62
  63   // Calling avformat_find_stream_info can uncover new streams. We wait till now
  64   // to find the first audio stream, if any.
  65   codec_context_.reset();
  66   bool found_stream = false;
  67   for (size_t i = 0; i < format_context->nb_streams; ++i) {
  68     if (format_context->streams[i]->codecpar->codec_type ==
  69         AVMEDIA_TYPE_AUDIO) {
  70       stream_index_ = i;
  71       found_stream = true;
  72       break;
  73     }
  74   }
  75
  76   if (!found_stream)
  77     return false;
  78
  79   // Get the codec context.
  80   codec_context_ =
  81       AVStreamToAVCodecContext(format_context->streams[stream_index_]);
  82   if (!codec_context_)
  83     return false;
  84
  85   // Future versions of ffmpeg may copy the allow list from the format context.
  86   if (base::FeatureList::IsEnabled(kFFmpegAllowLists) &&
  87       !codec_context_->codec_whitelist) {
  88     // Note: FFmpeg will try to free this string, so we must duplicate it.
  89     codec_context_->codec_whitelist =
  90         av_strdup(FFmpegGlue::GetAllowedAudioDecoders());
  91   }
  92
  93   DCHECK_EQ(codec_context_->codec_type, AVMEDIA_TYPE_AUDIO);
  94   return true;
  95 }
  96
  97 bool AudioFileReader::OpenDecoder() {
  98   const AVCodec* codec = avcodec_find_decoder(codec_context_->codec_id);
  99   if (codec) {
 100     // MP3 decodes to S16P which we don't support, tell it to use S16 instead.
 101     if (codec_context_->sample_fmt == AV_SAMPLE_FMT_S16P)
 102       codec_context_->request_sample_fmt = AV_SAMPLE_FMT_S16;
 103
 104     const int result = avcodec_open2(codec_context_.get(), codec, nullptr);
 105     if (result < 0) {
 106       DLOG(WARNING) << "AudioFileReader::Open() : could not open codec -"
 107                     << " result: " << result;
 108       return false;
 109     }
 110
 111     // Ensure avcodec_open2() respected our format request.
 112     if (codec_context_->sample_fmt == AV_SAMPLE_FMT_S16P) {
 113       DLOG(ERROR) << "AudioFileReader::Open() : unable to configure a"
 114                   << " supported sample format - "
 115                   << codec_context_->sample_fmt;
 116       return false;
 117     }
 118   } else {
 119     DLOG(WARNING) << "AudioFileReader::Open() : could not find codec.";
 120     return false;
 121   }
 122
 123   // Verify the channel layout is supported by Chrome.  Acts as a sanity check
 124   // against invalid files.  See http://crbug.com/171962
 125   if (ChannelLayoutToChromeChannelLayout(
 126           codec_context_->ch_layout.u.mask,
 127           codec_context_->ch_layout.nb_channels) ==
 128       CHANNEL_LAYOUT_UNSUPPORTED) {
 129     return false;
 130   }
 131
 132   // Store initial values to guard against midstream configuration changes.
 133   channels_ = codec_context_->ch_layout.nb_channels;
 134   audio_codec_ = CodecIDToAudioCodec(codec_context_->codec_id);
 135   sample_rate_ = codec_context_->sample_rate;
 136   av_sample_format_ = codec_context_->sample_fmt;
 137   return true;
 138 }
 139
 140 bool AudioFileReader::HasKnownDuration() const {
 141   return glue_->format_context()->duration != AV_NOPTS_VALUE;
 142 }
 143
 144 void AudioFileReader::Close() {
 145   codec_context_.reset();
 146   glue_.reset();
 147 }
 148
 149 int AudioFileReader::Read(
 150     std::vector<std::unique_ptr<AudioBus>>* decoded_audio_packets,
 151     int packets_to_read) {
 152   DCHECK(glue_ && codec_context_)
 153       << "AudioFileReader::Read() : reader is not opened!";
 154
 155   FFmpegDecodingLoop decode_loop(codec_context_.get());
 156
 157   int total_frames = 0;
 158   auto frame_ready_cb =
 159       base::BindRepeating(&AudioFileReader::OnNewFrame, base::Unretained(this),
 160                           &total_frames, decoded_audio_packets);
 161
 162   AVPacket packet;
 163   int packets_read = 0;
 164   while (packets_read++ < packets_to_read && ReadPacket(&packet)) {
 165     const auto status = decode_loop.DecodePacket(&packet, frame_ready_cb);
 166     av_packet_unref(&packet);
 167
 168     if (status != FFmpegDecodingLoop::DecodeStatus::kOkay)
 169       break;
 170   }
 171
 172   return total_frames;
 173 }
 174
 175 base::TimeDelta AudioFileReader::GetDuration() const {
 176   const AVRational av_time_base = {1, AV_TIME_BASE};
 177
 178   DCHECK_NE(glue_->format_context()->duration, AV_NOPTS_VALUE);
 179   base::CheckedNumeric<int64_t> estimated_duration_us =
 180       glue_->format_context()->duration;
 181
 182   if (audio_codec_ == AudioCodec::kAAC) {
 183     // For certain AAC-encoded files, FFMPEG's estimated frame count might not
 184     // be sufficient to capture the entire audio content that we want. This is
 185     // especially noticeable for short files (< 10ms) resulting in silence
 186     // throughout the decoded buffer. Thus we add the priming frames and the
 187     // remainder frames to the estimation.
 188     // (See: crbug.com/513178)
 189     estimated_duration_us += ceil(
 190         1000000.0 *
 191         static_cast<double>(kAACPrimingFrameCount + kAACRemainderFrameCount) /
 192         sample_rate());
 193   } else {
 194     // Add one microsecond to avoid rounding-down errors which can occur when
 195     // |duration| has been calculated from an exact number of sample-frames.
 196     // One microsecond is much less than the time of a single sample-frame
 197     // at any real-world sample-rate.
 198     estimated_duration_us += 1;
 199   }
 200
 201   return ConvertFromTimeBase(av_time_base, estimated_duration_us.ValueOrDie());
 202 }
 203
 204 int AudioFileReader::GetNumberOfFrames() const {
 205   return base::ClampCeil(GetDuration().InSecondsF() * sample_rate());
 206 }
 207
 208 bool AudioFileReader::OpenDemuxerForTesting() {
 209   return OpenDemuxer();
 210 }
 211
 212 bool AudioFileReader::ReadPacketForTesting(AVPacket* output_packet) {
 213   return ReadPacket(output_packet);
 214 }
 215
 216 bool AudioFileReader::ReadPacket(AVPacket* output_packet) {
 217   while (av_read_frame(glue_->format_context(), output_packet) >= 0) {
 218     // Skip packets from other streams.
 219     if (output_packet->stream_index != stream_index_) {
 220       av_packet_unref(output_packet);
 221       continue;
 222     }
 223     return true;
 224   }
 225   return false;
 226 }
 227
 228 bool AudioFileReader::OnNewFrame(
 229     int* total_frames,
 230     std::vector<std::unique_ptr<AudioBus>>* decoded_audio_packets,
 231     AVFrame* frame) {
 232   int frames_read = frame->nb_samples;
 233   if (frames_read < 0)
 234     return false;
 235
 236   const int channels = frame->ch_layout.nb_channels;
 237   if (frame->sample_rate != sample_rate_ || channels != channels_ ||
 238       frame->format != av_sample_format_) {
 239     DLOG(ERROR) << "Unsupported midstream configuration change!"
 240                 << " Sample Rate: " << frame->sample_rate << " vs "
 241                 << sample_rate_ << ", Channels: " << channels << " vs "
 242                 << channels_ << ", Sample Format: " << frame->format << " vs "
 243                 << av_sample_format_;
 244
 245     // This is an unrecoverable error, so bail out.  We'll return
 246     // whatever we've decoded up to this point.
 247     return false;
 248   }
 249
 250   // AAC decoding doesn't properly trim the last packet in a stream, so if we
 251   // have duration information, use it to set the correct length to avoid extra
 252   // silence from being output. In the case where we are also discarding some
 253   // portion of the packet (as indicated by a negative pts), we further want to
 254   // adjust the duration downward by however much exists before zero.
 255   if (audio_codec_ == AudioCodec::kAAC && frame->duration) {
 256     const base::TimeDelta pkt_duration = ConvertFromTimeBase(
 257         glue_->format_context()->streams[stream_index_]->time_base,
 258         frame->duration + std::min(static_cast<int64_t>(0), frame->pts));
 259     const base::TimeDelta frame_duration =
 260         base::Seconds(frames_read / static_cast<double>(sample_rate_));
 261
 262     if (pkt_duration < frame_duration && pkt_duration.is_positive()) {
 263       const int new_frames_read =
 264           base::ClampFloor(frames_read * (pkt_duration / frame_duration));
 265       DVLOG(2) << "Shrinking AAC frame from " << frames_read << " to "
 266                << new_frames_read << " based on packet duration.";
 267       frames_read = new_frames_read;
 268
 269       // The above process may delete the entire packet.
 270       if (!frames_read)
 271         return true;
 272     }
 273   }
 274
 275   // Deinterleave each channel and convert to 32bit floating-point with
 276   // nominal range -1.0 -> +1.0.  If the output is already in float planar
 277   // format, just copy it into the AudioBus.
 278   decoded_audio_packets->emplace_back(AudioBus::Create(channels, frames_read));
 279   AudioBus* audio_bus = decoded_audio_packets->back().get();
 280
 281   if (codec_context_->sample_fmt == AV_SAMPLE_FMT_FLT) {
 282     audio_bus->FromInterleaved<Float32SampleTypeTraits>(
 283         reinterpret_cast<float*>(frame->data[0]), frames_read);
 284   } else if (codec_context_->sample_fmt == AV_SAMPLE_FMT_FLTP) {
 285     for (int ch = 0; ch < audio_bus->channels(); ++ch) {
 286       memcpy(audio_bus->channel(ch), frame->extended_data[ch],
 287              sizeof(float) * frames_read);
 288     }
 289   } else {
 290     int bytes_per_sample = av_get_bytes_per_sample(codec_context_->sample_fmt);
 291     switch (bytes_per_sample) {
 292       case 1:
 293         audio_bus->FromInterleaved<UnsignedInt8SampleTypeTraits>(
 294             reinterpret_cast<const uint8_t*>(frame->data[0]), frames_read);
 295         break;
 296       case 2:
 297         audio_bus->FromInterleaved<SignedInt16SampleTypeTraits>(
 298             reinterpret_cast<const int16_t*>(frame->data[0]), frames_read);
 299         break;
 300       case 4:
 301         audio_bus->FromInterleaved<SignedInt32SampleTypeTraits>(
 302             reinterpret_cast<const int32_t*>(frame->data[0]), frames_read);
 303         break;
 304       default:
 305         NOTREACHED() << "Unsupported bytes per sample encountered: "
 306                      << bytes_per_sample;
 307         audio_bus->ZeroFrames(frames_read);
 308     }
 309   }
 310
 311   (*total_frames) += frames_read;
 312   return true;
 313 }
 314
 315 bool AudioFileReader::SeekForTesting(base::TimeDelta seek_time) {
 316   // Use the AVStream's time_base, since |codec_context_| does not have
 317   // time_base populated until after OpenDecoder().
 318   return av_seek_frame(
 319              glue_->format_context(), stream_index_,
 320              ConvertToTimeBase(GetAVStreamForTesting()->time_base, seek_time),
 321              AVSEEK_FLAG_BACKWARD) >= 0;
 322 }
 323
 324 const AVStream* AudioFileReader::GetAVStreamForTesting() const {
 325   return glue_->format_context()->streams[stream_index_];
 326 }
 327
 328 }  // namespace media