1 // Copyright 2012 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "media/filters/audio_file_reader.h"
13 #include "base/functional/bind.h"
14 #include "base/functional/callback.h"
15 #include "base/logging.h"
16 #include "base/numerics/safe_math.h"
17 #include "base/time/time.h"
18 #include "media/base/audio_bus.h"
19 #include "media/base/audio_sample_types.h"
20 #include "media/base/media_switches.h"
21 #include "media/ffmpeg/ffmpeg_common.h"
22 #include "media/ffmpeg/ffmpeg_decoding_loop.h"
26 // AAC(M4A) decoding specific constants.
// Both values are frame counts. GetDuration() adds their sum to the duration
// estimate when the stream's codec is AAC (see the comment there and
// crbug.com/513178).
27 static const int kAACPrimingFrameCount = 2112;
28 static const int kAACRemainderFrameCount = 519;
// Creates an unopened reader. The codec starts out as AudioCodec::kUnknown and
// the sample format as 0; OpenDecoder() later overwrites both with the values
// reported by the codec context.
// NOTE(review): |protocol| is presumably stored in |protocol_|, which
// OpenDemuxer() hands to FFmpegGlue; that initializer is not visible here --
// confirm.
30 AudioFileReader::AudioFileReader(FFmpegURLProtocol* protocol)
33 audio_codec_(AudioCodec::kUnknown),
36 av_sample_format_(0) {}
// Destructor.
// NOTE(review): the body is not visible here; presumably it releases the
// decoder state (e.g. via Close()) -- confirm.
38 AudioFileReader::~AudioFileReader() {
// Prepares the reader for decoding: runs OpenDemuxer() and, only if that
// succeeds, OpenDecoder(). Returns true only when both steps succeed.
42 bool AudioFileReader::Open() {
43 return OpenDemuxer() && OpenDecoder();
// Sets up demuxing: creates a fresh FFmpegGlue over |protocol_|, opens the
// FFmpeg format context, probes stream information, and then looks for an
// audio stream from which |codec_context_| is built. Failures of the FFmpeg
// calls are logged with DLOG(WARNING).
// NOTE(review): the failure-path return statements and the part of the loop
// that records |stream_index_| are not visible here; presumably the function
// returns false on any failure or when no audio stream exists -- confirm.
46 bool AudioFileReader::OpenDemuxer() {
47 glue_ = std::make_unique<FFmpegGlue>(protocol_);
48 AVFormatContext* format_context = glue_->format_context();
50 // Open FFmpeg AVFormatContext.
51 if (!glue_->OpenContext()) {
52 DLOG(WARNING) << "AudioFileReader::Open() : error in avformat_open_input()";
// NOTE(review): prefer nullptr over NULL here, matching the avcodec_open2()
// call in OpenDecoder().
56 const int result = avformat_find_stream_info(format_context, NULL);
59 << "AudioFileReader::Open() : error in avformat_find_stream_info()";
63 // Calling avformat_find_stream_info can uncover new streams. We wait till now
64 // to find the first audio stream, if any.
// Clear any existing codec context before a stream is selected.
65 codec_context_.reset();
66 bool found_stream = false;
67 for (size_t i = 0; i < format_context->nb_streams; ++i) {
68 if (format_context->streams[i]->codecpar->codec_type ==
79 // Get the codec context.
81 AVStreamToAVCodecContext(format_context->streams[stream_index_]);
85 // Future versions of ffmpeg may copy the allow list from the format context.
// Only install our allow list when the feature is enabled and the codec
// context does not already carry one, so an existing list is never replaced.
86 if (base::FeatureList::IsEnabled(kFFmpegAllowLists) &&
87 !codec_context_->codec_whitelist) {
88 // Note: FFmpeg will try to free this string, so we must duplicate it.
89 codec_context_->codec_whitelist =
90 av_strdup(FFmpegGlue::GetAllowedAudioDecoders());
// Debug-only sanity check that the selected codec context is an audio one.
93 DCHECK_EQ(codec_context_->codec_type, AVMEDIA_TYPE_AUDIO);
// Looks up and opens the FFmpeg decoder for |codec_context_|'s codec id, checks
// that the resulting sample format and channel layout are usable, and caches
// the channel count, codec, sample rate and sample format. OnNewFrame() later
// compares every decoded frame against these cached values.
// Dereferences |codec_context_| unconditionally, so it must already be set
// (OpenDemuxer() is what populates it in Open()).
// NOTE(review): the branch structure and return statements are only partly
// visible here; presumably each logged failure returns false -- confirm.
97 bool AudioFileReader::OpenDecoder() {
98 const AVCodec* codec = avcodec_find_decoder(codec_context_->codec_id);
100 // MP3 decodes to S16P which we don't support, tell it to use S16 instead.
101 if (codec_context_->sample_fmt == AV_SAMPLE_FMT_S16P)
102 codec_context_->request_sample_fmt = AV_SAMPLE_FMT_S16;
104 const int result = avcodec_open2(codec_context_.get(), codec, nullptr);
106 DLOG(WARNING) << "AudioFileReader::Open() : could not open codec -"
107 << " result: " << result;
111 // Ensure avcodec_open2() respected our format request.
// If the format is still S16P after opening, the request above was not
// honoured and the stream cannot be converted by OnNewFrame().
112 if (codec_context_->sample_fmt == AV_SAMPLE_FMT_S16P) {
113 DLOG(ERROR) << "AudioFileReader::Open() : unable to configure a"
114 << " supported sample format - "
115 << codec_context_->sample_fmt;
119 DLOG(WARNING) << "AudioFileReader::Open() : could not find codec.";
123 // Verify the channel layout is supported by Chrome. Acts as a sanity check
124 // against invalid files. See http://crbug.com/171962
125 if (ChannelLayoutToChromeChannelLayout(
126 codec_context_->ch_layout.u.mask,
127 codec_context_->ch_layout.nb_channels) ==
128 CHANNEL_LAYOUT_UNSUPPORTED) {
132 // Store initial values to guard against midstream configuration changes.
133 channels_ = codec_context_->ch_layout.nb_channels;
134 audio_codec_ = CodecIDToAudioCodec(codec_context_->codec_id);
135 sample_rate_ = codec_context_->sample_rate;
136 av_sample_format_ = codec_context_->sample_fmt;
// Returns true when the format context reports a duration other than
// AV_NOPTS_VALUE. GetDuration() DCHECKs this same condition. Dereferences
// |glue_|, so OpenDemuxer() must have created it first.
140 bool AudioFileReader::HasKnownDuration() const {
141 return glue_->format_context()->duration != AV_NOPTS_VALUE;
// Releases the codec context held by |codec_context_|. Read() DCHECKs that
// |codec_context_| is set, so the reader must be reopened before reading
// again.
// NOTE(review): any further teardown (e.g. of |glue_|) is not visible here.
144 void AudioFileReader::Close() {
145 codec_context_.reset();
// Reads at most |packets_to_read| packets of the audio stream and decodes
// them. Each decoded frame is delivered to OnNewFrame(), which appends an
// AudioBus to |decoded_audio_packets| and adds its frame count to
// |total_frames|. The reader must be open (DCHECKed below).
// NOTE(review): the return statement is not visible here; presumably the
// function returns |total_frames| -- confirm.
149 int AudioFileReader::Read(
150 std::vector<std::unique_ptr<AudioBus>>* decoded_audio_packets,
151 int packets_to_read) {
152 DCHECK(glue_ && codec_context_)
153 << "AudioFileReader::Read() : reader is not opened!";
155 FFmpegDecodingLoop decode_loop(codec_context_.get());
157 int total_frames = 0;
// The callback binds |this| unretained and a pointer to the local
// |total_frames|. NOTE(review): this relies on DecodePacket() only invoking
// the callback synchronously within this call -- confirm.
158 auto frame_ready_cb =
159 base::BindRepeating(&AudioFileReader::OnNewFrame, base::Unretained(this),
160 &total_frames, decoded_audio_packets);
163 int packets_read = 0;
// The packet budget is tested before ReadPacket(), so no packet is pulled from
// the demuxer once |packets_to_read| has been reached.
164 while (packets_read++ < packets_to_read && ReadPacket(&packet)) {
165 const auto status = decode_loop.DecodePacket(&packet, frame_ready_cb);
// Release the packet's buffers before inspecting |status| so they are freed on
// both the success and the failure path.
166 av_packet_unref(&packet);
168 if (status != FFmpegDecodingLoop::DecodeStatus::kOkay)
// Returns the estimated stream duration as a base::TimeDelta. The format
// context's duration is treated as a value in AV_TIME_BASE units, padded (see
// below), and converted with ConvertFromTimeBase(). The duration must be known
// (DCHECKed); callers can test that with HasKnownDuration().
// The arithmetic runs on a base::CheckedNumeric<int64_t>. NOTE(review):
// ValueOrDie() presumably crashes rather than returning a wrapped value if the
// padding overflowed -- confirm against base/numerics.
175 base::TimeDelta AudioFileReader::GetDuration() const {
176 const AVRational av_time_base = {1, AV_TIME_BASE};
178 DCHECK_NE(glue_->format_context()->duration, AV_NOPTS_VALUE);
179 base::CheckedNumeric<int64_t> estimated_duration_us =
180 glue_->format_context()->duration;
182 if (audio_codec_ == AudioCodec::kAAC) {
183 // For certain AAC-encoded files, FFMPEG's estimated frame count might not
184 // be sufficient to capture the entire audio content that we want. This is
185 // especially noticeable for short files (< 10ms) resulting in silence
186 // throughout the decoded buffer. Thus we add the priming frames and the
187 // remainder frames to the estimation.
188 // (See: crbug.com/513178)
// NOTE(review): only part of this expression is visible here; the priming plus
// remainder frame count is presumably scaled to microseconds using the sample
// rate before being rounded up -- confirm.
189 estimated_duration_us += ceil(
191 static_cast<double>(kAACPrimingFrameCount + kAACRemainderFrameCount) /
194 // Add one microsecond to avoid rounding-down errors which can occur when
195 // |duration| has been calculated from an exact number of sample-frames.
196 // One microsecond is much less than the time of a single sample-frame
197 // at any real-world sample-rate.
198 estimated_duration_us += 1;
201 return ConvertFromTimeBase(av_time_base, estimated_duration_us.ValueOrDie());
// Returns the estimated number of sample-frames in the stream: GetDuration()
// in seconds multiplied by sample_rate(), passed through base::ClampCeil to
// produce an int. Calls GetDuration(), so the same known-duration requirement
// applies.
204 int AudioFileReader::GetNumberOfFrames() const {
205 return base::ClampCeil(GetDuration().InSecondsF() * sample_rate());
// Test-only entry point that forwards to OpenDemuxer() without opening the
// decoder, returning its result unchanged.
208 bool AudioFileReader::OpenDemuxerForTesting() {
209 return OpenDemuxer();
// Test-only entry point that forwards to ReadPacket(), filling
// |output_packet| and returning its result unchanged.
212 bool AudioFileReader::ReadPacketForTesting(AVPacket* output_packet) {
213 return ReadPacket(output_packet);
// Pulls packets from the format context into |output_packet| until one
// belonging to |stream_index_| is found or av_read_frame() reports an error
// or end of file (a negative return). Packets from other streams are
// unreferenced and skipped. Read() unreferences the packet it receives, so a
// returned packet is the caller's to release.
// NOTE(review): the return statements are not visible here; presumably true
// means a packet was found and false means reading stopped -- confirm.
216 bool AudioFileReader::ReadPacket(AVPacket* output_packet) {
217 while (av_read_frame(glue_->format_context(), output_packet) >= 0) {
218 // Skip packets from other streams.
219 if (output_packet->stream_index != stream_index_) {
220 av_packet_unref(output_packet);
// Decode callback bound in Read(): converts one decoded |frame| into a new
// AudioBus appended to |decoded_audio_packets| and adds the number of frames
// kept to |*total_frames|.
// Steps: (1) check the frame against the configuration cached by
// OpenDecoder(); (2) for AAC, trim the frame to the duration it reports;
// (3) copy or convert the samples into the AudioBus.
// NOTE(review): some parameters and all return statements are not visible
// here; presumably false stops decoding and true continues -- confirm.
228 bool AudioFileReader::OnNewFrame(
230 std::vector<std::unique_ptr<AudioBus>>* decoded_audio_packets,
232 int frames_read = frame->nb_samples;
// Compare against the values cached by OpenDecoder(); any difference means the
// stream changed configuration midway, which is not handled.
236 const int channels = frame->ch_layout.nb_channels;
237 if (frame->sample_rate != sample_rate_ || channels != channels_ ||
238 frame->format != av_sample_format_) {
239 DLOG(ERROR) << "Unsupported midstream configuration change!"
240 << " Sample Rate: " << frame->sample_rate << " vs "
241 << sample_rate_ << ", Channels: " << channels << " vs "
242 << channels_ << ", Sample Format: " << frame->format << " vs "
243 << av_sample_format_;
245 // This is an unrecoverable error, so bail out. We'll return
246 // whatever we've decoded up to this point.
250 // AAC decoding doesn't properly trim the last packet in a stream, so if we
251 // have duration information, use it to set the correct length to avoid extra
252 // silence from being output. In the case where we are also discarding some
253 // portion of the packet (as indicated by a negative pts), we further want to
254 // adjust the duration downward by however much exists before zero.
255 if (audio_codec_ == AudioCodec::kAAC && frame->duration) {
// std::min(0, pts) contributes only when pts is negative, reducing the
// reported duration by the portion of the frame that lies before time zero.
256 const base::TimeDelta pkt_duration = ConvertFromTimeBase(
257 glue_->format_context()->streams[stream_index_]->time_base,
258 frame->duration + std::min(static_cast<int64_t>(0), frame->pts));
// Duration implied by the decoded sample count at the cached sample rate.
259 const base::TimeDelta frame_duration =
260 base::Seconds(frames_read / static_cast<double>(sample_rate_));
// Shrink only when the reported duration is positive and shorter than the
// decoded data; scale the frame count by the ratio and round down.
262 if (pkt_duration < frame_duration && pkt_duration.is_positive()) {
263 const int new_frames_read =
264 base::ClampFloor(frames_read * (pkt_duration / frame_duration));
265 DVLOG(2) << "Shrinking AAC frame from " << frames_read << " to "
266 << new_frames_read << " based on packet duration.";
267 frames_read = new_frames_read;
269 // The above process may delete the entire packet.
275 // Deinterleave each channel and convert to 32bit floating-point with
276 // nominal range -1.0 -> +1.0. If the output is already in float planar
277 // format, just copy it into the AudioBus.
278 decoded_audio_packets->emplace_back(AudioBus::Create(channels, frames_read));
279 AudioBus* audio_bus = decoded_audio_packets->back().get();
// NOTE(review): the branches below key off |codec_context_->sample_fmt| while
// the guard above validated |frame->format| against |av_sample_format_|. They
// agree only if the codec context's format is unchanged since OpenDecoder()
// -- confirm.
281 if (codec_context_->sample_fmt == AV_SAMPLE_FMT_FLT) {
282 audio_bus->FromInterleaved<Float32SampleTypeTraits>(
283 reinterpret_cast<float*>(frame->data[0]), frames_read);
284 } else if (codec_context_->sample_fmt == AV_SAMPLE_FMT_FLTP) {
// Planar float: each channel is copied from its own |extended_data| plane;
// only |frames_read| samples are copied, so trimmed samples are dropped.
285 for (int ch = 0; ch < audio_bus->channels(); ++ch) {
286 memcpy(audio_bus->channel(ch), frame->extended_data[ch],
287 sizeof(float) * frames_read);
// Remaining formats are dispatched on sample width and read as interleaved
// data from |frame->data[0]|: uint8_t, int16_t or int32_t samples.
// NOTE(review): the case labels are not visible here.
290 int bytes_per_sample = av_get_bytes_per_sample(codec_context_->sample_fmt);
291 switch (bytes_per_sample) {
293 audio_bus->FromInterleaved<UnsignedInt8SampleTypeTraits>(
294 reinterpret_cast<const uint8_t*>(frame->data[0]), frames_read);
297 audio_bus->FromInterleaved<SignedInt16SampleTypeTraits>(
298 reinterpret_cast<const int16_t*>(frame->data[0]), frames_read);
301 audio_bus->FromInterleaved<SignedInt32SampleTypeTraits>(
302 reinterpret_cast<const int32_t*>(frame->data[0]), frames_read);
// Unexpected sample width: flagged via NOTREACHED(), with the bus zeroed so
// that silence rather than uninitialized data would be produced.
305 NOTREACHED() << "Unsupported bytes per sample encountered: "
307 audio_bus->ZeroFrames(frames_read);
// Count only the frames kept after any AAC trimming.
311 (*total_frames) += frames_read;
// Test-only helper: seeks the audio stream (|stream_index_|) to |seek_time|,
// converted into the stream's time base, with AVSEEK_FLAG_BACKWARD. Returns
// true when av_seek_frame() returns a non-negative value.
315 bool AudioFileReader::SeekForTesting(base::TimeDelta seek_time) {
316 // Use the AVStream's time_base, since |codec_context_| does not have
317 // time_base populated until after OpenDecoder().
318 return av_seek_frame(
319 glue_->format_context(), stream_index_,
320 ConvertToTimeBase(GetAVStreamForTesting()->time_base, seek_time),
321 AVSEEK_FLAG_BACKWARD) >= 0;
// Test-only accessor for the AVStream at |stream_index_| in the format
// context. Dereferences |glue_|, so OpenDemuxer() must have created it first.
324 const AVStream* AudioFileReader::GetAVStreamForTesting() const {
325 return glue_->format_context()->streams[stream_index_];