1 // Copyright 2012 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "media/filters/audio_file_reader.h"
13 #include "base/bind.h"
14 #include "base/callback.h"
15 #include "base/logging.h"
16 #include "base/numerics/safe_math.h"
17 #include "base/time/time.h"
18 #include "media/base/audio_bus.h"
19 #include "media/base/audio_sample_types.h"
20 #include "media/ffmpeg/ffmpeg_common.h"
21 #include "media/ffmpeg/ffmpeg_decoding_loop.h"
25 // AAC(M4A) decoding specific constants.
// Both are counts of sample-frames. GetDuration() adds their sum, scaled to a
// time value (the scaling expression is only partly visible in this excerpt),
// to the estimated duration of AAC streams.
26 static const int kAACPrimingFrameCount = 2112;
27 static const int kAACRemainderFrameCount = 519;
// Constructs a reader for the media exposed by |protocol|. The codec starts
// out as AudioCodec::kUnknown and the sample format as 0; OpenDecoder()
// overwrites both once a decoder has been opened.
// NOTE(review): the remaining member initializers are on lines not visible in
// this excerpt; presumably |protocol| is stored in |protocol_|, which
// OpenDemuxer() passes to FFmpegGlue — confirm.
29 AudioFileReader::AudioFileReader(FFmpegURLProtocol* protocol)
32 audio_codec_(AudioCodec::kUnknown),
35 av_sample_format_(0) {}
// Destructor.
// NOTE(review): the body is not visible in this excerpt; presumably it
// releases decoder state (e.g. via Close()) — confirm.
37 AudioFileReader::~AudioFileReader() {
// Opens the demuxer and then the decoder. Returns true only when both steps
// succeed; because of the short-circuiting &&, OpenDecoder() is not attempted
// when OpenDemuxer() fails.
41 bool AudioFileReader::Open() {
42 return OpenDemuxer() && OpenDecoder();
// Sets up demuxing: builds |glue_| around |protocol_|, opens the FFmpeg format
// context, reads stream information, searches the streams for an audio stream
// and derives a codec context from the stream at |stream_index_|.
// NOTE(review): the return statements, the assignment of |stream_index_| and
// the handling of |found_stream| are on lines not visible in this excerpt;
// presumably each failed step returns false — confirm.
45 bool AudioFileReader::OpenDemuxer() {
46 glue_ = std::make_unique<FFmpegGlue>(protocol_);
47 AVFormatContext* format_context = glue_->format_context();
49 // Open FFmpeg AVFormatContext.
50 if (!glue_->OpenContext()) {
51 DLOG(WARNING) << "AudioFileReader::Open() : error in avformat_open_input()";
// Ask FFmpeg to read stream information; as noted below, this may uncover
// streams that were not known when the context was opened.
55 const int result = avformat_find_stream_info(format_context, NULL);
58 << "AudioFileReader::Open() : error in avformat_find_stream_info()";
62 // Calling avformat_find_stream_info can uncover new streams. We wait till now
63 // to find the first audio stream, if any.
// Discard any existing codec context before scanning the streams.
64 codec_context_.reset();
65 bool found_stream = false;
66 for (size_t i = 0; i < format_context->nb_streams; ++i) {
67 if (format_context->streams[i]->codecpar->codec_type ==
78 // Get the codec context.
80 AVStreamToAVCodecContext(format_context->streams[stream_index_]);
// Debug-only check that the derived context describes an audio stream.
84 DCHECK_EQ(codec_context_->codec_type, AVMEDIA_TYPE_AUDIO);
// Opens a decoder for |codec_context_| and validates the result.
// Steps visible here: look up the decoder for the context's codec id, request
// S16 output when the context reports S16P, open the codec, verify that S16P
// is no longer selected, check the channel layout against the layouts Chrome
// supports, and finally cache the channel count, codec, sample rate and sample
// format. OnNewFrame() compares each decoded frame against the cached sample
// rate, channel count and sample format.
// NOTE(review): the branch structure and return statements are on lines not
// visible in this excerpt; presumably every failure path returns false —
// confirm.
88 bool AudioFileReader::OpenDecoder() {
89 const AVCodec* codec = avcodec_find_decoder(codec_context_->codec_id);
91 // MP3 decodes to S16P which we don't support, tell it to use S16 instead.
92 if (codec_context_->sample_fmt == AV_SAMPLE_FMT_S16P)
93 codec_context_->request_sample_fmt = AV_SAMPLE_FMT_S16;
95 const int result = avcodec_open2(codec_context_.get(), codec, nullptr);
97 DLOG(WARNING) << "AudioFileReader::Open() : could not open codec -"
98 << " result: " << result;
102 // Ensure avcodec_open2() respected our format request.
103 if (codec_context_->sample_fmt == AV_SAMPLE_FMT_S16P) {
104 DLOG(ERROR) << "AudioFileReader::Open() : unable to configure a"
105 << " supported sample format - "
106 << codec_context_->sample_fmt;
110 DLOG(WARNING) << "AudioFileReader::Open() : could not find codec.";
114 // Verify the channel layout is supported by Chrome. Acts as a sanity check
115 // against invalid files. See http://crbug.com/171962
116 if (ChannelLayoutToChromeChannelLayout(
117 codec_context_->ch_layout.u.mask,
118 codec_context_->ch_layout.nb_channels) ==
119 CHANNEL_LAYOUT_UNSUPPORTED) {
123 // Store initial values to guard against midstream configuration changes.
124 channels_ = codec_context_->ch_layout.nb_channels;
125 audio_codec_ = CodecIDToAudioCodec(codec_context_->codec_id);
126 sample_rate_ = codec_context_->sample_rate;
127 av_sample_format_ = codec_context_->sample_fmt;
// Returns true when the format context reports a duration other than
// AV_NOPTS_VALUE. |glue_| is dereferenced unconditionally, so it must already
// have been created (see OpenDemuxer()).
131 bool AudioFileReader::HasKnownDuration() const {
132 return glue_->format_context()->duration != AV_NOPTS_VALUE;
// Releases the codec context.
// NOTE(review): any further teardown (e.g. of |glue_|) would be on lines not
// visible in this excerpt — confirm.
135 void AudioFileReader::Close() {
136 codec_context_.reset();
// Decodes up to |packets_to_read| packets from the selected audio stream.
// Each decoded frame is handed to OnNewFrame(), which appends an AudioBus to
// |decoded_audio_packets| and adds the frame count to a running total.
// Requires a successful Open(): both |glue_| and |codec_context_| are
// DCHECKed.
// NOTE(review): the declaration of |packet|, the statement executed when a
// packet does not decode with kOkay, and the return statement are on lines not
// visible in this excerpt; presumably decoding stops on the first non-kOkay
// status and |total_frames| is returned — confirm.
140 int AudioFileReader::Read(
141 std::vector<std::unique_ptr<AudioBus>>* decoded_audio_packets,
142 int packets_to_read) {
143 DCHECK(glue_ && codec_context_)
144 << "AudioFileReader::Read() : reader is not opened!";
146 FFmpegDecodingLoop decode_loop(codec_context_.get());
148 int total_frames = 0;
// The callback holds |this| unretained and a pointer to the local
// |total_frames|; in the visible code it is only passed to
// decode_loop.DecodePacket() within this call.
149 auto frame_ready_cb =
150 base::BindRepeating(&AudioFileReader::OnNewFrame, base::Unretained(this),
151 &total_frames, decoded_audio_packets);
154 int packets_read = 0;
// The packet budget is tested (and the counter incremented) before
// ReadPacket() is called, so no packet is read once the budget is exhausted.
155 while (packets_read++ < packets_to_read && ReadPacket(&packet)) {
156 const auto status = decode_loop.DecodePacket(&packet, frame_ready_cb);
// The packet is released right after decoding, regardless of |status|.
157 av_packet_unref(&packet);
159 if (status != FFmpegDecodingLoop::DecodeStatus::kOkay)
// Returns the stream duration as a base::TimeDelta. The starting point is the
// format context's duration, interpreted in units of 1/AV_TIME_BASE seconds
// and accumulated in a base::CheckedNumeric<int64_t>. The duration must be
// known (DCHECKed); see HasKnownDuration().
// NOTE(review): the |else| structure around the two adjustments below is on
// lines not visible in this excerpt, so it cannot be told from here whether
// the one-microsecond bump applies to every codec or only to non-AAC streams —
// confirm. The full AAC padding expression is likewise only partly visible.
166 base::TimeDelta AudioFileReader::GetDuration() const {
167 const AVRational av_time_base = {1, AV_TIME_BASE};
169 DCHECK_NE(glue_->format_context()->duration, AV_NOPTS_VALUE);
170 base::CheckedNumeric<int64_t> estimated_duration_us =
171 glue_->format_context()->duration;
173 if (audio_codec_ == AudioCodec::kAAC) {
174 // For certain AAC-encoded files, FFMPEG's estimated frame count might not
175 // be sufficient to capture the entire audio content that we want. This is
176 // especially noticeable for short files (< 10ms) resulting in silence
177 // throughout the decoded buffer. Thus we add the priming frames and the
178 // remainder frames to the estimation.
179 // (See: crbug.com/513178)
180 estimated_duration_us += ceil(
182 static_cast<double>(kAACPrimingFrameCount + kAACRemainderFrameCount) /
185 // Add one microsecond to avoid rounding-down errors which can occur when
186 // |duration| has been calculated from an exact number of sample-frames.
187 // One microsecond is much less than the time of a single sample-frame
188 // at any real-world sample-rate.
189 estimated_duration_us += 1;
// NOTE(review): ValueOrDie() presumably terminates the process if the checked
// arithmetic above overflowed — confirm against base/numerics.
192 return ConvertFromTimeBase(av_time_base, estimated_duration_us.ValueOrDie());
// Returns the estimated number of sample-frames in the stream: the duration
// from GetDuration() in seconds multiplied by sample_rate(), passed through
// base::ClampCeil.
// NOTE(review): base::ClampCeil presumably rounds up and saturates to the
// range of int — confirm against base/numerics.
195 int AudioFileReader::GetNumberOfFrames() const {
196 return base::ClampCeil(GetDuration().InSecondsF() * sample_rate());
// Test-only entry point that forwards to OpenDemuxer() and returns its result,
// without opening the decoder.
199 bool AudioFileReader::OpenDemuxerForTesting() {
200 return OpenDemuxer();
// Test-only entry point that forwards |output_packet| to ReadPacket() and
// returns its result.
203 bool AudioFileReader::ReadPacketForTesting(AVPacket* output_packet) {
204 return ReadPacket(output_packet);
// Reads packets from the format context into |output_packet| for as long as
// av_read_frame() returns a non-negative value. Packets whose stream index
// differs from |stream_index_| are released with av_packet_unref() and
// skipped.
// NOTE(review): the return statements are on lines not visible in this
// excerpt; presumably true is returned for the first packet of the selected
// stream and false once av_read_frame() fails — confirm.
207 bool AudioFileReader::ReadPacket(AVPacket* output_packet) {
208 while (av_read_frame(glue_->format_context(), output_packet) >= 0) {
209 // Skip packets from other streams.
210 if (output_packet->stream_index != stream_index_) {
211 av_packet_unref(output_packet);
// Handles one decoded frame: checks that its configuration still matches the
// values cached by OpenDecoder(), optionally trims trailing AAC samples,
// converts the samples into a newly created AudioBus appended to
// |decoded_audio_packets|, and adds the number of frames kept to
// |*total_frames|.
// NOTE(review): two of the parameter declarations, the |case| labels of the
// switch, and all return statements are on lines not visible in this excerpt;
// presumably false is returned on a configuration change and true otherwise —
// confirm.
219 bool AudioFileReader::OnNewFrame(
221 std::vector<std::unique_ptr<AudioBus>>* decoded_audio_packets,
// Number of sample-frames to take from this frame; may be reduced below.
223 int frames_read = frame->nb_samples;
227 const int channels = frame->ch_layout.nb_channels;
// Compare against the sample rate, channel count and format cached at open.
228 if (frame->sample_rate != sample_rate_ || channels != channels_ ||
229 frame->format != av_sample_format_) {
230 DLOG(ERROR) << "Unsupported midstream configuration change!"
231 << " Sample Rate: " << frame->sample_rate << " vs "
232 << sample_rate_ << ", Channels: " << channels << " vs "
233 << channels_ << ", Sample Format: " << frame->format << " vs "
234 << av_sample_format_;
236 // This is an unrecoverable error, so bail out. We'll return
237 // whatever we've decoded up to this point.
241 // AAC decoding doesn't properly trim the last packet in a stream, so if we
242 // have duration information, use it to set the correct length to avoid extra
243 // silence from being output. In the case where we are also discarding some
244 // portion of the packet (as indicated by a negative pts), we further want to
245 // adjust the duration downward by however much exists before zero.
246 if (audio_codec_ == AudioCodec::kAAC && frame->duration) {
// std::min(0, pts) is non-zero only for a negative pts, in which case it
// shortens the duration by the portion that lies before time zero.
247 const base::TimeDelta pkt_duration = ConvertFromTimeBase(
248 glue_->format_context()->streams[stream_index_]->time_base,
249 frame->duration + std::min(static_cast<int64_t>(0), frame->pts));
// Duration implied by the decoded sample count at the cached sample rate.
250 const base::TimeDelta frame_duration =
251 base::Seconds(frames_read / static_cast<double>(sample_rate_));
// Only ever shrink, and only when the adjusted packet duration is positive.
253 if (pkt_duration < frame_duration && pkt_duration.is_positive()) {
// Scale the sample count by the ratio of the two durations, rounding down.
254 const int new_frames_read =
255 base::ClampFloor(frames_read * (pkt_duration / frame_duration));
256 DVLOG(2) << "Shrinking AAC frame from " << frames_read << " to "
257 << new_frames_read << " based on packet duration.";
258 frames_read = new_frames_read;
260 // The above process may delete the entire packet.
266 // Deinterleave each channel and convert to 32bit floating-point with
267 // nominal range -1.0 -> +1.0. If the output is already in float planar
268 // format, just copy it into the AudioBus.
269 decoded_audio_packets->emplace_back(AudioBus::Create(channels, frames_read));
270 AudioBus* audio_bus = decoded_audio_packets->back().get();
// The format is read from |codec_context_|; the frame's own format was
// compared against the value cached from that context above.
272 if (codec_context_->sample_fmt == AV_SAMPLE_FMT_FLT) {
273 audio_bus->FromInterleaved<Float32SampleTypeTraits>(
274 reinterpret_cast<float*>(frame->data[0]), frames_read);
275 } else if (codec_context_->sample_fmt == AV_SAMPLE_FMT_FLTP) {
// Planar float: one plain copy per channel from the frame's channel planes.
276 for (int ch = 0; ch < audio_bus->channels(); ++ch) {
277 memcpy(audio_bus->channel(ch), frame->extended_data[ch],
278 sizeof(float) * frames_read);
// Remaining formats are dispatched on sample width and read from data[0] as
// interleaved 8-, 16- or 32-bit integers.
281 int bytes_per_sample = av_get_bytes_per_sample(codec_context_->sample_fmt);
282 switch (bytes_per_sample) {
284 audio_bus->FromInterleaved<UnsignedInt8SampleTypeTraits>(
285 reinterpret_cast<const uint8_t*>(frame->data[0]), frames_read);
288 audio_bus->FromInterleaved<SignedInt16SampleTypeTraits>(
289 reinterpret_cast<const int16_t*>(frame->data[0]), frames_read);
292 audio_bus->FromInterleaved<SignedInt32SampleTypeTraits>(
293 reinterpret_cast<const int32_t*>(frame->data[0]), frames_read);
296 NOTREACHED() << "Unsupported bytes per sample encountered: "
// For an unexpected width the bus is zeroed (silence) instead of being left
// with unconverted contents.
298 audio_bus->ZeroFrames(frames_read);
// Accumulate into the caller's running total.
302 (*total_frames) += frames_read;
// Test-only seek: converts |seek_time| into the selected stream's time base
// and calls av_seek_frame() on |stream_index_| with AVSEEK_FLAG_BACKWARD.
// Returns true when av_seek_frame() returns a non-negative value.
306 bool AudioFileReader::SeekForTesting(base::TimeDelta seek_time) {
307 // Use the AVStream's time_base, since |codec_context_| does not have
308 // time_base populated until after OpenDecoder().
309 return av_seek_frame(
310 glue_->format_context(), stream_index_,
311 ConvertToTimeBase(GetAVStreamForTesting()->time_base, seek_time),
312 AVSEEK_FLAG_BACKWARD) >= 0;
// Test-only accessor returning the AVStream at |stream_index_| in the format
// context. The pointer refers to an object reached through |glue_|'s format
// context, not to a copy.
315 const AVStream* AudioFileReader::GetAVStreamForTesting() const {
316 return glue_->format_context()->streams[stream_index_];