1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "media/filters/opus_audio_decoder.h"
10 #include "base/callback_helpers.h"
11 #include "base/location.h"
12 #include "base/message_loop/message_loop_proxy.h"
13 #include "base/sys_byteorder.h"
14 #include "media/base/audio_buffer.h"
15 #include "media/base/audio_decoder_config.h"
16 #include "media/base/audio_timestamp_helper.h"
17 #include "media/base/bind_to_loop.h"
18 #include "media/base/buffers.h"
19 #include "media/base/decoder_buffer.h"
20 #include "media/base/demuxer.h"
21 #include "media/base/pipeline.h"
22 #include "third_party/opus/src/include/opus.h"
23 #include "third_party/opus/src/include/opus_multistream.h"
// Copies sizeof(value) bytes from |data| starting at byte offset |read_offset|
// into |value| and returns the result of base::ByteSwapToLE16() on it.
// The DCHECK_LE asserts that the read ends within |data_size| bytes.
// NOTE(review): the declaration of |value| is not visible in this listing;
// the return type suggests a uint16 local -- confirm against the full file.
27 static uint16 ReadLE16(const uint8* data, size_t data_size, int read_offset) {
30 DCHECK_LE(read_offset + sizeof(value), data_size);
31 memcpy(&value, data + read_offset, sizeof(value));
32 return base::ByteSwapToLE16(value);
35 // Returns true if the decode result was end of stream.
// |decoded_size| is the decode result that is compared against zero.
// |input| is the buffer whose end_of_stream() flag is consulted.
36 static inline bool IsEndOfStream(int decoded_size,
37 const scoped_refptr<DecoderBuffer>& input) {
38 // Two conditions to meet to declare end of stream for this decoder:
39 // 1. Opus didn't output anything.
40 // 2. An end of stream buffer is received.
41 return decoded_size == 0 && input->end_of_stream();
// Converts |time_delta| into a number of audio frames: the duration in
// seconds (InSecondsF()) multiplied by |frame_rate|, rounded up with
// std::ceil() and returned as an int.
// NOTE(review): the line declaring the |frame_rate| parameter is not visible
// in this listing.
44 static int TimeDeltaToAudioFrames(base::TimeDelta time_delta,
46 return std::ceil(time_delta.InSecondsF() * frame_rate);
49 // The Opus specification is part of IETF RFC 6716:
50 // http://tools.ietf.org/html/rfc6716
52 // Opus uses Vorbis channel mapping, and Vorbis channel mapping specifies
53 // mappings for up to 8 channels. This information is part of the Vorbis I
55 // http://www.xiph.org/vorbis/doc/Vorbis_I_spec.html
// Upper bound on the channel count; also used below to size the channel
// layout table and the per-stream mapping arrays.
56 static const int kMaxVorbisChannels = 8;
58 // Opus allows for decode of S16 or float samples. OpusAudioDecoder always uses
60 static const int kBitsPerChannel = 16;
// Size in bytes of one sample of one channel, derived from kBitsPerChannel.
61 static const int kBytesPerChannel = kBitsPerChannel / 8;
63 // Maximum packet size used in Xiph's opusdec and FFmpeg's libopusdec.
// The first constant is expressed in samples; the second is the same size in
// bytes (samples multiplied by kBytesPerChannel).
64 static const int kMaxOpusOutputPacketSizeSamples = 960 * 6 * kMaxVorbisChannels;
65 static const int kMaxOpusOutputPacketSizeBytes =
66 kMaxOpusOutputPacketSizeSamples * kBytesPerChannel;
// Fills |channel_layout| with the entries of |opus_mapping| reordered through
// the row of kFFmpegChannelLayouts that corresponds to |num_channels|
// (row index |num_channels| - 1). Exactly |num_channels| entries are written.
// The DCHECK_LE asserts |num_channels| does not exceed kMaxVorbisChannels.
// NOTE(review): the line declaring the |num_channels| parameter and several
// rows of the table are not visible in this listing.
68 static void RemapOpusChannelLayout(const uint8* opus_mapping,
70 uint8* channel_layout) {
71 DCHECK_LE(num_channels, kMaxVorbisChannels);
73 // Opus uses Vorbis channel layout.
74 const int32 num_layouts = kMaxVorbisChannels;
75 const int32 num_layout_values = kMaxVorbisChannels;
77 // Vorbis channel ordering for streams with >= 2 channels:
83 // Front L, Front R, Back L, Back R
85 // Front L, Center, Front R, Back L, Back R
87 // Front L, Center, Front R, Back L, Back R, LFE
89 // Front L, Front Center, Front R, Side L, Side R, Back Center, LFE
91 // Front L, Center, Front R, Side L, Side R, Back L, Back R, LFE
93 // Channel ordering information is taken from section 4.3.9 of the Vorbis I
95 // http://xiph.org/vorbis/doc/Vorbis_I_spec.html#x1-800004.3.9
97 // These are the FFmpeg channel layouts expressed using the position of each
98 // channel in the output stream from libopus.
// One row per channel count; each value is an index into |opus_mapping|.
99 const uint8 kFFmpegChannelLayouts[num_layouts][num_layout_values] = {
102 // Stereo: No reorder.
105 // 3 Channels, from Vorbis order to:
109 // 4 Channels: No reorder.
112 // 5 Channels, from Vorbis order to:
113 // Front L, Front R, Center, Back L, Back R
116 // 6 Channels (5.1), from Vorbis order to:
117 // Front L, Front R, Center, LFE, Back L, Back R
118 { 0, 2, 1, 5, 3, 4 },
120 // 7 Channels (6.1), from Vorbis order to:
121 // Front L, Front R, Front Center, LFE, Side L, Side R, Back Center
122 { 0, 2, 1, 6, 3, 4, 5 },
124 // 8 Channels (7.1), from Vorbis order to:
125 // Front L, Front R, Center, LFE, Back L, Back R, Side L, Side R
126 { 0, 2, 1, 7, 5, 6, 3, 4 },
129 // Reorder the channels to produce the same ordering as FFmpeg, which is
130 // what the pipeline expects.
// Select the table row for this channel count, then copy through it.
131 const uint8* vorbis_layout_offset = kFFmpegChannelLayouts[num_channels - 1];
132 for (int channel = 0; channel < num_channels; ++channel)
133 channel_layout[channel] = opus_mapping[vorbis_layout_offset[channel]];
136 // Opus Extra Data contents:
137 // - "OpusHead" (64 bits)
138 // - version number (8 bits)
139 // - Channels C (8 bits)
140 // - Pre-skip (16 bits)
141 // - Sampling rate (32 bits)
142 // - Gain in dB (16 bits, S7.8)
143 // - Mapping (8 bits, 0=single stream (mono/stereo) 1=Vorbis mapping,
144 // 2..254: reserved, 255: multistream with no mapping)
146 // - if (mapping != 0)
147 // - N = total number of streams (8 bits)
148 // - M = number of paired streams (8 bits)
149 // - C times channel origin
152 // - if (byte&0x1 == 0)
159 // Default audio output channel layout. Used to initialize |stream_map| in
160 // OpusExtraData, and passed to opus_multistream_decoder_create() when the
161 // extra data does not contain mapping information. The values are valid only
162 // for mono and stereo output: Opus streams with more than 2 channels require a
// Number of entries in kDefaultOpusChannelLayout; also the largest channel
// count for which a stream without mapping information is accepted.
164 static const int kMaxChannelsWithDefaultLayout = 2;
// NOTE(review): the initializer values of this array are not visible in this
// listing.
165 static const uint8 kDefaultOpusChannelLayout[kMaxChannelsWithDefaultLayout] = {
168 // Size of the Opus extra data excluding optional mapping information.
// Extra data shorter than this is rejected by ParseOpusExtraData().
169 static const int kOpusExtraDataSize = 19;
171 // Offset to the channel count byte in the Opus extra data.
172 static const int kOpusExtraDataChannelsOffset = 9;
174 // Offset to the pre-skip value in the Opus extra data.
// The value at this offset is read as 16 bits via ReadLE16().
175 static const int kOpusExtraDataSkipSamplesOffset = 10;
177 // Offset to the channel mapping byte in the Opus extra data.
178 static const int kOpusExtraDataChannelMappingOffset = 18;
180 // Extra Data contains a stream map. The mapping values are in extra data beyond
181 // the always present |kOpusExtraDataSize| bytes of data. The mapping data
182 // contains stream count, coupling information, and per channel mapping values:
183 // - Byte 0: Number of streams.
184 // - Byte 1: Number coupled.
185 // - Byte 2: Starting at byte 2 are |extra_data->channels| uint8 mapping
// The three offsets below are absolute offsets into the extra data: the
// stream count byte immediately follows the fixed-size part, then the coupled
// count, then the per-channel stream map.
187 static const int kOpusExtraDataNumStreamsOffset = kOpusExtraDataSize;
188 static const int kOpusExtraDataNumCoupledOffset =
189 kOpusExtraDataNumStreamsOffset + 1;
190 static const int kOpusExtraDataStreamMapOffset =
191 kOpusExtraDataNumStreamsOffset + 2;
// Holds the values parsed out of the Opus extra data by ParseOpusExtraData(),
// which assigns |channels|, |skip_samples|, |channel_mapping|, |num_streams|,
// |num_coupled| and |stream_map|.
// NOTE(review): most member declarations and the constructor head are not
// visible in this listing. The two argument lines below presumably belong to
// a call that seeds |stream_map| from kDefaultOpusChannelLayout -- confirm
// against the full file.
193 struct OpusExtraData {
201 kDefaultOpusChannelLayout,
202 kMaxChannelsWithDefaultLayout);
// One stream index per output channel, up to kMaxVorbisChannels entries.
209 uint8 stream_map[kMaxVorbisChannels];
212 // Returns true when able to successfully parse and store Opus extra data in
213 // |extra_data|. Based on opus header parsing code in libopusdec from FFmpeg,
214 // and opus_header from Xiph's opus-tools project.
// |data| / |data_size|: the raw extra data bytes and their length.
// |config|: consulted to derive |num_coupled| when the extra data carries no
//     channel mapping.
// |extra_data|: output structure filled in by this function.
// NOTE(review): the return statements of the failure branches are not visible
// in this listing.
215 static bool ParseOpusExtraData(const uint8* data, int data_size,
216 const AudioDecoderConfig& config,
217 OpusExtraData* extra_data) {
// The fixed-size part of the extra data must be fully present.
218 if (data_size < kOpusExtraDataSize)
221 extra_data->channels = *(data + kOpusExtraDataChannelsOffset);
// Channel count must be in the range 1..kMaxVorbisChannels.
223 if (extra_data->channels <= 0 || extra_data->channels > kMaxVorbisChannels) {
224 DVLOG(0) << "invalid channel count in extra data: " << extra_data->channels;
228 extra_data->skip_samples =
229 ReadLE16(data, data_size, kOpusExtraDataSkipSamplesOffset);
231 extra_data->channel_mapping = *(data + kOpusExtraDataChannelMappingOffset);
// A channel mapping of zero means no stream map follows, so only channel
// counts covered by the default layout are acceptable.
233 if (!extra_data->channel_mapping) {
234 if (extra_data->channels > kMaxChannelsWithDefaultLayout) {
235 DVLOG(0) << "Invalid extra data, missing stream map.";
// Single stream; it is coupled when the configured layout has more than one
// channel.
239 extra_data->num_streams = 1;
240 extra_data->num_coupled =
241 (ChannelLayoutToChannelCount(config.channel_layout()) > 1) ? 1 : 0;
// With a mapping present, the data must hold the two count bytes plus one
// mapping byte per channel.
245 if (data_size < kOpusExtraDataStreamMapOffset + extra_data->channels) {
246 DVLOG(0) << "Invalid stream map; insufficient data for current channel "
247 << "count: " << extra_data->channels;
251 extra_data->num_streams = *(data + kOpusExtraDataNumStreamsOffset);
252 extra_data->num_coupled = *(data + kOpusExtraDataNumCoupledOffset);
254 if (extra_data->num_streams + extra_data->num_coupled != extra_data->channels)
255 DVLOG(1) << "Inconsistent channel mapping.";
// Copy the per-channel stream map bytes.
257 for (int i = 0; i < extra_data->channels; ++i)
258 extra_data->stream_map[i] = *(data + kOpusExtraDataStreamMapOffset + i);
// Constructs a decoder bound to |message_loop|. The visible initializers set
// no demuxer stream, zeroed audio parameters, CHANNEL_LAYOUT_NONE, no last
// input timestamp and zero discard/delay frame counts.
// NOTE(review): some member initializers are not visible in this listing.
262 OpusAudioDecoder::OpusAudioDecoder(
263 const scoped_refptr<base::MessageLoopProxy>& message_loop)
264 : message_loop_(message_loop),
266 demuxer_stream_(NULL),
268 bits_per_channel_(0),
269 channel_layout_(CHANNEL_LAYOUT_NONE),
270 samples_per_second_(0),
271 last_input_timestamp_(kNoTimestamp()),
272 frames_to_discard_(0),
273 frame_delay_at_start_(0) {
// Initializes the decoder to read from |stream|.
// |status_cb| is bound to the current loop and run with
// DECODER_ERROR_NOT_SUPPORTED when ConfigureDecoder() fails, or PIPELINE_OK
// otherwise. |statistics_cb| is stored after successful configuration.
// DCHECKs that it is called on |message_loop_|.
276 void OpusAudioDecoder::Initialize(
277 DemuxerStream* stream,
278 const PipelineStatusCB& status_cb,
279 const StatisticsCB& statistics_cb) {
280 DCHECK(message_loop_->BelongsToCurrentThread());
281 PipelineStatusCB initialize_cb = BindToCurrentLoop(status_cb);
// A non-NULL |demuxer_stream_| means Initialize() already ran.
// NOTE(review): how this branch completes is not visible in this listing.
283 if (demuxer_stream_) {
284 // TODO(scherkus): initialization currently happens more than once in
285 // PipelineIntegrationTest.BasicPlayback.
286 DVLOG(0) << "Initialize has already been called.";
290 weak_this_ = weak_factory_.GetWeakPtr();
291 demuxer_stream_ = stream;
293 if (!ConfigureDecoder()) {
294 initialize_cb.Run(DECODER_ERROR_NOT_SUPPORTED);
298 statistics_cb_ = statistics_cb;
299 initialize_cb.Run(PIPELINE_OK);
// Starts a read: stores |read_cb| (bound to the current loop) in |read_cb_|
// and requests input from the demuxer stream. |read_cb| must not be null, and
// the CHECK enforces that no other read is pending.
302 void OpusAudioDecoder::Read(const ReadCB& read_cb) {
303 DCHECK(message_loop_->BelongsToCurrentThread());
304 DCHECK(!read_cb.is_null());
305 CHECK(read_cb_.is_null()) << "Overlapping decodes are not supported.";
306 read_cb_ = BindToCurrentLoop(read_cb);
308 ReadFromDemuxerStream();
// Returns the stored |bits_per_channel_|, which ConfigureDecoder() sets from
// the stream config. DCHECKs that it is called on |message_loop_|.
311 int OpusAudioDecoder::bits_per_channel() {
312 DCHECK(message_loop_->BelongsToCurrentThread());
313 return bits_per_channel_;
// Returns the stored |channel_layout_|, which ConfigureDecoder() sets from
// the stream config. DCHECKs that it is called on |message_loop_|.
316 ChannelLayout OpusAudioDecoder::channel_layout() {
317 DCHECK(message_loop_->BelongsToCurrentThread());
318 return channel_layout_;
// Returns the stored |samples_per_second_|, which ConfigureDecoder() sets
// from the stream config. DCHECKs that it is called on |message_loop_|.
321 int OpusAudioDecoder::samples_per_second() {
322 DCHECK(message_loop_->BelongsToCurrentThread());
323 return samples_per_second_;
// Resets decoder state: issues OPUS_RESET_STATE to the Opus multistream
// decoder and clears the timestamp bookkeeping via ResetTimestampState().
// |closure| is bound to the current loop as |reset_cb|.
// NOTE(review): the line that runs |reset_cb| is not visible in this listing.
326 void OpusAudioDecoder::Reset(const base::Closure& closure) {
327 DCHECK(message_loop_->BelongsToCurrentThread());
328 base::Closure reset_cb = BindToCurrentLoop(closure);
330 opus_multistream_decoder_ctl(opus_decoder_, OPUS_RESET_STATE);
331 ResetTimestampState();
// Destructor.
// NOTE(review): the statements of the body are not visible in this listing;
// only the TODO below is shown.
335 OpusAudioDecoder::~OpusAudioDecoder() {
336 // TODO(scherkus): should we require Stop() to be called? this might end up
337 // getting called on a random thread due to refcounting.
// Requests the next buffer from |demuxer_stream_|. The result is delivered to
// BufferReady() through |weak_this_|. A read callback must already be pending.
341 void OpusAudioDecoder::ReadFromDemuxerStream() {
342 DCHECK(!read_cb_.is_null());
343 demuxer_stream_->Read(base::Bind(&OpusAudioDecoder::BufferReady, weak_this_));
// Callback for DemuxerStream::Read(), bound in ReadFromDemuxerStream().
// |status| is the demuxer result; |input| is the buffer, expected to be set
// exactly when |status| is kOk (see the DCHECK_EQ below).
// Visible handling, by status:
//  - kAborted: runs |read_cb_| with kAborted.
//  - kConfigChanged: reconfigures the decoder; on failure runs |read_cb_|
//    with kDecodeError, otherwise resets timestamp state and reads again.
//  - kOk: end-of-stream input yields an EOS AudioBuffer; otherwise the
//    timestamps are validated, the buffer is decoded, and either the output
//    is returned through |read_cb_| or more input is requested.
// NOTE(review): the early `return` statements and `else` lines between these
// branches are not visible in this listing.
346 void OpusAudioDecoder::BufferReady(
347 DemuxerStream::Status status,
348 const scoped_refptr<DecoderBuffer>& input) {
349 DCHECK(message_loop_->BelongsToCurrentThread());
350 DCHECK(!read_cb_.is_null());
351 DCHECK_EQ(status != DemuxerStream::kOk, !input.get()) << status;
353 if (status == DemuxerStream::kAborted) {
354 DCHECK(!input.get());
355 base::ResetAndReturn(&read_cb_).Run(kAborted, NULL);
359 if (status == DemuxerStream::kConfigChanged) {
360 DCHECK(!input.get());
361 DVLOG(1) << "Config changed.";
363 if (!ConfigureDecoder()) {
364 base::ResetAndReturn(&read_cb_).Run(kDecodeError, NULL);
368 ResetTimestampState();
369 ReadFromDemuxerStream();
373 DCHECK_EQ(status, DemuxerStream::kOk);
376 // Libopus does not buffer output. Decoding is complete when an end of stream
377 // input buffer is received.
378 if (input->end_of_stream()) {
379 base::ResetAndReturn(&read_cb_).Run(kOk, AudioBuffer::CreateEOSBuffer());
383 // Make sure we are notified if http://crbug.com/49709 returns. Issue also
384 // occurs with some damaged files.
// A buffer without a timestamp is an error only while no base timestamp has
// been established for the output.
385 if (input->timestamp() == kNoTimestamp() &&
386 output_timestamp_helper_->base_timestamp() == kNoTimestamp()) {
387 DVLOG(1) << "Received a buffer without timestamps!";
388 base::ResetAndReturn(&read_cb_).Run(kDecodeError, NULL);
// Reject input whose timestamp is earlier than the previous input's.
392 if (last_input_timestamp_ != kNoTimestamp() &&
393 input->timestamp() != kNoTimestamp() &&
394 input->timestamp() < last_input_timestamp_) {
395 base::TimeDelta diff = input->timestamp() - last_input_timestamp_;
396 DVLOG(1) << "Input timestamps are not monotonically increasing! "
397 << " ts " << input->timestamp().InMicroseconds() << " us"
398 << " diff " << diff.InMicroseconds() << " us";
399 base::ResetAndReturn(&read_cb_).Run(kDecodeError, NULL);
403 last_input_timestamp_ = input->timestamp();
405 scoped_refptr<AudioBuffer> output_buffer;
407 if (!Decode(input, &output_buffer)) {
408 base::ResetAndReturn(&read_cb_).Run(kDecodeError, NULL);
412 if (output_buffer.get()) {
413 // Execute callback to return the decoded audio.
414 base::ResetAndReturn(&read_cb_).Run(kOk, output_buffer);
416 // We exhausted the input data, but it wasn't enough for a frame. Ask for
417 // more data in order to fulfill this read.
418 ReadFromDemuxerStream();
// Validates the current audio config of |demuxer_stream_|, parses the Opus
// extra data, creates the Opus multistream decoder and records the config
// values in member variables. Callers treat a false return as failure.
// NOTE(review): the failure `return` statements, several condition heads and
// some call arguments are not visible in this listing.
422 bool OpusAudioDecoder::ConfigureDecoder() {
423 const AudioDecoderConfig& config = demuxer_stream_->audio_decoder_config();
425 if (config.codec() != kCodecOpus) {
426 DVLOG(0) << "codec must be kCodecOpus.";
430 const int channel_count =
431 ChannelLayoutToChannelCount(config.channel_layout());
432 if (!config.IsValidConfig() || channel_count > kMaxVorbisChannels) {
433 DVLOG(0) << "Invalid or unsupported audio stream -"
434 << " codec: " << config.codec()
435 << " channel count: " << channel_count
436 << " channel layout: " << config.channel_layout()
437 << " bits per channel: " << config.bits_per_channel()
438 << " samples per second: " << config.samples_per_second();
442 if (config.bits_per_channel() != kBitsPerChannel) {
443 DVLOG(0) << "16 bit samples required.";
447 if (config.is_encrypted()) {
448 DVLOG(0) << "Encrypted audio stream not supported.";
// Compares the new config against the stored audio parameters; a difference
// is logged as an unsupported config change.
// NOTE(review): the head of this condition is not visible in this listing.
453 (bits_per_channel_ != config.bits_per_channel() ||
454 channel_layout_ != config.channel_layout() ||
455 samples_per_second_ != config.samples_per_second())) {
456 DVLOG(1) << "Unsupported config change :";
457 DVLOG(1) << "\tbits_per_channel : " << bits_per_channel_
458 << " -> " << config.bits_per_channel();
459 DVLOG(1) << "\tchannel_layout : " << channel_layout_
460 << " -> " << config.channel_layout();
461 DVLOG(1) << "\tsample_rate : " << samples_per_second_
462 << " -> " << config.samples_per_second();
466 // Clean up existing decoder if necessary.
469 // Allocate the output buffer if necessary.
471 output_buffer_.reset(new int16[kMaxOpusOutputPacketSizeSamples]);
473 // Parse the Opus Extra Data.
474 OpusExtraData opus_extra_data;
475 if (!ParseOpusExtraData(config.extra_data(), config.extra_data_size(),
// NOTE(review): the statement guarded by this zero codec delay check is not
// visible in this listing.
480 if (!config.codec_delay().InMicroseconds())
483 // Convert from seconds to samples.
484 timestamp_offset_ = config.codec_delay();
485 frame_delay_at_start_ = TimeDeltaToAudioFrames(config.codec_delay(),
486 config.samples_per_second());
487 if (frame_delay_at_start_ < 0) {
488 DVLOG(1) << "Invalid file. Incorrect value for codec delay.";
// The container's codec delay, in frames, must equal the pre-skip value read
// from the Opus extra data.
491 if (frame_delay_at_start_ != opus_extra_data.skip_samples) {
492 DVLOG(1) << "Invalid file. Codec Delay in container does not match the "
493 << "value in Opus Extra Data.";
// Start from the default layout; for channel counts beyond the default
// layout, the mapping is derived from the parsed stream map instead.
497 uint8 channel_mapping[kMaxVorbisChannels];
498 memcpy(&channel_mapping,
499 kDefaultOpusChannelLayout,
500 kMaxChannelsWithDefaultLayout);
502 if (channel_count > kMaxChannelsWithDefaultLayout) {
503 RemapOpusChannelLayout(opus_extra_data.stream_map,
509 int status = OPUS_INVALID_STATE;
510 opus_decoder_ = opus_multistream_decoder_create(config.samples_per_second(),
512 opus_extra_data.num_streams,
513 opus_extra_data.num_coupled,
516 if (!opus_decoder_ || status != OPUS_OK) {
517 DVLOG(0) << "opus_multistream_decoder_create failed status="
518 << opus_strerror(status);
// Record the accepted configuration and create a timestamp helper for the
// configured sample rate.
522 bits_per_channel_ = config.bits_per_channel();
523 channel_layout_ = config.channel_layout();
524 samples_per_second_ = config.samples_per_second();
525 output_timestamp_helper_.reset(
526 new AudioTimestampHelper(config.samples_per_second()));
// Destroys the Opus multistream decoder and clears |opus_decoder_|.
// NOTE(review): any guard around these statements is not visible in this
// listing.
530 void OpusAudioDecoder::CloseDecoder() {
532 opus_multistream_decoder_destroy(opus_decoder_);
533 opus_decoder_ = NULL;
// Clears timestamp bookkeeping: the output base timestamp and the last input
// timestamp are set to kNoTimestamp(), and |frames_to_discard_| is set to the
// config's seek preroll converted to frames at |samples_per_second_|.
537 void OpusAudioDecoder::ResetTimestampState() {
538 output_timestamp_helper_->SetBaseTimestamp(kNoTimestamp());
539 last_input_timestamp_ = kNoTimestamp();
540 frames_to_discard_ = TimeDeltaToAudioFrames(
541 demuxer_stream_->audio_decoder_config().seek_preroll(),
542 samples_per_second_);
// Decodes |input| with opus_multistream_decode().
// When the packet yields more frames than remain to be discarded, the decoded
// audio is copied into a new AudioBuffer stored in |*output_buffer|, and
// leading and trailing frames are trimmed as required. Otherwise the decoded
// frames are deducted from |frames_to_discard_|. Statistics are reported
// through |statistics_cb_|. BufferReady() treats a false return as a decode
// error.
// NOTE(review): the return statements and some call arguments are not
// visible in this listing.
545 bool OpusAudioDecoder::Decode(const scoped_refptr<DecoderBuffer>& input,
546 scoped_refptr<AudioBuffer>* output_buffer) {
547 int frames_decoded = opus_multistream_decode(opus_decoder_,
551 kMaxOpusOutputPacketSizeSamples,
// A negative result is an Opus error code, logged with opus_strerror().
553 if (frames_decoded < 0) {
554 DVLOG(0) << "opus_multistream_decode failed for"
555 << " timestamp: " << input->timestamp().InMicroseconds()
556 << " us, duration: " << input->duration().InMicroseconds()
557 << " us, packet size: " << input->data_size() << " bytes with"
558 << " status: " << opus_strerror(frames_decoded);
562 uint8* decoded_audio_data = reinterpret_cast<uint8*>(&output_buffer_[0]);
563 int bytes_decoded = frames_decoded *
564 demuxer_stream_->audio_decoder_config().bytes_per_frame();
565 DCHECK_LE(bytes_decoded, kMaxOpusOutputPacketSizeBytes);
// The first input that is not end of stream establishes the base timestamp
// for output.
567 if (output_timestamp_helper_->base_timestamp() == kNoTimestamp() &&
568 !input->end_of_stream()) {
569 DCHECK(input->timestamp() != kNoTimestamp());
570 output_timestamp_helper_->SetBaseTimestamp(input->timestamp());
573 // Skip samples should be equal to codec delay when the file starts and when
574 // there is a seek to zero.
575 // TODO(vigneshv): This should be checked for start of stream rather than
576 // input timestamp of zero to accommodate streams that don't start at zero.
577 if (input->timestamp() == base::TimeDelta())
578 frames_to_discard_ = frame_delay_at_start_;
580 if (bytes_decoded > 0 && frames_decoded > frames_to_discard_) {
581 // Copy the audio samples into an output buffer.
582 uint8* data[] = { decoded_audio_data };
583 *output_buffer = AudioBuffer::CopyFrom(
585 ChannelLayoutToChannelCount(channel_layout_),
// The output timestamp is shifted back by |timestamp_offset_|, which
// ConfigureDecoder() sets to the codec delay.
588 output_timestamp_helper_->GetTimestamp() - timestamp_offset_,
589 output_timestamp_helper_->GetFrameDuration(frames_decoded));
590 output_timestamp_helper_->AddFrames(frames_decoded);
// Drop any frames still pending discard from the front of the buffer.
591 if (frames_to_discard_ > 0) {
592 output_buffer->get()->TrimStart(frames_to_discard_);
593 frames_decoded -= frames_to_discard_;
594 frames_to_discard_ = 0;
// Drop the discard padding carried by the input from the end of the buffer.
596 if (input->discard_padding().InMicroseconds() > 0) {
597 int discard_padding = TimeDeltaToAudioFrames(input->discard_padding(),
598 samples_per_second_);
599 if (discard_padding < 0 || discard_padding > frames_decoded) {
600 DVLOG(1) << "Invalid file. Incorrect discard padding value.";
603 output_buffer->get()->TrimEnd(discard_padding);
604 frames_decoded -= discard_padding;
// The whole packet falls inside the region to discard, so it is deducted from
// the remaining count.
606 } else if (bytes_decoded > 0) {
607 frames_to_discard_ -= frames_decoded;
611 // Decoding finished successfully, update statistics.
612 PipelineStatistics statistics;
613 statistics.audio_bytes_decoded =
615 demuxer_stream_->audio_decoder_config().bytes_per_frame();
616 statistics_cb_.Run(statistics);