1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "media/cast/sender/audio_encoder.h"
10 #include "base/bind_helpers.h"
11 #include "base/location.h"
12 #include "base/stl_util.h"
13 #include "base/sys_byteorder.h"
14 #include "base/time/time.h"
15 #include "media/base/audio_bus.h"
16 #include "media/cast/cast_defines.h"
17 #include "media/cast/cast_environment.h"
18 #include "third_party/opus/src/include/opus.h"
25 // The fixed number of audio frames per second and, inversely, the duration of
26 // one frame's worth of samples.
// With kFramesPerSecond == 100, kFrameDurationMillis evaluates to 10 (ms). The
// integer division is exact, which is what the "No remainder!" note refers to.
27 const int kFramesPerSecond = 100;
28 const int kFrameDurationMillis = 1000 / kFramesPerSecond; // No remainder!
30 // Threshold used to decide whether audio being delivered to the encoder is
31 // coming in too slow with respect to the capture timestamps.
// Evaluates to 3 * 10 = 30 ms, i.e. three frame durations.
32 const int kUnderrunThresholdMillis = 3 * kFrameDurationMillis;
37 // Base class that handles the common problem of feeding one or more AudioBus'
38 // data into a buffer and then, once the buffer is full, encoding the signal and
39 // emitting an EncodedFrame via the FrameEncodedCallback.
41 // Subclasses complete the implementation by handling the actual encoding
// (see the pure-virtual hooks TransferSamplesIntoBuffer() and
// EncodeFromFilledBuffer() declared in this class). Instances are
// reference-counted through base::RefCountedThreadSafe.
43 class AudioEncoder::ImplBase
44 : public base::RefCountedThreadSafe<AudioEncoder::ImplBase> {
// Constructor. Records the environment and channel count, derives
// samples_per_frame_ as sampling_rate / kFramesPerSecond, zeroes the RTP
// timestamp and the dropped-sample counter, and starts out in
// STATUS_AUDIO_UNINITIALIZED. The status is switched to
// STATUS_INVALID_AUDIO_CONFIGURATION when the validation below fails.
46 ImplBase(const scoped_refptr<CastEnvironment>& cast_environment,
50 const FrameEncodedCallback& callback)
51 : cast_environment_(cast_environment),
53 num_channels_(num_channels),
54 samples_per_frame_(sampling_rate / kFramesPerSecond),
56 cast_initialization_status_(STATUS_AUDIO_UNINITIALIZED),
59 frame_rtp_timestamp_(0),
60 samples_dropped_from_buffer_(0) {
61 // Support for max sampling rate of 48KHz, 2 channels, 100 ms duration.
62 const int kMaxSamplesTimesChannelsPerFrame = 48 * 2 * 100;
// Reject the configuration if the channel count or per-frame sample count is
// non-positive, if sampling_rate is not a whole multiple of kFramesPerSecond
// (the division in the initializer list would have truncated), or if one
// frame would hold more than the 9600 values allowed by the cap above.
63 if (num_channels_ <= 0 || samples_per_frame_ <= 0 ||
64 sampling_rate % kFramesPerSecond != 0 ||
65 samples_per_frame_ * num_channels_ > kMaxSamplesTimesChannelsPerFrame) {
66 cast_initialization_status_ = STATUS_INVALID_AUDIO_CONFIGURATION;
// Returns the current value of cast_initialization_status_.
70 CastInitializationStatus InitializationResult() const {
71 return cast_initialization_status_;
// Returns the number of samples, per channel, that make up one encoded frame.
74 int samples_per_frame() const {
75 return samples_per_frame_;
// Appends the samples in |audio_bus| to the internal frame buffer and, each
// time the buffer holds a full frame, encodes it and posts callback_ through
// cast_environment_ to CastEnvironment::MAIN. The encoder must be in
// STATUS_AUDIO_INITIALIZED and |recorded_time| must be non-null (both are
// DCHECKed).
78 void EncodeAudio(scoped_ptr<AudioBus> audio_bus,
79 const base::TimeTicks& recorded_time) {
80 DCHECK_EQ(cast_initialization_status_, STATUS_AUDIO_INITIALIZED);
81 DCHECK(!recorded_time.is_null());
83 // Determine whether |recorded_time| is consistent with the amount of audio
84 // data having been processed in the past. Resolve the underrun problem by
85 // dropping data from the internal buffer and skipping ahead the next
86 // frame's RTP timestamp by the estimated number of frames missed. On the
87 // other hand, don't attempt to resolve overruns: A receiver should
88 // gracefully deal with an excess of audio data.
89 const base::TimeDelta frame_duration =
90 base::TimeDelta::FromMilliseconds(kFrameDurationMillis);
// Duration represented by the samples already sitting in the buffer.
91 base::TimeDelta buffer_fill_duration =
92 buffer_fill_end_ * frame_duration / samples_per_frame_;
93 if (!frame_capture_time_.is_null()) {
// How far |recorded_time| lies beyond the point in time at which the
// already-buffered audio ends.
94 const base::TimeDelta amount_ahead_by =
95 recorded_time - (frame_capture_time_ + buffer_fill_duration);
// NOTE(review): the start of the following condition is not visible here;
// presumably it compares amount_ahead_by with the underrun threshold - confirm.
97 base::TimeDelta::FromMilliseconds(kUnderrunThresholdMillis)) {
// Account for the partially filled buffer as dropped samples, then convert
// the gap into a whole number of frames and advance the RTP timestamp by
// that many frames' worth of samples.
98 samples_dropped_from_buffer_ += buffer_fill_end_;
100 buffer_fill_duration = base::TimeDelta();
101 const int64 num_frames_missed = amount_ahead_by /
102 base::TimeDelta::FromMilliseconds(kFrameDurationMillis);
103 frame_rtp_timestamp_ +=
104 static_cast<uint32>(num_frames_missed * samples_per_frame_);
105 DVLOG(1) << "Skipping RTP timestamp ahead to account for "
106 << num_frames_missed * samples_per_frame_
107 << " samples' worth of underrun.";
// Anchor the capture time of the frame being assembled to |recorded_time|
// minus the duration of whatever audio is already buffered.
110 frame_capture_time_ = recorded_time - buffer_fill_duration;
112 // Encode all audio in |audio_bus| into zero or more frames.
114 while (src_pos < audio_bus->frames()) {
// Copy as many samples as still fit in the frame buffer, bounded by what is
// left unread in |audio_bus|.
115 const int num_samples_to_xfer = std::min(
116 samples_per_frame_ - buffer_fill_end_, audio_bus->frames() - src_pos);
117 DCHECK_EQ(audio_bus->channels(), num_channels_);
118 TransferSamplesIntoBuffer(
119 audio_bus.get(), src_pos, buffer_fill_end_, num_samples_to_xfer);
120 src_pos += num_samples_to_xfer;
121 buffer_fill_end_ += num_samples_to_xfer;
// Frame buffer not yet full. NOTE(review): the statement controlled by this
// check is not visible here; presumably it stops processing until more audio
// arrives - confirm.
123 if (buffer_fill_end_ < samples_per_frame_)
// Every audio frame is tagged as a key frame that references itself, and is
// stamped with the current RTP timestamp and capture time.
126 scoped_ptr<EncodedFrame> audio_frame(
128 audio_frame->dependency = EncodedFrame::KEY;
129 audio_frame->frame_id = frame_id_;
130 audio_frame->referenced_frame_id = frame_id_;
131 audio_frame->rtp_timestamp = frame_rtp_timestamp_;
132 audio_frame->reference_time = frame_capture_time_;
// Only when the subclass reports a successful encode is the frame handed to
// callback_, together with the dropped-sample count, which is then cleared.
134 if (EncodeFromFilledBuffer(&audio_frame->data)) {
135 cast_environment_->PostTask(
136 CastEnvironment::MAIN,
138 base::Bind(callback_,
139 base::Passed(&audio_frame),
140 samples_dropped_from_buffer_));
141 samples_dropped_from_buffer_ = 0;
144 // Reset the internal buffer, frame ID, and timestamps for the next frame.
145 buffer_fill_end_ = 0;
147 frame_rtp_timestamp_ += samples_per_frame_;
148 frame_capture_time_ += frame_duration;
// RefCountedThreadSafe is befriended so that it can reach the destructor
// declared next (the access specifiers of this class are not visible here).
153 friend class base::RefCountedThreadSafe<ImplBase>;
154 virtual ~ImplBase() {}
// Subclass hook: copies |num_samples| samples from |audio_bus| into the
// subclass-owned frame buffer, starting at sample position
// |buffer_fill_offset| of that buffer.
156 virtual void TransferSamplesIntoBuffer(const AudioBus* audio_bus,
158 int buffer_fill_offset,
159 int num_samples) = 0;
// Subclass hook: encodes the filled frame buffer into |out|. EncodeAudio()
// emits the frame only when this returns true.
160 virtual bool EncodeFromFilledBuffer(std::string* out) = 0;
// Environment through which the encoded-frame callback is posted.
162 const scoped_refptr<CastEnvironment> cast_environment_;
// Channel count and per-channel samples per frame, fixed at construction.
164 const int num_channels_;
165 const int samples_per_frame_;
// Run (via PostTask) with each successfully encoded frame.
166 const FrameEncodedCallback callback_;
168 // Subclass' ctor is expected to set this to STATUS_AUDIO_INITIALIZED.
169 CastInitializationStatus cast_initialization_status_;
172 // In the case where a call to EncodeAudio() cannot completely fill the
173 // buffer, this points to the position at which to populate data in a later
175 int buffer_fill_end_;
177 // A counter used to label EncodedFrames.
180 // The RTP timestamp for the next frame of encoded audio. This is defined as
181 // the number of audio samples encoded so far, plus the estimated number of
182 // samples that were missed due to data underruns. A receiver uses this value
183 // to detect gaps in the audio signal data being provided. Per the spec, RTP
184 // timestamp values are allowed to overflow and roll around past zero.
185 uint32 frame_rtp_timestamp_;
187 // The local system time associated with the start of the next frame of
188 // encoded audio. This value is passed on to a receiver as a reference clock
189 // timestamp for the purposes of synchronizing audio and video. Its
190 // progression is expected to drift relative to the elapsed time implied by
191 // the RTP timestamps.
192 base::TimeTicks frame_capture_time_;
194 // Set to non-zero to indicate the next output frame skipped over audio
195 // samples in order to recover from an input underrun.
196 int samples_dropped_from_buffer_;
198 DISALLOW_COPY_AND_ASSIGN(ImplBase);
// ImplBase subclass that encodes with the Opus library. Samples are staged as
// channel-interleaved floats in buffer_ and encoded with opus_encode_float().
201 class AudioEncoder::OpusImpl : public AudioEncoder::ImplBase {
// Constructor. The Opus encoder state lives in a byte array sized by
// opus_encoder_get_size() and is initialized in place by opus_encoder_init().
// On success the status becomes STATUS_AUDIO_INITIALIZED; an init failure
// sets STATUS_INVALID_AUDIO_CONFIGURATION.
// NOTE(review): the allocations in the initializer list run before the
// status check in the body, so they use |num_channels| and
// samples_per_frame_ even when ImplBase has already flagged the
// configuration as invalid - confirm callers never pass non-positive values.
203 OpusImpl(const scoped_refptr<CastEnvironment>& cast_environment,
207 const FrameEncodedCallback& callback)
208 : ImplBase(cast_environment,
213 encoder_memory_(new uint8[opus_encoder_get_size(num_channels)]),
214 opus_encoder_(reinterpret_cast<OpusEncoder*>(encoder_memory_.get())),
215 buffer_(new float[num_channels * samples_per_frame_]) {
// NOTE(review): the statement controlled by this check is not visible here;
// presumably construction stops early when ImplBase has already changed the
// status away from STATUS_AUDIO_UNINITIALIZED - confirm.
216 if (ImplBase::cast_initialization_status_ != STATUS_AUDIO_UNINITIALIZED)
218 if (opus_encoder_init(opus_encoder_,
221 OPUS_APPLICATION_AUDIO) != OPUS_OK) {
222 ImplBase::cast_initialization_status_ =
223 STATUS_INVALID_AUDIO_CONFIGURATION;
226 ImplBase::cast_initialization_status_ = STATUS_AUDIO_INITIALIZED;
229 // Note: As of 2013-10-31, the encoder in "auto bitrate" mode would use a
230 // variable bitrate up to 102kbps for 2-channel, 48 kHz audio and a 10 ms
231 // frame size. The opus library authors may, of course, adjust this in
// Setting the bitrate is CHECKed, so a failure here is fatal.
235 CHECK_EQ(opus_encoder_ctl(opus_encoder_, OPUS_SET_BITRATE(bitrate)),
240 virtual ~OpusImpl() {}
// Copies |num_samples| samples of every channel from |audio_bus| into
// buffer_, starting at frame position |buffer_fill_offset|.
242 virtual void TransferSamplesIntoBuffer(const AudioBus* audio_bus,
244 int buffer_fill_offset,
245 int num_samples) OVERRIDE {
246 // Opus requires channel-interleaved samples in a single array.
247 for (int ch = 0; ch < audio_bus->channels(); ++ch) {
248 const float* src = audio_bus->channel(ch) + source_offset;
249 const float* const src_end = src + num_samples;
// Destination starts at this channel's slot within the first target frame
// and is advanced by num_channels_ per sample, which produces interleaving.
250 float* dest = buffer_.get() + buffer_fill_offset * num_channels_ + ch;
251 for (; src < src_end; ++src, dest += num_channels_)
// Encodes the contents of buffer_ into |out|. |out| is first grown to
// kOpusMaxPayloadSize so that the encoder can write directly into it.
256 virtual bool EncodeFromFilledBuffer(std::string* out) OVERRIDE {
257 out->resize(kOpusMaxPayloadSize);
258 const opus_int32 result =
259 opus_encode_float(opus_encoder_,
262 reinterpret_cast<uint8*>(string_as_array(out)),
263 kOpusMaxPayloadSize);
// A negative result is an Opus error code and is logged.
267 } else if (result < 0) {
268 LOG(ERROR) << "Error code from opus_encode_float(): " << result;
271 // Do nothing: The documentation says that a return value of zero or
272 // one byte means the packet does not need to be transmitted.
// Raw storage for the encoder state, and opus_encoder_ as a typed view of
// that same storage.
277 const scoped_ptr<uint8[]> encoder_memory_;
278 OpusEncoder* const opus_encoder_;
// One frame of channel-interleaved float samples.
279 const scoped_ptr<float[]> buffer_;
281 // This is the recommended value, according to documentation in
282 // third_party/opus/src/include/opus.h, so that the Opus encoder does not
283 // degrade the audio due to memory constraints.
285 // Note: Whereas other RTP implementations do not, the cast library is
286 // perfectly capable of transporting larger than MTU-sized audio frames.
287 static const int kOpusMaxPayloadSize = 4000;
289 DISALLOW_COPY_AND_ASSIGN(OpusImpl);
// ImplBase subclass that emits uncompressed 16-bit PCM. Samples are staged as
// interleaved int16 values in buffer_ and written out in big-endian order.
292 class AudioEncoder::Pcm16Impl : public AudioEncoder::ImplBase {
// Constructor. Allocates one frame of interleaved int16 storage and marks the
// encoder STATUS_AUDIO_INITIALIZED.
// NOTE(review): the buffer allocation in the initializer list runs before the
// status check in the body, so it uses |num_channels| and samples_per_frame_
// even when ImplBase has flagged the configuration as invalid - confirm
// callers never pass non-positive values.
294 Pcm16Impl(const scoped_refptr<CastEnvironment>& cast_environment,
297 const FrameEncodedCallback& callback)
298 : ImplBase(cast_environment,
303 buffer_(new int16[num_channels * samples_per_frame_]) {
// NOTE(review): the statement controlled by this check is not visible here;
// presumably construction stops early when ImplBase has already changed the
// status away from STATUS_AUDIO_UNINITIALIZED - confirm.
304 if (ImplBase::cast_initialization_status_ != STATUS_AUDIO_UNINITIALIZED)
306 cast_initialization_status_ = STATUS_AUDIO_INITIALIZED;
310 virtual ~Pcm16Impl() {}
// Delegates to AudioBus::ToInterleavedPartial(), writing into buffer_ at
// frame position |buffer_fill_offset| (scaled by num_channels_ because the
// buffer is interleaved).
312 virtual void TransferSamplesIntoBuffer(const AudioBus* audio_bus,
314 int buffer_fill_offset,
315 int num_samples) OVERRIDE {
316 audio_bus->ToInterleavedPartial(
320 buffer_.get() + buffer_fill_offset * num_channels_);
// Serializes the whole frame buffer into |out|, converting each sample with
// base::HostToNet16().
323 virtual bool EncodeFromFilledBuffer(std::string* out) OVERRIDE {
324 // Output 16-bit PCM integers in big-endian byte order.
325 out->resize(num_channels_ * samples_per_frame_ * sizeof(int16));
326 const int16* src = buffer_.get();
327 const int16* const src_end = src + num_channels_ * samples_per_frame_;
// The string's storage is written through a uint16 pointer, one converted
// sample at a time.
328 uint16* dest = reinterpret_cast<uint16*>(&out->at(0));
329 for (; src < src_end; ++src, ++dest)
330 *dest = base::HostToNet16(*src);
// One frame of channel-interleaved 16-bit samples.
335 const scoped_ptr<int16[]> buffer_;
337 DISALLOW_COPY_AND_ASSIGN(Pcm16Impl);
// Constructs the encoder and selects the implementation: OpusImpl for
// CODEC_AUDIO_OPUS, Pcm16Impl for CODEC_AUDIO_PCM16. Any other value hits
// NOTREACHED(). NOTE(review): the selector expression of the switch is not
// visible here; presumably it is the codec constructor argument - confirm.
340 AudioEncoder::AudioEncoder(
341 const scoped_refptr<CastEnvironment>& cast_environment,
346 const FrameEncodedCallback& frame_encoded_callback)
347 : cast_environment_(cast_environment) {
348 // Note: It doesn't matter which thread constructs AudioEncoder, just so long
349 // as all calls to InsertAudio() are by the same thread.
// Detaching lets the thread checker bind to whichever thread makes the first
// checked call rather than to the constructing thread.
350 insert_thread_checker_.DetachFromThread();
352 case CODEC_AUDIO_OPUS:
353 impl_ = new OpusImpl(cast_environment,
357 frame_encoded_callback);
359 case CODEC_AUDIO_PCM16:
360 impl_ = new Pcm16Impl(cast_environment,
363 frame_encoded_callback);
366 NOTREACHED() << "Unsupported or unspecified codec for audio encoder";
// Destructor. The body is empty: no explicit teardown is performed here.
371 AudioEncoder::~AudioEncoder() {}
// Reports the initialization status: either the value forwarded from impl_,
// or STATUS_UNSUPPORTED_AUDIO_CODEC. Must be called on the insert thread
// (DCHECKed). NOTE(review): the condition choosing between the two returns is
// not visible here; presumably the fallback applies when no implementation
// was created - confirm.
373 CastInitializationStatus AudioEncoder::InitializationResult() const {
374 DCHECK(insert_thread_checker_.CalledOnValidThread());
376 return impl_->InitializationResult();
378 return STATUS_UNSUPPORTED_AUDIO_CODEC;
// Returns the implementation's samples-per-frame value. When the encoder is
// not in STATUS_AUDIO_INITIALIZED, std::numeric_limits<int>::max() is returned
// instead. Must be called on the insert thread (DCHECKed).
381 int AudioEncoder::GetSamplesPerFrame() const {
382 DCHECK(insert_thread_checker_.CalledOnValidThread());
383 if (InitializationResult() != STATUS_AUDIO_INITIALIZED) {
385 return std::numeric_limits<int>::max();
387 return impl_->samples_per_frame();
390 void AudioEncoder::InsertAudio(scoped_ptr<AudioBus> audio_bus,
391 const base::TimeTicks& recorded_time) {
392 DCHECK(insert_thread_checker_.CalledOnValidThread());
393 DCHECK(audio_bus.get());
394 if (InitializationResult() != STATUS_AUDIO_INITIALIZED) {
398 cast_environment_->PostTask(CastEnvironment::AUDIO,
400 base::Bind(&AudioEncoder::ImplBase::EncodeAudio,
402 base::Passed(&audio_bus),