src/media/cast/sender/audio_encoder.cc

   1 // Copyright 2014 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #include "media/cast/sender/audio_encoder.h"
   6
   7 #include <algorithm>
   8
   9 #include "base/bind.h"
  10 #include "base/bind_helpers.h"
  11 #include "base/location.h"
  12 #include "base/stl_util.h"
  13 #include "base/sys_byteorder.h"
  14 #include "base/time/time.h"
  15 #include "media/base/audio_bus.h"
  16 #include "media/cast/cast_defines.h"
  17 #include "media/cast/cast_environment.h"
  18 #include "third_party/opus/src/include/opus.h"
  19
  20 namespace media {
  21 namespace cast {
  22
namespace {

// The fixed number of audio frames per second and, inversely, the duration of
// one frame's worth of samples.  At 100 frames per second, each frame spans
// 10 milliseconds.
const int kFramesPerSecond = 100;
// Must divide evenly: the frame duration is tracked in whole milliseconds.
const int kFrameDurationMillis = 1000 / kFramesPerSecond;  // No remainder!

// Threshold used to decide whether audio being delivered to the encoder is
// coming in too slow with respect to the capture timestamps.  It is expressed
// as a multiple (three) of the duration of one frame.
const int kUnderrunThresholdMillis = 3 * kFrameDurationMillis;

}  // namespace
  35
  36
  37 // Base class that handles the common problem of feeding one or more AudioBus'
  38 // data into a buffer and then, once the buffer is full, encoding the signal and
  39 // emitting an EncodedFrame via the FrameEncodedCallback.
  40 //
  41 // Subclasses complete the implementation by handling the actual encoding
  42 // details.
  43 class AudioEncoder::ImplBase
  44     : public base::RefCountedThreadSafe<AudioEncoder::ImplBase> {
  45  public:
  46   ImplBase(const scoped_refptr<CastEnvironment>& cast_environment,
  47            Codec codec,
  48            int num_channels,
  49            int sampling_rate,
  50            const FrameEncodedCallback& callback)
  51       : cast_environment_(cast_environment),
  52         codec_(codec),
  53         num_channels_(num_channels),
  54         samples_per_frame_(sampling_rate / kFramesPerSecond),
  55         callback_(callback),
  56         cast_initialization_status_(STATUS_AUDIO_UNINITIALIZED),
  57         buffer_fill_end_(0),
  58         frame_id_(0),
  59         frame_rtp_timestamp_(0) {
  60     // Support for max sampling rate of 48KHz, 2 channels, 100 ms duration.
  61     const int kMaxSamplesTimesChannelsPerFrame = 48 * 2 * 100;
  62     if (num_channels_ <= 0 || samples_per_frame_ <= 0 ||
  63         sampling_rate % kFramesPerSecond != 0 ||
  64         samples_per_frame_ * num_channels_ > kMaxSamplesTimesChannelsPerFrame) {
  65       cast_initialization_status_ = STATUS_INVALID_AUDIO_CONFIGURATION;
  66     }
  67   }
  68
  69   CastInitializationStatus InitializationResult() const {
  70     return cast_initialization_status_;
  71   }
  72
  73   void EncodeAudio(scoped_ptr<AudioBus> audio_bus,
  74                    const base::TimeTicks& recorded_time) {
  75     DCHECK_EQ(cast_initialization_status_, STATUS_AUDIO_INITIALIZED);
  76     DCHECK(!recorded_time.is_null());
  77
  78     // Determine whether |recorded_time| is consistent with the amount of audio
  79     // data having been processed in the past.  Resolve the underrun problem by
  80     // dropping data from the internal buffer and skipping ahead the next
  81     // frame's RTP timestamp by the estimated number of frames missed.  On the
  82     // other hand, don't attempt to resolve overruns: A receiver should
  83     // gracefully deal with an excess of audio data.
  84     const base::TimeDelta frame_duration =
  85         base::TimeDelta::FromMilliseconds(kFrameDurationMillis);
  86     base::TimeDelta buffer_fill_duration =
  87         buffer_fill_end_ * frame_duration / samples_per_frame_;
  88     if (!frame_capture_time_.is_null()) {
  89       const base::TimeDelta amount_ahead_by =
  90           recorded_time - (frame_capture_time_ + buffer_fill_duration);
  91       if (amount_ahead_by >
  92               base::TimeDelta::FromMilliseconds(kUnderrunThresholdMillis)) {
  93         buffer_fill_end_ = 0;
  94         buffer_fill_duration = base::TimeDelta();
  95         const int64 num_frames_missed = amount_ahead_by /
  96             base::TimeDelta::FromMilliseconds(kFrameDurationMillis);
  97         frame_rtp_timestamp_ +=
  98             static_cast<uint32>(num_frames_missed * samples_per_frame_);
  99         DVLOG(1) << "Skipping RTP timestamp ahead to account for "
 100                  << num_frames_missed * samples_per_frame_
 101                  << " samples' worth of underrun.";
 102       }
 103     }
 104     frame_capture_time_ = recorded_time - buffer_fill_duration;
 105
 106     // Encode all audio in |audio_bus| into zero or more frames.
 107     int src_pos = 0;
 108     while (src_pos < audio_bus->frames()) {
 109       const int num_samples_to_xfer = std::min(
 110           samples_per_frame_ - buffer_fill_end_, audio_bus->frames() - src_pos);
 111       DCHECK_EQ(audio_bus->channels(), num_channels_);
 112       TransferSamplesIntoBuffer(
 113           audio_bus.get(), src_pos, buffer_fill_end_, num_samples_to_xfer);
 114       src_pos += num_samples_to_xfer;
 115       buffer_fill_end_ += num_samples_to_xfer;
 116
 117       if (buffer_fill_end_ < samples_per_frame_)
 118         break;
 119
 120       scoped_ptr<EncodedFrame> audio_frame(
 121           new EncodedFrame());
 122       audio_frame->dependency = EncodedFrame::KEY;
 123       audio_frame->frame_id = frame_id_;
 124       audio_frame->referenced_frame_id = frame_id_;
 125       audio_frame->rtp_timestamp = frame_rtp_timestamp_;
 126       audio_frame->reference_time = frame_capture_time_;
 127
 128       if (EncodeFromFilledBuffer(&audio_frame->data)) {
 129         cast_environment_->PostTask(
 130             CastEnvironment::MAIN,
 131             FROM_HERE,
 132             base::Bind(callback_, base::Passed(&audio_frame)));
 133       }
 134
 135       // Reset the internal buffer, frame ID, and timestamps for the next frame.
 136       buffer_fill_end_ = 0;
 137       ++frame_id_;
 138       frame_rtp_timestamp_ += samples_per_frame_;
 139       frame_capture_time_ += frame_duration;
 140     }
 141   }
 142
 143  protected:
 144   friend class base::RefCountedThreadSafe<ImplBase>;
 145   virtual ~ImplBase() {}
 146
 147   virtual void TransferSamplesIntoBuffer(const AudioBus* audio_bus,
 148                                          int source_offset,
 149                                          int buffer_fill_offset,
 150                                          int num_samples) = 0;
 151   virtual bool EncodeFromFilledBuffer(std::string* out) = 0;
 152
 153   const scoped_refptr<CastEnvironment> cast_environment_;
 154   const Codec codec_;
 155   const int num_channels_;
 156   const int samples_per_frame_;
 157   const FrameEncodedCallback callback_;
 158
 159   // Subclass' ctor is expected to set this to STATUS_AUDIO_INITIALIZED.
 160   CastInitializationStatus cast_initialization_status_;
 161
 162  private:
 163   // In the case where a call to EncodeAudio() cannot completely fill the
 164   // buffer, this points to the position at which to populate data in a later
 165   // call.
 166   int buffer_fill_end_;
 167
 168   // A counter used to label EncodedFrames.
 169   uint32 frame_id_;
 170
 171   // The RTP timestamp for the next frame of encoded audio.  This is defined as
 172   // the number of audio samples encoded so far, plus the estimated number of
 173   // samples that were missed due to data underruns.  A receiver uses this value
 174   // to detect gaps in the audio signal data being provided.  Per the spec, RTP
 175   // timestamp values are allowed to overflow and roll around past zero.
 176   uint32 frame_rtp_timestamp_;
 177
 178   // The local system time associated with the start of the next frame of
 179   // encoded audio.  This value is passed on to a receiver as a reference clock
 180   // timestamp for the purposes of synchronizing audio and video.  Its
 181   // progression is expected to drift relative to the elapsed time implied by
 182   // the RTP timestamps.
 183   base::TimeTicks frame_capture_time_;
 184
 185   DISALLOW_COPY_AND_ASSIGN(ImplBase);
 186 };
 187
 188 class AudioEncoder::OpusImpl : public AudioEncoder::ImplBase {
 189  public:
 190   OpusImpl(const scoped_refptr<CastEnvironment>& cast_environment,
 191            int num_channels,
 192            int sampling_rate,
 193            int bitrate,
 194            const FrameEncodedCallback& callback)
 195       : ImplBase(cast_environment,
 196                  CODEC_AUDIO_OPUS,
 197                  num_channels,
 198                  sampling_rate,
 199                  callback),
 200         encoder_memory_(new uint8[opus_encoder_get_size(num_channels)]),
 201         opus_encoder_(reinterpret_cast<OpusEncoder*>(encoder_memory_.get())),
 202         buffer_(new float[num_channels * samples_per_frame_]) {
 203     if (ImplBase::cast_initialization_status_ != STATUS_AUDIO_UNINITIALIZED)
 204       return;
 205     if (opus_encoder_init(opus_encoder_,
 206                           sampling_rate,
 207                           num_channels,
 208                           OPUS_APPLICATION_AUDIO) != OPUS_OK) {
 209       ImplBase::cast_initialization_status_ =
 210           STATUS_INVALID_AUDIO_CONFIGURATION;
 211       return;
 212     }
 213     ImplBase::cast_initialization_status_ = STATUS_AUDIO_INITIALIZED;
 214
 215     if (bitrate <= 0) {
 216       // Note: As of 2013-10-31, the encoder in "auto bitrate" mode would use a
 217       // variable bitrate up to 102kbps for 2-channel, 48 kHz audio and a 10 ms
 218       // frame size.  The opus library authors may, of course, adjust this in
 219       // later versions.
 220       bitrate = OPUS_AUTO;
 221     }
 222     CHECK_EQ(opus_encoder_ctl(opus_encoder_, OPUS_SET_BITRATE(bitrate)),
 223              OPUS_OK);
 224   }
 225
 226  private:
 227   virtual ~OpusImpl() {}
 228
 229   virtual void TransferSamplesIntoBuffer(const AudioBus* audio_bus,
 230                                          int source_offset,
 231                                          int buffer_fill_offset,
 232                                          int num_samples) OVERRIDE {
 233     // Opus requires channel-interleaved samples in a single array.
 234     for (int ch = 0; ch < audio_bus->channels(); ++ch) {
 235       const float* src = audio_bus->channel(ch) + source_offset;
 236       const float* const src_end = src + num_samples;
 237       float* dest = buffer_.get() + buffer_fill_offset * num_channels_ + ch;
 238       for (; src < src_end; ++src, dest += num_channels_)
 239         *dest = *src;
 240     }
 241   }
 242
 243   virtual bool EncodeFromFilledBuffer(std::string* out) OVERRIDE {
 244     out->resize(kOpusMaxPayloadSize);
 245     const opus_int32 result =
 246         opus_encode_float(opus_encoder_,
 247                           buffer_.get(),
 248                           samples_per_frame_,
 249                           reinterpret_cast<uint8*>(string_as_array(out)),
 250                           kOpusMaxPayloadSize);
 251     if (result > 1) {
 252       out->resize(result);
 253       return true;
 254     } else if (result < 0) {
 255       LOG(ERROR) << "Error code from opus_encode_float(): " << result;
 256       return false;
 257     } else {
 258       // Do nothing: The documentation says that a return value of zero or
 259       // one byte means the packet does not need to be transmitted.
 260       return false;
 261     }
 262   }
 263
 264   const scoped_ptr<uint8[]> encoder_memory_;
 265   OpusEncoder* const opus_encoder_;
 266   const scoped_ptr<float[]> buffer_;
 267
 268   // This is the recommended value, according to documentation in
 269   // third_party/opus/src/include/opus.h, so that the Opus encoder does not
 270   // degrade the audio due to memory constraints.
 271   //
 272   // Note: Whereas other RTP implementations do not, the cast library is
 273   // perfectly capable of transporting larger than MTU-sized audio frames.
 274   static const int kOpusMaxPayloadSize = 4000;
 275
 276   DISALLOW_COPY_AND_ASSIGN(OpusImpl);
 277 };
 278
 279 class AudioEncoder::Pcm16Impl : public AudioEncoder::ImplBase {
 280  public:
 281   Pcm16Impl(const scoped_refptr<CastEnvironment>& cast_environment,
 282             int num_channels,
 283             int sampling_rate,
 284             const FrameEncodedCallback& callback)
 285       : ImplBase(cast_environment,
 286                  CODEC_AUDIO_PCM16,
 287                  num_channels,
 288                  sampling_rate,
 289                  callback),
 290         buffer_(new int16[num_channels * samples_per_frame_]) {
 291     if (ImplBase::cast_initialization_status_ != STATUS_AUDIO_UNINITIALIZED)
 292       return;
 293     cast_initialization_status_ = STATUS_AUDIO_INITIALIZED;
 294   }
 295
 296  private:
 297   virtual ~Pcm16Impl() {}
 298
 299   virtual void TransferSamplesIntoBuffer(const AudioBus* audio_bus,
 300                                          int source_offset,
 301                                          int buffer_fill_offset,
 302                                          int num_samples) OVERRIDE {
 303     audio_bus->ToInterleavedPartial(
 304         source_offset,
 305         num_samples,
 306         sizeof(int16),
 307         buffer_.get() + buffer_fill_offset * num_channels_);
 308   }
 309
 310   virtual bool EncodeFromFilledBuffer(std::string* out) OVERRIDE {
 311     // Output 16-bit PCM integers in big-endian byte order.
 312     out->resize(num_channels_ * samples_per_frame_ * sizeof(int16));
 313     const int16* src = buffer_.get();
 314     const int16* const src_end = src + num_channels_ * samples_per_frame_;
 315     uint16* dest = reinterpret_cast<uint16*>(&out->at(0));
 316     for (; src < src_end; ++src, ++dest)
 317       *dest = base::HostToNet16(*src);
 318     return true;
 319   }
 320
 321  private:
 322   const scoped_ptr<int16[]> buffer_;
 323
 324   DISALLOW_COPY_AND_ASSIGN(Pcm16Impl);
 325 };
 326
 327 AudioEncoder::AudioEncoder(
 328     const scoped_refptr<CastEnvironment>& cast_environment,
 329     int num_channels,
 330     int sampling_rate,
 331     int bitrate,
 332     Codec codec,
 333     const FrameEncodedCallback& frame_encoded_callback)
 334     : cast_environment_(cast_environment) {
 335   // Note: It doesn't matter which thread constructs AudioEncoder, just so long
 336   // as all calls to InsertAudio() are by the same thread.
 337   insert_thread_checker_.DetachFromThread();
 338   switch (codec) {
 339     case CODEC_AUDIO_OPUS:
 340       impl_ = new OpusImpl(cast_environment,
 341                            num_channels,
 342                            sampling_rate,
 343                            bitrate,
 344                            frame_encoded_callback);
 345       break;
 346     case CODEC_AUDIO_PCM16:
 347       impl_ = new Pcm16Impl(cast_environment,
 348                             num_channels,
 349                             sampling_rate,
 350                             frame_encoded_callback);
 351       break;
 352     default:
 353       NOTREACHED() << "Unsupported or unspecified codec for audio encoder";
 354       break;
 355   }
 356 }
 357
// The destructor body is intentionally empty.
// NOTE(review): |impl_| is also bound into the tasks posted by InsertAudio(),
// so it presumably stays alive through its ref-count until those tasks have
// run -- confirm against the declaration of |impl_| in the header.
AudioEncoder::~AudioEncoder() {}
 359
 360 CastInitializationStatus AudioEncoder::InitializationResult() const {
 361   DCHECK(insert_thread_checker_.CalledOnValidThread());
 362   if (impl_) {
 363     return impl_->InitializationResult();
 364   }
 365   return STATUS_UNSUPPORTED_AUDIO_CODEC;
 366 }
 367
 368 void AudioEncoder::InsertAudio(scoped_ptr<AudioBus> audio_bus,
 369                                const base::TimeTicks& recorded_time) {
 370   DCHECK(insert_thread_checker_.CalledOnValidThread());
 371   DCHECK(audio_bus.get());
 372   if (!impl_) {
 373     NOTREACHED();
 374     return;
 375   }
 376   cast_environment_->PostTask(CastEnvironment::AUDIO,
 377                               FROM_HERE,
 378                               base::Bind(&AudioEncoder::ImplBase::EncodeAudio,
 379                                          impl_,
 380                                          base::Passed(&audio_bus),
 381                                          recorded_time));
 382 }
 383
 384 }  // namespace cast
 385 }  // namespace media