src/media/cast/sender/audio_encoder.cc

   1 // Copyright 2014 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
#include "media/cast/sender/audio_encoder.h"

#include <algorithm>
#include <limits>
#include <string>

#include "base/bind.h"
#include "base/bind_helpers.h"
#include "base/location.h"
#include "base/stl_util.h"
#include "base/sys_byteorder.h"
#include "base/time/time.h"
#include "media/base/audio_bus.h"
#include "media/cast/cast_defines.h"
#include "media/cast/cast_environment.h"
#include "third_party/opus/src/include/opus.h"
  19
  20 namespace media {
  21 namespace cast {
  22
namespace {

// The fixed number of audio frames per second and, inversely, the duration of
// one frame's worth of samples.  At 100 frames per second each frame spans
// 10 milliseconds.
const int kFramesPerSecond = 100;
const int kFrameDurationMillis = 1000 / kFramesPerSecond;  // No remainder!

// Threshold used to decide whether audio being delivered to the encoder is
// coming in too slow with respect to the capture timestamps.  It is three
// frame durations; EncodeAudio() treats a capture time that is further ahead
// of the expected time than this as an underrun.
const int kUnderrunThresholdMillis = 3 * kFrameDurationMillis;

}  // namespace
  35
  36
  37 // Base class that handles the common problem of feeding one or more AudioBus'
  38 // data into a buffer and then, once the buffer is full, encoding the signal and
  39 // emitting an EncodedFrame via the FrameEncodedCallback.
  40 //
  41 // Subclasses complete the implementation by handling the actual encoding
  42 // details.
  43 class AudioEncoder::ImplBase
  44     : public base::RefCountedThreadSafe<AudioEncoder::ImplBase> {
  45  public:
  46   ImplBase(const scoped_refptr<CastEnvironment>& cast_environment,
  47            Codec codec,
  48            int num_channels,
  49            int sampling_rate,
  50            const FrameEncodedCallback& callback)
  51       : cast_environment_(cast_environment),
  52         codec_(codec),
  53         num_channels_(num_channels),
  54         samples_per_frame_(sampling_rate / kFramesPerSecond),
  55         callback_(callback),
  56         cast_initialization_status_(STATUS_AUDIO_UNINITIALIZED),
  57         buffer_fill_end_(0),
  58         frame_id_(0),
  59         frame_rtp_timestamp_(0),
  60         samples_dropped_from_buffer_(0) {
  61     // Support for max sampling rate of 48KHz, 2 channels, 100 ms duration.
  62     const int kMaxSamplesTimesChannelsPerFrame = 48 * 2 * 100;
  63     if (num_channels_ <= 0 || samples_per_frame_ <= 0 ||
  64         sampling_rate % kFramesPerSecond != 0 ||
  65         samples_per_frame_ * num_channels_ > kMaxSamplesTimesChannelsPerFrame) {
  66       cast_initialization_status_ = STATUS_INVALID_AUDIO_CONFIGURATION;
  67     }
  68   }
  69
  70   CastInitializationStatus InitializationResult() const {
  71     return cast_initialization_status_;
  72   }
  73
  74   int samples_per_frame() const {
  75     return samples_per_frame_;
  76   }
  77
  78   void EncodeAudio(scoped_ptr<AudioBus> audio_bus,
  79                    const base::TimeTicks& recorded_time) {
  80     DCHECK_EQ(cast_initialization_status_, STATUS_AUDIO_INITIALIZED);
  81     DCHECK(!recorded_time.is_null());
  82
  83     // Determine whether |recorded_time| is consistent with the amount of audio
  84     // data having been processed in the past.  Resolve the underrun problem by
  85     // dropping data from the internal buffer and skipping ahead the next
  86     // frame's RTP timestamp by the estimated number of frames missed.  On the
  87     // other hand, don't attempt to resolve overruns: A receiver should
  88     // gracefully deal with an excess of audio data.
  89     const base::TimeDelta frame_duration =
  90         base::TimeDelta::FromMilliseconds(kFrameDurationMillis);
  91     base::TimeDelta buffer_fill_duration =
  92         buffer_fill_end_ * frame_duration / samples_per_frame_;
  93     if (!frame_capture_time_.is_null()) {
  94       const base::TimeDelta amount_ahead_by =
  95           recorded_time - (frame_capture_time_ + buffer_fill_duration);
  96       if (amount_ahead_by >
  97               base::TimeDelta::FromMilliseconds(kUnderrunThresholdMillis)) {
  98         samples_dropped_from_buffer_ += buffer_fill_end_;
  99         buffer_fill_end_ = 0;
 100         buffer_fill_duration = base::TimeDelta();
 101         const int64 num_frames_missed = amount_ahead_by /
 102             base::TimeDelta::FromMilliseconds(kFrameDurationMillis);
 103         frame_rtp_timestamp_ +=
 104             static_cast<uint32>(num_frames_missed * samples_per_frame_);
 105         DVLOG(1) << "Skipping RTP timestamp ahead to account for "
 106                  << num_frames_missed * samples_per_frame_
 107                  << " samples' worth of underrun.";
 108       }
 109     }
 110     frame_capture_time_ = recorded_time - buffer_fill_duration;
 111
 112     // Encode all audio in |audio_bus| into zero or more frames.
 113     int src_pos = 0;
 114     while (src_pos < audio_bus->frames()) {
 115       const int num_samples_to_xfer = std::min(
 116           samples_per_frame_ - buffer_fill_end_, audio_bus->frames() - src_pos);
 117       DCHECK_EQ(audio_bus->channels(), num_channels_);
 118       TransferSamplesIntoBuffer(
 119           audio_bus.get(), src_pos, buffer_fill_end_, num_samples_to_xfer);
 120       src_pos += num_samples_to_xfer;
 121       buffer_fill_end_ += num_samples_to_xfer;
 122
 123       if (buffer_fill_end_ < samples_per_frame_)
 124         break;
 125
 126       scoped_ptr<EncodedFrame> audio_frame(
 127           new EncodedFrame());
 128       audio_frame->dependency = EncodedFrame::KEY;
 129       audio_frame->frame_id = frame_id_;
 130       audio_frame->referenced_frame_id = frame_id_;
 131       audio_frame->rtp_timestamp = frame_rtp_timestamp_;
 132       audio_frame->reference_time = frame_capture_time_;
 133
 134       if (EncodeFromFilledBuffer(&audio_frame->data)) {
 135         cast_environment_->PostTask(
 136             CastEnvironment::MAIN,
 137             FROM_HERE,
 138             base::Bind(callback_,
 139                        base::Passed(&audio_frame),
 140                        samples_dropped_from_buffer_));
 141         samples_dropped_from_buffer_ = 0;
 142       }
 143
 144       // Reset the internal buffer, frame ID, and timestamps for the next frame.
 145       buffer_fill_end_ = 0;
 146       ++frame_id_;
 147       frame_rtp_timestamp_ += samples_per_frame_;
 148       frame_capture_time_ += frame_duration;
 149     }
 150   }
 151
 152  protected:
 153   friend class base::RefCountedThreadSafe<ImplBase>;
 154   virtual ~ImplBase() {}
 155
 156   virtual void TransferSamplesIntoBuffer(const AudioBus* audio_bus,
 157                                          int source_offset,
 158                                          int buffer_fill_offset,
 159                                          int num_samples) = 0;
 160   virtual bool EncodeFromFilledBuffer(std::string* out) = 0;
 161
 162   const scoped_refptr<CastEnvironment> cast_environment_;
 163   const Codec codec_;
 164   const int num_channels_;
 165   const int samples_per_frame_;
 166   const FrameEncodedCallback callback_;
 167
 168   // Subclass' ctor is expected to set this to STATUS_AUDIO_INITIALIZED.
 169   CastInitializationStatus cast_initialization_status_;
 170
 171  private:
 172   // In the case where a call to EncodeAudio() cannot completely fill the
 173   // buffer, this points to the position at which to populate data in a later
 174   // call.
 175   int buffer_fill_end_;
 176
 177   // A counter used to label EncodedFrames.
 178   uint32 frame_id_;
 179
 180   // The RTP timestamp for the next frame of encoded audio.  This is defined as
 181   // the number of audio samples encoded so far, plus the estimated number of
 182   // samples that were missed due to data underruns.  A receiver uses this value
 183   // to detect gaps in the audio signal data being provided.  Per the spec, RTP
 184   // timestamp values are allowed to overflow and roll around past zero.
 185   uint32 frame_rtp_timestamp_;
 186
 187   // The local system time associated with the start of the next frame of
 188   // encoded audio.  This value is passed on to a receiver as a reference clock
 189   // timestamp for the purposes of synchronizing audio and video.  Its
 190   // progression is expected to drift relative to the elapsed time implied by
 191   // the RTP timestamps.
 192   base::TimeTicks frame_capture_time_;
 193
 194   // Set to non-zero to indicate the next output frame skipped over audio
 195   // samples in order to recover from an input underrun.
 196   int samples_dropped_from_buffer_;
 197
 198   DISALLOW_COPY_AND_ASSIGN(ImplBase);
 199 };
 200
 201 class AudioEncoder::OpusImpl : public AudioEncoder::ImplBase {
 202  public:
 203   OpusImpl(const scoped_refptr<CastEnvironment>& cast_environment,
 204            int num_channels,
 205            int sampling_rate,
 206            int bitrate,
 207            const FrameEncodedCallback& callback)
 208       : ImplBase(cast_environment,
 209                  CODEC_AUDIO_OPUS,
 210                  num_channels,
 211                  sampling_rate,
 212                  callback),
 213         encoder_memory_(new uint8[opus_encoder_get_size(num_channels)]),
 214         opus_encoder_(reinterpret_cast<OpusEncoder*>(encoder_memory_.get())),
 215         buffer_(new float[num_channels * samples_per_frame_]) {
 216     if (ImplBase::cast_initialization_status_ != STATUS_AUDIO_UNINITIALIZED)
 217       return;
 218     if (opus_encoder_init(opus_encoder_,
 219                           sampling_rate,
 220                           num_channels,
 221                           OPUS_APPLICATION_AUDIO) != OPUS_OK) {
 222       ImplBase::cast_initialization_status_ =
 223           STATUS_INVALID_AUDIO_CONFIGURATION;
 224       return;
 225     }
 226     ImplBase::cast_initialization_status_ = STATUS_AUDIO_INITIALIZED;
 227
 228     if (bitrate <= 0) {
 229       // Note: As of 2013-10-31, the encoder in "auto bitrate" mode would use a
 230       // variable bitrate up to 102kbps for 2-channel, 48 kHz audio and a 10 ms
 231       // frame size.  The opus library authors may, of course, adjust this in
 232       // later versions.
 233       bitrate = OPUS_AUTO;
 234     }
 235     CHECK_EQ(opus_encoder_ctl(opus_encoder_, OPUS_SET_BITRATE(bitrate)),
 236              OPUS_OK);
 237   }
 238
 239  private:
 240   virtual ~OpusImpl() {}
 241
 242   virtual void TransferSamplesIntoBuffer(const AudioBus* audio_bus,
 243                                          int source_offset,
 244                                          int buffer_fill_offset,
 245                                          int num_samples) OVERRIDE {
 246     // Opus requires channel-interleaved samples in a single array.
 247     for (int ch = 0; ch < audio_bus->channels(); ++ch) {
 248       const float* src = audio_bus->channel(ch) + source_offset;
 249       const float* const src_end = src + num_samples;
 250       float* dest = buffer_.get() + buffer_fill_offset * num_channels_ + ch;
 251       for (; src < src_end; ++src, dest += num_channels_)
 252         *dest = *src;
 253     }
 254   }
 255
 256   virtual bool EncodeFromFilledBuffer(std::string* out) OVERRIDE {
 257     out->resize(kOpusMaxPayloadSize);
 258     const opus_int32 result =
 259         opus_encode_float(opus_encoder_,
 260                           buffer_.get(),
 261                           samples_per_frame_,
 262                           reinterpret_cast<uint8*>(string_as_array(out)),
 263                           kOpusMaxPayloadSize);
 264     if (result > 1) {
 265       out->resize(result);
 266       return true;
 267     } else if (result < 0) {
 268       LOG(ERROR) << "Error code from opus_encode_float(): " << result;
 269       return false;
 270     } else {
 271       // Do nothing: The documentation says that a return value of zero or
 272       // one byte means the packet does not need to be transmitted.
 273       return false;
 274     }
 275   }
 276
 277   const scoped_ptr<uint8[]> encoder_memory_;
 278   OpusEncoder* const opus_encoder_;
 279   const scoped_ptr<float[]> buffer_;
 280
 281   // This is the recommended value, according to documentation in
 282   // third_party/opus/src/include/opus.h, so that the Opus encoder does not
 283   // degrade the audio due to memory constraints.
 284   //
 285   // Note: Whereas other RTP implementations do not, the cast library is
 286   // perfectly capable of transporting larger than MTU-sized audio frames.
 287   static const int kOpusMaxPayloadSize = 4000;
 288
 289   DISALLOW_COPY_AND_ASSIGN(OpusImpl);
 290 };
 291
 292 class AudioEncoder::Pcm16Impl : public AudioEncoder::ImplBase {
 293  public:
 294   Pcm16Impl(const scoped_refptr<CastEnvironment>& cast_environment,
 295             int num_channels,
 296             int sampling_rate,
 297             const FrameEncodedCallback& callback)
 298       : ImplBase(cast_environment,
 299                  CODEC_AUDIO_PCM16,
 300                  num_channels,
 301                  sampling_rate,
 302                  callback),
 303         buffer_(new int16[num_channels * samples_per_frame_]) {
 304     if (ImplBase::cast_initialization_status_ != STATUS_AUDIO_UNINITIALIZED)
 305       return;
 306     cast_initialization_status_ = STATUS_AUDIO_INITIALIZED;
 307   }
 308
 309  private:
 310   virtual ~Pcm16Impl() {}
 311
 312   virtual void TransferSamplesIntoBuffer(const AudioBus* audio_bus,
 313                                          int source_offset,
 314                                          int buffer_fill_offset,
 315                                          int num_samples) OVERRIDE {
 316     audio_bus->ToInterleavedPartial(
 317         source_offset,
 318         num_samples,
 319         sizeof(int16),
 320         buffer_.get() + buffer_fill_offset * num_channels_);
 321   }
 322
 323   virtual bool EncodeFromFilledBuffer(std::string* out) OVERRIDE {
 324     // Output 16-bit PCM integers in big-endian byte order.
 325     out->resize(num_channels_ * samples_per_frame_ * sizeof(int16));
 326     const int16* src = buffer_.get();
 327     const int16* const src_end = src + num_channels_ * samples_per_frame_;
 328     uint16* dest = reinterpret_cast<uint16*>(&out->at(0));
 329     for (; src < src_end; ++src, ++dest)
 330       *dest = base::HostToNet16(*src);
 331     return true;
 332   }
 333
 334  private:
 335   const scoped_ptr<int16[]> buffer_;
 336
 337   DISALLOW_COPY_AND_ASSIGN(Pcm16Impl);
 338 };
 339
 340 AudioEncoder::AudioEncoder(
 341     const scoped_refptr<CastEnvironment>& cast_environment,
 342     int num_channels,
 343     int sampling_rate,
 344     int bitrate,
 345     Codec codec,
 346     const FrameEncodedCallback& frame_encoded_callback)
 347     : cast_environment_(cast_environment) {
 348   // Note: It doesn't matter which thread constructs AudioEncoder, just so long
 349   // as all calls to InsertAudio() are by the same thread.
 350   insert_thread_checker_.DetachFromThread();
 351   switch (codec) {
 352     case CODEC_AUDIO_OPUS:
 353       impl_ = new OpusImpl(cast_environment,
 354                            num_channels,
 355                            sampling_rate,
 356                            bitrate,
 357                            frame_encoded_callback);
 358       break;
 359     case CODEC_AUDIO_PCM16:
 360       impl_ = new Pcm16Impl(cast_environment,
 361                             num_channels,
 362                             sampling_rate,
 363                             frame_encoded_callback);
 364       break;
 365     default:
 366       NOTREACHED() << "Unsupported or unspecified codec for audio encoder";
 367       break;
 368   }
 369 }
 370
// No explicit teardown is performed here; members are destroyed implicitly.
AudioEncoder::~AudioEncoder() {}
 372
 373 CastInitializationStatus AudioEncoder::InitializationResult() const {
 374   DCHECK(insert_thread_checker_.CalledOnValidThread());
 375   if (impl_.get()) {
 376     return impl_->InitializationResult();
 377   }
 378   return STATUS_UNSUPPORTED_AUDIO_CODEC;
 379 }
 380
 381 int AudioEncoder::GetSamplesPerFrame() const {
 382   DCHECK(insert_thread_checker_.CalledOnValidThread());
 383   if (InitializationResult() != STATUS_AUDIO_INITIALIZED) {
 384     NOTREACHED();
 385     return std::numeric_limits<int>::max();
 386   }
 387   return impl_->samples_per_frame();
 388 }
 389
 390 void AudioEncoder::InsertAudio(scoped_ptr<AudioBus> audio_bus,
 391                                const base::TimeTicks& recorded_time) {
 392   DCHECK(insert_thread_checker_.CalledOnValidThread());
 393   DCHECK(audio_bus.get());
 394   if (InitializationResult() != STATUS_AUDIO_INITIALIZED) {
 395     NOTREACHED();
 396     return;
 397   }
 398   cast_environment_->PostTask(CastEnvironment::AUDIO,
 399                               FROM_HERE,
 400                               base::Bind(&AudioEncoder::ImplBase::EncodeAudio,
 401                                          impl_,
 402                                          base::Passed(&audio_bus),
 403                                          recorded_time));
 404 }
 405
 406 }  // namespace cast
 407 }  // namespace media