src/content/browser/speech/audio_encoder.cc

   1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #include "content/browser/speech/audio_encoder.h"
   6
   7 #include "base/basictypes.h"
   8 #include "base/logging.h"
   9 #include "base/memory/scoped_ptr.h"
  10 #include "base/stl_util.h"
  11 #include "base/strings/string_number_conversions.h"
  12 #include "content/browser/speech/audio_buffer.h"
  13 #include "third_party/flac/include/FLAC/stream_encoder.h"
  14 #include "third_party/speex/include/speex/speex.h"
  15
  16 namespace content {
  17 namespace {
  18
  19 //-------------------------------- FLACEncoder ---------------------------------
  20
  21 const char* const kContentTypeFLAC = "audio/x-flac; rate=";
  22 const int kFLACCompressionLevel = 0;  // 0 for speed
  23
  24 class FLACEncoder : public AudioEncoder {
  25  public:
  26   FLACEncoder(int sampling_rate, int bits_per_sample);
  27   ~FLACEncoder() override;
  28   void Encode(const AudioChunk& raw_audio) override;
  29   void Flush() override;
  30
  31  private:
  32   static FLAC__StreamEncoderWriteStatus WriteCallback(
  33       const FLAC__StreamEncoder* encoder,
  34       const FLAC__byte buffer[],
  35       size_t bytes,
  36       unsigned samples,
  37       unsigned current_frame,
  38       void* client_data);
  39
  40   FLAC__StreamEncoder* encoder_;
  41   bool is_encoder_initialized_;
  42
  43   DISALLOW_COPY_AND_ASSIGN(FLACEncoder);
  44 };
  45
  46 FLAC__StreamEncoderWriteStatus FLACEncoder::WriteCallback(
  47     const FLAC__StreamEncoder* encoder,
  48     const FLAC__byte buffer[],
  49     size_t bytes,
  50     unsigned samples,
  51     unsigned current_frame,
  52     void* client_data) {
  53   FLACEncoder* me = static_cast<FLACEncoder*>(client_data);
  54   DCHECK(me->encoder_ == encoder);
  55   me->encoded_audio_buffer_.Enqueue(buffer, bytes);
  56   return FLAC__STREAM_ENCODER_WRITE_STATUS_OK;
  57 }
  58
  59 FLACEncoder::FLACEncoder(int sampling_rate, int bits_per_sample)
  60     : AudioEncoder(std::string(kContentTypeFLAC) +
  61                    base::IntToString(sampling_rate),
  62                    bits_per_sample),
  63       encoder_(FLAC__stream_encoder_new()),
  64       is_encoder_initialized_(false) {
  65   FLAC__stream_encoder_set_channels(encoder_, 1);
  66   FLAC__stream_encoder_set_bits_per_sample(encoder_, bits_per_sample);
  67   FLAC__stream_encoder_set_sample_rate(encoder_, sampling_rate);
  68   FLAC__stream_encoder_set_compression_level(encoder_, kFLACCompressionLevel);
  69
  70   // Initializing the encoder will cause sync bytes to be written to
  71   // its output stream, so we wait until the first call to this method
  72   // before doing so.
  73 }
  74
  75 FLACEncoder::~FLACEncoder() {
  76   FLAC__stream_encoder_delete(encoder_);
  77 }
  78
  79 void FLACEncoder::Encode(const AudioChunk& raw_audio) {
  80   DCHECK_EQ(raw_audio.bytes_per_sample(), 2);
  81   if (!is_encoder_initialized_) {
  82     const FLAC__StreamEncoderInitStatus encoder_status =
  83         FLAC__stream_encoder_init_stream(encoder_, WriteCallback, NULL, NULL,
  84                                          NULL, this);
  85     DCHECK_EQ(encoder_status, FLAC__STREAM_ENCODER_INIT_STATUS_OK);
  86     is_encoder_initialized_ = true;
  87   }
  88
  89   // FLAC encoder wants samples as int32s.
  90   const int num_samples = raw_audio.NumSamples();
  91   scoped_ptr<FLAC__int32[]> flac_samples(new FLAC__int32[num_samples]);
  92   FLAC__int32* flac_samples_ptr = flac_samples.get();
  93   for (int i = 0; i < num_samples; ++i)
  94     flac_samples_ptr[i] = static_cast<FLAC__int32>(raw_audio.GetSample16(i));
  95
  96   FLAC__stream_encoder_process(encoder_, &flac_samples_ptr, num_samples);
  97 }
  98
  99 void FLACEncoder::Flush() {
 100   FLAC__stream_encoder_finish(encoder_);
 101 }
 102
 103 //-------------------------------- SpeexEncoder --------------------------------
 104
 105 const char* const kContentTypeSpeex = "audio/x-speex-with-header-byte; rate=";
 106 const int kSpeexEncodingQuality = 8;
 107 const int kMaxSpeexFrameLength = 110;  // (44kbps rate sampled at 32kHz).
 108
 109 // Since the frame length gets written out as a byte in the encoded packet,
 110 // make sure it is within the byte range.
 111 COMPILE_ASSERT(kMaxSpeexFrameLength <= 0xFF, invalidLength);
 112
 113 class SpeexEncoder : public AudioEncoder {
 114  public:
 115   explicit SpeexEncoder(int sampling_rate, int bits_per_sample);
 116   ~SpeexEncoder() override;
 117   void Encode(const AudioChunk& raw_audio) override;
 118   void Flush() override {}
 119
 120  private:
 121   void* encoder_state_;
 122   SpeexBits bits_;
 123   int samples_per_frame_;
 124   char encoded_frame_data_[kMaxSpeexFrameLength + 1];  // +1 for the frame size.
 125   DISALLOW_COPY_AND_ASSIGN(SpeexEncoder);
 126 };
 127
 128 SpeexEncoder::SpeexEncoder(int sampling_rate, int bits_per_sample)
 129     : AudioEncoder(std::string(kContentTypeSpeex) +
 130                    base::IntToString(sampling_rate),
 131                    bits_per_sample) {
 132    // speex_bits_init() does not initialize all of the |bits_| struct.
 133    memset(&bits_, 0, sizeof(bits_));
 134    speex_bits_init(&bits_);
 135    encoder_state_ = speex_encoder_init(&speex_wb_mode);
 136    DCHECK(encoder_state_);
 137    speex_encoder_ctl(encoder_state_, SPEEX_GET_FRAME_SIZE, &samples_per_frame_);
 138    DCHECK(samples_per_frame_ > 0);
 139    int quality = kSpeexEncodingQuality;
 140    speex_encoder_ctl(encoder_state_, SPEEX_SET_QUALITY, &quality);
 141    int vbr = 1;
 142    speex_encoder_ctl(encoder_state_, SPEEX_SET_VBR, &vbr);
 143    memset(encoded_frame_data_, 0, sizeof(encoded_frame_data_));
 144 }
 145
 146 SpeexEncoder::~SpeexEncoder() {
 147   speex_bits_destroy(&bits_);
 148   speex_encoder_destroy(encoder_state_);
 149 }
 150
 151 void SpeexEncoder::Encode(const AudioChunk& raw_audio) {
 152   spx_int16_t* src_buffer =
 153       const_cast<spx_int16_t*>(raw_audio.SamplesData16());
 154   int num_samples = raw_audio.NumSamples();
 155   // Drop incomplete frames, typically those which come in when recording stops.
 156   num_samples -= (num_samples % samples_per_frame_);
 157   for (int i = 0; i < num_samples; i += samples_per_frame_) {
 158     speex_bits_reset(&bits_);
 159     speex_encode_int(encoder_state_, src_buffer + i, &bits_);
 160
 161     // Encode the frame and place the size of the frame as the first byte. This
 162     // is the packet format for MIME type x-speex-with-header-byte.
 163     int frame_length = speex_bits_write(&bits_, encoded_frame_data_ + 1,
 164                                         kMaxSpeexFrameLength);
 165     encoded_frame_data_[0] = static_cast<char>(frame_length);
 166     encoded_audio_buffer_.Enqueue(
 167         reinterpret_cast<uint8*>(&encoded_frame_data_[0]), frame_length + 1);
 168   }
 169 }
 170
 171 }  // namespace
 172
 173 AudioEncoder* AudioEncoder::Create(Codec codec,
 174                                    int sampling_rate,
 175                                    int bits_per_sample) {
 176   if (codec == CODEC_FLAC)
 177     return new FLACEncoder(sampling_rate, bits_per_sample);
 178   return new SpeexEncoder(sampling_rate, bits_per_sample);
 179 }
 180
 181 AudioEncoder::AudioEncoder(const std::string& mime_type, int bits_per_sample)
 182     : encoded_audio_buffer_(1), /* Byte granularity of encoded samples. */
 183       mime_type_(mime_type),
 184       bits_per_sample_(bits_per_sample) {
 185 }
 186
 187 AudioEncoder::~AudioEncoder() {
 188 }
 189
 190 scoped_refptr<AudioChunk> AudioEncoder::GetEncodedDataAndClear() {
 191   return encoded_audio_buffer_.DequeueAll();
 192 }
 193
 194 }  // namespace content