src/third_party/webrtc/modules/audio_processing/voice_detection_impl.cc

   1 /*
   2  *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
   3  *
   4  *  Use of this source code is governed by a BSD-style license
   5  *  that can be found in the LICENSE file in the root of the source
   6  *  tree. An additional intellectual property rights grant can be found
   7  *  in the file PATENTS.  All contributing project authors may
   8  *  be found in the AUTHORS file in the root of the source tree.
   9  */
  10
  11 #include "webrtc/modules/audio_processing/voice_detection_impl.h"
  12
  13 #include <assert.h>
  14
  15 #include "webrtc/common_audio/vad/include/webrtc_vad.h"
  16 #include "webrtc/modules/audio_processing/audio_buffer.h"
  17 #include "webrtc/system_wrappers/interface/critical_section_wrapper.h"
  18
  19 namespace webrtc {
  20
  21 typedef VadInst Handle;
  22
  23 namespace {
  24 int MapSetting(VoiceDetection::Likelihood likelihood) {
  25   switch (likelihood) {
  26     case VoiceDetection::kVeryLowLikelihood:
  27       return 3;
  28     case VoiceDetection::kLowLikelihood:
  29       return 2;
  30     case VoiceDetection::kModerateLikelihood:
  31       return 1;
  32     case VoiceDetection::kHighLikelihood:
  33       return 0;
  34   }
  35   assert(false);
  36   return -1;
  37 }
  38 }  // namespace
  39
  40 VoiceDetectionImpl::VoiceDetectionImpl(const AudioProcessing* apm,
  41                                        CriticalSectionWrapper* crit)
  42   : ProcessingComponent(),
  43     apm_(apm),
  44     crit_(crit),
  45     stream_has_voice_(false),
  46     using_external_vad_(false),
  47     likelihood_(kLowLikelihood),
  48     frame_size_ms_(10),
  49     frame_size_samples_(0) {}
  50
  51 VoiceDetectionImpl::~VoiceDetectionImpl() {}
  52
  53 int VoiceDetectionImpl::ProcessCaptureAudio(AudioBuffer* audio) {
  54   if (!is_component_enabled()) {
  55     return apm_->kNoError;
  56   }
  57
  58   if (using_external_vad_) {
  59     using_external_vad_ = false;
  60     return apm_->kNoError;
  61   }
  62   assert(audio->samples_per_split_channel() <= 160);
  63
  64   // TODO(ajm): concatenate data in frame buffer here.
  65
  66   int vad_ret = WebRtcVad_Process(static_cast<Handle*>(handle(0)),
  67                                   apm_->proc_split_sample_rate_hz(),
  68                                   audio->mixed_low_pass_data(),
  69                                   frame_size_samples_);
  70   if (vad_ret == 0) {
  71     stream_has_voice_ = false;
  72     audio->set_activity(AudioFrame::kVadPassive);
  73   } else if (vad_ret == 1) {
  74     stream_has_voice_ = true;
  75     audio->set_activity(AudioFrame::kVadActive);
  76   } else {
  77     return apm_->kUnspecifiedError;
  78   }
  79
  80   return apm_->kNoError;
  81 }
  82
  83 int VoiceDetectionImpl::Enable(bool enable) {
  84   CriticalSectionScoped crit_scoped(crit_);
  85   return EnableComponent(enable);
  86 }
  87
  88 bool VoiceDetectionImpl::is_enabled() const {
  89   return is_component_enabled();
  90 }
  91
  92 int VoiceDetectionImpl::set_stream_has_voice(bool has_voice) {
  93   using_external_vad_ = true;
  94   stream_has_voice_ = has_voice;
  95   return apm_->kNoError;
  96 }
  97
  98 bool VoiceDetectionImpl::stream_has_voice() const {
  99   // TODO(ajm): enable this assertion?
 100   //assert(using_external_vad_ || is_component_enabled());
 101   return stream_has_voice_;
 102 }
 103
 104 int VoiceDetectionImpl::set_likelihood(VoiceDetection::Likelihood likelihood) {
 105   CriticalSectionScoped crit_scoped(crit_);
 106   if (MapSetting(likelihood) == -1) {
 107     return apm_->kBadParameterError;
 108   }
 109
 110   likelihood_ = likelihood;
 111   return Configure();
 112 }
 113
 114 VoiceDetection::Likelihood VoiceDetectionImpl::likelihood() const {
 115   return likelihood_;
 116 }
 117
 118 int VoiceDetectionImpl::set_frame_size_ms(int size) {
 119   CriticalSectionScoped crit_scoped(crit_);
 120   assert(size == 10); // TODO(ajm): remove when supported.
 121   if (size != 10 &&
 122       size != 20 &&
 123       size != 30) {
 124     return apm_->kBadParameterError;
 125   }
 126
 127   frame_size_ms_ = size;
 128
 129   return Initialize();
 130 }
 131
 132 int VoiceDetectionImpl::frame_size_ms() const {
 133   return frame_size_ms_;
 134 }
 135
 136 int VoiceDetectionImpl::Initialize() {
 137   int err = ProcessingComponent::Initialize();
 138   if (err != apm_->kNoError || !is_component_enabled()) {
 139     return err;
 140   }
 141
 142   using_external_vad_ = false;
 143   frame_size_samples_ = frame_size_ms_ *
 144       apm_->proc_split_sample_rate_hz() / 1000;
 145   // TODO(ajm): intialize frame buffer here.
 146
 147   return apm_->kNoError;
 148 }
 149
 150 void* VoiceDetectionImpl::CreateHandle() const {
 151   Handle* handle = NULL;
 152   if (WebRtcVad_Create(&handle) != apm_->kNoError) {
 153     handle = NULL;
 154   } else {
 155     assert(handle != NULL);
 156   }
 157
 158   return handle;
 159 }
 160
 161 void VoiceDetectionImpl::DestroyHandle(void* handle) const {
 162   WebRtcVad_Free(static_cast<Handle*>(handle));
 163 }
 164
 165 int VoiceDetectionImpl::InitializeHandle(void* handle) const {
 166   return WebRtcVad_Init(static_cast<Handle*>(handle));
 167 }
 168
 169 int VoiceDetectionImpl::ConfigureHandle(void* handle) const {
 170   return WebRtcVad_set_mode(static_cast<Handle*>(handle),
 171                             MapSetting(likelihood_));
 172 }
 173
 174 int VoiceDetectionImpl::num_handles_required() const {
 175   return 1;
 176 }
 177
 178 int VoiceDetectionImpl::GetHandleError(void* handle) const {
 179   // The VAD has no get_error() function.
 180   assert(handle != NULL);
 181   return apm_->kUnspecifiedError;
 182 }
 183 }  // namespace webrtc