src/chrome/browser/ui/app_list/speech_recognizer.cc

   1 // Copyright 2014 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #include "chrome/browser/ui/app_list/speech_recognizer.h"
   6
   7 #include <algorithm>
   8
   9 #include "base/bind.h"
  10 #include "base/strings/string16.h"
  11 #include "base/timer/timer.h"
  12 #include "chrome/browser/ui/app_list/speech_recognizer_delegate.h"
  13 #include "content/public/browser/browser_thread.h"
  14 #include "content/public/browser/render_process_host.h"
  15 #include "content/public/browser/speech_recognition_event_listener.h"
  16 #include "content/public/browser/speech_recognition_manager.h"
  17 #include "content/public/browser/speech_recognition_session_config.h"
  18 #include "content/public/browser/web_contents.h"
  19 #include "content/public/common/speech_recognition_error.h"
  20 #include "net/url_request/url_request_context_getter.h"
  21 #include "ui/app_list/speech_ui_model_observer.h"
  22
  23 namespace app_list {
  24
  25 // Length of timeout to cancel recognition if there's no speech heard.
  26 static const int kNoSpeechTimeoutInSeconds = 5;
  27
  28 // Invalid speech session.
  29 static const int kInvalidSessionId = -1;
  30
  31 // Speech recognizer listener. This is separate from SpeechRecognizer because
  32 // the speech recognition engine must function from the IO thread. Because of
  33 // this, the lifecycle of this class must be decoupled from the lifecycle of
  34 // SpeechRecognizer. To avoid circular references, this class has no reference
  35 // to SpeechRecognizer. Instead, it has a reference to the
  36 // SpeechRecognizerDelegate via a weak pointer that is only ever referenced from
  37 // the UI thread.
  38 class SpeechRecognizer::EventListener
  39     : public base::RefCountedThreadSafe<SpeechRecognizer::EventListener>,
  40       public content::SpeechRecognitionEventListener {
  41  public:
  42   EventListener(const base::WeakPtr<SpeechRecognizerDelegate>& delegate,
  43                 net::URLRequestContextGetter* url_request_context_getter,
  44                 const std::string& locale);
  45
  46   void StartOnIOThread(int render_process_id);
  47   void StopOnIOThread();
  48
  49  private:
  50   friend class base::RefCountedThreadSafe<SpeechRecognizer::EventListener>;
  51   ~EventListener();
  52
  53   void NotifyRecognitionStateChanged(SpeechRecognitionState new_state);
  54
  55   void StartSpeechTimeout();
  56   void StopSpeechTimeout();
  57   void SpeechTimeout();
  58
  59   // Overidden from content::SpeechRecognitionEventListener:
  60   // These are always called on the IO thread.
  61   void OnRecognitionStart(int session_id) override;
  62   void OnRecognitionEnd(int session_id) override;
  63   void OnRecognitionResults(
  64       int session_id,
  65       const content::SpeechRecognitionResults& results) override;
  66   void OnRecognitionError(
  67       int session_id, const content::SpeechRecognitionError& error) override;
  68   void OnSoundStart(int session_id) override;
  69   void OnSoundEnd(int session_id) override;
  70   void OnAudioLevelsChange(
  71       int session_id, float volume, float noise_volume) override;
  72   void OnEnvironmentEstimationComplete(int session_id) override;
  73   void OnAudioStart(int session_id) override;
  74   void OnAudioEnd(int session_id) override;
  75
  76   // Only dereferenced from the UI thread, but copied on IO thread.
  77   base::WeakPtr<SpeechRecognizerDelegate> delegate_;
  78
  79   // All remaining members only accessed from the IO thread.
  80   scoped_refptr<net::URLRequestContextGetter> url_request_context_getter_;
  81   std::string locale_;
  82   base::Timer speech_timeout_;
  83   int session_;
  84
  85   base::WeakPtrFactory<EventListener> weak_factory_;
  86
  87   DISALLOW_COPY_AND_ASSIGN(EventListener);
  88 };
  89
  90 SpeechRecognizer::EventListener::EventListener(
  91     const base::WeakPtr<SpeechRecognizerDelegate>& delegate,
  92     net::URLRequestContextGetter* url_request_context_getter,
  93     const std::string& locale)
  94     : delegate_(delegate),
  95       url_request_context_getter_(url_request_context_getter),
  96       locale_(locale),
  97       speech_timeout_(false, false),
  98       session_(kInvalidSessionId),
  99       weak_factory_(this) {
 100   DCHECK_CURRENTLY_ON(content::BrowserThread::UI);
 101 }
 102
 103 SpeechRecognizer::EventListener::~EventListener() {
 104   DCHECK(!speech_timeout_.IsRunning());
 105 }
 106
 107 void SpeechRecognizer::EventListener::StartOnIOThread(int render_process_id) {
 108   DCHECK_CURRENTLY_ON(content::BrowserThread::IO);
 109   if (session_ != kInvalidSessionId)
 110     StopOnIOThread();
 111
 112   content::SpeechRecognitionSessionConfig config;
 113   config.language = locale_;
 114   config.is_legacy_api = false;
 115   config.continuous = true;
 116   config.interim_results = true;
 117   config.max_hypotheses = 1;
 118   config.filter_profanities = true;
 119   config.url_request_context_getter = url_request_context_getter_;
 120   config.event_listener = weak_factory_.GetWeakPtr();
 121   config.initial_context.render_process_id = render_process_id;
 122
 123   auto speech_instance = content::SpeechRecognitionManager::GetInstance();
 124   session_ = speech_instance->CreateSession(config);
 125   speech_instance->StartSession(session_);
 126 }
 127
 128 void SpeechRecognizer::EventListener::StopOnIOThread() {
 129   DCHECK_CURRENTLY_ON(content::BrowserThread::IO);
 130   if (session_ == kInvalidSessionId)
 131     return;
 132
 133   // Prevent recursion.
 134   int session = session_;
 135   session_ = kInvalidSessionId;
 136   StopSpeechTimeout();
 137   content::SpeechRecognitionManager::GetInstance()->StopAudioCaptureForSession(
 138       session);
 139 }
 140
 141 void SpeechRecognizer::EventListener::NotifyRecognitionStateChanged(
 142     SpeechRecognitionState new_state) {
 143   content::BrowserThread::PostTask(
 144       content::BrowserThread::UI,
 145       FROM_HERE,
 146       base::Bind(&SpeechRecognizerDelegate::OnSpeechRecognitionStateChanged,
 147                  delegate_,
 148                  new_state));
 149 }
 150
 151 void SpeechRecognizer::EventListener::StartSpeechTimeout() {
 152   DCHECK_CURRENTLY_ON(content::BrowserThread::IO);
 153   speech_timeout_.Start(
 154       FROM_HERE,
 155       base::TimeDelta::FromSeconds(kNoSpeechTimeoutInSeconds),
 156       base::Bind(&SpeechRecognizer::EventListener::SpeechTimeout, this));
 157 }
 158
 159 void SpeechRecognizer::EventListener::StopSpeechTimeout() {
 160   DCHECK_CURRENTLY_ON(content::BrowserThread::IO);
 161   speech_timeout_.Stop();
 162 }
 163
 164 void SpeechRecognizer::EventListener::SpeechTimeout() {
 165   DCHECK_CURRENTLY_ON(content::BrowserThread::IO);
 166   StopOnIOThread();
 167 }
 168
 169 void SpeechRecognizer::EventListener::OnRecognitionStart(int session_id) {
 170   NotifyRecognitionStateChanged(SPEECH_RECOGNITION_RECOGNIZING);
 171 }
 172
 173 void SpeechRecognizer::EventListener::OnRecognitionEnd(int session_id) {
 174   StopOnIOThread();
 175   NotifyRecognitionStateChanged(SPEECH_RECOGNITION_READY);
 176 }
 177
 178 void SpeechRecognizer::EventListener::OnRecognitionResults(
 179     int session_id, const content::SpeechRecognitionResults& results) {
 180   base::string16 result_str;
 181   size_t final_count = 0;
 182   for (const auto& result : results) {
 183     if (!result.is_provisional)
 184       final_count++;
 185     result_str += result.hypotheses[0].utterance;
 186   }
 187   StopSpeechTimeout();
 188   content::BrowserThread::PostTask(
 189       content::BrowserThread::UI,
 190       FROM_HERE,
 191       base::Bind(&SpeechRecognizerDelegate::OnSpeechResult,
 192                  delegate_,
 193                  result_str,
 194                  final_count == results.size()));
 195
 196   // Stop the moment we have a final result.
 197   if (final_count == results.size())
 198     StopOnIOThread();
 199 }
 200
 201 void SpeechRecognizer::EventListener::OnRecognitionError(
 202     int session_id, const content::SpeechRecognitionError& error) {
 203   StopOnIOThread();
 204   if (error.code == content::SPEECH_RECOGNITION_ERROR_NETWORK) {
 205     NotifyRecognitionStateChanged(SPEECH_RECOGNITION_NETWORK_ERROR);
 206   }
 207   NotifyRecognitionStateChanged(SPEECH_RECOGNITION_READY);
 208 }
 209
 210 void SpeechRecognizer::EventListener::OnSoundStart(int session_id) {
 211   StartSpeechTimeout();
 212   NotifyRecognitionStateChanged(SPEECH_RECOGNITION_IN_SPEECH);
 213 }
 214
 215 void SpeechRecognizer::EventListener::OnSoundEnd(int session_id) {
 216   StopOnIOThread();
 217   NotifyRecognitionStateChanged(SPEECH_RECOGNITION_RECOGNIZING);
 218 }
 219
 220 void SpeechRecognizer::EventListener::OnAudioLevelsChange(
 221     int session_id, float volume, float noise_volume) {
 222   DCHECK_LE(0.0, volume);
 223   DCHECK_GE(1.0, volume);
 224   DCHECK_LE(0.0, noise_volume);
 225   DCHECK_GE(1.0, noise_volume);
 226   volume = std::max(0.0f, volume - noise_volume);
 227   // Both |volume| and |noise_volume| are defined to be in the range [0.0, 1.0].
 228   // See: content/public/browser/speech_recognition_event_listener.h
 229   int16_t sound_level = static_cast<int16_t>(INT16_MAX * volume);
 230   content::BrowserThread::PostTask(
 231       content::BrowserThread::UI,
 232       FROM_HERE,
 233       base::Bind(&SpeechRecognizerDelegate::OnSpeechSoundLevelChanged,
 234                  delegate_,
 235                  sound_level));
 236 }
 237
 238 void SpeechRecognizer::EventListener::OnEnvironmentEstimationComplete(
 239     int session_id) {
 240 }
 241
 242 void SpeechRecognizer::EventListener::OnAudioStart(int session_id) {
 243 }
 244
 245 void SpeechRecognizer::EventListener::OnAudioEnd(int session_id) {
 246 }
 247
 248 SpeechRecognizer::SpeechRecognizer(
 249     const base::WeakPtr<SpeechRecognizerDelegate>& delegate,
 250     net::URLRequestContextGetter* url_request_context_getter,
 251     const std::string& locale)
 252     : delegate_(delegate),
 253       speech_event_listener_(new EventListener(
 254           delegate, url_request_context_getter, locale)){
 255   DCHECK_CURRENTLY_ON(content::BrowserThread::UI);
 256 }
 257
 258 SpeechRecognizer::~SpeechRecognizer() {
 259   DCHECK_CURRENTLY_ON(content::BrowserThread::UI);
 260   Stop();
 261 }
 262
 263 void SpeechRecognizer::Start() {
 264   DCHECK_CURRENTLY_ON(content::BrowserThread::UI);
 265   // The speech recognizer checks to see if the request is allowed by looking
 266   // up the renderer process. A renderer containing the app-list is hard-coded
 267   // to be allowed.
 268   if (!delegate_)
 269     return;
 270   content::WebContents* contents = delegate_->GetSpeechContents();
 271   if (!contents)
 272     return;
 273
 274   content::BrowserThread::PostTask(
 275       content::BrowserThread::IO,
 276       FROM_HERE,
 277       base::Bind(&SpeechRecognizer::EventListener::StartOnIOThread,
 278                  speech_event_listener_,
 279                  contents->GetRenderProcessHost()->GetID()));
 280 }
 281
 282 void SpeechRecognizer::Stop() {
 283   DCHECK_CURRENTLY_ON(content::BrowserThread::UI);
 284   content::BrowserThread::PostTask(
 285       content::BrowserThread::IO,
 286       FROM_HERE,
 287       base::Bind(&SpeechRecognizer::EventListener::StopOnIOThread,
 288                  speech_event_listener_));
 289 }
 290
 291 }  // namespace app_list