1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "chrome/browser/ui/app_list/speech_recognizer.h"
10 #include "base/strings/string16.h"
11 #include "base/timer/timer.h"
12 #include "chrome/browser/ui/app_list/speech_recognizer_delegate.h"
13 #include "content/public/browser/browser_thread.h"
14 #include "content/public/browser/render_process_host.h"
15 #include "content/public/browser/speech_recognition_event_listener.h"
16 #include "content/public/browser/speech_recognition_manager.h"
17 #include "content/public/browser/speech_recognition_session_config.h"
18 #include "content/public/browser/web_contents.h"
19 #include "content/public/common/speech_recognition_error.h"
20 #include "net/url_request/url_request_context_getter.h"
21 #include "ui/app_list/speech_ui_model_observer.h"
25 // Length of timeout to cancel recognition if there's no speech heard.
// Used as the delay, in seconds, when the listener arms its speech timeout
// timer.
26 static const int kNoSpeechTimeoutInSeconds = 5;
28 // Invalid speech session.
// Sentinel stored in the listener's |session_| whenever no recognition session
// is active; compared against before starting or stopping a session.
29 static const int kInvalidSessionId = -1;
31 // Speech recognizer listener. This is separate from SpeechRecognizer because
32 // the speech recognition engine must function from the IO thread. Because of
33 // this, the lifecycle of this class must be decoupled from the lifecycle of
34 // SpeechRecognizer. To avoid circular references, this class has no reference
35 // to SpeechRecognizer. Instead, it has a reference to the
36 // SpeechRecognizerDelegate via a weak pointer that is only ever referenced from
// the UI thread.
//
// The listener is thread-safe ref-counted. SpeechRecognizer binds it into the
// tasks it posts to the IO thread, and recognition callbacks are relayed to
// the delegate by posting tasks to the UI thread.
38 class SpeechRecognizer::EventListener
39 : public base::RefCountedThreadSafe<SpeechRecognizer::EventListener>,
40 public content::SpeechRecognitionEventListener {
// Must be constructed on the UI thread (the constructor DCHECKs this).
42 EventListener(const base::WeakPtr<SpeechRecognizerDelegate>& delegate,
43 net::URLRequestContextGetter* url_request_context_getter,
44 const std::string& locale);
// Creates and starts a recognition session whose initial context carries
// |render_process_id|. IO thread only.
46 void StartOnIOThread(int render_process_id);
// Stops audio capture for the current session. IO thread only.
47 void StopOnIOThread();
// Lets the ref-counting base reach this class's non-public members
// (presumably the destructor -- TODO confirm).
50 friend class base::RefCountedThreadSafe<SpeechRecognizer::EventListener>;
// Posts a recognition state change for the delegate to the UI thread.
53 void NotifyRecognitionStateChanged(SpeechRecognitionState new_state);
// Arm / stop the no-speech timeout timer (|speech_timeout_|). IO thread only.
55 void StartSpeechTimeout();
56 void StopSpeechTimeout();
59 // Overridden from content::SpeechRecognitionEventListener:
60 // These are always called on the IO thread.
61 void OnRecognitionStart(int session_id) override;
62 void OnRecognitionEnd(int session_id) override;
63 void OnRecognitionResults(
65 const content::SpeechRecognitionResults& results) override;
66 void OnRecognitionError(
67 int session_id, const content::SpeechRecognitionError& error) override;
68 void OnSoundStart(int session_id) override;
69 void OnSoundEnd(int session_id) override;
70 void OnAudioLevelsChange(
71 int session_id, float volume, float noise_volume) override;
72 void OnEnvironmentEstimationComplete(int session_id) override;
73 void OnAudioStart(int session_id) override;
74 void OnAudioEnd(int session_id) override;
76 // Only dereferenced from the UI thread, but copied on IO thread.
77 base::WeakPtr<SpeechRecognizerDelegate> delegate_;
79 // All remaining members only accessed from the IO thread.
// Handed to the session config when a recognition session is created.
80 scoped_refptr<net::URLRequestContextGetter> url_request_context_getter_;
// Timer started by StartSpeechTimeout() that invokes SpeechTimeout() after
// kNoSpeechTimeoutInSeconds seconds.
82 base::Timer speech_timeout_;
// Produces the weak pointer given to the session config as its event listener.
85 base::WeakPtrFactory<EventListener> weak_factory_;
87 DISALLOW_COPY_AND_ASSIGN(EventListener);
// Stores the delegate weak pointer and the request context getter, and starts
// out with no recognition session. Must run on the UI thread.
90 SpeechRecognizer::EventListener::EventListener(
91 const base::WeakPtr<SpeechRecognizerDelegate>& delegate,
92 net::URLRequestContextGetter* url_request_context_getter,
93 const std::string& locale)
94 : delegate_(delegate),
95 url_request_context_getter_(url_request_context_getter),
// NOTE(review): both timer flags are false -- presumably a non-repeating timer
// that does not retain its task; confirm against base::Timer.
97 speech_timeout_(false, false),
// No session exists until StartOnIOThread() creates one.
98 session_(kInvalidSessionId),
100 DCHECK_CURRENTLY_ON(content::BrowserThread::UI);
// Asserts (debug builds) that the no-speech timeout timer is not still running
// when the listener is destroyed.
103 SpeechRecognizer::EventListener::~EventListener() {
104 DCHECK(!speech_timeout_.IsRunning());
// Creates a speech recognition session through the SpeechRecognitionManager,
// records its id in |session_|, and starts it. Must run on the IO thread.
// |render_process_id| is placed in the session's initial context.
107 void SpeechRecognizer::EventListener::StartOnIOThread(int render_process_id) {
108 DCHECK_CURRENTLY_ON(content::BrowserThread::IO);
// NOTE(review): guard for an already-existing session -- presumably an early
// return; confirm.
109 if (session_ != kInvalidSessionId)
// Session configuration: continuous recognition in the listener's locale, with
// interim results, a single hypothesis per result, and profanity filtering.
112 content::SpeechRecognitionSessionConfig config;
113 config.language = locale_;
114 config.is_legacy_api = false;
115 config.continuous = true;
116 config.interim_results = true;
117 config.max_hypotheses = 1;
118 config.filter_profanities = true;
119 config.url_request_context_getter = url_request_context_getter_;
// Events are delivered through a weak pointer to this listener.
120 config.event_listener = weak_factory_.GetWeakPtr();
121 config.initial_context.render_process_id = render_process_id;
123 auto speech_instance = content::SpeechRecognitionManager::GetInstance();
124 session_ = speech_instance->CreateSession(config);
125 speech_instance->StartSession(session_);
// Asks the SpeechRecognitionManager to stop audio capture for the current
// session and forgets the session id. Must run on the IO thread.
128 void SpeechRecognizer::EventListener::StopOnIOThread() {
129 DCHECK_CURRENTLY_ON(content::BrowserThread::IO);
// NOTE(review): guard for "no active session" -- presumably an early return;
// confirm.
130 if (session_ == kInvalidSessionId)
133 // Prevent recursion.
// |session_| is reset before calling into the manager, so a nested call to
// this method sees kInvalidSessionId.
134 int session = session_;
135 session_ = kInvalidSessionId;
137 content::SpeechRecognitionManager::GetInstance()->StopAudioCaptureForSession(
// Posts a task to the UI thread that invokes
// SpeechRecognizerDelegate::OnSpeechRecognitionStateChanged.
// NOTE(review): |new_state| is presumably forwarded as the bound argument --
// confirm.
141 void SpeechRecognizer::EventListener::NotifyRecognitionStateChanged(
142 SpeechRecognitionState new_state) {
143 content::BrowserThread::PostTask(
144 content::BrowserThread::UI,
146 base::Bind(&SpeechRecognizerDelegate::OnSpeechRecognitionStateChanged,
// Starts |speech_timeout_| so that SpeechTimeout() is invoked on this listener
// after kNoSpeechTimeoutInSeconds seconds. Must run on the IO thread.
151 void SpeechRecognizer::EventListener::StartSpeechTimeout() {
152 DCHECK_CURRENTLY_ON(content::BrowserThread::IO);
153 speech_timeout_.Start(
155 base::TimeDelta::FromSeconds(kNoSpeechTimeoutInSeconds),
156 base::Bind(&SpeechRecognizer::EventListener::SpeechTimeout, this));
// Stops the no-speech timeout timer. Must run on the IO thread.
159 void SpeechRecognizer::EventListener::StopSpeechTimeout() {
160 DCHECK_CURRENTLY_ON(content::BrowserThread::IO);
161 speech_timeout_.Stop();
// Callback bound to |speech_timeout_| by StartSpeechTimeout(). Must run on the
// IO thread.
// NOTE(review): presumably ends the session when the timeout fires -- confirm.
164 void SpeechRecognizer::EventListener::SpeechTimeout() {
165 DCHECK_CURRENTLY_ON(content::BrowserThread::IO);
// Recognition has started: report SPEECH_RECOGNITION_RECOGNIZING to the
// delegate. |session_id| is not used here.
169 void SpeechRecognizer::EventListener::OnRecognitionStart(int session_id) {
170 NotifyRecognitionStateChanged(SPEECH_RECOGNITION_RECOGNIZING);
// Recognition has ended: report SPEECH_RECOGNITION_READY to the delegate.
// NOTE(review): presumably the session is also stopped here -- confirm.
173 void SpeechRecognizer::EventListener::OnRecognitionEnd(int session_id) {
175 NotifyRecognitionStateChanged(SPEECH_RECOGNITION_READY);
// Builds a single string from the incoming results and posts
// SpeechRecognizerDelegate::OnSpeechResult to the UI thread. The bound
// arguments include whether |final_count| equals the number of results.
178 void SpeechRecognizer::EventListener::OnRecognitionResults(
179 int session_id, const content::SpeechRecognitionResults& results) {
180 base::string16 result_str;
181 size_t final_count = 0;
182 for (const auto& result : results) {
// NOTE(review): |final_count| presumably counts the non-provisional results --
// confirm what this check guards.
183 if (!result.is_provisional)
// Only the first hypothesis of a result is used.
185 result_str += result.hypotheses[0].utterance;
188 content::BrowserThread::PostTask(
189 content::BrowserThread::UI,
191 base::Bind(&SpeechRecognizerDelegate::OnSpeechResult,
194 final_count == results.size()));
196 // Stop the moment we have a final result.
197 if (final_count == results.size())
// Error callback. A network error is reported to the delegate as
// SPEECH_RECOGNITION_NETWORK_ERROR; SPEECH_RECOGNITION_READY is also reported.
// NOTE(review): confirm whether READY is sent unconditionally or only for
// non-network errors.
201 void SpeechRecognizer::EventListener::OnRecognitionError(
202 int session_id, const content::SpeechRecognitionError& error) {
204 if (error.code == content::SPEECH_RECOGNITION_ERROR_NETWORK) {
205 NotifyRecognitionStateChanged(SPEECH_RECOGNITION_NETWORK_ERROR);
207 NotifyRecognitionStateChanged(SPEECH_RECOGNITION_READY);
// Sound detected: arm the no-speech timeout and report
// SPEECH_RECOGNITION_IN_SPEECH to the delegate.
210 void SpeechRecognizer::EventListener::OnSoundStart(int session_id) {
211 StartSpeechTimeout();
212 NotifyRecognitionStateChanged(SPEECH_RECOGNITION_IN_SPEECH);
// Sound ended: report SPEECH_RECOGNITION_RECOGNIZING to the delegate.
// NOTE(review): presumably the no-speech timeout is also stopped here --
// confirm.
215 void SpeechRecognizer::EventListener::OnSoundEnd(int session_id) {
217 NotifyRecognitionStateChanged(SPEECH_RECOGNITION_RECOGNIZING);
// Converts the reported audio level into a 16-bit sound level and posts
// SpeechRecognizerDelegate::OnSpeechSoundLevelChanged to the UI thread.
220 void SpeechRecognizer::EventListener::OnAudioLevelsChange(
221 int session_id, float volume, float noise_volume) {
// Debug-only checks that both inputs lie within [0.0, 1.0].
222 DCHECK_LE(0.0, volume);
223 DCHECK_GE(1.0, volume);
224 DCHECK_LE(0.0, noise_volume);
225 DCHECK_GE(1.0, noise_volume);
// Subtract the noise floor, never going below zero.
226 volume = std::max(0.0f, volume - noise_volume);
227 // Both |volume| and |noise_volume| are defined to be in the range [0.0, 1.0].
228 // See: content/public/browser/speech_recognition_event_listener.h
// Scale the [0.0, 1.0] value to the range [0, INT16_MAX].
229 int16_t sound_level = static_cast<int16_t>(INT16_MAX * volume);
230 content::BrowserThread::PostTask(
231 content::BrowserThread::UI,
233 base::Bind(&SpeechRecognizerDelegate::OnSpeechSoundLevelChanged,
// SpeechRecognitionEventListener callback.
// NOTE(review): appears to be an intentional no-op in this listener -- confirm.
238 void SpeechRecognizer::EventListener::OnEnvironmentEstimationComplete(
// SpeechRecognitionEventListener callback.
// NOTE(review): appears to be an intentional no-op in this listener -- confirm.
242 void SpeechRecognizer::EventListener::OnAudioStart(int session_id) {
// SpeechRecognitionEventListener callback.
// NOTE(review): appears to be an intentional no-op in this listener -- confirm.
245 void SpeechRecognizer::EventListener::OnAudioEnd(int session_id) {
// Keeps the delegate weak pointer and creates the EventListener, passing it
// the same delegate, request context getter, and locale. Must run on the UI
// thread.
248 SpeechRecognizer::SpeechRecognizer(
249 const base::WeakPtr<SpeechRecognizerDelegate>& delegate,
250 net::URLRequestContextGetter* url_request_context_getter,
251 const std::string& locale)
252 : delegate_(delegate),
253 speech_event_listener_(new EventListener(
254 delegate, url_request_context_getter, locale)){
255 DCHECK_CURRENTLY_ON(content::BrowserThread::UI);
// Must be destroyed on the UI thread.
// NOTE(review): presumably also stops any in-progress recognition -- confirm.
258 SpeechRecognizer::~SpeechRecognizer() {
259 DCHECK_CURRENTLY_ON(content::BrowserThread::UI);
// Starts recognition. Called on the UI thread; obtains the speech WebContents
// from the delegate and posts EventListener::StartOnIOThread to the IO thread
// with that WebContents' render process id.
263 void SpeechRecognizer::Start() {
264 DCHECK_CURRENTLY_ON(content::BrowserThread::UI);
265 // The speech recognizer checks to see if the request is allowed by looking
266 // up the renderer process. A renderer containing the app-list is hard-coded
// (presumably as an allowed requester -- TODO confirm).
// NOTE(review): |delegate_| is a weak pointer and |contents| may be null --
// confirm both are checked before being dereferenced.
270 content::WebContents* contents = delegate_->GetSpeechContents();
274 content::BrowserThread::PostTask(
275 content::BrowserThread::IO,
277 base::Bind(&SpeechRecognizer::EventListener::StartOnIOThread,
278 speech_event_listener_,
279 contents->GetRenderProcessHost()->GetID()));
// Stops recognition. Called on the UI thread; the actual stop is carried out
// by posting EventListener::StopOnIOThread to the IO thread.
282 void SpeechRecognizer::Stop() {
283 DCHECK_CURRENTLY_ON(content::BrowserThread::UI);
284 content::BrowserThread::PostTask(
285 content::BrowserThread::IO,
287 base::Bind(&SpeechRecognizer::EventListener::StopOnIOThread,
288 speech_event_listener_));
291 } // namespace app_list