Upstream version 11.40.271.0
[platform/framework/web/crosswalk.git] / src / chrome / browser / ui / app_list / speech_recognizer.cc
1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "chrome/browser/ui/app_list/speech_recognizer.h"
6
7 #include <algorithm>
8
9 #include "base/bind.h"
10 #include "base/strings/string16.h"
11 #include "base/timer/timer.h"
12 #include "chrome/browser/ui/app_list/speech_recognizer_delegate.h"
13 #include "content/public/browser/browser_thread.h"
14 #include "content/public/browser/render_process_host.h"
15 #include "content/public/browser/speech_recognition_event_listener.h"
16 #include "content/public/browser/speech_recognition_manager.h"
17 #include "content/public/browser/speech_recognition_session_config.h"
18 #include "content/public/browser/web_contents.h"
19 #include "content/public/common/speech_recognition_error.h"
20 #include "net/url_request/url_request_context_getter.h"
21 #include "ui/app_list/speech_ui_model_observer.h"
22
23 namespace app_list {
24
namespace {

// Number of seconds after which recognition is cancelled if no speech has been
// heard.
const int kNoSpeechTimeoutInSeconds = 5;

// Sentinel session id meaning "there is no active speech session".
const int kInvalidSessionId = -1;

}  // namespace
30
31 // Speech recognizer listener. This is separate from SpeechRecognizer because
32 // the speech recognition engine must function from the IO thread. Because of
33 // this, the lifecycle of this class must be decoupled from the lifecycle of
34 // SpeechRecognizer. To avoid circular references, this class has no reference
35 // to SpeechRecognizer. Instead, it has a reference to the
36 // SpeechRecognizerDelegate via a weak pointer that is only ever referenced from
37 // the UI thread.
38 class SpeechRecognizer::EventListener
39     : public base::RefCountedThreadSafe<SpeechRecognizer::EventListener>,
40       public content::SpeechRecognitionEventListener {
41  public:
42   EventListener(const base::WeakPtr<SpeechRecognizerDelegate>& delegate,
43                 net::URLRequestContextGetter* url_request_context_getter,
44                 const std::string& locale);
45
46   void StartOnIOThread(int render_process_id);
47   void StopOnIOThread();
48
49  private:
50   friend class base::RefCountedThreadSafe<SpeechRecognizer::EventListener>;
51   ~EventListener();
52
53   void NotifyRecognitionStateChanged(SpeechRecognitionState new_state);
54
55   void StartSpeechTimeout();
56   void StopSpeechTimeout();
57   void SpeechTimeout();
58
59   // Overidden from content::SpeechRecognitionEventListener:
60   // These are always called on the IO thread.
61   void OnRecognitionStart(int session_id) override;
62   void OnRecognitionEnd(int session_id) override;
63   void OnRecognitionResults(
64       int session_id,
65       const content::SpeechRecognitionResults& results) override;
66   void OnRecognitionError(
67       int session_id, const content::SpeechRecognitionError& error) override;
68   void OnSoundStart(int session_id) override;
69   void OnSoundEnd(int session_id) override;
70   void OnAudioLevelsChange(
71       int session_id, float volume, float noise_volume) override;
72   void OnEnvironmentEstimationComplete(int session_id) override;
73   void OnAudioStart(int session_id) override;
74   void OnAudioEnd(int session_id) override;
75
76   // Only dereferenced from the UI thread, but copied on IO thread.
77   base::WeakPtr<SpeechRecognizerDelegate> delegate_;
78
79   // All remaining members only accessed from the IO thread.
80   scoped_refptr<net::URLRequestContextGetter> url_request_context_getter_;
81   std::string locale_;
82   base::Timer speech_timeout_;
83   int session_;
84
85   base::WeakPtrFactory<EventListener> weak_factory_;
86
87   DISALLOW_COPY_AND_ASSIGN(EventListener);
88 };
89
90 SpeechRecognizer::EventListener::EventListener(
91     const base::WeakPtr<SpeechRecognizerDelegate>& delegate,
92     net::URLRequestContextGetter* url_request_context_getter,
93     const std::string& locale)
94     : delegate_(delegate),
95       url_request_context_getter_(url_request_context_getter),
96       locale_(locale),
97       speech_timeout_(false, false),
98       session_(kInvalidSessionId),
99       weak_factory_(this) {
100   DCHECK_CURRENTLY_ON(content::BrowserThread::UI);
101 }
102
103 SpeechRecognizer::EventListener::~EventListener() {
104   DCHECK(!speech_timeout_.IsRunning());
105 }
106
107 void SpeechRecognizer::EventListener::StartOnIOThread(int render_process_id) {
108   DCHECK_CURRENTLY_ON(content::BrowserThread::IO);
109   if (session_ != kInvalidSessionId)
110     StopOnIOThread();
111
112   content::SpeechRecognitionSessionConfig config;
113   config.language = locale_;
114   config.is_legacy_api = false;
115   config.continuous = true;
116   config.interim_results = true;
117   config.max_hypotheses = 1;
118   config.filter_profanities = true;
119   config.url_request_context_getter = url_request_context_getter_;
120   config.event_listener = weak_factory_.GetWeakPtr();
121   config.initial_context.render_process_id = render_process_id;
122
123   auto speech_instance = content::SpeechRecognitionManager::GetInstance();
124   session_ = speech_instance->CreateSession(config);
125   speech_instance->StartSession(session_);
126 }
127
128 void SpeechRecognizer::EventListener::StopOnIOThread() {
129   DCHECK_CURRENTLY_ON(content::BrowserThread::IO);
130   if (session_ == kInvalidSessionId)
131     return;
132
133   // Prevent recursion.
134   int session = session_;
135   session_ = kInvalidSessionId;
136   StopSpeechTimeout();
137   content::SpeechRecognitionManager::GetInstance()->StopAudioCaptureForSession(
138       session);
139 }
140
141 void SpeechRecognizer::EventListener::NotifyRecognitionStateChanged(
142     SpeechRecognitionState new_state) {
143   content::BrowserThread::PostTask(
144       content::BrowserThread::UI,
145       FROM_HERE,
146       base::Bind(&SpeechRecognizerDelegate::OnSpeechRecognitionStateChanged,
147                  delegate_,
148                  new_state));
149 }
150
151 void SpeechRecognizer::EventListener::StartSpeechTimeout() {
152   DCHECK_CURRENTLY_ON(content::BrowserThread::IO);
153   speech_timeout_.Start(
154       FROM_HERE,
155       base::TimeDelta::FromSeconds(kNoSpeechTimeoutInSeconds),
156       base::Bind(&SpeechRecognizer::EventListener::SpeechTimeout, this));
157 }
158
159 void SpeechRecognizer::EventListener::StopSpeechTimeout() {
160   DCHECK_CURRENTLY_ON(content::BrowserThread::IO);
161   speech_timeout_.Stop();
162 }
163
164 void SpeechRecognizer::EventListener::SpeechTimeout() {
165   DCHECK_CURRENTLY_ON(content::BrowserThread::IO);
166   StopOnIOThread();
167 }
168
169 void SpeechRecognizer::EventListener::OnRecognitionStart(int session_id) {
170   NotifyRecognitionStateChanged(SPEECH_RECOGNITION_RECOGNIZING);
171 }
172
173 void SpeechRecognizer::EventListener::OnRecognitionEnd(int session_id) {
174   StopOnIOThread();
175   NotifyRecognitionStateChanged(SPEECH_RECOGNITION_READY);
176 }
177
178 void SpeechRecognizer::EventListener::OnRecognitionResults(
179     int session_id, const content::SpeechRecognitionResults& results) {
180   base::string16 result_str;
181   size_t final_count = 0;
182   for (const auto& result : results) {
183     if (!result.is_provisional)
184       final_count++;
185     result_str += result.hypotheses[0].utterance;
186   }
187   StopSpeechTimeout();
188   content::BrowserThread::PostTask(
189       content::BrowserThread::UI,
190       FROM_HERE,
191       base::Bind(&SpeechRecognizerDelegate::OnSpeechResult,
192                  delegate_,
193                  result_str,
194                  final_count == results.size()));
195
196   // Stop the moment we have a final result.
197   if (final_count == results.size())
198     StopOnIOThread();
199 }
200
201 void SpeechRecognizer::EventListener::OnRecognitionError(
202     int session_id, const content::SpeechRecognitionError& error) {
203   StopOnIOThread();
204   if (error.code == content::SPEECH_RECOGNITION_ERROR_NETWORK) {
205     NotifyRecognitionStateChanged(SPEECH_RECOGNITION_NETWORK_ERROR);
206   }
207   NotifyRecognitionStateChanged(SPEECH_RECOGNITION_READY);
208 }
209
210 void SpeechRecognizer::EventListener::OnSoundStart(int session_id) {
211   StartSpeechTimeout();
212   NotifyRecognitionStateChanged(SPEECH_RECOGNITION_IN_SPEECH);
213 }
214
215 void SpeechRecognizer::EventListener::OnSoundEnd(int session_id) {
216   StopOnIOThread();
217   NotifyRecognitionStateChanged(SPEECH_RECOGNITION_RECOGNIZING);
218 }
219
220 void SpeechRecognizer::EventListener::OnAudioLevelsChange(
221     int session_id, float volume, float noise_volume) {
222   DCHECK_LE(0.0, volume);
223   DCHECK_GE(1.0, volume);
224   DCHECK_LE(0.0, noise_volume);
225   DCHECK_GE(1.0, noise_volume);
226   volume = std::max(0.0f, volume - noise_volume);
227   // Both |volume| and |noise_volume| are defined to be in the range [0.0, 1.0].
228   // See: content/public/browser/speech_recognition_event_listener.h
229   int16_t sound_level = static_cast<int16_t>(INT16_MAX * volume);
230   content::BrowserThread::PostTask(
231       content::BrowserThread::UI,
232       FROM_HERE,
233       base::Bind(&SpeechRecognizerDelegate::OnSpeechSoundLevelChanged,
234                  delegate_,
235                  sound_level));
236 }
237
238 void SpeechRecognizer::EventListener::OnEnvironmentEstimationComplete(
239     int session_id) {
240 }
241
242 void SpeechRecognizer::EventListener::OnAudioStart(int session_id) {
243 }
244
245 void SpeechRecognizer::EventListener::OnAudioEnd(int session_id) {
246 }
247
248 SpeechRecognizer::SpeechRecognizer(
249     const base::WeakPtr<SpeechRecognizerDelegate>& delegate,
250     net::URLRequestContextGetter* url_request_context_getter,
251     const std::string& locale)
252     : delegate_(delegate),
253       speech_event_listener_(new EventListener(
254           delegate, url_request_context_getter, locale)){
255   DCHECK_CURRENTLY_ON(content::BrowserThread::UI);
256 }
257
258 SpeechRecognizer::~SpeechRecognizer() {
259   DCHECK_CURRENTLY_ON(content::BrowserThread::UI);
260   Stop();
261 }
262
263 void SpeechRecognizer::Start() {
264   DCHECK_CURRENTLY_ON(content::BrowserThread::UI);
265   // The speech recognizer checks to see if the request is allowed by looking
266   // up the renderer process. A renderer containing the app-list is hard-coded
267   // to be allowed.
268   if (!delegate_)
269     return;
270   content::WebContents* contents = delegate_->GetSpeechContents();
271   if (!contents)
272     return;
273
274   content::BrowserThread::PostTask(
275       content::BrowserThread::IO,
276       FROM_HERE,
277       base::Bind(&SpeechRecognizer::EventListener::StartOnIOThread,
278                  speech_event_listener_,
279                  contents->GetRenderProcessHost()->GetID()));
280 }
281
282 void SpeechRecognizer::Stop() {
283   DCHECK_CURRENTLY_ON(content::BrowserThread::UI);
284   content::BrowserThread::PostTask(
285       content::BrowserThread::IO,
286       FROM_HERE,
287       base::Bind(&SpeechRecognizer::EventListener::StopOnIOThread,
288                  speech_event_listener_));
289 }
290
291 }  // namespace app_list