src/content/renderer/speech_recognition_dispatcher.cc
// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "content/renderer/speech_recognition_dispatcher.h"

#include "base/basictypes.h"
#include "base/strings/utf_string_conversions.h"
#include "content/common/speech_recognition_messages.h"
#include "content/renderer/render_view_impl.h"
#include "third_party/WebKit/public/platform/WebString.h"
#include "third_party/WebKit/public/platform/WebVector.h"
#include "third_party/WebKit/public/web/WebSpeechGrammar.h"
#include "third_party/WebKit/public/web/WebSpeechRecognitionParams.h"
#include "third_party/WebKit/public/web/WebSpeechRecognitionResult.h"
#include "third_party/WebKit/public/web/WebSpeechRecognizerClient.h"

#if defined(ENABLE_WEBRTC)
#include "content/renderer/media/speech_recognition_audio_sink.h"
#endif

using blink::WebVector;
using blink::WebString;
using blink::WebSpeechGrammar;
using blink::WebSpeechRecognitionHandle;
using blink::WebSpeechRecognitionResult;
using blink::WebSpeechRecognitionParams;
using blink::WebSpeechRecognizerClient;

namespace content {

SpeechRecognitionDispatcher::SpeechRecognitionDispatcher(
    RenderViewImpl* render_view)
    : RenderViewObserver(render_view),
      recognizer_client_(NULL),
      next_id_(1) {}

SpeechRecognitionDispatcher::~SpeechRecognitionDispatcher() {}

void SpeechRecognitionDispatcher::AbortAllRecognitions() {
  ResetAudioSink();
  Send(new SpeechRecognitionHostMsg_AbortAllRequests(
      routing_id()));
}

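// Dispatches speech recognition IPC messages from the browser process to the
// handler methods below. Returns false for unrelated messages so that other
// observers get a chance to handle them.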
bool SpeechRecognitionDispatcher::OnMessageReceived(
    const IPC::Message& message) {
  bool handled = true;
  IPC_BEGIN_MESSAGE_MAP(SpeechRecognitionDispatcher, message)
    IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_Started, OnRecognitionStarted)
    IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_AudioStarted, OnAudioStarted)
    IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_SoundStarted, OnSoundStarted)
    IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_SoundEnded, OnSoundEnded)
    IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_AudioEnded, OnAudioEnded)
    IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_ErrorOccurred, OnErrorOccurred)
    IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_Ended, OnRecognitionEnded)
    IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_ResultRetrieved,
                        OnResultsRetrieved)
    IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_AudioReceiverReady,
                        OnAudioReceiverReady)
    IPC_MESSAGE_UNHANDLED(handled = false)
  IPC_END_MESSAGE_MAP()
  return handled;
}

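// Starts a new recognition session on behalf of Blink. If WebRTC support is
// compiled in and the page supplied a MediaStream audio track, the track is
// validated and used as the audio source; otherwise the default microphone
// input is used. The handle is mapped to a request ID that identifies the
// session in all subsequent IPC traffic.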
void SpeechRecognitionDispatcher::start(
    const WebSpeechRecognitionHandle& handle,
    const WebSpeechRecognitionParams& params,
    WebSpeechRecognizerClient* recognizer_client) {
  DCHECK(!recognizer_client_ || recognizer_client_ == recognizer_client);
  recognizer_client_ = recognizer_client;

#if defined(ENABLE_WEBRTC)
  const blink::WebMediaStreamTrack track = params.audioTrack();
  if (!track.isNull()) {
    // Check whether this type of track is allowed by the implemented policy.
    if (SpeechRecognitionAudioSink::IsSupportedTrack(track)) {
      audio_track_.assign(track);
    } else {
      audio_track_.reset();
      // Notify the user that the provided track is not supported.
      recognizer_client_->didReceiveError(
          handle,
          WebString("Provided audioTrack is not supported."),
          WebSpeechRecognizerClient::AudioCaptureError);

      return;
    }
  }

  // Destroy any previous instance to detach from the audio track.
  // Each new session should reinstantiate the provider once the track is
  // ready.
  ResetAudioSink();
#endif

  SpeechRecognitionHostMsg_StartRequest_Params msg_params;
  for (size_t i = 0; i < params.grammars().size(); ++i) {
    const WebSpeechGrammar& grammar = params.grammars()[i];
    msg_params.grammars.push_back(
        SpeechRecognitionGrammar(grammar.src().spec(), grammar.weight()));
  }
  msg_params.language = base::UTF16ToUTF8(params.language());
  msg_params.max_hypotheses = static_cast<uint32>(params.maxAlternatives());
  msg_params.continuous = params.continuous();
  msg_params.interim_results = params.interimResults();
  msg_params.origin_url = params.origin().toString().utf8();
  msg_params.render_view_id = routing_id();
  msg_params.request_id = GetOrCreateIDForHandle(handle);
#if defined(ENABLE_WEBRTC)
  // Use the default audio input when no (allowed) audio track was provided.
  msg_params.using_audio_track = !audio_track_.isNull();
#else
  msg_params.using_audio_track = false;
#endif
  // The handle mapping will be removed in |OnRecognitionEnded|.
  Send(new SpeechRecognitionHostMsg_StartRequest(msg_params));
}

void SpeechRecognitionDispatcher::stop(
    const WebSpeechRecognitionHandle& handle,
    WebSpeechRecognizerClient* recognizer_client) {
  ResetAudioSink();
  // Ignore a |stop| issued without a matching |start|.
  if (recognizer_client_ != recognizer_client || !HandleExists(handle))
    return;
  Send(new SpeechRecognitionHostMsg_StopCaptureRequest(
      routing_id(), GetOrCreateIDForHandle(handle)));
}

void SpeechRecognitionDispatcher::abort(
    const WebSpeechRecognitionHandle& handle,
    WebSpeechRecognizerClient* recognizer_client) {
  ResetAudioSink();
  // Ignore an |abort| issued without a matching |start|.
  if (recognizer_client_ != recognizer_client || !HandleExists(handle))
    return;
  Send(new SpeechRecognitionHostMsg_AbortRequest(
      routing_id(), GetOrCreateIDForHandle(handle)));
}

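// The handlers below forward session lifecycle events reported by the browser
// process to the Blink client, which surfaces them as the corresponding Web
// Speech API events (start, audiostart, soundstart, soundend, audioend).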
void SpeechRecognitionDispatcher::OnRecognitionStarted(int request_id) {
  recognizer_client_->didStart(GetHandleFromID(request_id));
}

void SpeechRecognitionDispatcher::OnAudioStarted(int request_id) {
  recognizer_client_->didStartAudio(GetHandleFromID(request_id));
}

void SpeechRecognitionDispatcher::OnSoundStarted(int request_id) {
  recognizer_client_->didStartSound(GetHandleFromID(request_id));
}

void SpeechRecognitionDispatcher::OnSoundEnded(int request_id) {
  recognizer_client_->didEndSound(GetHandleFromID(request_id));
}

void SpeechRecognitionDispatcher::OnAudioEnded(int request_id) {
  recognizer_client_->didEndAudio(GetHandleFromID(request_id));
}

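// Maps a content-layer SpeechRecognitionErrorCode to its Blink equivalent.
// SPEECH_RECOGNITION_ERROR_NONE and SPEECH_RECOGNITION_ERROR_NO_MATCH should
// never reach this function: no-error is not reported at all, and no-match is
// routed to didReceiveNoMatch() in OnErrorOccurred() instead.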
static WebSpeechRecognizerClient::ErrorCode WebKitErrorCode(
    SpeechRecognitionErrorCode e) {
  switch (e) {
    case SPEECH_RECOGNITION_ERROR_NONE:
      NOTREACHED();
      return WebSpeechRecognizerClient::OtherError;
    case SPEECH_RECOGNITION_ERROR_ABORTED:
      return WebSpeechRecognizerClient::AbortedError;
    case SPEECH_RECOGNITION_ERROR_AUDIO:
      return WebSpeechRecognizerClient::AudioCaptureError;
    case SPEECH_RECOGNITION_ERROR_NETWORK:
      return WebSpeechRecognizerClient::NetworkError;
    case SPEECH_RECOGNITION_ERROR_NOT_ALLOWED:
      return WebSpeechRecognizerClient::NotAllowedError;
    case SPEECH_RECOGNITION_ERROR_NO_SPEECH:
      return WebSpeechRecognizerClient::NoSpeechError;
    case SPEECH_RECOGNITION_ERROR_NO_MATCH:
      NOTREACHED();
      return WebSpeechRecognizerClient::OtherError;
    case SPEECH_RECOGNITION_ERROR_BAD_GRAMMAR:
      return WebSpeechRecognizerClient::BadGrammarError;
  }
  NOTREACHED();
  return WebSpeechRecognizerClient::OtherError;
}

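// Reports an error to Blink. Per the Web Speech API, a no-match result is
// delivered through the separate didReceiveNoMatch() callback rather than as
// an error.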
void SpeechRecognitionDispatcher::OnErrorOccurred(
    int request_id, const SpeechRecognitionError& error) {
  if (error.code == SPEECH_RECOGNITION_ERROR_NO_MATCH) {
    recognizer_client_->didReceiveNoMatch(GetHandleFromID(request_id),
                                          WebSpeechRecognitionResult());
  } else {
    ResetAudioSink();
    recognizer_client_->didReceiveError(
        GetHandleFromID(request_id),
        WebString(),  // TODO(primiano): message?
        WebKitErrorCode(error.code));
  }
}

void SpeechRecognitionDispatcher::OnRecognitionEnded(int request_id) {
  // TODO(tommi): It is possible that the handle isn't found in the map if
  // the user just refreshed the page. It seems that we then get a
  // notification for the previously loaded instance of the page.
  HandleMap::iterator iter = handle_map_.find(request_id);
  if (iter == handle_map_.end()) {
    DLOG(ERROR) << "OnRecognitionEnded called for a handle that doesn't exist";
  } else {
    WebSpeechRecognitionHandle handle = iter->second;
    // Note: we need to erase the handle from the map *before* calling didEnd.
    // didEnd may call back synchronously to start a new recognition session,
    // and we don't want to delete the handle from the map after that happens.
    handle_map_.erase(request_id);
    ResetAudioSink();
    recognizer_client_->didEnd(handle);
  }
}

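// Converts the recognition results received from the browser process into
// Blink types. Results are partitioned into two vectors, final and
// provisional (interim), before being handed to the client in a single
// didReceiveResults() call.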
void SpeechRecognitionDispatcher::OnResultsRetrieved(
    int request_id, const SpeechRecognitionResults& results) {
  size_t provisional_count = 0;
  SpeechRecognitionResults::const_iterator it = results.begin();
  for (; it != results.end(); ++it) {
    if (it->is_provisional)
      ++provisional_count;
  }

  WebVector<WebSpeechRecognitionResult> provisional(provisional_count);
  WebVector<WebSpeechRecognitionResult> final(
      results.size() - provisional_count);

  int provisional_index = 0, final_index = 0;
  for (it = results.begin(); it != results.end(); ++it) {
    const SpeechRecognitionResult& result = (*it);
    WebSpeechRecognitionResult* webkit_result = result.is_provisional ?
        &provisional[provisional_index++] : &final[final_index++];

    const size_t num_hypotheses = result.hypotheses.size();
    WebVector<WebString> transcripts(num_hypotheses);
    WebVector<float> confidences(num_hypotheses);
    for (size_t i = 0; i < num_hypotheses; ++i) {
      transcripts[i] = result.hypotheses[i].utterance;
      confidences[i] = static_cast<float>(result.hypotheses[i].confidence);
    }
    webkit_result->assign(transcripts, confidences, !result.is_provisional);
  }

  recognizer_client_->didReceiveResults(
      GetHandleFromID(request_id), final, provisional);
}

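// Called when the browser-side audio receiver is ready to consume audio from
// the renderer. The shared memory region and sync socket passed here form the
// transport over which the SpeechRecognitionAudioSink pushes audio from the
// WebRTC track to the browser process.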
void SpeechRecognitionDispatcher::OnAudioReceiverReady(
    int request_id,
    const media::AudioParameters& params,
    const base::SharedMemoryHandle memory,
    const base::SyncSocket::TransitDescriptor descriptor) {
#if defined(ENABLE_WEBRTC)
  DCHECK(!speech_audio_sink_.get());
  if (audio_track_.isNull()) {
    ResetAudioSink();
    return;
  }

  // The instantiation and type of the SyncSocket are up to the client, since
  // it is dependency-injected into the SpeechRecognitionAudioSink.
  scoped_ptr<base::SyncSocket> socket(new base::CancelableSyncSocket(
      base::SyncSocket::UnwrapHandle(descriptor)));

  speech_audio_sink_.reset(new SpeechRecognitionAudioSink(
      audio_track_, params, memory, socket.Pass(),
      base::Bind(&SpeechRecognitionDispatcher::ResetAudioSink,
                 base::Unretained(this))));
#endif
}

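// Returns the request ID already associated with |handle|, or assigns a new
// one. The linear search is presumably acceptable here because a page rarely
// has more than a handful of concurrent recognition sessions.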
int SpeechRecognitionDispatcher::GetOrCreateIDForHandle(
    const WebSpeechRecognitionHandle& handle) {
  // Search first for an existing mapping.
  for (HandleMap::iterator iter = handle_map_.begin();
       iter != handle_map_.end();
       ++iter) {
    if (iter->second.equals(handle))
      return iter->first;
  }
  // If no existing mapping is found, create a new one.
  const int new_id = next_id_;
  handle_map_[new_id] = handle;
  ++next_id_;
  return new_id;
}

bool SpeechRecognitionDispatcher::HandleExists(
    const WebSpeechRecognitionHandle& handle) {
  for (HandleMap::iterator iter = handle_map_.begin();
       iter != handle_map_.end();
       ++iter) {
    if (iter->second.equals(handle))
      return true;
  }
  return false;
}

void SpeechRecognitionDispatcher::ResetAudioSink() {
#if defined(ENABLE_WEBRTC)
  speech_audio_sink_.reset();
#endif
}

const WebSpeechRecognitionHandle& SpeechRecognitionDispatcher::GetHandleFromID(
    int request_id) {
  HandleMap::iterator iter = handle_map_.find(request_id);
  DCHECK(iter != handle_map_.end());
  return iter->second;
}

}  // namespace content