Upstream version 9.37.197.0
[platform/framework/web/crosswalk.git] / src / chrome / browser / speech / tts_controller.cc
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "chrome/browser/speech/tts_controller.h"
6
7 #include <string>
8 #include <vector>
9
10 #include "base/float_util.h"
11 #include "base/values.h"
12 #include "chrome/browser/browser_process.h"
13 #include "chrome/browser/profiles/profile.h"
14 #include "chrome/browser/speech/extension_api/tts_engine_extension_api.h"
15 #include "chrome/browser/speech/extension_api/tts_extension_api.h"
16 #include "chrome/browser/speech/tts_platform.h"
17 #include "chrome/common/extensions/api/speech/tts_engine_manifest_handler.h"
18 #include "extensions/browser/extension_system.h"
19 #include "extensions/common/extension.h"
20
namespace {

// Sentinel passed as the char index of an event when no index is available.
const int kInvalidCharIndex = -1;

// Reduces a language/region code such as 'fr-FR' to its two-letter language
// part ('fr'). A code that is shorter than five characters, or whose third
// character is not a hyphen, is returned unchanged.
std::string TrimLanguageCode(std::string lang) {
  const bool has_region_suffix = lang.size() >= 5 && lang[2] == '-';
  if (!has_region_suffix)
    return lang;
  return lang.substr(0, 2);
}

}  // namespace
35
36 bool IsFinalTtsEventType(TtsEventType event_type) {
37   return (event_type == TTS_EVENT_END ||
38           event_type == TTS_EVENT_INTERRUPTED ||
39           event_type == TTS_EVENT_CANCELLED ||
40           event_type == TTS_EVENT_ERROR);
41 }
42
43 //
44 // UtteranceContinuousParameters
45 //
46
47
48 UtteranceContinuousParameters::UtteranceContinuousParameters()
49     : rate(-1),
50       pitch(-1),
51       volume(-1) {}
52
53
54 //
55 // VoiceData
56 //
57
58
59 VoiceData::VoiceData()
60     : gender(TTS_GENDER_NONE),
61       remote(false),
62       native(false) {}
63
64 VoiceData::~VoiceData() {}
65
66
67 //
68 // Utterance
69 //
70
71 // static
72 int Utterance::next_utterance_id_ = 0;
73
74 Utterance::Utterance(Profile* profile)
75     : profile_(profile),
76       id_(next_utterance_id_++),
77       src_id_(-1),
78       gender_(TTS_GENDER_NONE),
79       can_enqueue_(false),
80       char_index_(0),
81       finished_(false) {
82   options_.reset(new base::DictionaryValue());
83 }
84
85 Utterance::~Utterance() {
86   DCHECK(finished_);
87 }
88
89 void Utterance::OnTtsEvent(TtsEventType event_type,
90                            int char_index,
91                            const std::string& error_message) {
92   if (char_index >= 0)
93     char_index_ = char_index;
94   if (IsFinalTtsEventType(event_type))
95     finished_ = true;
96
97   if (event_delegate_)
98     event_delegate_->OnTtsEvent(this, event_type, char_index, error_message);
99   if (finished_)
100     event_delegate_.reset();
101 }
102
103 void Utterance::Finish() {
104   finished_ = true;
105 }
106
107 void Utterance::set_options(const base::Value* options) {
108   options_.reset(options->DeepCopy());
109 }
110
111 //
112 // TtsController
113 //
114
115 // static
116 TtsController* TtsController::GetInstance() {
117   return Singleton<TtsController>::get();
118 }
119
120 TtsController::TtsController()
121     : current_utterance_(NULL),
122       paused_(false),
123       platform_impl_(NULL) {
124 }
125
126 TtsController::~TtsController() {
127   if (current_utterance_) {
128     current_utterance_->Finish();
129     delete current_utterance_;
130   }
131
132   // Clear any queued utterances too.
133   ClearUtteranceQueue(false);  // Don't sent events.
134 }
135
136 void TtsController::SpeakOrEnqueue(Utterance* utterance) {
137   // If we're paused and we get an utterance that can't be queued,
138   // flush the queue but stay in the paused state.
139   if (paused_ && !utterance->can_enqueue()) {
140     Stop();
141     paused_ = true;
142     return;
143   }
144
145   if (paused_ || (IsSpeaking() && utterance->can_enqueue())) {
146     utterance_queue_.push(utterance);
147   } else {
148     Stop();
149     SpeakNow(utterance);
150   }
151 }
152
153 void TtsController::SpeakNow(Utterance* utterance) {
154   // Get all available voices and try to find a matching voice.
155   std::vector<VoiceData> voices;
156   GetVoices(utterance->profile(), &voices);
157   int index = GetMatchingVoice(utterance, voices);
158
159   VoiceData voice;
160   if (index != -1) {
161     // Select the matching voice.
162     voice = voices[index];
163   } else {
164     // However, if no match was found on a platform without native tts voices,
165     // attempt to get a voice based only on the current locale without respect
166     // to any supplied voice names.
167     std::vector<VoiceData> native_voices;
168
169     if (GetPlatformImpl()->PlatformImplAvailable())
170       GetPlatformImpl()->GetVoices(&native_voices);
171
172     if (native_voices.empty() && !voices.empty()) {
173       // TODO(dtseng): Notify extension caller of an error.
174       utterance->set_voice_name("");
175       utterance->set_lang(g_browser_process->GetApplicationLocale());
176       index = GetMatchingVoice(utterance, voices);
177
178       // If even that fails, just take the first available voice.
179       if (index == -1)
180         index = 0;
181       voice = voices[index];
182     } else {
183       // Otherwise, simply give native voices a chance to handle this utterance.
184       voice.native = true;
185     }
186   }
187
188   GetPlatformImpl()->WillSpeakUtteranceWithVoice(utterance, voice);
189
190   if (!voice.native) {
191 #if !defined(OS_ANDROID)
192     DCHECK(!voice.extension_id.empty());
193     current_utterance_ = utterance;
194     utterance->set_extension_id(voice.extension_id);
195     ExtensionTtsEngineSpeak(utterance, voice);
196     bool sends_end_event =
197         voice.events.find(TTS_EVENT_END) != voice.events.end();
198     if (!sends_end_event) {
199       utterance->Finish();
200       delete utterance;
201       current_utterance_ = NULL;
202       SpeakNextUtterance();
203     }
204 #endif
205   } else {
206     // It's possible for certain platforms to send start events immediately
207     // during |speak|.
208     current_utterance_ = utterance;
209     GetPlatformImpl()->clear_error();
210     bool success = GetPlatformImpl()->Speak(
211         utterance->id(),
212         utterance->text(),
213         utterance->lang(),
214         voice,
215         utterance->continuous_parameters());
216     if (!success)
217       current_utterance_ = NULL;
218
219     // If the native voice wasn't able to process this speech, see if
220     // the browser has built-in TTS that isn't loaded yet.
221     if (!success &&
222         GetPlatformImpl()->LoadBuiltInTtsExtension(utterance->profile())) {
223       utterance_queue_.push(utterance);
224       return;
225     }
226
227     if (!success) {
228       utterance->OnTtsEvent(TTS_EVENT_ERROR, kInvalidCharIndex,
229                             GetPlatformImpl()->error());
230       delete utterance;
231       return;
232     }
233   }
234 }
235
236 void TtsController::Stop() {
237   paused_ = false;
238   if (current_utterance_ && !current_utterance_->extension_id().empty()) {
239 #if !defined(OS_ANDROID)
240     ExtensionTtsEngineStop(current_utterance_);
241 #endif
242   } else {
243     GetPlatformImpl()->clear_error();
244     GetPlatformImpl()->StopSpeaking();
245   }
246
247   if (current_utterance_)
248     current_utterance_->OnTtsEvent(TTS_EVENT_INTERRUPTED, kInvalidCharIndex,
249                                    std::string());
250   FinishCurrentUtterance();
251   ClearUtteranceQueue(true);  // Send events.
252 }
253
254 void TtsController::Pause() {
255   paused_ = true;
256   if (current_utterance_ && !current_utterance_->extension_id().empty()) {
257 #if !defined(OS_ANDROID)
258     ExtensionTtsEnginePause(current_utterance_);
259 #endif
260   } else if (current_utterance_) {
261     GetPlatformImpl()->clear_error();
262     GetPlatformImpl()->Pause();
263   }
264 }
265
266 void TtsController::Resume() {
267   paused_ = false;
268   if (current_utterance_ && !current_utterance_->extension_id().empty()) {
269 #if !defined(OS_ANDROID)
270     ExtensionTtsEngineResume(current_utterance_);
271 #endif
272   } else if (current_utterance_) {
273     GetPlatformImpl()->clear_error();
274     GetPlatformImpl()->Resume();
275   } else {
276     SpeakNextUtterance();
277   }
278 }
279
280 void TtsController::OnTtsEvent(int utterance_id,
281                                         TtsEventType event_type,
282                                         int char_index,
283                                         const std::string& error_message) {
284   // We may sometimes receive completion callbacks "late", after we've
285   // already finished the utterance (for example because another utterance
286   // interrupted or we got a call to Stop). This is normal and we can
287   // safely just ignore these events.
288   if (!current_utterance_ || utterance_id != current_utterance_->id()) {
289     return;
290   }
291   current_utterance_->OnTtsEvent(event_type, char_index, error_message);
292   if (current_utterance_->finished()) {
293     FinishCurrentUtterance();
294     SpeakNextUtterance();
295   }
296 }
297
298 void TtsController::GetVoices(Profile* profile,
299                               std::vector<VoiceData>* out_voices) {
300 #if !defined(OS_ANDROID)
301   if (profile)
302     GetExtensionVoices(profile, out_voices);
303 #endif
304
305   TtsPlatformImpl* platform_impl = GetPlatformImpl();
306   if (platform_impl && platform_impl->PlatformImplAvailable())
307     platform_impl->GetVoices(out_voices);
308 }
309
310 bool TtsController::IsSpeaking() {
311   return current_utterance_ != NULL || GetPlatformImpl()->IsSpeaking();
312 }
313
314 void TtsController::FinishCurrentUtterance() {
315   if (current_utterance_) {
316     if (!current_utterance_->finished())
317       current_utterance_->OnTtsEvent(TTS_EVENT_INTERRUPTED, kInvalidCharIndex,
318                                      std::string());
319     delete current_utterance_;
320     current_utterance_ = NULL;
321   }
322 }
323
324 void TtsController::SpeakNextUtterance() {
325   if (paused_)
326     return;
327
328   // Start speaking the next utterance in the queue.  Keep trying in case
329   // one fails but there are still more in the queue to try.
330   while (!utterance_queue_.empty() && !current_utterance_) {
331     Utterance* utterance = utterance_queue_.front();
332     utterance_queue_.pop();
333     SpeakNow(utterance);
334   }
335 }
336
337 void TtsController::RetrySpeakingQueuedUtterances() {
338   if (current_utterance_ == NULL && !utterance_queue_.empty())
339     SpeakNextUtterance();
340 }
341
342 void TtsController::ClearUtteranceQueue(bool send_events) {
343   while (!utterance_queue_.empty()) {
344     Utterance* utterance = utterance_queue_.front();
345     utterance_queue_.pop();
346     if (send_events)
347       utterance->OnTtsEvent(TTS_EVENT_CANCELLED, kInvalidCharIndex,
348                             std::string());
349     else
350       utterance->Finish();
351     delete utterance;
352   }
353 }
354
355 void TtsController::SetPlatformImpl(
356     TtsPlatformImpl* platform_impl) {
357   platform_impl_ = platform_impl;
358 }
359
360 int TtsController::QueueSize() {
361   return static_cast<int>(utterance_queue_.size());
362 }
363
364 TtsPlatformImpl* TtsController::GetPlatformImpl() {
365   if (!platform_impl_)
366     platform_impl_ = TtsPlatformImpl::GetInstance();
367   return platform_impl_;
368 }
369
370 int TtsController::GetMatchingVoice(
371     const Utterance* utterance, std::vector<VoiceData>& voices) {
372   // Make two passes: the first time, do strict language matching
373   // ('fr-FR' does not match 'fr-CA'). The second time, do prefix
374   // language matching ('fr-FR' matches 'fr' and 'fr-CA')
375   for (int pass = 0; pass < 2; ++pass) {
376     for (size_t i = 0; i < voices.size(); ++i) {
377       const VoiceData& voice = voices[i];
378
379       if (!utterance->extension_id().empty() &&
380           utterance->extension_id() != voice.extension_id) {
381         continue;
382       }
383
384       if (!voice.name.empty() &&
385           !utterance->voice_name().empty() &&
386           voice.name != utterance->voice_name()) {
387         continue;
388       }
389       if (!voice.lang.empty() && !utterance->lang().empty()) {
390         std::string voice_lang = voice.lang;
391         std::string utterance_lang = utterance->lang();
392         if (pass == 1) {
393           voice_lang = TrimLanguageCode(voice_lang);
394           utterance_lang = TrimLanguageCode(utterance_lang);
395         }
396         if (voice_lang != utterance_lang) {
397           continue;
398         }
399       }
400       if (voice.gender != TTS_GENDER_NONE &&
401           utterance->gender() != TTS_GENDER_NONE &&
402           voice.gender != utterance->gender()) {
403         continue;
404       }
405
406       if (utterance->required_event_types().size() > 0) {
407         bool has_all_required_event_types = true;
408         for (std::set<TtsEventType>::const_iterator iter =
409                  utterance->required_event_types().begin();
410              iter != utterance->required_event_types().end();
411              ++iter) {
412           if (voice.events.find(*iter) == voice.events.end()) {
413             has_all_required_event_types = false;
414             break;
415           }
416         }
417         if (!has_all_required_event_types)
418           continue;
419       }
420
421       return static_cast<int>(i);
422     }
423   }
424
425   return -1;
426 }
427
428 void TtsController::VoicesChanged() {
429   for (std::set<VoicesChangedDelegate*>::iterator iter =
430            voices_changed_delegates_.begin();
431        iter != voices_changed_delegates_.end(); ++iter) {
432     (*iter)->OnVoicesChanged();
433   }
434 }
435
436 void TtsController::AddVoicesChangedDelegate(VoicesChangedDelegate* delegate) {
437   voices_changed_delegates_.insert(delegate);
438 }
439
440 void TtsController::RemoveVoicesChangedDelegate(
441     VoicesChangedDelegate* delegate) {
442   voices_changed_delegates_.erase(delegate);
443 }