8d3153591d7bfa26050284efc0b6d388e5255c7a
[platform/framework/web/crosswalk.git] / src / chrome / browser / speech / tts_controller_impl.cc
1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "chrome/browser/speech/tts_controller_impl.h"
6
7 #include <string>
8 #include <vector>
9
10 #include "base/float_util.h"
11 #include "base/values.h"
12 #include "chrome/browser/browser_process.h"
13 #include "chrome/browser/speech/tts_platform.h"
14
namespace {

// Sentinel char index handed to OnTtsEvent() when no character position is
// available for the event.
const int kInvalidCharIndex = -1;

// Reduces a language/region tag such as 'fr-FR' to its leading two-letter
// language part ('fr'). A tag is only shortened when it is at least five
// characters long and has a '-' in the third position; anything else is
// returned unchanged.
std::string TrimLanguageCode(std::string lang) {
  const bool has_region_suffix = lang.size() >= 5 && lang[2] == '-';
  return has_region_suffix ? lang.substr(0, 2) : lang;
}

}  // namespace
29
30 bool IsFinalTtsEventType(TtsEventType event_type) {
31   return (event_type == TTS_EVENT_END ||
32           event_type == TTS_EVENT_INTERRUPTED ||
33           event_type == TTS_EVENT_CANCELLED ||
34           event_type == TTS_EVENT_ERROR);
35 }
36
37 //
38 // UtteranceContinuousParameters
39 //
40
41
42 UtteranceContinuousParameters::UtteranceContinuousParameters()
43     : rate(-1),
44       pitch(-1),
45       volume(-1) {}
46
47
48 //
49 // VoiceData
50 //
51
52
53 VoiceData::VoiceData()
54     : gender(TTS_GENDER_NONE),
55       remote(false),
56       native(false) {}
57
58 VoiceData::~VoiceData() {}
59
60
61 //
62 // Utterance
63 //
64
65 // static
66 int Utterance::next_utterance_id_ = 0;
67
68 Utterance::Utterance(content::BrowserContext* browser_context)
69     : browser_context_(browser_context),
70       id_(next_utterance_id_++),
71       src_id_(-1),
72       gender_(TTS_GENDER_NONE),
73       can_enqueue_(false),
74       char_index_(0),
75       finished_(false) {
76   options_.reset(new base::DictionaryValue());
77 }
78
79 Utterance::~Utterance() {
80   DCHECK(finished_);
81 }
82
83 void Utterance::OnTtsEvent(TtsEventType event_type,
84                            int char_index,
85                            const std::string& error_message) {
86   if (char_index >= 0)
87     char_index_ = char_index;
88   if (IsFinalTtsEventType(event_type))
89     finished_ = true;
90
91   if (event_delegate_)
92     event_delegate_->OnTtsEvent(this, event_type, char_index, error_message);
93   if (finished_)
94     event_delegate_.reset();
95 }
96
97 void Utterance::Finish() {
98   finished_ = true;
99 }
100
101 void Utterance::set_options(const base::Value* options) {
102   options_.reset(options->DeepCopy());
103 }
104
105 TtsController* TtsController::GetInstance() {
106   return TtsControllerImpl::GetInstance();
107 }
108
109 //
110 // TtsControllerImpl
111 //
112
113 // static
114 TtsControllerImpl* TtsControllerImpl::GetInstance() {
115   return Singleton<TtsControllerImpl>::get();
116 }
117
118 TtsControllerImpl::TtsControllerImpl()
119     : current_utterance_(NULL),
120       paused_(false),
121       platform_impl_(NULL),
122       tts_engine_delegate_(NULL) {
123 }
124
125 TtsControllerImpl::~TtsControllerImpl() {
126   if (current_utterance_) {
127     current_utterance_->Finish();
128     delete current_utterance_;
129   }
130
131   // Clear any queued utterances too.
132   ClearUtteranceQueue(false);  // Don't sent events.
133 }
134
135 void TtsControllerImpl::SpeakOrEnqueue(Utterance* utterance) {
136   // If we're paused and we get an utterance that can't be queued,
137   // flush the queue but stay in the paused state.
138   if (paused_ && !utterance->can_enqueue()) {
139     Stop();
140     paused_ = true;
141     return;
142   }
143
144   if (paused_ || (IsSpeaking() && utterance->can_enqueue())) {
145     utterance_queue_.push(utterance);
146   } else {
147     Stop();
148     SpeakNow(utterance);
149   }
150 }
151
152 void TtsControllerImpl::SpeakNow(Utterance* utterance) {
153   // Ensure we have all built-in voices loaded. This is a no-op if already
154   // loaded.
155   bool loaded_built_in =
156       GetPlatformImpl()->LoadBuiltInTtsExtension(utterance->browser_context());
157
158   // Get all available voices and try to find a matching voice.
159   std::vector<VoiceData> voices;
160   GetVoices(utterance->browser_context(), &voices);
161   int index = GetMatchingVoice(utterance, voices);
162
163   VoiceData voice;
164   if (index != -1) {
165     // Select the matching voice.
166     voice = voices[index];
167   } else {
168     // However, if no match was found on a platform without native tts voices,
169     // attempt to get a voice based only on the current locale without respect
170     // to any supplied voice names.
171     std::vector<VoiceData> native_voices;
172
173     if (GetPlatformImpl()->PlatformImplAvailable())
174       GetPlatformImpl()->GetVoices(&native_voices);
175
176     if (native_voices.empty() && !voices.empty()) {
177       // TODO(dtseng): Notify extension caller of an error.
178       utterance->set_voice_name("");
179       // TODO(gaochun): Replace the global variable g_browser_process with
180       // GetContentClient()->browser() to eliminate the dependency of browser
181       // once TTS implementation was moved to content.
182       utterance->set_lang(g_browser_process->GetApplicationLocale());
183       index = GetMatchingVoice(utterance, voices);
184
185       // If even that fails, just take the first available voice.
186       if (index == -1)
187         index = 0;
188       voice = voices[index];
189     } else {
190       // Otherwise, simply give native voices a chance to handle this utterance.
191       voice.native = true;
192     }
193   }
194
195   GetPlatformImpl()->WillSpeakUtteranceWithVoice(utterance, voice);
196
197   if (!voice.native) {
198 #if !defined(OS_ANDROID)
199     DCHECK(!voice.extension_id.empty());
200     current_utterance_ = utterance;
201     utterance->set_extension_id(voice.extension_id);
202     if (tts_engine_delegate_)
203       tts_engine_delegate_->Speak(utterance, voice);
204     bool sends_end_event =
205         voice.events.find(TTS_EVENT_END) != voice.events.end();
206     if (!sends_end_event) {
207       utterance->Finish();
208       delete utterance;
209       current_utterance_ = NULL;
210       SpeakNextUtterance();
211     }
212 #endif
213   } else {
214     // It's possible for certain platforms to send start events immediately
215     // during |speak|.
216     current_utterance_ = utterance;
217     GetPlatformImpl()->clear_error();
218     bool success = GetPlatformImpl()->Speak(
219         utterance->id(),
220         utterance->text(),
221         utterance->lang(),
222         voice,
223         utterance->continuous_parameters());
224     if (!success)
225       current_utterance_ = NULL;
226
227     // If the native voice wasn't able to process this speech, see if
228     // the browser has built-in TTS that isn't loaded yet.
229     if (!success && loaded_built_in) {
230       utterance_queue_.push(utterance);
231       return;
232     }
233
234     if (!success) {
235       utterance->OnTtsEvent(TTS_EVENT_ERROR, kInvalidCharIndex,
236                             GetPlatformImpl()->error());
237       delete utterance;
238       return;
239     }
240   }
241 }
242
243 void TtsControllerImpl::Stop() {
244   paused_ = false;
245   if (current_utterance_ && !current_utterance_->extension_id().empty()) {
246 #if !defined(OS_ANDROID)
247     if (tts_engine_delegate_)
248       tts_engine_delegate_->Stop(current_utterance_);
249 #endif
250   } else {
251     GetPlatformImpl()->clear_error();
252     GetPlatformImpl()->StopSpeaking();
253   }
254
255   if (current_utterance_)
256     current_utterance_->OnTtsEvent(TTS_EVENT_INTERRUPTED, kInvalidCharIndex,
257                                    std::string());
258   FinishCurrentUtterance();
259   ClearUtteranceQueue(true);  // Send events.
260 }
261
262 void TtsControllerImpl::Pause() {
263   paused_ = true;
264   if (current_utterance_ && !current_utterance_->extension_id().empty()) {
265 #if !defined(OS_ANDROID)
266     if (tts_engine_delegate_)
267       tts_engine_delegate_->Pause(current_utterance_);
268 #endif
269   } else if (current_utterance_) {
270     GetPlatformImpl()->clear_error();
271     GetPlatformImpl()->Pause();
272   }
273 }
274
275 void TtsControllerImpl::Resume() {
276   paused_ = false;
277   if (current_utterance_ && !current_utterance_->extension_id().empty()) {
278 #if !defined(OS_ANDROID)
279     if (tts_engine_delegate_)
280       tts_engine_delegate_->Resume(current_utterance_);
281 #endif
282   } else if (current_utterance_) {
283     GetPlatformImpl()->clear_error();
284     GetPlatformImpl()->Resume();
285   } else {
286     SpeakNextUtterance();
287   }
288 }
289
290 void TtsControllerImpl::OnTtsEvent(int utterance_id,
291                                         TtsEventType event_type,
292                                         int char_index,
293                                         const std::string& error_message) {
294   // We may sometimes receive completion callbacks "late", after we've
295   // already finished the utterance (for example because another utterance
296   // interrupted or we got a call to Stop). This is normal and we can
297   // safely just ignore these events.
298   if (!current_utterance_ || utterance_id != current_utterance_->id()) {
299     return;
300   }
301   current_utterance_->OnTtsEvent(event_type, char_index, error_message);
302   if (current_utterance_->finished()) {
303     FinishCurrentUtterance();
304     SpeakNextUtterance();
305   }
306 }
307
308 void TtsControllerImpl::GetVoices(content::BrowserContext* browser_context,
309                               std::vector<VoiceData>* out_voices) {
310 #if !defined(OS_ANDROID)
311   if (browser_context && tts_engine_delegate_)
312     tts_engine_delegate_->GetVoices(browser_context, out_voices);
313 #endif
314
315   TtsPlatformImpl* platform_impl = GetPlatformImpl();
316   if (platform_impl) {
317     // Ensure we have all built-in voices loaded. This is a no-op if already
318     // loaded.
319     platform_impl->LoadBuiltInTtsExtension(browser_context);
320     if (platform_impl->PlatformImplAvailable())
321       platform_impl->GetVoices(out_voices);
322   }
323 }
324
325 bool TtsControllerImpl::IsSpeaking() {
326   return current_utterance_ != NULL || GetPlatformImpl()->IsSpeaking();
327 }
328
329 void TtsControllerImpl::FinishCurrentUtterance() {
330   if (current_utterance_) {
331     if (!current_utterance_->finished())
332       current_utterance_->OnTtsEvent(TTS_EVENT_INTERRUPTED, kInvalidCharIndex,
333                                      std::string());
334     delete current_utterance_;
335     current_utterance_ = NULL;
336   }
337 }
338
339 void TtsControllerImpl::SpeakNextUtterance() {
340   if (paused_)
341     return;
342
343   // Start speaking the next utterance in the queue.  Keep trying in case
344   // one fails but there are still more in the queue to try.
345   while (!utterance_queue_.empty() && !current_utterance_) {
346     Utterance* utterance = utterance_queue_.front();
347     utterance_queue_.pop();
348     SpeakNow(utterance);
349   }
350 }
351
352 void TtsControllerImpl::ClearUtteranceQueue(bool send_events) {
353   while (!utterance_queue_.empty()) {
354     Utterance* utterance = utterance_queue_.front();
355     utterance_queue_.pop();
356     if (send_events)
357       utterance->OnTtsEvent(TTS_EVENT_CANCELLED, kInvalidCharIndex,
358                             std::string());
359     else
360       utterance->Finish();
361     delete utterance;
362   }
363 }
364
365 void TtsControllerImpl::SetPlatformImpl(
366     TtsPlatformImpl* platform_impl) {
367   platform_impl_ = platform_impl;
368 }
369
370 int TtsControllerImpl::QueueSize() {
371   return static_cast<int>(utterance_queue_.size());
372 }
373
374 TtsPlatformImpl* TtsControllerImpl::GetPlatformImpl() {
375   if (!platform_impl_)
376     platform_impl_ = TtsPlatformImpl::GetInstance();
377   return platform_impl_;
378 }
379
380 int TtsControllerImpl::GetMatchingVoice(
381     const Utterance* utterance, std::vector<VoiceData>& voices) {
382   // Make two passes: the first time, do strict language matching
383   // ('fr-FR' does not match 'fr-CA'). The second time, do prefix
384   // language matching ('fr-FR' matches 'fr' and 'fr-CA')
385   for (int pass = 0; pass < 2; ++pass) {
386     for (size_t i = 0; i < voices.size(); ++i) {
387       const VoiceData& voice = voices[i];
388
389       if (!utterance->extension_id().empty() &&
390           utterance->extension_id() != voice.extension_id) {
391         continue;
392       }
393
394       if (!voice.name.empty() &&
395           !utterance->voice_name().empty() &&
396           voice.name != utterance->voice_name()) {
397         continue;
398       }
399       if (!voice.lang.empty() && !utterance->lang().empty()) {
400         std::string voice_lang = voice.lang;
401         std::string utterance_lang = utterance->lang();
402         if (pass == 1) {
403           voice_lang = TrimLanguageCode(voice_lang);
404           utterance_lang = TrimLanguageCode(utterance_lang);
405         }
406         if (voice_lang != utterance_lang) {
407           continue;
408         }
409       }
410       if (voice.gender != TTS_GENDER_NONE &&
411           utterance->gender() != TTS_GENDER_NONE &&
412           voice.gender != utterance->gender()) {
413         continue;
414       }
415
416       if (utterance->required_event_types().size() > 0) {
417         bool has_all_required_event_types = true;
418         for (std::set<TtsEventType>::const_iterator iter =
419                  utterance->required_event_types().begin();
420              iter != utterance->required_event_types().end();
421              ++iter) {
422           if (voice.events.find(*iter) == voice.events.end()) {
423             has_all_required_event_types = false;
424             break;
425           }
426         }
427         if (!has_all_required_event_types)
428           continue;
429       }
430
431       return static_cast<int>(i);
432     }
433   }
434
435   return -1;
436 }
437
438 void TtsControllerImpl::VoicesChanged() {
439   for (std::set<VoicesChangedDelegate*>::iterator iter =
440            voices_changed_delegates_.begin();
441        iter != voices_changed_delegates_.end(); ++iter) {
442     (*iter)->OnVoicesChanged();
443   }
444 }
445
446 void TtsControllerImpl::AddVoicesChangedDelegate(
447     VoicesChangedDelegate* delegate) {
448   voices_changed_delegates_.insert(delegate);
449 }
450
451 void TtsControllerImpl::RemoveVoicesChangedDelegate(
452     VoicesChangedDelegate* delegate) {
453   voices_changed_delegates_.erase(delegate);
454 }
455
456 void TtsControllerImpl::SetTtsEngineDelegate(
457     TtsEngineDelegate* delegate) {
458   tts_engine_delegate_ = delegate;
459 }
460
461 TtsEngineDelegate* TtsControllerImpl::GetTtsEngineDelegate() {
462   return tts_engine_delegate_;
463 }