Upstream version 5.34.104.0
[platform/framework/web/crosswalk.git] / src / chrome / browser / speech / tts_controller.cc
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "chrome/browser/speech/tts_controller.h"
6
7 #include <string>
8 #include <vector>
9
10 #include "base/float_util.h"
11 #include "base/values.h"
12 #include "chrome/browser/profiles/profile.h"
13 #include "chrome/browser/speech/extension_api/tts_engine_extension_api.h"
14 #include "chrome/browser/speech/extension_api/tts_extension_api.h"
15 #include "chrome/browser/speech/tts_platform.h"
16 #include "chrome/common/extensions/api/speech/tts_engine_manifest_handler.h"
17 #include "extensions/browser/extension_system.h"
18 #include "extensions/common/extension.h"
19
namespace {

// Sentinel char index passed along with events that carry no text position.
const int kInvalidCharIndex = -1;

// Reduces a language/region tag such as 'fr-FR' to its two-letter language
// part ('fr'). A tag is only trimmed when it is at least five characters long
// and has a '-' as its third character; anything else is returned unchanged.
std::string TrimLanguageCode(std::string lang) {
  const bool has_region_suffix = lang.size() >= 5 && lang[2] == '-';
  if (!has_region_suffix)
    return lang;
  return lang.substr(0, 2);
}

}  // namespace
34
35 bool IsFinalTtsEventType(TtsEventType event_type) {
36   return (event_type == TTS_EVENT_END ||
37           event_type == TTS_EVENT_INTERRUPTED ||
38           event_type == TTS_EVENT_CANCELLED ||
39           event_type == TTS_EVENT_ERROR);
40 }
41
42 //
43 // UtteranceContinuousParameters
44 //
45
46
47 UtteranceContinuousParameters::UtteranceContinuousParameters()
48     : rate(-1),
49       pitch(-1),
50       volume(-1) {}
51
52
53 //
54 // VoiceData
55 //
56
57
58 VoiceData::VoiceData()
59     : gender(TTS_GENDER_NONE),
60       remote(false),
61       native(false) {}
62
63 VoiceData::~VoiceData() {}
64
65
66 //
67 // Utterance
68 //
69
70 // static
71 int Utterance::next_utterance_id_ = 0;
72
73 Utterance::Utterance(Profile* profile)
74     : profile_(profile),
75       id_(next_utterance_id_++),
76       src_id_(-1),
77       gender_(TTS_GENDER_NONE),
78       can_enqueue_(false),
79       char_index_(0),
80       finished_(false) {
81   options_.reset(new base::DictionaryValue());
82 }
83
84 Utterance::~Utterance() {
85   DCHECK(finished_);
86 }
87
88 void Utterance::OnTtsEvent(TtsEventType event_type,
89                            int char_index,
90                            const std::string& error_message) {
91   if (char_index >= 0)
92     char_index_ = char_index;
93   if (IsFinalTtsEventType(event_type))
94     finished_ = true;
95
96   if (event_delegate_)
97     event_delegate_->OnTtsEvent(this, event_type, char_index, error_message);
98   if (finished_)
99     event_delegate_.reset();
100 }
101
102 void Utterance::Finish() {
103   finished_ = true;
104 }
105
106 void Utterance::set_options(const base::Value* options) {
107   options_.reset(options->DeepCopy());
108 }
109
110 //
111 // TtsController
112 //
113
114 // static
115 TtsController* TtsController::GetInstance() {
116   return Singleton<TtsController>::get();
117 }
118
119 TtsController::TtsController()
120     : current_utterance_(NULL),
121       paused_(false),
122       platform_impl_(NULL) {
123 }
124
125 TtsController::~TtsController() {
126   if (current_utterance_) {
127     current_utterance_->Finish();
128     delete current_utterance_;
129   }
130
131   // Clear any queued utterances too.
132   ClearUtteranceQueue(false);  // Don't sent events.
133 }
134
135 void TtsController::SpeakOrEnqueue(Utterance* utterance) {
136   // If we're paused and we get an utterance that can't be queued,
137   // flush the queue but stay in the paused state.
138   if (paused_ && !utterance->can_enqueue()) {
139     Stop();
140     paused_ = true;
141     return;
142   }
143
144   if (paused_ || (IsSpeaking() && utterance->can_enqueue())) {
145     utterance_queue_.push(utterance);
146   } else {
147     Stop();
148     SpeakNow(utterance);
149   }
150 }
151
152 void TtsController::SpeakNow(Utterance* utterance) {
153   // Get all available voices and try to find a matching voice.
154   std::vector<VoiceData> voices;
155   GetVoices(utterance->profile(), &voices);
156   int index = GetMatchingVoice(utterance, voices);
157
158   // Select the matching voice, but if none was found, initialize an
159   // empty VoiceData with native = true, which will give the native
160   // speech synthesizer a chance to try to synthesize the utterance
161   // anyway.
162   VoiceData voice;
163   if (index >= 0 && index < static_cast<int>(voices.size()))
164     voice = voices[index];
165   else
166     voice.native = true;
167
168   GetPlatformImpl()->WillSpeakUtteranceWithVoice(utterance, voice);
169
170   if (!voice.native) {
171 #if !defined(OS_ANDROID)
172     DCHECK(!voice.extension_id.empty());
173     current_utterance_ = utterance;
174     utterance->set_extension_id(voice.extension_id);
175     ExtensionTtsEngineSpeak(utterance, voice);
176     bool sends_end_event =
177         voice.events.find(TTS_EVENT_END) != voice.events.end();
178     if (!sends_end_event) {
179       utterance->Finish();
180       delete utterance;
181       current_utterance_ = NULL;
182       SpeakNextUtterance();
183     }
184 #endif
185   } else {
186     // It's possible for certain platforms to send start events immediately
187     // during |speak|.
188     current_utterance_ = utterance;
189     GetPlatformImpl()->clear_error();
190     bool success = GetPlatformImpl()->Speak(
191         utterance->id(),
192         utterance->text(),
193         utterance->lang(),
194         voice,
195         utterance->continuous_parameters());
196     if (!success)
197       current_utterance_ = NULL;
198
199     // If the native voice wasn't able to process this speech, see if
200     // the browser has built-in TTS that isn't loaded yet.
201     if (!success &&
202         GetPlatformImpl()->LoadBuiltInTtsExtension(utterance->profile())) {
203       utterance_queue_.push(utterance);
204       return;
205     }
206
207     if (!success) {
208       utterance->OnTtsEvent(TTS_EVENT_ERROR, kInvalidCharIndex,
209                             GetPlatformImpl()->error());
210       delete utterance;
211       return;
212     }
213   }
214 }
215
216 void TtsController::Stop() {
217   paused_ = false;
218   if (current_utterance_ && !current_utterance_->extension_id().empty()) {
219 #if !defined(OS_ANDROID)
220     ExtensionTtsEngineStop(current_utterance_);
221 #endif
222   } else {
223     GetPlatformImpl()->clear_error();
224     GetPlatformImpl()->StopSpeaking();
225   }
226
227   if (current_utterance_)
228     current_utterance_->OnTtsEvent(TTS_EVENT_INTERRUPTED, kInvalidCharIndex,
229                                    std::string());
230   FinishCurrentUtterance();
231   ClearUtteranceQueue(true);  // Send events.
232 }
233
234 void TtsController::Pause() {
235   paused_ = true;
236   if (current_utterance_ && !current_utterance_->extension_id().empty()) {
237 #if !defined(OS_ANDROID)
238     ExtensionTtsEnginePause(current_utterance_);
239 #endif
240   } else if (current_utterance_) {
241     GetPlatformImpl()->clear_error();
242     GetPlatformImpl()->Pause();
243   }
244 }
245
246 void TtsController::Resume() {
247   paused_ = false;
248   if (current_utterance_ && !current_utterance_->extension_id().empty()) {
249 #if !defined(OS_ANDROID)
250     ExtensionTtsEngineResume(current_utterance_);
251 #endif
252   } else if (current_utterance_) {
253     GetPlatformImpl()->clear_error();
254     GetPlatformImpl()->Resume();
255   } else {
256     SpeakNextUtterance();
257   }
258 }
259
260 void TtsController::OnTtsEvent(int utterance_id,
261                                         TtsEventType event_type,
262                                         int char_index,
263                                         const std::string& error_message) {
264   // We may sometimes receive completion callbacks "late", after we've
265   // already finished the utterance (for example because another utterance
266   // interrupted or we got a call to Stop). This is normal and we can
267   // safely just ignore these events.
268   if (!current_utterance_ || utterance_id != current_utterance_->id()) {
269     return;
270   }
271   current_utterance_->OnTtsEvent(event_type, char_index, error_message);
272   if (current_utterance_->finished()) {
273     FinishCurrentUtterance();
274     SpeakNextUtterance();
275   }
276 }
277
278 void TtsController::GetVoices(Profile* profile,
279                               std::vector<VoiceData>* out_voices) {
280 #if !defined(OS_ANDROID)
281   if (profile)
282     GetExtensionVoices(profile, out_voices);
283 #endif
284
285   TtsPlatformImpl* platform_impl = GetPlatformImpl();
286   if (platform_impl && platform_impl->PlatformImplAvailable())
287     platform_impl->GetVoices(out_voices);
288 }
289
290 bool TtsController::IsSpeaking() {
291   return current_utterance_ != NULL || GetPlatformImpl()->IsSpeaking();
292 }
293
294 void TtsController::FinishCurrentUtterance() {
295   if (current_utterance_) {
296     if (!current_utterance_->finished())
297       current_utterance_->OnTtsEvent(TTS_EVENT_INTERRUPTED, kInvalidCharIndex,
298                                      std::string());
299     delete current_utterance_;
300     current_utterance_ = NULL;
301   }
302 }
303
304 void TtsController::SpeakNextUtterance() {
305   if (paused_)
306     return;
307
308   // Start speaking the next utterance in the queue.  Keep trying in case
309   // one fails but there are still more in the queue to try.
310   while (!utterance_queue_.empty() && !current_utterance_) {
311     Utterance* utterance = utterance_queue_.front();
312     utterance_queue_.pop();
313     SpeakNow(utterance);
314   }
315 }
316
317 void TtsController::RetrySpeakingQueuedUtterances() {
318   if (current_utterance_ == NULL && !utterance_queue_.empty())
319     SpeakNextUtterance();
320 }
321
322 void TtsController::ClearUtteranceQueue(bool send_events) {
323   while (!utterance_queue_.empty()) {
324     Utterance* utterance = utterance_queue_.front();
325     utterance_queue_.pop();
326     if (send_events)
327       utterance->OnTtsEvent(TTS_EVENT_CANCELLED, kInvalidCharIndex,
328                             std::string());
329     else
330       utterance->Finish();
331     delete utterance;
332   }
333 }
334
335 void TtsController::SetPlatformImpl(
336     TtsPlatformImpl* platform_impl) {
337   platform_impl_ = platform_impl;
338 }
339
340 int TtsController::QueueSize() {
341   return static_cast<int>(utterance_queue_.size());
342 }
343
344 TtsPlatformImpl* TtsController::GetPlatformImpl() {
345   if (!platform_impl_)
346     platform_impl_ = TtsPlatformImpl::GetInstance();
347   return platform_impl_;
348 }
349
350 int TtsController::GetMatchingVoice(
351     const Utterance* utterance, std::vector<VoiceData>& voices) {
352   // Make two passes: the first time, do strict language matching
353   // ('fr-FR' does not match 'fr-CA'). The second time, do prefix
354   // language matching ('fr-FR' matches 'fr' and 'fr-CA')
355   for (int pass = 0; pass < 2; ++pass) {
356     for (size_t i = 0; i < voices.size(); ++i) {
357       const VoiceData& voice = voices[i];
358
359       if (!utterance->extension_id().empty() &&
360           utterance->extension_id() != voice.extension_id) {
361         continue;
362       }
363
364       if (!voice.name.empty() &&
365           !utterance->voice_name().empty() &&
366           voice.name != utterance->voice_name()) {
367         continue;
368       }
369       if (!voice.lang.empty() && !utterance->lang().empty()) {
370         std::string voice_lang = voice.lang;
371         std::string utterance_lang = utterance->lang();
372         if (pass == 1) {
373           voice_lang = TrimLanguageCode(voice_lang);
374           utterance_lang = TrimLanguageCode(utterance_lang);
375         }
376         if (voice_lang != utterance_lang) {
377           continue;
378         }
379       }
380       if (voice.gender != TTS_GENDER_NONE &&
381           utterance->gender() != TTS_GENDER_NONE &&
382           voice.gender != utterance->gender()) {
383         continue;
384       }
385
386       if (utterance->required_event_types().size() > 0) {
387         bool has_all_required_event_types = true;
388         for (std::set<TtsEventType>::const_iterator iter =
389                  utterance->required_event_types().begin();
390              iter != utterance->required_event_types().end();
391              ++iter) {
392           if (voice.events.find(*iter) == voice.events.end()) {
393             has_all_required_event_types = false;
394             break;
395           }
396         }
397         if (!has_all_required_event_types)
398           continue;
399       }
400
401       return static_cast<int>(i);
402     }
403   }
404
405   return -1;
406 }
407
408 void TtsController::VoicesChanged() {
409   for (std::set<VoicesChangedDelegate*>::iterator iter =
410            voices_changed_delegates_.begin();
411        iter != voices_changed_delegates_.end(); ++iter) {
412     (*iter)->OnVoicesChanged();
413   }
414 }
415
416 void TtsController::AddVoicesChangedDelegate(VoicesChangedDelegate* delegate) {
417   voices_changed_delegates_.insert(delegate);
418 }
419
420 void TtsController::RemoveVoicesChangedDelegate(
421     VoicesChangedDelegate* delegate) {
422   voices_changed_delegates_.erase(delegate);
423 }