src/chrome/browser/speech/tts_controller.cc

   1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #include "chrome/browser/speech/tts_controller.h"
   6
   7 #include <string>
   8 #include <vector>
   9
  10 #include "base/float_util.h"
  11 #include "base/values.h"
  12 #include "chrome/browser/profiles/profile.h"
  13 #include "chrome/browser/speech/extension_api/tts_engine_extension_api.h"
  14 #include "chrome/browser/speech/extension_api/tts_extension_api.h"
  15 #include "chrome/browser/speech/tts_platform.h"
  16 #include "chrome/common/extensions/api/speech/tts_engine_manifest_handler.h"
  17 #include "extensions/browser/extension_system.h"
  18 #include "extensions/common/extension.h"
  19
  20 namespace {
  21 // A value to be used to indicate that there is no char index available.
  22 const int kInvalidCharIndex = -1;
  23
  24 // Given a language/region code of the form 'fr-FR', returns just the basic
  25 // language portion, e.g. 'fr'.
  26 std::string TrimLanguageCode(std::string lang) {
  27   if (lang.size() >= 5 && lang[2] == '-')
  28     return lang.substr(0, 2);
  29   else
  30     return lang;
  31 }
  32
  33 }  // namespace
  34
  35 bool IsFinalTtsEventType(TtsEventType event_type) {
  36   return (event_type == TTS_EVENT_END ||
  37           event_type == TTS_EVENT_INTERRUPTED ||
  38           event_type == TTS_EVENT_CANCELLED ||
  39           event_type == TTS_EVENT_ERROR);
  40 }
  41
  42 //
  43 // UtteranceContinuousParameters
  44 //
  45
  46
  47 UtteranceContinuousParameters::UtteranceContinuousParameters()
  48     : rate(-1),
  49       pitch(-1),
  50       volume(-1) {}
  51
  52
  53 //
  54 // VoiceData
  55 //
  56
  57
  58 VoiceData::VoiceData()
  59     : gender(TTS_GENDER_NONE),
  60       remote(false),
  61       native(false) {}
  62
  63 VoiceData::~VoiceData() {}
  64
  65
  66 //
  67 // Utterance
  68 //
  69
  70 // static
  71 int Utterance::next_utterance_id_ = 0;
  72
  73 Utterance::Utterance(Profile* profile)
  74     : profile_(profile),
  75       id_(next_utterance_id_++),
  76       src_id_(-1),
  77       gender_(TTS_GENDER_NONE),
  78       can_enqueue_(false),
  79       char_index_(0),
  80       finished_(false) {
  81   options_.reset(new base::DictionaryValue());
  82 }
  83
  84 Utterance::~Utterance() {
  85   DCHECK(finished_);
  86 }
  87
  88 void Utterance::OnTtsEvent(TtsEventType event_type,
  89                            int char_index,
  90                            const std::string& error_message) {
  91   if (char_index >= 0)
  92     char_index_ = char_index;
  93   if (IsFinalTtsEventType(event_type))
  94     finished_ = true;
  95
  96   if (event_delegate_)
  97     event_delegate_->OnTtsEvent(this, event_type, char_index, error_message);
  98   if (finished_)
  99     event_delegate_.reset();
 100 }
 101
 102 void Utterance::Finish() {
 103   finished_ = true;
 104 }
 105
 106 void Utterance::set_options(const base::Value* options) {
 107   options_.reset(options->DeepCopy());
 108 }
 109
 110 //
 111 // TtsController
 112 //
 113
 114 // static
 115 TtsController* TtsController::GetInstance() {
 116   return Singleton<TtsController>::get();
 117 }
 118
 119 TtsController::TtsController()
 120     : current_utterance_(NULL),
 121       paused_(false),
 122       platform_impl_(NULL) {
 123 }
 124
 125 TtsController::~TtsController() {
 126   if (current_utterance_) {
 127     current_utterance_->Finish();
 128     delete current_utterance_;
 129   }
 130
 131   // Clear any queued utterances too.
 132   ClearUtteranceQueue(false);  // Don't sent events.
 133 }
 134
 135 void TtsController::SpeakOrEnqueue(Utterance* utterance) {
 136   // If we're paused and we get an utterance that can't be queued,
 137   // flush the queue but stay in the paused state.
 138   if (paused_ && !utterance->can_enqueue()) {
 139     Stop();
 140     paused_ = true;
 141     return;
 142   }
 143
 144   if (paused_ || (IsSpeaking() && utterance->can_enqueue())) {
 145     utterance_queue_.push(utterance);
 146   } else {
 147     Stop();
 148     SpeakNow(utterance);
 149   }
 150 }
 151
 152 void TtsController::SpeakNow(Utterance* utterance) {
 153   // Get all available voices and try to find a matching voice.
 154   std::vector<VoiceData> voices;
 155   GetVoices(utterance->profile(), &voices);
 156   int index = GetMatchingVoice(utterance, voices);
 157
 158   // Select the matching voice, but if none was found, initialize an
 159   // empty VoiceData with native = true, which will give the native
 160   // speech synthesizer a chance to try to synthesize the utterance
 161   // anyway.
 162   VoiceData voice;
 163   if (index >= 0 && index < static_cast<int>(voices.size()))
 164     voice = voices[index];
 165   else
 166     voice.native = true;
 167
 168   GetPlatformImpl()->WillSpeakUtteranceWithVoice(utterance, voice);
 169
 170   if (!voice.native) {
 171 #if !defined(OS_ANDROID)
 172     DCHECK(!voice.extension_id.empty());
 173     current_utterance_ = utterance;
 174     utterance->set_extension_id(voice.extension_id);
 175     ExtensionTtsEngineSpeak(utterance, voice);
 176     bool sends_end_event =
 177         voice.events.find(TTS_EVENT_END) != voice.events.end();
 178     if (!sends_end_event) {
 179       utterance->Finish();
 180       delete utterance;
 181       current_utterance_ = NULL;
 182       SpeakNextUtterance();
 183     }
 184 #endif
 185   } else {
 186     // It's possible for certain platforms to send start events immediately
 187     // during |speak|.
 188     current_utterance_ = utterance;
 189     GetPlatformImpl()->clear_error();
 190     bool success = GetPlatformImpl()->Speak(
 191         utterance->id(),
 192         utterance->text(),
 193         utterance->lang(),
 194         voice,
 195         utterance->continuous_parameters());
 196     if (!success)
 197       current_utterance_ = NULL;
 198
 199     // If the native voice wasn't able to process this speech, see if
 200     // the browser has built-in TTS that isn't loaded yet.
 201     if (!success &&
 202         GetPlatformImpl()->LoadBuiltInTtsExtension(utterance->profile())) {
 203       utterance_queue_.push(utterance);
 204       return;
 205     }
 206
 207     if (!success) {
 208       utterance->OnTtsEvent(TTS_EVENT_ERROR, kInvalidCharIndex,
 209                             GetPlatformImpl()->error());
 210       delete utterance;
 211       return;
 212     }
 213   }
 214 }
 215
 216 void TtsController::Stop() {
 217   paused_ = false;
 218   if (current_utterance_ && !current_utterance_->extension_id().empty()) {
 219 #if !defined(OS_ANDROID)
 220     ExtensionTtsEngineStop(current_utterance_);
 221 #endif
 222   } else {
 223     GetPlatformImpl()->clear_error();
 224     GetPlatformImpl()->StopSpeaking();
 225   }
 226
 227   if (current_utterance_)
 228     current_utterance_->OnTtsEvent(TTS_EVENT_INTERRUPTED, kInvalidCharIndex,
 229                                    std::string());
 230   FinishCurrentUtterance();
 231   ClearUtteranceQueue(true);  // Send events.
 232 }
 233
 234 void TtsController::Pause() {
 235   paused_ = true;
 236   if (current_utterance_ && !current_utterance_->extension_id().empty()) {
 237 #if !defined(OS_ANDROID)
 238     ExtensionTtsEnginePause(current_utterance_);
 239 #endif
 240   } else if (current_utterance_) {
 241     GetPlatformImpl()->clear_error();
 242     GetPlatformImpl()->Pause();
 243   }
 244 }
 245
 246 void TtsController::Resume() {
 247   paused_ = false;
 248   if (current_utterance_ && !current_utterance_->extension_id().empty()) {
 249 #if !defined(OS_ANDROID)
 250     ExtensionTtsEngineResume(current_utterance_);
 251 #endif
 252   } else if (current_utterance_) {
 253     GetPlatformImpl()->clear_error();
 254     GetPlatformImpl()->Resume();
 255   } else {
 256     SpeakNextUtterance();
 257   }
 258 }
 259
 260 void TtsController::OnTtsEvent(int utterance_id,
 261                                         TtsEventType event_type,
 262                                         int char_index,
 263                                         const std::string& error_message) {
 264   // We may sometimes receive completion callbacks "late", after we've
 265   // already finished the utterance (for example because another utterance
 266   // interrupted or we got a call to Stop). This is normal and we can
 267   // safely just ignore these events.
 268   if (!current_utterance_ || utterance_id != current_utterance_->id()) {
 269     return;
 270   }
 271   current_utterance_->OnTtsEvent(event_type, char_index, error_message);
 272   if (current_utterance_->finished()) {
 273     FinishCurrentUtterance();
 274     SpeakNextUtterance();
 275   }
 276 }
 277
 278 void TtsController::GetVoices(Profile* profile,
 279                               std::vector<VoiceData>* out_voices) {
 280 #if !defined(OS_ANDROID)
 281   if (profile)
 282     GetExtensionVoices(profile, out_voices);
 283 #endif
 284
 285   TtsPlatformImpl* platform_impl = GetPlatformImpl();
 286   if (platform_impl && platform_impl->PlatformImplAvailable())
 287     platform_impl->GetVoices(out_voices);
 288 }
 289
 290 bool TtsController::IsSpeaking() {
 291   return current_utterance_ != NULL || GetPlatformImpl()->IsSpeaking();
 292 }
 293
 294 void TtsController::FinishCurrentUtterance() {
 295   if (current_utterance_) {
 296     if (!current_utterance_->finished())
 297       current_utterance_->OnTtsEvent(TTS_EVENT_INTERRUPTED, kInvalidCharIndex,
 298                                      std::string());
 299     delete current_utterance_;
 300     current_utterance_ = NULL;
 301   }
 302 }
 303
 304 void TtsController::SpeakNextUtterance() {
 305   if (paused_)
 306     return;
 307
 308   // Start speaking the next utterance in the queue.  Keep trying in case
 309   // one fails but there are still more in the queue to try.
 310   while (!utterance_queue_.empty() && !current_utterance_) {
 311     Utterance* utterance = utterance_queue_.front();
 312     utterance_queue_.pop();
 313     SpeakNow(utterance);
 314   }
 315 }
 316
 317 void TtsController::RetrySpeakingQueuedUtterances() {
 318   if (current_utterance_ == NULL && !utterance_queue_.empty())
 319     SpeakNextUtterance();
 320 }
 321
 322 void TtsController::ClearUtteranceQueue(bool send_events) {
 323   while (!utterance_queue_.empty()) {
 324     Utterance* utterance = utterance_queue_.front();
 325     utterance_queue_.pop();
 326     if (send_events)
 327       utterance->OnTtsEvent(TTS_EVENT_CANCELLED, kInvalidCharIndex,
 328                             std::string());
 329     else
 330       utterance->Finish();
 331     delete utterance;
 332   }
 333 }
 334
 335 void TtsController::SetPlatformImpl(
 336     TtsPlatformImpl* platform_impl) {
 337   platform_impl_ = platform_impl;
 338 }
 339
 340 int TtsController::QueueSize() {
 341   return static_cast<int>(utterance_queue_.size());
 342 }
 343
 344 TtsPlatformImpl* TtsController::GetPlatformImpl() {
 345   if (!platform_impl_)
 346     platform_impl_ = TtsPlatformImpl::GetInstance();
 347   return platform_impl_;
 348 }
 349
 350 int TtsController::GetMatchingVoice(
 351     const Utterance* utterance, std::vector<VoiceData>& voices) {
 352   // Make two passes: the first time, do strict language matching
 353   // ('fr-FR' does not match 'fr-CA'). The second time, do prefix
 354   // language matching ('fr-FR' matches 'fr' and 'fr-CA')
 355   for (int pass = 0; pass < 2; ++pass) {
 356     for (size_t i = 0; i < voices.size(); ++i) {
 357       const VoiceData& voice = voices[i];
 358
 359       if (!utterance->extension_id().empty() &&
 360           utterance->extension_id() != voice.extension_id) {
 361         continue;
 362       }
 363
 364       if (!voice.name.empty() &&
 365           !utterance->voice_name().empty() &&
 366           voice.name != utterance->voice_name()) {
 367         continue;
 368       }
 369       if (!voice.lang.empty() && !utterance->lang().empty()) {
 370         std::string voice_lang = voice.lang;
 371         std::string utterance_lang = utterance->lang();
 372         if (pass == 1) {
 373           voice_lang = TrimLanguageCode(voice_lang);
 374           utterance_lang = TrimLanguageCode(utterance_lang);
 375         }
 376         if (voice_lang != utterance_lang) {
 377           continue;
 378         }
 379       }
 380       if (voice.gender != TTS_GENDER_NONE &&
 381           utterance->gender() != TTS_GENDER_NONE &&
 382           voice.gender != utterance->gender()) {
 383         continue;
 384       }
 385
 386       if (utterance->required_event_types().size() > 0) {
 387         bool has_all_required_event_types = true;
 388         for (std::set<TtsEventType>::const_iterator iter =
 389                  utterance->required_event_types().begin();
 390              iter != utterance->required_event_types().end();
 391              ++iter) {
 392           if (voice.events.find(*iter) == voice.events.end()) {
 393             has_all_required_event_types = false;
 394             break;
 395           }
 396         }
 397         if (!has_all_required_event_types)
 398           continue;
 399       }
 400
 401       return static_cast<int>(i);
 402     }
 403   }
 404
 405   return -1;
 406 }
 407
 408 void TtsController::VoicesChanged() {
 409   for (std::set<VoicesChangedDelegate*>::iterator iter =
 410            voices_changed_delegates_.begin();
 411        iter != voices_changed_delegates_.end(); ++iter) {
 412     (*iter)->OnVoicesChanged();
 413   }
 414 }
 415
 416 void TtsController::AddVoicesChangedDelegate(VoicesChangedDelegate* delegate) {
 417   voices_changed_delegates_.insert(delegate);
 418 }
 419
 420 void TtsController::RemoveVoicesChangedDelegate(
 421     VoicesChangedDelegate* delegate) {
 422   voices_changed_delegates_.erase(delegate);
 423 }