src/chrome/browser/speech/tts_controller.cc

   1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #include "chrome/browser/speech/tts_controller.h"
   6
   7 #include <string>
   8 #include <vector>
   9
  10 #include "base/float_util.h"
  11 #include "base/values.h"
  12 #include "chrome/browser/browser_process.h"
  13 #include "chrome/browser/profiles/profile.h"
  14 #include "chrome/browser/speech/extension_api/tts_engine_extension_api.h"
  15 #include "chrome/browser/speech/extension_api/tts_extension_api.h"
  16 #include "chrome/browser/speech/tts_platform.h"
  17 #include "chrome/common/extensions/api/speech/tts_engine_manifest_handler.h"
  18 #include "extensions/browser/extension_system.h"
  19 #include "extensions/common/extension.h"
  20
  21 namespace {
  22 // A value to be used to indicate that there is no char index available.
  23 const int kInvalidCharIndex = -1;
  24
  25 // Given a language/region code of the form 'fr-FR', returns just the basic
  26 // language portion, e.g. 'fr'.
  27 std::string TrimLanguageCode(std::string lang) {
  28   if (lang.size() >= 5 && lang[2] == '-')
  29     return lang.substr(0, 2);
  30   else
  31     return lang;
  32 }
  33
  34 }  // namespace
  35
  36 bool IsFinalTtsEventType(TtsEventType event_type) {
  37   return (event_type == TTS_EVENT_END ||
  38           event_type == TTS_EVENT_INTERRUPTED ||
  39           event_type == TTS_EVENT_CANCELLED ||
  40           event_type == TTS_EVENT_ERROR);
  41 }
  42
  43 //
  44 // UtteranceContinuousParameters
  45 //
  46
  47
  48 UtteranceContinuousParameters::UtteranceContinuousParameters()
  49     : rate(-1),
  50       pitch(-1),
  51       volume(-1) {}
  52
  53
  54 //
  55 // VoiceData
  56 //
  57
  58
  59 VoiceData::VoiceData()
  60     : gender(TTS_GENDER_NONE),
  61       remote(false),
  62       native(false) {}
  63
  64 VoiceData::~VoiceData() {}
  65
  66
  67 //
  68 // Utterance
  69 //
  70
  71 // static
  72 int Utterance::next_utterance_id_ = 0;
  73
  74 Utterance::Utterance(Profile* profile)
  75     : profile_(profile),
  76       id_(next_utterance_id_++),
  77       src_id_(-1),
  78       gender_(TTS_GENDER_NONE),
  79       can_enqueue_(false),
  80       char_index_(0),
  81       finished_(false) {
  82   options_.reset(new base::DictionaryValue());
  83 }
  84
  85 Utterance::~Utterance() {
  86   DCHECK(finished_);
  87 }
  88
  89 void Utterance::OnTtsEvent(TtsEventType event_type,
  90                            int char_index,
  91                            const std::string& error_message) {
  92   if (char_index >= 0)
  93     char_index_ = char_index;
  94   if (IsFinalTtsEventType(event_type))
  95     finished_ = true;
  96
  97   if (event_delegate_)
  98     event_delegate_->OnTtsEvent(this, event_type, char_index, error_message);
  99   if (finished_)
 100     event_delegate_.reset();
 101 }
 102
 103 void Utterance::Finish() {
 104   finished_ = true;
 105 }
 106
 107 void Utterance::set_options(const base::Value* options) {
 108   options_.reset(options->DeepCopy());
 109 }
 110
 111 //
 112 // TtsController
 113 //
 114
 115 // static
 116 TtsController* TtsController::GetInstance() {
 117   return Singleton<TtsController>::get();
 118 }
 119
 120 TtsController::TtsController()
 121     : current_utterance_(NULL),
 122       paused_(false),
 123       platform_impl_(NULL) {
 124 }
 125
 126 TtsController::~TtsController() {
 127   if (current_utterance_) {
 128     current_utterance_->Finish();
 129     delete current_utterance_;
 130   }
 131
 132   // Clear any queued utterances too.
 133   ClearUtteranceQueue(false);  // Don't sent events.
 134 }
 135
 136 void TtsController::SpeakOrEnqueue(Utterance* utterance) {
 137   // If we're paused and we get an utterance that can't be queued,
 138   // flush the queue but stay in the paused state.
 139   if (paused_ && !utterance->can_enqueue()) {
 140     Stop();
 141     paused_ = true;
 142     return;
 143   }
 144
 145   if (paused_ || (IsSpeaking() && utterance->can_enqueue())) {
 146     utterance_queue_.push(utterance);
 147   } else {
 148     Stop();
 149     SpeakNow(utterance);
 150   }
 151 }
 152
 153 void TtsController::SpeakNow(Utterance* utterance) {
 154   // Get all available voices and try to find a matching voice.
 155   std::vector<VoiceData> voices;
 156   GetVoices(utterance->profile(), &voices);
 157   int index = GetMatchingVoice(utterance, voices);
 158
 159   VoiceData voice;
 160   if (index != -1) {
 161     // Select the matching voice.
 162     voice = voices[index];
 163   } else {
 164     // However, if no match was found on a platform without native tts voices,
 165     // attempt to get a voice based only on the current locale without respect
 166     // to any supplied voice names.
 167     std::vector<VoiceData> native_voices;
 168
 169     if (GetPlatformImpl()->PlatformImplAvailable())
 170       GetPlatformImpl()->GetVoices(&native_voices);
 171
 172     if (native_voices.empty() && !voices.empty()) {
 173       // TODO(dtseng): Notify extension caller of an error.
 174       utterance->set_voice_name("");
 175       utterance->set_lang(g_browser_process->GetApplicationLocale());
 176       index = GetMatchingVoice(utterance, voices);
 177
 178       // If even that fails, just take the first available voice.
 179       if (index == -1)
 180         index = 0;
 181       voice = voices[index];
 182     } else {
 183       // Otherwise, simply give native voices a chance to handle this utterance.
 184       voice.native = true;
 185     }
 186   }
 187
 188   GetPlatformImpl()->WillSpeakUtteranceWithVoice(utterance, voice);
 189
 190   if (!voice.native) {
 191 #if !defined(OS_ANDROID)
 192     DCHECK(!voice.extension_id.empty());
 193     current_utterance_ = utterance;
 194     utterance->set_extension_id(voice.extension_id);
 195     ExtensionTtsEngineSpeak(utterance, voice);
 196     bool sends_end_event =
 197         voice.events.find(TTS_EVENT_END) != voice.events.end();
 198     if (!sends_end_event) {
 199       utterance->Finish();
 200       delete utterance;
 201       current_utterance_ = NULL;
 202       SpeakNextUtterance();
 203     }
 204 #endif
 205   } else {
 206     // It's possible for certain platforms to send start events immediately
 207     // during |speak|.
 208     current_utterance_ = utterance;
 209     GetPlatformImpl()->clear_error();
 210     bool success = GetPlatformImpl()->Speak(
 211         utterance->id(),
 212         utterance->text(),
 213         utterance->lang(),
 214         voice,
 215         utterance->continuous_parameters());
 216     if (!success)
 217       current_utterance_ = NULL;
 218
 219     // If the native voice wasn't able to process this speech, see if
 220     // the browser has built-in TTS that isn't loaded yet.
 221     if (!success &&
 222         GetPlatformImpl()->LoadBuiltInTtsExtension(utterance->profile())) {
 223       utterance_queue_.push(utterance);
 224       return;
 225     }
 226
 227     if (!success) {
 228       utterance->OnTtsEvent(TTS_EVENT_ERROR, kInvalidCharIndex,
 229                             GetPlatformImpl()->error());
 230       delete utterance;
 231       return;
 232     }
 233   }
 234 }
 235
 236 void TtsController::Stop() {
 237   paused_ = false;
 238   if (current_utterance_ && !current_utterance_->extension_id().empty()) {
 239 #if !defined(OS_ANDROID)
 240     ExtensionTtsEngineStop(current_utterance_);
 241 #endif
 242   } else {
 243     GetPlatformImpl()->clear_error();
 244     GetPlatformImpl()->StopSpeaking();
 245   }
 246
 247   if (current_utterance_)
 248     current_utterance_->OnTtsEvent(TTS_EVENT_INTERRUPTED, kInvalidCharIndex,
 249                                    std::string());
 250   FinishCurrentUtterance();
 251   ClearUtteranceQueue(true);  // Send events.
 252 }
 253
 254 void TtsController::Pause() {
 255   paused_ = true;
 256   if (current_utterance_ && !current_utterance_->extension_id().empty()) {
 257 #if !defined(OS_ANDROID)
 258     ExtensionTtsEnginePause(current_utterance_);
 259 #endif
 260   } else if (current_utterance_) {
 261     GetPlatformImpl()->clear_error();
 262     GetPlatformImpl()->Pause();
 263   }
 264 }
 265
 266 void TtsController::Resume() {
 267   paused_ = false;
 268   if (current_utterance_ && !current_utterance_->extension_id().empty()) {
 269 #if !defined(OS_ANDROID)
 270     ExtensionTtsEngineResume(current_utterance_);
 271 #endif
 272   } else if (current_utterance_) {
 273     GetPlatformImpl()->clear_error();
 274     GetPlatformImpl()->Resume();
 275   } else {
 276     SpeakNextUtterance();
 277   }
 278 }
 279
 280 void TtsController::OnTtsEvent(int utterance_id,
 281                                         TtsEventType event_type,
 282                                         int char_index,
 283                                         const std::string& error_message) {
 284   // We may sometimes receive completion callbacks "late", after we've
 285   // already finished the utterance (for example because another utterance
 286   // interrupted or we got a call to Stop). This is normal and we can
 287   // safely just ignore these events.
 288   if (!current_utterance_ || utterance_id != current_utterance_->id()) {
 289     return;
 290   }
 291   current_utterance_->OnTtsEvent(event_type, char_index, error_message);
 292   if (current_utterance_->finished()) {
 293     FinishCurrentUtterance();
 294     SpeakNextUtterance();
 295   }
 296 }
 297
 298 void TtsController::GetVoices(Profile* profile,
 299                               std::vector<VoiceData>* out_voices) {
 300 #if !defined(OS_ANDROID)
 301   if (profile)
 302     GetExtensionVoices(profile, out_voices);
 303 #endif
 304
 305   TtsPlatformImpl* platform_impl = GetPlatformImpl();
 306   if (platform_impl && platform_impl->PlatformImplAvailable())
 307     platform_impl->GetVoices(out_voices);
 308 }
 309
 310 bool TtsController::IsSpeaking() {
 311   return current_utterance_ != NULL || GetPlatformImpl()->IsSpeaking();
 312 }
 313
 314 void TtsController::FinishCurrentUtterance() {
 315   if (current_utterance_) {
 316     if (!current_utterance_->finished())
 317       current_utterance_->OnTtsEvent(TTS_EVENT_INTERRUPTED, kInvalidCharIndex,
 318                                      std::string());
 319     delete current_utterance_;
 320     current_utterance_ = NULL;
 321   }
 322 }
 323
 324 void TtsController::SpeakNextUtterance() {
 325   if (paused_)
 326     return;
 327
 328   // Start speaking the next utterance in the queue.  Keep trying in case
 329   // one fails but there are still more in the queue to try.
 330   while (!utterance_queue_.empty() && !current_utterance_) {
 331     Utterance* utterance = utterance_queue_.front();
 332     utterance_queue_.pop();
 333     SpeakNow(utterance);
 334   }
 335 }
 336
 337 void TtsController::RetrySpeakingQueuedUtterances() {
 338   if (current_utterance_ == NULL && !utterance_queue_.empty())
 339     SpeakNextUtterance();
 340 }
 341
 342 void TtsController::ClearUtteranceQueue(bool send_events) {
 343   while (!utterance_queue_.empty()) {
 344     Utterance* utterance = utterance_queue_.front();
 345     utterance_queue_.pop();
 346     if (send_events)
 347       utterance->OnTtsEvent(TTS_EVENT_CANCELLED, kInvalidCharIndex,
 348                             std::string());
 349     else
 350       utterance->Finish();
 351     delete utterance;
 352   }
 353 }
 354
 355 void TtsController::SetPlatformImpl(
 356     TtsPlatformImpl* platform_impl) {
 357   platform_impl_ = platform_impl;
 358 }
 359
 360 int TtsController::QueueSize() {
 361   return static_cast<int>(utterance_queue_.size());
 362 }
 363
 364 TtsPlatformImpl* TtsController::GetPlatformImpl() {
 365   if (!platform_impl_)
 366     platform_impl_ = TtsPlatformImpl::GetInstance();
 367   return platform_impl_;
 368 }
 369
 370 int TtsController::GetMatchingVoice(
 371     const Utterance* utterance, std::vector<VoiceData>& voices) {
 372   // Make two passes: the first time, do strict language matching
 373   // ('fr-FR' does not match 'fr-CA'). The second time, do prefix
 374   // language matching ('fr-FR' matches 'fr' and 'fr-CA')
 375   for (int pass = 0; pass < 2; ++pass) {
 376     for (size_t i = 0; i < voices.size(); ++i) {
 377       const VoiceData& voice = voices[i];
 378
 379       if (!utterance->extension_id().empty() &&
 380           utterance->extension_id() != voice.extension_id) {
 381         continue;
 382       }
 383
 384       if (!voice.name.empty() &&
 385           !utterance->voice_name().empty() &&
 386           voice.name != utterance->voice_name()) {
 387         continue;
 388       }
 389       if (!voice.lang.empty() && !utterance->lang().empty()) {
 390         std::string voice_lang = voice.lang;
 391         std::string utterance_lang = utterance->lang();
 392         if (pass == 1) {
 393           voice_lang = TrimLanguageCode(voice_lang);
 394           utterance_lang = TrimLanguageCode(utterance_lang);
 395         }
 396         if (voice_lang != utterance_lang) {
 397           continue;
 398         }
 399       }
 400       if (voice.gender != TTS_GENDER_NONE &&
 401           utterance->gender() != TTS_GENDER_NONE &&
 402           voice.gender != utterance->gender()) {
 403         continue;
 404       }
 405
 406       if (utterance->required_event_types().size() > 0) {
 407         bool has_all_required_event_types = true;
 408         for (std::set<TtsEventType>::const_iterator iter =
 409                  utterance->required_event_types().begin();
 410              iter != utterance->required_event_types().end();
 411              ++iter) {
 412           if (voice.events.find(*iter) == voice.events.end()) {
 413             has_all_required_event_types = false;
 414             break;
 415           }
 416         }
 417         if (!has_all_required_event_types)
 418           continue;
 419       }
 420
 421       return static_cast<int>(i);
 422     }
 423   }
 424
 425   return -1;
 426 }
 427
 428 void TtsController::VoicesChanged() {
 429   for (std::set<VoicesChangedDelegate*>::iterator iter =
 430            voices_changed_delegates_.begin();
 431        iter != voices_changed_delegates_.end(); ++iter) {
 432     (*iter)->OnVoicesChanged();
 433   }
 434 }
 435
 436 void TtsController::AddVoicesChangedDelegate(VoicesChangedDelegate* delegate) {
 437   voices_changed_delegates_.insert(delegate);
 438 }
 439
 440 void TtsController::RemoveVoicesChangedDelegate(
 441     VoicesChangedDelegate* delegate) {
 442   voices_changed_delegates_.erase(delegate);
 443 }