src/chrome/browser/speech/tts_controller.h

   1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #ifndef CHROME_BROWSER_SPEECH_TTS_CONTROLLER_H_
   6 #define CHROME_BROWSER_SPEECH_TTS_CONTROLLER_H_
   7
   8 #include <queue>
   9 #include <set>
  10 #include <string>
  11 #include <vector>
  12
  13 #include "base/memory/scoped_ptr.h"
  14 #include "base/memory/singleton.h"
  15 #include "base/memory/weak_ptr.h"
  16 #include "url/gurl.h"
  17
  18 class Utterance;
  19 class TtsPlatformImpl;
  20 class Profile;
  21
  22 namespace base {
  23 class Value;
  24 }
  25
  26 // Progress events sent back from the TTS engine for an utterance.
  27 enum TtsEventType {
  28   TTS_EVENT_START,
  29   TTS_EVENT_END,
  30   TTS_EVENT_WORD,
  31   TTS_EVENT_SENTENCE,
  32   TTS_EVENT_MARKER,
  33   TTS_EVENT_INTERRUPTED,
  34   TTS_EVENT_CANCELLED,  // May be sent for queued utterances when the queue is cleared.
  35   TTS_EVENT_ERROR,
  36   TTS_EVENT_PAUSE,
  37   TTS_EVENT_RESUME
  38 };
  39
  40 enum TtsGenderType {  // Gender of a voice, or the gender set on an utterance.
  41   TTS_GENDER_NONE,  // NOTE(review): presumably "unspecified" -- confirm against callers.
  42   TTS_GENDER_MALE,
  43   TTS_GENDER_FEMALE
  44 };
  45
  46 // Returns true if |event_type| is one that indicates an utterance
  47 // is finished and can be destroyed. Defined outside this header.
  48 bool IsFinalTtsEventType(TtsEventType event_type);
  49
  50 // The continuous (double-valued) parameters that apply to a given utterance.
  51 struct UtteranceContinuousParameters {
  52   UtteranceContinuousParameters();  // Defined outside this header.
  53
  54   double rate;  // NOTE(review): units/valid ranges are not documented here -- confirm.
  55   double pitch;
  56   double volume;
  57 };
  58
  59 // Information about one voice, as filled in by TtsController::GetVoices().
  60 struct VoiceData {
  61   VoiceData();
  62   ~VoiceData();
  63
  64   std::string name;
  65   std::string lang;
  66   TtsGenderType gender;
  67   std::string extension_id;  // Presumably the extension providing this voice -- confirm.
  68   std::set<TtsEventType> events;  // Presumably the event types this voice sends -- confirm.
  69
  70   // If true, the synthesis engine is a remote network resource.
  71   // It may be higher latency and may incur bandwidth costs.
  72   bool remote;
  73
  74   // If true, this is implemented by this platform's subclass of
  75   // TtsPlatformImpl. If false, this is implemented by an extension.
  76   bool native;
  77   std::string native_voice_identifier;  // NOTE(review): likely only used when |native| -- confirm.
  78 };
  79
  80 // Interface for classes that want to receive events on utterances.
  81 class UtteranceEventDelegate {  // Utterance refers to its delegate via base::WeakPtr.
  82  public:
  83   virtual ~UtteranceEventDelegate() {}
  84   virtual void OnTtsEvent(Utterance* utterance,  // Pure virtual: implementers must override.
  85                           TtsEventType event_type,
  86                           int char_index,
  87                           const std::string& error_message) = 0;
  88 };
  89
  90 // Interface for classes that want to be notified when the set of voices
  91 // has changed. Registered via TtsController::AddVoicesChangedDelegate().
  92 class VoicesChangedDelegate {
  93  public:
  94   virtual ~VoicesChangedDelegate() {}
  95   virtual void OnVoicesChanged() = 0;
  96 };
  97
  98 // One speech utterance.
  99 class Utterance {
 100  public:
 101   // Construct an utterance given a profile and a completion task to call
 102   // when the utterance is done speaking. Before speaking this utterance,
 103   // its other parameters like text, rate, pitch, etc. should all be set.
 104   explicit Utterance(Profile* profile);
 105   virtual ~Utterance();
 106
 107   // Sends an event to the delegate. If the event type is TTS_EVENT_END
 108   // or TTS_EVENT_ERROR, deletes the utterance. If |char_index| is -1,
 109   // uses the last good value.
 110   void OnTtsEvent(TtsEventType event_type,
 111                   int char_index,
 112                   const std::string& error_message);
 113
 114   // Finish an utterance without sending an event to the delegate.
 115   void Finish();
 116
 117   // Getters and setters for the text to speak and other speech options.
 118   void set_text(const std::string& text) { text_ = text; }
 119   const std::string& text() const { return text_; }
 120
 121   void set_options(const base::Value* options);
 122   const base::Value* options() const { return options_.get(); }
 123
 124   void set_src_extension_id(const std::string& src_extension_id) {
 125     src_extension_id_ = src_extension_id;
 126   }
 127   const std::string& src_extension_id() { return src_extension_id_; }
 128
 129   void set_src_id(int src_id) { src_id_ = src_id; }
 130   int src_id() { return src_id_; }
 131
 132   void set_src_url(const GURL& src_url) { src_url_ = src_url; }
 133   const GURL& src_url() { return src_url_; }
 134
 135   void set_voice_name(const std::string& voice_name) {
 136     voice_name_ = voice_name;
 137   }
 138   const std::string& voice_name() const { return voice_name_; }
 139
 140   void set_lang(const std::string& lang) {
 141     lang_ = lang;
 142   }
 143   const std::string& lang() const { return lang_; }
 144
 145   void set_gender(TtsGenderType gender) {
 146     gender_ = gender;
 147   }
 148   TtsGenderType gender() const { return gender_; }
 149
 150   void set_continuous_parameters(const UtteranceContinuousParameters& params) {
 151     continuous_parameters_ = params;
 152   }
 153   const UtteranceContinuousParameters& continuous_parameters() {
 154     return continuous_parameters_;
 155   }
 156
 157   void set_can_enqueue(bool can_enqueue) { can_enqueue_ = can_enqueue; }
 158   bool can_enqueue() const { return can_enqueue_; }
 159
 160   void set_required_event_types(const std::set<TtsEventType>& types) {
 161     required_event_types_ = types;
 162   }
 163   const std::set<TtsEventType>& required_event_types() const {
 164     return required_event_types_;
 165   }
 166
 167   void set_desired_event_types(const std::set<TtsEventType>& types) {
 168     desired_event_types_ = types;
 169   }
 170   const std::set<TtsEventType>& desired_event_types() const {
 171     return desired_event_types_;
 172   }
 173
 174   const std::string& extension_id() const { return extension_id_; }
 175   void set_extension_id(const std::string& extension_id) {
 176     extension_id_ = extension_id;
 177   }
 178
 179   UtteranceEventDelegate* event_delegate() const {
 180     return event_delegate_.get();
 181   }
 182   void set_event_delegate(
 183       base::WeakPtr<UtteranceEventDelegate> event_delegate) {
 184     event_delegate_ = event_delegate;
 185   }
 186
 187   // Getters and setters for internal state.
 188   Profile* profile() const { return profile_; }
 189   int id() const { return id_; }
 190   bool finished() const { return finished_; }
 191
 192  protected:
 193   void set_finished_for_testing(bool finished) { finished_ = finished; }
 194
 195  private:
 196   // The profile that initiated this utterance.
 197   Profile* profile_;
 198
 199   // The extension ID of the extension providing TTS for this utterance, or
 200   // empty if native TTS is being used.
 201   std::string extension_id_;
 202
 203   // The unique ID of this utterance, used to associate callback functions
 204   // with utterances.
 205   int id_;
 206
 207   // The id of the next utterance, so we can associate requests with
 208   // responses.
 209   static int next_utterance_id_;
 210
 211   // The text to speak.
 212   std::string text_;
 213
 214   // The full options arg passed to tts.speak, which may include fields
 215   // other than the ones we explicitly parse, below.
 216   scoped_ptr<base::Value> options_;
 217
 218   // The extension ID of the extension that called speak() and should
 219   // receive events.
 220   std::string src_extension_id_;
 221
 222   // The source extension's ID of this utterance, so that it can associate
 223   // events with the appropriate callback.
 224   int src_id_;
 225
 226   // The URL of the page where the source extension called speak.
 227   GURL src_url_;
 228
 229   // The delegate to be called when an utterance event is fired.
 230   base::WeakPtr<UtteranceEventDelegate> event_delegate_;
 231
 232   // The parsed options.
 233   std::string voice_name_;
 234   std::string lang_;
 235   TtsGenderType gender_;
 236   UtteranceContinuousParameters continuous_parameters_;
 237   bool can_enqueue_;
 238   std::set<TtsEventType> required_event_types_;
 239   std::set<TtsEventType> desired_event_types_;
 240
 241   // The index of the current char being spoken.
 242   int char_index_;
 243
 244   // True if this utterance received an event indicating it's done.
 245   bool finished_;
 246 };
 247
 248 // Singleton class that manages text-to-speech for the TTS and TTS engine
 249 // extension APIs, maintaining a queue of pending utterances and keeping
 250 // track of all state.
 251 class TtsController {
 252  public:
 253   // Get the single instance of this class.
 254   static TtsController* GetInstance();
 255
 256   // Returns true if we're currently speaking an utterance.
 257   bool IsSpeaking();
 258
 259   // Speak the given utterance. If the utterance's can_enqueue flag is true
 260   // and another utterance is in progress, adds it to the end of the queue.
 261   // Otherwise, interrupts any current utterance and speaks this one
 262   // immediately.
 263   void SpeakOrEnqueue(Utterance* utterance);  // NOTE(review): likely takes ownership (see SpeakNow) -- confirm.
 264
 265   // Stop all utterances and flush the queue. Implies leaving pause mode
 266   // as well.
 267   void Stop();
 268
 269   // Pause the speech queue. Some engines may support pausing in the middle
 270   // of an utterance.
 271   void Pause();
 272
 273   // Resume speaking.
 274   void Resume();
 275
 276   // Handle events received from the speech engine. Events are forwarded to
 277   // the callback function, and in addition, completion and error events
 278   // trigger finishing the current utterance and starting the next one, if
 279   // any.
 280   void OnTtsEvent(int utterance_id,
 281                   TtsEventType event_type,
 282                   int char_index,
 283                   const std::string& error_message);
 284
 285   // Return a list of all available voices, including the native voice,
 286   // if supported, and all voices registered by extensions.
 287   void GetVoices(Profile* profile, std::vector<VoiceData>* out_voices);
 288
 289   // Called by the extension system or platform implementation when the
 290   // list of voices may have changed and should be re-queried.
 291   void VoicesChanged();
 292
 293   // Add a delegate that wants to be notified when the set of voices changes.
 294   void AddVoicesChangedDelegate(VoicesChangedDelegate* delegate);
 295
 296   // Remove delegate that wants to be notified when the set of voices changes.
 297   void RemoveVoicesChangedDelegate(VoicesChangedDelegate* delegate);
 298
 299   // For unit testing.
 300   void SetPlatformImpl(TtsPlatformImpl* platform_impl);
 301   int QueueSize();
 302
 303  protected:
 304   TtsController();
 305   virtual ~TtsController();
 306
 307  private:
 308   // Get the platform TTS implementation (or injected mock).
 309   TtsPlatformImpl* GetPlatformImpl();
 310
 311   // Start speaking the given utterance. Will either take ownership of
 312   // |utterance| or delete it if there's an error. Returns nothing (void).
 313   void SpeakNow(Utterance* utterance);
 314
 315   // Clear the utterance queue. If send_events is true, will send
 316   // TTS_EVENT_CANCELLED events on each one.
 317   void ClearUtteranceQueue(bool send_events);
 318
 319   // Finalize and delete the current utterance.
 320   void FinishCurrentUtterance();
 321
 322   // Start speaking the next utterance in the queue.
 323   void SpeakNextUtterance();
 324
 325   // Given an utterance and a vector of voices, return the
 326   // index of the voice that best matches the utterance.
 327   int GetMatchingVoice(const Utterance* utterance,
 328                        std::vector<VoiceData>& voices);
 329
 330   friend struct DefaultSingletonTraits<TtsController>;  // Grants access to the protected ctor/dtor.
 331
 332   // The current utterance being spoken.
 333   Utterance* current_utterance_;
 334
 335   // Whether the queue is paused or not.
 336   bool paused_;
 337
 338   // A queue of utterances to speak after the current one finishes.
 339   std::queue<Utterance*> utterance_queue_;
 340
 341   // A set of delegates that want to be notified when the voices change.
 342   std::set<VoicesChangedDelegate*> voices_changed_delegates_;
 343
 344   // A pointer to the platform implementation of text-to-speech, for
 345   // dependency injection.
 346   TtsPlatformImpl* platform_impl_;
 347
 348   DISALLOW_COPY_AND_ASSIGN(TtsController);
 349 };
 350
 351 #endif  // CHROME_BROWSER_SPEECH_TTS_CONTROLLER_H_