src/chrome/browser/speech/tts_controller.h

   1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #ifndef CHROME_BROWSER_SPEECH_TTS_CONTROLLER_H_
   6 #define CHROME_BROWSER_SPEECH_TTS_CONTROLLER_H_
   7
   8 #include <queue>
   9 #include <set>
  10 #include <string>
  11 #include <vector>
  12
  13 #include "base/memory/scoped_ptr.h"
  14 #include "base/memory/singleton.h"
  15 #include "base/memory/weak_ptr.h"
  16 #include "url/gurl.h"
  17
  18 class Utterance;
  19 class TtsPlatformImpl;
  20
  21 namespace base {
  22 class Value;
  23 }
  24
  25 namespace content {
  26 class BrowserContext;
  27 }
  28
  29 // Events sent back from the TTS engine indicating the progress.
  30 enum TtsEventType {
  31   TTS_EVENT_START,
  32   TTS_EVENT_END,
  33   TTS_EVENT_WORD,
  34   TTS_EVENT_SENTENCE,
  35   TTS_EVENT_MARKER,
  36   TTS_EVENT_INTERRUPTED,
  37   TTS_EVENT_CANCELLED,
  38   TTS_EVENT_ERROR,
  39   TTS_EVENT_PAUSE,
  40   TTS_EVENT_RESUME
  41 };
  42
  43 enum TtsGenderType {
  44   TTS_GENDER_NONE,
  45   TTS_GENDER_MALE,
  46   TTS_GENDER_FEMALE
  47 };
  48
  49 // Returns true if this event type is one that indicates an utterance
  50 // is finished and can be destroyed.
  51 bool IsFinalTtsEventType(TtsEventType event_type);
  52
  53 // The continuous parameters that apply to a given utterance.
  54 struct UtteranceContinuousParameters {
  55   UtteranceContinuousParameters();
  56
  57   double rate;
  58   double pitch;
  59   double volume;
  60 };
  61
  62 // Information about one voice.
  63 struct VoiceData {
  64   VoiceData();
  65   ~VoiceData();
  66
  67   std::string name;
  68   std::string lang;
  69   TtsGenderType gender;
  70   std::string extension_id;
  71   std::set<TtsEventType> events;
  72
  73   // If true, the synthesis engine is a remote network resource.
  74   // It may be higher latency and may incur bandwidth costs.
  75   bool remote;
  76
  77   // If true, this is implemented by this platform's subclass of
  78   // TtsPlatformImpl. If false, this is implemented by an extension.
  79   bool native;
  80   std::string native_voice_identifier;
  81 };
  82
  83 // Interface that delegates TTS requests to user-installed extensions.
  84 class TtsEngineDelegate {
  85  public:
  86   virtual ~TtsEngineDelegate() {}
  87
  88   // Return a list of all available voices registered.
  89   virtual void GetVoices(content::BrowserContext* browser_context,
  90                          std::vector<VoiceData>* out_voices) = 0;
  91
  92   // Speak the given utterance by sending an event to the given TTS engine.
  93   virtual void Speak(Utterance* utterance, const VoiceData& voice) = 0;
  94
  95   // Stop speaking the given utterance by sending an event to the target
  96   // associated with this utterance.
  97   virtual void Stop(Utterance* utterance) = 0;
  98
  99   // Pause in the middle of speaking this utterance.
 100   virtual void Pause(Utterance* utterance) = 0;
 101
 102   // Resume speaking this utterance.
 103   virtual void Resume(Utterance* utterance) = 0;
 104
 105   // Load the built-in component extension for ChromeOS.
 106   virtual bool LoadBuiltInTtsExtension(
 107       content::BrowserContext* browser_context) = 0;
 108 };
 109
 110 // Class that wants to receive events on utterances.
 111 class UtteranceEventDelegate {
 112  public:
 113   virtual ~UtteranceEventDelegate() {}
 114   virtual void OnTtsEvent(Utterance* utterance,
 115                           TtsEventType event_type,
 116                           int char_index,
 117                           const std::string& error_message) = 0;
 118 };
 119
 120 // Class that wants to be notified when the set of
 121 // voices has changed.
 122 class VoicesChangedDelegate {
 123  public:
 124   virtual ~VoicesChangedDelegate() {}
 125   virtual void OnVoicesChanged() = 0;
 126 };
 127
 128 // One speech utterance.
 129 class Utterance {
 130  public:
 131   // Construct an utterance given a profile and a completion task to call
 132   // when the utterance is done speaking. Before speaking this utterance,
 133   // its other parameters like text, rate, pitch, etc. should all be set.
 134   explicit Utterance(content::BrowserContext* browser_context);
 135   ~Utterance();
 136
 137   // Sends an event to the delegate. If the event type is TTS_EVENT_END
 138   // or TTS_EVENT_ERROR, deletes the utterance. If |char_index| is -1,
 139   // uses the last good value.
 140   void OnTtsEvent(TtsEventType event_type,
 141                   int char_index,
 142                   const std::string& error_message);
 143
 144   // Finish an utterance without sending an event to the delegate.
 145   void Finish();
 146
 147   // Getters and setters for the text to speak and other speech options.
 148   void set_text(const std::string& text) { text_ = text; }
 149   const std::string& text() const { return text_; }
 150
 151   void set_options(const base::Value* options);
 152   const base::Value* options() const { return options_.get(); }
 153
 154   void set_src_extension_id(const std::string& src_extension_id) {
 155     src_extension_id_ = src_extension_id;
 156   }
 157   const std::string& src_extension_id() { return src_extension_id_; }
 158
 159   void set_src_id(int src_id) { src_id_ = src_id; }
 160   int src_id() { return src_id_; }
 161
 162   void set_src_url(const GURL& src_url) { src_url_ = src_url; }
 163   const GURL& src_url() { return src_url_; }
 164
 165   void set_voice_name(const std::string& voice_name) {
 166     voice_name_ = voice_name;
 167   }
 168   const std::string& voice_name() const { return voice_name_; }
 169
 170   void set_lang(const std::string& lang) {
 171     lang_ = lang;
 172   }
 173   const std::string& lang() const { return lang_; }
 174
 175   void set_gender(TtsGenderType gender) {
 176     gender_ = gender;
 177   }
 178   TtsGenderType gender() const { return gender_; }
 179
 180   void set_continuous_parameters(const UtteranceContinuousParameters& params) {
 181     continuous_parameters_ = params;
 182   }
 183   const UtteranceContinuousParameters& continuous_parameters() {
 184     return continuous_parameters_;
 185   }
 186
 187   void set_can_enqueue(bool can_enqueue) { can_enqueue_ = can_enqueue; }
 188   bool can_enqueue() const { return can_enqueue_; }
 189
 190   void set_required_event_types(const std::set<TtsEventType>& types) {
 191     required_event_types_ = types;
 192   }
 193   const std::set<TtsEventType>& required_event_types() const {
 194     return required_event_types_;
 195   }
 196
 197   void set_desired_event_types(const std::set<TtsEventType>& types) {
 198     desired_event_types_ = types;
 199   }
 200   const std::set<TtsEventType>& desired_event_types() const {
 201     return desired_event_types_;
 202   }
 203
 204   const std::string& extension_id() const { return extension_id_; }
 205   void set_extension_id(const std::string& extension_id) {
 206     extension_id_ = extension_id;
 207   }
 208
 209   UtteranceEventDelegate* event_delegate() const {
 210     return event_delegate_.get();
 211   }
 212   void set_event_delegate(
 213       base::WeakPtr<UtteranceEventDelegate> event_delegate) {
 214     event_delegate_ = event_delegate;
 215   }
 216
 217   // Getters and setters for internal state.
 218   content::BrowserContext* browser_context() const { return browser_context_; }
 219   int id() const { return id_; }
 220   bool finished() const { return finished_; }
 221
 222  private:
 223   // The BrowserContext that initiated this utterance.
 224   content::BrowserContext* browser_context_;
 225
 226   // The extension ID of the extension providing TTS for this utterance, or
 227   // empty if native TTS is being used.
 228   std::string extension_id_;
 229
 230   // The unique ID of this utterance, used to associate callback functions
 231   // with utterances.
 232   int id_;
 233
 234   // The id of the next utterance, so we can associate requests with
 235   // responses.
 236   static int next_utterance_id_;
 237
 238   // The text to speak.
 239   std::string text_;
 240
 241   // The full options arg passed to tts.speak, which may include fields
 242   // other than the ones we explicitly parse, below.
 243   scoped_ptr<base::Value> options_;
 244
 245   // The extension ID of the extension that called speak() and should
 246   // receive events.
 247   std::string src_extension_id_;
 248
 249   // The source extension's ID of this utterance, so that it can associate
 250   // events with the appropriate callback.
 251   int src_id_;
 252
 253   // The URL of the page where the source extension called speak.
 254   GURL src_url_;
 255
 256   // The delegate to be called when an utterance event is fired.
 257   base::WeakPtr<UtteranceEventDelegate> event_delegate_;
 258
 259   // The parsed options.
 260   std::string voice_name_;
 261   std::string lang_;
 262   TtsGenderType gender_;
 263   UtteranceContinuousParameters continuous_parameters_;
 264   bool can_enqueue_;
 265   std::set<TtsEventType> required_event_types_;
 266   std::set<TtsEventType> desired_event_types_;
 267
 268   // The index of the current char being spoken.
 269   int char_index_;
 270
 271   // True if this utterance received an event indicating it's done.
 272   bool finished_;
 273 };
 274
 275 // Singleton class that manages text-to-speech for the TTS and TTS engine
 276 // extension APIs, maintaining a queue of pending utterances and keeping
 277 // track of all state.
 278 class TtsController {
 279  public:
 280   // Get the single instance of this class.
 281   static TtsController* GetInstance();
 282
 283   // Returns true if we're currently speaking an utterance.
 284   virtual bool IsSpeaking() = 0;
 285
 286   // Speak the given utterance. If the utterance's can_enqueue flag is true
 287   // and another utterance is in progress, adds it to the end of the queue.
 288   // Otherwise, interrupts any current utterance and speaks this one
 289   // immediately.
 290   virtual void SpeakOrEnqueue(Utterance* utterance) = 0;
 291
 292   // Stop all utterances and flush the queue. Implies leaving pause mode
 293   // as well.
 294   virtual void Stop() = 0;
 295
 296   // Pause the speech queue. Some engines may support pausing in the middle
 297   // of an utterance.
 298   virtual void Pause() = 0;
 299
 300   // Resume speaking.
 301   virtual void Resume() = 0;
 302
 303   // Handle events received from the speech engine. Events are forwarded to
 304   // the callback function, and in addition, completion and error events
 305   // trigger finishing the current utterance and starting the next one, if
 306   // any.
 307   virtual void OnTtsEvent(int utterance_id,
 308                           TtsEventType event_type,
 309                           int char_index,
 310                           const std::string& error_message) = 0;
 311
 312   // Return a list of all available voices, including the native voice,
 313   // if supported, and all voices registered by extensions.
 314   virtual void GetVoices(content::BrowserContext* browser_context,
 315                          std::vector<VoiceData>* out_voices) = 0;
 316
 317   // Called by the extension system or platform implementation when the
 318   // list of voices may have changed and should be re-queried.
 319   virtual void VoicesChanged() = 0;
 320
 321   // Add a delegate that wants to be notified when the set of voices changes.
 322   virtual void AddVoicesChangedDelegate(VoicesChangedDelegate* delegate) = 0;
 323
 324   // Remove delegate that wants to be notified when the set of voices changes.
 325   virtual void RemoveVoicesChangedDelegate(VoicesChangedDelegate* delegate) = 0;
 326
 327   // Set the delegate that processes TTS requests with user-installed
 328   // extensions.
 329   virtual void SetTtsEngineDelegate(TtsEngineDelegate* delegate) = 0;
 330
 331   // Get the delegate that processes TTS requests with user-installed
 332   // extensions.
 333   virtual TtsEngineDelegate* GetTtsEngineDelegate() = 0;
 334
 335   // For unit testing.
 336   virtual void SetPlatformImpl(TtsPlatformImpl* platform_impl) = 0;
 337   virtual int QueueSize() = 0;
 338
 339  protected:
 340   virtual ~TtsController() {}
 341 };
 342
 343 #endif  // CHROME_BROWSER_SPEECH_TTS_CONTROLLER_H_