src/chrome/browser/speech/tts_linux.cc

   1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #include <math.h>
   6
   7 #include <map>
   8
   9 #include "base/memory/scoped_ptr.h"
  10 #include "base/memory/singleton.h"
  11 #include "base/synchronization/lock.h"
  12 #include "chrome/browser/speech/tts_platform.h"
  13 #include "content/public/browser/browser_thread.h"
  14
  15 #include "library_loaders/libspeechd.h"
  16
  17 using content::BrowserThread;
  18
  19 namespace {
  20
  21 const char kNotSupportedError[] =
  22     "Native speech synthesis not supported on this platform.";
  23
  24 struct SPDChromeVoice {
  25   std::string name;
  26   std::string module;
  27 };
  28
  29 }  // namespace
  30
  31 class TtsPlatformImplLinux : public TtsPlatformImpl {
  32  public:
  33   virtual bool PlatformImplAvailable() OVERRIDE;
  34   virtual bool Speak(
  35       int utterance_id,
  36       const std::string& utterance,
  37       const std::string& lang,
  38       const VoiceData& voice,
  39       const UtteranceContinuousParameters& params) OVERRIDE;
  40   virtual bool StopSpeaking() OVERRIDE;
  41   virtual void Pause() OVERRIDE;
  42   virtual void Resume() OVERRIDE;
  43   virtual bool IsSpeaking() OVERRIDE;
  44   virtual void GetVoices(std::vector<VoiceData>* out_voices) OVERRIDE;
  45
  46   void OnSpeechEvent(SPDNotificationType type);
  47
  48   // Get the single instance of this class.
  49   static TtsPlatformImplLinux* GetInstance();
  50
  51  private:
  52   TtsPlatformImplLinux();
  53   virtual ~TtsPlatformImplLinux();
  54
  55   // Initiate the connection with the speech dispatcher.
  56   void Initialize();
  57
  58   // Resets the connection with speech dispatcher.
  59   void Reset();
  60
  61   static void NotificationCallback(size_t msg_id,
  62                                    size_t client_id,
  63                                    SPDNotificationType type);
  64
  65   static void IndexMarkCallback(size_t msg_id,
  66                                 size_t client_id,
  67                                 SPDNotificationType state,
  68                                 char* index_mark);
  69
  70   static SPDNotificationType current_notification_;
  71
  72   base::Lock initialization_lock_;
  73   LibSpeechdLoader libspeechd_loader_;
  74   SPDConnection* conn_;
  75
  76   // These apply to the current utterance only.
  77   std::string utterance_;
  78   int utterance_id_;
  79
  80   // Map a string composed of a voicename and module to the voicename. Used to
  81   // uniquely identify a voice across all available modules.
  82   scoped_ptr<std::map<std::string, SPDChromeVoice> > all_native_voices_;
  83
  84   friend struct DefaultSingletonTraits<TtsPlatformImplLinux>;
  85
  86   DISALLOW_COPY_AND_ASSIGN(TtsPlatformImplLinux);
  87 };
  88
  89 // static
  90 SPDNotificationType TtsPlatformImplLinux::current_notification_ =
  91     SPD_EVENT_END;
  92
  93 TtsPlatformImplLinux::TtsPlatformImplLinux()
  94     : utterance_id_(0) {
  95   BrowserThread::PostTask(BrowserThread::FILE,
  96                           FROM_HERE,
  97                           base::Bind(&TtsPlatformImplLinux::Initialize,
  98                                      base::Unretained(this)));
  99 }
 100
 101 void TtsPlatformImplLinux::Initialize() {
 102   base::AutoLock lock(initialization_lock_);
 103
 104   if (!libspeechd_loader_.Load("libspeechd.so.2"))
 105     return;
 106
 107   conn_ = libspeechd_loader_.spd_open(
 108       "chrome", "extension_api", NULL, SPD_MODE_THREADED);
 109   if (!conn_)
 110     return;
 111
 112   // Register callbacks for all events.
 113   conn_->callback_begin =
 114     conn_->callback_end =
 115     conn_->callback_cancel =
 116     conn_->callback_pause =
 117     conn_->callback_resume =
 118     &NotificationCallback;
 119
 120   conn_->callback_im = &IndexMarkCallback;
 121
 122   libspeechd_loader_.spd_set_notification_on(conn_, SPD_BEGIN);
 123   libspeechd_loader_.spd_set_notification_on(conn_, SPD_END);
 124   libspeechd_loader_.spd_set_notification_on(conn_, SPD_CANCEL);
 125   libspeechd_loader_.spd_set_notification_on(conn_, SPD_PAUSE);
 126   libspeechd_loader_.spd_set_notification_on(conn_, SPD_RESUME);
 127 }
 128
 129 TtsPlatformImplLinux::~TtsPlatformImplLinux() {
 130   base::AutoLock lock(initialization_lock_);
 131   if (conn_) {
 132     libspeechd_loader_.spd_close(conn_);
 133     conn_ = NULL;
 134   }
 135 }
 136
 137 void TtsPlatformImplLinux::Reset() {
 138   base::AutoLock lock(initialization_lock_);
 139   if (conn_)
 140     libspeechd_loader_.spd_close(conn_);
 141   conn_ = libspeechd_loader_.spd_open(
 142       "chrome", "extension_api", NULL, SPD_MODE_THREADED);
 143 }
 144
 145 bool TtsPlatformImplLinux::PlatformImplAvailable() {
 146   if (!initialization_lock_.Try())
 147     return false;
 148   bool result = libspeechd_loader_.loaded() && (conn_ != NULL);
 149   initialization_lock_.Release();
 150   return result;
 151 }
 152
 153 bool TtsPlatformImplLinux::Speak(
 154     int utterance_id,
 155     const std::string& utterance,
 156     const std::string& lang,
 157     const VoiceData& voice,
 158     const UtteranceContinuousParameters& params) {
 159   if (!PlatformImplAvailable()) {
 160     error_ = kNotSupportedError;
 161     return false;
 162   }
 163
 164   // Speech dispatcher's speech params are around 3x at either limit.
 165   float rate = params.rate > 3 ? 3 : params.rate;
 166   rate = params.rate < 0.334 ? 0.334 : rate;
 167   float pitch = params.pitch > 3 ? 3 : params.pitch;
 168   pitch = params.pitch < 0.334 ? 0.334 : pitch;
 169
 170   std::map<std::string, SPDChromeVoice>::iterator it =
 171       all_native_voices_->find(voice.name);
 172   if (it != all_native_voices_->end()) {
 173     libspeechd_loader_.spd_set_output_module(conn_, it->second.module.c_str());
 174     libspeechd_loader_.spd_set_synthesis_voice(conn_, it->second.name.c_str());
 175   }
 176
 177   // Map our multiplicative range to Speech Dispatcher's linear range.
 178   // .334 = -100.
 179   // 3 = 100.
 180   libspeechd_loader_.spd_set_voice_rate(conn_, 100 * log10(rate) / log10(3));
 181   libspeechd_loader_.spd_set_voice_pitch(conn_, 100 * log10(pitch) / log10(3));
 182
 183   utterance_ = utterance;
 184   utterance_id_ = utterance_id;
 185
 186   if (libspeechd_loader_.spd_say(conn_, SPD_TEXT, utterance.c_str()) == -1) {
 187     Reset();
 188     return false;
 189   }
 190   return true;
 191 }
 192
 193 bool TtsPlatformImplLinux::StopSpeaking() {
 194   if (!PlatformImplAvailable())
 195     return false;
 196   if (libspeechd_loader_.spd_stop(conn_) == -1) {
 197     Reset();
 198     return false;
 199   }
 200   return true;
 201 }
 202
 203 void TtsPlatformImplLinux::Pause() {
 204   if (!PlatformImplAvailable())
 205     return;
 206   libspeechd_loader_.spd_pause(conn_);
 207 }
 208
 209 void TtsPlatformImplLinux::Resume() {
 210   if (!PlatformImplAvailable())
 211     return;
 212   libspeechd_loader_.spd_resume(conn_);
 213 }
 214
 215 bool TtsPlatformImplLinux::IsSpeaking() {
 216   return current_notification_ == SPD_EVENT_BEGIN;
 217 }
 218
 219 void TtsPlatformImplLinux::GetVoices(
 220     std::vector<VoiceData>* out_voices) {
 221   if (!all_native_voices_.get()) {
 222     all_native_voices_.reset(new std::map<std::string, SPDChromeVoice>());
 223     char** modules = libspeechd_loader_.spd_list_modules(conn_);
 224     if (!modules)
 225       return;
 226     for (int i = 0; modules[i]; i++) {
 227       char* module = modules[i];
 228       libspeechd_loader_.spd_set_output_module(conn_, module);
 229       SPDVoice** native_voices =
 230           libspeechd_loader_.spd_list_synthesis_voices(conn_);
 231       if (!native_voices) {
 232         free(module);
 233         continue;
 234       }
 235       for (int j = 0; native_voices[j]; j++) {
 236         SPDVoice* native_voice = native_voices[j];
 237         SPDChromeVoice native_data;
 238         native_data.name = native_voice->name;
 239         native_data.module = module;
 240         std::string key;
 241         key.append(native_data.name);
 242         key.append(" ");
 243         key.append(native_data.module);
 244         all_native_voices_->insert(
 245             std::pair<std::string, SPDChromeVoice>(key, native_data));
 246         free(native_voices[j]);
 247       }
 248       free(modules[i]);
 249     }
 250   }
 251
 252   for (std::map<std::string, SPDChromeVoice>::iterator it =
 253            all_native_voices_->begin();
 254        it != all_native_voices_->end();
 255        it++) {
 256     out_voices->push_back(VoiceData());
 257     VoiceData& voice = out_voices->back();
 258     voice.native = true;
 259     voice.name = it->first;
 260     voice.events.insert(TTS_EVENT_START);
 261     voice.events.insert(TTS_EVENT_END);
 262     voice.events.insert(TTS_EVENT_CANCELLED);
 263     voice.events.insert(TTS_EVENT_MARKER);
 264     voice.events.insert(TTS_EVENT_PAUSE);
 265     voice.events.insert(TTS_EVENT_RESUME);
 266   }
 267 }
 268
 269 void TtsPlatformImplLinux::OnSpeechEvent(SPDNotificationType type) {
 270   TtsController* controller = TtsController::GetInstance();
 271   switch (type) {
 272   case SPD_EVENT_BEGIN:
 273     controller->OnTtsEvent(utterance_id_, TTS_EVENT_START, 0, std::string());
 274     break;
 275   case SPD_EVENT_RESUME:
 276     controller->OnTtsEvent(utterance_id_, TTS_EVENT_RESUME, 0, std::string());
 277     break;
 278   case SPD_EVENT_END:
 279     controller->OnTtsEvent(
 280         utterance_id_, TTS_EVENT_END, utterance_.size(), std::string());
 281     break;
 282   case SPD_EVENT_PAUSE:
 283     controller->OnTtsEvent(
 284         utterance_id_, TTS_EVENT_PAUSE, utterance_.size(), std::string());
 285     break;
 286   case SPD_EVENT_CANCEL:
 287     controller->OnTtsEvent(
 288         utterance_id_, TTS_EVENT_CANCELLED, 0, std::string());
 289     break;
 290   case SPD_EVENT_INDEX_MARK:
 291     controller->OnTtsEvent(utterance_id_, TTS_EVENT_MARKER, 0, std::string());
 292     break;
 293   }
 294 }
 295
 296 // static
 297 void TtsPlatformImplLinux::NotificationCallback(
 298     size_t msg_id, size_t client_id, SPDNotificationType type) {
 299   // We run Speech Dispatcher in threaded mode, so these callbacks should always
 300   // be in a separate thread.
 301   if (!BrowserThread::CurrentlyOn(BrowserThread::UI)) {
 302     current_notification_ = type;
 303     BrowserThread::PostTask(
 304         BrowserThread::UI,
 305         FROM_HERE,
 306         base::Bind(&TtsPlatformImplLinux::OnSpeechEvent,
 307                    base::Unretained(TtsPlatformImplLinux::GetInstance()),
 308                    type));
 309   }
 310 }
 311
 312 // static
 313 void TtsPlatformImplLinux::IndexMarkCallback(size_t msg_id,
 314                                                       size_t client_id,
 315                                                       SPDNotificationType state,
 316                                                       char* index_mark) {
 317   // TODO(dtseng): index_mark appears to specify an index type supplied by a
 318   // client. Need to explore how this is used before hooking it up with existing
 319   // word, sentence events.
 320   // We run Speech Dispatcher in threaded mode, so these callbacks should always
 321   // be in a separate thread.
 322   if (!BrowserThread::CurrentlyOn(BrowserThread::UI)) {
 323     current_notification_ = state;
 324     BrowserThread::PostTask(BrowserThread::UI, FROM_HERE,
 325         base::Bind(&TtsPlatformImplLinux::OnSpeechEvent,
 326         base::Unretained(TtsPlatformImplLinux::GetInstance()),
 327         state));
 328   }
 329 }
 330
 331 // static
 332 TtsPlatformImplLinux* TtsPlatformImplLinux::GetInstance() {
 333   return Singleton<TtsPlatformImplLinux,
 334                    LeakySingletonTraits<TtsPlatformImplLinux> >::get();
 335 }
 336
 337 // static
 338 TtsPlatformImpl* TtsPlatformImpl::GetInstance() {
 339   return TtsPlatformImplLinux::GetInstance();
 340 }