- add sources.
[platform/framework/web/crosswalk.git] / src / chrome / browser / speech / tts_linux.cc
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include <math.h>
6
7 #include <map>
8
9 #include "base/memory/scoped_ptr.h"
10 #include "base/memory/singleton.h"
11 #include "base/synchronization/lock.h"
12 #include "chrome/browser/speech/tts_platform.h"
13 #include "content/public/browser/browser_thread.h"
14
15 #include "library_loaders/libspeechd.h"
16
17 using content::BrowserThread;
18
namespace {

// Error string stored by Speak() when Speech Dispatcher cannot be used.
const char kNotSupportedError[] =
    "Native speech synthesis not supported on this platform.";

// One native voice: the voice's own name together with the Speech Dispatcher
// output module that provides it.
struct SPDChromeVoice {
  // Voice name as reported by the output module.
  std::string name;
  // Name of the output module the voice was listed under.
  std::string module;
};

}  // namespace
30
31 class TtsPlatformImplLinux : public TtsPlatformImpl {
32  public:
33   virtual bool PlatformImplAvailable() OVERRIDE;
34   virtual bool Speak(
35       int utterance_id,
36       const std::string& utterance,
37       const std::string& lang,
38       const VoiceData& voice,
39       const UtteranceContinuousParameters& params) OVERRIDE;
40   virtual bool StopSpeaking() OVERRIDE;
41   virtual void Pause() OVERRIDE;
42   virtual void Resume() OVERRIDE;
43   virtual bool IsSpeaking() OVERRIDE;
44   virtual void GetVoices(std::vector<VoiceData>* out_voices) OVERRIDE;
45
46   void OnSpeechEvent(SPDNotificationType type);
47
48   // Get the single instance of this class.
49   static TtsPlatformImplLinux* GetInstance();
50
51  private:
52   TtsPlatformImplLinux();
53   virtual ~TtsPlatformImplLinux();
54
55   // Initiate the connection with the speech dispatcher.
56   void Initialize();
57
58   // Resets the connection with speech dispatcher.
59   void Reset();
60
61   static void NotificationCallback(size_t msg_id,
62                                    size_t client_id,
63                                    SPDNotificationType type);
64
65   static void IndexMarkCallback(size_t msg_id,
66                                 size_t client_id,
67                                 SPDNotificationType state,
68                                 char* index_mark);
69
70   static SPDNotificationType current_notification_;
71
72   base::Lock initialization_lock_;
73   LibSpeechdLoader libspeechd_loader_;
74   SPDConnection* conn_;
75
76   // These apply to the current utterance only.
77   std::string utterance_;
78   int utterance_id_;
79
80   // Map a string composed of a voicename and module to the voicename. Used to
81   // uniquely identify a voice across all available modules.
82   scoped_ptr<std::map<std::string, SPDChromeVoice> > all_native_voices_;
83
84   friend struct DefaultSingletonTraits<TtsPlatformImplLinux>;
85
86   DISALLOW_COPY_AND_ASSIGN(TtsPlatformImplLinux);
87 };
88
89 // static
90 SPDNotificationType TtsPlatformImplLinux::current_notification_ =
91     SPD_EVENT_END;
92
93 TtsPlatformImplLinux::TtsPlatformImplLinux()
94     : utterance_id_(0) {
95   BrowserThread::PostTask(BrowserThread::FILE,
96                           FROM_HERE,
97                           base::Bind(&TtsPlatformImplLinux::Initialize,
98                                      base::Unretained(this)));
99 }
100
101 void TtsPlatformImplLinux::Initialize() {
102   base::AutoLock lock(initialization_lock_);
103
104   if (!libspeechd_loader_.Load("libspeechd.so.2"))
105     return;
106
107   conn_ = libspeechd_loader_.spd_open(
108       "chrome", "extension_api", NULL, SPD_MODE_THREADED);
109   if (!conn_)
110     return;
111
112   // Register callbacks for all events.
113   conn_->callback_begin =
114     conn_->callback_end =
115     conn_->callback_cancel =
116     conn_->callback_pause =
117     conn_->callback_resume =
118     &NotificationCallback;
119
120   conn_->callback_im = &IndexMarkCallback;
121
122   libspeechd_loader_.spd_set_notification_on(conn_, SPD_BEGIN);
123   libspeechd_loader_.spd_set_notification_on(conn_, SPD_END);
124   libspeechd_loader_.spd_set_notification_on(conn_, SPD_CANCEL);
125   libspeechd_loader_.spd_set_notification_on(conn_, SPD_PAUSE);
126   libspeechd_loader_.spd_set_notification_on(conn_, SPD_RESUME);
127 }
128
129 TtsPlatformImplLinux::~TtsPlatformImplLinux() {
130   base::AutoLock lock(initialization_lock_);
131   if (conn_) {
132     libspeechd_loader_.spd_close(conn_);
133     conn_ = NULL;
134   }
135 }
136
137 void TtsPlatformImplLinux::Reset() {
138   base::AutoLock lock(initialization_lock_);
139   if (conn_)
140     libspeechd_loader_.spd_close(conn_);
141   conn_ = libspeechd_loader_.spd_open(
142       "chrome", "extension_api", NULL, SPD_MODE_THREADED);
143 }
144
145 bool TtsPlatformImplLinux::PlatformImplAvailable() {
146   if (!initialization_lock_.Try())
147     return false;
148   bool result = libspeechd_loader_.loaded() && (conn_ != NULL);
149   initialization_lock_.Release();
150   return result;
151 }
152
153 bool TtsPlatformImplLinux::Speak(
154     int utterance_id,
155     const std::string& utterance,
156     const std::string& lang,
157     const VoiceData& voice,
158     const UtteranceContinuousParameters& params) {
159   if (!PlatformImplAvailable()) {
160     error_ = kNotSupportedError;
161     return false;
162   }
163
164   // Speech dispatcher's speech params are around 3x at either limit.
165   float rate = params.rate > 3 ? 3 : params.rate;
166   rate = params.rate < 0.334 ? 0.334 : rate;
167   float pitch = params.pitch > 3 ? 3 : params.pitch;
168   pitch = params.pitch < 0.334 ? 0.334 : pitch;
169
170   std::map<std::string, SPDChromeVoice>::iterator it =
171       all_native_voices_->find(voice.name);
172   if (it != all_native_voices_->end()) {
173     libspeechd_loader_.spd_set_output_module(conn_, it->second.module.c_str());
174     libspeechd_loader_.spd_set_synthesis_voice(conn_, it->second.name.c_str());
175   }
176
177   // Map our multiplicative range to Speech Dispatcher's linear range.
178   // .334 = -100.
179   // 3 = 100.
180   libspeechd_loader_.spd_set_voice_rate(conn_, 100 * log10(rate) / log10(3));
181   libspeechd_loader_.spd_set_voice_pitch(conn_, 100 * log10(pitch) / log10(3));
182
183   utterance_ = utterance;
184   utterance_id_ = utterance_id;
185
186   if (libspeechd_loader_.spd_say(conn_, SPD_TEXT, utterance.c_str()) == -1) {
187     Reset();
188     return false;
189   }
190   return true;
191 }
192
193 bool TtsPlatformImplLinux::StopSpeaking() {
194   if (!PlatformImplAvailable())
195     return false;
196   if (libspeechd_loader_.spd_stop(conn_) == -1) {
197     Reset();
198     return false;
199   }
200   return true;
201 }
202
203 void TtsPlatformImplLinux::Pause() {
204   if (!PlatformImplAvailable())
205     return;
206   libspeechd_loader_.spd_pause(conn_);
207 }
208
209 void TtsPlatformImplLinux::Resume() {
210   if (!PlatformImplAvailable())
211     return;
212   libspeechd_loader_.spd_resume(conn_);
213 }
214
215 bool TtsPlatformImplLinux::IsSpeaking() {
216   return current_notification_ == SPD_EVENT_BEGIN;
217 }
218
219 void TtsPlatformImplLinux::GetVoices(
220     std::vector<VoiceData>* out_voices) {
221   if (!all_native_voices_.get()) {
222     all_native_voices_.reset(new std::map<std::string, SPDChromeVoice>());
223     char** modules = libspeechd_loader_.spd_list_modules(conn_);
224     if (!modules)
225       return;
226     for (int i = 0; modules[i]; i++) {
227       char* module = modules[i];
228       libspeechd_loader_.spd_set_output_module(conn_, module);
229       SPDVoice** native_voices =
230           libspeechd_loader_.spd_list_synthesis_voices(conn_);
231       if (!native_voices) {
232         free(module);
233         continue;
234       }
235       for (int j = 0; native_voices[j]; j++) {
236         SPDVoice* native_voice = native_voices[j];
237         SPDChromeVoice native_data;
238         native_data.name = native_voice->name;
239         native_data.module = module;
240         std::string key;
241         key.append(native_data.name);
242         key.append(" ");
243         key.append(native_data.module);
244         all_native_voices_->insert(
245             std::pair<std::string, SPDChromeVoice>(key, native_data));
246         free(native_voices[j]);
247       }
248       free(modules[i]);
249     }
250   }
251
252   for (std::map<std::string, SPDChromeVoice>::iterator it =
253            all_native_voices_->begin();
254        it != all_native_voices_->end();
255        it++) {
256     out_voices->push_back(VoiceData());
257     VoiceData& voice = out_voices->back();
258     voice.native = true;
259     voice.name = it->first;
260     voice.events.insert(TTS_EVENT_START);
261     voice.events.insert(TTS_EVENT_END);
262     voice.events.insert(TTS_EVENT_CANCELLED);
263     voice.events.insert(TTS_EVENT_MARKER);
264     voice.events.insert(TTS_EVENT_PAUSE);
265     voice.events.insert(TTS_EVENT_RESUME);
266   }
267 }
268
269 void TtsPlatformImplLinux::OnSpeechEvent(SPDNotificationType type) {
270   TtsController* controller = TtsController::GetInstance();
271   switch (type) {
272   case SPD_EVENT_BEGIN:
273     controller->OnTtsEvent(utterance_id_, TTS_EVENT_START, 0, std::string());
274     break;
275   case SPD_EVENT_RESUME:
276     controller->OnTtsEvent(utterance_id_, TTS_EVENT_RESUME, 0, std::string());
277     break;
278   case SPD_EVENT_END:
279     controller->OnTtsEvent(
280         utterance_id_, TTS_EVENT_END, utterance_.size(), std::string());
281     break;
282   case SPD_EVENT_PAUSE:
283     controller->OnTtsEvent(
284         utterance_id_, TTS_EVENT_PAUSE, utterance_.size(), std::string());
285     break;
286   case SPD_EVENT_CANCEL:
287     controller->OnTtsEvent(
288         utterance_id_, TTS_EVENT_CANCELLED, 0, std::string());
289     break;
290   case SPD_EVENT_INDEX_MARK:
291     controller->OnTtsEvent(utterance_id_, TTS_EVENT_MARKER, 0, std::string());
292     break;
293   }
294 }
295
296 // static
297 void TtsPlatformImplLinux::NotificationCallback(
298     size_t msg_id, size_t client_id, SPDNotificationType type) {
299   // We run Speech Dispatcher in threaded mode, so these callbacks should always
300   // be in a separate thread.
301   if (!BrowserThread::CurrentlyOn(BrowserThread::UI)) {
302     current_notification_ = type;
303     BrowserThread::PostTask(
304         BrowserThread::UI,
305         FROM_HERE,
306         base::Bind(&TtsPlatformImplLinux::OnSpeechEvent,
307                    base::Unretained(TtsPlatformImplLinux::GetInstance()),
308                    type));
309   }
310 }
311
312 // static
313 void TtsPlatformImplLinux::IndexMarkCallback(size_t msg_id,
314                                                       size_t client_id,
315                                                       SPDNotificationType state,
316                                                       char* index_mark) {
317   // TODO(dtseng): index_mark appears to specify an index type supplied by a
318   // client. Need to explore how this is used before hooking it up with existing
319   // word, sentence events.
320   // We run Speech Dispatcher in threaded mode, so these callbacks should always
321   // be in a separate thread.
322   if (!BrowserThread::CurrentlyOn(BrowserThread::UI)) {
323     current_notification_ = state;
324     BrowserThread::PostTask(BrowserThread::UI, FROM_HERE,
325         base::Bind(&TtsPlatformImplLinux::OnSpeechEvent,
326         base::Unretained(TtsPlatformImplLinux::GetInstance()),
327         state));
328   }
329 }
330
331 // static
332 TtsPlatformImplLinux* TtsPlatformImplLinux::GetInstance() {
333   return Singleton<TtsPlatformImplLinux,
334                    LeakySingletonTraits<TtsPlatformImplLinux> >::get();
335 }
336
337 // static
338 TtsPlatformImpl* TtsPlatformImpl::GetInstance() {
339   return TtsPlatformImplLinux::GetInstance();
340 }