src/plugins/text-to-speech/espeak/espeak-voice.c

   1 /*
   2  * Copyright (c) 2012-2014, Intel Corporation
   3  *
   4  * Redistribution and use in source and binary forms, with or without
   5  * modification, are permitted provided that the following conditions are
   6  * met:
   7  *
   8  *   * Redistributions of source code must retain the above copyright notice,
   9  *     this list of conditions and the following disclaimer.
  10  *   * Redistributions in binary form must reproduce the above copyright
  11  *     notice, this list of conditions and the following disclaimer in the
  12  *     documentation and/or other materials provided with the distribution.
  13  *   * Neither the name of Intel Corporation nor the names of its contributors
  14  *     may be used to endorse or promote products derived from this software
  15  *     without specific prior written permission.
  16  *
  17  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  18  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  19  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  20  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  21  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  22  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  23  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  24  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  25  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  26  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  27  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  28  */
  29
  30 #include <fcntl.h>
  31 #include <sys/types.h>
  32 #include <sys/stat.h>
  33
  34 #include <murphy/common/debug.h>
  35 #include <murphy/common/log.h>
  36 #include <murphy/common/mm.h>
  37 #include <murphy/common/mainloop.h>
  38
  39 #include <espeak/speak_lib.h>
  40
  41 #include "srs/daemon/plugin.h"
  42 #include "srs/daemon/voice.h"
  43 #include "srs/daemon/iso-6391.h"
  44 #include "srs/daemon/pulse.h"
  45
  46 #include "espeak-voice.h"
  47
  48 #define PLUGIN_NAME    "espeak-voice"
  49 #define PLUGIN_DESCR   "An espeak-based voice synthesizer plugin for SRS."
  50 #define PLUGIN_AUTHORS "Krisztian Litkey <kli@iki.fi>"
  51 #define PLUGIN_VERSION "0.0.1"
  52
  53 #define CONFIG_VOICEDIR "espeak.voicedir"
  54
  55 #define ESPEAK_CONTINUE 0
  56 #define ESPEAK_ABORT    1
  57
  58 typedef struct {
  59     void *samples;
  60     int   nsample;
  61 } synth_data_t;
  62
  63
  64 static void stream_event_cb(srs_pulse_t *p, srs_voice_event_t *event,
  65                             void *user_data)
  66 {
  67     espeak_t *e = (espeak_t *)user_data;
  68
  69     MRP_UNUSED(p);
  70
  71     e->voice.notify(event, e->voice.notify_data);
  72 }
  73
  74
  75 static int espeak_synth_cb(short *samples, int nsample, espeak_EVENT *events)
  76 {
  77     synth_data_t *data = events->user_data;
  78
  79     if (samples == NULL)
  80         return ESPEAK_CONTINUE;
  81
  82     if (mrp_realloc(data->samples, 2 * (data->nsample + nsample)) == NULL)
  83         return ESPEAK_ABORT;
  84
  85     memcpy(data->samples + 2 * data->nsample, samples, 2 * nsample);
  86     data->nsample += nsample;
  87
  88     return ESPEAK_CONTINUE;
  89 }
  90
  91
  92 static int espeak_setrate(double drate)
  93 {
  94     int min, max, step, rate, orig;
  95
  96     if (0.0 < drate && drate <= 2.0) {
  97         if (drate == 1.0)
  98             rate = espeakRATE_NORMAL;
  99         else if (drate < 1.0) {
 100             min  = espeakRATE_MINIMUM;
 101             max  = espeakRATE_NORMAL;
 102             step = (max - min) / 1.0;
 103             rate = (int)(min + drate * step);
 104         }
 105         else { /*drate > 1.0*/
 106             min  = espeakRATE_NORMAL;
 107             max  = espeakRATE_MAXIMUM;
 108             step = (max - min) / 1.0;
 109             rate = (int)(min + (drate - 1.0) * step);
 110         }
 111
 112         orig = espeak_GetParameter(espeakRATE, 1);
 113         espeak_SetParameter(espeakRATE, rate, 0);
 114
 115         return orig;
 116     }
 117
 118     return 0;
 119 }
 120
 121
 122 static int espeak_setpitch(double dpitch)
 123 {
 124     int min, max, step, pitch, orig;
 125
 126     if (0.0 < dpitch && dpitch <= 2.0) {
 127         pitch = (int)(50 * dpitch);
 128         orig = espeak_GetParameter(espeakPITCH, 1);
 129         espeak_SetParameter(espeakPITCH, pitch, 0);
 130
 131         return orig;
 132     }
 133
 134     return 0;
 135 }
 136
 137
 138 static uint32_t espeak_render(const char *msg, char **tags, int actor,
 139                               double rate, double pitch, int notify_events,
 140                               void *api_data)
 141 {
 142     espeak_t     *e = (espeak_t *)api_data;
 143     int           size, start, end, type, orate, opitch;
 144     unsigned int  flags, uid;
 145     synth_data_t  data;
 146     uint32_t      id;
 147     int           r;
 148
 149     MRP_UNUSED(rate);
 150     MRP_UNUSED(pitch);
 151
 152     if (0 <= actor && actor <= e->nactor) {
 153         if (espeak_SetVoiceByName(e->actors[actor].name) != EE_OK) {
 154             mrp_log_error("espeak: failed to activate espeak voice #%d ('%s').",
 155                           actor, e->actors[actor].name);
 156             return SRS_VOICE_INVALID;
 157         }
 158     }
 159     else {
 160         mrp_log_error("espeak: invalid espeak voice #%d requested.", actor);
 161         return SRS_VOICE_INVALID;
 162     }
 163
 164     size  = 0;
 165     type  = POS_CHARACTER;
 166     start = 0;
 167     end   = 0;
 168     flags = espeakCHARS_UTF8;
 169     uid   = 0;
 170     data  = (synth_data_t) { NULL, 0 };
 171
 172     orate  = espeak_setrate(rate);
 173     opitch = espeak_setpitch(pitch);
 174
 175     r = espeak_Synth(msg, size, start, type, end, flags, &uid, &data);
 176
 177     espeak_setrate(orate);
 178     espeak_setpitch(opitch);
 179
 180     if (r != EE_OK || data.samples == NULL) {
 181         mrp_log_error("espeak: failed to synthesize message with espeak.");
 182         return SRS_VOICE_INVALID;
 183     }
 184
 185     id = srs_play_stream(e->srs->pulse, data.samples, e->config.rate, 1,
 186                          data.nsample, tags, notify_events,
 187                          stream_event_cb, e);
 188
 189     if (id == SRS_VOICE_INVALID)
 190         mrp_free(data.samples);
 191
 192     return id;
 193 }
 194
 195
 196 static void espeak_cancel(uint32_t id, void *api_data)
 197 {
 198     espeak_t *e = (espeak_t *)api_data;
 199
 200     srs_stop_stream(e->srs->pulse, id, FALSE, FALSE);
 201 }
 202
 203
 204 static int create_espeak(srs_plugin_t *plugin)
 205 {
 206     espeak_t *e;
 207
 208     mrp_debug("creating espeak voice plugin");
 209
 210     e = mrp_allocz(sizeof(*e));
 211
 212     if (e != NULL) {
 213         e->self = plugin;
 214         e->srs  = plugin->srs;
 215
 216         plugin->plugin_data = e;
 217
 218         return TRUE;
 219     }
 220     else
 221         return FALSE;
 222 }
 223
 224
 225 static int config_espeak(srs_plugin_t *plugin, srs_cfg_t *cfg)
 226 {
 227     espeak_t      *e = (espeak_t *)plugin->plugin_data;
 228     const char    *path;
 229     int            out, blen, rate;
 230
 231     mrp_debug("configure espeak voice plugin");
 232
 233     e->config.voicedir = srs_get_string_config(cfg, CONFIG_VOICEDIR, NULL);
 234
 235     out  = AUDIO_OUTPUT_SYNCHRONOUS;
 236     path = e->config.voicedir;
 237     blen = 1000;
 238
 239     rate = espeak_Initialize(out, blen, path, 0);
 240
 241     if (rate <= 0) {
 242         mrp_log_error("espeak: failed to initialize espeak.");
 243         return FALSE;
 244     }
 245
 246     mrp_log_info("espeak: chose %d Hz for sample rate.", rate);
 247
 248     e->config.rate = rate;
 249
 250     espeak_SetSynthCallback(espeak_synth_cb);
 251     /*espeak_SetParameter(espeakRATE, espeakRATE_NORMAL, 0);
 252       espeak_SetParameter(espeakPITCH, 50, 0);*/
 253
 254     return TRUE;
 255 }
 256
 257
 258 static const char *espeak_parse_dialect(const char *lang, const char **dialect)
 259 {
 260     const char *dial, *l, *c, *d;
 261     char        code_buf[8];
 262     size_t      n;
 263
 264     c = lang;
 265     d = strchr(lang, '-');
 266
 267     if (d == NULL || d - c > 3)
 268         d = NULL;
 269     else {
 270         n = d - c;
 271         strncpy(code_buf, c, n);
 272         code_buf[n] = '\0';
 273         c = code_buf;
 274         d = d + 1;
 275     }
 276
 277     mrp_debug("parsed '%s' into code '%s', dialect '%s'", lang, c, d ? d : "-");
 278
 279     l = srs_iso6391_language(c);
 280
 281     if (l == NULL)
 282         dial = NULL;
 283     else {
 284         lang = l;
 285
 286         if (d != NULL && strcmp(c, d))
 287             dial = srs_iso6391_dialect(d);
 288         else
 289             dial = NULL;
 290     }
 291
 292     *dialect = dial;
 293     return lang;
 294 }
 295
 296
 297 static inline int espeak_gender(int gender)
 298 {
 299     switch (gender) {
 300     case 1:  return SRS_VOICE_GENDER_MALE;
 301     case 2:  return SRS_VOICE_GENDER_FEMALE;
 302     default: return SRS_VOICE_GENDER_MALE;
 303     }
 304 }
 305
 306
 307 static inline char *espeak_description(espeak_VOICE *v)
 308 {
 309     static char descr[256];
 310
 311     snprintf(descr, sizeof(descr), "espeak %s voice (%s).", v->languages,
 312              v->identifier ? v->identifier : "-");
 313
 314     return descr;
 315 }
 316
 317
 318 static int start_espeak(srs_plugin_t *plugin)
 319 {
 320     static srs_voice_api_t api = {
 321         .render = espeak_render,
 322         .cancel = espeak_cancel
 323     };
 324
 325     espeak_t      *e = (espeak_t *)plugin->plugin_data;
 326     espeak_VOICE **voices, *v;
 327     int            nvoice, i;
 328     const char    *lang, *language, *dialect;
 329     int            nactor;
 330
 331     if (e->srs->pulse == NULL)
 332         return FALSE;
 333
 334     voices = (espeak_VOICE **)espeak_ListVoices(NULL);
 335
 336     if (voices == NULL) {
 337         mrp_log_error("espeak: could not find any voices.");
 338         return FALSE;
 339     }
 340
 341     for (nvoice = 0; voices[nvoice] != NULL; nvoice++)
 342         ;
 343
 344     if ((e->actors = mrp_allocz_array(typeof(*e->actors), nvoice)) == NULL)
 345         goto fail;
 346
 347     mrp_log_info("espeak: found %d available voices.", nvoice);
 348
 349     for (i = 0; i < nvoice; i++) {
 350         v = voices[i];
 351
 352         mrp_log_info("    %s (%s)", v->name, v->identifier);
 353
 354         for (lang = v->languages + 1; *lang; lang += strlen(lang)) {
 355             mrp_log_info("      %s (priority %d)", lang, lang[-1]);
 356
 357             language = espeak_parse_dialect(lang, &dialect);
 358             e->actors[i].id          = i;
 359             e->actors[i].name        = mrp_strdup(v->name);
 360             e->actors[i].lang        = mrp_strdup(language);
 361             e->actors[i].dialect     = mrp_strdup(dialect);
 362             e->actors[i].gender      = espeak_gender(v->gender);
 363             e->actors[i].description = mrp_strdup(espeak_description(v));
 364
 365             if (e->actors[i].name == NULL || e->actors[i].lang == NULL)
 366                 goto fail;
 367
 368             e->nactor++;
 369         }
 370     }
 371
 372     if (srs_register_voice(e->self->srs, "espeak", &api, e,
 373                            e->actors, e->nactor,
 374                            &e->voice.notify, &e->voice.notify_data) == 0)
 375         return TRUE;
 376
 377  fail:
 378     for (i = 0; i < e->nactor; i++) {
 379         mrp_free(e->actors[i].name);
 380         mrp_free(e->actors[i].lang);
 381         mrp_free(e->actors[i].description);
 382     }
 383
 384     return FALSE;
 385 }
 386
 387
 388 static void stop_espeak(srs_plugin_t *plugin)
 389 {
 390     return;
 391 }
 392
 393
 394 static void destroy_espeak(srs_plugin_t *plugin)
 395 {
 396     espeak_t *e = (espeak_t *)plugin->plugin_data;
 397     int       i;
 398
 399     srs_unregister_voice(e->self->srs, "espeak");
 400     espeak_Terminate();
 401
 402     for (i = 0; i < e->nactor; i++) {
 403         mrp_free(e->actors[i].name);
 404         mrp_free(e->actors[i].lang);
 405         mrp_free(e->actors[i].description);
 406     }
 407
 408     mrp_free(e);
 409 }
 410
 411
 412 SRS_DECLARE_PLUGIN(PLUGIN_NAME, PLUGIN_DESCR, PLUGIN_AUTHORS, PLUGIN_VERSION,
 413                    create_espeak, config_espeak, start_espeak, stop_espeak,
 414                    destroy_espeak)