src/plugins/text-to-speech/espeak/espeak-voice.c

   1 /*
   2  * Copyright (c) 2012-2014, Intel Corporation
   3  *
   4  * Redistribution and use in source and binary forms, with or without
   5  * modification, are permitted provided that the following conditions are
   6  * met:
   7  *
   8  *   * Redistributions of source code must retain the above copyright notice,
   9  *     this list of conditions and the following disclaimer.
  10  *   * Redistributions in binary form must reproduce the above copyright
  11  *     notice, this list of conditions and the following disclaimer in the
  12  *     documentation and/or other materials provided with the distribution.
  13  *   * Neither the name of Intel Corporation nor the names of its contributors
  14  *     may be used to endorse or promote products derived from this software
  15  *     without specific prior written permission.
  16  *
  17  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  18  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  19  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  20  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  21  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  22  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  23  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  24  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  25  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  26  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  27  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  28  */
  29
  30 #include <fcntl.h>
  31 #include <sys/types.h>
  32 #include <sys/stat.h>
  33
  34 #include <murphy/common/debug.h>
  35 #include <murphy/common/log.h>
  36 #include <murphy/common/mm.h>
  37 #include <murphy/common/mainloop.h>
  38
  39 #include <espeak/speak_lib.h>
  40
  41 #include "srs/daemon/plugin.h"
  42 #include "srs/daemon/voice.h"
  43 #include "srs/daemon/pulse.h"
  44
  45 #include "espeak-voice.h"
  46
  47 #define PLUGIN_NAME    "espeak-voice"
  48 #define PLUGIN_DESCR   "An espeak-based voice synthesizer plugin for SRS."
  49 #define PLUGIN_AUTHORS "Krisztian Litkey <kli@iki.fi>"
  50 #define PLUGIN_VERSION "0.0.1"
  51
  52 #define CONFIG_VOICEDIR "espeak.voicedir"
  53
  54 #define ESPEAK_CONTINUE 0
  55 #define ESPEAK_ABORT    1
  56
  57 typedef struct {
  58     void *samples;
  59     int   nsample;
  60 } synth_data_t;
  61
  62
  63 static void stream_event_cb(srs_pulse_t *p, srs_voice_event_t *event,
  64                             void *user_data)
  65 {
  66     espeak_t *e = (espeak_t *)user_data;
  67
  68     MRP_UNUSED(p);
  69
  70     e->voice.notify(event, e->voice.notify_data);
  71 }
  72
  73
  74 static int espeak_synth_cb(short *samples, int nsample, espeak_EVENT *events)
  75 {
  76     synth_data_t *data = events->user_data;
  77
  78     if (samples == NULL)
  79         return ESPEAK_CONTINUE;
  80
  81     if (mrp_realloc(data->samples, 2 * (data->nsample + nsample)) == NULL)
  82         return ESPEAK_ABORT;
  83
  84     memcpy(data->samples + 2 * data->nsample, samples, 2 * nsample);
  85     data->nsample += nsample;
  86
  87     return ESPEAK_CONTINUE;
  88 }
  89
  90
  91 static int espeak_setrate(double drate)
  92 {
  93     int min, max, step, rate, orig;
  94
  95     if (0.0 < drate && drate <= 2.0) {
  96         if (drate == 1.0)
  97             rate = espeakRATE_NORMAL;
  98         else if (drate < 1.0) {
  99             min  = espeakRATE_MINIMUM;
 100             max  = espeakRATE_NORMAL;
 101             step = (max - min) / 1.0;
 102             rate = (int)(min + drate * step);
 103         }
 104         else { /*drate > 1.0*/
 105             min  = espeakRATE_NORMAL;
 106             max  = espeakRATE_MAXIMUM;
 107             step = (max - min) / 1.0;
 108             rate = (int)(min + (drate - 1.0) * step);
 109         }
 110
 111         orig = espeak_GetParameter(espeakRATE, 1);
 112         espeak_SetParameter(espeakRATE, rate, 0);
 113
 114         return orig;
 115     }
 116
 117     return 0;
 118 }
 119
 120
 121 static int espeak_setpitch(double dpitch)
 122 {
 123     int min, max, step, pitch, orig;
 124
 125     if (0.0 < dpitch && dpitch <= 2.0) {
 126         pitch = (int)(50 * dpitch);
 127         orig = espeak_GetParameter(espeakPITCH, 1);
 128         espeak_SetParameter(espeakPITCH, pitch, 0);
 129
 130         return orig;
 131     }
 132
 133     return 0;
 134 }
 135
 136
 137 static uint32_t espeak_render(const char *msg, char **tags, int actor,
 138                               double rate, double pitch, int notify_events,
 139                               void *api_data)
 140 {
 141     espeak_t     *e = (espeak_t *)api_data;
 142     int           size, start, end, type, orate, opitch;
 143     unsigned int  flags, uid;
 144     synth_data_t  data;
 145     uint32_t      id;
 146     int           r;
 147
 148     MRP_UNUSED(rate);
 149     MRP_UNUSED(pitch);
 150
 151     if (0 <= actor && actor <= e->nactor) {
 152         if (espeak_SetVoiceByName(e->actors[actor].name) != EE_OK) {
 153             mrp_log_error("espeak: failed to activate espeak voice #%d ('%s').",
 154                           actor, e->actors[actor].name);
 155             return SRS_VOICE_INVALID;
 156         }
 157     }
 158     else {
 159         mrp_log_error("espeak: invalid espeak voice #%d requested.", actor);
 160         return SRS_VOICE_INVALID;
 161     }
 162
 163     size  = 0;
 164     type  = POS_CHARACTER;
 165     start = 0;
 166     end   = 0;
 167     flags = espeakCHARS_UTF8;
 168     uid   = 0;
 169     data  = (synth_data_t) { NULL, 0 };
 170
 171     orate  = espeak_setrate(rate);
 172     opitch = espeak_setpitch(pitch);
 173
 174     r = espeak_Synth(msg, size, start, type, end, flags, &uid, &data);
 175
 176     espeak_setrate(orate);
 177     espeak_setpitch(opitch);
 178
 179     if (r != EE_OK || data.samples == NULL) {
 180         mrp_log_error("espeak: failed to synthesize message with espeak.");
 181         return SRS_VOICE_INVALID;
 182     }
 183
 184     id = srs_play_stream(e->srs->pulse, data.samples, e->config.rate, 1,
 185                          data.nsample, tags, notify_events,
 186                          stream_event_cb, e);
 187
 188     if (id == SRS_VOICE_INVALID)
 189         mrp_free(data.samples);
 190
 191     return id;
 192 }
 193
 194
 195 static void espeak_cancel(uint32_t id, void *api_data)
 196 {
 197     espeak_t *e = (espeak_t *)api_data;
 198
 199     srs_stop_stream(e->srs->pulse, id, FALSE, FALSE);
 200 }
 201
 202
 203 static int create_espeak(srs_plugin_t *plugin)
 204 {
 205     espeak_t *e;
 206
 207     mrp_debug("creating espeak voice plugin");
 208
 209     e = mrp_allocz(sizeof(*e));
 210
 211     if (e != NULL) {
 212         e->self = plugin;
 213         e->srs  = plugin->srs;
 214
 215         plugin->plugin_data = e;
 216
 217         return TRUE;
 218     }
 219     else
 220         return FALSE;
 221 }
 222
 223
 224 static int config_espeak(srs_plugin_t *plugin, srs_cfg_t *cfg)
 225 {
 226     espeak_t      *e = (espeak_t *)plugin->plugin_data;
 227     const char    *path;
 228     int            out, blen, rate;
 229
 230     mrp_debug("configure espeak voice plugin");
 231
 232     e->config.voicedir = srs_get_string_config(cfg, CONFIG_VOICEDIR, NULL);
 233
 234     out  = AUDIO_OUTPUT_SYNCHRONOUS;
 235     path = e->config.voicedir;
 236     blen = 1000;
 237
 238     rate = espeak_Initialize(out, blen, path, 0);
 239
 240     if (rate <= 0) {
 241         mrp_log_error("espeak: failed to initialize espeak.");
 242         return FALSE;
 243     }
 244
 245     mrp_log_info("espeak: chose %d Hz for sample rate.", rate);
 246
 247     e->config.rate = rate;
 248
 249     espeak_SetSynthCallback(espeak_synth_cb);
 250     /*espeak_SetParameter(espeakRATE, espeakRATE_NORMAL, 0);
 251       espeak_SetParameter(espeakPITCH, 50, 0);*/
 252
 253     return TRUE;
 254 }
 255
 256
 257 static inline const char *espeak_language(const char *languages)
 258 {
 259     /*
 260      * XXX TODO:
 261      *     We don't handle correctly potential multiple languages. The
 262      *     documentation states that the 'languages' attribute of a
 263      *     queried voice has a "list of pairs of (byte) priority +
 264      *     (string) language[+dialect qualifier].
 265      *
 266      *     I haven't seen such a voice in practice yet but provided they
 267      *     do exist we'd need to change our voice->actor mapping logic
 268      *     to accomodate for this and in such a case extract all the
 269      *     languages and separately map them to several actors.
 270      */
 271     return languages + 1;  /* just strip priority for now */
 272 }
 273
 274
 275 static inline int espeak_gender(int gender)
 276 {
 277     switch (gender) {
 278     case 1:  return SRS_VOICE_GENDER_MALE;
 279     case 2:  return SRS_VOICE_GENDER_FEMALE;
 280     default: return SRS_VOICE_GENDER_MALE;
 281     }
 282 }
 283
 284
 285 static inline char *espeak_description(espeak_VOICE *v)
 286 {
 287     static char descr[256];
 288
 289     snprintf(descr, sizeof(descr), "espeak %s voice (%s).", v->languages,
 290              v->identifier ? v->identifier : "-");
 291
 292     return descr;
 293 }
 294
 295
 296 static int start_espeak(srs_plugin_t *plugin)
 297 {
 298     static srs_voice_api_t api = {
 299         .render = espeak_render,
 300         .cancel = espeak_cancel
 301     };
 302
 303     espeak_t      *e = (espeak_t *)plugin->plugin_data;
 304     espeak_VOICE **voices, *v;
 305     int            nvoice, i;
 306     int            nactor;
 307
 308     if (e->srs->pulse == NULL)
 309         return FALSE;
 310
 311     voices = (espeak_VOICE **)espeak_ListVoices(NULL);
 312
 313     if (voices == NULL) {
 314         mrp_log_error("espeak: could not find any voices.");
 315         return FALSE;
 316     }
 317
 318     for (nvoice = 0; voices[nvoice] != NULL; nvoice++)
 319         ;
 320
 321     if ((e->actors = mrp_allocz_array(typeof(*e->actors), nvoice)) == NULL)
 322         goto fail;
 323
 324     mrp_log_info("espeak: found available voices:");
 325
 326     for (i = 0; i < nvoice; i++) {
 327         v = voices[i];
 328
 329         mrp_log_info("    %s (%smale, age %d, languages: %s (id: %s))", v->name,
 330                      v->gender == 2 ? "fe" : "", v->age,
 331                      v->languages, v->identifier);
 332
 333         e->actors[i].id          = i;
 334         e->actors[i].name        = mrp_strdup(v->name);
 335         e->actors[i].lang        = mrp_strdup(espeak_language(v->languages));
 336         e->actors[i].dialect     = NULL;
 337         e->actors[i].gender      = espeak_gender(v->gender);
 338         e->actors[i].description = mrp_strdup(espeak_description(v));
 339
 340         if (e->actors[i].name == NULL || e->actors[i].lang == NULL)
 341             goto fail;
 342
 343         e->nactor++;
 344     }
 345
 346     if (srs_register_voice(e->self->srs, "espeak", &api, e,
 347                            e->actors, e->nactor,
 348                            &e->voice.notify, &e->voice.notify_data) == 0)
 349         return TRUE;
 350
 351  fail:
 352     for (i = 0; i < e->nactor; i++) {
 353         mrp_free(e->actors[i].name);
 354         mrp_free(e->actors[i].lang);
 355         mrp_free(e->actors[i].description);
 356     }
 357
 358     return FALSE;
 359 }
 360
 361
 362 static void stop_espeak(srs_plugin_t *plugin)
 363 {
 364     return;
 365 }
 366
 367
 368 static void destroy_espeak(srs_plugin_t *plugin)
 369 {
 370     espeak_t *e = (espeak_t *)plugin->plugin_data;
 371     int       i;
 372
 373     srs_unregister_voice(e->self->srs, "espeak");
 374     espeak_Terminate();
 375
 376     for (i = 0; i < e->nactor; i++) {
 377         mrp_free(e->actors[i].name);
 378         mrp_free(e->actors[i].lang);
 379         mrp_free(e->actors[i].description);
 380     }
 381
 382     mrp_free(e);
 383 }
 384
 385
 386 SRS_DECLARE_PLUGIN(PLUGIN_NAME, PLUGIN_DESCR, PLUGIN_AUTHORS, PLUGIN_VERSION,
 387                    create_espeak, config_espeak, start_espeak, stop_espeak,
 388                    destroy_espeak)