espeak: added an espeak-based text-to-speech plugin.
author Krisztian Litkey <kli@iki.fi>
Fri, 18 Apr 2014 20:59:38 +0000 (23:59 +0300)
committer Krisztian Litkey <kli@iki.fi>
Fri, 18 Apr 2014 20:59:38 +0000 (23:59 +0300)
configure.ac
src/Makefile.am
src/plugins/text-to-speech/espeak/Makefile [new file with mode: 0644]
src/plugins/text-to-speech/espeak/espeak-voice.c [new file with mode: 0644]
src/plugins/text-to-speech/espeak/espeak-voice.h [new file with mode: 0644]
src/plugins/text-to-speech/espeak/pulse.c [new file with mode: 0644]
src/plugins/text-to-speech/espeak/pulse.h [new file with mode: 0644]

diff --git a/configure.ac b/configure.ac
index 770d0d1..0067963 100644 (file)
@@ -259,6 +259,36 @@ AC_SUBST(FESTIVAL_ENABLED)
 AC_SUBST(FESTIVAL_CXXFLAGS)
 AC_SUBST(FESTIVAL_LIBS)
 
+# Check if espeak support should be enabled.
+AC_ARG_ENABLE(espeak,
+              [  --enable-espeak           enable espeak synthesizer support],
+             [enable_espeak=$enableval], [enable_espeak=auto])
+
+if test "$enable_espeak" != "no"; then
+    AC_CHECK_HEADERS([espeak/speak_lib.h], [have_espeak=yes], [have_espeak=no])
+
+    if test "$have_espeak" = "no" -a "$enable_espeak" = "yes"; then
+        AC_MSG_ERROR([espeak development libraries not found.])
+    fi
+
+    enable_espeak=$have_espeak
+fi
+
+if test "$enable_espeak" = "yes"; then
+    AC_MSG_NOTICE([espeak synthesizer support is enabled.])
+    AC_DEFINE([ESPEAK_ENABLED], 1, [Enable espeak support ?])
+    ESPEAK_INCLUDES="-I$prefix/include"
+    ESPEAK_CFLAGS="$ESPEAK_INCLUDES"
+    ESPEAK_LIBS="-lespeak"
+else
+    AC_MSG_NOTICE([espeak synthesizer support is disabled.])
+fi
+
+AM_CONDITIONAL(ESPEAK_ENABLED, [test "$enable_espeak" = "yes"])
+AC_SUBST(ESPEAK_ENABLED)
+AC_SUBST(ESPEAK_CFLAGS)
+AC_SUBST(ESPEAK_LIBS)
+
 # Check if systemd socket-based activation was enabled.
 AC_ARG_ENABLE(systemd,
               [  --enable-systemd       enable systemd socket-based activation],
@@ -304,6 +334,7 @@ echo "Extra C warnings flags: $WARNING_CFLAGS"
 echo "D-Bus support: $enable_dbus"
 echo "Sphinx support: $enable_sphinx"
 echo "Festival support: $enable_festival"
+echo "Espeak support: $enable_espeak"
 echo "systemd socket-based activation: $enable_systemd"
 
 if test "$DUMP_LIB_FLAGS" = "yes"; then
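
With this change espeak support is probed automatically at configure time; a minimal
sketch of forcing it on from an already bootstrapped source tree (the install prefix
is illustrative):

    ./configure --enable-espeak --prefix=/usr
    make && make install

If --enable-espeak is requested but espeak/speak_lib.h cannot be found, configure
aborts with the error above; under the default auto-detection the feature is simply
disabled.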
diff --git a/src/Makefile.am b/src/Makefile.am
index 960f5f4..f5c1507 100644 (file)
@@ -323,6 +323,24 @@ plugin_festival_voice_la_LDFLAGS =                         \
                -module -avoid-version
 endif
 
+if ESPEAK_ENABLED
+# espeak-voice synthesizer plugin
+plugin_LTLIBRARIES += plugin-espeak-voice.la
+
+plugin_espeak_voice_la_SOURCES =                               \
+               plugins/text-to-speech/espeak/espeak-voice.c    \
+               plugins/text-to-speech/espeak/pulse.c
+
+plugin_espeak_voice_la_CFLAGS  =                               \
+               $(AM_CFLAGS) $(ESPEAK_CFLAGS)
+
+plugin_espeak_voice_la_LDFLAGS =                               \
+               -module -avoid-version
+
+plugin_espeak_voice_la_LIBADD =                                        \
+               $(ESPEAK_LIBS)
+endif
+
 # simple-voice synthesizer plugin
 #plugin_LTLIBRARIES += plugin-simple-voice.la
 #
diff --git a/src/plugins/text-to-speech/espeak/Makefile b/src/plugins/text-to-speech/espeak/Makefile
new file mode 100644 (file)
index 0000000..1c6f118
--- /dev/null
@@ -0,0 +1,5 @@
+all:
+       $(MAKE) -C ../../.. $@
+
+%:
+       $(MAKE) -C ../../.. $(MAKECMDGOALS)
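
This stub forwards any make goal three directories up (to the src/ Makefile of an
in-tree build), so the plugin can be rebuilt from its own directory, e.g.:

    make -C src/plugins/text-to-speech/espeak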
diff --git a/src/plugins/text-to-speech/espeak/espeak-voice.c b/src/plugins/text-to-speech/espeak/espeak-voice.c
new file mode 100644 (file)
index 0000000..bbf736c
--- /dev/null
@@ -0,0 +1,401 @@
+/*
+ * Copyright (c) 2012-2014, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ *   * Redistributions of source code must retain the above copyright notice,
+ *     this list of conditions and the following disclaimer.
+ *   * Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *   * Neither the name of Intel Corporation nor the names of its contributors
+ *     may be used to endorse or promote products derived from this software
+ *     without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <unistd.h>
+#include <string.h>
+#include <fcntl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include <murphy/common/debug.h>
+#include <murphy/common/log.h>
+#include <murphy/common/mm.h>
+#include <murphy/common/mainloop.h>
+
+#include <espeak/speak_lib.h>
+
+#include "srs/daemon/plugin.h"
+#include "srs/daemon/voice.h"
+
+#include "espeak-voice.h"
+#include "pulse.h"
+
+#define PLUGIN_NAME    "espeak-voice"
+#define PLUGIN_DESCR   "An espeak-based voice synthesizer plugin for SRS."
+#define PLUGIN_AUTHORS "Krisztian Litkey <kli@iki.fi>"
+#define PLUGIN_VERSION "0.0.1"
+
+#define CONFIG_VOICEDIR "espeak.voicedir"
+
+#define ESPEAK_CONTINUE 0
+#define ESPEAK_ABORT    1
+
+typedef struct {
+    void *samples;
+    int   nsample;
+} synth_data_t;
+
+
+static void stream_event_cb(espeak_t *e, srs_voice_event_t *event,
+                            void *user_data)
+{
+    MRP_UNUSED(user_data);
+
+    e->voice.notify(event, e->voice.notify_data);
+}
+
+
+static int espeak_synth_cb(short *samples, int nsample, espeak_EVENT *events)
+{
+    synth_data_t *data = events->user_data;
+    void         *buf;
+
+    if (samples == NULL) {
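+        /*
+         * A NULL sample pointer marks the end of synthesis; the block
+         * below dumps the accumulated samples to a file, apparently as
+         * a debugging aid.
+         */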
+        if (data->samples != NULL) {
+            int fd = open("espeak.data", O_CREAT|O_WRONLY, 0644);
+
+            if (fd >= 0) {
+                write(fd, data->samples, 2 * data->nsample);
+                close(fd);
+            }
+        }
+
+        return ESPEAK_CONTINUE;
+    }
+
+    mrp_debug("got %d new samples from espeak", nsample);
+
+    buf = mrp_realloc(data->samples, 2 * (data->nsample + nsample));
+
+    if (buf == NULL)
+        return ESPEAK_ABORT;
+
+    data->samples = buf;
+
+    memcpy(data->samples + 2 * data->nsample, samples, 2 * nsample);
+    data->nsample += nsample;
+
+    return ESPEAK_CONTINUE;
+}
+
+
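+/*
+ * Map a relative rate in (0.0, 2.0] onto espeak's words-per-minute range.
+ *
+ * Worked example (illustrative; the constants are the stock speak_lib.h
+ * values espeakRATE_MINIMUM = 80, espeakRATE_NORMAL = 175 and
+ * espeakRATE_MAXIMUM = 450 wpm):
+ *
+ *   espeak_setrate(0.5) selects 80  + 0.5 * (175 - 80)  = 127 wpm
+ *   espeak_setrate(1.5) selects 175 + 0.5 * (450 - 175) = 312 wpm
+ *
+ * Out-of-range rates are ignored (0 is returned); otherwise the previous
+ * rate is returned so the caller can restore it afterwards.
+ */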
+static int espeak_setrate(double drate)
+{
+    int min, max, step, rate, orig;
+
+    if (0.0 < drate && drate <= 2.0) {
+        if (drate == 1.0)
+            rate = espeakRATE_NORMAL;
+        else if (drate < 1.0) {
+            min  = espeakRATE_MINIMUM;
+            max  = espeakRATE_NORMAL;
+            step = (max - min) / 1.0;
+            rate = (int)(min + drate * step);
+        }
+        else { /*drate > 1.0*/
+            min  = espeakRATE_NORMAL;
+            max  = espeakRATE_MAXIMUM;
+            step = (max - min) / 1.0;
+            rate = (int)(min + (drate - 1.0) * step);
+        }
+
+        orig = espeak_GetParameter(espeakRATE, 1);
+        espeak_SetParameter(espeakRATE, rate, 0);
+
+        return orig;
+    }
+
+    return 0;
+}
+
+
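+/*
+ * Map a relative pitch in (0.0, 2.0] onto espeak's 0-100 pitch scale,
+ * where 50 is the default base pitch: e.g. espeak_setpitch(1.0) keeps 50
+ * and espeak_setpitch(1.5) selects 75. As with espeak_setrate(), the
+ * previous value is returned for later restoration and out-of-range
+ * values are ignored (0 is returned).
+ */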
+static int espeak_setpitch(double dpitch)
+{
+    int min, max, step, pitch, orig;
+
+    if (0.0 < dpitch && dpitch <= 2.0) {
+        pitch = (int)(50 * dpitch);
+        orig = espeak_GetParameter(espeakPITCH, 1);
+        espeak_SetParameter(espeakPITCH, pitch, 0);
+
+        return orig;
+    }
+
+    return 0;
+}
+
+
+static uint32_t espeak_render(const char *msg, char **tags, int actor,
+                              double rate, double pitch, int notify_events,
+                              void *api_data)
+{
+    espeak_t     *e = (espeak_t *)api_data;
+    int           size, start, end, type, orate, opitch;
+    unsigned int  flags, uid;
+    synth_data_t  data;
+    uint32_t      id;
+    int           r;
+
+    if (0 <= actor && actor < e->nactor) {
+        if (espeak_SetVoiceByName(e->actors[actor].name) != EE_OK) {
+            mrp_log_error("Failed to activate espeak voice #%d ('%s').",
+                          actor, e->actors[actor].name);
+            return SRS_VOICE_INVALID;
+        }
+    }
+    else {
+        mrp_log_error("Invalid espeak voice #%d requested.", actor);
+        return SRS_VOICE_INVALID;
+    }
+
+    size  = 0;
+    type  = POS_CHARACTER;
+    start = 0;
+    end   = 0;
+    flags = espeakCHARS_UTF8;
+    uid   = 0;
+    data  = (synth_data_t) { NULL, 0 };
+
+    orate  = espeak_setrate(rate);
+    opitch = espeak_setpitch(pitch);
+
+    r = espeak_Synth(msg, size, start, type, end, flags, &uid, &data);
+
+    espeak_setrate(orate);
+    espeak_setpitch(opitch);
+
+    if (r != EE_OK || data.samples == NULL) {
+        mrp_log_error("Failed to synthesize message with espeak.");
+        return SRS_VOICE_INVALID;
+    }
+
+    id = pulse_play_stream(e, data.samples, e->config.rate, 1, data.nsample,
+                           tags, notify_events, stream_event_cb, NULL);
+
+    if (id == SRS_VOICE_INVALID)
+        mrp_free(data.samples);
+
+    return id;
+}
+
+
+static void espeak_cancel(uint32_t id, void *api_data)
+{
+    espeak_t *e = (espeak_t *)api_data;
+
+    pulse_stop_stream(e, id, FALSE, FALSE);
+}
+
+
+static int create_espeak(srs_plugin_t *plugin)
+{
+    espeak_t *e;
+
+    mrp_debug("creating espeak voice plugin");
+
+    e = mrp_allocz(sizeof(*e));
+
+    if (e != NULL) {
+        e->self = plugin;
+        e->srs  = plugin->srs;
+
+        plugin->plugin_data = e;
+
+        return TRUE;
+    }
+    else
+        return FALSE;
+}
+
+
+static int config_espeak(srs_plugin_t *plugin, srs_cfg_t *cfg)
+{
+    espeak_t      *e = (espeak_t *)plugin->plugin_data;
+    const char    *path;
+    int            out, blen, rate;
+
+    mrp_debug("configure espeak voice plugin");
+
+    e->config.voicedir = srs_get_string_config(cfg, CONFIG_VOICEDIR, NULL);
+
+    out  = AUDIO_OUTPUT_SYNCHRONOUS;
+    path = e->config.voicedir;
+    blen = 1000;
+
+    rate = espeak_Initialize(out, blen, path, 0);
+
+    if (rate <= 0) {
+        mrp_log_error("Failed to initialize espeak.");
+        return FALSE;
+    }
+
+    mrp_log_info("espeak chose %d Hz for sample rate.", rate);
+
+    e->config.rate = rate;
+
+    espeak_SetSynthCallback(espeak_synth_cb);
+    /*espeak_SetParameter(espeakRATE, espeakRATE_NORMAL, 0);
+      espeak_SetParameter(espeakPITCH, 50, 0);*/
+
+    return TRUE;
+}
+
+
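+/*
+ * Extract the plain language name from an espeak 'languages' attribute,
+ * which is a concatenation of (priority byte, language string) pairs;
+ * illustratively, an attribute starting with "\x05en-uk" advertises
+ * language "en-uk" with priority 5.
+ */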
+static inline const char *espeak_language(const char *languages)
+{
+    /*
+     * XXX TODO:
+     *     We don't correctly handle potentially multiple languages. The
+     *     documentation states that the 'languages' attribute of a
+     *     queried voice is a "list of pairs of (byte) priority +
+     *     (string) language[+dialect qualifier]".
+     *
+     *     I haven't seen such a voice in practice yet but provided they
+     *     do exist we'd need to change our voice->actor mapping logic
+     *     to accommodate this and in such a case extract all the
+     *     languages and separately map them to several actors.
+     */
+    return languages + 1;  /* just strip priority for now */
+}
+
+
+static inline int espeak_gender(int gender)
+{
+    switch (gender) {
+    case 1:  return SRS_VOICE_GENDER_MALE;
+    case 2:  return SRS_VOICE_GENDER_FEMALE;
+    default: return SRS_VOICE_GENDER_MALE;
+    }
+}
+
+
+static inline char *espeak_description(espeak_VOICE *v)
+{
+    static char descr[256];
+
+    snprintf(descr, sizeof(descr), "espeak %s voice (%s).", v->languages,
+             v->identifier ? v->identifier : "-");
+
+    return descr;
+}
+
+
+static int start_espeak(srs_plugin_t *plugin)
+{
+    static srs_voice_api_t api = {
+        .render = espeak_render,
+        .cancel = espeak_cancel
+    };
+
+    espeak_t      *e = (espeak_t *)plugin->plugin_data;
+    espeak_VOICE **voices, *v;
+    int            nvoice, i;
+
+    if (pulse_setup(e) != 0)
+        return FALSE;
+
+    voices = (espeak_VOICE **)espeak_ListVoices(NULL);
+
+    if (voices == NULL) {
+        mrp_log_error("Could not find any espeak voices.");
+        return FALSE;
+    }
+
+    for (nvoice = 0; voices[nvoice] != NULL; nvoice++)
+        ;
+
+    if ((e->actors = mrp_allocz_array(typeof(*e->actors), nvoice)) == NULL)
+        goto fail;
+
+    mrp_log_info("Available espeak voices:");
+
+    for (i = 0; i < nvoice; i++) {
+        v = voices[i];
+
+        mrp_log_info("    %s (%smale, age %d, languages: %s (id: %s))", v->name,
+                     v->gender == 2 ? "fe" : "", v->age,
+                     v->languages, v->identifier);
+
+        e->actors[i].id          = i;
+        e->actors[i].name        = mrp_strdup(v->name);
+        e->actors[i].lang        = mrp_strdup(espeak_language(v->languages));
+        e->actors[i].dialect     = NULL;
+        e->actors[i].gender      = espeak_gender(v->gender);
+        e->actors[i].description = mrp_strdup(espeak_description(v));
+
+        if (e->actors[i].name == NULL || e->actors[i].lang == NULL)
+            goto fail;
+
+        e->nactor++;
+    }
+
+    if (srs_register_voice(e->self->srs, "espeak", &api, e,
+                           e->actors, e->nactor,
+                           &e->voice.notify, &e->voice.notify_data) == 0)
+        return TRUE;
+
+ fail:
+    for (i = 0; i < e->nactor; i++) {
+        mrp_free(e->actors[i].name);
+        mrp_free(e->actors[i].lang);
+        mrp_free(e->actors[i].description);
+    }
+
+    mrp_free(e->actors);
+    e->actors = NULL;
+    e->nactor = 0;
+
+    return FALSE;
+}
+
+
+static void stop_espeak(srs_plugin_t *plugin)
+{
+    MRP_UNUSED(plugin);
+}
+
+
+static void destroy_espeak(srs_plugin_t *plugin)
+{
+    espeak_t *e = (espeak_t *)plugin->plugin_data;
+    int       i;
+
+    srs_unregister_voice(e->self->srs, "espeak");
+    espeak_Terminate();
+
+    for (i = 0; i < e->nactor; i++) {
+        mrp_free(e->actors[i].name);
+        mrp_free(e->actors[i].lang);
+        mrp_free(e->actors[i].description);
+    }
+
+    mrp_free(e->actors);
+
+    pulse_cleanup(e);
+
+    mrp_free(e);
+}
+
+
+SRS_DECLARE_PLUGIN(PLUGIN_NAME, PLUGIN_DESCR, PLUGIN_AUTHORS, PLUGIN_VERSION,
+                   create_espeak, config_espeak,
+                   start_espeak, stop_espeak,
+                   destroy_espeak)
diff --git a/src/plugins/text-to-speech/espeak/espeak-voice.h b/src/plugins/text-to-speech/espeak/espeak-voice.h
new file mode 100644 (file)
index 0000000..e00b0ba
--- /dev/null
@@ -0,0 +1,25 @@
+#ifndef __SRS_ESPEAK_VOICE_H__
+#define __SRS_ESPEAK_VOICE_H__
+
+#include <pulse/mainloop.h>
+
+#include "srs/daemon/plugin.h"
+#include "srs/daemon/voice.h"
+
+typedef struct {
+    srs_plugin_t      *self;             /* our plugin instance */
+    srs_context_t     *srs;              /* SRS context */
+    srs_voice_actor_t *actors;           /* loaded voices */
+    int                nactor;           /* number of voices */
+    struct {
+        const char    *voicedir;         /* voice directory */
+        int            rate;             /* sample rate */
+    } config;
+    struct {
+        srs_voice_notify_t  notify;      /* voice notification callback */
+        void               *notify_data; /* opaque notification data */
+    } voice;
+    void              *pulse;            /* PA streams et al. state */
+} espeak_t;
+
+#endif /* __SRS_ESPEAK_VOICE_H__ */
diff --git a/src/plugins/text-to-speech/espeak/pulse.c b/src/plugins/text-to-speech/espeak/pulse.c
new file mode 100644 (file)
index 0000000..3bd0857
--- /dev/null
@@ -0,0 +1,510 @@
+#include <errno.h>
+
+#include <murphy/common/debug.h>
+#include <murphy/common/log.h>
+#include <murphy/common/mm.h>
+#include <murphy/common/list.h>
+#include <murphy/common/refcnt.h>
+
+#include "pulse.h"
+
+#define SPEECH "speech"
+#define TTS    "text-to-speech"
+
+typedef struct {
+    espeak_t        *e;                  /* espeak voice context */
+    pa_mainloop_api *pa;                 /* PA mainloop API */
+    pa_context      *pc;                 /* PA context */
+    uint32_t         strmid;             /* next stream id */
+    mrp_list_hook_t  streams;            /* active streams */
+    int              connected;          /* whether connection is up */
+    mrp_timer_t     *reconn;             /* reconnect timer */
+} pulse_t;
+
+
+typedef struct {
+    pulse_t           *p;                /* our pulse_t context */
+    pa_stream         *s;                /* associated PA stream */
+    void              *buf;              /* pre-generated sample buffer */
+    size_t             size;             /* buffer size */
+    size_t             offs;             /* offset to next sample */
+    uint32_t           msec;             /* length in milliseconds */
+    int                rate;             /* sample rate */
+    int                nchannel;         /* number of channels */
+    uint32_t           nsample;          /* number of samples */
+    uint32_t           id;               /* our stream id */
+    int                event_mask;       /* mask of watched events */
+    int                fired_mask;       /* mask of delivered events */
+    pulse_stream_cb_t  cb;               /* notification callback */
+    void              *user_data;        /* callback user data */
+    mrp_list_hook_t    hook;             /* hook to list of streams */
+    mrp_refcnt_t       refcnt;           /* reference count */
+    int                stopped : 1;      /* stopped marker */
+    pa_operation      *drain;            /* draining operation */
+} stream_t;
+
+
+static void context_state_cb(pa_context *pc, void *user_data);
+static void context_event_cb(pa_context *pc, pa_subscription_event_type_t e,
+                             uint32_t idx, void *user_data);
+static void stream_state_cb(pa_stream *s, void *user_data);
+static void stream_write_cb(pa_stream *s, size_t size, void *user_data);
+static void stream_drain_cb(pa_stream *ps, int success, void *user_data);
+
+static void stream_drain(stream_t *s);
+static void stream_notify(stream_t *s, srs_voice_event_type_t event);
+
+
+int pulse_setup(espeak_t *e)
+{
+    pulse_t *p;
+
+    if ((p = mrp_allocz(sizeof(*p))) == NULL)
+        return -1;
+
+    mrp_list_init(&p->streams);
+    p->e  = e;
+    p->pa = e->srs->pa;
+    p->pc = pa_context_new(p->pa, "espeak");
+
+    if (p->pc == NULL) {
+        mrp_free(p);
+
+        return -1;
+    }
+
+    e->pulse  = p;
+    p->strmid = 1;
+
+    pa_context_set_state_callback(p->pc, context_state_cb, p);
+    pa_context_set_subscribe_callback(p->pc, context_event_cb, p);
+    pa_context_connect(p->pc, NULL, PA_CONTEXT_NOFAIL, NULL);
+
+    return 0;
+}
+
+
+void pulse_cleanup(espeak_t *e)
+{
+    pulse_t *p = (pulse_t *)e->pulse;
+
+    if (p->pc != NULL) {
+        pa_context_disconnect(p->pc);
+        pa_context_unref(p->pc);
+        p->pc = NULL;
+    }
+}
+
+
+static void stream_destroy(stream_t *s)
+{
+    mrp_debug("destroying stream #%d", s->id);
+
+    mrp_list_delete(&s->hook);
+
+    if (s->s != NULL) {
+        pa_stream_set_state_callback(s->s, NULL, NULL);
+        pa_stream_set_write_callback(s->s, NULL, NULL);
+        pa_stream_disconnect(s->s);
+        pa_stream_unref(s->s);
+        s->s = NULL;
+        mrp_free(s->buf);
+        s->buf = NULL;
+    }
+
+    mrp_free(s);
+}
+
+
+static inline stream_t *stream_ref(stream_t *s)
+{
+    if (s != NULL) {
+        mrp_ref_obj(s, refcnt);
+        mrp_debug("stream reference count increased to %d", s->refcnt);
+    }
+
+    return s;
+}
+
+
+static inline void stream_unref(stream_t *s)
+{
+    if (mrp_unref_obj(s, refcnt))
+        stream_destroy(s);
+    else
+        mrp_debug("stream reference count decreased to %d", s->refcnt);
+}
+
+
+uint32_t pulse_play_stream(espeak_t *e, void *sample_buf, int sample_rate,
+                           int nchannel, uint32_t nsample, char **tags,
+                           int event_mask, pulse_stream_cb_t cb,
+                           void *user_data)
+{
+    pulse_t         *p    = (pulse_t *)e->pulse;
+    char           **t;
+    stream_t        *s;
+    pa_sample_spec   ss;
+    pa_buffer_attr   ba;
+    pa_proplist     *props;
+    size_t           pamin, pabuf;
+    int              flags;
+
+    if ((s = mrp_allocz(sizeof(*s))) == NULL)
+        return 0;
+
+    mrp_list_init(&s->hook);
+    mrp_refcnt_init(&s->refcnt);
+
+    if (tags != NULL) {
+        if ((props = pa_proplist_new()) == NULL) {
+            mrp_free(s);
+            return 0;
+        }
+
+        pa_proplist_sets(props, PA_PROP_MEDIA_ROLE, SPEECH);
+
+        for (t = tags; *t; t++)
+            pa_proplist_setp(props, *t);
+    }
+    else
+        props = NULL;
+
+    memset(&ss, 0, sizeof(ss));
+    ss.format   = PA_SAMPLE_S16LE;
+    ss.rate     = sample_rate;
+    ss.channels = nchannel;
+
+    pamin  = pa_usec_to_bytes(100 * PA_USEC_PER_MSEC, &ss);
+    pabuf  = pa_usec_to_bytes(300 * PA_USEC_PER_MSEC, &ss);
+
+    ba.maxlength = -1;
+    ba.tlength   = pabuf;
+    ba.minreq    = pamin;
+    ba.prebuf    = pabuf;
+    ba.fragsize  = -1;
+
+    s->s = pa_stream_new_with_proplist(p->pc, TTS, &ss, NULL, props);
+    if (props != NULL)
+        pa_proplist_free(props);
+
+    if (s->s == NULL) {
+        mrp_free(s);
+        return 0;
+    }
+
+    s->p          = p;
+    s->buf        = sample_buf;
+    s->offs       = 0;
+    s->rate       = sample_rate;
+    s->nchannel   = nchannel;
+    s->nsample    = nsample;
+    s->size       = 2 * nsample * nchannel;
+    s->msec       = (1.0 * nsample) / sample_rate * 1000;
+    s->cb         = cb;
+    s->user_data  = user_data;
+    s->id         = p->strmid++;
+    s->event_mask = event_mask;
+
+    pa_stream_set_state_callback(s->s, stream_state_cb, s);
+    pa_stream_set_write_callback(s->s, stream_write_cb, s);
+
+    flags = PA_STREAM_ADJUST_LATENCY;
+    pa_stream_connect_playback(s->s, NULL, &ba, flags, NULL, NULL);
+
+    mrp_list_append(&p->streams, &s->hook);
+
+    return s->id;
+}
+
+
+static void stream_stop(stream_t *s, int drain, int notify)
+{
+    if (s->stopped)
+        return;
+    else
+        s->stopped = TRUE;
+
+    if (!notify)
+        s->event_mask = 0;
+
+    if (!drain) {
+        stream_notify(s, s->offs >= s->size ?
+                      PULSE_STREAM_COMPLETED : PULSE_STREAM_ABORTED);
+    }
+    else
+        stream_drain(s);
+
+    stream_unref(s);                     /* remove initial reference */
+}
+
+
+int pulse_stop_stream(espeak_t *e, uint32_t id, int drain, int notify)
+{
+    pulse_t         *p = (pulse_t *)e->pulse;
+    mrp_list_hook_t *sp, *sn;
+    stream_t        *se, *s;
+
+    mrp_debug("stopping stream #%u", id);
+
+    s = NULL;
+    mrp_list_foreach(&p->streams, sp, sn) {
+        se = mrp_list_entry(sp, typeof(*se), hook);
+
+        if (se->id == id) {
+            s = se;
+            break;
+        }
+    }
+
+    if (s == NULL) {
+        errno = ENOENT;
+        return -1;
+    }
+
+    stream_stop(s, drain, notify);
+
+    return 0;
+}
+
+
+static void connect_timer_cb(mrp_timer_t *t, void *user_data)
+{
+    pulse_t *p = (pulse_t *)user_data;
+
+    if (p->pc != NULL) {
+        pa_context_unref(p->pc);
+        p->pc = NULL;
+    }
+
+    p->pc = pa_context_new(p->pa, "espeak");
+
+    pa_context_set_state_callback(p->pc, context_state_cb, p);
+    pa_context_set_subscribe_callback(p->pc, context_event_cb, p);
+    pa_context_connect(p->pc, NULL, PA_CONTEXT_NOFAIL, NULL);
+
+    p->reconn = NULL;
+    mrp_del_timer(t);
+}
+
+
+static void stop_reconnect(pulse_t *p)
+{
+    if (p->reconn != NULL) {
+        mrp_del_timer(p->reconn);
+        p->reconn = NULL;
+    }
+}
+
+
+static void start_reconnect(pulse_t *p)
+{
+    stop_reconnect(p);
+
+    p->reconn = mrp_add_timer(p->e->srs->ml, 5000, connect_timer_cb, p);
+}
+
+
+static void context_state_cb(pa_context *pc, void *user_data)
+{
+    pulse_t *p = (pulse_t *)user_data;
+
+    switch (pa_context_get_state(pc)) {
+    case PA_CONTEXT_CONNECTING:
+        mrp_debug("PA connection: being established...");
+        p->connected = FALSE;
+        stop_reconnect(p);
+        break;
+
+    case PA_CONTEXT_AUTHORIZING:
+        mrp_debug("PA connection: being authenticated...");
+        p->connected = FALSE;
+        break;
+
+    case PA_CONTEXT_SETTING_NAME:
+        mrp_debug("PA connection: setting name...");
+        p->connected = FALSE;
+        break;
+
+    case PA_CONTEXT_READY:
+        mrp_log_info("festival: PA connection up and ready");
+        p->connected = TRUE;
+        break;
+
+    case PA_CONTEXT_TERMINATED:
+        mrp_log_info("festival: PA connection terminated");
+        p->connected = FALSE;
+        start_reconnect(p);
+        break;
+
+    case PA_CONTEXT_FAILED:
+        mrp_log_error("festival: PA connetion failed");
+    default:
+        p->connected = FALSE;
+        start_reconnect(p);
+        break;
+    }
+}
+
+
+static void context_event_cb(pa_context *pc, pa_subscription_event_type_t e,
+                             uint32_t idx, void *user_data)
+{
+    MRP_UNUSED(pc);
+    MRP_UNUSED(e);
+    MRP_UNUSED(idx);
+    MRP_UNUSED(user_data);
+}
+
+
+static void stream_notify(stream_t *s, srs_voice_event_type_t event)
+{
+    int                mask = (1 << event);
+    srs_voice_event_t  e;
+
+    if (s->cb == NULL || !(s->event_mask & mask))
+        return;
+
+    if ((mask & PULSE_MASK_ONESHOT) && (s->fired_mask & mask))
+        return;
+
+    e.type = event;
+    e.id   = s->id;
+
+    switch (event) {
+    case PULSE_STREAM_STARTED:
+        e.data.progress.pcnt = 0;
+        e.data.progress.msec = 0;
+        break;
+
+    case PULSE_STREAM_PROGRESS:
+        e.data.progress.pcnt = ((1.0 * s->offs) / s->size) * 100;
+        e.data.progress.msec = ((1.0 * s->offs) / s->size) * s->msec;
+        break;
+
+    case PULSE_STREAM_COMPLETED:
+        e.data.progress.pcnt = ((1.0 * s->offs) / s->size) * 100;
+        e.data.progress.msec = ((1.0 * s->offs) / s->size) * s->msec;
+        break;
+
+    case PULSE_STREAM_ABORTED:
+        e.data.progress.pcnt = 0;
+        e.data.progress.msec = 0;
+        break;
+
+    default:
+        return;
+    }
+
+    stream_ref(s);
+    s->cb(s->p->e, &e, s->user_data);
+    stream_unref(s);
+}
+
+
+static void stream_state_cb(pa_stream *ps, void *user_data)
+{
+    stream_t           *s   = (stream_t *)user_data;
+    pulse_t            *p   = s->p;
+    pa_context_state_t  cst = pa_context_get_state(p->pc);
+    pa_stream_state_t   sst;
+
+    if (cst == PA_CONTEXT_TERMINATED || cst == PA_CONTEXT_FAILED)
+        return;
+
+    stream_ref(s);
+
+    switch ((sst = pa_stream_get_state(s->s))) {
+    case PA_STREAM_CREATING:
+        mrp_debug("stream #%u being created", s->id);
+        break;
+
+    case PA_STREAM_READY:
+        mrp_debug("stream #%u ready", s->id);
+        stream_notify(s, PULSE_STREAM_STARTED);
+        break;
+
+    case PA_STREAM_TERMINATED:
+    case PA_STREAM_FAILED:
+    default:
+        mrp_debug("stream #%u state %d", s->id, sst);
+
+        pa_stream_disconnect(s->s);
+        pa_stream_set_state_callback(s->s, NULL, NULL);
+        pa_stream_set_write_callback(s->s, NULL, NULL);
+
+        if (sst == PA_STREAM_TERMINATED)
+            stream_notify(s, PULSE_STREAM_COMPLETED);
+        else
+            stream_notify(s, PULSE_STREAM_ABORTED);
+    }
+
+    stream_unref(s);
+}
+
+
+static void stream_drain(stream_t *s)
+{
+    if (s->drain == NULL) {
+        mrp_debug("stream #%u done, draining", s->id);
+        stream_ref(s);
+        s->drain = pa_stream_drain(s->s, stream_drain_cb, s);
+    }
+}
+
+
+static void stream_drain_cb(pa_stream *ps, int success, void *user_data)
+{
+    stream_t *s = (stream_t *)user_data;
+
+    mrp_debug("stream #%u drained %s", s->id,
+              success ? "successfully" : "failed");
+
+    pa_operation_unref(s->drain);
+    s->drain = NULL;
+    stream_notify(s, PULSE_STREAM_COMPLETED);
+    stream_unref(s);
+}
+
+
+static void stream_write_cb(pa_stream *ps, size_t size, void *user_data)
+{
+    stream_t *s = (stream_t *)user_data;
+    int       done;
+
+    stream_notify(s, PULSE_STREAM_PROGRESS);
+
+    if (s->offs == s->size) {
+        pa_stream_set_write_callback(s->s, NULL, NULL);
+        return;
+    }
+
+    stream_ref(s);
+
+    if (s->offs + size >= s->size) {
+        size = s->size - s->offs;
+        done = TRUE;
+    }
+    else
+        done = FALSE;
+
+    if (pa_stream_write(s->s, s->buf + s->offs, size, NULL, 0,
+                        PA_SEEK_RELATIVE) < 0) {
+        mrp_log_error("festival: failed to write %zd bytes", size);
+        goto out;
+    }
+    else {
+        s->offs += size;
+
+        if (done)
+            stream_stop(s, TRUE, TRUE);
+    }
+
+ out:
+    stream_unref(s);
+}
diff --git a/src/plugins/text-to-speech/espeak/pulse.h b/src/plugins/text-to-speech/espeak/pulse.h
new file mode 100644 (file)
index 0000000..f7e6b70
--- /dev/null
@@ -0,0 +1,60 @@
+#ifndef __SRS_ESPEAK_PULSE_H__
+#define __SRS_ESPEAK_PULSE_H__
+
+#include <stdint.h>
+#include <murphy/common/macros.h>
+#include <pulse/pulseaudio.h>
+
+#include "espeak-voice.h"
+
+MRP_CDECL_BEGIN
+
+/*
+ * PA stream events
+ */
+
+typedef enum {
+    PULSE_STREAM_NONE      = SRS_VOICE_EVENT_STARTED - 1,
+    PULSE_STREAM_STARTED   = SRS_VOICE_EVENT_STARTED,
+    PULSE_STREAM_PROGRESS  = SRS_VOICE_EVENT_PROGRESS,
+    PULSE_STREAM_COMPLETED = SRS_VOICE_EVENT_COMPLETED,
+    PULSE_STREAM_TIMEOUT   = SRS_VOICE_EVENT_TIMEOUT,
+    PULSE_STREAM_ABORTED   = SRS_VOICE_EVENT_ABORTED,
+    PULSE_STREAM_CORKED,
+    PULSE_STREAM_UNCORKED,
+} pulse_stream_event_type_t;
+
+#define PULSE_MASK_NONE      SRS_VOICE_MASK_NONE
+#define PULSE_MASK_STARTED   SRS_VOICE_MASK_STARTED
+#define PULSE_MASK_PROGRESS  SRS_VOICE_MASK_PROGRESS
+#define PULSE_MASK_COMPLETED SRS_VOICE_MASK_COMPLETED
+#define PULSE_MASK_ABORTED   SRS_VOICE_MASK_ABORTED
+#define PULSE_MASK_CORKED    (1 << PULSE_STREAM_CORKED)
+#define PULSE_MASK_UNCORKED  (1 << PULSE_STREAM_UNCORKED)
+#define PULSE_MASK_ONESHOT   (~(PULSE_MASK_PROGRESS))
+#define PULSE_MASK_ALL       (PULSE_MASK_STARTED | PULSE_MASK_PROGRESS | \
+                              PULSE_MASK_COMPLETED | PULSE_MASK_ABORTED)
+
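+/*
+ * For example, a caller interested only in start and completion
+ * notifications would pass (PULSE_MASK_STARTED | PULSE_MASK_COMPLETED)
+ * as the event mask to pulse_play_stream() below.
+ */
+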
+typedef srs_voice_event_t pulse_stream_event_t;
+
+typedef void (*pulse_stream_cb_t)(espeak_t *e, pulse_stream_event_t *event,
+                                  void *user_data);
+
+/** Set up the PulseAudio interface. */
+int pulse_setup(espeak_t *e);
+
+/** Clean up the audio interface. */
+void pulse_cleanup(espeak_t *e);
+
+/** Render a stream (a buffer of audio samples). */
+uint32_t pulse_play_stream(espeak_t *e, void *sample_buf, int sample_rate,
+                           int nchannel, uint32_t nsample, char **tags,
+                           int event_mask, pulse_stream_cb_t cb,
+                           void *user_data);
+
+/** Stop an ongoing stream. */
+int pulse_stop_stream(espeak_t *e, uint32_t id, int drain, int notify);
+
+MRP_CDECL_END
+
+#endif /* __SRS_ESPEAK_PULSE_H__ */
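
For reference, a minimal usage sketch of this interface (a hypothetical helper,
not part of the commit; it assumes a context already set up with pulse_setup()
and a 16-bit mono sample buffer whose ownership passes to the stream):

    #include "pulse.h"

    /* Queue a sample buffer for playback; with a NULL callback no events
     * are delivered. The returned stream id can later be handed to
     * pulse_stop_stream() to abort or drain the stream. */
    static uint32_t play_buffer(espeak_t *e, void *samples, uint32_t nsample,
                                char **tags)
    {
        return pulse_play_stream(e, samples, e->config.rate, 1, nsample,
                                 tags, PULSE_MASK_ALL, NULL, NULL);
    }

espeak-voice.c above uses the same calls, passing stream_event_cb so that
progress and completion events reach the SRS voice layer.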