2 * Copyright (c) 2012 - 2013, Intel Corporation
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are
8 * * Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 * * Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * * Neither the name of Intel Corporation nor the names of its contributors
14 * may be used to endorse or promote products derived from this software
15 * without specific prior written permission.
17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 #include <murphy/common/macros.h>
34 #include <murphy/common/mm.h>
35 #include <murphy/common/list.h>
37 #include "srs/daemon/context.h"
38 #include "srs/daemon/voice.h"
/* Forward declaration: the backend record below stores a state_t pointer. */
40 typedef struct state_s state_t;
43 * a speech synthesizer backend
/* Bookkeeping for one registered voice/TTS backend. */
47 int id; /* internal backend ID, taken from state->nsynthesizer at registration */
48 srs_context_t *srs; /* main context */
49 char *name; /* engine name (a copy of the name passed at registration) */
50 srs_voice_api_t api; /* backend API; its render and cancel entries are called from this file */
51 void *api_data; /* opaque engine data, handed back to the backend API calls */
52 srs_voice_actor_t *actors; /* backend voice actors */
53 int nactor; /* number of actors */
54 mrp_list_hook_t hook; /* to list of backends (state->synthesizers) */
55 state_t *state; /* our state */
/* One supported language together with the voice actors registered for it. */
64 char *lang; /* language name (looked up case-insensitively) */
65 mrp_list_hook_t hook; /* to list of languages (state->languages) */
66 mrp_list_hook_t actors; /* list of actors */
67 int nmale; /* number of male actors */
68 int nfemale; /* number of female actors */
/* One voice actor, linked into the actor list of the language it speaks. */
77 char *voice; /* globally unique voice id */
78 renderer_t *r; /* rendering backend */
79 int id; /* backend voice id */
80 char *dialect; /* language dialect, if any */
81 srs_voice_gender_t gender; /* actor gender */
82 int age; /* actor age */
83 char *description; /* description */
84 mrp_list_hook_t hook; /* to the owning language's list of actors */
89 * active and queued rendering requests
/* Fields shared by the active request and by requests waiting in the queue. */
93 uint32_t id; /* request id, taken from state->nextid */
94 renderer_t *r; /* rendering backend */
95 uint32_t vid; /* backend id, as returned by the backend's render call */
96 int notify_mask; /* notification event mask (tested as 1 << event type) */
97 srs_voice_notify_t notify; /* notification callback */
98 void *notify_data; /* opaque notification data */
99 mrp_timer_t *timer; /* request timeout timer */
100 mrp_list_hook_t hook; /* hook to list of requests */
/* A queued request: the common request part plus the arguments kept for the later render call. */
105 request_t req; /* request */
106 char *msg; /* message to render */
107 char **tags; /* stream tags */
108 uint32_t actor; /* actor id */
109 double rate; /* synthesis rate (1 = normal) */
110 double pitch; /* synthesis pitch (1 = default) */
111 int timeout; /* timeout */
116 * speech synthesizer state
/* Module-wide bookkeeping, reached through srs->synthesizer. */
120 mrp_list_hook_t synthesizers; /* registered synthesizers */
121 int nsynthesizer; /* number of synthesizers */
122 mrp_list_hook_t languages; /* list of supported languages */
123 uint32_t nextid; /* next request id to hand out */
124 mrp_list_hook_t requests; /* request queue */
125 request_t *active; /* active request */
126 request_t *cancelling; /* request being cancelled */
/* Forward declarations: both are called by voice_notify_cb before they are defined. */
130 static request_t *find_request(state_t *state, uint32_t rid, uint32_t vid);
131 static request_t *activate_next(state_t *state);
/*
 * Find the entry for 'lang' in state->languages.
 *
 * Names are compared case-insensitively. The tail of the function
 * allocates an entry, initialises its list hooks, duplicates the name
 * and appends the entry to state->languages.
 * NOTE(review): presumably the allocation path is only taken when no
 * entry matched and 'create' is non-zero - confirm.
 */
134 static language_t *find_language(state_t *state, const char *lang, int create)
136 mrp_list_hook_t *p, *n;
/* scan the known languages for a case-insensitive name match */
139 mrp_list_foreach(&state->languages, p, n) {
140 l = mrp_list_entry(p, typeof(*l), hook);
142 if (!strcasecmp(l->lang, lang))
/* build a fresh entry */
149 l = mrp_allocz(sizeof(*l));
154 mrp_list_init(&l->hook);
155 mrp_list_init(&l->actors);
157 l->lang = mrp_strdup(lang);
/* duplicating the language name failed */
159 if (l->lang == NULL) {
164 mrp_list_append(&state->languages, &l->hook);
/*
 * Register one backend voice actor 'act' on behalf of renderer 'r'.
 *
 * Looks up (creating it if needed) the language entry for act->lang,
 * allocates an actor record, copies dialect, gender and description,
 * builds the voice id string and appends the actor to the language's
 * actor list. srs_register_voice compares the result against 0.
 */
170 static int register_actor(renderer_t *r, srs_voice_actor_t *act)
172 state_t *state = r->state;
/* TRUE: create the language entry if it is not known yet */
179 l = find_language(state, act->lang, TRUE);
184 a = mrp_allocz(sizeof(*a));
190 mrp_list_init(&a->hook);
193 a->dialect = mrp_strdup(act->dialect);
194 a->gender = act->gender;
196 a->description = mrp_strdup(act->description);
/* a source string was given but its copy is missing: release both copies */
198 if ((act->dialect && !a->dialect) ||
199 (act->description && !a->description)) {
200 mrp_free(a->dialect);
201 mrp_free(a->description);
/*
 * Gender handling. NOTE(review): the assignment to MALE below looks
 * like the fallback for any other gender value - confirm.
 */
207 if (a->gender == SRS_VOICE_GENDER_MALE) {
212 else if (a->gender == SRS_VOICE_GENDER_FEMALE) {
217 a->gender = SRS_VOICE_GENDER_MALE;
/*
 * Voice id with a numeric suffix: <lang>[-<dialect>]<g>-<n>.
 * NOTE(review): g looks like a gender tag and *n like a per-gender
 * counter - confirm.
 */
222 snprintf(voice, sizeof(voice), "%s%s%s%s-%d", l->lang,
223 a->dialect ? "-" : "", a->dialect ? a->dialect : "", g, *n);
/* voice id without the numeric suffix: <lang>[-<dialect>]<g> */
225 snprintf(voice, sizeof(voice), "%s%s%s%s", l->lang,
226 a->dialect ? "-" : "", a->dialect ? a->dialect : "", g);
228 a->voice = mrp_strdup(voice);
/* copying the voice id failed: release the strings duplicated so far */
230 if (a->voice == NULL) {
231 mrp_free(a->dialect);
232 mrp_free(a->description);
/* link the actor into its language */
238 mrp_list_append(&l->actors, &a->hook);
241 mrp_log_info("Registered voice %s/%s.", r->name, voice);
/*
 * Tear down voice actors while renderer 'r' is being removed.
 *
 * Walks every language and every actor in it, logs the actor, releases
 * its dialect and description strings, and unlinks a language once its
 * actor list is empty.
 * NOTE(review): presumably only actors belonging to 'r' are touched -
 * confirm.
 */
247 static void unregister_actors(renderer_t *r)
249 state_t *state = r->state;
250 mrp_list_hook_t *lp, *ln;
252 mrp_list_hook_t *ap, *an;
255 mrp_list_foreach(&state->languages, lp, ln) {
256 l = mrp_list_entry(lp, typeof(*l), hook);
258 mrp_list_foreach(&l->actors, ap, an) {
259 a = mrp_list_entry(ap, typeof(*a), hook);
/* NOTE(review): presumably adjusts the per-gender counters of the language - confirm */
262 if (a->gender == SRS_VOICE_GENDER_MALE)
267 mrp_log_info("Unregistering voice %s/%s.", r->name, a->voice);
/*
 * NOTE(review): this unlinks the language hook (l->hook) while the
 * actor is the object being released; a->hook looks like the intended
 * target. As written the actor stays linked in l->actors. Confirm and fix.
 */
269 mrp_list_delete(&l->hook);
272 mrp_free(a->dialect);
273 mrp_free(a->description);
/* no actors left for this language: drop the language entry as well */
278 if (mrp_list_empty(&l->actors)) {
279 mrp_list_delete(&l->hook);
/*
 * Search all languages for the actor whose backend name equals 'r' and
 * whose voice id equals 'v'. Both comparisons are exact (strcmp).
 */
286 static actor_t *find_actor(state_t *state, const char *r, const char *v)
288 mrp_list_hook_t *lp, *ln, *ap, *an;
292 mrp_list_foreach(&state->languages, lp, ln) {
293 l = mrp_list_entry(lp, typeof(*l), hook);
295 mrp_list_foreach(&l->actors, ap, an) {
296 a = mrp_list_entry(ap, typeof(*a), hook);
/* both the renderer name and the voice id must match */
298 if (!strcmp(a->r->name, r) && !strcmp(a->voice, v))
/* Unlink renderer 'r' from the list it is hooked into and unregister its actors. */
307 static void free_renderer(renderer_t *r)
310 mrp_list_delete(&r->hook);
311 unregister_actors(r);
/*
 * Deliver 'event' to the owner of 'req'.
 *
 * The callback is invoked only if one is set and the bit for the event
 * type (1 << event->type) is present in the request's notify_mask.
 */
317 static void notify_request(request_t *req, srs_voice_event_t *event)
319 renderer_t *r = req->r;
320 int mask = (1 << event->type);
323 if (req->notify != NULL && (mask & req->notify_mask)) {
/* NOTE(review): e is presumably a local copy of *event adjusted for the client - confirm */
326 req->notify(&e, req->notify_data);
/*
 * Event callback handed to backends by srs_register_voice.
 *
 * 'notify_data' is treated as the renderer_t of the backend. The
 * request is looked up by the backend id carried in the event, the
 * event is forwarded to the request owner, and for events in
 * SRS_VOICE_MASK_DONE the request is retired and the next queued one
 * is activated.
 */
331 static void voice_notify_cb(srs_voice_event_t *event, void *notify_data)
333 renderer_t *r = (renderer_t *)notify_data;
334 state_t *state = (state_t *)r->state;
335 uint32_t vid = event->id;
/* -1 as request id acts as a wildcard: match on the backend id only */
336 request_t *req = find_request(state, -1, vid);
337 int mask = (1 << event->type);
340 mrp_log_error("Failed to find request for event 0x%x of <%d>.",
341 event->type, event->id);
/* rendering has started: the timeout timer is no longer needed */
345 if (event->type == SRS_VOICE_EVENT_STARTED) {
346 mrp_del_timer(req->timer);
350 notify_request(req, event);
/* terminal event: drop the timer and retire the request */
352 if (mask & SRS_VOICE_MASK_DONE) {
353 mrp_del_timer(req->timer);
/* a request that is being cancelled is not unlinked here; srs_cancel_voice unlinks it */
355 if (state->cancelling != req) {
356 mrp_list_delete(&req->hook);
/* the finished request was the active one: move on to the next queued request */
360 if (state->active == req) {
361 state->active = NULL;
362 activate_next(state);
/*
 * Register a voice/TTS backend under 'name'.
 *
 * Sets up the module state in srs->synthesizer (allocation plus list
 * initialisation), validates the arguments, creates a renderer record,
 * registers each of the 'nactor' entries of 'actors', links the
 * renderer into state->synthesizers and hands voice_notify_cb back
 * through 'notify'.
 * NOTE(review): the state is presumably allocated only when
 * srs->synthesizer is still NULL, and *notify_data is presumably set to
 * the new renderer (voice_notify_cb casts its notify_data to
 * renderer_t *) - confirm both.
 */
368 int srs_register_voice(srs_context_t *srs, const char *name,
369 srs_voice_api_t *api, void *api_data,
370 srs_voice_actor_t *actors, int nactor,
371 srs_voice_notify_t *notify, void **notify_data)
373 state_t *state = (state_t *)srs->synthesizer;
375 srs_voice_actor_t *a;
379 srs->synthesizer = state = mrp_allocz(sizeof(*state));
384 mrp_list_init(&state->synthesizers);
385 mrp_list_init(&state->languages);
386 mrp_list_init(&state->requests);
/* argument validation: an API, a name and at least one actor are needed */
390 if (api == NULL || name == NULL || actors == NULL || nactor < 1) {
395 r = mrp_allocz(sizeof(*r));
400 mrp_list_init(&r->hook);
/* backend ids are handed out sequentially */
401 r->id = state->nsynthesizer++;
404 r->name = mrp_strdup(name);
406 if (r->name == NULL) {
412 r->api_data = api_data;
/* register every actor; a non-zero result is tested for */
414 for (i = 0; i < nactor; i++) {
415 if (register_actor(r, actors + i) != 0)
419 mrp_log_info("Registered voice/TTS backend '%s'.", r->name);
421 mrp_list_append(&state->synthesizers, &r->hook);
/* the backend reports rendering events through this callback */
423 *notify = voice_notify_cb;
/*
 * Unregister the voice/TTS backend called 'name'.
 *
 * Scans state->synthesizers for a renderer whose name matches exactly
 * (strcmp) and logs the removal.
 */
430 void srs_unregister_voice(srs_context_t *srs, const char *name)
432 state_t *state = (state_t *)srs->synthesizer;
434 mrp_list_hook_t *p, *n;
437 mrp_list_foreach(&state->synthesizers, p, n) {
438 r = mrp_list_entry(p, typeof(*r), hook);
440 if (!strcmp(r->name, name)) {
441 mrp_log_info("Unregistering voice/TTS backend '%s'.", name);
/*
 * Resolve a voice specification to a renderer and a backend actor id.
 *
 * A spec containing '/' is split into a renderer name and a voice id
 * and resolved with find_actor. Otherwise the language is looked up
 * (without creating it) from the whole spec when it has no '-', or from
 * a prefix of it, and the language's actors are searched for an exact
 * voice id match, keeping a fallback actor. When a fallback is used its
 * id is stored through 'actor'.
 */
450 static renderer_t *find_renderer(state_t *state, const char *voice,
454 actor_t *a, *fallback;
455 mrp_list_hook_t *ap, *an;
456 char lang[128], renderer[128], *e;
/* "renderer/voice" form */
465 if ((e = strchr(voice, '/')) != NULL) {
/* the renderer part must fit the buffer with room for a terminator */
468 if (n >= sizeof(renderer) - 1)
/* NOTE(review): strncpy does not terminate the copy; presumably renderer[n] is set to 0 right after - confirm */
471 strncpy(renderer, voice, n);
474 if ((a = find_actor(state, renderer, e + 1)) != NULL) {
/* no dialect separator: the whole spec is the language name */
482 if ((e = strchr(voice, '-')) == NULL)
483 l = find_language(state, voice, FALSE);
/* copy the first n characters of the spec as the language name; give up if it does not fit */
486 if (snprintf(lang, sizeof(lang), "%*.*s", n, n, voice) >= sizeof(lang))
489 l = find_language(state, lang, FALSE);
496 mrp_list_foreach(&l->actors, ap, an) {
497 a = mrp_list_entry(ap, typeof(*a), hook);
/* exact voice id match */
499 if (!strcmp(a->voice, voice)) {
/* NOTE(review): presumably remembers the first actor seen as the fallback - confirm */
505 if (fallback == NULL)
509 if (fallback != NULL) {
510 *actor = fallback->id;
/*
 * Pick a renderer by scanning the actor array of every registered
 * synthesizer.
 *
 * The requested actor is first mapped to a gender: SRS_VOICE_FEMALE and
 * SRS_VOICE_MALE select the matching gender, anything else selects
 * SRS_VOICE_GENDER_ANY. Actors of a different language are skipped;
 * the remaining ones are tested by gender and by name, and dfltid
 * starts out as SRS_VOICE_INVALID.
 * NOTE(review): dfltid presumably records the first acceptable actor
 * as a default - confirm.
 */
520 static renderer_t *select_renderer(state_t *state, const char *voice,
525 srs_voice_actor_t *a;
526 srs_voice_gender_t gender;
527 mrp_list_hook_t *p, *n;
537 if (!strcmp(actor, SRS_VOICE_FEMALE)) {
538 gender = SRS_VOICE_GENDER_FEMALE;
541 else if (!strcmp(actor, SRS_VOICE_MALE)) {
542 gender = SRS_VOICE_GENDER_MALE;
546 gender = SRS_VOICE_GENDER_ANY;
549 dfltid = SRS_VOICE_INVALID;
551 mrp_list_foreach(&state->synthesizers, p, n) {
552 r = mrp_list_entry(p, typeof(*r), hook);
554 for (i = 0, a = r->actors; i < r->nactor; i++, a++) {
/* language mismatch (exact, case-sensitive comparison) */
555 if (strcmp(a->lang, lang))
559 if (gender == a->gender) {
565 if (!strcmp(a->name, actor)) {
570 if (dfltid == SRS_VOICE_INVALID) {
/* Release a tag array; the entries are walked up to the first NULL pointer. */
584 static void free_tags(char **tags)
591 for (i = 0; tags[i] != NULL; i++)
/*
 * Duplicate a NULL-terminated array of tag strings.
 *
 * The copy is grown by one slot per tag and each tag is duplicated
 * with mrp_strdup.
 * NOTE(review): the final mrp_reallocz presumably adds the terminating
 * NULL slot - confirm.
 */
598 static char **copy_tags(char **tags)
606 for (i = 0; tags[i] != NULL; i++) {
/* grow the copy from i to i + 1 entries */
607 if (!mrp_reallocz(cp, i, i + 1))
609 if ((cp[i] = mrp_strdup(tags[i])) == NULL)
613 if (mrp_reallocz(cp, i, i + 1))
/*
 * Timeout handler for a queued request ('user_data' is the queued_t).
 *
 * Logs the timeout, deletes the timer, reports an
 * SRS_VOICE_EVENT_TIMEOUT event to the request owner and unlinks the
 * request from the list it is hooked into.
 */
622 static void request_timer_cb(mrp_timer_t *t, void *user_data)
624 queued_t *qr = (queued_t *)user_data;
625 request_t *req = &qr->req;
626 srs_voice_event_t event;
628 mrp_log_info("Voice/TTS request #%u timed out.", qr->req.id);
630 mrp_del_timer(req->timer);
634 event.type = SRS_VOICE_EVENT_TIMEOUT;
637 notify_request(req, &event);
639 mrp_list_delete(&req->hook);
/*
 * Queue a rendering request for later activation.
 *
 * Allocates a queued_t, assigns the next request id, records the
 * notification settings, and copies the message and tags. If the
 * copies succeeded the request is appended to state->requests and a
 * timer with request_timer_cb is added for it.
 */
648 static request_t *enqueue_request(state_t *state, const char *msg, char **tags,
649 renderer_t *r, uint32_t actor, double rate,
650 double pitch, int timeout, int notify_mask,
651 srs_voice_notify_t notify, void *notify_data)
655 qr = mrp_allocz(sizeof(*qr));
660 mrp_list_init(&qr->req.hook);
662 qr->req.id = state->nextid++;
/* no backend id yet: the request has not been handed to the backend */
664 qr->req.vid = SRS_VOICE_INVALID;
665 qr->req.notify_mask = notify_mask;
666 qr->req.notify = notify;
667 qr->req.notify_data = notify_data;
669 qr->msg = mrp_strdup(msg);
670 qr->tags = copy_tags(tags);
672 qr->timeout = timeout;
/* the message was copied, and the tags were either copied or never given */
674 if (qr->msg != NULL && (qr->tags != NULL || tags == NULL)) {
675 mrp_list_append(&state->requests, &qr->req.hook);
678 qr->req.timer = mrp_add_timer(r->srs->ml, timeout,
679 request_timer_cb, qr);
/*
 * Hand the next queued request to its backend.
 *
 * Checks whether a request is already active, then walks
 * state->requests: the queue timer is deleted, the backend's render
 * entry is called with the stored parameters, and on success the
 * request becomes state->active. If the backend returns
 * SRS_VOICE_INVALID and the owner asked for ABORTED events, an
 * SRS_VOICE_EVENT_ABORTED event is generated.
 */
693 static request_t *activate_next(state_t *state)
697 mrp_list_hook_t *p, *n;
699 if (state->active != NULL)
703 mrp_list_foreach(&state->requests, p, n) {
/* the list hook is embedded in the request part of the queued_t */
705 qr = mrp_list_entry(p, typeof(*qr), req.hook);
/* the request leaves the queue: its timeout timer is no longer needed */
712 mrp_del_timer(qr->req.timer);
713 qr->req.timer = NULL;
716 qr->req.vid = r->api.render(qr->msg, qr->tags, qr->actor, qr->rate,
717 qr->pitch, qr->req.notify_mask, r->api_data);
/* the backend refused the request */
724 if (qr->req.vid == SRS_VOICE_INVALID) {
725 if (qr->req.notify != NULL &&
726 (qr->req.notify_mask & 1 << SRS_VOICE_EVENT_ABORTED)) {
730 e.type = SRS_VOICE_EVENT_ABORTED;
/*
 * NOTE(review): voice_notify_cb casts its second argument to
 * renderer_t *, but the request's own notify_data is passed here -
 * confirm this is intended.
 */
733 voice_notify_cb(&e, qr->req.notify_data);
741 state->active = &qr->req;
/*
 * Create a request and hand it to the backend right away.
 *
 * Allocates the request, assigns the next request id, calls the
 * backend's render entry and, after checking the returned backend id
 * against SRS_VOICE_INVALID, records the notification settings.
 */
748 request_t *render_request(state_t *state, const char *msg, char **tags,
749 renderer_t *r, uint32_t actor, double rate,
750 double pitch, int timeout, int notify_mask,
751 srs_voice_notify_t notify, void *notify_data)
755 req = mrp_allocz(sizeof(*req));
760 mrp_list_init(&req->hook);
761 req->id = state->nextid++;
763 req->vid = r->api.render(msg, tags, actor, rate, pitch,
764 notify_mask, r->api_data);
/* the backend refused the request */
766 if (req->vid == SRS_VOICE_INVALID) {
771 req->notify = notify;
772 req->notify_mask = notify_mask;
773 req->notify_data = notify_data;
/*
 * Render 'msg' with the given voice.
 *
 * Resolves 'voice' to a renderer and actor id with find_renderer. If
 * no request is active the message is rendered immediately; otherwise
 * it is queued with enqueue_request, with timeout ==
 * SRS_VOICE_IMMEDIATE handled as a separate case. Failure paths return
 * SRS_VOICE_INVALID.
 * NOTE(review): a request with SRS_VOICE_IMMEDIATE is presumably
 * rejected instead of queued while another request is active - confirm.
 */
781 uint32_t srs_render_voice(srs_context_t *srs, const char *msg,
782 char **tags, const char *voice, double rate,
783 double pitch, int timeout, int notify_mask,
784 srs_voice_notify_t notify, void *user_data)
786 state_t *state = (state_t *)srs->synthesizer;
794 return SRS_VOICE_INVALID;
797 r = find_renderer(state, voice, &actid);
802 return SRS_VOICE_INVALID;
/* nothing is being rendered: start this request right away */
805 if (state->active == NULL)
806 req = render_request(state, msg, tags, r, actid, rate, pitch, timeout,
807 notify_mask, notify, user_data);
809 if (timeout == SRS_VOICE_IMMEDIATE) {
814 req = enqueue_request(state, msg, tags, r, actid, rate, pitch,
815 timeout, notify_mask, notify, user_data);
821 return SRS_VOICE_INVALID;
/*
 * Find a request by request id and/or backend id.
 *
 * Passing -1 for 'rid' or 'vid' makes that criterion match anything
 * (-1 converts to the all-ones uint32_t value). The active request is
 * tested first, then the queued requests.
 */
825 static request_t *find_request(state_t *state, uint32_t rid, uint32_t vid)
827 mrp_list_hook_t *p, *n;
835 if ((req = state->active) != NULL) {
836 if ((rid == -1 || req->id == rid) && (vid == -1 || req->vid == vid))
840 mrp_list_foreach(&state->requests, p, n) {
841 req = mrp_list_entry(p, typeof(*req), hook);
843 if ((rid == -1 || req->id == rid) && (vid == -1 || req->vid == vid))
/*
 * Cancel the request with id 'rid'.
 *
 * Deletes the request's timer, marks it as state->cancelling for the
 * duration of the backend cancel call, unlinks it, and activates the
 * next queued request if the cancelled one was active.
 */
854 void srs_cancel_voice(srs_context_t *srs, uint32_t rid, int notify)
856 state_t *state = (state_t *)srs->synthesizer;
/* -1 as backend id acts as a wildcard: match on the request id only */
857 request_t *req = find_request(state, rid, -1);
858 renderer_t *voice = req ? req->r : NULL;
863 mrp_del_timer(req->timer);
/* voice_notify_cb checks this marker and leaves the unlinking to the code below */
865 state->cancelling = req;
867 voice->api.cancel(req->vid, voice->api_data);
869 mrp_list_delete(&req->hook);
872 if (state->active == req) {
873 state->active = NULL;
874 activate_next(state);
/*
 * Collect the registered voice actors into a newly allocated array.
 *
 * 'language' filters by language name, compared case-insensitively;
 * the comparison is skipped when it is NULL. Every reported actor gets
 * its own copies of the name, language, dialect and description
 * strings. One extra slot is appended after the last actor;
 * srs_free_queried_voices stops at the first entry whose lang is NULL.
 * The trailing loop releases the copied strings of every collected
 * entry.
 * NOTE(review): that loop is presumably the failure path - confirm.
 */
879 int srs_query_voices(srs_context_t *srs, const char *language,
880 srs_voice_actor_t **actorsp)
882 state_t *state = (state_t *)srs->synthesizer;
883 srs_voice_actor_t *actors, *actor;
886 mrp_list_hook_t *lp, *ln;
888 mrp_list_hook_t *ap, *an;
899 mrp_list_foreach(&state->languages, lp, ln) {
900 l = mrp_list_entry(lp, typeof(*l), hook);
/* a language filter was given and this language does not match it */
902 if (language != NULL && strcasecmp(l->lang, language))
905 mrp_list_foreach(&l->actors, ap, an) {
906 a = mrp_list_entry(ap, typeof(*a), hook);
/* grow the result by one entry */
908 if (mrp_reallocz(actors, nactor, nactor + 1) == NULL)
911 actor = actors + nactor++;
913 actor->name = mrp_strdup(a->voice);
914 actor->lang = mrp_strdup(l->lang);
915 actor->dialect = mrp_strdup(a->dialect);
916 actor->gender = a->gender;
918 actor->description = mrp_strdup(a->description);
/* name and lang are mandatory; dialect/description only if the source had them */
920 if (!actor->name || !actor->lang ||
921 (!actor->dialect && a->dialect) ||
922 (!actor->description && a->description))
/* one extra slot after the last actor */
927 if (actors != NULL) {
928 if (!mrp_reallocz(actors, nactor, nactor + 1))
/* release every string copied so far */
937 for (i = 0; i < nactor; i++) {
938 mrp_free(actors[i].name);
939 mrp_free(actors[i].lang);
940 mrp_free(actors[i].dialect);
941 mrp_free(actors[i].description);
/*
 * Release an actor array.
 *
 * NULL is accepted. The entries are walked until one with a NULL lang
 * is reached, releasing the strings of each entry.
 */
948 void srs_free_queried_voices(srs_voice_actor_t *actors)
950 srs_voice_actor_t *a;
952 if (actors != NULL) {
/* the entry with a NULL lang terminates the array */
953 for (a = actors; a->lang; a++) {
956 mrp_free(a->dialect);
957 mrp_free(a->description);