2 * Copyright (c) 2012 - 2013, Intel Corporation
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are
8 * * Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 * * Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * * Neither the name of Intel Corporation nor the names of its contributors
14 * may be used to endorse or promote products derived from this software
15 * without specific prior written permission.
17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 #include <murphy/common/macros.h>
35 #include <murphy/common/mm.h>
36 #include <murphy/common/list.h>
38 #include "srs/daemon/context.h"
39 #include "srs/daemon/voice.h"
/* Forward declaration: the backend record below keeps a state_t pointer back to the shared synthesizer state. */
41 typedef struct state_s state_t;
44 * a speech synthesizer backend
/* Fields of the per-backend record (used as renderer_t throughout this file; the struct header is not part of this view). */
48 int id; /* internal backend ID, taken from state->nsynthesizer at registration */
49 srs_context_t *srs; /* main context */
50 char *name; /* engine name; compared against the part before '/' in "<engine>/<voice>" lookups */
51 srs_voice_api_t api; /* backend API (its render and cancel entry points are called through this) */
52 void *api_data; /* opaque engine data, handed back on every api call */
53 srs_voice_actor_t *actors; /* backend voice actors */
54 int nactor; /* number of actors */
55 mrp_list_hook_t hook; /* to list of backends (state->synthesizers) */
56 state_t *state; /* our state */
/* Fields of a language entry (used as language_t in this file; the struct header is not part of this view). */
65 char *lang; /* language name; lookups compare it case-insensitively */
66 mrp_list_hook_t hook; /* to list of languages (state->languages) */
67 mrp_list_hook_t actors; /* list of actors, linked through each actor's hook */
68 int nmale; /* number of male actors; also supplies the index used in generated voice names */
69 int nfemale; /* number of female actors; also supplies the index used in generated voice names */
/* Fields of a registered voice actor (used as actor_t in this file; the struct header is not part of this view). */
78 char *voice; /* globally unique voice id, generated by canonical_actor_name() */
79 renderer_t *r; /* rendering backend */
80 int id; /* backend voice id */
81 char *dialect; /* language dialect, if any */
82 srs_voice_gender_t gender; /* actor gender */
83 int age; /* actor age */
84 char *description; /* description */
85 mrp_list_hook_t hook; /* to the owning language's list of actors */
90 * active and queued rendering requests
/* Fields common to active and queued requests (used as request_t in this file; the struct header is not part of this view). */
94 uint32_t id; /* request id, allocated from state->nextid */
95 renderer_t *r; /* rendering backend */
96 uint32_t vid; /* id returned by the backend's render call; SRS_VOICE_INVALID while only queued */
97 int notify_mask; /* notification event mask, one bit per event type (1 << type) */
98 srs_voice_notify_t notify; /* notification callback */
99 void *notify_data; /* opaque notification data */
100 mrp_timer_t *timer; /* request timeout timer */
101 mrp_list_hook_t hook; /* hook to list of requests */
/* Fields of a queued request (used as queued_t in this file; the struct header is not part of this view). */
106 request_t req; /* request; the queue links through req.hook */
107 char *msg; /* message to render (private copy) */
108 char **tags; /* stream tags (private copy made by copy_tags) */
109 uint32_t actor; /* actor id */
110 double rate; /* synthesis rate (1 = normal) */
111 double pitch; /* synthesis pitch (1 = default) */
112 int timeout; /* timeout given when the request was queued */
117 * speech synthesizer state
/* Fields of the shared synthesizer state (state_t; the struct header is not part of this view). */
121 mrp_list_hook_t synthesizers; /* registered synthesizers */
122 int nsynthesizer; /* number of synthesizers; also the source of backend ids */
123 mrp_list_hook_t languages; /* list of supported languages */
124 uint32_t nextid; /* next request id */
125 mrp_list_hook_t requests; /* request queue */
126 request_t *active; /* active request */
127 request_t *cancelling; /* request being cancelled; its completion event must not unlink it */
/* Forward declarations: voice_notify_cb() calls both of these before their definitions appear. */
131 static request_t *find_request(state_t *state, uint32_t rid, uint32_t vid);
132 static request_t *activate_next(state_t *state);
/*
 * Look for the language entry named 'lang' in state->languages, comparing
 * names case-insensitively. The visible tail of the function allocates a
 * zero-filled entry, duplicates the name and appends the entry to the list;
 * presumably that path is taken only when 'create' is set (the check itself
 * is not visible in this view -- confirm).
 */
135 static language_t *find_language(state_t *state, const char *lang, int create)
137 mrp_list_hook_t *p, *n;
140 mrp_list_foreach(&state->languages, p, n) {
141 l = mrp_list_entry(p, typeof(*l), hook);
143 if (!strcasecmp(l->lang, lang))
/* build a fresh, zero-filled entry with both hooks initialised */
150 l = mrp_allocz(sizeof(*l));
155 mrp_list_init(&l->hook);
156 mrp_list_init(&l->actors);
158 l->lang = mrp_strdup(lang);
/* duplicating the name failed */
160 if (l->lang == NULL) {
165 mrp_list_append(&state->languages, &l->hook);
/*
 * Build the generated voice name for 'actor' of 'language' in a caller
 * supplied buffer of 'size' bytes. The name ends in a gender tag that is
 * optionally followed by "-<index>". The index comes from the language's
 * per-gender counter, which is post-incremented here, so every call changes
 * 'language'.
 */
171 static char *canonical_actor_name(char *buf, size_t size,
172 language_t *language, actor_t *actor)
174 const char *lang, *dialect, *gender, *s;
178 lang = language->lang;
179 dialect = actor->dialect;
/* anything that is not explicitly female is counted with the male actors */
181 if (actor->gender != SRS_VOICE_GENDER_FEMALE) {
183 idx = language->nmale++;
187 idx = language->nfemale++;
/* size-bounded loops; presumably they copy the language name and the dialect (loop bodies are not visible here) */
193 while (*s && size > 0) {
204 if (dialect != NULL && size > 0) {
209 while (*s && size > 0) {
/* gender tag, with or without the numeric suffix */
222 snprintf(d, size, "%s-%d", gender, idx);
224 snprintf(d, size, "%s", gender);
/*
 * Register one backend-provided voice 'act' for renderer 'r'.
 *
 * Looks up the language entry for act->lang, allocates an actor record,
 * copies dialect and description, generates the voice name and links the
 * actor into that language's actor list. srs_register_voice() treats any
 * non-zero return value as failure.
 */
230 static int register_actor(renderer_t *r, srs_voice_actor_t *act)
232 state_t *state = r->state;
/* TRUE is passed as find_language()'s 'create' argument */
237 l = find_language(state, act->lang, TRUE);
242 a = mrp_allocz(sizeof(*a));
248 mrp_list_init(&a->hook);
251 a->dialect = mrp_strdup(act->dialect);
252 a->gender = act->gender;
254 a->description = mrp_strdup(act->description);
/* a NULL copy counts as an error only when the source string was non-NULL */
256 if ((act->dialect && !a->dialect) ||
257 (act->description && !a->description)) {
258 mrp_free(a->dialect);
259 mrp_free(a->description);
/* the name is composed in the local buffer 'voice', then duplicated for the actor */
265 a->voice = mrp_strdup(canonical_actor_name(voice, sizeof(voice), l, a));
267 if (a->voice == NULL) {
268 mrp_free(a->dialect);
269 mrp_free(a->description);
275 mrp_list_append(&l->actors, &a->hook);
277 mrp_log_info("Registered voice %s/%s.", r->name, voice);
/*
 * Remove voice actors on behalf of renderer 'r' (presumably only the ones
 * owned by 'r'; that test is not visible in this view) and drop language
 * entries that end up without actors.
 *
 * Every language in state->languages and every actor on its list is visited.
 * An actor being removed is unlinked through its own hook, which is what
 * register_actor() appends to l->actors, before its strings are freed.
 * Unlinking the language hook at that point would leave the freed actor on
 * l->actors and keep that list from ever becoming empty. The language itself
 * is unlinked only once its actor list is empty.
 */
283 static void unregister_actors(renderer_t *r)
285 state_t *state = r->state;
286 mrp_list_hook_t *lp, *ln;
288 mrp_list_hook_t *ap, *an;
291 mrp_list_foreach(&state->languages, lp, ln) {
292 l = mrp_list_entry(lp, typeof(*l), hook);
294 mrp_list_foreach(&l->actors, ap, an) {
295 a = mrp_list_entry(ap, typeof(*a), hook);
/* NOTE(review): canonical_actor_name() counts every non-female actor as male, while this test is for exactly MALE -- confirm the counters stay in step */
298 if (a->gender == SRS_VOICE_GENDER_MALE)
303 mrp_log_info("Unregistering voice %s/%s.", r->name, a->voice);
/* unlink the actor itself, not the language it belongs to */
305 mrp_list_delete(&a->hook);
308 mrp_free(a->dialect);
309 mrp_free(a->description);
/* no actors left for this language */
314 if (mrp_list_empty(&l->actors)) {
315 mrp_list_delete(&l->hook);
/*
 * Search every language's actor list for the actor whose backend is named
 * 'r' and whose generated voice name is 'v'. Both comparisons use strcmp()
 * and are therefore case-sensitive.
 */
322 static actor_t *find_actor(state_t *state, const char *r, const char *v)
324 mrp_list_hook_t *lp, *ln, *ap, *an;
328 mrp_list_foreach(&state->languages, lp, ln) {
329 l = mrp_list_entry(lp, typeof(*l), hook);
331 mrp_list_foreach(&l->actors, ap, an) {
332 a = mrp_list_entry(ap, typeof(*a), hook);
334 if (!strcmp(a->r->name, r) && !strcmp(a->voice, v))
/*
 * Tear down renderer 'r': unlink it from the backend list, then unregister
 * its actors. unregister_actors() still reads r->state and r->name, so it has
 * to run before anything of 'r' is released (the release itself is not
 * visible in this view).
 */
343 static void free_renderer(renderer_t *r)
346 mrp_list_delete(&r->hook);
347 unregister_actors(r);
/*
 * Deliver 'event' to the requester's callback, but only when a callback is
 * set and the bit for this event type is present in req->notify_mask. The
 * callback is given a local event 'e' rather than 'event' itself (how 'e' is
 * filled in is not visible in this view).
 */
353 static void notify_request(request_t *req, srs_voice_event_t *event)
/* one bit per event type */
355 int mask = (1 << event->type);
358 if (req->notify != NULL && (mask & req->notify_mask)) {
361 req->notify(&e, req->notify_data);
/*
 * Event callback given to backends (srs_register_voice() stores it in
 * *notify). 'notify_data' is interpreted as the renderer_t the event comes
 * from. The matching request is looked up by the backend id in event->id.
 *
 * A STARTED event removes the request's timer. The event is then passed on
 * to the requester. For event types in SRS_VOICE_MASK_DONE the timer is
 * removed again, the request is unlinked unless it is the one currently
 * being cancelled, and if it was the active request the next queued one is
 * started.
 */
366 static void voice_notify_cb(srs_voice_event_t *event, void *notify_data)
368 renderer_t *r = (renderer_t *)notify_data;
369 state_t *state = (state_t *)r->state;
370 uint32_t vid = event->id;
/* -1 for the request id is presumably SRS_VOICE_INVALID, which find_request() treats as "any" */
371 request_t *req = find_request(state, -1, vid);
372 int mask = (1 << event->type);
375 mrp_log_error("Failed to find request for event 0x%x of <%d>.",
376 event->type, event->id);
380 if (event->type == SRS_VOICE_EVENT_STARTED) {
381 mrp_del_timer(req->timer);
385 notify_request(req, event);
387 if (mask & SRS_VOICE_MASK_DONE) {
388 mrp_del_timer(req->timer);
/* a request being cancelled is unlinked by srs_cancel_voice() instead */
390 if (state->cancelling != req) {
391 mrp_list_delete(&req->hook);
/* the renderer is idle again: start whatever is queued next */
395 if (state->active == req) {
396 state->active = NULL;
397 activate_next(state);
/*
 * Register a voice/TTS backend.
 *
 * srs          main context; the shared state is kept in srs->synthesizer
 * name         backend (engine) name, copied
 * api          backend entry points; must not be NULL
 * api_data     opaque pointer stored for the backend's api calls
 * actors       array of 'nactor' voices offered by the backend (nactor >= 1)
 * notify       receives the event callback the backend should invoke
 * notify_data  out parameter; the line that sets it is not visible in this view
 *
 * The visible code allocates and initialises the shared state (presumably
 * only when none exists yet -- the test is not visible here), validates the
 * arguments, creates the backend record, registers each actor, and finally
 * links the backend into state->synthesizers.
 */
403 int srs_register_voice(srs_context_t *srs, const char *name,
404 srs_voice_api_t *api, void *api_data,
405 srs_voice_actor_t *actors, int nactor,
406 srs_voice_notify_t *notify, void **notify_data)
408 state_t *state = (state_t *)srs->synthesizer;
413 srs->synthesizer = state = mrp_allocz(sizeof(*state));
418 mrp_list_init(&state->synthesizers);
419 mrp_list_init(&state->languages);
420 mrp_list_init(&state->requests);
/* argument validation */
424 if (api == NULL || name == NULL || actors == NULL || nactor < 1) {
429 r = mrp_allocz(sizeof(*r));
434 mrp_list_init(&r->hook);
/* backend ids are handed out sequentially */
435 r->id = state->nsynthesizer++;
438 r->name = mrp_strdup(name);
440 if (r->name == NULL) {
446 r->api_data = api_data;
448 for (i = 0; i < nactor; i++) {
449 if (register_actor(r, actors + i) != 0)
453 mrp_log_info("Registered voice/TTS backend '%s'.", r->name);
455 mrp_list_append(&state->synthesizers, &r->hook);
/* the backend reports rendering events through this callback */
457 *notify = voice_notify_cb;
/*
 * Unregister the backend called 'name'. Walks state->synthesizers and acts on
 * an entry whose name matches exactly (strcmp); what is done with the match
 * after logging is not visible in this view.
 */
464 void srs_unregister_voice(srs_context_t *srs, const char *name)
466 state_t *state = (state_t *)srs->synthesizer;
468 mrp_list_hook_t *p, *n;
471 mrp_list_foreach(&state->synthesizers, p, n) {
472 r = mrp_list_entry(p, typeof(*r), hook);
474 if (!strcmp(r->name, name)) {
475 mrp_log_info("Unregistering voice/TTS backend '%s'.", name);
/*
 * Resolve a voice specification to a backend and actor id.
 *
 * Two forms are visible:
 *   "<backend>/<voice>"  the part before '/' is copied into 'renderer' and
 *                        the pair is looked up with find_actor();
 *   otherwise            the text up to the first '-' (or the whole string if
 *                        there is none) is used as the language name, and
 *                        that language's actors are searched for an exact
 *                        voice-name match, with a 'fallback' actor kept.
 * When the fallback is used, its backend voice id is stored through 'actor'.
 */
484 static renderer_t *find_renderer(state_t *state, const char *voice,
488 actor_t *a, *fallback;
489 mrp_list_hook_t *ap, *an;
490 char lang[128], renderer[128], *e;
499 if ((e = strchr(voice, '/')) != NULL) {
/* reject backend names that would not fit the local buffer with its terminator */
502 if (n >= (int)sizeof(renderer) - 1)
/* strncpy() does not terminate here; presumably the terminator is written on a line not visible in this view */
505 strncpy(renderer, voice, n);
508 if ((a = find_actor(state, renderer, e + 1)) != NULL) {
/* FALSE: lookups never create language entries */
516 if ((e = strchr(voice, '-')) == NULL)
517 l = find_language(state, voice, FALSE);
/* width and precision both n: print exactly the first n characters of 'voice' */
520 if (snprintf(lang, sizeof(lang), "%*.*s",
521 (int)n, (int)n, voice) >= (int)sizeof(lang))
524 l = find_language(state, lang, FALSE);
531 mrp_list_foreach(&l->actors, ap, an) {
532 a = mrp_list_entry(ap, typeof(*a), hook);
534 if (!strcmp(a->voice, voice)) {
540 if (fallback == NULL)
544 if (fallback != NULL) {
545 *actor = fallback->id;
/*
 * Pick a backend by scanning each registered backend's own actor table
 * (r->actors / r->nactor) rather than the per-language actor lists.
 * The actor string is first mapped to a gender: SRS_VOICE_FEMALE and
 * SRS_VOICE_MALE select that gender, anything else selects
 * SRS_VOICE_GENDER_ANY. Actors for other languages are skipped by an exact
 * strcmp() on the language. Part of the signature and the handling of each
 * match are not visible in this view.
 */
555 static renderer_t *select_renderer(state_t *state, const char *voice,
560 srs_voice_actor_t *a;
561 srs_voice_gender_t gender;
562 mrp_list_hook_t *p, *n;
572 if (!strcmp(actor, SRS_VOICE_FEMALE)) {
573 gender = SRS_VOICE_GENDER_FEMALE;
576 else if (!strcmp(actor, SRS_VOICE_MALE)) {
577 gender = SRS_VOICE_GENDER_MALE;
581 gender = SRS_VOICE_GENDER_ANY;
/* no default candidate chosen yet */
584 dfltid = SRS_VOICE_INVALID;
586 mrp_list_foreach(&state->synthesizers, p, n) {
587 r = mrp_list_entry(p, typeof(*r), hook);
589 for (i = 0, a = r->actors; i < r->nactor; i++, a++) {
/* non-zero strcmp(): different language */
590 if (strcmp(a->lang, lang))
594 if (gender == a->gender) {
600 if (!strcmp(a->name, actor)) {
605 if (dfltid == SRS_VOICE_INVALID) {
/*
 * Release a tag array such as the one built by copy_tags(). The array is
 * walked up to its terminating NULL entry (the per-entry release is not
 * visible in this view).
 */
619 static void free_tags(char **tags)
626 for (i = 0; tags[i] != NULL; i++)
/*
 * Make a private copy of a NULL-terminated array of tag strings.
 * The copy grows by one slot per tag and each string is duplicated. After the
 * loop one more slot is added, presumably the terminating NULL entry left
 * zeroed by mrp_reallocz() -- confirm against the allocator's documentation.
 */
633 static char **copy_tags(char **tags)
641 for (i = 0; tags[i] != NULL; i++) {
/* grow from i to i + 1 entries */
642 if (!mrp_reallocz(cp, i, i + 1))
644 if ((cp[i] = mrp_strdup(tags[i])) == NULL)
/* room for the entry that ends the array */
648 if (mrp_reallocz(cp, i, i + 1))
/*
 * Timer callback for a queued request ('user_data' is the queued_t that
 * enqueue_request() registered). Logs the timeout, removes the timer, sends
 * an SRS_VOICE_EVENT_TIMEOUT notification to the requester and unlinks the
 * request from the queue.
 */
657 static void request_timer_cb(mrp_timer_t *t, void *user_data)
659 queued_t *qr = (queued_t *)user_data;
660 request_t *req = &qr->req;
661 srs_voice_event_t event;
665 mrp_log_info("Voice/TTS request #%u timed out.", qr->req.id);
667 mrp_del_timer(req->timer);
671 event.type = SRS_VOICE_EVENT_TIMEOUT;
/* delivered only if the requester's mask includes this event type */
674 notify_request(req, &event);
676 mrp_list_delete(&req->hook);
/*
 * Queue a rendering request for later.
 *
 * Allocates a queued_t, gives it the next request id, records the
 * notification settings and keeps private copies of the message and tags.
 * If the copies succeeded, the request is appended to state->requests and a
 * timer is set up on the main loop of r->srs, with request_timer_cb() as its
 * callback and 'timeout' as its interval.
 */
685 static request_t *enqueue_request(state_t *state, const char *msg, char **tags,
686 renderer_t *r, uint32_t actor, double rate,
687 double pitch, int timeout, int notify_mask,
688 srs_voice_notify_t notify, void *notify_data)
696 qr = mrp_allocz(sizeof(*qr));
701 mrp_list_init(&qr->req.hook);
703 qr->req.id = state->nextid++;
/* nothing has been handed to the backend yet */
705 qr->req.vid = SRS_VOICE_INVALID;
706 qr->req.notify_mask = notify_mask;
707 qr->req.notify = notify;
708 qr->req.notify_data = notify_data;
710 qr->msg = mrp_strdup(msg);
711 qr->tags = copy_tags(tags);
713 qr->timeout = timeout;
/* a NULL tag copy is acceptable only when no tags were supplied */
715 if (qr->msg != NULL && (qr->tags != NULL || tags == NULL)) {
716 mrp_list_append(&state->requests, &qr->req.hook);
719 qr->req.timer = mrp_add_timer(r->srs->ml, timeout,
720 request_timer_cb, qr);
/*
 * Start the next queued request, if any.
 *
 * The first visible check is on an already active request. For a queued
 * request the queue timer is removed and the message is handed to the
 * backend's render entry point; the id it returns is stored in req.vid. If
 * that id is SRS_VOICE_INVALID an ABORTED event is raised, provided the
 * requester has a callback and asked for that event type. The last visible
 * line marks the request as the active one.
 */
734 static request_t *activate_next(state_t *state)
738 mrp_list_hook_t *p, *n;
740 if (state->active != NULL)
744 mrp_list_foreach(&state->requests, p, n) {
746 qr = mrp_list_entry(p, typeof(*qr), req.hook);
/* the queue timeout no longer applies once rendering is attempted */
753 mrp_del_timer(qr->req.timer);
754 qr->req.timer = NULL;
757 qr->req.vid = r->api.render(qr->msg, qr->tags, qr->actor, qr->rate,
758 qr->pitch, qr->req.notify_mask, r->api_data);
/* the backend refused the request */
765 if (qr->req.vid == SRS_VOICE_INVALID) {
766 if (qr->req.notify != NULL &&
767 (qr->req.notify_mask & 1 << SRS_VOICE_EVENT_ABORTED)) {
771 e.type = SRS_VOICE_EVENT_ABORTED;
/* NOTE(review): voice_notify_cb() casts its second argument to renderer_t *, but the requester's opaque notify_data is passed here -- confirm this is intended */
774 voice_notify_cb(&e, qr->req.notify_data);
782 state->active = &qr->req;
/*
 * Render a message right away instead of queueing it.
 *
 * Allocates a request_t, gives it the next request id and calls the backend's
 * render entry point directly with the caller's msg/tags. If the backend
 * returns SRS_VOICE_INVALID the failure branch is taken (its body is not
 * visible in this view); otherwise the notification settings are recorded.
 * 'timeout' is not used by any of the visible lines.
 */
789 request_t *render_request(state_t *state, const char *msg, char **tags,
790 renderer_t *r, uint32_t actor, double rate,
791 double pitch, int timeout, int notify_mask,
792 srs_voice_notify_t notify, void *notify_data)
794 request_t *req = NULL;
798 req = mrp_allocz(sizeof(*req));
803 mrp_list_init(&req->hook);
804 req->id = state->nextid++;
806 req->vid = r->api.render(msg, tags, actor, rate, pitch,
807 notify_mask, r->api_data);
809 if (req->vid == SRS_VOICE_INVALID) {
814 req->notify = notify;
815 req->notify_mask = notify_mask;
816 req->notify_data = notify_data;
/*
 * Public entry point: render 'msg' with the given voice.
 *
 * The voice string is resolved to a backend and actor id with
 * find_renderer(). If nothing is being rendered, the request is started
 * immediately; otherwise it is queued, except that a timeout of
 * SRS_VOICE_IMMEDIATE is handled separately (that branch is not visible in
 * this view). SRS_VOICE_INVALID is returned on the visible failure paths; the
 * success return is not visible here.
 */
824 uint32_t srs_render_voice(srs_context_t *srs, const char *msg,
825 char **tags, const char *voice, double rate,
826 double pitch, int timeout, int notify_mask,
827 srs_voice_notify_t notify, void *user_data)
829 state_t *state = (state_t *)srs->synthesizer;
837 return SRS_VOICE_INVALID;
840 r = find_renderer(state, voice, &actid);
845 return SRS_VOICE_INVALID;
/* idle: hand the message to the backend now */
848 if (state->active == NULL)
849 req = render_request(state, msg, tags, r, actid, rate, pitch, timeout,
850 notify_mask, notify, user_data);
852 if (timeout == SRS_VOICE_IMMEDIATE) {
/* busy: wait in the queue for at most 'timeout' */
857 req = enqueue_request(state, msg, tags, r, actid, rate, pitch,
858 timeout, notify_mask, notify, user_data);
864 return SRS_VOICE_INVALID;
/*
 * Find a request by request id and/or backend id. SRS_VOICE_INVALID for
 * either id acts as a wildcard for that field. The active request is checked
 * first, then the queue in state->requests.
 */
868 static request_t *find_request(state_t *state, uint32_t rid, uint32_t vid)
870 mrp_list_hook_t *p, *n;
878 if ((req = state->active) != NULL) {
879 if ((rid == SRS_VOICE_INVALID || req->id == rid) &&
880 (vid == SRS_VOICE_INVALID || req->vid == vid))
884 mrp_list_foreach(&state->requests, p, n) {
885 req = mrp_list_entry(p, typeof(*req), hook);
887 if ((rid == SRS_VOICE_INVALID || req->id == rid) &&
888 (vid == SRS_VOICE_INVALID || req->vid == vid))
/*
 * Cancel the request with id 'rid'.
 *
 * The request's timer is removed and the request is recorded in
 * state->cancelling before the backend's cancel entry point is called.
 * voice_notify_cb() checks that field so that it does not unlink the request
 * itself; the unlinking is done here after the cancel call. If the request
 * was the active one, the next queued request is started. How the 'notify'
 * argument is used is not visible in this view.
 */
899 void srs_cancel_voice(srs_context_t *srs, uint32_t rid, int notify)
901 state_t *state = (state_t *)srs->synthesizer;
/* -1 for the backend id is presumably SRS_VOICE_INVALID, i.e. "any" */
902 request_t *req = find_request(state, rid, -1);
903 renderer_t *voice = req ? req->r : NULL;
910 mrp_del_timer(req->timer);
912 state->cancelling = req;
914 voice->api.cancel(req->vid, voice->api_data);
916 mrp_list_delete(&req->hook);
919 if (state->active == req) {
920 state->active = NULL;
921 activate_next(state);
/*
 * Build an array describing the registered voices.
 *
 * language  when non-NULL, only the language with this name is reported
 *           (compared case-insensitively); NULL reports every language
 * actorsp   out parameter for the array (the assignment is not visible in
 *           this view)
 *
 * The array grows by one entry per actor and every string in an entry is a
 * private copy. One extra entry is added at the end; srs_free_queried_voices()
 * stops at the first entry whose lang is NULL, so that extra entry presumably
 * serves as the terminator. On failure the strings of all entries built so
 * far are released.
 */
926 int srs_query_voices(srs_context_t *srs, const char *language,
927 srs_voice_actor_t **actorsp)
929 state_t *state = (state_t *)srs->synthesizer;
930 srs_voice_actor_t *actors, *actor;
933 mrp_list_hook_t *lp, *ln;
935 mrp_list_hook_t *ap, *an;
946 mrp_list_foreach(&state->languages, lp, ln) {
947 l = mrp_list_entry(lp, typeof(*l), hook);
/* language filter */
949 if (language != NULL && strcasecmp(l->lang, language))
952 mrp_list_foreach(&l->actors, ap, an) {
953 a = mrp_list_entry(ap, typeof(*a), hook);
955 if (mrp_reallocz(actors, nactor, nactor + 1) == NULL)
958 actor = actors + nactor++;
960 actor->name = mrp_strdup(a->voice);
961 actor->lang = mrp_strdup(l->lang);
962 actor->dialect = mrp_strdup(a->dialect);
963 actor->gender = a->gender;
965 actor->description = mrp_strdup(a->description);
/* name and lang are mandatory; dialect and description may be NULL only if the source was NULL */
967 if (!actor->name || !actor->lang ||
968 (!actor->dialect && a->dialect) ||
969 (!actor->description && a->description))
/* one more entry after the last actor */
974 if (actors != NULL) {
975 if (!mrp_reallocz(actors, nactor, nactor + 1))
/* release the strings of everything collected so far */
984 for (i = 0; i < nactor; i++) {
985 mrp_free(actors[i].name);
986 mrp_free(actors[i].lang);
987 mrp_free(actors[i].dialect);
988 mrp_free(actors[i].description);
/*
 * Release an array of voice descriptions. A NULL array is accepted. Entries
 * are visited until the first one whose 'lang' is NULL, and the strings of
 * each visited entry are freed.
 */
995 void srs_free_queried_voices(srs_voice_actor_t *actors)
997 srs_voice_actor_t *a;
999 if (actors != NULL) {
/* an entry with a NULL lang ends the array */
1000 for (a = actors; a->lang; a++) {
1003 mrp_free(a->dialect);
1004 mrp_free(a->description);