2 * Copyright (c) 2012 - 2013, Intel Corporation
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are
8 * * Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 * * Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * * Neither the name of Intel Corporation nor the names of its contributors
14 * may be used to endorse or promote products derived from this software
15 * without specific prior written permission.
17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 #include <murphy/common/macros.h>
34 #include <murphy/common/mm.h>
35 #include <murphy/common/list.h>
37 #include "srs/daemon/context.h"
38 #include "srs/daemon/voice.h"
/* Forward declaration so that structures declared before the state can hold state_t pointers. */
40 typedef struct state_s state_t;
43 * a speech synthesizer backend
/* Fields of one registered synthesizer backend (accessed below through renderer_t pointers). */
47 int id; /* internal backend ID, taken from the state's backend counter */
48 srs_context_t *srs; /* main context */
49 char *name; /* engine name (a duplicated copy) */
50 srs_voice_api_t api; /* backend API (render and cancel are called in this file) */
51 void *api_data; /* opaque engine data, passed back on API calls */
52 srs_voice_actor_t *actors; /* backend voice actors */
53 int nactor; /* number of actors */
54 mrp_list_hook_t hook; /* to the state's list of backends */
55 state_t *state; /* shared synthesizer state */
/* Fields of one supported language; entries are allocated and linked by find_language(). */
64 char *lang; /* language name (a duplicated copy) */
65 mrp_list_hook_t hook; /* to the state's list of languages */
66 mrp_list_hook_t actors; /* list of actors registered for this language */
67 int nmale; /* number of male actors */
68 int nfemale; /* number of female actors */
/* Fields of one registered voice actor, linked into the actor list of its language. */
77 char *voice; /* globally unique voice id (built from language, dialect and gender) */
78 renderer_t *r; /* rendering backend */
79 int id; /* backend voice id */
80 char *dialect; /* language dialect, if any */
81 srs_voice_gender_t gender; /* actor gender */
82 int age; /* actor age */
83 char *description; /* description */
84 mrp_list_hook_t hook; /* to the language's list of actors */
89 * active and queued rendering requests
/* Fields shared by active and queued rendering requests. */
93 uint32_t id; /* request id, handed out from the state's nextid counter */
94 renderer_t *r; /* rendering backend */
95 uint32_t vid; /* id returned by the backend's render(), SRS_VOICE_INVALID if none */
96 int notify_mask; /* notification event mask, one bit per event type (1 << type) */
97 srs_voice_notify_t notify; /* notification callback */
98 void *notify_data; /* opaque notification data */
99 mrp_timer_t *timer; /* request timeout timer */
100 mrp_list_hook_t hook; /* hook to list of requests */
/* Fields of a queued request: the request itself plus the render arguments saved until it is activated. */
105 request_t req; /* request */
106 char *msg; /* message to render (a duplicated copy) */
107 char **tags; /* stream tags (copied with copy_tags()) */
108 uint32_t actor; /* actor id */
109 double rate; /* synthesis rate (1 = normal) */
110 double pitch; /* synthesis pitch (1 = default) */
111 int timeout; /* timeout, as given when the request was queued */
116 * speech synthesizer state
/* Fields of the shared synthesizer state (kept in srs->synthesizer). */
120 mrp_list_hook_t synthesizers; /* registered synthesizers */
121 int nsynthesizer; /* number of synthesizers, also used for backend ids */
122 mrp_list_hook_t languages; /* list of supported languages */
123 uint32_t nextid; /* next request id to hand out */
124 mrp_list_hook_t requests; /* request queue */
125 request_t *active; /* active request */
126 request_t *cancelling; /* request being cancelled */
/* Forward declarations: both are called from voice_notify_cb() ahead of their definitions. */
130 static request_t *find_request(state_t *state, uint32_t rid, uint32_t vid);
131 static request_t *activate_next(state_t *state);
/*
 * Find the language entry whose name matches lang, ignoring case.
 *
 * The visible code also allocates a zeroed entry, initializes its hooks,
 * duplicates the name and appends the entry to state->languages.
 * NOTE(review): presumably this happens only when no match was found and
 * create is set -- the guarding lines are not visible in this excerpt, confirm.
 */
134 static language_t *find_language(state_t *state, const char *lang, int create)
136 mrp_list_hook_t *p, *n;
139 mrp_list_foreach(&state->languages, p, n) {
140 l = mrp_list_entry(p, typeof(*l), hook);
/* language names are compared case-insensitively */
142 if (!strcasecmp(l->lang, lang))
149 l = mrp_allocz(sizeof(*l));
154 mrp_list_init(&l->hook);
155 mrp_list_init(&l->actors);
157 l->lang = mrp_strdup(lang);
/* duplicating the name failed */
159 if (l->lang == NULL) {
164 mrp_list_append(&state->languages, &l->hook);
/*
 * Make one backend voice actor known under a globally unique voice name.
 *
 * The actor's language entry is looked up with creation enabled, an actor
 * record is allocated, dialect, gender and description are copied, a voice
 * name is formatted, and the record is appended to the language's actor list.
 * NOTE(review): g and n are set on lines not visible here; from the format
 * strings g is a gender suffix and *n a per-language counter -- confirm.
 */
170 static int register_actor(renderer_t *r, srs_voice_actor_t *act)
172 state_t *state = r->state;
179 l = find_language(state, act->lang, TRUE);
184 a = mrp_allocz(sizeof(*a));
190 mrp_list_init(&a->hook);
193 a->dialect = mrp_strdup(act->dialect);
194 a->gender = act->gender;
196 a->description = mrp_strdup(act->description);
/* a copy counts as failed only if the source string was actually set */
198 if ((act->dialect && !a->dialect) ||
199 (act->description && !a->description)) {
200 mrp_free(a->dialect);
201 mrp_free(a->description);
207 if (a->gender == SRS_VOICE_GENDER_MALE) {
212 else if (a->gender == SRS_VOICE_GENDER_FEMALE) {
/* NOTE(review): looks like any other gender value is treated as male -- confirm */
217 a->gender = SRS_VOICE_GENDER_MALE;
/* voice name: <lang>[-<dialect>]<g>, in this variant followed by -<*n> */
222 snprintf(voice, sizeof(voice), "%s%s%s%s-%d", l->lang,
223 a->dialect ? "-" : "", a->dialect ? a->dialect : "", g, *n);
/* same name without the numeric suffix */
225 snprintf(voice, sizeof(voice), "%s%s%s%s", l->lang,
226 a->dialect ? "-" : "", a->dialect ? a->dialect : "", g);
228 a->voice = mrp_strdup(voice);
230 if (a->voice == NULL) {
231 mrp_free(a->dialect);
232 mrp_free(a->description);
238 mrp_list_append(&l->actors, &a->hook);
241 mrp_log_info("Registered voice %s/%s.", r->name, voice);
/*
 * Drop voice actors of a backend from the per-language actor lists.
 *
 * Walks every language and every actor in it. For the actors handled here
 * the gender is tested, the voice is logged, the actor is unlinked from the
 * language's actor list and its dialect and description strings are freed.
 * A language left without actors is then unlinked from the list of languages.
 *
 * NOTE(review): this excerpt does not show every line of the function (for
 * instance the test selecting which actors belong to r, and what the gender
 * test updates) -- confirm against the full file.
 */
247 static void unregister_actors(renderer_t *r)
249 state_t *state = r->state;
250 mrp_list_hook_t *lp, *ln;
252 mrp_list_hook_t *ap, *an;
255 mrp_list_foreach(&state->languages, lp, ln) {
256 l = mrp_list_entry(lp, typeof(*l), hook);
258 mrp_list_foreach(&l->actors, ap, an) {
259 a = mrp_list_entry(ap, typeof(*a), hook);
262 if (a->gender == SRS_VOICE_GENDER_MALE)
267 mrp_log_info("Unregistering voice %s/%s.", r->name, a->voice);
/*
 * Unlink the actor itself. Unlinking l->hook here would drop the whole
 * language as soon as one actor goes away and leave the actor linked into
 * l->actors, so the emptiness check below could never succeed.
 */
269 mrp_list_delete(&a->hook);
272 mrp_free(a->dialect);
273 mrp_free(a->description);
/* the language itself is unlinked only once its actor list is empty */
278 if (mrp_list_empty(&l->actors)) {
279 mrp_list_delete(&l->hook);
/*
 * Tear down a backend record: it is unlinked from the list it hangs on and
 * its voice actors are unregistered.
 * NOTE(review): the remaining cleanup is not visible in this excerpt.
 */
286 static void free_renderer(renderer_t *r)
289 mrp_list_delete(&r->hook);
290 unregister_actors(r);
/*
 * Deliver an event to the submitter of a request.
 * The callback is invoked only if one is set and the bit for the event's
 * type (1 << type) is present in the request's notify_mask.
 * NOTE(review): e is a local event prepared on lines not visible here.
 */
296 static void notify_request(request_t *req, srs_voice_event_t *event)
298 renderer_t *r = req->r;
299 int mask = (1 << event->type);
302 if (req->notify != NULL && (mask & req->notify_mask)) {
305 req->notify(&e, req->notify_data);
/*
 * Event callback that srs_register_voice() hands out through *notify.
 * notify_data is treated as the renderer_t of the reporting backend. The
 * request is looked up by the backend's own id (event->id), with -1 passed
 * as the request id so that any request id matches.
 */
310 static void voice_notify_cb(srs_voice_event_t *event, void *notify_data)
312 renderer_t *r = (renderer_t *)notify_data;
313 state_t *state = (state_t *)r->state;
314 uint32_t vid = event->id;
315 request_t *req = find_request(state, -1, vid);
316 int mask = (1 << event->type);
319 mrp_log_error("Failed to find request for event 0x%x of <%d>.",
320 event->type, event->id);
/* rendering has started: the request's timer is deleted */
324 if (event->type == SRS_VOICE_EVENT_STARTED) {
325 mrp_del_timer(req->timer);
/* pass the event on to whoever submitted the request */
329 notify_request(req, event);
/* event types covered by SRS_VOICE_MASK_DONE get the handling below */
331 if (mask & SRS_VOICE_MASK_DONE) {
332 mrp_del_timer(req->timer);
/* a request being cancelled is unlinked by srs_cancel_voice() instead */
334 if (state->cancelling != req) {
335 mrp_list_delete(&req->hook);
/* the active slot is free again: try to start the next queued request */
339 if (state->active == req) {
340 state->active = NULL;
341 activate_next(state);
/*
 * Register a voice/TTS backend together with its voice actors.
 *
 * The shared state lives in srs->synthesizer; the visible code allocates it
 * and initializes its three lists. NOTE(review): presumably only when no
 * state exists yet -- the guarding test is not visible in this excerpt.
 * On the path shown, *notify is set to voice_notify_cb.
 */
347 int srs_register_voice(srs_context_t *srs, const char *name,
348 srs_voice_api_t *api, void *api_data,
349 srs_voice_actor_t *actors, int nactor,
350 srs_voice_notify_t *notify, void **notify_data)
352 state_t *state = (state_t *)srs->synthesizer;
354 srs_voice_actor_t *a;
358 srs->synthesizer = state = mrp_allocz(sizeof(*state));
363 mrp_list_init(&state->synthesizers);
364 mrp_list_init(&state->languages);
365 mrp_list_init(&state->requests);
/* a backend must supply an API, a name and at least one actor */
369 if (api == NULL || name == NULL || actors == NULL || nactor < 1) {
374 r = mrp_allocz(sizeof(*r));
379 mrp_list_init(&r->hook);
/* backend ids come from the state's running backend counter */
380 r->id = state->nsynthesizer++;
383 r->name = mrp_strdup(name);
385 if (r->name == NULL) {
391 r->api_data = api_data;
/* register each actor; a non-zero return is checked for */
393 for (i = 0; i < nactor; i++) {
394 if (register_actor(r, actors + i) != 0)
398 mrp_log_info("Registered voice/TTS backend '%s'.", r->name);
400 mrp_list_append(&state->synthesizers, &r->hook);
/* the callback the backend is given for reporting events */
402 *notify = voice_notify_cb;
/*
 * Unregister the backend registered under name.
 * The registered backends are scanned for an exact (case-sensitive) name
 * match, which is logged. NOTE(review): the teardown that follows the log
 * line is not visible in this excerpt.
 */
409 void srs_unregister_voice(srs_context_t *srs, const char *name)
411 state_t *state = (state_t *)srs->synthesizer;
413 mrp_list_hook_t *p, *n;
416 mrp_list_foreach(&state->synthesizers, p, n) {
417 r = mrp_list_entry(p, typeof(*r), hook);
419 if (!strcmp(r->name, name)) {
420 mrp_log_info("Unregistering voice/TTS backend '%s'.", name);
/*
 * Resolve a voice name to a backend and a backend actor id.
 *
 * The language is looked up without creating it: a name without '-' is used
 * as the language as is, otherwise a prefix of the name is copied into lang
 * first. The language's actors are then searched for an exact voice match,
 * with a fallback actor kept for the case shown at the end.
 * NOTE(review): n is presumably the length of the part before the first '-',
 * and how fallback is chosen is not visible here -- confirm.
 */
429 static renderer_t *find_renderer(state_t *state, const char *voice,
433 actor_t *a, *fallback;
434 mrp_list_hook_t *ap, *an;
444 if ((e = strchr(voice, '-')) == NULL)
445 l = find_language(state, voice, FALSE);
/* a result of sizeof(lang) or more means the prefix did not fit */
448 if (snprintf(lang, sizeof(lang), "%*.*s", n, n, voice) >= sizeof(lang))
451 l = find_language(state, lang, FALSE);
458 mrp_list_foreach(&l->actors, ap, an) {
459 a = mrp_list_entry(ap, typeof(*a), hook);
/* voice names are matched exactly (case-sensitive) */
461 if (!strcmp(a->voice, voice)) {
467 if (fallback == NULL)
471 if (fallback != NULL) {
472 *actor = fallback->id;
/*
 * Pick a backend by scanning the actor tables of all registered backends.
 *
 * The requested actor is first mapped to a gender: SRS_VOICE_FEMALE and
 * SRS_VOICE_MALE select that gender, anything else SRS_VOICE_GENDER_ANY.
 * Actors are then compared by language, gender and name, and dfltid starts
 * out as SRS_VOICE_INVALID.
 * NOTE(review): lang and actor are locals set on lines not visible here, and
 * the bodies of the comparisons are missing from this excerpt -- confirm.
 */
482 static renderer_t *select_renderer(state_t *state, const char *voice,
487 srs_voice_actor_t *a;
488 srs_voice_gender_t gender;
489 mrp_list_hook_t *p, *n;
499 if (!strcmp(actor, SRS_VOICE_FEMALE)) {
500 gender = SRS_VOICE_GENDER_FEMALE;
503 else if (!strcmp(actor, SRS_VOICE_MALE)) {
504 gender = SRS_VOICE_GENDER_MALE;
508 gender = SRS_VOICE_GENDER_ANY;
511 dfltid = SRS_VOICE_INVALID;
513 mrp_list_foreach(&state->synthesizers, p, n) {
514 r = mrp_list_entry(p, typeof(*r), hook);
516 for (i = 0, a = r->actors; i < r->nactor; i++, a++) {
/* true when the actor's language differs from the requested one */
517 if (strcmp(a->lang, lang))
521 if (gender == a->gender) {
527 if (!strcmp(a->name, actor)) {
532 if (dfltid == SRS_VOICE_INVALID) {
/* Release a tag array; the visible loop visits entries up to the terminating NULL. */
546 static void free_tags(char **tags)
553 for (i = 0; tags[i] != NULL; i++)
/*
 * Build a copy of a NULL-terminated array of tag strings.
 * The copy is grown by one slot per tag with mrp_reallocz() and each string
 * is duplicated; one more one-slot growth follows the loop.
 * NOTE(review): presumably that last slot serves as the NULL terminator
 * (relies on mrp_reallocz() zeroing the new slot) -- confirm.
 */
560 static char **copy_tags(char **tags)
568 for (i = 0; tags[i] != NULL; i++) {
569 if (!mrp_reallocz(cp, i, i + 1))
571 if ((cp[i] = mrp_strdup(tags[i])) == NULL)
575 if (mrp_reallocz(cp, i, i + 1))
/*
 * Timer callback for a queued request (installed by enqueue_request()).
 * user_data is the queued_t of the request. The timeout is logged, the timer
 * is deleted, a SRS_VOICE_EVENT_TIMEOUT event is delivered through
 * notify_request(), and the request is unlinked from its list.
 */
584 static void request_timer_cb(mrp_timer_t *t, void *user_data)
586 queued_t *qr = (queued_t *)user_data;
587 request_t *req = &qr->req;
588 srs_voice_event_t event;
590 mrp_log_info("Voice/TTS request #%u timed out.", qr->req.id);
592 mrp_del_timer(req->timer);
596 event.type = SRS_VOICE_EVENT_TIMEOUT;
599 notify_request(req, &event);
601 mrp_list_delete(&req->hook);
/*
 * Queue a render request for later activation.
 * The message and the tags are copied, the request takes the next id from
 * the state and starts without a backend id (SRS_VOICE_INVALID).
 */
610 static request_t *enqueue_request(state_t *state, const char *msg, char **tags,
611 renderer_t *r, uint32_t actor, double rate,
612 double pitch, int timeout, int notify_mask,
613 srs_voice_notify_t notify, void *notify_data)
617 qr = mrp_allocz(sizeof(*qr));
622 mrp_list_init(&qr->req.hook);
624 qr->req.id = state->nextid++;
626 qr->req.vid = SRS_VOICE_INVALID;
627 qr->req.notify_mask = notify_mask;
628 qr->req.notify = notify;
629 qr->req.notify_data = notify_data;
631 qr->msg = mrp_strdup(msg);
632 qr->tags = copy_tags(tags);
634 qr->timeout = timeout;
/* queue only if both copies succeeded; a NULL tags argument is acceptable */
636 if (qr->msg != NULL && (qr->tags != NULL || tags == NULL)) {
637 mrp_list_append(&state->requests, &qr->req.hook);
/* timer with request_timer_cb() as callback and the queued request as its data */
640 qr->req.timer = mrp_add_timer(r->srs->ml, timeout,
641 request_timer_cb, qr);
/*
 * Start rendering a queued request when no request is active.
 *
 * For the queued request being activated, its timeout timer is deleted and
 * the saved render arguments are handed to the backend's render(). The id
 * returned by the backend becomes the request's backend id. If the backend
 * returns SRS_VOICE_INVALID and the submitter asked for ABORTED events, the
 * submitter is notified. state->active is set to the activated request.
 *
 * NOTE(review): this excerpt does not show every line (e.g. how r is
 * obtained and the rest of the event setup) -- confirm against the full file.
 */
655 static request_t *activate_next(state_t *state)
659 mrp_list_hook_t *p, *n;
/* only one request is rendered at a time */
661 if (state->active != NULL)
665 mrp_list_foreach(&state->requests, p, n) {
/* queued requests are linked through the hook of their embedded request */
667 qr = mrp_list_entry(p, typeof(*qr), req.hook);
674 mrp_del_timer(qr->req.timer);
675 qr->req.timer = NULL;
678 qr->req.vid = r->api.render(qr->msg, qr->tags, qr->actor, qr->rate,
679 qr->pitch, qr->req.notify_mask, r->api_data);
686 if (qr->req.vid == SRS_VOICE_INVALID) {
687 if (qr->req.notify != NULL &&
688 (qr->req.notify_mask & 1 << SRS_VOICE_EVENT_ABORTED)) {
692 e.type = SRS_VOICE_EVENT_ABORTED;
/*
 * Call the submitter's callback directly: notify_data is the submitter's
 * opaque pointer, whereas voice_notify_cb() interprets its second argument
 * as a renderer_t and would dereference the wrong object.
 */
695 qr->req.notify(&e, qr->req.notify_data);
703 state->active = &qr->req;
/*
 * Render a message right away.
 * A request record is allocated and given the next request id, then the
 * arguments are passed straight to the backend's render(). The returned
 * backend id is stored in the request, and SRS_VOICE_INVALID is checked for.
 * The notification settings are stored after the render() call.
 * NOTE(review): the failure handling and the final bookkeeping are not
 * visible in this excerpt.
 */
710 request_t *render_request(state_t *state, const char *msg, char **tags,
711 renderer_t *r, uint32_t actor, double rate,
712 double pitch, int timeout, int notify_mask,
713 srs_voice_notify_t notify, void *notify_data)
717 req = mrp_allocz(sizeof(*req));
722 mrp_list_init(&req->hook);
723 req->id = state->nextid++;
725 req->vid = r->api.render(msg, tags, actor, rate, pitch,
726 notify_mask, r->api_data);
728 if (req->vid == SRS_VOICE_INVALID) {
733 req->notify = notify;
734 req->notify_mask = notify_mask;
735 req->notify_data = notify_data;
/*
 * Public entry point: render msg with the given voice.
 *
 * The voice is resolved to a backend and actor id with find_renderer().
 * With no active request the message is rendered right away through
 * render_request(); the other path queues it through enqueue_request().
 * SRS_VOICE_INVALID is returned on the failure paths visible here.
 * NOTE(review): timeout == SRS_VOICE_IMMEDIATE is tested on the queueing
 * path, but its handling is not visible in this excerpt -- confirm.
 */
743 uint32_t srs_render_voice(srs_context_t *srs, const char *msg,
744 char **tags, const char *voice, double rate,
745 double pitch, int timeout, int notify_mask,
746 srs_voice_notify_t notify, void *user_data)
748 state_t *state = (state_t *)srs->synthesizer;
756 return SRS_VOICE_INVALID;
759 r = find_renderer(state, voice, &actid);
764 return SRS_VOICE_INVALID;
767 if (state->active == NULL)
768 req = render_request(state, msg, tags, r, actid, rate, pitch, timeout,
769 notify_mask, notify, user_data);
771 if (timeout == SRS_VOICE_IMMEDIATE) {
776 req = enqueue_request(state, msg, tags, r, actid, rate, pitch,
777 timeout, notify_mask, notify, user_data);
783 return SRS_VOICE_INVALID;
/*
 * Look up a request by request id (rid) and/or backend id (vid).
 * Passing -1 for either one makes that criterion match any request. The
 * active request is tested first, then the requests in the queue.
 */
787 static request_t *find_request(state_t *state, uint32_t rid, uint32_t vid)
789 mrp_list_hook_t *p, *n;
797 if ((req = state->active) != NULL) {
798 if ((rid == -1 || req->id == rid) && (vid == -1 || req->vid == vid))
802 mrp_list_foreach(&state->requests, p, n) {
803 req = mrp_list_entry(p, typeof(*req), hook);
805 if ((rid == -1 || req->id == rid) && (vid == -1 || req->vid == vid))
/*
 * Cancel the request with request id rid.
 * The request is looked up by rid alone (any backend id matches). Its timer
 * is deleted, the backend's cancel() is called with the backend id, and the
 * request is unlinked. If it was the active request, the active slot is
 * cleared and the next queued request is activated.
 * NOTE(review): the use of the notify argument and the NULL check on req are
 * not visible in this excerpt.
 */
816 void srs_cancel_voice(srs_context_t *srs, uint32_t rid, int notify)
818 state_t *state = (state_t *)srs->synthesizer;
819 request_t *req = find_request(state, rid, -1);
820 renderer_t *voice = req ? req->r : NULL;
825 mrp_del_timer(req->timer);
/* voice_notify_cb() does not unlink the request recorded here */
827 state->cancelling = req;
829 voice->api.cancel(req->vid, voice->api_data);
831 mrp_list_delete(&req->hook);
834 if (state->active == req) {
835 state->active = NULL;
836 activate_next(state);
/*
 * Collect the registered voices, optionally limited to one language.
 *
 * language == NULL matches every language; otherwise language names are
 * compared ignoring case. For each actor the result array is grown by one
 * element and the voice name, language, dialect and description are
 * duplicated into it. After the scan the array is grown by one more element.
 * NOTE(review): presumably the extra element is the terminator that
 * srs_free_queried_voices() stops at, and the final loop is the cleanup for
 * a failed copy -- the connecting lines are not visible here, confirm.
 */
841 int srs_query_voices(srs_context_t *srs, const char *language,
842 srs_voice_actor_t **actorsp)
844 state_t *state = (state_t *)srs->synthesizer;
845 srs_voice_actor_t *actors, *actor;
848 mrp_list_hook_t *lp, *ln;
850 mrp_list_hook_t *ap, *an;
861 mrp_list_foreach(&state->languages, lp, ln) {
862 l = mrp_list_entry(lp, typeof(*l), hook);
/* true when a language filter is given and this language does not match it */
864 if (language != NULL && strcasecmp(l->lang, language))
867 mrp_list_foreach(&l->actors, ap, an) {
868 a = mrp_list_entry(ap, typeof(*a), hook);
870 if (mrp_reallocz(actors, nactor, nactor + 1) == NULL)
873 actor = actors + nactor++;
/* the public name of a voice is the globally unique voice id */
875 actor->name = mrp_strdup(a->voice);
876 actor->lang = mrp_strdup(l->lang);
877 actor->dialect = mrp_strdup(a->dialect);
878 actor->gender = a->gender;
880 actor->description = mrp_strdup(a->description);
/* dialect and description count as failed only if the source was set */
882 if (!actor->name || !actor->lang ||
883 (!actor->dialect && a->dialect) ||
884 (!actor->description && a->description))
889 if (actors != NULL) {
890 if (!mrp_reallocz(actors, nactor, nactor + 1))
899 for (i = 0; i < nactor; i++) {
900 mrp_free(actors[i].name);
901 mrp_free(actors[i].lang);
902 mrp_free(actors[i].dialect);
903 mrp_free(actors[i].description);
/*
 * Free an array of voices; a NULL array is accepted.
 * Iteration stops at the first element whose lang is NULL.
 * NOTE(review): presumably meant for arrays returned by srs_query_voices();
 * only the dialect and description frees are visible in this excerpt.
 */
910 void srs_free_queried_voices(srs_voice_actor_t *actors)
912 srs_voice_actor_t *a;
914 if (actors != NULL) {
915 for (a = actors; a->lang; a++) {
918 mrp_free(a->dialect);
919 mrp_free(a->description);