2 * Copyright (c) 2012 - 2013, Intel Corporation
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are
8 * * Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 * * Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * * Neither the name of Intel Corporation nor the names of its contributors
14 * may be used to endorse or promote products derived from this software
15 * without specific prior written permission.
17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 #include <murphy/common/macros.h>
35 #include <murphy/common/mm.h>
36 #include <murphy/common/list.h>
38 #include "srs/daemon/context.h"
39 #include "srs/daemon/voice.h"
/* Forward declaration: the backend record below holds a state_t pointer
 * before the state structure itself is laid out. */
41 typedef struct state_s state_t;
44 * a speech synthesizer backend
/* Per-backend record. NOTE(review): the struct header line is not visible in
 * this excerpt; these fields match how renderer_t pointers are used in the
 * functions of this file (name, api, api_data, hook, state). */
48 int id; /* internal backend ID */
49 srs_context_t *srs; /* main context */
50 char *name; /* engine name */
51 srs_voice_api_t api; /* backend API */
52 void *api_data; /* opaque engine data */
53 srs_voice_actor_t *actors; /* backend voice actors */
54 int nactor; /* number of actors */
55 mrp_list_hook_t hook; /* to list of backends */
56 state_t *state; /* our state */
/* Per-language record. NOTE(review): the struct header line is not visible in
 * this excerpt; these fields match how language_t pointers are used below.
 * nmale/nfemale are bumped when an actor name is generated. */
65 char *lang; /* language name */
66 mrp_list_hook_t hook; /* to list of languages */
67 mrp_list_hook_t actors; /* list of actors */
68 int nmale; /* number of male actors */
69 int nfemale; /* number of female actors */
/* Per-actor (voice) record. NOTE(review): the struct header line is not
 * visible in this excerpt; these fields match how actor_t pointers are used
 * below. */
78 char *voice; /* globally unique voice id */
79 renderer_t *r; /* rendering backend */
80 int id; /* backend voice id */
81 char *dialect; /* language dialect, if any */
82 srs_voice_gender_t gender; /* actor gender */
83 int age; /* actor age */
84 char *description; /* description */
85 mrp_list_hook_t hook; /* to the owning language's list of actors */
90 * active and queued rendering requests
/* Common part of every rendering request. NOTE(review): the struct header
 * line is not visible in this excerpt; these fields match how request_t
 * pointers are used below. 'id' is the id handed out from the state's
 * counter, 'vid' is the id returned by the backend's render call. */
94 uint32_t id; /* request id */
95 renderer_t *r; /* rendering backend */
96 uint32_t vid; /* backend id */
97 int notify_mask; /* notification event mask */
98 srs_voice_notify_t notify; /* notification callback */
99 void *notify_data; /* opaque notification data */
100 mrp_timer_t *timer; /* request timeout timer */
101 mrp_list_hook_t hook; /* hook to list of requests */
/* A queued request: the common request part plus copies of everything
 * needed to call the backend's render function later. NOTE(review): the
 * struct header line is not visible in this excerpt; these fields match how
 * queued_t pointers are used below. */
106 request_t req; /* request */
107 char *msg; /* message to render */
108 char **tags; /* stream tags */
109 uint32_t actor; /* actor id */
110 double rate; /* synthesis rate (1 = normal) */
111 double pitch; /* synthesis pitch (1 = default) */
112 int timeout; /* timeout */
117 * speech synthesizer state
/* Global synthesizer state hung off the SRS context. NOTE(review): the
 * struct header line is not visible in this excerpt. */
121 mrp_list_hook_t synthesizers; /* registered synthesizers */
122 int nsynthesizer; /* number of synthesizers */
123 mrp_list_hook_t languages; /* list of supported languages */
124 uint32_t nextid; /* next request id */
125 mrp_list_hook_t requests; /* request queue */
126 request_t *active; /* active request */
127 request_t *cancelling; /* request being cancelled */
/* Forward declarations: both are called from voice_notify_cb(), which is
 * defined before them in this file. */
131 static request_t *find_request(state_t *state, uint32_t rid, uint32_t vid);
132 static request_t *activate_next(state_t *state);
/*
 * Look up a language by name in state->languages.
 *
 * Names are compared case-insensitively (strcasecmp). The visible
 * allocation path zero-allocates a new entry, initializes both of its list
 * hooks, duplicates the name and appends the entry to state->languages.
 *
 * NOTE(review): several lines of this function are not visible in this
 * excerpt. Presumably the allocation path is only taken when 'create' is
 * non-zero and allocation failures return NULL - confirm in the full file.
 */
135 static language_t *find_language(state_t *state, const char *lang, int create)
137 mrp_list_hook_t *p, *n;
140 mrp_list_foreach(&state->languages, p, n) {
141 l = mrp_list_entry(p, typeof(*l), hook);
143 if (!strcasecmp(l->lang, lang))
150 l = mrp_allocz(sizeof(*l));
155 mrp_list_init(&l->hook);
156 mrp_list_init(&l->actors);
158 l->lang = mrp_strdup(lang);
160 if (l->lang == NULL) {
165 mrp_list_append(&state->languages, &l->hook);
/*
 * Build the voice name of an actor into buf (at most size bytes).
 *
 * Visible pieces: the language name and, when set, the actor's dialect are
 * copied while room remains, then a gender string is appended either as
 * "<gender>-<idx>" or as plain "<gender>".
 *
 * Side effect: idx is taken by post-incrementing language->nfemale for
 * female actors and language->nmale for every other gender value.
 *
 * NOTE(review): the copy loops' bodies, the choice between the two snprintf
 * forms and the return statement are not visible in this excerpt. The caller
 * passes the result to mrp_strdup(), so presumably buf is returned.
 */
171 static char *canonical_actor_name(char *buf, size_t size,
172 language_t *language, actor_t *actor)
174 const char *lang, *dialect, *gender, *s;
179 lang = language->lang;
180 dialect = actor->dialect;
182 if (actor->gender != SRS_VOICE_GENDER_FEMALE) {
184 idx = language->nmale++;
188 idx = language->nfemale++;
194 while (*s && size > 0) {
205 if (dialect != NULL && size > 0) {
210 while (*s && size > 0) {
223 snprintf(d, size, "%s-%d", gender, idx);
225 snprintf(d, size, "%s", gender);
/*
 * Register one backend-declared actor with the global voice tables.
 *
 * Finds the language entry for act->lang (creating it if needed), allocates
 * an actor_t, copies dialect, gender and description from the backend's
 * description, generates the canonical voice name and appends the actor to
 * the language's actor list.
 *
 * If duplicating a string fails, the strings duplicated so far are released.
 *
 * NOTE(review): the return statements and the remaining field assignments
 * are not visible in this excerpt; srs_register_voice() treats a non-zero
 * return as failure.
 */
231 static int register_actor(renderer_t *r, srs_voice_actor_t *act)
233 state_t *state = r->state;
238 l = find_language(state, act->lang, TRUE);
243 a = mrp_allocz(sizeof(*a));
249 mrp_list_init(&a->hook);
252 a->dialect = mrp_strdup(act->dialect);
253 a->gender = act->gender;
255 a->description = mrp_strdup(act->description);
/* a NULL copy is only an error if the source string was non-NULL */
257 if ((act->dialect && !a->dialect) ||
258 (act->description && !a->description)) {
259 mrp_free(a->dialect);
260 mrp_free(a->description);
266 a->voice = mrp_strdup(canonical_actor_name(voice, sizeof(voice), l, a));
268 if (a->voice == NULL) {
269 mrp_free(a->dialect);
270 mrp_free(a->description);
276 mrp_list_append(&l->actors, &a->hook);
278 mrp_log_info("Registered voice %s/%s.", r->name, voice);
/*
 * Remove the actors of a renderer from the global voice tables.
 *
 * Walks every language and every actor of that language. An actor being
 * removed is logged, unlinked from its language's actor list and its strings
 * are released. A language whose actor list has become empty is unlinked
 * from the list of languages.
 *
 * NOTE(review): some lines of this function are not visible in this excerpt
 * (presumably the test that the actor belongs to 'r', the per-gender counter
 * updates and the release of the actor/language objects themselves) -
 * confirm in the full file.
 */
284 static void unregister_actors(renderer_t *r)
286 state_t *state = r->state;
287 mrp_list_hook_t *lp, *ln;
289 mrp_list_hook_t *ap, *an;
292 mrp_list_foreach(&state->languages, lp, ln) {
293 l = mrp_list_entry(lp, typeof(*l), hook);
295 mrp_list_foreach(&l->actors, ap, an) {
296 a = mrp_list_entry(ap, typeof(*a), hook);
299 if (a->gender == SRS_VOICE_GENDER_MALE)
304 mrp_log_info("Unregistering voice %s/%s.", r->name, a->voice);
/* Unlink the actor itself. Unlinking the language here instead would leave
 * the actor on l->actors while its strings are released, and the list could
 * never become empty for the check below. */
306 mrp_list_delete(&a->hook);
309 mrp_free(a->dialect);
310 mrp_free(a->description);
/* drop the language once its last actor is gone */
315 if (mrp_list_empty(&l->actors)) {
316 mrp_list_delete(&l->hook);
/*
 * Find an actor by renderer name and voice name.
 *
 * Scans the actor list of every language for an actor whose renderer's name
 * equals 'r' and whose voice equals 'v'; both comparisons are exact,
 * case-sensitive strcmp matches.
 *
 * NOTE(review): the return statements are not visible in this excerpt;
 * find_renderer() treats a NULL result as "not found".
 */
323 static actor_t *find_actor(state_t *state, const char *r, const char *v)
325 mrp_list_hook_t *lp, *ln, *ap, *an;
329 mrp_list_foreach(&state->languages, lp, ln) {
330 l = mrp_list_entry(lp, typeof(*l), hook);
332 mrp_list_foreach(&l->actors, ap, an) {
333 a = mrp_list_entry(ap, typeof(*a), hook);
335 if (!strcmp(a->r->name, r) && !strcmp(a->voice, v))
/*
 * Tear down a renderer: unlink it from the list it is on and unregister its
 * actors.
 *
 * NOTE(review): any NULL check and the release of the renderer's own memory
 * are not visible in this excerpt.
 */
344 static void free_renderer(renderer_t *r)
347 mrp_list_delete(&r->hook);
348 unregister_actors(r);
/*
 * Deliver an event to the client that issued a request.
 *
 * The client callback is invoked only if one was given and the bit for this
 * event type (1 << event->type) is set in the request's notification mask.
 *
 * NOTE(review): the callback receives a local event 'e' whose declaration
 * and setup are not visible in this excerpt.
 */
354 static void notify_request(request_t *req, srs_voice_event_t *event)
356 renderer_t *r = req->r;
357 int mask = (1 << event->type);
360 if (req->notify != NULL && (mask & req->notify_mask)) {
363 req->notify(&e, req->notify_data);
/*
 * Event callback handed to backends by srs_register_voice().
 *
 * notify_data is treated as the renderer_t of the reporting backend and
 * event->id as the backend's id of the request (matched against req->vid).
 *
 * The event is forwarded to the client via notify_request(). If the event
 * type is in SRS_VOICE_MASK_DONE, the request's timer is deleted, the
 * request is unlinked unless it is the one currently being cancelled, and if
 * it was the active request the next queued one is activated.
 *
 * NOTE(review): the not-found check guarding the error log and any release
 * of the request are not visible in this excerpt.
 */
368 static void voice_notify_cb(srs_voice_event_t *event, void *notify_data)
370 renderer_t *r = (renderer_t *)notify_data;
371 state_t *state = (state_t *)r->state;
372 uint32_t vid = event->id;
/* -1 as request id acts as a wildcard: match on backend id only */
373 request_t *req = find_request(state, -1, vid);
374 int mask = (1 << event->type);
377 mrp_log_error("Failed to find request for event 0x%x of <%d>.",
378 event->type, event->id);
/* rendering has started, the queueing timeout no longer applies */
382 if (event->type == SRS_VOICE_EVENT_STARTED) {
383 mrp_del_timer(req->timer);
387 notify_request(req, event);
389 if (mask & SRS_VOICE_MASK_DONE) {
390 mrp_del_timer(req->timer);
/* srs_cancel_voice() unlinks a request it is cancelling by itself */
392 if (state->cancelling != req) {
393 mrp_list_delete(&req->hook);
397 if (state->active == req) {
398 state->active = NULL;
399 activate_next(state);
/*
 * Register a voice/TTS backend.
 *
 * @srs:         SRS context; its synthesizer state is allocated and its
 *               lists initialized here (NOTE(review): presumably only when
 *               it does not exist yet - the condition is not visible)
 * @name:        backend name, copied
 * @api:         backend API, must not be NULL
 * @api_data:    opaque data stored for the backend
 * @actors:      backend-declared actors, must not be NULL
 * @nactor:      number of actors, must be at least 1
 * @notify:      receives the callback the backend must use for events
 * @notify_data: NOTE(review): its assignment is not visible in this
 *               excerpt; voice_notify_cb() expects the renderer here
 *
 * Each actor is registered with register_actor(); a non-zero result from it
 * is treated as an error. On success the renderer is appended to the list
 * of synthesizers.
 *
 * NOTE(review): return values and failure cleanup are not visible in this
 * excerpt.
 */
405 int srs_register_voice(srs_context_t *srs, const char *name,
406 srs_voice_api_t *api, void *api_data,
407 srs_voice_actor_t *actors, int nactor,
408 srs_voice_notify_t *notify, void **notify_data)
410 state_t *state = (state_t *)srs->synthesizer;
412 srs_voice_actor_t *a;
416 srs->synthesizer = state = mrp_allocz(sizeof(*state));
421 mrp_list_init(&state->synthesizers);
422 mrp_list_init(&state->languages);
423 mrp_list_init(&state->requests);
427 if (api == NULL || name == NULL || actors == NULL || nactor < 1) {
432 r = mrp_allocz(sizeof(*r));
437 mrp_list_init(&r->hook);
438 r->id = state->nsynthesizer++;
441 r->name = mrp_strdup(name);
443 if (r->name == NULL) {
449 r->api_data = api_data;
451 for (i = 0; i < nactor; i++) {
452 if (register_actor(r, actors + i) != 0)
456 mrp_log_info("Registered voice/TTS backend '%s'.", r->name);
458 mrp_list_append(&state->synthesizers, &r->hook);
460 *notify = voice_notify_cb;
/*
 * Unregister the voice/TTS backend with the given name.
 *
 * The registered synthesizers are searched for an exact, case-sensitive
 * name match; a match is logged.
 *
 * NOTE(review): a NULL check on the synthesizer state and the actual
 * teardown of the matching renderer are not visible in this excerpt.
 */
467 void srs_unregister_voice(srs_context_t *srs, const char *name)
469 state_t *state = (state_t *)srs->synthesizer;
471 mrp_list_hook_t *p, *n;
474 mrp_list_foreach(&state->synthesizers, p, n) {
475 r = mrp_list_entry(p, typeof(*r), hook);
477 if (!strcmp(r->name, name)) {
478 mrp_log_info("Unregistering voice/TTS backend '%s'.", name);
/*
 * Resolve a voice specification to a renderer and a backend actor id.
 *
 * Two forms are visible here:
 *   - "<renderer>/<voice>": the part before the first '/' is copied into a
 *     local buffer and the pair is looked up with find_actor();
 *   - otherwise the spec is used to look up a language (never creating
 *     one): either the whole string, or the part before the first '-'.
 *     The language's actors are then searched for an exact voice match,
 *     with a fallback actor used when there is none.
 *
 * NOTE(review): the remaining parameters, the termination of the copied
 * renderer name, how 'fallback' is chosen and the return statements are
 * not visible in this excerpt.
 */
487 static renderer_t *find_renderer(state_t *state, const char *voice,
491 actor_t *a, *fallback;
492 mrp_list_hook_t *ap, *an;
493 char lang[128], renderer[128], *e;
502 if ((e = strchr(voice, '/')) != NULL) {
/* renderer part must fit the local buffer */
505 if (n >= sizeof(renderer) - 1)
508 strncpy(renderer, voice, n);
511 if ((a = find_actor(state, renderer, e + 1)) != NULL) {
519 if ((e = strchr(voice, '-')) == NULL)
520 l = find_language(state, voice, FALSE);
/* copy the first n characters of the spec as the language name */
523 if (snprintf(lang, sizeof(lang), "%*.*s", n, n, voice) >= sizeof(lang))
526 l = find_language(state, lang, FALSE);
533 mrp_list_foreach(&l->actors, ap, an) {
534 a = mrp_list_entry(ap, typeof(*a), hook);
536 if (!strcmp(a->voice, voice)) {
542 if (fallback == NULL)
546 if (fallback != NULL) {
547 *actor = fallback->id;
/*
 * Select a renderer by scanning the backend-declared actors of every
 * registered synthesizer.
 *
 * An actor string equal to SRS_VOICE_FEMALE or SRS_VOICE_MALE selects by
 * gender; anything else leaves the gender as SRS_VOICE_GENDER_ANY. Actors
 * whose language differs from 'lang' (exact strcmp) are skipped; the rest
 * are tested for a gender match and for an exact name match, and a default
 * id is tracked starting from SRS_VOICE_INVALID.
 *
 * NOTE(review): the remaining parameters, where 'lang' and 'actor' come
 * from, what is done on each kind of match and the return statements are
 * not visible in this excerpt.
 */
557 static renderer_t *select_renderer(state_t *state, const char *voice,
562 srs_voice_actor_t *a;
563 srs_voice_gender_t gender;
564 mrp_list_hook_t *p, *n;
574 if (!strcmp(actor, SRS_VOICE_FEMALE)) {
575 gender = SRS_VOICE_GENDER_FEMALE;
578 else if (!strcmp(actor, SRS_VOICE_MALE)) {
579 gender = SRS_VOICE_GENDER_MALE;
583 gender = SRS_VOICE_GENDER_ANY;
586 dfltid = SRS_VOICE_INVALID;
588 mrp_list_foreach(&state->synthesizers, p, n) {
589 r = mrp_list_entry(p, typeof(*r), hook);
591 for (i = 0, a = r->actors; i < r->nactor; i++, a++) {
592 if (strcmp(a->lang, lang))
596 if (gender == a->gender) {
602 if (!strcmp(a->name, actor)) {
607 if (dfltid == SRS_VOICE_INVALID) {
/*
 * Release a tag array as produced by copy_tags().
 *
 * The array is walked up to its terminating NULL entry.
 *
 * NOTE(review): the NULL check on 'tags' and the actual free calls are not
 * visible in this excerpt.
 */
621 static void free_tags(char **tags)
628 for (i = 0; tags[i] != NULL; i++)
/*
 * Make a deep copy of a NULL-terminated array of tag strings.
 *
 * The copy is grown by one slot per tag and each string is duplicated; a
 * final reallocation adds one more slot after the last tag.
 *
 * NOTE(review): the handling of a NULL 'tags' argument, the failure path
 * and the return statements are not visible in this excerpt. Presumably
 * the extra slot added last is zero-filled by mrp_reallocz() and serves as
 * the terminating NULL - confirm.
 */
635 static char **copy_tags(char **tags)
643 for (i = 0; tags[i] != NULL; i++) {
644 if (!mrp_reallocz(cp, i, i + 1))
646 if ((cp[i] = mrp_strdup(tags[i])) == NULL)
650 if (mrp_reallocz(cp, i, i + 1))
/*
 * Timer callback fired when a queued request has waited too long.
 *
 * user_data is the queued_t the timer was armed for. The timeout is logged,
 * the timer deleted, the client is sent SRS_VOICE_EVENT_TIMEOUT through
 * notify_request() and the request is unlinked from the queue.
 *
 * NOTE(review): the rest of the event setup and the release of the request
 * are not visible in this excerpt.
 */
659 static void request_timer_cb(mrp_timer_t *t, void *user_data)
661 queued_t *qr = (queued_t *)user_data;
662 request_t *req = &qr->req;
663 srs_voice_event_t event;
665 mrp_log_info("Voice/TTS request #%u timed out.", qr->req.id);
667 mrp_del_timer(req->timer);
671 event.type = SRS_VOICE_EVENT_TIMEOUT;
674 notify_request(req, &event);
676 mrp_list_delete(&req->hook);
/*
 * Queue a rendering request for later activation.
 *
 * A queued_t is allocated, given the next request id and an invalid backend
 * id (it has not been passed to the backend yet). The message and tags are
 * copied so the request does not depend on the caller's buffers. If both
 * copies succeeded (tags may legitimately be NULL), the request is appended
 * to the queue and a timer is armed with request_timer_cb().
 *
 * NOTE(review): the remaining field assignments, any condition on arming
 * the timer, the failure cleanup and the return statements are not visible
 * in this excerpt.
 */
685 static request_t *enqueue_request(state_t *state, const char *msg, char **tags,
686 renderer_t *r, uint32_t actor, double rate,
687 double pitch, int timeout, int notify_mask,
688 srs_voice_notify_t notify, void *notify_data)
692 qr = mrp_allocz(sizeof(*qr));
697 mrp_list_init(&qr->req.hook);
699 qr->req.id = state->nextid++;
701 qr->req.vid = SRS_VOICE_INVALID;
702 qr->req.notify_mask = notify_mask;
703 qr->req.notify = notify;
704 qr->req.notify_data = notify_data;
706 qr->msg = mrp_strdup(msg);
707 qr->tags = copy_tags(tags);
709 qr->timeout = timeout;
/* a NULL tag copy is only an error if there were tags to copy */
711 if (qr->msg != NULL && (qr->tags != NULL || tags == NULL)) {
712 mrp_list_append(&state->requests, &qr->req.hook);
715 qr->req.timer = mrp_add_timer(r->srs->ml, timeout,
716 request_timer_cb, qr);
/*
 * Activate the next queued request, if nothing is active.
 *
 * Queued requests are taken in list order: the queueing timer is deleted
 * and the request is passed to the backend's render function. If the
 * backend returns SRS_VOICE_INVALID, the client is sent
 * SRS_VOICE_EVENT_ABORTED (if it asked for it); otherwise the request
 * becomes the active one.
 *
 * NOTE(review): several lines of this function are not visible in this
 * excerpt (the early return, how 'r' is set, the rest of the event setup,
 * the cleanup of a failed request and the return statements).
 */
730 static request_t *activate_next(state_t *state)
734 mrp_list_hook_t *p, *n;
736 if (state->active != NULL)
740 mrp_list_foreach(&state->requests, p, n) {
742 qr = mrp_list_entry(p, typeof(*qr), req.hook);
749 mrp_del_timer(qr->req.timer);
750 qr->req.timer = NULL;
753 qr->req.vid = r->api.render(qr->msg, qr->tags, qr->actor, qr->rate,
754 qr->pitch, qr->req.notify_mask, r->api_data);
761 if (qr->req.vid == SRS_VOICE_INVALID) {
762 if (qr->req.notify != NULL &&
763 (qr->req.notify_mask & 1 << SRS_VOICE_EVENT_ABORTED)) {
767 e.type = SRS_VOICE_EVENT_ABORTED;
/* Deliver to the client the same way request_timer_cb() does.
 * voice_notify_cb() must not be used here: it interprets its data argument
 * as a renderer_t, while req.notify_data is the client's opaque pointer. */
770 notify_request(&qr->req, &e);
778 state->active = &qr->req;
/*
 * Pass a request to the backend right away, without queueing.
 *
 * A request_t is allocated, given the next request id, and the backend's
 * render function is called with the caller's message and tags. The
 * client's notification settings are stored in the request after the
 * render call.
 *
 * NOTE(review): the handling of a SRS_VOICE_INVALID result, the remaining
 * field assignments and the return statements are not visible in this
 * excerpt. 'timeout' is not used in any visible line.
 */
785 request_t *render_request(state_t *state, const char *msg, char **tags,
786 renderer_t *r, uint32_t actor, double rate,
787 double pitch, int timeout, int notify_mask,
788 srs_voice_notify_t notify, void *notify_data)
792 req = mrp_allocz(sizeof(*req));
797 mrp_list_init(&req->hook);
798 req->id = state->nextid++;
800 req->vid = r->api.render(msg, tags, actor, rate, pitch,
801 notify_mask, r->api_data);
803 if (req->vid == SRS_VOICE_INVALID) {
808 req->notify = notify;
809 req->notify_mask = notify_mask;
810 req->notify_data = notify_data;
/*
 * Render a message with the given voice, or queue it if the synthesizer is
 * busy.
 *
 * The voice is resolved with find_renderer(). If no request is active the
 * message is rendered immediately; otherwise it is queued, with
 * timeout == SRS_VOICE_IMMEDIATE handled as a special case.
 *
 * Returns SRS_VOICE_INVALID on failure.
 *
 * NOTE(review): the conditions guarding the failure returns, what the
 * SRS_VOICE_IMMEDIATE branch does and the success return value are not
 * visible in this excerpt.
 */
818 uint32_t srs_render_voice(srs_context_t *srs, const char *msg,
819 char **tags, const char *voice, double rate,
820 double pitch, int timeout, int notify_mask,
821 srs_voice_notify_t notify, void *user_data)
823 state_t *state = (state_t *)srs->synthesizer;
831 return SRS_VOICE_INVALID;
834 r = find_renderer(state, voice, &actid);
839 return SRS_VOICE_INVALID;
842 if (state->active == NULL)
843 req = render_request(state, msg, tags, r, actid, rate, pitch, timeout,
844 notify_mask, notify, user_data);
846 if (timeout == SRS_VOICE_IMMEDIATE) {
851 req = enqueue_request(state, msg, tags, r, actid, rate, pitch,
852 timeout, notify_mask, notify, user_data);
858 return SRS_VOICE_INVALID;
/*
 * Find a request by request id and/or backend id.
 *
 * The active request is checked first, then the queue. Passing -1 for
 * either id makes that id match anything; since both parameters are
 * uint32_t, -1 here means the value 0xffffffff.
 *
 * NOTE(review): the return statements are not visible in this excerpt;
 * callers treat NULL as "not found".
 */
862 static request_t *find_request(state_t *state, uint32_t rid, uint32_t vid)
864 mrp_list_hook_t *p, *n;
872 if ((req = state->active) != NULL) {
873 if ((rid == -1 || req->id == rid) && (vid == -1 || req->vid == vid))
877 mrp_list_foreach(&state->requests, p, n) {
878 req = mrp_list_entry(p, typeof(*req), hook);
880 if ((rid == -1 || req->id == rid) && (vid == -1 || req->vid == vid))
/*
 * Cancel the request with the given request id.
 *
 * The request's timer is deleted and the backend's cancel function is
 * called with the backend id. While that call runs, the request is recorded
 * in state->cancelling so that voice_notify_cb() does not unlink it; it is
 * unlinked here afterwards. If the request was the active one, the next
 * queued request is activated.
 *
 * NOTE(review): the not-found check, the use of 'notify', the reset of
 * state->cancelling and the release of the request are not visible in this
 * excerpt.
 */
891 void srs_cancel_voice(srs_context_t *srs, uint32_t rid, int notify)
893 state_t *state = (state_t *)srs->synthesizer;
/* -1 as backend id acts as a wildcard: match on request id only */
894 request_t *req = find_request(state, rid, -1);
895 renderer_t *voice = req ? req->r : NULL;
900 mrp_del_timer(req->timer);
902 state->cancelling = req;
904 voice->api.cancel(req->vid, voice->api_data);
906 mrp_list_delete(&req->hook);
909 if (state->active == req) {
910 state->active = NULL;
911 activate_next(state);
/*
 * Collect the registered voices, optionally limited to one language.
 *
 * @language: language to match case-insensitively, or NULL for all
 * @actorsp:  NOTE(review): its assignment is not visible in this excerpt;
 *            presumably receives the array built here
 *
 * The array is grown by one entry per matching actor and every string is
 * duplicated, so the result is independent of the internal tables. A NULL
 * copy of dialect or description is only an error if the source string was
 * set. One extra entry is added after the last actor.
 *
 * On failure everything collected so far is released.
 *
 * NOTE(review): the return values are not visible in this excerpt.
 * srs_free_queried_voices() stops at the first entry with a NULL lang, so
 * presumably the extra entry is zero-filled and acts as terminator.
 */
916 int srs_query_voices(srs_context_t *srs, const char *language,
917 srs_voice_actor_t **actorsp)
919 state_t *state = (state_t *)srs->synthesizer;
920 srs_voice_actor_t *actors, *actor;
923 mrp_list_hook_t *lp, *ln;
925 mrp_list_hook_t *ap, *an;
936 mrp_list_foreach(&state->languages, lp, ln) {
937 l = mrp_list_entry(lp, typeof(*l), hook);
939 if (language != NULL && strcasecmp(l->lang, language))
942 mrp_list_foreach(&l->actors, ap, an) {
943 a = mrp_list_entry(ap, typeof(*a), hook);
945 if (mrp_reallocz(actors, nactor, nactor + 1) == NULL)
948 actor = actors + nactor++;
950 actor->name = mrp_strdup(a->voice);
951 actor->lang = mrp_strdup(l->lang);
952 actor->dialect = mrp_strdup(a->dialect);
953 actor->gender = a->gender;
955 actor->description = mrp_strdup(a->description);
957 if (!actor->name || !actor->lang ||
958 (!actor->dialect && a->dialect) ||
959 (!actor->description && a->description))
964 if (actors != NULL) {
965 if (!mrp_reallocz(actors, nactor, nactor + 1))
974 for (i = 0; i < nactor; i++) {
975 mrp_free(actors[i].name);
976 mrp_free(actors[i].lang);
977 mrp_free(actors[i].dialect);
978 mrp_free(actors[i].description);
/*
 * Release an array returned by srs_query_voices().
 *
 * A NULL array is accepted. Entries are processed until the first one whose
 * lang is NULL.
 *
 * NOTE(review): the release of the remaining per-entry strings and of the
 * array itself is not visible in this excerpt.
 */
985 void srs_free_queried_voices(srs_voice_actor_t *actors)
987 srs_voice_actor_t *a;
989 if (actors != NULL) {
990 for (a = actors; a->lang; a++) {
993 mrp_free(a->dialect);
994 mrp_free(a->description);