2 * Copyright (c) 2012, Intel Corporation
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are
8 * * Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 * * Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * * Neither the name of Intel Corporation nor the names of its contributors
14 * may be used to endorse or promote products derived from this software
15 * without specific prior written permission.
17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 #include <murphy/common/list.h>
36 #include "srs/daemon/context.h"
37 #include "srs/daemon/voice.h"
38 #include "srs/daemon/recognizer.h"
42 * a speech recognition backend
/* Fields of the per-backend recognizer record (used below as srs_srec_t). */
46 srs_context_t *srs; /* main context */
47 char *name; /* recognizer name (heap copy, see srs_register_srec) */
48 mrp_list_hook_t hook; /* link in the context's list of recognizers */
49 srs_srec_api_t api; /* backend API (table of backend callbacks) */
50 void *api_data; /* opaque backend data, passed back on every API call */
51 srs_srec_result_t *result; /* result being processed, if any */
56 * a speech recognition disambiguator
68 * feedback TTS or effect
/* Fields of one feedback entry (used below as effect_t). */
72 int tts; /* non-zero for TTS feedback */
73 const char *data; /* TTS message or effect path */
/* Forward declarations of helpers that are used before their definitions. */
77 static srs_srec_t *find_srec(srs_context_t *srs, const char *name);
78 static int srec_notify_cb(srs_srec_utterance_t *utt, void *notify_data);
79 static srs_disamb_t *find_disamb(srs_context_t *srs, const char *name);
83 * speech recognizer backend handling
/*
 * Register a speech recognition backend under the given name.
 *
 * Both notify and notify_data are tested against NULL, and the name is
 * looked up with find_srec to detect a recognizer already registered
 * under it. Once the record and its private copy of the name exist, the
 * recognizer is appended to srs->recognizers, it becomes the cached
 * and/or default recognizer if none was set yet, and *notify is pointed
 * at srec_notify_cb so the backend can report utterances.
 */
86 int srs_register_srec(srs_context_t *srs, const char *name,
87 srs_srec_api_t *api, void *api_data,
88 srs_srec_notify_t *notify, void **notify_data)
/* Both output parameters are required. */
92 if (notify == NULL || notify_data == NULL) {
/* Names must be unique among registered recognizers. */
97 if (find_srec(srs, name) != NULL) {
98 mrp_log_error("A recognizer '%s' already registered.", name);
/* Zero-initialized record for the new recognizer. */
104 srec = mrp_allocz(sizeof(*srec));
107 mrp_list_init(&srec->hook);
/* Keep a private copy of the name; the caller's string is not retained. */
109 srec->name = mrp_strdup(name);
111 srec->api_data = api_data;
/* Only link the recognizer in if the name copy succeeded. */
113 if (srec->name != NULL) {
114 mrp_list_append(&srs->recognizers, &srec->hook);
/* The first recognizer registered seeds both the lookup cache and the default. */
116 if (srs->cached_srec == NULL)
117 srs->cached_srec = srec;
119 if (srs->default_srec == NULL)
120 srs->default_srec = srec;
122 mrp_log_info("Registered speech recognition engine '%s'.", name);
/* Hand the backend the callback it should invoke with recognition results. */
124 *notify = srec_notify_cb;
/* Failure path; presumably reached when the record or its name could not be allocated. */
133 mrp_log_error("Failed to allocate speech recognition engine '%s'.", name);
/*
 * Unregister the recognizer with the given name.
 *
 * The recognizer is unlinked from the recognizer list, its name copy is
 * released, and srs->cached_srec / srs->default_srec are cleared if they
 * referred to it, so neither keeps pointing at the removed record.
 */
139 void srs_unregister_srec(srs_context_t *srs, const char *name)
141 srs_srec_t *srec = find_srec(srs, name);
/* NOTE(review): no check of srec against NULL is visible before this point - confirm. */
144 mrp_list_delete(&srec->hook);
145 mrp_free(srec->name);
/* Only the pointer value is compared below; srec is not dereferenced again. */
148 if (srs->cached_srec == srec)
149 srs->cached_srec = NULL;
151 if (srs->default_srec == srec)
152 srs->default_srec = NULL;
154 mrp_log_info("Unregistered speech recognition engine '%s'.", name);
/*
 * Activate the named recognizer: look it up with find_srec and return
 * whatever the backend's activate() callback returns for its opaque data.
 */
159 int srs_activate_srec(srs_context_t *srs, const char *name)
161 srs_srec_t *srec = find_srec(srs, name);
164 return srec->api.activate(srec->api_data);
/*
 * Deactivate the named recognizer: look it up with find_srec and invoke
 * the backend's deactivate() callback with its opaque data.
 */
170 void srs_deactivate_srec(srs_context_t *srs, const char *name)
172 srs_srec_t *srec = find_srec(srs, name);
175 srec->api.deactivate(srec->api_data);
/*
 * Forward a decoder check to the named recognizer's backend and return
 * the result of the backend's check_decoder() callback unchanged.
 */
179 int srs_check_decoder(srs_context_t *srs, const char *name,
182 srs_srec_t *srec = find_srec(srs, name);
185 return srec->api.check_decoder(decoder, srec->api_data);
/*
 * Ask the named recognizer's backend to select the given decoder and
 * return the result of the backend's select_decoder() callback unchanged
 * (unlike switch_dict, which maps that result to 0 / -1).
 */
191 int srs_set_decoder(srs_context_t *srs, const char *name, const char *decoder)
193 srs_srec_t *srec = find_srec(srs, name);
196 return srec->api.select_decoder(decoder, srec->api_data);
/*
 * Find a registered recognizer by name.
 *
 * Passing the SRS_DEFAULT_RECOGNIZER constant selects srs->default_srec.
 * Otherwise the most recently found recognizer (srs->cached_srec) is
 * tried first; on a miss the recognizer list is searched and the cache
 * is updated with the entry that matched.
 */
202 static srs_srec_t *find_srec(srs_context_t *srs, const char *name)
204 srs_srec_t *srec = srs->cached_srec;
205 mrp_list_hook_t *p, *n;
/* Pointer comparison against the constant itself, not a string comparison. */
207 if (name == SRS_DEFAULT_RECOGNIZER)
208 return srs->default_srec;
/* Cache miss (or empty cache): fall back to walking the list. */
210 if (srec == NULL || strcmp(srec->name, name) != 0) {
211 mrp_list_foreach(&srs->recognizers, p, n) {
212 srec = mrp_list_entry(p, typeof(*srec), hook);
214 if (!strcmp(srec->name, name)) {
/* Remember the hit so the next lookup of the same name skips the walk. */
215 srs->cached_srec = srec;
227 static void free_match(srs_srec_match_t *m)
/*
 * Release a recognition result and the data hanging off it.
 *
 * Type-specific payload is handled first (for match results every entry
 * is unlinked from res->result.matches), followed by the token strings
 * and token array, the start array, the reference on the sample buffer,
 * the saved dictionary names and their array; finally the result is
 * unlinked from the list its hook is on.
 */
233 static void free_srec_result(srs_srec_result_t *res)
236 mrp_list_hook_t *p, *n;
240 case SRS_SREC_RESULT_MATCH:
/* Entries are unlinked while iterating; the iteration keeps a second cursor (n). */
241 mrp_list_foreach(&res->result.matches, p, n) {
242 m = mrp_list_entry(p, typeof(*m), hook);
243 mrp_list_delete(&m->hook);
248 case SRS_SREC_RESULT_DICT:
251 case SRS_SREC_RESULT_AMBIGUOUS:
/* Common part: per-token strings first, then the array that held them. */
258 for (i = 0; i < res->ntoken; i++)
259 mrp_free(res->tokens[i]);
260 mrp_free(res->tokens);
262 mrp_free(res->start);
/* The sample buffer is reference counted; drop this result's reference. */
264 srs_unref_audiobuf(res->samplebuf);
/* Dictionary names saved by push_dict. */
266 for (i = 0; i < res->ndict; i++)
267 mrp_free(res->dicts[i]);
268 mrp_free(res->dicts);
271 mrp_list_delete(&res->hook);
/*
 * Ask the backend to select the given dictionary (decoder).
 *
 * Returns 0 when the backend's select_decoder() returns non-zero and -1
 * when it returns zero.
 */
276 static int switch_dict(srs_srec_t *srec, const char *dict)
278 return srec->api.select_decoder(dict, srec->api_data) ? 0 : -1;
/*
 * Save the name of the currently active decoder on the result's
 * dictionary stack (res->dicts / res->ndict), then switch to the
 * dictionary requested by the result. Returns switch_dict()'s result
 * when the final switch is reached.
 */
282 static int push_dict(srs_srec_t *srec, srs_srec_result_t *res)
284 const char *active, *next;
286 active = srec->api.active_decoder(srec->api_data);
287 next = res->result.dict.dict;
/* Grow the stack by one slot and store a private copy of the active name in it. */
289 if (mrp_reallocz(res->dicts, res->ndict, res->ndict + 1) == NULL ||
290 (res->dicts[res->ndict] = mrp_strdup(active)) == NULL)
/* NOTE(review): this looks like a repeat of the strdup check in the condition above - confirm whether it is still needed. */
293 if (res->dicts[res->ndict] == NULL)
298 return switch_dict(srec, next);
/*
 * Switch back to the dictionary most recently saved by push_dict.
 *
 * An empty stack (res->ndict <= 0) is handled separately before the top
 * entry is read. When the switch succeeds the stack array is shrunk by
 * one slot.
 */
302 static int pop_dict(srs_srec_t *srec, srs_srec_result_t *res)
306 if (res->ndict <= 0) {
/* Top of the stack is the last saved dictionary name. */
311 prev = res->dicts[res->ndict - 1];
313 if (switch_dict(srec, prev) == 0) {
/* NOTE(review): the result of this mrp_reallocz call is not checked here. */
314 mrp_reallocz(res->dicts, res->ndict, res->ndict - 1);
/*
 * Deliver a successful recognition to the clients that matched it.
 *
 * Walks res->result.matches; the tokens of the result are logged with
 * their start/end values and client_notify_command is called with the
 * match's client and index plus the tokens, their start/end arrays and
 * the sample buffer. Afterwards a loop runs for as long as dictionaries
 * remain on the result's dictionary stack.
 */
325 static void process_match_result(srs_srec_t *srec, srs_srec_result_t *res)
327 mrp_list_hook_t *p, *n;
328 srs_srec_match_t *match;
331 mrp_list_foreach(&res->result.matches, p, n) {
332 match = mrp_list_entry(p, typeof(*match), hook);
334 for (i = 0; i < res->ntoken; i++) {
335 mrp_log_info(" #%d token ('%s'): %u - %u", i,
336 res->tokens[i], res->start[i], res->end[i]);
/* The cast only adds const to the token strings for the callee. */
339 client_notify_command(match->client, match->index,
340 res->ntoken, (const char **)res->tokens,
341 res->start, res->end, res->samplebuf);
/* Presumably unwinds the dictionary stack (loop body not visible here) - confirm. */
343 while (res->ndict > 0)
/*
 * Carry out the dictionary operation requested by a result.
 *
 * A result other than the one the recognizer is already processing is
 * reported as a conflict. Otherwise res->result.dict.op selects a pop,
 * push or plain switch, each logging an error on failure. On the path
 * that reaches the end, res->sampleoffs is advanced by the requested
 * rescan amount and that amount is returned.
 */
349 static int process_dict_result(srs_srec_t *srec, srs_srec_result_t *res)
/* Only one result may be in flight per recognizer. */
351 if (srec->result != NULL && srec->result != res) {
352 mrp_log_error("Conflicting results (%p != %p) for dictionary switch.",
357 switch (res->result.dict.op) {
358 case SRS_DICT_OP_POP:
359 if (pop_dict(srec, res) < 0) {
360 mrp_log_error("Failed to pop dictionary.");
365 case SRS_DICT_OP_PUSH:
366 if (push_dict(srec, res) < 0) {
367 mrp_log_error("Failed to push dictionary '%s'.",
368 res->result.dict.dict);
373 case SRS_DICT_OP_SWITCH:
374 if (switch_dict(srec, res->result.dict.dict) < 0) {
375 mrp_log_error("Failed to switch to dictionary '%s'.",
376 res->result.dict.dict);
/* Account for the rescanned amount in the result's sample offset. */
386 res->sampleoffs += res->result.dict.rescan;
388 return res->result.dict.rescan;
392 static void process_ambiguity(srs_srec_t *srec, srs_srec_result_t *res)
/*
 * Build the table of feedback entries from the configuration.
 *
 * Settings collected under "feedback.tts." become TTS entries; settings
 * collected under "feedback.sound." become sound entries when the file
 * they name is readable, and are dropped with a warning otherwise. The
 * table is a function-local static array, so the total is capped at its
 * 32 slots.
 */
401 static int get_effect_config(srs_context_t *srs, effect_t **cfgp)
403 static effect_t effects[32];
404 static int neffect = -1;
415 cnt = srs_config_collect(srs->settings, "feedback.tts.", &cfg);
/* Never take more TTS entries than the table can hold. */
417 if (cnt > (int)MRP_ARRAY_SIZE(effects))
418 cnt = (int)MRP_ARRAY_SIZE(effects);
421 for (i = 0; i < cnt; i++, n++) {
422 effects[n].tts = TRUE;
/* NOTE(review): the config value pointer is stored as is, not copied - confirm it outlives the table. */
423 effects[n].data = cfg[i].value;
424 mrp_log_info("Configured TTS effect '%s'.", cfg[i].value);
427 cnt = srs_config_collect(srs->settings, "feedback.sound.", &cfg);
/* Sound entries share the table with the TTS entries already stored. */
429 if (n + cnt > (int)MRP_ARRAY_SIZE(effects))
430 cnt = (int)MRP_ARRAY_SIZE(effects) - n;
432 for (i = 0; i < cnt; i++) {
/* Only keep sound files that can actually be read. */
433 if (access(cfg[i].value, R_OK) == 0) {
434 effects[n].tts = FALSE;
435 effects[n].data = cfg[i].value;
437 mrp_log_info("Configured sound effect '%s'.", cfg[i].value);
440 mrp_log_warning("Dropping non-accessible effect '%s'.",
/*
 * Give feedback for an utterance that could not be recognized.
 *
 * The feedback table is fetched from get_effect_config on first use and
 * kept in function-local statics. If at least one entry is configured,
 * one is picked at random and either played as a sound file or spoken
 * as a TTS message.
 */
451 static void process_unrecognized(srs_srec_t *srec, srs_srec_result_t *res)
454 static effect_t *effects = NULL;
455 static int neffect = -1;
/* A negative count means the configuration has not been read yet. */
459 if (MRP_UNLIKELY(neffect < 0))
460 neffect = get_effect_config(srec->srs, &effects);
462 if (neffect <= 0) /* no effects */
/*
 * NOTE(review): when rand() returns RAND_MAX this evaluates to neffect;
 * if i indexes the neffect-entry table that is one past its end.
 * Dividing by (RAND_MAX + 1.0) would keep it in [0, neffect) - confirm.
 */
465 i = ((1.0 * neffect * rand()) / RAND_MAX);
468 mrp_debug("Chosen feedback %s: '%s'", e->tts ? "TTS" : "effect", e->data);
471 srs_play_sound_file(srec->srs, e->data, NULL, NULL);
473 srs_say_msg(srec->srs, e->data, NULL, NULL);
/*
 * Utterance callback handed to backends by srs_register_srec.
 *
 * notify_data is the recognizer record. The callback logs every
 * candidate with its tokens, looks up the default disambiguator, makes
 * sure the recognizer has a result object (allocating a zeroed one when
 * srec->result is NULL), and lets the disambiguator fill it in. The
 * outcome is then dispatched on its type. Every path visible here sets
 * the flush amount to SRS_SREC_FLUSH_ALL and frees the result, except
 * the dictionary case, which takes the flush amount from
 * process_dict_result and does not free the result in the lines shown.
 */
478 static int srec_notify_cb(srs_srec_utterance_t *utt, void *notify_data)
480 srs_srec_t *srec = (srs_srec_t *)notify_data;
482 srs_srec_candidate_t *c;
483 srs_srec_result_t *res;
/* NOTE(review): %zd is the signed size specifier; if ncand is a size_t, %zu is the matching one - confirm. */
488 mrp_log_info("Got %zd recognition candidates in from %s backend:",
489 utt->ncand, srec->name);
491 for (i = 0; i < (int)utt->ncand; i++) {
493 mrp_log_info("Candidate #%d:", i);
494 for (j = 0, t = c->tokens; j < (int)c->ntoken; j++, t++) {
495 mrp_log_info(" token #%d: '%s' (%u - %u)", j, t->token,
/* Flush everything unless a later step decides otherwise. */
500 flush = SRS_SREC_FLUSH_ALL;
501 dis = find_disamb(srec->srs, SRS_DEFAULT_DISAMBIGUATOR);
/* No result in progress for this recognizer: start a fresh, zeroed one. */
504 if (srec->result == NULL) {
505 res = srec->result = mrp_allocz(sizeof(*srec->result));
/* Presumably the allocation-failure exit - confirm. */
508 return SRS_SREC_FLUSH_ALL;
510 mrp_list_init(&res->hook);
511 mrp_list_init(&res->result.matches);
/* Have the backend duplicate the samples for the start..end range into the result. */
517 res->samplebuf = srec->api.sampledup(start, end, srec->api_data);
/* NOTE(review): dis is dereferenced here; a NULL check of find_disamb's result is not visible - confirm. */
522 if (dis->api.disambiguate(utt, &res, dis->api_data) == 0 && res) {
523 mrp_log_info("Disambiguation succeeded.");
526 case SRS_SREC_RESULT_MATCH:
527 process_match_result(srec, res);
528 free_srec_result(res);
530 flush = SRS_SREC_FLUSH_ALL;
/* Dictionary operations decide themselves how much the backend should flush. */
533 case SRS_SREC_RESULT_DICT:
534 flush = process_dict_result(srec, res);
537 case SRS_SREC_RESULT_AMBIGUOUS:
538 process_ambiguity(srec, res);
539 free_srec_result(res);
541 flush = SRS_SREC_FLUSH_ALL;
544 case SRS_SREC_RESULT_UNRECOGNIZED:
545 mrp_log_error("Unrecognized command.");
546 process_unrecognized(srec, res);
547 free_srec_result(res);
549 flush = SRS_SREC_FLUSH_ALL;
/* Remaining paths: drop the result and flush everything. */
553 flush = SRS_SREC_FLUSH_ALL;
554 free_srec_result(res);
561 flush = SRS_SREC_FLUSH_ALL;
562 free_srec_result(res);
573 * disambiguator handling
/*
 * Register a disambiguator under the given name.
 *
 * The name is looked up with find_disamb to detect a disambiguator
 * already registered under it. Once the record and its private copy of
 * the name exist, the disambiguator is appended to srs->disambiguators
 * and becomes the default one if no default was set yet.
 */
576 int srs_register_disambiguator(srs_context_t *srs, const char *name,
577 srs_disamb_api_t *api, void *api_data)
/* Names must be unique among registered disambiguators. */
581 if (find_disamb(srs, name) != NULL) {
582 mrp_log_error("A disambiguator '%s' already exists.", name);
/* Zero-initialized record for the new disambiguator. */
588 dis = mrp_allocz(sizeof(*dis));
591 mrp_list_init(&dis->hook);
592 dis->name = mrp_strdup(name);
594 dis->api_data = api_data;
/* Only link the disambiguator in if the name copy succeeded. */
596 if (dis->name != NULL) {
597 mrp_list_append(&srs->disambiguators, &dis->hook);
/* The first disambiguator registered becomes the default. */
599 if (srs->default_disamb == NULL)
600 srs->default_disamb = dis;
602 mrp_log_info("Registered disambiguator '%s'.", name);
/* Failure path; presumably reached when the record or its name could not be allocated. */
610 mrp_log_error("Failed to allocate disambiguator '%s'.", name);
/*
 * Unregister the disambiguator with the given name: unlink it from the
 * disambiguator list and clear srs->default_disamb if it referred to
 * the removed entry.
 */
616 void srs_unregister_disambiguator(srs_context_t *srs, const char *name)
618 srs_disamb_t *dis = find_disamb(srs, name);
/* NOTE(review): no check of dis against NULL is visible before this point - confirm. */
621 mrp_list_delete(&dis->hook);
/* Only the pointer value is compared; dis is not dereferenced here. */
625 if (srs->default_disamb == dis)
626 srs->default_disamb = NULL;
628 mrp_log_info("Unregistered disambiguator '%s'.", name);
/*
 * Find a registered disambiguator by name.
 *
 * Passing the SRS_DEFAULT_DISAMBIGUATOR constant selects
 * srs->default_disamb; any other name is searched for in
 * srs->disambiguators with a string comparison. Unlike find_srec there
 * is no lookup cache.
 */
633 static srs_disamb_t *find_disamb(srs_context_t *srs, const char *name)
636 mrp_list_hook_t *p, *n;
/* Pointer comparison against the constant itself, not a string comparison. */
638 if (name == SRS_DEFAULT_DISAMBIGUATOR)
639 return srs->default_disamb;
641 mrp_list_foreach(&srs->disambiguators, p, n) {
642 dis = mrp_list_entry(p, typeof(*dis), hook);
644 if (!strcmp(dis->name, name))
/*
 * Announce a new client to the default disambiguator and return the
 * result of the disambiguator's add_client() callback unchanged.
 */
652 int srs_srec_add_client(srs_context_t *srs, srs_client_t *client)
654 srs_disamb_t *dis = find_disamb(srs, SRS_DEFAULT_DISAMBIGUATOR);
657 return dis->api.add_client(client, dis->api_data);
/*
 * Tell the default disambiguator that a client is going away by calling
 * its del_client() callback.
 */
663 void srs_srec_del_client(srs_context_t *srs, srs_client_t *client)
665 srs_disamb_t *dis = find_disamb(srs, SRS_DEFAULT_DISAMBIGUATOR);
668 dis->api.del_client(client, dis->api_data);