src/daemon/recognizer.c

   1 /*
   2  * Copyright (c) 2012, Intel Corporation
   3  *
   4  * Redistribution and use in source and binary forms, with or without
   5  * modification, are permitted provided that the following conditions are
   6  * met:
   7  *
   8  *   * Redistributions of source code must retain the above copyright notice,
   9  *     this list of conditions and the following disclaimer.
  10  *   * Redistributions in binary form must reproduce the above copyright
  11  *     notice, this list of conditions and the following disclaimer in the
  12  *     documentation and/or other materials provided with the distribution.
  13  *   * Neither the name of Intel Corporation nor the names of its contributors
  14  *     may be used to endorse or promote products derived from this software
  15  *     without specific prior written permission.
  16  *
  17  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  18  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  19  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  20  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  21  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  22  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  23  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  24  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  25  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  26  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  27  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  28  */
  29
  30 #include <stdlib.h>
  31 #include <unistd.h>
  32 #include <errno.h>
  33
  34 #include <murphy/common/list.h>
  35
  36 #include "srs/daemon/context.h"
  37 #include "srs/daemon/voice.h"
  38 #include "srs/daemon/recognizer.h"
  39
  40
  41 /*
  42  * a speech recognition backend
  43  */
  44
  45 typedef struct {
  46     srs_context_t     *srs;              /* main context */
  47     char              *name;             /* recognizer name */
  48     mrp_list_hook_t    hook;             /* to list of recognizers */
  49     srs_srec_api_t     api;              /* backend API */
  50     void              *api_data;         /* opaque backend data */
  51     srs_srec_result_t *result;           /* result being processed, if any */
  52 } srs_srec_t;
  53
  54
  55 /*
  56  * a speech recognition disambiguator
  57  */
  58
  59 typedef struct {
  60     char             *name;
  61     mrp_list_hook_t   hook;
  62     srs_disamb_api_t  api;
  63     void             *api_data;
  64 } srs_disamb_t;
  65
  66
  67 /*
  68  * feedback TTS or effect
  69  */
  70
  71 typedef struct {
  72     int         tts;                     /* non-zero for TTS feedback */
  73     const char *data;                    /* TTS messgae or effect path */
  74 } effect_t;
  75
  76
  77 static srs_srec_t *find_srec(srs_context_t *srs, const char *name);
  78 static int srec_notify_cb(srs_srec_utterance_t *utt, void *notify_data);
  79 static srs_disamb_t *find_disamb(srs_context_t *srs, const char *name);
  80
  81
  82 /*
  83  * speech recognizer backend handling
  84  */
  85
  86 int srs_register_srec(srs_context_t *srs, const char *name,
  87                       srs_srec_api_t *api, void *api_data,
  88                       srs_srec_notify_t *notify, void **notify_data)
  89 {
  90     srs_srec_t *srec;
  91
  92     if (notify == NULL || notify_data == NULL) {
  93         errno = EINVAL;
  94         return -1;
  95     }
  96
  97     if (find_srec(srs, name) != NULL) {
  98         mrp_log_error("A recognizer '%s' already registered.", name);
  99
 100         errno = EEXIST;
 101         return -1;
 102     }
 103
 104     srec = mrp_allocz(sizeof(*srec));
 105
 106     if (srec != NULL) {
 107         mrp_list_init(&srec->hook);
 108         srec->srs      = srs;
 109         srec->name     = mrp_strdup(name);
 110         srec->api      = *api;
 111         srec->api_data = api_data;
 112
 113         if (srec->name != NULL) {
 114             mrp_list_append(&srs->recognizers, &srec->hook);
 115
 116             if (srs->cached_srec == NULL)
 117                 srs->cached_srec = srec;
 118
 119             if (srs->default_srec == NULL)
 120                 srs->default_srec = srec;
 121
 122             mrp_log_info("Registered speech recognition engine '%s'.", name);
 123
 124             *notify      = srec_notify_cb;
 125             *notify_data = srec;
 126
 127             return 0;
 128         }
 129
 130         mrp_free(srec);
 131     }
 132
 133     mrp_log_error("Failed to allocate speech recognition engine '%s'.", name);
 134
 135     return -1;
 136 }
 137
 138
 139 void srs_unregister_srec(srs_context_t *srs, const char *name)
 140 {
 141     srs_srec_t *srec = find_srec(srs, name);
 142
 143     if (srec != NULL) {
 144         mrp_list_delete(&srec->hook);
 145         mrp_free(srec->name);
 146         mrp_free(srec);
 147
 148         if (srs->cached_srec == srec)
 149             srs->cached_srec = NULL;
 150
 151         if (srs->default_srec == srec)
 152             srs->default_srec = NULL;
 153
 154         mrp_log_info("Unregistered speech recognition engine '%s'.", name);
 155     }
 156 }
 157
 158
 159 int srs_activate_srec(srs_context_t *srs, const char *name)
 160 {
 161     srs_srec_t *srec = find_srec(srs, name);
 162
 163     if (srec != NULL)
 164         return srec->api.activate(srec->api_data);
 165     else
 166         return FALSE;
 167 }
 168
 169
 170 void srs_deactivate_srec(srs_context_t *srs, const char *name)
 171 {
 172     srs_srec_t *srec = find_srec(srs, name);
 173
 174     if (srec != NULL)
 175         srec->api.deactivate(srec->api_data);
 176 }
 177
 178
 179 int srs_check_decoder(srs_context_t *srs, const char *name,
 180                       const char *decoder)
 181 {
 182     srs_srec_t *srec = find_srec(srs, name);
 183
 184     if (srec != NULL)
 185         return srec->api.check_decoder(decoder, srec->api_data);
 186     else
 187         return FALSE;
 188 }
 189
 190
 191 int srs_set_decoder(srs_context_t *srs, const char *name, const char *decoder)
 192 {
 193     srs_srec_t *srec = find_srec(srs, name);
 194
 195     if (srec != NULL)
 196         return srec->api.select_decoder(decoder, srec->api_data);
 197     else
 198         return FALSE;
 199 }
 200
 201
 202 static srs_srec_t *find_srec(srs_context_t *srs, const char *name)
 203 {
 204     srs_srec_t      *srec = srs->cached_srec;
 205     mrp_list_hook_t *p, *n;
 206
 207     if (name == SRS_DEFAULT_RECOGNIZER)
 208         return srs->default_srec;
 209
 210     if (srec == NULL || strcmp(srec->name, name) != 0) {
 211         mrp_list_foreach(&srs->recognizers, p, n) {
 212             srec = mrp_list_entry(p, typeof(*srec), hook);
 213
 214             if (!strcmp(srec->name, name)) {
 215                 srs->cached_srec = srec;
 216                 return srec;
 217             }
 218         }
 219
 220         return NULL;
 221     }
 222     else
 223         return srec;
 224 }
 225
 226
 227 static void free_match(srs_srec_match_t *m)
 228 {
 229     mrp_free(m);
 230 }
 231
 232
 233 static void free_srec_result(srs_srec_result_t *res)
 234 {
 235     srs_srec_match_t *m;
 236     mrp_list_hook_t  *p, *n;
 237     int               i;
 238
 239     switch (res->type) {
 240     case SRS_SREC_RESULT_MATCH:
 241         mrp_list_foreach(&res->result.matches, p, n) {
 242             m = mrp_list_entry(p, typeof(*m), hook);
 243             mrp_list_delete(&m->hook);
 244             free_match(m);
 245         }
 246         break;
 247
 248     case SRS_SREC_RESULT_DICT:
 249         break;
 250
 251     case SRS_SREC_RESULT_AMBIGUOUS:
 252         break;
 253
 254     default:
 255         break;
 256     }
 257
 258     for (i = 0; i < res->ntoken; i++)
 259         mrp_free(res->tokens[i]);
 260     mrp_free(res->tokens);
 261
 262     mrp_free(res->start);
 263     mrp_free(res->end);
 264     srs_unref_audiobuf(res->samplebuf);
 265
 266     for (i = 0; i < res->ndict; i++)
 267         mrp_free(res->dicts[i]);
 268     mrp_free(res->dicts);
 269
 270
 271     mrp_list_delete(&res->hook);
 272     mrp_free(res);
 273 }
 274
 275
 276 static int switch_dict(srs_srec_t *srec, const char *dict)
 277 {
 278     return srec->api.select_decoder(dict, srec->api_data) ? 0 : -1;
 279 }
 280
 281
 282 static int push_dict(srs_srec_t *srec, srs_srec_result_t *res)
 283 {
 284     const char *active, *next;
 285
 286     active = srec->api.active_decoder(srec->api_data);
 287     next   = res->result.dict.dict;
 288
 289     if (mrp_reallocz(res->dicts, res->ndict, res->ndict + 1) == NULL ||
 290         (res->dicts[res->ndict] = mrp_strdup(active))        == NULL)
 291         return -1;
 292
 293     if (res->dicts[res->ndict] == NULL)
 294         return -1;
 295
 296     res->ndict++;
 297
 298     return switch_dict(srec, next);
 299 }
 300
 301
 302 static int pop_dict(srs_srec_t *srec, srs_srec_result_t *res)
 303 {
 304     char *prev;
 305
 306     if (res->ndict <= 0) {
 307         errno = ENOENT;
 308         return -1;
 309     }
 310
 311     prev = res->dicts[res->ndict - 1];
 312
 313     if (switch_dict(srec, prev) == 0) {
 314         mrp_reallocz(res->dicts, res->ndict, res->ndict - 1);
 315         res->ndict--;
 316         mrp_free(prev);
 317
 318         return 0;
 319     }
 320     else
 321         return -1;
 322 }
 323
 324
 325 static void process_match_result(srs_srec_t *srec, srs_srec_result_t *res)
 326 {
 327     mrp_list_hook_t  *p, *n;
 328     srs_srec_match_t *match;
 329     int               i;
 330
 331     mrp_list_foreach(&res->result.matches, p, n) {
 332         match = mrp_list_entry(p, typeof(*match), hook);
 333
 334         for (i = 0; i < res->ntoken; i++) {
 335             mrp_log_info("  #%d token ('%s'): %u - %u", i,
 336                          res->tokens[i], res->start[i], res->end[i]);
 337         }
 338
 339         client_notify_command(match->client, match->index,
 340                               res->ntoken, (const char **)res->tokens,
 341                               res->start, res->end, res->samplebuf);
 342
 343         while (res->ndict > 0)
 344             pop_dict(srec, res);
 345     }
 346 }
 347
 348
 349 static int process_dict_result(srs_srec_t *srec, srs_srec_result_t *res)
 350 {
 351     if (srec->result != NULL && srec->result != res) {
 352         mrp_log_error("Conflicting results (%p != %p) for dictionary switch.",
 353                       res, srec->result);
 354         return 0;
 355     }
 356
 357     switch (res->result.dict.op) {
 358     case SRS_DICT_OP_POP:
 359         if (pop_dict(srec, res) < 0) {
 360             mrp_log_error("Failed to pop dictionary.");
 361             return 0;
 362         }
 363         break;
 364
 365     case SRS_DICT_OP_PUSH:
 366         if (push_dict(srec, res) < 0) {
 367             mrp_log_error("Failed to push dictionary '%s'.",
 368                           res->result.dict.dict);
 369             return 0;
 370         }
 371         break;
 372
 373     case SRS_DICT_OP_SWITCH:
 374         if (switch_dict(srec, res->result.dict.dict) < 0) {
 375             mrp_log_error("Failed to switch to dictionary '%s'.",
 376                           res->result.dict.dict);
 377             return 0;
 378         }
 379         break;
 380
 381     default:
 382         break;
 383     }
 384
 385     srec->result     = res;
 386     res->sampleoffs += res->result.dict.rescan;
 387
 388     return res->result.dict.rescan;
 389 }
 390
 391
 392 static void process_ambiguity(srs_srec_t *srec, srs_srec_result_t *res)
 393 {
 394     MRP_UNUSED(srec);
 395     MRP_UNUSED(res);
 396
 397     return;
 398 }
 399
 400
 401 static int get_effect_config(srs_context_t *srs, effect_t **cfgp)
 402 {
 403     static effect_t effects[32];
 404     static int      neffect = -1;
 405
 406     srs_cfg_t *cfg;
 407     int        n, i, cnt;
 408
 409     if (neffect != -1) {
 410         *cfgp = &effects[0];
 411         return neffect;
 412     }
 413
 414
 415     cnt = srs_config_collect(srs->settings, "feedback.tts.", &cfg);
 416
 417     if (cnt > (int)MRP_ARRAY_SIZE(effects))
 418         cnt = (int)MRP_ARRAY_SIZE(effects);
 419
 420     n = 0;
 421     for (i = 0; i < cnt; i++, n++) {
 422         effects[n].tts  = TRUE;
 423         effects[n].data = cfg[i].value;
 424         mrp_log_info("Configured TTS effect '%s'.", cfg[i].value);
 425     }
 426
 427     cnt = srs_config_collect(srs->settings, "feedback.sound.", &cfg);
 428
 429     if (n + cnt > (int)MRP_ARRAY_SIZE(effects))
 430         cnt = (int)MRP_ARRAY_SIZE(effects) - n;
 431
 432     for (i = 0; i < cnt; i++) {
 433         if (access(cfg[i].value, R_OK) == 0) {
 434             effects[n].tts  = FALSE;
 435             effects[n].data = cfg[i].value;
 436             n++;
 437             mrp_log_info("Configured sound effect '%s'.", cfg[i].value);
 438         }
 439         else
 440             mrp_log_warning("Dropping non-accessible effect '%s'.",
 441                             cfg[i].value);
 442     }
 443
 444     *cfgp   = effects;
 445     neffect = n;
 446
 447     return neffect;
 448 }
 449
 450
 451 static void process_unrecognized(srs_srec_t *srec, srs_srec_result_t *res)
 452 {
 453 #if 0
 454     static effect_t *effects = NULL;
 455     static int       neffect = -1;
 456     effect_t        *e;
 457     int              i;
 458
 459     if (MRP_UNLIKELY(neffect < 0))
 460         neffect = get_effect_config(srec->srs, &effects);
 461
 462     if (neffect <= 0)                    /* no effects */
 463         return;
 464
 465     i = ((1.0 * neffect * rand()) / RAND_MAX);
 466     e = effects + i;
 467
 468     mrp_debug("Chosen feedback %s: '%s'", e->tts ? "TTS" : "effect", e->data);
 469
 470     if (!e->tts)
 471         srs_play_sound_file(srec->srs, e->data, NULL, NULL);
 472     else
 473         srs_say_msg(srec->srs, e->data, NULL, NULL);
 474 #endif
 475 }
 476
 477
 478 static int srec_notify_cb(srs_srec_utterance_t *utt, void *notify_data)
 479 {
 480     srs_srec_t           *srec = (srs_srec_t *)notify_data;
 481     srs_disamb_t         *dis;
 482     srs_srec_candidate_t *c;
 483     srs_srec_result_t    *res;
 484     srs_srec_token_t     *t;
 485     int                   flush, i, j;
 486     uint32_t              start, end;
 487
 488     mrp_log_info("Got %zd recognition candidates in from %s backend:",
 489                  utt->ncand, srec->name);
 490
 491     for (i = 0; i < (int)utt->ncand; i++) {
 492         c = utt->cands[i];
 493         mrp_log_info("Candidate #%d:", i);
 494         for (j = 0, t = c->tokens; j < (int)c->ntoken; j++, t++) {
 495             mrp_log_info("    token #%d: '%s' (%u - %u)", j, t->token,
 496                          t->start, t->end);
 497         }
 498     }
 499
 500     flush = SRS_SREC_FLUSH_ALL;
 501     dis   = find_disamb(srec->srs, SRS_DEFAULT_DISAMBIGUATOR);
 502
 503     if (dis != NULL) {
 504         if (srec->result == NULL) {
 505             res = srec->result = mrp_allocz(sizeof(*srec->result));
 506
 507             if (res == NULL)
 508                 return SRS_SREC_FLUSH_ALL;
 509
 510             mrp_list_init(&res->hook);
 511             mrp_list_init(&res->result.matches);
 512
 513             c     = utt->cands[0];
 514             start = 0;
 515             end   = utt->length;
 516
 517             res->samplebuf = srec->api.sampledup(start, end, srec->api_data);
 518         }
 519
 520         res = srec->result;
 521
 522         if (dis->api.disambiguate(utt, &res, dis->api_data) == 0 && res) {
 523             mrp_log_info("Disambiguation succeeded.");
 524
 525             switch (res->type) {
 526             case SRS_SREC_RESULT_MATCH:
 527                 process_match_result(srec, res);
 528                 free_srec_result(res);
 529                 srec->result = NULL;
 530                 flush = SRS_SREC_FLUSH_ALL;
 531                 break;
 532
 533             case SRS_SREC_RESULT_DICT:
 534                 flush = process_dict_result(srec, res);
 535                 break;
 536
 537             case SRS_SREC_RESULT_AMBIGUOUS:
 538                 process_ambiguity(srec, res);
 539                 free_srec_result(res);
 540                 srec->result = NULL;
 541                 flush = SRS_SREC_FLUSH_ALL;
 542                 break;
 543
 544             case SRS_SREC_RESULT_UNRECOGNIZED:
 545                 mrp_log_error("Unrecognized command.");
 546                 process_unrecognized(srec, res);
 547                 free_srec_result(res);
 548                 srec->result = NULL;
 549                 flush = SRS_SREC_FLUSH_ALL;
 550                 break;
 551
 552             default:
 553                 flush = SRS_SREC_FLUSH_ALL;
 554                 free_srec_result(res);
 555                 srec->result = NULL;
 556                 break;
 557             }
 558         }
 559         else {
 560             if (res) {
 561                 flush = SRS_SREC_FLUSH_ALL;
 562                 free_srec_result(res);
 563                 srec->result = NULL;
 564             }
 565         }
 566     }
 567
 568     return flush;
 569 }
 570
 571
 572 /*
 573  * disambiguator handling
 574  */
 575
 576 int srs_register_disambiguator(srs_context_t *srs, const char *name,
 577                                srs_disamb_api_t *api, void *api_data)
 578 {
 579     srs_disamb_t *dis;
 580
 581     if (find_disamb(srs, name) != NULL) {
 582         mrp_log_error("A disambiguator '%s' already exists.", name);
 583
 584         errno = EEXIST;
 585         return -1;
 586     }
 587
 588     dis = mrp_allocz(sizeof(*dis));
 589
 590     if (dis != NULL) {
 591         mrp_list_init(&dis->hook);
 592         dis->name     = mrp_strdup(name);
 593         dis->api      = *api;
 594         dis->api_data = api_data;
 595
 596         if (dis->name != NULL) {
 597             mrp_list_append(&srs->disambiguators, &dis->hook);
 598
 599             if (srs->default_disamb == NULL)
 600                 srs->default_disamb = dis;
 601
 602             mrp_log_info("Registered disambiguator '%s'.", name);
 603
 604             return 0;
 605         }
 606
 607         mrp_free(dis);
 608     }
 609
 610     mrp_log_error("Failed to allocate disambiguator '%s'.", name);
 611
 612     return -1;
 613 }
 614
 615
 616 void srs_unregister_disambiguator(srs_context_t *srs, const char *name)
 617 {
 618     srs_disamb_t *dis = find_disamb(srs, name);
 619
 620     if (dis != NULL) {
 621         mrp_list_delete(&dis->hook);
 622         mrp_free(dis->name);
 623         mrp_free(dis);
 624
 625         if (srs->default_disamb == dis)
 626             srs->default_disamb = NULL;
 627
 628         mrp_log_info("Unregistered disambiguator '%s'.", name);
 629     }
 630 }
 631
 632
 633 static srs_disamb_t *find_disamb(srs_context_t *srs, const char *name)
 634 {
 635     srs_disamb_t    *dis;
 636     mrp_list_hook_t *p, *n;
 637
 638     if (name == SRS_DEFAULT_DISAMBIGUATOR)
 639         return srs->default_disamb;
 640
 641     mrp_list_foreach(&srs->disambiguators, p, n) {
 642         dis = mrp_list_entry(p, typeof(*dis), hook);
 643
 644             if (!strcmp(dis->name, name))
 645                 return dis;
 646     }
 647
 648     return NULL;
 649 }
 650
 651
 652 int srs_srec_add_client(srs_context_t *srs, srs_client_t *client)
 653 {
 654     srs_disamb_t *dis = find_disamb(srs, SRS_DEFAULT_DISAMBIGUATOR);
 655
 656     if (dis != NULL)
 657         return dis->api.add_client(client, dis->api_data);
 658     else
 659         return -1;
 660 }
 661
 662
 663 void srs_srec_del_client(srs_context_t *srs, srs_client_t *client)
 664 {
 665    srs_disamb_t *dis = find_disamb(srs, SRS_DEFAULT_DISAMBIGUATOR);
 666
 667    if (dis != NULL)
 668        dis->api.del_client(client, dis->api_data);
 669 }