sphinx-speech-plugin: add support for multiple dictionaries
author Janos Kovacs <jankovac503@gmail.com>
Thu, 6 Jun 2013 13:43:19 +0000 (16:43 +0300)
committer Krisztian Litkey <krisztian.litkey@intel.com>
Thu, 6 Jun 2013 13:57:15 +0000 (16:57 +0300)
src/plugins/sphinx-speech-engine/decoder-set.c
src/plugins/sphinx-speech-engine/decoder-set.h
src/plugins/sphinx-speech-engine/options.c
src/plugins/sphinx-speech-engine/options.h
src/plugins/sphinx-speech-engine/sphinx-plugin.c
src/plugins/sphinx-speech-engine/sphinx-plugin.h

index 4588593..85a24c0 100644 (file)
 int decoder_set_create(context_t *ctx)
 {
     options_t *opts;
+    options_decoder_t *dec;
     decoder_set_t *decset;
-    int sts;
+    size_t i;
+    int sts, retval;
 
     if (!ctx || !(opts = ctx->opts)) {
         errno = EINVAL;
@@ -36,17 +38,21 @@ int decoder_set_create(context_t *ctx)
 
     ctx->decset = decset;
 
-    sts = decoder_set_add(ctx, "default", opts->hmm, opts->lm,
-                          opts->dict, opts->fsg, opts->topn);
-    if (sts  < 0) {
-        mrp_log_error("failed to create default decoder");
-        errno = EIO;
-        return -1;
+    for (i = 0, retval = 0;  i < opts->ndec;  i++) {
+        dec = opts->decs + i;
+
+        sts = decoder_set_add(ctx, dec->name, dec->hmm, dec->lm,
+                              dec->dict, dec->fsg, opts->topn);
+        if (sts  < 0) {
+            mrp_log_error("failed to create '%s' decoder", dec->name);
+            errno = EIO;
+            retval= -1;
+        }
     }
 
     decset->curdec = decset->decs;
 
-    return 0;
+    return retval;
 }
 
 void decoder_set_destroy(context_t *ctx)
@@ -115,7 +121,7 @@ int decoder_set_add(context_t *ctx, const char *decoder_name,
     }
 
     if (!hmm)
-        hmm = opts->hmm;
+        hmm = opts->decs[0].hmm;
 
     curidx = decset->curdec - decset->decs;
     new_size = sizeof(decoder_t) * (decset->ndec + 2);
@@ -126,7 +132,6 @@ int decoder_set_add(context_t *ctx, const char *decoder_name,
         return -1;
     }
 
-
     memset(decset->decs + sizeof(decoder_t) * decset->ndec, 0,
            sizeof(decoder_t) * 2);
 
@@ -149,7 +154,7 @@ int decoder_set_add(context_t *ctx, const char *decoder_name,
         cmd_ln_set_str_r(cfg, "-logfn", opts->logfn);
 
     if (fsg)
-        cmd_ln_set_str_r(cfg, "-fsg", opts->fsg);
+        cmd_ln_set_str_r(cfg, "-fsg", fsg);
     
     if (!(ps = ps_init(cfg)))
         return -1;
@@ -206,6 +211,22 @@ int decoder_set_add(context_t *ctx, const char *decoder_name,
 #undef FSG_NAMES_MAX
 }
 
+/* Return true if the decoder set of ctx has a decoder named decoder_name;
+ * false otherwise, including when ctx, its set or the name is NULL. */
+bool decoder_set_contains(context_t *ctx, const char *decoder_name)
+{
+    decoder_set_t *decset = ctx ? ctx->decset : NULL;
+    decoder_t *d;
+
+    if (!decset || !decset->decs || !decoder_name)  /* strcmp(NULL) is UB */
+        return false;
+    for (d = decset->decs;  d->name;  d++) {  /* a NULL name ends the scan */
+        if (!strcmp(decoder_name, d->name))
+            return true;
+    }
+    return false;
+}
+
 int decoder_set_use(context_t *ctx, const char *decoder_name)
 {
     decoder_set_t *decset;
index 234ad55..dda4a2e 100644 (file)
@@ -32,6 +32,7 @@ int decoder_set_add(context_t *ctx, const char *decoder_name,
                     const char *hmm, const char *lm,
                     const char *dict, const char *fsg,
                     uint32_t topn);
+bool decoder_set_contains(context_t *ctx, const char *decoder_name);
 int decoder_set_use(context_t *ctx, const char *decoder_name);
 const char *decoder_set_name(context_t *ctx);
 
index b5d60e6..e84c85a 100644 (file)
 #define DEFAULT_LM   "/usr/share/pocketsphinx/model/lm/en_US/wsj0vp.5000.DMP"
 #define DEFAULT_DICT "/usr/share/pocketsphinx/model/lm/en_US/cmu07a.dic"
 
+static int add_decoder(int, srs_cfg_t *, const char *,
+                       size_t *, options_decoder_t **pdecs);
+static int print_decoders(size_t, options_decoder_t *, int, char *);
+
 
 int options_create(context_t *ctx, int ncfg, srs_cfg_t *cfgs)
 {
@@ -25,6 +29,9 @@ int options_create(context_t *ctx, int ncfg, srs_cfg_t *cfgs)
     int i;
     int sts;
     size_t pfxlen;
+    size_t ndec;
+    options_decoder_t *decs;
+    char buf[65536];
 
     if (!ctx) {
         errno = EINVAL;
@@ -33,13 +40,17 @@ int options_create(context_t *ctx, int ncfg, srs_cfg_t *cfgs)
 
     pfxlen = strlen(SPHINX_PREFIX);
 
-    if (!(opts = mrp_allocz(sizeof(options_t))))
+    if (!(opts = mrp_allocz(sizeof(options_t))) ||
+        !(decs = mrp_allocz(sizeof(options_decoder_t))))
         return -1;
 
-    opts->hmm = mrp_strdup(DEFAULT_HMM);
-    opts->lm = mrp_strdup(DEFAULT_LM);
-    opts->dict = mrp_strdup(DEFAULT_DICT);
-    opts->fsg = NULL;
+    ndec = 1;
+    decs->name = mrp_strdup("default");
+    decs->hmm = mrp_strdup(DEFAULT_HMM);
+    decs->lm = mrp_strdup(DEFAULT_LM);
+    decs->dict = mrp_strdup(DEFAULT_DICT);
+    decs->fsg = NULL;
+
     opts->srcnam = NULL;
     opts->audio = NULL;
     opts->logfn = mrp_strdup("/dev/null");
@@ -61,29 +72,32 @@ int options_create(context_t *ctx, int ncfg, srs_cfg_t *cfgs)
 
             case 'd':
                 if (!strcmp(key, "dict")) {
-                    mrp_free((void *)opts->dict);
-                    opts->dict = mrp_strdup(value);
+                    mrp_free((void *)decs->dict);
+                    decs->dict = mrp_strdup(value);
+                }
+                else if (!strcmp(key, "decoder")) {
+                    add_decoder(ncfg, cfgs, value, &ndec, &decs);
                 }
                 break;
 
             case 'f':
                 if (!strcmp(key, "fsg")) {
-                    mrp_free((void *)opts->fsg);
-                    opts->fsg = mrp_strdup(value);
+                    mrp_free((void *)decs->fsg);
+                    decs->fsg = mrp_strdup(value);
                 }
                 break;
 
             case 'h':
                 if (!strcmp(key, "hmm")) {
-                    mrp_free((void *)opts->hmm);
-                    opts->hmm = mrp_strdup(value);
+                    mrp_free((void *)decs->hmm);
+                    decs->hmm = mrp_strdup(value);
                 }
                 break;
 
             case 'l':
                 if (!strcmp(key, "lm")) {
-                    mrp_free((void *)opts->lm);
-                    opts->lm = mrp_strdup(value);
+                    mrp_free((void *)decs->lm);
+                    decs->lm = mrp_strdup(value);
                 }
                 break;
 
@@ -126,27 +140,29 @@ int options_create(context_t *ctx, int ncfg, srs_cfg_t *cfgs)
                 break;
 
             default:
-                cfg->used = FALSE;
+                // cfg->used = FALSE;
                 break;
 
             } /* switch key */
         }
     } /* for cfg */
 
+    opts->ndec = ndec;
+    opts->decs = decs;
+
     if (sts == 0) {
-        mrp_log_info("directory for acoustic model: %s\n"
-                     "   language model file: %s\n"
-                     "   dictionary file: %s\n"
-                     "   model: %s%s\n"
-                     "   topn: %u\n"
+        print_decoders(opts->ndec, opts->decs, sizeof(buf), buf);
+
+        mrp_log_info("topn: %u\n"
                      "   pulseaudio source name: %s\n"
                      "   sample rate: %.1lf KHz\n"
-                     "   audio recording file: %s",
-                     opts->hmm, opts->lm, opts->dict,
-                     opts->fsg?"fsg - ":"audio", opts->fsg?opts->fsg:"",
+                     "   audio recording file: %s\n"
+                     "%s",
                      opts->topn,
                      opts->srcnam ? opts->srcnam : "<default-source>",
-                     (double)opts->rate / 1000.0, opts->audio);
+                     (double)opts->rate / 1000.0,
+                     opts->audio,
+                     buf);
     }
 
     ctx->opts = opts;
@@ -158,14 +174,23 @@ int options_create(context_t *ctx, int ncfg, srs_cfg_t *cfgs)
 void options_destroy(context_t *ctx)
 {
     options_t *opts;
+    options_decoder_t*dec;
+    size_t i;
 
     if (ctx && (opts = ctx->opts)) {
         ctx->opts = NULL;
 
-        mrp_free((void *)opts->hmm);
-        mrp_free((void *)opts->lm);
-        mrp_free((void *)opts->dict);
-        mrp_free((void *)opts->fsg);
+        if (opts->decs) {
+            for (i = 0; i < opts->ndec;  i++) {
+                dec = opts->decs + i;
+                mrp_free((void *)dec->name);
+                mrp_free((void *)dec->hmm);
+                mrp_free((void *)dec->lm);
+                mrp_free((void *)dec->dict);
+                mrp_free((void *)dec->fsg);
+            }
+        }
+
         mrp_free((void *)opts->srcnam);
         mrp_free((void *)opts->audio);
         mrp_free((void *)opts->logfn);
@@ -174,6 +199,109 @@ void options_destroy(context_t *ctx)
     }
 }
 
+/*
+ * Append a decoder called name to the array *pdecs (*pndec entries), taking
+ * its files from the cfgs entries keyed "sphinx.<name>.hmm|lm|dict|fsg".
+ * lm and dict are mandatory; hmm and fsg are stored as NULL when absent.
+ *
+ * Returns 0 on success. Returns -1 with errno set and *pndec unchanged if
+ * name is NULL or too long, lm or dict is missing, or memory runs out.
+ * *pdecs is updated whenever mrp_realloc succeeded, even on failure.
+ */
+static int add_decoder(int ncfg, srs_cfg_t *cfgs, const char *name,
+                       size_t *pndec, options_decoder_t **pdecs)
+{
+    const char *hmm = NULL, *lm = NULL, *dict = NULL, *fsg = NULL;
+    const char *key;
+    options_decoder_t *decs, *dec;
+    char pfx[1024];
+    int pfxlen, i;
+
+    pfxlen = name ? snprintf(pfx, sizeof(pfx), SPHINX_PREFIX "%s.", name) : -1;
+    /* a truncated prefix would make strncmp accept keys it should not */
+    if (pfxlen < 0 || (size_t)pfxlen >= sizeof(pfx)) {
+        errno = EINVAL;
+        return -1;
+    }
+
+    for (i = 0;  i < ncfg;  i++) {
+        if (strncmp(cfgs[i].key, pfx, (size_t)pfxlen))
+            continue;
+        /* step past the prefix only now that the key is known to have it */
+        key = cfgs[i].key + pfxlen;
+
+        if (!strcmp(key, "dict"))
+            dict = cfgs[i].value;
+        else if (!strcmp(key, "fsg"))
+            fsg = cfgs[i].value;
+        else if (!strcmp(key, "hmm"))
+            hmm = cfgs[i].value;
+        else if (!strcmp(key, "lm"))
+            lm = cfgs[i].value;
+    }
+
+    if (!lm || !dict) {
+        errno = EINVAL;
+        return -1;
+    }
+
+    if (!(decs = mrp_realloc(*pdecs, sizeof(*decs) * (*pndec + 1)))) {
+        errno = ENOMEM;
+        return -1;
+    }
+    *pdecs = decs;      /* publish at once: the old block may be gone */
+
+    dec = decs + *pndec;
+    dec->name = mrp_strdup(name);
+    dec->hmm  = hmm ? mrp_strdup(hmm) : NULL;
+    dec->lm   = mrp_strdup(lm);
+    dec->dict = mrp_strdup(dict);
+    dec->fsg  = fsg ? mrp_strdup(fsg) : NULL;
+
+    if (!dec->name || !dec->lm || !dec->dict ||
+        (hmm && !dec->hmm) || (fsg && !dec->fsg)) {
+        /* drop the half-built entry; it lies beyond the counted range */
+        mrp_free((void *)dec->name);
+        mrp_free((void *)dec->hmm);
+        mrp_free((void *)dec->lm);
+        mrp_free((void *)dec->dict);
+        mrp_free((void *)dec->fsg);
+        errno = ENOMEM;
+        return -1;
+    }
+    (*pndec)++;
+    return 0;
+}
+
+/* Format the ndec entries of decs into buf (len bytes; cut off if too small,
+ * NUL-terminated whenever len > 0). Returns the characters stored, sans NUL. */
+static int print_decoders(size_t ndec, options_decoder_t *decs,
+                          int len, char *buf)
+{
+    size_t i;
+    int n, used = 0;
+
+    if (!buf || len <= 0)
+        return 0;
+    buf[0] = '\0';      /* stay printable even when ndec is 0 */
+    for (i = 0;  i < ndec && used < len - 1;  i++, decs++) {
+        n = snprintf(buf + used, (size_t)(len - used),
+                     "   decoder\n"
+                     "      name : '%s'\n"
+                     "      acoustic model directory: %s\n"
+                     "      language model file: %s\n"
+                     "      dictionary file: %s\n"
+                     "      model: %s%s\n",
+                     decs->name, decs->hmm ? decs->hmm : "<default>",
+                     decs->lm, decs->dict, decs->fsg ? "fsg - " : "acoustic",
+                     decs->fsg ? decs->fsg : "");
+        if (n < 0)      /* output error: keep what is already stored */
+            break;
+        used = (n < len - used) ? used + n : len - 1;   /* n may exceed room */
+    }
+    return used;
+}
+
 
 /*
  * Local Variables:
index 8a76074..9ea3b31 100644 (file)
@@ -8,10 +8,8 @@
 #define SPHINX_PREFIX "sphinx."
 
 struct options_s {
-    const char *hmm;
-    const char *lm;
-    const char *dict;
-    const char *fsg;
+    size_t ndec;
+    options_decoder_t *decs;
     const char *srcnam;
     const char *audio;
     const char *logfn;
@@ -20,6 +18,14 @@ struct options_s {
     double silen;
 };
 
+struct options_decoder_s {  /* model files of one configured decoder */
+    const char *name;       /* decoder name; "default" for the built-in entry */
+    const char *hmm;        /* acoustic model dir; NULL falls back to decs[0].hmm */
+    const char *lm;         /* language model file */
+    const char *dict;       /* dictionary file */
+    const char *fsg;        /* optional fsg setting; NULL when not configured */
+};
+
 
 int options_create(context_t *ctx, int ncfg, srs_cfg_t *cfgs);
 void options_destroy(context_t *ctx);
index 1a66d05..2803421 100644 (file)
@@ -147,13 +147,16 @@ static srs_audiobuf_t *sampledup(uint32_t start, uint32_t end, void *user_data)
 static int check_decoder(const char *decoder, void *user_data)
 {
     context_t *ctx = (context_t *)user_data;
-
-    MRP_UNUSED(ctx);
+    int available;      /* outcome of the decoder set lookup */
 
     mrp_debug("checking availability of decoder '%s' for CMU Sphinx backend",
               decoder);
 
-    return TRUE;
+    available = decoder_set_contains(ctx, decoder); /* false if ctx has no set */
+
+    mrp_debug("decoder %s %savailable", decoder, available ? "" : "un");
+
+    return available;   /* nonzero only when the set has a decoder of this name */
 }
 
 
@@ -161,10 +164,11 @@ static int select_decoder(const char *decoder, void *user_data)
 {
     context_t *ctx = (context_t *)user_data;
 
-    MRP_UNUSED(ctx);
-
     mrp_debug("selecting decoder '%s' for CMU Sphinx backend", decoder);
 
+    if (decoder_set_use(ctx, decoder) < 0)
+        return FALSE;
+
     return TRUE;
 }
 
@@ -172,12 +176,15 @@ static int select_decoder(const char *decoder, void *user_data)
 static const char *active_decoder(void *user_data)
 {
     context_t *ctx = (context_t *)user_data;
+    const char *decoder;    /* name reported by decoder_set_name() */
 
-    MRP_UNUSED(ctx);
+    mrp_debug("querying active CMU Sphinx backend decoder");
+
+    decoder = decoder_set_name(ctx);
 
-    mrp_debug("returning hardcoded CMU Sphinx backend decoder string");
+    mrp_debug("active decoder is '%s'", decoder); /* NOTE(review): confirm decoder_set_name() never returns NULL */
 
-    return "<active-sphinx-decoder-name>";
+    return decoder;         /* handed back as received; no copy is made */
 }
 
 
index df76dc6..1f376ec 100644 (file)
@@ -12,6 +12,7 @@ typedef enum utterance_processor_e  utterance_processor_t;
 typedef struct context_s            context_t;
 typedef struct plugin_s             plugin_t;
 typedef struct options_s            options_t;
+typedef struct options_decoder_s    options_decoder_t;
 typedef struct context_s            context_t;
 typedef struct decoder_set_s        decoder_set_t;
 typedef struct decoder_s            decoder_t;