sphinx-speech-plugin: add support for multiple dictionaries

author Janos Kovacs <jankovac503@gmail.com>

Thu, 6 Jun 2013 13:43:19 +0000 (16:43 +0300)

committer Krisztian Litkey <krisztian.litkey@intel.com>

Thu, 6 Jun 2013 13:57:15 +0000 (16:57 +0300)
author Janos Kovacs <jankovac503@gmail.com>
Thu, 6 Jun 2013 13:43:19 +0000 (16:43 +0300)
committer Krisztian Litkey <krisztian.litkey@intel.com>
Thu, 6 Jun 2013 13:57:15 +0000 (16:57 +0300)
diff --git a/src/plugins/sphinx-speech-engine/decoder-set.c b/src/plugins/sphinx-speech-engine/decoder-set.c

index 4588593..85a24c0 100644 (file)
--- a/src/plugins/sphinx-speech-engine/decoder-set.c
+++ b/src/plugins/sphinx-speech-engine/decoder-set.c
@@ -23,8 +23,10 @@
  int decoder_set_create(context_t *ctx)
  {
      options_t *opts;
+    options_decoder_t *dec;
      decoder_set_t *decset;
-    int sts;
+    size_t i;
+    int sts, retval;
  
      if (!ctx || !(opts = ctx->opts)) {
          errno = EINVAL;
@@ -36,17 +38,21 @@ int decoder_set_create(context_t *ctx)
  
      ctx->decset = decset;
  
-    sts = decoder_set_add(ctx, "default", opts->hmm, opts->lm,
-                          opts->dict, opts->fsg, opts->topn);
-    if (sts  < 0) {
-        mrp_log_error("failed to create default decoder");
-        errno = EIO;
-        return -1;
+    for (i = 0, retval = 0;  i < opts->ndec;  i++) {
+        dec = opts->decs + i;
+
+        sts = decoder_set_add(ctx, dec->name, dec->hmm, dec->lm,
+                              dec->dict, dec->fsg, opts->topn);
+        if (sts  < 0) {
+            mrp_log_error("failed to create '%s' decoder", dec->name);
+            errno = EIO;
+            retval= -1;
+        }
      }
  
      decset->curdec = decset->decs;
  
-    return 0;
+    return retval;
  }
  
  void decoder_set_destroy(context_t *ctx)
@@ -115,7 +121,7 @@ int decoder_set_add(context_t *ctx, const char *decoder_name,
      }
  
      if (!hmm)
-        hmm = opts->hmm;
+        hmm = opts->decs[0].hmm;
  
      curidx = decset->curdec - decset->decs;
      new_size = sizeof(decoder_t) * (decset->ndec + 2);
@@ -126,7 +132,6 @@ int decoder_set_add(context_t *ctx, const char *decoder_name,
          return -1;
      }
  
-
      memset(decset->decs + sizeof(decoder_t) * decset->ndec, 0,
             sizeof(decoder_t) * 2);
  
@@ -149,7 +154,7 @@ int decoder_set_add(context_t *ctx, const char *decoder_name,
          cmd_ln_set_str_r(cfg, "-logfn", opts->logfn);
  
      if (fsg)
-        cmd_ln_set_str_r(cfg, "-fsg", opts->fsg);
+        cmd_ln_set_str_r(cfg, "-fsg", fsg);
      
      if (!(ps = ps_init(cfg)))
          return -1;
@@ -206,6 +211,22 @@ int decoder_set_add(context_t *ctx, const char *decoder_name,
  #undef FSG_NAMES_MAX
  }
  
+/*
+ * Check whether the decoder set of 'ctx' has a decoder called
+ * 'decoder_name'.
+ *
+ * Returns true if an entry of ctx->decset->decs carries exactly this
+ * name. Returns false otherwise, including when ctx, its decoder set,
+ * the decoder array or decoder_name is NULL.
+ */
+bool decoder_set_contains(context_t *ctx, const char *decoder_name)
+{
+    decoder_set_t *decset;
+    decoder_t *d;
+
+    /* strcmp() below must never see a NULL name */
+    if (!ctx || !decoder_name || !(decset = ctx->decset))
+        return false;
+
+    /* nothing to walk if the decoder array has not been allocated */
+    if (!decset->decs)
+        return false;
+
+    /* the walk stops at the first entry whose name is NULL */
+    for (d = decset->decs;  d->name;  d++) {
+        if (!strcmp(decoder_name, d->name))
+            return true;
+    }
+
+    return false;
+}
+
  int decoder_set_use(context_t *ctx, const char *decoder_name)
  {
      decoder_set_t *decset;
diff --git a/src/plugins/sphinx-speech-engine/decoder-set.h b/src/plugins/sphinx-speech-engine/decoder-set.h

index 234ad55..dda4a2e 100644 (file)
--- a/src/plugins/sphinx-speech-engine/decoder-set.h
+++ b/src/plugins/sphinx-speech-engine/decoder-set.h
@@ -32,6 +32,7 @@ int decoder_set_add(context_t *ctx, const char *decoder_name,
                      const char *hmm, const char *lm,
                      const char *dict, const char *fsg,
                      uint32_t topn);
+bool decoder_set_contains(context_t *ctx, const char *decoder_name);
  int decoder_set_use(context_t *ctx, const char *decoder_name);
  const char *decoder_set_name(context_t *ctx);
  
diff --git a/src/plugins/sphinx-speech-engine/options.c b/src/plugins/sphinx-speech-engine/options.c

index b5d60e6..e84c85a 100644 (file)
--- a/src/plugins/sphinx-speech-engine/options.c
+++ b/src/plugins/sphinx-speech-engine/options.c
@@ -13,6 +13,10 @@
  #define DEFAULT_LM   "/usr/share/pocketsphinx/model/lm/en_US/wsj0vp.5000.DMP"
  #define DEFAULT_DICT "/usr/share/pocketsphinx/model/lm/en_US/cmu07a.dic"
  
+static int add_decoder(int, srs_cfg_t *, const char *,
+                       size_t *, options_decoder_t **pdecs);
+static int print_decoders(size_t, options_decoder_t *, int, char *);
+
  
  int options_create(context_t *ctx, int ncfg, srs_cfg_t *cfgs)
  {
@@ -25,6 +29,9 @@ int options_create(context_t *ctx, int ncfg, srs_cfg_t *cfgs)
      int i;
      int sts;
      size_t pfxlen;
+    size_t ndec;
+    options_decoder_t *decs;
+    char buf[65536];
  
      if (!ctx) {
          errno = EINVAL;
@@ -33,13 +40,17 @@ int options_create(context_t *ctx, int ncfg, srs_cfg_t *cfgs)
  
      pfxlen = strlen(SPHINX_PREFIX);
  
-    if (!(opts = mrp_allocz(sizeof(options_t))))
+    if (!(opts = mrp_allocz(sizeof(options_t))) ||
+        !(decs = mrp_allocz(sizeof(options_decoder_t))))
          return -1;
  
-    opts->hmm = mrp_strdup(DEFAULT_HMM);
-    opts->lm = mrp_strdup(DEFAULT_LM);
-    opts->dict = mrp_strdup(DEFAULT_DICT);
-    opts->fsg = NULL;
+    ndec = 1;
+    decs->name = mrp_strdup("default");
+    decs->hmm = mrp_strdup(DEFAULT_HMM);
+    decs->lm = mrp_strdup(DEFAULT_LM);
+    decs->dict = mrp_strdup(DEFAULT_DICT);
+    decs->fsg = NULL;
+
      opts->srcnam = NULL;
      opts->audio = NULL;
      opts->logfn = mrp_strdup("/dev/null");
@@ -61,29 +72,32 @@ int options_create(context_t *ctx, int ncfg, srs_cfg_t *cfgs)
  
              case 'd':
                  if (!strcmp(key, "dict")) {
-                    mrp_free((void *)opts->dict);
-                    opts->dict = mrp_strdup(value);
+                    mrp_free((void *)decs->dict);
+                    decs->dict = mrp_strdup(value);
+                }
+                else if (!strcmp(key, "decoder")) {
+                    add_decoder(ncfg, cfgs, value, &ndec, &decs);
                  }
                  break;
  
              case 'f':
                  if (!strcmp(key, "fsg")) {
-                    mrp_free((void *)opts->fsg);
-                    opts->fsg = mrp_strdup(value);
+                    mrp_free((void *)decs->fsg);
+                    decs->fsg = mrp_strdup(value);
                  }
                  break;
  
              case 'h':
                  if (!strcmp(key, "hmm")) {
-                    mrp_free((void *)opts->hmm);
-                    opts->hmm = mrp_strdup(value);
+                    mrp_free((void *)decs->hmm);
+                    decs->hmm = mrp_strdup(value);
                  }
                  break;
  
              case 'l':
                  if (!strcmp(key, "lm")) {
-                    mrp_free((void *)opts->lm);
-                    opts->lm = mrp_strdup(value);
+                    mrp_free((void *)decs->lm);
+                    decs->lm = mrp_strdup(value);
                  }
                  break;
  
@@ -126,27 +140,29 @@ int options_create(context_t *ctx, int ncfg, srs_cfg_t *cfgs)
                  break;
  
              default:
-                cfg->used = FALSE;
+                // cfg->used = FALSE;
                  break;
  
              } /* switch key */
          }
      } /* for cfg */
  
+    opts->ndec = ndec;
+    opts->decs = decs;
+
      if (sts == 0) {
-        mrp_log_info("directory for acoustic model: %s\n"
-                     "   language model file: %s\n"
-                     "   dictionary file: %s\n"
-                     "   model: %s%s\n"
-                     "   topn: %u\n"
+        print_decoders(opts->ndec, opts->decs, sizeof(buf), buf);
+
+        mrp_log_info("topn: %u\n"
                       "   pulseaudio source name: %s\n"
                       "   sample rate: %.1lf KHz\n"
-                     "   audio recording file: %s",
-                     opts->hmm, opts->lm, opts->dict,
-                     opts->fsg?"fsg - ":"audio", opts->fsg?opts->fsg:"",
+                     "   audio recording file: %s\n"
+                     "%s",
                       opts->topn,
                       opts->srcnam ? opts->srcnam : "<default-source>",
-                     (double)opts->rate / 1000.0, opts->audio);
+                     (double)opts->rate / 1000.0,
+                     opts->audio,
+                     buf);
      }
  
      ctx->opts = opts;
@@ -158,14 +174,23 @@ int options_create(context_t *ctx, int ncfg, srs_cfg_t *cfgs)
  void options_destroy(context_t *ctx)
  {
      options_t *opts;
+    options_decoder_t*dec;
+    size_t i;
  
      if (ctx && (opts = ctx->opts)) {
          ctx->opts = NULL;
  
-        mrp_free((void *)opts->hmm);
-        mrp_free((void *)opts->lm);
-        mrp_free((void *)opts->dict);
-        mrp_free((void *)opts->fsg);
+        if (opts->decs) {
+            for (i = 0; i < opts->ndec;  i++) {
+                dec = opts->decs + i;
+                mrp_free((void *)dec->name);
+                mrp_free((void *)dec->hmm);
+                mrp_free((void *)dec->lm);
+                mrp_free((void *)dec->dict);
+                mrp_free((void *)dec->fsg);
+            }
+        }
+
          mrp_free((void *)opts->srcnam);
          mrp_free((void *)opts->audio);
          mrp_free((void *)opts->logfn);
@@ -174,6 +199,109 @@ void options_destroy(context_t *ctx)
      }
  }
  
+/*
+ * Collect the SPHINX_PREFIX "<name>." {hmm,lm,dict,fsg} settings from
+ * 'cfgs' and append a decoder called 'name' to the array *pdecs.
+ *
+ *   ncfg, cfgs - configuration entries to scan
+ *   name       - decoder name, also used as the key prefix component
+ *   pndec      - in/out: number of entries in *pdecs
+ *   pdecs      - in/out: decoder array, reallocated on success
+ *
+ * 'lm' and 'dict' are mandatory; 'hmm' and 'fsg' are optional and are
+ * stored as NULL when absent. All stored strings are private copies.
+ *
+ * Returns 0 on success. Returns -1 and leaves *pndec and *pdecs
+ * untouched if the name does not fit the prefix buffer, a mandatory key
+ * is missing, or a memory allocation fails.
+ */
+static int add_decoder(int ncfg,
+                       srs_cfg_t *cfgs,
+                       const char *name,
+                       size_t *pndec,
+                       options_decoder_t **pdecs)
+{
+    int i, n;
+    srs_cfg_t *cfg;
+    const char *key;
+    size_t pfxlen;
+    char pfx[1024];
+    options_decoder_t *decs;
+    options_decoder_t newdec;
+    size_t ndec;
+    const char *hmm = NULL;
+    const char *lm = NULL;
+    const char *dict = NULL;
+    const char *fsg = NULL;
+
+    n = snprintf(pfx, sizeof(pfx), SPHINX_PREFIX "%s.", name);
+
+    /* a truncated prefix would silently match keys of other decoders */
+    if (n < 0 || (size_t)n >= sizeof(pfx))
+        return -1;
+
+    pfxlen = (size_t)n;
+
+    for (i = 0;  i < ncfg;  i++) {
+        cfg = cfgs + i;
+
+        if (strncmp(cfg->key, pfx, pfxlen))
+            continue;
+
+        /* step past the prefix only once the key is known to contain it */
+        key = cfg->key + pfxlen;
+
+        switch (key[0]) {
+
+        case 'd':
+            if (!strcmp(key, "dict"))
+                dict = cfg->value;
+            break;
+
+        case 'f':
+            if (!strcmp(key, "fsg"))
+                fsg = cfg->value;
+            break;
+
+        case 'h':
+            if (!strcmp(key, "hmm"))
+                hmm = cfg->value;
+            break;
+
+        case 'l':
+            if (!strcmp(key, "lm"))
+                lm = cfg->value;
+            break;
+
+        default:
+            break;
+        }
+    }
+
+    if (!lm || !dict)
+        return -1;
+
+    /* build the entry locally so a failure cannot leave a partial slot */
+    newdec.name = mrp_strdup(name);
+    newdec.hmm  = hmm ? mrp_strdup(hmm) : NULL;
+    newdec.lm   = mrp_strdup(lm);
+    newdec.dict = mrp_strdup(dict);
+    newdec.fsg  = fsg ? mrp_strdup(fsg) : NULL;
+
+    if (!newdec.name || !newdec.lm || !newdec.dict ||
+        (hmm && !newdec.hmm) || (fsg && !newdec.fsg))
+        goto fail;
+
+    ndec = *pndec;
+    decs = mrp_realloc(*pdecs, sizeof(options_decoder_t) * (ndec + 1));
+
+    /* on failure the caller's array is still valid and unchanged */
+    if (!decs)
+        goto fail;
+
+    decs[ndec] = newdec;
+
+    *pndec = ndec + 1;
+    *pdecs = decs;
+
+    return 0;
+
+ fail:
+    mrp_free((void *)newdec.name);
+    mrp_free((void *)newdec.hmm);
+    mrp_free((void *)newdec.lm);
+    mrp_free((void *)newdec.dict);
+    mrp_free((void *)newdec.fsg);
+
+    return -1;
+}
+
+/*
+ * Format a human-readable description of 'ndec' decoders into 'buf'.
+ *
+ *   ndec, decs - decoder descriptions to print
+ *   len        - size of 'buf' in bytes
+ *   buf        - output buffer; NUL-terminated on return when len > 0
+ *
+ * Output that does not fit is truncated. Returns the number of
+ * characters stored in 'buf', not counting the terminating NUL, so the
+ * result is always smaller than 'len' (0 if buf is NULL or len <= 0).
+ */
+static int print_decoders(size_t ndec,
+                          options_decoder_t *decs,
+                          int len,
+                          char *buf)
+{
+    options_decoder_t *dec;
+    char *p, *e;
+    size_t i;
+    int n;
+
+    if (!buf || len <= 0)
+        return 0;
+
+    e = (p = buf) + len;
+
+    /* keep buf a valid string even if no decoder gets printed */
+    *p = '\0';
+
+    for (i = 0;  i < ndec && p < e - 1;  i++) {
+        dec = decs + i;
+
+        n = snprintf(p, (size_t)(e - p),
+                     "   decoder\n"
+                     "      name : '%s'\n"
+                     "      acoustic model directory: %s\n"
+                     "      language model file: %s\n"
+                     "      dictionary file: %s\n"
+                     "      model: %s%s\n",
+                     dec->name, dec->hmm ? dec->hmm : "<default>",
+                     dec->lm, dec->dict,
+                     dec->fsg ? "fsg - ":"acoustic", dec->fsg ? dec->fsg:"");
+
+        if (n < 0) {
+            /* formatting error: drop whatever this entry produced */
+            *p = '\0';
+            break;
+        }
+
+        if (n >= e - p) {
+            /*
+             * snprintf() reports the length it would have written;
+             * on truncation the buffer is full up to its final NUL
+             */
+            p = e - 1;
+            break;
+        }
+
+        p += n;
+    }
+
+    return (int)(p - buf);
+}
+
  
  /*
   * Local Variables:
diff --git a/src/plugins/sphinx-speech-engine/options.h b/src/plugins/sphinx-speech-engine/options.h

index 8a76074..9ea3b31 100644 (file)
--- a/src/plugins/sphinx-speech-engine/options.h
+++ b/src/plugins/sphinx-speech-engine/options.h
@@ -8,10 +8,8 @@
  #define SPHINX_PREFIX "sphinx."
  
  struct options_s {
-    const char *hmm;
-    const char *lm;
-    const char *dict;
-    const char *fsg;
+    size_t ndec;
+    options_decoder_t *decs;
      const char *srcnam;
      const char *audio;
      const char *logfn;
@@ -20,6 +18,14 @@ struct options_s {
      double silen;
  };
  
+struct options_decoder_s {     /* settings of one named decoder */
+    const char *name;          /* decoder name ("default" for the first) */
+    const char *hmm;           /* acoustic model directory, NULL: default */
+    const char *lm;            /* language model file */
+    const char *dict;          /* dictionary file */
+    const char *fsg;           /* value for the "-fsg" option, or NULL */
+};
+
  
  int options_create(context_t *ctx, int ncfg, srs_cfg_t *cfgs);
  void options_destroy(context_t *ctx);
diff --git a/src/plugins/sphinx-speech-engine/sphinx-plugin.c b/src/plugins/sphinx-speech-engine/sphinx-plugin.c

index 1a66d05..2803421 100644 (file)
--- a/src/plugins/sphinx-speech-engine/sphinx-plugin.c
+++ b/src/plugins/sphinx-speech-engine/sphinx-plugin.c
@@ -147,13 +147,16 @@ static srs_audiobuf_t *sampledup(uint32_t start, uint32_t end, void *user_data)
  static int check_decoder(const char *decoder, void *user_data)
  {
      context_t *ctx = (context_t *)user_data;
-
-    MRP_UNUSED(ctx);
+    int available;
  
      mrp_debug("checking availability of decoder '%s' for CMU Sphinx backend",
                decoder);
  
-    return TRUE;
+    available = decoder_set_contains(ctx, decoder);
+
+    mrp_debug("decoder %s %savailable", decoder, available ? "" : "un");
+
+    return available;
  }
  
  
@@ -161,10 +164,11 @@ static int select_decoder(const char *decoder, void *user_data)
  {
      context_t *ctx = (context_t *)user_data;
  
-    MRP_UNUSED(ctx);
-
      mrp_debug("selecting decoder '%s' for CMU Sphinx backend", decoder);
  
+    if (decoder_set_use(ctx, decoder) < 0)
+        return FALSE;
+
      return TRUE;
  }
  
@@ -172,12 +176,15 @@ static int select_decoder(const char *decoder, void *user_data)
  static const char *active_decoder(void *user_data)
  {
      context_t *ctx = (context_t *)user_data;
+    const char *decoder;
  
-    MRP_UNUSED(ctx);
+    mrp_debug("querying active CMU Sphinx backend decoder");
+
+    decoder = decoder_set_name(ctx);
  
-    mrp_debug("returning hardcoded CMU Sphinx backend decoder string");
+    mrp_debug("active decoder is '%s'", decoder);
  
-    return "<active-sphinx-decoder-name>";
+    return decoder;
  }
  
  
diff --git a/src/plugins/sphinx-speech-engine/sphinx-plugin.h b/src/plugins/sphinx-speech-engine/sphinx-plugin.h

index df76dc6..1f376ec 100644 (file)
--- a/src/plugins/sphinx-speech-engine/sphinx-plugin.h
+++ b/src/plugins/sphinx-speech-engine/sphinx-plugin.h
@@ -12,6 +12,7 @@ typedef enum utterance_processor_e  utterance_processor_t;
  typedef struct context_s            context_t;
  typedef struct plugin_s             plugin_t;
  typedef struct options_s            options_t;
+typedef struct options_decoder_s    options_decoder_t;
  typedef struct context_s            context_t;
  typedef struct decoder_set_s        decoder_set_t;
  typedef struct decoder_s            decoder_t;
author	Janos Kovacs <jankovac503@gmail.com>
	Thu, 6 Jun 2013 13:43:19 +0000 (16:43 +0300)
committer	Krisztian Litkey <krisztian.litkey@intel.com>
	Thu, 6 Jun 2013 13:57:15 +0000 (16:57 +0300)
src/plugins/sphinx-speech-engine/decoder-set.c		patch \| blob \| history
src/plugins/sphinx-speech-engine/decoder-set.h		patch \| blob \| history
src/plugins/sphinx-speech-engine/options.c		patch \| blob \| history
src/plugins/sphinx-speech-engine/options.h		patch \| blob \| history
src/plugins/sphinx-speech-engine/sphinx-plugin.c		patch \| blob \| history
src/plugins/sphinx-speech-engine/sphinx-plugin.h		patch \| blob \| history