sphinx-speech-engine: fixes for buffer management (purging filter buffer)

author Janos Kovacs <jankovac503@gmail.com>

Thu, 6 Jun 2013 13:44:01 +0000 (16:44 +0300)

committer Krisztian Litkey <krisztian.litkey@intel.com>

Thu, 6 Jun 2013 13:57:57 +0000 (16:57 +0300)
author Janos Kovacs <jankovac503@gmail.com>
Thu, 6 Jun 2013 13:44:01 +0000 (16:44 +0300)
committer Krisztian Litkey <krisztian.litkey@intel.com>
Thu, 6 Jun 2013 13:57:57 +0000 (16:57 +0300)
diff --git a/src/plugins/sphinx-speech-engine/filter-buffer.c b/src/plugins/sphinx-speech-engine/filter-buffer.c

index 19ebaa7..5b07845 100644 (file)
--- a/src/plugins/sphinx-speech-engine/filter-buffer.c
+++ b/src/plugins/sphinx-speech-engine/filter-buffer.c
@@ -22,6 +22,8 @@
  #include "decoder-set.h"
  #include "utterance.h"
  
+#define INJECTED_SILENCE 10     /* injected silence in frames */
+
  static int open_file_for_recording(const char *);
  
  
@@ -77,6 +79,7 @@ void filter_buffer_initialize(context_t *ctx,
      uint32_t rate;
      int32_t frlen;
      int32_t hwm;
+    size_t silence;
  
      if (!ctx || !(opts = ctx->opts) || !(filtbuf = ctx->filtbuf))
          return;
@@ -85,8 +88,9 @@ void filter_buffer_initialize(context_t *ctx,
      frlen = filtbuf->frlen;
      bufsiz = (bufsiz + (frlen - 1)) / frlen * frlen;
      hwm = (highwater_mark + (frlen - 1)) / frlen * frlen;
+    silence = INJECTED_SILENCE * frlen;
  
-    filtbuf->buf = mrp_alloc(bufsiz * sizeof(int16_t));
+    filtbuf->buf = mrp_alloc((bufsiz + silence) * sizeof(int16_t));
      filtbuf->max = bufsiz;
      filtbuf->hwm = hwm;
      filtbuf->silen = silen;    
@@ -118,7 +122,7 @@ bool filter_buffer_is_empty(context_t *ctx)
  void filter_buffer_purge(context_t *ctx, int32_t length)
  {
      filter_buf_t *filtbuf;
-    size_t offset, size, origlen;
+    size_t size, offset, origlen, sillen;
  
      if (!ctx || !(filtbuf = ctx->filtbuf))
          return;
@@ -137,8 +141,9 @@ void filter_buffer_purge(context_t *ctx, int32_t length)
                  mrp_debug("purging buffer. nothing preserved");
          }
          else {
+            sillen = INJECTED_SILENCE * filtbuf->frlen;
              origlen = filtbuf->len;
-            filtbuf->len -= length;
+            filtbuf->len = filtbuf->len - length + sillen;
  
              if (ctx->verbose) {
                  mrp_debug("purging buffer. %d samples preserved out of %u",
@@ -148,7 +153,8 @@ void filter_buffer_purge(context_t *ctx, int32_t length)
              offset = length;
              size = (origlen - offset) * sizeof(int16);
  
-            memmove(filtbuf->buf, filtbuf->buf + offset,  size);
+            memmove(filtbuf->buf + sillen, filtbuf->buf + offset, size);
+            memset(filtbuf->buf, 0, sillen * sizeof(int16_t));
          }
      }
  }
@@ -217,6 +223,8 @@ void filter_buffer_utter(context_t *ctx, bool full_utterance)
          !(filtbuf = ctx->filtbuf))
          return;
  
+    printf("*** utter %d\n", filtbuf->len);
+
      if (filtbuf->len > 0) {
          if (filtbuf->fdrec >= 0) {
              size = filtbuf->len * sizeof(int16);
@@ -281,7 +289,7 @@ static int open_file_for_recording(const char *path)
      if (!path)
          fd = -1;
      else {
-        fd = open(path, O_RDWR | O_CREAT, 0644);
+        fd = open(path, O_RDWR | O_CREAT | O_TRUNC, 0644);
  
          if (fd < 0)
              mrp_log_error("can't open file '%s': %s", path, strerror(errno));
diff --git a/src/plugins/sphinx-speech-engine/sphinx-plugin.c b/src/plugins/sphinx-speech-engine/sphinx-plugin.c

index 13c292b..ee3ccc3 100644 (file)
--- a/src/plugins/sphinx-speech-engine/sphinx-plugin.c
+++ b/src/plugins/sphinx-speech-engine/sphinx-plugin.c
@@ -65,9 +65,7 @@ int32_t plugin_utterance_handler(context_t *ctx, srs_srec_utterance_t *utt)
          length = -1;
      else {
          length = notify(utt, pl->notify.data);
-
-        if (length < 0 && utt->length)
-            length = utt->length;
+        mrp_log_info("buffer processed till %d", length);
      }
  
      return length;
diff --git a/src/plugins/sphinx-speech-engine/utterance.c b/src/plugins/sphinx-speech-engine/utterance.c

index b100ef9..95c7def 100644 (file)
--- a/src/plugins/sphinx-speech-engine/utterance.c
+++ b/src/plugins/sphinx-speech-engine/utterance.c
@@ -65,6 +65,7 @@ void utterance_end(context_t *ctx)
  
  static void process_utterance(context_t *ctx)
  {
+    filter_buf_t *filtbuf;
      decoder_set_t *decset;
      decoder_t *dec;
      srs_srec_utterance_t utt;
@@ -74,39 +75,43 @@ static void process_utterance(context_t *ctx)
      int32_t purgelen;
      int i;
  
-    if (ctx && (decset = ctx->decset) && (dec = decset->curdec)) {
+    if (!ctx || !(decset = ctx->decset) || !(dec = decset->curdec) ||
+        !(filtbuf = ctx->filtbuf))
+        return;
  
-        for (i = 0;  i < CANDIDATE_MAX;  i++)
-            cands[i].tokens = token_pool + (i * (CANDIDATE_TOKEN_MAX + 1));
+    for (i = 0;  i < CANDIDATE_MAX;  i++)
+        cands[i].tokens = token_pool + (i * (CANDIDATE_TOKEN_MAX + 1));
  
-        switch (dec->utproc) {
+    switch (dec->utproc) {
  
-        case UTTERANCE_PROCESSOR_ACOUSTIC:
-            acoustic_processor(ctx, &utt, cands, sorted);
-            goto processed;
+    case UTTERANCE_PROCESSOR_ACOUSTIC:
+        acoustic_processor(ctx, &utt, cands, sorted);
+        goto processed;
  
-        case UTTERANCE_PROCESSOR_FSG:
-            fsg_processor(ctx, &utt, cands, sorted);
-            goto processed;
+    case UTTERANCE_PROCESSOR_FSG:
+        fsg_processor(ctx, &utt, cands, sorted);
+        goto processed;
  
-        processed:
-            if (ctx->verbose || 1)
-                print_utterance(ctx, &utt);
+    processed:
+        if (ctx->verbose || 1)
+            print_utterance(ctx, &utt);
  
-            purgelen = plugin_utterance_handler(ctx, &utt);
-            filter_buffer_purge(ctx, purgelen);
+        purgelen = plugin_utterance_handler(ctx, &utt);
  
-            if (!filter_buffer_is_empty(ctx)) {
-                mrp_log_info("processing what is left in filter buffer");
-                utterance_start(ctx);
-                filter_buffer_utter(ctx, true);
-                utterance_end(ctx);
-            }
-            break;
+        if (purgelen > 0)
+            purgelen += 20 * filtbuf->frlen;
+        filter_buffer_purge(ctx, purgelen);
  
-        default:
-            break;
+        if (!filter_buffer_is_empty(ctx)) {
+            mrp_log_info("processing what is left in filter buffer");
+            utterance_start(ctx);
+            filter_buffer_utter(ctx, true);
+            utterance_end(ctx);
          }
+        break;
+
+    default:
+        break;
      }
  }
  
@@ -177,11 +182,11 @@ static void acoustic_processor(context_t *ctx,
                          cand->ntoken >= CANDIDATE_TOKEN_MAX)
                      {
                          ncand++;
-                        memset(cand+1, 0, sizeof(srs_srec_candidate_t));
+                        //memset(cand+1, 0, sizeof(srs_srec_candidate_t));
                          ps_seg_frames(seg, &start, &end);
                          ps_seg_free(seg);
                          //printf("hyp=</s> ncand=%d\n", ncand);
-                        length = end * frlen;
+                        length = (end + 1) * frlen;
                          break;
                      }
                      else if (!strcmp(hyp, "<sil>")) {
@@ -193,7 +198,7 @@ static void acoustic_processor(context_t *ctx,
                          tkn->token = tknbase(hyp);
                          ps_seg_frames(seg, &start, &end);
                          tkn->start = start * frlen;
-                        tkn->end = end * frlen;
+                        tkn->end = (end + 1) * frlen;
                          //printf("hyp=%s (%d, %d) tkn count %d\n",
                          //      tkn->token, tkn->start,tkn->end, cand->ntoken);
                      }
@@ -203,7 +208,7 @@ static void acoustic_processor(context_t *ctx,
              if (!seg && cand->ntoken > 0) {
                  ncand++;
                  cand->score *= 0.9; /* some penalty */
-                memset(cand+1, 0, sizeof(srs_srec_candidate_t));
+                //memset(cand+1, 0, sizeof(srs_srec_candidate_t));
              }
              
              if (!length) {
@@ -212,10 +217,13 @@ static void acoustic_processor(context_t *ctx,
              }
          }
      } /* for nb */
+
+    memset(cand+1, 0, sizeof(srs_srec_candidate_t));
      
      utt->id = uttid;
      utt->score = prob;
-    utt->length = length;
+    //utt->length = length;
+    utt->length = filtbuf->len;
      utt->ncand = candidate_sort(cands, sorted);
      utt->cands = sorted;
  }
@@ -289,7 +297,7 @@ static void fsg_processor(context_t *ctx,
                          tkn = cand->tokens + cand->ntoken++;
                          tkn->token = tknbase(token);
                          tkn->start = start;
-                        tkn->end = end;
+                        tkn->end = end + frlen;
                      }
                  }
              }
@@ -301,7 +309,8 @@ static void fsg_processor(context_t *ctx,
  
      utt->id = uttid;
      utt->score = prob < 0.00001 ? 0.00001 : prob;
-    utt->length = dag ? ps_lattice_n_frames(dag) * frlen : 0;
+    //utt->length = dag ? ps_lattice_n_frames(dag) * frlen : 0;
+    utt->length = filtbuf->len;
      utt->ncand = 1;
      utt->cands = sorted;
  }
diff --git a/src/plugins/sphinx-speech-engine/utterance.h b/src/plugins/sphinx-speech-engine/utterance.h

index 6d0e0c9..9f17f15 100644 (file)
--- a/src/plugins/sphinx-speech-engine/utterance.h
+++ b/src/plugins/sphinx-speech-engine/utterance.h
@@ -4,7 +4,7 @@
  #include "sphinx-plugin.h"
  
  #define CANDIDATE_TOKEN_MAX  50
-#define CANDIDATE_MAX        1000
+#define CANDIDATE_MAX        5
  
  
  void utterance_start(context_t *ctx);
author	Janos Kovacs <jankovac503@gmail.com>
	Thu, 6 Jun 2013 13:44:01 +0000 (16:44 +0300)
committer	Krisztian Litkey <krisztian.litkey@intel.com>
	Thu, 6 Jun 2013 13:57:57 +0000 (16:57 +0300)
src/plugins/sphinx-speech-engine/filter-buffer.c		patch \| blob \| history
src/plugins/sphinx-speech-engine/sphinx-plugin.c		patch \| blob \| history
src/plugins/sphinx-speech-engine/utterance.c		patch \| blob \| history
src/plugins/sphinx-speech-engine/utterance.h		patch \| blob \| history