sphinx-speech-engine: fixes for buffer management (purging filter buffer)
author Janos Kovacs <jankovac503@gmail.com>
Thu, 6 Jun 2013 13:44:01 +0000 (16:44 +0300)
committer Krisztian Litkey <krisztian.litkey@intel.com>
Thu, 6 Jun 2013 13:57:57 +0000 (16:57 +0300)
src/plugins/sphinx-speech-engine/filter-buffer.c
src/plugins/sphinx-speech-engine/sphinx-plugin.c
src/plugins/sphinx-speech-engine/utterance.c
src/plugins/sphinx-speech-engine/utterance.h

index 19ebaa7..5b07845 100644 (file)
@@ -22,6 +22,8 @@
 #include "decoder-set.h"
 #include "utterance.h"
 
+#define INJECTED_SILENCE 10     /* injected silence in frames */
+
 static int open_file_for_recording(const char *);
 
 
@@ -77,6 +79,7 @@ void filter_buffer_initialize(context_t *ctx,
     uint32_t rate;
     int32_t frlen;
     int32_t hwm;
+    size_t silence;
 
     if (!ctx || !(opts = ctx->opts) || !(filtbuf = ctx->filtbuf))
         return;
@@ -85,8 +88,9 @@ void filter_buffer_initialize(context_t *ctx,
     frlen = filtbuf->frlen;
     bufsiz = (bufsiz + (frlen - 1)) / frlen * frlen;
     hwm = (highwater_mark + (frlen - 1)) / frlen * frlen;
+    silence = INJECTED_SILENCE * frlen;
 
-    filtbuf->buf = mrp_alloc(bufsiz * sizeof(int16_t));
+    filtbuf->buf = mrp_alloc((bufsiz + silence) * sizeof(int16_t));
     filtbuf->max = bufsiz;
     filtbuf->hwm = hwm;
     filtbuf->silen = silen;    
@@ -118,7 +122,7 @@ bool filter_buffer_is_empty(context_t *ctx)
 void filter_buffer_purge(context_t *ctx, int32_t length)
 {
     filter_buf_t *filtbuf;
-    size_t offset, size, origlen;
+    size_t size, offset, origlen, sillen;
 
     if (!ctx || !(filtbuf = ctx->filtbuf))
         return;
@@ -137,8 +141,9 @@ void filter_buffer_purge(context_t *ctx, int32_t length)
                 mrp_debug("purging buffer. nothing preserved");
         }
         else {
+            sillen = INJECTED_SILENCE * filtbuf->frlen;
             origlen = filtbuf->len;
-            filtbuf->len -= length;
+            filtbuf->len = filtbuf->len - length + sillen;
 
             if (ctx->verbose) {
                 mrp_debug("purging buffer. %d samples preserved out of %u",
@@ -148,7 +153,8 @@ void filter_buffer_purge(context_t *ctx, int32_t length)
             offset = length;
             size = (origlen - offset) * sizeof(int16);
 
-            memmove(filtbuf->buf, filtbuf->buf + offset,  size);
+            memmove(filtbuf->buf + sillen, filtbuf->buf + offset, size);
+            memset(filtbuf->buf, 0, sillen * sizeof(int16_t));
         }
     }
 }
@@ -217,6 +223,8 @@ void filter_buffer_utter(context_t *ctx, bool full_utterance)
         !(filtbuf = ctx->filtbuf))
         return;
 
+    printf("*** utter %d\n", filtbuf->len);
+
     if (filtbuf->len > 0) {
         if (filtbuf->fdrec >= 0) {
             size = filtbuf->len * sizeof(int16);
@@ -281,7 +289,7 @@ static int open_file_for_recording(const char *path)
     if (!path)
         fd = -1;
     else {
-        fd = open(path, O_RDWR | O_CREAT, 0644);
+        fd = open(path, O_RDWR | O_CREAT | O_TRUNC, 0644);
 
         if (fd < 0)
             mrp_log_error("can't open file '%s': %s", path, strerror(errno));
index 13c292b..ee3ccc3 100644 (file)
@@ -65,9 +65,7 @@ int32_t plugin_utterance_handler(context_t *ctx, srs_srec_utterance_t *utt)
         length = -1;
     else {
         length = notify(utt, pl->notify.data);
-
-        if (length < 0 && utt->length)
-            length = utt->length;
+        mrp_log_info("buffer processed till %d", length);
     }
 
     return length;
index b100ef9..95c7def 100644 (file)
@@ -65,6 +65,7 @@ void utterance_end(context_t *ctx)
 
 static void process_utterance(context_t *ctx)
 {
+    filter_buf_t *filtbuf;
     decoder_set_t *decset;
     decoder_t *dec;
     srs_srec_utterance_t utt;
@@ -74,39 +75,43 @@ static void process_utterance(context_t *ctx)
     int32_t purgelen;
     int i;
 
-    if (ctx && (decset = ctx->decset) && (dec = decset->curdec)) {
+    if (!ctx || !(decset = ctx->decset) || !(dec = decset->curdec) ||
+        !(filtbuf = ctx->filtbuf))
+        return;
 
-        for (i = 0;  i < CANDIDATE_MAX;  i++)
-            cands[i].tokens = token_pool + (i * (CANDIDATE_TOKEN_MAX + 1));
+    for (i = 0;  i < CANDIDATE_MAX;  i++)
+        cands[i].tokens = token_pool + (i * (CANDIDATE_TOKEN_MAX + 1));
 
-        switch (dec->utproc) {
+    switch (dec->utproc) {
 
-        case UTTERANCE_PROCESSOR_ACOUSTIC:
-            acoustic_processor(ctx, &utt, cands, sorted);
-            goto processed;
+    case UTTERANCE_PROCESSOR_ACOUSTIC:
+        acoustic_processor(ctx, &utt, cands, sorted);
+        goto processed;
 
-        case UTTERANCE_PROCESSOR_FSG:
-            fsg_processor(ctx, &utt, cands, sorted);
-            goto processed;
+    case UTTERANCE_PROCESSOR_FSG:
+        fsg_processor(ctx, &utt, cands, sorted);
+        goto processed;
 
-        processed:
-            if (ctx->verbose || 1)
-                print_utterance(ctx, &utt);
+    processed:
+        if (ctx->verbose || 1)
+            print_utterance(ctx, &utt);
 
-            purgelen = plugin_utterance_handler(ctx, &utt);
-            filter_buffer_purge(ctx, purgelen);
+        purgelen = plugin_utterance_handler(ctx, &utt);
 
-            if (!filter_buffer_is_empty(ctx)) {
-                mrp_log_info("processing what is left in filter buffer");
-                utterance_start(ctx);
-                filter_buffer_utter(ctx, true);
-                utterance_end(ctx);
-            }
-            break;
+        if (purgelen > 0)
+            purgelen += 20 * filtbuf->frlen;
+        filter_buffer_purge(ctx, purgelen);
 
-        default:
-            break;
+        if (!filter_buffer_is_empty(ctx)) {
+            mrp_log_info("processing what is left in filter buffer");
+            utterance_start(ctx);
+            filter_buffer_utter(ctx, true);
+            utterance_end(ctx);
         }
+        break;
+
+    default:
+        break;
     }
 }
 
@@ -177,11 +182,11 @@ static void acoustic_processor(context_t *ctx,
                         cand->ntoken >= CANDIDATE_TOKEN_MAX)
                     {
                         ncand++;
-                        memset(cand+1, 0, sizeof(srs_srec_candidate_t));
+                        //memset(cand+1, 0, sizeof(srs_srec_candidate_t));
                         ps_seg_frames(seg, &start, &end);
                         ps_seg_free(seg);
                         //printf("hyp=</s> ncand=%d\n", ncand);
-                        length = end * frlen;
+                        length = (end + 1) * frlen;
                         break;
                     }
                     else if (!strcmp(hyp, "<sil>")) {
@@ -193,7 +198,7 @@ static void acoustic_processor(context_t *ctx,
                         tkn->token = tknbase(hyp);
                         ps_seg_frames(seg, &start, &end);
                         tkn->start = start * frlen;
-                        tkn->end = end * frlen;
+                        tkn->end = (end + 1) * frlen;
                         //printf("hyp=%s (%d, %d) tkn count %d\n",
                         //      tkn->token, tkn->start,tkn->end, cand->ntoken);
                     }
@@ -203,7 +208,7 @@ static void acoustic_processor(context_t *ctx,
             if (!seg && cand->ntoken > 0) {
                 ncand++;
                 cand->score *= 0.9; /* some penalty */
-                memset(cand+1, 0, sizeof(srs_srec_candidate_t));
+                //memset(cand+1, 0, sizeof(srs_srec_candidate_t));
             }
             
             if (!length) {
@@ -212,10 +217,13 @@ static void acoustic_processor(context_t *ctx,
             }
         }
     } /* for nb */
+
+    memset(cand+1, 0, sizeof(srs_srec_candidate_t));
     
     utt->id = uttid;
     utt->score = prob;
-    utt->length = length;
+    //utt->length = length;
+    utt->length = filtbuf->len;
     utt->ncand = candidate_sort(cands, sorted);
     utt->cands = sorted;
 }
@@ -289,7 +297,7 @@ static void fsg_processor(context_t *ctx,
                         tkn = cand->tokens + cand->ntoken++;
                         tkn->token = tknbase(token);
                         tkn->start = start;
-                        tkn->end = end;
+                        tkn->end = end + frlen;
                     }
                 }
             }
@@ -301,7 +309,8 @@ static void fsg_processor(context_t *ctx,
 
     utt->id = uttid;
     utt->score = prob < 0.00001 ? 0.00001 : prob;
-    utt->length = dag ? ps_lattice_n_frames(dag) * frlen : 0;
+    //utt->length = dag ? ps_lattice_n_frames(dag) * frlen : 0;
+    utt->length = filtbuf->len;
     utt->ncand = 1;
     utt->cands = sorted;
 }
index 6d0e0c9..9f17f15 100644 (file)
@@ -4,7 +4,7 @@
 #include "sphinx-plugin.h"
 
 #define CANDIDATE_TOKEN_MAX  50
-#define CANDIDATE_MAX        1000
+#define CANDIDATE_MAX        5
 
 
 void utterance_start(context_t *ctx);