#include "decoder-set.h"
#include "utterance.h"
+#define INJECTED_SILENCE 10 /* injected silence in frames */
+
static int open_file_for_recording(const char *);
uint32_t rate;
int32_t frlen;
int32_t hwm;
+ size_t silence;
if (!ctx || !(opts = ctx->opts) || !(filtbuf = ctx->filtbuf))
return;
frlen = filtbuf->frlen;
bufsiz = (bufsiz + (frlen - 1)) / frlen * frlen;
hwm = (highwater_mark + (frlen - 1)) / frlen * frlen;
+ silence = INJECTED_SILENCE * frlen;
- filtbuf->buf = mrp_alloc(bufsiz * sizeof(int16_t));
+ filtbuf->buf = mrp_alloc((bufsiz + silence) * sizeof(int16_t));
filtbuf->max = bufsiz;
filtbuf->hwm = hwm;
filtbuf->silen = silen;
/*
 * Discard `length` elements from the front of ctx->filtbuf->buf and keep
 * the rest.
 *
 * Before this change the kept samples were moved to the very start of the
 * buffer. With the '+' lines applied they are moved to index `sillen`
 * instead, and the first `sillen` elements are zero-filled, so the kept
 * data is preceded by INJECTED_SILENCE * frlen zeroed elements.
 *
 * Does nothing when ctx or ctx->filtbuf is NULL.
 *
 * NOTE(review): this is a patch excerpt; the condition that selects between
 * the "nothing preserved" branch and the `else` branch is not visible here.
 */
void filter_buffer_purge(context_t *ctx, int32_t length)
{
filter_buf_t *filtbuf;
- size_t offset, size, origlen;
+ size_t size, offset, origlen, sillen;
if (!ctx || !(filtbuf = ctx->filtbuf))
return;
mrp_debug("purging buffer. nothing preserved");
}
else {
/* number of zeroed buffer elements to place in front of the kept data */
+ sillen = INJECTED_SILENCE * filtbuf->frlen;
origlen = filtbuf->len;
/* new length = elements kept after the purge + the zeroed prefix */
- filtbuf->len -= length;
+ filtbuf->len = filtbuf->len - length + sillen;
if (ctx->verbose) {
mrp_debug("purging buffer. %d samples preserved out of %u",
offset = length;
size = (origlen - offset) * sizeof(int16);
/*
 * Move first, zero afterwards: the source range starts at `offset` and may
 * overlap [0, sillen), so zeroing before the move could destroy samples
 * that still have to be copied.
 *
 * NOTE(review): `size` is computed with sizeof(int16) while the memset uses
 * sizeof(int16_t); presumably the same width -- confirm.
 * NOTE(review): the write now ends at element sillen + (origlen - length);
 * verify this stays inside the allocation, which the same patch grows by
 * INJECTED_SILENCE * frlen elements while filtbuf->max is left at bufsiz.
 */
- memmove(filtbuf->buf, filtbuf->buf + offset, size);
+ memmove(filtbuf->buf + sillen, filtbuf->buf + offset, size);
+ memset(filtbuf->buf, 0, sillen * sizeof(int16_t));
}
}
}
!(filtbuf = ctx->filtbuf))
return;
+ printf("*** utter %d\n", filtbuf->len);
+
if (filtbuf->len > 0) {
if (filtbuf->fdrec >= 0) {
size = filtbuf->len * sizeof(int16);
if (!path)
fd = -1;
else {
- fd = open(path, O_RDWR | O_CREAT, 0644);
+ fd = open(path, O_RDWR | O_CREAT | O_TRUNC, 0644);
if (fd < 0)
mrp_log_error("can't open file '%s': %s", path, strerror(errno));
/*
 * Run the utterance processor selected by the current decoder's `utproc`,
 * pass the resulting utterance to plugin_utterance_handler(), purge the
 * filter buffer by the length that handler returns, and, if the filter
 * buffer is still not empty, start another utterance on what is left.
 *
 * The patch replaces the enclosing `if (ctx && ...) { ... }` with an early
 * return (which also requires ctx->filtbuf to be non-NULL) and re-indents
 * the body; the only functional addition is the purge-length adjustment
 * before filter_buffer_purge().
 */
static void process_utterance(context_t *ctx)
{
+ filter_buf_t *filtbuf;
decoder_set_t *decset;
decoder_t *dec;
srs_srec_utterance_t utt;
int32_t purgelen;
int i;
/* guard clause: bail out unless context, decoder set, current decoder and
 * filter buffer are all available */
- if (ctx && (decset = ctx->decset) && (dec = decset->curdec)) {
+ if (!ctx || !(decset = ctx->decset) || !(dec = decset->curdec) ||
+ !(filtbuf = ctx->filtbuf))
+ return;
/* give every candidate its own slice of CANDIDATE_TOKEN_MAX + 1 entries
 * out of token_pool */
- for (i = 0; i < CANDIDATE_MAX; i++)
- cands[i].tokens = token_pool + (i * (CANDIDATE_TOKEN_MAX + 1));
+ for (i = 0; i < CANDIDATE_MAX; i++)
+ cands[i].tokens = token_pool + (i * (CANDIDATE_TOKEN_MAX + 1));
- switch (dec->utproc) {
+ switch (dec->utproc) {
/* both processor cases jump to the shared `processed` label below */
- case UTTERANCE_PROCESSOR_ACOUSTIC:
- acoustic_processor(ctx, &utt, cands, sorted);
- goto processed;
+ case UTTERANCE_PROCESSOR_ACOUSTIC:
+ acoustic_processor(ctx, &utt, cands, sorted);
+ goto processed;
- case UTTERANCE_PROCESSOR_FSG:
- fsg_processor(ctx, &utt, cands, sorted);
- goto processed;
+ case UTTERANCE_PROCESSOR_FSG:
+ fsg_processor(ctx, &utt, cands, sorted);
+ goto processed;
/* NOTE(review): `ctx->verbose || 1` is always true, so print_utterance()
 * runs unconditionally; this looks like a debugging leftover -- confirm. */
- processed:
- if (ctx->verbose || 1)
- print_utterance(ctx, &utt);
+ processed:
+ if (ctx->verbose || 1)
+ print_utterance(ctx, &utt);
- purgelen = plugin_utterance_handler(ctx, &utt);
- filter_buffer_purge(ctx, purgelen);
+ purgelen = plugin_utterance_handler(ctx, &utt);
- if (!filter_buffer_is_empty(ctx)) {
- mrp_log_info("processing what is left in filter buffer");
- utterance_start(ctx);
- filter_buffer_utter(ctx, true);
- utterance_end(ctx);
- }
- break;
/* New in this patch: a positive purge length is extended by 20 * frlen
 * before purging.
 * NOTE(review): the constant 20 is unnamed and its rationale is not visible
 * here; consider a named constant next to INJECTED_SILENCE. */
+ if (purgelen > 0)
+ purgelen += 20 * filtbuf->frlen;
+ filter_buffer_purge(ctx, purgelen);
- default:
- break;
/* anything still in the filter buffer after the purge is handled as a new
 * utterance right away */
+ if (!filter_buffer_is_empty(ctx)) {
+ mrp_log_info("processing what is left in filter buffer");
+ utterance_start(ctx);
+ filter_buffer_utter(ctx, true);
+ utterance_end(ctx);
}
+ break;
+
+ default:
+ break;
}
}
cand->ntoken >= CANDIDATE_TOKEN_MAX)
{
ncand++;
- memset(cand+1, 0, sizeof(srs_srec_candidate_t));
+ //memset(cand+1, 0, sizeof(srs_srec_candidate_t));
ps_seg_frames(seg, &start, &end);
ps_seg_free(seg);
//printf("hyp=</s> ncand=%d\n", ncand);
- length = end * frlen;
+ length = (end + 1) * frlen;
break;
}
else if (!strcmp(hyp, "<sil>")) {
tkn->token = tknbase(hyp);
ps_seg_frames(seg, &start, &end);
tkn->start = start * frlen;
- tkn->end = end * frlen;
+ tkn->end = (end + 1) * frlen;
//printf("hyp=%s (%d, %d) tkn count %d\n",
// tkn->token, tkn->start,tkn->end, cand->ntoken);
}
if (!seg && cand->ntoken > 0) {
ncand++;
cand->score *= 0.9; /* some penalty */
- memset(cand+1, 0, sizeof(srs_srec_candidate_t));
+ //memset(cand+1, 0, sizeof(srs_srec_candidate_t));
}
if (!length) {
}
}
} /* for nb */
+
+ memset(cand+1, 0, sizeof(srs_srec_candidate_t));
utt->id = uttid;
utt->score = prob;
- utt->length = length;
+ //utt->length = length;
+ utt->length = filtbuf->len;
utt->ncand = candidate_sort(cands, sorted);
utt->cands = sorted;
}
tkn = cand->tokens + cand->ntoken++;
tkn->token = tknbase(token);
tkn->start = start;
- tkn->end = end;
+ tkn->end = end + frlen;
}
}
}
utt->id = uttid;
utt->score = prob < 0.00001 ? 0.00001 : prob;
- utt->length = dag ? ps_lattice_n_frames(dag) * frlen : 0;
+ //utt->length = dag ? ps_lattice_n_frames(dag) * frlen : 0;
+ utt->length = filtbuf->len;
utt->ncand = 1;
utt->cands = sorted;
}