From dfeea08fbe87f456e137034d3458d95fd6256992 Mon Sep 17 00:00:00 2001
From: Krisztian Litkey
Date: Thu, 6 Jun 2013 16:42:04 +0300
Subject: [PATCH] speech-recognition: use a dedicated data type for passing audio samples around.

---
Review notes (text between "---" and the diffstat is ignored by git-am):

 * audiobuf.c: the SRS_AUDIO_S24LE/S24BE case set width = 3 but had no
   break, so it fell through to default and packed 24-bit audio was
   always rejected with NULL. A break has been added.
 * audiobuf.c: channels * samples * width is now checked against SIZE_MAX
   before the multiplication, so a wrapped size can no longer produce a
   buffer shorter than buf->samples claims.
 * NOTE(review): the targets of the system #include lines in audiobuf.c
   and audiobuf.h were missing from the copy under review. The headers
   named below are inferred from the symbols used (mrp_*, PA_SAMPLE_*)
   and must be confirmed. Whitespace inside context lines was likewise
   reconstructed, and the audiobuf.c blob id on its "index" line predates
   the changes listed above.

 src/Makefile.am         |  2 +
 src/daemon/audiobuf.c   | 99 +++++++++++++++++++++++++++++++++++++++++++++++++
 src/daemon/audiobuf.h   | 85 ++++++++++++++++++++++++++++++++++++++++++
 src/daemon/client.c     |  6 +--
 src/daemon/client.h     | 11 +++++-
 src/daemon/dbusif.c     | 10 ++---
 src/daemon/recognizer.c |  8 ++--
 src/daemon/recognizer.h | 10 ++---
 8 files changed, 210 insertions(+), 21 deletions(-)
 create mode 100644 src/daemon/audiobuf.c
 create mode 100644 src/daemon/audiobuf.h

diff --git a/src/Makefile.am b/src/Makefile.am
index c6a3b82..f3e25db 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -12,6 +12,7 @@ QUIET_GEN = $(Q:@=@echo ' GEN '$@;)
 # SRS daemon
 srs_daemon_PUBLIC_SYMBOLS = \
 	daemon/context.h \
+	daemon/audiobuf.h \
 	daemon/recognizer.h \
 	daemon/plugin.h
 
@@ -22,6 +23,7 @@ srs_daemon_SOURCES = \
 	daemon/resourceif.c \
 	daemon/client.c \
 	daemon/plugin.c \
+	daemon/audiobuf.c \
 	daemon/recognizer.c
 
 srs_daemon_CFLAGS = \
diff --git a/src/daemon/audiobuf.c b/src/daemon/audiobuf.c
new file mode 100644
index 0000000..c47f60e
--- /dev/null
+++ b/src/daemon/audiobuf.c
@@ -0,0 +1,99 @@
+#include <stdint.h>
+
+#include <murphy/common/mm.h>
+#include <murphy/common/refcnt.h>
+
+#include "src/daemon/audiobuf.h"
+
+/*
+ * audio buffer handling
+ */
+
+/**
+ * Create a reference-counted audio buffer for the given sample data.
+ *
+ * The sample data is duplicated with mrp_datadup(); the number of bytes
+ * duplicated is channels * samples * (bytes per sample of @format).
+ *
+ * Returns the new buffer, or NULL if @format is not one of the formats
+ * handled below, if the byte count would overflow size_t, or if either
+ * allocation fails.
+ */
+srs_audiobuf_t *srs_create_audiobuf(srs_audioformat_t format, uint32_t rate,
+                                    uint8_t channels, size_t samples,
+                                    void *data)
+{
+    srs_audiobuf_t *buf;
+    size_t          width, size;
+
+    switch (format) {
+    case SRS_AUDIO_U8:
+    case SRS_AUDIO_ALAW:
+    case SRS_AUDIO_ULAW:
+        width = 1;
+        break;
+    case SRS_AUDIO_S16LE:
+    case SRS_AUDIO_S16BE:
+        width = 2;
+        break;
+    case SRS_AUDIO_FLOAT32LE:
+    case SRS_AUDIO_FLOAT32BE:
+        width = sizeof(float);
+        break;
+    case SRS_AUDIO_S32LE:
+    case SRS_AUDIO_S32BE:
+    case SRS_AUDIO_S24_32LE:
+    case SRS_AUDIO_S24_32BE:
+        width = 4;
+        break;
+    case SRS_AUDIO_S24LE:
+    case SRS_AUDIO_S24BE:
+        width = 3;
+        break;
+
+    default:
+        return NULL;
+    }
+
+    /* reject sample counts whose byte size would wrap around size_t */
+    if (channels != 0 && samples > SIZE_MAX / width / channels)
+        return NULL;
+
+    size = channels * samples * width;
+
+    if ((buf = mrp_allocz(sizeof(*buf))) != NULL) {
+        if ((buf->data = mrp_datadup(data, size)) != NULL) {
+            mrp_refcnt_init(&buf->refcnt);
+            buf->format   = format;
+            buf->rate     = rate;
+            buf->channels = channels;
+            buf->samples  = samples;
+
+            return buf;
+        }
+        else
+            mrp_free(buf);
+    }
+
+    return NULL;
+}
+
+
+/** Add a reference to @buf using mrp_ref_obj() and return the result. */
+srs_audiobuf_t *srs_ref_audiobuf(srs_audiobuf_t *buf)
+{
+    return mrp_ref_obj(buf, refcnt);
+}
+
+
+/**
+ * Remove a reference from @buf. If mrp_unref_obj() returns non-zero, the
+ * sample data and the buffer itself are freed.
+ */
+void srs_unref_audiobuf(srs_audiobuf_t *buf)
+{
+    if (mrp_unref_obj(buf, refcnt)) {
+        mrp_free(buf->data);
+        mrp_free(buf);
+    }
+}
diff --git a/src/daemon/audiobuf.h b/src/daemon/audiobuf.h
new file mode 100644
index 0000000..9562921
--- /dev/null
+++ b/src/daemon/audiobuf.h
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2012, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ *   * Redistributions of source code must retain the above copyright notice,
+ *     this list of conditions and the following disclaimer.
+ *   * Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *   * Neither the name of Intel Corporation nor the names of its contributors
+ *     may be used to endorse or promote products derived from this software
+ *     without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __SRS_DAEMON_AUDIOBUF_H__
+#define __SRS_DAEMON_AUDIOBUF_H__
+
+#include <murphy/common/refcnt.h>
+#include <pulse/sample.h>
+
+/*
+ * audio formats
+ */
+
+typedef enum {
+#define MAP(type) SRS_AUDIO_##type = PA_SAMPLE_##type
+    MAP(INVALID),
+    MAP(U8),
+    MAP(ALAW),
+    MAP(ULAW),
+    MAP(S16LE),
+    MAP(S16BE),
+    MAP(FLOAT32LE),
+    MAP(FLOAT32BE),
+    MAP(S32LE),
+    MAP(S32BE),
+    MAP(S24LE),
+    MAP(S24BE),
+    MAP(S24_32LE),
+    MAP(S24_32BE),
+    MAP(MAX),
+#undef MAP
+} srs_audioformat_t;
+
+
+/*
+ * a reference-counted audio buffer
+ */
+
+typedef struct {
+    mrp_refcnt_t       refcnt;           /* reference count */
+    srs_audioformat_t  format;           /* audio format */
+    uint32_t           rate;             /* sample rate */
+    uint8_t            channels;         /* number of channels */
+    size_t             samples;          /* samples per channel */
+    void              *data;             /* actual sample data */
+} srs_audiobuf_t;
+
+/** Create a new audio buffer. */
+srs_audiobuf_t *srs_create_audiobuf(srs_audioformat_t format, uint32_t rate,
+                                    uint8_t channels, size_t samples,
+                                    void *data);
+
+/** Add a reference to the given audio buffer. */
+srs_audiobuf_t *srs_ref_audiobuf(srs_audiobuf_t *buf);
+
+/** Remove a reference from the given audio buffer, potentially freeing it. */
+void srs_unref_audiobuf(srs_audiobuf_t *buf);
+
+#endif /* __SRS_DAEMON_AUDIOBUF_H__ */
diff --git a/src/daemon/client.c b/src/daemon/client.c
index c57b0c6..5f513ee 100644
--- a/src/daemon/client.c
+++ b/src/daemon/client.c
@@ -304,11 +304,11 @@ void client_resource_event(srs_client_t *c, srs_resset_event_t e)
 
 void client_notify_command(srs_client_t *c, int index,
                            int ntoken, const char **tokens,
-                           void *samplebuf, size_t samplelen,
-                           uint32_t *start, uint32_t *end)
+                           uint32_t *start, uint32_t *end,
+                           srs_audiobuf_t *audio)
 {
     if (c->enabled && /*c->allowed && */ 0 <= index && index < c->ncommand) {
         c->ops.notify_command(c, index, ntoken, (char **)tokens,
-                              samplebuf, samplelen, start, end);
+                              start, end, audio);
     }
 }
diff --git a/src/daemon/client.h b/src/daemon/client.h
index 8f71f15..c0e460d 100644
--- a/src/daemon/client.h
+++ b/src/daemon/client.h
@@ -34,6 +34,7 @@ typedef struct srs_client_s srs_client_t;
 
 #include "src/daemon/context.h"
 #include "src/daemon/resourceif.h"
+#include "src/daemon/audiobuf.h"
 
 
 /*
@@ -119,9 +120,15 @@ typedef enum {
 
 typedef struct {
     int (*notify_focus)(srs_client_t *c, srs_voice_focus_t focus);
+#if 1
+    int (*notify_command)(srs_client_t *c, int idx, int ntoken,
+                          char **tokens, uint32_t *start, uint32_t *end,
+                          srs_audiobuf_t *audio);
+#else
     int (*notify_command)(srs_client_t *c, int idx, int ntoken,
                           char **tokens, void *samplebuf, size_t samplelen,
                           uint32_t *start, uint32_t *end);
+#endif
 } srs_client_ops_t;
 
 
@@ -172,7 +179,7 @@ void client_resource_event(srs_client_t *c, srs_resset_event_t event);
 
 /** Deliver a command notification event to the client. */
 void client_notify_command(srs_client_t *c, int idx, int ntoken,
-                           const char **tokens, void *samplebuf,
-                           size_t samplelen, uint32_t *start, uint32_t *end);
+                           const char **tokens, uint32_t *start, uint32_t *end,
+                           srs_audiobuf_t *audio);
 
 #endif /* __SRS_DAEMON_CLIENT_H__ */
diff --git a/src/daemon/dbusif.c b/src/daemon/dbusif.c
index 25d3565..7892623 100644
--- a/src/daemon/dbusif.c
+++ b/src/daemon/dbusif.c
@@ -46,8 +46,8 @@ static int focus_cb(mrp_dbus_t *dbus, DBusMessage *msg, void *user_data);
 
 static int focus_notify(srs_client_t *c, srs_voice_focus_t focus);
 static int command_notify(srs_client_t *c, int idx, int ntoken, char **tokens,
-                          void *samplebuf, size_t samplelen, uint32_t *start,
-                          uint32_t *end);
+                          uint32_t *start, uint32_t *end,
+                          srs_audiobuf_t *audio);
 
 #define reply_register simple_reply
 #define reply_unregister simple_reply
@@ -417,8 +417,7 @@ static int focus_notify(srs_client_t *c, srs_voice_focus_t focus)
 
 
 static int command_notify(srs_client_t *c, int idx, int ntoken, char **tokens,
-                          void *samplebuf, size_t samplelen, uint32_t *start,
-                          uint32_t *end)
+                          uint32_t *start, uint32_t *end, srs_audiobuf_t *audio)
 {
     srs_context_t *srs = c->srs;
     const char *dest = c->id;
@@ -430,10 +429,9 @@ static int command_notify(srs_client_t *c, int idx, int ntoken, char **tokens,
     int i, n, l;
 
     MRP_UNUSED(idx);
-    MRP_UNUSED(samplebuf);
-    MRP_UNUSED(samplelen);
     MRP_UNUSED(start);
     MRP_UNUSED(end);
+    MRP_UNUSED(audio);
 
     p = cmd = buf;
     l = sizeof(buf) - 1;
diff --git a/src/daemon/recognizer.c b/src/daemon/recognizer.c
index 921c548..1ea8df9 100644
--- a/src/daemon/recognizer.c
+++ b/src/daemon/recognizer.c
@@ -248,7 +248,7 @@ static void free_srec_result(srs_srec_result_t *res)
 
     mrp_free(res->start);
     mrp_free(res->end);
-    mrp_free(res->samplebuf);
+    srs_unref_audiobuf(res->samplebuf);
 
     for (i = 0; i < res->ndict; i++)
         mrp_free(res->dicts[i]);
@@ -325,8 +325,7 @@ static void process_match_result(srs_srec_t *srec, srs_srec_result_t *res)
 
     client_notify_command(match->client, match->index,
                           res->ntoken, (const char **)res->tokens,
-                          res->samplebuf, res->samplelen,
-                          res->start, res->end);
+                          res->start, res->end, res->samplebuf);
 
     while (res->ndict > 0)
         pop_dict(srec, res);
@@ -424,8 +423,7 @@ static int srec_notify_cb(srs_srec_utterance_t *utt, void *notify_data)
         start = 0;
         end = utt->length;
 
-        res->samplebuf = srec->api.sampledup(start, end, &res->samplelen,
-                                             srec->api_data);
+        res->samplebuf = srec->api.sampledup(start, end, srec->api_data);
     }
 
     res = srec->result;
diff --git a/src/daemon/recognizer.h b/src/daemon/recognizer.h
index 6030899..ad40691 100644
--- a/src/daemon/recognizer.h
+++ b/src/daemon/recognizer.h
@@ -31,6 +31,7 @@
 #define __SRS_DAEMON_RECOGNIZER_H__
 
 #include "src/daemon/client.h"
+#include "src/daemon/audiobuf.h"
 
 /*
  * speech recognition backend interface
@@ -45,6 +46,7 @@ typedef int (*srs_srec_notify_t)(srs_srec_utterance_t *utt, void *notify_data);
 /** Notification callback return value for flushing the full audio buffer. */
 #define SRS_SREC_FLUSH_ALL -1
 
+
 /*
  * API to a speech recognition backend.
  */
@@ -58,8 +60,7 @@ typedef struct {
     /** Schedule a rescan of the given portion of the audio buffer. */
     int (*rescan)(uint32_t start, uint32_t end, void *user_data);
     /** Get a copy of the audio samples in the buffer. */
-    void *(*sampledup)(uint32_t start, uint32_t end, size_t *size,
-                       void *user_data);
+    srs_audiobuf_t *(*sampledup)(uint32_t start, uint32_t end, void *user_data);
     /** Check if the given language model exists/is usable. */
     int (*check_decoder)(const char *decoder, void *user_data);
     /** Set language model to be used. */
@@ -160,9 +161,8 @@ typedef struct {
 struct srs_srec_result_s {
     srs_srec_result_type_t type; /* result type */
     mrp_list_hook_t hook; /* to list of results */
-    void *samplebuf; /* utterance audio */
-    size_t samplelen; /* audio buffer length */
-    uint32_t sampleoffs; /* extra audio offset */
+    srs_audiobuf_t *samplebuf; /* audio sample buffer */
+    uint32_t sampleoffs; /* extra audio sample offset */
     char **tokens; /* matched tokens */
     uint32_t *start; /* token start offset */
     uint32_t *end; /* token end offsets */
-- 
2.7.4