src/plugins/speech-to-text/sphinx/sphinx-plugin.c

   1 /*
   2  * Copyright (c) 2012, Intel Corporation
   3  *
   4  * Redistribution and use in source and binary forms, with or without
   5  * modification, are permitted provided that the following conditions are
   6  * met:
   7  *
   8  *   * Redistributions of source code must retain the above copyright notice,
   9  *     this list of conditions and the following disclaimer.
  10  *   * Redistributions in binary form must reproduce the above copyright
  11  *     notice, this list of conditions and the following disclaimer in the
  12  *     documentation and/or other materials provided with the distribution.
  13  *   * Neither the name of Intel Corporation nor the names of its contributors
  14  *     may be used to endorse or promote products derived from this software
  15  *     without specific prior written permission.
  16  *
  17  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  18  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  19  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  20  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  21  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  22  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  23  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  24  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  25  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  26  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  27  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  28  */
  29
  30 #include <murphy/common/debug.h>
  31 #include <murphy/common/mainloop.h>
  32
  33 #include <pulse/pulseaudio.h>
  34 #include <pulse/mainloop.h>
  35
  36 #include "options.h"
  37 #include "decoder-set.h"
  38 #include "utterance.h"
  39 #include "filter-buffer.h"
  40 #include "input-buffer.h"
  41 #include "pulse-interface.h"
  42
  43
  44 #define SPHINX_NAME        "sphinx-speech"
  45 #define SPHINX_DESCRIPTION "A CMU Sphinx-based speech engine backend plugin."
  46 #define SPHINX_AUTHORS     "Janos Kovacs <janos.kovacs@intel.com>"
  47 #define SPHINX_VERSION     "0.0.1"
  48
  49 struct plugin_s {
  50     srs_plugin_t *self;               /* us, the backend plugin */
  51     struct {
  52         srs_srec_notify_t callback;   /* recognition notification callback */
  53         void *data;                   /* notifiation callback data */
  54     } notify;
  55 };
  56
  57
  58 mrp_mainloop_t *plugin_get_mainloop(plugin_t *plugin)
  59 {
  60     return plugin->self->srs->ml;
  61 }
  62
  63
  64 int32_t plugin_utterance_handler(context_t *ctx, srs_srec_utterance_t *utt)
  65 {
  66     plugin_t *pl;
  67     srs_srec_notify_t notify;
  68     int32_t length;
  69
  70     if (!(pl = ctx->plugin) || !(notify = pl->notify.callback))
  71         length = -1;
  72     else {
  73         length = notify(utt, pl->notify.data);
  74         mrp_log_info("buffer processed till %d", length);
  75     }
  76
  77     return length;
  78 }
  79
  80 static int activate(void *user_data)
  81 {
  82     context_t *ctx = (context_t *)user_data;
  83
  84     MRP_UNUSED(ctx);
  85
  86     mrp_log_info("Activating CMU Sphinx backend.");
  87
  88     pulse_interface_cork_input_stream(ctx, false);
  89
  90     return TRUE;
  91 }
  92
  93
  94 static void deactivate(void *user_data)
  95 {
  96     context_t *ctx = (context_t *)user_data;
  97
  98     MRP_UNUSED(ctx);
  99
 100     mrp_log_info("Deactivating CMU Sphinx backend.");
 101
 102     pulse_interface_cork_input_stream(ctx, true);
 103     filter_buffer_purge(ctx, -1);
 104     input_buffer_purge(ctx);
 105 }
 106
 107
 108 static int flush(uint32_t start, uint32_t end, void *user_data)
 109 {
 110     context_t *ctx = (context_t *)user_data;
 111
 112     MRP_UNUSED(ctx);
 113
 114     mrp_log_info("flushing CMU Sphinx backend buffer (%u - %u)", start, end);
 115
 116     return TRUE;
 117 }
 118
 119
 120 static int rescan(uint32_t start, uint32_t end, void *user_data)
 121 {
 122     context_t *ctx = (context_t *)user_data;
 123
 124     MRP_UNUSED(ctx);
 125
 126     mrp_log_info("scheduling CMU Sphinx backend buffer rescan (%u - %u)",
 127               start, end);
 128
 129     return TRUE;
 130 }
 131
 132
 133 static srs_audiobuf_t *sampledup(uint32_t start, uint32_t end, void *user_data)
 134 {
 135     context_t *ctx  = (context_t *)user_data;
 136     options_t *opts;
 137     srs_audioformat_t format;
 138     uint32_t rate;
 139     uint8_t channels;
 140     size_t  samples;
 141     int16_t *buf;
 142
 143     if (!ctx || !(opts = ctx->opts))
 144         return NULL;
 145
 146     mrp_debug("duplicating CMU Sphinx backend sample (%u - %u)", start, end);
 147
 148     format = SRS_AUDIO_S16LE;
 149     rate = opts->rate;
 150     channels = 1;
 151     buf = filter_buffer_dup(ctx, start, end, &samples);
 152
 153     return srs_create_audiobuf(format, rate, channels, samples, buf);
 154 }
 155
 156
 157 static int check_decoder(const char *decoder, void *user_data)
 158 {
 159     context_t *ctx = (context_t *)user_data;
 160     int available;
 161
 162     mrp_log_info("checking availability of decoder '%s' for CMU Sphinx backend",
 163               decoder);
 164
 165     available = decoder_set_contains(ctx, decoder);
 166
 167     mrp_debug("decoder %s %savailable", decoder, available ? "" : "un");
 168
 169     return available;
 170 }
 171
 172
 173 static int select_decoder(const char *decoder, void *user_data)
 174 {
 175     context_t *ctx = (context_t *)user_data;
 176
 177     mrp_log_info("selecting decoder '%s' for CMU Sphinx backend", decoder);
 178
 179     if (decoder_set_use(ctx, decoder) < 0)
 180         return FALSE;
 181
 182     return TRUE;
 183 }
 184
 185
 186 static const char *active_decoder(void *user_data)
 187 {
 188     context_t *ctx = (context_t *)user_data;
 189     const char *decoder;
 190
 191     mrp_log_info("querying active CMU Sphinx backend decoder");
 192
 193     decoder = decoder_set_name(ctx);
 194
 195     mrp_debug("active decoder is '%s'", decoder);
 196
 197     return decoder;
 198 }
 199
 200
 201 static int create_sphinx(srs_plugin_t *plugin)
 202 {
 203     srs_srec_api_t api = {
 204         activate:         activate,
 205         deactivate:       deactivate,
 206         flush:            flush,
 207         rescan:           rescan,
 208         sampledup:        sampledup,
 209         check_decoder:    check_decoder,
 210         select_decoder:   select_decoder,
 211         active_decoder:   active_decoder,
 212     };
 213
 214     srs_context_t *srs = plugin->srs;
 215     context_t     *ctx = NULL;
 216     plugin_t      *pl = NULL;
 217     int            sts;
 218
 219     mrp_debug("creating CMU Sphinx speech recognition backend plugin");
 220
 221     if ((ctx = mrp_allocz(sizeof(context_t))) &&
 222         (pl  = mrp_allocz(sizeof(plugin_t)))   )
 223     {
 224         ctx->plugin = pl;
 225
 226         pl->self = plugin;
 227
 228         sts = srs_register_srec(srs, SPHINX_NAME, &api, ctx,
 229                                 &pl->notify.callback,
 230                                 &pl->notify.data);
 231         if (sts == 0) {
 232             plugin->plugin_data = ctx;
 233             return TRUE;
 234         }
 235     }
 236
 237     mrp_free(pl);
 238     mrp_free(ctx);
 239
 240     mrp_log_error("Failed to create CMU Sphinx plugin.");
 241
 242     return FALSE;
 243 }
 244
 245
 246 static int config_sphinx(srs_plugin_t *plugin, srs_cfg_t *settings)
 247 {
 248     context_t *ctx = (context_t *)plugin->plugin_data;
 249     srs_cfg_t *cfg;
 250     int        n;
 251
 252     mrp_debug("configuring CMU Sphinx speech recognition backend plugin");
 253
 254     n = srs_collect_config(settings, SPHINX_PREFIX, &cfg);
 255
 256     mrp_log_info("Found %d CMU Sphinx plugin configuration keys.", n);
 257
 258     if (options_create(ctx, n, cfg) < 0 ||
 259         decoder_set_create(ctx)     < 0 ||
 260         filter_buffer_create(ctx)   < 0 ||
 261         input_buffer_create(ctx)    < 0  )
 262     {
 263         mrp_log_error("Failed to configure CMU Sphinx plugin.");
 264         return FALSE;
 265     }
 266
 267     srs_free_config(cfg);
 268
 269     return TRUE;
 270 }
 271
 272
 273 static int start_sphinx(srs_plugin_t *plugin)
 274 {
 275     srs_context_t *srs = plugin->srs;
 276     context_t *ctx = (context_t *)plugin->plugin_data;
 277
 278     mrp_debug("start CMU Sphinx speech recognition backend plugin");
 279
 280     if (pulse_interface_create(ctx, srs->pa) < 0) {
 281         mrp_log_error("Failed to start CMU Sphinx plugin: can't create "
 282                       "pulseaudio interface");
 283     }
 284
 285     return TRUE;
 286 }
 287
 288
 289 static void stop_sphinx(srs_plugin_t *plugin)
 290 {
 291     context_t *ctx = (context_t *)plugin->plugin_data;
 292
 293     mrp_debug("stop CMU Sphinx speech recognition backend plugin");
 294
 295     pulse_interface_destroy(ctx);
 296 }
 297
 298
 299 static void destroy_sphinx(srs_plugin_t *plugin)
 300 {
 301     srs_context_t *srs = plugin->srs;
 302     context_t     *ctx = (context_t *)plugin->plugin_data;
 303
 304     mrp_debug("destroy CMU Sphinx speech recognition backend plugin");
 305
 306     if (ctx != NULL) {
 307         srs_unregister_srec(srs, SPHINX_NAME);
 308         mrp_free(ctx->plugin);
 309
 310         input_buffer_destroy(ctx);
 311         filter_buffer_destroy(ctx);
 312         decoder_set_destroy(ctx);
 313         options_destroy(ctx);
 314
 315         mrp_free(ctx);
 316     }
 317 }
 318
 319
 320 SRS_DECLARE_PLUGIN(SPHINX_NAME, SPHINX_DESCRIPTION, SPHINX_AUTHORS,
 321                    SPHINX_VERSION, create_sphinx, config_sphinx,
 322                    start_sphinx, stop_sphinx, destroy_sphinx)
 323
 324
 325 /*
 326  * Local Variables:
 327  * c-basic-offset: 4
 328  * indent-tabs-mode: nil
 329  * End:
 330  *
 331  */