test/unit/test_ngram/test_lm_mmap.c

   1 #include <ngram_model.h>
   2 #include <logmath.h>
   3 #include <strfuncs.h>
   4
   5 #include "test_macros.h"
   6
   7 #include <stdio.h>
   8 #include <string.h>
   9 #include <math.h>
  10
  11 static const arg_t defn[] = {
  12         { "-mmap", ARG_BOOLEAN, "yes", "use mmap" },
  13         { "-lw", ARG_FLOAT32, "1.0", "language weight" },
  14         { "-wip", ARG_FLOAT32, "1.0", "word insertion penalty" },
  15         { "-uw", ARG_FLOAT32, "1.0", "unigram weight" },
  16         { NULL, 0, NULL, NULL }
  17 };
  18
  19 int
  20 main(int argc, char *argv[])
  21 {
  22         logmath_t *lmath;
  23         ngram_model_t *model;
  24         cmd_ln_t *config;
  25         int32 n_used;
  26
  27         /* Initialize a logmath object to pass to ngram_read */
  28         lmath = logmath_init(1.0001, 0, 0);
  29         /* Initialize a cmd_ln_t with -mmap yes */
  30         config = cmd_ln_parse_r(NULL, defn, 0, NULL, FALSE);
  31
  32         /* Read a language model (this won't mmap) */
  33         model = ngram_model_read(config, LMDIR "/100.arpa.gz", NGRAM_ARPA, lmath);
  34         TEST_ASSERT(model);
  35         TEST_EQUAL(ngram_wid(model, "<UNK>"), 0);
  36         TEST_EQUAL(ngram_wid(model, "absolute"), 13);
  37         TEST_EQUAL(strcmp(ngram_word(model, 13), "absolute"), 0);
  38         /* Test unigrams. */
  39         TEST_EQUAL(ngram_score(model, "<UNK>", NULL), -75346);
  40         TEST_EQUAL(ngram_bg_score(model, ngram_wid(model, "<UNK>"),
  41                                   NGRAM_INVALID_WID, &n_used), -75346);
  42         TEST_EQUAL(n_used, 1);
  43         TEST_EQUAL(ngram_score(model, "sphinxtrain", NULL), -64208);
  44         TEST_EQUAL(ngram_bg_score(model, ngram_wid(model, "sphinxtrain"),
  45                                   NGRAM_INVALID_WID, &n_used), -64208);
  46         TEST_EQUAL(n_used, 1);
  47         /* Test bigrams. */
  48         TEST_EQUAL(ngram_score(model, "huggins", "david", NULL), -831);
  49         /* Test trigrams. */
  50         TEST_EQUAL_LOG(ngram_score(model, "daines", "huggins", "david", NULL), -9450);
  51
  52         ngram_model_free(model);
  53
  54         /* Read a language model (this will mmap) */
  55         model = ngram_model_read(config, LMDIR "/100.arpa.DMP", NGRAM_DMP, lmath);
  56         TEST_ASSERT(model);
  57         TEST_EQUAL(ngram_wid(model, "<UNK>"), 0);
  58         TEST_EQUAL(strcmp(ngram_word(model, 0), "<UNK>"), 0);
  59         TEST_EQUAL(ngram_wid(model, "absolute"), 13);
  60         TEST_EQUAL(strcmp(ngram_word(model, 13), "absolute"), 0);
  61         /* Test unigrams. */
  62         TEST_EQUAL(ngram_score(model, "<UNK>", NULL), -75346);
  63         TEST_EQUAL(ngram_bg_score(model, ngram_wid(model, "<UNK>"),
  64                                   NGRAM_INVALID_WID, &n_used), -75346);
  65         TEST_EQUAL(n_used, 1);
  66         TEST_EQUAL(ngram_score(model, "sphinxtrain", NULL), -64208);
  67         TEST_EQUAL(ngram_bg_score(model, ngram_wid(model, "sphinxtrain"),
  68                                   NGRAM_INVALID_WID, &n_used), -64208);
  69         TEST_EQUAL(n_used, 1);
  70         /* Test bigrams. */
  71         TEST_EQUAL(ngram_score(model, "huggins", "david", NULL), -831);
  72         /* Test trigrams. */
  73         TEST_EQUAL(ngram_score(model, "daines", "huggins", "david", NULL), -9452);
  74
  75         ngram_model_free(model);
  76
  77         /* Test language weights on the command line. */
  78         cmd_ln_set_float32_r(config, "-lw", 2.0);
  79         cmd_ln_set_float32_r(config, "-wip", 0.5);
  80         model = ngram_model_read(config, LMDIR "/100.arpa.gz", NGRAM_ARPA, lmath);
  81         TEST_ASSERT(model);
  82         TEST_EQUAL(ngram_wid(model, "<UNK>"), 0);
  83         TEST_EQUAL(ngram_wid(model, "absolute"), 13);
  84         TEST_EQUAL(strcmp(ngram_word(model, 13), "absolute"), 0);
  85         /* Test unigrams. */
  86         TEST_EQUAL(ngram_score(model, "<UNK>", NULL), -75346
  87                    * 2 + logmath_log(lmath, 0.5));
  88         TEST_EQUAL(ngram_bg_score(model, ngram_wid(model, "<UNK>"),
  89                                   NGRAM_INVALID_WID, &n_used), -75346
  90                    * 2 + logmath_log(lmath, 0.5));
  91         TEST_EQUAL(n_used, 1);
  92         TEST_EQUAL(ngram_score(model, "sphinxtrain", NULL), -64208
  93                    * 2 + logmath_log(lmath, 0.5));
  94         TEST_EQUAL(ngram_bg_score(model, ngram_wid(model, "sphinxtrain"),
  95                                   NGRAM_INVALID_WID, &n_used), -64208
  96                    * 2 + logmath_log(lmath, 0.5));
  97         TEST_EQUAL(n_used, 1);
  98         /* Test bigrams. */
  99         TEST_EQUAL(ngram_score(model, "huggins", "david", NULL),
 100                    -831 * 2 + logmath_log(lmath, 0.5));
 101         /* Test trigrams. */
 102         TEST_EQUAL_LOG(ngram_score(model, "daines", "huggins", "david", NULL),
 103                        -9450 * 2 + logmath_log(lmath, 0.5));
 104
 105         ngram_model_free(model);
 106
 107         logmath_free(lmath);
 108         cmd_ln_free_r(config);
 109
 110         return 0;
 111 }