1 #include <ngram_model.h>
5 #include "test_macros.h"
/*
 * Option table handed to cmd_ln_parse_r() in main() to build the
 * configuration used when reading the language models.
 *
 * Each row appears to hold, in order: option name, value type, default value
 * (as a string) and a help description.
 * NOTE(review): the field order is inferred from the values below; confirm
 * against the arg_t declaration, which is not visible here.
 */
11 static const arg_t defn[] = {
12 { "-mmap", ARG_BOOLEAN, "yes", "use mmap" },
13 { "-lw", ARG_FLOAT32, "1.0", "language weight" },
14 { "-wip", ARG_FLOAT32, "1.0", "word insertion penalty" },
15 { "-uw", ARG_FLOAT32, "1.0", "unigram weight" },
/* All-NULL/zero row; presumably the end-of-table sentinel -- TODO confirm. */
16 { NULL, 0, NULL, NULL }
/*
 * Test entry point: reads language models from LMDIR and checks word-ID
 * lookups and n-gram scores against hard-coded expected values.
 *
 * Three passes are made:
 *   1. 100.arpa.gz read as NGRAM_ARPA with the default values from defn;
 *   2. 100.arpa.DMP read as NGRAM_DMP with the same configuration;
 *   3. 100.arpa.gz read again after setting -lw to 2.0 and -wip to 0.5.
 *
 * argc and argv are not referenced in the visible body; the configuration is
 * built from defn with an argument count of 0.
 */
20 main(int argc, char *argv[])
27 /* Initialize a logmath object to pass to ngram_model_read */
28 lmath = logmath_init(1.0001, 0, 0);
29 /* Initialize a cmd_ln_t with -mmap yes */
/* No arguments are supplied (count 0, vector NULL), so only defn is used. */
30 config = cmd_ln_parse_r(NULL, defn, 0, NULL, FALSE);
32 /* Read a language model (this won't mmap) */
33 model = ngram_model_read(config, LMDIR "/100.arpa.gz", NGRAM_ARPA, lmath);
/* Word lookups in both directions: string -> ID and ID -> string. */
35 TEST_EQUAL(ngram_wid(model, "<UNK>"), 0);
36 TEST_EQUAL(ngram_wid(model, "absolute"), 13);
37 TEST_EQUAL(strcmp(ngram_word(model, 13), "absolute"), 0);
/*
 * Single-word scores: the NULL-terminated ngram_score() call and
 * ngram_bg_score() with NGRAM_INVALID_WID as the second word ID are expected
 * to return the same value, with n_used set to 1.
 */
39 TEST_EQUAL(ngram_score(model, "<UNK>", NULL), -75346);
40 TEST_EQUAL(ngram_bg_score(model, ngram_wid(model, "<UNK>"),
41 NGRAM_INVALID_WID, &n_used), -75346);
42 TEST_EQUAL(n_used, 1);
43 TEST_EQUAL(ngram_score(model, "sphinxtrain", NULL), -64208);
44 TEST_EQUAL(ngram_bg_score(model, ngram_wid(model, "sphinxtrain"),
45 NGRAM_INVALID_WID, &n_used), -64208);
46 TEST_EQUAL(n_used, 1);
/*
 * Two- and three-word queries.
 * NOTE(review): the arguments are presumably the scored word followed by its
 * history -- confirm the expected order against ngram_model.h.
 */
48 TEST_EQUAL(ngram_score(model, "huggins", "david", NULL), -831);
/*
 * NOTE(review): this check uses TEST_EQUAL_LOG (defined outside this excerpt,
 * presumably a tolerant comparison) with -9450, whereas the DMP pass below
 * uses TEST_EQUAL with -9452 for the same query -- confirm this is intended.
 */
50 TEST_EQUAL_LOG(ngram_score(model, "daines", "huggins", "david", NULL), -9450);
52 ngram_model_free(model);
54 /* Read a language model (this will mmap) */
55 model = ngram_model_read(config, LMDIR "/100.arpa.DMP", NGRAM_DMP, lmath);
/* Same lookups as the ARPA pass, plus the reverse lookup of ID 0. */
57 TEST_EQUAL(ngram_wid(model, "<UNK>"), 0);
58 TEST_EQUAL(strcmp(ngram_word(model, 0), "<UNK>"), 0);
59 TEST_EQUAL(ngram_wid(model, "absolute"), 13);
60 TEST_EQUAL(strcmp(ngram_word(model, 13), "absolute"), 0);
/* Single-word scores are expected to match the ARPA pass exactly. */
62 TEST_EQUAL(ngram_score(model, "<UNK>", NULL), -75346);
63 TEST_EQUAL(ngram_bg_score(model, ngram_wid(model, "<UNK>"),
64 NGRAM_INVALID_WID, &n_used), -75346);
65 TEST_EQUAL(n_used, 1);
66 TEST_EQUAL(ngram_score(model, "sphinxtrain", NULL), -64208);
67 TEST_EQUAL(ngram_bg_score(model, ngram_wid(model, "sphinxtrain"),
68 NGRAM_INVALID_WID, &n_used), -64208);
69 TEST_EQUAL(n_used, 1);
71 TEST_EQUAL(ngram_score(model, "huggins", "david", NULL), -831);
/* Exact comparison against -9452 here; the ARPA pass expects -9450. */
73 TEST_EQUAL(ngram_score(model, "daines", "huggins", "david", NULL), -9452);
75 ngram_model_free(model);
77 /* Test language weights set on the configuration object. */
78 cmd_ln_set_float32_r(config, "-lw", 2.0);
79 cmd_ln_set_float32_r(config, "-wip", 0.5);
80 model = ngram_model_read(config, LMDIR "/100.arpa.gz", NGRAM_ARPA, lmath);
/* Word IDs are expected to be unaffected by the weights. */
82 TEST_EQUAL(ngram_wid(model, "<UNK>"), 0);
83 TEST_EQUAL(ngram_wid(model, "absolute"), 13);
84 TEST_EQUAL(strcmp(ngram_word(model, 13), "absolute"), 0);
/*
 * Each expected score below is the value from the first pass multiplied by 2
 * (the -lw setting) plus logmath_log(lmath, 0.5) (the -wip setting).
 */
86 TEST_EQUAL(ngram_score(model, "<UNK>", NULL), -75346
87 * 2 + logmath_log(lmath, 0.5));
88 TEST_EQUAL(ngram_bg_score(model, ngram_wid(model, "<UNK>"),
89 NGRAM_INVALID_WID, &n_used), -75346
90 * 2 + logmath_log(lmath, 0.5));
91 TEST_EQUAL(n_used, 1);
92 TEST_EQUAL(ngram_score(model, "sphinxtrain", NULL), -64208
93 * 2 + logmath_log(lmath, 0.5));
94 TEST_EQUAL(ngram_bg_score(model, ngram_wid(model, "sphinxtrain"),
95 NGRAM_INVALID_WID, &n_used), -64208
96 * 2 + logmath_log(lmath, 0.5));
97 TEST_EQUAL(n_used, 1);
99 TEST_EQUAL(ngram_score(model, "huggins", "david", NULL),
100 -831 * 2 + logmath_log(lmath, 0.5));
102 TEST_EQUAL_LOG(ngram_score(model, "daines", "huggins", "david", NULL),
103 -9450 * 2 + logmath_log(lmath, 0.5));
105 ngram_model_free(model);
/*
 * NOTE(review): no release of lmath is visible in this excerpt -- confirm
 * that the logmath object is freed before main returns.
 */
108 cmd_ln_free_r(config);