1 #include <ngram_model.h>
5 #include "test_macros.h"
13 main(int argc, char *argv[])
16 ngram_model_t *lms[3];
18 const char *names[] = { "100", "100_2" };
19 const char *words[] = {
27 const int32 n_words = sizeof(words) / sizeof(words[0]);
28 float32 weights[] = { 0.6, 0.4 };
30 lmath = logmath_init(1.0001, 0, 0);
32 lms[0] = ngram_model_read(NULL, LMDIR "/100.arpa.DMP", NGRAM_DMP, lmath);
33 lms[1] = ngram_model_read(NULL, LMDIR "/100_2.arpa.DMP", NGRAM_DMP, lmath);
35 lmset = ngram_model_set_init(NULL, lms, (char **)names, NULL, 2);
37 TEST_EQUAL(ngram_model_set_select(lmset, "100_2"), lms[1]);
38 TEST_EQUAL(ngram_model_set_select(lmset, "100"), lms[0]);
39 TEST_EQUAL(ngram_score(lmset, "sphinxtrain", NULL),
40 logmath_log10_to_log(lmath, -2.7884));
41 TEST_EQUAL(ngram_score(lmset, "huggins", "david", NULL),
42 logmath_log10_to_log(lmath, -0.0361));
43 TEST_EQUAL_LOG(ngram_score(lmset, "daines", "huggins", "david", NULL),
44 logmath_log10_to_log(lmath, -0.4105));
46 TEST_EQUAL(ngram_model_set_select(lmset, "100_2"), lms[1]);
47 TEST_EQUAL(ngram_score(lmset, "sphinxtrain", NULL),
48 logmath_log10_to_log(lmath, -2.8192));
49 TEST_EQUAL(ngram_score(lmset, "huggins", "david", NULL),
50 logmath_log10_to_log(lmath, -0.1597));
51 TEST_EQUAL_LOG(ngram_score(lmset, "daines", "huggins", "david", NULL),
52 logmath_log10_to_log(lmath, -0.0512));
54 /* Test interpolation with default weights. */
55 TEST_ASSERT(ngram_model_set_interp(lmset, NULL, NULL));
56 TEST_EQUAL_LOG(ngram_score(lmset, "sphinxtrain", NULL),
58 0.5 * pow(10, -2.7884)
59 + 0.5 * pow(10, -2.8192)));
61 /* Test interpolation with set weights. */
62 TEST_ASSERT(ngram_model_set_interp(lmset, names, weights));
63 TEST_EQUAL_LOG(ngram_score(lmset, "sphinxtrain", NULL),
65 0.6 * pow(10, -2.7884)
66 + 0.4 * pow(10, -2.8192)));
68 /* Test switching back to selected mode. */
69 TEST_EQUAL(ngram_model_set_select(lmset, "100_2"), lms[1]);
70 TEST_EQUAL(ngram_score(lmset, "sphinxtrain", NULL),
71 logmath_log10_to_log(lmath, -2.8192));
72 TEST_EQUAL(ngram_score(lmset, "huggins", "david", NULL),
73 logmath_log10_to_log(lmath, -0.1597));
74 TEST_EQUAL_LOG(ngram_score(lmset, "daines", "huggins", "david", NULL),
75 logmath_log10_to_log(lmath, -0.0512));
77 /* Test interpolation with previously set weights. */
78 TEST_ASSERT(ngram_model_set_interp(lmset, NULL, NULL));
79 TEST_EQUAL_LOG(ngram_score(lmset, "sphinxtrain", NULL),
81 0.6 * pow(10, -2.7884)
82 + 0.4 * pow(10, -2.8192)));
84 /* Test interpolation with closed-vocabulary models and OOVs. */
85 lms[2] = ngram_model_read(NULL, LMDIR "/turtle.lm", NGRAM_ARPA, lmath);
86 TEST_ASSERT(ngram_model_set_add(lmset, lms[2], "turtle", 1.0, FALSE));
87 TEST_EQUAL_LOG(ngram_score(lmset, "sphinxtrain", NULL),
89 0.6 * (2.0 / 3.0) * pow(10, -2.7884)
90 + 0.4 * (2.0 / 3.0) * pow(10, -2.8192)));
91 ngram_model_free(lmset);
93 /* Test adding and removing language models with preserved
94 * word ID mappings. */
95 lms[0] = ngram_model_read(NULL, LMDIR "/100.arpa.DMP", NGRAM_DMP, lmath);
96 lms[1] = ngram_model_read(NULL, LMDIR "/100_2.arpa.DMP", NGRAM_DMP, lmath);
97 lms[2] = ngram_model_read(NULL, LMDIR "/turtle.lm", NGRAM_ARPA, lmath);
98 lmset = ngram_model_set_init(NULL, lms, (char **)names, NULL, 1);
101 wid = ngram_wid(lmset, "sphinxtrain");
102 TEST_ASSERT(ngram_model_set_add(lmset, lms[1], "100_2", 1.0, TRUE));
103 /* Verify that it is the same. */
104 TEST_EQUAL(wid, ngram_wid(lmset, "sphinxtrain"));
105 /* Now add another model and verify that its words
106 * don't actually get added. */
107 TEST_ASSERT(ngram_model_set_add(lmset, lms[2], "turtle", 1.0, TRUE));
108 TEST_EQUAL(wid, ngram_wid(lmset, "sphinxtrain"));
109 TEST_EQUAL(ngram_unknown_wid(lmset), ngram_wid(lmset, "FORWARD"));
110 /* Remove language model, make sure this doesn't break horribly. */
111 TEST_EQUAL(lms[1], ngram_model_set_remove(lmset, "100_2", TRUE));
112 ngram_model_free(lms[1]);
113 TEST_EQUAL(wid, ngram_wid(lmset, "sphinxtrain"));
114 /* Now enable remapping of word IDs and verify that it works. */
115 TEST_EQUAL(lms[2], ngram_model_set_remove(lmset, "turtle", TRUE));
116 TEST_ASSERT(ngram_model_set_add(lmset, lms[2], "turtle", 1.0, FALSE));
117 printf("FORWARD = %d\n", ngram_wid(lmset, "FORWARD"));
120 ngram_model_free(lmset);
122 /* Now test lmctl files. */
123 lmset = ngram_model_set_read(NULL, LMDIR "/100.lmctl", lmath);
125 /* Test iterators. */
127 ngram_model_set_iter_t *itor;
131 itor = ngram_model_set_iter(lmset);
133 lm = ngram_model_set_iter_model(itor, &lmname);
134 printf("1: %s\n", lmname);
135 itor = ngram_model_set_iter_next(itor);
136 lm = ngram_model_set_iter_model(itor, &lmname);
137 printf("2: %s\n", lmname);
138 itor = ngram_model_set_iter_next(itor);
139 lm = ngram_model_set_iter_model(itor, &lmname);
140 printf("3: %s\n", lmname);
141 itor = ngram_model_set_iter_next(itor);
142 TEST_EQUAL(itor, NULL);
145 TEST_EQUAL(ngram_score(lmset, "sphinxtrain", NULL),
146 logmath_log10_to_log(lmath, -2.7884));
148 TEST_ASSERT(ngram_model_set_interp(lmset, NULL, NULL));
149 TEST_EQUAL_LOG(ngram_score(lmset, "sphinxtrain", NULL),
151 (1.0 / 3.0) * pow(10, -2.7884)
152 + (1.0 / 3.0) * pow(10, -2.8192)));
154 ngram_model_set_select(lmset, "100_2");
155 TEST_EQUAL(ngram_score(lmset, "sphinxtrain", NULL),
156 logmath_log10_to_log(lmath, -2.8192));
157 TEST_EQUAL(ngram_score(lmset, "huggins", "david", NULL),
158 logmath_log10_to_log(lmath, -0.1597));
159 TEST_EQUAL_LOG(ngram_score(lmset, "daines", "huggins", "david", NULL),
160 logmath_log10_to_log(lmath, -0.0512));
162 ngram_model_set_select(lmset, "100");
163 TEST_EQUAL(ngram_score(lmset, "sphinxtrain", NULL),
164 logmath_log10_to_log(lmath, -2.7884));
165 TEST_EQUAL(ngram_score(lmset, "huggins", "david", NULL),
166 logmath_log10_to_log(lmath, -0.0361));
167 TEST_EQUAL_LOG(ngram_score(lmset, "daines", "huggins", "david", NULL),
168 logmath_log10_to_log(lmath, -0.4105));
170 /* Test class probabilities. */
171 ngram_model_set_select(lmset, "100");
172 TEST_EQUAL_LOG(ngram_score(lmset, "scylla:scylla", NULL),
173 logmath_log10_to_log(lmath, -2.7884) + logmath_log(lmath, 0.4));
174 TEST_EQUAL_LOG(ngram_score(lmset, "scooby:scylla", NULL),
175 logmath_log10_to_log(lmath, -2.7884) + logmath_log(lmath, 0.1));
176 TEST_EQUAL_LOG(ngram_score(lmset, "apparently", "karybdis:scylla", NULL),
177 logmath_log10_to_log(lmath, -0.5172));
179 /* Test word ID mapping. */
180 ngram_model_set_select(lmset, "turtle");
181 TEST_EQUAL(ngram_wid(lmset, "ROBOMAN"),
182 ngram_wid(lmset, ngram_word(lmset, ngram_wid(lmset, "ROBOMAN"))));
183 TEST_EQUAL(ngram_wid(lmset, "bigbird"),
184 ngram_wid(lmset, ngram_word(lmset, ngram_wid(lmset, "bigbird"))));
185 TEST_EQUAL(ngram_wid(lmset, "quuxfuzz"), ngram_unknown_wid(lmset));
186 TEST_EQUAL(ngram_score(lmset, "quuxfuzz", NULL), ngram_zero(lmset));
187 ngram_model_set_map_words(lmset, words, n_words);
188 TEST_EQUAL(ngram_wid(lmset, "ROBOMAN"),
189 ngram_wid(lmset, ngram_word(lmset, ngram_wid(lmset, "ROBOMAN"))));
190 TEST_EQUAL(ngram_wid(lmset, "bigbird"),
191 ngram_wid(lmset, ngram_word(lmset, ngram_wid(lmset, "bigbird"))));
192 TEST_EQUAL(ngram_wid(lmset, "quuxfuzz"), 5);
193 TEST_EQUAL(ngram_score(lmset, "quuxfuzz", NULL), ngram_zero(lmset));
195 ngram_model_free(lmset);