Initial import to Tizen
[profile/ivi/sphinxbase.git] / test / unit / test_ngram / test_lm_set.c
1 #include <ngram_model.h>
2 #include <logmath.h>
3 #include <strfuncs.h>
4
5 #include "test_macros.h"
6
7 #include <stdio.h>
8 #include <stdlib.h>
9 #include <string.h>
10 #include <math.h>
11
12 int
13 main(int argc, char *argv[])
14 {
15         logmath_t *lmath;
16         ngram_model_t *lms[3];
17         ngram_model_t *lmset;
18         const char *names[] = { "100", "100_2" };
19         const char *words[] = {
20                 "<UNK>",
21                 "ROBOMAN",
22                 "libio",
23                 "sphinxtrain",
24                 "bigbird",
25                 "quuxfuzz"
26         };
27         const int32 n_words = sizeof(words) / sizeof(words[0]);
28         float32 weights[] = { 0.6, 0.4 };
29
30         lmath = logmath_init(1.0001, 0, 0);
31
32         lms[0] = ngram_model_read(NULL, LMDIR "/100.arpa.DMP", NGRAM_DMP, lmath);
33         lms[1] = ngram_model_read(NULL, LMDIR "/100_2.arpa.DMP", NGRAM_DMP, lmath);
34
35         lmset = ngram_model_set_init(NULL, lms, (char **)names, NULL, 2);
36         TEST_ASSERT(lmset);
37         TEST_EQUAL(ngram_model_set_select(lmset, "100_2"), lms[1]);
38         TEST_EQUAL(ngram_model_set_select(lmset, "100"), lms[0]);
39         TEST_EQUAL(ngram_score(lmset, "sphinxtrain", NULL),
40                    logmath_log10_to_log(lmath, -2.7884));
41         TEST_EQUAL(ngram_score(lmset, "huggins", "david", NULL),
42                    logmath_log10_to_log(lmath, -0.0361));
43         TEST_EQUAL_LOG(ngram_score(lmset, "daines", "huggins", "david", NULL),
44                        logmath_log10_to_log(lmath, -0.4105));
45
46         TEST_EQUAL(ngram_model_set_select(lmset, "100_2"), lms[1]);
47         TEST_EQUAL(ngram_score(lmset, "sphinxtrain", NULL),
48                    logmath_log10_to_log(lmath, -2.8192));
49         TEST_EQUAL(ngram_score(lmset, "huggins", "david", NULL),
50                    logmath_log10_to_log(lmath, -0.1597));
51         TEST_EQUAL_LOG(ngram_score(lmset, "daines", "huggins", "david", NULL),
52                        logmath_log10_to_log(lmath, -0.0512));
53
54         /* Test interpolation with default weights. */
55         TEST_ASSERT(ngram_model_set_interp(lmset, NULL, NULL));
56         TEST_EQUAL_LOG(ngram_score(lmset, "sphinxtrain", NULL),
57                        logmath_log(lmath,
58                                    0.5 * pow(10, -2.7884)
59                                    + 0.5 * pow(10, -2.8192)));
60
61         /* Test interpolation with set weights. */
62         TEST_ASSERT(ngram_model_set_interp(lmset, names, weights));
63         TEST_EQUAL_LOG(ngram_score(lmset, "sphinxtrain", NULL),
64                        logmath_log(lmath,
65                                    0.6 * pow(10, -2.7884)
66                                    + 0.4 * pow(10, -2.8192)));
67
68         /* Test switching back to selected mode. */
69         TEST_EQUAL(ngram_model_set_select(lmset, "100_2"), lms[1]);
70         TEST_EQUAL(ngram_score(lmset, "sphinxtrain", NULL),
71                    logmath_log10_to_log(lmath, -2.8192));
72         TEST_EQUAL(ngram_score(lmset, "huggins", "david", NULL),
73                    logmath_log10_to_log(lmath, -0.1597));
74         TEST_EQUAL_LOG(ngram_score(lmset, "daines", "huggins", "david", NULL),
75                        logmath_log10_to_log(lmath, -0.0512));
76
77         /* Test interpolation with previously set weights. */
78         TEST_ASSERT(ngram_model_set_interp(lmset, NULL, NULL));
79         TEST_EQUAL_LOG(ngram_score(lmset, "sphinxtrain", NULL),
80                        logmath_log(lmath,
81                                    0.6 * pow(10, -2.7884)
82                                    + 0.4 * pow(10, -2.8192)));
83
84         /* Test interpolation with closed-vocabulary models and OOVs. */
85         lms[2] = ngram_model_read(NULL, LMDIR "/turtle.lm", NGRAM_ARPA, lmath);
86         TEST_ASSERT(ngram_model_set_add(lmset, lms[2], "turtle", 1.0, FALSE));
87         TEST_EQUAL_LOG(ngram_score(lmset, "sphinxtrain", NULL),
88                        logmath_log(lmath,
89                                    0.6 * (2.0 / 3.0) * pow(10, -2.7884)
90                                    + 0.4 * (2.0 / 3.0) * pow(10, -2.8192)));
91         ngram_model_free(lmset);
92
93         /* Test adding and removing language models with preserved
94          * word ID mappings. */
95         lms[0] = ngram_model_read(NULL, LMDIR "/100.arpa.DMP", NGRAM_DMP, lmath);
96         lms[1] = ngram_model_read(NULL, LMDIR "/100_2.arpa.DMP", NGRAM_DMP, lmath);
97         lms[2] = ngram_model_read(NULL, LMDIR "/turtle.lm", NGRAM_ARPA, lmath);
98         lmset = ngram_model_set_init(NULL, lms, (char **)names, NULL, 1);
99         {
100                 int32 wid;
101                 wid = ngram_wid(lmset, "sphinxtrain");
102                 TEST_ASSERT(ngram_model_set_add(lmset, lms[1], "100_2", 1.0, TRUE));
103                 /* Verify that it is the same. */
104                 TEST_EQUAL(wid, ngram_wid(lmset, "sphinxtrain"));
105                 /* Now add another model and verify that its words
106                  * don't actually get added. */
107                 TEST_ASSERT(ngram_model_set_add(lmset, lms[2], "turtle", 1.0, TRUE));
108                 TEST_EQUAL(wid, ngram_wid(lmset, "sphinxtrain"));
109                 TEST_EQUAL(ngram_unknown_wid(lmset), ngram_wid(lmset, "FORWARD"));
110                 /* Remove language model, make sure this doesn't break horribly. */
111                 TEST_EQUAL(lms[1], ngram_model_set_remove(lmset, "100_2", TRUE));
112                 ngram_model_free(lms[1]);
113                 TEST_EQUAL(wid, ngram_wid(lmset, "sphinxtrain"));
114                 /* Now enable remapping of word IDs and verify that it works. */
115                 TEST_EQUAL(lms[2], ngram_model_set_remove(lmset, "turtle", TRUE));
116                 TEST_ASSERT(ngram_model_set_add(lmset, lms[2], "turtle", 1.0, FALSE));
117                 printf("FORWARD = %d\n", ngram_wid(lmset, "FORWARD"));
118         }
119
120         ngram_model_free(lmset);
121
122         /* Now test lmctl files. */
123         lmset = ngram_model_set_read(NULL, LMDIR "/100.lmctl", lmath);
124         TEST_ASSERT(lmset);
125         /* Test iterators. */
126         {
127                 ngram_model_set_iter_t *itor;
128                 ngram_model_t *lm;
129                 char const *lmname;
130
131                 itor = ngram_model_set_iter(lmset);
132                 TEST_ASSERT(itor);
133                 lm = ngram_model_set_iter_model(itor, &lmname);
134                 printf("1: %s\n", lmname);
135                 itor = ngram_model_set_iter_next(itor);
136                 lm = ngram_model_set_iter_model(itor, &lmname);
137                 printf("2: %s\n", lmname);
138                 itor = ngram_model_set_iter_next(itor);
139                 lm = ngram_model_set_iter_model(itor, &lmname);
140                 printf("3: %s\n", lmname);
141                 itor = ngram_model_set_iter_next(itor);
142                 TEST_EQUAL(itor, NULL);
143         }
144
145         TEST_EQUAL(ngram_score(lmset, "sphinxtrain", NULL),
146                    logmath_log10_to_log(lmath, -2.7884));
147
148         TEST_ASSERT(ngram_model_set_interp(lmset, NULL, NULL));
149         TEST_EQUAL_LOG(ngram_score(lmset, "sphinxtrain", NULL),
150                        logmath_log(lmath,
151                                    (1.0 / 3.0) * pow(10, -2.7884)
152                                    + (1.0 / 3.0) * pow(10, -2.8192)));
153
154         ngram_model_set_select(lmset, "100_2");
155         TEST_EQUAL(ngram_score(lmset, "sphinxtrain", NULL),
156                    logmath_log10_to_log(lmath, -2.8192));
157         TEST_EQUAL(ngram_score(lmset, "huggins", "david", NULL),
158                    logmath_log10_to_log(lmath, -0.1597));
159         TEST_EQUAL_LOG(ngram_score(lmset, "daines", "huggins", "david", NULL),
160                        logmath_log10_to_log(lmath, -0.0512));
161
162         ngram_model_set_select(lmset, "100");
163         TEST_EQUAL(ngram_score(lmset, "sphinxtrain", NULL),
164                    logmath_log10_to_log(lmath, -2.7884));
165         TEST_EQUAL(ngram_score(lmset, "huggins", "david", NULL),
166                    logmath_log10_to_log(lmath, -0.0361));
167         TEST_EQUAL_LOG(ngram_score(lmset, "daines", "huggins", "david", NULL),
168                        logmath_log10_to_log(lmath, -0.4105));
169
170         /* Test class probabilities. */
171         ngram_model_set_select(lmset, "100");
172         TEST_EQUAL_LOG(ngram_score(lmset, "scylla:scylla", NULL),
173                        logmath_log10_to_log(lmath, -2.7884) + logmath_log(lmath, 0.4));
174         TEST_EQUAL_LOG(ngram_score(lmset, "scooby:scylla", NULL),
175                        logmath_log10_to_log(lmath, -2.7884) + logmath_log(lmath, 0.1));
176         TEST_EQUAL_LOG(ngram_score(lmset, "apparently", "karybdis:scylla", NULL),
177                        logmath_log10_to_log(lmath, -0.5172));
178
179         /* Test word ID mapping. */
180         ngram_model_set_select(lmset, "turtle");
181         TEST_EQUAL(ngram_wid(lmset, "ROBOMAN"),
182                    ngram_wid(lmset, ngram_word(lmset, ngram_wid(lmset, "ROBOMAN"))));
183         TEST_EQUAL(ngram_wid(lmset, "bigbird"),
184                    ngram_wid(lmset, ngram_word(lmset, ngram_wid(lmset, "bigbird"))));
185         TEST_EQUAL(ngram_wid(lmset, "quuxfuzz"), ngram_unknown_wid(lmset));
186         TEST_EQUAL(ngram_score(lmset, "quuxfuzz", NULL), ngram_zero(lmset));
187         ngram_model_set_map_words(lmset, words, n_words);
188         TEST_EQUAL(ngram_wid(lmset, "ROBOMAN"),
189                    ngram_wid(lmset, ngram_word(lmset, ngram_wid(lmset, "ROBOMAN"))));
190         TEST_EQUAL(ngram_wid(lmset, "bigbird"),
191                    ngram_wid(lmset, ngram_word(lmset, ngram_wid(lmset, "bigbird"))));
192         TEST_EQUAL(ngram_wid(lmset, "quuxfuzz"), 5);
193         TEST_EQUAL(ngram_score(lmset, "quuxfuzz", NULL), ngram_zero(lmset));
194
195         ngram_model_free(lmset);
196         logmath_free(lmath);
197         return 0;
198 }