Initial import to Tizen
[profile/ivi/sphinxbase.git] / test / unit / test_ngram / test_lm_mmap.c
1 #include <ngram_model.h>
2 #include <logmath.h>
3 #include <strfuncs.h>
4
5 #include "test_macros.h"
6
7 #include <stdio.h>
8 #include <string.h>
9 #include <math.h>
10
11 static const arg_t defn[] = {
12         { "-mmap", ARG_BOOLEAN, "yes", "use mmap" },
13         { "-lw", ARG_FLOAT32, "1.0", "language weight" },
14         { "-wip", ARG_FLOAT32, "1.0", "word insertion penalty" },
15         { "-uw", ARG_FLOAT32, "1.0", "unigram weight" },
16         { NULL, 0, NULL, NULL }
17 };
18
19 int
20 main(int argc, char *argv[])
21 {
22         logmath_t *lmath;
23         ngram_model_t *model;
24         cmd_ln_t *config;
25         int32 n_used;
26
27         /* Initialize a logmath object to pass to ngram_read */
28         lmath = logmath_init(1.0001, 0, 0);
29         /* Initialize a cmd_ln_t with -mmap yes */
30         config = cmd_ln_parse_r(NULL, defn, 0, NULL, FALSE);
31
32         /* Read a language model (this won't mmap) */
33         model = ngram_model_read(config, LMDIR "/100.arpa.gz", NGRAM_ARPA, lmath);
34         TEST_ASSERT(model);
35         TEST_EQUAL(ngram_wid(model, "<UNK>"), 0);
36         TEST_EQUAL(ngram_wid(model, "absolute"), 13);
37         TEST_EQUAL(strcmp(ngram_word(model, 13), "absolute"), 0);
38         /* Test unigrams. */
39         TEST_EQUAL(ngram_score(model, "<UNK>", NULL), -75346);
40         TEST_EQUAL(ngram_bg_score(model, ngram_wid(model, "<UNK>"),
41                                   NGRAM_INVALID_WID, &n_used), -75346);
42         TEST_EQUAL(n_used, 1);
43         TEST_EQUAL(ngram_score(model, "sphinxtrain", NULL), -64208);
44         TEST_EQUAL(ngram_bg_score(model, ngram_wid(model, "sphinxtrain"),
45                                   NGRAM_INVALID_WID, &n_used), -64208);
46         TEST_EQUAL(n_used, 1);
47         /* Test bigrams. */
48         TEST_EQUAL(ngram_score(model, "huggins", "david", NULL), -831);
49         /* Test trigrams. */
50         TEST_EQUAL_LOG(ngram_score(model, "daines", "huggins", "david", NULL), -9450);
51
52         ngram_model_free(model);
53
54         /* Read a language model (this will mmap) */
55         model = ngram_model_read(config, LMDIR "/100.arpa.DMP", NGRAM_DMP, lmath);
56         TEST_ASSERT(model);
57         TEST_EQUAL(ngram_wid(model, "<UNK>"), 0);
58         TEST_EQUAL(strcmp(ngram_word(model, 0), "<UNK>"), 0);
59         TEST_EQUAL(ngram_wid(model, "absolute"), 13);
60         TEST_EQUAL(strcmp(ngram_word(model, 13), "absolute"), 0);
61         /* Test unigrams. */
62         TEST_EQUAL(ngram_score(model, "<UNK>", NULL), -75346);
63         TEST_EQUAL(ngram_bg_score(model, ngram_wid(model, "<UNK>"),
64                                   NGRAM_INVALID_WID, &n_used), -75346);
65         TEST_EQUAL(n_used, 1);
66         TEST_EQUAL(ngram_score(model, "sphinxtrain", NULL), -64208);
67         TEST_EQUAL(ngram_bg_score(model, ngram_wid(model, "sphinxtrain"),
68                                   NGRAM_INVALID_WID, &n_used), -64208);
69         TEST_EQUAL(n_used, 1);
70         /* Test bigrams. */
71         TEST_EQUAL(ngram_score(model, "huggins", "david", NULL), -831);
72         /* Test trigrams. */
73         TEST_EQUAL(ngram_score(model, "daines", "huggins", "david", NULL), -9452);
74
75         ngram_model_free(model);
76
77         /* Test language weights on the command line. */
78         cmd_ln_set_float32_r(config, "-lw", 2.0);
79         cmd_ln_set_float32_r(config, "-wip", 0.5);
80         model = ngram_model_read(config, LMDIR "/100.arpa.gz", NGRAM_ARPA, lmath);
81         TEST_ASSERT(model);
82         TEST_EQUAL(ngram_wid(model, "<UNK>"), 0);
83         TEST_EQUAL(ngram_wid(model, "absolute"), 13);
84         TEST_EQUAL(strcmp(ngram_word(model, 13), "absolute"), 0);
85         /* Test unigrams. */
86         TEST_EQUAL(ngram_score(model, "<UNK>", NULL), -75346
87                    * 2 + logmath_log(lmath, 0.5));
88         TEST_EQUAL(ngram_bg_score(model, ngram_wid(model, "<UNK>"),
89                                   NGRAM_INVALID_WID, &n_used), -75346
90                    * 2 + logmath_log(lmath, 0.5));
91         TEST_EQUAL(n_used, 1);
92         TEST_EQUAL(ngram_score(model, "sphinxtrain", NULL), -64208
93                    * 2 + logmath_log(lmath, 0.5));
94         TEST_EQUAL(ngram_bg_score(model, ngram_wid(model, "sphinxtrain"),
95                                   NGRAM_INVALID_WID, &n_used), -64208
96                    * 2 + logmath_log(lmath, 0.5));
97         TEST_EQUAL(n_used, 1);
98         /* Test bigrams. */
99         TEST_EQUAL(ngram_score(model, "huggins", "david", NULL),
100                    -831 * 2 + logmath_log(lmath, 0.5));
101         /* Test trigrams. */
102         TEST_EQUAL_LOG(ngram_score(model, "daines", "huggins", "david", NULL),
103                        -9450 * 2 + logmath_log(lmath, 0.5));
104
105         ngram_model_free(model);
106
107         logmath_free(lmath);
108         cmd_ln_free_r(config);
109
110         return 0;
111 }