Initial import to Tizen
[profile/ivi/sphinxbase.git] / test / unit / test_ngram / test_lm_class.c
1 #include <ngram_model.h>
2 #include <logmath.h>
3 #include <strfuncs.h>
4
5 #include "test_macros.h"
6
7 #include <stdio.h>
8 #include <string.h>
9 #include <math.h>
10
11 void
12 run_tests(logmath_t *lmath, ngram_model_t *model)
13 {
14         int32 rv, i;
15
16         TEST_ASSERT(model);
17
18         TEST_EQUAL(ngram_wid(model, "scylla"), 285);
19         TEST_EQUAL(strcmp(ngram_word(model, 285), "scylla"), 0);
20
21         rv = ngram_model_read_classdef(model, LMDIR "/100.probdef");
22         TEST_EQUAL(rv, 0);
23
24         /* Verify that class word IDs remain the same. */
25         TEST_EQUAL(ngram_wid(model, "scylla"), 285);
26         TEST_EQUAL(strcmp(ngram_word(model, 285), "scylla"), 0);
27
28         /* Verify in-class word IDs. */
29         TEST_EQUAL(ngram_wid(model, "scylla:scylla"), 0x80000000 | 400);
30
31         /* Verify in-class and out-class unigram scores. */
32         TEST_EQUAL_LOG(ngram_score(model, "scylla:scylla", NULL),
33                        logmath_log10_to_log(lmath, -2.7884) + logmath_log(lmath, 0.4));
34         TEST_EQUAL_LOG(ngram_score(model, "scooby:scylla", NULL),
35                        logmath_log10_to_log(lmath, -2.7884) + logmath_log(lmath, 0.1));
36         TEST_EQUAL_LOG(ngram_score(model, "scylla", NULL),
37                        logmath_log10_to_log(lmath, -2.7884));
38         TEST_EQUAL_LOG(ngram_score(model, "oh:zero", NULL),
39                        logmath_log10_to_log(lmath, -1.9038) + logmath_log(lmath, 0.7));
40         TEST_EQUAL_LOG(ngram_score(model, "zero", NULL),
41                        logmath_log10_to_log(lmath, -1.9038));
42
43         /* Verify class bigram scores. */
44         TEST_EQUAL_LOG(ngram_score(model, "scylla", "on", NULL),
45                        logmath_log10_to_log(lmath, -1.2642));
46         TEST_EQUAL_LOG(ngram_score(model, "scylla:scylla", "on", NULL),
47                        logmath_log10_to_log(lmath, -1.2642) + logmath_log(lmath, 0.4));
48         TEST_EQUAL_LOG(ngram_score(model, "apparently", "scylla", NULL),
49                        logmath_log10_to_log(lmath, -0.5172));
50         TEST_EQUAL_LOG(ngram_score(model, "apparently", "karybdis:scylla", NULL),
51                        logmath_log10_to_log(lmath, -0.5172));
52         TEST_EQUAL_LOG(ngram_score(model, "apparently", "scooby:scylla", NULL),
53                        logmath_log10_to_log(lmath, -0.5172));
54
55         /* Verify class trigram scores. */
56         TEST_EQUAL_LOG(ngram_score(model, "zero", "be", "will", NULL),
57                        logmath_log10_to_log(lmath, -0.5725));
58         TEST_EQUAL_LOG(ngram_score(model, "oh:zero", "be", "will", NULL),
59                        logmath_log10_to_log(lmath, -0.5725) + logmath_log(lmath, 0.7));
60         TEST_EQUAL_LOG(ngram_score(model, "should", "variance", "zero", NULL),
61                        logmath_log10_to_log(lmath, -0.9404));
62         TEST_EQUAL_LOG(ngram_score(model, "should", "variance", "zero:zero", NULL),
63                        logmath_log10_to_log(lmath, -0.9404));
64
65         /* Add words to classes. */
66         rv = ngram_model_add_class_word(model, "scylla", "scrappy:scylla", 1.0);
67         TEST_ASSERT(rv >= 0);
68         TEST_EQUAL(ngram_wid(model, "scrappy:scylla"), 0x80000196);
69         TEST_EQUAL_LOG(ngram_score(model, "scrappy:scylla", NULL),
70                        logmath_log10_to_log(lmath, -2.7884) + logmath_log(lmath, 0.2));
71         printf("scrappy:scylla %08x %d %f\n", 
72                ngram_wid(model, "scrappy:scylla"),
73                ngram_score(model, "scrappy:scylla", NULL),
74                logmath_exp(lmath, ngram_score(model, "scrappy:scylla", NULL)));
75         /* Add a lot of words to a class. */
76         for (i = 0; i < 129; ++i) {
77                 char word[32];
78                 sprintf(word, "%d:scylla", i);
79                 rv = ngram_model_add_class_word(model, "scylla", word, 1.0);
80                 printf("%s %08x %d %f\n", word,
81                        ngram_wid(model, word),
82                        ngram_score(model, word, NULL),
83                        logmath_exp(lmath, ngram_score(model, word, NULL)));
84                 TEST_ASSERT(rv >= 0);
85                 TEST_EQUAL(ngram_wid(model, word), 0x80000197 + i);
86         }
87
88         /* Add a new class. */
89         {
90                 const char *words[] = { "blatz:foobie", "hurf:foobie" };
91                 float32 weights[] = { 0.6, 0.4 };
92                 int32 foobie_prob;
93                 rv = ngram_model_add_class(model, "[foobie]", 1.0,
94                                            words, weights, 2);
95                 TEST_ASSERT(rv >= 0);
96                 foobie_prob = ngram_score(model, "[foobie]", NULL);
97                 TEST_EQUAL_LOG(ngram_score(model, "blatz:foobie", NULL),
98                                foobie_prob + logmath_log(lmath, 0.6));
99                 TEST_EQUAL_LOG(ngram_score(model, "hurf:foobie", NULL),
100                                foobie_prob + logmath_log(lmath, 0.4));
101         }
102 }
103
104 int
105 main(int argc, char *argv[])
106 {
107         logmath_t *lmath;
108         ngram_model_t *model;
109
110         lmath = logmath_init(1.0001, 0, 0);
111
112         model = ngram_model_read(NULL, LMDIR "/100.arpa.DMP", NGRAM_DMP, lmath);
113         run_tests(lmath, model);
114         ngram_model_free(model);
115
116         model = ngram_model_read(NULL, LMDIR "/100.arpa.gz", NGRAM_ARPA, lmath);
117         run_tests(lmath, model);
118         ngram_model_free(model);
119
120         logmath_free(lmath);
121
122         return 0;
123 }