1 #include <ngram_model.h>
5 #include "test_macros.h"
/*
 * Exercise class support on an already-loaded n-gram model.
 *
 * lmath: log-math object used to build the expected scores
 *        (logmath_log10_to_log, logmath_log) and to print probabilities
 *        (logmath_exp).
 * model: the model under test. It is modified here: class definitions are
 *        read into it, and class words and a new class are added to it.
 *
 * All checks go through the TEST_EQUAL / TEST_EQUAL_LOG macros.
 */
12 run_tests(logmath_t *lmath, ngram_model_t *model)
/* Before class definitions are loaded: "scylla" maps to ID 285 and back. */
18 TEST_EQUAL(ngram_wid(model, "scylla"), 285);
19 TEST_EQUAL(strcmp(ngram_word(model, 285), "scylla"), 0);
/* Read class definitions from the 100.probdef file under LMDIR. */
21 rv = ngram_model_read_classdef(model, LMDIR "/100.probdef");
24 /* Verify that class word IDs remain the same. */
25 TEST_EQUAL(ngram_wid(model, "scylla"), 285);
26 TEST_EQUAL(strcmp(ngram_word(model, 285), "scylla"), 0);
28 /* Verify in-class word IDs. */
/* The expected in-class ID has the top bit (0x80000000) set. */
29 TEST_EQUAL(ngram_wid(model, "scylla:scylla"), 0x80000000 | 400);
31 /* Verify in-class and out-class unigram scores. */
/*
 * Expected score of an in-class word = the value expected for the bare class
 * word ("scylla": -2.7884, "zero": -1.9038, both log10) plus logmath_log()
 * of a weight. The weights 0.4, 0.1 and 0.7 are presumably the in-class
 * probabilities from 100.probdef -- TODO confirm against that file.
 */
32 TEST_EQUAL_LOG(ngram_score(model, "scylla:scylla", NULL),
33 logmath_log10_to_log(lmath, -2.7884) + logmath_log(lmath, 0.4));
34 TEST_EQUAL_LOG(ngram_score(model, "scooby:scylla", NULL),
35 logmath_log10_to_log(lmath, -2.7884) + logmath_log(lmath, 0.1));
36 TEST_EQUAL_LOG(ngram_score(model, "scylla", NULL),
37 logmath_log10_to_log(lmath, -2.7884));
38 TEST_EQUAL_LOG(ngram_score(model, "oh:zero", NULL),
39 logmath_log10_to_log(lmath, -1.9038) + logmath_log(lmath, 0.7));
40 TEST_EQUAL_LOG(ngram_score(model, "zero", NULL),
41 logmath_log10_to_log(lmath, -1.9038));
43 /* Verify class bigram scores. */
/*
 * The weight term is expected only when the in-class word is the first
 * string argument. When it appears as a later argument (the last three
 * checks of this group), the expected score equals that of the bare class
 * word.
 */
44 TEST_EQUAL_LOG(ngram_score(model, "scylla", "on", NULL),
45 logmath_log10_to_log(lmath, -1.2642));
46 TEST_EQUAL_LOG(ngram_score(model, "scylla:scylla", "on", NULL),
47 logmath_log10_to_log(lmath, -1.2642) + logmath_log(lmath, 0.4));
48 TEST_EQUAL_LOG(ngram_score(model, "apparently", "scylla", NULL),
49 logmath_log10_to_log(lmath, -0.5172));
50 TEST_EQUAL_LOG(ngram_score(model, "apparently", "karybdis:scylla", NULL),
51 logmath_log10_to_log(lmath, -0.5172));
52 TEST_EQUAL_LOG(ngram_score(model, "apparently", "scooby:scylla", NULL),
53 logmath_log10_to_log(lmath, -0.5172));
55 /* Verify class trigram scores. */
/* Same pattern as the bigram checks, with one more string argument. */
56 TEST_EQUAL_LOG(ngram_score(model, "zero", "be", "will", NULL),
57 logmath_log10_to_log(lmath, -0.5725));
58 TEST_EQUAL_LOG(ngram_score(model, "oh:zero", "be", "will", NULL),
59 logmath_log10_to_log(lmath, -0.5725) + logmath_log(lmath, 0.7));
60 TEST_EQUAL_LOG(ngram_score(model, "should", "variance", "zero", NULL),
61 logmath_log10_to_log(lmath, -0.9404));
62 TEST_EQUAL_LOG(ngram_score(model, "should", "variance", "zero:zero", NULL),
63 logmath_log10_to_log(lmath, -0.9404));
65 /* Add words to classes. */
/*
 * "scrappy:scylla" is added to class "scylla" with weight 1.0, yet the
 * expected score below uses 0.2 -- presumably the class weights are
 * renormalized on insertion; TODO confirm against
 * ngram_model_add_class_word().
 */
66 rv = ngram_model_add_class_word(model, "scylla", "scrappy:scylla", 1.0);
68 TEST_EQUAL(ngram_wid(model, "scrappy:scylla"), 0x80000196);
69 TEST_EQUAL_LOG(ngram_score(model, "scrappy:scylla", NULL),
70 logmath_log10_to_log(lmath, -2.7884) + logmath_log(lmath, 0.2));
/* Diagnostic output: word ID, raw score, and the score through logmath_exp(). */
71 printf("scrappy:scylla %08x %d %f\n",
72 ngram_wid(model, "scrappy:scylla"),
73 ngram_score(model, "scrappy:scylla", NULL),
74 logmath_exp(lmath, ngram_score(model, "scrappy:scylla", NULL)));
75 /* Add a lot of words to a class. */
/*
 * i runs 0..128, so 129 words named "<i>:scylla" are added. Their IDs are
 * expected to continue consecutively from 0x80000197, i.e. directly after
 * the 0x80000196 checked for "scrappy:scylla" above.
 */
76 for (i = 0; i < 129; ++i) {
/*
 * NOTE(review): sprintf is unbounded; the declaration of word is not
 * visible here, so confirm the buffer holds "128:scylla" plus the NUL.
 */
78 sprintf(word, "%d:scylla", i);
79 rv = ngram_model_add_class_word(model, "scylla", word, 1.0);
80 printf("%s %08x %d %f\n", word,
81 ngram_wid(model, word),
82 ngram_score(model, word, NULL),
83 logmath_exp(lmath, ngram_score(model, word, NULL)));
85 TEST_EQUAL(ngram_wid(model, word), 0x80000197 + i);
88 /* Add a new class. */
/* Two words for the new class, listed with weights 0.6 and 0.4. */
90 const char *words[] = { "blatz:foobie", "hurf:foobie" };
91 float32 weights[] = { 0.6, 0.4 };
/* The 1.0 is presumably the weight of the class itself -- TODO confirm. */
93 rv = ngram_model_add_class(model, "[foobie]", 1.0,
/*
 * Each new in-class word is expected to score the unigram score of
 * "[foobie]" plus logmath_log() of that word's weight.
 */
96 foobie_prob = ngram_score(model, "[foobie]", NULL);
97 TEST_EQUAL_LOG(ngram_score(model, "blatz:foobie", NULL),
98 foobie_prob + logmath_log(lmath, 0.6));
99 TEST_EQUAL_LOG(ngram_score(model, "hurf:foobie", NULL),
100 foobie_prob + logmath_log(lmath, 0.4));
/*
 * Test driver: runs run_tests() once on the model read from 100.arpa.DMP
 * (NGRAM_DMP) and once on the model read from 100.arpa.gz (NGRAM_ARPA).
 * Each model is freed after its run. argc and argv are not used in the
 * lines shown here.
 */
105 main(int argc, char *argv[])
108 ngram_model_t *model;
/* 1.0001 is presumably the log base -- TODO confirm against logmath_init(). */
110 lmath = logmath_init(1.0001, 0, 0);
/* Pass 1: the DMP file. */
112 model = ngram_model_read(NULL, LMDIR "/100.arpa.DMP", NGRAM_DMP, lmath);
113 run_tests(lmath, model);
114 ngram_model_free(model);
/* Pass 2: the same checks on a freshly read model from the gzipped ARPA file. */
116 model = ngram_model_read(NULL, LMDIR "/100.arpa.gz", NGRAM_ARPA, lmath);
117 run_tests(lmath, model);
118 ngram_model_free(model);