test/unit/test_ngram/test_lm_class.c

   1 #include <ngram_model.h>
   2 #include <logmath.h>
   3 #include <strfuncs.h>
   4
   5 #include "test_macros.h"
   6
   7 #include <stdio.h>
   8 #include <string.h>
   9 #include <math.h>
  10
  11 void
  12 run_tests(logmath_t *lmath, ngram_model_t *model)
  13 {
  14         int32 rv, i;
  15
  16         TEST_ASSERT(model);
  17
  18         TEST_EQUAL(ngram_wid(model, "scylla"), 285);
  19         TEST_EQUAL(strcmp(ngram_word(model, 285), "scylla"), 0);
  20
  21         rv = ngram_model_read_classdef(model, LMDIR "/100.probdef");
  22         TEST_EQUAL(rv, 0);
  23
  24         /* Verify that class word IDs remain the same. */
  25         TEST_EQUAL(ngram_wid(model, "scylla"), 285);
  26         TEST_EQUAL(strcmp(ngram_word(model, 285), "scylla"), 0);
  27
  28         /* Verify in-class word IDs. */
  29         TEST_EQUAL(ngram_wid(model, "scylla:scylla"), 0x80000000 | 400);
  30
  31         /* Verify in-class and out-class unigram scores. */
  32         TEST_EQUAL_LOG(ngram_score(model, "scylla:scylla", NULL),
  33                        logmath_log10_to_log(lmath, -2.7884) + logmath_log(lmath, 0.4));
  34         TEST_EQUAL_LOG(ngram_score(model, "scooby:scylla", NULL),
  35                        logmath_log10_to_log(lmath, -2.7884) + logmath_log(lmath, 0.1));
  36         TEST_EQUAL_LOG(ngram_score(model, "scylla", NULL),
  37                        logmath_log10_to_log(lmath, -2.7884));
  38         TEST_EQUAL_LOG(ngram_score(model, "oh:zero", NULL),
  39                        logmath_log10_to_log(lmath, -1.9038) + logmath_log(lmath, 0.7));
  40         TEST_EQUAL_LOG(ngram_score(model, "zero", NULL),
  41                        logmath_log10_to_log(lmath, -1.9038));
  42
  43         /* Verify class bigram scores. */
  44         TEST_EQUAL_LOG(ngram_score(model, "scylla", "on", NULL),
  45                        logmath_log10_to_log(lmath, -1.2642));
  46         TEST_EQUAL_LOG(ngram_score(model, "scylla:scylla", "on", NULL),
  47                        logmath_log10_to_log(lmath, -1.2642) + logmath_log(lmath, 0.4));
  48         TEST_EQUAL_LOG(ngram_score(model, "apparently", "scylla", NULL),
  49                        logmath_log10_to_log(lmath, -0.5172));
  50         TEST_EQUAL_LOG(ngram_score(model, "apparently", "karybdis:scylla", NULL),
  51                        logmath_log10_to_log(lmath, -0.5172));
  52         TEST_EQUAL_LOG(ngram_score(model, "apparently", "scooby:scylla", NULL),
  53                        logmath_log10_to_log(lmath, -0.5172));
  54
  55         /* Verify class trigram scores. */
  56         TEST_EQUAL_LOG(ngram_score(model, "zero", "be", "will", NULL),
  57                        logmath_log10_to_log(lmath, -0.5725));
  58         TEST_EQUAL_LOG(ngram_score(model, "oh:zero", "be", "will", NULL),
  59                        logmath_log10_to_log(lmath, -0.5725) + logmath_log(lmath, 0.7));
  60         TEST_EQUAL_LOG(ngram_score(model, "should", "variance", "zero", NULL),
  61                        logmath_log10_to_log(lmath, -0.9404));
  62         TEST_EQUAL_LOG(ngram_score(model, "should", "variance", "zero:zero", NULL),
  63                        logmath_log10_to_log(lmath, -0.9404));
  64
  65         /* Add words to classes. */
  66         rv = ngram_model_add_class_word(model, "scylla", "scrappy:scylla", 1.0);
  67         TEST_ASSERT(rv >= 0);
  68         TEST_EQUAL(ngram_wid(model, "scrappy:scylla"), 0x80000196);
  69         TEST_EQUAL_LOG(ngram_score(model, "scrappy:scylla", NULL),
  70                        logmath_log10_to_log(lmath, -2.7884) + logmath_log(lmath, 0.2));
  71         printf("scrappy:scylla %08x %d %f\n",
  72                ngram_wid(model, "scrappy:scylla"),
  73                ngram_score(model, "scrappy:scylla", NULL),
  74                logmath_exp(lmath, ngram_score(model, "scrappy:scylla", NULL)));
  75         /* Add a lot of words to a class. */
  76         for (i = 0; i < 129; ++i) {
  77                 char word[32];
  78                 sprintf(word, "%d:scylla", i);
  79                 rv = ngram_model_add_class_word(model, "scylla", word, 1.0);
  80                 printf("%s %08x %d %f\n", word,
  81                        ngram_wid(model, word),
  82                        ngram_score(model, word, NULL),
  83                        logmath_exp(lmath, ngram_score(model, word, NULL)));
  84                 TEST_ASSERT(rv >= 0);
  85                 TEST_EQUAL(ngram_wid(model, word), 0x80000197 + i);
  86         }
  87
  88         /* Add a new class. */
  89         {
  90                 const char *words[] = { "blatz:foobie", "hurf:foobie" };
  91                 float32 weights[] = { 0.6, 0.4 };
  92                 int32 foobie_prob;
  93                 rv = ngram_model_add_class(model, "[foobie]", 1.0,
  94                                            words, weights, 2);
  95                 TEST_ASSERT(rv >= 0);
  96                 foobie_prob = ngram_score(model, "[foobie]", NULL);
  97                 TEST_EQUAL_LOG(ngram_score(model, "blatz:foobie", NULL),
  98                                foobie_prob + logmath_log(lmath, 0.6));
  99                 TEST_EQUAL_LOG(ngram_score(model, "hurf:foobie", NULL),
 100                                foobie_prob + logmath_log(lmath, 0.4));
 101         }
 102 }
 103
 104 int
 105 main(int argc, char *argv[])
 106 {
 107         logmath_t *lmath;
 108         ngram_model_t *model;
 109
 110         lmath = logmath_init(1.0001, 0, 0);
 111
 112         model = ngram_model_read(NULL, LMDIR "/100.arpa.DMP", NGRAM_DMP, lmath);
 113         run_tests(lmath, model);
 114         ngram_model_free(model);
 115
 116         model = ngram_model_read(NULL, LMDIR "/100.arpa.gz", NGRAM_ARPA, lmath);
 117         run_tests(lmath, model);
 118         ngram_model_free(model);
 119
 120         logmath_free(lmath);
 121
 122         return 0;
 123 }