test/unit/test_ngram/test_lm_iter.c

   1 #include <ngram_model.h>
   2 #include <logmath.h>
   3 #include <strfuncs.h>
   4
   5 #include "test_macros.h"
   6
   7 #include <stdio.h>
   8 #include <stdlib.h>
   9 #include <string.h>
  10 #include <math.h>
  11
  12 int
  13 main(int argc, char *argv[])
  14 {
  15         logmath_t *lmath;
  16         ngram_model_t *model;
  17         ngram_iter_t *itor;
  18         int i;
  19
  20         /* Initialize a logmath object to pass to ngram_read */
  21         lmath = logmath_init(1.0001, 0, 0);
  22         /* Read a language model */
  23         model = ngram_model_read(NULL, LMDIR "/100.arpa.DMP", NGRAM_DMP, lmath);
  24         TEST_ASSERT(model);
  25
  26         for (i = 0, itor = ngram_model_mgrams(model, 0);
  27              itor; ++i, itor = ngram_iter_next(itor)) {
  28                 int32 score, bowt;
  29                 int32 const *wids = ngram_iter_get(itor, &score, &bowt);
  30
  31                 /*
  32                 printf("%.4f %s %.4f\n",
  33                        logmath_log_to_log10(lmath, score),
  34                        ngram_word(model, wids[0]),
  35                        logmath_log_to_log10(lmath, bowt));
  36                 */
  37
  38                 if (i == 0) TEST_EQUAL(wids[0], ngram_wid(model, "<UNK>"));
  39                 if (i == 1) TEST_EQUAL(wids[0], ngram_wid(model, "'s"));
  40         }
  41
  42         for (i = 0, itor = ngram_model_mgrams(model, 1);
  43              itor; ++i, itor = ngram_iter_next(itor)) {
  44                 int32 score, bowt;
  45                 int32 const *wids = ngram_iter_get(itor, &score, &bowt);
  46
  47                 /*
  48                 printf("%.4f %s %s %.4f\n",
  49                        logmath_log_to_log10(lmath, score),
  50                        ngram_word(model, wids[0]),
  51                        ngram_word(model, wids[1]),
  52                        logmath_log_to_log10(lmath, bowt));
  53                 */
  54
  55                 /* FIXME: These tests are not sufficient - actually we
  56                  * need to make sure all word IDs line up
  57                  * correctly. */
  58                 if (i == 0) TEST_EQUAL(wids[0], ngram_wid(model, "'s"));
  59                 if (i == 0) TEST_EQUAL(wids[1], ngram_wid(model, "an"));
  60                 if (i == 1) TEST_EQUAL(wids[0], ngram_wid(model, "'s"));
  61                 if (i == 1) TEST_EQUAL(wids[1], ngram_wid(model, "going"));
  62         }
  63
  64         for (i = 0, itor = ngram_model_mgrams(model, 2);
  65              itor; ++i, itor = ngram_iter_next(itor)) {
  66                 int32 score, bowt;
  67                 int32 const *wids = ngram_iter_get(itor, &score, &bowt);
  68
  69                 /*
  70                 printf("%.4f %s %s %s\n",
  71                        logmath_log_to_log10(lmath, score),
  72                        ngram_word(model, wids[0]),
  73                        ngram_word(model, wids[1]),
  74                        ngram_word(model, wids[2]));
  75                 */
  76
  77                 /* FIXME: These tests are not sufficient - actually we
  78                  * need to make sure all word IDs line up
  79                  * correctly. */
  80                 if (i == 0) TEST_EQUAL(wids[0], ngram_wid(model, "'s"));
  81                 if (i == 0) TEST_EQUAL(wids[1], ngram_wid(model, "an"));
  82                 if (i == 0) TEST_EQUAL(wids[2], ngram_wid(model, "r"));
  83                 if (i == 1) TEST_EQUAL(wids[0], ngram_wid(model, "'s"));
  84                 if (i == 1) TEST_EQUAL(wids[1], ngram_wid(model, "going"));
  85                 if (i == 1) TEST_EQUAL(wids[2], ngram_wid(model, "so"));
  86         }
  87
  88         {
  89                 ngram_iter_t *itor2, *itor3;
  90                 int32 score, bowt;
  91                 int32 const *wids;
  92
  93                 /* Test the boundary condition - successors of last 1-gram. */
  94                 itor = ngram_ng_iter(model, ngram_model_get_counts(model)[0] - 1,
  95                                      NULL, 0);
  96                 wids = ngram_iter_get(itor, &score, &bowt);
  97                 printf("%.4f %s %.4f\n",
  98                        logmath_log_to_log10(lmath, score),
  99                        ngram_word(model, wids[0]),
 100                        logmath_log_to_log10(lmath, bowt));
 101                 TEST_EQUAL(wids[0], ngram_wid(model, "~"));
 102
 103                 for (itor2 = ngram_iter_successors(itor);
 104                      itor2; itor2 = ngram_iter_next(itor2)) {
 105                         wids = ngram_iter_get(itor2, &score, &bowt);
 106                         printf("%.4f %s %s %.4f\n",
 107                                logmath_log_to_log10(lmath, score),
 108                                ngram_word(model, wids[0]),
 109                                ngram_word(model, wids[1]),
 110                                logmath_log_to_log10(lmath, bowt));
 111                         TEST_EQUAL(wids[0], ngram_wid(model, "~"));
 112                         TEST_EQUAL(wids[1], ngram_wid(model, "eleven"));
 113                 }
 114                 itor2 = ngram_iter_successors(itor);
 115                 for (itor3 = ngram_iter_successors(itor2);
 116                      itor3; itor3 = ngram_iter_next(itor3)) {
 117                         wids = ngram_iter_get(itor3, &score, &bowt);
 118                         printf("%.4f %s %s %s\n",
 119                                logmath_log_to_log10(lmath, score),
 120                                ngram_word(model, wids[0]),
 121                                ngram_word(model, wids[1]),
 122                                ngram_word(model, wids[2]));
 123                         TEST_EQUAL(wids[0], ngram_wid(model, "~"));
 124                         TEST_EQUAL(wids[1], ngram_wid(model, "eleven"));
 125                         TEST_EQUAL(wids[2], ngram_wid(model, "per"));
 126                 }
 127                 ngram_iter_free(itor2);
 128                 ngram_iter_free(itor);
 129         }
 130
 131         TEST_EQUAL(0, ngram_model_free(model));
 132         logmath_free(lmath);
 133
 134         return 0;
 135 }