/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
/* ====================================================================
 * Copyright (c) 2008 Carnegie Mellon University.  All rights
 * reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * This work was supported in part by funding from the Defense Advanced
 * Research Projects Agency and the National Science Foundation of the
 * United States of America, and the CMU Sphinx Speech Consortium.
 *
 * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
 * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
 * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * ====================================================================
 */

/**
 * \file sphinx_lm_eval.c
 * Language model evaluation tool.
 */
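/*
 * Usage sketch (illustrative only; the file names below are
 * placeholders, not data shipped with sphinxbase):
 *
 *   sphinx_lm_eval -lm model.arpa -text "hello world"
 *   sphinx_lm_eval -lm model.arpa -lsn transcripts.txt -verbose yes
 *
 * Either mode reports cross-entropy in bits, perplexity, the summed
 * language model score, and counts of OOVs and context cues.
 */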
#include <sphinxbase/logmath.h>
#include <sphinxbase/ngram_model.h>
#include <sphinxbase/cmd_ln.h>
#include <sphinxbase/ckd_alloc.h>
#include <sphinxbase/err.h>
#include <sphinxbase/pio.h>
#include <sphinxbase/strfuncs.h>

#include <stdio.h>
#include <string.h>
#include <math.h>
static const arg_t defn[] = {
    { "-help", ARG_BOOLEAN, "no",
      "Shows the usage of the tool" },
    { "-logbase", ARG_FLOAT64, "1.0001",
      "Base in which all log-likelihoods calculated" },
    { "-lm", ARG_STRING, NULL,
      "Language model file" },
    { "-probdef", ARG_STRING, NULL,
      "Probability definition file for classes in LM" },
    { "-lmctlfn", ARG_STRING, NULL,
      "Control file listing a set of language models" },
    { "-lmname", ARG_STRING, NULL,
      "Name of language model in -lmctlfn to use for all utterances" },
    { "-lsn", ARG_STRING, NULL,
      "Transcription file to evaluate" },
    { "-text", ARG_STRING, NULL,
      "Text string to evaluate" },
    { "-mmap", ARG_BOOLEAN, "no",
      "Use memory-mapped I/O for reading binary LM files" },
    { "-lw", ARG_FLOAT32, "1.0",
      "Language model weight" },
    { "-wip", ARG_FLOAT32, "1.0",
      "Word insertion probability" },
    { "-uw", ARG_FLOAT32, "1.0",
      "Unigram probability weight (interpolated with uniform distribution)" },
    { "-verbose", ARG_BOOLEAN, "no",
      "Print details of perplexity calculation" },
    /* FIXME: Support -lmstartsym, -lmendsym, -lmctlfn, -ctl_lm */
    { NULL, 0, NULL, NULL }
};

static int verbose;
static int32
calc_entropy(ngram_model_t *lm, char **words, int32 n,
             int32 *out_n_ccs, int32 *out_n_oovs, int32 *out_lm_score)
{
    int32 *wids;
    int32 startwid;
    int32 i, ch, nccs, noovs, unk;

    if (n == 0)
        return 0;
    unk = ngram_unknown_wid(lm);

    /* Reverse this array into an array of word IDs. */
    wids = ckd_calloc(n, sizeof(*wids));
    for (i = 0; i < n; ++i)
        wids[n-i-1] = ngram_wid(lm, words[i]);
    /* Skip <s> as it's a context cue (HACK, this should be configurable). */
    startwid = ngram_wid(lm, "<s>");

    /* Now evaluate the list of words in reverse using the
     * remainder of the array as the history. */
    ch = noovs = nccs = 0;
    for (i = 0; i < n; ++i) {
        int32 prob, n_used;
        /* Skip <s> as it's a context cue (HACK, this should be configurable). */
        if (wids[i] == startwid) {
            ++nccs;
            continue;
        }
        /* Skip and count OOVs. */
        if (wids[i] == NGRAM_INVALID_WID || wids[i] == unk) {
            ++noovs;
            continue;
        }
        /* Sum up information for each N-gram */
        prob = ngram_ng_score(lm,
                              wids[i], wids + i + 1,
                              n - i - 1, &n_used);
        ch += prob;
        if (verbose) {
            /* Print log P(word | history), truncated to the model order. */
            int32 m = i + ngram_model_get_size(lm) - 1;
            if (m >= n)
                m = n - 1;
            printf("log P(%s|", ngram_word(lm, wids[i]));
            while (m > i)
                printf("%s ", ngram_word(lm, wids[m--]));
            printf(") = %d\n", prob);
        }
    }
    ckd_free(wids);
    if (out_n_ccs) *out_n_ccs = nccs;
    if (out_n_oovs) *out_n_oovs = noovs;
    if (out_lm_score) *out_lm_score = ch;

    /* Calculate cross-entropy CH = - 1/N sum log P(W|H) */
    n -= (nccs + noovs);
    if (n <= 0)
        return 0;
    return -ch / n;
}
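/*
 * Note on units: calc_entropy() returns the negated mean word
 * log-probability in the logmath base b, so the callers below convert
 * it with log2(b) to get bits and then report perplexity as
 *
 *     PPL = 2^CH = ( prod_i P(w_i | h_i) )^(-1/N)
 *
 * where N counts only the words that were actually scored (OOVs and
 * context cues excluded).
 */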
static int
evaluate_file(ngram_model_t *lm, logmath_t *lmath, const char *lsnfn)
{
    FILE *fh;
    lineiter_t *litor;
    int32 nccs, noovs, nwords, lscr;
    float64 ch, log_to_log2;

    if ((fh = fopen(lsnfn, "r")) == NULL)
        E_FATAL_SYSTEM("failed to open transcript file %s", lsnfn);

    /* We have to keep ch in floating-point to avoid overflows, so
     * we might as well use log2. */
    log_to_log2 = log(logmath_get_base(lmath)) / log(2);
    ch = 0.0;
    nccs = noovs = nwords = lscr = 0;
    for (litor = lineiter_start(fh); litor; litor = lineiter_next(litor)) {
        char **words;
        int32 n, tmp_ch, tmp_noovs, tmp_nccs, tmp_lscr;

        n = str2words(litor->buf, NULL, 0);
        if (n < 0)
            E_FATAL("str2words(line, NULL, 0) = %d, should not happen\n", n);
        if (n == 0) /* Do nothing! */
            continue;
        words = ckd_calloc(n, sizeof(*words));
        str2words(litor->buf, words, n);

        /* Remove any utterance ID (FIXME: has to be a single "word") */
        if (words[n-1][0] == '('
            && words[n-1][strlen(words[n-1])-1] == ')')
            --n;

        tmp_ch = calc_entropy(lm, words, n, &tmp_nccs,
                              &tmp_noovs, &tmp_lscr);

        /* Scale the per-utterance mean back to a total before accumulating. */
        ch += (float64) tmp_ch * (n - tmp_nccs - tmp_noovs) * log_to_log2;
        nccs += tmp_nccs;
        noovs += tmp_noovs;
        lscr += tmp_lscr;
        nwords += n;
        ckd_free(words);
    }
    fclose(fh);

    ch /= (nwords - nccs - noovs);
    printf("cross-entropy: %f bits\n", ch);

    /* Calculate perplexity pplx = exp CH */
    printf("perplexity: %f\n", pow(2.0, ch));
    printf("lm score: %d\n", lscr);

    /* Report OOVs and CCs */
    printf("%d words evaluated\n", nwords);
    printf("%d OOVs (%.2f%%), %d context cues removed\n",
           noovs, (double)noovs / nwords * 100, nccs);
    return 0;
}
static int
evaluate_string(ngram_model_t *lm, logmath_t *lmath, const char *text)
{
    char *textfoo, **words;
    int32 n, ch, noovs, nccs, lscr;

    /* Split it into an array of strings. */
    textfoo = ckd_salloc(text);
    n = str2words(textfoo, NULL, 0);
    if (n < 0)
        E_FATAL("str2words(textfoo, NULL, 0) = %d, should not happen\n", n);
    if (n == 0) { /* Do nothing! */
        ckd_free(textfoo);
        return 0;
    }
    words = ckd_calloc(n, sizeof(*words));
    str2words(textfoo, words, n);

    ch = calc_entropy(lm, words, n, &nccs, &noovs, &lscr);

    printf("input: %s\n", text);
    printf("cross-entropy: %f bits\n",
           ch * log(logmath_get_base(lmath)) / log(2));

    /* Calculate perplexity pplx = exp CH */
    printf("perplexity: %f\n", logmath_exp(lmath, ch));
    printf("lm score: %d\n", lscr);

    /* Report OOVs and CCs */
    printf("%d words evaluated\n", n);
    printf("%d OOVs, %d context cues removed\n",
           noovs, nccs);

    ckd_free(words);
    ckd_free(textfoo);
    return 0;
}
int
main(int argc, char *argv[])
{
    cmd_ln_t *config;
    logmath_t *lmath;
    ngram_model_t *lm = NULL;
    const char *lmfn, *probdefn, *lsnfn, *text;

    if ((config = cmd_ln_parse_r(NULL, defn, argc, argv, TRUE)) == NULL)
        return 1;

    verbose = cmd_ln_boolean_r(config, "-verbose");

    /* Create log math object. */
    if ((lmath = logmath_init
         (cmd_ln_float64_r(config, "-logbase"), 0, 0)) == NULL) {
        E_FATAL("Failed to initialize log math\n");
    }

    /* Load the language model. */
    lmfn = cmd_ln_str_r(config, "-lm");
    if (lmfn == NULL
        || (lm = ngram_model_read(config, lmfn,
                                  NGRAM_AUTO, lmath)) == NULL) {
        E_FATAL("Failed to load language model from %s\n",
                cmd_ln_str_r(config, "-lm"));
    }
    if ((probdefn = cmd_ln_str_r(config, "-probdef")) != NULL)
        ngram_model_read_classdef(lm, probdefn);
    ngram_model_apply_weights(lm,
                              cmd_ln_float32_r(config, "-lw"),
                              cmd_ln_float32_r(config, "-wip"),
                              cmd_ln_float32_r(config, "-uw"));

    /* Now evaluate some text. */
    lsnfn = cmd_ln_str_r(config, "-lsn");
    text = cmd_ln_str_r(config, "-text");
    if (lsnfn)
        evaluate_file(lm, lmath, lsnfn);
    else if (text)
        evaluate_string(lm, lmath, text);

    return 0;
}