src/libsphinxbase/util/logmath.c

   1 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
   2 /* ====================================================================
   3  * Copyright (c) 1999-2007 Carnegie Mellon University.  All rights
   4  * reserved.
   5  *
   6  * Redistribution and use in source and binary forms, with or without
   7  * modification, are permitted provided that the following conditions
   8  * are met:
   9  *
  10  * 1. Redistributions of source code must retain the above copyright
  11  *    notice, this list of conditions and the following disclaimer.
  12  *
  13  * 2. Redistributions in binary form must reproduce the above copyright
  14  *    notice, this list of conditions and the following disclaimer in
  15  *    the documentation and/or other materials provided with the
  16  *    distribution.
  17  *
  18  * This work was supported in part by funding from the Defense Advanced
  19  * Research Projects Agency and the National Science Foundation of the
  20  * United States of America, and the CMU Sphinx Speech Consortium.
  21  *
  22  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
  23  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
  24  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  25  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
  26  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  27  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  28  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  29  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  30  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  31  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  32  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  33  *
  34  * ====================================================================
  35  *
  36  */
  37
  38 #include <math.h>
  39 #include <string.h>
  40 #include <assert.h>
  41
  42 #include "sphinxbase/logmath.h"
  43 #include "sphinxbase/err.h"
  44 #include "sphinxbase/ckd_alloc.h"
  45 #include "sphinxbase/mmio.h"
  46 #include "sphinxbase/bio.h"
  47 #include "sphinxbase/strfuncs.h"
  48
  49 struct logmath_s {
  50     logadd_t t;
  51     int refcount;
  52     mmio_file_t *filemap;
  53     float64 base;
  54     float64 log_of_base;
  55     float64 log10_of_base;
  56     float64 inv_log_of_base;
  57     float64 inv_log10_of_base;
  58     int32 zero;
  59 };
  60
  61 logmath_t *
  62 logmath_init(float64 base, int shift, int use_table)
  63 {
  64     logmath_t *lmath;
  65     uint32 maxyx, i;
  66     float64 byx;
  67     int width;
  68
  69     /* Check that the base is correct. */
  70     if (base <= 1.0) {
  71         E_ERROR("Base must be greater than 1.0\n");
  72         return NULL;
  73     }
  74
  75     /* Set up various necessary constants. */
  76     lmath = ckd_calloc(1, sizeof(*lmath));
  77     lmath->refcount = 1;
  78     lmath->base = base;
  79     lmath->log_of_base = log(base);
  80     lmath->log10_of_base = log10(base);
  81     lmath->inv_log_of_base = 1.0/lmath->log_of_base;
  82     lmath->inv_log10_of_base = 1.0/lmath->log10_of_base;
  83     lmath->t.shift = shift;
  84     /* Shift this sufficiently that overflows can be avoided. */
  85     lmath->zero = MAX_NEG_INT32 >> (shift + 2);
  86
  87     if (!use_table)
  88         return lmath;
  89
  90     /* Create a logadd table with the appropriate width */
  91     maxyx = (uint32) (log(2.0) / log(base) + 0.5) >> shift;
  92     /* Poor man's log2 */
  93     if (maxyx < 256) width = 1;
  94     else if (maxyx < 65536) width = 2;
  95     else width = 4;
  96
  97     lmath->t.width = width;
  98     /* Figure out size of add table required. */
  99     byx = 1.0; /* Maximum possible base^{y-x} value - note that this implies that y-x == 0 */
 100     for (i = 0;; ++i) {
 101         float64 lobyx = log(1.0 + byx) * lmath->inv_log_of_base; /* log_{base}(1 + base^{y-x}); */
 102         int32 k = (int32) (lobyx + 0.5 * (1<<shift)) >> shift; /* Round to shift */
 103
 104         /* base^{y-x} has reached the smallest representable value. */
 105         if (k <= 0)
 106             break;
 107
 108         /* This table is indexed by -(y-x), so we multiply byx by
 109          * base^{-1} here which is equivalent to subtracting one from
 110          * (y-x). */
 111         byx /= base;
 112     }
 113     i >>= shift;
 114
 115     /* Never produce a table smaller than 256 entries. */
 116     if (i < 255) i = 255;
 117
 118     lmath->t.table = ckd_calloc(i+1, width);
 119     lmath->t.table_size = i + 1;
 120     /* Create the add table (see above). */
 121     byx = 1.0;
 122     for (i = 0;; ++i) {
 123         float64 lobyx = log(1.0 + byx) * lmath->inv_log_of_base;
 124         int32 k = (int32) (lobyx + 0.5 * (1<<shift)) >> shift; /* Round to shift */
 125         uint32 prev = 0;
 126
 127         /* Check any previous value - if there is a shift, we want to
 128          * only store the highest one. */
 129         switch (width) {
 130         case 1:
 131             prev = ((uint8 *)lmath->t.table)[i >> shift];
 132             break;
 133         case 2:
 134             prev = ((uint16 *)lmath->t.table)[i >> shift];
 135             break;
 136         case 4:
 137             prev = ((uint32 *)lmath->t.table)[i >> shift];
 138             break;
 139         }
 140         if (prev == 0) {
 141             switch (width) {
 142             case 1:
 143                 ((uint8 *)lmath->t.table)[i >> shift] = (uint8) k;
 144                 break;
 145             case 2:
 146                 ((uint16 *)lmath->t.table)[i >> shift] = (uint16) k;
 147                 break;
 148             case 4:
 149                 ((uint32 *)lmath->t.table)[i >> shift] = (uint32) k;
 150                 break;
 151             }
 152         }
 153         if (k <= 0)
 154             break;
 155
 156         /* Decay base^{y-x} exponentially according to base. */
 157         byx /= base;
 158     }
 159
 160     return lmath;
 161 }
 162
 163 logmath_t *
 164 logmath_read(const char *file_name)
 165 {
 166     logmath_t *lmath;
 167     char **argname, **argval;
 168     int32 byteswap, i;
 169     int chksum_present, do_mmap;
 170     uint32 chksum;
 171     long pos;
 172     FILE *fp;
 173
 174     E_INFO("Reading log table file '%s'\n", file_name);
 175     if ((fp = fopen(file_name, "rb")) == NULL) {
 176         E_ERROR("Failed to open log table file '%s' for reading: %s\n", file_name, strerror(errno));
 177         return NULL;
 178     }
 179
 180     /* Read header, including argument-value info and 32-bit byteorder magic */
 181     if (bio_readhdr(fp, &argname, &argval, &byteswap) < 0) {
 182         E_ERROR("bio_readhdr(%s) failed\n", file_name);
 183         fclose(fp);
 184         return NULL;
 185     }
 186
 187     lmath = ckd_calloc(1, sizeof(*lmath));
 188     /* Default values. */
 189     lmath->t.shift = 0;
 190     lmath->t.width = 2;
 191     lmath->base = 1.0001;
 192
 193     /* Parse argument-value list */
 194     chksum_present = 0;
 195     for (i = 0; argname[i]; i++) {
 196         if (strcmp(argname[i], "version") == 0) {
 197         }
 198         else if (strcmp(argname[i], "chksum0") == 0) {
 199             if (strcmp(argval[i], "yes") == 0)
 200                 chksum_present = 1;
 201         }
 202         else if (strcmp(argname[i], "width") == 0) {
 203             lmath->t.width = atoi(argval[i]);
 204         }
 205         else if (strcmp(argname[i], "shift") == 0) {
 206             lmath->t.shift = atoi(argval[i]);
 207         }
 208         else if (strcmp(argname[i], "logbase") == 0) {
 209             lmath->base = atof_c(argval[i]);
 210         }
 211     }
 212     bio_hdrarg_free(argname, argval);
 213     chksum = 0;
 214
 215     /* Set up various necessary constants. */
 216     lmath->log_of_base = log(lmath->base);
 217     lmath->log10_of_base = log10(lmath->base);
 218     lmath->inv_log_of_base = 1.0/lmath->log_of_base;
 219     lmath->inv_log10_of_base = 1.0/lmath->log10_of_base;
 220     /* Shift this sufficiently that overflows can be avoided. */
 221     lmath->zero = MAX_NEG_INT32 >> (lmath->t.shift + 2);
 222
 223     /* #Values to follow */
 224     if (bio_fread(&lmath->t.table_size, sizeof(int32), 1, fp, byteswap, &chksum) != 1) {
 225         E_ERROR("fread(%s) (total #values) failed\n", file_name);
 226         goto error_out;
 227     }
 228
 229     /* Check alignment constraints for memory mapping */
 230     do_mmap = 1;
 231     pos = ftell(fp);
 232     if (pos & ((long)lmath->t.width - 1)) {
 233         E_WARN("%s: Data start %ld is not aligned on %d-byte boundary, will not memory map\n",
 234                   file_name, pos, lmath->t.width);
 235         do_mmap = 0;
 236     }
 237     /* Check byte order for memory mapping */
 238     if (byteswap) {
 239         E_WARN("%s: Data is wrong-endian, will not memory map\n", file_name);
 240         do_mmap = 0;
 241     }
 242
 243     if (do_mmap) {
 244         lmath->filemap = mmio_file_read(file_name);
 245         lmath->t.table = (char *)mmio_file_ptr(lmath->filemap) + pos;
 246     }
 247     else {
 248         lmath->t.table = ckd_calloc(lmath->t.table_size, lmath->t.width);
 249         if (bio_fread(lmath->t.table, lmath->t.width, lmath->t.table_size,
 250                       fp, byteswap, &chksum) != lmath->t.table_size) {
 251             E_ERROR("fread(%s) (%d x %d bytes) failed\n",
 252                     file_name, lmath->t.table_size, lmath->t.width);
 253             goto error_out;
 254         }
 255         if (chksum_present)
 256             bio_verify_chksum(fp, byteswap, chksum);
 257
 258         if (fread(&i, 1, 1, fp) == 1) {
 259             E_ERROR("%s: More data than expected\n", file_name);
 260             goto error_out;
 261         }
 262     }
 263     fclose(fp);
 264
 265     return lmath;
 266 error_out:
 267     logmath_free(lmath);
 268     return NULL;
 269 }
 270
 271 int32
 272 logmath_write(logmath_t *lmath, const char *file_name)
 273 {
 274     FILE *fp;
 275     long pos;
 276     uint32 chksum;
 277
 278     if (lmath->t.table == NULL) {
 279         E_ERROR("No log table to write!\n");
 280         return -1;
 281     }
 282
 283     E_INFO("Writing log table file '%s'\n", file_name);
 284     if ((fp = fopen(file_name, "wb")) == NULL) {
 285         E_ERROR("Failed to open logtable file '%s' for writing: %s\n", file_name, strerror(errno));
 286         return -1;
 287     }
 288
 289     /* For whatever reason, we have to do this manually at the
 290      * moment. */
 291     fprintf(fp, "s3\nversion 1.0\nchksum0 yes\n");
 292     fprintf(fp, "width %d\n", lmath->t.width);
 293     fprintf(fp, "shift %d\n", lmath->t.shift);
 294     fprintf(fp, "logbase %f\n", lmath->base);
 295     /* Pad it out to ensure alignment. */
 296     pos = ftell(fp) + strlen("endhdr\n");
 297     if (pos & ((long)lmath->t.width - 1)) {
 298         size_t align = lmath->t.width - (pos & ((long)lmath->t.width - 1));
 299         assert(lmath->t.width <= 8);
 300         fwrite("        " /* 8 spaces */, 1, align, fp);
 301     }
 302     fprintf(fp, "endhdr\n");
 303
 304     /* Now write the binary data. */
 305     chksum = (uint32)BYTE_ORDER_MAGIC;
 306     fwrite(&chksum, sizeof(uint32), 1, fp);
 307     chksum = 0;
 308     /* #Values to follow */
 309     if (bio_fwrite(&lmath->t.table_size, sizeof(uint32),
 310                    1, fp, 0, &chksum) != 1) {
 311         E_ERROR("fwrite(%s) (total #values) failed\n", file_name);
 312         goto error_out;
 313     }
 314
 315     if (bio_fwrite(lmath->t.table, lmath->t.width, lmath->t.table_size,
 316                    fp, 0, &chksum) != lmath->t.table_size) {
 317         E_ERROR("fwrite(%s) (%d x %d bytes) failed\n",
 318                 file_name, lmath->t.table_size, lmath->t.width);
 319         goto error_out;
 320     }
 321     if (bio_fwrite(&chksum, sizeof(uint32), 1, fp, 0, NULL) != 1) {
 322         E_ERROR("fwrite(%s) checksum failed\n", file_name);
 323         goto error_out;
 324     }
 325
 326     fclose(fp);
 327     return 0;
 328
 329 error_out:
 330     fclose(fp);
 331     return -1;
 332 }
 333
 334 logmath_t *
 335 logmath_retain(logmath_t *lmath)
 336 {
 337     ++lmath->refcount;
 338     return lmath;
 339 }
 340
 341 int
 342 logmath_free(logmath_t *lmath)
 343 {
 344     if (lmath == NULL)
 345         return 0;
 346     if (--lmath->refcount > 0)
 347         return lmath->refcount;
 348     if (lmath->filemap)
 349         mmio_file_unmap(lmath->filemap);
 350     else
 351         ckd_free(lmath->t.table);
 352     ckd_free(lmath);
 353     return 0;
 354 }
 355
 356 int32
 357 logmath_get_table_shape(logmath_t *lmath, uint32 *out_size,
 358                         uint32 *out_width, uint32 *out_shift)
 359 {
 360     if (out_size) *out_size = lmath->t.table_size;
 361     if (out_width) *out_width = lmath->t.width;
 362     if (out_shift) *out_shift = lmath->t.shift;
 363
 364     return lmath->t.table_size * lmath->t.width;
 365 }
 366
 367 float64
 368 logmath_get_base(logmath_t *lmath)
 369 {
 370     return lmath->base;
 371 }
 372
 373 int
 374 logmath_get_zero(logmath_t *lmath)
 375 {
 376     return lmath->zero;
 377 }
 378
 379 int
 380 logmath_get_width(logmath_t *lmath)
 381 {
 382     return lmath->t.width;
 383 }
 384
 385 int
 386 logmath_get_shift(logmath_t *lmath)
 387 {
 388     return lmath->t.shift;
 389 }
 390
 391 int
 392 logmath_add(logmath_t *lmath, int logb_x, int logb_y)
 393 {
 394     logadd_t *t = LOGMATH_TABLE(lmath);
 395     int d, r;
 396
 397     /* handle 0 + x = x case. */
 398     if (logb_x <= lmath->zero)
 399         return logb_y;
 400     if (logb_y <= lmath->zero)
 401         return logb_x;
 402
 403     if (t->table == NULL)
 404         return logmath_add_exact(lmath, logb_x, logb_y);
 405
 406     /* d must be positive, obviously. */
 407     if (logb_x > logb_y) {
 408         d = (logb_x - logb_y);
 409         r = logb_x;
 410     }
 411     else {
 412         d = (logb_y - logb_x);
 413         r = logb_y;
 414     }
 415
 416     if (d < 0) {
 417         /* Some kind of overflow has occurred, fail gracefully. */
 418         return r;
 419     }
 420     if ((size_t)d >= t->table_size) {
 421         /* If this happens, it's not actually an error, because the
 422          * last entry in the logadd table is guaranteed to be zero.
 423          * Therefore we just return the larger of the two values. */
 424         return r;
 425     }
 426
 427     switch (t->width) {
 428     case 1:
 429         return r + (((uint8 *)t->table)[d]);
 430     case 2:
 431         return r + (((uint16 *)t->table)[d]);
 432     case 4:
 433         return r + (((uint32 *)t->table)[d]);
 434     }
 435     return r;
 436 }
 437
 438 int
 439 logmath_add_exact(logmath_t *lmath, int logb_p, int logb_q)
 440 {
 441     return logmath_log(lmath,
 442                        logmath_exp(lmath, logb_p)
 443                        + logmath_exp(lmath, logb_q));
 444 }
 445
 446 int
 447 logmath_log(logmath_t *lmath, float64 p)
 448 {
 449     if (p <= 0) {
 450         return lmath->zero;
 451     }
 452     return (int)(log(p) * lmath->inv_log_of_base) >> lmath->t.shift;
 453 }
 454
 455 float64
 456 logmath_exp(logmath_t *lmath, int logb_p)
 457 {
 458     return pow(lmath->base, (float64)(logb_p << lmath->t.shift));
 459 }
 460
 461 int
 462 logmath_ln_to_log(logmath_t *lmath, float64 log_p)
 463 {
 464     return (int)(log_p * lmath->inv_log_of_base) >> lmath->t.shift;
 465 }
 466
 467 float64
 468 logmath_log_to_ln(logmath_t *lmath, int logb_p)
 469 {
 470     return (float64)(logb_p << lmath->t.shift) * lmath->log_of_base;
 471 }
 472
 473 int
 474 logmath_log10_to_log(logmath_t *lmath, float64 log_p)
 475 {
 476     return (int)(log_p * lmath->inv_log10_of_base) >> lmath->t.shift;
 477 }
 478
 479 float64
 480 logmath_log_to_log10(logmath_t *lmath, int logb_p)
 481 {
 482     return (float64)(logb_p << lmath->t.shift) * lmath->log10_of_base;
 483 }