1 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /* ====================================================================
3 * Copyright (c) 1999-2004 Carnegie Mellon University. All rights
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
18 * This work was supported in part by funding from the Defense Advanced
19 * Research Projects Agency and the National Science Foundation of the
20 * United States of America, and the CMU Sphinx Speech Consortium.
22 * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
23 * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
26 * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 * ====================================================================
38 * bio.c -- Sphinx-3 binary file I/O functions.
40 * **********************************************
41 * CMU ARPA Speech Project
43 * Copyright (c) 1996 Carnegie Mellon University.
44 * ALL RIGHTS RESERVED.
45 * **********************************************
49 * Revision 1.4 2005/06/21 20:40:46 arthchan2003
50 * 1, Fixed doxygen documentation, 2, Add the $ keyword.
52 * Revision 1.3 2005/03/30 01:22:46 archan
53 * Fixed mistakes in last updates. Add
56 * 02-Jul-1997 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
57 * Bugfix: Added byteswapping in bio_verify_chksum().
59 * 18-Dec-1996 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
68 #pragma warning (disable: 4996)
71 #include "sphinxbase/bio.h"
72 #include "sphinxbase/err.h"
73 #include "sphinxbase/ckd_alloc.h"
/* Upper bound on the number of header argument/value pairs; bio_readhdr sizes its arrays from it. */
76 #define BIO_HDRARG_MAX 32
/* Line that ends a comment section; bcomment_read compares whole input lines (newline included) against it. */
77 #define END_COMMENT "*end_comment*\n"
/*
 * Consume lines from fp until one compares equal to END_COMMENT.
 *
 * Each line is read with fgets into a 16384-byte buffer, so a longer physical
 * line is seen as several chunks.
 *
 * NOTE(review): this listing omits lines of the function (return type, braces
 * and the statement executed on a match are not shown). The E_FATAL below is
 * presumably reached only when input ends before the marker is found - confirm
 * against the complete source.
 */
81 bcomment_read(FILE * fp)
83 __BIGSTACKVARIABLE__ char iline[16384];
85 while (fgets(iline, sizeof(iline), fp) != NULL) {
86 if (strcmp(iline, END_COMMENT) == 0)
89 E_FATAL("Missing %s marker\n", END_COMMENT);
/*
 * Byte-order detection fragment. The enclosing function header is not visible
 * in this listing; presumably this is the body of swap_check, which bio_readhdr
 * calls and treats as failed when the result is negative - confirm.
 *
 * One uint32 is read from fp and compared with BYTE_ORDER_MAGIC.
 */
98 if (fread(&magic, sizeof(uint32), 1, fp) != 1) {
99 E_ERROR("Cannot read BYTEORDER MAGIC NO.\n");
103 if (magic != BYTE_ORDER_MAGIC) {
104 /* either need to swap or got bogus magic number */
/* NOTE(review): the line(s) between the first and second comparison are not shown; presumably magic is byte-swapped before being tested again - confirm. */
107 if (magic == BYTE_ORDER_MAGIC)
/* Neither comparison matched: report the value that was read next to the expected constant. */
111 E_ERROR("Bad BYTEORDER MAGIC NO: %08x, expecting %08x\n",
112 magic, BYTE_ORDER_MAGIC);
/*
 * Release header argument strings.
 *
 * argname is walked until its first NULL entry, so it must be NULL-terminated;
 * bio_readhdr writes that terminator after parsing. Each name is released with
 * ckd_free.
 *
 * NOTE(review): the remaining lines of this function are not shown in the
 * listing; presumably the matching argval entries and both arrays are released
 * as well - confirm against the complete source.
 */
121 bio_hdrarg_free(char **argname, char **argval)
127 for (i = 0; argname[i]; i++) {
128 ckd_free(argname[i]);
/*
 * Write a header containing a single "version <version>" line, followed by the
 * "endhdr" line and the byte-order magic number as a raw uint32.
 *
 * NOTE(review): lines before the first fprintf are not shown in this listing.
 * NOTE(review): the fprintf and fwrite results visible here are not checked,
 * whereas bio_writehdr tests its fwrite result - consider aligning the two.
 */
137 bio_writehdr_version(FILE * fp, char *version)
142 fprintf(fp, "version %s\n", version);
143 fprintf(fp, "endhdr\n");
/* The magic is written in the writer's native byte order so a reader can detect a mismatch. */
146 b = (uint32) BYTE_ORDER_MAGIC;
147 fwrite(&b, sizeof(uint32), 1, fp);
/*
 * Write a header built from a variable argument list.
 *
 * The variadic arguments are read as (key, value) pairs of char const *; a NULL
 * key ends the list. Each pair is emitted as "key value" on its own line, then
 * "endhdr", then the byte-order magic number as a raw uint32.
 *
 * NOTE(review): several lines (return type, va_start/va_end, return values)
 * are not shown in this listing.
 */
155 bio_writehdr(FILE *fp, ...)
163 while ((key = va_arg(args, char const *)) != NULL) {
164 char const *val = va_arg(args, char const *);
/* NOTE(review): the condition guarding this message is not shown; presumably it fires when a key has no value (val is NULL) - confirm. */
166 E_ERROR("Wrong number of arguments\n");
170 fprintf(fp, "%s %s\n", key, val);
174 fprintf(fp, "endhdr\n");
/* The magic is written in the writer's native byte order; a short write is detected here. */
177 b = (uint32) BYTE_ORDER_MAGIC;
178 if (fwrite(&b, sizeof(uint32), 1, fp) != 1)
/*
 * Read a file header from fp.
 *
 * On return *argname and *argval are parallel arrays of strings allocated here
 * (names in *argname, values in *argval); the name array ends with a NULL
 * entry. *swap receives the result of swap_check(fp).
 *
 * Two layouts are recognised: a first line of exactly "s3" introduces a list
 * of "name value" lines ended by "endhdr"; any other first line is treated as
 * the old format, whose first token is stored as the value of "version".
 *
 * NOTE(review): many lines of this function (loop heads, gotos/returns, the
 * error label) are not shown in this listing.
 */
187 bio_readhdr(FILE * fp, char ***argname, char ***argval, int32 * swap)
189 __BIGSTACKVARIABLE__ char line[16384], word[4096];
/* argname gets one slot more than argval; the extra slot holds the NULL terminator written after parsing. */
/* NOTE(review): argval has no spare slot - confirm that nothing indexes argval at position BIO_HDRARG_MAX. */
193 *argname = (char **) ckd_calloc(BIO_HDRARG_MAX + 1, sizeof(char *));
194 *argval = (char **) ckd_calloc(BIO_HDRARG_MAX, sizeof(char *));
197 if (fgets(line, sizeof(line), fp) == NULL){
198 E_ERROR("Premature EOF, line %d\n", lineno);
203 if ((line[0] == 's') && (line[1] == '3') && (line[2] == '\n')) {
204 /* New format (post Dec-1996, including checksums); read argument-value pairs */
206 if (fgets(line, sizeof(line), fp) == NULL) {
207 E_ERROR("Premature EOF, line %d\n", lineno);
/* First token of the line is the argument name; %n records how many characters were consumed so the value can be scanned from that offset. */
/* NOTE(review): "%s" has no field width, yet word holds 4096 bytes while line holds 16384 - a token longer than 4095 characters would overflow word. Consider a bounded conversion. */
212 if (sscanf(line, "%s%n", word, &l) != 1) {
213 E_ERROR("Header format error, line %d\n", lineno);
216 if (strcmp(word, "endhdr") == 0)
218 if (word[0] == '#') /* Skip comments */
/* Refuse further pairs once BIO_HDRARG_MAX entries have been stored. */
221 if (i >= BIO_HDRARG_MAX) {
223 ("Max arg-value limit(%d) exceeded; increase BIO_HDRARG_MAX\n",
/* The name is duplicated before word is reused for the value. */
228 (*argname)[i] = ckd_salloc(word);
229 if (sscanf(line + l, "%s", word) != 1) { /* Multi-word values not allowed */
230 E_ERROR("Header format error, line %d\n", lineno);
233 (*argval)[i] = ckd_salloc(word);
238 /* Old format (without checksums); the first entry must be the version# */
239 if (sscanf(line, "%s", word) != 1) {
240 E_ERROR("Header format error, line %d\n", lineno);
244 (*argname)[0] = ckd_salloc("version");
245 (*argval)[0] = ckd_salloc(word);
/* Terminate the name list; bio_hdrarg_free stops at this entry. */
250 (*argname)[i] = NULL;
/* The byte-order result is handed back through *swap; a negative value is treated as failure. */
252 if ((*swap = swap_check(fp)) < 0) {
253 E_ERROR("swap_check failed\n");
/* Presumably the shared error exit (its label is not shown): release everything and clear the caller's pointers. */
259 bio_hdrarg_free(*argname, *argval);
260 *argname = *argval = NULL;
/*
 * Fold n_el elements of buf into the running 32-bit checksum sum.
 *
 * For every element the checksum is rotated left and the element value is
 * added. The rotation amount depends on the element width: 5 bits in the first
 * loop, 10 bits for uint16 elements, 20 bits for uint32 elements. Any other
 * el_sz ends in E_FATAL.
 *
 * NOTE(review): the switch/case labels, the i8 assignment and the return are
 * not shown in this listing; the first loop presumably handles 1-byte
 * elements - confirm.
 */
266 chksum_accum(void *buf, int32 el_sz, int32 n_el, uint32 sum)
276 for (i = 0; i < n_el; i++)
277 sum = (sum << 5 | sum >> 27) + i8[i];
280 i16 = (uint16 *) buf;
281 for (i = 0; i < n_el; i++)
282 sum = (sum << 10 | sum >> 22) + i16[i];
285 i32 = (uint32 *) buf;
286 for (i = 0; i < n_el; i++)
287 sum = (sum << 20 | sum >> 12) + i32[i];
290 E_FATAL("Unsupported elemsize for checksum: %d\n", el_sz);
/*
 * Byte-swap n_el elements of buf in place.
 *
 * 16-bit elements go through SWAP_INT16 and 32-bit elements through
 * SWAP_INT32, one element at a time. An el_sz with no handler ends in E_FATAL.
 *
 * NOTE(review): the switch/case labels are not shown in this listing, so which
 * other sizes (if any) are accepted cannot be seen here.
 */
299 swap_buf(void *buf, int32 el_sz, int32 n_el)
309 buf16 = (uint16 *) buf;
310 for (i = 0; i < n_el; i++)
311 SWAP_INT16(buf16 + i);
314 buf32 = (uint32 *) buf;
315 for (i = 0; i < n_el; i++)
316 SWAP_INT32(buf32 + i);
319 E_FATAL("Unsupported elemsize for byteswapping: %d\n", el_sz);
/*
 * Read n_el elements of el_sz bytes from fp into buf, with optional byte
 * swapping and checksum accumulation.
 *
 * The visible statement order is read, then swap, then checksum, so the
 * checksum is accumulated over the data as it ends up in buf.
 *
 * NOTE(review): the guards around the swap and checksum steps, and the return
 * statements, are not shown in this listing; presumably they test swap and
 * chksum respectively - confirm.
 */
326 bio_fread(void *buf, int32 el_sz, int32 n_el, FILE * fp, int32 swap,
/* A short read is detected by comparing the element count returned by fread with n_el. */
329 if (fread(buf, el_sz, n_el, fp) != (size_t) n_el)
333 swap_buf(buf, el_sz, n_el);
336 *chksum = chksum_accum(buf, el_sz, n_el, *chksum);
/*
 * Write n_el elements of el_sz bytes from buf to fp, with optional checksum
 * accumulation and byte swapping.
 *
 * The checksum is accumulated over buf before any swapping. When swapping, the
 * data is copied into a temporary buffer, the copy is swapped and written, so
 * buf itself is left unmodified. Otherwise buf is written directly and the
 * fwrite result is returned.
 *
 * NOTE(review): the guards (presumably on chksum and swap), the release of
 * nbuf and the return of rv are not shown in this listing - confirm.
 */
342 bio_fwrite(void *buf, int32 el_sz, int32 n_el, FILE *fp,
343 int32 swap, uint32 *chksum)
346 *chksum = chksum_accum(buf, el_sz, n_el, *chksum);
/* Swap path: work on a private copy of the caller's data. */
351 nbuf = ckd_calloc(n_el, el_sz);
352 memcpy(nbuf, buf, n_el * el_sz);
353 swap_buf(nbuf, el_sz, n_el);
354 rv = fwrite(nbuf, el_sz, n_el, fp);
/* No-swap path: write the caller's buffer as is. */
359 return fwrite(buf, el_sz, n_el, fp);
/*
 * Read a length-prefixed 1-D array from fp.
 *
 * The element count is read first (as one 4-byte value) into *n_el, then a
 * buffer of *n_el elements of el_sz bytes is allocated with ckd_calloc, stored
 * in *buf and filled from the file. sw and ck are passed through to bio_fread
 * for both reads. Every failure visible here ends in E_FATAL.
 *
 * NOTE(review): the condition guarding "Bad arraysize" and the return
 * statement are not shown in this listing.
 * NOTE(review): *n_el is a uint32 but is printed with %d.
 */
364 bio_fread_1d(void **buf, size_t el_sz, uint32 * n_el, FILE * fp,
365 int32 sw, uint32 * ck)
367 /* Read 1-d array size */
368 if (bio_fread(n_el, sizeof(int32), 1, fp, sw, ck) != 1)
369 E_FATAL("fread(arraysize) failed\n");
371 E_FATAL("Bad arraysize: %d\n", *n_el);
373 /* Allocate memory for array data */
374 *buf = (void *) ckd_calloc(*n_el, el_sz);
376 /* Read array data */
377 if (bio_fread(*buf, el_sz, *n_el, fp, sw, ck) != *n_el)
378 E_FATAL("fread(arraydata) failed\n");
/*
 * Read a 2-D array from fp.
 *
 * Two dimensions (l_d1, l_d2) are read as 4-byte values, then the element data
 * is read as a single length-prefixed 1-D array via bio_fread_1d. A 2-D row
 * pointer view over that data is built with ckd_alloc_2d_ptr and stored in
 * *arr.
 *
 * NOTE(review): the parameter list, the conditions that select between the two
 * error messages and the return statements are not shown in this listing.
 */
384 bio_fread_2d(void ***arr,
/* First dimension. */
397 ret = bio_fread(&l_d1, sizeof(uint32), 1, fp, swap, chksum);
400 E_ERROR_SYSTEM("Unable to read complete data");
403 E_ERROR_SYSTEM("OS error in bio_fread_2d");
/* Second dimension. */
407 ret = bio_fread(&l_d2, sizeof(uint32), 1, fp, swap, chksum);
410 E_ERROR_SYSTEM("Unable to read complete data");
413 E_ERROR_SYSTEM("OS error in bio_fread_2d");
/* Element data; n receives the stored element count. */
417 if (bio_fread_1d(&raw, e_sz, &n, fp, swap, chksum) != n)
/* NOTE(review): the stored count is checked against the dimensions only by assert, which is compiled out when NDEBUG is defined; the product may also wrap for large dimensions - consider a runtime check. */
420 assert(n == l_d1*l_d2);
424 *arr = ckd_alloc_2d_ptr(l_d1, l_d2, raw, e_sz);
/*
 * Read a 3-D array from fp.
 *
 * Three dimensions (l_d1, l_d2, l_d3) are read as 4-byte values, then the
 * element data is read as a single length-prefixed 1-D array via bio_fread_1d.
 * A 3-D pointer view over that data is built with ckd_alloc_3d_ptr and stored
 * in *arr.
 *
 * NOTE(review): the parameter list, the conditions that select between the two
 * error messages and the return statements are not shown in this listing.
 */
430 bio_fread_3d(void ****arr,
/* First dimension. */
446 ret = bio_fread(&l_d1, sizeof(uint32), 1, fp, swap, chksum);
449 E_ERROR_SYSTEM("Unable to read complete data");
452 E_ERROR_SYSTEM("OS error in bio_fread_3d");
/* Second dimension. */
456 ret = bio_fread(&l_d2, sizeof(uint32), 1, fp, swap, chksum);
459 E_ERROR_SYSTEM("Unable to read complete data");
462 E_ERROR_SYSTEM("OS error in bio_fread_3d");
/* Third dimension. */
466 ret = bio_fread(&l_d3, sizeof(uint32), 1, fp, swap, chksum);
469 E_ERROR_SYSTEM("Unable to read complete data");
472 E_ERROR_SYSTEM("OS error in bio_fread_3d");
/* Element data; n receives the stored element count. */
477 if (bio_fread_1d(&raw, e_sz, &n, fp, swap, chksum) != n) {
/* NOTE(review): the stored count is checked against the dimensions only by assert, which is compiled out when NDEBUG is defined; the product may also wrap for large dimensions - consider a runtime check. */
481 assert(n == l_d1 * l_d2 * l_d3);
483 *arr = ckd_alloc_3d_ptr(l_d1, l_d2, l_d3, raw, e_sz);
/*
 * Read the checksum stored in fp and compare it with the computed chksum.
 *
 * One uint32 is read; failure to read it ends in E_FATAL. On a mismatch both
 * values are reported in hexadecimal.
 *
 * NOTE(review): the guard before SWAP_INT32 (presumably a test of byteswap) and
 * the name of the macro that reports the mismatch are not shown in this
 * listing - confirm.
 */
492 bio_verify_chksum(FILE * fp, int32 byteswap, uint32 chksum)
496 if (fread(&file_chksum, sizeof(uint32), 1, fp) != 1)
497 E_FATAL("fread(chksum) failed\n");
/* Bring the stored checksum into the same byte order as the computed one before comparing. */
499 SWAP_INT32(&file_chksum);
500 if (file_chksum != chksum)
502 ("Checksum error; file-checksum %08x, computed %08x\n",
503 file_chksum, chksum);
/*
 * Read 16-bit samples from an audio file.
 *
 * The path is assembled from directory, filename and extension, the file is
 * opened in binary mode, positioned at offset header, and the samples are read
 * into a buffer allocated with ckd_calloc. When nsamps is non-NULL it receives
 * the sample count.
 *
 * NOTE(review): this listing omits lines throughout the function and stops
 * before its end (remaining parameters, declarations, the computation of the
 * sample count, cleanup and the return are not shown).
 */
507 bio_read_wavfile(char const *directory,
508 char const *filename,
509 char const *extension,
519 n = strlen(extension);
520 l = strlen(filename);
/* Detect a filename that already ends with extension; the action taken is not shown (presumably the extension is then not appended again - confirm). */
521 if ((n <= l) && (0 == strcmp(filename + l - n, extension)))
/* Room for directory, '/', filename, extension and the terminating NUL. */
/* NOTE(review): strlen(directory) is evaluated unconditionally here, while the two sprintf forms below suggest directory may be NULL - confirm the guard and make sure NULL never reaches strlen. */
523 inputfile = ckd_calloc(strlen(directory) + l + n + 2, 1);
525 sprintf(inputfile, "%s/%s%s", directory, filename, extension);
527 sprintf(inputfile, "%s%s", filename, extension);
530 if ((uttfp = fopen(inputfile, "rb")) == NULL) {
531 E_FATAL("fopen(%s,rb) failed\n", inputfile);
/* Seek to the end and back to the start; the line between the two seeks is not shown (presumably it records the file size - confirm). The results of these two fseek calls are not checked. */
533 fseek(uttfp, 0, SEEK_END);
535 fseek(uttfp, 0, SEEK_SET);
/* Skip the header bytes. */
537 if (fseek(uttfp, header, SEEK_SET) < 0) {
538 E_ERROR("fseek(%s,%d) failed\n", inputfile, header);
546 data = ckd_calloc(n, sizeof(*data));
/* NOTE(review): n and l look like size_t values (they receive strlen and fread results) but are printed with %d - confirm the declarations and the format. */
547 if ((l = fread(data, sizeof(int16), n, uttfp)) < n) {
548 E_ERROR_SYSTEM("Failed to read %d samples from %s: %d", n, inputfile, l);
556 if (nsamps) *nsamps = n;