src/sphinx_fe/cmd_ln_defn.h

   1 /* ====================================================================
   2  * Copyright (c) 1998-2000 Carnegie Mellon University.  All rights
   3  * reserved.
   4  *
   5  * Redistribution and use in source and binary forms, with or without
   6  * modification, are permitted provided that the following conditions
   7  * are met:
   8  *
   9  * 1. Redistributions of source code must retain the above copyright
  10  *    notice, this list of conditions and the following disclaimer.
  11  *
  12  * 2. Redistributions in binary form must reproduce the above copyright
  13  *    notice, this list of conditions and the following disclaimer in
  14  *    the documentation and/or other materials provided with the
  15  *    distribution.
  16  *
  17  * This work was supported in part by funding from the Defense Advanced
  18  * Research Projects Agency and the National Science Foundation of the
  19  * United States of America, and the CMU Sphinx Speech Consortium.
  20  *
  21  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
  22  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
  23  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  24  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
  25  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  26  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  27  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  28  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  29  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  30  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  31  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  32  *
  33  * ====================================================================
  34  *
  35  */
  36 /*********************************************************************
  37  *
  38  * File: cmd_ln_defn.h
  39  *
  40  * Description:
  41  *      Command line argument definition
  42  *
  43  * Author:
  44  *
  45  *********************************************************************/
  46
  47 #ifndef CMD_LN_DEFN_H
  48 #define CMD_LN_DEFN_H
  49
  50 #include <sphinxbase/cmd_ln.h>
  51 #include <sphinxbase/fe.h>
  52
  53 const char helpstr[] =
  54   "Description: \n\
  55 Extract acoustic features form from audio file.\n\
  56 \n\
  57 The main parameters that affect the final output, with typical values, are:\n\
  58 \n\
  59 samprate, typically 8000, 11025, or 16000\n\
  60 lowerf, 130, 200, 130, for the respective sampling rates above\n\
  61 upperf, 3700, 5200, 6800, for the respective sampling rates above\n\
  62 nfilt, 31, 37, 40, for the respective sampling rates above\n\
  63 nfft, 256 or 512\n\
  64 format, raw or nist or mswav\n\
  65 \"";
  66
  67 const char examplestr[] =
  68   "Example: \n\
  69 This example creates a cepstral file named \"output.mfc\" from an input audio file named \"input.raw\", which is a raw audio file (no header information), which was originally sampled at 16kHz. \n\
  70 \n\
  71 sphinx_fe -i  input.raw \n\
  72         -o   output.mfc \n\
  73         -input_endian little \n\
  74         -samprate  16000 \n\
  75         -lowerf    130 \n\
  76         -upperf    6800 \n\
  77         -nfilt     40 \n\
  78         -nfft      512";
  79
  80 static arg_t defn[] = {
  81   { "-help",
  82     ARG_BOOLEAN,
  83     "no",
  84     "Shows the usage of the tool"},
  85
  86   { "-example",
  87     ARG_BOOLEAN,
  88     "no",
  89     "Shows example of how to use the tool"},
  90
  91   waveform_to_cepstral_command_line_macro(),
  92
  93   { "-argfile",
  94     ARG_STRING,
  95     NULL,
  96     "Argument file (e.g. feat.params from an acoustic model) to read parameters from.  This will override anything set in other command line arguments." },
  97
  98   { "-i",
  99     ARG_STRING,
 100     NULL,
 101     "Single audio input file" },
 102
 103   { "-o",
 104     ARG_STRING,
 105     NULL,
 106     "Single cepstral output file" },
 107
 108   { "-c",
 109     ARG_STRING,
 110     NULL,
 111     "Control file for batch processing" },
 112
 113   { "-nskip",
 114     ARG_INT32,
 115     "0",
 116     "If a control file was specified, the number of utterances to skip at the head of the file" },
 117
 118   { "-runlen",
 119     ARG_INT32,
 120     "-1",
 121     "If a control file was specified, the number of utterances to process, or -1 for all" },
 122
 123   { "-part",
 124     ARG_INT32,
 125     "0",
 126     "Index of the part to run (supersedes -nskip and -runlen if non-zero)" },
 127
 128   { "-npart",
 129     ARG_INT32,
 130     "0",
 131     "Number of parts to run in (supersedes -nskip and -runlen if non-zero)" },
 132
 133   { "-di",
 134     ARG_STRING,
 135     NULL,
 136     "Input directory, input file names are relative to this, if defined" },
 137
 138   { "-ei",
 139     ARG_STRING,
 140     NULL,
 141     "Input extension to be applied to all input files" },
 142
 143   { "-do",
 144     ARG_STRING,
 145     NULL,
 146     "Output directory, output files are relative to this" },
 147
 148   { "-eo",
 149     ARG_STRING,
 150     NULL,
 151     "Output extension to be applied to all output files" },
 152
 153   { "-build_outdirs",
 154     ARG_BOOLEAN,
 155     "yes",
 156     "Create missing subdirectories in output directory" },
 157
 158   { "-sph2pipe",
 159     ARG_BOOLEAN,
 160     "no",
 161     "Input is NIST sphere (possibly with Shorten), use sph2pipe to convert" },
 162
 163   { "-nist",
 164     ARG_BOOLEAN,
 165     "no",
 166     "Defines input format as NIST sphere" },
 167
 168   { "-raw",
 169     ARG_BOOLEAN,
 170     "no",
 171     "Defines input format as raw binary data" },
 172
 173   { "-mswav",
 174     ARG_BOOLEAN,
 175     "no",
 176     "Defines input format as Microsoft Wav (RIFF)" },
 177
 178 #ifdef HAVE_SNDFILE_H
 179   { "-sndfile",
 180     ARG_BOOLEAN,
 181     "no",
 182     "Use libsndfile to read input data" },
 183 #endif
 184
 185   { "-nchans",
 186     ARG_INT32,
 187     "1",
 188     "Number of channels of data (interlaced samples assumed)" },
 189
 190   { "-whichchan",
 191     ARG_INT32,
 192     "0",
 193     "Channel to process (numbered from 1), or 0 to mix all channels" },
 194
 195   { "-ofmt",
 196     ARG_STRING,
 197     "sphinx",
 198     "Format of output files - one of sphinx, htk, text." },
 199
 200   { "-mach_endian",
 201     ARG_STRING,
 202 #ifdef WORDS_BIGENDIAN
 203     "big",
 204 #else
 205     "little",
 206 #endif
 207     "Endianness of machine, big or little" },
 208
 209   { "-blocksize",
 210     ARG_INT32,
 211     "2048",
 212     "Number of samples to read at a time." },
 213
 214   { "-spec2cep",
 215     ARG_BOOLEAN,
 216     "no",
 217     "Input is log spectral files, output is cepstral files" },
 218
 219   { "-cep2spec",
 220     ARG_BOOLEAN,
 221     "no",
 222     "Input is cepstral files, output is log spectral files" },
 223
 224   { NULL, 0, NULL, NULL }
 225 };
 226
 227
 228 #define CMD_LN_DEFN_H
 229
 230 #endif /* CMD_LN_DEFN_H */
 231 \f
 232 /*
 233  * Log record.  Maintained by RCS.
 234  *
 235  * $Log: cmd_ln_defn.h,v $
 236  * Revision 1.7  2006/02/25 00:53:48  egouvea
 237  * Added the flag "-seed". If dither is being used and the seed is less
 238  * than zero, the random number generator is initialized with time(). If
 239  * it is at least zero, it's initialized with the provided seed. This way
 240  * we have the benefit of having dither, and the benefit of being
 241  * repeatable.
 242  *
 243  * This is consistent with what sphinx3 does. Well, almost. The random
 244  * number generator is still what the compiler provides.
 245  *
 246  * Also, moved fe_init_params to fe_interface.c, so one can initialize a
 247  * variable of type param_t with meaningful values.
 248  *
 249  * Revision 1.6  2006/02/17 00:31:34  egouvea
 250  * Removed switch -melwarp. Changed the default for window length to
 251  * 0.025625 from 0.256 (so that a window at 16kHz sampling rate has
 252  * exactly 410 samples). Cleaned up include's. Replaced some E_FATAL()
 253  * with E_WARN() and return.
 254  *
 255  * Revision 1.5  2006/02/16 00:18:26  egouvea
 256  * Implemented flexible warping function. The user can specify at run
 257  * time which of several shapes they want to use. Currently implemented
 258  * are an affine function (y = ax + b), an inverse linear (y = a/x) and a
 259  * piecewise linear (y = ax, up to a frequency F, and then it "breaks" so
 260  * Nyquist frequency matches in both scales.
 261  *
 262  * Added two switches, -warp_type and -warp_params. The first specifies
 263  * the type, which valid values:
 264  *
 265  * -inverse or inverse_linear
 266  * -linear or affine
 267  * -piecewise or piecewise_linear
 268  *
 269  * The inverse_linear is the same as implemented by EHT. The -mel_warp
 270  * switch was kept for compatibility (maybe remove it in the
 271  * future?). The code is compatible with EHT's changes: cepstra created
 272  * from code after his changes should be the same as now. Scripts that
 273  * worked with his changes should work now without changes. Tested a few
 274  * cases, same results.
 275  *
 276  * Revision 1.4  2006/02/14 20:56:54  eht
 277  * Implement an argument -melwarp that changes the standard mel-scale
 278  * equation from:
 279  *      M(f) = 2595 * log10( 1 + f/700 )
 280  * to:
 281  *      M(f,w) = 2595 * log10( 1 + f/(700*w))
 282  *
 283  * So, 1.0 means no warp,  w > 1.0 means linear compression w < 1.0 means
 284  * linear expansion.
 285  *
 286  * Implement argument -nskip and -runlen arguments so that a subset of the
 287  * utterances in the control file can be executed.  Allows a simple
 288  * distribution of wave2feat processing over N processors.
 289  *
 290  * Revision 1.3  2005/05/19 21:21:55  egouvea
 291  * Bug #1176394: example bug
 292  *
 293  * Revision 1.2  2004/11/23 04:14:06  egouvea
 294  * Fixed bug in cmd_ln.c in which a wrong boolean argument led into an
 295  * infinite loop, and fixed the help and example strings, getting rid of
 296  * spaces, so that the appearance is better.
 297  *
 298  * Revision 1.1  2004/09/09 17:59:30  egouvea
 299  * Adding missing files to wave2feat
 300  *
 301  *
 302  *
 303  */