src/modules/base/modules.cc

   1 /*************************************************************************/
   2 /*                                                                       */
   3 /*                Centre for Speech Technology Research                  */
   4 /*                     University of Edinburgh, UK                       */
   5 /*                       Copyright (c) 1996,1997                         */
   6 /*                        All Rights Reserved.                           */
   7 /*                                                                       */
   8 /*  Permission is hereby granted, free of charge, to use and distribute  */
   9 /*  this software and its documentation without restriction, including   */
  10 /*  without limitation the rights to use, copy, modify, merge, publish,  */
  11 /*  distribute, sublicense, and/or sell copies of this work, and to      */
  12 /*  permit persons to whom this work is furnished to do so, subject to   */
  13 /*  the following conditions:                                            */
  14 /*   1. The code must retain the above copyright notice, this list of    */
  15 /*      conditions and the following disclaimer.                         */
  16 /*   2. Any modifications must be clearly marked as such.                */
  17 /*   3. Original authors' names are not deleted.                         */
  18 /*   4. The authors' names are not used to endorse or promote products   */
  19 /*      derived from this software without specific prior written        */
  20 /*      permission.                                                      */
  21 /*                                                                       */
  22 /*  THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK        */
  23 /*  DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING      */
  24 /*  ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT   */
  25 /*  SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE     */
  26 /*  FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES    */
  27 /*  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN   */
  28 /*  AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,          */
  29 /*  ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF       */
  30 /*  THIS SOFTWARE.                                                       */
  31 /*                                                                       */
  32 /*************************************************************************/
  33 /*                      Author :  Alan W Black                           */
  34 /*                      Date   :  April 1996                             */
  35 /*-----------------------------------------------------------------------*/
  36 /*                                                                       */
  37 /* Some basic initialization functions for modules                       */
  38 /*                                                                       */
  39 /*=======================================================================*/
  40 #include <cstdio>
  41 #include "festival.h"
  42 #include "lexicon.h"
  43 #include "modules.h"
  44 #include "intonation.h"
  45
// File-local loaders that build an utterance's initial relations from its
// input form.  Each is dispatched from FT_Initialize_Utt according to the
// utterance type ("Words", "Segments", "Wave", "Phones") and is defined
// further down in this file.
static void create_words(EST_Utterance *u);
static void create_segments(EST_Utterance *u);
static void create_wave(EST_Utterance *u);
static void create_phones(EST_Utterance *u);
  50
  51 LISP FT_Initialize_Utt(LISP utt)
  52 {
  53     // Main utterance intialization routine
  54     // creates appropriate streams and loads them from the input
  55     EST_Utterance *u = get_c_utt(utt);
  56     EST_String type;
  57
  58     *cdebug << "Initialize module\n";
  59
  60     type = utt_type(*u);
  61
  62     utt_cleanup(*u);  // delete all relations
  63
  64     if (type == "Words")
  65         create_words(u);
  66     else if (type == "Text")
  67         ;
  68     else if (type == "Segments")
  69         create_segments(u);
  70     else if (type == "Phones")
  71         create_phones(u);
  72     else if (type == "Phrase")
  73         create_phraseinput(u);
  74     else if (type == "Wave")
  75         create_wave(u);
  76     else
  77     {
  78         // error
  79         cerr << "Unknown utterance type \"" << type << "\" for initialization "
  80             << endl;
  81         festival_error();
  82     }
  83
  84     return utt;
  85 }
  86
  87 void create_words(EST_Utterance *u)
  88 {
  89     // Add words from IForm
  90     LISP lwords,w;
  91     EST_Item *word;
  92
  93     u->create_relation("Word");
  94     lwords = utt_iform(*u);
  95
  96     for (w=lwords; w != NIL; w=cdr(w))
  97     {
  98         if (consp(car(w)))  // word has features too
  99         {
 100             word = add_word(u,get_c_string(car(car(w))));
 101             add_item_features(word,car(cdr(car(w))));
 102         }
 103         else
 104             add_word(u,get_c_string(car(w)));
 105     }
 106
 107 }
 108
 109 void create_wave(EST_Utterance *u)
 110 {
 111     // Get the fname for the wave and load it
 112     EST_Item *item = 0;
 113     LISP lwave;
 114     EST_Wave *wave = new EST_Wave;
 115
 116     lwave = utt_iform(*u);
 117
 118     if (wave->load(get_c_string(lwave)) != format_ok)
 119     {
 120         cerr << "Cannot load wavefile: " << get_c_string(lwave) << endl;
 121         festival_error();
 122     }
 123
 124     item = u->create_relation("Wave")->append();
 125     item->set_val("wave",est_val(wave));
 126
 127 }
 128
 129 void create_segments(EST_Utterance *u)
 130 {
 131     // Add segments from IForm
 132     LISP lsegs,s,targs,t;
 133     EST_String seg;
 134     EST_Item *Seg;;
 135     float start,end,dur,tpos,tval;
 136     u->create_relation("Segment");
 137     u->create_relation("Target");
 138
 139     lsegs = utt_iform(*u);
 140
 141     end = 0.0;
 142     for (s=lsegs; s != NIL; s=cdr(s))
 143     {
 144         seg = get_c_string(car(car(s)));
 145         dur = get_c_float(car(cdr(car(s))));
 146         targs = cdr(cdr(car(s)));
 147         Seg = add_segment(u,seg);
 148         start = end;
 149         end += dur;
 150         Seg->set("end",end);
 151         for (t=targs; t != NIL; t=cdr(t))
 152         {
 153             tpos = start + (get_c_float(car(car(t))));
 154             tval = get_c_float(car(cdr(car(t))));
 155             add_target(u,Seg,tpos,tval);
 156         }
 157     }
 158
 159 }
 160
 161 static void create_phones(EST_Utterance *u)
 162 {
 163     // Add phones from IForm
 164     LISP lsegs,s;
 165     EST_String seg;
 166
 167     u->create_relation("Segment");
 168     lsegs = utt_iform(*u);
 169
 170     for (s=lsegs; s != NIL; s=cdr(s))
 171     {
 172         seg = get_c_string(car(s));
 173         add_segment(u,seg);
 174     }
 175 }
 176
// Prototypes for the functions festival_base_init() uses below: the
// utterance-module entry points it registers (each takes and returns a
// LISP value) and the feature-function initializer it calls.
// FT_Initialize_Utt is defined earlier in this file; the others are not
// defined in the part of the file shown here.
LISP FT_Initialize_Utt(LISP args);
LISP FT_Classic_Phrasify_Utt(LISP args);
LISP FT_Classic_Word_Utt(LISP args);
LISP FT_Unilex_Word_Utt(LISP args);
LISP FT_Classic_POS_Utt(LISP args);
LISP FT_PostLex_Utt(LISP utt);
void festival_ff_init(void);
 184
 185 void festival_base_init(void)
 186 {
 187     // Thing I haven't put anywhere else yet
 188
 189     festival_ff_init();  // basic feature functions
 190     // Basic EST_Utterance modules
 191     festival_def_utt_module("Initialize",FT_Initialize_Utt,
 192     "(Initialize UTT)\n\
 193   This module should be called first on all utterances it does some\n\
 194   necessary initialization of the utterance and loads the base\n\
 195   streams with the information from the input form.");
 196     festival_def_utt_module("Classic_Phrasify",FT_Classic_Phrasify_Utt,
 197     "(Classic_Phrasify UTT)\n\
 198   Creates phrases from words, if pos_supported is non-nil, a more elaborate\n\
 199   system of prediction is used.  Here probability models based on part of\n\
 200   speech and B/NB distribution are used to predict breaks.  This system\n\
 201   uses standard Viterbi decoding techniques. If pos_supported is nil,\n\
 202   a simple CART-based prediction model is used. [see Phrase breaks]");
 203     festival_def_utt_module("Classic_Word",FT_Classic_Word_Utt,
 204     "(Classic_Word UTT)\n\
 205   Build the syllable/segment/SylStructure from the given words using the\n\
 206   Lexicon.  Uses part of speech information in the lexicon look up if\n\
 207   present.");
 208     festival_def_utt_module("Unilex_Word",FT_Unilex_Word_Utt,
 209     "(Unilex_Word UTT)\n\
 210   Build the syllable/segment/SylStructure from the given words using the\n\
 211   Lexicon.  Uses part of speech information in the lexicon look up if\n\
 212   present.");
 213     festival_def_utt_module("Classic_POS",FT_Classic_POS_Utt,
 214     "(Classic_POS UTT)\n\
 215   Predict part of speech tags for the existing word stream.  If the variable\n\
 216   pos_lex_name is nil nothing happens, otherwise it is assumed to point to\n\
 217   a lexicon file giving part of speech distribution for words. An ngram\n\
 218   model file should be in pos_ngram_name.  The system uses standard\n\
 219   Viterbi decoding techniques. [see POS tagging]");
 220     festival_def_utt_module("Builtin_PostLex",FT_PostLex_Utt,
 221     "(Builtin_PostLex UTT)\n\
 222   Post-lexical rules.  Currently only vowel reduction applied to each\n\
 223   syllable using postlex_vowel_reduce_cart_tree, and the table of \n\
 224   vowel reduction pairs in postlex_vowel_reduce_table.");
 225
 226 }