src/modules/Text/text.cc

   1 /*************************************************************************/
   2 /*                                                                       */
   3 /*                Centre for Speech Technology Research                  */
   4 /*                     University of Edinburgh, UK                       */
   5 /*                       Copyright (c) 1996,1997                         */
   6 /*                        All Rights Reserved.                           */
   7 /*                                                                       */
   8 /*  Permission is hereby granted, free of charge, to use and distribute  */
   9 /*  this software and its documentation without restriction, including   */
  10 /*  without limitation the rights to use, copy, modify, merge, publish,  */
  11 /*  distribute, sublicense, and/or sell copies of this work, and to      */
  12 /*  permit persons to whom this work is furnished to do so, subject to   */
  13 /*  the following conditions:                                            */
  14 /*   1. The code must retain the above copyright notice, this list of    */
  15 /*      conditions and the following disclaimer.                         */
  16 /*   2. Any modifications must be clearly marked as such.                */
  17 /*   3. Original authors' names are not deleted.                         */
  18 /*   4. The authors' names are not used to endorse or promote products   */
  19 /*      derived from this software without specific prior written        */
  20 /*      permission.                                                      */
  21 /*                                                                       */
  22 /*  THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK        */
  23 /*  DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING      */
  24 /*  ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT   */
  25 /*  SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE     */
  26 /*  FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES    */
  27 /*  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN   */
  28 /*  AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,          */
  29 /*  ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF       */
  30 /*  THIS SOFTWARE.                                                       */
  31 /*                                                                       */
  32 /*************************************************************************/
  33 /*             Author :  Alan W Black                                    */
  34 /*             Date   :  April 1996                                      */
  35 /*-----------------------------------------------------------------------*/
  36 /*                                                                       */
  37 /* Basic text utilities                                                  */
  38 /*                                                                       */
  39 /* This seems to be the only language specific part that cannot be       */
  40 /* reasonably parameterized.  I'd like to change this but I'm not sure   */
   41 /* of the best way.  Language-specific token processing modules          */
   42 /* generating Words (lexical items) from Tokens are currently written as */
   43 /* FT_*_Token_Utt functions.  A language-independent one,                */
   44 /* FT_Any_Token_Utt, which depends heavily on the lexicon, can be used   */
   45 /* when you don't have the language-specific version.                    */
  46 /*                                                                       */
  47 /*=======================================================================*/
  48 #include <cstdio>
  49 #include "festival.h"
  50 #include "text.h"
  51
  52 static void tts_raw_token(EST_Item *t);
  53 static void tts_raw_utt(LISP utt);
  54
  55 LISP FT_Text_Utt(LISP utt)
  56 {
  57     // Parse text into words
  58     EST_Utterance *u = get_c_utt(utt);
  59     EST_String text;
  60     EST_TokenStream ts;
  61     LISP ws,punc,scs;
  62     EST_Token tok;
  63
  64     *cdebug << "Text module\n";
  65
  66     text = get_c_string(utt_iform(*u));
  67
  68     u->create_relation("Token");
  69
  70     ts.open_string(text);
  71     ts.set_SingleCharSymbols(EST_Token_Default_SingleCharSymbols);
  72     ts.set_PunctuationSymbols(EST_Token_Default_PunctuationSymbols);
  73     ts.set_PrePunctuationSymbols(EST_Token_Default_PrePunctuationSymbols);
  74     if ((ws = siod_get_lval("token.whitespace",NULL)) == NIL)
  75         ts.set_WhiteSpaceChars(EST_Token_Default_WhiteSpaceChars);
  76     else
  77         ts.set_WhiteSpaceChars(get_c_string(ws));
  78     if ((punc = siod_get_lval("token.punctuation",NULL)) == NIL)
  79         ts.set_PunctuationSymbols(EST_Token_Default_PunctuationSymbols);
  80     else
  81         ts.set_PunctuationSymbols(get_c_string(punc));
  82     if ((punc = siod_get_lval("token.prepunctuation",NULL)) == NIL)
  83         ts.set_PrePunctuationSymbols(EST_Token_Default_PrePunctuationSymbols);
  84     else
  85         ts.set_PrePunctuationSymbols(get_c_string(punc));
  86     if ((scs = siod_get_lval("token.singlecharsymbols",NULL)) == NIL)
  87         ts.set_SingleCharSymbols(EST_Token_Default_SingleCharSymbols);
  88     else
  89         ts.set_SingleCharSymbols(get_c_string(scs));
  90
  91     for (ts >> tok; tok.string() != ""; ts >> tok)
  92         add_token(u,tok);
  93
  94     return utt;
  95 }
  96
  97 LISP tts_file(LISP filename,LISP mode)
  98 {
  99     LISP user_text_modes,t_mode;
 100
 101     user_text_modes = siod_get_lval("tts_text_modes",NULL);
 102
 103     if ((mode == NIL) ||
 104         (streq(get_c_string(mode),"text")) ||
 105         (streq(get_c_string(mode),"fundamental")))
 106         tts_file_raw(filename);  // Simple text file
 107     else
 108     {
 109         t_mode = siod_assoc_str(get_c_string(mode),user_text_modes);
 110         if (t_mode == NIL)
 111         {
 112             // Attempt to load it
 113             leval(cons(rintern("request"),
 114                        cons(strintern(EST_String(get_c_string(mode))+
 115                                       "-mode"),NIL)),NIL);
 116             // get it again, and see if its defined
 117             user_text_modes = siod_get_lval("tts_text_modes",NULL);
 118         }
 119         t_mode = siod_assoc_str(get_c_string(mode),user_text_modes);
 120         if (t_mode == NIL)
 121         {
 122             cerr << "tts_file: can't find mode description \""
 123                 << get_c_string(mode) << "\" using raw mode instead" << endl;
 124             tts_file_raw(filename);  // so read it as simple text file
 125         }
 126         else
 127             tts_file_user_mode(filename,car(cdr(t_mode)));
 128     }
 129
 130     return NIL;
 131 }
 132
 133 void tts_file_raw(LISP filename)
 134 {
 135     // Say the contents of a named file
 136     EST_TokenStream ts;
 137     LISP ws,prepunc,punc,scs;
 138     LISP lutt,eou_tree;
 139     LISP stream = NULL;
 140
 141
 142     stream = fopen_c(get_c_string(filename), "rb");
 143     if (ts.open(stream->storage_as.c_file.f, FALSE) == -1)
 144       {
 145         cerr << "tts_file: can't open file \"" << filename << "\"\n";
 146         festival_error();
 147       }
 148     ts.set_SingleCharSymbols(EST_Token_Default_SingleCharSymbols);
 149     ts.set_PunctuationSymbols(EST_Token_Default_PunctuationSymbols);
 150     ts.set_PrePunctuationSymbols(EST_Token_Default_PrePunctuationSymbols);
 151     if ((ws = siod_get_lval("token.whitespace",NULL)) == NIL)
 152         ts.set_WhiteSpaceChars(EST_Token_Default_WhiteSpaceChars);
 153     else
 154         ts.set_WhiteSpaceChars(get_c_string(ws));
 155     if ((punc = siod_get_lval("token.punctuation",NULL)) == NIL)
 156         ts.set_PunctuationSymbols(EST_Token_Default_PunctuationSymbols);
 157     else
 158         ts.set_PunctuationSymbols(get_c_string(punc));
 159     if ((prepunc = siod_get_lval("token.prepunctuation",NULL)) == NIL)
 160         ts.set_PrePunctuationSymbols(EST_Token_Default_PrePunctuationSymbols);
 161     else
 162         ts.set_PrePunctuationSymbols(get_c_string(prepunc));
 163     if ((scs = siod_get_lval("token.singlecharsymbols",NULL)) == NIL)
 164         ts.set_SingleCharSymbols(EST_Token_Default_SingleCharSymbols);
 165     else
 166         ts.set_SingleCharSymbols(get_c_string(scs));
 167     eou_tree = siod_get_lval("eou_tree","No end of utterance tree set");
 168
 169     lutt = tts_chunk_stream(ts,tts_raw_token,tts_raw_utt,eou_tree,0);
 170
 171     // The last one is returned because the chunker doesn't know if this
 172     // is truly the end of an utterance or not, but here we do know.
 173     tts_raw_utt(lutt);
 174
 175     ts.close();
 176     if (stream)
 177       fclose_l(stream);
 178 }
 179
 180 static void tts_raw_token(EST_Item *t)
 181 {
 182     // Do something to token, in this case nothing
 183     (void)t;
 184 }
 185
 186 static void tts_raw_utt(LISP utt)
 187 {
 188     // Do (simple) tts on this utt
 189     LISP lutt;
 190
 191     // There are some pessimal cases when the utterance is empty
 192     if ((utt == NIL) ||
 193         (get_c_utt(utt)->relation("Token")->length() == 0))
 194         return;   // in this case do nothing.
 195
 196     lutt = quote(utt);
 197     lutt = cons(rintern("apply_hooks"),
 198                 cons(rintern("tts_hooks"),
 199                      cons(lutt,NIL)));
 200
 201
 202
 203     lutt = cons(rintern("set!"),
 204                 cons(rintern("utt_tts"),
 205                      cons(lutt,NIL)));
 206
 207     // Synth and Play it
 208     lutt = leval(lutt,NIL);
 209     user_gc(NIL);
 210 }
 211
 212 LISP new_token_utt(void)
 213 {
 214     // An empty utterance ready to take Tokens
 215     EST_Utterance *u = new EST_Utterance;
 216     u->f.set("type","Tokens");
 217     u->create_relation("Token");
 218     return siod(u);
 219 }
 220
 221 LISP tts_chunk_stream(EST_TokenStream &ts,
 222                       TTS_app_tok app_tok,
 223                       TTS_app_utt app_utt,
 224                       LISP eou_tree,
 225                       LISP utt)
 226 {
 227     // Get tokens from ts and cummulate them in u.
 228     // Apply app_tok to each token
 229     // Apply app_utt to each utt signalled
 230     // Return untermitated utterance potentially for next call
 231     // Uses the wagon tree eou_tree to predict utterance termination on
 232     // penultimate token.
 233     EST_Item *tok, *ebo;
 234     EST_Token t;
 235     if (utt == NIL)
 236         utt = new_token_utt();
 237     EST_Utterance *u = get_c_utt(utt);
 238
 239     while (!ts.eof())
 240     {
 241         t = ts.get();
 242         tok = add_token(u,t);
 243         app_tok(tok);     // do what you do with the token
 244         ebo = as(tok,"Token")->prev();  // end but one token
 245         if ((ebo != 0) &&
 246             (wagon_predict(ebo,eou_tree) == 1))
 247         {
 248             // Remove that extra token
 249             remove_item(tok,"Token");
 250             app_utt(utt);  // do what you do with the utt
 251             utt = new_token_utt();
 252             u = get_c_utt(utt);
 253             add_token(u,t);  // add that last token to the new utt.
 254         }
 255     }
 256
 257     return utt;
 258 }
 259
 260 #if 0
 261 LISP memon(void)
 262 {
 263     printf("memon\n");
 264     putenv("MALLOC_TRACE=mallfile");
 265     mtrace();
 266     return NIL;
 267 }
 268
 269 LISP memoff(void)
 270 {
 271     muntrace();
 272     printf("memoff\n");
 273     return NIL;
 274 }
 275 #endif
 276
 277 void festival_Text_init(void)
 278 {
 279     festival_token_init();
 280     festival_def_utt_module("Text",FT_Text_Utt,
 281     "(Text UTT)\n\
 282   From string in input form tokenize and create a token stream.");
 283     init_subr_2("tts_file",tts_file,
 284     "(tts_file FILE MODE)\n\
 285   Low level access to tts function, you probably want to use the function\n\
 286   tts rather than this one.  Render data in FILE as speech.  Respect\n\
 287   MODE.  Currently modes are defined through the variable tts_text_modes.");
 288 #if 0
 289     init_subr_0("memon",memon,
 290                 "(tts_file FILE MODE)");
 291     init_subr_0("memoff",memoff,
 292                 "(tts_file FILE MODE)");
 293 #endif
 294     init_subr_3("extract_tokens",extract_tokens,
 295     "(extract_tokens FILE TOKENS OUTFILE)\n\
 296   Find all occurrences of TOKENS in FILE and output specified context around\n\
 297   the token.  Results are appended to OUTFILE, if OUTFILE is nil, output\n\
 298   goes to stdout.");
 299 }
 300