1 /*************************************************************************/
3 /* Centre for Speech Technology Research */
4 /* University of Edinburgh, UK */
5 /* Copyright (c) 1996,1997 */
6 /* All Rights Reserved. */
8 /* Permission is hereby granted, free of charge, to use and distribute */
9 /* this software and its documentation without restriction, including */
10 /* without limitation the rights to use, copy, modify, merge, publish, */
11 /* distribute, sublicense, and/or sell copies of this work, and to */
12 /* permit persons to whom this work is furnished to do so, subject to */
13 /* the following conditions: */
14 /* 1. The code must retain the above copyright notice, this list of */
15 /* conditions and the following disclaimer. */
16 /* 2. Any modifications must be clearly marked as such. */
17 /* 3. Original authors' names are not deleted. */
18 /* 4. The authors' names are not used to endorse or promote products */
19 /* derived from this software without specific prior written */
22 /* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23 /* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24 /* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25 /* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26 /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27 /* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28 /* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29 /* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
32 /*************************************************************************/
33 /* Author : Alan W Black */
34 /* Date : April 1996 */
35 /*-----------------------------------------------------------------------*/
37 /* Basic text utilities */
39 /* This seems to be the only language specific part that cannot be */
40 /* reasonably parameterized. I'd like to change this but I'm not sure */
41 /* of the best way. Language-specific token processing modules */
42 /* generating Words (lexical items) from Tokens are currently written as */
43 /* FT_*_Token_Utt functions. A language-independent one is available, */
44 /* FT_Any_Token_Utt, which depends heavily on the lexicon and can be used */
45 /* when you don't have the language specific version. */
47 /*=======================================================================*/
// Forward declarations of the file-local per-token and per-utterance
// callbacks that tts_file_raw passes to tts_chunk_stream.
52 static void tts_raw_token(EST_Item *t);
53 static void tts_raw_utt(LISP utt);
// Tokenizing step registered as the "Text" utterance module.
// Fetches the C utterance behind `utt`, takes its input form as a C
// string, creates the "Token" relation, configures the token stream `ts`
// from the token.* Lisp variables and then reads tokens from `ts` until a
// token with an empty string is returned.
// NOTE(review): the local declarations, braces, `else` lines, the loop
// body and the return statement are not present in this view.
55 LISP FT_Text_Utt(LISP utt)
57 // Parse text into words
58 EST_Utterance *u = get_c_utt(utt);
64 *cdebug << "Text module\n";
// The text to be tokenized is the utterance's input form.
66 text = get_c_string(utt_iform(*u));
68 u->create_relation("Token");
// Start from the library defaults for each symbol class.
71 ts.set_SingleCharSymbols(EST_Token_Default_SingleCharSymbols);
72 ts.set_PunctuationSymbols(EST_Token_Default_PunctuationSymbols);
73 ts.set_PrePunctuationSymbols(EST_Token_Default_PrePunctuationSymbols);
// token.whitespace: NIL selects the default whitespace set; the second
// call installs the user's string (presumably the `else` branch -- the
// `else` line itself is not visible here). The three settings below
// follow the same pattern.
74 if ((ws = siod_get_lval("token.whitespace",NULL)) == NIL)
75 ts.set_WhiteSpaceChars(EST_Token_Default_WhiteSpaceChars);
77 ts.set_WhiteSpaceChars(get_c_string(ws));
78 if ((punc = siod_get_lval("token.punctuation",NULL)) == NIL)
79 ts.set_PunctuationSymbols(EST_Token_Default_PunctuationSymbols);
81 ts.set_PunctuationSymbols(get_c_string(punc));
// `punc` is reused here to hold the token.prepunctuation value.
82 if ((punc = siod_get_lval("token.prepunctuation",NULL)) == NIL)
83 ts.set_PrePunctuationSymbols(EST_Token_Default_PrePunctuationSymbols);
85 ts.set_PrePunctuationSymbols(get_c_string(punc));
86 if ((scs = siod_get_lval("token.singlecharsymbols",NULL)) == NIL)
87 ts.set_SingleCharSymbols(EST_Token_Default_SingleCharSymbols);
89 ts.set_SingleCharSymbols(get_c_string(scs));
// Read tokens until one with an empty string comes back; the loop body
// is not visible in this view.
91 for (ts >> tok; tok.string() != ""; ts >> tok)
// Render the contents of `filename` as speech according to `mode`.
// Modes "text" and "fundamental" are read with tts_file_raw. Any other
// mode is looked up by name in the Lisp variable tts_text_modes; if the
// lookup has to be retried, a `request` form is evaluated first to try to
// load the mode. When no description is found a warning is printed and
// the file is read in raw mode; otherwise the description is handed to
// tts_file_user_mode.
// NOTE(review): the `if`/`else` lines and braces that select between these
// paths, and the return statement, are not present in this view.
97 LISP tts_file(LISP filename,LISP mode)
99 LISP user_text_modes,t_mode;
101 user_text_modes = siod_get_lval("tts_text_modes",NULL);
// Tail of the raw-mode condition; its opening line is not visible here.
104 (streq(get_c_string(mode),"text")) ||
105 (streq(get_c_string(mode),"fundamental")))
106 tts_file_raw(filename); // Simple text file
// Look the mode name up in the user-defined text modes.
109 t_mode = siod_assoc_str(get_c_string(mode),user_text_modes);
112 // Attempt to load it
// Evaluates a (request ...) form whose argument is a symbol built from
// the mode name plus a suffix that is not visible in this view.
113 leval(cons(rintern("request"),
114 cons(strintern(EST_String(get_c_string(mode))+
116 // get it again, and see if it's defined
117 user_text_modes = siod_get_lval("tts_text_modes",NULL);
119 t_mode = siod_assoc_str(get_c_string(mode),user_text_modes);
122 cerr << "tts_file: can't find mode description \""
123 << get_c_string(mode) << "\" using raw mode instead" << endl;
124 tts_file_raw(filename); // so read it as simple text file
// The second element of the association entry is passed on as the mode
// description.
127 tts_file_user_mode(filename,car(cdr(t_mode)));
// Speak the contents of the named file as plain text.
// Opens the file, wraps it in the token stream `ts`, configures `ts` from
// the token.* Lisp variables, and passes the stream to tts_chunk_stream
// with the raw token/utterance callbacks and the eou_tree variable.
// NOTE(review): the declarations of `ts`, `stream`, `lutt` and `eou_tree`,
// the braces, `else` lines and the code after the final comment are not
// present in this view.
133 void tts_file_raw(LISP filename)
135 // Say the contents of a named file
137 LISP ws,prepunc,punc,scs;
// Open in binary read mode and attach the underlying FILE* to `ts`;
// a result of -1 from ts.open is treated as failure.
142 stream = fopen_c(get_c_string(filename), "rb");
143 if (ts.open(stream->storage_as.c_file.f, FALSE) == -1)
// NOTE(review): `filename` is a LISP value and is streamed directly
// here, whereas tts_file prints get_c_string(mode) -- confirm this
// prints the path rather than the object.
145 cerr << "tts_file: can't open file \"" << filename << "\"\n";
// Start from the library defaults for each symbol class.
148 ts.set_SingleCharSymbols(EST_Token_Default_SingleCharSymbols);
149 ts.set_PunctuationSymbols(EST_Token_Default_PunctuationSymbols);
150 ts.set_PrePunctuationSymbols(EST_Token_Default_PrePunctuationSymbols);
// For each token.* variable: NIL selects the default, and the second
// call installs the user's string (presumably the `else` branch -- the
// `else` lines are not visible here).
151 if ((ws = siod_get_lval("token.whitespace",NULL)) == NIL)
152 ts.set_WhiteSpaceChars(EST_Token_Default_WhiteSpaceChars);
154 ts.set_WhiteSpaceChars(get_c_string(ws));
155 if ((punc = siod_get_lval("token.punctuation",NULL)) == NIL)
156 ts.set_PunctuationSymbols(EST_Token_Default_PunctuationSymbols);
158 ts.set_PunctuationSymbols(get_c_string(punc));
159 if ((prepunc = siod_get_lval("token.prepunctuation",NULL)) == NIL)
160 ts.set_PrePunctuationSymbols(EST_Token_Default_PrePunctuationSymbols);
162 ts.set_PrePunctuationSymbols(get_c_string(prepunc));
163 if ((scs = siod_get_lval("token.singlecharsymbols",NULL)) == NIL)
164 ts.set_SingleCharSymbols(EST_Token_Default_SingleCharSymbols);
166 ts.set_SingleCharSymbols(get_c_string(scs));
// Fetch the end-of-utterance tree; the second argument is presumably the
// message used when the variable is unset -- TODO confirm.
167 eou_tree = siod_get_lval("eou_tree","No end of utterance tree set");
// Chunk the stream with the raw callbacks, passing 0 as the last
// argument; the result is the last utterance returned by the chunker.
169 lutt = tts_chunk_stream(ts,tts_raw_token,tts_raw_utt,eou_tree,0);
171 // The last one is returned because the chunker doesn't know if this
172 // is truly the end of an utterance or not, but here we do know.
// Per-token callback used for raw text; passed to tts_chunk_stream by
// tts_file_raw.
180 static void tts_raw_token(EST_Item *t)
182 // Do something to token, in this case nothing
// Per-utterance callback used for raw text; passed to tts_chunk_stream
// by tts_file_raw. Returns early when the guard below holds, otherwise
// builds Lisp forms around apply_hooks/tts_hooks and set!/utt_tts and
// evaluates with leval.
// NOTE(review): the declaration of `lutt`, the start of the guard
// condition and the tails of both cons expressions are not present in
// this view.
186 static void tts_raw_utt(LISP utt)
188 // Do (simple) tts on this utt
191 // There are some pessimal cases when the utterance is empty
// Second half of the guard: the "Token" relation has no items.
193 (get_c_utt(utt)->relation("Token")->length() == 0))
194 return; // in this case do nothing.
// Form beginning (apply_hooks tts_hooks ...); remaining arguments are
// not visible here.
197 lutt = cons(rintern("apply_hooks"),
198 cons(rintern("tts_hooks"),
// Form beginning (set! utt_tts ...); remaining arguments are not
// visible here.
203 lutt = cons(rintern("set!"),
204 cons(rintern("utt_tts"),
// Evaluate the form with a NIL environment argument.
208 lutt = leval(lutt,NIL);
// Build a fresh utterance for collecting tokens: allocates an
// EST_Utterance, sets its "type" feature to "Tokens" and creates an empty
// "Token" relation.
// NOTE(review): the return statement is not present in this view;
// callers assign the result to a LISP value.
212 LISP new_token_utt(void)
214 // An empty utterance ready to take Tokens
215 EST_Utterance *u = new EST_Utterance;
216 u->f.set("type","Tokens");
217 u->create_relation("Token");
// Split a token stream into utterances, applying a callback to every
// token and to every completed utterance.
// NOTE(review): only the first line of the parameter list is visible;
// app_tok, app_utt and eou_tree used below are assumed to be among the
// remaining parameters (tts_file_raw calls this with five arguments).
// The loop header, several declarations, braces and the return statement
// are also not present in this view.
221 LISP tts_chunk_stream(EST_TokenStream &ts,
227 // Get tokens from ts and accumulate them in u.
228 // Apply app_tok to each token
229 // Apply app_utt to each utt signalled
230 // Return unterminated utterance potentially for next call
231 // Uses the wagon tree eou_tree to predict utterance termination on
232 // penultimate token.
236 utt = new_token_utt();
237 EST_Utterance *u = get_c_utt(utt);
// Append the token just read to the current utterance.
242 tok = add_token(u,t);
243 app_tok(tok); // do what you do with the token
244 ebo = as(tok,"Token")->prev(); // end but one token
// A prediction of 1 for the end-but-one token is treated as end of
// utterance; the first half of this condition is not visible here.
246 (wagon_predict(ebo,eou_tree) == 1))
248 // Remove that extra token
249 remove_item(tok,"Token");
250 app_utt(utt); // do what you do with the utt
251 utt = new_token_utt();
// NOTE(review): `u` needs to refer to the new utterance before this
// call; that update is not visible in this view -- confirm.
253 add_token(u,t); // add that last token to the new utt.
// Sets MALLOC_TRACE to "mallfile" in the process environment.
// NOTE(review): the enclosing function is not visible in this view.
264 putenv("MALLOC_TRACE=mallfile");
277 void festival_Text_init(void)
279 festival_token_init();
280 festival_def_utt_module("Text",FT_Text_Utt,
282 From string in input form tokenize and create a token stream.");
283 init_subr_2("tts_file",tts_file,
284 "(tts_file FILE MODE)\n\
285 Low level access to tts function, you probably want to use the function\n\
286 tts rather than this one. Render data in FILE as speech. Respect\n\
287 MODE. Currently modes are defined through the variable tts_text_modes.");
289 init_subr_0("memon",memon,
290 "(tts_file FILE MODE)");
291 init_subr_0("memoff",memoff,
292 "(tts_file FILE MODE)");
294 init_subr_3("extract_tokens",extract_tokens,
295 "(extract_tokens FILE TOKENS OUTFILE)\n\
296 Find all occurrences of TOKENS in FILE and output specified context around\n\
297 the token. Results are appended to OUTFILE, if OUTFILE is nil, output\n\