1 /*************************************************************************/
3 /* Centre for Speech Technology Research */
4 /* University of Edinburgh, UK */
5 /* Copyright (c) 1996,1997 */
6 /* All Rights Reserved. */
8 /* Permission is hereby granted, free of charge, to use and distribute */
9 /* this software and its documentation without restriction, including */
10 /* without limitation the rights to use, copy, modify, merge, publish, */
11 /* distribute, sublicense, and/or sell copies of this work, and to */
12 /* permit persons to whom this work is furnished to do so, subject to */
13 /* the following conditions: */
14 /* 1. The code must retain the above copyright notice, this list of */
15 /* conditions and the following disclaimer. */
16 /* 2. Any modifications must be clearly marked as such. */
17 /* 3. Original authors' names are not deleted. */
18 /* 4. The authors' names are not used to endorse or promote products */
19 /* derived from this software without specific prior written */
22 /* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23 /* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24 /* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25 /* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26 /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27 /* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28 /* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29 /* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
32 /*************************************************************************/
33 /* Author : Paul Taylor */
34 /* Date : July 1995 */
35 /*-----------------------------------------------------------------------*/
37 /* Klatt Duration Rules */
39 /*=======================================================================*/
42 This is an implementation of the Klatt rule system as described in
43 chapter 9 of "From text to speech: The MITalk system", Allen,
46 The function klatt_seg_dur() calculates a duration for each
47 segment in the input. It does this by calling a number
48 of rules (named 1 to 11) as defined in the MITalk book. Most
49 rules return a number which modifies the inherent duration of
50 each segment. The original rules are set up so as to return
51 a percentage; here the system returns a floating point value,
52 which I think is neater.
57 #include "durationP.h"
// Forward declarations for the helpers defined later in this file.
// Debug helper: prints which rules returned a factor other than 1.0.
59 static void klatt_dur_debug(EST_Item *s);
// Rule functions: each takes a segment and returns a float factor. The
// factors of rule2..rule10 are multiplied together in klatt_seg_dur().
61 static float rule2(EST_Item *seg);
62 static float rule3(EST_Item *seg);
63 static float rule4(EST_Item *seg);
64 static float rule5(EST_Item *seg);
65 static float rule6(EST_Item *seg);
66 static float rule7(EST_Item *seg);
67 static float rule8(EST_Item *seg);
68 static float rule9(EST_Item *seg);
69 static float rule10(EST_Item *seg);
// rule9a is called from rule9; sub_rule9a classifies a phone by name only.
70 static float rule9a(EST_Item *seg);
71 static float sub_rule9a(const EST_String &ph);
// Per-segment duration computation and the two parameter lookups it uses.
73 static int klatt_seg_dur(EST_Item *seg);
74 static float min_dur(EST_Item *s_seg);
75 static float inher_dur(EST_Item *s_seg);
// Declared without `static`, so onset() has external linkage.
77 int onset(EST_Item *seg);
// Parameter list read from the Lisp variable duration_klatt_params by
// FT_Duration_Klatt_Utt(); min_dur() and inher_dur() search it by segment name.
79 static LISP klatt_params = NIL;
// Entry point of the Klatt duration module for one utterance.
// Loads the Lisp variable duration_klatt_params into klatt_params, then walks
// the "Segment" relation from first() following next() until it runs out.
// NOTE(review): the loop body and the return statement are not visible in this
// listing; presumably each segment is handed to klatt_seg_dur() — confirm.
82 LISP FT_Duration_Klatt_Utt(LISP utt)
84 // Predict Klatt-rule durations on segments
85 EST_Utterance *u = get_c_utt(utt);
88 *cdebug << "Duration Klatt module\n";
// The second argument is presumably the message used when the variable is
// unset — confirm against siod_get_lval().
90 klatt_params = siod_get_lval("duration_klatt_params",
91 "no klatt duration params");
93 for (s=u->relation("Segment")->first(); s != 0; s = s->next())
// Computes the duration of one segment and stores its end time.
// The end time is written to the segment's "end" feature as
// segment_start + duration.
// NOTE(review): the declarations of min, fact, start, dur and debug, the
// else-branch structure and the return value are not visible in this listing.
99 static int klatt_seg_dur(EST_Item *seg)
// Stretch factor applicable at this segment.
104 float duration_speed = dur_get_stretch_at_seg(seg);
106 start = ffeature(seg,"segment_start");
// Silence bypasses the rules: 0.250 scaled by the stretch factor.
108 if (ph_is_silence(seg->name()))
109 dur = 0.250 * duration_speed;
// Debug output re-runs every rule, so each rule is evaluated a second time.
112 if (debug) klatt_dur_debug(seg);
// Combined factor is the product of all rule factors.
113 fact *= rule2(seg) * rule3(seg) * rule4(seg) * rule5(seg)
114 * rule6(seg) * rule7(seg) * rule8(seg) *
115 rule9(seg) * rule10(seg);
// The minimum duration is halved whenever rule7 returned something other than
// 1.0. rule7 is evaluated again here, and the test is an exact float compare.
117 min = (rule7(seg) != 1.0) ? min_dur(seg)/2: min_dur(seg);
// Only the part of the inherent duration above the minimum is scaled by fact.
// The division by 1000.0 presumably converts milliseconds to seconds — confirm
// against the units used in duration_klatt_params. The statement continues on
// a line that is not visible here.
119 dur = ((((inher_dur(seg) - min) * fact) + min) / 1000.0)
123 seg->set("end",start + dur);
// Looks up the minimum duration for a segment in klatt_params.
// The entry is found by segment name; the value returned is the entry's third
// element (car(cdr(cdr(p)))).
128 static float min_dur(EST_Item *seg)
130 LISP p = siod_assoc_str(seg->name(),klatt_params);
// Diagnostic for a segment name with no entry. The guarding condition and what
// happens after the message are not visible in this listing.
134 cerr << "Klatt_Duration: no minimum duration for \"" << seg->name()
139 return get_c_float(car(cdr(cdr(p))));
// Looks up the inherent duration for a segment in klatt_params.
// The entry is found by segment name; the value returned is the entry's second
// element (car(cdr(p))).
142 static float inher_dur(EST_Item *seg)
144 LISP p = siod_assoc_str(seg->name(),klatt_params);
// Diagnostic for a segment name with no entry. It names the inherent duration
// so the failure can be told apart from the one reported by min_dur(). The
// guarding condition and what follows the message are not visible in this
// listing.
148 cerr << "Klatt_Duration: no inherent duration for \"" << seg->name()
153 return get_c_float(car(cdr(p)));
// Predicate on a segment's position, evaluated in the "SylStructure" relation.
// NOTE(review): the return statements are not visible in this listing.
156 static int word_final(EST_Item *seg)
158 // True if this segment is the last in a word
// nn is dereferenced without a null check, so seg is assumed to be in
// SylStructure — confirm.
159 EST_Item *nn = seg->as_relation("SylStructure");
// A following sibling, or a following sibling of the parent (presumably the
// syllable), means the segment is not last.
161 if (nn->next() || (parent(nn)->next()))
// Predicate on a segment's position, evaluated in the "SylStructure" relation.
// NOTE(review): the test and return statements are not visible in this listing.
167 static int syl_final(EST_Item *seg)
169 // True if this segment is the last in a syllable
170 EST_Item *nn = seg->as_relation("SylStructure");
// Predicate on a segment's position, evaluated in the "SylStructure" relation.
// NOTE(review): the return statements are not visible in this listing.
178 static int word_initial(EST_Item *seg)
180 // True if this segment is the first in a word
// nn is dereferenced without a null check, so seg is assumed to be in
// SylStructure — confirm.
181 EST_Item *nn = seg->as_relation("SylStructure");
// A preceding sibling, or a preceding sibling of the parent (presumably the
// syllable), means the segment is not first.
183 if (nn->prev() || parent(nn)->prev())
// Predicate on a segment's position within its phrase.
// NOTE(review): the return statements are not visible in this listing.
189 static int phrase_initial(EST_Item *seg)
191 // True if this segment is the first in a phrase
// Only a word-initial segment is examined further.
193 if (word_initial(seg))
// Climb two levels in SylStructure (presumably segment -> syllable -> word).
195 EST_Item *nn = parent(parent(seg,"SylStructure"));
// View that item in the "Phrase" relation and look for a predecessor. The
// result of as() is dereferenced without a null check.
196 if (as(nn,"Phrase")->prev())
// Tests whether the segment's "onsetcoda" feature equals "onset".
// Non-static: this function has external linkage.
// NOTE(review): the return statements are not visible in this listing.
205 int onset(EST_Item *seg)
207 if (ffeature(seg,"onsetcoda") == "onset")
// Tests whether the segment's "onsetcoda" feature equals "coda".
// Non-static: this function has external linkage.
// NOTE(review): the return statements are not visible in this listing.
213 int coda(EST_Item *seg)
215 if (ffeature(seg,"onsetcoda") == "coda")
// Rule 2: returns a duration factor based on the break level of the segment's
// parent in SylStructure.
// NOTE(review): the returned factors and any earlier tests are not visible in
// this listing.
221 static float rule2(EST_Item *seg)
222 { // clause final lengthening
226 int b = ffeature(seg,"R:SylStructure.parent.syl_break");
// Fires for break levels 2 and 3 only.
227 if ((b > 1) && (b < 4))
// Rule 3: two tests on the break level of the segment's parent in
// SylStructure. The figures 60 and 140 in the comments below are presumably
// the MITalk percentages; the float factors actually returned are not visible
// in this listing.
234 static float rule3(EST_Item *seg)
235 { // Non-phrase-final shortening
236 // syllabic segments are shortened by 60 if not in a phrase-final syllable
237 int b = ffeature(seg,"R:SylStructure.parent.syl_break");
// Break level below 2 and a syllabic phone.
239 if ((b < 2) && ph_is_syllabic(seg->name()))
242 // A phrase-final postvocalic liquid or nasal is lengthened by 140
// Break level exactly 4 and a liquid or nasal phone. The visible condition
// does not test the "postvocalic" part.
243 if ((b == 4) && (ph_is_liquid(seg->name()) || ph_is_nasal(seg->name())))
// Rule 4: tests the break level of the segment's parent in SylStructure.
// The figure 85 below is presumably the MITalk percentage; the float factor
// actually returned is not visible in this listing.
249 static float rule4(EST_Item *seg)
250 { // Non-word-final shortening
251 int b = ffeature(seg,"R:SylStructure.parent.syl_break");
253 // Syllabic segments are shortened by 85 if not in a word-final syllable
// Break level 0 and a syllabic phone.
254 if ((b == 0) && ph_is_syllabic(seg->name()))
// Rule 5: tests the num_syls feature two levels up in SylStructure
// (presumably the word).
// The figure 80 below is presumably the MITalk percentage; the float factor
// actually returned is not visible in this listing.
260 static float rule5(EST_Item *seg)
261 { // Polysyllabic Shortening
262 int num_syls = ffeature(seg,"R:SylStructure.parent.parent.num_syls");
264 // Syllabic segments in a polysyllabic word are shortened by 80.
265 if ((num_syls > 1) && ph_is_syllabic(seg->name()))
// Rule 6: fires for a consonant that is not the first segment of its word.
// NOTE(review): the returned factors are not visible in this listing.
271 static float rule6(EST_Item *seg)
272 { // Non-initial-consonant shortening
274 if (!word_initial(seg) && (ph_is_consonant(seg->name())))
// Rule 7: klatt_seg_dur() multiplies this factor in, and also halves the
// minimum duration whenever it is not 1.0.
// NOTE(review): the returned factors and the nesting of the tests below are
// not visible in this listing.
280 static float rule7(EST_Item *seg)
281 { // Unstressed shortening
// Test for the parent's stress feature being 1. Given the rule's name this is
// presumably the early exit for stressed syllables — confirm.
283 if (ffeature(seg,"R:SylStructure.parent.stress") == 1)
286 if (ph_is_syllabic(seg->name()))
// Segments at either word edge are distinguished from word-medial ones.
288 if (word_initial(seg) || word_final(seg))
294 if (onset(seg) && ph_is_liquid(seg->name())) // or glide...
300 // Lengthening for emphasis
// Rule 8: tests whether the segment is a vowel and whether its parent in
// SylStructure has the accented feature equal to 1.
// NOTE(review): the returned factors are not visible in this listing.
301 static float rule8(EST_Item *seg)
// Separate test for non-vowels (presumably returns the neutral factor).
304 if (!ph_is_vowel(seg->name()))
307 if (ffeature(seg,"R:SylStructure.parent.accented") == 1)
313 // this is really rule 9b, but it's easier to make it call rule 9a
// Rule 9: wraps rule9a(). The visible return maps rule9a's factor x to
// 0.7 + 0.3 * x.
// NOTE(review): the condition on b that selects this return, and the other
// return path, are not visible in this listing.
315 static float rule9(EST_Item *seg)
316 { // Postvocalic context of vowels
317 int b = ffeature(seg,"R:SylStructure.parent.syl_break");
320 return (0.7 + (0.3 * rule9a(seg)));
// Rule 9a: for a vowel or sonorant, picks a following segment and returns
// sub_rule9a() of that segment's name.
// NOTE(review): several lines (syllable-final tests, the fall-through return
// and part of one condition) are not visible in this listing.
326 static float rule9a(EST_Item *seg)
327 { // Postvocalic context of vowels
328 EST_Item *s_next,*s_next_next;
330 if (ph_is_vowel(seg->name()))
334 s_next = seg->next();
// Next segment closes the syllable: classify it directly.
335 if ((s_next) && (syl_final(s_next)))
336 return sub_rule9a(s_next->name());
// NOTE(review): s_next is null-checked above but dereferenced unconditionally
// here and in the test below, so a vowel with no following segment would
// dereference a null pointer.
337 s_next_next = s_next->next();
// Sonorant followed by an obstruent: classify the obstruent.
338 if ((ph_is_sonorant(s_next->name())) &&
340 (ph_is_obstruent(s_next_next->name())))
341 return sub_rule9a(s_next_next->name());
345 else if (ph_is_sonorant(seg->name()))
349 s_next = seg->next();
// NOTE(review): s_next is dereferenced without a null check in the visible
// code.
350 if (ph_is_obstruent(s_next->name()))
351 return sub_rule9a(s_next->name());
357 // sub rule, independent of seg position
// Classifies a phone by name: first voiced or not, then by manner (fricative,
// stop, nasal).
// NOTE(review): the factor returned for each class is not visible in this
// listing.
358 static float sub_rule9a(const EST_String &ph)
360 if (ph_is_voiced(ph))
362 if (ph_is_fricative(ph))
364 else if (ph_is_stop(ph))
366 else if (ph_is_nasal(ph))
// Presumably the voiceless-stop case, reached when the voiced test fails.
371 else if (ph_is_stop(ph))
377 // Shortening in clusters
// Rule 10: compares the segment's vowel/consonant class with that of its
// neighbours in the segment sequence.
// NOTE(review): the returned factors and brace structure are not visible in
// this listing.
379 static float rule10(EST_Item *seg)
381 int b = ffeature(seg,"R:SylStructure.parent.syl_break");
// Syllable-final segment with break level above 1 is tested first (presumably
// the rule does not apply there).
383 if (syl_final(seg) && (b > 1))
387 if (ph_is_vowel(seg->name()))
// NOTE(review): seg->next() is dereferenced without a null check here and in
// the consonant branch below.
389 if (ph_is_vowel(seg->next()->name()))
// seg->prev() is only dereferenced after !phrase_initial(seg).
391 else if ((!phrase_initial(seg)) &&
392 (ph_is_vowel(seg->prev()->name())))
397 else if (ph_is_consonant(seg->next()->name()))
398 if (!phrase_initial(seg) &&
399 (ph_is_consonant(seg->prev()->name())))
403 else if (!phrase_initial(seg) &&
404 (ph_is_consonant(seg->prev()->name())))
// Debug helper: evaluates rule2..rule10 on the segment and prints to cout one
// line for each rule whose factor is not exactly 1.0.
// Every message ends with a space so the factor is printed separated from the
// rule number.
// NOTE(review): the declaration of f is not visible in this listing.
412 static void klatt_dur_debug(EST_Item *seg)
415 if ((f = rule2(seg))!= 1.0) cout << "Fired rule 2 " << f << endl;
416 if ((f = rule3(seg))!= 1.0) cout << "Fired rule 3 " << f << endl;
417 if ((f = rule4(seg))!= 1.0) cout << "Fired rule 4 " << f << endl;
418 if ((f = rule5(seg))!= 1.0) cout << "Fired rule 5 " << f << endl;
419 if ((f = rule6(seg))!= 1.0) cout << "Fired rule 6 " << f << endl;
420 if ((f = rule7(seg))!= 1.0) cout << "Fired rule 7 " << f << endl;
421 if ((f = rule8(seg))!= 1.0) cout << "Fired rule 8 " << f << endl;
422 if ((f = rule9(seg))!= 1.0) cout << "Fired rule 9 " << f << endl;
423 if ((f = rule10(seg))!= 1.0) cout << "Fired rule 10 " << f << endl;