1 /*************************************************************************/
3 /* Centre for Speech Technology Research */
4 /* (University of Edinburgh, UK) and */
6 /* Copyright (c) 2002 */
7 /* All Rights Reserved. */
9 /* Permission is hereby granted, free of charge, to use and distribute */
10 /* this software and its documentation without restriction, including */
11 /* without limitation the rights to use, copy, modify, merge, publish, */
12 /* distribute, sublicense, and/or sell copies of this work, and to */
13 /* permit persons to whom this work is furnished to do so, subject to */
14 /* the following conditions: */
16 /* 1. The code must retain the above copyright notice, this list of */
17 /* conditions and the following disclaimer. */
18 /* 2. Any modifications must be clearly marked as such. */
19 /* 3. Original authors' names are not deleted. */
20 /* 4. The authors' names are not used to endorse or promote products */
21 /* derived from this software without specific prior written */
24 /* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
25 /* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
26 /* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT */
27 /* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
28 /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
29 /* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
30 /* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
31 /* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
34 /*************************************************************************/
36 /* Author: Korin Richmond */
38 /* --------------------------------------------------------------------- */
39 /* A diphone unit selection "voice module" */
40 /* (implemented using a list of utterance objects) */
41 /*************************************************************************/
43 #ifndef __DIPHONEVOICEMODULE_H__
44 #define __DIPHONEVOICEMODULE_H__
46 #include "VoiceModuleBase.h"
47 #include "EST_DiphoneCoverage.h"
48 #include "siod_defs.h"
49 #include "EST_Val_defs.h"
50 #include "EST_String.h"
52 #include "EST_viterbi.h"
54 #include "EST_types.h" // for EST_StrList
56 #include "EST_FlatTargetCost.h"
60 class EST_VTCandidate;
67 // return standard join point time for this segment
68 // (one half of a diphone)
69 float getJoinTime( const EST_Item *seg );
71 //template<class T> class EST_TStringHash;
72 #include "EST_THash.h"
73 template<class T> class EST_TList;
74 typedef EST_TList<EST_Item*> ItemList;
77 SIOD_REGISTER_CLASS_DCLS(du_voicemodule,DiphoneVoiceModule)
78 VAL_REGISTER_CLASS_DCLS(du_voicemodule,DiphoneVoiceModule)
80 // following is necessary to make some of a candidate's information
81 // available in a faster way that EST_Item feature lookups (critically
82 // the join cost coefficients EST_FVectors for example)
83 // i.e. yet another temporary hack... (would be better if EST_viterbi
84 // code allowed other things apart from just EST_Item* in order to
85 // perform the search)
86 class DiphoneCandidate {
88 DiphoneCandidate( const EST_Item *phone1,
89 const DiphoneVoiceModule *p,
90 const EST_FVector *left,
91 const EST_FVector *right )
92 : ph1(phone1), dvm( p ), l_coef(left), r_coef(right),
93 ph1_jccid(-1), ph1_jccindex(-1), ph2_jccid(-1), ph2_jccindex(-1){};
96 const DiphoneVoiceModule *dvm;
97 const EST_FVector *l_coef;
98 const EST_FVector *r_coef;
99 int ph1_jccid, ph1_jccindex;
100 int ph2_jccid, ph2_jccindex;
103 VAL_REGISTER_CLASS_DCLS(diphonecandidate,DiphoneCandidate)
105 class DiphoneVoiceModule : public VoiceModuleBase {
107 DiphoneVoiceModule( const EST_StrList& basenames,
108 const EST_String& uttDir,
109 const EST_String& wavDir,
110 const EST_String& pmDir,
111 const EST_String& coefDir,
112 unsigned int srate = 16000,
113 const EST_String& uttExt = ".utt",
114 const EST_String& wavExt = ".wav",
115 const EST_String& pmExt = ".pm",
116 const EST_String& coefExt = ".coef" );
118 virtual ~DiphoneVoiceModule();
120 virtual void initialise(const EST_TargetCost *tc, bool ignore_bad_tag=false );
121 virtual unsigned int numModuleUnits() const;
122 virtual unsigned int numUnitTypes() const;
123 virtual unsigned int numAvailableCandidates( const EST_String &unit ) const;
126 ///// Some "debugging" functions - deliberately don't mind doing
127 // slow things like returning copies of things. Such functions are
128 // not intended to do important things, but just to make it easier
129 // to work out whats "in" the voice database object.
131 // return copy of utterance number
132 bool getUtterance( EST_Utterance **utt, int n ) const;
135 // return pointer to utterance which has feature "feat_name"
136 // set to value "value"
137 bool getUtterance( EST_Utterance **utt,
138 const EST_String &feat_name,
139 const EST_Val &value ) const;
140 void getDiphoneCoverageStats(EST_DiphoneCoverage *dc) const;
142 // int DiphoneVoiceModule::getCandidateList( const EST_Item& target,
143 // const EST_TargetCost& tc,
144 // EST_VTCandidate *head,
145 // EST_VTCandidate *tail ) const;
149 int getCandidateList( const EST_Item& target,
150 const EST_TargetCost *tc,
151 const TCDataHash *tcdh,
152 const float tc_weight,
153 EST_VTCandidate **head,
154 EST_VTCandidate **tail ) const;
156 // append all instances of a certain phone present in the utterances
157 // in this voice. Returns the number added
158 int getPhoneList( const EST_String &phone, ItemList &list );
161 // don't allow copying of Voices (for now?)
162 DiphoneVoiceModule( const DiphoneVoiceModule& );
163 DiphoneVoiceModule& operator=( const DiphoneVoiceModule& );
166 void flatPack( EST_Relation *segs, const EST_TargetCost *tc) const;
168 void addCoefficients( EST_Relation *segs, const EST_Track& coefs );
169 void addToCatalogue( const EST_Utterance *utt, int *num_ignored, bool ignore_bad=false );
170 void getDiphone( const EST_Item *phone1,
171 EST_Track* coef, EST_Wave* sig, int* midframe,
172 bool extendLeft=0, bool extendRight=0 ) const;
174 friend class DiphoneUnitVoice;
177 EST_StrList fileList;
178 EST_String utt_dir; // utterance files
180 EST_String pm_dir; // pitch marks
182 EST_String coef_dir; // for coefficients that aren't pitch syncronous
184 EST_String wave_dir; // waveform (or residual)
187 unsigned int wav_srate; //sample rate of voice waveform data
189 TCDataHash *tcdatahash;
191 EST_TList<EST_Utterance *> *utt_dbase;
192 EST_TStringHash<ItemList*> *catalogue;
195 #endif // __DIPHONEVOICEMODULE_H__