1 // See www.openfst.org for extensive documentation on this weighted
2 // finite-state transducer library.
4 // Classes for representing the mapping between state tuples and state IDs.
6 #ifndef FST_STATE_TABLE_H_
7 #define FST_STATE_TABLE_H_
15 #include <fst/bi-table.h>
16 #include <fst/expanded-fst.h>
17 #include <fst/filter-state.h>
22 // State tables determine the bijective mapping between state tuples (e.g., in
23 // composition, triples of two FST states and a composition filter state) and
24 // their corresponding state IDs. They are classes, templated on state tuples,
25 // with the following interface:
30 // using StateTuple = T;
32 // // Required constructors.
35 // StateTable(const StateTable &);
37 // // Looks up state ID by tuple. If it doesn't exist, then add it.
38 // StateId FindState(const StateTuple &tuple);
40 // // Looks up state tuple by state ID.
41 // const StateTuple<StateId> &Tuple(StateId s) const;
43 // // # of stored tuples.
44 // StateId Size() const;
47 // A state tuple has the form:
50 // struct StateTuple {
53 // // Required constructors.
57 // StateTuple(const StateTuple &tuple);
60 // An implementation using a hash map for the tuple to state ID mapping. The
61 // state tuple T must support operator==.
62 template <class T, class H>
63 class HashStateTable : public HashBiTable<typename T::StateId, T, H> {
66 using StateId = typename StateTuple::StateId;
68 using HashBiTable<StateId, StateTuple, H>::FindId;
69 using HashBiTable<StateId, StateTuple, H>::FindEntry;
70 using HashBiTable<StateId, StateTuple, H>::Size;
72 HashStateTable() : HashBiTable<StateId, StateTuple, H>() {}
74 explicit HashStateTable(size_t table_size)
75 : HashBiTable<StateId, StateTuple, H>(table_size) {}
77 StateId FindState(const StateTuple &tuple) { return FindId(tuple); }
79 const StateTuple &Tuple(StateId s) const { return FindEntry(s); }
82 // An implementation using a hash map for the tuple to state ID mapping. The
83 // state tuple T must support operator==.
84 template <class T, class H>
85 class CompactHashStateTable
86 : public CompactHashBiTable<typename T::StateId, T, H> {
89 using StateId = typename StateTuple::StateId;
91 using CompactHashBiTable<StateId, StateTuple, H>::FindId;
92 using CompactHashBiTable<StateId, StateTuple, H>::FindEntry;
93 using CompactHashBiTable<StateId, StateTuple, H>::Size;
95 CompactHashStateTable() : CompactHashBiTable<StateId, StateTuple, H>() {}
97 explicit CompactHashStateTable(size_t table_size)
98 : CompactHashBiTable<StateId, StateTuple, H>(table_size) {}
100 StateId FindState(const StateTuple &tuple) { return FindId(tuple); }
102 const StateTuple &Tuple(StateId s) const { return FindEntry(s); }
105 // An implementation using a vector for the tuple to state mapping. It is
106 // passed a fingerprint functor that should fingerprint tuples uniquely to an
107 // integer that can used as a vector index. Normally, VectorStateTable
108 // constructs the fingerprint functor. Alternately, the user can pass this
109 // objert, in which case the table takes ownership.
110 template <class T, class FP>
111 class VectorStateTable : public VectorBiTable<typename T::StateId, T, FP> {
113 using StateTuple = T;
114 using StateId = typename StateTuple::StateId;
116 using VectorBiTable<StateId, StateTuple, FP>::FindId;
117 using VectorBiTable<StateId, StateTuple, FP>::FindEntry;
118 using VectorBiTable<StateId, StateTuple, FP>::Size;
119 using VectorBiTable<StateId, StateTuple, FP>::Fingerprint;
121 explicit VectorStateTable(FP *fingerprint = nullptr, size_t table_size = 0)
122 : VectorBiTable<StateId, StateTuple, FP>(fingerprint, table_size) {}
124 StateId FindState(const StateTuple &tuple) { return FindId(tuple); }
126 const StateTuple &Tuple(StateId s) const { return FindEntry(s); }
129 // An implementation using a vector and a compact hash table. The selection
130 // functor returns true for tuples to be hashed in the vector. The fingerprint
131 // functor should fingerprint tuples uniquely to an integer that can be used as
132 // a vector index. A hash functor is used when hashing tuples into the compact
134 template <class T, class Select, class FP, class H>
135 class VectorHashStateTable
136 : public VectorHashBiTable<typename T::StateId, T, Select, FP, H> {
138 using StateTuple = T;
139 using StateId = typename StateTuple::StateId;
141 using VectorHashBiTable<StateId, StateTuple, Select, FP, H>::FindId;
142 using VectorHashBiTable<StateId, StateTuple, Select, FP, H>::FindEntry;
143 using VectorHashBiTable<StateId, StateTuple, Select, FP, H>::Size;
144 using VectorHashBiTable<StateId, StateTuple, Select, FP, H>::Selector;
145 using VectorHashBiTable<StateId, StateTuple, Select, FP, H>::Fingerprint;
146 using VectorHashBiTable<StateId, StateTuple, Select, FP, H>::Hash;
148 VectorHashStateTable(Select *select, FP *fingerprint, H *hash,
149 size_t vector_size = 0, size_t tuple_size = 0)
150 : VectorHashBiTable<StateId, StateTuple, Select, FP, H>(
151 select, fingerprint, hash, vector_size, tuple_size) {}
153 StateId FindState(const StateTuple &tuple) { return FindId(tuple); }
155 const StateTuple &Tuple(StateId s) const { return FindEntry(s); }
158 // An implementation using a hash map to map from tuples to state IDs. This
159 // version permits erasing of states. The state tuple's default constructor
160 // must produce a tuple that will never be seen and the table must suppor
162 template <class T, class H>
163 class ErasableStateTable : public ErasableBiTable<typename T::StateId, T, H> {
165 using StateTuple = T;
166 using StateId = typename StateTuple::StateId;
168 using ErasableBiTable<StateId, StateTuple, H>::FindId;
169 using ErasableBiTable<StateId, StateTuple, H>::FindEntry;
170 using ErasableBiTable<StateId, StateTuple, H>::Size;
171 using ErasableBiTable<StateId, StateTuple, H>::Erase;
173 ErasableStateTable() : ErasableBiTable<StateId, StateTuple, H>() {}
175 StateId FindState(const StateTuple &tuple) { return FindId(tuple); }
177 const StateTuple &Tuple(StateId s) const { return FindEntry(s); }
180 // The composition state table has the form:
182 // template <class Arc, class FilterState>
183 // class ComposeStateTable {
185 // using StateId = typename Arc::StateId;
187 // // Required constructors.
189 // ComposeStateTable(const Fst<Arc> &fst1, const Fst<Arc> &fst2);
190 // ComposeStateTable(const ComposeStateTable<Arc, FilterState> &table);
192 // // Looks up a state ID by tuple, adding it if doesn't exist.
193 // StateId FindState(const StateTuple &tuple);
195 // // Looks up a tuple by state ID.
196 // const ComposeStateTuple<StateId> &Tuple(StateId s) const;
198 // // The number of of stored tuples.
199 // StateId Size() const;
201 // // Return true if error was encountered.
202 // bool Error() const;
205 // The following interface is used to represent the composition state.
207 // template <class S, class FS>
208 // class CompositionStateTuple {
210 // using StateId = typename StateId;
211 // using FS = FilterState;
213 // // Required constructors.
215 // StateTuple(StateId s1, StateId s2, const FilterState &fs);
217 // StateId StateId1() const;
218 // StateId StateId2() const;
220 // FilterState GetFilterState() const;
222 // std::pair<StateId, StateId> StatePair() const;
224 // size_t Hash() const;
226 // friend bool operator==(const StateTuple& x, const StateTuple &y);
229 template <typename S, typename FS>
230 class DefaultComposeStateTuple {
233 using FilterState = FS;
235 DefaultComposeStateTuple()
236 : state_pair_(kNoStateId, kNoStateId), fs_(FilterState::NoState()) {}
238 DefaultComposeStateTuple(StateId s1, StateId s2, const FilterState &fs)
239 : state_pair_(s1, s2), fs_(fs) {}
241 StateId StateId1() const { return state_pair_.first; }
243 StateId StateId2() const { return state_pair_.second; }
245 FilterState GetFilterState() const { return fs_; }
247 const std::pair<StateId, StateId> &StatePair() const { return state_pair_; }
249 friend bool operator==(const DefaultComposeStateTuple &x,
250 const DefaultComposeStateTuple &y) {
251 return (&x == &y) || (x.state_pair_ == y.state_pair_ && x.fs_ == y.fs_);
254 size_t Hash() const {
255 return static_cast<size_t>(StateId1()) +
256 static_cast<size_t>(StateId2()) * 7853u +
257 GetFilterState().Hash() * 7867u;
261 std::pair<StateId, StateId> state_pair_;
262 FilterState fs_; // State of composition filter.
265 // Specialization for TrivialFilterState that does not explicitely store the
266 // filter state since it is always the unique non-blocking state.
267 template <typename S>
268 class DefaultComposeStateTuple<S, TrivialFilterState> {
271 using FilterState = TrivialFilterState;
273 DefaultComposeStateTuple()
274 : state_pair_(kNoStateId, kNoStateId) {}
276 DefaultComposeStateTuple(StateId s1, StateId s2, const FilterState &)
277 : state_pair_(s1, s2) {}
279 StateId StateId1() const { return state_pair_.first; }
281 StateId StateId2() const { return state_pair_.second; }
283 FilterState GetFilterState() const { return FilterState(true); }
285 const std::pair<StateId, StateId> &StatePair() const { return state_pair_; }
287 friend bool operator==(const DefaultComposeStateTuple &x,
288 const DefaultComposeStateTuple &y) {
289 return (&x == &y) || (x.state_pair_ == y.state_pair_);
292 size_t Hash() const { return StateId1() + StateId2() * 7853; }
295 std::pair<StateId, StateId> state_pair_;
298 // Hashing of composition state tuples.
299 template <typename T>
302 size_t operator()(const T &t) const { return t.Hash(); }
305 // A HashStateTable over composition tuples.
306 template <typename Arc, typename FilterState,
307 typename StateTuple =
308 DefaultComposeStateTuple<typename Arc::StateId, FilterState>,
309 typename StateTable =
310 CompactHashStateTable<StateTuple, ComposeHash<StateTuple>>>
311 class GenericComposeStateTable : public StateTable {
313 using StateId = typename Arc::StateId;
315 GenericComposeStateTable(const Fst<Arc> &fst1, const Fst<Arc> &fst2) {}
317 GenericComposeStateTable(const Fst<Arc> &fst1, const Fst<Arc> &fst2,
319 : StateTable(table_size) {}
321 constexpr bool Error() const { return false; }
324 GenericComposeStateTable &operator=(const GenericComposeStateTable &table) =
328 // Fingerprint for general composition tuples.
329 template <typename StateTuple>
330 class ComposeFingerprint {
332 using StateId = typename StateTuple::StateId;
334 // Required but suboptimal constructor.
335 ComposeFingerprint() : mult1_(8192), mult2_(8192) {
336 LOG(WARNING) << "TupleFingerprint: # of FST states should be provided.";
339 // Constructor is provided the sizes of the input FSTs.
340 ComposeFingerprint(StateId nstates1, StateId nstates2)
341 : mult1_(nstates1), mult2_(nstates1 * nstates2) {}
343 size_t operator()(const StateTuple &tuple) {
344 return tuple.StateId1() + tuple.StateId2() * mult1_ +
345 tuple.GetFilterState().Hash() * mult2_;
349 const ssize_t mult1_;
350 const ssize_t mult2_;
353 // Useful when the first composition state determines the tuple.
354 template <typename StateTuple>
355 class ComposeState1Fingerprint {
357 size_t operator()(const StateTuple &tuple) { return tuple.StateId1(); }
360 // Useful when the second composition state determines the tuple.
361 template <typename StateTuple>
362 class ComposeState2Fingerprint {
364 size_t operator()(const StateTuple &tuple) { return tuple.StateId2(); }
367 // A VectorStateTable over composition tuples. This can be used when the
368 // product of number of states in FST1 and FST2 (and the composition filter
369 // state hash) is manageable. If the FSTs are not expanded FSTs, they will
370 // first have their states counted.
371 template <typename Arc, typename StateTuple>
372 class ProductComposeStateTable
373 : public VectorStateTable<StateTuple, ComposeFingerprint<StateTuple>> {
375 using StateId = typename Arc::StateId;
377 VectorStateTable<StateTuple, ComposeFingerprint<StateTuple>>;
379 ProductComposeStateTable(const Fst<Arc> &fst1, const Fst<Arc> &fst2,
380 size_t table_size = 0)
381 : StateTable(new ComposeFingerprint<StateTuple>(CountStates(fst1),
385 ProductComposeStateTable(
386 const ProductComposeStateTable<Arc, StateTuple> &table)
387 : StateTable(new ComposeFingerprint<StateTuple>(table.Fingerprint())) {}
389 constexpr bool Error() const { return false; }
392 ProductComposeStateTable &operator=(const ProductComposeStateTable &table) =
396 // A vector-backed table over composition tuples which can be used when the
397 // first FST is a string (i.e., satisfies kStringProperties) and the second is
398 // deterministic and epsilon-free. It should be used with a composition filter
399 // that creates at most one filter state per tuple under these conditions (e.g.,
400 // SequenceComposeFilter or MatchComposeFilter).
401 template <typename Arc, typename StateTuple>
402 class StringDetComposeStateTable
403 : public VectorStateTable<StateTuple,
404 ComposeState1Fingerprint<StateTuple>> {
406 using StateId = typename Arc::StateId;
408 VectorStateTable<StateTuple, ComposeState1Fingerprint<StateTuple>>;
410 StringDetComposeStateTable(const Fst<Arc> &fst1, const Fst<Arc> &fst2)
412 static constexpr auto props2 = kIDeterministic | kNoIEpsilons;
413 if (fst1.Properties(kString, true) != kString) {
414 FSTERROR() << "StringDetComposeStateTable: 1st FST is not a string";
416 } else if (fst2.Properties(props2, true) != props2) {
417 FSTERROR() << "StringDetComposeStateTable: 2nd FST is not deterministic "
423 StringDetComposeStateTable(
424 const StringDetComposeStateTable<Arc, StateTuple> &table)
425 : StateTable(table), error_(table.error_) {}
427 bool Error() const { return error_; }
432 StringDetComposeStateTable &operator=(const StringDetComposeStateTable &) =
436 // A vector-backed table over composition tuples which can be used when the
437 // first FST is deterministic and epsilon-free and the second is a string (i.e.,
438 // satisfies kString). It should be used with a composition filter that creates
439 // at most one filter state per tuple under these conditions (e.g.,
440 // SequenceComposeFilter or MatchComposeFilter).
441 template <typename Arc, typename StateTuple>
442 class DetStringComposeStateTable
443 : public VectorStateTable<StateTuple,
444 ComposeState2Fingerprint<StateTuple>> {
446 using StateId = typename Arc::StateId;
448 VectorStateTable<StateTuple, ComposeState2Fingerprint<StateTuple>>;
450 DetStringComposeStateTable(const Fst<Arc> &fst1, const Fst<Arc> &fst2)
452 static constexpr auto props = kODeterministic | kNoOEpsilons;
453 if (fst1.Properties(props, true) != props) {
454 FSTERROR() << "StringDetComposeStateTable: 1st FST is not "
455 << "input-deterministic and epsilon-free";
457 } else if (fst2.Properties(kString, true) != kString) {
458 FSTERROR() << "DetStringComposeStateTable: 2nd FST is not a string";
463 DetStringComposeStateTable(
464 const DetStringComposeStateTable<Arc, StateTuple> &table)
465 : StateTable(table), error_(table.error_) {}
467 bool Error() const { return error_; }
472 DetStringComposeStateTable &operator=(const DetStringComposeStateTable &) =
476 // An erasable table over composition tuples. The Erase(StateId) method can be
477 // called if the user either is sure that composition will never return to that
478 // tuple or doesn't care that if it does, it is assigned a new state ID.
479 template <typename Arc, typename StateTuple>
480 class ErasableComposeStateTable
481 : public ErasableStateTable<StateTuple, ComposeHash<StateTuple>> {
483 ErasableComposeStateTable(const Fst<Arc> &fst1, const Fst<Arc> &fst2) {}
485 constexpr bool Error() const { return false; }
488 ErasableComposeStateTable &operator=(const ErasableComposeStateTable &table) =
494 #endif // FST_STATE_TABLE_H_