1 // See www.openfst.org for extensive documentation on this weighted
2 // finite-state transducer library.
4 // Classes to allow matching labels leaving FST states.
6 #ifndef FST_LIB_MATCHER_H_
7 #define FST_LIB_MATCHER_H_
10 #include <unordered_map>
15 #include <fst/mutable-fst.h> // for all internal FST accessors.
20 // Matchers find and iterate through requested labels at FST states. In the
21 // simplest form, these are just some associative map or search keyed on labels.
22 // More generally, they may implement matching special labels that represent
23 // sets of labels such as sigma (all), rho (rest), or phi (fail). The Matcher
30 // using Arc = typename FST::Arc;
31 // using Label = typename Arc::Label;
32 // using StateId = typename Arc::StateId;
33 // using Weight = typename Arc::Weight;
35 // // Required constructors.
37 // Matcher(const FST &fst, MatchType type);
38 // Matcher(const Matcher &matcher, bool safe = false);
40 // // Standard copy method.
41 // Matcher<FST> *Copy(bool safe = false) const override;
43 // // Returns the match type that can be provided (depending on compatibility
44 // of the input FST). It is either the requested match type, MATCH_NONE, or
45 // MATCH_UNKNOWN. If test is false, a costly testing is avoided, but
46 // MATCH_UNKNOWN may be returned. If test is true, a definite answer is
47 // returned, but may involve more costly computation (e.g., visiting the FST).
48 // MatchType Type(bool test) const override;
50 // // Specifies the current state.
51 // void SetState(StateId s) final;
53 // // Finds matches to a label at the current state, returning true if a match
54 // // found. kNoLabel matches any non-consuming transitions, e.g., epsilon
55 // // transitions, which do not require a matching symbol.
56 // bool Find(Label label) final;
58 // // Iterator methods. Note that initially and after SetState() these have
59 // undefined behavior until Find() is called.
61 // bool Done() const final;
63 // const Arc &Value() const final;
67 // // Returns final weight of a state.
68 // Weight Final(StateId) const final;
70 // // Indicates preference for being the side used for matching in
71 // // composition. If the value is kRequirePriority, then it is
72 // // mandatory that it be used. Calling this method without passing the
73 // // current state of the matcher invalidates the state of the matcher.
74 // ssize_t Priority(StateId s) final;
76 // // This specifies the known FST properties as viewed from this matcher. It
77 // // takes as argument the input FST's known properties.
78 // uint64 Properties(uint64 props) const override;
80 // // Returns matcher flags.
81 // uint32 Flags() const override;
83 // // Returns matcher FST.
84 // const FST &GetFst() const override;
87 // Basic matcher flags.
89 // Matcher needs to be used as the matching side in composition for
90 // at least one state (has kRequirePriority).
91 constexpr uint32 kRequireMatch = 0x00000001;
93 // Flags used for basic matchers (see also lookahead.h).
94 constexpr uint32 kMatcherFlags = kRequireMatch;
96 // Matcher priority that is mandatory.
97 constexpr ssize_t kRequirePriority = -1;
99 // Matcher interface, templated on the Arc definition; used for matcher
100 // specializations that are returned by the InitMatcher FST method.
105 using Label = typename Arc::Label;
106 using StateId = typename Arc::StateId;
107 using Weight = typename Arc::Weight;
109 virtual ~MatcherBase() {}
111 // Virtual interface.
113 virtual MatcherBase<Arc> *Copy(bool safe = false) const = 0;
114 virtual MatchType Type(bool) const = 0;
115 virtual void SetState(StateId) = 0;
116 virtual bool Find(Label) = 0;
117 virtual bool Done() const = 0;
118 virtual const Arc &Value() const = 0;
119 virtual void Next() = 0;
120 virtual const Fst<Arc> &GetFst() const = 0;
121 virtual uint64 Properties(uint64) const = 0;
123 // Trivial implementations that can be used by derived classes. Full
124 // devirtualization is expected for any derived class marked final.
125 virtual uint32 Flags() const { return 0; }
127 virtual Weight Final(StateId s) const { return internal::Final(GetFst(), s); }
129 virtual ssize_t Priority(StateId s) { return internal::NumArcs(GetFst(), s); }
132 // A matcher that expects sorted labels on the side to be matched.
133 // If match_type == MATCH_INPUT, epsilons match the implicit self-loop
134 // Arc(kNoLabel, 0, Weight::One(), current_state) as well as any
135 // actual epsilon transitions. If match_type == MATCH_OUTPUT, then
136 // Arc(0, kNoLabel, Weight::One(), current_state) is instead matched.
138 class SortedMatcher : public MatcherBase<typename F::Arc> {
141 using Arc = typename FST::Arc;
142 using Label = typename Arc::Label;
143 using StateId = typename Arc::StateId;
144 using Weight = typename Arc::Weight;
146 using MatcherBase<Arc>::Flags;
147 using MatcherBase<Arc>::Properties;
149 // Labels >= binary_label will be searched for by binary search;
150 // o.w. linear search is used.
151 SortedMatcher(const FST &fst, MatchType match_type, Label binary_label = 1)
155 match_type_(match_type),
156 binary_label_(binary_label),
157 match_label_(kNoLabel),
159 loop_(kNoLabel, 0, Weight::One(), kNoStateId),
162 switch (match_type_) {
167 std::swap(loop_.ilabel, loop_.olabel);
170 FSTERROR() << "SortedMatcher: Bad match type";
171 match_type_ = MATCH_NONE;
176 SortedMatcher(const SortedMatcher<FST> &matcher, bool safe = false)
177 : fst_(matcher.fst_->Copy(safe)),
180 match_type_(matcher.match_type_),
181 binary_label_(matcher.binary_label_),
182 match_label_(kNoLabel),
184 loop_(matcher.loop_),
185 error_(matcher.error_),
188 ~SortedMatcher() override { Destroy(aiter_, &aiter_pool_); }
190 SortedMatcher<FST> *Copy(bool safe = false) const override {
191 return new SortedMatcher<FST>(*this, safe);
194 MatchType Type(bool test) const override {
195 if (match_type_ == MATCH_NONE) return match_type_;
196 const auto true_prop =
197 match_type_ == MATCH_INPUT ? kILabelSorted : kOLabelSorted;
198 const auto false_prop =
199 match_type_ == MATCH_INPUT ? kNotILabelSorted : kNotOLabelSorted;
200 const auto props = fst_->Properties(true_prop | false_prop, test);
201 if (props & true_prop) {
203 } else if (props & false_prop) {
206 return MATCH_UNKNOWN;
210 void SetState(StateId s) final {
211 if (state_ == s) return;
213 if (match_type_ == MATCH_NONE) {
214 FSTERROR() << "SortedMatcher: Bad match type";
217 Destroy(aiter_, &aiter_pool_);
218 aiter_ = new (&aiter_pool_) ArcIterator<FST>(*fst_, s);
219 aiter_->SetFlags(kArcNoCache, kArcNoCache);
220 narcs_ = internal::NumArcs(*fst_, s);
224 bool Find(Label match_label) final {
227 current_loop_ = false;
228 match_label_ = kNoLabel;
231 current_loop_ = match_label == 0;
232 match_label_ = match_label == kNoLabel ? 0 : match_label;
236 return current_loop_;
240 // Positions matcher to the first position where inserting match_label would
241 // maintain the sort order.
242 void LowerBound(Label label) {
243 exact_match_ = false;
244 current_loop_ = false;
246 match_label_ = kNoLabel;
249 match_label_ = label;
253 // After Find(), returns false if no more exact matches.
254 // After LowerBound(), returns false if no more arcs.
255 bool Done() const final {
256 if (current_loop_) return false;
257 if (aiter_->Done()) return true;
258 if (!exact_match_) return false;
260 match_type_ == MATCH_INPUT ? kArcILabelValue : kArcOLabelValue,
262 const auto label = match_type_ == MATCH_INPUT ? aiter_->Value().ilabel
263 : aiter_->Value().olabel;
264 return label != match_label_;
267 const Arc &Value() const final {
268 if (current_loop_) return loop_;
269 aiter_->SetFlags(kArcValueFlags, kArcValueFlags);
270 return aiter_->Value();
275 current_loop_ = false;
281 Weight Final(StateId s) const final {
282 return MatcherBase<Arc>::Final(s);
285 ssize_t Priority(StateId s) final {
286 return MatcherBase<Arc>::Priority(s);
289 const FST &GetFst() const override { return *fst_; }
291 uint64 Properties(uint64 inprops) const override {
292 return inprops | (error_ ? kError : 0);
295 size_t Position() const { return aiter_ ? aiter_->Position() : 0; }
298 Label GetLabel() const {
299 const auto &arc = aiter_->Value();
300 return match_type_ == MATCH_INPUT ? arc.ilabel : arc.olabel;
307 std::unique_ptr<const FST> fst_;
308 StateId state_; // Matcher state.
309 ArcIterator<FST> *aiter_; // Iterator for current state.
310 MatchType match_type_; // Type of match to perform.
311 Label binary_label_; // Least label for binary search.
312 Label match_label_; // Current label to be matched.
313 size_t narcs_; // Current state arc count.
314 Arc loop_; // For non-consuming symbols.
315 bool current_loop_; // Current arc is the implicit loop.
316 bool exact_match_; // Exact match or lower bound?
317 bool error_; // Error encountered?
318 MemoryPool<ArcIterator<FST>> aiter_pool_; // Pool of arc iterators.
321 // Returns true iff match to match_label_, positioning arc iterator at lower
324 inline bool SortedMatcher<FST>::BinarySearch() {
326 size_t high = narcs_;
328 const size_t mid = (low + high) / 2;
330 const auto label = GetLabel();
331 if (label > match_label_) {
333 } else if (label < match_label_) {
336 // Otherwise, search backwards for the first match.
337 for (size_t i = mid; i > low; --i) {
339 const auto label = GetLabel();
340 if (label != match_label_) {
352 // Returns true iff match to match_label_, positioning arc iterator at lower
355 inline bool SortedMatcher<FST>::LinearSearch() {
356 for (aiter_->Reset(); !aiter_->Done(); aiter_->Next()) {
357 const auto label = GetLabel();
358 if (label == match_label_) return true;
359 if (label > match_label_) break;
364 // Returns true iff match to match_label_, positioning arc iterator at lower
367 inline bool SortedMatcher<FST>::Search() {
368 aiter_->SetFlags(match_type_ == MATCH_INPUT ?
369 kArcILabelValue : kArcOLabelValue,
371 if (match_label_ >= binary_label_) {
372 return BinarySearch();
374 return LinearSearch();
378 // A matcher that stores labels in a per-state hash table populated upon the
379 // first visit to that state. Sorting is not required. Treatment of
380 // epsilons are the same as with SortedMatcher.
382 class HashMatcher : public MatcherBase<typename F::Arc> {
385 using Arc = typename FST::Arc;
386 using Label = typename Arc::Label;
387 using StateId = typename Arc::StateId;
388 using Weight = typename Arc::Weight;
390 using MatcherBase<Arc>::Flags;
391 using MatcherBase<Arc>::Final;
392 using MatcherBase<Arc>::Priority;
394 HashMatcher(const FST &fst, MatchType match_type)
397 match_type_(match_type),
398 loop_(kNoLabel, 0, Weight::One(), kNoStateId) {
399 switch (match_type_) {
404 std::swap(loop_.ilabel, loop_.olabel);
407 FSTERROR() << "SortedMatcher: Bad match type";
408 match_type_ = MATCH_NONE;
413 HashMatcher(const HashMatcher<FST> &matcher, bool safe = false)
414 : fst_(matcher.fst_->Copy(safe)),
416 match_type_(matcher.match_type_),
417 loop_(matcher.loop_),
418 error_(matcher.error_) {}
420 HashMatcher<FST> *Copy(bool safe = false) const override {
421 return new HashMatcher<FST>(*this, safe);
424 // The argument is ignored as there are no relevant properties to test.
425 MatchType Type(bool test) const override { return match_type_; }
427 void SetState(StateId s) final;
429 bool Find(Label label) final {
430 current_loop_ = label == 0;
435 if (label == kNoLabel) label = 0;
436 return Search(label);
439 bool Done() const final {
440 if (current_loop_) return false;
441 return label_it_ == label_end_;
444 const Arc &Value() const final {
445 if (current_loop_) return loop_;
446 aiter_->Seek(label_it_->second);
447 return aiter_->Value();
452 current_loop_ = false;
458 const FST &GetFst() const override { return *fst_; }
460 uint64 Properties(uint64 inprops) const override {
461 return inprops | (error_ ? kError : 0);
465 bool Search(Label match_label);
467 using LabelTable = std::unordered_multimap<Label, size_t>;
468 using StateTable = std::unordered_map<StateId, LabelTable>;
470 std::unique_ptr<const FST> fst_;
471 StateId state_; // Matcher state.
472 MatchType match_type_;
473 Arc loop_; // The implicit loop itself.
474 bool current_loop_; // Is the current arc is the implicit loop?
475 bool error_; // Error encountered?
476 std::unique_ptr<ArcIterator<FST>> aiter_;
477 StateTable state_table_; // Table from states to label table.
478 LabelTable *label_table_; // Pointer to current state's label table.
479 typename LabelTable::iterator label_it_; // Position for label.
480 typename LabelTable::iterator label_end_; // Position for last label + 1.
484 void HashMatcher<FST>::SetState(typename FST::Arc::StateId s) {
485 if (state_ == s) return;
486 // Resets everything for the state.
488 loop_.nextstate = state_;
489 aiter_.reset(new ArcIterator<FST>(*fst_, state_));
490 if (match_type_ == MATCH_NONE) {
491 FSTERROR() << "HashMatcher: Bad match type";
494 // Attempts to insert a new label table; if it already exists,
495 // no additional work is done and we simply return.
496 auto it_and_success = state_table_.emplace(state_, LabelTable());
497 if (!it_and_success.second) return;
498 // Otherwise, populate this new table.
499 // Sets instance's pointer to the label table for this state.
500 label_table_ = &(it_and_success.first->second);
501 // Populates the label table.
502 label_table_->reserve(internal::NumArcs(*fst_, state_));
503 const auto aiter_flags =
504 (match_type_ == MATCH_INPUT ? kArcILabelValue : kArcOLabelValue) |
506 aiter_->SetFlags(aiter_flags, kArcFlags);
507 for (; !aiter_->Done(); aiter_->Next()) {
508 const auto label = (match_type_ == MATCH_INPUT) ? aiter_->Value().ilabel
509 : aiter_->Value().olabel;
510 label_table_->emplace(label, aiter_->Position());
512 aiter_->SetFlags(kArcValueFlags, kArcValueFlags);
516 inline bool HashMatcher<FST>::Search(typename FST::Arc::Label match_label) {
517 auto range = label_table_->equal_range(match_label);
518 if (range.first == range.second) return false;
519 label_it_ = range.first;
520 label_end_ = range.second;
521 aiter_->Seek(label_it_->second);
525 // Specifies whether we rewrite both the input and output sides during matching.
526 enum MatcherRewriteMode {
527 MATCHER_REWRITE_AUTO = 0, // Rewrites both sides iff acceptor.
528 MATCHER_REWRITE_ALWAYS,
529 MATCHER_REWRITE_NEVER
532 // For any requested label that doesn't match at a state, this matcher
533 // considers the *unique* transition that matches the label 'phi_label'
534 // (phi = 'fail'), and recursively looks for a match at its
535 // destination. When 'phi_loop' is true, if no match is found but a
536 // phi self-loop is found, then the phi transition found is returned
537 // with the phi_label rewritten as the requested label (both sides if
538 // an acceptor, or if 'rewrite_both' is true and both input and output
539 // labels of the found transition are 'phi_label'). If 'phi_label' is
540 // kNoLabel, this special matching is not done. PhiMatcher is
541 // templated itself on a matcher, which is used to perform the
542 // underlying matching. By default, the underlying matcher is
543 // constructed by PhiMatcher. The user can instead pass in this
544 // object; in that case, PhiMatcher takes its ownership.
545 // Phi non-determinism not supported. No non-consuming symbols other
546 // than epsilon supported with the underlying template argument matcher.
548 class PhiMatcher : public MatcherBase<typename M::Arc> {
550 using FST = typename M::FST;
551 using Arc = typename FST::Arc;
552 using Label = typename Arc::Label;
553 using StateId = typename Arc::StateId;
554 using Weight = typename Arc::Weight;
556 PhiMatcher(const FST &fst, MatchType match_type, Label phi_label = kNoLabel,
557 bool phi_loop = true,
558 MatcherRewriteMode rewrite_mode = MATCHER_REWRITE_AUTO,
559 M *matcher = nullptr)
560 : matcher_(matcher ? matcher : new M(fst, match_type)),
561 match_type_(match_type),
562 phi_label_(phi_label),
566 if (match_type == MATCH_BOTH) {
567 FSTERROR() << "PhiMatcher: Bad match type";
568 match_type_ = MATCH_NONE;
571 if (rewrite_mode == MATCHER_REWRITE_AUTO) {
572 rewrite_both_ = fst.Properties(kAcceptor, true);
573 } else if (rewrite_mode == MATCHER_REWRITE_ALWAYS) {
574 rewrite_both_ = true;
576 rewrite_both_ = false;
580 PhiMatcher(const PhiMatcher<M> &matcher, bool safe = false)
581 : matcher_(new M(*matcher.matcher_, safe)),
582 match_type_(matcher.match_type_),
583 phi_label_(matcher.phi_label_),
584 rewrite_both_(matcher.rewrite_both_),
586 phi_loop_(matcher.phi_loop_),
587 error_(matcher.error_) {}
589 PhiMatcher<M> *Copy(bool safe = false) const override {
590 return new PhiMatcher<M>(*this, safe);
593 MatchType Type(bool test) const override { return matcher_->Type(test); }
595 void SetState(StateId s) final {
596 if (state_ == s) return;
597 matcher_->SetState(s);
599 has_phi_ = phi_label_ != kNoLabel;
602 bool Find(Label match_label) final;
604 bool Done() const final { return matcher_->Done(); }
606 const Arc &Value() const final {
607 if ((phi_match_ == kNoLabel) && (phi_weight_ == Weight::One())) {
608 return matcher_->Value();
609 } else if (phi_match_ == 0) { // Virtual epsilon loop.
610 phi_arc_ = Arc(kNoLabel, 0, Weight::One(), state_);
611 if (match_type_ == MATCH_OUTPUT) {
612 std::swap(phi_arc_.ilabel, phi_arc_.olabel);
616 phi_arc_ = matcher_->Value();
617 phi_arc_.weight = Times(phi_weight_, phi_arc_.weight);
618 if (phi_match_ != kNoLabel) { // Phi loop match.
620 if (phi_arc_.ilabel == phi_label_) phi_arc_.ilabel = phi_match_;
621 if (phi_arc_.olabel == phi_label_) phi_arc_.olabel = phi_match_;
622 } else if (match_type_ == MATCH_INPUT) {
623 phi_arc_.ilabel = phi_match_;
625 phi_arc_.olabel = phi_match_;
632 void Next() final { matcher_->Next(); }
634 Weight Final(StateId s) const final {
635 auto weight = matcher_->Final(s);
636 if (phi_label_ == kNoLabel || weight != Weight::Zero()) {
639 weight = Weight::One();
640 matcher_->SetState(s);
641 while (matcher_->Final(s) == Weight::Zero()) {
642 if (!matcher_->Find(phi_label_ == 0 ? -1 : phi_label_)) break;
643 weight = Times(weight, matcher_->Value().weight);
644 if (s == matcher_->Value().nextstate) {
645 return Weight::Zero(); // Does not follow phi self-loops.
647 s = matcher_->Value().nextstate;
648 matcher_->SetState(s);
650 weight = Times(weight, matcher_->Final(s));
654 ssize_t Priority(StateId s) final {
655 if (phi_label_ != kNoLabel) {
656 matcher_->SetState(s);
657 const bool has_phi = matcher_->Find(phi_label_ == 0 ? -1 : phi_label_);
658 return has_phi ? kRequirePriority : matcher_->Priority(s);
660 return matcher_->Priority(s);
664 const FST &GetFst() const override { return matcher_->GetFst(); }
666 uint64 Properties(uint64 props) const override;
668 uint32 Flags() const override {
669 if (phi_label_ == kNoLabel || match_type_ == MATCH_NONE) {
670 return matcher_->Flags();
672 return matcher_->Flags() | kRequireMatch;
675 Label PhiLabel() const { return phi_label_; }
678 mutable std::unique_ptr<M> matcher_;
679 MatchType match_type_; // Type of match requested.
680 Label phi_label_; // Label that represents the phi transition.
681 bool rewrite_both_; // Rewrite both sides when both are phi_label_?
682 bool has_phi_; // Are there possibly phis at the current state?
683 Label phi_match_; // Current label that matches phi loop.
684 mutable Arc phi_arc_; // Arc to return.
685 StateId state_; // Matcher state.
686 Weight phi_weight_; // Product of the weights of phi transitions taken.
687 bool phi_loop_; // When true, phi self-loop are allowed and treated
688 // as rho (required for Aho-Corasick).
689 bool error_; // Error encountered?
691 PhiMatcher &operator=(const PhiMatcher &) = delete;
695 inline bool PhiMatcher<M>::Find(Label label) {
696 if (label == phi_label_ && phi_label_ != kNoLabel && phi_label_ != 0) {
697 FSTERROR() << "PhiMatcher::Find: bad label (phi): " << phi_label_;
701 matcher_->SetState(state_);
702 phi_match_ = kNoLabel;
703 phi_weight_ = Weight::One();
704 // If phi_label_ == 0, there are no more true epsilon arcs.
705 if (phi_label_ == 0) {
706 if (label == kNoLabel) {
709 if (label == 0) { // but a virtual epsilon loop needs to be returned.
710 if (!matcher_->Find(kNoLabel)) {
711 return matcher_->Find(0);
718 if (!has_phi_ || label == 0 || label == kNoLabel) {
719 return matcher_->Find(label);
722 while (!matcher_->Find(label)) {
723 // Look for phi transition (if phi_label_ == 0, we need to look
724 // for -1 to avoid getting the virtual self-loop)
725 if (!matcher_->Find(phi_label_ == 0 ? -1 : phi_label_)) return false;
726 if (phi_loop_ && matcher_->Value().nextstate == s) {
730 phi_weight_ = Times(phi_weight_, matcher_->Value().weight);
731 s = matcher_->Value().nextstate;
733 if (!matcher_->Done()) {
734 FSTERROR() << "PhiMatcher: Phi non-determinism not supported";
737 matcher_->SetState(s);
743 inline uint64 PhiMatcher<M>::Properties(uint64 inprops) const {
744 auto outprops = matcher_->Properties(inprops);
745 if (error_) outprops |= kError;
746 if (match_type_ == MATCH_NONE) {
748 } else if (match_type_ == MATCH_INPUT) {
749 if (phi_label_ == 0) {
750 outprops &= ~kEpsilons | ~kIEpsilons | ~kOEpsilons;
751 outprops |= kNoEpsilons | kNoIEpsilons;
755 ~(kODeterministic | kNonODeterministic | kString | kILabelSorted |
756 kNotILabelSorted | kOLabelSorted | kNotOLabelSorted);
759 ~(kODeterministic | kAcceptor | kString | kILabelSorted |
760 kNotILabelSorted | kOLabelSorted | kNotOLabelSorted);
762 } else if (match_type_ == MATCH_OUTPUT) {
763 if (phi_label_ == 0) {
764 outprops &= ~kEpsilons | ~kIEpsilons | ~kOEpsilons;
765 outprops |= kNoEpsilons | kNoOEpsilons;
769 ~(kIDeterministic | kNonIDeterministic | kString | kILabelSorted |
770 kNotILabelSorted | kOLabelSorted | kNotOLabelSorted);
773 ~(kIDeterministic | kAcceptor | kString | kILabelSorted |
774 kNotILabelSorted | kOLabelSorted | kNotOLabelSorted);
777 // Shouldn't ever get here.
778 FSTERROR() << "PhiMatcher: Bad match type: " << match_type_;
783 // For any requested label that doesn't match at a state, this matcher
784 // considers all transitions that match the label 'rho_label' (rho =
785 // 'rest'). Each such rho transition found is returned with the
786 // rho_label rewritten as the requested label (both sides if an
787 // acceptor, or if 'rewrite_both' is true and both input and output
788 // labels of the found transition are 'rho_label'). If 'rho_label' is
789 // kNoLabel, this special matching is not done. RhoMatcher is
790 // templated itself on a matcher, which is used to perform the
791 // underlying matching. By default, the underlying matcher is
792 // constructed by RhoMatcher. The user can instead pass in this
793 // object; in that case, RhoMatcher takes its ownership.
794 // No non-consuming symbols other than epsilon supported with
795 // the underlying template argument matcher.
797 class RhoMatcher : public MatcherBase<typename M::Arc> {
799 using FST = typename M::FST;
800 using Arc = typename FST::Arc;
801 using Label = typename Arc::Label;
802 using StateId = typename Arc::StateId;
803 using Weight = typename Arc::Weight;
805 RhoMatcher(const FST &fst, MatchType match_type, Label rho_label = kNoLabel,
806 MatcherRewriteMode rewrite_mode = MATCHER_REWRITE_AUTO,
807 M *matcher = nullptr)
808 : matcher_(matcher ? matcher : new M(fst, match_type)),
809 match_type_(match_type),
810 rho_label_(rho_label),
813 if (match_type == MATCH_BOTH) {
814 FSTERROR() << "RhoMatcher: Bad match type";
815 match_type_ = MATCH_NONE;
818 if (rho_label == 0) {
819 FSTERROR() << "RhoMatcher: 0 cannot be used as rho_label";
820 rho_label_ = kNoLabel;
823 if (rewrite_mode == MATCHER_REWRITE_AUTO) {
824 rewrite_both_ = fst.Properties(kAcceptor, true);
825 } else if (rewrite_mode == MATCHER_REWRITE_ALWAYS) {
826 rewrite_both_ = true;
828 rewrite_both_ = false;
832 RhoMatcher(const RhoMatcher<M> &matcher, bool safe = false)
833 : matcher_(new M(*matcher.matcher_, safe)),
834 match_type_(matcher.match_type_),
835 rho_label_(matcher.rho_label_),
836 rewrite_both_(matcher.rewrite_both_),
837 error_(matcher.error_),
838 state_(kNoStateId) {}
840 RhoMatcher<M> *Copy(bool safe = false) const override {
841 return new RhoMatcher<M>(*this, safe);
844 MatchType Type(bool test) const override { return matcher_->Type(test); }
846 void SetState(StateId s) final {
847 if (state_ == s) return;
849 matcher_->SetState(s);
850 has_rho_ = rho_label_ != kNoLabel;
853 bool Find(Label label) final {
854 if (label == rho_label_ && rho_label_ != kNoLabel) {
855 FSTERROR() << "RhoMatcher::Find: bad label (rho)";
859 if (matcher_->Find(label)) {
860 rho_match_ = kNoLabel;
862 } else if (has_rho_ && label != 0 && label != kNoLabel &&
863 (has_rho_ = matcher_->Find(rho_label_))) {
871 bool Done() const final { return matcher_->Done(); }
873 const Arc &Value() const final {
874 if (rho_match_ == kNoLabel) {
875 return matcher_->Value();
877 rho_arc_ = matcher_->Value();
879 if (rho_arc_.ilabel == rho_label_) rho_arc_.ilabel = rho_match_;
880 if (rho_arc_.olabel == rho_label_) rho_arc_.olabel = rho_match_;
881 } else if (match_type_ == MATCH_INPUT) {
882 rho_arc_.ilabel = rho_match_;
884 rho_arc_.olabel = rho_match_;
890 void Next() final { matcher_->Next(); }
892 Weight Final(StateId s) const final { return matcher_->Final(s); }
894 ssize_t Priority(StateId s) final {
896 matcher_->SetState(s);
897 has_rho_ = matcher_->Find(rho_label_);
899 return kRequirePriority;
901 return matcher_->Priority(s);
905 const FST &GetFst() const override { return matcher_->GetFst(); }
907 uint64 Properties(uint64 props) const override;
909 uint32 Flags() const override {
910 if (rho_label_ == kNoLabel || match_type_ == MATCH_NONE) {
911 return matcher_->Flags();
913 return matcher_->Flags() | kRequireMatch;
916 Label RhoLabel() const { return rho_label_; }
919 std::unique_ptr<M> matcher_;
920 MatchType match_type_; // Type of match requested.
921 Label rho_label_; // Label that represents the rho transition
922 bool rewrite_both_; // Rewrite both sides when both are rho_label_?
923 bool has_rho_; // Are there possibly rhos at the current state?
924 Label rho_match_; // Current label that matches rho transition.
925 mutable Arc rho_arc_; // Arc to return when rho match.
926 bool error_; // Error encountered?
927 StateId state_; // Matcher state.
931 inline uint64 RhoMatcher<M>::Properties(uint64 inprops) const {
932 auto outprops = matcher_->Properties(inprops);
933 if (error_) outprops |= kError;
934 if (match_type_ == MATCH_NONE) {
936 } else if (match_type_ == MATCH_INPUT) {
939 ~(kODeterministic | kNonODeterministic | kString | kILabelSorted |
940 kNotILabelSorted | kOLabelSorted | kNotOLabelSorted);
943 ~(kODeterministic | kAcceptor | kString | kILabelSorted |
946 } else if (match_type_ == MATCH_OUTPUT) {
949 ~(kIDeterministic | kNonIDeterministic | kString | kILabelSorted |
950 kNotILabelSorted | kOLabelSorted | kNotOLabelSorted);
953 ~(kIDeterministic | kAcceptor | kString | kOLabelSorted |
957 // Shouldn't ever get here.
958 FSTERROR() << "RhoMatcher: Bad match type: " << match_type_;
963 // For any requested label, this matcher considers all transitions
964 // that match the label 'sigma_label' (sigma = "any"), and this in
965 // additions to transitions with the requested label. Each such sigma
966 // transition found is returned with the sigma_label rewritten as the
967 // requested label (both sides if an acceptor, or if 'rewrite_both' is
968 // true and both input and output labels of the found transition are
969 // 'sigma_label'). If 'sigma_label' is kNoLabel, this special
970 // matching is not done. SigmaMatcher is templated itself on a
971 // matcher, which is used to perform the underlying matching. By
972 // default, the underlying matcher is constructed by SigmaMatcher.
973 // The user can instead pass in this object; in that case,
974 // SigmaMatcher takes its ownership. No non-consuming symbols other
975 // than epsilon supported with the underlying template argument matcher.
977 class SigmaMatcher : public MatcherBase<typename M::Arc> {
979 using FST = typename M::FST;
980 using Arc = typename FST::Arc;
981 using Label = typename Arc::Label;
982 using StateId = typename Arc::StateId;
983 using Weight = typename Arc::Weight;
985 SigmaMatcher(const FST &fst, MatchType match_type,
986 Label sigma_label = kNoLabel,
987 MatcherRewriteMode rewrite_mode = MATCHER_REWRITE_AUTO,
988 M *matcher = nullptr)
989 : matcher_(matcher ? matcher : new M(fst, match_type)),
990 match_type_(match_type),
991 sigma_label_(sigma_label),
994 if (match_type == MATCH_BOTH) {
995 FSTERROR() << "SigmaMatcher: Bad match type";
996 match_type_ = MATCH_NONE;
999 if (sigma_label == 0) {
1000 FSTERROR() << "SigmaMatcher: 0 cannot be used as sigma_label";
1001 sigma_label_ = kNoLabel;
1004 if (rewrite_mode == MATCHER_REWRITE_AUTO) {
1005 rewrite_both_ = fst.Properties(kAcceptor, true);
1006 } else if (rewrite_mode == MATCHER_REWRITE_ALWAYS) {
1007 rewrite_both_ = true;
1009 rewrite_both_ = false;
1013 SigmaMatcher(const SigmaMatcher<M> &matcher, bool safe = false)
1014 : matcher_(new M(*matcher.matcher_, safe)),
1015 match_type_(matcher.match_type_),
1016 sigma_label_(matcher.sigma_label_),
1017 rewrite_both_(matcher.rewrite_both_),
1018 error_(matcher.error_),
1019 state_(kNoStateId) {}
1021 SigmaMatcher<M> *Copy(bool safe = false) const override {
1022 return new SigmaMatcher<M>(*this, safe);
1025 MatchType Type(bool test) const override { return matcher_->Type(test); }
1027 void SetState(StateId s) final {
1028 if (state_ == s) return;
1030 matcher_->SetState(s);
1032 (sigma_label_ != kNoLabel) ? matcher_->Find(sigma_label_) : false;
1035 bool Find(Label match_label) final {
1036 match_label_ = match_label;
1037 if (match_label == sigma_label_ && sigma_label_ != kNoLabel) {
1038 FSTERROR() << "SigmaMatcher::Find: bad label (sigma)";
1042 if (matcher_->Find(match_label)) {
1043 sigma_match_ = kNoLabel;
1045 } else if (has_sigma_ && match_label != 0 && match_label != kNoLabel &&
1046 matcher_->Find(sigma_label_)) {
1047 sigma_match_ = match_label;
1054 bool Done() const final { return matcher_->Done(); }
1056 const Arc &Value() const final {
1057 if (sigma_match_ == kNoLabel) {
1058 return matcher_->Value();
1060 sigma_arc_ = matcher_->Value();
1061 if (rewrite_both_) {
1062 if (sigma_arc_.ilabel == sigma_label_) sigma_arc_.ilabel = sigma_match_;
1063 if (sigma_arc_.olabel == sigma_label_) sigma_arc_.olabel = sigma_match_;
1064 } else if (match_type_ == MATCH_INPUT) {
1065 sigma_arc_.ilabel = sigma_match_;
1067 sigma_arc_.olabel = sigma_match_;
1075 if (matcher_->Done() && has_sigma_ && (sigma_match_ == kNoLabel) &&
1076 (match_label_ > 0)) {
1077 matcher_->Find(sigma_label_);
1078 sigma_match_ = match_label_;
1082 Weight Final(StateId s) const final { return matcher_->Final(s); }
1084 ssize_t Priority(StateId s) final {
1085 if (sigma_label_ != kNoLabel) {
1087 return has_sigma_ ? kRequirePriority : matcher_->Priority(s);
1089 return matcher_->Priority(s);
1093 const FST &GetFst() const override { return matcher_->GetFst(); }
1095 uint64 Properties(uint64 props) const override;
1097 uint32 Flags() const override {
1098 if (sigma_label_ == kNoLabel || match_type_ == MATCH_NONE) {
1099 return matcher_->Flags();
1101 return matcher_->Flags() | kRequireMatch;
1104 Label SigmaLabel() const { return sigma_label_; }
1107 std::unique_ptr<M> matcher_;
1108 MatchType match_type_; // Type of match requested.
1109 Label sigma_label_; // Label that represents the sigma transition.
1110 bool rewrite_both_; // Rewrite both sides when both are sigma_label_?
1111 bool has_sigma_; // Are there sigmas at the current state?
1112 Label sigma_match_; // Current label that matches sigma transition.
1113 mutable Arc sigma_arc_; // Arc to return when sigma match.
1114 Label match_label_; // Label being matched.
1115 bool error_; // Error encountered?
1116 StateId state_; // Matcher state.
1120 inline uint64 SigmaMatcher<M>::Properties(uint64 inprops) const {
1121 auto outprops = matcher_->Properties(inprops);
1122 if (error_) outprops |= kError;
1123 if (match_type_ == MATCH_NONE) {
1125 } else if (rewrite_both_) {
1127 ~(kIDeterministic | kNonIDeterministic | kODeterministic |
1128 kNonODeterministic | kILabelSorted | kNotILabelSorted |
1129 kOLabelSorted | kNotOLabelSorted | kString);
1130 } else if (match_type_ == MATCH_INPUT) {
1132 ~(kIDeterministic | kNonIDeterministic | kODeterministic |
1133 kNonODeterministic | kILabelSorted | kNotILabelSorted | kString |
1135 } else if (match_type_ == MATCH_OUTPUT) {
1137 ~(kIDeterministic | kNonIDeterministic | kODeterministic |
1138 kNonODeterministic | kOLabelSorted | kNotOLabelSorted | kString |
1141 // Shouldn't ever get here.
1142 FSTERROR() << "SigmaMatcher: Bad match type: " << match_type_;
1147 // Flags for MultiEpsMatcher.
1149 // Return multi-epsilon arcs for Find(kNoLabel).
1150 const uint32 kMultiEpsList = 0x00000001;
1152 // Return a kNolabel loop for Find(multi_eps).
1153 const uint32 kMultiEpsLoop = 0x00000002;
1155 // MultiEpsMatcher: allows treating multiple non-0 labels as
1156 // non-consuming labels in addition to 0 that is always
1157 // non-consuming. Precise behavior controlled by 'flags' argument. By
1158 // default, the underlying matcher is constructed by
1159 // MultiEpsMatcher. The user can instead pass in this object; in that
1160 // case, MultiEpsMatcher takes its ownership iff 'own_matcher' is
1163 class MultiEpsMatcher {
1165 using FST = typename M::FST;
1166 using Arc = typename FST::Arc;
1167 using Label = typename Arc::Label;
1168 using StateId = typename Arc::StateId;
1169 using Weight = typename Arc::Weight;
1171 MultiEpsMatcher(const FST &fst, MatchType match_type,
1172 uint32 flags = (kMultiEpsLoop | kMultiEpsList),
1173 M *matcher = nullptr, bool own_matcher = true)
1174 : matcher_(matcher ? matcher : new M(fst, match_type)),
1176 own_matcher_(matcher ? own_matcher : true) {
1177 if (match_type == MATCH_INPUT) {
1178 loop_.ilabel = kNoLabel;
1182 loop_.olabel = kNoLabel;
1184 loop_.weight = Weight::One();
1185 loop_.nextstate = kNoStateId;
1188 MultiEpsMatcher(const MultiEpsMatcher<M> &matcher, bool safe = false)
1189 : matcher_(new M(*matcher.matcher_, safe)),
1190 flags_(matcher.flags_),
1192 multi_eps_labels_(matcher.multi_eps_labels_),
1193 loop_(matcher.loop_) {
1194 loop_.nextstate = kNoStateId;
1197 ~MultiEpsMatcher() {
1198 if (own_matcher_) delete matcher_;
1201 MultiEpsMatcher<M> *Copy(bool safe = false) const {
1202 return new MultiEpsMatcher<M>(*this, safe);
1205 MatchType Type(bool test) const { return matcher_->Type(test); }
1207 void SetState(StateId state) {
1208 matcher_->SetState(state);
1209 loop_.nextstate = state;
1212 bool Find(Label label);
1214 bool Done() const { return done_; }
1216 const Arc &Value() const { return current_loop_ ? loop_ : matcher_->Value(); }
1219 if (!current_loop_) {
1221 done_ = matcher_->Done();
1222 if (done_ && multi_eps_iter_ != multi_eps_labels_.End()) {
1224 while ((multi_eps_iter_ != multi_eps_labels_.End()) &&
1225 !matcher_->Find(*multi_eps_iter_)) {
1228 if (multi_eps_iter_ != multi_eps_labels_.End()) {
1231 done_ = !matcher_->Find(kNoLabel);
1239 const FST &GetFst() const { return matcher_->GetFst(); }
1241 uint64 Properties(uint64 props) const { return matcher_->Properties(props); }
1243 const M *GetMatcher() const { return matcher_; }
1245 Weight Final(StateId s) const { return matcher_->Final(s); }
1247 uint32 Flags() const { return matcher_->Flags(); }
1249 ssize_t Priority(StateId s) { return matcher_->Priority(s); }
1251 void AddMultiEpsLabel(Label label) {
1253 FSTERROR() << "MultiEpsMatcher: Bad multi-eps label: 0";
1255 multi_eps_labels_.Insert(label);
1259 void RemoveMultiEpsLabel(Label label) {
1261 FSTERROR() << "MultiEpsMatcher: Bad multi-eps label: 0";
1263 multi_eps_labels_.Erase(label);
1267 void ClearMultiEpsLabels() { multi_eps_labels_.Clear(); }
1272 bool own_matcher_; // Does this class delete the matcher?
1274 // Multi-eps label set.
1275 CompactSet<Label, kNoLabel> multi_eps_labels_;
1276 typename CompactSet<Label, kNoLabel>::const_iterator multi_eps_iter_;
1278 bool current_loop_; // Current arc is the implicit loop?
1279 mutable Arc loop_; // For non-consuming symbols.
1280 bool done_; // Matching done?
1282 MultiEpsMatcher &operator=(const MultiEpsMatcher &) = delete;
1286 inline bool MultiEpsMatcher<M>::Find(Label label) {
1287 multi_eps_iter_ = multi_eps_labels_.End();
1288 current_loop_ = false;
1291 ret = matcher_->Find(0);
1292 } else if (label == kNoLabel) {
1293 if (flags_ & kMultiEpsList) {
1294 // Returns all non-consuming arcs (including epsilon).
1295 multi_eps_iter_ = multi_eps_labels_.Begin();
1296 while ((multi_eps_iter_ != multi_eps_labels_.End()) &&
1297 !matcher_->Find(*multi_eps_iter_)) {
1300 if (multi_eps_iter_ != multi_eps_labels_.End()) {
1303 ret = matcher_->Find(kNoLabel);
1306 // Returns all epsilon arcs.
1307 ret = matcher_->Find(kNoLabel);
1309 } else if ((flags_ & kMultiEpsLoop) &&
1310 multi_eps_labels_.Find(label) != multi_eps_labels_.End()) {
1311 // Returns implicit loop.
1312 current_loop_ = true;
1315 ret = matcher_->Find(label);
1321 // This class discards any implicit matches (e.g., the implicit epsilon
1322 // self-loops in the SortedMatcher). Matchers are most often used in
1323 // composition/intersection where the implicit matches are needed
1324 // e.g. for epsilon processing. However, if a matcher is simply being
1325 // used to look-up explicit label matches, this class saves the user
1326 // from having to check for and discard the unwanted implicit matches
1329 class ExplicitMatcher : public MatcherBase<typename M::Arc> {
1331 using FST = typename M::FST;
1332 using Arc = typename FST::Arc;
1333 using Label = typename Arc::Label;
1334 using StateId = typename Arc::StateId;
1335 using Weight = typename Arc::Weight;
1337 ExplicitMatcher(const FST &fst, MatchType match_type, M *matcher = nullptr)
1338 : matcher_(matcher ? matcher : new M(fst, match_type)),
1339 match_type_(match_type),
1342 ExplicitMatcher(const ExplicitMatcher<M> &matcher, bool safe = false)
1343 : matcher_(new M(*matcher.matcher_, safe)),
1344 match_type_(matcher.match_type_),
1345 error_(matcher.error_) {}
1347 ExplicitMatcher<M> *Copy(bool safe = false) const override {
1348 return new ExplicitMatcher<M>(*this, safe);
1351 MatchType Type(bool test) const override { return matcher_->Type(test); }
1353 void SetState(StateId s) final { matcher_->SetState(s); }
1355 bool Find(Label label) final {
1356 matcher_->Find(label);
1361 bool Done() const final { return matcher_->Done(); }
1363 const Arc &Value() const final { return matcher_->Value(); }
1370 Weight Final(StateId s) const final { return matcher_->Final(s); }
1372 ssize_t Priority(StateId s) final { return matcher_->Priority(s); }
1374 const FST &GetFst() const final { return matcher_->GetFst(); }
1376 uint64 Properties(uint64 inprops) const override {
1377 return matcher_->Properties(inprops);
1380 const M *GetMatcher() const { return matcher_.get(); }
1382 uint32 Flags() const override { return matcher_->Flags(); }
1385 // Checks current arc if available and explicit. If not available, stops. If
1386 // not explicit, checks next ones.
1388 for (; !matcher_->Done(); matcher_->Next()) {
1389 const auto label = match_type_ == MATCH_INPUT ? matcher_->Value().ilabel
1390 : matcher_->Value().olabel;
1391 if (label != kNoLabel) return;
1395 std::unique_ptr<M> matcher_;
1396 MatchType match_type_; // Type of match requested.
1397 bool error_; // Error encountered?
1400 // Generic matcher, templated on the FST definition.
1402 // Here is a typical use:
1404 // Matcher<StdFst> matcher(fst, MATCH_INPUT);
1405 // matcher.SetState(state);
1406 // if (matcher.Find(label))
1407 // for (; !matcher.Done(); matcher.Next()) {
1408 // auto &arc = matcher.Value();
1415 using Arc = typename F::Arc;
1416 using Label = typename Arc::Label;
1417 using StateId = typename Arc::StateId;
1418 using Weight = typename Arc::Weight;
1420 Matcher(const FST &fst, MatchType match_type) {
1421 base_.reset(fst.InitMatcher(match_type));
1422 if (!base_) base_.reset(new SortedMatcher<FST>(fst, match_type));
1425 Matcher(const Matcher<FST> &matcher, bool safe = false) {
1426 base_.reset(matcher.base_->Copy(safe));
1429 // Takes ownership of the provided matcher.
1430 explicit Matcher(MatcherBase<Arc> *base_matcher) {
1431 base_.reset(base_matcher);
1434 Matcher<FST> *Copy(bool safe = false) const {
1435 return new Matcher<FST>(*this, safe);
1438 MatchType Type(bool test) const { return base_->Type(test); }
1440 void SetState(StateId s) { base_->SetState(s); }
1442 bool Find(Label label) { return base_->Find(label); }
1444 bool Done() const { return base_->Done(); }
1446 const Arc &Value() const { return base_->Value(); }
1448 void Next() { base_->Next(); }
1450 const FST &GetFst() const {
1451 return static_cast<const FST &>(base_->GetFst());
1454 uint64 Properties(uint64 props) const { return base_->Properties(props); }
1456 Weight Final(StateId s) const { return base_->Final(s); }
1458 uint32 Flags() const { return base_->Flags() & kMatcherFlags; }
1460 ssize_t Priority(StateId s) { return base_->Priority(s); }
1463 std::unique_ptr<MatcherBase<Arc>> base_;
1468 #endif // FST_LIB_MATCHER_H_