1 // See www.openfst.org for extensive documentation on this weighted
2 // finite-state transducer library.
4 // Simple concrete, mutable FST whose states and arcs are stored in STL vectors.
6 #ifndef FST_VECTOR_FST_H_
7 #define FST_VECTOR_FST_H_
15 #include <fst/fst-decl.h> // For optional argument declarations
16 #include <fst/mutable-fst.h>
17 #include <fst/test-properties.h>
22 template <class A, class S>
25 template <class F, class G>
26 void Cast(const F &, G *);
28 // Arcs (of type A) implemented by an STL vector per state. M specifies Arc
29 // allocator (default declared in fst-decl.h).
30 template <class A, class M /* = std::allocator<A> */>
34 using StateId = typename Arc::StateId;
35 using Weight = typename Arc::Weight;
36 using ArcAllocator = M;
37 using StateAllocator =
38 typename ArcAllocator::template rebind<VectorState<Arc, M>>::other;
40 // Provide STL allocator for arcs.
41 explicit VectorState(const ArcAllocator &alloc)
42 : final_(Weight::Zero()), niepsilons_(0), noepsilons_(0), arcs_(alloc) {}
44 VectorState(const VectorState<A, M> &state, const ArcAllocator &alloc)
45 : final_(state.Final()),
46 niepsilons_(state.NumInputEpsilons()),
47 noepsilons_(state.NumOutputEpsilons()),
48 arcs_(state.arcs_.begin(), state.arcs_.end(), alloc) {}
51 final_ = Weight::Zero();
57 Weight Final() const { return final_; }
59 size_t NumInputEpsilons() const { return niepsilons_; }
61 size_t NumOutputEpsilons() const { return noepsilons_; }
63 size_t NumArcs() const { return arcs_.size(); }
65 const Arc &GetArc(size_t n) const { return arcs_[n]; }
67 const Arc *Arcs() const { return !arcs_.empty() ? &arcs_[0] : nullptr; }
69 Arc *MutableArcs() { return !arcs_.empty() ? &arcs_[0] : nullptr; }
71 void ReserveArcs(size_t n) { arcs_.reserve(n); }
73 void SetFinal(Weight weight) { final_ = std::move(weight); }
75 void SetNumInputEpsilons(size_t n) { niepsilons_ = n; }
77 void SetNumOutputEpsilons(size_t n) { noepsilons_ = n; }
79 void AddArc(const Arc &arc) {
80 if (arc.ilabel == 0) ++niepsilons_;
81 if (arc.olabel == 0) ++noepsilons_;
85 void SetArc(const Arc &arc, size_t n) {
86 if (arcs_[n].ilabel == 0) --niepsilons_;
87 if (arcs_[n].olabel == 0) --noepsilons_;
88 if (arc.ilabel == 0) ++niepsilons_;
89 if (arc.olabel == 0) ++noepsilons_;
99 void DeleteArcs(size_t n) {
100 for (size_t i = 0; i < n; ++i) {
101 if (arcs_.back().ilabel == 0) --niepsilons_;
102 if (arcs_.back().olabel == 0) --noepsilons_;
107 // For state class allocation.
108 void *operator new(size_t size, StateAllocator *alloc) {
109 return alloc->allocate(1);
112 // For state destruction and memory freeing.
113 static void Destroy(VectorState<A, M> *state, StateAllocator *alloc) {
115 state->~VectorState<A, M>();
116 alloc->deallocate(state, 1);
121 Weight final_; // Final weight.
122 size_t niepsilons_; // # of input epsilons
123 size_t noepsilons_; // # of output epsilons
124 std::vector<A, ArcAllocator> arcs_; // Arc container.
129 // States are implemented by STL vectors, templated on the
130 // State definition. This does not manage the Fst properties.
132 class VectorFstBaseImpl : public FstImpl<typename S::Arc> {
135 using Arc = typename State::Arc;
136 using StateId = typename Arc::StateId;
137 using Weight = typename Arc::Weight;
139 VectorFstBaseImpl() : start_(kNoStateId) {}
141 ~VectorFstBaseImpl() override {
142 for (StateId s = 0; s < states_.size(); ++s) {
143 State::Destroy(states_[s], &state_alloc_);
147 StateId Start() const { return start_; }
149 Weight Final(StateId state) const { return states_[state]->Final(); }
151 StateId NumStates() const { return states_.size(); }
153 size_t NumArcs(StateId state) const { return states_[state]->NumArcs(); }
155 size_t NumInputEpsilons(StateId state) const {
156 return GetState(state)->NumInputEpsilons();
159 size_t NumOutputEpsilons(StateId state) const {
160 return GetState(state)->NumOutputEpsilons();
163 void SetStart(StateId state) { start_ = state; }
165 void SetFinal(StateId state, Weight weight) {
166 states_[state]->SetFinal(std::move(weight));
170 states_.push_back(new (&state_alloc_) State(arc_alloc_));
171 return states_.size() - 1;
174 StateId AddState(State *state) {
175 states_.push_back(state);
176 return states_.size() - 1;
179 void AddArc(StateId state, const Arc &arc) { states_[state]->AddArc(arc); }
181 void DeleteStates(const std::vector<StateId> &dstates) {
182 std::vector<StateId> newid(states_.size(), 0);
183 for (StateId i = 0; i < dstates.size(); ++i) newid[dstates[i]] = kNoStateId;
185 for (StateId state = 0; state < states_.size(); ++state) {
186 if (newid[state] != kNoStateId) {
187 newid[state] = nstates;
188 if (state != nstates) states_[nstates] = states_[state];
191 State::Destroy(states_[state], &state_alloc_);
194 states_.resize(nstates);
195 for (StateId state = 0; state < states_.size(); ++state) {
196 auto *arcs = states_[state]->MutableArcs();
198 auto nieps = states_[state]->NumInputEpsilons();
199 auto noeps = states_[state]->NumOutputEpsilons();
200 for (size_t i = 0; i < states_[state]->NumArcs(); ++i) {
201 const auto t = newid[arcs[i].nextstate];
202 if (t != kNoStateId) {
203 arcs[i].nextstate = t;
204 if (i != narcs) arcs[narcs] = arcs[i];
207 if (arcs[i].ilabel == 0) --nieps;
208 if (arcs[i].olabel == 0) --noeps;
211 states_[state]->DeleteArcs(states_[state]->NumArcs() - narcs);
212 states_[state]->SetNumInputEpsilons(nieps);
213 states_[state]->SetNumOutputEpsilons(noeps);
215 if (Start() != kNoStateId) SetStart(newid[Start()]);
218 void DeleteStates() {
219 for (StateId state = 0; state < states_.size(); ++state) {
220 State::Destroy(states_[state], &state_alloc_);
223 SetStart(kNoStateId);
226 void DeleteArcs(StateId state, size_t n) { states_[state]->DeleteArcs(n); }
228 void DeleteArcs(StateId state) { states_[state]->DeleteArcs(); }
230 State *GetState(StateId state) { return states_[state]; }
232 const State *GetState(StateId state) const { return states_[state]; }
234 void SetState(StateId state, State *vstate) { states_[state] = vstate; }
236 void ReserveStates(StateId n) { states_.reserve(n); }
238 void ReserveArcs(StateId state, size_t n) { states_[state]->ReserveArcs(n); }
240 // Provide information needed for generic state iterator.
241 void InitStateIterator(StateIteratorData<Arc> *data) const {
242 data->base = nullptr;
243 data->nstates = states_.size();
246 // Provide information needed for generic arc iterator.
247 void InitArcIterator(StateId state, ArcIteratorData<Arc> *data) const {
248 data->base = nullptr;
249 data->narcs = states_[state]->NumArcs();
250 data->arcs = states_[state]->Arcs();
251 data->ref_count = nullptr;
255 std::vector<State *> states_; // States represenation.
256 StateId start_; // Initial state.
257 typename State::StateAllocator state_alloc_; // For state allocation.
258 typename State::ArcAllocator arc_alloc_; // For arc allocation.
260 VectorFstBaseImpl(const VectorFstBaseImpl &) = delete;
261 VectorFstBaseImpl &operator=(const VectorFstBaseImpl &) = delete;
264 // This is a VectorFstBaseImpl container that holds VectorStates and manages FST
267 class VectorFstImpl : public VectorFstBaseImpl<S> {
270 using Arc = typename State::Arc;
271 using Label = typename Arc::Label;
272 using StateId = typename Arc::StateId;
273 using Weight = typename Arc::Weight;
275 using FstImpl<Arc>::SetInputSymbols;
276 using FstImpl<Arc>::SetOutputSymbols;
277 using FstImpl<Arc>::SetType;
278 using FstImpl<Arc>::SetProperties;
279 using FstImpl<Arc>::Properties;
281 using VectorFstBaseImpl<S>::Start;
282 using VectorFstBaseImpl<S>::NumStates;
283 using VectorFstBaseImpl<S>::GetState;
284 using VectorFstBaseImpl<S>::ReserveArcs;
286 friend class MutableArcIterator<VectorFst<Arc, S>>;
288 using BaseImpl = VectorFstBaseImpl<S>;
292 SetProperties(kNullProperties | kStaticProperties);
295 explicit VectorFstImpl(const Fst<Arc> &fst);
297 static VectorFstImpl<S> *Read(std::istream &strm, const FstReadOptions &opts);
299 void SetStart(StateId state) {
300 BaseImpl::SetStart(state);
301 SetProperties(SetStartProperties(Properties()));
304 void SetFinal(StateId state, Weight weight) {
305 const auto old_weight = BaseImpl::Final(state);
306 BaseImpl::SetFinal(state, std::move(weight));
307 SetProperties(SetFinalProperties(Properties(), old_weight, weight));
311 const auto state = BaseImpl::AddState();
312 SetProperties(AddStateProperties(Properties()));
316 void AddArc(StateId state, const Arc &arc) {
317 auto *vstate = GetState(state);
318 const auto *parc = vstate->NumArcs() == 0
320 : &(vstate->GetArc(vstate->NumArcs() - 1));
321 SetProperties(AddArcProperties(Properties(), state, arc, parc));
322 BaseImpl::AddArc(state, arc);
325 void DeleteStates(const std::vector<StateId> &dstates) {
326 BaseImpl::DeleteStates(dstates);
327 SetProperties(DeleteStatesProperties(Properties()));
330 void DeleteStates() {
331 BaseImpl::DeleteStates();
332 SetProperties(DeleteAllStatesProperties(Properties(), kStaticProperties));
335 void DeleteArcs(StateId state, size_t n) {
336 BaseImpl::DeleteArcs(state, n);
337 SetProperties(DeleteArcsProperties(Properties()));
340 void DeleteArcs(StateId state) {
341 BaseImpl::DeleteArcs(state);
342 SetProperties(DeleteArcsProperties(Properties()));
345 // Properties always true of this FST class
346 static constexpr uint64 kStaticProperties = kExpanded | kMutable;
349 // Minimum file format version supported.
350 static constexpr int kMinFileVersion = 2;
354 constexpr uint64 VectorFstImpl<S>::kStaticProperties;
357 constexpr int VectorFstImpl<S>::kMinFileVersion;
360 VectorFstImpl<S>::VectorFstImpl(const Fst<Arc> &fst) {
362 SetInputSymbols(fst.InputSymbols());
363 SetOutputSymbols(fst.OutputSymbols());
364 BaseImpl::SetStart(fst.Start());
365 if (fst.Properties(kExpanded, false)) {
366 BaseImpl::ReserveStates(CountStates(fst));
368 for (StateIterator<Fst<Arc>> siter(fst); !siter.Done(); siter.Next()) {
369 const auto state = siter.Value();
370 BaseImpl::AddState();
371 BaseImpl::SetFinal(state, fst.Final(state));
372 ReserveArcs(state, fst.NumArcs(state));
373 for (ArcIterator<Fst<Arc>> aiter(fst, state); !aiter.Done(); aiter.Next()) {
374 const auto &arc = aiter.Value();
375 BaseImpl::AddArc(state, arc);
378 SetProperties(fst.Properties(kCopyProperties, false) | kStaticProperties);
382 VectorFstImpl<S> *VectorFstImpl<S>::Read(std::istream &strm,
383 const FstReadOptions &opts) {
384 std::unique_ptr<VectorFstImpl<S>> impl(new VectorFstImpl());
386 if (!impl->ReadHeader(strm, opts, kMinFileVersion, &hdr)) return nullptr;
387 impl->BaseImpl::SetStart(hdr.Start());
388 if (hdr.NumStates() != kNoStateId) impl->ReserveStates(hdr.NumStates());
390 for (; hdr.NumStates() == kNoStateId || state < hdr.NumStates(); ++state) {
392 if (!weight.Read(strm)) break;
393 impl->BaseImpl::AddState();
394 auto *vstate = impl->GetState(state);
395 vstate->SetFinal(weight);
397 ReadType(strm, &narcs);
399 LOG(ERROR) << "VectorFst::Read: Read failed: " << opts.source;
402 impl->ReserveArcs(state, narcs);
403 for (int64 i = 0; i < narcs; ++i) {
405 ReadType(strm, &arc.ilabel);
406 ReadType(strm, &arc.olabel);
407 arc.weight.Read(strm);
408 ReadType(strm, &arc.nextstate);
410 LOG(ERROR) << "VectorFst::Read: Read failed: " << opts.source;
413 impl->BaseImpl::AddArc(state, arc);
416 if (hdr.NumStates() != kNoStateId && state != hdr.NumStates()) {
417 LOG(ERROR) << "VectorFst::Read: Unexpected end of file: " << opts.source;
420 return impl.release();
423 } // namespace internal
425 // Simple concrete, mutable FST. This class attaches interface to implementation
426 // and handles reference counting, delegating most methods to ImplToMutableFst.
427 // Also supports ReserveStates and ReserveArcs methods (cf. STL vector methods).
428 // The second optional template argument gives the State definition.
429 template <class A, class S /* = VectorState<A> */>
430 class VectorFst : public ImplToMutableFst<internal::VectorFstImpl<S>> {
433 using StateId = typename Arc::StateId;
436 using Impl = internal::VectorFstImpl<State>;
438 friend class StateIterator<VectorFst<Arc, State>>;
439 friend class ArcIterator<VectorFst<Arc, State>>;
440 friend class MutableArcIterator<VectorFst<A, S>>;
442 template <class F, class G>
443 friend void Cast(const F &, G *);
445 VectorFst() : ImplToMutableFst<Impl>(std::make_shared<Impl>()) {}
447 explicit VectorFst(const Fst<Arc> &fst)
448 : ImplToMutableFst<Impl>(std::make_shared<Impl>(fst)) {}
450 VectorFst(const VectorFst<Arc, State> &fst, bool safe = false)
451 : ImplToMutableFst<Impl>(fst) {}
453 // Get a copy of this VectorFst. See Fst<>::Copy() for further doc.
454 VectorFst<Arc, State> *Copy(bool safe = false) const override {
455 return new VectorFst<Arc, State>(*this, safe);
458 VectorFst<Arc, State> &operator=(const VectorFst<Arc, State> &fst) {
459 SetImpl(fst.GetSharedImpl());
463 VectorFst<Arc, State> &operator=(const Fst<Arc> &fst) override {
464 if (this != &fst) SetImpl(std::make_shared<Impl>(fst));
468 // Reads a VectorFst from an input stream, returning nullptr on error.
469 static VectorFst<Arc, State> *Read(std::istream &strm,
470 const FstReadOptions &opts) {
471 auto *impl = Impl::Read(strm, opts);
472 return impl ? new VectorFst<Arc, State>(std::shared_ptr<Impl>(impl))
476 // Read a VectorFst from a file, returning nullptr on error; empty filename
477 // reads from standard input.
478 static VectorFst<Arc, State> *Read(const string &filename) {
479 auto *impl = ImplToExpandedFst<Impl, MutableFst<Arc>>::Read(filename);
480 return impl ? new VectorFst<Arc, State>(std::shared_ptr<Impl>(impl))
484 bool Write(std::ostream &strm, const FstWriteOptions &opts) const override {
485 return WriteFst(*this, strm, opts);
488 bool Write(const string &filename) const override {
489 return Fst<Arc>::WriteFile(filename);
493 static bool WriteFst(const FST &fst, std::ostream &strm,
494 const FstWriteOptions &opts);
496 void InitStateIterator(StateIteratorData<Arc> *data) const override {
497 GetImpl()->InitStateIterator(data);
500 void InitArcIterator(StateId s, ArcIteratorData<Arc> *data) const override {
501 GetImpl()->InitArcIterator(s, data);
504 inline void InitMutableArcIterator(StateId s,
505 MutableArcIteratorData<Arc> *) override;
507 using ImplToMutableFst<Impl, MutableFst<Arc>>::ReserveArcs;
508 using ImplToMutableFst<Impl, MutableFst<Arc>>::ReserveStates;
511 using ImplToMutableFst<Impl, MutableFst<Arc>>::GetImpl;
512 using ImplToMutableFst<Impl, MutableFst<Arc>>::MutateCheck;
513 using ImplToMutableFst<Impl, MutableFst<Arc>>::SetImpl;
515 explicit VectorFst(std::shared_ptr<Impl> impl)
516 : ImplToMutableFst<Impl>(impl) {}
519 // Writes FST to file in Vector format, potentially with a pass over the machine
520 // before writing to compute number of states.
521 template <class Arc, class State>
523 bool VectorFst<Arc, State>::WriteFst(const FST &fst, std::ostream &strm,
524 const FstWriteOptions &opts) {
525 static constexpr int file_version = 2;
526 bool update_header = true;
528 hdr.SetStart(fst.Start());
529 hdr.SetNumStates(kNoStateId);
530 size_t start_offset = 0;
531 if (fst.Properties(kExpanded, false) || opts.stream_write ||
532 (start_offset = strm.tellp()) != -1) {
533 hdr.SetNumStates(CountStates(fst));
534 update_header = false;
536 const auto properties =
537 fst.Properties(kCopyProperties, false) | Impl::kStaticProperties;
538 internal::FstImpl<Arc>::WriteFstHeader(fst, strm, opts, file_version,
539 "vector", properties, &hdr);
540 StateId num_states = 0;
541 for (StateIterator<FST> siter(fst); !siter.Done(); siter.Next()) {
542 const auto s = siter.Value();
543 fst.Final(s).Write(strm);
544 const int64 narcs = fst.NumArcs(s);
545 WriteType(strm, narcs);
546 for (ArcIterator<FST> aiter(fst, s); !aiter.Done(); aiter.Next()) {
547 const auto &arc = aiter.Value();
548 WriteType(strm, arc.ilabel);
549 WriteType(strm, arc.olabel);
550 arc.weight.Write(strm);
551 WriteType(strm, arc.nextstate);
557 LOG(ERROR) << "VectorFst::Write: Write failed: " << opts.source;
561 hdr.SetNumStates(num_states);
562 return internal::FstImpl<Arc>::UpdateFstHeader(
563 fst, strm, opts, file_version, "vector", properties, &hdr,
566 if (num_states != hdr.NumStates()) {
567 LOG(ERROR) << "Inconsistent number of states observed during write";
574 // Specialization for VectorFst; see generic version in fst.h for sample usage
575 // (but use the VectorFst type instead). This version should inline.
576 template <class Arc, class State>
577 class StateIterator<VectorFst<Arc, State>> {
579 using StateId = typename Arc::StateId;
581 explicit StateIterator(const VectorFst<Arc, State> &fst)
582 : nstates_(fst.GetImpl()->NumStates()), s_(0) {}
584 bool Done() const { return s_ >= nstates_; }
586 StateId Value() const { return s_; }
588 void Next() { ++s_; }
590 void Reset() { s_ = 0; }
593 const StateId nstates_;
597 // Specialization for VectorFst; see generic version in fst.h for sample usage
598 // (but use the VectorFst type instead). This version should inline.
599 template <class Arc, class State>
600 class ArcIterator<VectorFst<Arc, State>> {
602 using StateId = typename Arc::StateId;
604 ArcIterator(const VectorFst<Arc, State> &fst, StateId s)
605 : arcs_(fst.GetImpl()->GetState(s)->Arcs()),
606 narcs_(fst.GetImpl()->GetState(s)->NumArcs()),
609 bool Done() const { return i_ >= narcs_; }
611 const Arc &Value() const { return arcs_[i_]; }
613 void Next() { ++i_; }
615 void Reset() { i_ = 0; }
617 void Seek(size_t a) { i_ = a; }
619 size_t Position() const { return i_; }
621 constexpr uint32 Flags() const { return kArcValueFlags; }
623 void SetFlags(uint32, uint32) {}
631 // Specialization for VectorFst; see generic version in mutable-fst.h for sample
632 // usage (but use the VectorFst type instead). This version should inline.
633 template <class Arc, class State>
634 class MutableArcIterator<VectorFst<Arc, State>>
635 : public MutableArcIteratorBase<Arc> {
637 using StateId = typename Arc::StateId;
638 using Weight = typename Arc::Weight;
640 MutableArcIterator(VectorFst<Arc, State> *fst, StateId s) : i_(0) {
642 state_ = fst->GetMutableImpl()->GetState(s);
643 properties_ = &fst->GetImpl()->properties_;
646 bool Done() const final { return i_ >= state_->NumArcs(); }
648 const Arc &Value() const final { return state_->GetArc(i_); }
650 void Next() final { ++i_; }
652 size_t Position() const final { return i_; }
654 void Reset() final { i_ = 0; }
656 void Seek(size_t a) final { i_ = a; }
658 void SetValue(const Arc &arc) final {
659 const auto &oarc = state_->GetArc(i_);
660 if (oarc.ilabel != oarc.olabel) *properties_ &= ~kNotAcceptor;
661 if (oarc.ilabel == 0) {
662 *properties_ &= ~kIEpsilons;
663 if (oarc.olabel == 0) *properties_ &= ~kEpsilons;
665 if (oarc.olabel == 0) *properties_ &= ~kOEpsilons;
666 if (oarc.weight != Weight::Zero() && oarc.weight != Weight::One()) {
667 *properties_ &= ~kWeighted;
669 state_->SetArc(arc, i_);
670 if (arc.ilabel != arc.olabel) {
671 *properties_ |= kNotAcceptor;
672 *properties_ &= ~kAcceptor;
674 if (arc.ilabel == 0) {
675 *properties_ |= kIEpsilons;
676 *properties_ &= ~kNoIEpsilons;
677 if (arc.olabel == 0) {
678 *properties_ |= kEpsilons;
679 *properties_ &= ~kNoEpsilons;
682 if (arc.olabel == 0) {
683 *properties_ |= kOEpsilons;
684 *properties_ &= ~kNoOEpsilons;
686 if (arc.weight != Weight::Zero() && arc.weight != Weight::One()) {
687 *properties_ |= kWeighted;
688 *properties_ &= ~kUnweighted;
690 *properties_ &= kSetArcProperties | kAcceptor | kNotAcceptor | kEpsilons |
691 kNoEpsilons | kIEpsilons | kNoIEpsilons | kOEpsilons |
692 kNoOEpsilons | kWeighted | kUnweighted;
695 uint32 Flags() const final { return kArcValueFlags; }
697 void SetFlags(uint32, uint32) final {}
705 // Provides information needed for the generic mutable arc iterator.
706 template <class Arc, class State>
707 inline void VectorFst<Arc, State>::InitMutableArcIterator(
708 StateId s, MutableArcIteratorData<Arc> *data) {
709 data->base = new MutableArcIterator<VectorFst<Arc, State>>(this, s);
712 // A useful alias when using StdArc.
713 using StdVectorFst = VectorFst<StdArc>;
717 #endif // FST_VECTOR_FST_H_